From b3f134766195e32ea1f8d3e640d4dcccf5aa4f23 Mon Sep 17 00:00:00 2001
From: obrien
Date: Thu, 20 Jun 2002 20:54:54 +0000
Subject: Remove GAWK. I removed it from the build 2 weeks ago and the world
 did not end. So finish the deed.

---
 contrib/awk/ACKNOWLEDGMENT | 34 -
 contrib/awk/AUTHORS | 9 -
 contrib/awk/COPYING | 340 -
 contrib/awk/ChangeLog | 2641 ---
 contrib/awk/FREEBSD-upgrade | 24 -
 contrib/awk/FUTURES | 85 -
 contrib/awk/INSTALL | 181 -
 contrib/awk/LIMITATIONS | 16 -
 contrib/awk/Makefile.am | 161 -
 contrib/awk/Makefile.in | 797 -
 contrib/awk/NEWS | 2077 --
 contrib/awk/PORTS | 29 -
 contrib/awk/POSIX.STD | 119 -
 contrib/awk/PROBLEMS | 9 -
 contrib/awk/README | 104 -
 contrib/awk/README_d/README.FIRST | 21 -
 contrib/awk/README_d/README.tests | 46 -
 contrib/awk/acconfig.h | 39 -
 contrib/awk/aclocal.m4 | 129 -
 contrib/awk/alloca.c | 496 -
 contrib/awk/array.c | 858 -
 contrib/awk/awk.h | 1017 -
 contrib/awk/awk.y | 2479 ---
 contrib/awk/awkgram.y | 2875 ---
 contrib/awk/awklib/ChangeLog | 78 -
 contrib/awk/awklib/Makefile.am | 75 -
 contrib/awk/awklib/Makefile.in | 431 -
 contrib/awk/awklib/eg/data/BBS-list | 11 -
 contrib/awk/awklib/eg/data/inventory-shipped | 17 -
 contrib/awk/awklib/eg/lib/assert.awk | 20 -
 contrib/awk/awklib/eg/lib/bits2str.awk | 16 -
 contrib/awk/awklib/eg/lib/cliff_rand.awk | 14 -
 contrib/awk/awklib/eg/lib/ctime.awk | 11 -
 contrib/awk/awklib/eg/lib/ftrans.awk | 15 -
 contrib/awk/awklib/eg/lib/getopt.awk | 80 -
 contrib/awk/awklib/eg/lib/gettime.awk | 62 -
 contrib/awk/awklib/eg/lib/grcat.c | 34 -
 contrib/awk/awklib/eg/lib/groupawk.in | 87 -
 contrib/awk/awklib/eg/lib/join.awk | 16 -
 contrib/awk/awklib/eg/lib/libintl.awk | 9 -
 contrib/awk/awklib/eg/lib/mktime.awk | 105 -
 contrib/awk/awklib/eg/lib/nextfile.awk | 16 -
 contrib/awk/awklib/eg/lib/noassign.awk | 17 -
 contrib/awk/awklib/eg/lib/ord.awk | 44 -
 contrib/awk/awklib/eg/lib/passwdawk.in | 63 -
 contrib/awk/awklib/eg/lib/pwcat.c | 28 -
 contrib/awk/awklib/eg/lib/readable.awk | 16 -
 contrib/awk/awklib/eg/lib/rewind.awk | 20 -
 contrib/awk/awklib/eg/lib/round.awk | 32 -
 contrib/awk/awklib/eg/misc/arraymax.awk | 10 -
 contrib/awk/awklib/eg/misc/arraymax.data | 5 -
 contrib/awk/awklib/eg/misc/findpat.awk | 10 -
 contrib/awk/awklib/eg/misc/findpat.data | 7 -
 contrib/awk/awklib/eg/misc/findpat.sh | 10 -
 contrib/awk/awklib/eg/network/PostAgent.sh | 9 -
 contrib/awk/awklib/eg/network/coreserv.awk | 113 -
 contrib/awk/awklib/eg/network/eliza.awk | 270 -
 contrib/awk/awklib/eg/network/fingerclient.awk | 7 -
 contrib/awk/awklib/eg/network/geturl.awk | 24 -
 contrib/awk/awklib/eg/network/hello-serv.awk | 14 -
 contrib/awk/awklib/eg/network/maze.awk | 73 -
 contrib/awk/awklib/eg/network/mobag.awk | 72 -
 contrib/awk/awklib/eg/network/panic.awk | 18 -
 contrib/awk/awklib/eg/network/protbase.awk | 11 -
 contrib/awk/awklib/eg/network/protbase.request | 7 -
 contrib/awk/awklib/eg/network/protbase.result | 9 -
 contrib/awk/awklib/eg/network/remconf.awk | 54 -
 contrib/awk/awklib/eg/network/statist.awk | 85 -
 contrib/awk/awklib/eg/network/stoxdata.txt | 4 -
 contrib/awk/awklib/eg/network/stoxpred.awk | 116 -
 contrib/awk/awklib/eg/network/testserv.awk | 12 -
 contrib/awk/awklib/eg/network/urlchk.awk | 28 -
 contrib/awk/awklib/eg/network/webgrab.awk | 6 -
 contrib/awk/awklib/eg/prog/alarm.awk | 85 -
 contrib/awk/awklib/eg/prog/awksed.awk | 31 -
 contrib/awk/awklib/eg/prog/cut.awk | 139 -
 contrib/awk/awklib/eg/prog/dupword.awk | 21 -
 contrib/awk/awklib/eg/prog/egrep.awk | 102 -
 contrib/awk/awklib/eg/prog/extract.awk | 75 -
 contrib/awk/awklib/eg/prog/guide.awk | 7 -
contrib/awk/awklib/eg/prog/histsort.awk | 15 - contrib/awk/awklib/eg/prog/id.awk | 67 - contrib/awk/awklib/eg/prog/igawk.sh | 130 - contrib/awk/awklib/eg/prog/labels.awk | 54 - contrib/awk/awklib/eg/prog/split.awk | 57 - contrib/awk/awklib/eg/prog/tee.awk | 39 - contrib/awk/awklib/eg/prog/testbits.awk | 27 - contrib/awk/awklib/eg/prog/translate.awk | 47 - contrib/awk/awklib/eg/prog/uniq.awk | 119 - contrib/awk/awklib/eg/prog/wc.awk | 69 - contrib/awk/awklib/eg/prog/wordfreq.awk | 20 - contrib/awk/awklib/extract.awk | 92 - contrib/awk/awklib/stamp-eg | 2 - contrib/awk/awktab.c | 3983 ---- contrib/awk/builtin.c | 2499 --- contrib/awk/configh.in | 321 - contrib/awk/configure | 6490 ------ contrib/awk/configure.in | 182 - contrib/awk/custom.h | 87 - contrib/awk/dfa.c | 2633 --- contrib/awk/dfa.h | 372 - contrib/awk/doc/ChangeLog | 212 - contrib/awk/doc/Makefile.am | 80 - contrib/awk/doc/Makefile.in | 463 - contrib/awk/doc/README.card | 19 - contrib/awk/doc/ad.block | 48 - contrib/awk/doc/awk.1 | 2628 --- contrib/awk/doc/awkcard.in | 1883 -- contrib/awk/doc/awkforai.txt | 150 - contrib/awk/doc/cardfonts | 37 - contrib/awk/doc/colors | 39 - contrib/awk/doc/gawk.1 | 3322 --- contrib/awk/doc/gawk.texi | 26169 ----------------------- contrib/awk/doc/gawkinet.texi | 5075 ----- contrib/awk/doc/igawk.1 | 73 - contrib/awk/doc/macros | 211 - contrib/awk/doc/no.colors | 31 - contrib/awk/doc/setter.outline | 77 - contrib/awk/doc/texinfo.tex | 6238 ------ contrib/awk/eval.c | 2154 -- contrib/awk/eval_p.c | 27 - contrib/awk/ext.c | 186 - contrib/awk/extension/dl.c | 96 - contrib/awk/extension/doit | 1 - contrib/awk/extension/filefuncs.c | 339 - contrib/awk/extension/foo.awk | 9 - contrib/awk/extension/fork.c | 106 - contrib/awk/extension/steps | 9 - contrib/awk/extension/testff.awk | 30 - contrib/awk/extension/testfork.awk | 20 - contrib/awk/field.c | 998 - contrib/awk/fixvers | 58 - contrib/awk/gawkmisc.c | 70 - contrib/awk/getopt.c | 1062 - contrib/awk/getopt.h | 180 - contrib/awk/getopt1.c | 188 - contrib/awk/install-sh | 238 - contrib/awk/io.c | 2400 --- contrib/awk/main.c | 1006 - contrib/awk/mkinstalldirs | 32 - contrib/awk/msg.c | 184 - contrib/awk/node.c | 571 - contrib/awk/patchlev.h | 1 - contrib/awk/patchlevel.h | 1 - contrib/awk/po/POTFILES.in | 23 - contrib/awk/po/cat-id-tbl.c | 411 - contrib/awk/po/gawk.pot | 1696 -- contrib/awk/po/he.gmo | Bin 33953 -> 0 bytes contrib/awk/po/stamp-cat-id | 1 - contrib/awk/posix/ChangeLog | 57 - contrib/awk/posix/gawkmisc.c | 171 - contrib/awk/profile.c | 1381 -- contrib/awk/profile_p.c | 27 - contrib/awk/protos.h | 128 - contrib/awk/re.c | 322 - contrib/awk/regex.c | 5854 ----- contrib/awk/replace.c | 70 - contrib/awk/stamp-h.in | 1 - contrib/awk/test/Makefile.am | 1036 - contrib/awk/test/addcomma.awk | 15 - contrib/awk/test/addcomma.in | 7 - contrib/awk/test/addcomma.ok | 7 - contrib/awk/test/arynasty.awk | 16 - contrib/awk/test/arynasty.ok | 1 - contrib/awk/test/arynocls.awk | 95 - contrib/awk/test/arynocls.in | 30 - contrib/awk/test/arynocls.ok | 0 contrib/awk/test/arysubnm.awk | 1 - contrib/awk/test/arysubnm.ok | 1 - contrib/awk/test/clos1way.awk | 21 - contrib/awk/test/clos1way.ok | 26 - contrib/awk/test/datanonl.awk | 3 - contrib/awk/test/datanonl.in | 1 - contrib/awk/test/datanonl.ok | 1 - contrib/awk/test/fnamedat.awk | 1 - contrib/awk/test/fnamedat.in | 1 - contrib/awk/test/fnamedat.ok | 1 - contrib/awk/test/fnaryscl.awk | 10 - contrib/awk/test/fnaryscl.ok | 1 - contrib/awk/test/fnasgnm.awk | 14 - contrib/awk/test/fnasgnm.in | 1 - 
contrib/awk/test/fnasgnm.ok | 1 - contrib/awk/test/fnparydl.awk | 31 - contrib/awk/test/fnparydl.ok | 10 - contrib/awk/test/fsfwfs.awk | 1 - contrib/awk/test/fsfwfs.in | 16 - contrib/awk/test/fsfwfs.ok | 16 - contrib/awk/test/funsemnl.awk | 3 - contrib/awk/test/funsemnl.ok | 1 - contrib/awk/test/funsmnam.awk | 6 - contrib/awk/test/funsmnam.ok | 1 - contrib/awk/test/getlnbuf.awk | 18 - contrib/awk/test/getlnbuf.in | 1708 -- contrib/awk/test/getlnbuf.ok | 1708 -- contrib/awk/test/getnr2tb.awk | 111 - contrib/awk/test/getnr2tb.in | 6 - contrib/awk/test/getnr2tb.ok | 6 - contrib/awk/test/getnr2tm.awk | 75 - contrib/awk/test/getnr2tm.in | 1 - contrib/awk/test/getnr2tm.ok | 1 - contrib/awk/test/gnuops2.awk | 72 - contrib/awk/test/gnuops2.ok | 7 - contrib/awk/test/gtlnbufv.awk | 2 - contrib/awk/test/hsprint.awk | 74 - contrib/awk/test/hsprint.ok | 66 - contrib/awk/test/igncdym.awk | 56 - contrib/awk/test/igncdym.in | 2 - contrib/awk/test/igncdym.ok | 4 - contrib/awk/test/leaddig.awk | 7 - contrib/awk/test/leaddig.ok | 1 - contrib/awk/test/leadnl.awk | 10 - contrib/awk/test/leadnl.in | 9 - contrib/awk/test/leadnl.ok | 8 - contrib/awk/test/lint.awk | 14 - contrib/awk/test/lint.ok | 5 - contrib/awk/test/nasty.awk | 92 - contrib/awk/test/nasty.ok | 2 - contrib/awk/test/nasty2.awk | 20 - contrib/awk/test/nasty2.ok | 2 - contrib/awk/test/nlinstr.awk | 8 - contrib/awk/test/nlinstr.in | 2 - contrib/awk/test/nlinstr.ok | 1 - contrib/awk/test/nlstrina.awk | 77 - contrib/awk/test/nlstrina.ok | 3 - contrib/awk/test/numindex.awk | 55 - contrib/awk/test/numindex.in | 4 - contrib/awk/test/numindex.ok | 1 - contrib/awk/test/octsub.awk | 1 - contrib/awk/test/octsub.ok | 1 - contrib/awk/test/ofmt.awk | 53 - contrib/awk/test/ofmt.in | 136 - contrib/awk/test/ofmt.ok | 6 - contrib/awk/test/ofmtbig.awk | 125 - contrib/awk/test/ofmtbig.in | 3 - contrib/awk/test/ofmtbig.ok | 2 - contrib/awk/test/ofmts.awk | 2 - contrib/awk/test/ofmts.in | 1 - contrib/awk/test/ofmts.ok | 1 - contrib/awk/test/opasnidx.awk | 1 - contrib/awk/test/opasnidx.ok | 1 - contrib/awk/test/opasnslf.awk | 6 - contrib/awk/test/opasnslf.ok | 3 - contrib/awk/test/paramtyp.awk | 20 - contrib/awk/test/paramtyp.ok | 6 - contrib/awk/test/parseme.awk | 1 - contrib/awk/test/parseme.ok | 3 - contrib/awk/test/poundbang.awk | 3 - contrib/awk/test/printf1.awk | 19 - contrib/awk/test/printf1.ok | 7 - contrib/awk/test/printfloat.awk | 62 - contrib/awk/test/procinfs.awk | 7 - contrib/awk/test/procinfs.ok | 3 - contrib/awk/test/psx96sub.awk | 7 - contrib/awk/test/psx96sub.ok | 2 - contrib/awk/test/rebt8b1.awk | 138 - contrib/awk/test/rebt8b1.ok | 16 - contrib/awk/test/rebt8b2.awk | 88 - contrib/awk/test/rebt8b2.ok | 511 - contrib/awk/test/redfilnm.awk | 98 - contrib/awk/test/redfilnm.in | 1 - contrib/awk/test/redfilnm.ok | 9 - contrib/awk/test/reg/Obsolete/exp.awk | 1 - contrib/awk/test/reg/Obsolete/exp.good | 2 - contrib/awk/test/reg/Obsolete/exp.in | 0 contrib/awk/test/reg/Obsolete/log.awk | 1 - contrib/awk/test/reg/Obsolete/log.good | 4 - contrib/awk/test/reg/Obsolete/log.in | 0 contrib/awk/test/regeq.awk | 29 - contrib/awk/test/regeq.in | 2 - contrib/awk/test/regeq.ok | 2 - contrib/awk/test/regtest.sh | 18 - contrib/awk/test/regx8bit.awk | 26 - contrib/awk/test/regx8bit.ok | 7 - contrib/awk/test/rsnul1nl.awk | 2 - contrib/awk/test/rsnul1nl.in | 3 - contrib/awk/test/rsnul1nl.ok | 2 - contrib/awk/test/shadow.awk | 21 - contrib/awk/test/shadow.ok | 7 - contrib/awk/test/sort1.awk | 31 - contrib/awk/test/sort1.ok | 11 - contrib/awk/test/splitdef.awk | 7 - 
contrib/awk/test/splitdef.ok | 1 - contrib/awk/test/strftime.awk | 15 - contrib/awk/test/strtod.awk | 1 - contrib/awk/test/strtod.in | 1 - contrib/awk/test/strtod.ok | 1 - contrib/awk/test/subslash.awk | 6 - contrib/awk/test/subslash.ok | 1 - contrib/awk/test/zeroflag.awk | 1 - contrib/awk/test/zeroflag.ok | 1 - contrib/awk/version.c | 53 - contrib/awk/version.in | 53 - 292 files changed, 108772 deletions(-) delete mode 100644 contrib/awk/ACKNOWLEDGMENT delete mode 100644 contrib/awk/AUTHORS delete mode 100644 contrib/awk/COPYING delete mode 100644 contrib/awk/ChangeLog delete mode 100644 contrib/awk/FREEBSD-upgrade delete mode 100644 contrib/awk/FUTURES delete mode 100644 contrib/awk/INSTALL delete mode 100644 contrib/awk/LIMITATIONS delete mode 100644 contrib/awk/Makefile.am delete mode 100644 contrib/awk/Makefile.in delete mode 100644 contrib/awk/NEWS delete mode 100644 contrib/awk/PORTS delete mode 100644 contrib/awk/POSIX.STD delete mode 100644 contrib/awk/PROBLEMS delete mode 100644 contrib/awk/README delete mode 100644 contrib/awk/README_d/README.FIRST delete mode 100644 contrib/awk/README_d/README.tests delete mode 100644 contrib/awk/acconfig.h delete mode 100644 contrib/awk/aclocal.m4 delete mode 100644 contrib/awk/alloca.c delete mode 100644 contrib/awk/array.c delete mode 100644 contrib/awk/awk.h delete mode 100644 contrib/awk/awk.y delete mode 100644 contrib/awk/awkgram.y delete mode 100644 contrib/awk/awklib/ChangeLog delete mode 100644 contrib/awk/awklib/Makefile.am delete mode 100644 contrib/awk/awklib/Makefile.in delete mode 100644 contrib/awk/awklib/eg/data/BBS-list delete mode 100644 contrib/awk/awklib/eg/data/inventory-shipped delete mode 100644 contrib/awk/awklib/eg/lib/assert.awk delete mode 100644 contrib/awk/awklib/eg/lib/bits2str.awk delete mode 100644 contrib/awk/awklib/eg/lib/cliff_rand.awk delete mode 100644 contrib/awk/awklib/eg/lib/ctime.awk delete mode 100644 contrib/awk/awklib/eg/lib/ftrans.awk delete mode 100644 contrib/awk/awklib/eg/lib/getopt.awk delete mode 100644 contrib/awk/awklib/eg/lib/gettime.awk delete mode 100644 contrib/awk/awklib/eg/lib/grcat.c delete mode 100644 contrib/awk/awklib/eg/lib/groupawk.in delete mode 100644 contrib/awk/awklib/eg/lib/join.awk delete mode 100644 contrib/awk/awklib/eg/lib/libintl.awk delete mode 100644 contrib/awk/awklib/eg/lib/mktime.awk delete mode 100644 contrib/awk/awklib/eg/lib/nextfile.awk delete mode 100644 contrib/awk/awklib/eg/lib/noassign.awk delete mode 100644 contrib/awk/awklib/eg/lib/ord.awk delete mode 100644 contrib/awk/awklib/eg/lib/passwdawk.in delete mode 100644 contrib/awk/awklib/eg/lib/pwcat.c delete mode 100644 contrib/awk/awklib/eg/lib/readable.awk delete mode 100644 contrib/awk/awklib/eg/lib/rewind.awk delete mode 100644 contrib/awk/awklib/eg/lib/round.awk delete mode 100644 contrib/awk/awklib/eg/misc/arraymax.awk delete mode 100644 contrib/awk/awklib/eg/misc/arraymax.data delete mode 100644 contrib/awk/awklib/eg/misc/findpat.awk delete mode 100644 contrib/awk/awklib/eg/misc/findpat.data delete mode 100644 contrib/awk/awklib/eg/misc/findpat.sh delete mode 100644 contrib/awk/awklib/eg/network/PostAgent.sh delete mode 100644 contrib/awk/awklib/eg/network/coreserv.awk delete mode 100644 contrib/awk/awklib/eg/network/eliza.awk delete mode 100644 contrib/awk/awklib/eg/network/fingerclient.awk delete mode 100644 contrib/awk/awklib/eg/network/geturl.awk delete mode 100644 contrib/awk/awklib/eg/network/hello-serv.awk delete mode 100644 contrib/awk/awklib/eg/network/maze.awk delete mode 100644 
contrib/awk/awklib/eg/network/mobag.awk delete mode 100644 contrib/awk/awklib/eg/network/panic.awk delete mode 100644 contrib/awk/awklib/eg/network/protbase.awk delete mode 100644 contrib/awk/awklib/eg/network/protbase.request delete mode 100644 contrib/awk/awklib/eg/network/protbase.result delete mode 100644 contrib/awk/awklib/eg/network/remconf.awk delete mode 100644 contrib/awk/awklib/eg/network/statist.awk delete mode 100644 contrib/awk/awklib/eg/network/stoxdata.txt delete mode 100644 contrib/awk/awklib/eg/network/stoxpred.awk delete mode 100644 contrib/awk/awklib/eg/network/testserv.awk delete mode 100644 contrib/awk/awklib/eg/network/urlchk.awk delete mode 100644 contrib/awk/awklib/eg/network/webgrab.awk delete mode 100644 contrib/awk/awklib/eg/prog/alarm.awk delete mode 100644 contrib/awk/awklib/eg/prog/awksed.awk delete mode 100644 contrib/awk/awklib/eg/prog/cut.awk delete mode 100644 contrib/awk/awklib/eg/prog/dupword.awk delete mode 100644 contrib/awk/awklib/eg/prog/egrep.awk delete mode 100644 contrib/awk/awklib/eg/prog/extract.awk delete mode 100644 contrib/awk/awklib/eg/prog/guide.awk delete mode 100644 contrib/awk/awklib/eg/prog/histsort.awk delete mode 100644 contrib/awk/awklib/eg/prog/id.awk delete mode 100644 contrib/awk/awklib/eg/prog/igawk.sh delete mode 100644 contrib/awk/awklib/eg/prog/labels.awk delete mode 100644 contrib/awk/awklib/eg/prog/split.awk delete mode 100644 contrib/awk/awklib/eg/prog/tee.awk delete mode 100644 contrib/awk/awklib/eg/prog/testbits.awk delete mode 100644 contrib/awk/awklib/eg/prog/translate.awk delete mode 100644 contrib/awk/awklib/eg/prog/uniq.awk delete mode 100644 contrib/awk/awklib/eg/prog/wc.awk delete mode 100644 contrib/awk/awklib/eg/prog/wordfreq.awk delete mode 100644 contrib/awk/awklib/extract.awk delete mode 100644 contrib/awk/awklib/stamp-eg delete mode 100644 contrib/awk/awktab.c delete mode 100644 contrib/awk/builtin.c delete mode 100644 contrib/awk/configh.in delete mode 100755 contrib/awk/configure delete mode 100644 contrib/awk/configure.in delete mode 100644 contrib/awk/custom.h delete mode 100644 contrib/awk/dfa.c delete mode 100644 contrib/awk/dfa.h delete mode 100644 contrib/awk/doc/ChangeLog delete mode 100644 contrib/awk/doc/Makefile.am delete mode 100644 contrib/awk/doc/Makefile.in delete mode 100644 contrib/awk/doc/README.card delete mode 100644 contrib/awk/doc/ad.block delete mode 100644 contrib/awk/doc/awk.1 delete mode 100644 contrib/awk/doc/awkcard.in delete mode 100644 contrib/awk/doc/awkforai.txt delete mode 100644 contrib/awk/doc/cardfonts delete mode 100644 contrib/awk/doc/colors delete mode 100644 contrib/awk/doc/gawk.1 delete mode 100644 contrib/awk/doc/gawk.texi delete mode 100644 contrib/awk/doc/gawkinet.texi delete mode 100644 contrib/awk/doc/igawk.1 delete mode 100644 contrib/awk/doc/macros delete mode 100644 contrib/awk/doc/no.colors delete mode 100644 contrib/awk/doc/setter.outline delete mode 100644 contrib/awk/doc/texinfo.tex delete mode 100644 contrib/awk/eval.c delete mode 100644 contrib/awk/eval_p.c delete mode 100644 contrib/awk/ext.c delete mode 100644 contrib/awk/extension/dl.c delete mode 100755 contrib/awk/extension/doit delete mode 100644 contrib/awk/extension/filefuncs.c delete mode 100644 contrib/awk/extension/foo.awk delete mode 100644 contrib/awk/extension/fork.c delete mode 100755 contrib/awk/extension/steps delete mode 100644 contrib/awk/extension/testff.awk delete mode 100644 contrib/awk/extension/testfork.awk delete mode 100644 contrib/awk/field.c delete mode 100755 
contrib/awk/fixvers delete mode 100644 contrib/awk/gawkmisc.c delete mode 100644 contrib/awk/getopt.c delete mode 100644 contrib/awk/getopt.h delete mode 100644 contrib/awk/getopt1.c delete mode 100755 contrib/awk/install-sh delete mode 100644 contrib/awk/io.c delete mode 100644 contrib/awk/main.c delete mode 100755 contrib/awk/mkinstalldirs delete mode 100644 contrib/awk/msg.c delete mode 100644 contrib/awk/node.c delete mode 100644 contrib/awk/patchlev.h delete mode 100644 contrib/awk/patchlevel.h delete mode 100644 contrib/awk/po/POTFILES.in delete mode 100644 contrib/awk/po/cat-id-tbl.c delete mode 100644 contrib/awk/po/gawk.pot delete mode 100644 contrib/awk/po/he.gmo delete mode 100644 contrib/awk/po/stamp-cat-id delete mode 100644 contrib/awk/posix/ChangeLog delete mode 100644 contrib/awk/posix/gawkmisc.c delete mode 100644 contrib/awk/profile.c delete mode 100644 contrib/awk/profile_p.c delete mode 100644 contrib/awk/protos.h delete mode 100644 contrib/awk/re.c delete mode 100644 contrib/awk/regex.c delete mode 100644 contrib/awk/replace.c delete mode 100644 contrib/awk/stamp-h.in delete mode 100644 contrib/awk/test/Makefile.am delete mode 100644 contrib/awk/test/addcomma.awk delete mode 100644 contrib/awk/test/addcomma.in delete mode 100644 contrib/awk/test/addcomma.ok delete mode 100644 contrib/awk/test/arynasty.awk delete mode 100644 contrib/awk/test/arynasty.ok delete mode 100644 contrib/awk/test/arynocls.awk delete mode 100644 contrib/awk/test/arynocls.in delete mode 100644 contrib/awk/test/arynocls.ok delete mode 100644 contrib/awk/test/arysubnm.awk delete mode 100644 contrib/awk/test/arysubnm.ok delete mode 100644 contrib/awk/test/clos1way.awk delete mode 100644 contrib/awk/test/clos1way.ok delete mode 100644 contrib/awk/test/datanonl.awk delete mode 100644 contrib/awk/test/datanonl.in delete mode 100644 contrib/awk/test/datanonl.ok delete mode 100644 contrib/awk/test/fnamedat.awk delete mode 100644 contrib/awk/test/fnamedat.in delete mode 100644 contrib/awk/test/fnamedat.ok delete mode 100644 contrib/awk/test/fnaryscl.awk delete mode 100644 contrib/awk/test/fnaryscl.ok delete mode 100644 contrib/awk/test/fnasgnm.awk delete mode 100644 contrib/awk/test/fnasgnm.in delete mode 100644 contrib/awk/test/fnasgnm.ok delete mode 100644 contrib/awk/test/fnparydl.awk delete mode 100644 contrib/awk/test/fnparydl.ok delete mode 100644 contrib/awk/test/fsfwfs.awk delete mode 100644 contrib/awk/test/fsfwfs.in delete mode 100644 contrib/awk/test/fsfwfs.ok delete mode 100644 contrib/awk/test/funsemnl.awk delete mode 100644 contrib/awk/test/funsemnl.ok delete mode 100644 contrib/awk/test/funsmnam.awk delete mode 100644 contrib/awk/test/funsmnam.ok delete mode 100644 contrib/awk/test/getlnbuf.awk delete mode 100644 contrib/awk/test/getlnbuf.in delete mode 100644 contrib/awk/test/getlnbuf.ok delete mode 100644 contrib/awk/test/getnr2tb.awk delete mode 100644 contrib/awk/test/getnr2tb.in delete mode 100644 contrib/awk/test/getnr2tb.ok delete mode 100644 contrib/awk/test/getnr2tm.awk delete mode 100644 contrib/awk/test/getnr2tm.in delete mode 100644 contrib/awk/test/getnr2tm.ok delete mode 100644 contrib/awk/test/gnuops2.awk delete mode 100644 contrib/awk/test/gnuops2.ok delete mode 100644 contrib/awk/test/gtlnbufv.awk delete mode 100644 contrib/awk/test/hsprint.awk delete mode 100644 contrib/awk/test/hsprint.ok delete mode 100644 contrib/awk/test/igncdym.awk delete mode 100644 contrib/awk/test/igncdym.in delete mode 100644 contrib/awk/test/igncdym.ok delete mode 100644 
contrib/awk/test/leaddig.awk delete mode 100644 contrib/awk/test/leaddig.ok delete mode 100644 contrib/awk/test/leadnl.awk delete mode 100644 contrib/awk/test/leadnl.in delete mode 100644 contrib/awk/test/leadnl.ok delete mode 100644 contrib/awk/test/lint.awk delete mode 100644 contrib/awk/test/lint.ok delete mode 100644 contrib/awk/test/nasty.awk delete mode 100644 contrib/awk/test/nasty.ok delete mode 100644 contrib/awk/test/nasty2.awk delete mode 100644 contrib/awk/test/nasty2.ok delete mode 100644 contrib/awk/test/nlinstr.awk delete mode 100644 contrib/awk/test/nlinstr.in delete mode 100644 contrib/awk/test/nlinstr.ok delete mode 100644 contrib/awk/test/nlstrina.awk delete mode 100644 contrib/awk/test/nlstrina.ok delete mode 100644 contrib/awk/test/numindex.awk delete mode 100644 contrib/awk/test/numindex.in delete mode 100644 contrib/awk/test/numindex.ok delete mode 100644 contrib/awk/test/octsub.awk delete mode 100644 contrib/awk/test/octsub.ok delete mode 100644 contrib/awk/test/ofmt.awk delete mode 100644 contrib/awk/test/ofmt.in delete mode 100644 contrib/awk/test/ofmt.ok delete mode 100644 contrib/awk/test/ofmtbig.awk delete mode 100644 contrib/awk/test/ofmtbig.in delete mode 100644 contrib/awk/test/ofmtbig.ok delete mode 100644 contrib/awk/test/ofmts.awk delete mode 100644 contrib/awk/test/ofmts.in delete mode 100644 contrib/awk/test/ofmts.ok delete mode 100644 contrib/awk/test/opasnidx.awk delete mode 100644 contrib/awk/test/opasnidx.ok delete mode 100644 contrib/awk/test/opasnslf.awk delete mode 100644 contrib/awk/test/opasnslf.ok delete mode 100644 contrib/awk/test/paramtyp.awk delete mode 100644 contrib/awk/test/paramtyp.ok delete mode 100644 contrib/awk/test/parseme.awk delete mode 100644 contrib/awk/test/parseme.ok delete mode 100755 contrib/awk/test/poundbang.awk delete mode 100644 contrib/awk/test/printf1.awk delete mode 100644 contrib/awk/test/printf1.ok delete mode 100644 contrib/awk/test/printfloat.awk delete mode 100644 contrib/awk/test/procinfs.awk delete mode 100644 contrib/awk/test/procinfs.ok delete mode 100644 contrib/awk/test/psx96sub.awk delete mode 100644 contrib/awk/test/psx96sub.ok delete mode 100644 contrib/awk/test/rebt8b1.awk delete mode 100644 contrib/awk/test/rebt8b1.ok delete mode 100644 contrib/awk/test/rebt8b2.awk delete mode 100644 contrib/awk/test/rebt8b2.ok delete mode 100644 contrib/awk/test/redfilnm.awk delete mode 100644 contrib/awk/test/redfilnm.in delete mode 100644 contrib/awk/test/redfilnm.ok delete mode 100644 contrib/awk/test/reg/Obsolete/exp.awk delete mode 100644 contrib/awk/test/reg/Obsolete/exp.good delete mode 100644 contrib/awk/test/reg/Obsolete/exp.in delete mode 100644 contrib/awk/test/reg/Obsolete/log.awk delete mode 100644 contrib/awk/test/reg/Obsolete/log.good delete mode 100644 contrib/awk/test/reg/Obsolete/log.in delete mode 100644 contrib/awk/test/regeq.awk delete mode 100644 contrib/awk/test/regeq.in delete mode 100644 contrib/awk/test/regeq.ok delete mode 100755 contrib/awk/test/regtest.sh delete mode 100644 contrib/awk/test/regx8bit.awk delete mode 100644 contrib/awk/test/regx8bit.ok delete mode 100644 contrib/awk/test/rsnul1nl.awk delete mode 100644 contrib/awk/test/rsnul1nl.in delete mode 100644 contrib/awk/test/rsnul1nl.ok delete mode 100644 contrib/awk/test/shadow.awk delete mode 100644 contrib/awk/test/shadow.ok delete mode 100644 contrib/awk/test/sort1.awk delete mode 100644 contrib/awk/test/sort1.ok delete mode 100644 contrib/awk/test/splitdef.awk delete mode 100644 contrib/awk/test/splitdef.ok delete mode 100644 
contrib/awk/test/strftime.awk delete mode 100644 contrib/awk/test/strtod.awk delete mode 100644 contrib/awk/test/strtod.in delete mode 100644 contrib/awk/test/strtod.ok delete mode 100644 contrib/awk/test/subslash.awk delete mode 100644 contrib/awk/test/subslash.ok delete mode 100644 contrib/awk/test/zeroflag.awk delete mode 100644 contrib/awk/test/zeroflag.ok delete mode 100644 contrib/awk/version.c delete mode 100644 contrib/awk/version.in (limited to 'contrib') diff --git a/contrib/awk/ACKNOWLEDGMENT b/contrib/awk/ACKNOWLEDGMENT deleted file mode 100644 index 0851ecf..0000000 --- a/contrib/awk/ACKNOWLEDGMENT +++ /dev/null @@ -1,34 +0,0 @@ -The current developers of Gawk would like to thank and acknowledge the -many people who have contributed to the development through bug reports -and fixes and suggestions. Unfortunately, we have not been organized -enough to keep track of all the names -- for that we apologize. - -The following people were involved in porting gawk to different platforms. - - Mike Lijewski (IBM RS6000) - Kent Williams (MSDOS 2.11) - Conrad Kwok (MSDOS earlier versions) - Scott Garfinkle (MSDOS earlier versions) - Hal Peterson (Cray) - -This group of people comprise the "GAWK crack portability team", who -test the pre-releases and ensure portability of gawk. - - Pat Rankin (VMS) - Michal Jaegermann - (Atari, NeXT, DEC 3100) - Scott Deifik (MSDOS 2.14, 2.15, 3.0) - Kai Uwe Rommel (OS/2) - Darrel Hankerson (DOS and formerly OS/2) - Mark Moraes (Code Center, Purify) - Kaveh Ghazi (Lots of Unix variants) - -Michal, Scott and Darrel go out of their way to make sure that gawk -works on non-32 bit systems, and keep me on track where portability is -concerned. Indeed, all of these folks are incredibly helpful; gawk would -not be the fine program it is now without them. - -Last, but far from least, we would like to thank Brian Kernighan who -has helped to clear up many dark corners of the language and provided a -restraining touch when we have been overly tempted by "feeping -creaturism". diff --git a/contrib/awk/AUTHORS b/contrib/awk/AUTHORS deleted file mode 100644 index c6e2507..0000000 --- a/contrib/awk/AUTHORS +++ /dev/null @@ -1,9 +0,0 @@ -Wed Feb 16 10:06:17 IST 2000 - -Gawk was written by Paul Rubin, and finished by Paul Finlason and -Richard Stallman. - -David Trueman and Arnold Robbins took it over, with David doing most -of the work to make it compatible with new awk. - -Circa 1994, Arnold Robbins took over maintenance. diff --git a/contrib/awk/COPYING b/contrib/awk/COPYING deleted file mode 100644 index d60c31a..0000000 --- a/contrib/awk/COPYING +++ /dev/null @@ -1,340 +0,0 @@ - GNU GENERAL PUBLIC LICENSE - Version 2, June 1991 - - Copyright (C) 1989, 1991 Free Software Foundation, Inc. - 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - Everyone is permitted to copy and distribute verbatim copies - of this license document, but changing it is not allowed. - - Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software--to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. 
- - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - - GNU GENERAL PUBLIC LICENSE - TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION - - 0. This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. The "Program", below, -refers to any such program or work, and a "work based on the Program" -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term "modification".) Each licensee is addressed as "you". - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - - 1. You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. 
- -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - - 2. You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - - a) You must cause the modified files to carry prominent notices - stating that you changed the files and the date of any change. - - b) You must cause any work that you distribute or publish, that in - whole or in part contains or is derived from the Program or any - part thereof, to be licensed as a whole at no charge to all third - parties under the terms of this License. - - c) If the modified program normally reads commands interactively - when run, you must cause it, when started running for such - interactive use in the most ordinary way, to print or display an - announcement including an appropriate copyright notice and a - notice that there is no warranty (or else, saying that you provide - a warranty) and that users may redistribute the program under - these conditions, and telling the user how to view a copy of this - License. (Exception: if the Program itself is interactive but - does not normally print such an announcement, your work based on - the Program is not required to print an announcement.) - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - - 3. You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - - a) Accompany it with the complete corresponding machine-readable - source code, which must be distributed under the terms of Sections - 1 and 2 above on a medium customarily used for software interchange; or, - - b) Accompany it with a written offer, valid for at least three - years, to give any third party, for a charge no more than your - cost of physically performing source distribution, a complete - machine-readable copy of the corresponding source code, to be - distributed under the terms of Sections 1 and 2 above on a medium - customarily used for software interchange; or, - - c) Accompany it with the information you received as to the offer - to distribute corresponding source code. 
(This alternative is - allowed only for noncommercial distribution and only if you - received the program in object code or executable form with such - an offer, in accord with Subsection b above.) - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - - 4. You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - - 5. You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - - 6. Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - - 7. If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. 
- -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - - 8. If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - - 9. The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and "any -later version", you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - - 10. If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - - NO WARRANTY - - 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - - 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. - - END OF TERMS AND CONDITIONS - - How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the "copyright" line and a pointer to where the full notice is found. - - - Copyright (C) - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - - Gnomovision version 69, Copyright (C) year name of author - Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. - This is free software, and you are welcome to redistribute it - under certain conditions; type `show c' for details. - -The hypothetical commands `show w' and `show c' should show the appropriate -parts of the General Public License. Of course, the commands you use may -be called something other than `show w' and `show c'; they could even be -mouse-clicks or menu items--whatever suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a "copyright disclaimer" for the program, if -necessary. Here is a sample; alter the names: - - Yoyodyne, Inc., hereby disclaims all copyright interest in the program - `Gnomovision' (which makes passes at compilers) written by James Hacker. - - , 1 April 1989 - Ty Coon, President of Vice - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Library General -Public License instead of this License. 
diff --git a/contrib/awk/ChangeLog b/contrib/awk/ChangeLog deleted file mode 100644 index ec85759..0000000 --- a/contrib/awk/ChangeLog +++ /dev/null @@ -1,2641 +0,0 @@ -Sun Jun 3 13:04:44 2001 Arnold D. Robbins - - * Release 3.1.0: Release tar file made. And there was - rejoicing. - -Wed Apr 25 11:44:07 2001 Arnold Robbins - - * Makefile.am (AM_MAKEFLAGS): Add definition per advice from - Nelson Beebe. - -Tue Apr 24 14:28:00 2001 Arnold Robbins - - * io.c (devopen): Patch from Jeurgen to robustify pulling - out hostname, port numbers, etc, to avoid any buffer overrun - problems. - -Mon Apr 23 10:26:38 2001 Arnold Robbins - - * awkgram.y: Fix grammar so that `print ... |& ".." |& getline' - dies with a parse-time error message. - -Sun Apr 22 16:46:48 2001 Arnold Robbins - - * io.c (socketopen): Fix from Juergen in recursive call. - -Thu Apr 19 18:39:20 2001 Pat Rankin - - * awk.h: Really fix logic around include of . - - * awk.h (callresult): New name for `result' macro. - * eval.c (r_get_lhs, case Node_builtin): Use it. - -Thu Apr 19 16:31:09 2001 Pat Rankin - - * io.c: Move code around to allow compilation with DEC C. - -Thu Apr 19 16:21:56 2001 Arnold D. Robbins - - * random.h: Move decl of random() here. - * random.c: Remove decl of random(). - -Mon Apr 9 11:41:58 2001 Arnold D. Robbins - - * dfa.c (dfainit): Initialize more members in the structure, - based on bug report in bug.gnu.utils by aaronl@vitelus.com - (Aaron Lehmann). - * awk.h: Fix logic around include of . - -Thu Apr 5 20:12:05 2001 Pat Rankin - - * dfa.c: for VMS, #include instead of . - * missing_d/mktime.c: likewise. - - * random.c: reorder include directives to get gawk config info - from random.h sooner. - [fcntl.h]: guard #include with HAVE_FCNTL_H test. - [unistd.h]: guard #include with HAVE_UNISTD_H test. - - * random.c (srandomdev): skip /dev/urandom usage if O_RDONLY - is not defined. - -Tue Mar 20 11:07:11 2001 Arnold D. Robbins - - * awkgram.y (function_body): Add opt_nls to end of production. - -Tue Mar 20 09:30:32 2001 Pat Rankin - - * awk.h (BROKEN_STRNCASECMP): Add decl of strcasecmp. - * io.c (two_way_open): Add `return FALSE;' for fussy compilers. - -Sun Mar 18 15:10:56 2001 Arnold D. Robbins - - * io.c (gawk_pclose): Set the exit value for close correctly - if the pipe died with a signal. - -Wed Mar 7 11:28:52 2001 Arnold D. Robbins - - * io.c (get_a_record): Correctly handle the case of a leading - single newline at the front of the file when RS = "". - -2001-02-26 Paul Eggert - - * COPYING: Incorporate latest version from FSF, which fixes a Y2k bug. - - * builtin.c (do_mktime): Allow the user to specify the - tm_isdst member as an optional trailing integer, and to - specify "out-of-range" members. Check for overflow when - subtracting 1 from month or 1900 from year. Allow years just - past INT_MAX, as they work on some hosts when INT_MAX - 1900 - is representable as an int. - - * doc/gawk.1, doc/gawk.texi: Document the above changes. - Also, document that the origin-zero Gregorian calendar is used. - Fix confusing wording about "midnight" by replacing it with 00:00 - ("midnight" is also 24:00, the end of the day). - Mention the typical range for time stamps. - Do not assume that years are nonnegative and are less than 10,000. - Suggest TZ=UTC0 instead of TZ=GMT0, as that's how recent versions - of GNU date behave. - GMT is not always the time of day in Greenwich these days. - Fix typos: "Emporer/Era", "1980's", "1970's". - - * m4/largefile.m4: Synchronized with latest version. 
- -Tue Feb 27 12:10:11 2001 Arnold D. Robbins - - * profile.c (pp_in_array): Change test to tree->type == Node_expression_list. - -Wed Feb 7 14:46:50 2001 Arnold D. Robbins - - * awkgram.y (LEX_FOR): Allow newline after `;' in for loops. - Per bug report from Brian Kernighan, bwk@research.bell-labs.com. - -Tue Feb 6 18:35:27 2001 Martin C. Brown - - * io.c (socket_open): Conditionalize various options based on - ifdef. Needed for BeOS port. - -Tue Feb 6 18:17:13 2001 Michal Jaegermann - - * regex.c (re_match_2_internal): Case maybe_pop_jump, for - charset and not_charset: change cast from (unsigned char) - to (unsigned). Catches last 8 chars with high bit set - if backtracking. See test/rebt8b1.awk, test/rebt8b2.awk. - -Tue Feb 6 11:20:21 2001 Arnold D. Robbins - - Have `for (iggy in foo)' save the elements and loop over them. - Make sorted for loops a dynamic test instead of a compile time test. - Still requires being Strong In The Ways Of The Source. - - * awk.h: (struct search): removed. - (assoc_scan, assoc_next): removed declarations. - * array.c (assoc_scan, assoc_next): removed functions. - * eval.c (interpret): remove Node_K_array_sorted_for. Change code - at Node_K_arrayfor. - (nodetypes): remove Node_K_array_sorted_for. - * configure.in: removed array sorting test. - * awkgram.y: removed sorted_in keyword and associated code. - -Sun Feb 4 14:57:49 2001 Arnold D. Robbins - - * eval.c (interpret): use tree->rnode->exec_count to hold count of - times if was true. - profile.c (interpret): ditto. - * main.c (pre_assign): gross hack. malloc fresh copy of assign so can - clear the '=', otherwise screws up profiling print out. - -Sun Jan 28 16:16:02 2001 Arnold D. Robbins - - Per request from Nelson Beebe, SIGHUP to pgawk dumps profile - and function call stack and exits, SIGUSR1 dumps and continues - running. - - * eval.c (dump_fcall_stack): New function, dumps awk function call - stack. - * awk.h (dump_fcall_stack): Add declaration. - (init_profiling_signals): Ditto. - * main.c (main): Call init_profiling_signals. - * profile.c (init_profiling_signals, dump_and_exit, just_dump): new - functions. - -Sun Jan 28 15:50:02 2001 Eli Zaretskii - - * io.c (gawk_popen): Restore the mode of stdin before running the - child process and switch it back if BINMODE is in effect after the - child returns. - (redirect): Restore the mode of stdin before running the child - process. - (close_redir): Switch mode of stdin back to binary if BINMODE is - in effect, after the child returns. - - * builtin.c (do_system): Restore the mode of stdin before running - the child process and switch it back if BINMODE is in effect after - the child returns. - - * awk.h (os_restore_mode): Add prototype. - -Thu Jan 18 14:03:06 2001 Arnold D. Robbins - - * custom.h, README_d/README.ultrix: Fixes for Ultrix - from Juergen Kahrs. - -Wed Jan 17 11:03:40 2001 Eli Zaretskii - - * io.c (redirect) [F_GETFL && O_APPEND]: Use binmode in the call - to fdopen. - -Mon Jan 15 16:29:52 2001 Arnold D. Robbins - - * profile.c (prec_level): Made Node_K_getline higher than < - but lower than others. Allows use of getline with redirection - inside an if. - -Wed Jan 10 15:35:06 2001 Arnold D. Robbins - - * eval.c (set_BINMODE): Rationalized string assignment. - -Sun Jan 7 15:26:16 2001 Arnold D. Robbins - - * getopt.h: Removed names in prototypes for getopt_long - and getopt_long_only, fixes problems on MINGW32. - -Thu Jan 4 10:13:46 2001 Arnold D. Robbins - - * configure.in: Add check for mcheck.h - * main.c: Include mcheck.h if have it. 
- (main): If TIDYMEM turned on in environment, also call mtrace(). - -Wed Jan 3 16:41:33 2001 Arnold D. Robbins - - Fixed minor memory leaks. - * re.c (re_update): When IGNORECASE changed, unref(t->re_text). - * eval.c (pop_fcall): Fix the logic to correctly free the vname - when copying array args back to their underlying source. - - Fixed massive memory leaks. - * node.c (dupnode): If PERM is set, do nothing. - (unref): Fix logic. Always turn off TEMP. Check just for MALLOC - when incrementing the stref. - * array.c (assoc_lookup): Turn off PERM also when saving subscript. - * builtin.c (sub_common): Turn off PERM also when making private copy - of string. - - Add a minor memory cleanup facility (undocumented): - * awk.h (do_tidy_mem, release_all_vars): Add declarations. - * main.c (do_tidy_mem): Add declaration. - (main): if $TIDYMEM exists, do_tidy_mem is true, and call mtrace(). - * awkgram.y (release_all_vars): New function. - -Sun Dec 31 10:47:37 2000 Arnold D. Robbins - - * awkgram.y (in_end_rule): Renamed `parsing_end_rule' to avoid - conflict with global var of same name. - -Sun Dec 24 10:36:54 2000 Eli Zaretskii - - * awkgram.y (snode): Reword the error message about the number of - arguments for a builtin, so as not to use the English `s' as a - plural suffix. - -Tue Dec 12 08:38:03 2000 Arnold D. Robbins - - * ext.c (do_ext): ifdef out use of `dummy'. Duh. - * regex.c (re_error_msgid): Revert to array of `char *' so that can - compile on K&R compilers. Fix all uses appropriately. - (re_error_msgid_idx): Removed. - -Fri Dec 8 11:47:26 2000 Arnold D. Robbins - - * ext.c (dummy): Make gcc specific via ifdef. - * builtin.c (do_dcgettext): make conditional compilation smarter. - * msg.c (warning, error, r_fatal): Finish switching back to - multi-version function header. - -Wed Dec 6 13:28:58 2000 Arnold D. Robbins - - * random.h: include to get ssize_t definition. - * awkgram.y (yyerror): Restore multi-version function header, - it seems that what ansi2knr produces doesn't quite do the - job on old compilers. - msg.c (msg): Ditto. - -Tue Dec 5 15:05:35 2000 Arnold D. Robbins - - * configure.in (AC_C_INLINE): Added macro call. - * Makefile.am (LN): Define it for install hooks. - -Sun Dec 3 17:28:53 2000 Arnold D. Robbins - - * awk.h (os_setbinmode): Declare new function. - (setmode): Remove definition: conflicts with MacOS X. - * main.c (main): Change call of setmode to os_setbindmode. - - * builtin.c (do_dcgettext): Improve ifdef for code, fixes MacOS X. - * custom.h (__APPLE__): Force definition of HAVE_MKTIME, won't - link otherwise. Harumph. - -Sun Nov 26 11:58:52 2000 Arnold D. Robbins - - * builtin.c (do_dcgettext, do_bindtextdomain): Add calls to - free_temp the various arguments. Sigh. - * io.c (yylex): nuked bstart variable, put all uses of mend variable - into TANDEM ifdef. - * main.c (load_environ): removed cp variable, value never used. - * random.c: Remvoed uses of `inline' keyword. - * Makefile.am (install-exec-hook, uninstall-local): new targets. - Adds creation of gawk-X.Y.Z and awk links, as in 3.0.x. - * configure.in (GAWK_AC_TYPE_SSIZE_T): Added. - m4/ssize_t.m4: new file. - -Wed Nov 22 14:47:18 2000 Arnold D. Robbins - - After consultation with Brian Kernighan and Michael Brennan, - nuked the abort keyword. - - * awk.h (Node_K_abort): removed. - * eval.c (aborting): removed decl. - (interpret): Removed Node_K_abort case. - * io.c (do_input): Removed checks for aborting. - * main.c (aborting): removed. - (main): Removed checks for aborting. 
- * profile.c (pprint): Removed Node_K_abort case. - * awk.y (LEX_ABORT): All stuff removed. - -Wed Nov 22 10:45:57 2000 Arnold D. Robbins - - * ext.c (dummy): Move inside #ifdef DYNAMIC. Helps on - PCs and other platforms that don't do dynamic loading. - * awk.h (RED_TCP): New flag, means use shutdown. - io.c (redflags2str): Add RED_TCP. - (SHUT_RD, SHUT_WR, SHUT_RDWR): Add conditional defines. - (redirect): Add RED_TCP to tflag if appropriate. Add more - #ifdef HAVE_SOCKETS as needed. - (close_redir): If RED_TCP set, shutdown(2) on each end of the socket. - -Tue Nov 21 16:25:41 2000 Arnold D. Robbins - - * awk.y: for (iggy in foo) loops: add test that index - in delete statement is a simple variable. - -Tue Nov 14 16:11:39 2000 Arnold D. Robbins - - * awk.h: Add appropriate conditional versions of the gettext - functions if we don't have or if ENABLE_NLS - is not defined or zero. - * configure.in: Add check for libintl.h header. - - From Scott Deifik for PCs. - * awk.h (lintwarn): Call set_loc unconditionally, makes - compilation work on PCs. - * builtin.c (do_dcgettext): Compile out cat_tab and code - if not ENABLE_NLS. - * ext.c: for MSC, no long long variable. - * random.c: use clock() instead of gettimeofday(). - * builtin.c: Fixed prototypes for new random functions (ADR). - -Sun Nov 12 17:45:44 2000 Arnold D. Robbins - - * builtin.c (parse_next_arg): Fix call to >= num_args so - running out of args check is correct, instead of core dumping. - (format_tree): Save and restore `the_args' and `args_size' - if a nested call is in progress, see explanatory comment. - See also tests/addcomma. - * Makefile.am: Fix things so that gawk/pgawk built first, - even if `make check' called before make. Add some - commentary. - -Wed Nov 8 14:39:20 2000 Arnold D. Robbins - - * configure.in: Only add -rdynamic for linux. - * dfa.h, dfa.c: upgraded to versions in grep 2.4.2. - -Tue Nov 7 18:17:17 2000 Arnold D. Robbins - - * All: Switched to ANSI function headers and added - `ansi2knr' automake option. Really cool. - -Tue Nov 7 16:57:49 2000 Arnold D. Robbins - - * io.c (redirect): Check for O_APPEND in flags when doing - fdopen() of /dev/fd/N. Thanks to bug report from - "John H. DuBois III" . - -Tue Nov 7 14:09:14 2000 Arnold D. Robbins - - * awk.h (os_is_setuid): declare function. - * main.c (main): call it if do_lint and warn if true. - * awkgram.y (tokentab): - - Made sure all extensions are actually marked as such. Ouch. - - Changed "sort" to "asort". Potential to break too much old code. - * getopt.h, getopt.c, getopt1.c: replaced with current versions - from glibc CVS archive. - -Mon Nov 6 18:14:33 2000 Arnold D. Robbins - - * random.c: Replaced with recent version from FreeBSD. - -Mon Nov 6 15:37:12 2000 Arnold D. Robbins - - Major simplification of automake machinery. - - * configure.in: - - INSTALL is forced only if not provided in environment - - lots of Makefile.in files removed since move to automake 1.4a - * Makefile.am, */Makefile.am: Moved directories that don't need - the automake machinery into EXTRA_DIST as appropriate and - removed the Makefile{,.am,.in} files as needed. - * eval_p.c, profile_p.c: New files to make it easier with automake - to compile pgawk. - -Tue Oct 24 12:20:18 2000 Arnold D. Robbins - - * awkgram.y (valinfo, var_comp, dump_vars): New functions to dump - the list of global variables. - * awk.h: Declare dump_vars. - * main.c (optab): new option "dump-variables". - (main): Code to handle it, set the output file and then call - dump_vars() at the end. 
- (usage): New option added to usage message. - -Sat Oct 21 22:59:59 2000 Arnold D. Robbins - - * awkgram.y (parms_shadow): For a function, check if any - parameters shadow global variables and print a warning. - (shadow_funcs): Go through all functions and call parms_shadow(). - (isnoeffect, isassignable): Add Node_LINT and NODE_BINMODE. - * main.c (main): If do_lint, call shadow_funcs(). - * awk.h: add declaration of shadow_funcs(). - * configure.in: added m4/Makefile and awklib/eg/network/Makefile - to list of generated makefiles. - -Tue Oct 17 10:47:35 2000 Arnold D. Robbins - - * array.c (assoc_lookup): Reverted change that did dupnode of - array indices. Creates significant problems if index is - numeric value and CONVFMT changes. Added fix to set - bucket->ahname->stfmt to -1 so that force_string never recalculates - the string value, and also turned off NUM and turned on STR. - See test/arynasty.awk. - -Mon Oct 16 12:21:26 2000 Arnold D. Robbins - - * All: Cleaned up various lint warnings for consistent phrasing. - * awk.y (in_end_rule): New variable for warning about unredirected - getline. It's ok in a BEGIN, but not in an END. - -Sun Oct 15 14:14:05 2000 Arnold D. Robbins - - * field.c (set_FS): Add lint warning for FS = "". - (do_split): Ditto for 3rd arg = "". - -Fri Oct 13 09:17:04 2000 Arnold D. Robbins - - * io.c (close_redir): Clear rp->fp on all closes. Remove - rp from list if either closing both ends or both ends - have been closed separately. Add exitwarn message for - co-process. - (flush_io): Add warning message if fflush of co-process - fails. Rationalize return value to either 0 or -1. - * builtin.c (do_gensub): 3rd arg of zero generates a - warning. - (do_fflush): rationalize return value: -1 for unopen or read-only - redirection, status of fflush otherwise. - -Wed Oct 11 22:11:19 2000 Arnold D. Robbins - - * awk.y (for loop): Check that there is a body as - part of the `is it a delete statement' check. - -Thu Oct 5 11:56:42 2000 Arnold D. Robbins - - * awk.h, awkgram.y, configure.in, eval.c: enabled - `for (i in_sorted array)' loops for those who - are Strong In The Way Of The Source. So there. - -Mon Oct 2 10:09:32 2000 Arnold D. Robbins - - * io.c (do_close): make close(x) for non-open x return -1 - and update ERRNO. close(FILENAME) no longer does anything - magic; this is all for better consistency with other awks - and is more logical, anyway. - -Thu Sep 28 17:27:16 2000 Arnold D. Robbins - - * io.c (close_one): Added a lint warning if it becomes - necessary to start multiplexing fd's, per ancient suggestion - from Scott Deifik, . - -Tue Sep 26 14:41:41 2000 Arnold D. Robbins - - * profile.c: Move enum for redirection placement to top - of file, and make the value a parameter to pp_redir. - Fix all the calls. This gets `|&' right everywhere. - -Sun Sep 24 16:38:04 2000 Arnold D. Robbins - - * awk.h (freenode): set the flags straight to UNINITIALIZED. - * node.c (unref): Fix test for MALLOC|TEMP to test the - actual flags, not zero. - * builtin.c (format_tree): ala print and concat, dupnode - the temp nodes from tree_evaling the arguments. See - test/nasty2.awk. - -Mon Sep 18 10:16:58 2000 Arnold D. Robbins - - * awkgram.y (snode): Make match 3rd arg and close 2nd arg fatal - errors if --tradtional. - -Thu Sep 14 12:22:42 2000 Arnold D. Robbins - - * eval.c (update_ERRNO): Call gettext on result of strerror. - i18n rules. - -Wed Sep 13 14:56:11 2000 Arnold D. Robbins - - * eval.c (r_tree_eval): Case for Node_concat. 
Dupnode the - strings ala do_print to get more consistent results. - Compare gawk 3.0.6 to nawk/mawk on test/nasty.awk. - Thanks to Andrew Sumner (andrewsumner@yahoo.com) for - pointing this one out. - -Wed Sep 13 10:06:47 2000 Arnold D. Robbins - - * io.c (two_way_close_type): New enumerated type. - (close_redir): New third param of type two_way_close_type. - Add smarts to two-way case for different close types. - Only remove it from the redir list if closing is for both ends. - (gawk_pclose): Check that rp->iop != NULL before closing, - all three versions. - * awkgram.y (tokentab): Allow 2nd argument to close. - (snode): Add lint warning. - -Sun Sep 10 14:16:10 2000 Arnold D. Robbins - - * field.c (set_FIELDWIDTHS): Generate a fatal error upon - encountering a negative width. - -Sun Sep 10 10:37:35 2000 Arnold D. Robbins - - * awkgram.y (snode): If first argument to dcgettext is a - string constant and --gen-po, dump the string constant to - the .po file too. - * main.c (nostalgia): Add call to fflush(stderr). - * eval.c (r_tree_eval): Add entries for Node_LINT and for - NODE_TEXTDOMAIN. - -Thu Sep 7 10:46:20 2000 Arnold D. Robbins - - * builtin.c (do_dcgettext): Per suggestion from Ulrich Drepper, - make the awk interface: - - str = dcgettext(string [, domain [, category]]) - -Wed Sep 6 16:28:12 2000 Arnold D. Robbins - - Bring gettext features out to the awk level! - - * awk.h: Add declarations of new functions `do_dcgettext' - `do_bindtextdomain', `set_TEXTDOMAIN' and variables - `TEXTDOMAIN', `TEXTDOMAIN_node'. New NODETYPE enum - `Node_TEXTDOMAIN'. - * eval.c (nodetypes): add Node_TEXTDOMAIN at end. - (set_TEXTDOMAIN): new function. - (r_get_lhs): add case for Node_TEXTDOMAIN. - * main.c (varinit): add entry for TEXTDOMAIN. - * node.c (format_val): If INTLSTR use dcgettext of string - and TEXTDOMAIN. - * awkgram.y (tokentab): Add entries for "dcgettext" and - "bindtextdomain". - * builtin.c (do_dcgettext, do_bindtextdomain): new functions. - -Tue Sep 5 17:01:34 2000 Arnold D. Robbins - - * profile.c (pp_string_fp): Use lower case versions of - isascii and isprint to avoid printing high-bit-set - characters. Make it smarter to break strings at 70 - chars or after embedded newline, for --gen-po. - Fix the calls to it everywhere for new boolean option - to yes/no break lines. - * m4/strtod.m4: new file, defines GAWK_AC_FUNC_STRTOD_C89. - * configure.in: GAWK_AC_FUNC_STRTOD_C89 call added - * acinclude.m4: include strtod.m4. - * acconfig.h: add entry for STRTOD_NOT_C89. - Remove entries for BITOPS and NON_DEC_DATA. - * missing/missing.c: add check for STRTOD_NOT_C89, use ours - if set. - * missing/strtod.c: make smarter for input like 0x345. - * awk.h: [STRTOD_NOT_C89]: define strtod gawk_strtod to get - our version. Avoids linker weirdness. - -Mon Sep 4 09:16:43 2000 Arnold D. Robbins - - * field.c (set_record): fix from Utz-Uwe Haus - to make sure there's - always enough room in the record. - * builtin.c (nondec2awknum): Fix octal conversions to exit - when hitting a non-digit, and not go to decimal. Make - check for non-octal better. Based on bug report from - Morris_Lee@tvratings.com. - -Sun Sep 3 13:52:11 2000 Arnold D. Robbins - - * builtin.c (format_tree): Allow positional parameters for - %*.* kinds of things. - - Made octal/hex constants and strtonum on by default. Made - --enable-non-decimal-data a runtime switch `--non-decimal-data'. - - * configure.in: Removed AC_ARG_ENABLE for --enable-bitops and - --enable-non-decimal-data. - In .developing check, remove the AC_DEFINEs. 
- * awk.h: Decls for bitwise functions now there by default. - Add decl of `do_non_decimal_data'. - * main.c (do_non_decimal_data): new variable - (optlist): add new entry for `--non-decimal-data'. - (main): turn off `do_non_decimal_data' if `do_traditional'. - (usage): add the new option. - * node.c (r_force_number): make check for non-decimal data a - runtime check based on do_non_decimal_data. - * awkgram.y (yylex): make non-decimal constants a runtime check. - * builtin.c: remove the ifdefs around the bit functions and - nondec2awknum. - -Tue Aug 29 18:45:56 2000 Arnold D. Robbins - - * configure.in: go back to ARRAYDEBUG if .developing set. - * awkgram.y: use ARRAYDEBUG for adump(), use multiple tests - for stopme(). - -Mon Aug 28 17:09:06 2000 Arnold D. Robbins - - * field.c (do_split): Add check for first arg is null string, - if so, skip the work and return zero. - -Mon Aug 14 23:01:55 2000 Arnold D. Robbins - - * Add %COUNT$... handling to printf. - awk.h (printf_count): new define in NODE structure. - (format_tree): added decl. - awkgram.y (count_args): new function to set printf_count in - a node. - [print productions]: call the function. - (snode): for do_sprintf, call count_args, set the count - in the lnode. - builtin.c (format_tree): new fourth arg is argument count. - Add smarts to handle the `$' in a format. - (do_sprintf): use new argument to format_tree. - node.c (format_val): ditto. - -Sun Aug 13 11:10:41 2000 Arnold D. Robbins - - Changes from Alan J. Broder (ajb@woti.com): - - Array third arg to match puts subtexts into the array: - * awk.y (tokentab): "match" gets third arg, and lint warning - * builtin.c (do_match): if third arg there, fill it with subtexts - - New builtin sort function: - * awk.h (do_sort): declared. - * array.c (do_sort, dup_table, merge, merge_sort, assoc_from_list, - assoc_sort_inplace): new functions. - - * eval.c (tree_eval): in debug code, make uninitialized var - a warning, not a fatal error. Breaks too many things. - -Wed Aug 9 10:51:41 2000 Arnold D. Robbins - - * eval.c (func_call): Increment the exec_count on the - function's node; this allows printing a call count for - functions. - profile.c (pp_func): print the count for functions. - * ALL: Changed DEBUG to GAWKDEBUG in all gawk files, so that - I don't get regex/dfa debugging. In some cases, changed - memory-related stuff to MEMDEBUG. Still have work to do. - * awk.h, node.c, profile.c: removed exec_count_init variable; - code has been cleaned up to not need different values for - profiling/not profiling. - -Thu Jul 5 21:10:59 2000 Arnold D. Robbins - - * eval.c (casetable): Removed the USE_PURE_ASCII stuff; it - was never documented. Latin 1 forever. - * main.c (main): only call `init_profiling' after arg parsing - if `do_profiling' is still false. Avoids resetting `prof_fp' - back to stderr. - -2000-02-17 Akim Demaille - - * m4: New directory. - * acinclude.m4: Removed, replaced by m4/*.m4. - * Makefile.am: Adjusted. - Added ACLOCAL_AMFLAGS. - * configure.in Adjusted. - Use AC_SYS_LARGEFILE not GAWK_AC_SYS_LARGEFILE, jm_FUNC_MKTIME, - not GAWK_FUNC_MKTIME. - * acconfig.h: Removed _FILE_OFFSET_BITS, _LARGEFILE_SOURCE and - _LARGE_FILES now templated by m4/largefile.m4. - -2000-02-15 Arnold Robbins - - * MOVED TO AUTOMAKE AND GETTEXT. - Just about every file touched. Work done by Arno Peters. - -Sun Jan 2 14:48:23 2000 Arnold D. Robbins - - First edit of the new millenium! - * awk.y (yylex): if lint checking, be obnoxious about gotos. - -Mon Oct 25 19:12:02 1999 Arnold D. 
Robbins - - * awk.h: remove C_ALLOCA ifdef. - * main.c (main): remove C_ALLOCA code. - * io.c (do_input): ditto. - -Mon Aug 9 17:36:24 1999 Arnold D. Robbins - - * bisonfix.sed: unconditionally #undef YYSTACK_USE_ALLOCA. - * configure.in: remove all alloca and ALLOCA related stuff. - * Makefile.in: ditto - -Thu Jul 29 18:32:05 1999 Arnold D. Robbins - - * awk.h (NODE): exec_count now in #ifndef NO_PROFILING. - * Makefile.in: changes to only recompile eval.c and profile.c to a - special version for profiling. - * custom.h [MSC_VER]: turn on NO_PROFILING to omit the exec_count - and save space. - * node.c (more_nodes): move setting of exec_count to - #ifndef NO_PROFILING. - -Thu Jul 1 12:12:05 1999 Arnold D. Robbins - - * configure.in (AC_PREREQ): update to 2.13. - GAWK_AC_C_STRINGIZE: convert to AC_C_STRINGIZE. - * aclocal.m4 (GAWK_AC_C_STRINGIZE): remove definition, now - part of autoconf. - * acconfig.h (HAVE_STRINGIZE): ditto. - -Wed Apr 28 11:08:05 1999 Arnold D. Robbins - - * array.c (assoc_lookup): fix call to free_temp(subs) to after - last use of subs. - -Sun Apr 25 16:48:06 1999 Arnold D. Robbins - - * io.c (redirect): add lint warning when same file is used for - > and >>. - -Thu Apr 22 15:05:30 1999 Arnold D. Robbins - - * array.c (assoc_lookup): Fix call to fatal to lintwarn instead. - * node.c (r_force_number): Use `0 &&' to disable warnings about - conversions: they're overzealous, methinks. - -Thu Apr 8 14:27:58 1999 Arnold D. Robbins - - New features for profiling: - * awk.h (NODE): add `exec_count' member. - (freenode): clear `exec_count' upon free. - * awk.y (func_count): new variable, counts total number of functions. - (func_install): increment func_count. - (struct finfo): information for use in sorting functions when - pretty printing. - (fcompare): compare two finfo structures. - (dump_funcs): print the functions in sorted order for profiling. - (getfname): return the name of a builtin function. - * eval.c (INCREMENT): new macro for counting execution of nodes. - (interpret): call INCREMENT() appropriately. - * main.c (do_profiling): new flag if doing profiling. - `--profiling': new option added to getopt_long machinery. - (main): For profiled version, set do_profile and output file. - Call `dump_prog' and `dump_funcs' if do_profiling at end. - (usage): add new argument. - * node.c (more_nodes, freenode): set exec_count to zero. - * profile.c: new file, does pretty printing and prints counts. - * Makefile.in: update to create two versions of gawk, regular - and `pgawk' which does profiling. - -Wed Mar 10 21:38:14 1999 Arnold D. Robbins - - * io.c (close_redir): use update_ERRNO() instead of manually - doing it. - -Mon Dec 21 15:58:21 1998 Arnold D. Robbins - - * configure.in: add BeOS to list of cases where we hardwire - GETPGRP_VOID. - custom.h: remove the #define from __be_os case. Cleaner to - do it all in configure. Based on email from Martin C. Brown, - mc@whoever.com. - -Mon Nov 30 20:52:52 1998 Arnold D. Robbins - - * eval.c (update_ERRNO): new function, mainly for use by - extension functions. - * awk.h: add decl. - -Tue Nov 24 18:13:29 1998 Arnold D. Robbins - - * Changes based on submission from Christos Zoulas at D.E. 
Shaw that adds the following features:
-
-	- checking for use of uninitialized variables
-	- checking if a string that's not a number converts to 0
-	- ability to load a dynamic library to add built-ins
-	- VERSION variable (may or may not stay)
-	Additional change:
-	- --lint=fatal makes lint errors become fatal
-	- LINT="fatal" has the same effect, any other positive value makes lint errors be just warnings
-
-	* Makefile.in (includedir): new variable for gawk header files
-	(ext.c, ext.o): new source and object files
-	(OTHERS, extension): new directory for macro with example extension
-	(install): install header files
-	* acconfig.h (DYNAMIC): new macro, true if we can do dynamic loading
-	* array.c (assoc_lookup): new parameter `reference' is true if we want to do reference checking. Add appropriate reference checking code.
-	* awk.h (UNINITIALIZED): new flag
-	(lintfunc): function pointer for correct function to use
-	(lintwarn): new macro to produce warnings
-	(result): new macro for func call result, used in commented out code in eval.c.
-	(getnode, freenode): revised to set UNINITIALIZED.
-	(get_lhs): third arg for reference checking, change all calls
-	-- Add appropriate decls of new/changed functions
-	* awk.y (tokentab): new builtin "extension" for adding extensions
-	(node_common): set flags to UNINITIALIZED for Node_var.
-	* configure.in (dynamic linking): new check. Probably should be a separate macro.
-	* eval.c (flags2str): add UNINITIALIZED to the table.
-	(r_tree_eval): add checks for UNINITIALIZED.
-	(push_args): appropriate changes for UNINITIALIZED to work.
-	(r_get_lhs): new third argument for reference checking.
-	(set_LINT): add code to handle setting `lintfunc' appropriately.
-	* ext.c: new file, for doing dynamic library extensions.
-	* extension/*: new directory with simple example code.
-	* main.c (VERSION_node, EXTENSION_node): new nodes for new vars.
-	(optab): change for "lint" to allow optional argument.
-	(lintfunc): definition.
-	(main): add case in option processing for --lint.
-	(varinit): add entries for VERSION and EXTENSION.
-	* node.c (r_force_number): checks that string really is a number.
-	(morenodes): set UNINITIALIZED in the flags.
-	* re.c (all): change `result' to `res' globally to avoid conflict with new macro.
-	* GLOBAL: change lint calls from warning() to lintwarn().
-	* GLOBAL: change all calls to get_lhs() to have 3rd arg.
-	* GLOBAL: change all calls to assoc_lookup() to have 3rd arg.
-
-Sun Nov 22 17:07:39 1998  Arnold D. Robbins
-
-	* patchlev.h: renamed from patchlevel.h to make life easier for the PC guys.
-	(main.c): changed to include patchlev.h.
-	(Makefile.in): changed to ref patchlev.h where needed.
-
-Sat Nov 7 21:29:52 1998  Arnold D. Robbins
-
-	* eval.c (r_get_lhs): case Node_field_spec. Fix the lint warnings for field reference of null string or non-numeric value. When turned on, $0 generated a warning! Oops.
-
-Thu Nov 5 16:58:38 1998  Arnold D. Robbins
-
-	* main.c (init_fds): new function to pre-open 0, 1, and 2 on /dev/null if they're not open. Robustness, more or less.
-	(main): call init_fds.
-	* io.c (str2mode): add smarts for two-letter strings such as "rw", "r+", "wr", "w+" and "a+".
-
-Mon Nov 2 16:55:46 1998  Arnold D. Robbins
-
-	* builtin.c (do_*): added lint checks for non-numeric and/or non-string arguments, as appropriate. This should have been done long ago.
-
-Tue Oct 20 21:56:06 1998  Arnold D. Robbins
-
-	* awk.h (LINT_node): new variable for LINT special var
-	(Node_LINT): new node type.
- (set_LINT): declare function. - * main.c (varinit): add LINT variable. - (usage): print an emphatic pointer to the manual for bug reports. - * eval.c (nodetypes): new entry for Node_LINT. - (r_get_lhs): case added for Node_LINT. - (set_LINT): set do_lint from LINT variable. - -Mon Oct 19 22:35:46 1998 Arnold D. Robbins - - * configure.in: for GCC, add -Wall to get warnings for development. - * Makefile.in (awktab.c): move sed stuff to separate script. - * bisonfix.sed: new script, with old fix and Solaris x86 fix. - * awk.h (nodetype2str): add declaration. - (load_procinfo): add declaration. - -Tue Oct 13 22:28:56 1998 Arnold D. Robbins - - * Changes to make PROCINFO["FS"] reflect the use of FIELDWIDTHS or FS. - eval.c (assign_val): new function that does the mechanics of - assignment - main.c (load_procinfo): add setting of PROCINFO["FS"] to "FS". - field.c (update_PROCINFO): new function to update the array. - (set_FS): call update_PROCINFO. - (set_FIELDWIDTHS): ditto. - -Sun Sep 27 10:18:05 1998 Arnold D. Robbins - - * awk.h (reisstring): new prototype. - * re.c (reisstring): new function, returns true if the re did - a simple string match. This is rather simplistic in its logic. - * io.c (get_a_record): in the case that RS is a regexp, AND - the re matched at the exact end of the buffer, add a call to - `reisstring' in case it's a simple string match. If so, we - don't need to read more into the buffer because we don't - have a regex like `x.*y' that might extend longer. - This should be very helpful for interactive /inet clients - where something like `RS = "\r\n"' happens. - -Thu Aug 13 22:07:40 1998 Arnold D. Robbins - - * io.c (socketopen): fixes from Juergen Kahrs to socket - opening code for "any host". - -Tue Jul 14 19:02:33 1998 Arnold D. Robbins - - * aclocal.m4 (GAWK_AC_LIB_SOCKETS): removed the caching; - configure gave different results the second time it was run! - -Fri Jul 10 09:11:06 1998 Arnold D. Robbins - - * eval.c (interpret): minor cleanups: add variable name to - fatal error Node_K_array_for and other minor changes. - -Mon Jun 22 16:53:34 1998 Arnold D. Robbins - - * Makefile.in (tags, TAGS): add $(LIBSRC). - -Tue Jun 2 15:23:05 1998 Arnold D. Robbins - - * io.c (devopen): relax previous change, don't require "any", - just that a port be there. The user can put 0 if they - don't care. - -Wed May 27 21:33:45 1998 Arnold D. Robbins - - * io.c (devopen): for /inet, require that local and remote - ports and the remote hostname be there, and that `any' - be used for a port if they don't care. - -Thu May 21 14:13:46 1998 Arnold D. Robbins - - * node.c (parse_escape): Add warning that is always on - for \q for any unknown q inside string or regex constant. - I got bit by this myself once too often. Or else I'm - just getting old and senile. - -Mon May 4 12:42:49 1998 Arnold D. Robbins - - * awk.h (NODETYPE): Sorted the Node_xxx entries for the - builtin variables. Gotta look nice, don't we? - * eval.c (nodetypes): ditto. - (genflags2str): added code to check that we don't - overflow the static buffer. This is just a debugging - routine, not worth the hassle of dynamic allocation. - -Mon Mar 2 16:06:16 1998 Arnold D. Robbins - - * Makefile.in (dist): remove any embedded copied RCS or CVS - directories. - -Mon Feb 23 00:09:52 1998 Arnold D. Robbins - - * awk.h (genflags2str): add declaration. - * eval.c (genflags2str): new function. - (flags2str): use new general purpose function. - * io.c (redflags2str): same. - -Sun Feb 22 23:57:29 1998 Arnold D. 
Robbins - - Significant changes to add two-way i/o and sockets!!! - - * Makefile.in: add @SOCKET_LIBS@ to LIBS variable. - * acconfig.h: add HAVE_SOCKETS and HAVE_PORTALS defs. - * aclocal.m4: new macro GAWK_AC_LIB_SOCKETS. - * awk.h: new node type, Node_redirect_twoway, and new redirection - flags: RED_TWOWAY, and RED_SOCKET. - * awk.y (parser): add TWOWAYIO token and appropriate productions. - (yylex): recognize `|&' token if not traditional. - * builtin.c (do_print, do_printf): flush buffer if TWOWAYIO. - * configure.in: add header checks for networking header files, - add --enable-portals switch, call GAWK_AC_LIB_SOCKETS - * eval.c (nodetypes): add string constant for Node_redirect_twoway. - * io.c (redflags2str): new function. - (redirect): better error message in default case, add code for - Node_redirect_twoway. - (socketopen): new function. - (iop_open, devopen): add recognition of `/inet/...'. - (two_way_open): new function. - -Sat Dec 13 21:15:07 1997 Arnold D. Robbins - - * awk.h (struct node): new member, `param_list' in union `x', becomes - `node->parmlist' in the code. - * awk.y (func_install): rearranged a bit, to build up a list of - the function parameter names and to save it in the `parmlist' field. - * eval.c (push_args): new parameter, `varnames', which is the list - of variable names. Use this to set the vname field of each - parameter's value as it's created. Special case arrays to include - where they came from, mainly for array vs. scalar diagnostics. - (r_tree_eval): don't set the `vname' field for parameters. - (pop_fcall): free the `vname' field if it's an array. - (func_call): pass in the `parmlist' field to call of push_args(). - (r_get_lhs): for Node_subscript, change error message to use - the `vname' field. - (stopme): new do-nothing function for use with debugging code - and setting breakpoints. - -Thu Dec 4 15:18:17 1997 Arnold D. Robbins - - * awk.y: fixed several lint checks and moved some into - test for do_lint_old. - * eval.c (fmt_index): add value of bad format spec to - error message. - -Tue Nov 18 22:19:02 1997 Arnold D. Robbins - - * Makefile.in (install): strip the installed binary. - From Anatoly A. Orehovsky (tolik@mpeks.tomsk.su). - -Sun Nov 16 22:12:39 1997 Arnold D. Robbins - - * array.c (in_array, assoc_lookup): add symbol->vname to - fatal calls for scalar in array context. - -Wed Nov 12 22:18:33 1997 Arnold D. Robbins - - * awk.h [ISASCII]: on all IS* macros, add cast to unsigned char. - [TOUPPER, TOLOWER]: new macros using unsigned char. - * awk.y: change to use of IS* vs. is* macros. - * builtin.c (nondec2awknum): change to use of IS* vs. is* macros, - change casts for casetable[] from int to unsigned char. - use new TOLOWER, TOUPPER macros - * dfa.c [ISASCII]: on all IS* macros, add cast to unsigned char. - (lex): change isdigit to ISDIGIT. - [TOUPPER, TOLOWER]: new macros using unsigned char, now used. - * eval.c (fmt_ok): change to use of IS* vs. is* macros. - * field.c (sc_parse_field): change to use of IS* vs. is* macros, - change casts for casetable[] from int to unsigned char. - (set_FS): change to use of IS* vs. is* macros. - * io.c (get_a_record): change to use of IS* vs. is* macros, - change casts for casetable[] from int to unsigned char. - * main.c (main): change to use of IS* vs. is* macros. - * node.c (r_force_number, parse_escape): change to use of IS* vs. - is* macros. - * re.c (make_regexp): change to use of IS* vs. is* macros. - * regex.c [ISASCII]: on all IS* macros, add cast to unsigned char. 
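As an illustration (not part of the original entry), roughly what the two-way I/O and socket support described above looks like from the awk level; the sort coprocess, the daytime service, and the use of a second argument to close() are arbitrary examples and assume a gawk built with HAVE_SOCKETS:

        BEGIN {
            cmd = "sort"
            print "pear"  |& cmd          # `|&' writes to the coprocess
            print "apple" |& cmd
            close(cmd, "to")              # shut down the write side so sort sees EOF
            while ((cmd |& getline line) > 0)
                print line                # read the coprocess output back
            close(cmd)

            # /inet special files open sockets, e.g. a simple TCP client:
            svc = "/inet/tcp/0/localhost/daytime"
            svc |& getline now
            print now
            close(svc)
        }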
- -Sun Oct 19 12:36:47 1997 Arnold D. Robbins - - * ALL: Change email address to arnold@gnu.org in all relevant places. - -Wed Oct 15 03:38:12 1997 Arnold D. Robbins - - * awk.y (yylex): Don't allow newlines after ? or : if do_posix. - -Thu Oct 9 19:28:39 1997 Arnold D. Robbins - - * custom.h [SEQUENT]: removed; not needed any more since the - mmap code was ripped out. - -Wed Oct 8 17:22:03 1997 Arnold D. Robbins - - * configure.in: remove check for madvise; don't need it any more - after nuking use of mmap. - -Tue Oct 7 11:14:21 1997 Arnold D. Robbins - - * eval.c (flags2str): made the code table driven. Shortened a lot. - -Tue Sep 30 20:59:17 1997 Arnold D. Robbins - - * eval.c (r_get_lhs): case Node_field_spec. Add lint warnings - for field reference of null string or non-numeric value. - Based on patch submitted by Alan Broder, ajb@dtmr.com. - -Wed Sep 24 20:47:59 1997 Arnold D. Robbins - - * custom.h [TANDEM]: new changes. Finishes up Tandem - integration. - -Mon Sep 22 00:42:34 1997 Arnold D. Robbins - - * custom.h [__be_os]: remove BROKEN_TOKEN definition. - dfa.c, dfa.h: change `token' to `dfa_token' to avoid BeOS - compile problems. - -Thu Aug 7 22:35:17 1997 Arnold D. Robbins - - * Changes for BeOS from mc@whoever.com - awk.h (strncasecmp): bracket prototype. - custom.h [__be_os]: new stuff. - dfa.h, dfa.c [BROKEN_TOK]: new ifdefs to use dfa_token, not token. - -Fri Aug 1 13:32:49 1997 Arnold D. Robbins - - * Tandem changes: - awk.h [TANDEM]: misc additions, as needed. - io.c (get_a_record): changes for fixed length records; not used - on other systems. - main.c (MRL): new variable, TANDEM specific. - (main): update handling -mr option for TANDEM. - (load_environ): comment out whole routine if TANDEM. - missing.c [TANDEM]: new includes. - gawkmisc.c [TANDEM]: include `tmiscc'. - -Wed Jul 30 19:53:52 1997 Arnold D. Robbins - - * Close-on-exec changes: - awk.h: (os_close_on_exec, os_isdir): new functions. - gawkmisc.c: add include fcntl.h. - configure.in [AC_CHECK_HEADERS]: add fcntl.h. - io.c (devopen, iop_open): change to use os_isdir(), not S_IFDIR(). - (redirect, devopen, iop_open, gawk_popen): change all calls to - fcntl() to os_close_on_exec(). - -Tue Jul 29 11:09:45 1997 Arnold D. Robbins - - * eval.c (set_BINMODE): fixed check for digits to use isdigit() - instead of looping over digits and using strchr(). Duh. - -Sat Jul 26 22:52:08 1997 Arnold D. Robbins - - * eval.c (set_BINMODE): fix so that `-v BINMODE=w' works. - * node.c (r_force_number): add decl of strtod(); makes things - work on MIPS. - * Makefile.in (install-strip): new target. - -Fri Jul 18 13:28:05 1997 Arnold D. Robbins - - * io.c (redirect, devopen, iop_open, gawk_popen): set the - close-on-exec flag on all files and pipes opened for I/O. - Keeps children run via system() or other pipes from running out - of file descriptors. - - (Reported by Kenny McCormack, gazelle@yin.interaccess.com.) - -Tue Jul 8 22:18:00 1997 Arnold D. Robbins - - * awk.y [LEX_NEXT]: Removed support for `next file' as two words. - -Tue Jul 8 06:46:32 1997 Arnold D. Robbins - - * dfa.c: changes from pjr@jet.UK (Paul J Rippin) from an old - bug report against 2.14.0 that speed up initialization and - rewrite the inner loop into readable code. - -Thu Jul 3 11:44:50 1997 Arnold D. Robbins - - * Atari support moved into new `unsupported' directory. - awk.h, Makefile.in, gawkmisc.c, and missing.c modified. - -Sun Jun 29 14:17:37 1997 Arnold D. Robbins - - * awk.y (exp): fixed warning about `x = /foo/'. - -Wed Jun 25 09:07:57 1997 Arnold D. 
Robbins - - * PORTS: removed from distribution. - * Makefile.in (MISC): removed PORTS. - -Sun Jun 22 11:52:57 1997 Arnold D. Robbins - - * BINMODE changes - awk.h (Node_BINMODE): added. - (struct redirect): added mode field to save for io.c:close_one(). - (BINMODE, BINMODE_node, set_BINMODE): add declarations. - awk.y (isnoeffect): add Node_BINMODE. - eval.c (nodetypes): add Node_BINMODE string. - (r_tree_eval, r_get_lhs): add cases for Node_BINMODE. - (set_BINMODE): new function. - io.c (binmode): new function. - (nextfile, redirect, gawk_popen): add calls to binmode(). - main.c (BINMODE, BINMODE_node): add decls. - (main): add call to setmode() if BINMODE is set. - (varinit): add entry for BINMODE. - -Wed Jun 4 21:52:25 1997 Arnold D. Robbins - - * configure.in [AC_FUNC_MMAP]: removed call. - * awk.h [struct iobuf]: removed IOP_MMAPED flag and `getrec' member. - * io.c: removed all mmap related code. - -Sun Apr 27 16:23:56 1997 Arnold D. Robbins - - * aclocal.m4 [GAWK_AC_FUNC_MKTIME]: new macro. - * configure.in (GAWK_AC_FUNC_MKTIME): call it. - -Thu Apr 24 23:25:06 1997 Arnold D. Robbins - - * io.c (devopen): remove stat test for /dev/foo files. Finally. - -Fri Jul 26 09:23:15 1996 Arnold D. Robbins - - * Changes to add an abort statement, a la tawk - awk.h (Node_K_abort): new enum value for NODETYPE. - main.c (aborting): new flag variable. - (main): add logic to handle aborting. - eval.c (interpret): add case for Node_K_abort. - io.c (do_input): if aborting, break loop. - awk.y (tokentab): add entry for "abort" keyword - (PRODUCTIONS): add production for LEX_ABORT. - -Wed Jul 24 12:49:52 1996 Arnold D. Robbins - - * First cut at changes for i18n. - awk.h (do_intl): declare new flag variable. - [INTLSTR]: new flag def. - (m_tree_eval): fix definitions for INTLSTR. - (force_string): fix definitions for INTLSTR. - awk.y (yylex): add _"..." for international strings. - (dumpintlstr): new function. - main.c (do_intl): define new flag variable. - (optab): add "gen-po" entry. - (main): if do_intl, exit, don't run the program. - (gawkoption): add "gen-po" entry. - node.c (r_force_string): call gettext if flags indicate INTLSTR. - -Thu Mar 14 06:29:42 1996 Arnold D. Robbins - - * awk.h (do_mktime): added declaration of new function. - * builtin.c (do_mktime): new function. - * awk.y (tokentab): added "mktime" to list of gawk extensions. - * missing.c [HAVE_MKTIME]: added include of mktime.c if needed. - -Mon Feb 26 22:32:19 1996 Arnold D. Robbins - - * io.c (pidopen, useropen): added warnings to use PROCINFO[], - not special files. - * main.c (load_procinfo): new function. - * awk.y (variable): added call to load_procinfo() function. - -Mon Aug 7 15:23:00 2000 Arnold D. Robbins - - * Release 3.0.6: Release tar file made. - -Thu Aug 3 17:47:53 2000 Greg McGary - - * regex.c: patches for gcc bounded pointer handling. - -Thu Aug 3 13:09:09 2000 Arnold D. Robbins - - * array.c (in_array, do_delete): fix tests for index equality - when searching through the array to work correctly when - index is "". - -Fri Jul 14 21:40:17 2000 Pat Rankin - - * builtin.c (format_tree): Workaround a DEC C V5.7 bug by - splitting `strcpy() + 3' into two expressions (the builtin - inline strcpy evidently has erroneous return type of void * - instead of char *; reputedly fixed in V6.1). - - * eval.c (C): New macro. - [casetable]: Use it to add explicit casts for the character - values outside the range of 0 to 127. - * missing/strncasecmp.c [C, charmap]: Likewise. 
- - * io.c (redirect): Add EIO check on failed open for VMS. - -Fri Jul 14 11:57:23 2000 Arnold D. Robbins - - Efficiency hack: turn `for (iggy in foo) delete foo[iggy]' - into moral equivalent of `delete foo'. - * array.c (do_delete_loop): new routine. - * awk.h [NODETYPE]: new Node_K_delete_loop value. - Add declaration of do_delete_loop. - * awk.y [LEX_FOR]: Fix code to recognize special case. - * eval.c (nodetypes): new entry for Node_K_delete_loop. - (interpret): add case for Node_K_delete_loop, add more - diagnostic info in default (cant_happen) case. - -Tue Jul 11 22:15:10 2000 Pat Rankin - - * awk.y (nextc): Recast unsigned char values back to int to - prevent VAX C from truncating EOF to 255. - -Tue Jul 11 14:08:23 2000 Arnold D. Robbins - - * array.c (do_delete): switch to string comparison, not - cmp_nodes. - (assoc_find): add call to force_string on subscript. - * eval.c (interpret): Case Node_K_arrayfor: check for - Node_array_ref and fetch original_array. Yowser. - -Fri Jun 30 21:57:00 2000 Arnold D. Robbins - - * array.c (assoc_lookup): Don't force the subscript - to be a string. Not a good idea after the change - to using dupnode. - -Sun Jun 25 15:08:19 2000 Arnold D. Robbins - - * Release 3.0.5: Release tar file made. - -Wed Jun 14 13:03:45 2000 Arnold D. Robbins - - * field.c (set_record): manage a private buffer for $0. - Keeps things safe in case `getline var' rearranges the - IOBUF's contents that $0 is still pointing into. - -Tue Jun 13 16:27:55 2000 Paul Eggert - - Upgrade to latest and greatest version of largefile code. - - * configure.in (AC_CANONICAL_HOST): Remove. - (GAWK_AC_SYS_LARGEFILE): Defer until after AC_MINIX, - to avoid autoconf warnings. - - Rewrite largefile configuration so that we don't need to run - getconf and don't need AC_CANONICAL_HOST. - * config.guess, config.sub: Remove these files. - * Makefile.in (MISC): Remove config.guess, config.sub. - * m4/largefile.m4 (GAWK_AC_SYS_LARGEFILE_FLAGS, - GAWK_AC_SYS_LARGEFILE_SPACE_APPEND): Remove. - (GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES): New macro. - (GAWK_AC_SYS_LARGEFILE_MACRO_VALUE): Change arguments from - CODE-TO-SET-DEFAULT to VALUE, INCLUDES, FUNCTION-BODY. - All uses changed. - Instead of inspecting the output of getconf, try to compile the - test program without and with the macro definition. - (GAWK_AC_SYS_LARGEFILE): Do not require AC_CANONICAL_HOST or check - for getconf. Instead, check for the needed flags by compiling - test programs. - - (GAWK_AC_SYS_LARGEFILE): Define _XOPEN_SOURCE to be 500 to - work around glibc 2.1.3 bug. - - (GAWK_AC_SYS_LARGEFILE_FLAGS): Don't use -n32 on IRIX if the - installer said otherwise. - - (GAWK_AC_SYS_LARGEFILE_FLAGS): Work around a bug in the QNX shell, - which doesn't propagate exit status of failed commands inside - shell assignments. - -Wed Jun 7 13:23:09 2000 Arnold D. Robbins - - * Updated copyright dates in appropriate files. - -Mon May 22 17:29:43 2000 Arnold D. Robbins - - * Makefile.in (clean): get `*/core' too. - -Sun May 7 16:33:05 2000 Arnold D. Robbins - - * array.c (concat_exp): Change ref to `lnode->stlen' and - `lnode->stptr' for SUBSEP to use `var_value->...'. - -Tue May 2 09:54:29 2000 Arnold D. Robbins - - Fix referencing freed memory as shown by test/arynocls.* tests. - * awk.h [Node_array_ref]: new node type. - [orig_array]: new macro element in NODE structure. - * field.c (do_split): handle case for Node_array_ref, fetch - the original array. - * array.c (in_array, do_delete): ditto. - * eval.c (nodetypes[]): add Node_array_ref string. 
- (r_tree_eval): handle case for Node_array_ref. - (push_args): push arrays as Node_array_ref, and pass them on. - (pop_fcall): don't unref lnode if it's an array when releasing - local arguments. Check for both Node_array and Node_array_ref. - (r_get_lhs): choke on Node_array_ref as for Node_array. - For Node_subscript, handle Node_array_ref. - -Tue May 2 09:52:12 2000 Bruno Haible - - * io.c (redirect): After reopening a `struct redirect', move it to - the head of the list. - -Sun Apr 2 17:51:40 2000 Arnold D. Robbins - - * re.c (re_update): Check if IGNORECASE has changed, and - if so recompute the re. See test/igncdym.awk. - -Mon Mar 20 16:18:34 2000 Arnold D. Robbins - - * io.c (set_RS): Added a lint warning about multicharacter RS, - per suggestion from Akim DeMaille (akim@epita.fr). - -Sun Feb 13 14:40:32 2000 Arnold D. Robbins - - * eval.c (push_args): Fix from Nide Naoyuki , - re-assign `f' in case tree_eval moved fcall_list around. - -Sun Feb 6 11:39:33 2000 Arnold D. Robbins - - * eval.c (op_assign): Fix it right. For ++ and --, get the lhs - in the operations, do the op, and then return. For += etc, - get the rhs FIRST, since the lhs can move around as a result, - *then* get the lhs and do the operation. See test/opasnidx.awk. - -Tue Feb 1 18:41:40 2000 Arnold D. Robbins - - * eval.c (op_assign): reget the rval after regetting - the left hand side. See test/opasnslf.awk for why. - -Thu Jan 27 18:06:31 2000 Arnold D. Robbins - - * awk.y (yylex): Made ']' not one of the characters - that sets `want_assign' to false. `a[i] /= 2' was - broken. Per bug report from Kristofer T. Karas - . - -Wed Dec 22 15:06:37 1999 Arnold D. Robbins - - * awk.y: Removed declarations of functions before - definition of `tokentab[]'. They're redundant with - what's in awk.h. - -Thu Dec 9 17:01:07 1999 Arnold D. Robbins - - * node.c (parse_escape): Add lint warning for unrecognized - escape sequences. - -Mon Dec 6 15:17:34 1999 Arnold D. Robbins - - * main.c (usage): Changed bug reporting email addresses to - be a reference to `Bugs' node in the online and printed - doc, instead. - -Thu Dec 2 13:08:18 1999 Arnold D. Robbins - - * builtin.c (do_compl): test `d' for negative inside the do_lint - test, not uval. Ooops. - -Fri Nov 26 10:58:36 1999 Arnold D. Robbins - - * array.c (assoc_find): ALWAYS compare indexes as strings, - don't use cmp_nodes in case they are numeric. Oh my. - Talk about a Day 1 bug! - -Tue Nov 23 11:58:53 1999 Arnold D. Robbins - - * regex.c (SYNTAX): cast argument to `unsigned char' instead of - &-ing with 0xFF. Hopefully somewhat more portable, ala 21 Nov 99 - changes to awk.y. - -Sun Nov 21 22:25:27 1999 Paul Eggert - - * aclocal.m4 (AC_SYS_LARGEFILE_FLAGS): Work around a - problem with the QNX 4.25 shell, which doesn't propagate exit - status of failed commands inside shell assignments. - -Sun Nov 21 20:33:35 1999 Arnold D. Robbins - - * awk.h (nextc): remove declaration, don't need it here. - awk.y (nextc): Cast values to unsigned char so that latin-1 - characters in strings don't turn themselves into EOF. - Most notably y-umlaut, which is decimal 255. - -Mon Nov 1 20:00:25 1999 Arnold D. Robbins - - * regex.c (init_syntax_once): move below definition of - ISALNUM etc., then use ISALNUM to init the table, so that - the word ops will work if i18n'ed. - (SYNTAX): And subscript with 0xFF for Latin-1 characters. - -Mon Oct 25 18:37:13 1999 Arnold D. Robbins - - * awk.h, main.c, io.c: undo previous changes (22 Oct 1999). 
- * main.c (main): move call to `init_fields()' to before - arg parsing. This allows `-v NF=blah' to work ok. - -Fri Oct 22 17:43:40 1999 Arnold D. Robbins - - * main.c (arg_assign): Add new arg, `initing' for icky special - casing of -v of special variables. Use it to check for NF. - May need to add other cases later. - (pre_assign): change call arg_assign, passing initing=TRUE; - io.c (nextfile): change call arg_assign, passing initing=FALSE; - awk.h: Change prototype for arg_assign. - -Tue Oct 19 16:06:48 1999 Paul Eggert - - * io.c (close_redir): Don't munge errno between setting it and - using it. - -Wed Oct 6 17:47:47 1999 Arnold D. Robbins - - * main.c (arg_assign): return NULL on bad variable. Allows - things like `./3x=stuff' to work as a filename. - -Thu Sep 23 21:35:46 1999 Paul Eggert - - * aclocal.m4 (GAWK_AC_SYS_LARGEFILE_FLAGS): Work around GCC - 2.95.1 bug in HP-UX 10.20 or later. (Had to fix the fix. ADR. :-) - -Tue Sep 21 13:31:36 1999 Arnold D. Robbins - - * builtin.c (format_tree): For '0', only set zero_flag if we - haven't seen the field width or precision yet. - -Mon Aug 9 13:06:01 1999 Arnold D. Robbins - - * array.c (assoc_lookup): Removed code that gave each array - a private copy of each index. Balloons memory usage for - no good reason that I can see. Just use dupnode in all - cases. - * configure.in: check for $srcdir/.developing adds extra - defines for my testing/debugging use. Yes, hack alert. - -Sun Aug 1 11:02:02 1999 Arnold D. Robbins - - * node.c (dupnode): turn off FIELD when copying nodes. - * array.c (do_adump, assoc_dump): new functions for array debugging. - * awk.y (tokentab): conditionally add "adump" function for debugging. - * awk.h: delcare new functions. - -Thu Jul 29 23:26:40 1999 Arnold D. Robbins - - From wsanchez@apple.com: - * Makefile.in (install-strip): new target, coding stds. compatibility. - * config.guess, config.sub: Add MacOS X recognition. - -Thu Jul 29 19:09:19 1999 Arnold D. Robbins - - * awk.y (func_install): make `function foo(foo)' a fatal error. - eval.c (r_tree_eval): diagnose use of a function name as a - variable inside the function. - -Sun Jul 4 16:53:14 1999 Arnold D. Robbins - - * eval.c (eval_condition): add extra braces to avoid - gcc warning. I'm not going to bother for the library - code like dfa and regex. - -Wed Jun 30 16:14:36 1999 Arnold D. Robbins - - * Release 3.0.4: Release tar file made. This time for sure. - -Wed Jun 30 16:10:11 1999 Arnold D. Robbins - - * awk.h: add include of , and comment about config.h - having to be included before any system headers. Otherwise, - with egcs-2.91.66 and later on Linux systems, and possibly - others, things break badly, due to the LFS macros. - * awk.y, builtin.c, eval.c, field.c, io.c: removed include - of assert.h - -Wed Jun 9 11:39:19 1999 Paul Eggert - - Port the large-file code to AIX, HP-UX, and IRIX. - Add cross-compilation support for large files. - - * config.guess, config.sub: New files. - - * configure.in (AC_CANONICAL_HOST): - Add; GAWK_AC_SYS_LARGEFILE needs this. - (GAWK_AC_SYS_LARGEFILE): Renamed from GAWK_AC_LARGE_FILES. - - * aclocal.m4 (GAWK_AC_SYS_LARGEFILE): Renamed from GAWK_AC_LARGE_FILES. - Add support for AIX and HP-UX. - (GAWK_AC_SYS_LARGEFILE_FLAGS, GAWK_AC_SYS_LARGEFILE_SPACE_APPEND, - GAWK_AC_SYS_LARGEFILE_MACRO_VALUE): New macros. - - * acconfig.h (_FILE_OFFSET_BITS, _LARGEFILE_SOURCE, _LARGE_FILES): - New macros. - - * Makefile.in (MISC): add config.guess and config.sub so they get - included in the distribution. 
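A small sketch of the constructs that the func_install and r_tree_eval checks above now reject (the names foo and bar are arbitrary):

        function foo(foo) { return foo + 1 }   # parameter named after the function: now a fatal error

        function bar(x) {
            bar = x * 2                        # using the function's own name as a variable
            return bar                         #   inside the function is now diagnosed
        }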
- -Wed Jun 9 11:29:29 1999 Paul Eggert - - * io.c (iop_alloc): Don't mmap files whose sizes don't fit in `int'. - [ This isn't really needed, as HAVE_MMAP is #undef'ed at the top, - but it's there in case people want to take their life in their hands. ] - -Sun Jun 6 11:28:07 1999 Arnold D. Robbins - - * BETA Release 3.0.46: Release tar file made. - -Wed Jun 2 14:36:24 1999 Arnold D. Robbins - - * PORTS: Updated with a more recent list of systems - that gawk compiles and tests ok on. - -Tue Jun 1 14:24:59 1999 Arnold D. Robbins - - * BETA Release 3.0.45: Release tar file made. - -Tue May 25 16:32:37 1999 Arnold D. Robbins - - * builtin.c (format_tree): more smarts for weird cases, such as - zero precisions and zero values used with the `#' flag. - Thanks to Andreas Schwab (schwab@gnu.org) for pointing these out. - -Wed May 19 14:02:54 1999 Arnold D. Robbins - - * io.c (do_close): move test for `close(FILENAME)' to after - loop through all open redirections. Fixes problems in obscure - cases with redirections in END rules. - -Sun May 16 14:08:39 1999 Arnold D. Robbins - - * awk.y (yylex): fix group of characters including ',' to - set want_assign = FALSE. Fixes bizarre parsing problems in - function call lists, for example. - * io.c (get_a_record): repair logic for single-leading-newline - case. - -Tue May 11 16:48:11 1999 Arnold D. Robbins - - * aclocal.m4 (GAWK_AC_AIX_TWEAK): new macro. - * configure.in: call it - * Makefile.in: (awklib/all): pass CFLAGS on to sub-make so - that password programs will get AIX magic defines. Avoids - having to tweak program code for those in doc/gawk.texi. - -Mon May 3 16:56:23 1999 Arnold D. Robbins - - * array.c (do_delete): don't free_temp(subs) until after all - references to it are finished. - -Mon May 3 13:41:16 1999 Arnold D. Robbins - - * BETA Release 3.0.44: Release tar file made. - -Sun May 2 18:25:43 1999 Arnold D. Robbins - - * io.c (get_a_record): Do a really good job of stripping newlines - from the front of records when RS = "" and there's only one - newline at the front of the file, which the regex didn't catch. - -Wed Apr 28 12:27:49 1999 Arnold D. Robbins - - * configure.in: more HP stuff: fix the manual alloca code so that - gawk will compile and link on HP systems. See the comments. - -Sun Apr 25 13:39:16 1999 Arnold D. Robbins - - * Makefile.in (gawk): add $(CFLAGS) to linking step. - * configure.in: correctly do AC_FUNC_GETPGRP on HP systems too. - -Tue Apr 13 20:21:00 1999 Arnold D. Robbins - - * BETA Release 3.0.43: Release tar file made. - -Tue Apr 13 19:02:20 1999 Arnold D. Robbins - - * io.c (useropen, pidopen): add casts to int on arguments to - silence gcc warnings. - * regex.c (regcomp,regexec,regfree): add ifdef for APPLE. - -Thu Feb 4 10:38:02 1999 Arnold D. Robbins - - * custom.h: hacks for BeOS. Not documented in the manual right now. - * configure.in: hacks for BeOS. Check for HP-UX and define C_ALLOCA - if not using gcc. I wish they'd just fix bison already. - -Sun Dec 20 16:57:38 1998 Arnold D. Robbins - - * BETA Release 3.0.42: Release tar file made. - -Sun Nov 15 21:05:39 1998 Arnold D. Robbins - - * io.c (gawk_popen): Add WIN32 to list of systems that use - the non-real-pipe version. From the PC gawk guys. - -Wed Nov 4 11:32:24 1998 Arnold D. Robbins - - * BETA Release 3.0.41: Release tar file made. - -Tue Nov 3 16:24:35 1998 Arnold D. 
Robbins - - * eval.c (r_get_lhs): Fix the cases for the special variables, - don't unref their current value if it's the same as the internal - copy; perhaps the current one is used in a concatenation or some - other expression somewhere higher up in the call chain. Ouch. - See test/getnr2tm.awk. - -Sun Nov 1 15:24:52 1998 Arnold D. Robbins - - * builtin.c (format_tree): improve handling of zero-fill - when a precision is present. See test/zeroflag.awk. - -Wed Oct 28 20:40:17 1998 Arnold D. Robbins - - * eval.c (r_tree_eval): Case for Node_concat. Get lengths - separately, in case one expression has a side effect that - that changes another. Ugly, but it keeps gawk from core - dumping. See test/nasty.awk. - -Sun Oct 18 21:27:24 1998 Arnold D. Robbins - - * awk.y (append_right): bug fix, if `list' or `new' are NULL, - return `list', so that things don't break too badly. - * regex.c (re_compile_fastmap): remove unused variable `num_regs'. - -Thu Oct 8 19:36:57 1998 Arnold D. Robbins - - * BETA Release 3.0.40: Release tar file made. - -Mon Jul 27 10:14:33 1998 Arnold D. Robbins - - * node.c (parse_escape): Remove assignment with side effects - from ISXDIGIT test. Thanks to "Mihai T. LAZARESCU" - for pointing this out. - -Mon Apr 27 11:31:32 1998 Arnold D. Robbins - - * main.c (usage): fix the email address for the bug list. - (copyleft): update the copyright year. - -Mon Mar 23 21:22:32 1998 Arnold D. Robbins - - * eval.c (r_get_lhs): make sure that values of type - Node_param_list don't have the FUNC flag set. This means - we don't allow the use of a function name as a variable or - array from within the function. - -Sun Mar 22 19:12:32 1998 Paul Eggert - - * aclocal.m4 (GAWK_AC_LARGE_FILES): new macro that checks for - large file support, and updates CPPFLAGS, LDFLAGS, LIBS as - needed. - * configure.in: call GAWK_AC_LARGE_FILES. - * Makefile.in (CPPFLAGS, LDFLAGS): Let autoconf configure. - (COMPFLAGS): Add $(CPPFLAGS). - -Mon Mar 16 14:06:41 1998 Arnold D. Robbins - - * field.c (using_FIELDWIDTHS): new macro. - (using_fieldwidths): use new macro. - (do_split): in case for FS_DFLT, also check that - we're not using FIELDWIDTHS. Otherwise, split() would use - FIELDWIDTHS, not current value of FS. Oops. - -Sun Nov 16 20:08:59 1997 Arnold D. Robbins - - * builtin.c (sub_common): fix for count of matches in gsub - from Geert.Debyser@esat.kuleuven.ac.be. - -Wed Oct 15 03:38:12 1997 Arnold D. Robbins - - * field.c (set_FS): Use `sc_parsefield' if the value of FS is not - alphabetic OR if not ignoring case. Bug fix if IGNORECASE - is true and FS happens to be '^'. Sheesh, talk about obscure. - (rebuild_record): Add more smarts to the code that sets up the - fields. Thanks to Alan J. Broder (ajb@dtmr.com). - -Sun Oct 5 11:56:52 1997 Arnold D. Robbins - - * configure.in: if ISC add -D_SYSV3 to CFLAGS, per email from - Mario Vanoni (vanonim@dial.eunet.ch). - -Fri Sep 26 00:57:49 1997 Arnold D. Robbins - - * awk.y (append_right): return if either list is NULL. Prevents - syntax errors from causing core dumps. - -Wed Sep 17 15:34:15 1997 Arnold D. Robbins - - * field.c (rebuild_record): set things up so that all fields point - into the new record and release any changed fields without - causing memory leaks. Avoids problems when fields are extended - with the value of $0 or other fields and then $0 is assigned to. - -Mon Sep 15 16:12:55 1997 Arnold D. Robbins - - * builtin.c (do_print): when testing for NUMBER, make sure - it's not a string too. 
Thanks to Michael Brennan for clarifying the semantics.
-
-Sun Sep 14 19:55:12 1997  Arnold D. Robbins
-
-	* node.c (format_val): always format values ourselves: avoids problems if OFMT is bizarre, like %s.
-
-Sun Sep 14 00:08:53 1997  Arnold D. Robbins
-
-	* io.c (get_a_record): replace all occurrences of the test `grRS == FALSE' with `RS_is_null', which makes `RS = "\0"' actually work, is clearer code, and actually makes use of the `RS_is_null' variable!
-
-Sun Aug 17 07:15:12 1997  Arnold D. Robbins
-
-	* field.c (set_FS): Change logic to always set parse_field, even if FS hasn't changed. Thanks to Igor Sheyn for catching this.
-
-Wed Aug 6 21:04:37 1997  Arnold D. Robbins
-
-	* io.c (VMS et al gawk_popen): use pclose, not fclose, if iop_alloc fails.
-
-Wed Jul 30 19:53:52 1997  Arnold D. Robbins
-
-	* awk.y [variable]: fix case for subscript if $3 == NULL.
-
-Sun Jul 27 22:47:30 1997  Arnold D. Robbins
-
-	* awk.y (get_src_buf): don't close file if it's stdin.
-
-Sun Jul 27 22:47:15 1997  Pat Rankin
-
-	* io.c (#if VMS: vmsrtl_fileno): new routine.
-	(#if VMS: fileno): new macro substituted for stdio one.
-
-Thu Jul 17 20:05:59 1997  Arnold D. Robbins
-
-	* builtin.c (do_print): When OFMT != CONVFMT, create a new temporary node with just the numeric value valid and format it, and use that for printing. Avoids memory corruption.
-
-Wed Jul 16 10:01:16 1997  Arnold D. Robbins
-
-	* regex.c: When SYNTAX_TABLE is defined, but not emacs, then CHAR_SET_SIZE is not defined, though used in regcomp. It should be taken out of #ifdef SYNTAX_TABLE. Fix from bug group, from Akim Demaille, demaille@inf.enst.fr.
-	* awk.h (isnondecimal): make test a little smarter.
-	* builtin.c (nondec2awknum): add bailout for decimal numbers, e.g. `00.1'. Fix from Larry Schwimmer.
-
-Thu Jun 19 19:00:40 1997  Arnold D. Robbins
-
-	* eval.c (interpret): case Node_K_next, Node_K_nextfile: fatal error if called from BEGIN or END. (Fixed completely Mon May 3 13:31:42 1999.)
-
-Mon Jun 9 22:40:04 1997  Arnold D. Robbins
-
-	* builtin.c (nondec2awknum): Allow `f' and `F' in hexadecimal numbers. Gotta get more sleep...
-	* array.c (assoc_lookup): Fix from Tom Karzes (karzes@equator.com) for memory leak when forcing type to Node_var_array.
-
-Thu May 15 12:49:08 1997  Arnold D. Robbins
-
-	* Release 3.0.3: Release tar file made.
-
-Wed May 14 08:06:08 1997  Arnold D. Robbins
-
-	* io.c (do_close): add lint warning if closing something that isn't open.
-
-Tue May 13 12:14:12 1997  Arnold D. Robbins
-
-	* random.c, builtin.c: remove __GLIBC__ tests, since it breaks `make test'. I prefer consistency across platforms.
-	* Makefile.in (gawk): undid April 25 changes and added comment. Putting COMPFLAGS in breaks with -g on VMS POSIX.
-
-Sun May 11 14:48:04 1997  Darrell Hankerson
-
-	* io.c [MSC_VER]: add cases for WIN32.
-	* regex.c [MSC_VER]: add cases for WIN32.
-
-Sun May 11 07:04:01 1997  Arnold D. Robbins
-
-	* builtin.c (do_print): in the loop that evaluates each expression to be printed, do a dupnode to avoid bizarre output. Thanks to Michal for finding this problem.
-	* awk.y (yylex): fix scanning of hexadecimal constants.
-
-Wed May 7 15:09:25 1997  Arnold D. Robbins
-
-	* io.c (get_a_record): fix casetable indexing with cast to int. Keeps Michal happy.
-
-Tue May 6 16:40:19 1997  Arnold D. Robbins
-
-	* eval.c (func_call): removed unneeded variables.
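For reference, the OFMT/CONVFMT distinction that the do_print entry above depends on, as a small illustrative sketch (the values are arbitrary):

        BEGIN {
            OFMT    = "%.2f"        # used when print outputs a number
            CONVFMT = "%.6g"        # used when a number is converted to a string
            x = 3.14159265
            print x                 # prints 3.14
            s = x ""                # concatenation converts via CONVFMT
            print s                 # prints 3.14159
        }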
- -Mon May 5 21:17:37 1997 Pat Rankin - - * missing/strftime.c [case 'v', VMS_EXT]: for VMS date format, two - digit day of month should not be zero padded on the 1st through - the 9th. - -Mon May 5 06:33:47 1997 Arnold D. Robbins - - * regex.h, regex.c: merge with current GLIBC version. - -Mon May 5 06:33:47 1997 Pat Rankin - - * io.c (nextfile): move the check for null return from iop_open - in the normal case and add one for the "no args" case. - -Fri Apr 25 16:52:33 1997 Arnold D. Robbins - - * array.c (grow_table): add a bunch more large primes so arrays - can get really big. Thanks to christos@deshaw.com. - * all files: remove ifdef'ed out code and update copyrights. - * Makefile.in (gawk): add $(COMPFLAGS) to command line. - * eval.c (flags2str): added case for FIELD. - -Thu Apr 24 22:39:23 1997 Arnold D. Robbins - - * COPYING: changed to current official version from FSF. - * regex.c: merge with GLIBC version. - * awk.h [_GNU_SOURCE]: bracket definition inside ifdef. - (NODE.source_line): move name member out of `x' union and - into `nodep'; avoids problems doing diagnostics. - (nondec2num): put decl into #if BITOPS || NONDECDATA - * posix/gawkmisc.c, missing/system.c, missing/strtod.c, - missing/strerror.c: move to generic GPL statement at top. - * builtin.c (nondec2num): put into #if BITOPS || NONDECDATA - -Wed Apr 23 22:14:14 1997 Arnold D. Robbins - - * dfa.c: misc changes for really pedantic SGI compilers. - * builtin.c: bracket defs of random() etc for GLIBC. - * random.c: bracket whole file for GLIBC. - * configure.in: extra goop for GETPGRP test for VMS POSIX. - * custom.h [VMS]: remove hard definition of GETPGRP_VOID. - -Fri Apr 18 07:55:47 1997 Arnold D. Robbins - - * BETA Release 3.0.34: Release tar file made. - -Tue Apr 15 21:35:45 1997 Arnold D. Robbins - - NEW UNDOCUMENTED FEATURE. USE THE SOURCE LUKE! - * acconfig.h [NONDECDATA]: new macro. - * awk.h: add decl of do_strtonum. - * awk.y (tokentab): add entry for strtonum function. - * builtin.c (do_strtonum): new function. - * configure.in (non-decimal-data): new --enable-* option. - * node.c (r_force_number): change to allow non-decimal data inside - ifdef NONDECDATA. - -Tue Apr 15 06:32:50 1997 Pat Rankin - - * missing/strftime.c (malloc, realloc, getenv, strchr): only - declare these when STDC_HEADERS is not defined. - : include these when STDC_HEADERS is defined. - * awk.h (freenode, tree_eval, m_tree_eval): reorganize definitions. - * alloca.c (malloc): if malloc is already defined as a macro, - presumeably by config.h, don't define or declare it. - -Wed Apr 9 22:45:27 1997 Arnold D. Robbins - - * Makefile.in [COMPFLAGS]: per suggestion from Karl Berry, put - $(CFLAGS) last. - -Tue Apr 8 23:54:46 1997 Arnold D. Robbins - - * eval.c (interpret): For Node_K_break and Node_K_continue, if - treating them like `next', also check the function call stack - and pop it if necessary. - -Mon Apr 7 18:22:37 1997 Arnold D. Robbins - - * awk.h: Add decls of new routines do_compl() and set_loc(). - * awk.y (tokentab): add entry for "compl" function. - * builtin.c (do_compl): new function to do ones complement. - (do_substr): rationalized yet again, now notices negative start - and length parameters. - * eval.c (push_args): fix if call_list gets realloc'ed in the - middle of things. Avoids crash for deeply nested function calls. - * main.c (catch_sig): add call to set_loc(). - * msg.c (set_loc, srcfile, srcline): new function and private - variables to help out in tracing down source of error messages. 
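A minimal usage sketch of the strtonum() extension added above, assuming a build with the non-decimal-data feature enabled; leading `0x' is treated as hexadecimal and a leading `0' as octal:

        BEGIN {
            print strtonum("0x11")   # 17
            print strtonum("011")    # 9
            print strtonum("11")     # 11
        }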
- -Fri Mar 28 08:42:27 1997 Arnold D. Robbins - - * io.c (iop_alloc, iop_close): Undo changes of Feb 11, apparently - other cleanups in io.c made mmap stuff start working again. - BAH! It's a mess, the test suite still fails. I'm leaving the - mmap stuff undefined for now. It'll probably get ripped out in 3.1. - -Thu Mar 27 08:48:57 1997 Arnold D. Robbins - - * custom.h [_SEQUENT_]: undef HAVE_MMAP. - -Wed Mar 26 09:08:16 1997 Arnold D. Robbins - - * io.c (iop_alloc): fix definition to make it static. - -Mon Mar 24 23:09:07 1997 Arnold D. Robbins - - * field.c (init_fields, etc..): more clean up use of Null_field - and the various flags. - * node.c (unref): if a field, free the node itself. Fixes - memory leak problems. - -Sun Mar 23 22:51:09 1997 Arnold D. Robbins - - * awk.h [FIELD]: new flag for node->flags field. - * builtin.c (sub_common): if FIELD is set, dup the string. - * field.c (init_fields): set up a new Null_field global var. - (init_fields, set_field, set_record) use the FIELD flag. - (getfield): use Null_field instead of private variable. - * io.c (wait_any): comment out calls to pclose and iop_close, - caused weird race conditions. See test/pipeio1.awk. Thanks - to Darrell Hankerson for tracing this one down. - -Tue Mar 18 20:57:18 1997 Arnold D. Robbins - - * dfa.c (inboth): free templist; plugs memory leak. - * field.c (init_fields, grow_fields_arr, set_field, rebuild_record, - set_record): remove PERM flag from entries in fields_arr[]. Fixes - nasty memory leak. - -Tue Mar 18 06:33:00 1997 Arnold D. Robbins - - * awk.y (dup_parms): robustified against parameter errors. - -Sun Mar 16 21:31:40 1997 Arnold D. Robbins - - NEW UNDOCUMENTED FEATURE. USE THE SOURCE LUKE! - * acconfig.h [BITOPS]: new macro. If set, do octal & hex and bit ops. - * awk.h [isnondecimal]: new macro, and decl of new functions. - * awk.y (yylex): add recognition of octal and hex constants. - * builtin.c (do_and, do_or, do_xor, do_lshift, do_rshift): new - functions that do bit operations. - (nondec2awknum): new function to convert octal or hex to double. - * configure.in: Add AC_ARG_ENABLE for bit operations. - * node.c (r_force_number): add octal and hex conversion. - -Sun Mar 16 21:28:56 1997 Arnold D. Robbins - - * awk.h [IOP_NOFREE_OBJ]: new macro. - * io.c (iop_open, iop_alloc): add new third parameter, which is - either NULL, meaning allocate a new IOP, or the address of one - already allocated. Have a static one in the `nextfile' - routine, and use the IOP_NOFREE_OBJ flag for it. All of this - keeps us from reading freed memory. The `swaplns' test fails - otherwise. - (iop_close): if IOP_NOFREE_OBJ is set, don't free the IOBUF. - -Wed Feb 26 06:21:02 1997 Arnold D. Robbins - - * eval.c (in_function, pop_fcall_stack, pop_fcall, push_args): - new functions. These manage "frames" of awk function call arguments. - The problem is that a `next' or a `nextfile' from a function - leaks memory. These changes allow us to free up that memory. - (interpret): for Node_K_next and Node_K_nextfile, check if in - a function call and free all function call frames. - -Fri Feb 21 06:23:19 1997 Arnold D. Robbins - - * Misc changes from Katsuyuki Okabe : - * builtin.c (do_substr): change a %d to %ld in warning message. - * eval.c (op_assign): fix format string for warning about %=. - -Wed Feb 19 23:29:02 1997 Arnold D. Robbins - - * main.c (main): add do_intervals to condition that causes - resetup() to be called again. Makes the --re-interval option - actually work. What a concept. 
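An illustrative sketch of the bit operations and octal/hex source constants added above, assuming a build configured with the BITOPS feature; the operand values are arbitrary:

        BEGIN {
            print and(0x0f, 0x35)    # 5
            print or(0x0f, 0x30)     # 63
            print xor(0x0f, 0x35)    # 58
            print lshift(1, 4)       # 16
            print rshift(32, 2)      # 8
        }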
- -Fri Feb 14 09:47:31 1997 Arnold D. Robbins - - * io.c [#include "awk.h"]: undef HAVE_MMAP to just use the old code. - Something is causing a file descriptor leak, and this is getting to - be just too much hair. I reserve the right to rip out the mmap - code entirely at a future date. - -Tue Feb 11 06:28:29 1997 Arnold D. Robbins - - * io.c (iop_alloc): for an mmap'ed file, close the file descriptor, - and then touch each page to get a private copy. Fixes nasty case - of truncating our input file. - (iop_close): don't call close on mmap'ed file. - -Wed Feb 5 17:59:04 1997 Arnold D. Robbins - - * eval.c (interpret): For Node_K_delete, just call do_delete; let - it handle the case of `delete array'. - * array.c (do_delete): Changed to handle case of `delete array', - and made smarter if the array is actually an uninitialized - parameter. - -Sun Jan 26 22:58:29 1997 Arnold D. Robbins - - * getopt.h, getopt.c, getopt1.c: replaced with new versions from - GLIBC 2. - -Sun Jan 19 23:37:03 1997 Arnold D. Robbins - - * eval.c (nodetype2str): not static, for debugging. - (flags2str) new function: for debugging. - * field.c (get_field): add new var that is like Nnull_string but - does not have numeric attributes, so that new fields are strings. - (set_record): turn off PERM flag before unrefing fields and field 0. - * array.c (in_array): always evaluate subscript, could have - side effects. - * builtin.c (do_strftime): way increase size of buffer to make sure - we don't have overflow problem. Keeps Paul Eggert happy. - * custom.h [__amigaos__]: define fork to vfork. From Fred Fish. - * dfa.c: move include of config.h to top, for RSXNT. From Kai - Uwe Rommel. - (ISALPHA, etc): change from Jacob Engelbrecht (jaen@novo.dk) - to better handle non-ascii environments. - * gawkmisc.c: remove amigados case, posix should now work fine. - * amiga/*: nuked per previous entry. - * Makefile.in: removed all references to amiga - * io.c [HAVE_SYS_PARAM_H]: Add #undef RE_DUP_MAX to avoid - spurious conflict with regex.h. - (flush_io): remove amiga ifdefs, not needed anymore. - (spec_setup): set getrec field for special files. Fix from - Mark Gray (markgray@pdt.net). - * node.c (more_nodes): fix to get the last entry in the array. - -Wed Jan 8 17:42:37 1997 Andreas Schwab - - * io.c (mmap_get_record): Fix return value if file ends without - record separator. - -Fri Jan 3 19:57:16 1997 Pat Rankin - - * awk.y (get_src_buf): Test for an empty source file by detecting - an initial read of 0 bytes rather than by relying on info from - stat(). - -Wed Dec 25 11:25:22 1996 Arnold D. Robbins - - * Release 3.0.2: Release tar file made. - -Wed Dec 25 11:17:32 1996 Arnold D. Robbins - - * Makefile.in (install, uninstall): use $(srcdir)/patchlevel.h. - Thanks to Richard Levitte, LeViMS@stacken.kth.se. - (install): remove chmod command; let $(INSTALL_PROGRAM) use -m. - -Mon Dec 23 20:36:59 1996 Pat Rankin - - * custom.h (#if VMS_POSIX): Define GETPGRP_VOID. - -Fri Dec 20 08:59:55 1996 Arnold D. Robbins - - * getopt.c, getopt1.c: comment out the `#if defined (_LIBC) || - !defined (__GNU_LIBRARY__)' and `#endif' to force use of this - getopt, even on systems like linux. This will be handled - better in 3.1 / glibc 2. - -Thu Dec 19 22:52:39 1996 Arnold D. Robbins - - * awk.y (yylex): In several places, after yyerror(), add call to - exit(). Otherwise, infinite messages. This should probably - be handled better. 
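The `delete array' handling described in the Feb 5 entry above (do_delete taking care of the whole-array case) corresponds to the following usage; a sketch for illustration only, not part of the original sources:

        BEGIN {
                split("a b c", items)     # items[1]="a", items[2]="b", items[3]="c"
                delete items              # clears every element in one statement
                n = 0
                for (i in items)
                        n++
                print n                   # prints 0
        }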
- -Wed Dec 18 22:42:10 1996 Darrel Hankerson - - * getopt.c (_getopt_internal): if 'W' and ';', if optind == argc, - return c, don't fall through. - -Wed Dec 18 10:09:44 1996 Arnold D. Robbins - - * configure.in [AC_PREREQ]: Update to 2.12 in order to switch to - autoconf 2.12. Lots of other files will be rebuilt automatically. - [AM_SANITY_CHECK_CC]: Removed, autoconf does it now. - * aclocal.m4 [AM_SANITY_CHECK_CC]: Removed, autoconf does it now. - -Tue Dec 17 22:23:16 1996 Arnold D. Robbins - - * builtin.c (do_strftime): fix case if format string is "". - Also fix it if format is not "" but result of strftime is "". - See comments in code. - -Tue Dec 10 23:09:26 1996 Arnold D. Robbins - - * Release 3.0.1: Release tar file made. - -Tue Dec 10 22:39:41 1996 Arnold D. Robbins - - * Makefile.in (dist): add dependency on `info'. Remove line that - does makeinfo. - (install): use $(LN) not $(LN_S) to link gawk gawk-version. - -Sun Dec 8 07:53:44 1996 Arnold D. Robbins - - * Makefile.in (gawk): took COMPFLAGS out of link line for help - on VMS posix. Shouldn't (I hope) affect anything else. - -Thu Nov 28 11:52:24 1996 Arnold D. Robbins - - * configure.in (AC_PROG_INSTALL): Set INSTALL to install-sh. - -Tue Nov 26 22:42:00 1996 Arnold D. Robbins - - * PORTS: Updated list of systems. - * Makefile.in (install): Fix some typos and add some improvements - for Ultrix. - -Sun Nov 24 22:16:26 1996 Arnold D. Robbins - - * builtin.c (do_printf): if no args, fatal error. Return silently - if --traditional. - -Thu Nov 7 20:54:43 1996 Arnold D. Robbins - - * io.c (inrec): make sure EOF hasn't already happened before - trying to read; prevents accessing freed buffer. Thanks to - Michal Jaegermann. - * Makefile.in [AWKSRC]: add random.h. - random.h: new file, redefines names of the `random' functions. - random.c, builtin.c: add include of random.h. - -Thu Nov 7 09:06:21 1996 Arnold D. Robbins - - * awk.y (snode): undo 4 Oct change, put do_split code back. - field.c (do_split): restore old code; add test for CONST, so - that re_parse_field is used if third arg to split is a regexp - constant. - -Mon Nov 4 12:57:11 1996 Arnold D. Robbins - - * main.c (main): Research -m[fr] options don't need literal '=' - characters. Brian's documentation was confusing. Fixed, not - that anyone actually uses these options with gawk. - -Sun Nov 3 11:23:21 1996 Arnold D. Robbins - - * field.c (def_parse_field): add \n to list of acceptable white space. - (posix_def_parse_field): new routine, just like def_parse_field(), - but only allows space and tab as separators. - (do_split, set_FS): make appropriate choice between the two - *def_parse_field() routines. - -Fri Oct 25 10:13:06 1996 Arnold D. Robbins - - * configure.in: remove test for random. - * Makefile.in: add random.c to list of files always compiled. - * missing.c: remove HAVE_RANDOM test. - * builtin.c: remove ifdef's for HAVE_RANDOM. - [GAWK_RAND_MAX]: use constant we know works with our random(). - * random.c: new file - moved from missing/ directory. - -Wed Oct 23 19:46:01 1996 Pat Rankin - - * builtin.c (do_tolower, do_toupper): Add `unsigned char *' casts. - -Tue Oct 22 21:27:52 1996 Arnold D. Robbins - - * builtin.c [GAWK_RANDOM_MAX]: Try to make definition a bit - smarter; don't use RAND_MAX if it's equal to SHRT_MAX, blows - things up. - -Tue Oct 22 08:49:20 1996 Arnold D. Robbins - - * main.c (copyleft): update copyright date to 1996. - too many files to list: update copyright date to 1996. - -Sun Oct 20 12:21:09 1996 Arnold D. 
Robbins - - * awk.y, dfa.c, eval.c, io.c, re.c: added various FIXME comments. - -Sat Oct 19 22:06:42 1996 Arnold D. Robbins - - * eval.c (nodetype2str): make static, add prototype. - * field.c (sc_parse_field): cast array subscripts to int to - shut up gcc warnings. - * gawkmisc.c: add prototype for xmalloc. - * awk.h: add prototype for getredirect. - * builtin.c (do_fflush): remove extern decl of getredirect. - * io.c (get_a_record, mmap_get_record): change decl of rs to int, - to shut up gcc warnings. - * awk.y (isassignable): add a default to switch to quiet gcc. - * getopt.c (_getopt_internal): give default value to `indfound'. - -Fri Oct 18 09:00:49 1996 Arnold D. Robbins - - * regex.h [RE_SYNTAX_AWK]: add RE_CONTEXT_INDEP_ANCHORS. - -Thu Oct 17 22:32:55 1996 Arnold D. Robbins - - * aclocal.m4 [AM_SANITY_CHECK_CC]: added. - * configure.in: use it. - -Thu Oct 17 21:43:25 1996 Arnold D. Robbins - - * configure.in: add checks for locale.h and setlocale(). - awk.h: include locale.h and define out setlocale() if not available. - main.c (main): call setlocale(). - builtin.c (do_tolower, do_toupper): use unsigned char pointers, - to get other charsets right in different locales. - -Wed Oct 16 21:32:53 1996 Arnold D. Robbins - - * builtin.c (format_tree): Change initial buffer size to 512 - and use a constant. Allows large values of %f per bug report - from sheyn@cs.bu.edu. - -Wed Oct 16 21:22:08 1996 Arnold D. Robbins - - * Makefile.in [MISC]: removed TAGS and tags - (local-distclean): added TAGS and tags - (maintainer-clean): removed TAGS and tags - -Wed Oct 16 12:28:43 1996 Arnold D. Robbins - - * main.c (version): Add call to copyleft(), per new standards. - version.c: Fix text of version string to match new standards. - -Sun Oct 6 22:19:45 1996 Arnold D. Robbins - - * regex.c: updated to Emacs 19.34b base. - -Sun Oct 6 21:57:34 1996 Arnold D. Robbins - - * re.c (make_regexp): fixed to handle \8 and \9 in the middle - of a regexp. - -Fri Oct 4 10:26:16 1996 Arnold D. Robbins - - * awk.y (snode): remove case for do_split; always making the - third arg a Node_regex is wrong. - field.c (do_split): rationalized to distinguish `/ /' from `" "'. - Generally fixed up. - * node.c (parse_escape): Allow single digit \x escapes. - -1996-10-02 Paul Eggert - - * builtin.c (format_tree): - Fix bug in %d and %i format: NaNs, and values - in the range LONG_MAX+1 .. ULONG_MAX, were mishandled. - Don't assume that double values <= -1 are converted to unsigned - long in the expected way; the C Standard doesn't guarantee this. - -1996-10-02 Paul Eggert - - * awk.h (INT_MAX): Remove unused symbol. - -Mon Sep 30 22:19:11 1996 Arnold D. Robbins - - * getopt.c (_getopt_internal): If 'W' is in the optstring followed - by a ';' then search through the long opts table. This makes - `-W foo=bar' same as `--foo=bar'. - * main.c (main): 'W' now prints an error message. - (gawk_option): deleted the routine. - -Sun Sep 29 23:04:54 1996 Arnold D. Robbins - - * builtin.c (sub_common): fix several bugs with gsub when - matching null strings. See test/gsubtest.awk. - -Fri Sep 20 17:35:54 1996 Pat Rankin - - * alloca.c (NULL): don't define if has already done so. - -Fri Sep 20 11:54:31 1996 Arnold D. Robbins - - * builtin.c (do_print): evaluate all the expressions first and - then print them. Avoids surprising behavior. See test/prtoeval.awk - for an example. - -Tue Sep 10 06:21:40 1996 Arnold D. 
Robbins - - * awk.h [FUNC]: new flag, marks a Node_parameter_list as really - being the function name; allows more checking in awk.y. - * awk.y (isassignable): now takes a NODE * instead of a type, to - check if a function parameter is marked FUNC, then it's the function - name, which is not assignable. Fix call from snode(). - (function_prologue): mark function name as FUNC. - (yyerror): don't call exit() anymore; gawk will now report - all syntax errors. - -Sun Sep 1 19:36:30 1996 Arnold D. Robbins - - * field.c (rebuild_record): after building new field 0, go through - all old fields, and if they used to point into the old one, - have them point into the new one. Then turn off PERM flag before - unref-ing field 0. - -Wed Aug 28 19:13:34 1996 Arnold D. Robbins - - * eval.c (set_IGNORECASE): Correctly parenthesize bit operations - in test and fix logic for string value. - -Wed Aug 28 22:06:33 1996 Arnold D. Robbins - - * main.c (usage): add email addresses for bug reporting, per - change in GNU Coding Standards from RMS. - -Sun Aug 11 23:13:22 1996 Arnold D. Robbins - - * Makefile.in (install): correct use of $(INSTALL_PROGRAM). - -Thu Aug 8 23:29:43 1996 Arnold D. Robbins - - * parse.y (isassignable): new function, checks in type can - be assigned to. - (snode): changed checking for 3rd arg of gsub to be more - general, supersedes earlier change. - -Thu Aug 8 13:58:26 1996 Arnold D. Robbins - - * parse.y (snode): If third arg to sub or gsub is builtin - function, complain, since can't substitute into result. - * eval.c (r_get_lhs): diagnose Node_builtin as an error, instead - of falling through into default case and using cant_happen(). - -Thu Aug 1 07:13:14 1996 Arnold D. Robbins - - * regex.h [RE_DEBUG]: new macro. - [RE_SYNTAX_GNU_AWK]: add RE_DEBUG. - [RE_SYNTAX_POSIX_AWK]: add RE_INTERVALS. - * regex.c (re_set_syntax): add #ifdef DEBUG code to turn on `debug' - flag if RE_DEBUG set, and turn off debug if not set and debug - was on. - * main.c (main): remove `do_intervals = TRUE' from `if (do_posix)', - it's now handled in the definition of RE_SYNTAX_POSIX_AWK. - -Mon Jul 29 17:49:07 1996 Pat Rankin - - * io.c (O_ACCMODE): define it if doesn't. - -Mon Jul 29 12:02:48 1996 Arnold D. Robbins - - * eval.c (set_IGNORECASE): made somewhat smarter. gawk -v IGNORECASE=0 - was acting the same as -v IGNORECASE=1. Thanks to Darrell Hankerson - for the bug report. - -Fri Jul 26 12:04:43 1996 Arnold D. Robbins - - * awk.h (format_val): add declaration of new routine. - * node.c (format_val): new routine, abstracts old guts of - r_forcestring; accepts format string and index as additional params. - (r_force_string): changed to call format_val. - * builtin.c (do_print): don't tree_eval the tree twice in case - OFMTidx != CONVFMTidx; doing so could cause side effects - (from bug report by Tobias Rettstadt, xassp@ipds.uni-kiel.de). - Instead, call format_val. - -Mon Jul 22 21:59:15 1996 Arnold D. Robbins - - * io.c (iop_close): change check for "is $0 in the input buffer" - to use `< (iop->buf + iop->secsiz + iop->size)' instead of - `< iop->end'. The latter is bogus if EOF has been hit on the - file. Fix from Darrel Hankerson based on bug report by - Charles Howes (howes@grid.direct.ca). See test/eofsplit.awk. - -Thu Jul 18 19:43:20 1996 Arnold D. Robbins - - * builtin.c (sub_common): backed out change of Feb 14 in favor of: - (do_gensub): Changed to use make_string and then to |= TEMP - flag, based on bug report and patch from Katsuyuki Okabe, - hgc02147@niftyserve.or.jp. 
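gensub(), touched in the entry just above, differs from sub()/gsub() in that it returns the rewritten string rather than modifying the target in place, and its replacement text may use \1-style backreferences. A hypothetical one-liner:

        BEGIN {
                s = "gawk 3.0.6"
                print gensub(/([0-9])\.([0-9])/, "\\2.\\1", "g", s)   # prints: gawk 0.3.6
        }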
- -Thu Jul 18 19:23:53 1996 Arnold D. Robbins - - * custom.h: added ifdef for QNX, based on bug report from - Michael Hunter, mphunter@qnx.com. - -Mon Jul 15 09:31:01 1996 Arnold D. Robbins - - * io.c (redirect): When finding the rp pointer, if it's not - NULL, set str = rp->value. This gets the '\0' terminated - version. Motivated by bug report from John Hawkinson - (jhawk@bbnplanet.com). - -Sun Jul 14 18:40:26 1996 Arnold D. Robbins - - * configure.in: added call to AC_CHECK_LIB(m, fmod), since - apparently some systems have fmod in the math library. - Portability: the Holy Grail. Sigh. - -Sun Jul 14 18:08:01 1996 Arnold D. Robbins - - * awk.h: add Jim Meyerings ISASCII etc hacks for ctype macros. - * builtin.c (do_toupper, do_tolower, sub_common): changed to use - upper-case versions of ctype macros. - * main.c (main): ditto. - * node.c (r_force_number, parse_escape): ditto. - -Sun Jul 14 06:34:18 1996 Arnold D. Robbins - - * field.c (set_record): made it always do the PERM flag. - Fixes cases where $0 is assigned to, e.g. by gsub, keeps - the fields valid. - (get_field): removed the call to reset_record in - case where ! field0_valid. We want to leave the fields alone - if they've been changed. - -Thu Jul 11 23:04:20 1996 Arnold D. Robbins - - * io.c (devopen): change tests of (flag & O_fooONLY) to - (flag & O_ACCMODE) == O_fooONLY. Per (long standing) bug - report from Chapman Flack. - (close_redir): change final conditional to just (status != 0) - so that ERRNO always set; the warning had its own `if (do_lint)' - anyway. - * eval.c (do_split): force type of array to be Node_var_array - instead of Node_var. Per (long standing) bug report from - Chapman Flack. - -Thu Jul 11 22:17:14 1996 Arnold D. Robbins - - * Makefile.in (install): added symlink of gawk to awk if - no awk in $(bindir). - (LN_S): new variable for symlinking. - (uninstall): remove awk if it's the same gawk. - * Configure.in: Added call to AC_PROG_LN_S for Makefile.in. - -Sun Jul 7 15:47:13 1996 Arnold D. Robbins - - * main.c (main): made `--posix' turn on interval expressions. - Gawk now matches its documentation. (What a concept!) - -Wed Jul 3 15:02:48 1996 Arnold D. Robbins - - * regex.h, regex.c: upgraded to changes from Emacs 19.31. - -Fri May 17 08:46:07 1996 Arnold D. Robbins - - * io.c (get_a_record): added `continued' flag. Fix from - Darrell Hankerson for when RS = "\n|something". - -Wed May 15 02:34:55 1996 Arnold D. Robbins - - * Makefile.in (awklib/all): now depends on gawk, fixes problem - with parallel make. - -Tue May 14 15:02:52 1996 Arnold D. Robbins - - * builtin.c (format_tree): fix handling of '*' to deal with - negative value for fieldwidth -- make positive and turn on - left justify. Per bug report from Michael Brennan. - -Sun May 12 20:42:06 1996 Arnold D. Robbins - - * eval.c (r_get_lhs): case Node_subscript. Check if array name - is actually a function, fatal error if so. - -Sun May 5 10:11:52 1996 Arnold D. Robbins - - * io.c (redirect): call flush_io() before creating a new output pipe, - per bug report from Brian Kernighan (bwk@research.bell-labs.com). - -Fri Mar 15 06:38:33 1996 Arnold D. Robbins - - * Makefile.in (install): use $(INSTALL_PROGRAM), not $(INSTALL). - (local-distclean): add `*~' to list of files to be removed. - (CFLAGS): now contains just @CFLAGS@. - (COMPFLAGS): replaces use of CFLAGS, has CFLAGS plus all the - other stuff. - -Wed Mar 13 14:19:38 1996 Arnold D. Robbins - - * io.c (mmap_get_record): fixed to not place sentinel at end - of mmap'ed object. 
Won't work if file is exact multiple of - disk block size. See comments in code for more info. - Thanks to Rick Adams (rick@uunet.uu.net) for help in testing. - -Sun Mar 10 22:50:23 1996 Arnold D. Robbins - - * io.c (do_close): notice if we were called as `close(FILENAME)' - and arrange to close the current input file. This turns out - to be easy to do, just call `nextfile(TRUE)'. Based on bug report - from Pascal A. Dupuis, . - -Thu Mar 7 08:08:51 1996 Arnold D. Robbins - - * field.c (init_fields, grow_fields, set_field, rebuild_record): - Nuke the `nodes' array everywhere. Anytime a field is unref'ed, - allocate a new node that is a copy of Nnull_string. This avoids - subtle memory management problems when doing a lot of assignment - to fields, and tweaking of NF. Make sure that fields_arr[0] always - has a type of Node_val! - * field.c (set_NF): If NF is decremented, clear fields between - NF and parse_high_water, otherwise if NF incremented, clear - fields between parse_high_water and NF. - * eval.c (nodetype2str): new function, used for diagnostics. - eval.c (interpret): use nodetype2str when finding invalid node. - -Mon Mar 4 09:02:28 1996 Arnold D. Robbins - - * builtin.c (do_toupper, do_tolower): use isascii along with - isupper/islower before changing case, in case characters have - the high bit set. This is a hack. - -Mon Feb 26 22:24:44 1996 Arnold D. Robbins - - * builtin.c (sub_common): if no match, and called from gensub, - don't free the temporary string, since the tmp_number then - writes over it. - -Sun Feb 25 23:13:01 1996 Arnold D. Robbins - - * builtin.c (format_tree): fixed %c to treat user input as - numeric also by adding test for MAYBE_NUM. - -Tue Feb 20 12:25:50 1996 Arnold D. Robbins - - * configure.in: Added AC_FUNC_MMAP call and add madvise to - list of functions to look for. - * awk.h [IOP_ISMAPPED]: new flag value for mmap support and new - `getrec' structure member in struct iobuf. - * io.c (iop_alloc, iop_close): changed to map/unmap input file - into memory if possible. - (mmap_get_record): new function to actually retrieve the - record from mmaped file. - -Thu Feb 1 08:56:46 1996 Arnold D. Robbins - - * builtin.c (do_substr): fixed lint message to use indx+1 when - start position is past end of string. - -Sun Jan 28 07:00:56 1996 Arnold D. Robbins - - * builtin.c (do_substr): rationalized handling of missing length - argument, as well as various accompanying lint warnings. Previous - code was slightly bogus. Talk about your Day 1 bugs. - -Thu Jan 25 14:09:11 1996 Arnold D. Robbins - - * builtin.c (do_substr): if length exceeds length of actual - string, do computation of needed substring length *after* - the lint warning. - -Wed Jan 24 10:06:16 1996 Arnold D. Robbins - - * Makefile.in (gawk): Add $(CFLAGS) to link line. - (Makefile): target depends on the Makefile.in files. - (OTHERS): Added TAGS and tags to the distribution. - (local-distclean): New rule. - (distclean): Use it. - (maintainer-clean): Don't `make distclean' before running submakes, - since that removes makefiles needed for the submakes. - * builtin.c (do_strftime): Remove hard coded limit on length of result. - Based on code from Paul Eggert (eggert@twinsun.com). - -Mon Jan 22 13:16:37 1996 Arnold D. Robbins - - * main.c (usage): takes new fp parameter which is either - stdout for `--help' (per the GNU Coding Standards) or stderr - if an error occurs. Fix all calls. - (version): prints to stdout per the coding stds. - (copyleft): prints to stdout now, not stderr, and exits. 
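The substr() entries above (Feb 1, Jan 28, Jan 25) all concern the optional length argument; the behavior they tidy up is easiest to see from a small, hypothetical example (with --lint, gawk diagnoses the last call, which is what the Feb 1 entry adjusts):

        BEGIN {
                s = "hello, world"
                print substr(s, 8)        # length omitted: rest of the string, "world"
                print substr(s, 1, 5)     # explicit length, "hello"
                print substr(s, 20)       # start past end of string: empty result
        }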
- -Fri Jan 19 08:10:29 1996 Arnold D. Robbins - - * regex.h [RE_GNU_AWK]: added RE_CONTEXT_INDEP_OPS to set of - bits we turn off for regular operation. Breaks things like - /^+[0-9]+/ to match a literal `+' at the beginning of, say, - a phone number. - -Wed Jan 10 23:19:36 1996 Arnold D. Robbins - - * 3.0.0 polished up and release tar file made. - -Wed Dec 27 11:46:16 1995 Arnold D. Robbins - - * 2.94.0 released to porting group (no, I haven't been good - about this file; I'll do better once 3.0 is released). - -Mon Aug 28 23:04:30 1995 Arnold D. Robbins - - * awk.h updated for NeXT - bracket TRUE/FALSE - * io.c (get_a_record): removed shadowing of 'start' in - * Makefile.in and doc/Makefile.in: fixed to use gawk.1 and gawk.texi, - instead of gawk.1.in and gawk.texi.in. - -Mon Aug 25 11:04:30 1995 Arnold D. Robbins - - * 2.90.0 released to porting group. - -Fri Aug 18 12:43:31 1995 Arnold D. Robbins - - * ChangeLog created. diff --git a/contrib/awk/FREEBSD-upgrade b/contrib/awk/FREEBSD-upgrade deleted file mode 100644 index a820801..0000000 --- a/contrib/awk/FREEBSD-upgrade +++ /dev/null @@ -1,24 +0,0 @@ -$FreeBSD$ - -Import of GNU awk 3.0.6 - -Obtained from: ftp://prep.ai.mit.edu/pub/gnu/gawk/gawk-3.0.6.tar.gz - -The following files and directories were removed for this import: - -README_d/README.VMS -README_d/README.atari -README_d/README.beos -README_d/README.linux -README_d/README.pc -README_d/README.sco -README_d/README.sgi -README_d/README.solaris -README_d/README.sony -README_d/README.sunos4 -README_d/README.ultrix -README_d/README.yacc -atari/ -missing/ -pc/ -vms/ diff --git a/contrib/awk/FUTURES b/contrib/awk/FUTURES deleted file mode 100644 index b2f7575..0000000 --- a/contrib/awk/FUTURES +++ /dev/null @@ -1,85 +0,0 @@ -This file lists future projects and enhancements for gawk. Items are listed -in roughly the order they will be done for a given release. This file is -mainly for use by the developers to help keep themselves on track, please -don't bug us too much about schedules or what all this really means. - -With the 3.0 release, we are acknowledging that awk is not PERL, nor should -it become PERL. (To paraphrase Dennis Ritchie, "If you want PERL, you -know where to get it.") - -The focus on the future is thus narrowed to performance and functional -enhancements, with only minor plans for significant new features. - -(OK, so 3.1 had a bad case of feature-itis. I think I'm mostly over it -now, though. :-) - -In 3.1 -====== - DONE: A PROCINFO array to replace /dev/pid, /dev/user, et al. - - DONE: Add `abort' statement a la Thompson awk. - - DONE: Provide awk profiling. - - DONE: Integrate GNU NLS support. - - DONE: Bring out hooks for NLS support into gawk itself. - - DONE: Do a reference card. - - DONE: Switch to full ANSI C and use ansi2kr. - - Additional manual features: - DONE: Document NLS support - -For 3.2 -======= - Move the loadable modules interface to libtool. - - Redo the loadable modules interface from the awk level. - - Rework management of array index storage. - - A RECLEN variable for fixed-length record input. PROCINFO["RS"] - would be "RS" or "RECLEN" depending upon what's in use. - - DBM storage of awk arrays. Try to allow multiple dbm packages. - - Look at ISO C 99 printf features. - - Add %'d for putting in commas in formatting? - - Consider integrating Fred Fish's DBUG library into gawk. - - Consider removing use of and/or need for the protos.h file. - - Additional manual features: - ? Add exercises - Document use of dbm arrays - ? 
Add an error messages section to the manual - -For 3.3 -======= - Use a new or improved dfa and/or regex library. - - ? Have strftime() pay attention to the value of ENVIRON["TZ"] - - Add a lint check if the return value of a function is used but - the function did not supply a value. - - Additional manual features: - ? A section on where gawk is bounded - regex - i/o - sun fp conversions - -For 3.4 -======= - Do an optimization pass over parse tree? - - Make awk '/foo/' files... run at egrep speeds (how?) - -For 4.x: -======== - -Provide awk debugging. diff --git a/contrib/awk/INSTALL b/contrib/awk/INSTALL deleted file mode 100644 index a2c8722..0000000 --- a/contrib/awk/INSTALL +++ /dev/null @@ -1,181 +0,0 @@ -Basic Installation -================== - - These are generic installation instructions. - - The `configure' shell script attempts to guess correct values for -various system-dependent variables used during compilation. It uses -those values to create a `Makefile' in each directory of the package. -It may also create one or more `.h' files containing system-dependent -definitions. Finally, it creates a shell script `config.status' that -you can run in the future to recreate the current configuration, a file -`config.cache' that saves the results of its tests to speed up -reconfiguring, and a file `config.log' containing compiler output -(useful mainly for debugging `configure'). - - If you need to do unusual things to compile the package, please try -to figure out how `configure' could check whether to do them, and mail -diffs or instructions to the address given in the `README' so they can -be considered for the next release. If at some point `config.cache' -contains results you don't want to keep, you may remove or edit it. - - The file `configure.in' is used to create `configure' by a program -called `autoconf'. You only need `configure.in' if you want to change -it or regenerate `configure' using a newer version of `autoconf'. - -The simplest way to compile this package is: - - 1. `cd' to the directory containing the package's source code and type - `./configure' to configure the package for your system. If you're - using `csh' on an old version of System V, you might need to type - `sh ./configure' instead to prevent `csh' from trying to execute - `configure' itself. - - Running `configure' takes awhile. While running, it prints some - messages telling which features it is checking for. - - 2. Type `make' to compile the package. - - 3. Optionally, type `make check' to run any self-tests that come with - the package. - - 4. Type `make install' to install the programs and any data files and - documentation. - - 5. You can remove the program binaries and object files from the - source code directory by typing `make clean'. To also remove the - files that `configure' created (so you can compile the package for - a different kind of computer), type `make distclean'. There is - also a `make maintainer-clean' target, but that is intended mainly - for the package's developers. If you use it, you may have to get - all sorts of other programs in order to regenerate files that came - with the distribution. - -Compilers and Options -===================== - - Some systems require unusual options for compilation or linking that -the `configure' script does not know about. You can give `configure' -initial values for variables by setting them in the environment. 
Using -a Bourne-compatible shell, you can do that on the command line like -this: - CC=c89 CFLAGS=-O2 LIBS=-lposix ./configure - -Or on systems that have the `env' program, you can do it like this: - env CPPFLAGS=-I/usr/local/include LDFLAGS=-s ./configure - -Compiling For Multiple Architectures -==================================== - - You can compile the package for more than one kind of computer at the -same time, by placing the object files for each architecture in their -own directory. To do this, you must use a version of `make' that -supports the `VPATH' variable, such as GNU `make'. `cd' to the -directory where you want the object files and executables to go and run -the `configure' script. `configure' automatically checks for the -source code in the directory that `configure' is in and in `..'. - - If you have to use a `make' that does not supports the `VPATH' -variable, you have to compile the package for one architecture at a time -in the source code directory. After you have installed the package for -one architecture, use `make distclean' before reconfiguring for another -architecture. - -Installation Names -================== - - By default, `make install' will install the package's files in -`/usr/local/bin', `/usr/local/man', etc. You can specify an -installation prefix other than `/usr/local' by giving `configure' the -option `--prefix=PATH'. - - You can specify separate installation prefixes for -architecture-specific files and architecture-independent files. If you -give `configure' the option `--exec-prefix=PATH', the package will use -PATH as the prefix for installing programs and libraries. -Documentation and other data files will still use the regular prefix. - - In addition, if you use an unusual directory layout you can give -options like `--bindir=PATH' to specify different values for particular -kinds of files. Run `configure --help' for a list of the directories -you can set and what kinds of files go in them. - - If the package supports it, you can cause programs to be installed -with an extra prefix or suffix on their names by giving `configure' the -option `--program-prefix=PREFIX' or `--program-suffix=SUFFIX'. - -Optional Features -================= - - Some packages pay attention to `--enable-FEATURE' options to -`configure', where FEATURE indicates an optional part of the package. -They may also pay attention to `--with-PACKAGE' options, where PACKAGE -is something like `gnu-as' or `x' (for the X Window System). The -`README' should mention any `--enable-' and `--with-' options that the -package recognizes. - - For packages that use the X Window System, `configure' can usually -find the X include and library files automatically, but if it doesn't, -you can use the `configure' options `--x-includes=DIR' and -`--x-libraries=DIR' to specify their locations. - -Specifying the System Type -========================== - - There may be some features `configure' can not figure out -automatically, but needs to determine by the type of host the package -will run on. Usually `configure' can figure that out, but if it prints -a message saying it can not guess the host type, give it the -`--host=TYPE' option. TYPE can either be a short name for the system -type, such as `sun4', or a canonical name with three fields: - CPU-COMPANY-SYSTEM - -See the file `config.sub' for the possible values of each field. If -`config.sub' isn't included in this package, then this package doesn't -need to know the host type. 
- - If you are building compiler tools for cross-compiling, you can also -use the `--target=TYPE' option to select the type of system they will -produce code for and the `--build=TYPE' option to select the type of -system on which you are compiling the package. - -Sharing Defaults -================ - - If you want to set default values for `configure' scripts to share, -you can create a site shell script called `config.site' that gives -default values for variables like `CC', `cache_file', and `prefix'. -`configure' looks for `PREFIX/share/config.site' if it exists, then -`PREFIX/etc/config.site' if it exists. Or, you can set the -`CONFIG_SITE' environment variable to the location of the site script. -A warning: not all `configure' scripts look for a site script. - -Operation Controls -================== - - `configure' recognizes the following options to control how it -operates. - -`--cache-file=FILE' - Use and save the results of the tests in FILE instead of - `./config.cache'. Set FILE to `/dev/null' to disable caching, for - debugging `configure'. - -`--help' - Print a summary of the options to `configure', and exit. - -`--quiet' -`--silent' -`-q' - Do not print messages saying which checks are being made. - -`--srcdir=DIR' - Look for the package's source code in directory DIR. Usually - `configure' can determine that directory automatically. - -`--version' - Print the version of Autoconf used to generate the `configure' - script, and exit. - -`configure' also accepts some other, not widely useful, options. - diff --git a/contrib/awk/LIMITATIONS b/contrib/awk/LIMITATIONS deleted file mode 100644 index 05e8bc4..0000000 --- a/contrib/awk/LIMITATIONS +++ /dev/null @@ -1,16 +0,0 @@ -This file describes limits of gawk on a Unix system (although it -is variable even then). Non-Unix systems may have other limits. - -# of fields in a record: MAX_LONG -Length of input record: MAX_INT -Length of output record: unlimited -Size of a field: MAX_INT -Size of a printf string: MAX_INT -Size of a literal string: MAX_INT -Characters in a character class: 2^(# of bits per byte) -# of file redirections: unlimited -# of pipe redirections: min(# of processes per user, # of open files) -double-precision floating point -Length of source line: unlimited -Number of input records in one file: MAX_LONG -Number of input records total: MAX_LONG diff --git a/contrib/awk/Makefile.am b/contrib/awk/Makefile.am deleted file mode 100644 index 08809d3..0000000 --- a/contrib/awk/Makefile.am +++ /dev/null @@ -1,161 +0,0 @@ -# -# Makefile.am --- automake input file for gawk -# -# Copyright (C) 2000-2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - -## process this file with automake to produce Makefile.in - -# Automatic de-ANSI-fication if needed -AUTOMAKE_OPTIONS = ansi2knr - -# This undocumented variable insures that aclocal runs -# correctly after changing configure.in -ACLOCAL_AMFLAGS = -I m4 - -# This insures that make flags get passed down to child makes. -AM_MAKEFLAGS = 'CFLAGS=$(CFLAGS)' 'LDFLAGS=$(LDFLAGS)' - -# Stuff to include in the dist that doesn't need it's own -# Makefile.am files -EXTRA_DIST = \ - COPYING \ - FUTURES \ - INSTALL \ - LIMITATIONS \ - NEWS \ - POSIX.STD \ - PROBLEMS \ - README_d \ - bisonfix.sed \ - depcomp \ - extension \ - fixvers \ - m4 \ - missing \ - missing_d \ - pc \ - posix \ - unsupported \ - vms - -# The order to do things in. -# Build in intl first in case we need the local gettext version. -# Build explicitly "." next in order to build gawk first, so -# that `make check' without a prior `make' works. -SUBDIRS = \ - intl \ - . \ - awklib \ - doc \ - po \ - test - -# what to make and install -bin_PROGRAMS = gawk pgawk - -# sources for both gawk and pgawk -base_sources = \ - array.c \ - awk.h \ - awkgram.y \ - builtin.c \ - custom.h \ - dfa.c \ - dfa.h \ - ext.c \ - field.c \ - gawkmisc.c \ - getopt.c \ - getopt.h \ - getopt1.c \ - io.c \ - main.c \ - msg.c \ - node.c \ - patchlev.h \ - protos.h \ - random.c \ - random.h \ - re.c \ - regex.c \ - regex.h \ - replace.c \ - version.in \ - version.c - -gawk_SOURCES = $(base_sources) eval.c profile.c -pgawk_SOURCES = $(base_sources) eval_p.c profile_p.c - -# Get extra libs as needed -LDADD = @INTLLIBS@ @SOCKET_LIBS@ - -# stuff for compiling gawk/pgawk -DEFPATH="\".:$(datadir)\"" - -DEFS= -DDEFPATH=$(DEFPATH) -DHAVE_CONFIG_H -DGAWK -DLOCALEDIR="\"$(datadir)/locale\"" - -INCLUDES = -I. -I$(srcdir) -I$(srcdir)/intl - -# We want hard links for install-exec-hook, below -LN= ln - -# First, add a link from gawk to gawk-X.Y.Z -# For systems where gawk is awk, add a link to awk -install-exec-hook: - (fullname=gawk-`./gawk --version | sed 1q | awk '{print $$3}'` ; \ - cd $(DESTDIR)$(bindir); \ - $(LN) gawk $$fullname ; \ - if [ ! -f awk ]; \ - then $(LN_S) gawk awk; \ - fi; exit 0) - -# Undo the above when uninstalling -uninstall-links: - (cd $(DESTDIR)$(bindir); \ - if [ -f awk ] && cmp awk gawk > /dev/null; then rm -f awk; fi ; \ - fullname=gawk-`./gawk --version | sed 1q | ./gawk '{print $$3}'` ; \ - rm -f $$fullname; exit 0) - -uninstall-recursive: uninstall-links - -# force there to be a gawk executable before running tests -check-local: gawk pgawk - -# A little extra clean up when making distributions. -# FIXME: most of this rule should go away upon switching to libtool. -dist-hook: - cd $(distdir)/extension ; rm -f *.o *.so - -# Special rules for individual files -awkgram.c: awkgram.y - $(YACC) $(AM_YFLAGS) $(YFLAGS) $< && sed -f $(srcdir)/bisonfix.sed < y.tab.c > $*.c && $(RM) y.tab.c - if test -f y.tab.h; then \ - if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \ - else :; fi - -# these force version and patchlevel to be up to date based -# on what's in configure.in. No-one else but me should have -# to use them. So there. 
-patchlev.h: configure.in - ./fixvers -p - -version.c: configure.in version.in - ./fixvers -v diff --git a/contrib/awk/Makefile.in b/contrib/awk/Makefile.in deleted file mode 100644 index c4a7bce..0000000 --- a/contrib/awk/Makefile.in +++ /dev/null @@ -1,797 +0,0 @@ -# Makefile.in generated automatically by automake 1.4a from Makefile.am - -# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000 -# Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = . - -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_FLAG = -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : - -@SET_MAKE@ -AMDEP = @AMDEP@ -AMTAR = @AMTAR@ -AWK = @AWK@ -CATALOGS = @CATALOGS@ -CATOBJEXT = @CATOBJEXT@ -CC = @CC@ -CFLAGS = @CFLAGS@ -CPP = @CPP@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -DATADIRNAME = @DATADIRNAME@ -DEPDIR = @DEPDIR@ -GENCAT = @GENCAT@ -GMOFILES = @GMOFILES@ -GMSGFMT = @GMSGFMT@ -GT_NO = @GT_NO@ -GT_YES = @GT_YES@ -INCLUDE_LOCALE_H = @INCLUDE_LOCALE_H@ -INSTOBJEXT = @INSTOBJEXT@ -INTLDEPS = @INTLDEPS@ -INTLLIBS = @INTLLIBS@ -INTLOBJS = @INTLOBJS@ -LN_S = @LN_S@ -MAKEINFO = @MAKEINFO@ -MKINSTALLDIRS = @MKINSTALLDIRS@ -MSGFMT = @MSGFMT@ -PACKAGE = @PACKAGE@ -POFILES = @POFILES@ -POSUB = @POSUB@ -RANLIB = @RANLIB@ -SOCKET_LIBS = @SOCKET_LIBS@ -U = @U@ -USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ -USE_NLS = @USE_NLS@ -VERSION = @VERSION@ -YACC = @YACC@ -install_sh = @install_sh@ -l = @l@ - -# -# Makefile.am --- automake input file for gawk -# -# Copyright (C) 2000-2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - -# Automatic de-ANSI-fication if needed - - -AUTOMAKE_OPTIONS = ansi2knr - -# This undocumented variable insures that aclocal runs -# correctly after changing configure.in -ACLOCAL_AMFLAGS = -I m4 - -# This insures that make flags get passed down to child makes. -AM_MAKEFLAGS = 'CFLAGS=$(CFLAGS)' 'LDFLAGS=$(LDFLAGS)' - -# Stuff to include in the dist that doesn't need it's own -# Makefile.am files -EXTRA_DIST = \ - COPYING \ - FUTURES \ - INSTALL \ - LIMITATIONS \ - NEWS \ - POSIX.STD \ - PROBLEMS \ - README_d \ - bisonfix.sed \ - depcomp \ - extension \ - fixvers \ - m4 \ - missing \ - missing_d \ - pc \ - posix \ - unsupported \ - vms - - -# The order to do things in. -# Build in intl first in case we need the local gettext version. -# Build explicitly "." next in order to build gawk first, so -# that `make check' without a prior `make' works. -SUBDIRS = \ - intl \ - . \ - awklib \ - doc \ - po \ - test - - -# what to make and install -bin_PROGRAMS = gawk pgawk - -# sources for both gawk and pgawk -base_sources = \ - array.c \ - awk.h \ - awkgram.y \ - builtin.c \ - custom.h \ - dfa.c \ - dfa.h \ - ext.c \ - field.c \ - gawkmisc.c \ - getopt.c \ - getopt.h \ - getopt1.c \ - io.c \ - main.c \ - msg.c \ - node.c \ - patchlev.h \ - protos.h \ - random.c \ - random.h \ - re.c \ - regex.c \ - regex.h \ - replace.c \ - version.in \ - version.c - - -gawk_SOURCES = $(base_sources) eval.c profile.c -pgawk_SOURCES = $(base_sources) eval_p.c profile_p.c - -# Get extra libs as needed -LDADD = @INTLLIBS@ @SOCKET_LIBS@ - -# stuff for compiling gawk/pgawk -DEFPATH = "\".:$(datadir)\"" - -DEFS = -DDEFPATH=$(DEFPATH) -DHAVE_CONFIG_H -DGAWK -DLOCALEDIR="\"$(datadir)/locale\"" - -INCLUDES = -I. -I$(srcdir) -I$(srcdir)/intl - -# We want hard links for install-exec-hook, below -LN = ln -subdir = . 
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_HEADER = config.h -CONFIG_CLEAN_FILES = -PROGRAMS = $(bin_PROGRAMS) - -CPPFLAGS = @CPPFLAGS@ -LDFLAGS = @LDFLAGS@ -LIBS = @LIBS@ -ANSI2KNR = @ANSI2KNR@ -am_gawk_OBJECTS = array$U.o awkgram$U.o builtin$U.o dfa$U.o ext$U.o \ -field$U.o gawkmisc$U.o getopt$U.o getopt1$U.o io$U.o main$U.o msg$U.o \ -node$U.o random$U.o re$U.o regex$U.o replace$U.o version$U.o eval$U.o \ -profile$U.o -gawk_OBJECTS = $(am_gawk_OBJECTS) -gawk_LDADD = $(LDADD) -gawk_DEPENDENCIES = -gawk_LDFLAGS = -am_pgawk_OBJECTS = array$U.o awkgram$U.o builtin$U.o dfa$U.o ext$U.o \ -field$U.o gawkmisc$U.o getopt$U.o getopt1$U.o io$U.o main$U.o msg$U.o \ -node$U.o random$U.o re$U.o regex$U.o replace$U.o version$U.o eval_p$U.o \ -profile_p$U.o -pgawk_OBJECTS = $(am_pgawk_OBJECTS) -pgawk_LDADD = $(LDADD) -pgawk_DEPENDENCIES = -pgawk_LDFLAGS = -COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -DIST_SOURCES = $(gawk_SOURCES) $(pgawk_SOURCES) -depcomp = $(SHELL) $(top_srcdir)/depcomp -DEP_FILES = @AMDEP@ $(DEPDIR)/array$U.Po $(DEPDIR)/awkgram$U.Po \ -$(DEPDIR)/builtin$U.Po $(DEPDIR)/dfa$U.Po $(DEPDIR)/eval$U.Po \ -$(DEPDIR)/eval_p$U.Po $(DEPDIR)/ext$U.Po $(DEPDIR)/field$U.Po \ -$(DEPDIR)/gawkmisc$U.Po $(DEPDIR)/getopt$U.Po $(DEPDIR)/getopt1$U.Po \ -$(DEPDIR)/io$U.Po $(DEPDIR)/main$U.Po $(DEPDIR)/msg$U.Po \ -$(DEPDIR)/node$U.Po $(DEPDIR)/profile$U.Po $(DEPDIR)/profile_p$U.Po \ -$(DEPDIR)/random$U.Po $(DEPDIR)/re$U.Po $(DEPDIR)/regex$U.Po \ -$(DEPDIR)/replace$U.Po $(DEPDIR)/version$U.Po -DIST_COMMON = README ./stamp-h.in ABOUT-NLS AUTHORS COPYING ChangeLog \ -INSTALL Makefile.am Makefile.in NEWS acconfig.h acinclude.m4 aclocal.m4 \ -ansi2knr.1 ansi2knr.c awkgram.c configh.in configure configure.in \ -depcomp install-sh missing mkinstalldirs - - -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) - -GZIP_ENV = --best -SOURCES = $(gawk_SOURCES) $(pgawk_SOURCES) -OBJECTS = $(am_gawk_OBJECTS) $(am_pgawk_OBJECTS) - -all: all-redirect -.SUFFIXES: -.SUFFIXES: .c .h .o .y -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$@ CONFIG_HEADERS= $(SHELL) ./config.status - -$(ACLOCAL_M4): configure.in acinclude.m4 m4/arch.m4 m4/gettext.m4 \ - m4/jm-mktime.m4 m4/largefile.m4 m4/lcmessage.m4 \ - m4/progtest.m4 m4/socket.m4 m4/ssize_t.m4 m4/strtod.m4 - cd $(srcdir) && $(ACLOCAL) $(ACLOCAL_AMFLAGS) - -config.status: $(srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) - $(SHELL) ./config.status --recheck -$(srcdir)/configure: $(srcdir)/configure.in $(ACLOCAL_M4) $(CONFIGURE_DEPENDENCIES) - cd $(srcdir) && $(AUTOCONF) - -config.h: stamp-h - @if test ! -f $@; then \ - rm -f stamp-h; \ - $(MAKE) stamp-h; \ - else :; fi -stamp-h: $(srcdir)/configh.in $(top_builddir)/config.status - @rm -f stamp-h stamp-hT - @echo timestamp > stamp-hT 2> /dev/null - cd $(top_builddir) \ - && CONFIG_FILES= CONFIG_HEADERS=config.h:configh.in \ - $(SHELL) ./config.status - @mv stamp-hT stamp-h -$(srcdir)/configh.in: $(srcdir)/./stamp-h.in - @if test ! 
-f $@; then \ - rm -f $(srcdir)/./stamp-h.in; \ - $(MAKE) $(srcdir)/./stamp-h.in; \ - else :; fi -$(srcdir)/./stamp-h.in: $(top_srcdir)/configure.in $(ACLOCAL_M4) acconfig.h - @rm -f $(srcdir)/./stamp-h.in $(srcdir)/./stamp-h.inT - @echo timestamp > $(srcdir)/./stamp-h.inT 2> /dev/null - cd $(top_srcdir) && $(AUTOHEADER) - @mv $(srcdir)/./stamp-h.inT $(srcdir)/./stamp-h.in - -mostlyclean-hdr: - -clean-hdr: - -distclean-hdr: - -rm -f config.h - -maintainer-clean-hdr: - -mostlyclean-binPROGRAMS: - -clean-binPROGRAMS: - -test -z "$(bin_PROGRAMS)" || rm -f $(bin_PROGRAMS) - -distclean-binPROGRAMS: - -maintainer-clean-binPROGRAMS: - -install-binPROGRAMS: $(bin_PROGRAMS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(bindir) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - if test -f $$p; then \ - f="`echo $$p|sed -e 's/$(EXEEXT)$$//' -e '$(transform)' -e 's/$$/$(EXEEXT)/'`"; \ - echo " $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(bindir)/$$f"; \ - $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(bindir)/$$f; \ - else :; fi; \ - done - -uninstall-binPROGRAMS: - @$(NORMAL_UNINSTALL) - @list='$(bin_PROGRAMS)'; for p in $$list; do \ - f="`echo $$p|sed -e 's/$(EXEEXT)$$//' -e '$(transform)' -e 's/$$/$(EXEEXT)/'`"; \ - echo " rm -f $(DESTDIR)$(bindir)/$$f"; \ - rm -f $(DESTDIR)$(bindir)/$$f; \ - done - -mostlyclean-compile: - -rm -f *.o core *.core - -clean-compile: - -distclean-compile: - -rm -f *.tab.c - -maintainer-clean-compile: - -mostlyclean-krextra: - -clean-krextra: - -rm -f ansi2knr - -distclean-krextra: - -maintainer-clean-krextra: -ansi2knr: ansi2knr.o - $(LINK) ansi2knr.o $(LIBS) -ansi2knr.o: $(CONFIG_HEADER) - - -mostlyclean-kr: - -rm -f *_.c - -clean-kr: - -distclean-kr: - -maintainer-clean-kr: - -gawk: $(gawk_OBJECTS) $(gawk_DEPENDENCIES) - @rm -f gawk - $(LINK) $(gawk_LDFLAGS) $(gawk_OBJECTS) $(gawk_LDADD) $(LIBS) - -pgawk: $(pgawk_OBJECTS) $(pgawk_DEPENDENCIES) - @rm -f pgawk - $(LINK) $(pgawk_LDFLAGS) $(pgawk_OBJECTS) $(pgawk_LDADD) $(LIBS) -array_.c: array.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/array.c; then echo $(srcdir)/array.c; else echo array.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > array_.c -awkgram_.c: awkgram.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/awkgram.c; then echo $(srcdir)/awkgram.c; else echo awkgram.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > awkgram_.c -builtin_.c: builtin.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/builtin.c; then echo $(srcdir)/builtin.c; else echo builtin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > builtin_.c -dfa_.c: dfa.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dfa.c; then echo $(srcdir)/dfa.c; else echo dfa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > dfa_.c -eval_.c: eval.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/eval.c; then echo $(srcdir)/eval.c; else echo eval.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > eval_.c -eval_p_.c: eval_p.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/eval_p.c; then echo $(srcdir)/eval_p.c; else echo eval_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > eval_p_.c -ext_.c: ext.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ext.c; then echo $(srcdir)/ext.c; else echo ext.c; fi` | sed 
's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > ext_.c -field_.c: field.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/field.c; then echo $(srcdir)/field.c; else echo field.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > field_.c -gawkmisc_.c: gawkmisc.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gawkmisc.c; then echo $(srcdir)/gawkmisc.c; else echo gawkmisc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gawkmisc_.c -getopt_.c: getopt.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/getopt.c; then echo $(srcdir)/getopt.c; else echo getopt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > getopt_.c -getopt1_.c: getopt1.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/getopt1.c; then echo $(srcdir)/getopt1.c; else echo getopt1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > getopt1_.c -io_.c: io.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/io.c; then echo $(srcdir)/io.c; else echo io.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > io_.c -main_.c: main.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/main.c; then echo $(srcdir)/main.c; else echo main.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > main_.c -msg_.c: msg.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/msg.c; then echo $(srcdir)/msg.c; else echo msg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > msg_.c -node_.c: node.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/node.c; then echo $(srcdir)/node.c; else echo node.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > node_.c -profile_.c: profile.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/profile.c; then echo $(srcdir)/profile.c; else echo profile.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > profile_.c -profile_p_.c: profile_p.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/profile_p.c; then echo $(srcdir)/profile_p.c; else echo profile_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > profile_p_.c -random_.c: random.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > random_.c -re_.c: re.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/re.c; then echo $(srcdir)/re.c; else echo re.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > re_.c -regex_.c: regex.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/regex.c; then echo $(srcdir)/regex.c; else echo regex.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > regex_.c -replace_.c: replace.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/replace.c; then echo $(srcdir)/replace.c; else echo replace.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > replace_.c -version_.c: version.c $(ANSI2KNR) - $(CPP) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > version_.c -array_.o awkgram_.o builtin_.o dfa_.o 
eval_.o eval_p_.o ext_.o field_.o \ -gawkmisc_.o getopt_.o getopt1_.o io_.o main_.o msg_.o node_.o \ -profile_.o profile_p_.o random_.o re_.o regex_.o replace_.o version_.o \ -: $(ANSI2KNR) -.y.c: - $(YACC) $(AM_YFLAGS) $(YFLAGS) $< && mv y.tab.c $*.c - if test -f y.tab.h; then \ - if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \ - else :; fi - - -# This directory's subdirectories are mostly independent; you can cd -# into them and run `make' without going through this Makefile. -# To change the values of `make' variables: instead of editing Makefiles, -# (1) if the variable is set in `config.status', edit `config.status' -# (which will cause the Makefiles to be regenerated when you run `make'); -# (2) otherwise, pass the desired values on the `make' command line. - -all-recursive install-data-recursive install-exec-recursive \ -installdirs-recursive install-recursive uninstall-recursive \ -check-recursive installcheck-recursive info-recursive dvi-recursive: - @set fnord $(MAKEFLAGS); amf=$$2; \ - dot_seen=no; \ - target=`echo $@ | sed s/-recursive//`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - dot_seen=yes; \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done; \ - if test "$$dot_seen" = "no"; then \ - $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \ - fi; test -z "$$fail" - -mostlyclean-recursive clean-recursive distclean-recursive \ -maintainer-clean-recursive: - @set fnord $(MAKEFLAGS); amf=$$2; \ - dot_seen=no; \ - rev=''; list='$(SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = "."; then :; else \ - rev="$$subdir $$rev"; \ - fi; \ - done; \ - rev="$$rev ."; \ - target=`echo $@ | sed s/-recursive//`; \ - for subdir in $$rev; do \ - echo "Making $$target in $$subdir"; \ - if test "$$subdir" = "."; then \ - local_target="$$target-am"; \ - else \ - local_target="$$target"; \ - fi; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \ - || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \ - done && test -z "$$fail" -tags-recursive: - list='$(SUBDIRS)'; for subdir in $$list; do \ - test "$$subdir" = . 
|| (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) tags); \ - done - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - mkid -fID $$unique $(LISP) - -TAGS: tags-recursive $(HEADERS) $(SOURCES) configh.in $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SUBDIRS)'; for subdir in $$list; do \ - if test "$$subdir" = .; then :; else \ - test -f $$subdir/TAGS && tags="$$tags -i $$here/$$subdir/TAGS"; \ - fi; \ - done; \ - list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)configh.in$$unique$(LISP)$$tags" \ - || etags $(ETAGS_ARGS) $$tags configh.in $$unique $(LISP) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -@AMDEP@include $(DEPDIR)/array$U.Po -@AMDEP@include $(DEPDIR)/awkgram$U.Po -@AMDEP@include $(DEPDIR)/builtin$U.Po -@AMDEP@include $(DEPDIR)/dfa$U.Po -@AMDEP@include $(DEPDIR)/eval$U.Po -@AMDEP@include $(DEPDIR)/eval_p$U.Po -@AMDEP@include $(DEPDIR)/ext$U.Po -@AMDEP@include $(DEPDIR)/field$U.Po -@AMDEP@include $(DEPDIR)/gawkmisc$U.Po -@AMDEP@include $(DEPDIR)/getopt$U.Po -@AMDEP@include $(DEPDIR)/getopt1$U.Po -@AMDEP@include $(DEPDIR)/io$U.Po -@AMDEP@include $(DEPDIR)/main$U.Po -@AMDEP@include $(DEPDIR)/msg$U.Po -@AMDEP@include $(DEPDIR)/node$U.Po -@AMDEP@include $(DEPDIR)/profile$U.Po -@AMDEP@include $(DEPDIR)/profile_p$U.Po -@AMDEP@include $(DEPDIR)/random$U.Po -@AMDEP@include $(DEPDIR)/re$U.Po -@AMDEP@include $(DEPDIR)/regex$U.Po -@AMDEP@include $(DEPDIR)/replace$U.Po -@AMDEP@include $(DEPDIR)/version$U.Po - -mostlyclean-depend: - -clean-depend: - -distclean-depend: - -rm -rf $(DEPDIR) - -maintainer-clean-depend: - -@AMDEP@CCDEPMODE = @CCDEPMODE@ - -.c.o: -@AMDEP@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ -@AMDEP@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ - $(COMPILE) -c -o $@ `test -f $< || echo '$(srcdir)/'`$< - - -distdir = $(PACKAGE)-$(VERSION) -top_distdir = $(distdir) - - -# This target untars the dist file and tries a VPATH configuration. Then -# it guarantees that the distribution is self-contained by making another -# tarfile. -distcheck: dist - -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) - GZIP=$(GZIP_ENV) gunzip -c $(distdir).tar.gz | $(AMTAR) xf - - chmod -R a-w $(distdir); chmod a+w $(distdir) - mkdir $(distdir)/=build - mkdir $(distdir)/=inst - chmod a-w $(distdir) - dc_install_base=`CDPATH=: && cd $(distdir)/=inst && pwd` \ - && cd $(distdir)/=build \ - && ../configure --srcdir=.. --prefix=$$dc_install_base \ - --with-included-gettext \ - && $(MAKE) $(AM_MAKEFLAGS) \ - && $(MAKE) $(AM_MAKEFLAGS) dvi \ - && $(MAKE) $(AM_MAKEFLAGS) check \ - && $(MAKE) $(AM_MAKEFLAGS) install \ - && $(MAKE) $(AM_MAKEFLAGS) installcheck \ - && $(MAKE) $(AM_MAKEFLAGS) uninstall \ - && test `find $$dc_install_base -type f -print | wc -l` -le 1 \ - && $(MAKE) $(AM_MAKEFLAGS) dist \ - && $(MAKE) $(AM_MAKEFLAGS) distclean \ - && rm -f $(distdir).tar.gz \ - && test `find . 
-type f -print | wc -l` -eq 0 - -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) - @banner="$(distdir).tar.gz is ready for distribution"; \ - dashes=`echo "$$banner" | sed s/./=/g`; \ - echo "$$dashes"; \ - echo "$$banner"; \ - echo "$$dashes" -dist: distdir - -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ - ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ - ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ - ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \ - || chmod -R a+r $(distdir) - $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz - -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) -dist-all: distdir - -find $(distdir) -type d ! -perm -777 -exec chmod a+rwx {} \; -o \ - ! -type d ! -perm -444 -links 1 -exec chmod a+r {} \; -o \ - ! -type d ! -perm -400 -exec chmod a+r {} \; -o \ - ! -type d ! -perm -444 -exec $(SHELL) $(install_sh) -c -m a+r {} {} \; \ - || chmod -R a+r $(distdir) - $(AMTAR) chof - $(distdir) | GZIP=$(GZIP_ENV) gzip -c > $(distdir).tar.gz - -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) -distdir: $(DISTFILES) - -chmod -R a+w $(distdir) > /dev/null 2>&1; rm -rf $(distdir) - mkdir $(distdir) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pR $$d/$$file $(distdir) \ - || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done - for subdir in $(SUBDIRS); do \ - if test "$$subdir" = .; then :; else \ - test -d $(distdir)/$$subdir \ - || mkdir $(distdir)/$$subdir \ - || exit 1; \ - (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir=../$(distdir) distdir=../$(distdir)/$$subdir distdir) \ - || exit 1; \ - fi; \ - done - $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-hook -info-am: -info: info-recursive -dvi-am: -dvi: dvi-recursive -check-am: all-am - $(MAKE) $(AM_MAKEFLAGS) check-local -check: check-recursive -installcheck-am: -installcheck: installcheck-recursive -all-recursive-am: config.h - $(MAKE) $(AM_MAKEFLAGS) all-recursive - -install-exec-am: install-binPROGRAMS - @$(NORMAL_INSTALL) - $(MAKE) $(AM_MAKEFLAGS) install-exec-hook -install-exec: install-exec-recursive - -install-data-am: -install-data: install-data-recursive - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-recursive -uninstall-am: uninstall-binPROGRAMS -uninstall: uninstall-recursive -all-am: Makefile $(ANSI2KNR) $(PROGRAMS) config.h -all-redirect: all-recursive-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install -installdirs: installdirs-recursive -installdirs-am: - $(mkinstalldirs) $(DESTDIR)$(bindir) - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: - -rm -f Makefile.in - -test -z "awkgram.c" || rm -f awkgram.c -mostlyclean-am: mostlyclean-hdr mostlyclean-binPROGRAMS \ - mostlyclean-compile mostlyclean-krextra mostlyclean-kr \ - mostlyclean-tags mostlyclean-depend mostlyclean-generic - -mostlyclean: mostlyclean-recursive - -clean-am: clean-hdr clean-binPROGRAMS clean-compile clean-krextra \ - clean-kr clean-tags clean-depend clean-generic \ - mostlyclean-am - -clean: clean-recursive - -distclean-am: distclean-hdr distclean-binPROGRAMS distclean-compile \ - distclean-krextra distclean-kr distclean-tags \ - distclean-depend distclean-generic 
clean-am - -distclean: distclean-recursive - -rm -f config.status - -maintainer-clean-am: maintainer-clean-hdr maintainer-clean-binPROGRAMS \ - maintainer-clean-compile maintainer-clean-krextra \ - maintainer-clean-kr maintainer-clean-tags \ - maintainer-clean-depend maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-recursive - -rm -f config.status - -.PHONY: mostlyclean-hdr distclean-hdr clean-hdr maintainer-clean-hdr \ -mostlyclean-binPROGRAMS distclean-binPROGRAMS clean-binPROGRAMS \ -maintainer-clean-binPROGRAMS uninstall-binPROGRAMS install-binPROGRAMS \ -mostlyclean-compile distclean-compile clean-compile \ -maintainer-clean-compile mostlyclean-krextra distclean-krextra \ -clean-krextra maintainer-clean-krextra mostlyclean-kr distclean-kr \ -clean-kr maintainer-clean-kr install-recursive uninstall-recursive \ -install-data-recursive uninstall-data-recursive install-exec-recursive \ -uninstall-exec-recursive installdirs-recursive uninstalldirs-recursive \ -all-recursive check-recursive installcheck-recursive info-recursive \ -dvi-recursive mostlyclean-recursive distclean-recursive clean-recursive \ -maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags mostlyclean-depend \ -distclean-depend clean-depend maintainer-clean-depend distdir info-am \ -info dvi-am dvi check-local check check-am installcheck-am installcheck \ -all-recursive-am install-exec-am install-exec install-data-am \ -install-data install-am install uninstall-am uninstall all-redirect \ -all-am all install-strip installdirs-am installdirs mostlyclean-generic \ -distclean-generic clean-generic maintainer-clean-generic clean \ -mostlyclean distclean maintainer-clean - - -# First, add a link from gawk to gawk-X.Y.Z -# For systems where gawk is awk, add a link to awk -install-exec-hook: - (fullname=gawk-`./gawk --version | sed 1q | awk '{print $$3}'` ; \ - cd $(DESTDIR)$(bindir); \ - $(LN) gawk $$fullname ; \ - if [ ! -f awk ]; \ - then $(LN_S) gawk awk; \ - fi; exit 0) - -# Undo the above when uninstalling -uninstall-links: - (cd $(DESTDIR)$(bindir); \ - if [ -f awk ] && cmp awk gawk > /dev/null; then rm -f awk; fi ; \ - fullname=gawk-`./gawk --version | sed 1q | ./gawk '{print $$3}'` ; \ - rm -f $$fullname; exit 0) - -uninstall-recursive: uninstall-links - -# force there to be a gawk executable before running tests -check-local: gawk pgawk - -# A little extra clean up when making distributions. -# FIXME: most of this rule should go away upon switching to libtool. -dist-hook: - cd $(distdir)/extension ; rm -f *.o *.so - -# Special rules for individual files -awkgram.c: awkgram.y - $(YACC) $(AM_YFLAGS) $(YFLAGS) $< && sed -f $(srcdir)/bisonfix.sed < y.tab.c > $*.c && $(RM) y.tab.c - if test -f y.tab.h; then \ - if cmp -s y.tab.h $*.h; then rm -f y.tab.h; else mv y.tab.h $*.h; fi; \ - else :; fi - -# these force version and patchlevel to be up to date based -# on what's in configure.in. No-one else but me should have -# to use them. So there. -patchlev.h: configure.in - ./fixvers -p - -version.c: configure.in version.in - ./fixvers -v - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT: diff --git a/contrib/awk/NEWS b/contrib/awk/NEWS deleted file mode 100644 index ab0aa84..0000000 --- a/contrib/awk/NEWS +++ /dev/null @@ -1,2077 +0,0 @@ -Changes from 3.0.6 to 3.1.0 ---------------------------- - -1. A new PROCINFO array provides info about the process. The non-I/O /dev/xxx - files are now obsolete, and their use always generates a warning. - -2. A new `mktime' builtin function was added for creating time stamps. The - `mktime' function written in awk was removed from the user's guide. - -3. New `--gen-po' option creates GNU gettext .po files for strings marked - with a leading underscore. - -4. Gawk now completely interprets special file names internally, ignoring the - existence of real /dev/stdin, /dev/stdout files, etc. - -5. The mmap code was removed. It was a worthwhile experiment that just - didn't work out. - -6. The BINMODE variable is new; on non-UNIX systems it affects how gawk - opens files for text vs. binary. - -7. The atari port is now unsupported. - -8. Gawk no longer supports `next file' as two words. - -9. On systems that support it, gawk now sets the `close on exec' flag on all - files and pipes it opens. This makes sure that child processes run via - system() or pipes have plenty of file descriptors available. - -10. New ports: Tandem and BeOS. The Tandem port is unsupported. - -11. If `--posix' is in effect, newlines are not allowed after ?:. - -12. Weird OFMT/CONVFMT formats no longer cause fatal errors. - -13. Diagnostics about array parameters now include the parameter's name, - not just its number. - -14. configure should now automatically add -D_SYSV3 for ISC Unix. - (This seems to have made it into the gawk 3.0.x line long ago.) - -15. It is now possible to open a two-way pipe via the `|&' operator. - See the discussion in the manual about putting `sort' into such a pipeline, - though. (NOTE! This is borrowed from ksh: it is not the same as - the same operator in csh!) - -16. The close() function now takes an optional second string argument - that allows closing one or the other end of the two-way pipe to - a co-process. This is needed to use `sort' in a co-process, see - the doc. - -17. If TCP/IP is available, special file names beginning with `/inet' - can be used with `|&' for IPC. Thanks to Juergen Kahrs for the initial - code. - -18. With `--enable-portals' on the configure command line, gawk will also - treat file names that start with `/p/' as a 4.4 BSD type portal file, - i.e., a two-way pipe for `|&'. - -19. Unrecognized escapes, such as "\q" now always generate a warning. - -20. The LINT variable is new; it provides dynamic control over the --lint - option. - -21. Lint warnings can be made fatal by using --lint=fatal or `LINT = "fatal"'. - Use this if you're really serious about portable code. - -22. Due to an enhanced sed script, there is no longer any need to worry - about finding or using alloca. alloca.c is thus now gone. - -23. A number of lint warnings have been added. Most notably, gawk will - detect if a variable is used before assigned to. Warnings for - when a string that isn't a number gets converted to a number are - in the code but disabled; they seem to be too picky in practice. - - Also, gawk will now warn about function parameter names that shadow - global variable names. - -24. It is now possible to dynamically add builtin functions on systems - that support dlopen. This facility is not (yet) as portable or well - integrated as it might be. *** WARNING *** THIS FEATURE WILL EVOLVE! - -25. 
There are *many* new tests in the test suite. - -26. Profiling has been added! A separate version of gawk, named pgawk, is - built and generates a run-time execution profile. The --profile option - can be used to change the default output file. In regular gawk, this - option pretty-prints the parse tree. - -27. Gawk has been internationalized, using GNU gettext. Translations for - future distributions are most welcome. Simultaneously, gawk was switched - over to using automake. You need Automake 1.4a (from the CVS archive) - if you want to muck with the Makefile.am files. - -28. New asort() function for sorting arrays. See the doc for details. - -29. The match function takes an optional array third argument to hold - the text matched by parenthesized sub-expressions. - -30. The bit op functions and octal and hex source code constants are on by - default, no longer a configure-time option. Recognition of non-decimal - data is now enabled at runtime with --non-decimal-data command line option. - -31. Internationalization features available at the awk level: new TEXTDOMAIN - variable and bindtextdomain() and dcgettext() functions. printf formats - may contain the "%2$3.5d" kind of notation for use in translations. See - the texinfo manual for details. - -32. The return value from close() has been rationalized. Most notably, - closing something that wasn't open returns -1 but remains non-fatal. - -33. The array effeciency change from 3.0.5 was reverted; the semantics were - not right. Additionally, index values of previously stored elements - can no longer change dynamically. - -34. The new option --dump-variables dumps a list of all global variables and - their final types and values to a file you give, or to `awkvars.out'. - -35. Gawk now uses a recent version of random.c courtesy of the FreeBSD - project. - -36. The gawk source code now uses ANSI C function definitions (new style), - with ansi2knr to translate code for old compilers. - -37. `for (iggy in foo)' loops should be more robust now in the face of - adding/deleting elements in the middle; they loop over just the elements - that are present in the array when the loop starts. - -Changes from 3.0.5 to 3.0.6 ---------------------------- - -This is a bug fix release only, pending further development on 3.1.0. - -Bugs fixed and changes made: - -1. Subscripting an array with a variable that is just a number no - longer magically converts the variable into a string. - -2. Similarly, running a `for (iggy in foo)' loop where `foo' is a - function parameter now works correctly. - -3. Similarly, `i = ""; v[i] = a; if (i in v) ...' now works again. - -4. Gawk now special cases `for (iggy in foo) delete foo[iggy]' and - treats it as the moral equivalent of `delete foo'. This should be - a major efficiency win when portably deleting large arrays. - -5. VMS port brought up to date. - -Changes from 3.0.4 to 3.0.5 ---------------------------- - -This is a bug fix release only, pending further development on 3.1.0. - -Bugs Fixed: - - 1. `function foo(foo)' is now a fatal error. - - 2. Array indexing is now much more efficient: where possible, only one - copy of an index string is kept, even if used in multiple arrays. - - 3. Support was added for MacOS X and an `install-strip' target. - - 4. [s]printf formatting for `0' flag and floating point formats now - works correctly. - - 5. HP-UX large file support with GCC 2.95.1 now works. - - 6. 
Arguments that contain `=' but that aren't syntactically valid are - now treated as filenames, instead of as fatal errors. - - 7. `-v NF=foo' now works. - - 8. Non-ascii alphanumeric characters are now treated as such in the - right locales by regex.c. Similarly, a Latin-1 y-umlaut (decimal - value 255) in the program text no longer acts like EOF. - - 9. Array indexes are always compared as strings; fixes an obscure bug - when user input gets used for the `x in array' test. - -10. The usage message now points users to the documentation for how - to report bugs. - -11. `/=' now works after an array. - -12. `b += b += 1' now works correctly. - -13. IGNORECASE changing with calls match() now works better. (Fix for - semi-obscure bug.) - -14. Multicharacter values for RS now generate a lint warning. - -15. The gawk open file caching is now much more efficient. - -16. Global arrays passed to functions are now managed better. In particular, - test/arynocls.awk won't crash referencing freed memory. - -17. In obscure cases, `getline var' can no longer clobber $0. - -Changes from 3.0.3 to 3.0.4 ---------------------------- - -This is a bug fix release only, pending further development on 3.1.0. - -Bugs Fixed: - - 1. A memory leak when turning a function parameter into an array was - fixed. - - 2. The non-decimal data option now works correctly. - - 3. Using an empty pair of brackets as an array subscript no longer causes - a core dump during parsing. In general, syntax errors should not - cause core dumps any more. - - 4. Standard input is no longer closed if it provides program source, - avoiding strange I/O problems. - - 5. Memory corruption during printing with `print' has been fixed. - - 6. The gsub function now correctly counts the number of matches. - - 7. A typo in doc/Makefile.in has been fixed, making installation work. - - 8. Calling `next' or `nextfile' from a BEGIN or END rule is now fatal. - - 9. Subtle problems in rebuilding $0 when fields were changed have been - fixed. - -10. `FS = FS' now correctly turns off the use of FIELDWIDTHS. - -11. Gawk now parses fields correctly when FS is a single character. - -12. It is now possible for RS to be the NUL character ("\0"). - -13. Weird problems with number conversions on MIPS and other systems - have been fixed. - -14. When parsing using FIELDWIDTHS is in effect, split() with no third - argument will still use the value of FS. - -15. Large File Support for Solaris, HP-UX, AIX, and IRIX is now enabled at - compile time, thanks to Paul Eggert. - -16. Attempting to use the name of a function as a variable or array - from within the function is now caught as a fatal error, instead - of as a core dump. - -17. A bug in parsing hex escapes was fixed. - -18. A weird bug with concatenation where one expression has side effects - that changes another was fixed. - -19. printf/sprintf now behave much better for uses of the '0' and '#' flags - and with precisions and field widths. - -20. Further strangenesses with concatenation and multiple accesses of some - of the special variables was fixed. - -21. The Atari port is marked as no longer supported. - -22. Build problems on HP-UX have been fixed. - -23. Minor fixes and additional explanations added to the documentation. - -24. For RS = "", even a single leading newline is now correctly stripped. - -25. Obscure parsing problems for regex constants like /=.../ fixed, so - that a regex constant is recognized, and not the /= operator. - -26. 
Fixed a bug when closing a redirection that matched the current - or last FILENAME. - -27. Build problems on AIX fixed. - -Changes from 3.0.2 to 3.0.3 ---------------------------- - -The horrendous per-record memory leak introduced in 3.0.1 is gone, finally. - -The `amiga' directory is now gone; Amiga support is now entirely handled -by the POSIX support. - -Win32 support has been added in the `pc' directory. See `README_d/README.pc' -for more info. - -The mmap changes are disabled in io.c, and will be removed entirely -in the next big release. They were an interesting experiment that just -really didn't work in practice. - -A minor memory leak that occurred when using `next' from within a -function has also been fixed. - -Problems with I/O from sub-processes via a pipe are now gone. - -Using "/dev/pid" and the other special /dev files no longer causes a core dump. - -The files regex.h, regex.c, getopt.h, getopt.c, and getopt1.c have been -merged with the versions in GNU libc. Thanks to Ulrich Drepper for his help. - -Some new undocumented features have been added. Use the source, Luke! -It is not clear yet whether these will ever be fully supported. - -Array performance should be much better for very very large arrays. "Virtual -memory required, real memory helpful." - -builtin.c:do_substr rationalized, again. - -The --re-interval option now works as advertised. - -The license text on some of the missing/* files is now generic. - -Lots more new test cases. - -Lots of other small bugs fixed, see the ChangeLog files for details. - -Changes from 3.0.1 to 3.0.2 ---------------------------- - -Gawk now uses autoconf 2.12. - -strftime now behaves correctly if passed an empty format string or if -the string formats to an empty result string. - -Several minor compilation and installation problems have been fixed. - -Minor page break issues in the user's guide have been fixed. - -Lexical errors no longer repeat ad infinitum. - -Changes from 3.0.0 to 3.0.1 ---------------------------- - -Troff source for a handy-dandy five color reference card is now provided. -Thanks to SSC for their macros. - -Gawk now behaves like Unix awk and mawk, in that newline acts as white -space for separating fields and for split(), by default. In posix mode, -only space and tab separate fields. The documentation has been updated to -reflect this. - -Tons and tons of small bugs fixed and new tests added, see the ChangeLogs. - -Lots fewer compile time warnings from gcc -Wall. Remaining ones aren't -worth fixing. - -Gawk now pays some attention to the locale settings. - -Fixes to gsub to catch several corner cases. - -The `print' statement now evaluates all expressions first, and then -prints them. This leads to less suprising behaviour if any expression has -output side effects. - -Miscellanious improvements in regex.h and regex.c. - -Gawk will now install itself as gawk-M.N.P in $(bindir), and link -`gawk' to it. This makes it easy to have multiple versions of gawk -simultaneously. It will also now install itself as `awk' in $(bindir) -if there is no `awk' there. This is in addition to installing itself as -`gawk'. This change benefits the Hurd, and possibly other systems. One -day, gawk will drop the `g', but not yet. - -`--posix' turns on interval expressions. Gawk now matches its documentation. - -`close(FILENAME)' now does something meaningful. - -Field management code in field.c majorly overhauled, several times. - -The gensub code has been fixed, several bugs are now gone. 
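By way of illustration, gensub() differs from sub() and gsub() in that it returns the modified text rather than changing the target in place, and it allows backreferences in the replacement; the following is a minimal sketch of typical usage (standard gawk semantics, not tied to the specific fixes listed above):

	# Replace every "o", returning the result; the target string is left untouched.
	gawk 'BEGIN { print gensub(/o/, "O", "g", "hello world") }'        # prints: hellO wOrld
	# \1, \2 in the replacement refer to parenthesized subexpressions of the match.
	gawk 'BEGIN { print gensub(/(a+)(b+)/, "<\\2\\1>", 1, "aabbb") }'  # prints: <bbbaa>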
- -Gawk will use mmap for data file input if it is available. - -The printf/sprintf code has been improved. - -Minor issues in Makefile setup worked on and improved. - -builtin.c:do_substr rationalized. - -Regex matching fixed so that /+[0-9]/ now matches the leading +. - -For building on vms, the default compiler is now DEC C rather than VAX C. - -Changes from 2.15.6 to 3.0.0 ----------------------------- - -Fixed spelling of `Programming' in the copyright notice in all the files. - -New --re-interval option to turn on interval expressions. They're off -by default, except for --posix, to avoid breaking old programs. - -Passing regexp constants as parameters to user defined functions now -generates a lint warning. - -Several obscure regexp bugs fixed; alas, a small number remain. - -The manual has been thoroughly revised. It's now almost 50% bigger than -it used to be. - -The `+' modifier in printf is now reset correctly for each item. - -The do_unix variable is now named do_traditional. - -Handling of \ in sub and gsub rationalized (somewhat, see the manual for -the gory [and I do mean gory] details). - -IGNORECASE now uses ISO 8859-1 Latin-1 instead of straight ASCII. See the -source for how to revert to pure ASCII. - ---lint will now warn if an assignment occurs in a conditional context. -This may become obnoxious enough to need turning off in the future, but -"it seemed like a good idea at the time." - -%hf and %Lf are now diagnosed as invalid in printf, just like %lf. - -Gawk no longer incorrectly closes stdin in child processes used in -input pipelines. - -For integer formats, gawk now correctly treats the precision as the -number of digits to print, not the number of characters. - -gawk is now much better at catching the use of scalar values when -arrays are needed, both in function calls and the `x in y' constructs. - -New gensub function added. See the manual. - -If do_tradtional is true, octal and hex escapes in regexp constants are -treated literally. This matches historical behavior. - -yylex/nextc fixed so that even null characters can be included -in the source code. - -do_format now handles cases where a format specifier doesn't end in -a control letter. --lint reports an error. - -strftime() now uses a default time format equivalent to that of the -Unix date command, thus it can be called with no arguments. - -Gawk now catches functions that are used but not defined at parse time -instead of at run time. (This is a lint error, making it fatal could break -old code.) - -Arrays that max out are now handled correctly. - -Integer formats outside the range of an unsigned long are now detected -correctly using the SunOS 4.x cc compiler. - ---traditional option added as new preferred name for --compat, in keeping -with GCC. - ---lint-old option added, so that warnings about things not in old awk -are only given if explicitly asked for. - -`next file' has changed to one word, `nextfile'. `next file' is still -accepted but generates a lint warning. `next file' will go away eventually. - -Gawk with --lint will now notice empty source files and empty data files. - -Amiga support using the Unix emulation added. Thanks to fnf@ninemoons.com. - -test/Makefile is now "parallel-make safe". - -Gawk now uses POSIX regexps + GNU regex ops by default. --posix goes to -pure posix regexps, and --compat goes to traditional Unix regexps. However, -interval expressions, even though specified by POSIX, are turned off by -default, to avoid breaking old code. 
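As a usage sketch of the interval expressions mentioned above, enabled by --re-interval (or --posix); the pattern and the input file name `phonelist' are only illustrative:

	# {3} and {4} are interval expressions: exactly three digits, a dash, four digits.
	gawk --re-interval '/^[0-9]{3}-[0-9]{4}$/ { print "well-formed:", $0 }' phonelist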
- -IGNORECASE now applies to string comparison as well as regexp operations. - -The AT&T Bell Labs Research awk fflush builtin function is now supported. -fflush is extended to flush stdout if no arg and everything if given -the null string as an argument. - -If RS is more than one character, it is treated as a regular expression -and records are delimited accordingly. The variable RT is set to the record -terminator string. This is disabled in compatibility mode. - -If FS is set to the null string (or the third arg. of split() is the null -string), splitting is done at every single character. This is disabled in -compatibility mode. - -Gawk now uses the Autoconf generated configure script, doing away with all -the config/* files and the machinery that went with them. The Makefile.in -has also changed accordingly, complete with all the standard GNU Makefile -targets. (Non-unix systems may still have their own config.h and Makefile; -see the appropriate README_d/README.* and/or subdirectory.) - -The source code has been cleaned up somewhat and the formatting improved. - -Changes from 2.15.5 to 2.15.6 ------------------------------ - -Copyrights updated on all changed files. - -test directory enhanced with four new tests. - -Gawk now generates a warning for \x without following hexadecimal digits. -In this case, it returns 'x', not \0. - -Several fixes in main.c related to variable initialization: - CONVFMT has a default value - resetup is called before initializing variables - the varinit table fixed up a bit (see the comments) - -gawk.1 updated with new BUG REPORTS section. - -A plain `print' inside a BEGIN or END now generates a lint warning (awk.y). - -Small fix in iop.c:get_a_record to avoid reading uninitialized memory. - -awk.y:yylex now does a better job of handling things if the source file -does not end in a newline. Probably there is more work to be done. - -Memory leaks fixed in awk.y, particularly in cases of duplicate function -parameters. Also, calling a function doesn't leak memory during parsing. - -Empty function bodies are now allowed (awk.y). - -Gawk now detects duplicate parameter names in functions (awk.y). - -New function `error' in msg.c added for use from awk.y. - -eval.c:r_get_lhs now checks if its argument is a parameter on the stack, -and pulls down the real variable. This catches more 'using an array as -a scalar' kinds of errors. - -main.c recovers C alloca space after parsing, this is important for -bison-based parsers. re.c recovers C alloca space after doing an research. -[Changes from Pat Rankin] - -builtin.c now declares the random() related functions based on -RANDOM_MISSING from config.h. [Suggested by Pat Rankin] - -awk.h now handles alloca correctly for HP-UX. [Kaveh Ghazi] - -regex.h and config/cray60 updated for Unicos 8.0. [Hal Peterson] - -Fixed re.c and dfa.c so that gawk no longer leaks memory when using -lots of dynamic regexps. - -Removed dependency on signed chars from `idx' variable in awk.h. Gawk -now passes its test suite if compiled with `gcc -fno-signed-char'. - -Fixed warning on close in io.c to go under lint control. Too many people -have complained about the spurious message, particularly when closing a -child pipeline early. - -Gawk now correctly handles RS = "" when input is from a terminal -(iop.c:get_a_record). - -Config file added for GNU. - -gawk 'BEGIN { exit 1 } ; END { exit }' now exits 1, as it should -(eval.c:interpret). - -sub and gsub now follow posix, \ escapes both & and \. 
Each \ must -be doubled initially in the program to get it into the string. -Thanks to Mike Brennan for pointing this out (builtin.c:sub_common). - -If FS is "", gawk behaves like mawk and nawk, making the whole record be $1. -Yet Another Dark Corner. Sigh (field.c:def_parse_field). - -Gawk now correctly recomputes string values for numbers if CONVFMT has -changed (awk.h:force_string, node.c:r_force_string). - -A regexp of the form `/* this looks like a comment but is not */' will -now generate a warning from --lint (awk.y). - -Gawk will no longer core dump if given an empty input file (awk.y:get_src_buf, -iop.c:optimal_bufsize). - -A printf format of the form %lf is handled correctly. The `l' generates -a lint warning (builtin.c:format_tree) [Thanks to Mark Moraes]. - -Lynxos config file added. - -`continue' outside a loop treated as `next' only in compatibility mode, -instead of by default; recent att nawk chokes on this now. `break' -outside a loop now treated as `next' in compatibility mode (eval.c). - -Bug fix in string concatenation, an arbitrary number of expressions -are allowed (eval.c). - -$1 += $2 now works correctly (eval.c). - -Changing IGNORECASE no longer resets field-splitting to FS if it was -using FIELDWIDTHS (eval.c, field.c). - -Major enhancement: $0 and NF for last record read are now preserved -into the END rule (io.c). - -Regexp fixes: - /./ now matches a newline (regex.h) - ^ and $ match beginning and end of string only, not any embedded - newlines (re.c) - regex.c should compile and work ok on 64-bit mips/sgi machines - -Changes from 2.15.4 to 2.15.5 ------------------------------ - -FUTURES file updated and re-arranged some with more rational schedule. - -Many prototypes handled better for ANSI C in protos.h. - -getopt.c updated somewhat. - -test/Makefile now removes junk directory, `bardargtest' renamed `badargs.' - -Bug fix in iop.c for RS = "". Eat trailing newlines off of record separator. - -Bug fix in Makefile.bsd44, use leading tab in actions. - -Fix in field.c:set_FS for FS == "\\" and IGNORECASE != 0. - -Config files updated or added: - cray60, DEC OSF/1 2.0, Utek, sgi405, next21, next30, atari/config.h, - sco. - -Fix in io.c for ENFILE as well as EMFILE, update decl of groupset to -include OSF/1. - -Rationalized printing as integers if numbers are outside the range of a long. -Changes to node.c:force_string and builtin.c. - -Made internal NF, NR, and FNR variables longs instead of ints. - -Add LIMITS_H_MISSING stuff to config.in and awk.h, and default defs for -INT_MAX and LONG_MAX, if no limits.h file. Add a standard decl of -the time() function for __STDC__. From ghazi@noc.rutgers.edu. - -Fix tree_eval in awk.h and r_tree_eval in eval.c to deal better with -function parameters, particularly ones that are arrays. - -Fix eval.c to print out array names of arrays used in scalar contexts. - -Fix eval.c in interpret to zero out source and sourceline initially. This -does a better job of providing source file and line number information. - -Fix to re_parse_field in field.c to not use isspace when RS = "", but rather -to explicitly look for blank and tab. - -Fix to sc_parse_field in field.c to catch the case of the FS character at the -end of a record. - -Lots of miscellanious bug fixes for memory leaks, courtesy Mark Moraes, -also fixes for arrays. - -io.c fixed to warn about lack of explicit closes if --lint. - -Updated missing/strftime.c to match posted strftime 6.2. - -Bug fix in builtin.c, in case of non-match in sub_common. 
- -Updated constant used for division in builtin.c:do_rand for DEC Alpha -and CRAY Y-MP. - -POSIXLY_CORRECT in the environment turns on --posix (fixed in main.c). - -Updated srandom prototype and calls in builtin.c. - -Fix awk.y to enforce posix semantics of unary +: result is numeric. - -Fix array.c to not rearrange the hash chain upon finding an index in -the array. This messed things up in cases like: - for (index1 in array) { - blah - if (index2 in array) # blew away the for - stuff - } - -Fixed spelling errors in the man page. - -Fixes in awk.y so that - gawk '' /path/to/file -will work without core dumping or finding parse errors. - -Fix main.c so that --lint will fuss about an empty program. -Yet another fix for argument parsing in the case of unrecognized options. - -Bug fix in dfa.c to not attempt to free null pointers. - -Bug fix in builtin.c to only use DEFAULT_G_PRECISION for %g or %G. - -Bug fix in field.c to achieve call by value semantics for split. - -Changes from 2.15.3 to 2.15.4 ------------------------------ - -Lots of lint fixes, and do_sprintf made mostly ANSI C compatible. - -Man page updated and edited. - -Copyrights updated. - -Arrays now grow dynamically, initially scaling up by an order of magnitude - and then doubling, up to ~ 64K. This should keep gawk's performance - graceful under heavy load. - -New `delete array' feature added. Only documented in the man page. - -Switched to dfa and regex suites from grep-2.0. These offer the ability to - move to POSIX regexps in the next release. - -Disabled GNU regex ops. - -Research awk -m option now recognized. It does nothing in gawk, since gawk - has no static limits. Only documented in the man page. - -New bionic (faster, better, stronger than before) hashing function. - -Bug fix in argument handling. `gawk -X' now notices there was no program. - Additional bug fixes to make --compat and --lint work again. - -Many changes for systems where sizeof(int) != sizeof(void *). - -Add explicit alloca(0) in io.c to recover space from C alloca. - -Fixed file descriptor leak in io.c. - -The --version option now follows the GNU coding standards and exits. - -Fixed several prototypes in protos.h. - -Several tests updated. On Solaris, warn that the out? tests will fail. - -Configuration files for SunOS with cc and Solaris 2.x added. - -Improved error messages in awk.y on gawk extensions if do_unix or do_compat. - -INSTALL file added. - -Fixed Atari Makefile and several VMS specific changes. - -Better conversion of numbers to strings on systems with broken sprintfs. - -Changes from 2.15.2 to 2.15.3 ------------------------------ - -Increased HASHSIZE to a decent number, 127 was way too small. - -FILENAME is now the null string in a BEGIN rule. - -Argument processing fixed for invalid options and missing arguments. - -This version will build on VMS. This included a fix to close all files - and pipes opened with redirections before closing stdout and stderr. - -More getpgrp() defines. - -Changes for BSD44: in io.c and Makefile.bsd44. - -All directories in the distribution are now writable. - -Separated LDFLAGS and CFLAGS in Makefile. CFLAGS can now be overridden by - user. - -Make dist now builds compressed archives ending in .gz and runs doschk. - -Amiga port. - -New getopt.c fixes Alpha OSF/1 problem. - -Make clean now removes possible test output. - -Improved algorithm for multiple adjacent string concatenations leads to - performance improvements. 
- -Fix nasty bug whereby command-line assignments, both with -v and at run time, - could create variables with syntactically illegal names. - -Fix obscure bug in printf with %0 flag and filling. - -Add a lint check for substr if provided length exceeds remaining characters - in string. - -Update atari support. - -PC support enhanced to include support for both DOS and OS/2. (Lots more - #ifdefs. Sigh.) - -Config files for Hitachi Unix and OSF/1, courtesy of Yoko Morishita - (morisita@sra.co.jp) - -Changes from 2.15.1 to 2.15.2 ------------------------------ - -Additions to the FUTURES file. - -Document undefined order of output when using both standard output - and /dev/stdout or any of the /dev output files that gawk emulates in - the absence of OS support. - -Clean up the distribution generation in Makefile.in: the info files are - now included, the distributed files are marked read-only and patched - distributions are now unpacked in a directory named with the patch level. - -Changes from 2.15 to 2.15.1 ---------------------------- - -Close stdout and stderr before all redirections on program exit. This allows - detection of write errors and also fixes the messages test on Solaris 2.x. - -Removed YYMAXDEPTH define in awk.y which was limiting the parser stack depth. - -Changes to config/bsd44, Makefile.bsd44 and configure to bring it into line - with the BSD4.4 release. - -Changed Makefile to use prefix, exec_prefix, bindir etc. - -make install now installs info files. - -make install now sets permissions on installed files. - -Make targets added: uninstall, distclean, mostlyclean and realclean. - -Added config.h to cleaner and clobber make targets. - -Changes to config/{hpux8x,sysv3,sysv4,ultrix41} to deal with alloca(). - -Change to getopt.h for portability. - -Added more special cases to the getpgrp() call. - -Added README.ibmrt-aos and config/ibmrt-aos. - -Changes from 2.14 to 2.15 ---------------------------- - -Command-line source can now be mixed with library functions. - -ARGIND variable tracks index in ARGV of FILENAME. - -GNU style long options in addition to short options. - -Plan 9 style special files interpreted by gawk: - /dev/pid - /dev/ppid - /dev/pgrpid - /dev/user - $1 = getuid - $2 = geteuid - $3 = getgid - $4 = getegid - $5 ... $NF = getgroups if supported - -ERRNO variable contains error string if getline or close fails. - -Very old options -a and -e have gone away. - -Inftest has been removed from the default target in test/Makefile -- the - results were too machine specific and resulted in too many false alarms. - -A README.amiga has been added. - -The "too many arguments supplied for format string" warning message is only - in effect under the lint option. - -Code improvements in dfa.c. - -Fixed all reported bugs: - - Writes are checked for failure (such as full filesystem). - - Stopped (at least some) runaway error messages. - - gsub(/^/, "x") does the right thing for $0 of 0, 1, or more length. - - close() on a command being piped to a getline now works properly. - - The input record will no longer be freed upon an explicit close() - of the input file. - - A NUL character in FS now works. - - In a substitute, \\& now means a literal backslash followed by what - was matched. - - Integer overflow of substring length in substr() is caught. - - An input record without a newline termination is handled properly. - - In io.c, check is against only EMFILE so that system file table - is not filled. - - Renamed all files with names longer than 14 characters. 
- - Escaped characters in regular expressions were being lost when - IGNORECASE was used. - - Long source lines were not being handled properly. - - Sourcefiles that ended in a tab but no newline were bombing. - - Patterns that could match zero characters in split() were not working - properly. - - The parsedebug option was not working. - - The grammar was being a bit too lenient, allowing some very dubious - programs to pass. - - Compilation with DEBUG defined now works. - - A variable read in with getline was not being treated as a potential - number. - - Array subscripts were not always of string type. - - -Changes from 2.13.2 to 2.14 ---------------------------- - -Updated manual! - -Added "next file" to skip efficiently to the next input file. - -Fixed potential of overflowing buffer in do_sprintf(). - -Plugged small memory leak in sub_common(). - -EOF on a redirect is now "sticky" -- it can only be cleared by close()ing - the pipe or file. - -Now works if used via a #! /bin/gawk line at the top of an executable file - when that line ends with whitespace. - -Added some checks to the grammar to catch redefinition of builtin functions. - This could eventually be the basis for an extension to allow redefining - functions, but in the mean time it's a good error catching facility. - -Negative integer exponents now work. - -Modified do_system() to make sure it had a non-null string to be passed - to system(3). Thus, system("") will flush any pending output but not go - through the overhead of forking an un-needed shell. - -A fix to floating point comparisons so that NaNs compare right on IEEE systems. - -Added code to make sure we're not opening directories for reading and such. - -Added code to do better diagnoses of weird or null file names. - -Allow continue outside of a loop, unless in strict posix mode. Lint option - will issue warning. - -New missing/strftime.c. There has been one change that affects gawk. Posix - now defines a %V conversion so the vms conversion has been changed to %v. - If this version is used with gawk -Wlint and they use %V in a call to - strftime, they'll get a warning. - -Error messages now conform to GNU standard (I hope). - -Changed comparisons to conform to the description found in the file POSIX. - This is inconsistent with the current POSIX draft, but that is broken. - Hopefully the final POSIX standard will conform to this version. - (Alas, this will have to wait for 1003.2b, which will be a revision to - the 1003.2 standard. That standard has been frozen with the broken - comparison rules.) - -The length of a string was a short and now is a size_t. - -Updated VMS help. - -Added quite a few new tests to the test suite and deleted many due to lack of - written releases. Test output is only removed if it is identical to the - "good" output. - -Fixed a couple of bugs for reference to $0 when $0 is "" -- particularly in - a BEGIN block. - -Fixed premature freeing in construct "$0 = $0". - -Removed the call to wait_any() in gawk_popen(), since on at least some systems, - if gawk's input was from a pipe, the predecessor process in the pipe was a - child of gawk and this caused a deadlock. - -Regexp can (once again) match a newline, if given explicitly. - -nextopen() makes sure file name is null terminated. - -Fixed VMS pipe simulation. Improved VMS I/O performance. - -Catch . used in variable names. - -Fixed bug in getline without redirect from a file -- it was quitting after the - first EOF, rather than trying the next file. 
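For illustration, the `next file' statement added in this release (later spelled `nextfile', as noted further up) abandons the current input file and moves on to the next one without reading its remaining records; a minimal sketch, with a made-up skip condition:

	# If the first record of a file marks it as uninteresting, skip the whole file.
	FNR == 1 && /^#skip/ { nextfile }
	{ print FILENAME ":" FNR ": " $0 }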
- -Fixed bug in treatment of backslash at the end of a string -- it was bombing - rather than doing something sensible. It is not clear what this should mean, - but for now I issue a warning and take it as a literal backslash. - -Moved setting of regexp syntax to before the option parsing in main(), to - handle things like -v FS='[.,;]' - -Fixed bug when NF is set by user -- fields_arr must be expanded if necessary - and "new" fields must be initialized. - -Fixed several bugs in [g]sub() for no match found or the match is 0-length. - -Fixed bug where in gsub() a pattern anchored at the beginning would still - substitute throughout the string. - -make test does not assume that . is in PATH. - -Fixed bug when a field beyond the end of the record was requested after - $0 was altered (directly or indirectly). - -Fixed bug for assignment to field beyond end of record -- the assigned value - was not found on subsequent reference to that field. - -Fixed bug for FS a regexp and it matches at the end of a record. - -Fixed memory leak for an array local to a function. - -Fixed hanging of pipe redirection to getline - -Fixed coredump on access to $0 inside BEGIN block. - -Fixed treatment of RS = "". It now parses the fields correctly and strips - leading whitespace from a record if FS is a space. - -Fixed faking of /dev/stdin. - -Fixed problem with x += x - -Use of scalar as array and vice versa is now detected. - -IGNORECASE now obeyed for FS (even if FS is a single alphabetic character). - -Switch to GPL version 2. - -Renamed awk.tab.c to awktab.c for MSDOS and VMS tar programs. - -Renamed this file (CHANGES) to NEWS. - -Use fmod() instead of modf() and provide FMOD_MISSING #define to undo - this change. - -Correct the volatile declarations in eval.c. - -Avoid errant closing of the file descriptors for stdin, stdout and stderr. - -Be more flexible about where semi-colons can occur in programs. - -Check for write errors on all output, not just on close(). - -Eliminate the need for missing/{strtol.c,vprintf.c}. - -Use GNU getopt and eliminate missing/getopt.c. - -More "lint" checking. - - -Changes from 2.13.1 to 2.13.2 ------------------------------ - -Toward conformity with GNU standards, configure is a link to mkconf, the latter - to disappear in the next major release. - -Update to config/bsd43. - -Added config/apollo, config/msc60, config/cray2-50, config/interactive2.2 - -sgi33.cc added for compilation using cc rather than gcc. - -Ultrix41 now propagates to config.h properly -- as part of a general - mechanism in configure for kludges -- #define anything from a config file - just gets tacked onto the end of config.h -- to be used sparingly. - -Got rid of an unnecessary and troublesome declaration of vprintf(). - -Small improvement in locality of error messages. - -Try to diagnose use of array as scalar and vice versa -- to be improved in - the future. - -Fix for last bug fix for Cray division code--sigh. - -More changes to test suite to explicitly use sh. Also get rid of - a few generated files. - -Fixed off-by-one bug in string concatenation code. - -Fix for use of array that is passed in from a previous function parameter. - Addition to test suite for above. - -A number of changes associated with changing NF and access to fields - beyond the end of the current record. - -Change to missing/memcmp.c to avoid seg. fault on zero length input. 
- -Updates to test suite (including some inadvertently left out of the last patch) - to invoke sh explicitly (rather than rely on #!/bin/sh) and remove some - junk files. test/chem/good updated to correspond to bug fixes. - -Changes from 2.13.0 to 2.13.1 ------------------------------ - -More configs and PORTS. - -Fixed bug wherein a simple division produced an erroneous FPE, caused by - the Cray division workaround -- that code is now #ifdef'd only for - Cray *and* fixed. - -Fixed bug in modulus implementation -- it was very close to the above - code, so I noticed it. - -Fixed portability problem with limits.h in missing.c - -Fixed portability problem with tzname and daylight -- define TZNAME_MISSING - if strftime() is missing and tzname is also. - -Better support for Latin-1 character set. - -Fixed portability problem in test Makefile. - -Updated PROBLEMS file. - -=============================== gawk-2.13 released ========================= -Changes from 2.12.42 to 2.12.43 -------------------------------- - -Typo in awk.y - -Fixed up strftime.3 and added doc. for %V. - -Changes from 2.12.41 to 2.12.42 -------------------------------- - -Fixed bug in devopen() -- if you had write permission in /dev, - it would just create /dev/stdout etc.!! - -Final (?) VMS update. - -Make NeXT use GFMT_WORKAROUND - -Fixed bug in sub_common() for substitute on zero-length match. Improved the - code a bit while I was at it. - -Fixed grammar so that $i++ parses as ($i)++ - -Put support/* back in the distribution (didn't I already do this?!) - -Changes from 2.12.40 to 2.12.41 -------------------------------- - -VMS workaround for broken %g format. - -Changes from 2.12.39 to 2.12.40 -------------------------------- - -Minor man page update. - -Fixed latent bug in redirect(). - -Changes from 2.12.38 to 2.12.39 -------------------------------- - -Updates to test suite -- remove dependence on changing gawk.1 man page. - -Changes from 2.12.37 to 2.12.38 -------------------------------- - -Fixed bug in use of *= without whitespace following. - -VMS update. - -Updates to man page. - -Option handling updates in main.c - -test/manyfiles redone and added to bigtest. - -Fixed latent (on Sun) bug in handling of save_fs. - -Changes from 2.12.36 to 2.12.37 -------------------------------- - -Update REL in Makefile-dist. Incorporate test suite into main distribution. - -Minor fix in regtest. - -Changes from 2.12.35 to 2.12.36 -------------------------------- - -Release takes on dual personality -- 2.12.36 and 2.13.0 -- any further - patches before public release won't count for 2.13, although they will for - 2.12 -- be careful to avoid confusion! patchlevel.h will be the last thing - to change. - -Cray updates to deal with arithmetic problems. - -Minor test suite updates. - -Fixed latent bug in parser (freeing memory). - -Changes from 2.12.34 to 2.12.35 -------------------------------- - -VMS updates. - -Flush stdout at top of err() and stderr at bottom. - -Fixed bug in eval_condition() -- it wasn't testing for MAYBE_NUM and - doing the force_number(). - -Included the missing manyfiles.awk and a new test to catch the above bug which - I am amazed wasn't already caught by the test suite -- it's pretty basic. - -Changes from 2.12.33 to 2.12.34 -------------------------------- - -Atari updates -- including bug fix. - -More VMS updates -- also nuke vms/version.com. - -Fixed bug in handling of large numbers of redirections -- it was probably never - tested before (blush!). - -Minor rearrangement of code in r_force_number(). 
- -Made chem and regtest tests a bit more portable (Ultrix again). - -Added another test -- manyfiles -- not invoked under any other test -- very Unix - specific. - -Rough beginning of LIMITATIONS file -- need my AWK book to complete it. - -Changes from 2.12.32 to 2.12.33 -------------------------------- - -Expunge debug.? from various files. - -Remove vestiges of Floor and Ceil kludge. - -Special case integer division -- mainly for Cray, but maybe someone else - will benefit. - -Workaround for iop_close closing an output pipe descriptor on Cray -- - not conditional since I think it may fix a bug on SGI as well and I don't - think it can hurt elsewhere. - -Fixed memory leak in assoc_lookup(). - -Small cleanup in test suite. - -Changes from 2.12.31 to 2.12.32 -------------------------------- - -Nuked debug.c and debugging flag -- there are better ways. - -Nuked version.sh and version.c in subdirectories. - -Fixed bug in handling of IGNORECASE. - -Fixed bug when FIELDWIDTHS was set via -v option. - -Fixed (obscure) bug when $0 is assigned a numerical value. - -Fixed so that escape sequences in command-line assignments work (as it already - said in the comment). - -Added a few cases to test suite. - -Moved support/* back into distribution. - -VMS updates. - -Changes from 2.12.30 to 2.12.31 -------------------------------- - -Cosmetic manual page changes. - -Updated sunos3 config. - -Small changes in test suite including renaming files over 14 chars. in length. - -Changes from 2.12.29 to 2.12.30 -------------------------------- - -Bug fix for many string concatenations in a row. - -Changes from 2.12.28 to 2.12.29 -------------------------------- - -Minor cleanup in awk.y - -Minor VMS update. - -Minor atari update. - -Changes from 2.12.27 to 2.12.28 -------------------------------- - -Got rid of the debugging goop in eval.c -- there are better ways. - -Sequent port. - -VMS changes left out of the last patch -- sigh! config/vms.h renamed - to config/vms-conf.h. - -Fixed missing/tzset.c - -Removed use of gcvt() and GCVT_MISSING -- turns out it was no faster than - sprintf("%g") and caused all sorts of portability headaches. - -Tuned get_field() -- it was unnecessarily parsing the whole record on reference - to $0. - -Tuned interpret() a bit in the rule_node loop. - -In r_force_number(), worked around bug in Uglix strtod() and got rid of - ugly do{}while(0) at Michal's urging. - -Replaced do_deref() and deref with unref(node) -- much cleaner and a bit faster. - -Got rid of assign_number() -- contrary to comment, it was no faster than - just making a new node and freeing the old one. - -Replaced make_number() and tmp_number() with macros that call mk_number(). - -Changed freenode() and newnode() into macros -- the latter is getnode() - which calls more_nodes() as necessary. - -Changes from 2.12.26 to 2.12.27 -------------------------------- - -Completion of Cray 2 port (includes a kludge for floor() and ceil() - that may go or be changed -- I think that it may just be working around - a bug in chem that is being tweaked on the Cray). - -More VMS updates. - -Moved kludge over yacc's insertion of malloc and realloc declarations - from protos.h to the Makefile. - -Added a lisp interpreter in awk to the test suite. (Invoked under - bigtest.) - -Cleanup in r_force_number() -- I had never gotten around to a thorough - profile of the cache code and it turns out to be not worth it. - -Performance boost -- do lazy force_number()'ing for fields etc. i.e. 
- flag them (MAYBE_NUM) and call force_number only as necessary. - -Changes from 2.12.25 to 2.12.26 -------------------------------- - -Rework of regexp stuff so that dynamic regexps have reasonable - performance -- string used for compiled regexp is stored and - compared to new string -- if same, no recompilation is necessary. - Also, very dynamic regexps cause dfa-based searching to be turned - off. - -Code in dev_open() is back to returning fileno(std*) rather than - dup()ing it. This will be documented. Sorry for the run-around - on this. - -Minor atari updates. - -Minor vms update. - -Missing file from MSDOS port. - -Added warning (under lint) if third arg. of [g]sub is a constant and - handle it properly in the code (i.e. return how many matches). - -Changes from 2.12.24 to 2.12.25 -------------------------------- - -MSDOS port. - -Non-consequential changes to regexp variables in preparation for - a more serious change to fix a serious performance problem. - -Changes from 2.12.23 to 2.12.24 -------------------------------- - -Fixed bug in output flushing introduced a few patches back. This caused - serious performance losses. - -Changes from 2.12.22 to 2.12.23 -------------------------------- - -Accidentally left config/cray2-60 out of last patch. - -Added some missing dependencies to Makefile. - -Cleaned up mkconf a bit; made yacc the default parser (no alloca needed, - right?); added rs6000 hook for signed characters. - -Made regex.c with NO_ALLOCA undefined work. - -Fixed bug in dfa.c for systems where free(NULL) bombs. - -Deleted a few cant_happen()'s that *really* can't hapen. - -Changes from 2.12.21 to 2.12.22 -------------------------------- - -Added to config stuff the ability to choose YACC rather than bison. - -Fixed CHAR_UNSIGNED in config.h-dist. - -Second arg. of strtod() is char ** rather than const char **. - -stackb is now initially malloc()'ed since it may be realloc()'ed. - -VMS updates. - -Added SIZE_T_MISSING to config stuff and a default typedef to awk.h. - (Maybe it is not needed on any current systems??) - -re_compile_pattern()'s size is now size_t unconditionally. - -Changes from 2.12.20 to 2.12.21 -------------------------------- - -Corrected missing/gcvt.c. - -Got rid of use of dup2() and thus DUP_MISSING. - -Updated config/sgi33. - -Turned on (and fixed) in cmp_nodes() the behaviour that I *hope* will be in - POSIX 1003.2 for relational comparisons. - -Small updates to test suite. - -Changes from 2.12.19 to 2.12.20 -------------------------------- - -Sloppy, sloppy, sloppy!! I didn't even try to compile the last two - patches. This one fixes goofs in regex.c. - -Changes from 2.12.18 to 2.12.19 -------------------------------- - -Cleanup of last patch. - -Changes from 2.12.17 to 2.12.18 -------------------------------- - -Makefile renamed to Makefile-dist. - -Added alloca() configuration to mkconf. (A bit kludgey.) Just - add a single line containing ALLOCA_PW, ALLOCA_S or ALLOCA_C - to the appropriate config file to have Makefile-dist edited - accordingly. - -Reorganized output flushing to correspond with new semantics of - devopen() on "/dev/std*" etc. - -Fixed rest of last goof!! - -Save and restore errno in do_pathopen(). - -Miscellaneous atari updates. - -Get rid of the trailing comma in the NODETYPE definition (Cray - compiler won't take it). - -Try to make the use of `const' consistent since Cray compiler is - fussy about that. See the changes to `basename' and `myname'. 
- -It turns out that, according to section 3.8.3 (Macro Replacement) - of the ANSI Standard: ``If there are sequences of preprocessing - tokens within the list of arguments that would otherwise act as - preprocessing directives, the behavior is undefined.'' That means - that you cannot count on the behavior of the declaration of - re_compile_pattern in awk.h, and indeed the Cray compiler chokes on it. - -Replaced alloca with malloc/realloc/free in regex.c. It was much simpler - than expected. (Inside NO_ALLOCA for now -- by default no alloca.) - -Added a configuration file, config/cray60, for Unicos-6.0. - -Changes from 2.12.16 to 2.12.17 -------------------------------- - -Ooops. Goofed signal use in last patch. - -Changes from 2.12.15 to 2.12.16 -------------------------------- - -RENAMED *_dir to just * (e.g. missing_dir). - -Numerous VMS changes. - -Proper inclusion of atari and vms files. - -Added experimental (ifdef'd out) RELAXED_CONTINUATION and DEFAULT_FILETYPE - -- please comment on these! - -Moved pathopen() to io.c (sigh). - -Put local directory ahead in default AWKPATH. - -Added facility in mkconf to echo comments on stdout: lines beginning - with "#echo " will have the remainder of the line echoed when mkconf is run. - Any lines starting with "#" will otherwise be treated as comments. The - intent is to be able to say: - "#echo Make sure you uncomment alloca.c in the Makefile" - or the like. - -Prototype fix for V.4 - -Fixed version_string to not print leading @(#). - -Fixed FIELDWIDTHS to work with strict (turned out to be easy). - -Fixed conf for V.2. - -Changed semantics of /dev/fd/n to be like on real /dev/fd. - -Several configuration and updates in the makefile. - -Updated manpage. - -Include tzset.c and system.c from missing_dir that were accidently left out of - the last patch. - -Fixed bug in cmdline variable assignment -- arg was getting freed(!) in - call to variable. - -Backed out of parse-time constant folding for now, until I can figure out - how to do it right. - -Fixed devopen() so that getline <"-" works. - -Changes from 2.12.14 to 2.12.15 -------------------------------- - -Changed config/* to a condensed form that can be used with mkconf to generate - a config.h from config.h-dist -- much easier to maintain. Please check - carefully against what you had before for a particular system and report - any problems. vms.h remains separate since the stuff at the bottom - didn't quite fit the mkconf model -- hopefully cleared up later. - -Fixed bug in grammar -- didn't allow function definition to be separated from - other rules by a semi-colon. - -VMS fix to #includes in missing.c -- should we just be including awk.h? - -Updated README for texinfo.tex version. - -Updating of copyright in all .[chy] files. - -Added but commented out Michal's fix to strftime. - -Added tzset() emulation based on Rick Adams' code. Added TZSET_MISSING to - config.h-dist. - -Added strftime.3 man page for missing_dir - -More posix: func, **, **= don't work in -W posix - -More lint: ^, ^= not in old awk - -gawk.1: removed ref to -DNO_DEV_FD, other minor updating. - -Style change: pushbak becomes pushback() in yylex(). - -Changes from 2.12.13 to 2.12.14 -------------------------------- - -Better (?) organization of awk.h -- attempt to keep all system dependencies - near the top and move some of the non-general things out of the config.h - files. - -Change to handling of SYSTEM_MISSING. - -Small change to ultrix config. - -Do "/dev/fd/*" etc. checking at runtime. - -First pass at VMS port. 
- -Improvements to error handling (when lexeme spans buffers). - -Fixed backslash handling -- why didn't I notice this sooner? - -Added programs from book to test suite and new target "bigtest" to Makefile. - -Changes from 2.12.12 to 2.12.13 -------------------------------- - -Recognize OFS and ORS specially so that OFS = 9 works without efficiency hit. - Took advantage of opportunity to tune do_print*() for about 10% win on a - print with 5 args (i.e. small but significant). - -Somewhat pervasive changes to reconcile CONVFMT vs. OFMT. - -Better initialization of builtin vars. - -Make config/* consistent wrt STRTOL_MISSING. - -Small portability improvement to alloca.s - -Improvements to lint code in awk.y - -Replaced strtol() with a better one by Chris Torek. - -Changes from 2.12.11 to 2.12.12 -------------------------------- - -Added PORTS file to record successful ports. - -Added #define const to nothing if not STDC and added const to strtod() header. - -Added * to printf capabilities and partially implemented ' ' and '+' (has an - effect for %d only, silently ignored for other formats). I'm afraid that's - as far as I want to go before I look at a complete replacement for - do_sprintf(). - -Added warning for /regexp/ on LHS of MATCHOP. - -Changes from 2.12.10 to 2.12.11 -------------------------------- - -Small Makefile improvements. - -Some remaining nits from the NeXT port. - -Got rid of bcopy() define in awk.h -- not needed anymore (??) - -Changed private in builtin.c -- it is special on Sequent. - -Added subset implementation of strtol() and STRTOL_MISSING. - -A little bit of cleanup in debug.c, dfa.c. - -Changes from 2.12.9 to 2.12.10 ------------------------------- - -Redid compatability checking and checking for # of args. - -Removed all references to variables[] from outside awk.y, in preparation - for a more abstract interface to the symbol table. - -Got rid of a remaining use of bcopy() in regex.c. - -Changes from 2.12.8 to 2.12.9 ------------------------------ - -Portability improvements for atari, next and decstation. - -Bug fix in substr() -- wasn't handling 3rd arg. of -1 properly. - -Manpage updates. - -Moved support from src release to doc release. - -Updated FUTURES file. - -Added some "lint" warnings. - -Changes from 2.12.7 to 2.12.8 ------------------------------ - -Changed time() to systime(). - -Changed warning() in snode() to fatal(). - -strftime() now defaults second arg. to current time. - -Changes from 2.12.6 to 2.12.7 ------------------------------ - -Fixed bug in sub_common() involving inadequate allocation of a buffer. - -Added some missing files to the Makefile. - -Changes from 2.12.5 to 2.12.6 ------------------------------ - -Fixed bug wherein non-redirected getline could call iop_close() just - prior to a call from do_input(). - -Fixed bug in handling of /dev/stdout and /dev/stderr. - -Changes from 2.12.4 to 2.12.5 ------------------------------ - -Updated README and support directory. - -Changes from 2.12.3 to 2.12.4 ------------------------------ - -Updated CHANGES and TODO (should have been done in previous 2 patches). - -Changes from 2.12.2 to 2.12.3 ------------------------------ - -Brought regex.c and alloca.s into line with current FSF versions. - -Changes from 2.12.1 to 2.12.2 ------------------------------ - -Portability improvements; mostly moving system prototypes out of awk.h - -Introduction of strftime. - -Use of CONVFMT. 
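The last two items (strftime() and CONVFMT) are perhaps clearer with a short sketch; the format strings are only examples:

    BEGIN {
        CONVFMT = "%.2g"
        x = 3.14159
        print (x "")                 # number-to-string conversion obeys CONVFMT: prints 3.1
        print strftime("%Y-%m-%d")   # with the time argument omitted, formats the current time
    }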
- -Changes from 2.12 to 2.12.1 ------------------------------ - -Consolidated treatment of command-line assignments (thus correcting the --v treatment). - -Rationalized builtin-variable handling into a table-driven process, thus -simplifying variable() and eliminating spc_var(). - -Fixed bug in handling of command-line source that ended in a newline. - -Simplified install() and lookup(). - -Did away with double-mallocing of identifiers and now free second and later -instances of a name, after the first gets installed into the symbol table. - -Treat IGNORECASE specially, simplifying a lot of code, and allowing -checking against strict conformance only on setting it, rather than on each -pattern match. - -Fixed regexp matching when IGNORECASE is non-zero (broken when dfa.c was -added). - -Fixed bug where $0 was not being marked as valid, even after it was rebuilt. -This caused mangling of $0. - - -Changes from 2.11.1 to 2.12 ------------------------------ - -Makefile: - -Portability improvements in Makefile. -Move configuration stuff into config.h - -FSF files: - -Synchronized alloca.[cs] and regex.[ch] with FSF. - -array.c: - -Rationalized hash routines into one with a different algorithm. -delete() now works if the array is a local variable. -Changed interface of assoc_next() and avoided dereferencing past the end of the - array. - -awk.h: - -Merged non-prototype and prototype declarations in awk.h. -Expanded tree_eval #define to short-circuit more calls of r_tree_eval(). - -awk.y: - -Delinted some of the code in the grammar. -Fixed and improved some of the error message printing. -Changed to accomodate unlimited length source lines. -Line continuation now works as advertised. -Source lines can be arbitrarily long. -Refined grammar hacks so that /= assignment works. Regular expressions - starting with /= are recognized at the beginning of a line, after && or || - and after ~ or !~. More contexts can be added if necessary. -Fixed IGNORECASE (multiple scans for backslash). -Condensed expression_lists in array references. -Detect and warn for correct # args in builtin functions -- call most of them - with a fixed number (i.e. fill in defaults at parse-time rather than at - run-time). -Load ENVIRON only if it is referenced (detected at parse-time). -Treat NF, FS, RS, NR, FNR specially at parse time, to improve run time. -Fold constant expressions at parse time. -Do make_regexp() on third arg. of split() at parse tiem if it is a constant. - -builtin.c: - -srand() returns 0 the first time called. -Replaced alloca() with malloc() in do_sprintf(). -Fixed setting of RSTART and RLENGTH in do_match(). -Got rid of get_{one,two,three} and allowance for variable # of args. at - run-time -- this is now done at parse-time. -Fixed latent bug in [g]sub whereby changes to $0 would never get made. -Rewrote much of sub_common() for simplicity and performance. -Added ctime() and time() builtin functions (unless -DSTRICT). ctime() returns - a time string like the C function, given the number of seconds since the epoch - and time() returns the current time in seconds. -do_sprintf() now checks for mismatch between format string and number of - arguments supplied. - -dfa.c - -This is borrowed (almost unmodified) from GNU grep to provide faster searches. - -eval.c - -Node_var, Node_var_array and Node_param_list handled from macro rather - than in r_tree_eval(). 
-Changed cmp_nodes() to not do a force_number() -- this, combined with a - force_number() on ARGV[] and ENVIRON[] brings it into line with other awks -Greatly simplified cmp_nodes(). -Separated out Node_NF, Node_FS, Node_RS, Node_NR and Node_FNR in get_lhs(). -All adjacent string concatenations now done at once. - -field.c - -Added support for FIELDWIDTHS. -Fixed bug in get_field() whereby changes to a field were not always - properly reflected in $0. -Reordered tests in parse_field() so that reference off the end of the buffer - doesn't happen. -set_FS() now sets *parse_field i.e. routine to call depending on type of FS. -It also does make_regexp() for FS if needed. get_field() passes FS_regexp - to re_parse_field(), as does do_split(). -Changes to set_field() and set_record() to avoid malloc'ing and free'ing the - field nodes repeatedly. The fields now just point into $0 unless they are - assigned to another variable or changed. force_number() on the field is - *only* done when the field is needed. - -gawk.1 - -Fixed troff formatting problem on .TP lines. - -io.c - -Moved some code out into iop.c. -Output from pipes and system() calls is properly synchronized. -Status from pipe close properly returned. -Bug in getline with no redirect fixed. - -iop.c - -This file contains a totally revamped get_a_record and associated code. - -main.c - -Command line programs no longer use a temporary file. -Therefore, tmpnam() no longer required. -Deprecated -a and -e options -- they will go away in the next release, - but for now they cause a warning. -Moved -C, -V, -c options to -W ala posix. -Added -W posix option: throw out \x -Added -W lint option. - - -node.c - -force_number() now allows pure numerics to have leading whitespace. -Added make_string facility to optimize case of adding an already malloc'd - string. -Cleaned up and simplified do_deref(). -Fixed bug in handling of stref==255 in do_deref(). - -re.c - -contains the interface to regexp code - -Changes from 2.11.1 to FSF version of same ------------------------------------------- -Thu Jan 4 14:19:30 1990 Jim Kingdon (kingdon at albert) - - * Makefile (YACC): Add -y to bison part. - - * missing.c: Add #include . - -Sun Dec 24 16:16:05 1989 David J. MacKenzie (djm at hobbes.ai.mit.edu) - - * Makefile: Add (commented out) default defines for Sony News. - - * awk.h: Move declaration of vprintf so it will compile when - -DVPRINTF_MISSING is defined. - -Mon Nov 13 18:54:08 1989 Robert J. Chassell (bob at apple-gunkies.ai.mit.edu) - - * gawk.texinfo: changed @-commands that are not part of the - standard, currently released texinfmt.el to those that are. - Otherwise, only people with the as-yet unreleased makeinfo.c can - format this file. - -Changes from 2.11beta to 2.11.1 (production) --------------------------------------------- - -Went from "beta" to production status!!! - -Now flushes stdout before closing pipes or redirected files to -synchronize output. - -MS-DOS changes added in. - -Signal handler return type parameterized in Makefile and awk.h and -some lint removed. debug.c cleaned up. - -Fixed FS splitting to never match null strings, per book. - -Correction to the manual's description of FS. - -Some compilers break on char *foo = "string" + 4 so fixed version.sh and -main.c. - -Changes from 2.10beta to 2.11beta ---------------------------------- - -This release fixes all reported bugs that we could reproduce. Probably -some of the changes are not documented here. - -The next release will probably not be a beta release! 
- -The most important change is the addition of the -nostalgia option. :-) - -The documentation has been improved and brought up-to-date. - -There has been a lot of general cleaning up of the code that is not otherwise -documented here. There has been a movement toward using standard-conforming -library routines and providing them (in missing.d) for systems lacking them. -Improved (hopefully) configuration through Makfile modifications and missing.c. -In particular, straightened out confusion over vprintf #defines, declarations -etc. - -Deleted RCS log comments from source, to reduce source size by about one third. -Most of them were horribly out-of-date, anyway. - -Renamed source files to reflect (for the most part) their contents. - -More and improved error messages. Cleanup and fixes to yyerror(). -String constants are not altered in input buffer, so error messages come out -better. Fixed usage message. Make use of ANSI C strerror() function -(provided). - -Plugged many more memory leaks. The memory consumption is now quite -reasonable over a wide range of programs. - -Uses volatile declaration if STDC > 0 to avoid problems due to longjmp. - -New -a and -e options to use awk or egrep style regexps, respectively, -since POSIX says awk should use egrep regexps. Default is -a. - -Added -v option for setting variables before the first file is encountered. -Version information now uses -V and copyleft uses -C. - -Added a patchlevel.h file and its use for -V and -C. - -Append_right() optimized for major improvement to programs with a *lot* -of statements. - -Operator precedence has been corrected to match draft Posix. - -Tightened up grammar for builtin functions so that only length -may be called without arguments or parentheses. - -/regex/ is now a normal expression that can appear in any expression -context. - -Allow /= to begin a regexp. Allow ..[../..].. in a regexp. - -Allow empty compound statements ({}). - -Made return and next illegal outside a function and in BEGIN/END respectively. - -Division by zero is now illegal and causes a fatal error. - -Fixed exponentiation so that x ^ 0 and x ^= 0 both return 1. - -Fixed do_sqrt, do_log, and do_exp to do argument/return checking and -print an error message, per the manual. - -Fixed main to catch SIGSEGV to get source and data file line numbers. - -Fixed yyerror to print the ^ at the beginning of the bad token, not the end. - -Fix to substr() builtin: it was failing if the arguments -weren't already strings. - -Added new node value flag NUMERIC to indicate that a variable is -purely a number as opposed to type NUM which indicates that -the node's numeric value is valid. This is set in make_number(), -tmp_number and r_force_number() when appropriate and used in -cmp_nodes(). This fixed a bug in comparison of variables that had -numeric prefixes. The new code uses strtod() and eliminates is_a_number(). -A simple strtod() is provided for systems lacking one. It does no -overflow checking, so could be improved. - -Simplification and efficiency improvement in force_string. - -Added performance tweak in r_force_number(). - -Fixed a bug with nested loops and break/continue in functions. - -Fixed inconsistency in handling of empty fields when $0 has to be rebuilt. -Happens to simplify rebuild_record(). - -Cleaned up the code associated with opening a pipe for reading. Gawk -now has its own popen routine (gawk_popen) that allocates an IOBUF -and keeps track of the pid of the child process. 
gawk_pclose -marks the appropriate child as defunct in the right struct redirect. - -Cleaned up and fixed close_redir(). - -Fixed an obscure bug to do with redirection. Intermingled ">" and ">>" -redirects did not output in a predictable order. - -Improved handling of output buffering: now all print[f]s redirected to a tty -or pipe are flushed immediately and non-redirected output to a tty is flushed -before the next input record is read. - -Fixed a bug in get_a_record() where bcopy() could have copied over -a random pointer. - -Fixed a bug when RS="" and records separated by multiple blank lines. - -Got rid of SLOWIO code which was out-of-date anyway. - -Fix in get_field() for case where $0 is changed and then $(n) are -changed and then $0 is used. - -Fixed infinite loop on failure to open file for reading from getline. -Now handles redirect file open failures properly. - -Filenames such as /dev/stdin now allowed on the command line as well as -in redirects. - -Fixed so that gawk '$1' where $1 is a zero tests false. - -Fixed parsing so that `RLENGTH -1' parses the same as `RLENGTH - 1', -for example. - -The return from a user-defined function now defaults to the Null node. -This fixes a core-dump-causing bug when the return value of a function -is used and that function returns no value. - -Now catches floating point exceptions to avoid core dumps. - -Bug fix for deleting elements of an array -- under some conditions, it was -deleting more than one element at a time. - -Fix in AWKPATH code for running off the end of the string. - -Fixed handling of precision in *printf calls. %0.2d now works properly, -as does %c. [s]printf now recognizes %i and %X. - -Fixed a bug in printing of very large (>240) strings. - -Cleaned up erroneous behaviour for RS == "". - -Added IGNORECASE support to index(). - -Simplified and fixed newnode/freenode. - -Fixed reference to $(anything) in a BEGIN block. - -Eliminated use of USG rand48(). - -Bug fix in force_string for machines with 16-bit ints. - -Replaced use of mktemp() with tmpnam() and provided a partial implementation of -the latter for systems that don't have it. - -Added a portability check for includes in io.c. - -Minor portability fix in alloc.c plus addition of xmalloc(). - -Portability fix: on UMAX4.2, st_blksize is zero for a pipe, thus breaking -iop_alloc() -- fixed. - -Workaround for compiler bug on Sun386i in do_sprintf. - -More and improved prototypes in awk.h. - -Consolidated C escape parsing code into one place. - -strict flag is now turned on only when invoked with compatability option. -It now applies to fewer things. - -Changed cast of f._ptr in vprintf.c from (unsigned char *) to (char *). -Hopefully this is right for the systems that use this code (I don't). - -Support for pipes under MSDOS added. 
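Two of the fixes above (a field holding a zero testing false, and comparison of numeric-looking strings) are easy to misread, so here is a small illustration of the intended semantics, assuming an input line of "0 zero":

    $1 { print "field 1 is true" }   # never fires: "0" is a numeric string with value 0
    $2 { print "field 2 is true" }   # fires: "zero" is a non-empty, non-numeric string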
diff --git a/contrib/awk/PORTS b/contrib/awk/PORTS deleted file mode 100644 index 8fe8802..0000000 --- a/contrib/awk/PORTS +++ /dev/null @@ -1,29 +0,0 @@ -Gawk 3.0.5 has been successfully compiled and run "make test" -on the following: - - Linux 2.2.14 gcc 2.95.2 - Linux 2.2.13-SMP egcs-2.91.66 make test -j5 - IRIX64 6.5 gcc 2.8.1 - IRIX 5.3 gcc 2.7.2.2 - SunOS 5.3 gcc 2.5.8 - Linux 2.0.33 gcc 2.7.2.1 - IRIX64 6.5 gcc 2.95.1 - - Apple Macintosh PPC G3 Rhapsody 5.5 - DEC Alpha OSF/1 4.0F - DEC Alpha Linux - HP 9000/735 HP-UX 10.01 - HP PA 1.1 HP-UX 11.00 - IBM PowerPC AIX 4.2 - Intel Pentium III GNU/Linux 2.2.12-20smp (Redhat 6.1) - NeXT Turbostation Mach 3.3 - SGI MIPS IRIX 6.3 - SGI Origin 200 IRIX 6.5 - Sun SPARC GNU/Linux 2.2.12-42smp (Redhat 6.1) - Sun SPARC Solaris 2.6 - Sun SPARC Solaris 2.7 - - DEC Alpha OpenVMS - DEC Vax VMS DEC C - - OS/2 EMX GCC diff --git a/contrib/awk/POSIX.STD b/contrib/awk/POSIX.STD deleted file mode 100644 index 04d31fc..0000000 --- a/contrib/awk/POSIX.STD +++ /dev/null @@ -1,119 +0,0 @@ -March 2001: - -It looks like the revised 1003.2 standard will actually follow the -rules given below. Hallelujah! - -October 1998: - -The 1003.2 work has been at a stand-still for ages. Who knows if or -when a new revision will actually happen... - -August 1995: - -Although the published 1003.2 standard contained the incorrect -comparison rules of 11.2 draft as described below, no actual implementation -of awk (that I know of) actually used those rules. - -A revision of the 1003.2 standard is in progress, and in the May 1995 -draft, the rules were fixed (based on my submissions for interpretation -requests) to match the description given below. Thus, the next version -of the standard will have a correct description of the comparison -rules. - -June 1992: - -Right now, the numeric vs. string comparisons are screwed up in draft -11.2. What prompted me to check it out was the note in gnu.bug.utils -which observed that gawk was doing the comparison $1 == "000" -numerically. I think that we can agree that intuitively, this should -be done as a string comparison. Version 2.13.2 of gawk follows the -current POSIX draft. Following is how I (now) think this -stuff should be done. - -1. A numeric literal or the result of a numeric operation has the NUMERIC - attribute. - -2. A string literal or the result of a string operation has the STRING - attribute. - -3. Fields, getline input, FILENAME, ARGV elements, ENVIRON elements and the - elements of an array created by split() that are numeric strings - have the STRNUM attribute. Otherwise, they have the STRING attribute. - Uninitialized variables also have the STRNUM attribute. - -4. Attributes propagate across assignments, but are not changed by - any use. (Although a use may cause the entity to acquire an additional - value such that it has both a numeric and string value -- this leaves the - attribute unchanged.) - -When two operands are compared, either string comparison or numeric comparison -may be used, depending on the attributes of the operands, according to the -following (symmetric) matrix: - - +---------------------------------------------- - | STRING NUMERIC STRNUM ---------+---------------------------------------------- - | -STRING | string string string - | -NUMERIC | string numeric numeric - | -STRNUM | string numeric numeric ---------+---------------------------------------------- - -So, the following program should print all OKs. 
- -echo '0e2 0a 0 0b -0e2 0a 0 0b' | -$AWK ' -NR == 1 { - num = 0 - str = "0e2" - - print ++test ": " ( (str == "0e2") ? "OK" : "OOPS" ) - print ++test ": " ( ("0e2" != 0) ? "OK" : "OOPS" ) - print ++test ": " ( ("0" != $2) ? "OK" : "OOPS" ) - print ++test ": " ( ("0e2" == $1) ? "OK" : "OOPS" ) - - print ++test ": " ( (0 == "0") ? "OK" : "OOPS" ) - print ++test ": " ( (0 == num) ? "OK" : "OOPS" ) - print ++test ": " ( (0 != $2) ? "OK" : "OOPS" ) - print ++test ": " ( (0 == $1) ? "OK" : "OOPS" ) - - print ++test ": " ( ($1 != "0") ? "OK" : "OOPS" ) - print ++test ": " ( ($1 == num) ? "OK" : "OOPS" ) - print ++test ": " ( ($2 != 0) ? "OK" : "OOPS" ) - print ++test ": " ( ($2 != $1) ? "OK" : "OOPS" ) - print ++test ": " ( ($3 == 0) ? "OK" : "OOPS" ) - print ++test ": " ( ($3 == $1) ? "OK" : "OOPS" ) - print ++test ": " ( ($2 != $4) ? "OK" : "OOPS" ) # 15 -} -{ - a = "+2" - b = 2 - if (NR % 2) - c = a + b - print ++test ": " ( (a != b) ? "OK" : "OOPS" ) # 16 and 22 - - d = "2a" - b = 2 - if (NR % 2) - c = d + b - print ++test ": " ( (d != b) ? "OK" : "OOPS" ) - - print ++test ": " ( (d + 0 == b) ? "OK" : "OOPS" ) - - e = "2" - print ++test ": " ( (e == b "") ? "OK" : "OOPS" ) - - a = "2.13" - print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" ) - - a = "2.130000" - print ++test ": " ( (a != 2.13) ? "OK" : "OOPS" ) - - if (NR == 2) { - CONVFMT = "%.6f" - print ++test ": " ( (a == 2.13) ? "OK" : "OOPS" ) - } -}' diff --git a/contrib/awk/PROBLEMS b/contrib/awk/PROBLEMS deleted file mode 100644 index 0a2f77d..0000000 --- a/contrib/awk/PROBLEMS +++ /dev/null @@ -1,9 +0,0 @@ -This is a list of known problems in gawk 3.1. -I don't know when this will be fixed, if ever. See also FUTURES -and the gawk.texi doc for other things that need doing. - -1. The interactions with the lexer and yyerror need reworking. It is possible - to get line numbers that are one line off if --compat or --posix is - true and either `nextfile' or `delete array' are used. - - Really the whole lexical analysis stuff needs reworking. diff --git a/contrib/awk/README b/contrib/awk/README deleted file mode 100644 index a07db51..0000000 --- a/contrib/awk/README +++ /dev/null @@ -1,104 +0,0 @@ -README: - -This is GNU Awk 3.1.0. It is upwardly compatible with the Bell Labs -research version of awk. It is almost completely compliant with the -1993 POSIX 1003.2 standard for awk. (See the note below about POSIX.) - -There are lots of new features -- see NEWS and ChangeLog for details. - -Work to be done is described briefly in the FUTURES file. Changes in this -version are summarized in the NEWS file. Please read the LIMITATIONS file. - -Read the file POSIX.STD for a discussion of how the standard says -comparisons should be done vs. how they really should be done and how -gawk does them. - -To format the documentation with TeX, use at least version 2000-10-27.17 -of texinfo.tex. There is a usable copy of texinfo.tex in the doc directory. - -INSTALLATION: - -Check whether there is a system-specific README file for your system under -the `README_d' directory. If there's something there that you should -have read and didn't, and you bug me about it, I'm going to yell at you. - -See the file INSTALL for installation instructions. - -If you have neither bison nor yacc, use the awkgram.c file here. It was -generated with bison, and has no proprietary code in it. (Note that -modifying awkgram.y without bison or yacc will be difficult, at best. -You might want to get a copy of bison from the FSF too.) 
- -If you have a Win32, MS-DOS or OS/2 system, use the stuff in the `pc' -directory. Similarly, there is a separate directory for VMS. - -Ports for the Atari and Tandem are supplied, but they are unsupported. -Thus, their code appears in the `unsupported' directory. - -Appendix B of ``GAWK: Effective Awk Programming'' discusses configuration -in detail. The configuration process is based on GNU Autoconf and -Automake. - -After successful compilation, do `make check' to run the test suite. -There should be no output from the `cmp' invocations except in the -cases where there are small differences in floating point values, and -possibly in the case of strftime. Several of the tests ignore errors -on purpose; those are not a problem. If there are other differences, -please investigate and report the problem. - -PRINTING THE MANUAL - -The `doc' directory contains a recent version of texinfo.tex, which will -be necessary for printing the manual. Use `make dvi' to get a DVI file -from the manual. In the `doc' directory, use `make postscript' to get -PostScript versions of the manual, the man page, and the reference card. - -BUG REPORTS AND FIXES (Un*x systems): - -Please coordinate changes through Arnold Robbins. In particular, see -the section in the manual on reporting bugs. Note that comp.lang.awk -is about the worst place to post a gawk bug report. Please, use the -mechanisms outlined in the manual. - -Email should be sent to bug-gawk@gnu.org. This address sends mail to -Arnold Robbins and the general GNU utilities bug list. The advantage -to using this address is that bug reports are archived at GNU Central. - -Arnold Robbins - -BUG REPORTS AND FIXES, non-Unix systems: - -Amiga: - Fred Fish - fnf@ninemoons.com - -Alpha/Linux: - Michal Jaegermann - michal@gortel.phys.ualberta.ca - -BeOS: - Martin Brown - mc@whoever.com - -MS-DOS: - Scott Deifik - scottd@amgen.com - - Darrel Hankerson - hankedr@mail.auburn.edu - -MS-Windows: - Juan Grigera - juan@biophnet.unlp.edu.ar - -OS/2: - Kai Uwe Rommel - rommel@ars.de - -Tandem: - Stephen Davies - scldad@sdc.com.au - -VMS: - Pat Rankin - rankin@eql.caltech.edu diff --git a/contrib/awk/README_d/README.FIRST b/contrib/awk/README_d/README.FIRST deleted file mode 100644 index ef527f2..0000000 --- a/contrib/awk/README_d/README.FIRST +++ /dev/null @@ -1,21 +0,0 @@ -Sat Feb 18 23:07:55 EST 1995 - -Starting with 2.15.6, gawk will preserve the value of NF and $0 for -the last record read into the END rule(s). This is important to you -if your program uses - - print - -in an END rule to mean - - print "" - -(i.e., print nothing). Examine your awk programs carefully to make sure -that they use `print ""' instead of `print', otherwise you will get -strange results. - -If you send me email about this, without having read this -file, I will yell at you. - -Arnold Robbins -arnold@gnu.org diff --git a/contrib/awk/README_d/README.tests b/contrib/awk/README_d/README.tests deleted file mode 100644 index 1c969d7..0000000 --- a/contrib/awk/README_d/README.tests +++ /dev/null @@ -1,46 +0,0 @@ -Date: Sat, 22 Apr 2000 06:07:06 -0600 (MDT) -From: "Nelson H. F. Beebe" -To: arnold@gnu.org -Cc: beebe@math.utah.edu, sysstaff@math.utah.edu, othmer@math.utah.edu -Subject: gawk-3.0.4 and a GNU/Linux gotcha - -Yesterday, I was assisting a colleague install some software on his -GNU/Linux machine for which uname -r reports 2.2.14. - -A (mis)feature of this system, which I've never encountered before, -broke the build of one of my programs, and also of gawk-3.0.4. 
- -Namely, the kernel will not execute anything that resides in /tmp, -though it will if the same script is in /usr/tmp! - -% cat /tmp/foo.sh -#! /bin/sh -echo hello - -ls -l /tmp/foo.sh --rwxr-xr-x 1 othmer math 22 Apr 21 10:34 /tmp/foo.sh* - -% /tmp/foo.sh -bash: /tmp/foo.sh: Permission denied - -% cp /tmp/foo.sh /usr/tmp - -% /usr/tmp/foo.sh -hello - -Thus, programs that do a temporary install in /tmp, as some of mine do -in order to run the validation suite, will fail. - -gawk-3.0.4, and likely other gawk versions, hits this problem too. It -fails because test/poundbang starts with - -#! /tmp/gawk -f - -I tracked down where it comes from: - -% grep /tmp /etc/fstab -/dev/hda3 /tmp ext2 rw,nosuid,noexec,nouser,auto,async,nodev 1 1 - !!!!!! - -Since this is done via a mount command, potentially ANY directory tree -could be mounted with noexec. diff --git a/contrib/awk/acconfig.h b/contrib/awk/acconfig.h deleted file mode 100644 index 10ad23a..0000000 --- a/contrib/awk/acconfig.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * acconfig.h -- configuration definitions for gawk. - */ - -/* - * Copyright (C) 1995-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -@TOP@ - -#undef REGEX_MALLOC /* use malloc instead of alloca in regex.c */ -#undef SPRINTF_RET /* return type of sprintf */ -#undef HAVE_MKTIME /* we have the mktime function */ -#undef HAVE_SOCKETS /* we have sockets on this system */ -#undef HAVE_PORTALS /* we have portals on /p on this system */ -#undef DYNAMIC /* allow dynamic addition of builtins */ -#undef STRTOD_NOT_C89 /* strtod doesn't have C89 semantics */ -#undef ssize_t /* signed version of size_t */ - -@BOTTOM@ - -#include /* overrides for stuff autoconf can't deal with */ diff --git a/contrib/awk/aclocal.m4 b/contrib/awk/aclocal.m4 deleted file mode 100644 index 7ba39c3..0000000 --- a/contrib/awk/aclocal.m4 +++ /dev/null @@ -1,129 +0,0 @@ -dnl -dnl aclocal.m4 --- autoconf input file for gawk -dnl -dnl Copyright (C) 1995, 1996, 1998, 1999, 2000 the Free Software Foundation, Inc. -dnl -dnl This file is part of GAWK, the GNU implementation of the -dnl AWK Progamming Language. -dnl -dnl GAWK is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU General Public License as published by -dnl the Free Software Foundation; either version 2 of the License, or -dnl (at your option) any later version. -dnl -dnl GAWK is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -dnl GNU General Public License for more details. 
-dnl -dnl You should have received a copy of the GNU General Public License -dnl along with this program; if not, write to the Free Software -dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -dnl - -dnl gawk-specific macros for autoconf. one day hopefully part of autoconf - -AC_DEFUN(GAWK_AC_C_STRINGIZE, [ -AC_REQUIRE([AC_PROG_CPP]) -AC_MSG_CHECKING([for ANSI stringizing capability]) -AC_CACHE_VAL(gawk_cv_c_stringize, -AC_EGREP_CPP([#teststring],[ -#define x(y) #y - -char *s = x(teststring); -], gawk_cv_c_stringize=no, gawk_cv_c_stringize=yes)) -if test "${gawk_cv_c_stringize}" = yes -then - AC_DEFINE(HAVE_STRINGIZE) -fi -AC_MSG_RESULT([${gawk_cv_c_stringize}]) -])dnl - - -dnl By default, many hosts won't let programs access large files; -dnl one must use special compiler options to get large-file access to work. -dnl For more details about this brain damage please see: -dnl http://www.sas.com/standards/large.file/x_open.20Mar96.html - -dnl Written by Paul Eggert . - -dnl Internal subroutine of GAWK_AC_SYS_LARGEFILE. -dnl GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES -AC_DEFUN(GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES, - [[#include - int a[(off_t) 9223372036854775807 == 9223372036854775807 ? 1 : -1]; - ]]) - -dnl Internal subroutine of GAWK_AC_SYS_LARGEFILE. -dnl GAWK_AC_SYS_LARGEFILE_MACRO_VALUE(C-MACRO, VALUE, CACHE-VAR, COMMENT, INCLUDES, FUNCTION-BODY) -AC_DEFUN(GAWK_AC_SYS_LARGEFILE_MACRO_VALUE, - [AC_CACHE_CHECK([for $1 value needed for large files], $3, - [$3=no - AC_TRY_COMPILE(GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES -$5 - , - [$6], - , - [AC_TRY_COMPILE([#define $1 $2] -GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES -$5 - , - [$6], - [$3=$2])])]) - if test "[$]$3" != no; then - AC_DEFINE_UNQUOTED([$1], [$]$3, [$4]) - fi]) - -AC_DEFUN(GAWK_AC_SYS_LARGEFILE, - [AC_ARG_ENABLE(largefile, - [ --disable-largefile omit support for large files]) - if test "$enable_largefile" != no; then - - AC_CACHE_CHECK([for special C compiler options needed for large files], - gawk_cv_sys_largefile_CC, - [gawk_cv_sys_largefile_CC=no - if test "$GCC" != yes; then - # IRIX 6.2 and later do not support large files by default, - # so use the C compiler's -n32 option if that helps. - AC_TRY_COMPILE(GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES, , , - [ac_save_CC="$CC" - CC="$CC -n32" - AC_TRY_COMPILE(GAWK_AC_SYS_LARGEFILE_TEST_INCLUDES, , - gawk_cv_sys_largefile_CC=' -n32') - CC="$ac_save_CC"]) - fi]) - if test "$gawk_cv_sys_largefile_CC" != no; then - CC="$CC$gawk_cv_sys_largefile_CC" - fi - - GAWK_AC_SYS_LARGEFILE_MACRO_VALUE(_FILE_OFFSET_BITS, 64, - gawk_cv_sys_file_offset_bits, - [Number of bits in a file offset, on hosts where this is settable.]) - GAWK_AC_SYS_LARGEFILE_MACRO_VALUE(_LARGEFILE_SOURCE, 1, - gawk_cv_sys_largefile_source, - [Define to make ftello visible on some hosts (e.g. HP-UX 10.20).], - [#include ], [return !ftello;]) - GAWK_AC_SYS_LARGEFILE_MACRO_VALUE(_LARGE_FILES, 1, - gawk_cv_sys_large_files, - [Define for large files, on AIX-style hosts.]) - GAWK_AC_SYS_LARGEFILE_MACRO_VALUE(_XOPEN_SOURCE, 500, - gawk_cv_sys_xopen_source, - [Define to make ftello visible on some hosts (e.g. 
glibc 2.1.3).], - [#include ], [return !ftello;]) - fi - ]) - -dnl Check for AIX and add _XOPEN_SOURCE_EXTENDED -AC_DEFUN(GAWK_AC_AIX_TWEAK, [ -AC_MSG_CHECKING([for AIX compilation hacks]) -AC_CACHE_VAL(gawk_cv_aix_hack, [ -if test -d /lpp/bos -then - CFLAGS="$CFLAGS -D_XOPEN_SOURCE_EXTENDED=1" - gawk_cv_aix_hack=yes -else - gawk_cv_aix_hack=no -fi -])dnl -AC_MSG_RESULT([${gawk_cv_aix_hack}]) -])dnl diff --git a/contrib/awk/alloca.c b/contrib/awk/alloca.c deleted file mode 100644 index 6bbd983..0000000 --- a/contrib/awk/alloca.c +++ /dev/null @@ -1,496 +0,0 @@ -/* alloca.c -- allocate automatically reclaimed memory - (Mostly) portable public-domain implementation -- D A Gwyn - - This implementation of the PWB library alloca function, - which is used to allocate space off the run-time stack so - that it is automatically reclaimed upon procedure exit, - was inspired by discussions with J. Q. Johnson of Cornell. - J.Otto Tennant contributed the Cray support. - - There are some preprocessor constants that can - be defined when compiling for your specific system, for - improved efficiency; however, the defaults should be okay. - - The general concept of this implementation is to keep - track of all alloca-allocated blocks, and reclaim any - that are found to be deeper in the stack than the current - invocation. This heuristic does not reclaim storage as - soon as it becomes invalid, but it will do so eventually. - - As a special case, alloca(0) reclaims storage without - allocating any. It is a good idea to use alloca(0) in - your main control loop, etc. to force garbage collection. */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#ifdef emacs -#include "blockinput.h" -#endif - -/* If compiling with GCC 2, this file's not needed. */ -#if !defined (__GNUC__) || __GNUC__ < 2 - -/* If someone has defined alloca as a macro, - there must be some other way alloca is supposed to work. */ -#ifndef alloca - -#ifdef emacs -#ifdef static -/* actually, only want this if static is defined as "" - -- this is for usg, in which emacs must undefine static - in order to make unexec workable - */ -#ifndef STACK_DIRECTION -you -lose --- must know STACK_DIRECTION at compile-time -#endif /* STACK_DIRECTION undefined */ -#endif /* static */ -#endif /* emacs */ - -/* If your stack is a linked list of frames, you have to - provide an "address metric" ADDRESS_FUNCTION macro. */ - -#if defined (CRAY) && defined (CRAY_STACKSEG_END) -long i00afunc (); -#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) -#else -#define ADDRESS_FUNCTION(arg) &(arg) -#endif - -#if __STDC__ -typedef void *pointer; -#else -typedef char *pointer; -#endif - -#ifndef NULL -#define NULL 0 -#endif - -#ifndef malloc -/* Different portions of Emacs need to call different versions of - malloc. The Emacs executable needs alloca to call xmalloc, because - ordinary malloc isn't protected from input signals. On the other - hand, the utilities in lib-src need alloca to call malloc; some of - them are very simple, and don't have an xmalloc routine. - - Non-Emacs programs expect this to call xmalloc. - - Callers below should use malloc. */ - -#ifndef emacs -#define malloc xmalloc -#endif -extern pointer malloc (); -#endif /* malloc */ - -/* Define STACK_DIRECTION if you know the direction of stack - growth for your system; otherwise it will be automatically - deduced at run-time. 
- - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown */ - -#ifndef STACK_DIRECTION -#define STACK_DIRECTION 0 /* Direction unknown. */ -#endif - -#if STACK_DIRECTION != 0 - -#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ - -#else /* STACK_DIRECTION == 0; need run-time code. */ - -static int stack_dir; /* 1 or -1 once known. */ -#define STACK_DIR stack_dir - -static void -find_stack_direction () -{ - static char *addr = NULL; /* Address of first `dummy', once known. */ - auto char dummy; /* To get stack address. */ - - if (addr == NULL) - { /* Initial entry. */ - addr = ADDRESS_FUNCTION (dummy); - - find_stack_direction (); /* Recurse once. */ - } - else - { - /* Second entry. */ - if (ADDRESS_FUNCTION (dummy) > addr) - stack_dir = 1; /* Stack grew upward. */ - else - stack_dir = -1; /* Stack grew downward. */ - } -} - -#endif /* STACK_DIRECTION == 0 */ - -/* An "alloca header" is used to: - (a) chain together all alloca'ed blocks; - (b) keep track of stack depth. - - It is very important that sizeof(header) agree with malloc - alignment chunk size. The following default should work okay. */ - -#ifndef ALIGN_SIZE -#define ALIGN_SIZE sizeof(double) -#endif - -typedef union hdr -{ - char align[ALIGN_SIZE]; /* To force sizeof(header). */ - struct - { - union hdr *next; /* For chaining headers. */ - char *deep; /* For stack depth measure. */ - } h; -} header; - -static header *last_alloca_header = NULL; /* -> last alloca header. */ - -/* Return a pointer to at least SIZE bytes of storage, - which will be automatically reclaimed upon exit from - the procedure that called alloca. Originally, this space - was supposed to be taken from the current stack frame of the - caller, but that method cannot be made to work for some - implementations of C, for example under Gould's UTX/32. */ - -pointer -alloca (size) - unsigned size; -{ - auto char probe; /* Probes stack depth: */ - register char *depth = ADDRESS_FUNCTION (probe); - -#if STACK_DIRECTION == 0 - if (STACK_DIR == 0) /* Unknown growth direction. */ - find_stack_direction (); -#endif - - /* Reclaim garbage, defined as all alloca'd storage that - was allocated from deeper in the stack than currently. */ - - { - register header *hp; /* Traverses linked list. */ - -#ifdef emacs - BLOCK_INPUT; -#endif - - for (hp = last_alloca_header; hp != NULL;) - if ((STACK_DIR > 0 && hp->h.deep > depth) - || (STACK_DIR < 0 && hp->h.deep < depth)) - { - register header *np = hp->h.next; - - free ((pointer) hp); /* Collect garbage. */ - - hp = np; /* -> next header. */ - } - else - break; /* Rest are not deeper. */ - - last_alloca_header = hp; /* -> last valid storage. */ - -#ifdef emacs - UNBLOCK_INPUT; -#endif - } - - if (size == 0) - return NULL; /* No allocation required. */ - - /* Allocate combined header + user data storage. */ - - { - register pointer new = malloc (sizeof (header) + size); - /* Address of header. */ - - ((header *) new)->h.next = last_alloca_header; - ((header *) new)->h.deep = depth; - - last_alloca_header = (header *) new; - - /* User storage begins just after header. 
*/ - - return (pointer) ((char *) new + sizeof (header)); - } -} - -#if defined (CRAY) && defined (CRAY_STACKSEG_END) - -#ifdef DEBUG_I00AFUNC -#include -#endif - -#ifndef CRAY_STACK -#define CRAY_STACK -#ifndef CRAY2 -/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ -struct stack_control_header - { - long shgrow:32; /* Number of times stack has grown. */ - long shaseg:32; /* Size of increments to stack. */ - long shhwm:32; /* High water mark of stack. */ - long shsize:32; /* Current size of stack (all segments). */ - }; - -/* The stack segment linkage control information occurs at - the high-address end of a stack segment. (The stack - grows from low addresses to high addresses.) The initial - part of the stack segment linkage control information is - 0200 (octal) words. This provides for register storage - for the routine which overflows the stack. */ - -struct stack_segment_linkage - { - long ss[0200]; /* 0200 overflow words. */ - long sssize:32; /* Number of words in this segment. */ - long ssbase:32; /* Offset to stack base. */ - long:32; - long sspseg:32; /* Offset to linkage control of previous - segment of stack. */ - long:32; - long sstcpt:32; /* Pointer to task common address block. */ - long sscsnm; /* Private control structure number for - microtasking. */ - long ssusr1; /* Reserved for user. */ - long ssusr2; /* Reserved for user. */ - long sstpid; /* Process ID for pid based multi-tasking. */ - long ssgvup; /* Pointer to multitasking thread giveup. */ - long sscray[7]; /* Reserved for Cray Research. */ - long ssa0; - long ssa1; - long ssa2; - long ssa3; - long ssa4; - long ssa5; - long ssa6; - long ssa7; - long sss0; - long sss1; - long sss2; - long sss3; - long sss4; - long sss5; - long sss6; - long sss7; - }; - -#else /* CRAY2 */ -/* The following structure defines the vector of words - returned by the STKSTAT library routine. */ -struct stk_stat - { - long now; /* Current total stack size. */ - long maxc; /* Amount of contiguous space which would - be required to satisfy the maximum - stack demand to date. */ - long high_water; /* Stack high-water mark. */ - long overflows; /* Number of stack overflow ($STKOFEN) calls. */ - long hits; /* Number of internal buffer hits. */ - long extends; /* Number of block extensions. */ - long stko_mallocs; /* Block allocations by $STKOFEN. */ - long underflows; /* Number of stack underflow calls ($STKRETN). */ - long stko_free; /* Number of deallocations by $STKRETN. */ - long stkm_free; /* Number of deallocations by $STKMRET. */ - long segments; /* Current number of stack segments. */ - long maxs; /* Maximum number of stack segments so far. */ - long pad_size; /* Stack pad size. */ - long current_address; /* Current stack segment address. */ - long current_size; /* Current stack segment size. This - number is actually corrupted by STKSTAT to - include the fifteen word trailer area. */ - long initial_address; /* Address of initial segment. */ - long initial_size; /* Size of initial segment. */ - }; - -/* The following structure describes the data structure which trails - any stack segment. I think that the description in 'asdef' is - out of date. I only describe the parts that I am sure about. */ - -struct stk_trailer - { - long this_address; /* Address of this block. */ - long this_size; /* Size of this block (does not include - this trailer). */ - long unknown2; - long unknown3; - long link; /* Address of trailer block of previous - segment. 
*/ - long unknown5; - long unknown6; - long unknown7; - long unknown8; - long unknown9; - long unknown10; - long unknown11; - long unknown12; - long unknown13; - long unknown14; - }; - -#endif /* CRAY2 */ -#endif /* not CRAY_STACK */ - -#ifdef CRAY2 -/* Determine a "stack measure" for an arbitrary ADDRESS. - I doubt that "lint" will like this much. */ - -static long -i00afunc (long *address) -{ - struct stk_stat status; - struct stk_trailer *trailer; - long *block, size; - long result = 0; - - /* We want to iterate through all of the segments. The first - step is to get the stack status structure. We could do this - more quickly and more directly, perhaps, by referencing the - $LM00 common block, but I know that this works. */ - - STKSTAT (&status); - - /* Set up the iteration. */ - - trailer = (struct stk_trailer *) (status.current_address - + status.current_size - - 15); - - /* There must be at least one stack segment. Therefore it is - a fatal error if "trailer" is null. */ - - if (trailer == 0) - abort (); - - /* Discard segments that do not contain our argument address. */ - - while (trailer != 0) - { - block = (long *) trailer->this_address; - size = trailer->this_size; - if (block == 0 || size == 0) - abort (); - trailer = (struct stk_trailer *) trailer->link; - if ((block <= address) && (address < (block + size))) - break; - } - - /* Set the result to the offset in this segment and add the sizes - of all predecessor segments. */ - - result = address - block; - - if (trailer == 0) - { - return result; - } - - do - { - if (trailer->this_size <= 0) - abort (); - result += trailer->this_size; - trailer = (struct stk_trailer *) trailer->link; - } - while (trailer != 0); - - /* We are done. Note that if you present a bogus address (one - not in any segment), you will get a different number back, formed - from subtracting the address of the first block. This is probably - not what you want. */ - - return (result); -} - -#else /* not CRAY2 */ -/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. - Determine the number of the cell within the stack, - given the address of the cell. The purpose of this - routine is to linearize, in some sense, stack addresses - for alloca. */ - -static long -i00afunc (long address) -{ - long stkl = 0; - - long size, pseg, this_segment, stack; - long result = 0; - - struct stack_segment_linkage *ssptr; - - /* Register B67 contains the address of the end of the - current stack segment. If you (as a subprogram) store - your registers on the stack and find that you are past - the contents of B67, you have overflowed the segment. - - B67 also points to the stack segment linkage control - area, which is what we are really interested in. */ - - stkl = CRAY_STACKSEG_END (); - ssptr = (struct stack_segment_linkage *) stkl; - - /* If one subtracts 'size' from the end of the segment, - one has the address of the first word of the segment. - - If this is not the first segment, 'pseg' will be - nonzero. */ - - pseg = ssptr->sspseg; - size = ssptr->sssize; - - this_segment = stkl - size; - - /* It is possible that calling this routine itself caused - a stack overflow. Discard stack segments which do not - contain the target address. 
*/ - - while (!(this_segment <= address && address <= stkl)) - { -#ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); -#endif - if (pseg == 0) - break; - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - this_segment = stkl - size; - } - - result = address - this_segment; - - /* If you subtract pseg from the current end of the stack, - you get the address of the previous stack segment's end. - This seems a little convoluted to me, but I'll bet you save - a cycle somewhere. */ - - while (pseg != 0) - { -#ifdef DEBUG_I00AFUNC - fprintf (stderr, "%011o %011o\n", pseg, size); -#endif - stkl = stkl - pseg; - ssptr = (struct stack_segment_linkage *) stkl; - size = ssptr->sssize; - pseg = ssptr->sspseg; - result += size; - } - return (result); -} - -#endif /* not CRAY2 */ -#endif /* CRAY */ - -#endif /* no alloca */ -#endif /* not GCC version 2 */ diff --git a/contrib/awk/array.c b/contrib/awk/array.c deleted file mode 100644 index 905f3ba..0000000 --- a/contrib/awk/array.c +++ /dev/null @@ -1,858 +0,0 @@ -/* - * array.c - routines for associative arrays. - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -/* - * Tree walks (``for (iggy in foo)'') and array deletions use expensive - * linear searching. So what we do is start out with small arrays and - * grow them as needed, so that our arrays are hopefully small enough, - * most of the time, that they're pretty full and we're not looking at - * wasted space. - * - * The decision is made to grow the array if the average chain length is - * ``too big''. This is defined as the total number of entries in the table - * divided by the size of the array being greater than some constant. 
- */ - -#define AVG_CHAIN_MAX 10 /* don't want to linear search more than this */ - -#include "awk.h" - -static NODE *assoc_find P((NODE *symbol, NODE *subs, int hash1)); -static void grow_table P((NODE *symbol)); - -/* concat_exp --- concatenate expression list into a single string */ - -NODE * -concat_exp(register NODE *tree) -{ - register NODE *r; - char *str; - char *s; - size_t len; - int offset; - size_t subseplen; - char *subsep; - - if (tree->type != Node_expression_list) - return force_string(tree_eval(tree)); - r = force_string(tree_eval(tree->lnode)); - if (tree->rnode == NULL) - return r; - subseplen = SUBSEP_node->var_value->stlen; - subsep = SUBSEP_node->var_value->stptr; - len = r->stlen + subseplen + 2; - emalloc(str, char *, len, "concat_exp"); - memcpy(str, r->stptr, r->stlen+1); - s = str + r->stlen; - free_temp(r); - for (tree = tree->rnode; tree != NULL; tree = tree->rnode) { - if (subseplen == 1) - *s++ = *subsep; - else { - memcpy(s, subsep, subseplen+1); - s += subseplen; - } - r = force_string(tree_eval(tree->lnode)); - len += r->stlen + subseplen; - offset = s - str; - erealloc(str, char *, len, "concat_exp"); - s = str + offset; - memcpy(s, r->stptr, r->stlen+1); - s += r->stlen; - free_temp(r); - } - r = make_str_node(str, s - str, ALREADY_MALLOCED); - r->flags |= TEMP; - return r; -} - -/* assoc_clear --- flush all the values in symbol[] before doing a split() */ - -void -assoc_clear(NODE *symbol) -{ - int i; - NODE *bucket, *next; - - if (symbol->var_array == NULL) - return; - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) { - next = bucket->ahnext; - unref(bucket->ahname); - unref(bucket->ahvalue); - freenode(bucket); - } - symbol->var_array[i] = NULL; - } - free(symbol->var_array); - symbol->var_array = NULL; - symbol->array_size = symbol->table_size = 0; - symbol->flags &= ~ARRAYMAXED; -} - -/* hash --- calculate the hash function of the string in subs */ - -unsigned int -hash(register const char *s, register size_t len, unsigned long hsize) -{ - register unsigned long h = 0; - - /* - * This is INCREDIBLY ugly, but fast. We break the string up into - * 8 byte units. On the first time through the loop we get the - * "leftover bytes" (strlen % 8). On every other iteration, we - * perform 8 HASHC's so we handle all 8 bytes. Essentially, this - * saves us 7 cmp & branch instructions. If this routine is - * heavily used enough, it's worth the ugly coding. - * - * OZ's original sdbm hash, copied from Margo Seltzers db package. - */ - - /* - * Even more speed: - * #define HASHC h = *s++ + 65599 * h - * Because 65599 = pow(2, 6) + pow(2, 16) - 1 we multiply by shifts - */ -#define HASHC htmp = (h << 6); \ - h = *s++ + htmp + (htmp << 10) - h - - unsigned long htmp; - - h = 0; - -#if defined(VAXC) - /* - * This was an implementation of "Duff's Device", but it has been - * redone, separating the switch for extra iterations from the - * loop. This is necessary because the DEC VAX-C compiler is - * STOOPID. - */ - switch (len & (8 - 1)) { - case 7: HASHC; - case 6: HASHC; - case 5: HASHC; - case 4: HASHC; - case 3: HASHC; - case 2: HASHC; - case 1: HASHC; - default: break; - } - - if (len > (8 - 1)) { - register size_t loop = len >> 3; - do { - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - HASHC; - } while (--loop); - } -#else /* ! 
VAXC */ - /* "Duff's Device" for those who can handle it */ - if (len > 0) { - register size_t loop = (len + 8 - 1) >> 3; - - switch (len & (8 - 1)) { - case 0: - do { /* All fall throughs */ - HASHC; - case 7: HASHC; - case 6: HASHC; - case 5: HASHC; - case 4: HASHC; - case 3: HASHC; - case 2: HASHC; - case 1: HASHC; - } while (--loop); - } - } -#endif /* ! VAXC */ - - if (h >= hsize) - h %= hsize; - return h; -} - -/* assoc_find --- locate symbol[subs] */ - -static NODE * /* NULL if not found */ -assoc_find(NODE *symbol, register NODE *subs, int hash1) -{ - register NODE *bucket; - NODE *s1, *s2; - - for (bucket = symbol->var_array[hash1]; bucket != NULL; - bucket = bucket->ahnext) { - /* - * This used to use cmp_nodes() here. That's wrong. - * Array indexes are strings; compare as such, always! - */ - s1 = bucket->ahname; - s1 = force_string(s1); - s2 = subs; - - if (s1->stlen == s2->stlen) { - if (s1->stlen == 0 /* "" is a valid index */ - || STREQN(s1->stptr, s2->stptr, s1->stlen)) - return bucket; - } - } - return NULL; -} - -/* in_array --- test whether the array element symbol[subs] exists or not */ - -int -in_array(NODE *symbol, NODE *subs) -{ - register int hash1; - int ret; - - if (symbol->type == Node_param_list) - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->type == Node_array_ref) - symbol = symbol->orig_array; - if ((symbol->flags & SCALAR) != 0) - fatal(_("attempt to use scalar `%s' as array"), symbol->vname); - /* - * evaluate subscript first, it could have side effects - */ - subs = concat_exp(subs); /* concat_exp returns a string node */ - if (symbol->var_array == NULL) { - free_temp(subs); - return 0; - } - hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); - ret = (assoc_find(symbol, subs, hash1) != NULL); - free_temp(subs); - return ret; -} - -/* - * assoc_lookup: - * Find SYMBOL[SUBS] in the assoc array. Install it with value "" if it - * isn't there. Returns a pointer ala get_lhs to where its value is stored. - * - * SYMBOL is the address of the node (or other pointer) being dereferenced. - * SUBS is a number or string used as the subscript. - */ - -NODE ** -assoc_lookup(NODE *symbol, NODE *subs, int reference) -{ - register int hash1; - register NODE *bucket; - - assert(symbol->type == Node_var_array || symbol->type == Node_var); - - (void) force_string(subs); - - if ((symbol->flags & SCALAR) != 0) - fatal(_("attempt to use scalar `%s' as array"), symbol->vname); - - if (symbol->var_array == NULL) { - if (symbol->type != Node_var_array) { - unref(symbol->var_value); - symbol->type = Node_var_array; - } - symbol->array_size = symbol->table_size = 0; /* sanity */ - symbol->flags &= ~ARRAYMAXED; - grow_table(symbol); - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - } else { - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - bucket = assoc_find(symbol, subs, hash1); - if (bucket != NULL) { - free_temp(subs); - return &(bucket->ahvalue); - } - } - - if (do_lint && reference) { - subs->stptr[subs->stlen] = '\0'; - lintwarn(_("reference to uninitialized element `%s[\"%s\"]'"), - symbol->vname, subs->stptr); - } - - /* It's not there, install it. 
*/ - if (do_lint && subs->stlen == 0) - lintwarn(_("subscript of array `%s' is null string"), - symbol->vname); - - /* first see if we would need to grow the array, before installing */ - symbol->table_size++; - if ((symbol->flags & ARRAYMAXED) == 0 - && (symbol->table_size / symbol->array_size) > AVG_CHAIN_MAX) { - grow_table(symbol); - /* have to recompute hash value for new size */ - hash1 = hash(subs->stptr, subs->stlen, - (unsigned long) symbol->array_size); - } - - getnode(bucket); - bucket->type = Node_ahash; - - /* - * Freeze this string value --- it must never - * change, no matter what happens to the value - * that created it or to CONVFMT, etc. - * - * One day: Use an atom table to track array indices, - * and avoid the extra memory overhead. - */ - if (subs->flags & TEMP) - bucket->ahname = dupnode(subs); - else - bucket->ahname = copynode(subs); - - free_temp(subs); - - /* array subscripts are strings */ - bucket->ahname->flags &= ~(NUMBER|NUM); - bucket->ahname->flags |= (STRING|STR); - /* ensure that this string value never changes */ - bucket->ahname->stfmt = -1; - - bucket->ahvalue = Nnull_string; - bucket->ahnext = symbol->var_array[hash1]; - symbol->var_array[hash1] = bucket; - return &(bucket->ahvalue); -} - -/* do_delete --- perform `delete array[s]' */ - -void -do_delete(NODE *symbol, NODE *tree) -{ - register int hash1; - register NODE *bucket, *last; - NODE *subs; - - if (symbol->type == Node_param_list) { - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->type == Node_var) - return; - } - if (symbol->type == Node_array_ref) - symbol = symbol->orig_array; - if (symbol->type == Node_var_array) { - if (symbol->var_array == NULL) - return; - } else - fatal(_("delete: illegal use of variable `%s' as array"), - symbol->vname); - - if (tree == NULL) { /* delete array */ - assoc_clear(symbol); - return; - } - - subs = concat_exp(tree); /* concat_exp returns string node */ - hash1 = hash(subs->stptr, subs->stlen, (unsigned long) symbol->array_size); - - last = NULL; - for (bucket = symbol->var_array[hash1]; bucket != NULL; - last = bucket, bucket = bucket->ahnext) { - /* - * This used to use cmp_nodes() here. That's wrong. - * Array indexes are strings; compare as such, always! - */ - NODE *s1, *s2; - - s1 = bucket->ahname; - s1 = force_string(s1); - s2 = subs; - - if (s1->stlen == s2->stlen) { - if (s1->stlen == 0 /* "" is a valid index */ - || STREQN(s1->stptr, s2->stptr, s1->stlen)) - break; - } - } - - if (bucket == NULL) { - if (do_lint) - lintwarn(_("delete: index `%s' not in array `%s'"), - subs->stptr, symbol->vname); - free_temp(subs); - return; - } - free_temp(subs); - if (last != NULL) - last->ahnext = bucket->ahnext; - else - symbol->var_array[hash1] = bucket->ahnext; - unref(bucket->ahname); - unref(bucket->ahvalue); - freenode(bucket); - symbol->table_size--; - if (symbol->table_size <= 0) { - memset(symbol->var_array, '\0', - sizeof(NODE *) * symbol->array_size); - symbol->table_size = symbol->array_size = 0; - symbol->flags &= ~ARRAYMAXED; - free((char *) symbol->var_array); - symbol->var_array = NULL; - } -} - -/* do_delete_loop --- simulate ``for (iggy in foo) delete foo[iggy]'' */ - -/* - * The primary hassle here is that `iggy' needs to have some arbitrary - * array index put in it before we can clear the array, we can't - * just replace the loop with `delete foo'. 
- */ - -void -do_delete_loop(NODE *symbol, NODE *tree) -{ - size_t i; - NODE **lhs; - Func_ptr after_assign = NULL; - - if (symbol->type == Node_param_list) { - symbol = stack_ptr[symbol->param_cnt]; - if (symbol->type == Node_var) - return; - } - if (symbol->type == Node_array_ref) - symbol = symbol->orig_array; - if (symbol->type == Node_var_array) { - if (symbol->var_array == NULL) - return; - } else - fatal(_("delete: illegal use of variable `%s' as array"), - symbol->vname); - - /* get first index value */ - for (i = 0; i < symbol->array_size; i++) { - if (symbol->var_array[i] != NULL) { - lhs = get_lhs(tree->lnode, & after_assign, FALSE); - unref(*lhs); - *lhs = dupnode(symbol->var_array[i]->ahname); - break; - } - } - - /* blast the array in one shot */ - assoc_clear(symbol); -} - -/* grow_table --- grow a hash table */ - -static void -grow_table(NODE *symbol) -{ - NODE **old, **new, *chain, *next; - int i, j; - unsigned long hash1; - unsigned long oldsize, newsize; - /* - * This is an array of primes. We grow the table by an order of - * magnitude each time (not just doubling) so that growing is a - * rare operation. We expect, on average, that it won't happen - * more than twice. The final size is also chosen to be small - * enough so that MS-DOG mallocs can handle it. When things are - * very large (> 8K), we just double more or less, instead of - * just jumping from 8K to 64K. - */ - static long sizes[] = { 13, 127, 1021, 8191, 16381, 32749, 65497, -#if ! defined(MSDOS) && ! defined(OS2) && ! defined(atarist) - 131101, 262147, 524309, 1048583, 2097169, - 4194319, 8388617, 16777259, 33554467, - 67108879, 134217757, 268435459, 536870923, - 1073741827 -#endif - }; - - /* find next biggest hash size */ - newsize = oldsize = symbol->array_size; - for (i = 0, j = sizeof(sizes)/sizeof(sizes[0]); i < j; i++) { - if (oldsize < sizes[i]) { - newsize = sizes[i]; - break; - } - } - - if (newsize == oldsize) { /* table already at max (!) */ - symbol->flags |= ARRAYMAXED; - return; - } - - /* allocate new table */ - emalloc(new, NODE **, newsize * sizeof(NODE *), "grow_table"); - memset(new, '\0', newsize * sizeof(NODE *)); - - /* brand new hash table, set things up and return */ - if (symbol->var_array == NULL) { - symbol->table_size = 0; - goto done; - } - - /* old hash table there, move stuff to new, free old */ - old = symbol->var_array; - for (i = 0; i < oldsize; i++) { - if (old[i] == NULL) - continue; - - for (chain = old[i]; chain != NULL; chain = next) { - next = chain->ahnext; - hash1 = hash(chain->ahname->stptr, - chain->ahname->stlen, newsize); - - /* remove from old list, add to new */ - chain->ahnext = new[hash1]; - new[hash1] = chain; - } - } - free(old); - -done: - /* - * note that symbol->table_size does not change if an old array, - * and is explicitly set to 0 if a new one. 
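/*
 * A minimal, self-contained sketch of the rehashing step performed by
 * grow_table() above: when the chains get too long, jump to the next
 * prime size (rather than merely doubling) and relink every element into
 * the bucket chosen by its hash under the new size.  The prime list and
 * all names below are illustrative, not gawk's.
 */
#include <stdlib.h>

struct entry {
	const char *key;
	size_t keylen;
	struct entry *next;
};

unsigned long
sketch_hash(const char *s, size_t len, unsigned long hsize)
{
	unsigned long h = 0;

	while (len-- > 0)
		h = (h << 5) + h + (unsigned char) *s++;
	return h % hsize;
}

/* grow table[] from *sizep buckets to the next prime; returns the new array */
struct entry **
grow_buckets(struct entry **table, unsigned long *sizep)
{
	static const unsigned long primes[] = { 13, 127, 1021, 8191, 65537 };
	unsigned long oldsize = *sizep, newsize = oldsize;
	struct entry **new, *p, *next;
	unsigned long i, h;
	size_t n;

	for (n = 0; n < sizeof(primes) / sizeof(primes[0]); n++)
		if (oldsize < primes[n]) {
			newsize = primes[n];
			break;
		}
	if (newsize == oldsize)
		return table;		/* already at maximum size */

	new = calloc(newsize, sizeof(*new));
	if (new == NULL)
		return table;		/* keep the old table on failure */

	for (i = 0; i < oldsize; i++)
		for (p = table[i]; p != NULL; p = next) {
			next = p->next;
			h = sketch_hash(p->key, p->keylen, newsize);
			p->next = new[h];	/* push onto the new chain */
			new[h] = p;
		}
	free(table);
	*sizep = newsize;
	return new;
}

int
main(void)
{
	unsigned long size = 13;
	struct entry **table = calloc(size, sizeof(*table));
	struct entry a = { "x", 1, NULL };

	if (table == NULL)
		return 1;
	table[sketch_hash(a.key, a.keylen, size)] = &a;
	table = grow_buckets(table, &size);	/* relinks "x" under 127 buckets */
	return table[sketch_hash(a.key, a.keylen, size)] == &a ? 0 : 1;
}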
- */ - symbol->var_array = new; - symbol->array_size = newsize; -} - -/* pr_node --- print simple node info */ - -static void -pr_node(NODE *n) -{ - if ((n->flags & (NUM|NUMBER)) != 0) - printf("%g", n->numbr); - else - printf("%.*s", (int) n->stlen, n->stptr); -} - -/* assoc_dump --- dump the contents of an array */ - -NODE * -assoc_dump(NODE *symbol) -{ - int i; - NODE *bucket; - - if (symbol->var_array == NULL) { - printf(_("%s: empty (null)\n"), symbol->vname); - return tmp_number((AWKNUM) 0); - } - - if (symbol->table_size == 0) { - printf(_("%s: empty (zero)\n"), symbol->vname); - return tmp_number((AWKNUM) 0); - } - - printf(_("%s: table_size = %d, array_size = %d\n"), symbol->vname, - (int) symbol->table_size, (int) symbol->array_size); - - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket != NULL; - bucket = bucket->ahnext) { - printf("%s: I: [(%p, %ld, %s) len %d <%.*s>] V: [", - symbol->vname, - bucket->ahname, - bucket->ahname->stref, - flags2str(bucket->ahname->flags), - (int) bucket->ahname->stlen, - (int) bucket->ahname->stlen, - bucket->ahname->stptr); - pr_node(bucket->ahvalue); - printf("]\n"); - } - } - - return tmp_number((AWKNUM) 0); -} - -/* do_adump --- dump an array: interface to assoc_dump */ - -NODE * -do_adump(NODE *tree) -{ - NODE *r, *a; - - a = tree->lnode; - - if (a->type == Node_param_list) { - printf(_("%s: is paramater\n"), a->vname); - a = stack_ptr[a->param_cnt]; - } - - if (a->type == Node_array_ref) { - printf(_("%s: array_ref to %s\n"), a->vname, - a->orig_array->vname); - a = a->orig_array; - } - - r = assoc_dump(a); - - return r; -} - -/* - * The following functions implement the builtin - * asort function. Initial work by Alan J. Broder, - * ajb@woti.com. - */ - -/* dup_table --- duplicate input symbol table "symbol" */ - -static void -dup_table(NODE *symbol, NODE *newsymb) -{ - NODE **old, **new, *chain, *bucket; - int i; - unsigned long cursize; - - /* find the current hash size */ - cursize = symbol->array_size; - - new = NULL; - - /* input is a brand new hash table, so there's nothing to copy */ - if (symbol->var_array == NULL) - newsymb->table_size = 0; - else { - /* old hash table there, dupnode stuff into a new table */ - - /* allocate new table */ - emalloc(new, NODE **, cursize * sizeof(NODE *), "dup_table"); - memset(new, '\0', cursize * sizeof(NODE *)); - - /* do the copying/dupnode'ing */ - old = symbol->var_array; - for (i = 0; i < cursize; i++) { - if (old[i] != NULL) { - for (chain = old[i]; chain != NULL; - chain = chain->ahnext) { - /* get a node for the linked list */ - getnode(bucket); - bucket->type = Node_ahash; - - /* - * copy the corresponding name and - * value from the original input list - */ - bucket->ahname = dupnode(chain->ahname); - bucket->ahvalue = dupnode(chain->ahvalue); - - /* - * put the node on the corresponding - * linked list in the new table - */ - bucket->ahnext = new[i]; - new[i] = bucket; - } - } - } - newsymb->table_size = symbol->table_size; - } - - newsymb->var_array = new; - newsymb->array_size = cursize; -} - -/* merge --- do a merge of two sorted lists */ - -static NODE * -merge(NODE *left, NODE *right) -{ - NODE *ans, *cur; - - if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) { - ans = cur = left; - left = left->ahnext; - } else { - ans = cur = right; - right = right->ahnext; - } - - while (left != NULL && right != NULL) { - if (cmp_nodes(left->ahvalue, right->ahvalue) <= 0) { - cur->ahnext = left; - cur = left; - left = left->ahnext; - } else { - cur->ahnext 
= right; - cur = right; - right = right->ahnext; - } - } - - cur->ahnext = (left != NULL ? left : right); - - return ans; -} - -/* merge_sort --- recursively sort the left and right sides of a list */ - -static NODE * -merge_sort(NODE *left, int size) -{ - NODE *right, *tmp; - int i, half; - - if (size <= 1) - return left; - - /* walk down the list, till just one before the midpoint */ - tmp = left; - half = size / 2; - for (i = 0; i < half-1; i++) - tmp = tmp->ahnext; - - /* split the list into two parts */ - right = tmp->ahnext; - tmp->ahnext = NULL; - - /* sort the left and right parts of the list */ - left = merge_sort(left, half); - right = merge_sort(right, size-half); - - /* merge the two sorted parts of the list */ - return merge(left, right); -} - - -/* - * assoc_from_list -- Populate an array with the contents of a list of NODEs, - * using increasing integers as the key. - */ - -static void -assoc_from_list(NODE *symbol, NODE *list) -{ - NODE *next; - int i = 0; - register int hash1; - - for (; list != NULL; list = next) { - next = list->ahnext; - - /* make an int out of i++ */ - i++; - list->ahname = make_number((AWKNUM) i); - (void) force_string(list->ahname); - - /* find the bucket where it belongs */ - hash1 = hash(list->ahname->stptr, list->ahname->stlen, - symbol->array_size); - - /* link the node into the chain at that bucket */ - list->ahnext = symbol->var_array[hash1]; - symbol->var_array[hash1] = list; - } -} - -/* - * assoc_sort_inplace --- sort all the values in symbol[], replacing - * the sorted values back into symbol[], indexed by integers starting with 1. - */ - -static NODE * -assoc_sort_inplace(NODE *symbol) -{ - int i, num; - NODE *bucket, *next, *list; - - if (symbol->var_array == NULL - || symbol->array_size <= 0 - || symbol->table_size <= 0) - return tmp_number((AWKNUM) 0); - - /* build a linked list out of all the entries in the table */ - list = NULL; - num = 0; - for (i = 0; i < symbol->array_size; i++) { - for (bucket = symbol->var_array[i]; bucket != NULL; bucket = next) { - next = bucket->ahnext; - unref(bucket->ahname); - bucket->ahnext = list; - list = bucket; - num++; - } - symbol->var_array[i] = NULL; - } - - /* - * Sort the linked list of NODEs. - * (The especially nice thing about using a merge sort here is that - * we require absolutely no additional storage. This is handy if the - * array has grown to be very large.) - */ - list = merge_sort(list, num); - - /* - * now repopulate the original array, using increasing - * integers as the key - */ - assoc_from_list(symbol, list); - - return tmp_number((AWKNUM) num); -} - -/* do_asort --- do the actual work to sort the input array */ - -NODE * -do_asort(NODE *tree) -{ - NODE *src, *dest; - - src = tree->lnode; - dest = NULL; - - if (src->type == Node_param_list) - src = stack_ptr[src->param_cnt]; - if (src->type == Node_array_ref) - src = src->orig_array; - if (src->type != Node_var_array) - fatal(_("asort: first argument is not an array")); - - if (tree->rnode != NULL) { /* 2nd optional arg */ - dest = tree->rnode->lnode; - if (dest->type == Node_param_list) - dest = stack_ptr[dest->param_cnt]; - if (dest->type == Node_array_ref) - dest = dest->orig_array; - if (dest->type != Node_var && dest->type != Node_var_array) - fatal(_("asort: second argument is not an array")); - dest->type = Node_var_array; - assoc_clear(dest); - dup_table(src, dest); - } - - return dest != NULL ? 
assoc_sort_inplace(dest) : assoc_sort_inplace(src); -} diff --git a/contrib/awk/awk.h b/contrib/awk/awk.h deleted file mode 100644 index e921e81..0000000 --- a/contrib/awk/awk.h +++ /dev/null @@ -1,1017 +0,0 @@ -/* - * awk.h -- Definitions for gawk. - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - -/* ------------------------------ Includes ------------------------------ */ - -/* - * config.h absolutely, positively, *M*U*S*T* be included before - * any system headers. Otherwise, extreme death, destruction - * and loss of life results. - * - * Well, OK, gawk just won't work on systems using egcs and LFS. But - * that's almost as bad. - */ -#ifdef HAVE_CONFIG_H -#include -#endif - -#ifndef _GNU_SOURCE -#define _GNU_SOURCE 1 /* enable GNU extensions */ -#endif /* _GNU_SOURCE */ - -#include -#include -#ifdef HAVE_LIMITS_H -#include -#endif /* HAVE_LIMITS_H */ -#include -#include - -#if defined(HAVE_LIBINTL_H) && defined(ENABLE_NLS) && ENABLE_NLS > 0 -#include -#else /* ! (HAVE_LOCALE_H && defined(ENABLE_NLS) && ENABLE_LS > 0) */ -#define gettext(msgid) (msgid) -#define gettext_noop(msgid) msgid -#define dgettext(domain, msgid) (msgid) -#define dcgettext(domain, msgid, cat) (msgid) -#define bindtextdomain(domain, directory) (directory) -#define textdomain(package) /* nothing */ -#ifndef LOCALEDIR -#define LOCALEDIR NULL -#endif /* LOCALEDIR */ -#endif /* ! (HAVE_LOCALE_H && defined(ENABLE_NLS) && ENABLE_LS > 0) */ -#define _(msgid) gettext(msgid) -#define N_(msgid) msgid - -#ifdef HAVE_LOCALE_H -#include -#endif /* HAVE_LOCALE_H */ -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -#include -#else -#include -#endif -#include -#include -#include -#if ! defined(errno) && ! defined(MSDOS) && ! 
defined(OS2) -extern int errno; -#endif -#ifdef HAVE_SIGNUM_H -#include -#endif - -/* ----------------- System dependencies (with more includes) -----------*/ - -/* This section is the messiest one in the file, not a lot that can be done */ - -/* First, get the ctype stuff right; from Jim Meyering */ -#if defined(STDC_HEADERS) || (!defined(isascii) && !defined(HAVE_ISASCII)) -#define IN_CTYPE_DOMAIN(c) 1 -#else -#define IN_CTYPE_DOMAIN(c) isascii((unsigned char) c) -#endif - -#ifdef isblank -#define ISBLANK(c) (IN_CTYPE_DOMAIN(c) && isblank((unsigned char) c)) -#else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -#define ISGRAPH(c) (IN_CTYPE_DOMAIN(c) && isgraph((unsigned char) c)) -#else -#define ISGRAPH(c) (IN_CTYPE_DOMAIN(c) && isprint((unsigned char) c) && !isspace((unsigned char) c)) -#endif - -#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint ((unsigned char) c)) -#define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit ((unsigned char) c)) -#define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum ((unsigned char) c)) -#define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha ((unsigned char) c)) -#define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl ((unsigned char) c)) -#define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower ((unsigned char) c)) -#define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (unsigned char) (c)) -#define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace ((unsigned char) c)) -#define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper ((unsigned char) c)) -#define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit ((unsigned char) c)) - -#define TOUPPER(c) toupper((unsigned char) c) -#define TOLOWER(c) tolower((unsigned char) c) - - -#ifdef __STDC__ -#define P(s) s -#define MALLOC_ARG_T size_t -#else /* not __STDC__ */ -#define P(s) () -#define MALLOC_ARG_T unsigned -#define volatile -#define const -#endif /* not __STDC__ */ - -#ifndef VMS -#include -#include -#else /* VMS */ -#include -#include -#include /* avoid in io.c */ -#endif /* VMS */ - -#ifdef STDC_HEADERS -#include -#else /* not STDC_HEADERS */ -#include "protos.h" -#endif /* not STDC_HEADERS */ - -#ifdef HAVE_STRING_H -#include -#ifdef NEED_MEMORY_H -#include -#endif /* NEED_MEMORY_H */ -#else /* not HAVE_STRING_H */ -#ifdef HAVE_STRINGS_H -#include -#endif /* HAVE_STRINGS_H */ -#endif /* not HAVE_STRING_H */ - -#ifdef NeXT -#if __GNUC__ < 2 || __GNUC_MINOR__ < 7 -#include -#endif -#undef atof -#define getopt GNU_getopt -#define GFMT_WORKAROUND -#endif /* NeXT */ - -#if defined(atarist) || defined(VMS) -#include -#endif /* atarist || VMS */ - -#if ! defined(MSDOS) && ! defined(OS2) && ! defined(WIN32) -#define O_BINARY 0 -#endif - -#if defined(TANDEM) -#define variable variabl -#define open(name, how, mode) open(name, how) /* !!! ANSI C !!! */ -#endif - -#if HAVE_UNISTD_H -#include -#endif /* HAVE_UNISTD_H */ - -#ifndef HAVE_VPRINTF -/* if you don't have vprintf, try this and cross your fingers. 
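/*
 * A minimal standalone illustration of why the ISDIGIT()/ISALPHA()-style
 * wrappers defined earlier in this header cast to unsigned char: on
 * platforms where plain char is signed, passing a byte >= 0x80 directly
 * to the <ctype.h> functions passes a negative value, which is undefined
 * behaviour.  Illustrative only; not part of the deleted headers.
 */
#include <ctype.h>
#include <stdio.h>

int
main(void)
{
	char buf[] = "caf\xe9";		/* last byte is 0xE9, negative as signed char */
	size_t i;
	int digits = 0, alphas = 0;

	for (i = 0; buf[i] != '\0'; i++) {
		unsigned char c = (unsigned char) buf[i];	/* safe argument */

		if (isdigit(c))
			digits++;
		if (isalpha(c))
			alphas++;
	}
	printf("%d digits, %d alphabetic\n", digits, alphas);
	return 0;
}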
*/ -#ifdef HAVE_DOPRNT -#define vfprintf(fp,fmt,arg) _doprnt((fmt), (arg), (fp)) -#else /* not HAVE_DOPRNT */ -you -lose -#endif /* not HAVE_DOPRNT */ -#endif /* HAVE_VPRINTF */ - -#ifndef HAVE_SETLOCALE -#define setlocale(locale, val) /* nothing */ -#endif /* HAVE_SETLOCALE */ - -/* use this as lintwarn("...") - this is a hack but it gives us the right semantics */ -#define lintwarn (*(set_loc(__FILE__, __LINE__),lintfunc)) -extern void set_prof_file P((const char *filename)); - -#ifdef VMS -#include "vms/redirect.h" -#endif /*VMS*/ - -#ifdef atarist -#include "unsupported/atari/redirect.h" -#endif - -#define GNU_REGEX -#ifdef GNU_REGEX -#ifdef __FreeBSD__ -#include -#else -#include "regex.h" -#endif -#include "dfa.h" -typedef struct Regexp { - struct re_pattern_buffer pat; - struct re_registers regs; - struct dfa dfareg; - int dfa; -} Regexp; -#define RESTART(rp,s) (rp)->regs.start[0] -#define REEND(rp,s) (rp)->regs.end[0] -#define SUBPATSTART(rp,s,n) (rp)->regs.start[n] -#define SUBPATEND(rp,s,n) (rp)->regs.end[n] -#endif /* GNU_REGEX */ - -/* Stuff for losing systems. */ -#ifdef STRTOD_NOT_C89 -extern double gawk_strtod(); -#define strtod gawk_strtod -#endif - -/* ------------------ Constants, Structures, Typedefs ------------------ */ - -#ifndef AWKNUM -#define AWKNUM double -#endif - -#ifndef TRUE -/* a bit hackneyed, but what the heck */ -#define TRUE 1 -#define FALSE 0 -#endif - -/* Figure out what '\a' really is. */ -#ifdef __STDC__ -#define BELL '\a' /* sure makes life easy, don't it? */ -#else -# if 'z' - 'a' == 25 /* ascii */ -# if 'a' != 97 /* machine is dumb enough to use mark parity */ -# define BELL '\207' -# else -# define BELL '\07' -# endif -# else -# define BELL '\057' -# endif -#endif - -typedef enum nodevals { - /* illegal entry == 0 */ - Node_illegal, - - /* binary operators lnode and rnode are the expressions to work on */ - Node_times, - Node_quotient, - Node_mod, - Node_plus, - Node_minus, - Node_cond_pair, /* conditional pair (see Node_line_range) */ - Node_subscript, - Node_concat, - Node_exp, - - /* unary operators subnode is the expression to work on */ - Node_preincrement, - Node_predecrement, - Node_postincrement, - Node_postdecrement, - Node_unary_minus, - Node_field_spec, - - /* assignments lnode is the var to assign to, rnode is the exp */ - Node_assign, - Node_assign_times, - Node_assign_quotient, - Node_assign_mod, - Node_assign_plus, - Node_assign_minus, - Node_assign_exp, - - /* boolean binaries lnode and rnode are expressions */ - Node_and, - Node_or, - - /* binary relationals compares lnode and rnode */ - Node_equal, - Node_notequal, - Node_less, - Node_greater, - Node_leq, - Node_geq, - Node_match, - Node_nomatch, - - /* unary relationals works on subnode */ - Node_not, - - /* program structures */ - Node_rule_list, /* lnode is a rule, rnode is rest of list */ - Node_rule_node, /* lnode is pattern, rnode is statement */ - Node_statement_list, /* lnode is statement, rnode is more list */ - Node_if_branches, /* lnode is to run on true, rnode on false */ - Node_expression_list, /* lnode is an exp, rnode is more list */ - Node_param_list, /* lnode is a variable, rnode is more list */ - - /* keywords */ - Node_K_if, /* lnode is conditonal, rnode is if_branches */ - Node_K_while, /* lnode is condtional, rnode is stuff to run */ - Node_K_for, /* lnode is for_struct, rnode is stuff to run */ - Node_K_arrayfor, /* lnode is for_struct, rnode is stuff to run */ - Node_K_break, /* no subs */ - Node_K_continue, /* no subs */ - Node_K_print, /* lnode is 
exp_list, rnode is redirect */ - Node_K_printf, /* lnode is exp_list, rnode is redirect */ - Node_K_next, /* no subs */ - Node_K_exit, /* subnode is return value, or NULL */ - Node_K_do, /* lnode is conditional, rnode stuff to run */ - Node_K_return, /* lnode is return value */ - Node_K_delete, /* lnode is array, rnode is subscript */ - Node_K_delete_loop, /* lnode is array, rnode is subscript */ - Node_K_getline, /* lnode is opt var, rnode is redirection */ - Node_K_function, /* lnode is statement list, rnode is params */ - Node_K_nextfile, /* no subs */ - - /* I/O redirection for print statements */ - Node_redirect_output, /* subnode is where to redirect */ - Node_redirect_append, /* subnode is where to redirect */ - Node_redirect_pipe, /* subnode is where to redirect */ - Node_redirect_pipein, /* subnode is where to redirect */ - Node_redirect_input, /* subnode is where to redirect */ - Node_redirect_twoway, /* subnode is where to redirect */ - - /* Variables */ - Node_var, /* rnode is value, lnode is array stuff */ - Node_var_array, /* array is ptr to elements, asize num of eles */ - Node_val, /* node is a value - type in flags */ - - /* Builtins subnode is explist to work on, proc is func to call */ - Node_builtin, - - /* - * pattern: conditional ',' conditional ; lnode of Node_line_range - * is the two conditionals (Node_cond_pair), other word (rnode place) - * is a flag indicating whether or not this range has been entered. - */ - Node_line_range, - - /* - * boolean test of membership in array - * lnode is string-valued, expression rnode is array name - */ - Node_in_array, - - Node_func, /* lnode is param. list, rnode is body */ - Node_func_call, /* lnode is name, rnode is argument list */ - - Node_cond_exp, /* lnode is conditonal, rnode is if_branches */ - Node_regex, /* a regexp, text, compiled, flags, etc */ - Node_hashnode, /* an identifier in the symbol table */ - Node_ahash, /* an array element */ - Node_array_ref, /* array passed by ref as parameter */ - Node_BINMODE, /* variables recognized in the grammar */ - Node_CONVFMT, - Node_FIELDWIDTHS, - Node_FNR, - Node_FS, - Node_IGNORECASE, - Node_LINT, - Node_NF, - Node_NR, - Node_OFMT, - Node_OFS, - Node_ORS, - Node_RS, - Node_TEXTDOMAIN, - Node_final /* sentry value, not legal */ -} NODETYPE; - -/* - * NOTE - this struct is a rather kludgey -- it is packed to minimize - * space usage, at the expense of cleanliness. Alter at own risk. 
- */ -typedef struct exp_node { - union { - struct { - union { - struct exp_node *lptr; - char *param_name; - long ll; - } l; - union { - struct exp_node *rptr; - struct exp_node *(*pptr)(); - Regexp *preg; - struct for_loop_header *hd; - struct exp_node **av; - int r_ent; /* range entered */ - } r; - union { - struct exp_node *extra; - long xl; - char **param_list; - } x; - char *name; - short number; - unsigned char reflags; -# define CASE 1 -# define CONST 2 -# define FS_DFLT 4 - } nodep; - struct { - AWKNUM fltnum; /* this is here for optimal packing of - * the structure on many machines - */ - char *sp; - size_t slen; - long sref; - int idx; - } val; - struct { - struct exp_node *next; - char *name; - size_t length; - struct exp_node *value; - } hash; -#define hnext sub.hash.next -#define hname sub.hash.name -#define hlength sub.hash.length -#define hvalue sub.hash.value - struct { - struct exp_node *next; - struct exp_node *name; - struct exp_node *value; - } ahash; -#define ahnext sub.ahash.next -#define ahname sub.ahash.name -#define ahvalue sub.ahash.value - } sub; - NODETYPE type; - unsigned short flags; -# define MALLOC 1 /* can be free'd */ -# define TEMP 2 /* should be free'd */ -# define PERM 4 /* can't be free'd */ -# define STRING 8 /* assigned as string */ -# define STR 16 /* string value is current */ -# define NUM 32 /* numeric value is current */ -# define NUMBER 64 /* assigned as number */ -# define MAYBE_NUM 128 /* user input: if NUMERIC then - * a NUMBER */ -# define ARRAYMAXED 256 /* array is at max size */ -# define SCALAR 512 /* used as scalar, can't be array */ -# define FUNC 1024 /* this parameter is really a - * function name; see awk.y */ -# define FIELD 2048 /* this is a field */ -# define INTLSTR 4096 /* use localized version */ -# define UNINITIALIZED 8192 /* value used before set */ - char *vname; -#ifndef NO_PROFILING - long exec_count; -#endif -} NODE; - -#define lnode sub.nodep.l.lptr -#define nextp sub.nodep.l.lptr -#define rnode sub.nodep.r.rptr -#define source_file sub.nodep.name -#define source_line sub.nodep.number -#define param_cnt sub.nodep.number -#define param sub.nodep.l.param_name -#define parmlist sub.nodep.x.param_list - -#define subnode lnode -#define proc sub.nodep.r.pptr -#define callresult sub.nodep.x.extra - -#define re_reg sub.nodep.r.preg -#define re_flags sub.nodep.reflags -#define re_text lnode -#define re_exp sub.nodep.x.extra -#define re_cnt sub.nodep.number - -#define forsub lnode -#define forloop rnode->sub.nodep.r.hd - -#define stptr sub.val.sp -#define stlen sub.val.slen -#define stref sub.val.sref -#define stfmt sub.val.idx - -#define numbr sub.val.fltnum - -#define var_value lnode -#define var_array sub.nodep.r.av -#define array_size sub.nodep.l.ll -#define table_size sub.nodep.x.xl - -#define orig_array sub.nodep.x.extra - -#define printf_count sub.nodep.x.xl - -#define condpair lnode -#define triggered sub.nodep.r.r_ent - -/* a regular for loop */ -typedef struct for_loop_header { - NODE *init; - NODE *cond; - NODE *incr; -} FOR_LOOP_HEADER; - -/* for faster input, bypass stdio */ -typedef struct iobuf { - const char *name; - int fd; - char *buf; - char *off; - char *end; - size_t size; /* this will be determined by an fstat() call */ - int cnt; - long secsiz; - int flag; -# define IOP_IS_TTY 1 -# define IOP_IS_INTERNAL 2 -# define IOP_NO_FREE 4 -# define IOP_NOFREE_OBJ 8 -} IOBUF; - -typedef void (*Func_ptr)(); - -/* structure used to dynamically maintain a linked-list of open files/pipes */ -struct redirect { - 
unsigned int flag; -# define RED_FILE 1 -# define RED_PIPE 2 -# define RED_READ 4 -# define RED_WRITE 8 -# define RED_APPEND 16 -# define RED_NOBUF 32 -# define RED_USED 64 /* closed temporarily to reuse fd */ -# define RED_EOF 128 -# define RED_TWOWAY 256 -# define RED_SOCKET 512 -# define RED_TCP 1024 - char *value; - FILE *fp; - FILE *ifp; /* input fp, needed for PIPES_SIMULATED */ - IOBUF *iop; - int pid; - int status; - struct redirect *prev; - struct redirect *next; - char *mode; -}; - -/* structure for our source, either a command line string or a source file */ -struct src { - enum srctype { CMDLINE = 1, SOURCEFILE } stype; - char *val; -}; - -/* for debugging purposes */ -struct flagtab { - int val; - char *name; -}; - -/* longjmp return codes, must be nonzero */ -/* Continue means either for loop/while continue, or next input record */ -#define TAG_CONTINUE 1 -/* Break means either for/while break, or stop reading input */ -#define TAG_BREAK 2 -/* Return means return from a function call; leave value in ret_node */ -#define TAG_RETURN 3 - -#ifndef LONG_MAX -#define LONG_MAX ((long)(~(1L << (sizeof (long) * 8 - 1)))) -#endif -#ifndef ULONG_MAX -#define ULONG_MAX (~(unsigned long)0) -#endif -#ifndef LONG_MIN -#define LONG_MIN ((long)(-LONG_MAX - 1L)) -#endif -#define HUGE LONG_MAX - -/* -------------------------- External variables -------------------------- */ -/* gawk builtin variables */ -extern long NF; -extern long NR; -extern long FNR; -extern int BINMODE; -extern int IGNORECASE; -extern int RS_is_null; -extern char *OFS; -extern int OFSlen; -extern char *ORS; -extern int ORSlen; -extern char *OFMT; -extern char *CONVFMT; -extern int CONVFMTidx; -extern int OFMTidx; -extern char *TEXTDOMAIN; -extern NODE *BINMODE_node, *CONVFMT_node, *FIELDWIDTHS_node, *FILENAME_node; -extern NODE *FNR_node, *FS_node, *IGNORECASE_node, *NF_node; -extern NODE *NR_node, *OFMT_node, *OFS_node, *ORS_node, *RLENGTH_node; -extern NODE *RSTART_node, *RS_node, *RT_node, *SUBSEP_node, *PROCINFO_node; -extern NODE *LINT_node, *ERRNO_node, *TEXTDOMAIN_node; -extern NODE **stack_ptr; -extern NODE *Nnull_string; -extern NODE **fields_arr; -extern int sourceline; -extern char *source; -extern NODE *expression_value; - -#if __GNUC__ < 2 -extern NODE *_t; /* used as temporary in tree_eval */ -#endif - -extern NODE *nextfree; -extern int field0_valid; -extern int do_traditional; -extern int do_posix; -extern int do_lint; -extern int do_lint_old; -extern int do_intervals; -extern int do_intl; -extern int do_non_decimal_data; -extern int do_dump_vars; -extern int do_tidy_mem; -extern int in_begin_rule; -extern int in_end_rule; - -extern const char *myname; - -extern char quote; -extern char *defpath; -extern char envsep; - -extern char casetable[]; /* for case-independent regexp matching */ - -/* ------------------------- Pseudo-functions ------------------------- */ - -#define is_identchar(c) (isalnum(c) || (c) == '_') -#define isnondecimal(str) (((str)[0]) == '0' && (ISDIGIT((str)[1]) \ - || (str)[1] == 'x' || (str)[1] == 'X')) - -#ifdef MPROF -#define getnode(n) emalloc((n), NODE *, sizeof(NODE), "getnode"), (n)->flags = UNINITIALIZED, (n)-exec_count = 0; -#define freenode(n) free(n) -#else /* not MPROF */ -#define getnode(n) if (nextfree) n = nextfree, nextfree = nextfree->nextp;\ - else n = more_nodes() -#ifndef NO_PROFILING -#define freenode(n) ((n)->flags = UNINITIALIZED,\ - (n)->exec_count = 0, (n)->nextp = nextfree, nextfree = (n)) -#else /* not PROFILING */ -#define freenode(n) ((n)->flags = 
UNINITIALIZED,\ - (n)->nextp = nextfree, nextfree = (n)) -#endif /* not PROFILING */ -#endif /* not MPROF */ - -#ifdef MEMDEBUG -#undef freenode -#define get_lhs(p, a, r) r_get_lhs((p), (a), (r)) -#define m_tree_eval(t, iscond) r_tree_eval(t, iscond) -#else -#define get_lhs(p, a, r) ((p)->type == Node_var && \ - ((p)->flags & UNINITIALIZED) == 0 && (r) ? \ - (&(p)->var_value): \ - r_get_lhs((p), (a), (r))) -#if __GNUC__ >= 2 -#define m_tree_eval(t, iscond) \ - ({NODE * _t = (t); \ - if (_t == NULL) \ - _t = Nnull_string; \ - else { \ - switch(_t->type) { \ - case Node_val: \ - if (_t->flags&INTLSTR) \ - _t = r_force_string(_t); \ - break; \ - case Node_var: \ - if ((_t->flags & UNINITIALIZED) == 0) { \ - _t = _t->var_value; \ - break; \ - } \ - /*FALLTHROUGH*/ \ - default: \ - _t = r_tree_eval(_t, iscond);\ - break; \ - } \ - } \ - _t;}) -#else -#define m_tree_eval(t, iscond) (_t = (t), _t == NULL ? Nnull_string : \ - (_t->type == Node_param_list ? \ - r_tree_eval(_t, iscond) : \ - ((_t->type == Node_val && (_t->flags&INTLSTR)) ? \ - r_force_string(_t) : \ - (_t->type == Node_val ? _t : \ - (_t->type == Node_var && \ - (_t->flags & UNINITIALIZED) == 0 ? _t->var_value : \ - r_tree_eval(_t, iscond)))))) -#endif /* __GNUC__ */ -#endif /* not MEMDEBUG */ -#define tree_eval(t) m_tree_eval(t, FALSE) - -#define make_number(x) mk_number((x), (unsigned int)(MALLOC|NUM|NUMBER)) -#define tmp_number(x) mk_number((x), (unsigned int)(MALLOC|TEMP|NUM|NUMBER)) - -#define free_temp(n) do { if ((n)->flags&TEMP) { unref(n); }} while (FALSE) -#define make_string(s, l) make_str_node((s), (size_t) (l), FALSE) -#define SCAN 1 -#define ALREADY_MALLOCED 2 - -#define cant_happen() r_fatal("internal error line %d, file: %s", \ - __LINE__, __FILE__) - -#ifdef HAVE_STRINGIZE -#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\ - (fatal("%s: %s: can't allocate memory (%s)",\ - (str), #var, strerror(errno)),0)) -#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\ - (MALLOC_ARG_T)(x))) ||\ - (fatal("%s: %s: can't allocate memory (%s)",\ - (str), #var, strerror(errno)),0)) -#else /* HAVE_STRINGIZE */ -#define emalloc(var,ty,x,str) (void)((var=(ty)malloc((MALLOC_ARG_T)(x))) ||\ - (fatal("%s: %s: can't allocate memory (%s)",\ - (str), "var", strerror(errno)),0)) -#define erealloc(var,ty,x,str) (void)((var=(ty)realloc((char *)var,\ - (MALLOC_ARG_T)(x))) ||\ - (fatal("%s: %s: can't allocate memory (%s)",\ - (str), "var", strerror(errno)),0)) -#endif /* HAVE_STRINGIZE */ - -#ifdef GAWKDEBUG -#define force_number r_force_number -#define force_string r_force_string -#else /* not GAWKDEBUG */ -#ifdef lint -extern AWKNUM force_number(); -#endif -#if __GNUC__ >= 2 -#define force_number(n) ({NODE *_tn = (n);\ - (_tn->flags & NUM) ?_tn->numbr : r_force_number(_tn);}) -#define force_string(s) ({NODE *_ts = (s);\ - ((_ts->flags & INTLSTR) ? \ - r_force_string(_ts) : \ - ((_ts->flags & STR) && \ - (_ts->stfmt == -1 || _ts->stfmt == CONVFMTidx)) ?\ - _ts : r_force_string(_ts));}) -#else -#ifdef MSDOS -extern double _msc51bug; -#define force_number(n) (_msc51bug=(_t = (n),\ - (_t->flags & NUM) ? _t->numbr : r_force_number(_t))) -#else /* not MSDOS */ -#define force_number(n) (_t = (n),\ - (_t->flags & NUM) ? _t->numbr : r_force_number(_t)) -#endif /* not MSDOS */ -#define force_string(s) (_t = (s),(_t->flags & INTLSTR) ? \ - r_force_string(_t) :\ - ((_t->flags & STR) && \ - (_t->stfmt == -1 || \ - _t->stfmt == CONVFMTidx))? 
\ - _t : r_force_string(_t)) - -#endif /* not __GNUC__ */ -#endif /* not GAWKDEBUG */ - -#define STREQ(a,b) (*(a) == *(b) && strcmp((a), (b)) == 0) -#define STREQN(a,b,n) ((n) && *(a)== *(b) && \ - strncmp((a), (b), (size_t) (n)) == 0) - -#define fatal set_loc(__FILE__, __LINE__), r_fatal - -/* ------------- Function prototypes or defs (as appropriate) ------------- */ - -/* array.c */ -extern NODE *concat_exp P((NODE *tree)); -extern void assoc_clear P((NODE *symbol)); -extern unsigned int hash P((const char *s, size_t len, unsigned long hsize)); -extern int in_array P((NODE *symbol, NODE *subs)); -extern NODE **assoc_lookup P((NODE *symbol, NODE *subs, int reference)); -extern void do_delete P((NODE *symbol, NODE *tree)); -extern void do_delete_loop P((NODE *symbol, NODE *tree)); -extern NODE *assoc_dump P((NODE *symbol)); -extern NODE *do_adump P((NODE *tree)); -extern NODE *do_asort P((NODE *tree)); -/* awkgram.c */ -extern char *tokexpand P((void)); -extern NODE *node P((NODE *left, NODETYPE op, NODE *right)); -extern NODE *install P((char *name, NODE *value)); -extern NODE *lookup P((const char *name)); -extern NODE *variable P((char *name, int can_free, NODETYPE type)); -extern int yyparse P((void)); -extern void dump_funcs P((void)); -extern void dump_vars P((const char *fname)); -extern void release_all_vars P((void)); -extern const char *getfname P((NODE *(*)())); -extern NODE *stopme P((NODE *tree)); -extern void shadow_funcs(); -/* builtin.c */ -extern double double_to_int P((double d)); -extern NODE *do_exp P((NODE *tree)); -extern NODE *do_fflush P((NODE *tree)); -extern NODE *do_index P((NODE *tree)); -extern NODE *do_int P((NODE *tree)); -extern NODE *do_length P((NODE *tree)); -extern NODE *do_log P((NODE *tree)); -extern NODE *do_mktime P((NODE *tree)); -extern NODE *do_sprintf P((NODE *tree)); -extern void do_printf P((NODE *tree)); -extern void print_simple P((NODE *tree, FILE *fp)); -extern NODE *do_sqrt P((NODE *tree)); -extern NODE *do_substr P((NODE *tree)); -extern NODE *do_strftime P((NODE *tree)); -extern NODE *do_systime P((NODE *tree)); -extern NODE *do_system P((NODE *tree)); -extern void do_print P((NODE *tree)); -extern NODE *do_tolower P((NODE *tree)); -extern NODE *do_toupper P((NODE *tree)); -extern NODE *do_atan2 P((NODE *tree)); -extern NODE *do_sin P((NODE *tree)); -extern NODE *do_cos P((NODE *tree)); -extern NODE *do_rand P((NODE *tree)); -extern NODE *do_srand P((NODE *tree)); -extern NODE *do_match P((NODE *tree)); -extern NODE *do_gsub P((NODE *tree)); -extern NODE *do_sub P((NODE *tree)); -extern NODE *do_gensub P((NODE *tree)); -extern NODE *format_tree P((const char *, int, NODE *, int)); -extern NODE *do_lshift P((NODE *tree)); -extern NODE *do_rshift P((NODE *tree)); -extern NODE *do_and P((NODE *tree)); -extern NODE *do_or P((NODE *tree)); -extern NODE *do_xor P((NODE *tree)); -extern NODE *do_compl P((NODE *tree)); -extern NODE *do_strtonum P((NODE *tree)); -extern AWKNUM nondec2awknum P((char *str, size_t len)); -extern NODE *do_dcgettext P((NODE *tree)); -extern NODE *do_bindtextdomain P((NODE *tree)); -/* eval.c */ -extern int interpret P((NODE *volatile tree)); -extern NODE *r_tree_eval P((NODE *tree, int iscond)); -extern int cmp_nodes P((NODE *t1, NODE *t2)); -extern NODE **r_get_lhs P((NODE *ptr, Func_ptr *assign, int reference)); -extern void set_IGNORECASE P((void)); -extern void set_OFS P((void)); -extern void set_ORS P((void)); -extern void set_OFMT P((void)); -extern void set_CONVFMT P((void)); -extern void set_BINMODE 
P((void)); -extern void set_LINT P((void)); -extern void set_TEXTDOMAIN P((void)); -extern void update_ERRNO P((void)); -extern char *flags2str P((int)); -extern char *genflags2str P((int flagval, struct flagtab *tab)); -extern char *nodetype2str P((NODETYPE type)); -extern NODE *assign_val P((NODE **lhs_p, NODE *rhs)); -#ifdef PROFILING -extern void dump_fcall_stack P((FILE *fp)); -#endif -/* ext.c */ -NODE *do_ext P((NODE *)); -#ifdef DYNAMIC -void make_builtin P((char *, NODE *(*)(NODE *), int)); -NODE *get_argument P((NODE *, int)); -void set_value P((NODE *)); -#endif -/* field.c */ -extern void init_fields P((void)); -extern void set_record P((char *buf, int cnt, int freeold)); -extern void reset_record P((void)); -extern void set_NF P((void)); -extern NODE **get_field P((long num, Func_ptr *assign)); -extern NODE *do_split P((NODE *tree)); -extern void set_FS P((void)); -extern void set_FS_if_not_FIELDWIDTHS P((void)); -extern void set_RS P((void)); -extern void set_FIELDWIDTHS P((void)); -extern int using_fieldwidths P((void)); -/* gawkmisc.c */ -extern char *gawk_name P((const char *filespec)); -extern void os_arg_fixup P((int *argcp, char ***argvp)); -extern int os_devopen P((const char *name, int flag)); -extern void os_close_on_exec P((int fd, const char *name, const char *what, - const char *dir)); -extern int os_isdir P((int fd)); -extern int os_is_setuid P((void)); -extern int os_setbinmode P((int fd, int mode)); -extern void os_restore_mode P((int fd)); -extern int optimal_bufsize P((int fd, struct stat *sbuf)); -extern int ispath P((const char *file)); -extern int isdirpunct P((int c)); -/* io.c */ -extern void set_FNR P((void)); -extern void set_NR P((void)); -extern void do_input P((void)); -extern struct redirect *redirect P((NODE *tree, int *errflg)); -extern NODE *do_close P((NODE *tree)); -extern int flush_io P((void)); -extern int close_io P((void)); -extern int devopen P((const char *name, const char *mode)); -extern int pathopen P((const char *file)); -extern NODE *do_getline P((NODE *tree)); -extern void do_nextfile P((void)); -extern struct redirect *getredirect P((char *str, int len)); -/* main.c */ -extern int main P((int argc, char **argv)); -extern void load_environ P((void)); -extern void load_procinfo P((void)); -extern char *arg_assign P((char *arg)); -extern RETSIGTYPE catchsig P((int sig, int code)); -/* msg.c */ -extern void err P((const char *s, const char *emsg, va_list argp)); -#if _MSC_VER == 510 -extern void msg P((va_list va_alist, ...)); -extern void error P((va_list va_alist, ...)); -extern void warning P((va_list va_alist, ...)); -extern void set_loc P((char *file, int line)); -extern void r_fatal P((va_list va_alist, ...)); -extern void (*lintfunc) P((va_list va_alist, ...)); -#else -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -extern void msg (char *mesg, ...); -extern void error (char *mesg, ...); -extern void warning (char *mesg, ...); -extern void set_loc (char *file, int line); -extern void r_fatal (char *mesg, ...); -extern void (*lintfunc) (char *mesg, ...); -#else -extern void msg (); -extern void error (); -extern void warning (); -extern void set_loc (); -extern void r_fatal (); -extern void (*lintfunc) (); -#endif -#endif -/* profile.c */ -extern void init_profiling P((int *flag, const char *def_file)); -extern void init_profiling_signals P((void)); -extern void set_prof_file P((const char *filename)); -extern void dump_prog P((NODE *begin, NODE *prog, NODE *end)); -extern void pp_func P((char *name, size_t namelen, 
NODE *f)); -extern void pp_string_fp P((FILE *fp, char *str, size_t namelen, - int delim, int breaklines)); -/* node.c */ -extern AWKNUM r_force_number P((NODE *n)); -extern NODE *format_val P((char *format, int index, NODE *s)); -extern NODE *r_force_string P((NODE *s)); -extern NODE *dupnode P((NODE *n)); -extern NODE *copynode P((NODE *n)); -extern NODE *mk_number P((AWKNUM x, unsigned int flags)); -extern NODE *make_str_node P((char *s, size_t len, int scan )); -extern NODE *tmp_string P((char *s, size_t len )); -extern NODE *more_nodes P((void)); -#ifdef MEMDEBUG -extern void freenode P((NODE *it)); -#endif -extern void unref P((NODE *tmp)); -extern int parse_escape P((char **string_ptr)); -/* re.c */ -extern Regexp *make_regexp P((char *s, size_t len, int ignorecase, int dfa)); -extern int research P((Regexp *rp, char *str, int start, - size_t len, int need_start)); -extern void refree P((Regexp *rp)); -extern void reg_error P((const char *s)); -extern Regexp *re_update P((NODE *t)); -extern void resyntax P((int syntax)); -extern void resetup P((void)); -extern int avoid_dfa P((NODE *re, char *str, size_t len)); /* temporary */ -extern int reisstring P((char *text, size_t len, Regexp *re, char *buf)); - -/* strncasecmp.c */ -#ifndef BROKEN_STRNCASECMP -extern int strcasecmp P((const char *s1, const char *s2)); -extern int strncasecmp P((const char *s1, const char *s2, register size_t n)); -#endif - -#if defined(atarist) -#if defined(PIPES_SIMULATED) -/* unsupported/atari/tmpnam.c */ -extern char *tmpnam P((char *buf)); -extern char *tempnam P((const char *path, const char *base)); -#else -#include -#endif -#include -#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1) -#else -#define INVALID_HANDLE (-1) -#endif /* atarist */ - -#ifndef STATIC -#define STATIC static -#endif diff --git a/contrib/awk/awk.y b/contrib/awk/awk.y deleted file mode 100644 index f6fdfcc..0000000 --- a/contrib/awk/awk.y +++ /dev/null @@ -1,2479 +0,0 @@ -/* - * awk.y --- yacc/bison parser - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2000 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -%{ -#ifdef DEBUG -#define YYDEBUG 12 -#endif - -#include "awk.h" - -#define CAN_FREE TRUE -#define DONT_FREE FALSE - -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -static void yyerror(const char *m, ...) 
; -#else -static void yyerror(); /* va_alist */ -#endif -static char *get_src_buf P((void)); -static int yylex P((void)); -static NODE *node_common P((NODETYPE op)); -static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); -static NODE *mkrangenode P((NODE *cpair)); -static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); -static NODE *append_right P((NODE *list, NODE *new)); -static void func_install P((NODE *params, NODE *def)); -static void pop_var P((NODE *np, int freeit)); -static void pop_params P((NODE *params)); -static NODE *make_param P((char *name)); -static NODE *mk_rexp P((NODE *exp)); -static int dup_parms P((NODE *func)); -static void param_sanity P((NODE *arglist)); -static int isnoeffect P((NODETYPE t)); -static int isassignable P((NODE *n)); - -enum defref { FUNC_DEFINE, FUNC_USE }; -static void func_use P((char *name, enum defref how)); -static void check_funcs P((void)); - -static int want_assign; /* lexical scanning kludge */ -static int want_regexp; /* lexical scanning kludge */ -static int can_return; /* lexical scanning kludge */ -static int io_allowed = TRUE; /* lexical scanning kludge */ -static char *lexptr; /* pointer to next char during parsing */ -static char *lexend; -static char *lexptr_begin; /* keep track of where we were for error msgs */ -static char *lexeme; /* beginning of lexeme for debugging */ -static char *thisline = NULL; -#define YYDEBUG_LEXER_TEXT (lexeme) -static int param_counter; -static char *tokstart = NULL; -static char *tok = NULL; -static char *tokend; - -#define HASHSIZE 1021 /* this constant only used here */ -NODE *variables[HASHSIZE]; - -extern char *source; -extern int sourceline; -extern struct src *srcfiles; -extern int numfiles; -extern int errcount; -extern NODE *begin_block; -extern NODE *end_block; -%} - -%union { - long lval; - AWKNUM fval; - NODE *nodeval; - NODETYPE nodetypeval; - char *sval; - NODE *(*ptrval)(); -} - -%type function_prologue function_body -%type rexp exp start program rule simp_exp -%type non_post_simp_exp -%type pattern -%type action variable param_list -%type rexpression_list opt_rexpression_list -%type expression_list opt_expression_list -%type statements statement if_statement opt_param_list -%type opt_exp opt_variable regexp -%type input_redir output_redir -%type print -%type func_name -%type lex_builtin - -%token FUNC_CALL NAME REGEXP -%token ERROR -%token YNUMBER YSTRING -%token RELOP APPEND_OP -%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP -%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE -%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE -%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION -%token LEX_GETLINE LEX_NEXTFILE -%token LEX_IN -%token LEX_AND LEX_OR INCREMENT DECREMENT -%token LEX_BUILTIN LEX_LENGTH - -/* these are just yylval numbers */ - -/* Lowest to highest */ -%right ASSIGNOP -%right '?' ':' -%left LEX_OR -%left LEX_AND -%left LEX_GETLINE -%nonassoc LEX_IN -%left FUNC_CALL LEX_BUILTIN LEX_LENGTH -%nonassoc ',' -%nonassoc MATCHOP -%nonassoc RELOP '<' '>' '|' APPEND_OP -%left CONCAT_OP -%left YSTRING YNUMBER -%left '+' '-' -%left '*' '/' '%' -%right '!' 
UNARY -%right '^' -%left INCREMENT DECREMENT -%left '$' -%left '(' ')' -%% - -start - : opt_nls program opt_nls - { - expression_value = $2; - check_funcs(); - } - ; - -program - : rule - { - if ($1 != NULL) - $$ = $1; - else - $$ = NULL; - yyerrok; - } - | program rule - /* add the rule to the tail of list */ - { - if ($2 == NULL) - $$ = $1; - else if ($1 == NULL) - $$ = $2; - else { - if ($1->type != Node_rule_list) - $1 = node($1, Node_rule_list, - (NODE*) NULL); - $$ = append_right($1, - node($2, Node_rule_list, (NODE *) NULL)); - } - yyerrok; - } - | error { $$ = NULL; } - | program error { $$ = NULL; } - | /* empty */ { $$ = NULL; } - ; - -rule - : LEX_BEGIN { io_allowed = FALSE; } - action - { - if (begin_block != NULL) { - if (begin_block->type != Node_rule_list) - begin_block = node(begin_block, Node_rule_list, - (NODE *) NULL); - (void) append_right(begin_block, node( - node((NODE *) NULL, Node_rule_node, $3), - Node_rule_list, (NODE *) NULL) ); - } else - begin_block = node((NODE *) NULL, Node_rule_node, $3); - $$ = NULL; - io_allowed = TRUE; - yyerrok; - } - | LEX_END { io_allowed = FALSE; } - action - { - if (end_block != NULL) { - if (end_block->type != Node_rule_list) - end_block = node(end_block, Node_rule_list, - (NODE *) NULL); - (void) append_right (end_block, node( - node((NODE *) NULL, Node_rule_node, $3), - Node_rule_list, (NODE *) NULL)); - } else - end_block = node((NODE *) NULL, Node_rule_node, $3); - $$ = NULL; - io_allowed = TRUE; - yyerrok; - } - | LEX_BEGIN statement_term - { - warning("BEGIN blocks must have an action part"); - errcount++; - yyerrok; - } - | LEX_END statement_term - { - warning("END blocks must have an action part"); - errcount++; - yyerrok; - } - | pattern action - { $$ = node($1, Node_rule_node, $2); yyerrok; } - | action - { $$ = node((NODE *) NULL, Node_rule_node, $1); yyerrok; } - | pattern statement_term - { - $$ = node($1, - Node_rule_node, - node(node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL), - Node_K_print, - (NODE *) NULL)); - yyerrok; - } - | function_prologue function_body - { - func_install($1, $2); - $$ = NULL; - yyerrok; - } - ; - -func_name - : NAME - { $$ = $1; } - | FUNC_CALL - { $$ = $1; } - | lex_builtin - { - yyerror("%s() is a built-in function, it cannot be redefined", - tokstart); - errcount++; - /* yyerrok; */ - } - ; - -lex_builtin - : LEX_BUILTIN - | LEX_LENGTH - ; - -function_prologue - : LEX_FUNCTION - { - param_counter = 0; - } - func_name '(' opt_param_list r_paren opt_nls - { - NODE *t; - - t = make_param($3); - t->flags |= FUNC; - $$ = append_right(t, $5); - can_return = TRUE; - /* check for duplicate parameter names */ - if (dup_parms($$)) - errcount++; - } - ; - -function_body - : l_brace statements r_brace opt_semi - { - $$ = $2; - can_return = FALSE; - } - | l_brace r_brace opt_semi opt_nls - { - $$ = node((NODE *) NULL, Node_K_return, (NODE *) NULL); - can_return = FALSE; - } - ; - - -pattern - : exp - { $$ = $1; } - | exp ',' exp - { $$ = mkrangenode(node($1, Node_cond_pair, $3)); } - ; - -regexp - /* - * In this rule, want_regexp tells yylex that the next thing - * is a regexp so it should read up to the closing slash. 
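/*
 * A minimal, self-contained sketch of the "want_regexp" idea described in
 * the comment above: when the parser expects a regexp, the scanner
 * switches modes and collects everything up to the next unescaped '/'.
 * Names are illustrative; this is not gawk's scanner.
 */
#include <stdio.h>
#include <string.h>

/*
 * Copy a regexp body from src (just past the opening '/') into dst;
 * returns the number of characters consumed, or -1 if unterminated.
 */
int
scan_regexp(const char *src, char *dst, size_t dstlen)
{
	size_t i, o = 0;

	for (i = 0; src[i] != '\0'; i++) {
		if (src[i] == '\\' && src[i + 1] != '\0') {
			/* keep the backslash escape, including "\/" */
			if (o + 2 >= dstlen)
				return -1;
			dst[o++] = src[i];
			dst[o++] = src[++i];
		} else if (src[i] == '/') {
			dst[o] = '\0';
			return (int) (i + 1);	/* consumed through closing '/' */
		} else {
			if (o + 1 >= dstlen)
				return -1;
			dst[o++] = src[i];
		}
	}
	return -1;	/* no closing slash */
}

int
main(void)
{
	char re[64];

	if (scan_regexp("[a-z]+\\/foo/ rest", re, sizeof(re)) > 0)
		printf("regexp body: %s\n", re);
	return 0;
}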
- */ - : '/' - { ++want_regexp; } - REGEXP '/' - { - NODE *n; - size_t len; - - getnode(n); - n->type = Node_regex; - len = strlen($3); - n->re_exp = make_string($3, len); - n->re_reg = make_regexp($3, len, FALSE, TRUE); - n->re_text = NULL; - n->re_flags = CONST; - n->re_cnt = 1; - $$ = n; - } - ; - -action - : l_brace statements r_brace opt_semi opt_nls - { $$ = $2; } - | l_brace r_brace opt_semi opt_nls - { $$ = NULL; } - ; - -statements - : statement - { - $$ = $1; - if (do_lint && isnoeffect($$->type)) - warning("statement may have no effect"); - } - | statements statement - { - if ($1 == NULL || $1->type != Node_statement_list) - $1 = node($1, Node_statement_list, (NODE *) NULL); - $$ = append_right($1, - node($2, Node_statement_list, (NODE *) NULL)); - yyerrok; - } - | error - { $$ = NULL; } - | statements error - { $$ = NULL; } - ; - -statement_term - : nls - | semi opt_nls - ; - -statement - : semi opt_nls - { $$ = NULL; } - | l_brace r_brace - { $$ = NULL; } - | l_brace statements r_brace - { $$ = $2; } - | if_statement - { $$ = $1; } - | LEX_WHILE '(' exp r_paren opt_nls statement - { $$ = node($3, Node_K_while, $6); } - | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls - { $$ = node($6, Node_K_do, $3); } - | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement - { - /* - * Efficiency hack. Recognize the special case of - * - * for (iggy in foo) - * delete foo[iggy] - * - * and treat it as if it were - * - * delete foo - * - * Check that the body is a `delete a[i]' statement, - * and that both the loop var and array names match. - */ - if ($8->type == Node_K_delete - && $8->rnode != NULL - && strcmp($3, $8->rnode->var_value->vname) == 0 - && strcmp($5, $8->lnode->vname) == 0) { - $8->type = Node_K_delete_loop; - $$ = $8; - } else { - $$ = node($8, Node_K_arrayfor, - make_for_loop(variable($3, CAN_FREE, Node_var), - (NODE *) NULL, variable($5, CAN_FREE, Node_var_array))); - } - } - | LEX_FOR '(' opt_exp semi exp semi opt_exp r_paren opt_nls statement - { - $$ = node($10, Node_K_for, (NODE *) make_for_loop($3, $5, $7)); - } - | LEX_FOR '(' opt_exp semi semi opt_exp r_paren opt_nls statement - { - $$ = node($9, Node_K_for, - (NODE *) make_for_loop($3, (NODE *) NULL, $6)); - } - | LEX_BREAK statement_term - /* for break, maybe we'll have to remember where to break to */ - { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); } - | LEX_CONTINUE statement_term - /* similarly */ - { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); } - | print '(' expression_list r_paren output_redir statement_term - { $$ = node($3, $1, $5); } - | print opt_rexpression_list output_redir statement_term - { - if ($1 == Node_K_print && $2 == NULL) { - static int warned = FALSE; - - $2 = node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); - - if (do_lint && ! io_allowed && ! warned) { - warned = TRUE; - warning( - "plain `print' in BEGIN or END rule should probably be `print \"\"'"); - } - } - - $$ = node($2, $1, $3); - } - | LEX_NEXT opt_exp statement_term - { NODETYPE type; - - if ($2) { - if ($2 == lookup("file")) { - static int warned = FALSE; - - if (! warned) { - warned = TRUE; - warning("`next file' is obsolete; use `nextfile'"); - } - if (do_lint) - warning("`next file' is a gawk extension"); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error("`next file' is a gawk extension"); - } - if (! 
io_allowed) { - /* same thing */ - errcount++; - error("`next file' used in BEGIN or END action"); - } - type = Node_K_nextfile; - } else { - errcount++; - error("illegal expression after `next'"); - type = Node_K_next; /* sanity */ - } - } else { - if (! io_allowed) - yyerror("`next' used in BEGIN or END action"); - type = Node_K_next; - } - $$ = node((NODE *) NULL, type, (NODE *) NULL); - } - | LEX_NEXTFILE statement_term - { - if (do_lint) - warning("`nextfile' is a gawk extension"); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error("`nextfile' is a gawk extension"); - } - if (! io_allowed) { - /* same thing */ - errcount++; - error("`nextfile' used in BEGIN or END action"); - } - $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL); - } - | LEX_EXIT opt_exp statement_term - { $$ = node($2, Node_K_exit, (NODE *) NULL); } - | LEX_RETURN - { - if (! can_return) - yyerror("`return' used outside function context"); - } - opt_exp statement_term - { $$ = node($3, Node_K_return, (NODE *) NULL); } - | LEX_DELETE NAME '[' expression_list ']' statement_term - { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); } - | LEX_DELETE NAME statement_term - { - if (do_lint) - warning("`delete array' is a gawk extension"); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error("`delete array' is a gawk extension"); - } - $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); - } - | exp statement_term - { $$ = $1; } - ; - -print - : LEX_PRINT - { $$ = $1; } - | LEX_PRINTF - { $$ = $1; } - ; - -if_statement - : LEX_IF '(' exp r_paren opt_nls statement - { - $$ = node($3, Node_K_if, - node($6, Node_if_branches, (NODE *) NULL)); - } - | LEX_IF '(' exp r_paren opt_nls statement - LEX_ELSE opt_nls statement - { $$ = node($3, Node_K_if, - node($6, Node_if_branches, $9)); } - ; - -nls - : NEWLINE - { want_assign = FALSE; } - | nls NEWLINE - ; - -opt_nls - : /* empty */ - | nls - ; - -input_redir - : /* empty */ - { $$ = NULL; } - | '<' simp_exp - { $$ = node($2, Node_redirect_input, (NODE *) NULL); } - ; - -output_redir - : /* empty */ - { $$ = NULL; } - | '>' exp - { $$ = node($2, Node_redirect_output, (NODE *) NULL); } - | APPEND_OP exp - { $$ = node($2, Node_redirect_append, (NODE *) NULL); } - | '|' exp - { $$ = node($2, Node_redirect_pipe, (NODE *) NULL); } - ; - -opt_param_list - : /* empty */ - { $$ = NULL; } - | param_list - { $$ = $1; } - ; - -param_list - : NAME - { $$ = make_param($1); } - | param_list comma NAME - { $$ = append_right($1, make_param($3)); yyerrok; } - | error - { $$ = NULL; } - | param_list error - { $$ = NULL; } - | param_list comma error - { $$ = NULL; } - ; - -/* optional expression, as in for loop */ -opt_exp - : /* empty */ - { $$ = NULL; } - | exp - { $$ = $1; } - ; - -opt_rexpression_list - : /* empty */ - { $$ = NULL; } - | rexpression_list - { $$ = $1; } - ; - -rexpression_list - : rexp - { $$ = node($1, Node_expression_list, (NODE *) NULL); } - | rexpression_list comma rexp - { - $$ = append_right($1, - node($3, Node_expression_list, (NODE *) NULL)); - yyerrok; - } - | error - { $$ = NULL; } - | rexpression_list error - { $$ = NULL; } - | rexpression_list error rexp - { $$ = NULL; } - | rexpression_list comma error - { $$ = NULL; } - ; - -opt_expression_list - : /* empty */ - { $$ = NULL; } - | expression_list - { $$ = $1; } - ; - -expression_list - : exp - { $$ = node($1, 
Node_expression_list, (NODE *) NULL); } - | expression_list comma exp - { - $$ = append_right($1, - node($3, Node_expression_list, (NODE *) NULL)); - yyerrok; - } - | error - { $$ = NULL; } - | expression_list error - { $$ = NULL; } - | expression_list error exp - { $$ = NULL; } - | expression_list comma error - { $$ = NULL; } - ; - -/* Expressions, not including the comma operator. */ -exp : variable ASSIGNOP - { want_assign = FALSE; } - exp - { - if (do_lint && $4->type == Node_regex) - warning("Regular expression on left of assignment."); - $$ = node($1, $2, $4); - } - | '(' expression_list r_paren LEX_IN NAME - { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); } - | exp '|' LEX_GETLINE opt_variable - { - $$ = node($4, Node_K_getline, - node($1, Node_redirect_pipein, (NODE *) NULL)); - } - | LEX_GETLINE opt_variable input_redir - { - if (do_lint && ! io_allowed && $3 == NULL) - warning("non-redirected getline undefined inside BEGIN or END action"); - $$ = node($2, Node_K_getline, $3); - } - | exp LEX_AND exp - { $$ = node($1, Node_and, $3); } - | exp LEX_OR exp - { $$ = node($1, Node_or, $3); } - | exp MATCHOP exp - { - if ($1->type == Node_regex) - warning("Regular expression on left of MATCH operator."); - $$ = node($1, $2, mk_rexp($3)); - } - | regexp - { - $$ = $1; - if (do_lint && tokstart[0] == '*') { - /* possible C comment */ - int n = strlen(tokstart) - 1; - if (tokstart[n] == '*') - warning("regexp looks like a C comment, but is not"); - } - } - | '!' regexp %prec UNARY - { - $$ = node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_nomatch, - $2); - } - | exp LEX_IN NAME - { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } - | exp RELOP exp - { - if (do_lint && $3->type == Node_regex) - warning("Regular expression on left of comparison."); - $$ = node($1, $2, $3); - } - | exp '<' exp - { $$ = node($1, Node_less, $3); } - | exp '>' exp - { $$ = node($1, Node_greater, $3); } - | exp '?' exp ':' exp - { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} - | simp_exp - { $$ = $1; } - | exp simp_exp %prec CONCAT_OP - { $$ = node($1, Node_concat, $2); } - ; - -rexp - : variable ASSIGNOP - { want_assign = FALSE; } - rexp - { $$ = node($1, $2, $4); } - | rexp LEX_AND rexp - { $$ = node($1, Node_and, $3); } - | rexp LEX_OR rexp - { $$ = node($1, Node_or, $3); } - | LEX_GETLINE opt_variable input_redir - { - if (do_lint && ! io_allowed && $3 == NULL) - warning("non-redirected getline undefined inside BEGIN or END action"); - $$ = node($2, Node_K_getline, $3); - } - | regexp - { $$ = $1; } - | '!' regexp %prec UNARY - { $$ = node((NODE *) NULL, Node_nomatch, $2); } - | rexp MATCHOP rexp - { $$ = node($1, $2, mk_rexp($3)); } - | rexp LEX_IN NAME - { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } - | rexp RELOP rexp - { $$ = node($1, $2, $3); } - | rexp '?' rexp ':' rexp - { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} - | simp_exp - { $$ = $1; } - | rexp simp_exp %prec CONCAT_OP - { $$ = node($1, Node_concat, $2); } - ; - -simp_exp - : non_post_simp_exp - /* Binary operators in order of decreasing precedence. 
*/ - | simp_exp '^' simp_exp - { $$ = node($1, Node_exp, $3); } - | simp_exp '*' simp_exp - { $$ = node($1, Node_times, $3); } - | simp_exp '/' simp_exp - { $$ = node($1, Node_quotient, $3); } - | simp_exp '%' simp_exp - { $$ = node($1, Node_mod, $3); } - | simp_exp '+' simp_exp - { $$ = node($1, Node_plus, $3); } - | simp_exp '-' simp_exp - { $$ = node($1, Node_minus, $3); } - | variable INCREMENT - { $$ = node($1, Node_postincrement, (NODE *) NULL); } - | variable DECREMENT - { $$ = node($1, Node_postdecrement, (NODE *) NULL); } - ; - -non_post_simp_exp - : '!' simp_exp %prec UNARY - { $$ = node($2, Node_not, (NODE *) NULL); } - | '(' exp r_paren - { $$ = $2; } - | LEX_BUILTIN - '(' opt_expression_list r_paren - { $$ = snode($3, Node_builtin, (int) $1); } - | LEX_LENGTH '(' opt_expression_list r_paren - { $$ = snode($3, Node_builtin, (int) $1); } - | LEX_LENGTH - { - if (do_lint) - warning("call of `length' without parentheses is not portable"); - $$ = snode((NODE *) NULL, Node_builtin, (int) $1); - if (do_posix) - warning("call of `length' without parentheses is deprecated by POSIX"); - } - | FUNC_CALL '(' opt_expression_list r_paren - { - $$ = node($3, Node_func_call, make_string($1, strlen($1))); - func_use($1, FUNC_USE); - param_sanity($3); - free($1); - } - | variable - | INCREMENT variable - { $$ = node($2, Node_preincrement, (NODE *) NULL); } - | DECREMENT variable - { $$ = node($2, Node_predecrement, (NODE *) NULL); } - | YNUMBER - { $$ = $1; } - | YSTRING - { $$ = $1; } - - | '-' simp_exp %prec UNARY - { - if ($2->type == Node_val) { - $2->numbr = -(force_number($2)); - $$ = $2; - } else - $$ = node($2, Node_unary_minus, (NODE *) NULL); - } - | '+' simp_exp %prec UNARY - { - /* - * was: $$ = $2 - * POSIX semantics: force a conversion to numeric type - */ - $$ = node (make_number(0.0), Node_plus, $2); - } - ; - -opt_variable - : /* empty */ - { $$ = NULL; } - | variable - { $$ = $1; } - ; - -variable - : NAME - { $$ = variable($1, CAN_FREE, Node_var); } - | NAME '[' expression_list ']' - { - if ($3 == NULL) { - fatal("invalid subscript expression"); - } else if ($3->rnode == NULL) { - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); - freenode($3); - } else - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3); - } - | '$' non_post_simp_exp - { $$ = node($2, Node_field_spec, (NODE *) NULL); } - ; - -l_brace - : '{' opt_nls - ; - -r_brace - : '}' opt_nls { yyerrok; } - ; - -r_paren - : ')' { yyerrok; } - ; - -opt_semi - : /* empty */ - | semi - ; - -semi - : ';' { yyerrok; want_assign = FALSE; } - ; - -comma : ',' opt_nls { yyerrok; } - ; - -%% - -struct token { - const char *operator; /* text to match */ - NODETYPE value; /* node type */ - int class; /* lexical class */ - unsigned flags; /* # of args. allowed and compatability */ -# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ -# define A(n) (1<<(n)) -# define VERSION 0xFF00 /* old awk is zero */ -# define NOT_OLD 0x0100 /* feature not in old awk */ -# define NOT_POSIX 0x0200 /* feature not in POSIX */ -# define GAWKX 0x0400 /* gawk extension */ -# define RESX 0x0800 /* Bell Labs Research extension */ - NODE *(*ptr)(); /* function that implements this keyword */ -}; - - -/* Tokentab is sorted ascii ascending order, so it can be binary searched. */ -/* Function pointers come from declarations in awk.h. 
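
[Editorial aside: the unary-minus action in the removed grammar above folds the negation into a literal operand at parse time (when the operand is already a plain value node) instead of building a Node_unary_minus node, and unary '+' is compiled as "0 + expr" to force numeric conversion per POSIX. A minimal sketch of that folding idea, using a hypothetical expression struct rather than gawk's NODE:]

    /* Illustrative only: fold unary minus into numeric literals at build time. */
    #include <stdio.h>
    #include <stdlib.h>

    enum kind { LITERAL, NEGATE };

    struct expr {
        enum kind kind;
        double value;        /* valid when kind == LITERAL */
        struct expr *child;  /* valid when kind == NEGATE */
    };

    static struct expr *literal(double v)
    {
        struct expr *e = malloc(sizeof *e);
        e->kind = LITERAL;
        e->value = v;
        e->child = NULL;
        return e;
    }

    /* Build "-child": if the operand is already a literal, negate it in
     * place and return it; otherwise allocate a NEGATE node. */
    static struct expr *negate(struct expr *child)
    {
        if (child->kind == LITERAL) {
            child->value = -child->value;
            return child;
        }
        struct expr *e = malloc(sizeof *e);
        e->kind = NEGATE;
        e->child = child;
        return e;
    }

    int main(void)
    {
        struct expr *e = negate(literal(3.5));
        printf("kind=%d value=%g\n", e->kind, e->value);  /* kind=0 value=-3.5 */
        return 0;
    }
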
*/ - -static struct token tokentab[] = { -{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, -{"END", Node_illegal, LEX_END, 0, 0}, -#ifdef ARRAYDEBUG -{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump}, -#endif -#ifdef BITOPS -{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, -#endif /* BITOPS */ -{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, -{"break", Node_K_break, LEX_BREAK, 0, 0}, -{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close}, -#ifdef BITOPS -{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, -#endif /* BITOPS */ -{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, -{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, -{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, -{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, -{"else", Node_illegal, LEX_ELSE, 0, 0}, -{"exit", Node_K_exit, LEX_EXIT, 0, 0}, -{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, -{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, -{"for", Node_K_for, LEX_FOR, 0, 0}, -{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, -{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, -{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, -{"if", Node_K_if, LEX_IF, 0, 0}, -{"in", Node_illegal, LEX_IN, 0, 0}, -{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, -{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, -{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, -{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, -#ifdef BITOPS -{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, -#endif /* BITOPS */ -{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match}, -{"next", Node_K_next, LEX_NEXT, 0, 0}, -{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, -#ifdef BITOPS -{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, -#endif /* BITOPS */ -{"print", Node_K_print, LEX_PRINT, 0, 0}, -{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, -{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, -{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, -#ifdef BITOPS -{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, -#endif /* BITOPS */ -{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, -{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, -{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, -{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, -{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, -#ifdef ARRAYDEBUG -{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme}, -#endif -{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime}, -#ifdef BITOPS -{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -#endif /* BITOPS */ -{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, -{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, -{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, -{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, -{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, -{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, -{"while", Node_K_while, LEX_WHILE, 0, 0}, -#ifdef BITOPS -{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, -#endif /* BITOPS */ -}; - -/* yyerror --- print a syntax error message, show where */ - -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -static void -yyerror(const char *m, ...) 
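
[Editorial aside: the flags field of the token table above packs the set of legal argument counts into its low byte, one bit per count (A(n) is 1<<n), so a builtin accepting 2 or 3 arguments is declared A(2)|A(3), and a zero mask means "no check". A standalone sketch of that encoding and the corresponding check; the table entries here are hypothetical, not gawk's real flags:]

    #include <stdio.h>

    #define A(n)  (1u << (n))
    #define ARGS  0xFFu   /* low byte holds the allowed-argument-count bitmask */

    struct builtin {
        const char *name;
        unsigned flags;   /* e.g. A(2)|A(3) means 2 or 3 arguments are legal */
    };

    static const struct builtin table[] = {
        { "substr", A(2) | A(3) },
        { "length", A(0) | A(1) },
    };

    /* Return 1 if calling builtin b with nargs arguments is allowed.
     * A zero mask means the count is not checked at all. */
    static int args_ok(const struct builtin *b, int nargs)
    {
        unsigned allowed = b->flags & ARGS;
        return allowed == 0 || (allowed & A(nargs)) != 0;
    }

    int main(void)
    {
        printf("substr with 2 args: %d\n", args_ok(&table[0], 2));  /* 1 */
        printf("substr with 4 args: %d\n", args_ok(&table[0], 4));  /* 0 */
        return 0;
    }
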
-#else -/* VARARGS0 */ -static void -yyerror(va_alist) -va_dcl -#endif -{ - va_list args; - const char *mesg = NULL; - register char *bp, *cp; - char *scan; - char buf[120]; - static char end_of_file_line[] = "(END OF FILE)"; - - errcount++; - /* Find the current line in the input file */ - if (lexptr && lexeme) { - if (thisline == NULL) { - cp = lexeme; - if (*cp == '\n') { - cp--; - mesg = "unexpected newline"; - } - for (; cp != lexptr_begin && *cp != '\n'; --cp) - continue; - if (*cp == '\n') - cp++; - thisline = cp; - } - /* NL isn't guaranteed */ - bp = lexeme; - while (bp < lexend && *bp && *bp != '\n') - bp++; - } else { - thisline = end_of_file_line; - bp = thisline + strlen(thisline); - } - msg("%.*s", (int) (bp - thisline), thisline); - bp = buf; - cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */ - if (lexptr != NULL) { - scan = thisline; - while (bp < cp && scan < lexeme) - if (*scan++ == '\t') - *bp++ = '\t'; - else - *bp++ = ' '; - *bp++ = '^'; - *bp++ = ' '; - } -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, m); - if (mesg == NULL) - mesg = m; -#else - va_start(args); - if (mesg == NULL) - mesg = va_arg(args, char *); -#endif - strcpy(bp, mesg); - err("", buf, args); - va_end(args); -} - -/* get_src_buf --- read the next buffer of source program */ - -static char * -get_src_buf() -{ - static int samefile = FALSE; - static int nextfile = 0; - static char *buf = NULL; - static int fd; - int n; - register char *scan; - static int len = 0; - static int did_newline = FALSE; - int newfile; - struct stat sbuf; - -# define SLOP 128 /* enough space to hold most source lines */ - -again: - newfile = FALSE; - if (nextfile > numfiles) - return NULL; - - if (srcfiles[nextfile].stype == CMDLINE) { - if (len == 0) { - len = strlen(srcfiles[nextfile].val); - if (len == 0) { - /* - * Yet Another Special case: - * gawk '' /path/name - * Sigh. - */ - static int warned = FALSE; - - if (do_lint && ! warned) { - warned = TRUE; - warning("empty program text on command line"); - } - ++nextfile; - goto again; - } - sourceline = 1; - lexptr = lexptr_begin = srcfiles[nextfile].val; - lexend = lexptr + len; - } else if (! did_newline && *(lexptr-1) != '\n') { - /* - * The following goop is to ensure that the source - * ends with a newline and that the entire current - * line is available for error messages. - */ - int offset; - - did_newline = TRUE; - offset = lexptr - lexeme; - for (scan = lexeme; scan > lexptr_begin; scan--) - if (*scan == '\n') { - scan++; - break; - } - len = lexptr - scan; - emalloc(buf, char *, len+1, "get_src_buf"); - memcpy(buf, scan, len); - thisline = buf; - lexptr = buf + len; - *lexptr = '\n'; - lexeme = lexptr - offset; - lexptr_begin = buf; - lexend = lexptr + 1; - } else { - len = 0; - lexeme = lexptr = lexptr_begin = NULL; - } - if (lexptr == NULL && ++nextfile <= numfiles) - goto again; - return lexptr; - } - if (! samefile) { - source = srcfiles[nextfile].val; - if (source == NULL) { - if (buf != NULL) { - free(buf); - buf = NULL; - } - len = 0; - return lexeme = lexptr = lexptr_begin = NULL; - } - fd = pathopen(source); - if (fd <= INVALID_HANDLE) { - char *in; - - /* suppress file name and line no. 
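
[Editorial aside: yyerror above points at the offending token by reprinting the current source line and then emitting a second line of padding up to the error column, copying tabs through unchanged so the caret stays aligned whatever the tab width. A small self-contained version of that trick, simplified relative to the removed routine:]

    #include <stdio.h>
    #include <string.h>

    /* Print line, then a caret under column col (0-based).  Tabs are echoed
     * as tabs so the caret lines up regardless of how wide a tab displays. */
    static void show_error(const char *line, size_t col)
    {
        printf("%s\n", line);
        for (size_t i = 0; i < col && line[i] != '\0'; i++)
            putchar(line[i] == '\t' ? '\t' : ' ');
        printf("^ syntax error\n");
    }

    int main(void)
    {
        const char *src = "\tx = y +* z";
        show_error(src, (size_t) (strchr(src, '*') - src));
        return 0;
    }
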
in error mesg */ - in = source; - source = NULL; - fatal("can't open source file \"%s\" for reading (%s)", - in, strerror(errno)); - } - len = optimal_bufsize(fd, & sbuf); - newfile = TRUE; - if (buf != NULL) - free(buf); - emalloc(buf, char *, len + SLOP, "get_src_buf"); - lexptr_begin = buf + SLOP; - samefile = TRUE; - sourceline = 1; - } else { - /* - * Here, we retain the current source line (up to length SLOP) - * in the beginning of the buffer that was overallocated above - */ - int offset; - int linelen; - - offset = lexptr - lexeme; - for (scan = lexeme; scan > lexptr_begin; scan--) - if (*scan == '\n') { - scan++; - break; - } - linelen = lexptr - scan; - if (linelen > SLOP) - linelen = SLOP; - thisline = buf + SLOP - linelen; - memcpy(thisline, scan, linelen); - lexeme = buf + SLOP - offset; - lexptr_begin = thisline; - } - n = read(fd, buf + SLOP, len); - if (n == -1) - fatal("can't read sourcefile \"%s\" (%s)", - source, strerror(errno)); - if (n == 0) { - if (newfile) { - static int warned = FALSE; - - if (do_lint && ! warned) { - warned = TRUE; - warning("source file `%s' is empty", source); - } - } - if (fileno(stdin) != fd) /* safety */ - close(fd); - samefile = FALSE; - nextfile++; - if (lexeme) - *lexeme = '\0'; - len = 0; - goto again; - } - lexptr = buf + SLOP; - lexend = lexptr + n; - return buf; -} - -/* tokadd --- add a character to the token buffer */ - -#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok) - -/* tokexpand --- grow the token buffer */ - -char * -tokexpand() -{ - static int toksize = 60; - int tokoffset; - - tokoffset = tok - tokstart; - toksize *= 2; - if (tokstart != NULL) - erealloc(tokstart, char *, toksize, "tokexpand"); - else - emalloc(tokstart, char *, toksize, "tokexpand"); - tokend = tokstart + toksize; - tok = tokstart + tokoffset; - return tok; -} - -/* nextc --- get the next input character */ - -#if DEBUG -int -nextc() -{ - int c; - - if (lexptr && lexptr < lexend) - c = (int) (unsigned char) *lexptr++; - else if (get_src_buf()) - c = (int) (unsigned char) *lexptr++; - else - c = EOF; - - return c; -} -#else -#define nextc() ((lexptr && lexptr < lexend) ? \ - ((int) (unsigned char) *lexptr++) : \ - (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \ - ) -#endif - -/* pushback --- push a character back on the input */ - -#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) - -/* allow_newline --- allow newline after &&, ||, ? and : */ - -static void -allow_newline() -{ - int c; - - for (;;) { - c = nextc(); - if (c == EOF) - break; - if (c == '#') { - while ((c = nextc()) != '\n' && c != EOF) - continue; - if (c == EOF) - break; - } - if (c == '\n') - sourceline++; - if (! isspace(c)) { - pushback(); - break; - } - } -} - -/* yylex --- Read the input and turn it into tokens. */ - -static int -yylex() -{ - register int c, c1; - int seen_e = FALSE; /* These are for numbers */ - int seen_point = FALSE; - int esc_seen; /* for literal strings */ - int low, mid, high; - static int did_newline = FALSE; - char *tokkey; - static int lasttok = 0, eof_warned = FALSE; - int inhex = FALSE; - - if (nextc() == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } - pushback(); -#ifdef OS2 - /* - * added for OS/2's extproc feature of cmd.exe - * (like #! 
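
[Editorial aside: tokadd/tokexpand above accumulate the current token in a buffer that doubles whenever the write pointer reaches the end, re-deriving the pointer from its old offset after reallocation so it stays valid. The same pattern in isolation, using plain malloc/realloc instead of gawk's emalloc/erealloc wrappers:]

    #include <stdio.h>
    #include <stdlib.h>

    static char *buf, *tokp, *bufend;
    static size_t bufsize = 8;

    /* Double the buffer; recompute tokp from its old offset because
     * realloc may move the block. */
    static void grow(void)
    {
        size_t used = (size_t) (tokp - buf);
        char *nb;

        bufsize *= 2;
        nb = realloc(buf, bufsize);
        if (nb == NULL) {
            perror("realloc");
            exit(1);
        }
        buf = nb;
        tokp = buf + used;
        bufend = buf + bufsize;
    }

    /* Append one character, growing when the write pointer reaches the end,
     * so there is always room for the next store. */
    static void tokadd(int c)
    {
        *tokp++ = (char) c;
        if (tokp == bufend)
            grow();
    }

    int main(void)
    {
        buf = malloc(bufsize);
        tokp = buf;
        bufend = buf + bufsize;

        for (const char *p = "a_rather_long_identifier"; *p != '\0'; p++)
            tokadd(*p);
        tokadd('\0');
        printf("token: %s (buffer grew to %zu bytes)\n", buf, bufsize);
        free(buf);
        return 0;
    }
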
in BSD sh) - */ - if (strncasecmp(lexptr, "extproc ", 8) == 0) { - while (*lexptr && *lexptr != '\n') - lexptr++; - } -#endif - lexeme = lexptr; - thisline = NULL; - if (want_regexp) { - int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ - /* - * Counting brackets is non-trivial. [[] is ok, - * and so is [\]], with a point being that /[/]/ as a regexp - * constant has to work. - * - * Do not count [ or ] if either one is preceded by a \. - * A `[' should be counted if - * a) it is the first one so far (in_brack == 0) - * b) it is the `[' in `[:' - * A ']' should be counted if not preceded by a \, since - * it is either closing `:]' or just a plain list. - * According to POSIX, []] is how you put a ] into a set. - * Try to handle that too. - * - * The code for \ handles \[ and \]. - */ - - want_regexp = FALSE; - tok = tokstart; - for (;;) { - c = nextc(); - switch (c) { - case '[': - /* one day check for `.' and `=' too */ - if ((c1 = nextc()) == ':' || in_brack == 0) - in_brack++; - pushback(); - break; - case ']': - if (tokstart[0] == '[' - && (tok == tokstart + 1 - || (tok == tokstart + 2 - && tokstart[1] == '^'))) - /* do nothing */; - else - in_brack--; - break; - case '\\': - if ((c = nextc()) == EOF) { - yyerror("unterminated regexp ends with \\ at end of file"); - return lasttok = REGEXP; /* kludge */ - } else if (c == '\n') { - sourceline++; - continue; - } else { - tokadd('\\'); - tokadd(c); - continue; - } - break; - case '/': /* end of the regexp */ - if (in_brack > 0) - break; - - pushback(); - tokadd('\0'); - yylval.sval = tokstart; - return lasttok = REGEXP; - case '\n': - pushback(); - yyerror("unterminated regexp"); - return lasttok = REGEXP; /* kludge */ - case EOF: - yyerror("unterminated regexp at end of file"); - return lasttok = REGEXP; /* kludge */ - } - tokadd(c); - } - } -retry: - while ((c = nextc()) == ' ' || c == '\t') - continue; - - lexeme = lexptr ? lexptr - 1 : lexptr; - thisline = NULL; - tok = tokstart; - yylval.nodetypeval = Node_illegal; - - switch (c) { - case EOF: - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - - case '\n': - sourceline++; - return lasttok = NEWLINE; - - case '#': /* it's a comment */ - while ((c = nextc()) != '\n') { - if (c == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - warning( - "source file does not end in newline"); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } - } - sourceline++; - return lasttok = NEWLINE; - - case '\\': -#ifdef RELAXED_CONTINUATION - /* - * This code puports to allow comments and/or whitespace - * after the `\' at the end of a line used for continuation. - * Use it at your own risk. We think it's a bad idea, which - * is why it's not on by default. - */ - if (! do_traditional) { - /* strip trailing white-space and/or comment */ - while ((c = nextc()) == ' ' || c == '\t') - continue; - if (c == '#') { - if (do_lint) - warning( - "use of `\\ #...' 
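
[Editorial aside: the want_regexp branch above scans a /.../ constant and must not treat a slash inside a bracket expression such as [/] as the terminator, so it tracks bracket depth and skips backslash escapes. A cut-down scanner with the same idea; it is deliberately simpler than the removed code (no special handling of []] or [[:class:]]):]

    #include <stdio.h>

    /* s points just past the opening '/'.  Return the number of characters
     * in the regexp body, or -1 if no unbracketed, unescaped '/' is found. */
    static int scan_regexp(const char *s)
    {
        int in_brack = 0;

        for (int i = 0; s[i] != '\0' && s[i] != '\n'; i++) {
            if (s[i] == '\\' && s[i + 1] != '\0') {
                i++;                    /* skip the escaped character */
            } else if (s[i] == '[') {
                in_brack++;
            } else if (s[i] == ']' && in_brack > 0) {
                in_brack--;
            } else if (s[i] == '/' && in_brack == 0) {
                return i;               /* end of the regexp constant */
            }
        }
        return -1;                      /* unterminated */
    }

    int main(void)
    {
        printf("%d\n", scan_regexp("[/]x/ rest"));   /* 4: slash inside [...] kept */
        printf("%d\n", scan_regexp("a\\/b/ rest"));  /* 4: escaped slash kept */
        printf("%d\n", scan_regexp("oops"));         /* -1: unterminated */
        return 0;
    }
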
line continuation is not portable"); - while ((c = nextc()) != '\n') - if (c == EOF) - break; - } - pushback(); - } -#endif /* RELAXED_CONTINUATION */ - if (nextc() == '\n') { - sourceline++; - goto retry; - } else { - yyerror("backslash not last character on line"); - exit(1); - } - break; - - case '$': - want_assign = TRUE; - return lasttok = '$'; - - case ':': - case '?': - allow_newline(); - return lasttok = c; - - case ')': - case '(': - case ';': - case '{': - case ',': - want_assign = FALSE; - /* fall through */ - case '[': - case ']': - return lasttok = c; - - case '*': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_times; - return lasttok = ASSIGNOP; - } else if (do_posix) { - pushback(); - return lasttok = '*'; - } else if (c == '*') { - /* make ** and **= aliases for ^ and ^= */ - static int did_warn_op = FALSE, did_warn_assgn = FALSE; - - if (nextc() == '=') { - if (do_lint && ! did_warn_assgn) { - did_warn_assgn = TRUE; - warning("**= is not allowed by POSIX"); - warning("operator `**=' is not supported in old awk"); - } - yylval.nodetypeval = Node_assign_exp; - return ASSIGNOP; - } else { - pushback(); - if (do_lint && ! did_warn_op) { - did_warn_op = TRUE; - warning("** is not allowed by POSIX"); - warning("operator `**' is not supported in old awk"); - } - return lasttok = '^'; - } - } - pushback(); - return lasttok = '*'; - - case '/': - if (want_assign) { - if (nextc() == '=') { - yylval.nodetypeval = Node_assign_quotient; - return lasttok = ASSIGNOP; - } - pushback(); - } - return lasttok = '/'; - - case '%': - if (nextc() == '=') { - yylval.nodetypeval = Node_assign_mod; - return lasttok = ASSIGNOP; - } - pushback(); - return lasttok = '%'; - - case '^': - { - static int did_warn_op = FALSE, did_warn_assgn = FALSE; - - if (nextc() == '=') { - if (do_lint && ! did_warn_assgn) { - did_warn_assgn = TRUE; - warning("operator `^=' is not supported in old awk"); - } - yylval.nodetypeval = Node_assign_exp; - return lasttok = ASSIGNOP; - } - pushback(); - if (do_lint && ! did_warn_op) { - did_warn_op = TRUE; - warning("operator `^' is not supported in old awk"); - } - return lasttok = '^'; - } - - case '+': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_plus; - return lasttok = ASSIGNOP; - } - if (c == '+') - return lasttok = INCREMENT; - pushback(); - return lasttok = '+'; - - case '!': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_notequal; - return lasttok = RELOP; - } - if (c == '~') { - yylval.nodetypeval = Node_nomatch; - want_assign = FALSE; - return lasttok = MATCHOP; - } - pushback(); - return lasttok = '!'; - - case '<': - if (nextc() == '=') { - yylval.nodetypeval = Node_leq; - return lasttok = RELOP; - } - yylval.nodetypeval = Node_less; - pushback(); - return lasttok = '<'; - - case '=': - if (nextc() == '=') { - yylval.nodetypeval = Node_equal; - return lasttok = RELOP; - } - yylval.nodetypeval = Node_assign; - pushback(); - return lasttok = ASSIGNOP; - - case '>': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_geq; - return lasttok = RELOP; - } else if (c == '>') { - yylval.nodetypeval = Node_redirect_append; - return lasttok = APPEND_OP; - } - yylval.nodetypeval = Node_greater; - pushback(); - return lasttok = '>'; - - case '~': - yylval.nodetypeval = Node_match; - want_assign = FALSE; - return lasttok = MATCHOP; - - case '}': - /* - * Added did newline stuff. Easier than - * hacking the grammar. 
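
[Editorial aside: most of the operator cases above follow the same shape: read one more character, and if it does not complete a two-character operator ('>=', '>>', '**', '+=', ...), push it back and return the single-character token. A toy fragment showing that lookahead-and-pushback pattern; the token codes are invented for the example, not gawk's:]

    #include <stdio.h>

    static const char *input;
    static int pos;

    static int nextc(void)     { return input[pos] != '\0' ? (unsigned char) input[pos++] : EOF; }
    static void pushback(void) { if (pos > 0) pos--; }

    enum tok { T_GT = 1, T_GE, T_APPEND };   /* '>', ">=", ">>" */

    /* Lex '>' vs ">=" vs ">>" using one character of lookahead. */
    static enum tok lex_gt(void)
    {
        int c = nextc();

        if (c == '=')
            return T_GE;
        if (c == '>')
            return T_APPEND;
        pushback();          /* not part of this operator; leave it for later */
        return T_GT;
    }

    int main(void)
    {
        const char *cases[] = { ">= y", ">> file", "> y" };

        for (int i = 0; i < 3; i++) {
            input = cases[i];
            pos = 0;
            nextc();                    /* consume the leading '>' */
            printf("%-8s -> %d\n", cases[i], lex_gt());
        }
        return 0;
    }
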
- */ - if (did_newline) { - did_newline = FALSE; - return lasttok = c; - } - did_newline++; - --lexptr; /* pick up } next time */ - return lasttok = NEWLINE; - - case '"': - esc_seen = FALSE; - while ((c = nextc()) != '"') { - if (c == '\n') { - pushback(); - yyerror("unterminated string"); - exit(1); - } - if (c == '\\') { - c = nextc(); - if (c == '\n') { - sourceline++; - continue; - } - esc_seen = TRUE; - tokadd('\\'); - } - if (c == EOF) { - pushback(); - yyerror("unterminated string"); - exit(1); - } - tokadd(c); - } - yylval.nodeval = make_str_node(tokstart, - tok - tokstart, esc_seen ? SCAN : 0); - yylval.nodeval->flags |= PERM; - return lasttok = YSTRING; - - case '-': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_minus; - return lasttok = ASSIGNOP; - } - if (c == '-') - return lasttok = DECREMENT; - pushback(); - return lasttok = '-'; - - case '.': - c = nextc(); - pushback(); - if (! isdigit(c)) - return lasttok = '.'; - else - c = '.'; - /* FALL THROUGH */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - /* It's a number */ - for (;;) { - int gotnumber = FALSE; - - tokadd(c); - switch (c) { -#ifdef BITOPS - case 'x': - case 'X': - if (do_traditional) - goto done; - if (tok == tokstart + 2) - inhex = TRUE; - break; -#endif /* BITOTS */ - case '.': - if (seen_point) { - gotnumber = TRUE; - break; - } - seen_point = TRUE; - break; - case 'e': - case 'E': - if (inhex) - break; - if (seen_e) { - gotnumber = TRUE; - break; - } - seen_e = TRUE; - if ((c = nextc()) == '-' || c == '+') - tokadd(c); - else - pushback(); - break; -#ifdef BITOPS - case 'a': - case 'A': - case 'b': - case 'B': - case 'c': - case 'C': - case 'D': - case 'd': - case 'f': - case 'F': - if (do_traditional || ! inhex) - goto done; - /* fall through */ -#endif - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - break; - default: - done: - gotnumber = TRUE; - } - if (gotnumber) - break; - c = nextc(); - } - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - tokadd('\0'); -#ifdef BITOPS - if (! do_traditional && isnondecimal(tokstart)) - yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); - else -#endif /* BITOPS */ - yylval.nodeval = make_number(atof(tokstart)); - yylval.nodeval->flags |= PERM; - return lasttok = YNUMBER; - - case '&': - if ((c = nextc()) == '&') { - yylval.nodetypeval = Node_and; - allow_newline(); - want_assign = FALSE; - return lasttok = LEX_AND; - } - pushback(); - return lasttok = '&'; - - case '|': - if ((c = nextc()) == '|') { - yylval.nodetypeval = Node_or; - allow_newline(); - want_assign = FALSE; - return lasttok = LEX_OR; - } - pushback(); - return lasttok = '|'; - } - - if (c != '_' && ! isalpha(c)) { - yyerror("Invalid char '%c' in expression\n", c); - exit(1); - } - - /* it's some type of name-type-thing. Find its length. */ - tok = tokstart; - while (is_identchar(c)) { - tokadd(c); - c = nextc(); - } - tokadd('\0'); - emalloc(tokkey, char *, tok - tokstart, "yylex"); - memcpy(tokkey, tokstart, tok - tokstart); - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - - /* See if it is a special token. 
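
[Editorial aside: the numeric-literal loop above is a small state machine: at most one decimal point, at most one exponent, an optional sign after the 'e', and (outside traditional mode) a leading 0x switching to hex digits. A compact version of the decimal part of that scan, without the hex mode, returning how many characters belong to the number:]

    #include <stdio.h>
    #include <ctype.h>

    /* Return the length of the numeric literal at the start of s, accepting
     * digits, at most one '.', and at most one exponent part. */
    static int scan_number(const char *s)
    {
        int i = 0, seen_point = 0, seen_e = 0;

        for (;;) {
            char c = s[i];

            if (isdigit((unsigned char) c)) {
                i++;
            } else if (c == '.' && !seen_point && !seen_e) {
                seen_point = 1;
                i++;
            } else if ((c == 'e' || c == 'E') && !seen_e && i > 0) {
                seen_e = 1;
                i++;
                if (s[i] == '+' || s[i] == '-')
                    i++;                 /* optional exponent sign */
            } else {
                break;                   /* not part of the number */
            }
        }
        return i;
    }

    int main(void)
    {
        printf("%d\n", scan_number("3.14foo"));   /* 4 */
        printf("%d\n", scan_number("1e-5+x"));    /* 4 */
        printf("%d\n", scan_number("12.3.4"));    /* 4: second '.' ends it */
        return 0;
    }
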
*/ - low = 0; - high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; - while (low <= high) { - int i; - - mid = (low + high) / 2; - c = *tokstart - tokentab[mid].operator[0]; - i = c ? c : strcmp(tokstart, tokentab[mid].operator); - - if (i < 0) /* token < mid */ - high = mid - 1; - else if (i > 0) /* token > mid */ - low = mid + 1; - else { - if (do_lint) { - if (tokentab[mid].flags & GAWKX) - warning("%s() is a gawk extension", - tokentab[mid].operator); - if (tokentab[mid].flags & RESX) - warning("%s() is a Bell Labs extension", - tokentab[mid].operator); - if (tokentab[mid].flags & NOT_POSIX) - warning("POSIX does not allow %s", - tokentab[mid].operator); - } - if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) - warning("%s is not supported in old awk", - tokentab[mid].operator); - if ((do_traditional && (tokentab[mid].flags & GAWKX)) - || (do_posix && (tokentab[mid].flags & NOT_POSIX))) - break; - if (tokentab[mid].class == LEX_BUILTIN - || tokentab[mid].class == LEX_LENGTH - ) - yylval.lval = mid; - else - yylval.nodetypeval = tokentab[mid].value; - - free(tokkey); - return lasttok = tokentab[mid].class; - } - } - - yylval.sval = tokkey; - if (*lexptr == '(') - return lasttok = FUNC_CALL; - else { - want_assign = TRUE; - return lasttok = NAME; - } -} - -/* node_common --- common code for allocating a new node */ - -static NODE * -node_common(op) -NODETYPE op; -{ - register NODE *r; - - getnode(r); - r->type = op; - r->flags = MALLOC; - /* if lookahead is NL, lineno is 1 too high */ - if (lexeme && *lexeme == '\n') - r->source_line = sourceline - 1; - else - r->source_line = sourceline; - r->source_file = source; - return r; -} - -/* node --- allocates a node with defined lnode and rnode. */ - -NODE * -node(left, op, right) -NODE *left, *right; -NODETYPE op; -{ - register NODE *r; - - r = node_common(op); - r->lnode = left; - r->rnode = right; - return r; -} - -/* snode --- allocate a node with defined subnode and proc for builtin - functions. Checks for arg. count and supplies defaults where - possible. */ - -static NODE * -snode(subn, op, idx) -NODETYPE op; -int idx; -NODE *subn; -{ - register NODE *r; - register NODE *n; - int nexp = 0; - int args_allowed; - - r = node_common(op); - - /* traverse expression list to see how many args. given */ - for (n = subn; n != NULL; n = n->rnode) { - nexp++; - if (nexp > 3) - break; - } - - /* check against how many args. are allowed for this builtin */ - args_allowed = tokentab[idx].flags & ARGS; - if (args_allowed && (args_allowed & A(nexp)) == 0) - fatal("%s() cannot have %d argument%c", - tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's'); - - r->proc = tokentab[idx].ptr; - - /* special case processing for a few builtins */ - /* - * FIXME: go through these to make sure that everything done - * here is really right. Move anything that's not into - * the corresponding routine. 
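
[Editorial aside: because tokentab is kept in ASCII order, the lookup above can binary-search it, comparing the leading character first and falling back to strcmp only on a tie. The same lookup over a toy keyword list:]

    #include <stdio.h>
    #include <string.h>

    static const char *const keywords[] = {
        "BEGIN", "END", "delete", "function", "getline", "print", "while",
    };

    /* Return the index of name in the sorted keywords[] table, or -1. */
    static int find_keyword(const char *name)
    {
        int low = 0, high = (int) (sizeof keywords / sizeof keywords[0]) - 1;

        while (low <= high) {
            int mid = (low + high) / 2;
            int c = (unsigned char) name[0] - (unsigned char) keywords[mid][0];
            int i = (c != 0) ? c : strcmp(name, keywords[mid]);

            if (i < 0)
                high = mid - 1;      /* name sorts before this entry */
            else if (i > 0)
                low = mid + 1;       /* name sorts after this entry */
            else
                return mid;
        }
        return -1;
    }

    int main(void)
    {
        printf("%d\n", find_keyword("getline"));   /* 4 */
        printf("%d\n", find_keyword("foo"));       /* -1 */
        return 0;
    }
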
- */ - if (nexp == 0 && r->proc == do_length) { - subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); - } else if (r->proc == do_match) { - if (subn->rnode->lnode->type != Node_regex) - subn->rnode->lnode = mk_rexp(subn->rnode->lnode); - } else if (r->proc == do_sub || r->proc == do_gsub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 2) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - else if (subn->rnode->rnode->lnode->type == Node_val) { - if (do_lint) - warning("string literal as last arg of substitute"); - } else if (! isassignable(subn->rnode->rnode->lnode)) - yyerror("%s third parameter is not a changeable object", - r->proc == do_sub ? "sub" : "gsub"); - } else if (r->proc == do_gensub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 3) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - } else if (r->proc == do_split) { - if (nexp == 2) - append_right(subn, - node(FS_node, Node_expression_list, (NODE *) NULL)); - n = subn->rnode->rnode->lnode; - if (n->type != Node_regex) - subn->rnode->rnode->lnode = mk_rexp(n); - if (nexp == 2) - subn->rnode->rnode->lnode->re_flags |= FS_DFLT; - } - - r->subnode = subn; - return r; -} - -/* - * mkrangenode: - * This allocates a Node_line_range node with defined condpair and - * zeroes the trigger word to avoid the temptation of assuming that calling - * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. - * Otherwise like node(). - */ - -static NODE * -mkrangenode(cpair) -NODE *cpair; -{ - register NODE *r; - - getnode(r); - r->type = Node_line_range; - r->condpair = cpair; - r->triggered = FALSE; - return r; -} - -/* make_for_loop --- build a for loop */ - -static NODE * -make_for_loop(init, cond, incr) -NODE *init, *cond, *incr; -{ - register FOR_LOOP_HEADER *r; - NODE *n; - - emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); - getnode(n); - n->type = Node_illegal; - r->init = init; - r->cond = cond; - r->incr = incr; - n->sub.nodep.r.hd = r; - return n; -} - -/* dup_parms --- return TRUE if there are duplicate parameters */ - -static int -dup_parms(func) -NODE *func; -{ - register NODE *np; - char *fname, **names; - int count, i, j, dups; - NODE *params; - - if (func == NULL) /* error earlier */ - return TRUE; - - fname = func->param; - count = func->param_cnt; - params = func->rnode; - - if (count == 0) /* no args, no problem */ - return FALSE; - - if (params == NULL) /* error earlier */ - return TRUE; - - emalloc(names, char **, count * sizeof(char *), "dup_parms"); - - i = 0; - for (np = params; np != NULL; np = np->rnode) { - if (np->param == NULL) { /* error earlier, give up, go home */ - free(names); - return TRUE; - } - names[i++] = np->param; - } - - dups = 0; - for (i = 1; i < count; i++) { - for (j = 0; j < i; j++) { - if (strcmp(names[i], names[j]) == 0) { - dups++; - error( - "function `%s': parameter #%d, `%s', duplicates parameter #%d", - fname, i+1, names[j], j+1); - } - } - } - - free(names); - return (dups > 0 ? TRUE : FALSE); -} - -/* - * install: - * Install a name in the symbol table, even if it is already there. - * Caller must check against redefinition if that is desired. 
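
[Editorial aside: dup_parms above copies the parameter names into a flat array and then does a pairwise comparison, reporting each clash with both positions; for the handful of parameters a function has, the quadratic scan is perfectly adequate. A standalone version over plain strings:]

    #include <stdio.h>
    #include <string.h>

    /* Report every parameter name that repeats an earlier one.
     * Returns the number of duplicates found. */
    static int report_dup_parms(const char *fname, const char *names[], int count)
    {
        int dups = 0;

        for (int i = 1; i < count; i++)
            for (int j = 0; j < i; j++)
                if (strcmp(names[i], names[j]) == 0) {
                    dups++;
                    fprintf(stderr,
                        "function `%s': parameter #%d, `%s', duplicates parameter #%d\n",
                        fname, i + 1, names[i], j + 1);
                }
        return dups;
    }

    int main(void)
    {
        const char *parms[] = { "a", "b", "a", "c", "b" };

        return report_dup_parms("demo", parms, 5) > 0;
    }
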
- */ - -NODE * -install(name, value) -char *name; -NODE *value; -{ - register NODE *hp; - register size_t len; - register int bucket; - - len = strlen(name); - bucket = hash(name, len, (unsigned long) HASHSIZE); - getnode(hp); - hp->type = Node_hashnode; - hp->hnext = variables[bucket]; - variables[bucket] = hp; - hp->hlength = len; - hp->hvalue = value; - hp->hname = name; - hp->hvalue->vname = name; - return hp->hvalue; -} - -/* lookup --- find the most recent hash node for name installed by install */ - -NODE * -lookup(name) -const char *name; -{ - register NODE *bucket; - register size_t len; - - len = strlen(name); - for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)]; - bucket != NULL; bucket = bucket->hnext) - if (bucket->hlength == len && STREQN(bucket->hname, name, len)) - return bucket->hvalue; - - return NULL; -} - -/* - * append_right: - * Add new to the rightmost branch of LIST. This uses n^2 time, so we make - * a simple attempt at optimizing it. - */ - -static NODE * -append_right(list, new) -NODE *list, *new; -{ - register NODE *oldlist; - static NODE *savefront = NULL, *savetail = NULL; - - if (list == NULL || new == NULL) - return list; - - oldlist = list; - if (savefront == oldlist) { - savetail = savetail->rnode = new; - return oldlist; - } else - savefront = oldlist; - while (list->rnode != NULL) - list = list->rnode; - savetail = list->rnode = new; - return oldlist; -} - -/* - * func_install: - * check if name is already installed; if so, it had better have Null value, - * in which case def is added as the value. Otherwise, install name with def - * as value. - */ - -static void -func_install(params, def) -NODE *params; -NODE *def; -{ - NODE *r; - NODE *n; - - /* check for function foo(foo) { ... }. bleh. */ - for (n = params->rnode; n != NULL; n = n->rnode) { - if (strcmp(n->param, params->param) == 0) - fatal("function `%s': can't use function name as parameter name", - params->param); - } - - pop_params(params->rnode); - pop_var(params, FALSE); - r = lookup(params->param); - if (r != NULL) { - fatal("function name `%s' previously defined", params->param); - } else - (void) install(params->param, node(params, Node_func, def)); - - func_use(params->param, FUNC_DEFINE); -} - -/* pop_var --- remove a variable from the symbol table */ - -static void -pop_var(np, freeit) -NODE *np; -int freeit; -{ - register NODE *bucket, **save; - register size_t len; - char *name; - - name = np->param; - len = strlen(name); - save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]); - for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { - if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { - *save = bucket->hnext; - freenode(bucket); - if (freeit) - free(np->param); - return; - } - save = &(bucket->hnext); - } -} - -/* pop_params --- remove list of function parameters from symbol table */ - -/* - * pop parameters out of the symbol table. do this in reverse order to - * avoid reading freed memory if there were duplicated parameters. 
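
[Editorial aside: append_right above attaches a new node at the end of a right-linked list, but caches the last head/tail pair it saw; as long as the parser keeps appending to the same list, which is the common case while an expression list is being built, each append is O(1) instead of rescanning the chain. The same caching trick on a minimal singly linked list:]

    #include <stdio.h>
    #include <stdlib.h>

    struct node {
        int value;
        struct node *next;
    };

    /* Append item to list and return the (unchanged) head.  A one-entry
     * cache of the last head/tail makes repeated appends to the same
     * list constant time. */
    static struct node *append_right(struct node *list, struct node *item)
    {
        static struct node *savefront, *savetail;

        if (list == NULL || item == NULL)
            return list;

        if (savefront == list) {
            savetail = savetail->next = item;   /* cache hit: O(1) */
            return list;
        }
        savefront = list;                       /* cache miss: rescan once */
        struct node *tail = list;
        while (tail->next != NULL)
            tail = tail->next;
        savetail = tail->next = item;
        return list;
    }

    static struct node *mknode(int v)
    {
        struct node *n = malloc(sizeof *n);
        n->value = v;
        n->next = NULL;
        return n;
    }

    int main(void)
    {
        struct node *list = mknode(1);

        for (int v = 2; v <= 5; v++)
            list = append_right(list, mknode(v));
        for (struct node *p = list; p != NULL; p = p->next)
            printf("%d ", p->value);
        printf("\n");   /* 1 2 3 4 5 */
        return 0;
    }
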
- */ -static void -pop_params(params) -NODE *params; -{ - if (params == NULL) - return; - pop_params(params->rnode); - pop_var(params, TRUE); -} - -/* make_param --- make NAME into a function parameter */ - -static NODE * -make_param(name) -char *name; -{ - NODE *r; - - getnode(r); - r->type = Node_param_list; - r->rnode = NULL; - r->param = name; - r->param_cnt = param_counter++; - return (install(name, r)); -} - -static struct fdesc { - char *name; - short used; - short defined; - struct fdesc *next; -} *ftable[HASHSIZE]; - -/* func_use --- track uses and definitions of functions */ - -static void -func_use(name, how) -char *name; -enum defref how; -{ - struct fdesc *fp; - int len; - int ind; - - len = strlen(name); - ind = hash(name, len, HASHSIZE); - - for (fp = ftable[ind]; fp != NULL; fp = fp->next) { - if (strcmp(fp->name, name) == 0) { - if (how == FUNC_DEFINE) - fp->defined++; - else - fp->used++; - return; - } - } - - /* not in the table, fall through to allocate a new one */ - - emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); - memset(fp, '\0', sizeof(struct fdesc)); - emalloc(fp->name, char *, len + 1, "func_use"); - strcpy(fp->name, name); - if (how == FUNC_DEFINE) - fp->defined++; - else - fp->used++; - fp->next = ftable[ind]; - ftable[ind] = fp; -} - -/* check_funcs --- verify functions that are called but not defined */ - -static void -check_funcs() -{ - struct fdesc *fp, *next; - int i; - - for (i = 0; i < HASHSIZE; i++) { - for (fp = ftable[i]; fp != NULL; fp = fp->next) { -#ifdef REALLYMEAN - /* making this the default breaks old code. sigh. */ - if (fp->defined == 0) { - error( - "function `%s' called but never defined", fp->name); - errcount++; - } -#else - if (do_lint && fp->defined == 0) - warning( - "function `%s' called but never defined", fp->name); -#endif - if (do_lint && fp->used == 0) { - warning("function `%s' defined but never called", - fp->name); - } - } - } - - /* now let's free all the memory */ - for (i = 0; i < HASHSIZE; i++) { - for (fp = ftable[i]; fp != NULL; fp = next) { - next = fp->next; - free(fp->name); - free(fp); - } - } -} - -/* param_sanity --- look for parameters that are regexp constants */ - -static void -param_sanity(arglist) -NODE *arglist; -{ - NODE *argp, *arg; - int i; - - for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { - arg = argp->lnode; - if (arg->type == Node_regex) - warning("regexp constant for parameter #%d yields boolean value", i); - } -} - -/* variable --- make sure NAME is in the symbol table */ - -NODE * -variable(name, can_free, type) -char *name; -int can_free; -NODETYPE type; -{ - register NODE *r; - static int env_loaded = FALSE; - - if (! env_loaded && STREQ(name, "ENVIRON")) { - load_environ(); - env_loaded = TRUE; - } - if ((r = lookup(name)) == NULL) - r = install(name, node(Nnull_string, type, (NODE *) NULL)); - else if (can_free) - free(name); - return r; -} - -/* mk_rexp --- make a regular expression constant */ - -static NODE * -mk_rexp(exp) -NODE *exp; -{ - NODE *n; - - if (exp->type == Node_regex) - return exp; - - getnode(n); - n->type = Node_regex; - n->re_exp = exp; - n->re_text = NULL; - n->re_reg = NULL; - n->re_flags = 0; - n->re_cnt = 1; - return n; -} - -/* isnoeffect --- when used as a statement, has no side effects */ - -/* - * To be completely general, we should recursively walk the parse - * tree, to make sure that all the subexpressions also have no effect. - * Instead, we just weaken the actual warning that's printed, up above - * in the grammar. 
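
[Editorial aside: func_use/check_funcs above keep a small hash of function names with "defined" and "used" counters, bumped as the parser sees definitions and calls; at end of parse, anything used but never defined, or defined but never called, can be reported. A flat-array version of the same bookkeeping, without the hashing:]

    #include <stdio.h>
    #include <string.h>

    enum defref { FUNC_DEFINE, FUNC_USE };

    struct fdesc {
        const char *name;
        int defined;
        int used;
    };

    static struct fdesc ftable[64];
    static int nfuncs;

    /* Record one definition of, or one reference to, name. */
    static void func_use(const char *name, enum defref how)
    {
        struct fdesc *fp = NULL;

        for (int i = 0; i < nfuncs; i++)
            if (strcmp(ftable[i].name, name) == 0)
                fp = &ftable[i];
        if (fp == NULL && nfuncs < 64) {
            fp = &ftable[nfuncs++];
            fp->name = name;
        }
        if (fp == NULL)
            return;                      /* table full; ignore in this sketch */
        if (how == FUNC_DEFINE)
            fp->defined++;
        else
            fp->used++;
    }

    /* After parsing: warn about calls with no definition and vice versa. */
    static void check_funcs(void)
    {
        for (int i = 0; i < nfuncs; i++) {
            if (ftable[i].defined == 0)
                fprintf(stderr, "function `%s' called but never defined\n",
                        ftable[i].name);
            if (ftable[i].used == 0)
                fprintf(stderr, "function `%s' defined but never called\n",
                        ftable[i].name);
        }
    }

    int main(void)
    {
        func_use("max", FUNC_DEFINE);
        func_use("max", FUNC_USE);
        func_use("helper", FUNC_USE);    /* never defined */
        check_funcs();
        return 0;
    }
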
- */ - -static int -isnoeffect(type) -NODETYPE type; -{ - switch (type) { - case Node_times: - case Node_quotient: - case Node_mod: - case Node_plus: - case Node_minus: - case Node_subscript: - case Node_concat: - case Node_exp: - case Node_unary_minus: - case Node_field_spec: - case Node_and: - case Node_or: - case Node_equal: - case Node_notequal: - case Node_less: - case Node_greater: - case Node_leq: - case Node_geq: - case Node_match: - case Node_nomatch: - case Node_not: - case Node_val: - case Node_in_array: - case Node_NF: - case Node_NR: - case Node_FNR: - case Node_FS: - case Node_RS: - case Node_FIELDWIDTHS: - case Node_IGNORECASE: - case Node_OFS: - case Node_ORS: - case Node_OFMT: - case Node_CONVFMT: - return TRUE; - default: - break; /* keeps gcc -Wall happy */ - } - - return FALSE; -} - -/* isassignable --- can this node be assigned to? */ - -static int -isassignable(n) -register NODE *n; -{ - switch (n->type) { - case Node_var: - case Node_FIELDWIDTHS: - case Node_RS: - case Node_FS: - case Node_FNR: - case Node_NR: - case Node_NF: - case Node_IGNORECASE: - case Node_OFMT: - case Node_CONVFMT: - case Node_ORS: - case Node_OFS: - case Node_field_spec: - case Node_subscript: - return TRUE; - case Node_param_list: - return ((n->flags & FUNC) == 0); /* ok if not func name */ - default: - break; /* keeps gcc -Wall happy */ - } - return FALSE; -} - -/* for debugging */ -NODE * -stopme(tree) -NODE *tree; -{ - return tmp_number((AWKNUM) 0.0); -} diff --git a/contrib/awk/awkgram.y b/contrib/awk/awkgram.y deleted file mode 100644 index 704b498..0000000 --- a/contrib/awk/awkgram.y +++ /dev/null @@ -1,2875 +0,0 @@ -/* - * awk.y --- yacc/bison parser - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -%{ -#ifdef GAWKDEBUG -#define YYDEBUG 12 -#endif - -#include "awk.h" - -#define CAN_FREE TRUE -#define DONT_FREE FALSE - -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -static void yyerror(const char *m, ...) 
; -#else -static void yyerror(); /* va_alist */ -#endif -static char *get_src_buf P((void)); -static int yylex P((void)); -static NODE *node_common P((NODETYPE op)); -static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); -static NODE *mkrangenode P((NODE *cpair)); -static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); -static NODE *append_right P((NODE *list, NODE *new)); -static void func_install P((NODE *params, NODE *def)); -static void pop_var P((NODE *np, int freeit)); -static void pop_params P((NODE *params)); -static NODE *make_param P((char *name)); -static NODE *mk_rexp P((NODE *exp)); -static int dup_parms P((NODE *func)); -static void param_sanity P((NODE *arglist)); -static void parms_shadow P((const char *fname, NODE *func)); -static int isnoeffect P((NODETYPE t)); -static int isassignable P((NODE *n)); -static void dumpintlstr P((char *str, size_t len)); -static void count_args P((NODE *n)); - -enum defref { FUNC_DEFINE, FUNC_USE }; -static void func_use P((char *name, enum defref how)); -static void check_funcs P((void)); - -static int want_assign; /* lexical scanning kludge */ -static int want_regexp; /* lexical scanning kludge */ -static int can_return; /* lexical scanning kludge */ -static int io_allowed = TRUE; /* lexical scanning kludge */ -static int parsing_end_rule = FALSE; /* for warnings */ -static char *lexptr; /* pointer to next char during parsing */ -static char *lexend; -static char *lexptr_begin; /* keep track of where we were for error msgs */ -static char *lexeme; /* beginning of lexeme for debugging */ -static char *thisline = NULL; -#define YYDEBUG_LEXER_TEXT (lexeme) -static int param_counter; -static char *tokstart = NULL; -static char *tok = NULL; -static char *tokend; - -static long func_count; /* total number of functions */ - -#define HASHSIZE 1021 /* this constant only used here */ -NODE *variables[HASHSIZE]; -static int var_count; /* total number of global variables */ - -extern char *source; -extern int sourceline; -extern struct src *srcfiles; -extern int numfiles; -extern int errcount; -extern NODE *begin_block; -extern NODE *end_block; -%} - -%union { - long lval; - AWKNUM fval; - NODE *nodeval; - NODETYPE nodetypeval; - char *sval; - NODE *(*ptrval)(); -} - -%type function_prologue function_body -%type rexp exp start program rule simp_exp -%type non_post_simp_exp -%type pattern -%type action variable param_list -%type rexpression_list opt_rexpression_list -%type expression_list opt_expression_list -%type statements statement if_statement opt_param_list -%type opt_exp opt_variable regexp -%type input_redir output_redir -%type print -%type func_name -%type lex_builtin - -%token FUNC_CALL NAME REGEXP -%token ERROR -%token YNUMBER YSTRING -%token RELOP APPEND_OP -%token ASSIGNOP MATCHOP NEWLINE CONCAT_OP -%token LEX_BEGIN LEX_END LEX_IF LEX_ELSE LEX_RETURN LEX_DELETE -%token LEX_WHILE LEX_DO LEX_FOR LEX_BREAK LEX_CONTINUE -%token LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT LEX_FUNCTION -%token LEX_GETLINE LEX_NEXTFILE -%token LEX_IN -%token LEX_AND LEX_OR INCREMENT DECREMENT -%token LEX_BUILTIN LEX_LENGTH - -/* these are just yylval numbers */ - -/* Lowest to highest */ -%right ASSIGNOP -%right '?' ':' -%left LEX_OR -%left LEX_AND -%left LEX_GETLINE -%nonassoc LEX_IN -%left FUNC_CALL LEX_BUILTIN LEX_LENGTH -%nonassoc ',' -%nonassoc MATCHOP -%nonassoc RELOP '<' '>' '|' APPEND_OP TWOWAYIO -%left CONCAT_OP -%left YSTRING YNUMBER -%left '+' '-' -%left '*' '/' '%' -%right '!' 
UNARY -%right '^' -%left INCREMENT DECREMENT -%left '$' -%left '(' ')' -%% - -start - : opt_nls program opt_nls - { - expression_value = $2; - check_funcs(); - } - ; - -program - : rule - { - if ($1 != NULL) - $$ = $1; - else - $$ = NULL; - yyerrok; - } - | program rule - /* add the rule to the tail of list */ - { - if ($2 == NULL) - $$ = $1; - else if ($1 == NULL) - $$ = $2; - else { - if ($1->type != Node_rule_list) - $1 = node($1, Node_rule_list, - (NODE*) NULL); - $$ = append_right($1, - node($2, Node_rule_list, (NODE *) NULL)); - } - yyerrok; - } - | error { $$ = NULL; } - | program error { $$ = NULL; } - | /* empty */ { $$ = NULL; } - ; - -rule - : LEX_BEGIN { io_allowed = FALSE; } - action - { - if (begin_block != NULL) { - if (begin_block->type != Node_rule_list) - begin_block = node(begin_block, Node_rule_list, - (NODE *) NULL); - (void) append_right(begin_block, node( - node((NODE *) NULL, Node_rule_node, $3), - Node_rule_list, (NODE *) NULL) ); - } else - begin_block = node((NODE *) NULL, Node_rule_node, $3); - $$ = NULL; - io_allowed = TRUE; - yyerrok; - } - | LEX_END { io_allowed = FALSE; parsing_end_rule = TRUE; } - action - { - if (end_block != NULL) { - if (end_block->type != Node_rule_list) - end_block = node(end_block, Node_rule_list, - (NODE *) NULL); - (void) append_right (end_block, node( - node((NODE *) NULL, Node_rule_node, $3), - Node_rule_list, (NODE *) NULL)); - } else - end_block = node((NODE *) NULL, Node_rule_node, $3); - $$ = NULL; - io_allowed = TRUE; - parsing_end_rule = FALSE; - yyerrok; - } - | LEX_BEGIN statement_term - { - warning(_("BEGIN blocks must have an action part")); - errcount++; - yyerrok; - } - | LEX_END statement_term - { - warning(_("END blocks must have an action part")); - errcount++; - yyerrok; - } - | pattern action - { $$ = node($1, Node_rule_node, $2); yyerrok; } - | action - { $$ = node((NODE *) NULL, Node_rule_node, $1); yyerrok; } - | pattern statement_term - { - $$ = node($1, - Node_rule_node, - node(node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL), - Node_K_print, - (NODE *) NULL)); - yyerrok; - } - | function_prologue function_body - { - func_install($1, $2); - $$ = NULL; - yyerrok; - } - ; - -func_name - : NAME - { $$ = $1; } - | FUNC_CALL - { $$ = $1; } - | lex_builtin - { - yyerror(_("`%s' is a built-in function, it cannot be redefined"), - tokstart); - errcount++; - /* yyerrok; */ - } - ; - -lex_builtin - : LEX_BUILTIN - | LEX_LENGTH - ; - -function_prologue - : LEX_FUNCTION - { - param_counter = 0; - } - func_name '(' opt_param_list r_paren opt_nls - { - NODE *t; - - t = make_param($3); - t->flags |= FUNC; - $$ = append_right(t, $5); - can_return = TRUE; - /* check for duplicate parameter names */ - if (dup_parms($$)) - errcount++; - } - ; - -function_body - : l_brace statements r_brace opt_semi opt_nls - { - $$ = $2; - can_return = FALSE; - } - | l_brace r_brace opt_semi opt_nls - { - $$ = node((NODE *) NULL, Node_K_return, (NODE *) NULL); - can_return = FALSE; - } - ; - - -pattern - : exp - { $$ = $1; } - | exp ',' exp - { $$ = mkrangenode(node($1, Node_cond_pair, $3)); } - ; - -regexp - /* - * In this rule, want_regexp tells yylex that the next thing - * is a regexp so it should read up to the closing slash. 
- */ - : '/' - { ++want_regexp; } - REGEXP '/' - { - NODE *n; - size_t len; - - getnode(n); - n->type = Node_regex; - len = strlen($3); - n->re_exp = make_string($3, len); - n->re_reg = make_regexp($3, len, FALSE, TRUE); - n->re_text = NULL; - n->re_flags = CONST; - n->re_cnt = 1; - $$ = n; - } - ; - -action - : l_brace statements r_brace opt_semi opt_nls - { $$ = $2; } - | l_brace r_brace opt_semi opt_nls - { $$ = NULL; } - ; - -statements - : statement - { - $$ = $1; - if (do_lint && isnoeffect($$->type)) - lintwarn(_("statement may have no effect")); - } - | statements statement - { - if ($1 == NULL || $1->type != Node_statement_list) - $1 = node($1, Node_statement_list, (NODE *) NULL); - $$ = append_right($1, - node($2, Node_statement_list, (NODE *) NULL)); - yyerrok; - } - | error - { $$ = NULL; } - | statements error - { $$ = NULL; } - ; - -statement_term - : nls - | semi opt_nls - ; - -statement - : semi opt_nls - { $$ = NULL; } - | l_brace r_brace - { $$ = NULL; } - | l_brace statements r_brace - { $$ = $2; } - | if_statement - { $$ = $1; } - | LEX_WHILE '(' exp r_paren opt_nls statement - { $$ = node($3, Node_K_while, $6); } - | LEX_DO opt_nls statement LEX_WHILE '(' exp r_paren opt_nls - { $$ = node($6, Node_K_do, $3); } - | LEX_FOR '(' NAME LEX_IN NAME r_paren opt_nls statement - { - /* - * Efficiency hack. Recognize the special case of - * - * for (iggy in foo) - * delete foo[iggy] - * - * and treat it as if it were - * - * delete foo - * - * Check that the body is a `delete a[i]' statement, - * and that both the loop var and array names match. - */ - if ($8 != NULL && $8->type == Node_K_delete - && $8->rnode != NULL - && ($8->rnode->type == Node_var || $8->rnode->type == Node_param_list) - && strcmp($3, $8->rnode->var_value->vname) == 0 - && strcmp($5, $8->lnode->vname) == 0) { - $8->type = Node_K_delete_loop; - $$ = $8; - } else { - $$ = node($8, Node_K_arrayfor, - make_for_loop(variable($3, CAN_FREE, Node_var), - (NODE *) NULL, variable($5, CAN_FREE, Node_var_array))); - } - } - | LEX_FOR '(' opt_exp semi opt_nls exp semi opt_nls opt_exp r_paren opt_nls statement - { - $$ = node($12, Node_K_for, (NODE *) make_for_loop($3, $6, $9)); - } - | LEX_FOR '(' opt_exp semi opt_nls semi opt_nls opt_exp r_paren opt_nls statement - { - $$ = node($11, Node_K_for, - (NODE *) make_for_loop($3, (NODE *) NULL, $8)); - } - | LEX_BREAK statement_term - /* for break, maybe we'll have to remember where to break to */ - { $$ = node((NODE *) NULL, Node_K_break, (NODE *) NULL); } - | LEX_CONTINUE statement_term - /* similarly */ - { $$ = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); } - | print '(' expression_list r_paren output_redir statement_term - { - $$ = node($3, $1, $5); - if ($$->type == Node_K_printf) - count_args($$); - } - | print opt_rexpression_list output_redir statement_term - { - if ($1 == Node_K_print && $2 == NULL) { - static int warned = FALSE; - - $2 = node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); - - if (do_lint && ! io_allowed && ! warned) { - warned = TRUE; - lintwarn( - _("plain `print' in BEGIN or END rule should probably be `print \"\"'")); - } - } - - $$ = node($2, $1, $3); - if ($$->type == Node_K_printf) - count_args($$); - } - | LEX_NEXT statement_term - { NODETYPE type; - - if (! 
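
[Editorial aside: the "efficiency hack" above checks, at parse time, whether the body of `for (i in a)` is exactly `delete a[i]` for the same loop variable and array; if so, the node is retagged as a whole-array delete so the interpreter can clear the array in one pass. A toy version of that shape check over a hypothetical AST, not gawk's NODE structures:]

    #include <stdio.h>
    #include <string.h>

    enum op { OP_DELETE_ELEM, OP_DELETE_ARRAY, OP_OTHER };

    struct stmt {
        enum op op;
        const char *array;      /* array being deleted from */
        const char *subscript;  /* variable used as the subscript */
    };

    /* If the body of `for (var in array)` is `delete array[var]`,
     * rewrite it into a single whole-array delete. */
    static void maybe_fold_delete_loop(const char *var, const char *array,
                                       struct stmt *body)
    {
        if (body != NULL
            && body->op == OP_DELETE_ELEM
            && strcmp(body->array, array) == 0
            && strcmp(body->subscript, var) == 0)
            body->op = OP_DELETE_ARRAY;
    }

    int main(void)
    {
        struct stmt body = { OP_DELETE_ELEM, "foo", "iggy" };

        maybe_fold_delete_loop("iggy", "foo", &body);
        printf("%s\n", body.op == OP_DELETE_ARRAY ? "folded" : "kept");
        return 0;
    }
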
io_allowed) - yyerror(_("`next' used in BEGIN or END action")); - type = Node_K_next; - $$ = node((NODE *) NULL, type, (NODE *) NULL); - } - | LEX_NEXTFILE statement_term - { - if (do_lint) - lintwarn(_("`nextfile' is a gawk extension")); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error(_("`nextfile' is a gawk extension")); - } - if (! io_allowed) { - /* same thing */ - errcount++; - error(_("`nextfile' used in BEGIN or END action")); - } - $$ = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL); - } - | LEX_EXIT opt_exp statement_term - { $$ = node($2, Node_K_exit, (NODE *) NULL); } - | LEX_RETURN - { - if (! can_return) - yyerror(_("`return' used outside function context")); - } - opt_exp statement_term - { $$ = node($3, Node_K_return, (NODE *) NULL); } - | LEX_DELETE NAME '[' expression_list ']' statement_term - { $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, $4); } - | LEX_DELETE NAME statement_term - { - if (do_lint) - lintwarn(_("`delete array' is a gawk extension")); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error(_("`delete array' is a gawk extension")); - } - $$ = node(variable($2, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); - } - | exp statement_term - { $$ = $1; } - ; - -print - : LEX_PRINT - { $$ = $1; } - | LEX_PRINTF - { $$ = $1; } - ; - -if_statement - : LEX_IF '(' exp r_paren opt_nls statement - { - $$ = node($3, Node_K_if, - node($6, Node_if_branches, (NODE *) NULL)); - } - | LEX_IF '(' exp r_paren opt_nls statement - LEX_ELSE opt_nls statement - { $$ = node($3, Node_K_if, - node($6, Node_if_branches, $9)); } - ; - -nls - : NEWLINE - { want_assign = FALSE; } - | nls NEWLINE - ; - -opt_nls - : /* empty */ - | nls - ; - -input_redir - : /* empty */ - { $$ = NULL; } - | '<' simp_exp - { $$ = node($2, Node_redirect_input, (NODE *) NULL); } - ; - -output_redir - : /* empty */ - { $$ = NULL; } - | '>' exp - { $$ = node($2, Node_redirect_output, (NODE *) NULL); } - | APPEND_OP exp - { $$ = node($2, Node_redirect_append, (NODE *) NULL); } - | '|' exp - { $$ = node($2, Node_redirect_pipe, (NODE *) NULL); } - | TWOWAYIO exp - { - if ($2->type == Node_K_getline - && $2->rnode->type == Node_redirect_twoway) - yyerror(_("multistage two-way pipelines don't work")); - $$ = node($2, Node_redirect_twoway, (NODE *) NULL); - } - ; - -opt_param_list - : /* empty */ - { $$ = NULL; } - | param_list - { $$ = $1; } - ; - -param_list - : NAME - { $$ = make_param($1); } - | param_list comma NAME - { $$ = append_right($1, make_param($3)); yyerrok; } - | error - { $$ = NULL; } - | param_list error - { $$ = NULL; } - | param_list comma error - { $$ = NULL; } - ; - -/* optional expression, as in for loop */ -opt_exp - : /* empty */ - { $$ = NULL; } - | exp - { $$ = $1; } - ; - -opt_rexpression_list - : /* empty */ - { $$ = NULL; } - | rexpression_list - { $$ = $1; } - ; - -rexpression_list - : rexp - { $$ = node($1, Node_expression_list, (NODE *) NULL); } - | rexpression_list comma rexp - { - $$ = append_right($1, - node($3, Node_expression_list, (NODE *) NULL)); - yyerrok; - } - | error - { $$ = NULL; } - | rexpression_list error - { $$ = NULL; } - | rexpression_list error rexp - { $$ = NULL; } - | rexpression_list comma error - { $$ = NULL; } - ; - -opt_expression_list - : /* empty */ - { $$ = NULL; } - | expression_list - { $$ = $1; } - ; - -expression_list - : exp - { $$ = node($1, Node_expression_list, (NODE *) NULL); 
} - | expression_list comma exp - { - $$ = append_right($1, - node($3, Node_expression_list, (NODE *) NULL)); - yyerrok; - } - | error - { $$ = NULL; } - | expression_list error - { $$ = NULL; } - | expression_list error exp - { $$ = NULL; } - | expression_list comma error - { $$ = NULL; } - ; - -/* Expressions, not including the comma operator. */ -exp : variable ASSIGNOP - { want_assign = FALSE; } - exp - { - if (do_lint && $4->type == Node_regex) - lintwarn(_("regular expression on right of assignment")); - $$ = node($1, $2, $4); - } - | '(' expression_list r_paren LEX_IN NAME - { $$ = node(variable($5, CAN_FREE, Node_var_array), Node_in_array, $2); } - | exp '|' LEX_GETLINE opt_variable - { - $$ = node($4, Node_K_getline, - node($1, Node_redirect_pipein, (NODE *) NULL)); - } - | exp TWOWAYIO LEX_GETLINE opt_variable - { - $$ = node($4, Node_K_getline, - node($1, Node_redirect_twoway, (NODE *) NULL)); - } - | LEX_GETLINE opt_variable input_redir - { - if (do_lint && ! io_allowed && parsing_end_rule && $3 == NULL) - lintwarn(_("non-redirected `getline' undefined inside END action")); - $$ = node($2, Node_K_getline, $3); - } - | exp LEX_AND exp - { $$ = node($1, Node_and, $3); } - | exp LEX_OR exp - { $$ = node($1, Node_or, $3); } - | exp MATCHOP exp - { - if ($1->type == Node_regex) - warning(_("regular expression on left of `~' or `!~' operator")); - $$ = node($1, $2, mk_rexp($3)); - } - | regexp - { - $$ = $1; - if (do_lint && tokstart[0] == '*') { - /* possible C comment */ - int n = strlen(tokstart) - 1; - if (tokstart[n] == '*') - lintwarn(_("regexp constant `/%s/' looks like a C comment, but is not"), tokstart); - } - } - | '!' regexp %prec UNARY - { - $$ = node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_nomatch, - $2); - } - | exp LEX_IN NAME - { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } - | exp RELOP exp - { - if (do_lint && $3->type == Node_regex) - lintwarn(_("regular expression on right of comparison")); - $$ = node($1, $2, $3); - } - | exp '<' exp - { $$ = node($1, Node_less, $3); } - | exp '>' exp - { $$ = node($1, Node_greater, $3); } - | exp '?' exp ':' exp - { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} - | simp_exp - { $$ = $1; } - | exp simp_exp %prec CONCAT_OP - { $$ = node($1, Node_concat, $2); } - ; - -rexp - : variable ASSIGNOP - { want_assign = FALSE; } - rexp - { $$ = node($1, $2, $4); } - | rexp LEX_AND rexp - { $$ = node($1, Node_and, $3); } - | rexp LEX_OR rexp - { $$ = node($1, Node_or, $3); } - | LEX_GETLINE opt_variable input_redir - { - if (do_lint && ! io_allowed && $3 == NULL) - lintwarn(_("non-redirected `getline' undefined inside BEGIN or END action")); - $$ = node($2, Node_K_getline, $3); - } - | regexp - { $$ = $1; } - | '!' regexp %prec UNARY - { $$ = node((NODE *) NULL, Node_nomatch, $2); } - | rexp MATCHOP rexp - { $$ = node($1, $2, mk_rexp($3)); } - | rexp LEX_IN NAME - { $$ = node(variable($3, CAN_FREE, Node_var_array), Node_in_array, $1); } - | rexp RELOP rexp - { $$ = node($1, $2, $3); } - | rexp '?' rexp ':' rexp - { $$ = node($1, Node_cond_exp, node($3, Node_if_branches, $5));} - | simp_exp - { $$ = $1; } - | rexp simp_exp %prec CONCAT_OP - { $$ = node($1, Node_concat, $2); } - ; - -simp_exp - : non_post_simp_exp - /* Binary operators in order of decreasing precedence. 
*/ - | simp_exp '^' simp_exp - { $$ = node($1, Node_exp, $3); } - | simp_exp '*' simp_exp - { $$ = node($1, Node_times, $3); } - | simp_exp '/' simp_exp - { $$ = node($1, Node_quotient, $3); } - | simp_exp '%' simp_exp - { $$ = node($1, Node_mod, $3); } - | simp_exp '+' simp_exp - { $$ = node($1, Node_plus, $3); } - | simp_exp '-' simp_exp - { $$ = node($1, Node_minus, $3); } - | variable INCREMENT - { $$ = node($1, Node_postincrement, (NODE *) NULL); } - | variable DECREMENT - { $$ = node($1, Node_postdecrement, (NODE *) NULL); } - ; - -non_post_simp_exp - : '!' simp_exp %prec UNARY - { $$ = node($2, Node_not, (NODE *) NULL); } - | '(' exp r_paren - { $$ = $2; } - | LEX_BUILTIN - '(' opt_expression_list r_paren - { $$ = snode($3, Node_builtin, (int) $1); } - | LEX_LENGTH '(' opt_expression_list r_paren - { $$ = snode($3, Node_builtin, (int) $1); } - | LEX_LENGTH - { - if (do_lint) - lintwarn(_("call of `length' without parentheses is not portable")); - $$ = snode((NODE *) NULL, Node_builtin, (int) $1); - if (do_posix) - warning(_("call of `length' without parentheses is deprecated by POSIX")); - } - | FUNC_CALL '(' opt_expression_list r_paren - { - $$ = node($3, Node_func_call, make_string($1, strlen($1))); - func_use($1, FUNC_USE); - param_sanity($3); - free($1); - } - | variable - | INCREMENT variable - { $$ = node($2, Node_preincrement, (NODE *) NULL); } - | DECREMENT variable - { $$ = node($2, Node_predecrement, (NODE *) NULL); } - | YNUMBER - { $$ = $1; } - | YSTRING - { $$ = $1; } - - | '-' simp_exp %prec UNARY - { - if ($2->type == Node_val) { - $2->numbr = -(force_number($2)); - $$ = $2; - } else - $$ = node($2, Node_unary_minus, (NODE *) NULL); - } - | '+' simp_exp %prec UNARY - { - /* - * was: $$ = $2 - * POSIX semantics: force a conversion to numeric type - */ - $$ = node (make_number(0.0), Node_plus, $2); - } - ; - -opt_variable - : /* empty */ - { $$ = NULL; } - | variable - { $$ = $1; } - ; - -variable - : NAME - { $$ = variable($1, CAN_FREE, Node_var); } - | NAME '[' expression_list ']' - { - if ($3 == NULL) { - fatal(_("invalid subscript expression")); - } else if ($3->rnode == NULL) { - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3->lnode); - freenode($3); - } else - $$ = node(variable($1, CAN_FREE, Node_var_array), Node_subscript, $3); - } - | '$' non_post_simp_exp - { $$ = node($2, Node_field_spec, (NODE *) NULL); } - ; - -l_brace - : '{' opt_nls - ; - -r_brace - : '}' opt_nls { yyerrok; } - ; - -r_paren - : ')' { yyerrok; } - ; - -opt_semi - : /* empty */ - | semi - ; - -semi - : ';' { yyerrok; want_assign = FALSE; } - ; - -comma : ',' opt_nls { yyerrok; } - ; - -%% - -struct token { - const char *operator; /* text to match */ - NODETYPE value; /* node type */ - int class; /* lexical class */ - unsigned flags; /* # of args. allowed and compatability */ -# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ -# define A(n) (1<<(n)) -# define VERSION_MASK 0xFF00 /* old awk is zero */ -# define NOT_OLD 0x0100 /* feature not in old awk */ -# define NOT_POSIX 0x0200 /* feature not in POSIX */ -# define GAWKX 0x0400 /* gawk extension */ -# define RESX 0x0800 /* Bell Labs Research extension */ - NODE *(*ptr)(); /* function that implements this keyword */ -}; - -/* Tokentab is sorted ascii ascending order, so it can be binary searched. */ -/* Function pointers come from declarations in awk.h. 
*/ - -static struct token tokentab[] = { -{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, -{"END", Node_illegal, LEX_END, 0, 0}, -#ifdef ARRAYDEBUG -{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump}, -#endif -{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, -{"asort", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_asort}, -{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, -{"bindtextdomain", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2), do_bindtextdomain}, -{"break", Node_K_break, LEX_BREAK, 0, 0}, -{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1)|A(2), do_close}, -{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, -{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, -{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, -{"dcgettext", Node_builtin, LEX_BUILTIN, GAWKX|A(1)|A(2)|A(3), do_dcgettext}, -{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, -{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, -{"else", Node_illegal, LEX_ELSE, 0, 0}, -{"exit", Node_K_exit, LEX_EXIT, 0, 0}, -{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, -{"extension", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_ext}, -{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, -{"for", Node_K_for, LEX_FOR, 0, 0}, -{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, -{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, -{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, -{"if", Node_K_if, LEX_IF, 0, 0}, -{"in", Node_illegal, LEX_IN, 0, 0}, -{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, -{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, -{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, -{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, -{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, -{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_match}, -{"mktime", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_mktime}, -{"next", Node_K_next, LEX_NEXT, 0, 0}, -{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, -{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, -{"print", Node_K_print, LEX_PRINT, 0, 0}, -{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, -{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, -{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, -{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, -{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, -{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, -{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, -{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, -{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, -#if defined(GAWKDEBUG) || defined(ARRAYDEBUG) /* || ... 
*/ -{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme}, -#endif -{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime}, -{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, -{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, -{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, -{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, -{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, -{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, -{"while", Node_K_while, LEX_WHILE, 0, 0}, -{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, -}; - -/* getfname --- return name of a builtin function (for pretty printing) */ - -const char * -getfname(register NODE *(*fptr)()) -{ - register int i, j; - - j = sizeof(tokentab) / sizeof(tokentab[0]); - /* linear search, no other way to do it */ - for (i = 0; i < j; i++) - if (tokentab[i].ptr == fptr) - return tokentab[i].operator; - - fatal(_("fptr %x not in tokentab\n"), fptr); - return NULL; /* to stop warnings */ -} - -/* yyerror --- print a syntax error message, show where */ - -/* - * Function identifier purposely indented to avoid mangling - * by ansi2knr. Sigh. - */ - -static void -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - yyerror(const char *m, ...) -#else -/* VARARGS0 */ - yyerror(va_alist) - va_dcl -#endif -{ - va_list args; - const char *mesg = NULL; - register char *bp, *cp; - char *scan; - char buf[120]; - static char end_of_file_line[] = "(END OF FILE)"; - - errcount++; - /* Find the current line in the input file */ - if (lexptr && lexeme) { - if (thisline == NULL) { - cp = lexeme; - if (*cp == '\n') { - cp--; - mesg = _("unexpected newline"); - } - for (; cp != lexptr_begin && *cp != '\n'; --cp) - continue; - if (*cp == '\n') - cp++; - thisline = cp; - } - /* NL isn't guaranteed */ - bp = lexeme; - while (bp < lexend && *bp && *bp != '\n') - bp++; - } else { - thisline = end_of_file_line; - bp = thisline + strlen(thisline); - } - msg("%.*s", (int) (bp - thisline), thisline); - bp = buf; - cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */ - if (lexptr != NULL) { - scan = thisline; - while (bp < cp && scan < lexeme) - if (*scan++ == '\t') - *bp++ = '\t'; - else - *bp++ = ' '; - *bp++ = '^'; - *bp++ = ' '; - } -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, m); - if (mesg == NULL) - mesg = m; -#else - va_start(args); - if (mesg == NULL) - mesg = va_arg(args, char *); -#endif - strcpy(bp, mesg); - err("", buf, args); - va_end(args); -} - -/* get_src_buf --- read the next buffer of source program */ - -static char * -get_src_buf() -{ - static int samefile = FALSE; - static int nextfile = 0; - static char *buf = NULL; - static int fd; - int n; - register char *scan; - static int len = 0; - static int did_newline = FALSE; - int newfile; - struct stat sbuf; - -# define SLOP 128 /* enough space to hold most source lines */ - -again: - newfile = FALSE; - if (nextfile > numfiles) - return NULL; - - if (srcfiles[nextfile].stype == CMDLINE) { - if (len == 0) { - len = strlen(srcfiles[nextfile].val); - if (len == 0) { - /* - * Yet Another Special case: - * gawk '' /path/name - * Sigh. - */ - static int warned = FALSE; - - if (do_lint && ! 
warned) { - warned = TRUE; - lintwarn(_("empty program text on command line")); - } - ++nextfile; - goto again; - } - sourceline = 1; - lexptr = lexptr_begin = srcfiles[nextfile].val; - lexend = lexptr + len; - } else if (! did_newline && *(lexptr-1) != '\n') { - /* - * The following goop is to ensure that the source - * ends with a newline and that the entire current - * line is available for error messages. - */ - int offset; - - did_newline = TRUE; - offset = lexptr - lexeme; - for (scan = lexeme; scan > lexptr_begin; scan--) - if (*scan == '\n') { - scan++; - break; - } - len = lexptr - scan; - emalloc(buf, char *, len+1, "get_src_buf"); - memcpy(buf, scan, len); - thisline = buf; - lexptr = buf + len; - *lexptr = '\n'; - lexeme = lexptr - offset; - lexptr_begin = buf; - lexend = lexptr + 1; - } else { - len = 0; - lexeme = lexptr = lexptr_begin = NULL; - } - if (lexptr == NULL && ++nextfile <= numfiles) - goto again; - return lexptr; - } - if (! samefile) { - source = srcfiles[nextfile].val; - if (source == NULL) { - if (buf != NULL) { - free(buf); - buf = NULL; - } - len = 0; - return lexeme = lexptr = lexptr_begin = NULL; - } - fd = pathopen(source); - if (fd <= INVALID_HANDLE) { - char *in; - - /* suppress file name and line no. in error mesg */ - in = source; - source = NULL; - fatal(_("can't open source file `%s' for reading (%s)"), - in, strerror(errno)); - } - len = optimal_bufsize(fd, & sbuf); - newfile = TRUE; - if (buf != NULL) - free(buf); - emalloc(buf, char *, len + SLOP, "get_src_buf"); - lexptr_begin = buf + SLOP; - samefile = TRUE; - sourceline = 1; - } else { - /* - * Here, we retain the current source line (up to length SLOP) - * in the beginning of the buffer that was overallocated above - */ - int offset; - int linelen; - - offset = lexptr - lexeme; - for (scan = lexeme; scan > lexptr_begin; scan--) - if (*scan == '\n') { - scan++; - break; - } - linelen = lexptr - scan; - if (linelen > SLOP) - linelen = SLOP; - thisline = buf + SLOP - linelen; - memcpy(thisline, scan, linelen); - lexeme = buf + SLOP - offset; - lexptr_begin = thisline; - } - n = read(fd, buf + SLOP, len); - if (n == -1) - fatal(_("can't read sourcefile `%s' (%s)"), - source, strerror(errno)); - if (n == 0) { - if (newfile) { - static int warned = FALSE; - - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("source file `%s' is empty"), source); - } - } - if (fd != fileno(stdin)) /* safety */ - close(fd); - samefile = FALSE; - nextfile++; - if (lexeme) - *lexeme = '\0'; - len = 0; - goto again; - } - lexptr = buf + SLOP; - lexend = lexptr + n; - return buf; -} - -/* tokadd --- add a character to the token buffer */ - -#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok) - -/* tokexpand --- grow the token buffer */ - -char * -tokexpand() -{ - static int toksize = 60; - int tokoffset; - - tokoffset = tok - tokstart; - toksize *= 2; - if (tokstart != NULL) - erealloc(tokstart, char *, toksize, "tokexpand"); - else - emalloc(tokstart, char *, toksize, "tokexpand"); - tokend = tokstart + toksize; - tok = tokstart + tokoffset; - return tok; -} - -/* nextc --- get the next input character */ - -#if GAWKDEBUG -int -nextc() -{ - int c; - - if (lexptr && lexptr < lexend) - c = (int) (unsigned char) *lexptr++; - else if (get_src_buf()) - c = (int) (unsigned char) *lexptr++; - else - c = EOF; - - return c; -} -#else -#define nextc() ((lexptr && lexptr < lexend) ? \ - ((int) (unsigned char) *lexptr++) : \ - (get_src_buf() ? 
((int) (unsigned char) *lexptr++) : EOF) \ - ) -#endif - -/* pushback --- push a character back on the input */ - -#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) - -/* allow_newline --- allow newline after &&, ||, ? and : */ - -static void -allow_newline() -{ - int c; - - for (;;) { - c = nextc(); - if (c == EOF) - break; - if (c == '#') { - while ((c = nextc()) != '\n' && c != EOF) - continue; - if (c == EOF) - break; - } - if (c == '\n') - sourceline++; - if (! ISSPACE(c)) { - pushback(); - break; - } - } -} - -/* yylex --- Read the input and turn it into tokens. */ - -static int -yylex() -{ - register int c; - int seen_e = FALSE; /* These are for numbers */ - int seen_point = FALSE; - int esc_seen; /* for literal strings */ - int low, mid, high; - static int did_newline = FALSE; - char *tokkey; - static int lasttok = 0, eof_warned = FALSE; - int inhex = FALSE; - int intlstr = FALSE; - - if (nextc() == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } - pushback(); -#ifdef OS2 - /* - * added for OS/2's extproc feature of cmd.exe - * (like #! in BSD sh) - */ - if (strncasecmp(lexptr, "extproc ", 8) == 0) { - while (*lexptr && *lexptr != '\n') - lexptr++; - } -#endif - lexeme = lexptr; - thisline = NULL; - if (want_regexp) { - int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ - /* - * Counting brackets is non-trivial. [[] is ok, - * and so is [\]], with a point being that /[/]/ as a regexp - * constant has to work. - * - * Do not count [ or ] if either one is preceded by a \. - * A `[' should be counted if - * a) it is the first one so far (in_brack == 0) - * b) it is the `[' in `[:' - * A ']' should be counted if not preceded by a \, since - * it is either closing `:]' or just a plain list. - * According to POSIX, []] is how you put a ] into a set. - * Try to handle that too. - * - * The code for \ handles \[ and \]. - */ - - want_regexp = FALSE; - tok = tokstart; - for (;;) { - c = nextc(); - switch (c) { - case '[': - /* one day check for `.' and `=' too */ - if (nextc() == ':' || in_brack == 0) - in_brack++; - pushback(); - break; - case ']': - if (tokstart[0] == '[' - && (tok == tokstart + 1 - || (tok == tokstart + 2 - && tokstart[1] == '^'))) - /* do nothing */; - else - in_brack--; - break; - case '\\': - if ((c = nextc()) == EOF) { - yyerror(_("unterminated regexp ends with `\\' at end of file")); - return lasttok = REGEXP; /* kludge */ - } else if (c == '\n') { - sourceline++; - continue; - } else { - tokadd('\\'); - tokadd(c); - continue; - } - break; - case '/': /* end of the regexp */ - if (in_brack > 0) - break; - - pushback(); - tokadd('\0'); - yylval.sval = tokstart; - return lasttok = REGEXP; - case '\n': - pushback(); - yyerror(_("unterminated regexp")); - return lasttok = REGEXP; /* kludge */ - case EOF: - yyerror(_("unterminated regexp at end of file")); - return lasttok = REGEXP; /* kludge */ - } - tokadd(c); - } - } -retry: - while ((c = nextc()) == ' ' || c == '\t') - continue; - - lexeme = lexptr ? lexptr - 1 : lexptr; - thisline = NULL; - tok = tokstart; - yylval.nodetypeval = Node_illegal; - - switch (c) { - case EOF: - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! 
eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - - case '\n': - sourceline++; - return lasttok = NEWLINE; - - case '#': /* it's a comment */ - while ((c = nextc()) != '\n') { - if (c == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - lintwarn( - _("source file does not end in newline")); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } - } - sourceline++; - return lasttok = NEWLINE; - - case '\\': -#ifdef RELAXED_CONTINUATION - /* - * This code puports to allow comments and/or whitespace - * after the `\' at the end of a line used for continuation. - * Use it at your own risk. We think it's a bad idea, which - * is why it's not on by default. - */ - if (! do_traditional) { - /* strip trailing white-space and/or comment */ - while ((c = nextc()) == ' ' || c == '\t') - continue; - if (c == '#') { - if (do_lint) - lintwarn( - _("use of `\\ #...' line continuation is not portable")); - while ((c = nextc()) != '\n') - if (c == EOF) - break; - } - pushback(); - } -#endif /* RELAXED_CONTINUATION */ - if (nextc() == '\n') { - sourceline++; - goto retry; - } else { - yyerror(_("backslash not last character on line")); - exit(1); - } - break; - - case '$': - want_assign = TRUE; - return lasttok = '$'; - - case ':': - case '?': - if (! do_posix) - allow_newline(); - return lasttok = c; - - case ')': - case '(': - case ';': - case '{': - case ',': - want_assign = FALSE; - /* fall through */ - case '[': - case ']': - return lasttok = c; - - case '*': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_times; - return lasttok = ASSIGNOP; - } else if (do_posix) { - pushback(); - return lasttok = '*'; - } else if (c == '*') { - /* make ** and **= aliases for ^ and ^= */ - static int did_warn_op = FALSE, did_warn_assgn = FALSE; - - if (nextc() == '=') { - if (! did_warn_assgn) { - did_warn_assgn = TRUE; - if (do_lint) - lintwarn(_("POSIX does not allow operator `**='")); - if (do_lint_old) - warning(_("old awk does not support operator `**='")); - } - yylval.nodetypeval = Node_assign_exp; - return ASSIGNOP; - } else { - pushback(); - if (! did_warn_op) { - did_warn_op = TRUE; - if (do_lint) - lintwarn(_("POSIX does not allow operator `**'")); - if (do_lint_old) - warning(_("old awk does not support operator `**'")); - } - return lasttok = '^'; - } - } - pushback(); - return lasttok = '*'; - - case '/': - if (want_assign) { - if (nextc() == '=') { - yylval.nodetypeval = Node_assign_quotient; - return lasttok = ASSIGNOP; - } - pushback(); - } - return lasttok = '/'; - - case '%': - if (nextc() == '=') { - yylval.nodetypeval = Node_assign_mod; - return lasttok = ASSIGNOP; - } - pushback(); - return lasttok = '%'; - - case '^': - { - static int did_warn_op = FALSE, did_warn_assgn = FALSE; - - if (nextc() == '=') { - if (do_lint_old && ! did_warn_assgn) { - did_warn_assgn = TRUE; - warning(_("operator `^=' is not supported in old awk")); - } - yylval.nodetypeval = Node_assign_exp; - return lasttok = ASSIGNOP; - } - pushback(); - if (do_lint_old && ! 
did_warn_op) { - did_warn_op = TRUE; - warning(_("operator `^' is not supported in old awk")); - } - return lasttok = '^'; - } - - case '+': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_plus; - return lasttok = ASSIGNOP; - } - if (c == '+') - return lasttok = INCREMENT; - pushback(); - return lasttok = '+'; - - case '!': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_notequal; - return lasttok = RELOP; - } - if (c == '~') { - yylval.nodetypeval = Node_nomatch; - want_assign = FALSE; - return lasttok = MATCHOP; - } - pushback(); - return lasttok = '!'; - - case '<': - if (nextc() == '=') { - yylval.nodetypeval = Node_leq; - return lasttok = RELOP; - } - yylval.nodetypeval = Node_less; - pushback(); - return lasttok = '<'; - - case '=': - if (nextc() == '=') { - yylval.nodetypeval = Node_equal; - return lasttok = RELOP; - } - yylval.nodetypeval = Node_assign; - pushback(); - return lasttok = ASSIGNOP; - - case '>': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_geq; - return lasttok = RELOP; - } else if (c == '>') { - yylval.nodetypeval = Node_redirect_append; - return lasttok = APPEND_OP; - } - yylval.nodetypeval = Node_greater; - pushback(); - return lasttok = '>'; - - case '~': - yylval.nodetypeval = Node_match; - want_assign = FALSE; - return lasttok = MATCHOP; - - case '}': - /* - * Added did newline stuff. Easier than - * hacking the grammar. - */ - if (did_newline) { - did_newline = FALSE; - return lasttok = c; - } - did_newline++; - --lexptr; /* pick up } next time */ - return lasttok = NEWLINE; - - case '"': - string: - esc_seen = FALSE; - while ((c = nextc()) != '"') { - if (c == '\n') { - pushback(); - yyerror(_("unterminated string")); - exit(1); - } - if (c == '\\') { - c = nextc(); - if (c == '\n') { - sourceline++; - continue; - } - esc_seen = TRUE; - tokadd('\\'); - } - if (c == EOF) { - pushback(); - yyerror(_("unterminated string")); - exit(1); - } - tokadd(c); - } - yylval.nodeval = make_str_node(tokstart, - tok - tokstart, esc_seen ? SCAN : 0); - yylval.nodeval->flags |= PERM; - if (intlstr) { - yylval.nodeval->flags |= INTLSTR; - intlstr = FALSE; - if (do_intl) - dumpintlstr(yylval.nodeval->stptr, - yylval.nodeval->stlen); - } - return lasttok = YSTRING; - - case '-': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_minus; - return lasttok = ASSIGNOP; - } - if (c == '-') - return lasttok = DECREMENT; - pushback(); - return lasttok = '-'; - - case '.': - c = nextc(); - pushback(); - if (! ISDIGIT(c)) - return lasttok = '.'; - else - c = '.'; - /* FALL THROUGH */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - /* It's a number */ - for (;;) { - int gotnumber = FALSE; - - tokadd(c); - switch (c) { - case 'x': - case 'X': - if (do_traditional) - goto done; - if (tok == tokstart + 2) - inhex = TRUE; - break; - case '.': - if (seen_point) { - gotnumber = TRUE; - break; - } - seen_point = TRUE; - break; - case 'e': - case 'E': - if (inhex) - break; - if (seen_e) { - gotnumber = TRUE; - break; - } - seen_e = TRUE; - if ((c = nextc()) == '-' || c == '+') - tokadd(c); - else - pushback(); - break; - case 'a': - case 'A': - case 'b': - case 'B': - case 'c': - case 'C': - case 'D': - case 'd': - case 'f': - case 'F': - if (do_traditional || ! 
inhex) - goto done; - /* fall through */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - break; - default: - done: - gotnumber = TRUE; - } - if (gotnumber) - break; - c = nextc(); - } - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } - tokadd('\0'); - if (! do_traditional && isnondecimal(tokstart)) - yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); - else - yylval.nodeval = make_number(atof(tokstart)); - yylval.nodeval->flags |= PERM; - return lasttok = YNUMBER; - - case '&': - if ((c = nextc()) == '&') { - yylval.nodetypeval = Node_and; - allow_newline(); - want_assign = FALSE; - return lasttok = LEX_AND; - } - pushback(); - return lasttok = '&'; - - case '|': - if ((c = nextc()) == '|') { - yylval.nodetypeval = Node_or; - allow_newline(); - want_assign = FALSE; - return lasttok = LEX_OR; - } else if (! do_traditional && c == '&') { - yylval.nodetypeval = Node_redirect_twoway; - want_assign = FALSE; - return lasttok = TWOWAYIO; - } - pushback(); - return lasttok = '|'; - } - - if (c != '_' && ! ISALPHA(c)) { - yyerror(_("invalid char '%c' in expression"), c); - exit(1); - } - - if (! do_traditional && c == '_') { - if ((c = nextc()) == '"') { - intlstr = TRUE; - goto string; - } - pushback(); - c = '_'; - } - - /* it's some type of name-type-thing. Find its length. */ - tok = tokstart; - while (is_identchar(c)) { - tokadd(c); - c = nextc(); - } - tokadd('\0'); - emalloc(tokkey, char *, tok - tokstart, "yylex"); - memcpy(tokkey, tokstart, tok - tokstart); - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - lintwarn(_("source file does not end in newline")); - eof_warned = TRUE; - } - - /* See if it is a special token. */ - low = 0; - high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; - while (low <= high) { - int i; - - mid = (low + high) / 2; - c = *tokstart - tokentab[mid].operator[0]; - i = c ? c : strcmp(tokstart, tokentab[mid].operator); - - if (i < 0) /* token < mid */ - high = mid - 1; - else if (i > 0) /* token > mid */ - low = mid + 1; - else { - if (do_lint) { - if (tokentab[mid].flags & GAWKX) - lintwarn(_("`%s' is a gawk extension"), - tokentab[mid].operator); - if (tokentab[mid].flags & RESX) - lintwarn(_("`%s' is a Bell Labs extension"), - tokentab[mid].operator); - if (tokentab[mid].flags & NOT_POSIX) - lintwarn(_("POSIX does not allow `%s'"), - tokentab[mid].operator); - } - if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) - warning(_("`%s' is not supported in old awk"), - tokentab[mid].operator); - if ((do_traditional && (tokentab[mid].flags & GAWKX)) - || (do_posix && (tokentab[mid].flags & NOT_POSIX))) - break; - if (tokentab[mid].class == LEX_BUILTIN - || tokentab[mid].class == LEX_LENGTH - ) - yylval.lval = mid; - else - yylval.nodetypeval = tokentab[mid].value; - - free(tokkey); - return lasttok = tokentab[mid].class; - } - } - - yylval.sval = tokkey; - if (*lexptr == '(') - return lasttok = FUNC_CALL; - else { - static short goto_warned = FALSE; - - want_assign = TRUE; -#define SMART_ALECK 1 - if (SMART_ALECK && do_lint - && ! 
goto_warned && strcasecmp(tokkey, "goto") == 0) { - goto_warned = TRUE; - lintwarn(_("`goto' considered harmful!\n")); - } - return lasttok = NAME; - } -} - -/* node_common --- common code for allocating a new node */ - -static NODE * -node_common(NODETYPE op) -{ - register NODE *r; - - getnode(r); - r->type = op; - r->flags = MALLOC; - if (r->type == Node_var) - r->flags |= UNINITIALIZED; - /* if lookahead is NL, lineno is 1 too high */ - if (lexeme && *lexeme == '\n') - r->source_line = sourceline - 1; - else - r->source_line = sourceline; - r->source_file = source; - return r; -} - -/* node --- allocates a node with defined lnode and rnode. */ - -NODE * -node(NODE *left, NODETYPE op, NODE *right) -{ - register NODE *r; - - r = node_common(op); - r->lnode = left; - r->rnode = right; - return r; -} - -/* snode --- allocate a node with defined subnode and proc for builtin - functions. Checks for arg. count and supplies defaults where - possible. */ - -static NODE * -snode(NODE *subn, NODETYPE op, int idx) -{ - register NODE *r; - register NODE *n; - int nexp = 0; - int args_allowed; - - r = node_common(op); - - /* traverse expression list to see how many args. given */ - for (n = subn; n != NULL; n = n->rnode) { - nexp++; - if (nexp > 3) - break; - } - - /* check against how many args. are allowed for this builtin */ - args_allowed = tokentab[idx].flags & ARGS; - if (args_allowed && (args_allowed & A(nexp)) == 0) - fatal(_("%d is invalid as number of arguments for %s"), - nexp, tokentab[idx].operator); - - r->proc = tokentab[idx].ptr; - - /* special case processing for a few builtins */ - if (nexp == 0 && r->proc == do_length) { - subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); - } else if (r->proc == do_match) { - static short warned = FALSE; - - if (subn->rnode->lnode->type != Node_regex) - subn->rnode->lnode = mk_rexp(subn->rnode->lnode); - - if (subn->rnode->rnode != NULL) { /* 3rd argument there */ - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("match: third argument is a gawk extension")); - } - if (do_traditional) - fatal(_("match: third argument is a gawk extension")); - } - } else if (r->proc == do_sub || r->proc == do_gsub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 2) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - else if (subn->rnode->rnode->lnode->type == Node_val) { - if (do_lint) { - char *f; - - f = (r->proc == do_sub) ? "sub" : "gsub"; - lintwarn(_("%s: string literal as last arg of substitute has no effect"), f); - } - } else if (! 
isassignable(subn->rnode->rnode->lnode)) { - if (r->proc == do_sub) - yyerror(_("sub third parameter is not a changeable object")); - else - yyerror(_("gsub third parameter is not a changeable object")); - } - } else if (r->proc == do_gensub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 3) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - } else if (r->proc == do_split) { - if (nexp == 2) - append_right(subn, - node(FS_node, Node_expression_list, (NODE *) NULL)); - n = subn->rnode->rnode->lnode; - if (n->type != Node_regex) - subn->rnode->rnode->lnode = mk_rexp(n); - if (nexp == 2) - subn->rnode->rnode->lnode->re_flags |= FS_DFLT; - } else if (r->proc == do_close) { - static short warned = FALSE; - - if ( nexp == 2) { - if (do_lint && nexp == 2 && ! warned) { - warned = TRUE; - lintwarn(_("close: second argument is a gawk extension")); - } - if (do_traditional) - fatal(_("close: second argument is a gawk extension")); - } - } else if (do_intl /* --gen-po */ - && r->proc == do_dcgettext /* dcgettext(...) */ - && subn->lnode->type == Node_val /* 1st arg is constant */ - && (subn->lnode->flags & STR) != 0) { /* it's a string constant */ - /* ala xgettext, dcgettext("some string" ...) dumps the string */ - NODE *str = subn->lnode; - - if ((str->flags & INTLSTR) != 0) - warning(_("use of dcgettext(_\"...\") is incorrect: remove leading underscore")); - /* don't dump it, the lexer already did */ - else - dumpintlstr(str->stptr, str->stlen); - } - - - r->subnode = subn; - if (r->proc == do_sprintf) { - count_args(r); - r->lnode->printf_count = r->printf_count; /* hack */ - } - return r; -} - -/* - * mkrangenode: - * This allocates a Node_line_range node with defined condpair and - * zeroes the trigger word to avoid the temptation of assuming that calling - * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. - * Otherwise like node(). - */ - -static NODE * -mkrangenode(NODE *cpair) -{ - register NODE *r; - - getnode(r); - r->type = Node_line_range; - r->condpair = cpair; - r->triggered = FALSE; - return r; -} - -/* make_for_loop --- build a for loop */ - -static NODE * -make_for_loop(NODE *init, NODE *cond, NODE *incr) -{ - register FOR_LOOP_HEADER *r; - NODE *n; - - emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); - getnode(n); - n->type = Node_illegal; - r->init = init; - r->cond = cond; - r->incr = incr; - n->sub.nodep.r.hd = r; - return n; -} - -/* dup_parms --- return TRUE if there are duplicate parameters */ - -static int -dup_parms(NODE *func) -{ - register NODE *np; - char *fname, **names; - int count, i, j, dups; - NODE *params; - - if (func == NULL) /* error earlier */ - return TRUE; - - fname = func->param; - count = func->param_cnt; - params = func->rnode; - - if (count == 0) /* no args, no problem */ - return FALSE; - - if (params == NULL) /* error earlier */ - return TRUE; - - emalloc(names, char **, count * sizeof(char *), "dup_parms"); - - i = 0; - for (np = params; np != NULL; np = np->rnode) { - if (np->param == NULL) { /* error earlier, give up, go home */ - free(names); - return TRUE; - } - names[i++] = np->param; - } - - dups = 0; - for (i = 1; i < count; i++) { - for (j = 0; j < i; j++) { - if (strcmp(names[i], names[j]) == 0) { - dups++; - error( - _("function `%s': parameter #%d, `%s', duplicates parameter #%d"), - fname, i+1, names[j], j+1); - } - } - } - - free(names); - return (dups > 0 ? 
TRUE : FALSE); -} - -/* parms_shadow --- check if parameters shadow globals */ - -static void -parms_shadow(const char *fname, NODE *func) -{ - int count, i; - - if (fname == NULL || func == NULL) /* error earlier */ - return; - - count = func->lnode->param_cnt; - - if (count == 0) /* no args, no problem */ - return; - - /* - * Use warning() and not lintwarn() so that can warn - * about all shadowed parameters. - */ - for (i = 0; i < count; i++) { - if (lookup(func->parmlist[i]) != NULL) { - warning( - _("function `%s': parameter `%s' shadows global variable"), - fname, func->parmlist[i]); - } - } -} - -/* - * install: - * Install a name in the symbol table, even if it is already there. - * Caller must check against redefinition if that is desired. - */ - -NODE * -install(char *name, NODE *value) -{ - register NODE *hp; - register size_t len; - register int bucket; - - var_count++; - len = strlen(name); - bucket = hash(name, len, (unsigned long) HASHSIZE); - getnode(hp); - hp->type = Node_hashnode; - hp->hnext = variables[bucket]; - variables[bucket] = hp; - hp->hlength = len; - hp->hvalue = value; - hp->hname = name; - hp->hvalue->vname = name; - return hp->hvalue; -} - -/* lookup --- find the most recent hash node for name installed by install */ - -NODE * -lookup(const char *name) -{ - register NODE *bucket; - register size_t len; - - len = strlen(name); - for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)]; - bucket != NULL; bucket = bucket->hnext) - if (bucket->hlength == len && STREQN(bucket->hname, name, len)) - return bucket->hvalue; - - return NULL; -} - -/* var_comp --- compare two variable names */ - -static int -var_comp(const void *v1, const void *v2) -{ - NODE **npp1, **npp2; - NODE *n1, *n2; - int minlen; - - npp1 = (NODE **) v1; - npp2 = (NODE **) v2; - n1 = *npp1; - n2 = *npp2; - - if (n1->hlength > n2->hlength) - minlen = n1->hlength; - else - minlen = n2->hlength; - - return strncmp(n1->hname, n2->hname, minlen); -} - -/* valinfo --- dump var info */ - -static void -valinfo(NODE *n, FILE *fp) -{ - if (n->flags & STRING) { - fprintf(fp, "string ("); - pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); - fprintf(fp, ")\n"); - } else if (n->flags & NUMBER) - fprintf(fp, "number (%.17g)\n", n->numbr); - else if (n->flags & STR) { - fprintf(fp, "string value ("); - pp_string_fp(fp, n->stptr, n->stlen, '"', FALSE); - fprintf(fp, ")\n"); - } else if (n->flags & NUM) - fprintf(fp, "number value (%.17g)\n", n->numbr); - else - fprintf(fp, "?? flags %s\n", flags2str(n->flags)); -} - - -/* dump_vars --- dump the symbol table */ - -void -dump_vars(const char *fname) -{ - int i, j; - NODE **table; - NODE *p; - FILE *fp; - - emalloc(table, NODE **, var_count * sizeof(NODE *), "dump_vars"); - - if (fname == NULL) - fp = stderr; - else if ((fp = fopen(fname, "w")) == NULL) { - warning(_("could not open `%s' for writing (%s)"), fname, strerror(errno)); - warning(_("sending profile to standard error")); - fp = stderr; - } - - for (i = j = 0; i < HASHSIZE; i++) - for (p = variables[i]; p != NULL; p = p->hnext) - table[j++] = p; - - assert(j == var_count); - - /* Shazzam! 
*/ - qsort(table, j, sizeof(NODE *), var_comp); - - for (i = 0; i < j; i++) { - p = table[i]; - if (p->hvalue->type == Node_func) - continue; - fprintf(fp, "%.*s: ", (int) p->hlength, p->hname); - if (p->hvalue->type == Node_var_array) - fprintf(fp, "array, %ld elements\n", p->hvalue->table_size); - else if (p->hvalue->type == Node_var) - valinfo(p->hvalue->var_value, fp); - else { - NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); - - valinfo(*lhs, fp); - } - } - - if (fp != stderr && fclose(fp) != 0) - warning(_("%s: close failed (%s)"), fname, strerror(errno)); - - free(table); -} - -/* release_all_vars --- free all variable memory */ - -void -release_all_vars() -{ - int i; - NODE *p, *next; - - for (i = 0; i < HASHSIZE; i++) - for (p = variables[i]; p != NULL; p = next) { - next = p->hnext; - - if (p->hvalue->type == Node_func) - continue; - else if (p->hvalue->type == Node_var_array) - assoc_clear(p->hvalue); - else if (p->hvalue->type == Node_var) - unref(p->hvalue->var_value); - else { - NODE **lhs = get_lhs(p->hvalue, NULL, FALSE); - - unref((*lhs)->var_value); - } - unref(p); - } -} - -/* finfo --- for use in comparison and sorting of function names */ - -struct finfo { - char *name; - size_t nlen; - NODE *func; -}; - -/* fcompare --- comparison function for qsort */ - -static int -fcompare(const void *p1, const void *p2) -{ - struct finfo *f1, *f2; - int minlen; - - f1 = (struct finfo *) p1; - f2 = (struct finfo *) p2; - - if (f1->nlen > f2->nlen) - minlen = f2->nlen; - else - minlen = f1->nlen; - - return strncmp(f1->name, f2->name, minlen); -} - -/* dump_funcs --- print all functions */ - -void -dump_funcs() -{ - int i, j; - NODE *p; - static struct finfo *tab = NULL; - - if (func_count == 0) - return; - - if (tab == NULL) - emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "dump_funcs"); - - for (i = j = 0; i < HASHSIZE; i++) { - for (p = variables[i]; p != NULL; p = p->hnext) { - if (p->hvalue->type == Node_func) { - tab[j].name = p->hname; - tab[j].nlen = p->hlength; - tab[j].func = p->hvalue; - j++; - } - } - } - - assert(j == func_count); - - /* Shazzam! */ - qsort(tab, func_count, sizeof(struct finfo), fcompare); - - for (i = 0; i < j; i++) - pp_func(tab[i].name, tab[i].nlen, tab[i].func); - - free(tab); -} - -/* shadow_funcs --- check all functions for parameters that shadow globals */ - -void -shadow_funcs() -{ - int i, j; - NODE *p; - struct finfo *tab; - static int calls = 0; - - if (func_count == 0) - return; - - if (calls++ != 0) - fatal(_("shadow_funcs() called twice!")); - - emalloc(tab, struct finfo *, func_count * sizeof(struct finfo), "shadow_funcs"); - - for (i = j = 0; i < HASHSIZE; i++) { - for (p = variables[i]; p != NULL; p = p->hnext) { - if (p->hvalue->type == Node_func) { - tab[j].name = p->hname; - tab[j].nlen = p->hlength; - tab[j].func = p->hvalue; - j++; - } - } - } - - assert(j == func_count); - - /* Shazzam! */ - qsort(tab, func_count, sizeof(struct finfo), fcompare); - - for (i = 0; i < j; i++) - parms_shadow(tab[i].name, tab[i].func); - - free(tab); -} - -/* - * append_right: - * Add new to the rightmost branch of LIST. This uses n^2 time, so we make - * a simple attempt at optimizing it. 
- */ - -static NODE * -append_right(NODE *list, NODE *new) -{ - register NODE *oldlist; - static NODE *savefront = NULL, *savetail = NULL; - - if (list == NULL || new == NULL) - return list; - - oldlist = list; - if (savefront == oldlist) { - savetail = savetail->rnode = new; - return oldlist; - } else - savefront = oldlist; - while (list->rnode != NULL) - list = list->rnode; - savetail = list->rnode = new; - return oldlist; -} - -/* - * func_install: - * check if name is already installed; if so, it had better have Null value, - * in which case def is added as the value. Otherwise, install name with def - * as value. - * - * Extra work, build up and save a list of the parameter names in a table - * and hang it off params->parmlist. This is used to set the `vname' field - * of each function parameter during a function call. See eval.c. - */ - -static void -func_install(NODE *params, NODE *def) -{ - NODE *r, *n, *thisfunc; - char **pnames, *names, *sp; - size_t pcount = 0, space = 0; - int i; - - /* check for function foo(foo) { ... }. bleah. */ - for (n = params->rnode; n != NULL; n = n->rnode) { - if (strcmp(n->param, params->param) == 0) - fatal(_("function `%s': can't use function name as parameter name"), - params->param); - } - - thisfunc = NULL; /* turn off warnings */ - - /* symbol table managment */ - pop_var(params, FALSE); - r = lookup(params->param); - if (r != NULL) { - fatal(_("function name `%s' previously defined"), params->param); - } else { - thisfunc = node(params, Node_func, def); - (void) install(params->param, thisfunc); - } - - /* figure out amount of space to allocate */ - for (n = params->rnode; n != NULL; n = n->rnode) { - pcount++; - space += strlen(n->param) + 1; - } - - /* allocate it and fill it in */ - if (pcount != 0) { - emalloc(names, char *, space, "func_install"); - emalloc(pnames, char **, pcount * sizeof(char *), "func_install"); - sp = names; - for (i = 0, n = params->rnode; i < pcount; i++, n = n->rnode) { - pnames[i] = sp; - strcpy(sp, n->param); - sp += strlen(n->param) + 1; - } - thisfunc->parmlist = pnames; - } else { - thisfunc->parmlist = NULL; - } - - /* remove params from symbol table */ - pop_params(params->rnode); - - /* update lint table info */ - func_use(params->param, FUNC_DEFINE); - - func_count++; /* used by profiling / pretty printer */ -} - -/* pop_var --- remove a variable from the symbol table */ - -static void -pop_var(NODE *np, int freeit) -{ - register NODE *bucket, **save; - register size_t len; - char *name; - - name = np->param; - len = strlen(name); - save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]); - for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { - if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { - var_count--; - *save = bucket->hnext; - freenode(bucket); - if (freeit) - free(np->param); - return; - } - save = &(bucket->hnext); - } -} - -/* pop_params --- remove list of function parameters from symbol table */ - -/* - * pop parameters out of the symbol table. do this in reverse order to - * avoid reading freed memory if there were duplicated parameters. 
- */ -static void -pop_params(NODE *params) -{ - if (params == NULL) - return; - pop_params(params->rnode); - pop_var(params, TRUE); -} - -/* make_param --- make NAME into a function parameter */ - -static NODE * -make_param(char *name) -{ - NODE *r; - - getnode(r); - r->type = Node_param_list; - r->rnode = NULL; - r->param = name; - r->param_cnt = param_counter++; - return (install(name, r)); -} - -static struct fdesc { - char *name; - short used; - short defined; - struct fdesc *next; -} *ftable[HASHSIZE]; - -/* func_use --- track uses and definitions of functions */ - -static void -func_use(char *name, enum defref how) -{ - struct fdesc *fp; - int len; - int ind; - - len = strlen(name); - ind = hash(name, len, HASHSIZE); - - for (fp = ftable[ind]; fp != NULL; fp = fp->next) { - if (strcmp(fp->name, name) == 0) { - if (how == FUNC_DEFINE) - fp->defined++; - else - fp->used++; - return; - } - } - - /* not in the table, fall through to allocate a new one */ - - emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); - memset(fp, '\0', sizeof(struct fdesc)); - emalloc(fp->name, char *, len + 1, "func_use"); - strcpy(fp->name, name); - if (how == FUNC_DEFINE) - fp->defined++; - else - fp->used++; - fp->next = ftable[ind]; - ftable[ind] = fp; -} - -/* check_funcs --- verify functions that are called but not defined */ - -static void -check_funcs() -{ - struct fdesc *fp, *next; - int i; - - for (i = 0; i < HASHSIZE; i++) { - for (fp = ftable[i]; fp != NULL; fp = fp->next) { -#ifdef REALLYMEAN - /* making this the default breaks old code. sigh. */ - if (fp->defined == 0) { - error( - _("function `%s' called but never defined"), fp->name); - errcount++; - } -#else - if (do_lint && fp->defined == 0) - lintwarn( - _("function `%s' called but never defined"), fp->name); -#endif - if (do_lint && fp->used == 0) { - lintwarn(_("function `%s' defined but never called"), - fp->name); - } - } - } - - /* now let's free all the memory */ - for (i = 0; i < HASHSIZE; i++) { - for (fp = ftable[i]; fp != NULL; fp = next) { - next = fp->next; - free(fp->name); - free(fp); - } - } -} - -/* param_sanity --- look for parameters that are regexp constants */ - -static void -param_sanity(NODE *arglist) -{ - NODE *argp, *arg; - int i; - - for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { - arg = argp->lnode; - if (arg->type == Node_regex) - warning(_("regexp constant for parameter #%d yields boolean value"), i); - } -} - -/* variable --- make sure NAME is in the symbol table */ - -NODE * -variable(char *name, int can_free, NODETYPE type) -{ - register NODE *r; - static int env_loaded = FALSE; - static int procinfo_loaded = FALSE; - - if (! env_loaded && STREQ(name, "ENVIRON")) { - load_environ(); - env_loaded = TRUE; - } - if (! do_traditional && ! 
procinfo_loaded && STREQ(name, "PROCINFO")) { - load_procinfo(); - procinfo_loaded = TRUE; - } - if ((r = lookup(name)) == NULL) - r = install(name, node(Nnull_string, type, (NODE *) NULL)); - else if (can_free) - free(name); - return r; -} - -/* mk_rexp --- make a regular expression constant */ - -static NODE * -mk_rexp(NODE *exp) -{ - NODE *n; - - if (exp->type == Node_regex) - return exp; - - getnode(n); - n->type = Node_regex; - n->re_exp = exp; - n->re_text = NULL; - n->re_reg = NULL; - n->re_flags = 0; - n->re_cnt = 1; - return n; -} - -/* isnoeffect --- when used as a statement, has no side effects */ - -/* - * To be completely general, we should recursively walk the parse - * tree, to make sure that all the subexpressions also have no effect. - * Instead, we just weaken the actual warning that's printed, up above - * in the grammar. - */ - -static int -isnoeffect(NODETYPE type) -{ - switch (type) { - case Node_times: - case Node_quotient: - case Node_mod: - case Node_plus: - case Node_minus: - case Node_subscript: - case Node_concat: - case Node_exp: - case Node_unary_minus: - case Node_field_spec: - case Node_and: - case Node_or: - case Node_equal: - case Node_notequal: - case Node_less: - case Node_greater: - case Node_leq: - case Node_geq: - case Node_match: - case Node_nomatch: - case Node_not: - case Node_val: - case Node_in_array: - case Node_NF: - case Node_NR: - case Node_FNR: - case Node_FS: - case Node_RS: - case Node_FIELDWIDTHS: - case Node_IGNORECASE: - case Node_OFS: - case Node_ORS: - case Node_OFMT: - case Node_CONVFMT: - case Node_BINMODE: - case Node_LINT: - return TRUE; - default: - break; /* keeps gcc -Wall happy */ - } - - return FALSE; -} - -/* isassignable --- can this node be assigned to? */ - -static int -isassignable(register NODE *n) -{ - switch (n->type) { - case Node_var: - case Node_FIELDWIDTHS: - case Node_RS: - case Node_FS: - case Node_FNR: - case Node_NR: - case Node_NF: - case Node_IGNORECASE: - case Node_OFMT: - case Node_CONVFMT: - case Node_ORS: - case Node_OFS: - case Node_LINT: - case Node_BINMODE: - case Node_field_spec: - case Node_subscript: - return TRUE; - case Node_param_list: - return ((n->flags & FUNC) == 0); /* ok if not func name */ - default: - break; /* keeps gcc -Wall happy */ - } - return FALSE; -} - -/* stopme --- for debugging */ - -NODE * -stopme(NODE *tree) -{ - return tmp_number((AWKNUM) 0.0); -} - -/* dumpintlstr --- write out an initial .po file entry for the string */ - -static void -dumpintlstr(char *str, size_t len) -{ - char *cp; - - /* See the GNU gettext distribution for details on the file format */ - - if (source != NULL) { - /* ala the gettext sources, remove leading `./'s */ - for (cp = source; cp[0] == '.' 
&& cp[1] == '/'; cp += 2) - continue; - printf("#: %s:%d\n", cp, sourceline); - } - - printf("msgid "); - fflush(stdout); - pp_string_fp(stdout, str, len, '"', TRUE); - putchar('\n'); - printf("msgstr \"\"\n\n"); -} - -/* count_args --- count the number of printf arguments */ - -static void -count_args(NODE *tree) -{ - size_t count = 0; - NODE *save_tree; - - assert(tree->type == Node_K_printf - || (tree->type == Node_builtin && tree->proc == do_sprintf)); - save_tree = tree; - - tree = tree->lnode; /* printf format string */ - - for (count = 0; tree != NULL; tree = tree->rnode) - count++; - - save_tree->printf_count = count; -} diff --git a/contrib/awk/awklib/ChangeLog b/contrib/awk/awklib/ChangeLog deleted file mode 100644 index 531fdb9..0000000 --- a/contrib/awk/awklib/ChangeLog +++ /dev/null @@ -1,78 +0,0 @@ -Sun Jun 3 13:04:44 2001 Arnold D. Robbins - - * Release 3.1.0: Release tar file made. And there was - rejoicing. - -2001-02-26 Paul Eggert - - * Makefile.am (stamp-eg): Use $(AWK), not awk, as the - native awk might not work. - -2001-02-26 Andreas Schwab - - * Makefile.am: Install igawk as script. - -Mon Nov 6 15:29:08 2000 Arnold D. Robbins - - * Makefile.am: Fixed to have all functionality from before - the switch to automake. - * extract.awk: Updated to match version in the doc. - -Sat Jul 26 23:08:29 1997 Arnold D. Robbins - - * Makefile.in (install-strip): new target. - -Mon Aug 7 15:23:00 2000 Arnold D. Robbins - - * Release 3.0.6: Release tar file made. - -Sun Jun 25 15:08:19 2000 Arnold D. Robbins - - * Release 3.0.5: Release tar file made. - -Wed Jun 30 16:14:36 1999 Arnold D. Robbins - - * Release 3.0.4: Release tar file made. This time for sure. - -Thu May 15 12:49:08 1997 Arnold D. Robbins - - * Release 3.0.3: Release tar file made. - -Fri Apr 18 07:55:47 1997 Arnold D. Robbins - - * BETA Release 3.0.34: Release tar file made. - -Sun Apr 13 15:40:55 1997 Arnold D. Robbins - - * Makefile.in (install): fix second for loop to use $$i. Sigh. - -Wed Dec 25 11:25:22 1996 Arnold D. Robbins - - * Release 3.0.2: Release tar file made. - -Wed Dec 25 11:17:32 1996 Arnold D. Robbins - - * Makefile.in (install): remove chmod command; let - $(INSTALL_PROGRAM) use -m. - -Tue Dec 17 22:29:49 1996 Arnold D. Robbins - - * Makefile.in (install): fix installation of files in eg/lib. - -Tue Dec 10 23:09:26 1996 Arnold D. Robbins - - * Release 3.0.1: Release tar file made. - -Sun Oct 20 12:30:41 1996 Arnold D. Robbins - - * Makefile.in (install): minor tweaks for portability. - -Fri Mar 15 06:33:38 1996 Arnold D. Robbins - - * Makefile.in (pwcat, grcat): Add $(LDFLAGS). - (clean): add `*~' to list of files to be removed. - -Wed Jan 24 10:06:16 1996 Arnold D. Robbins - - * Makefile.in (clean): Remove $(AUXAWK). - (maintainer-clean): Depend on distclean, not the other way around. diff --git a/contrib/awk/awklib/Makefile.am b/contrib/awk/awklib/Makefile.am deleted file mode 100644 index ecd23cf..0000000 --- a/contrib/awk/awklib/Makefile.am +++ /dev/null @@ -1,75 +0,0 @@ -# -# awklib/Makefile.am --- automake input file for gawk -# -# Copyright (C) 1995-2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. 
-# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - -## process this file with automake to produce Makefile.in - -EXTRA_DIST = ChangeLog extract.awk eg stamp-eg - -datadir = @datadir@/awk -libexecdir = @libexecdir@/awk - -bin_SCRIPTS = igawk -libexec_PROGRAMS = pwcat grcat -AUXAWK = passwd.awk group.awk -nodist_grcat_SOURCES = grcat.c -nodist_pwcat_SOURCES = pwcat.c - -all: stamp-eg $(AUXPROGS) igawk $(AUXAWK) - -install-exec-hook: $(AUXAWK) - $(mkinstalldirs) $(DESTDIR)$(datadir) - for i in $(AUXAWK) $(srcdir)/eg/lib/*.awk ; do \ - progname=`echo $$i | sed 's;.*/;;'` ; \ - $(INSTALL_DATA) $$i $(DESTDIR)$(datadir)/$$progname ; \ - done - -# libexecdir and datadir are removed in the top level Makefile's uninstall -uninstall-local: - rm -fr $(DESTDIR)$(libexecdir)/* $(DESTDIR)$(datadir)/* - rm -f $(DESTDIR)$(bindir)/igawk - -clean-local: - rm -f $(AUXAWK) igawk - -stamp-eg: $(srcdir)/../doc/gawk.texi $(srcdir)/../doc/gawkinet.texi - rm -fr eg stamp-eg - $(AWK) -f $(srcdir)/extract.awk $(srcdir)/../doc/gawk.texi $(srcdir)/../doc/gawkinet.texi - @echo 'some makes are stupid and will not check a directory' > stamp-eg - @echo 'against a file, so this file is a place holder. gack.' >> stamp-eg - -pwcat: $(srcdir)/eg/lib/pwcat.c - $(CC) $(CFLAGS) $(srcdir)/eg/lib/pwcat.c $(LDFLAGS) -o $@ - -grcat: $(srcdir)/eg/lib/grcat.c - $(CC) $(CFLAGS) $(srcdir)/eg/lib/grcat.c $(LDFLAGS) -o $@ - -igawk: $(srcdir)/eg/prog/igawk.sh - cp $(srcdir)/eg/prog/igawk.sh $@ ; chmod 755 $@ - -passwd.awk: $(srcdir)/eg/lib/passwdawk.in - (cd $(srcdir)/eg/lib ; \ - sed 's;/usr/local/libexec/awk;$(libexecdir);' < passwdawk.in) > passwd.awk - -group.awk: $(srcdir)/eg/lib/groupawk.in - (cd $(srcdir)/eg/lib ; \ - sed 's;/usr/local/libexec/awk;$(libexecdir);' < groupawk.in) > group.awk diff --git a/contrib/awk/awklib/Makefile.in b/contrib/awk/awklib/Makefile.in deleted file mode 100644 index 686b65a..0000000 --- a/contrib/awk/awklib/Makefile.in +++ /dev/null @@ -1,431 +0,0 @@ -# Makefile.in generated automatically by automake 1.4a from Makefile.am - -# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000 -# Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_FLAG = -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : - -@SET_MAKE@ -AMDEP = @AMDEP@ -AMTAR = @AMTAR@ -AWK = @AWK@ -CATALOGS = @CATALOGS@ -CATOBJEXT = @CATOBJEXT@ -CC = @CC@ -CFLAGS = @CFLAGS@ -CPP = @CPP@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -DATADIRNAME = @DATADIRNAME@ -DEPDIR = @DEPDIR@ -GENCAT = @GENCAT@ -GMOFILES = @GMOFILES@ -GMSGFMT = @GMSGFMT@ -GT_NO = @GT_NO@ -GT_YES = @GT_YES@ -INCLUDE_LOCALE_H = @INCLUDE_LOCALE_H@ -INSTOBJEXT = @INSTOBJEXT@ -INTLDEPS = @INTLDEPS@ -INTLLIBS = @INTLLIBS@ -INTLOBJS = @INTLOBJS@ -LN_S = @LN_S@ -MAKEINFO = @MAKEINFO@ -MKINSTALLDIRS = @MKINSTALLDIRS@ -MSGFMT = @MSGFMT@ -PACKAGE = @PACKAGE@ -POFILES = @POFILES@ -POSUB = @POSUB@ -RANLIB = @RANLIB@ -SOCKET_LIBS = @SOCKET_LIBS@ -U = @U@ -USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ -USE_NLS = @USE_NLS@ -VERSION = @VERSION@ -YACC = @YACC@ -install_sh = @install_sh@ -l = @l@ - -# -# awklib/Makefile.am --- automake input file for gawk -# -# Copyright (C) 1995-2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - - -EXTRA_DIST = ChangeLog extract.awk eg stamp-eg - -datadir = @datadir@/awk -libexecdir = @libexecdir@/awk - -bin_SCRIPTS = igawk -libexec_PROGRAMS = pwcat grcat -AUXAWK = passwd.awk group.awk -nodist_grcat_SOURCES = grcat.c -nodist_pwcat_SOURCES = pwcat.c -subdir = awklib -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_HEADER = ../config.h -CONFIG_CLEAN_FILES = -PROGRAMS = $(libexec_PROGRAMS) - - -DEFS = @DEFS@ -I. -I$(srcdir) -I.. 
-CPPFLAGS = @CPPFLAGS@ -LDFLAGS = @LDFLAGS@ -LIBS = @LIBS@ -nodist_grcat_OBJECTS = grcat.o -grcat_OBJECTS = $(nodist_grcat_OBJECTS) -grcat_LDADD = $(LDADD) -grcat_DEPENDENCIES = -grcat_LDFLAGS = -nodist_pwcat_OBJECTS = pwcat.o -pwcat_OBJECTS = $(nodist_pwcat_OBJECTS) -pwcat_LDADD = $(LDADD) -pwcat_DEPENDENCIES = -pwcat_LDFLAGS = -SCRIPTS = $(bin_SCRIPTS) - -COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -CCLD = $(CC) -LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@ -DIST_SOURCES = -depcomp = $(SHELL) $(top_srcdir)/depcomp -DEP_FILES = @AMDEP@ $(DEPDIR)/grcat.Po $(DEPDIR)/pwcat.Po -DIST_COMMON = ChangeLog Makefile.am Makefile.in - - -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) - -GZIP_ENV = --best -SOURCES = $(nodist_grcat_SOURCES) $(nodist_pwcat_SOURCES) -OBJECTS = $(nodist_grcat_OBJECTS) $(nodist_pwcat_OBJECTS) - -all: all-redirect -.SUFFIXES: -.SUFFIXES: .c .o -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu awklib/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - - -mostlyclean-libexecPROGRAMS: - -clean-libexecPROGRAMS: - -test -z "$(libexec_PROGRAMS)" || rm -f $(libexec_PROGRAMS) - -distclean-libexecPROGRAMS: - -maintainer-clean-libexecPROGRAMS: - -install-libexecPROGRAMS: $(libexec_PROGRAMS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(libexecdir) - @list='$(libexec_PROGRAMS)'; for p in $$list; do \ - if test -f $$p; then \ - f="`echo $$p|sed -e 's/$(EXEEXT)$$//' -e '$(transform)' -e 's/$$/$(EXEEXT)/'`"; \ - echo " $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libexecdir)/$$f"; \ - $(INSTALL_PROGRAM) $(INSTALL_STRIP_FLAG) $$p $(DESTDIR)$(libexecdir)/$$f; \ - else :; fi; \ - done - -uninstall-libexecPROGRAMS: - @$(NORMAL_UNINSTALL) - @list='$(libexec_PROGRAMS)'; for p in $$list; do \ - f="`echo $$p|sed -e 's/$(EXEEXT)$$//' -e '$(transform)' -e 's/$$/$(EXEEXT)/'`"; \ - echo " rm -f $(DESTDIR)$(libexecdir)/$$f"; \ - rm -f $(DESTDIR)$(libexecdir)/$$f; \ - done - -mostlyclean-compile: - -rm -f *.o core *.core - -clean-compile: - -distclean-compile: - -rm -f *.tab.c - -maintainer-clean-compile: - -install-binSCRIPTS: $(bin_SCRIPTS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(bindir) - @list='$(bin_SCRIPTS)'; for p in $$list; do \ - f="`echo $$p|sed '$(transform)'`"; \ - if test -f $$p; then \ - echo " $(INSTALL_SCRIPT) $$p $(DESTDIR)$(bindir)/$$f"; \ - $(INSTALL_SCRIPT) $$p $(DESTDIR)$(bindir)/$$f; \ - elif test -f $(srcdir)/$$p; then \ - echo " $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(bindir)/$$f"; \ - $(INSTALL_SCRIPT) $(srcdir)/$$p $(DESTDIR)$(bindir)/$$f; \ - else :; fi; \ - done - -uninstall-binSCRIPTS: - @$(NORMAL_UNINSTALL) - @list='$(bin_SCRIPTS)'; for p in $$list; do \ - f="`echo $$p|sed '$(transform)'`"; \ - echo " rm -f $(DESTDIR)$(bindir)/$$f"; \ - rm -f $(DESTDIR)$(bindir)/$$f; \ - done - -tags: TAGS - -ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES) - list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ - unique=`for i in $$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - mkid -fID $$unique $(LISP) - -TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \ - $(TAGS_FILES) $(LISP) - tags=; \ - here=`pwd`; \ - list='$(SOURCES) $(HEADERS) $(TAGS_FILES)'; \ - unique=`for i in 
$$list; do \ - if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ - done | \ - $(AWK) ' { files[$$0] = 1; } \ - END { for (i in files) print i; }'`; \ - test -z "$(ETAGS_ARGS)$$unique$(LISP)$$tags" \ - || etags $(ETAGS_ARGS) $$tags $$unique $(LISP) - -mostlyclean-tags: - -clean-tags: - -distclean-tags: - -rm -f TAGS ID - -maintainer-clean-tags: - -@AMDEP@include $(DEPDIR)/grcat.Po -@AMDEP@include $(DEPDIR)/pwcat.Po - -mostlyclean-depend: - -clean-depend: - -distclean-depend: - -rm -rf $(DEPDIR) - -maintainer-clean-depend: - -@AMDEP@CCDEPMODE = @CCDEPMODE@ - -.c.o: -@AMDEP@ source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ -@AMDEP@ depfile='$(DEPDIR)/$*.Po' tmpdepfile='$(DEPDIR)/$*.TPo' @AMDEPBACKSLASH@ -@AMDEP@ $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ - $(COMPILE) -c -o $@ `test -f $< || echo '$(srcdir)/'`$< - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pR $$d/$$file $(distdir) \ - || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done -info-am: -info: info-am -dvi-am: -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: install-libexecPROGRAMS install-binSCRIPTS - @$(NORMAL_INSTALL) - $(MAKE) $(AM_MAKEFLAGS) install-exec-hook -install-exec: install-exec-am - -install-data-am: -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: uninstall-libexecPROGRAMS uninstall-binSCRIPTS \ - uninstall-local -uninstall: uninstall-am -all-am: Makefile $(PROGRAMS) $(SCRIPTS) -all-redirect: all-am -install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install -installdirs: - $(mkinstalldirs) $(DESTDIR)$(libexecdir) $(DESTDIR)$(bindir) - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: - -rm -f Makefile.in -mostlyclean-am: mostlyclean-libexecPROGRAMS mostlyclean-compile \ - mostlyclean-tags mostlyclean-depend mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-libexecPROGRAMS clean-compile clean-tags clean-depend \ - clean-generic mostlyclean-am clean-local - -clean: clean-am - -distclean-am: distclean-libexecPROGRAMS distclean-compile \ - distclean-tags distclean-depend distclean-generic \ - clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-libexecPROGRAMS \ - maintainer-clean-compile maintainer-clean-tags \ - maintainer-clean-depend maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." 
- -maintainer-clean: maintainer-clean-am - -.PHONY: mostlyclean-libexecPROGRAMS distclean-libexecPROGRAMS \ -clean-libexecPROGRAMS maintainer-clean-libexecPROGRAMS \ -uninstall-libexecPROGRAMS install-libexecPROGRAMS mostlyclean-compile \ -distclean-compile clean-compile maintainer-clean-compile \ -uninstall-binSCRIPTS install-binSCRIPTS tags mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags mostlyclean-depend \ -distclean-depend clean-depend maintainer-clean-depend distdir info-am \ -info dvi-am dvi check check-am installcheck-am installcheck \ -install-exec-am install-exec install-data-am install-data install-am \ -install uninstall-local uninstall-am uninstall all-redirect all-am all \ -install-strip installdirs mostlyclean-generic distclean-generic \ -clean-generic maintainer-clean-generic clean mostlyclean distclean \ -maintainer-clean - - -all: stamp-eg $(AUXPROGS) igawk $(AUXAWK) - -install-exec-hook: $(AUXAWK) - $(mkinstalldirs) $(DESTDIR)$(datadir) - for i in $(AUXAWK) $(srcdir)/eg/lib/*.awk ; do \ - progname=`echo $$i | sed 's;.*/;;'` ; \ - $(INSTALL_DATA) $$i $(DESTDIR)$(datadir)/$$progname ; \ - done - -# libexecdir and datadir are removed in the top level Makefile's uninstall -uninstall-local: - rm -fr $(DESTDIR)$(libexecdir)/* $(DESTDIR)$(datadir)/* - rm -f $(DESTDIR)$(bindir)/igawk - -clean-local: - rm -f $(AUXAWK) igawk - -stamp-eg: $(srcdir)/../doc/gawk.texi $(srcdir)/../doc/gawkinet.texi - rm -fr eg stamp-eg - $(AWK) -f $(srcdir)/extract.awk $(srcdir)/../doc/gawk.texi $(srcdir)/../doc/gawkinet.texi - @echo 'some makes are stupid and will not check a directory' > stamp-eg - @echo 'against a file, so this file is a place holder. gack.' >> stamp-eg - -pwcat: $(srcdir)/eg/lib/pwcat.c - $(CC) $(CFLAGS) $(srcdir)/eg/lib/pwcat.c $(LDFLAGS) -o $@ - -grcat: $(srcdir)/eg/lib/grcat.c - $(CC) $(CFLAGS) $(srcdir)/eg/lib/grcat.c $(LDFLAGS) -o $@ - -igawk: $(srcdir)/eg/prog/igawk.sh - cp $(srcdir)/eg/prog/igawk.sh $@ ; chmod 755 $@ - -passwd.awk: $(srcdir)/eg/lib/passwdawk.in - (cd $(srcdir)/eg/lib ; \ - sed 's;/usr/local/libexec/awk;$(libexecdir);' < passwdawk.in) > passwd.awk - -group.awk: $(srcdir)/eg/lib/groupawk.in - (cd $(srcdir)/eg/lib ; \ - sed 's;/usr/local/libexec/awk;$(libexecdir);' < groupawk.in) > group.awk - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. 
-.NOEXPORT:
diff --git a/contrib/awk/awklib/eg/data/BBS-list b/contrib/awk/awklib/eg/data/BBS-list
deleted file mode 100644
index 1007417..0000000
--- a/contrib/awk/awklib/eg/data/BBS-list
+++ /dev/null
@@ -1,11 +0,0 @@
-aardvark 555-5553 1200/300 B
-alpo-net 555-3412 2400/1200/300 A
-barfly 555-7685 1200/300 A
-bites 555-1675 2400/1200/300 A
-camelot 555-0542 300 C
-core 555-2912 1200/300 C
-fooey 555-1234 2400/1200/300 B
-foot 555-6699 1200/300 B
-macfoo 555-6480 1200/300 A
-sdace 555-3430 2400/1200/300 A
-sabafoo 555-2127 1200/300 C
diff --git a/contrib/awk/awklib/eg/data/inventory-shipped b/contrib/awk/awklib/eg/data/inventory-shipped
deleted file mode 100644
index 6788a0e..0000000
--- a/contrib/awk/awklib/eg/data/inventory-shipped
+++ /dev/null
@@ -1,17 +0,0 @@
-Jan 13 25 15 115
-Feb 15 32 24 226
-Mar 15 24 34 228
-Apr 31 52 63 420
-May 16 34 29 208
-Jun 31 42 75 492
-Jul 24 34 67 436
-Aug 15 34 47 316
-Sep 13 55 37 277
-Oct 29 54 68 525
-Nov 20 87 82 577
-Dec 17 35 61 401
-
-Jan 21 36 64 620
-Feb 26 58 80 652
-Mar 24 75 70 495
-Apr 21 70 74 514
diff --git a/contrib/awk/awklib/eg/lib/assert.awk b/contrib/awk/awklib/eg/lib/assert.awk
deleted file mode 100644
index bbfc066..0000000
--- a/contrib/awk/awklib/eg/lib/assert.awk
+++ /dev/null
@@ -1,20 +0,0 @@
-# assert --- assert that a condition is true. Otherwise exit.
-
-#
-# Arnold Robbins, arnold@gnu.org, Public Domain
-# May, 1993
-
-function assert(condition, string)
-{
-    if (! condition) {
-        printf("%s:%d: assertion failed: %s\n",
-            FILENAME, FNR, string) > "/dev/stderr"
-        _assert_exit = 1
-        exit 1
-    }
-}
-
-END {
-    if (_assert_exit)
-        exit 1
-}
diff --git a/contrib/awk/awklib/eg/lib/bits2str.awk b/contrib/awk/awklib/eg/lib/bits2str.awk
deleted file mode 100644
index 9725ee8..0000000
--- a/contrib/awk/awklib/eg/lib/bits2str.awk
+++ /dev/null
@@ -1,16 +0,0 @@
-# bits2str --- turn a byte into readable 1's and 0's
-
-function bits2str(bits, data, mask)
-{
-    if (bits == 0)
-        return "0"
-
-    mask = 1
-    for (; bits != 0; bits = rshift(bits, 1))
-        data = (and(bits, mask) ?
"1" : "0") data - - while ((length(data) % 8) != 0) - data = "0" data - - return data -} diff --git a/contrib/awk/awklib/eg/lib/cliff_rand.awk b/contrib/awk/awklib/eg/lib/cliff_rand.awk deleted file mode 100644 index 345447e..0000000 --- a/contrib/awk/awklib/eg/lib/cliff_rand.awk +++ /dev/null @@ -1,14 +0,0 @@ -# cliff_rand.awk --- generate Cliff random numbers -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# December 2000 - -BEGIN { _cliff_seed = 0.1 } - -function cliff_rand() -{ - _cliff_seed = (100 * log(_cliff_seed)) % 1 - if (_cliff_seed < 0) - _cliff_seed = - _cliff_seed - return _cliff_seed -} diff --git a/contrib/awk/awklib/eg/lib/ctime.awk b/contrib/awk/awklib/eg/lib/ctime.awk deleted file mode 100644 index 0a50d26..0000000 --- a/contrib/awk/awklib/eg/lib/ctime.awk +++ /dev/null @@ -1,11 +0,0 @@ -# ctime.awk -# -# awk version of C ctime(3) function - -function ctime(ts, format) -{ - format = "%a %b %d %H:%M:%S %Z %Y" - if (ts == 0) - ts = systime() # use current time as default - return strftime(format, ts) -} diff --git a/contrib/awk/awklib/eg/lib/ftrans.awk b/contrib/awk/awklib/eg/lib/ftrans.awk deleted file mode 100644 index b0743e2..0000000 --- a/contrib/awk/awklib/eg/lib/ftrans.awk +++ /dev/null @@ -1,15 +0,0 @@ -# ftrans.awk --- handle data file transitions -# -# user supplies beginfile() and endfile() functions -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# November 1992 - -FNR == 1 { - if (_filename_ != "") - endfile(_filename_) - _filename_ = FILENAME - beginfile(FILENAME) -} - -END { endfile(_filename_) } diff --git a/contrib/awk/awklib/eg/lib/getopt.awk b/contrib/awk/awklib/eg/lib/getopt.awk deleted file mode 100644 index 9375305..0000000 --- a/contrib/awk/awklib/eg/lib/getopt.awk +++ /dev/null @@ -1,80 +0,0 @@ -# getopt.awk --- do C library getopt(3) function in awk -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# -# Initial version: March, 1991 -# Revised: May, 1993 - -# External variables: -# Optind -- index in ARGV of first non-option argument -# Optarg -- string value of argument to current option -# Opterr -- if nonzero, print our own diagnostic -# Optopt -- current option letter - -# Returns: -# -1 at end of options -# ? for unrecognized option -# a character representing the current option - -# Private Data: -# _opti -- index in multi-flag option, e.g., -abc -function getopt(argc, argv, options, thisopt, i) -{ - if (length(options) == 0) # no options given - return -1 - - if (argv[Optind] == "--") { # all done - Optind++ - _opti = 0 - return -1 - } else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) { - _opti = 0 - return -1 - } - if (_opti == 0) - _opti = 2 - thisopt = substr(argv[Optind], _opti, 1) - Optopt = thisopt - i = index(options, thisopt) - if (i == 0) { - if (Opterr) - printf("%c -- invalid option\n", - thisopt) > "/dev/stderr" - if (_opti >= length(argv[Optind])) { - Optind++ - _opti = 0 - } else - _opti++ - return "?" 
- } - if (substr(options, i + 1, 1) == ":") { - # get option argument - if (length(substr(argv[Optind], _opti + 1)) > 0) - Optarg = substr(argv[Optind], _opti + 1) - else - Optarg = argv[++Optind] - _opti = 0 - } else - Optarg = "" - if (_opti == 0 || _opti >= length(argv[Optind])) { - Optind++ - _opti = 0 - } else - _opti++ - return thisopt -} -BEGIN { - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - - # test program - if (_getopt_test) { - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", - _go_c, Optarg) - printf("non-option arguments:\n") - for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) - } -} diff --git a/contrib/awk/awklib/eg/lib/gettime.awk b/contrib/awk/awklib/eg/lib/gettime.awk deleted file mode 100644 index c425123..0000000 --- a/contrib/awk/awklib/eg/lib/gettime.awk +++ /dev/null @@ -1,62 +0,0 @@ -# gettimeofday.awk --- get the time of day in a usable format -# -# Arnold Robbins, arnold@gnu.org, Public Domain, May 1993 -# - -# Returns a string in the format of output of date(1) -# Populates the array argument time with individual values: -# time["second"] -- seconds (0 - 59) -# time["minute"] -- minutes (0 - 59) -# time["hour"] -- hours (0 - 23) -# time["althour"] -- hours (0 - 12) -# time["monthday"] -- day of month (1 - 31) -# time["month"] -- month of year (1 - 12) -# time["monthname"] -- name of the month -# time["shortmonth"] -- short name of the month -# time["year"] -- year modulo 100 (0 - 99) -# time["fullyear"] -- full year -# time["weekday"] -- day of week (Sunday = 0) -# time["altweekday"] -- day of week (Monday = 0) -# time["dayname"] -- name of weekday -# time["shortdayname"] -- short name of weekday -# time["yearday"] -- day of year (0 - 365) -# time["timezone"] -- abbreviation of timezone name -# time["ampm"] -- AM or PM designation -# time["weeknum"] -- week number, Sunday first day -# time["altweeknum"] -- week number, Monday first day - -function gettimeofday(time, ret, now, i) -{ - # get time once, avoids unnecessary system calls - now = systime() - - # return date(1)-style output - ret = strftime("%a %b %d %H:%M:%S %Z %Y", now) - - # clear out target array - delete time - - # fill in values, force numeric values to be - # numeric by adding 0 - time["second"] = strftime("%S", now) + 0 - time["minute"] = strftime("%M", now) + 0 - time["hour"] = strftime("%H", now) + 0 - time["althour"] = strftime("%I", now) + 0 - time["monthday"] = strftime("%d", now) + 0 - time["month"] = strftime("%m", now) + 0 - time["monthname"] = strftime("%B", now) - time["shortmonth"] = strftime("%b", now) - time["year"] = strftime("%y", now) + 0 - time["fullyear"] = strftime("%Y", now) + 0 - time["weekday"] = strftime("%w", now) + 0 - time["altweekday"] = strftime("%u", now) + 0 - time["dayname"] = strftime("%A", now) - time["shortdayname"] = strftime("%a", now) - time["yearday"] = strftime("%j", now) + 0 - time["timezone"] = strftime("%Z", now) - time["ampm"] = strftime("%p", now) - time["weeknum"] = strftime("%U", now) + 0 - time["altweeknum"] = strftime("%W", now) + 0 - - return ret -} diff --git a/contrib/awk/awklib/eg/lib/grcat.c b/contrib/awk/awklib/eg/lib/grcat.c deleted file mode 100644 index d34ddd5..0000000 --- a/contrib/awk/awklib/eg/lib/grcat.c +++ /dev/null @@ -1,34 +0,0 @@ -/* - * grcat.c - * - * Generate a printable version of the group database - */ -/* - * Arnold Robbins, arnold@gnu.org, May 1993 - * Public Domain - */ - -#include -#include - -int -main(argc, argv) -int 
argc; -char **argv; -{ - struct group *g; - int i; - - while ((g = getgrent()) != NULL) { - printf("%s:%s:%d:", g->gr_name, g->gr_passwd, - g->gr_gid); - for (i = 0; g->gr_mem[i] != NULL; i++) { - printf("%s", g->gr_mem[i]); - if (g->gr_mem[i+1] != NULL) - putchar(','); - } - putchar('\n'); - } - endgrent(); - exit(0); -} diff --git a/contrib/awk/awklib/eg/lib/groupawk.in b/contrib/awk/awklib/eg/lib/groupawk.in deleted file mode 100644 index 9d8b402..0000000 --- a/contrib/awk/awklib/eg/lib/groupawk.in +++ /dev/null @@ -1,87 +0,0 @@ -# group.awk --- functions for dealing with the group file -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 -# Revised October 2000 - -BEGIN \ -{ - # Change to suit your system - _gr_awklib = "/usr/local/libexec/awk/" -} - -function _gr_init( oldfs, oldrs, olddol0, grcat, - using_fw, n, a, i) -{ - if (_gr_inited) - return - - oldfs = FS - oldrs = RS - olddol0 = $0 - using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") - FS = ":" - RS = "\n" - - grcat = _gr_awklib "grcat" - while ((grcat | getline) > 0) { - if ($1 in _gr_byname) - _gr_byname[$1] = _gr_byname[$1] "," $4 - else - _gr_byname[$1] = $0 - if ($3 in _gr_bygid) - _gr_bygid[$3] = _gr_bygid[$3] "," $4 - else - _gr_bygid[$3] = $0 - - n = split($4, a, "[ \t]*,[ \t]*") - for (i = 1; i <= n; i++) - if (a[i] in _gr_groupsbyuser) - _gr_groupsbyuser[a[i]] = \ - _gr_groupsbyuser[a[i]] " " $1 - else - _gr_groupsbyuser[a[i]] = $1 - - _gr_bycount[++_gr_count] = $0 - } - close(grcat) - _gr_count = 0 - _gr_inited++ - FS = oldfs - if (using_fw) - FIELDWIDTHS = FIELDWIDTHS - RS = oldrs - $0 = olddol0 -} -function getgrnam(group) -{ - _gr_init() - if (group in _gr_byname) - return _gr_byname[group] - return "" -} -function getgrgid(gid) -{ - _gr_init() - if (gid in _gr_bygid) - return _gr_bygid[gid] - return "" -} -function getgruser(user) -{ - _gr_init() - if (user in _gr_groupsbyuser) - return _gr_groupsbyuser[user] - return "" -} -function getgrent() -{ - _gr_init() - if (++_gr_count in _gr_bycount) - return _gr_bycount[_gr_count] - return "" -} -function endgrent() -{ - _gr_count = 0 -} diff --git a/contrib/awk/awklib/eg/lib/join.awk b/contrib/awk/awklib/eg/lib/join.awk deleted file mode 100644 index e17b427..0000000 --- a/contrib/awk/awklib/eg/lib/join.awk +++ /dev/null @@ -1,16 +0,0 @@ -# join.awk --- join an array into a string -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -function join(array, start, end, sep, result, i) -{ - if (sep == "") - sep = " " - else if (sep == SUBSEP) # magic value - sep = "" - result = array[start] - for (i = start + 1; i <= end; i++) - result = result sep array[i] - return result -} diff --git a/contrib/awk/awklib/eg/lib/libintl.awk b/contrib/awk/awklib/eg/lib/libintl.awk deleted file mode 100644 index a9402c2..0000000 --- a/contrib/awk/awklib/eg/lib/libintl.awk +++ /dev/null @@ -1,9 +0,0 @@ -function bindtextdomain(dir, domain) -{ - return dir -} - -function dcgettext(string, domain, category) -{ - return string -} diff --git a/contrib/awk/awklib/eg/lib/mktime.awk b/contrib/awk/awklib/eg/lib/mktime.awk deleted file mode 100644 index 57ff20e..0000000 --- a/contrib/awk/awklib/eg/lib/mktime.awk +++ /dev/null @@ -1,105 +0,0 @@ -# mktime.awk --- convert a canonical date representation -# into a timestamp -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -BEGIN \ -{ - # Initialize table of month lengths - _tm_months[0,1] = _tm_months[1,1] = 31 - _tm_months[0,2] = 28; _tm_months[1,2] = 29 - _tm_months[0,3] = _tm_months[1,3] = 31 - _tm_months[0,4] = 
_tm_months[1,4] = 30 - _tm_months[0,5] = _tm_months[1,5] = 31 - _tm_months[0,6] = _tm_months[1,6] = 30 - _tm_months[0,7] = _tm_months[1,7] = 31 - _tm_months[0,8] = _tm_months[1,8] = 31 - _tm_months[0,9] = _tm_months[1,9] = 30 - _tm_months[0,10] = _tm_months[1,10] = 31 - _tm_months[0,11] = _tm_months[1,11] = 30 - _tm_months[0,12] = _tm_months[1,12] = 31 -} -# decide if a year is a leap year -function _tm_isleap(year, ret) -{ - ret = (year % 4 == 0 && year % 100 != 0) || - (year % 400 == 0) - - return ret -} -# convert a date into seconds -function _tm_addup(a, total, yearsecs, daysecs, - hoursecs, i, j) -{ - hoursecs = 60 * 60 - daysecs = 24 * hoursecs - yearsecs = 365 * daysecs - - total = (a[1] - 1970) * yearsecs - - # extra day for leap years - for (i = 1970; i < a[1]; i++) - if (_tm_isleap(i)) - total += daysecs - - j = _tm_isleap(a[1]) - for (i = 1; i < a[2]; i++) - total += _tm_months[j, i] * daysecs - - total += (a[3] - 1) * daysecs - total += a[4] * hoursecs - total += a[5] * 60 - total += a[6] - - return total -} -# mktime --- convert a date into seconds, -# compensate for time zone - -function mktime(str, res1, res2, a, b, i, j, t, diff) -{ - i = split(str, a, " ") # don't rely on FS - - if (i != 6) - return -1 - - # force numeric - for (j in a) - a[j] += 0 - - # validate - if (a[1] < 1970 || - a[2] < 1 || a[2] > 12 || - a[3] < 1 || a[3] > 31 || - a[4] < 0 || a[4] > 23 || - a[5] < 0 || a[5] > 59 || - a[6] < 0 || a[6] > 60 ) - return -1 - - res1 = _tm_addup(a) - t = strftime("%Y %m %d %H %M %S", res1) - - if (_tm_debug) - printf("(%s) -> (%s)\n", str, t) > "/dev/stderr" - - split(t, b, " ") - res2 = _tm_addup(b) - - diff = res1 - res2 - - if (_tm_debug) - printf("diff = %d seconds\n", diff) > "/dev/stderr" - - res1 += diff - - return res1 -} -BEGIN { - if (_tm_test) { - printf "Enter date as yyyy mm dd hh mm ss: " - getline _tm_test_date - t = mktime(_tm_test_date) - r = strftime("%Y %m %d %H %M %S", t) - printf "Got back (%s)\n", r - } -} diff --git a/contrib/awk/awklib/eg/lib/nextfile.awk b/contrib/awk/awklib/eg/lib/nextfile.awk deleted file mode 100644 index caedf0e..0000000 --- a/contrib/awk/awklib/eg/lib/nextfile.awk +++ /dev/null @@ -1,16 +0,0 @@ -# nextfile --- skip remaining records in current file -# correctly handle successive occurrences of the same file -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May, 1993 - -# this should be read in before the "main" awk program - -function nextfile() { _abandon_ = FILENAME; next } - -_abandon_ == FILENAME { - if (FNR == 1) - _abandon_ = "" - else - next -} diff --git a/contrib/awk/awklib/eg/lib/noassign.awk b/contrib/awk/awklib/eg/lib/noassign.awk deleted file mode 100644 index d6d176e..0000000 --- a/contrib/awk/awklib/eg/lib/noassign.awk +++ /dev/null @@ -1,17 +0,0 @@ -# noassign.awk --- library file to avoid the need for a -# special option that disables command-line assignments -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# October 1999 - -function disable_assigns(argc, argv, i) -{ - for (i = 1; i < argc; i++) - if (argv[i] ~ /^[A-Za-z_][A-Za-z_0-9]*=.*/) - argv[i] = ("./" argv[i]) -} - -BEGIN { - if (No_command_assign) - disable_assigns(ARGC, ARGV) -} diff --git a/contrib/awk/awklib/eg/lib/ord.awk b/contrib/awk/awklib/eg/lib/ord.awk deleted file mode 100644 index 3eacbcc..0000000 --- a/contrib/awk/awklib/eg/lib/ord.awk +++ /dev/null @@ -1,44 +0,0 @@ -# ord.awk --- do ord and chr - -# Global identifiers: -# _ord_: numerical values indexed by characters -# _ord_init: function to initialize _ord_ -# -# Arnold 
Robbins, arnold@gnu.org, Public Domain -# 16 January, 1992 -# 20 July, 1992, revised - -BEGIN { _ord_init() } - -function _ord_init( low, high, i, t) -{ - low = sprintf("%c", 7) # BEL is ascii 7 - if (low == "\a") { # regular ascii - low = 0 - high = 127 - } else if (sprintf("%c", 128 + 7) == "\a") { - # ascii, mark parity - low = 128 - high = 255 - } else { # ebcdic(!) - low = 0 - high = 255 - } - - for (i = low; i <= high; i++) { - t = sprintf("%c", i) - _ord_[t] = i - } -} -function ord(str, c) -{ - # only first character is of interest - c = substr(str, 1, 1) - return _ord_[c] -} - -function chr(c) -{ - # force c to be numeric by adding 0 - return sprintf("%c", c + 0) -} diff --git a/contrib/awk/awklib/eg/lib/passwdawk.in b/contrib/awk/awklib/eg/lib/passwdawk.in deleted file mode 100644 index 5f6f9e2..0000000 --- a/contrib/awk/awklib/eg/lib/passwdawk.in +++ /dev/null @@ -1,63 +0,0 @@ -# passwd.awk --- access password file information -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 -# Revised October 2000 - -BEGIN { - # tailor this to suit your system - _pw_awklib = "/usr/local/libexec/awk/" -} - -function _pw_init( oldfs, oldrs, olddol0, pwcat, using_fw) -{ - if (_pw_inited) - return - - oldfs = FS - oldrs = RS - olddol0 = $0 - using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") - FS = ":" - RS = "\n" - - pwcat = _pw_awklib "pwcat" - while ((pwcat | getline) > 0) { - _pw_byname[$1] = $0 - _pw_byuid[$3] = $0 - _pw_bycount[++_pw_total] = $0 - } - close(pwcat) - _pw_count = 0 - _pw_inited = 1 - FS = oldfs - if (using_fw) - FIELDWIDTHS = FIELDWIDTHS - RS = oldrs - $0 = olddol0 -} -function getpwnam(name) -{ - _pw_init() - if (name in _pw_byname) - return _pw_byname[name] - return "" -} -function getpwuid(uid) -{ - _pw_init() - if (uid in _pw_byuid) - return _pw_byuid[uid] - return "" -} -function getpwent() -{ - _pw_init() - if (_pw_count < _pw_total) - return _pw_bycount[++_pw_count] - return "" -} -function endpwent() -{ - _pw_count = 0 -} diff --git a/contrib/awk/awklib/eg/lib/pwcat.c b/contrib/awk/awklib/eg/lib/pwcat.c deleted file mode 100644 index b9a7134..0000000 --- a/contrib/awk/awklib/eg/lib/pwcat.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * pwcat.c - * - * Generate a printable version of the password database - */ -/* - * Arnold Robbins, arnold@gnu.org, May 1993 - * Public Domain - */ - -#include -#include - -int -main(argc, argv) -int argc; -char **argv; -{ - struct passwd *p; - - while ((p = getpwent()) != NULL) - printf("%s:%s:%d:%d:%s:%s:%s\n", - p->pw_name, p->pw_passwd, p->pw_uid, - p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell); - - endpwent(); - exit(0); -} diff --git a/contrib/awk/awklib/eg/lib/readable.awk b/contrib/awk/awklib/eg/lib/readable.awk deleted file mode 100644 index 51705ae..0000000 --- a/contrib/awk/awklib/eg/lib/readable.awk +++ /dev/null @@ -1,16 +0,0 @@ -# readable.awk --- library file to skip over unreadable files -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# October 2000 - -BEGIN { - for (i = 1; i < ARGC; i++) { - if (ARGV[i] ~ /^[A-Za-z_][A-Za-z0-9_]*=.*/ \ - || ARGV[i] == "-") - continue # assignment or standard input - else if ((getline junk < ARGV[i]) < 0) # unreadable - delete ARGV[i] - else - close(ARGV[i]) - } -} diff --git a/contrib/awk/awklib/eg/lib/rewind.awk b/contrib/awk/awklib/eg/lib/rewind.awk deleted file mode 100644 index 3328661..0000000 --- a/contrib/awk/awklib/eg/lib/rewind.awk +++ /dev/null @@ -1,20 +0,0 @@ -# rewind.awk --- rewind the current file and start over -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# 
September 2000 - -function rewind( i) -{ - # shift remaining arguments up - for (i = ARGC; i > ARGIND; i--) - ARGV[i] = ARGV[i-1] - - # make sure gawk knows to keep going - ARGC++ - - # make current file next to get done - ARGV[ARGIND+1] = FILENAME - - # do it - nextfile -} diff --git a/contrib/awk/awklib/eg/lib/round.awk b/contrib/awk/awklib/eg/lib/round.awk deleted file mode 100644 index bf16c2b..0000000 --- a/contrib/awk/awklib/eg/lib/round.awk +++ /dev/null @@ -1,32 +0,0 @@ -# round --- do normal rounding -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# August, 1996 - -function round(x, ival, aval, fraction) -{ - ival = int(x) # integer part, int() truncates - - # see if fractional part - if (ival == x) # no fraction - return x - - if (x < 0) { - aval = -x # absolute value - ival = int(aval) - fraction = aval - ival - if (fraction >= .5) - return int(x) - 1 # -2.5 --> -3 - else - return int(x) # -2.3 --> -2 - } else { - fraction = x - ival - if (fraction >= .5) - return ival + 1 - else - return ival - } -} - -# test harness -{ print $0, round($0) } diff --git a/contrib/awk/awklib/eg/misc/arraymax.awk b/contrib/awk/awklib/eg/misc/arraymax.awk deleted file mode 100644 index 20dd176..0000000 --- a/contrib/awk/awklib/eg/misc/arraymax.awk +++ /dev/null @@ -1,10 +0,0 @@ -{ - if ($1 > max) - max = $1 - arr[$1] = $0 -} - -END { - for (x = 1; x <= max; x++) - print arr[x] -} diff --git a/contrib/awk/awklib/eg/misc/arraymax.data b/contrib/awk/awklib/eg/misc/arraymax.data deleted file mode 100644 index dbee328..0000000 --- a/contrib/awk/awklib/eg/misc/arraymax.data +++ /dev/null @@ -1,5 +0,0 @@ -5 I am the Five man -2 Who are you? The new number two! -4 . . . And four on the floor -1 Who is number one? -3 I three you. diff --git a/contrib/awk/awklib/eg/misc/findpat.awk b/contrib/awk/awklib/eg/misc/findpat.awk deleted file mode 100644 index e9bef9e..0000000 --- a/contrib/awk/awklib/eg/misc/findpat.awk +++ /dev/null @@ -1,10 +0,0 @@ -{ - if ($1 == "FIND") - regex = $2 - else { - where = match($0, regex) - if (where != 0) - print "Match of", regex, "found at", - where, "in", $0 - } -} diff --git a/contrib/awk/awklib/eg/misc/findpat.data b/contrib/awk/awklib/eg/misc/findpat.data deleted file mode 100644 index 9f72969..0000000 --- a/contrib/awk/awklib/eg/misc/findpat.data +++ /dev/null @@ -1,7 +0,0 @@ -FIND ru+n -My program runs -but not very quickly -FIND Melvin -JF+KM -This line is property of Reality Engineering Co. -Melvin was here. 
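The findpat example removed above hinges on awk's match() built-in: match(s, re) returns the character position of the first match of re in s (0 if there is none) and, as a side effect, sets RSTART and RLENGTH. A minimal stand-alone sketch of the same idea, reusing the ru+n pattern and a record from findpat.data (this snippet is illustrative only and is not one of the removed files):

    BEGIN {
        line = "My program runs"     # sample record from findpat.data
        regex = "ru+n"               # pattern from the FIND record in findpat.data
        where = match(line, regex)   # position of first match, 0 if none
        if (where != 0)
            printf("Match of %s found at %d in %s: \"%s\"\n",
                regex, where, line, substr(line, RSTART, RLENGTH))
    }

Run with "gawk -f" (or any POSIX awk), it prints the match position (12) and the matched text ("run"), which is the same information the findpat.awk program reports per input line.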
diff --git a/contrib/awk/awklib/eg/misc/findpat.sh b/contrib/awk/awklib/eg/misc/findpat.sh deleted file mode 100644 index 3971032..0000000 --- a/contrib/awk/awklib/eg/misc/findpat.sh +++ /dev/null @@ -1,10 +0,0 @@ -awk '{ - if ($1 == "FIND") - regex = $2 - else { - where = match($0, regex) - if (where != 0) - print "Match of", regex, "found at", \ - where, "in", $0 - } -}' diff --git a/contrib/awk/awklib/eg/network/PostAgent.sh b/contrib/awk/awklib/eg/network/PostAgent.sh deleted file mode 100644 index ccf9a68..0000000 --- a/contrib/awk/awklib/eg/network/PostAgent.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/sh -MobAg=/tmp/MobileAgent.$$ -# direct script to mobile agent file -cat > $MobAg -# execute agent concurrently -gawk -f $MobAg $MobAg > /dev/null & -# HTTP header, terminator and body -gawk 'BEGIN { print "\r\nAgent started" }' -rm $MobAg # delete script file of agent diff --git a/contrib/awk/awklib/eg/network/coreserv.awk b/contrib/awk/awklib/eg/network/coreserv.awk deleted file mode 100644 index e9e6171..0000000 --- a/contrib/awk/awklib/eg/network/coreserv.awk +++ /dev/null @@ -1,113 +0,0 @@ -# CGI Library and core of a web server -# -# Juergen Kahrs, Juergen.Kahrs@vr-web.de -# with Arnold Robbins, arnold@gnu.org -# September 2000 - -# Global arrays -# GETARG --- arguments to CGI GET command -# MENU --- menu items (path names) -# PARAM --- parameters of form x=y - -# Optional variable MyHost contains host address -# Optional variable MyPort contains port number -# Needs TopHeader, TopDoc, TopFooter -# Sets MyPrefix, HttpService, Status, Reason - -BEGIN { - if (MyHost == "") { - "uname -n" | getline MyHost - close("uname -n") - } - if (MyPort == 0) MyPort = 8080 - HttpService = "/inet/tcp/" MyPort "/0/0" - MyPrefix = "http://" MyHost ":" MyPort - SetUpServer() - while ("awk" != "complex") { - # header lines are terminated this way - RS = ORS = "\r\n" - Status = 200 # this means OK - Reason = "OK" - Header = TopHeader - Document = TopDoc - Footer = TopFooter - if (GETARG["Method"] == "GET") { - HandleGET() - } else if (GETARG["Method"] == "HEAD") { - # not yet implemented - } else if (GETARG["Method"] != "") { - print "bad method", GETARG["Method"] - } - Prompt = Header Document Footer - print "HTTP/1.0", Status, Reason |& HttpService - print "Connection: Close" |& HttpService - print "Pragma: no-cache" |& HttpService - len = length(Prompt) + length(ORS) - print "Content-length:", len |& HttpService - print ORS Prompt |& HttpService - # ignore all the header lines - while ((HttpService |& getline) > 0) - continue - # stop talking to this client - close(HttpService) - # wait for new client request - HttpService |& getline - # do some logging - print systime(), strftime(), $0 - CGI_setup($1, $2, $3) - } -} - -function CGI_setup( method, uri, version, i) -{ - delete GETARG - delete MENU - delete PARAM - GETARG["Method"] = method - GETARG["URI"] = uri - GETARG["Version"] = version - - i = index(uri, "?") - if (i > 0) { # is there a "?" indicating a CGI request? 
- split(substr(uri, 1, i-1), MENU, "[/:]") - split(substr(uri, i+1), PARAM, "&") - for (i in PARAM) { - PARAM[i] = _CGI_decode(PARAM[i]) - j = index(PARAM[i], "=") - GETARG[substr(PARAM[i], 1, j-1)] = \ - substr(PARAM[i], j+1) - } - } else { # there is no "?", no need for splitting PARAMs - split(uri, MENU, "[/:]") - } - for (i in MENU) # decode characters in path - if (i > 4) # but not those in host name - MENU[i] = _CGI_decode(MENU[i]) -} -function _CGI_decode(str, hexdigs, i, pre, code1, code2, - val, result) -{ - hexdigs = "123456789abcdef" - - i = index(str, "%") - if (i == 0) # no work to do - return str - - do { - pre = substr(str, 1, i-1) # part before %xx - code1 = substr(str, i+1, 1) # first hex digit - code2 = substr(str, i+2, 1) # second hex digit - str = substr(str, i+3) # rest of string - - code1 = tolower(code1) - code2 = tolower(code2) - val = index(hexdigs, code1) * 16 \ - + index(hexdigs, code2) - - result = result pre sprintf("%c", val) - i = index(str, "%") - } while (i != 0) - if (length(str) > 0) - result = result str - return result -} diff --git a/contrib/awk/awklib/eg/network/eliza.awk b/contrib/awk/awklib/eg/network/eliza.awk deleted file mode 100644 index 2e022b4..0000000 --- a/contrib/awk/awklib/eg/network/eliza.awk +++ /dev/null @@ -1,270 +0,0 @@ -function SetUpServer() { - SetUpEliza() - TopHeader = \ - "An HTTP-based System with GAWK\ - \ - " - TopDoc = "\ -

Please choose one of the following actions:

\ -
" - TopFooter = "" -} -function HandleGET() { - # A real HTTP server would treat some parts of the URI as a file name. - # We take parts of the URI as menu choices and go on accordingly. - if(MENU[2] == "AboutServer") { - Document = "This is not a CGI script.\ - This is an httpd, an HTML file, and a CGI script all \ - in one GAWK script. It needs no separate www-server, \ - no installation, and no root privileges.\ -

To run it, do this:

    \ -
  • start this script with \"gawk -f httpserver.awk\",
  • \ -
  • and on the same host let your www browser open location\ - \"http://localhost:8080\"
  • \ -
\

\ Details of HTTP come from:

    \ -
  • Hethmon: Illustrated Guide to HTTP

    \ -
  • RFC 2068

JK 14.9.1997

" - } else if (MENU[2] == "AboutELIZA") { - Document = "This is an implementation of the famous ELIZA\ - program by Joseph Weizenbaum. It is written in GAWK and\ -/bin/sh: expad: command not found - } else if (MENU[2] == "StartELIZA") { - gsub(/\+/, " ", GETARG["YouSay"]) - # Here we also have to substitute coded special characters - Document = "
" \ - "

" ElizaSays(GETARG["YouSay"]) "

\ -

\ -

" - } -} -function ElizaSays(YouSay) { - if (YouSay == "") { - cost = 0 - answer = "HI, IM ELIZA, TELL ME YOUR PROBLEM" - } else { - q = toupper(YouSay) - gsub("'", "", q) - if(q == qold) { - answer = "PLEASE DONT REPEAT YOURSELF !" - } else { - if (index(q, "SHUT UP") > 0) { - answer = "WELL, PLEASE PAY YOUR BILL. ITS EXACTLY ... $"\ - int(100*rand()+30+cost/100) - } else { - qold = q - w = "-" # no keyword recognized yet - for (i in k) { # search for keywords - if (index(q, i) > 0) { - w = i - break - } - } - if (w == "-") { # no keyword, take old subject - w = wold - subj = subjold - } else { # find subject - subj = substr(q, index(q, w) + length(w)+1) - wold = w - subjold = subj # remember keyword and subject - } - for (i in conj) - gsub(i, conj[i], q) # conjugation - # from all answers to this keyword, select one randomly - answer = r[indices[int(split(k[w], indices) * rand()) + 1]] - # insert subject into answer - gsub("_", subj, answer) - } - } - } - cost += length(answer) # for later payment : 1 cent per character - return answer -} -function SetUpEliza() { - srand() - wold = "-" - subjold = " " - - # table for conjugation - conj[" ARE " ] = " AM " - conj["WERE " ] = "WAS " - conj[" YOU " ] = " I " - conj["YOUR " ] = "MY " - conj[" IVE " ] =\ - conj[" I HAVE " ] = " YOU HAVE " - conj[" YOUVE " ] =\ - conj[" YOU HAVE "] = " I HAVE " - conj[" IM " ] =\ - conj[" I AM " ] = " YOU ARE " - conj[" YOURE " ] =\ - conj[" YOU ARE " ] = " I AM " - - # table of all answers - r[1] = "DONT YOU BELIEVE THAT I CAN _" - r[2] = "PERHAPS YOU WOULD LIKE TO BE ABLE TO _ ?" - r[3] = "YOU WANT ME TO BE ABLE TO _ ?" - r[4] = "PERHAPS YOU DONT WANT TO _ " - r[5] = "DO YOU WANT TO BE ABLE TO _ ?" - r[6] = "WHAT MAKES YOU THINK I AM _ ?" - r[7] = "DOES IT PLEASE YOU TO BELIEVE I AM _ ?" - r[8] = "PERHAPS YOU WOULD LIKE TO BE _ ?" - r[9] = "DO YOU SOMETIMES WISH YOU WERE _ ?" - r[10] = "DONT YOU REALLY _ ?" - r[11] = "WHY DONT YOU _ ?" - r[12] = "DO YOU WISH TO BE ABLE TO _ ?" - r[13] = "DOES THAT TROUBLE YOU ?" - r[14] = "TELL ME MORE ABOUT SUCH FEELINGS" - r[15] = "DO YOU OFTEN FEEL _ ?" - r[16] = "DO YOU ENJOY FEELING _ ?" - r[17] = "DO YOU REALLY BELIEVE I DONT _ ?" - r[18] = "PERHAPS IN GOOD TIME I WILL _ " - r[19] = "DO YOU WANT ME TO _ ?" - r[20] = "DO YOU THINK YOU SHOULD BE ABLE TO _ ?" - r[21] = "WHY CANT YOU _ ?" - r[22] = "WHY ARE YOU INTERESTED IN WHETHER OR NOT I AM _ ?" - r[23] = "WOULD YOU PREFER IF I WERE NOT _ ?" - r[24] = "PERHAPS IN YOUR FANTASIES I AM _ " - r[25] = "HOW DO YOU KNOW YOU CANT _ ?" - r[26] = "HAVE YOU TRIED ?" - r[27] = "PERHAPS YOU CAN NOW _ " - r[28] = "DID YOU COME TO ME BECAUSE YOU ARE _ ?" - r[29] = "HOW LONG HAVE YOU BEEN _ ?" - r[30] = "DO YOU BELIEVE ITS NORMAL TO BE _ ?" - r[31] = "DO YOU ENJOY BEING _ ?" - r[32] = "WE WERE DISCUSSING YOU -- NOT ME" - r[33] = "Oh, I _" - r[34] = "YOU'RE NOT REALLY TALKING ABOUT ME, ARE YOU ?" - r[35] = "WHAT WOULD IT MEAN TO YOU, IF YOU GOT _ ?" - r[36] = "WHY DO YOU WANT _ ?" - r[37] = "SUPPOSE YOU SOON GOT _" - r[38] = "WHAT IF YOU NEVER GOT _ ?" - r[39] = "I SOMETIMES ALSO WANT _" - r[40] = "WHY DO YOU ASK ?" - r[41] = "DOES THAT QUESTION INTEREST YOU ?" - r[42] = "WHAT ANSWER WOULD PLEASE YOU THE MOST ?" - r[43] = "WHAT DO YOU THINK ?" - r[44] = "ARE SUCH QUESTIONS IN YOUR MIND OFTEN ?" - r[45] = "WHAT IS IT THAT YOU REALLY WANT TO KNOW ?" - r[46] = "HAVE YOU ASKED ANYONE ELSE ?" - r[47] = "HAVE YOU ASKED SUCH QUESTIONS BEFORE ?" - r[48] = "WHAT ELSE COMES TO MIND WHEN YOU ASK THAT ?" 
- r[49] = "NAMES DON'T INTEREST ME" - r[50] = "I DONT CARE ABOUT NAMES -- PLEASE GO ON" - r[51] = "IS THAT THE REAL REASON ?" - r[52] = "DONT ANY OTHER REASONS COME TO MIND ?" - r[53] = "DOES THAT REASON EXPLAIN ANYTHING ELSE ?" - r[54] = "WHAT OTHER REASONS MIGHT THERE BE ?" - r[55] = "PLEASE DON'T APOLOGIZE !" - r[56] = "APOLOGIES ARE NOT NECESSARY" - r[57] = "WHAT FEELINGS DO YOU HAVE WHEN YOU APOLOGIZE ?" - r[58] = "DON'T BE SO DEFENSIVE" - r[59] = "WHAT DOES THAT DREAM SUGGEST TO YOU ?" - r[60] = "DO YOU DREAM OFTEN ?" - r[61] = "WHAT PERSONS APPEAR IN YOUR DREAMS ?" - r[62] = "ARE YOU DISTURBED BY YOUR DREAMS ?" - r[63] = "HOW DO YOU DO ... PLEASE STATE YOUR PROBLEM" - r[64] = "YOU DON'T SEEM QUITE CERTAIN" - r[65] = "WHY THE UNCERTAIN TONE ?" - r[66] = "CAN'T YOU BE MORE POSITIVE ?" - r[67] = "YOU AREN'T SURE ?" - r[68] = "DON'T YOU KNOW ?" - r[69] = "WHY NO _ ?" - r[70] = "DON'T SAY NO, IT'S ALWAYS SO NEGATIVE" - r[71] = "WHY NOT ?" - r[72] = "ARE YOU SURE ?" - r[73] = "WHY NO ?" - r[74] = "WHY ARE YOU CONCERNED ABOUT MY _ ?" - r[75] = "WHAT ABOUT YOUR OWN _ ?" - r[76] = "CAN'T YOU THINK ABOUT A SPECIFIC EXAMPLE ?" - r[77] = "WHEN ?" - r[78] = "WHAT ARE YOU THINKING OF ?" - r[79] = "REALLY, ALWAYS ?" - r[80] = "DO YOU REALLY THINK SO ?" - r[81] = "BUT YOU ARE NOT SURE YOU _ " - r[82] = "DO YOU DOUBT YOU _ ?" - r[83] = "IN WHAT WAY ?" - r[84] = "WHAT RESEMBLANCE DO YOU SEE ?" - r[85] = "WHAT DOES THE SIMILARITY SUGGEST TO YOU ?" - r[86] = "WHAT OTHER CONNECTION DO YOU SEE ?" - r[87] = "COULD THERE REALLY BE SOME CONNECTIONS ?" - r[88] = "HOW ?" - r[89] = "YOU SEEM QUITE POSITIVE" - r[90] = "ARE YOU SURE ?" - r[91] = "I SEE" - r[92] = "I UNDERSTAND" - r[93] = "WHY DO YOU BRING UP THE TOPIC OF FRIENDS ?" - r[94] = "DO YOUR FRIENDS WORRY YOU ?" - r[95] = "DO YOUR FRIENDS PICK ON YOU ?" - r[96] = "ARE YOU SURE YOU HAVE ANY FRIENDS ?" - r[97] = "DO YOU IMPOSE ON YOUR FRIENDS ?" - r[98] = "PERHAPS YOUR LOVE FOR FRIENDS WORRIES YOU" - r[99] = "DO COMPUTERS WORRY YOU ?" - r[100] = "ARE YOU TALKING ABOUT ME IN PARTICULAR ?" - r[101] = "ARE YOU FRIGHTENED BY MACHINES ?" - r[102] = "WHY DO YOU MENTION COMPUTERS ?" - r[103] = "WHAT DO YOU THINK MACHINES HAVE TO DO WITH YOUR PROBLEMS ?" - r[104] = "DON'T YOU THINK COMPUTERS CAN HELP PEOPLE ?" - r[105] = "WHAT IS IT ABOUT MACHINES THAT WORRIES YOU ?" - r[106] = "SAY, DO YOU HAVE ANY PSYCHOLOGICAL PROBLEMS ?" - r[107] = "WHAT DOES THAT SUGGEST TO YOU ?" - r[108] = "I SEE" - r[109] = "IM NOT SURE I UNDERSTAND YOU FULLY" - r[110] = "COME COME ELUCIDATE YOUR THOUGHTS" - r[111] = "CAN YOU ELABORATE ON THAT ?" - r[112] = "THAT IS QUITE INTERESTING" - r[113] = "WHY DO YOU HAVE PROBLEMS WITH MONEY ?" - r[114] = "DO YOU THINK MONEY IS EVERYTHING ?" - r[115] = "ARE YOU SURE THAT MONEY IS THE PROBLEM ?" - r[116] = "I THINK WE WANT TO TALK ABOUT YOU, NOT ABOUT ME" - r[117] = "WHAT'S ABOUT ME ?" - r[118] = "WHY DO YOU ALWAYS BRING UP MY NAME ?" 
- # table for looking up answers that - # fit to a certain keyword - k["CAN YOU"] = "1 2 3" - k["CAN I"] = "4 5" - k["YOU ARE"] =\ - k["YOURE"] = "6 7 8 9" - k["I DONT"] = "10 11 12 13" - k["I FEEL"] = "14 15 16" - k["WHY DONT YOU"] = "17 18 19" - k["WHY CANT I"] = "20 21" - k["ARE YOU"] = "22 23 24" - k["I CANT"] = "25 26 27" - k["I AM"] =\ - k["IM "] = "28 29 30 31" - k["YOU "] = "32 33 34" - k["I WANT"] = "35 36 37 38 39" - k["WHAT"] =\ - k["HOW"] =\ - k["WHO"] =\ - k["WHERE"] =\ - k["WHEN"] =\ - k["WHY"] = "40 41 42 43 44 45 46 47 48" - k["NAME"] = "49 50" - k["CAUSE"] = "51 52 53 54" - k["SORRY"] = "55 56 57 58" - k["DREAM"] = "59 60 61 62" - k["HELLO"] =\ - k["HI "] = "63" - k["MAYBE"] = "64 65 66 67 68" - k[" NO "] = "69 70 71 72 73" - k["YOUR"] = "74 75" - k["ALWAYS"] = "76 77 78 79" - k["THINK"] = "80 81 82" - k["LIKE"] = "83 84 85 86 87 88 89" - k["YES"] = "90 91 92" - k["FRIEND"] = "93 94 95 96 97 98" - k["COMPUTER"] = "99 100 101 102 103 104 105" - k["-"] = "106 107 108 109 110 111 112" - k["MONEY"] = "113 114 115" - k["ELIZA"] = "116 117 118" -} diff --git a/contrib/awk/awklib/eg/network/fingerclient.awk b/contrib/awk/awklib/eg/network/fingerclient.awk deleted file mode 100644 index bcc2c94..0000000 --- a/contrib/awk/awklib/eg/network/fingerclient.awk +++ /dev/null @@ -1,7 +0,0 @@ -BEGIN { - NetService = "/inet/tcp/0/localhost/finger" - print "var{name}" |& NetService - while ((NetService |& getline) > 0) - print $0 - close(NetService) -} diff --git a/contrib/awk/awklib/eg/network/geturl.awk b/contrib/awk/awklib/eg/network/geturl.awk deleted file mode 100644 index 53853e5..0000000 --- a/contrib/awk/awklib/eg/network/geturl.awk +++ /dev/null @@ -1,24 +0,0 @@ -BEGIN { - if (ARGC != 2) { - print "GETURL - retrieve Web page via HTTP 1.0" - print "IN:\n the URL as a command-line parameter" - print "PARAM(S):\n -v Proxy=MyProxy" - print "OUT:\n the page content on stdout" - print " the page header on stderr" - print "JK 16.05.1997" - print "ADR 13.08.2000" - exit - } - URL = ARGV[1]; ARGV[1] = "" - if (Proxy == "") Proxy = "127.0.0.1" - if (ProxyPort == 0) ProxyPort = 80 - if (Method == "") Method = "GET" - HttpService = "/inet/tcp/0/" Proxy "/" ProxyPort - ORS = RS = "\r\n\r\n" - print Method " " URL " HTTP/1.0" |& HttpService - HttpService |& getline Header - print Header > "/dev/stderr" - while ((HttpService |& getline) > 0) - printf "%s", $0 - close(HttpService) -} diff --git a/contrib/awk/awklib/eg/network/hello-serv.awk b/contrib/awk/awklib/eg/network/hello-serv.awk deleted file mode 100644 index 003ee08..0000000 --- a/contrib/awk/awklib/eg/network/hello-serv.awk +++ /dev/null @@ -1,14 +0,0 @@ -BEGIN { - RS = ORS = "\r\n" - HttpService = "/inet/tcp/8080/0/0" - Hello = "" \ - "A Famous Greeting" \ - "

Hello, world

" - Len = length(Hello) + length(ORS) - print "HTTP/1.0 200 OK" |& HttpService - print "Content-Length: " Len ORS |& HttpService - print Hello |& HttpService - while ((HttpService |& getline) > 0) - continue; - close(HttpService) -} diff --git a/contrib/awk/awklib/eg/network/maze.awk b/contrib/awk/awklib/eg/network/maze.awk deleted file mode 100644 index 94888f8..0000000 --- a/contrib/awk/awklib/eg/network/maze.awk +++ /dev/null @@ -1,73 +0,0 @@ -function SetUpServer() { - TopHeader = "Walk through a maze" - TopDoc = "\ -

Please choose one of the following actions:

\ - " - TopFooter = "" - srand() -} -function HandleGET() { - if (MENU[2] == "AboutServer") { - Document = "If your browser has a VRML 2 plugin,\ - this server shows you a simple VRML scene." - } else if (MENU[2] == "VRMLtest") { - XSIZE = YSIZE = 11 # initially, everything is wall - for (y = 0; y < YSIZE; y++) - for (x = 0; x < XSIZE; x++) - Maze[x, y] = "#" - delete Maze[0, 1] # entry is not wall - delete Maze[XSIZE-1, YSIZE-2] # exit is not wall - MakeMaze(1, 1) - Document = "\ -#VRML V2.0 utf8\n\ -Group {\n\ - children [\n\ - PointLight {\n\ - ambientIntensity 0.2\n\ - color 0.7 0.7 0.7\n\ - location 0.0 8.0 10.0\n\ - }\n\ - DEF B1 Background {\n\ - skyColor [0 0 0, 1.0 1.0 1.0 ]\n\ - skyAngle 1.6\n\ - groundColor [1 1 1, 0.8 0.8 0.8, 0.2 0.2 0.2 ]\n\ - groundAngle [ 1.2 1.57 ]\n\ - }\n\ - DEF Wall Shape {\n\ - geometry Box {size 1 1 1}\n\ - appearance Appearance { material Material { diffuseColor 0 0 1 } }\n\ - }\n\ - DEF Entry Viewpoint {\n\ - position 0.5 1.0 5.0\n\ - orientation 0.0 0.0 -1.0 0.52\n\ - }\n" - for (i in Maze) { - split(i, t, SUBSEP) - Document = Document " Transform { translation " - Document = Document t[1] " 0 -" t[2] " children USE Wall }\n" - } - Document = Document " ] # end of group for world\n}" - Reason = "OK" ORS "Content-type: model/vrml" - Header = Footer = "" - } -} -function MakeMaze(x, y) { - delete Maze[x, y] # here we are, we have no wall here - p = 0 # count unvisited fields in all directions - if (x-2 SUBSEP y in Maze) d[p++] = "-x" - if (x SUBSEP y-2 in Maze) d[p++] = "-y" - if (x+2 SUBSEP y in Maze) d[p++] = "+x" - if (x SUBSEP y+2 in Maze) d[p++] = "+y" - if (p>0) { # if there are univisited fields, go there - p = int(p*rand()) # choose one unvisited field at random - if (d[p] == "-x") { delete Maze[x - 1, y]; MakeMaze(x - 2, y) - } else if (d[p] == "-y") { delete Maze[x, y - 1]; MakeMaze(x, y - 2) - } else if (d[p] == "+x") { delete Maze[x + 1, y]; MakeMaze(x + 2, y) - } else if (d[p] == "+y") { delete Maze[x, y + 1]; MakeMaze(x, y + 2) - } # we are back from recursion - MakeMaze(x, y); # try again while there are unvisited fields - } -} diff --git a/contrib/awk/awklib/eg/network/mobag.awk b/contrib/awk/awklib/eg/network/mobag.awk deleted file mode 100644 index a8c5500..0000000 --- a/contrib/awk/awklib/eg/network/mobag.awk +++ /dev/null @@ -1,72 +0,0 @@ -BEGIN { - if (ARGC != 2) { - print "MOBAG - a simple mobile agent" - print "CALL:\n gawk -f mobag.awk mobag.awk" - print "IN:\n the name of this script as a command-line parameter" - print "PARAM:\n -v MyOrigin=myhost.com" - print "OUT:\n the result on stdout" - print "JK 29.03.1998 01.04.1998" - exit - } - if (MyOrigin == "") { - "uname -n" | getline MyOrigin - close("uname -n") - } -} -#ReadMySelf -/^function / { FUNC = $2 } -/^END/ || /^#ReadMySelf/ { FUNC = $1 } -FUNC != "" { MOBFUN[FUNC] = MOBFUN[FUNC] RS $0 } -(FUNC != "") && (/^}/ || /^#EndOfMySelf/) \ - { FUNC = "" } -#EndOfMySelf -function migrate(Destination, MobCode, Label) { - MOBVAR["Label"] = Label - MOBVAR["Destination"] = Destination - RS = ORS = "\r\n" - HttpService = "/inet/tcp/0/" Destination - for (i in MOBFUN) - MobCode = (MobCode "\n" MOBFUN[i]) - MobCode = MobCode "\n\nBEGIN {" - for (i in MOBVAR) - MobCode = (MobCode "\n MOBVAR[\"" i "\"] = \"" MOBVAR[i] "\"") - MobCode = MobCode "\n}\n" - print "POST /cgi-bin/PostAgent.sh HTTP/1.0" |& HttpService - print "Content-length:", length(MobCode) ORS |& HttpService - printf "%s", MobCode |& HttpService - while ((HttpService |& getline) > 0) - print $0 - close(HttpService) -} 
-END { - if (ARGC != 2) exit # stop when called with wrong parameters - if (MyOrigin != "") # is this the originating host? - MyInit() # if so, initialize the application - else # we are on a host with migrated data - MyJob() # so we do our job -} -function MyInit() { - MOBVAR["MyOrigin"] = MyOrigin - MOBVAR["Machines"] = "localhost/80 max/80 moritz/80 castor/80" - split(MOBVAR["Machines"], Machines) # which host is the first? - migrate(Machines[1], "", "") # go to the first host - while (("/inet/tcp/8080/0/0" |& getline) > 0) # wait for result - print $0 # print result - close("/inet/tcp/8080/0/0") -} -function MyJob() { - # forget this host - sub(MOBVAR["Destination"], "", MOBVAR["Machines"]) - MOBVAR["Result"]=MOBVAR["Result"] SUBSEP SUBSEP MOBVAR["Destination"] ":" - while (("who" | getline) > 0) # who is logged in? - MOBVAR["Result"] = MOBVAR["Result"] SUBSEP $0 - close("who") - if (index(MOBVAR["Machines"], "/") > 0) { # any more machines to visit? - split(MOBVAR["Machines"], Machines) # which host is next? - migrate(Machines[1], "", "") # go there - } else { # no more machines - gsub(SUBSEP, "\n", MOBVAR["Result"]) # send result to origin - print MOBVAR["Result"] |& "/inet/tcp/0/" MOBVAR["MyOrigin"] "/8080" - close("/inet/tcp/0/" MOBVAR["MyOrigin"] "/8080") - } -} diff --git a/contrib/awk/awklib/eg/network/panic.awk b/contrib/awk/awklib/eg/network/panic.awk deleted file mode 100644 index 6db8c46..0000000 --- a/contrib/awk/awklib/eg/network/panic.awk +++ /dev/null @@ -1,18 +0,0 @@ -BEGIN { - RS = ORS = "\r\n" - if (MyPort == 0) MyPort = 8080 - HttpService = "/inet/tcp/" MyPort "/0/0" - Hello = "Out Of Service" \ - "

" \ - "This site is temporarily out of service." \ - "

" - Len = length(Hello) + length(ORS) - while ("awk" != "complex") { - print "HTTP/1.0 200 OK" |& HttpService - print "Content-Length: " Len ORS |& HttpService - print Hello |& HttpService - while ((HttpService |& getline) > 0) - continue; - close(HttpService) - } -} diff --git a/contrib/awk/awklib/eg/network/protbase.awk b/contrib/awk/awklib/eg/network/protbase.awk deleted file mode 100644 index 16199e4..0000000 --- a/contrib/awk/awklib/eg/network/protbase.awk +++ /dev/null @@ -1,11 +0,0 @@ -{ request = request "\n" $0 } - -END { - BLASTService = "/inet/tcp/0/www.ncbi.nlm.nih.gov/80" - printf "POST /cgi-bin/BLAST/nph-blast_report HTTP/1.0\n" |& BLASTService - printf "Content-Length: " length(request) "\n\n" |& BLASTService - printf request |& BLASTService - while ((BLASTService |& getline) > 0) - print $0 - close(BLASTService) -} diff --git a/contrib/awk/awklib/eg/network/protbase.request b/contrib/awk/awklib/eg/network/protbase.request deleted file mode 100644 index 4c5c3d2..0000000 --- a/contrib/awk/awklib/eg/network/protbase.request +++ /dev/null @@ -1,7 +0,0 @@ -PROGRAM blastn -DATALIB month -EXPECT 0.75 -BEGIN ->GAWK310 the gawking gene GNU AWK -tgcttggctgaggagccataggacgagagcttcctggtgaagtgtgtttcttgaaatcat -caccaccatggacagcaaa diff --git a/contrib/awk/awklib/eg/network/protbase.result b/contrib/awk/awklib/eg/network/protbase.result deleted file mode 100644 index a087af4..0000000 --- a/contrib/awk/awklib/eg/network/protbase.result +++ /dev/null @@ -1,9 +0,0 @@ -Sequences producing significant alignments: (bits) Value - -gb|AC021182.14|AC021182 Homo sapiens chromosome 7 clone RP11-733... 38 0.20 -gb|AC021056.12|AC021056 Homo sapiens chromosome 3 clone RP11-115... 38 0.20 -emb|AL160278.10|AL160278 Homo sapiens chromosome 9 clone RP11-57... 38 0.20 -emb|AL391139.11|AL391139 Homo sapiens chromosome X clone RP11-35... 38 0.20 -emb|AL365192.6|AL365192 Homo sapiens chromosome 6 clone RP3-421H... 38 0.20 -emb|AL138812.9|AL138812 Homo sapiens chromosome 11 clone RP1-276... 38 0.20 -gb|AC073881.3|AC073881 Homo sapiens chromosome 15 clone CTD-2169... 38 0.20 diff --git a/contrib/awk/awklib/eg/network/remconf.awk b/contrib/awk/awklib/eg/network/remconf.awk deleted file mode 100644 index f4f21e1..0000000 --- a/contrib/awk/awklib/eg/network/remconf.awk +++ /dev/null @@ -1,54 +0,0 @@ -function SetUpServer() { - TopHeader = "Remote Configuration" - TopDoc = "\ -

Please choose one of the following actions:

\ - " - TopFooter = "" - if (ConfigFile == "") ConfigFile = "config.asc" -} -function HandleGET() { - if(MENU[2] == "AboutServer") { - Document = "This is a GUI for remote configuration of an\ - embedded system. It is is implemented as one GAWK script." - } else if (MENU[2] == "ReadConfig") { - RS = "\n" - while ((getline < ConfigFile) > 0) - config[$1] = $2; - close(ConfigFile) - RS = "\r\n" - Document = "Configuration has been read." - } else if (MENU[2] == "CheckConfig") { - Document = "" - for (i in config) - Document = Document "" \ - "" - Document = Document "
" i "" config[i] "
" - } else if (MENU[2] == "ChangeConfig") { - if ("Param" in GETARG) { # any parameter to set? - if (GETARG["Param"] in config) { # is parameter valid? - config[GETARG["Param"]] = GETARG["Value"] - Document = (GETARG["Param"] " = " GETARG["Value"] ".") - } else { - Document = "Parameter " GETARG["Param"] " is invalid." - } - } else { - Document = "

Change one parameter

\ - \ - \ - \ - \ -
ParameterValue
" - } - } else if (MENU[2] == "SaveConfig") { - for (i in config) - printf("%s %s\n", i, config[i]) > ConfigFile - close(ConfigFile) - Document = "Configuration has been saved." - } -} diff --git a/contrib/awk/awklib/eg/network/statist.awk b/contrib/awk/awklib/eg/network/statist.awk deleted file mode 100644 index 8a58394..0000000 --- a/contrib/awk/awklib/eg/network/statist.awk +++ /dev/null @@ -1,85 +0,0 @@ -function SetUpServer() { - TopHeader = "Statistics with GAWK" - TopDoc = "\ -

Please choose one of the following actions:

\ - " - TopFooter = "" - GnuPlot = "gnuplot 2>&1" - m1=m2=0; v1=v2=1; n1=n2=10 -} -function HandleGET() { - if(MENU[2] == "AboutServer") { - Document = "This is a GUI for a statistical computation.\ - It compares means and variances of two distributions.\ - It is implemented as one GAWK script and uses GNUPLOT." - } else if (MENU[2] == "EnterParameters") { - Document = "" - if ("m1" in GETARG) { # are there parameters to compare? - Document = Document "" - m1 = GETARG["m1"]; v1 = GETARG["v1"]; n1 = GETARG["n1"] - m2 = GETARG["m2"]; v2 = GETARG["v2"]; n2 = GETARG["n2"] - t = (m1-m2)/sqrt(v1/n1+v2/n2) - df = (v1/n1+v2/n2)*(v1/n1+v2/n2)/((v1/n1)*(v1/n1)/(n1-1) \ - + (v2/n2)*(v2/n2) /(n2-1)) - if (v1>v2) { - f = v1/v2 - df1 = n1 - 1 - df2 = n2 - 1 - } else { - f = v2/v1 - df1 = n2 - 1 - df2 = n1 - 1 - } - print "pt=ibeta(" df/2 ",0.5," df/(df+t*t) ")" |& GnuPlot - print "pF=2.0*ibeta(" df2/2 "," df1/2 "," \ - df2/(df2+df1*f) ")" |& GnuPlot - print "print pt, pF" |& GnuPlot - RS="\n"; GnuPlot |& getline; RS="\r\n" # $1 is pt, $2 is pF - print "invsqrt2pi=1.0/sqrt(2.0*pi)" |& GnuPlot - print "nd(x)=invsqrt2pi/sd*exp(-0.5*((x-mu)/sd)**2)" |& GnuPlot - print "set term png small color" |& GnuPlot - #print "set term postscript color" |& GnuPlot - #print "set term gif medium size 320,240" |& GnuPlot - print "set yrange[-0.3:]" |& GnuPlot - print "set label 'p(m1=m2) =" $1 "' at 0,-0.1 left" |& GnuPlot - print "set label 'p(v1=v2) =" $2 "' at 0,-0.2 left" |& GnuPlot - print "plot mu=" m1 ",sd=" sqrt(v1) ", nd(x) title 'sample 1',\ - mu=" m2 ",sd=" sqrt(v2) ", nd(x) title 'sample 2'" |& GnuPlot - print "quit" |& GnuPlot - GnuPlot |& getline Image - while ((GnuPlot |& getline) > 0) - Image = Image RS $0 - close(GnuPlot) - } - Document = Document "\ -

Do these samples have the same Gaussian distribution?

\ -
\ - \ - - \ - - \ - - \ - \ - - \ - - \ - - \ - \ -
1. Mean 1. Variance1. Count
2. Mean 2. Variance2. Count

" - } else if (MENU[2] ~ "Image") { - Reason = "OK" ORS "Content-type: image/png" - #Reason = "OK" ORS "Content-type: application/x-postscript" - #Reason = "OK" ORS "Content-type: image/gif" - Header = Footer = "" - Document = Image - } -} diff --git a/contrib/awk/awklib/eg/network/stoxdata.txt b/contrib/awk/awklib/eg/network/stoxdata.txt deleted file mode 100644 index 1b6d015..0000000 --- a/contrib/awk/awklib/eg/network/stoxdata.txt +++ /dev/null @@ -1,4 +0,0 @@ -Date,Open,High,Low,Close,Volume -9-Oct-00,22.75,22.75,21.375,22.375,7888500 -6-Oct-00,23.8125,24.9375,21.5625,22,10701100 -5-Oct-00,24.4375,24.625,23.125,23.50,5810300 diff --git a/contrib/awk/awklib/eg/network/stoxpred.awk b/contrib/awk/awklib/eg/network/stoxpred.awk deleted file mode 100644 index 5cdc2a8..0000000 --- a/contrib/awk/awklib/eg/network/stoxpred.awk +++ /dev/null @@ -1,116 +0,0 @@ -function ReadQuotes() { - # Retrieve historical data for each ticker symbol - FS = "," - for (stock = 1; stock <= StockCount; stock++) { - URL = "http://chart.yahoo.com/table.csv?s=" name[stock] \ - "&a=" month "&b=" day "&c=" year-1 \ - "&d=" month "&e=" day "&f=" year \ - "g=d&q=q&y=0&z=" name[stock] "&x=.csv" - printf("GET " URL " HTTP/1.0\r\n\r\n") |& YahooData - while ((YahooData |& getline) > 0) { - if (NF == 6 && $1 ~ /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/) { - if (stock == 1) - days[++daycount] = $1; - quote[$1, stock] = $5 - } - } - close(YahooData) - } - FS = " " -} -function CleanUp() { - # clean up time series; eliminate incomplete data sets - for (d = 1; d <= daycount; d++) { - for (stock = 1; stock <= StockCount; stock++) - if (! ((days[d], stock) in quote)) - stock = StockCount + 10 - if (stock > StockCount + 1) - continue - datacount++ - for (stock = 1; stock <= StockCount; stock++) - data[datacount, stock] = int(0.5 + quote[days[d], stock]) - } - delete quote - delete days -} -function Prediction() { - # Predict each ticker symbol by prolonging yesterday's trend - for (stock = 1; stock <= StockCount; stock++) { - if (data[1, stock] > data[2, stock]) { - predict[stock] = "up" - } else if (data[1, stock] < data[2, stock]) { - predict[stock] = "down" - } else { - predict[stock] = "neutral" - } - if ((data[1, stock] > data[2, stock]) && (data[2, stock] > data[3, stock])) - hot[stock] = 1 - if ((data[1, stock] < data[2, stock]) && (data[2, stock] < data[3, stock])) - avoid[stock] = 1 - } - # Do a plausibility check: how many predictions proved correct? 
- for (s = 1; s <= StockCount; s++) { - for (d = 1; d <= datacount-2; d++) { - if (data[d+1, s] > data[d+2, s]) { - UpCount++ - } else if (data[d+1, s] < data[d+2, s]) { - DownCount++ - } else { - NeutralCount++ - } - if (((data[d, s] > data[d+1, s]) && (data[d+1, s] > data[d+2, s])) || - ((data[d, s] < data[d+1, s]) && (data[d+1, s] < data[d+2, s])) || - ((data[d, s] == data[d+1, s]) && (data[d+1, s] == data[d+2, s]))) - CorrectCount++ - } - } -} -function Report() { - # Generate report - report = "\nThis is your daily " - report = report "stock market report for "strftime("%A, %B %d, %Y")".\n" - report = report "Here are the predictions for today:\n\n" - for (stock = 1; stock <= StockCount; stock++) - report = report "\t" name[stock] "\t" predict[stock] "\n" - for (stock in hot) { - if (HotCount++ == 0) - report = report "\nThe most promising shares for today are these:\n\n" - report = report "\t" name[stock] "\t\thttp://biz.yahoo.com/n/" \ - tolower(substr(name[stock], 1, 1)) "/" tolower(name[stock]) ".html\n" - } - for (stock in avoid) { - if (AvoidCount++ == 0) - report = report "\nThe stock shares to avoid today are these:\n\n" - report = report "\t" name[stock] "\t\thttp://biz.yahoo.com/n/" \ - tolower(substr(name[stock], 1, 1)) "/" tolower(name[stock]) ".html\n" - } - report = report "\nThis sums up to " HotCount+0 " winners and " AvoidCount+0 - report = report " losers. When using this kind\nof prediction scheme for" - report = report " the 12 months which lie behind us,\nwe get " UpCount - report = report " 'ups' and " DownCount " 'downs' and " NeutralCount - report = report " 'neutrals'. Of all\nthese " UpCount+DownCount+NeutralCount - report = report " predictions " CorrectCount " proved correct next day.\n" - report = report "A success rate of "\ - int(100*CorrectCount/(UpCount+DownCount+NeutralCount)) "%.\n" - report = report "Random choice would have produced a 33% success rate.\n" - report = report "Disclaimer: Like every other prediction of the stock\n" - report = report "market, this report is, of course, complete nonsense.\n" - report = report "If you are stupid enough to believe these predictions\n" - report = report "you should visit a doctor who can treat your ailment." 
-} -function SendMail() { - # send report to customers - customer["uncle.scrooge@ducktown.gov"] = "Uncle Scrooge" - customer["more@utopia.org" ] = "Sir Thomas More" - customer["spinoza@denhaag.nl" ] = "Baruch de Spinoza" - customer["marx@highgate.uk" ] = "Karl Marx" - customer["keynes@the.long.run" ] = "John Maynard Keynes" - customer["bierce@devil.hell.org" ] = "Ambrose Bierce" - customer["laplace@paris.fr" ] = "Pierre Simon de Laplace" - for (c in customer) { - MailPipe = "mail -s 'Daily Stock Prediction Newsletter'" c - print "Good morning " customer[c] "," | MailPipe - print report "\n.\n" | MailPipe - close(MailPipe) - } -} diff --git a/contrib/awk/awklib/eg/network/testserv.awk b/contrib/awk/awklib/eg/network/testserv.awk deleted file mode 100644 index 812bfe6..0000000 --- a/contrib/awk/awklib/eg/network/testserv.awk +++ /dev/null @@ -1,12 +0,0 @@ -BEGIN { - CGI_setup("GET", - "http://www.gnu.org/cgi-bin/foo?p1=stuff&p2=stuff%26junk" \ - "&percent=a %25 sign", - "1.0") - for (i in MENU) - printf "MENU[\"%s\"] = %s\n", i, MENU[i] - for (i in PARAM) - printf "PARAM[\"%s\"] = %s\n", i, PARAM[i] - for (i in GETARG) - printf "GETARG[\"%s\"] = %s\n", i, GETARG[i] -} diff --git a/contrib/awk/awklib/eg/network/urlchk.awk b/contrib/awk/awklib/eg/network/urlchk.awk deleted file mode 100644 index 6ddedfa..0000000 --- a/contrib/awk/awklib/eg/network/urlchk.awk +++ /dev/null @@ -1,28 +0,0 @@ -BEGIN { - if (ARGC != 2) { - print "URLCHK - check if URLs have changed" - print "IN:\n the file with URLs as a command-line parameter" - print " file contains URL, old length, new length" - print "PARAMS:\n -v Proxy=MyProxy -v ProxyPort=8080" - print "OUT:\n same as file with URLs" - print "JK 02.03.1998" - exit - } - URLfile = ARGV[1]; ARGV[1] = "" - if (Proxy != "") Proxy = " -v Proxy=" Proxy - if (ProxyPort != "") ProxyPort = " -v ProxyPort=" ProxyPort - while ((getline < URLfile) > 0) - Length[$1] = $3 + 0 - close(URLfile) # now, URLfile is read in and can be updated - GetHeader = "gawk " Proxy ProxyPort " -v Method=\"HEAD\" -f geturl.awk " - for (i in Length) { - GetThisHeader = GetHeader i " 2>&1" - while ((GetThisHeader | getline) > 0) - if (toupper($0) ~ /CONTENT-LENGTH/) NewLength = $2 + 0 - close(GetThisHeader) - print i, Length[i], NewLength > URLfile - if (Length[i] != NewLength) # report only changed URLs - print i, Length[i], NewLength - } - close(URLfile) -} diff --git a/contrib/awk/awklib/eg/network/webgrab.awk b/contrib/awk/awklib/eg/network/webgrab.awk deleted file mode 100644 index 4173880..0000000 --- a/contrib/awk/awklib/eg/network/webgrab.awk +++ /dev/null @@ -1,6 +0,0 @@ -BEGIN { RS = "http://[#%&\\+\\-\\./0-9\\:;\\?A-Z_a-z\\~]*" } -RT != "" { - command = ("gawk -v Proxy=MyProxy -f geturl.awk " RT \ - " > doc" NR ".html") - print command -} diff --git a/contrib/awk/awklib/eg/prog/alarm.awk b/contrib/awk/awklib/eg/prog/alarm.awk deleted file mode 100644 index 26252fa..0000000 --- a/contrib/awk/awklib/eg/prog/alarm.awk +++ /dev/null @@ -1,85 +0,0 @@ -# alarm.awk --- set an alarm -# -# Requires gettimeofday library function -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -# usage: alarm time [ "message" [ count [ delay ] ] ] - -BEGIN \ -{ - # Initial argument sanity checking - usage1 = "usage: alarm time ['message' [count [delay]]]" - usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1]) - - if (ARGC < 2) { - print usage1 > "/dev/stderr" - print usage2 > "/dev/stderr" - exit 1 - } else if (ARGC == 5) { - delay = ARGV[4] + 0 - count = ARGV[3] + 0 - message = ARGV[2] - } 
else if (ARGC == 4) { - count = ARGV[3] + 0 - message = ARGV[2] - } else if (ARGC == 3) { - message = ARGV[2] - } else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) { - print usage1 > "/dev/stderr" - print usage2 > "/dev/stderr" - exit 1 - } - - # set defaults for once we reach the desired time - if (delay == 0) - delay = 180 # 3 minutes - if (count == 0) - count = 5 - if (message == "") - message = sprintf("\aIt is now %s!\a", ARGV[1]) - else if (index(message, "\a") == 0) - message = "\a" message "\a" - # split up alarm time - split(ARGV[1], atime, ":") - hour = atime[1] + 0 # force numeric - minute = atime[2] + 0 # force numeric - - # get current broken down time - gettimeofday(now) - - # if time given is 12-hour hours and it's after that - # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m., - # then add 12 to real hour - if (hour < 12 && now["hour"] > hour) - hour += 12 - - # set target time in seconds since midnight - target = (hour * 60 * 60) + (minute * 60) - - # get current time in seconds since midnight - current = (now["hour"] * 60 * 60) + \ - (now["minute"] * 60) + now["second"] - - # how long to sleep for - naptime = target - current - if (naptime <= 0) { - print "time is in the past!" > "/dev/stderr" - exit 1 - } - # zzzzzz..... go away if interrupted - if (system(sprintf("sleep %d", naptime)) != 0) - exit 1 - - # time to notify! - command = sprintf("sleep %d", delay) - for (i = 1; i <= count; i++) { - print message - # if sleep command interrupted, go away - if (system(command) != 0) - break - } - - exit 0 -} diff --git a/contrib/awk/awklib/eg/prog/awksed.awk b/contrib/awk/awklib/eg/prog/awksed.awk deleted file mode 100644 index 4b188c6..0000000 --- a/contrib/awk/awklib/eg/prog/awksed.awk +++ /dev/null @@ -1,31 +0,0 @@ -# awksed.awk --- do s/foo/bar/g using just print -# Thanks to Michael Brennan for the idea -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# August 1995 - -function usage() -{ - print "usage: awksed pat repl [files...]" > "/dev/stderr" - exit 1 -} - -BEGIN { - # validate arguments - if (ARGC < 3) - usage() - - RS = ARGV[1] - ORS = ARGV[2] - - # don't use arguments as files - ARGV[1] = ARGV[2] = "" -} - -# look ma, no hands! 
-{ - if (RT == "") - printf "%s", $0 - else - print -} diff --git a/contrib/awk/awklib/eg/prog/cut.awk b/contrib/awk/awklib/eg/prog/cut.awk deleted file mode 100644 index d9c6c9b..0000000 --- a/contrib/awk/awklib/eg/prog/cut.awk +++ /dev/null @@ -1,139 +0,0 @@ -# cut.awk --- implement cut in awk -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -# Options: -# -f list Cut fields -# -d c Field delimiter character -# -c list Cut characters -# -# -s Suppress lines without the delimiter -# -# Requires getopt and join library functions - -function usage( e1, e2) -{ - e1 = "usage: cut [-f list] [-d c] [-s] [files...]" - e2 = "usage: cut [-c list] [files...]" - print e1 > "/dev/stderr" - print e2 > "/dev/stderr" - exit 1 -} -BEGIN \ -{ - FS = "\t" # default - OFS = FS - while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) { - if (c == "f") { - by_fields = 1 - fieldlist = Optarg - } else if (c == "c") { - by_chars = 1 - fieldlist = Optarg - OFS = "" - } else if (c == "d") { - if (length(Optarg) > 1) { - printf("Using first character of %s" \ - " for delimiter\n", Optarg) > "/dev/stderr" - Optarg = substr(Optarg, 1, 1) - } - FS = Optarg - OFS = FS - if (FS == " ") # defeat awk semantics - FS = "[ ]" - } else if (c == "s") - suppress++ - else - usage() - } - - for (i = 1; i < Optind; i++) - ARGV[i] = "" - if (by_fields && by_chars) - usage() - - if (by_fields == 0 && by_chars == 0) - by_fields = 1 # default - - if (fieldlist == "") { - print "cut: needs list for -c or -f" > "/dev/stderr" - exit 1 - } - - if (by_fields) - set_fieldlist() - else - set_charlist() -} -function set_fieldlist( n, m, i, j, k, f, g) -{ - n = split(fieldlist, f, ",") - j = 1 # index in flist - for (i = 1; i <= n; i++) { - if (index(f[i], "-") != 0) { # a range - m = split(f[i], g, "-") - if (m != 2 || g[1] >= g[2]) { - printf("bad field list: %s\n", - f[i]) > "/dev/stderr" - exit 1 - } - for (k = g[1]; k <= g[2]; k++) - flist[j++] = k - } else - flist[j++] = f[i] - } - nfields = j - 1 -} -function set_charlist( field, i, j, f, g, t, - filler, last, len) -{ - field = 1 # count total fields - n = split(fieldlist, f, ",") - j = 1 # index in flist - for (i = 1; i <= n; i++) { - if (index(f[i], "-") != 0) { # range - m = split(f[i], g, "-") - if (m != 2 || g[1] >= g[2]) { - printf("bad character list: %s\n", - f[i]) > "/dev/stderr" - exit 1 - } - len = g[2] - g[1] + 1 - if (g[1] > 1) # compute length of filler - filler = g[1] - last - 1 - else - filler = 0 - if (filler) - t[field++] = filler - t[field++] = len # length of field - last = g[2] - flist[j++] = field - 1 - } else { - if (f[i] > 1) - filler = f[i] - last - 1 - else - filler = 0 - if (filler) - t[field++] = filler - t[field++] = 1 - last = f[i] - flist[j++] = field - 1 - } - } - FIELDWIDTHS = join(t, 1, field - 1) - nfields = j - 1 -} -{ - if (by_fields && suppress && index($0, FS) != 0) - next - - for (i = 1; i <= nfields; i++) { - if ($flist[i] != "") { - printf "%s", $flist[i] - if (i < nfields && $flist[i+1] != "") - printf "%s", OFS - } - } - print "" -} diff --git a/contrib/awk/awklib/eg/prog/dupword.awk b/contrib/awk/awklib/eg/prog/dupword.awk deleted file mode 100644 index a2cc7d2b..0000000 --- a/contrib/awk/awklib/eg/prog/dupword.awk +++ /dev/null @@ -1,21 +0,0 @@ -# dupword.awk --- find duplicate words in text -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# December 1991 -# Revised October 2000 - -{ - $0 = tolower($0) - gsub(/[^[:alnum:][:blank:]]/, " "); - $0 = $0 # re-split - if (NF == 0) - next - if ($1 == prev) - printf("%s:%d: duplicate 
%s\n", - FILENAME, FNR, $1) - for (i = 2; i <= NF; i++) - if ($i == $(i-1)) - printf("%s:%d: duplicate %s\n", - FILENAME, FNR, $i) - prev = $NF -} diff --git a/contrib/awk/awklib/eg/prog/egrep.awk b/contrib/awk/awklib/eg/prog/egrep.awk deleted file mode 100644 index 73f175c..0000000 --- a/contrib/awk/awklib/eg/prog/egrep.awk +++ /dev/null @@ -1,102 +0,0 @@ -# egrep.awk --- simulate egrep in awk -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -# Options: -# -c count of lines -# -s silent - use exit value -# -v invert test, success if no match -# -i ignore case -# -l print filenames only -# -e argument is pattern -# -# Requires getopt and file transition library functions - -BEGIN { - while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) { - if (c == "c") - count_only++ - else if (c == "s") - no_print++ - else if (c == "v") - invert++ - else if (c == "i") - IGNORECASE = 1 - else if (c == "l") - filenames_only++ - else if (c == "e") - pattern = Optarg - else - usage() - } - if (pattern == "") - pattern = ARGV[Optind++] - - for (i = 1; i < Optind; i++) - ARGV[i] = "" - if (Optind >= ARGC) { - ARGV[1] = "-" - ARGC = 2 - } else if (ARGC - Optind > 1) - do_filenames++ - -# if (IGNORECASE) -# pattern = tolower(pattern) -} -#{ -# if (IGNORECASE) -# $0 = tolower($0) -#} -function beginfile(junk) -{ - fcount = 0 -} -function endfile(file) -{ - if (! no_print && count_only) - if (do_filenames) - print file ":" fcount - else - print fcount - - total += fcount -} -{ - matches = ($0 ~ pattern) - if (invert) - matches = ! matches - - fcount += matches # 1 or 0 - - if (! matches) - next - - if (! count_only) { - if (no_print) - nextfile - - if (filenames_only) { - print FILENAME - nextfile - } - - if (do_filenames) - print FILENAME ":" $0 - else - print - } -} -END \ -{ - if (total == 0) - exit 1 - exit 0 -} -function usage( e) -{ - e = "Usage: egrep [-csvil] [-e pat] [files ...]" - e = e "\n\tegrep [-csvil] pat [files ...]" - print e > "/dev/stderr" - exit 1 -} diff --git a/contrib/awk/awklib/eg/prog/extract.awk b/contrib/awk/awklib/eg/prog/extract.awk deleted file mode 100644 index 5cb191a..0000000 --- a/contrib/awk/awklib/eg/prog/extract.awk +++ /dev/null @@ -1,75 +0,0 @@ -# extract.awk --- extract files and run programs -# from texinfo files -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 -# Revised September 2000 - -BEGIN { IGNORECASE = 1 } - -/^@c(omment)?[ \t]+system/ \ -{ - if (NF < 3) { - e = (FILENAME ":" FNR) - e = (e ": badly formed `system' line") - print e > "/dev/stderr" - next - } - $1 = "" - $2 = "" - stat = system($0) - if (stat != 0) { - e = (FILENAME ":" FNR) - e = (e ": warning: system returned " stat) - print e > "/dev/stderr" - } -} -/^@c(omment)?[ \t]+file/ \ -{ - if (NF != 3) { - e = (FILENAME ":" FNR ": badly formed `file' line") - print e > "/dev/stderr" - next - } - if ($3 != curfile) { - if (curfile != "") - close(curfile) - curfile = $3 - } - - for (;;) { - if ((getline line) <= 0) - unexpected_eof() - if (line ~ /^@c(omment)?[ \t]+endfile/) - break - else if (line ~ /^@(end[ \t]+)?group/) - continue - else if (line ~ /^@c(omment+)?[ \t]+/) - continue - if (index(line, "@") == 0) { - print line > curfile - continue - } - n = split(line, a, "@") - # if a[1] == "", means leading @, - # don't add one back in. 
- for (i = 2; i <= n; i++) { - if (a[i] == "") { # was an @@ - a[i] = "@" - if (a[i+1] == "") - i++ - } - } - print join(a, 1, n, SUBSEP) > curfile - } -} -function unexpected_eof() { - printf("%s:%d: unexpected EOF or error\n", - FILENAME, FNR) > "/dev/stderr" - exit 1 -} - -END { - if (curfile) - close(curfile) -} diff --git a/contrib/awk/awklib/eg/prog/guide.awk b/contrib/awk/awklib/eg/prog/guide.awk deleted file mode 100644 index a2dea1b..0000000 --- a/contrib/awk/awklib/eg/prog/guide.awk +++ /dev/null @@ -1,7 +0,0 @@ -BEGIN { - TEXTDOMAIN = "guide" - bindtextdomain(".") # for testing - print _"Don't Panic" - print _"The Answer Is", 42 - print "Pardon me, Zaphod who?" -} diff --git a/contrib/awk/awklib/eg/prog/histsort.awk b/contrib/awk/awklib/eg/prog/histsort.awk deleted file mode 100644 index c0a9165a..0000000 --- a/contrib/awk/awklib/eg/prog/histsort.awk +++ /dev/null @@ -1,15 +0,0 @@ -# histsort.awk --- compact a shell history file -# Thanks to Byron Rakitzis for the general idea -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -{ - if (data[$0]++ == 0) - lines[++count] = $0 -} - -END { - for (i = 1; i <= count; i++) - print lines[i] -} diff --git a/contrib/awk/awklib/eg/prog/id.awk b/contrib/awk/awklib/eg/prog/id.awk deleted file mode 100644 index af78f76..0000000 --- a/contrib/awk/awklib/eg/prog/id.awk +++ /dev/null @@ -1,67 +0,0 @@ -# id.awk --- implement id in awk -# -# Requires user and group library functions -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 -# Revised February 1996 - -# output is: -# uid=12(foo) euid=34(bar) gid=3(baz) \ -# egid=5(blat) groups=9(nine),2(two),1(one) - -BEGIN \ -{ - uid = PROCINFO["uid"] - euid = PROCINFO["euid"] - gid = PROCINFO["gid"] - egid = PROCINFO["egid"] - - printf("uid=%d", uid) - pw = getpwuid(uid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } - - if (euid != uid) { - printf(" euid=%d", euid) - pw = getpwuid(euid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } - } - - printf(" gid=%d", gid) - pw = getgrgid(gid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } - - if (egid != gid) { - printf(" egid=%d", egid) - pw = getgrgid(egid) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } - } - - for (i = 1; ("group" i) in PROCINFO; i++) { - if (i == 1) - printf(" groups=") - group = PROCINFO["group" i] - printf("%d", group) - pw = getgrgid(group) - if (pw != "") { - split(pw, a, ":") - printf("(%s)", a[1]) - } - if (("group" (i+1)) in PROCINFO) - printf(",") - } - - print "" -} diff --git a/contrib/awk/awklib/eg/prog/igawk.sh b/contrib/awk/awklib/eg/prog/igawk.sh deleted file mode 100644 index 7144ce5..0000000 --- a/contrib/awk/awklib/eg/prog/igawk.sh +++ /dev/null @@ -1,130 +0,0 @@ -#! 
/bin/sh -# igawk --- like gawk but do @include processing -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# July 1993 - -if [ "$1" = debug ] -then - set -x - shift -else - # cleanup on exit, hangup, interrupt, quit, termination - trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15 -fi - -while [ $# -ne 0 ] # loop over arguments -do - case $1 in - --) shift; break;; - - -W) shift - set -- -W"$@" - continue;; - - -[vF]) opts="$opts $1 '$2'" - shift;; - - -[vF]*) opts="$opts '$1'" ;; - - -f) echo @include "$2" >> /tmp/ig.s.$$ - shift;; - - -f*) f=`echo "$1" | sed 's/-f//'` - echo @include "$f" >> /tmp/ig.s.$$ ;; - - -?file=*) # -Wfile or --file - f=`echo "$1" | sed 's/-.file=//'` - echo @include "$f" >> /tmp/ig.s.$$ ;; - - -?file) # get arg, $2 - echo @include "$2" >> /tmp/ig.s.$$ - shift;; - - -?source=*) # -Wsource or --source - t=`echo "$1" | sed 's/-.source=//'` - echo "$t" >> /tmp/ig.s.$$ ;; - - -?source) # get arg, $2 - echo "$2" >> /tmp/ig.s.$$ - shift;; - - -?version) - echo igawk: version 1.0 1>&2 - gawk --version - exit 0 ;; - - -[W-]*) opts="$opts '$1'" ;; - - *) break;; - esac - shift -done - -if [ ! -s /tmp/ig.s.$$ ] -then - if [ -z "$1" ] - then - echo igawk: no program! 1>&2 - exit 1 - else - echo "$1" > /tmp/ig.s.$$ - shift - fi -fi - -# at this point, /tmp/ig.s.$$ has the program -gawk -- ' -# process @include directives - -function pathto(file, i, t, junk) -{ - if (index(file, "/") != 0) - return file - - for (i = 1; i <= ndirs; i++) { - t = (pathlist[i] "/" file) - if ((getline junk < t) > 0) { - # found it - close(t) - return t - } - } - return "" -} -BEGIN { - path = ENVIRON["AWKPATH"] - ndirs = split(path, pathlist, ":") - for (i = 1; i <= ndirs; i++) { - if (pathlist[i] == "") - pathlist[i] = "." - } - stackptr = 0 - input[stackptr] = ARGV[1] # ARGV[1] is first file - - for (; stackptr >= 0; stackptr--) { - while ((getline < input[stackptr]) > 0) { - if (tolower($1) != "@include") { - print - continue - } - fpath = pathto($2) - if (fpath == "") { - printf("igawk:%s:%d: cannot find %s\n", - input[stackptr], FNR, $2) > "/dev/stderr" - continue - } - if (! (fpath in processed)) { - processed[fpath] = input[stackptr] - input[++stackptr] = fpath # push onto stack - } else - print $2, "included in", input[stackptr], - "already included in", - processed[fpath] > "/dev/stderr" - } - close(input[stackptr]) - } -}' /tmp/ig.s.$$ > /tmp/ig.e.$$ -eval gawk -f /tmp/ig.e.$$ $opts -- "$@" - -exit $? diff --git a/contrib/awk/awklib/eg/prog/labels.awk b/contrib/awk/awklib/eg/prog/labels.awk deleted file mode 100644 index fa9c4da..0000000 --- a/contrib/awk/awklib/eg/prog/labels.awk +++ /dev/null @@ -1,54 +0,0 @@ -# labels.awk --- print mailing labels -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# June 1992 - -# Each label is 5 lines of data that may have blank lines. -# The label sheets have 2 blank lines at the top and 2 at -# the bottom. 
- -BEGIN { RS = "" ; MAXLINES = 100 } - -function printpage( i, j) -{ - if (Nlines <= 0) - return - - printf "\n\n" # header - - for (i = 1; i <= Nlines; i += 10) { - if (i == 21 || i == 61) - print "" - for (j = 0; j < 5; j++) { - if (i + j > MAXLINES) - break - printf " %-41s %s\n", line[i+j], line[i+j+5] - } - print "" - } - - printf "\n\n" # footer - - for (i in line) - line[i] = "" -} - -# main rule -{ - if (Count >= 20) { - printpage() - Count = 0 - Nlines = 0 - } - n = split($0, a, "\n") - for (i = 1; i <= n; i++) - line[++Nlines] = a[i] - for (; i <= 5; i++) - line[++Nlines] = "" - Count++ -} - -END \ -{ - printpage() -} diff --git a/contrib/awk/awklib/eg/prog/split.awk b/contrib/awk/awklib/eg/prog/split.awk deleted file mode 100644 index 2906a85..0000000 --- a/contrib/awk/awklib/eg/prog/split.awk +++ /dev/null @@ -1,57 +0,0 @@ -# split.awk --- do split in awk -# -# Requires ord and chr library functions -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -# usage: split [-num] [file] [outname] - -BEGIN { - outfile = "x" # default - count = 1000 - if (ARGC > 4) - usage() - - i = 1 - if (ARGV[i] ~ /^-[0-9]+$/) { - count = -ARGV[i] - ARGV[i] = "" - i++ - } - # test argv in case reading from stdin instead of file - if (i in ARGV) - i++ # skip data file name - if (i in ARGV) { - outfile = ARGV[i] - ARGV[i] = "" - } - - s1 = s2 = "a" - out = (outfile s1 s2) -} -{ - if (++tcount > count) { - close(out) - if (s2 == "z") { - if (s1 == "z") { - printf("split: %s is too large to split\n", - FILENAME) > "/dev/stderr" - exit 1 - } - s1 = chr(ord(s1) + 1) - s2 = "a" - } - else - s2 = chr(ord(s2) + 1) - out = (outfile s1 s2) - tcount = 1 - } - print > out -} -function usage( e) -{ - e = "usage: split [-num] [file] [outname]" - print e > "/dev/stderr" - exit 1 -} diff --git a/contrib/awk/awklib/eg/prog/tee.awk b/contrib/awk/awklib/eg/prog/tee.awk deleted file mode 100644 index eafc4b9..0000000 --- a/contrib/awk/awklib/eg/prog/tee.awk +++ /dev/null @@ -1,39 +0,0 @@ -# tee.awk --- tee in awk -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 -# Revised December 1995 - -BEGIN \ -{ - for (i = 1; i < ARGC; i++) - copy[i] = ARGV[i] - - if (ARGV[1] == "-a") { - append = 1 - delete ARGV[1] - delete copy[1] - ARGC-- - } - if (ARGC < 2) { - print "usage: tee [-a] file ..." > "/dev/stderr" - exit 1 - } - ARGV[1] = "-" - ARGC = 2 -} -{ - # moving the if outside the loop makes it run faster - if (append) - for (i in copy) - print >> copy[i] - else - for (i in copy) - print > copy[i] - print -} -END \ -{ - for (i in copy) - close(copy[i]) -} diff --git a/contrib/awk/awklib/eg/prog/testbits.awk b/contrib/awk/awklib/eg/prog/testbits.awk deleted file mode 100644 index 143cd91..0000000 --- a/contrib/awk/awklib/eg/prog/testbits.awk +++ /dev/null @@ -1,27 +0,0 @@ -# bits2str --- turn a byte into readable 1's and 0's - -function bits2str(bits, data, mask) -{ - if (bits == 0) - return "0" - - mask = 1 - for (; bits != 0; bits = rshift(bits, 1)) - data = (and(bits, mask) ? 
"1" : "0") data - - while ((length(data) % 8) != 0) - data = "0" data - - return data -} -BEGIN { - printf "123 = %s\n", bits2str(123) - printf "0123 = %s\n", bits2str(0123) - printf "0x99 = %s\n", bits2str(0x99) - comp = compl(0x99) - printf "compl(0x99) = %#x = %s\n", comp, bits2str(comp) - shift = lshift(0x99, 2) - printf "lshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift) - shift = rshift(0x99, 2) - printf "rshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift) -} diff --git a/contrib/awk/awklib/eg/prog/translate.awk b/contrib/awk/awklib/eg/prog/translate.awk deleted file mode 100644 index 803700c..0000000 --- a/contrib/awk/awklib/eg/prog/translate.awk +++ /dev/null @@ -1,47 +0,0 @@ -# translate.awk --- do tr-like stuff -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# August 1989 - -# Bugs: does not handle things like: tr A-Z a-z, it has -# to be spelled out. However, if `to' is shorter than `from', -# the last character in `to' is used for the rest of `from'. - -function stranslate(from, to, target, lf, lt, t_ar, i, c) -{ - lf = length(from) - lt = length(to) - for (i = 1; i <= lt; i++) - t_ar[substr(from, i, 1)] = substr(to, i, 1) - if (lt < lf) - for (; i <= lf; i++) - t_ar[substr(from, i, 1)] = substr(to, lt, 1) - for (i = 1; i <= lf; i++) { - c = substr(from, i, 1) - if (index(target, c) > 0) - gsub(c, t_ar[c], target) - } - return target -} - -function translate(from, to) -{ - return $0 = stranslate(from, to, $0) -} - -# main program -BEGIN { - if (ARGC < 3) { - print "usage: translate from to" > "/dev/stderr" - exit - } - FROM = ARGV[1] - TO = ARGV[2] - ARGC = 2 - ARGV[1] = "-" -} - -{ - translate(FROM, TO) - print -} diff --git a/contrib/awk/awklib/eg/prog/uniq.awk b/contrib/awk/awklib/eg/prog/uniq.awk deleted file mode 100644 index cfb50c7..0000000 --- a/contrib/awk/awklib/eg/prog/uniq.awk +++ /dev/null @@ -1,119 +0,0 @@ -# uniq.awk --- do uniq in awk -# -# Requires getopt and join library functions -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -function usage( e) -{ - e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" - print e > "/dev/stderr" - exit 1 -} - -# -c count lines. 
overrides -d and -u -# -d only repeated lines -# -u only non-repeated lines -# -n skip n fields -# +n skip n characters, skip fields first - -BEGIN \ -{ - count = 1 - outputfile = "/dev/stdout" - opts = "udc0:1:2:3:4:5:6:7:8:9:" - while ((c = getopt(ARGC, ARGV, opts)) != -1) { - if (c == "u") - non_repeated_only++ - else if (c == "d") - repeated_only++ - else if (c == "c") - do_count++ - else if (index("0123456789", c) != 0) { - # getopt requires args to options - # this messes us up for things like -5 - if (Optarg ~ /^[0-9]+$/) - fcount = (c Optarg) + 0 - else { - fcount = c + 0 - Optind-- - } - } else - usage() - } - - if (ARGV[Optind] ~ /^\+[0-9]+$/) { - charcount = substr(ARGV[Optind], 2) + 0 - Optind++ - } - - for (i = 1; i < Optind; i++) - ARGV[i] = "" - - if (repeated_only == 0 && non_repeated_only == 0) - repeated_only = non_repeated_only = 1 - - if (ARGC - Optind == 2) { - outputfile = ARGV[ARGC - 1] - ARGV[ARGC - 1] = "" - } -} -function are_equal( n, m, clast, cline, alast, aline) -{ - if (fcount == 0 && charcount == 0) - return (last == $0) - - if (fcount > 0) { - n = split(last, alast) - m = split($0, aline) - clast = join(alast, fcount+1, n) - cline = join(aline, fcount+1, m) - } else { - clast = last - cline = $0 - } - if (charcount) { - clast = substr(clast, charcount + 1) - cline = substr(cline, charcount + 1) - } - - return (clast == cline) -} -NR == 1 { - last = $0 - next -} - -{ - equal = are_equal() - - if (do_count) { # overrides -d and -u - if (equal) - count++ - else { - printf("%4d %s\n", count, last) > outputfile - last = $0 - count = 1 # reset - } - next - } - - if (equal) - count++ - else { - if ((repeated_only && count > 1) || - (non_repeated_only && count == 1)) - print last > outputfile - last = $0 - count = 1 - } -} - -END { - if (do_count) - printf("%4d %s\n", count, last) > outputfile - else if ((repeated_only && count > 1) || - (non_repeated_only && count == 1)) - print last > outputfile -} diff --git a/contrib/awk/awklib/eg/prog/wc.awk b/contrib/awk/awklib/eg/prog/wc.awk deleted file mode 100644 index f46616b..0000000 --- a/contrib/awk/awklib/eg/prog/wc.awk +++ /dev/null @@ -1,69 +0,0 @@ -# wc.awk --- count lines, words, characters -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -# Options: -# -l only count lines -# -w only count words -# -c only count characters -# -# Default is to count lines, words, characters -# -# Requires getopt and file transition library functions - -BEGIN { - # let getopt print a message about - # invalid options. we ignore them - while ((c = getopt(ARGC, ARGV, "lwc")) != -1) { - if (c == "l") - do_lines = 1 - else if (c == "w") - do_words = 1 - else if (c == "c") - do_chars = 1 - } - for (i = 1; i < Optind; i++) - ARGV[i] = "" - - # if no options, do all - if (! do_lines && ! do_words && ! 
do_chars) - do_lines = do_words = do_chars = 1 - - print_total = (ARGC - i > 2) -} -function beginfile(file) -{ - chars = lines = words = 0 - fname = FILENAME -} -function endfile(file) -{ - tchars += chars - tlines += lines - twords += words - if (do_lines) - printf "\t%d", lines - if (do_words) - printf "\t%d", words - if (do_chars) - printf "\t%d", chars - printf "\t%s\n", fname -} -# do per line -{ - chars += length($0) + 1 # get newline - lines++ - words += NF -} -END { - if (print_total) { - if (do_lines) - printf "\t%d", tlines - if (do_words) - printf "\t%d", twords - if (do_chars) - printf "\t%d", tchars - print "\ttotal" - } -} diff --git a/contrib/awk/awklib/eg/prog/wordfreq.awk b/contrib/awk/awklib/eg/prog/wordfreq.awk deleted file mode 100644 index 62db5cf..0000000 --- a/contrib/awk/awklib/eg/prog/wordfreq.awk +++ /dev/null @@ -1,20 +0,0 @@ -# wordfreq.awk --- print list of word frequencies - -{ - $0 = tolower($0) # remove case distinctions - # remove punctuation - gsub(/[^[:alnum:]_[:blank:]]/, "", $0) - for (i = 1; i <= NF; i++) - freq[$i]++ -} - -END { - for (word in freq) - printf "%s\t%d\n", word, freq[word] -} -END { - sort = "sort +1 -nr" - for (word in freq) - printf "%s\t%d\n", word, freq[word] | sort - close(sort) -} diff --git a/contrib/awk/awklib/extract.awk b/contrib/awk/awklib/extract.awk deleted file mode 100644 index 1b052e7..0000000 --- a/contrib/awk/awklib/extract.awk +++ /dev/null @@ -1,92 +0,0 @@ -# extract.awk --- extract files and run programs -# from texinfo files -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 -# Revised September 2000 - -BEGIN { IGNORECASE = 1 } - -/^@c(omment)?[ \t]+system/ \ -{ - if (NF < 3) { - e = (FILENAME ":" FNR) - e = (e ": badly formed `system' line") - print e > "/dev/stderr" - next - } - $1 = "" - $2 = "" - stat = system($0) - if (stat != 0) { - e = (FILENAME ":" FNR) - e = (e ": warning: system returned " stat) - print e > "/dev/stderr" - } -} -/^@c(omment)?[ \t]+file/ \ -{ - if (NF != 3) { - e = (FILENAME ":" FNR ": badly formed `file' line") - print e > "/dev/stderr" - next - } - if ($3 != curfile) { - if (curfile != "") - close(curfile) - curfile = $3 - } - - for (;;) { - if ((getline line) <= 0) - unexpected_eof() - if (line ~ /^@c(omment)?[ \t]+endfile/) - break - else if (line ~ /^@(end[ \t]+)?group/) - continue - else if (line ~ /^@c(omment+)?[ \t]+/) - continue - if (index(line, "@") == 0) { - print line > curfile - continue - } - n = split(line, a, "@") - # if a[1] == "", means leading @, - # don't add one back in. - for (i = 2; i <= n; i++) { - if (a[i] == "") { # was an @@ - a[i] = "@" - if (a[i+1] == "") - i++ - } - } - print join(a, 1, n, SUBSEP) > curfile - } -} -function unexpected_eof() -{ - printf("%s:%d: unexpected EOF or error\n", - FILENAME, FNR) > "/dev/stderr" - exit 1 -} - -END { - if (curfile) - close(curfile) -} -# join.awk --- join an array into a string -# -# Arnold Robbins, arnold@gnu.org, Public Domain -# May 1993 - -function join(array, start, end, sep, result, i) -{ - if (sep == "") - sep = " " - else if (sep == SUBSEP) # magic value - sep = "" - result = array[start] - for (i = start + 1; i <= end; i++) - result = result sep array[i] - return result -} diff --git a/contrib/awk/awklib/stamp-eg b/contrib/awk/awklib/stamp-eg deleted file mode 100644 index 241abd9..0000000 --- a/contrib/awk/awklib/stamp-eg +++ /dev/null @@ -1,2 +0,0 @@ -some makes are stupid and will not check a directory -against a file, so this file is a place holder. gack. 
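Several of the example programs removed above (cut.awk, uniq.awk, tee.awk, wc.awk) note that they "require getopt and join library functions"; the join() helper itself appears verbatim at the end of the awklib/extract.awk hunk just above. As a quick illustration of how those callers use it, here is a minimal sketch; it is not taken from the patch, and the file name demo.awk is hypothetical:

    # demo.awk --- illustrative caller for the join() helper shown above
    # run as: gawk -f join.awk -f demo.awk /dev/null
    BEGIN {
        n = split("a:b:c:d", parts, ":")
        print join(parts, 1, n, "-")   # prints "a-b-c-d"
        print join(parts, 2, n)        # sep defaults to a single space: "b c d"
    }
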
diff --git a/contrib/awk/awktab.c b/contrib/awk/awktab.c deleted file mode 100644 index 23aa4fb..0000000 --- a/contrib/awk/awktab.c +++ /dev/null @@ -1,3983 +0,0 @@ - -/* A Bison parser, made from ./awk.y - by GNU Bison version 1.25 - */ - -#define YYBISON 1 /* Identify Bison output. */ - -#define FUNC_CALL 258 -#define NAME 259 -#define REGEXP 260 -#define ERROR 261 -#define YNUMBER 262 -#define YSTRING 263 -#define RELOP 264 -#define APPEND_OP 265 -#define ASSIGNOP 266 -#define MATCHOP 267 -#define NEWLINE 268 -#define CONCAT_OP 269 -#define LEX_BEGIN 270 -#define LEX_END 271 -#define LEX_IF 272 -#define LEX_ELSE 273 -#define LEX_RETURN 274 -#define LEX_DELETE 275 -#define LEX_WHILE 276 -#define LEX_DO 277 -#define LEX_FOR 278 -#define LEX_BREAK 279 -#define LEX_CONTINUE 280 -#define LEX_PRINT 281 -#define LEX_PRINTF 282 -#define LEX_NEXT 283 -#define LEX_EXIT 284 -#define LEX_FUNCTION 285 -#define LEX_GETLINE 286 -#define LEX_NEXTFILE 287 -#define LEX_IN 288 -#define LEX_AND 289 -#define LEX_OR 290 -#define INCREMENT 291 -#define DECREMENT 292 -#define LEX_BUILTIN 293 -#define LEX_LENGTH 294 -#define UNARY 295 - -#line 26 "./awk.y" - -#ifdef DEBUG -#define YYDEBUG 12 -#endif - -#include "awk.h" - -#define CAN_FREE TRUE -#define DONT_FREE FALSE - -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -static void yyerror(const char *m, ...) ; -#else -static void yyerror(); /* va_alist */ -#endif -static char *get_src_buf P((void)); -static int yylex P((void)); -static NODE *node_common P((NODETYPE op)); -static NODE *snode P((NODE *subn, NODETYPE op, int sindex)); -static NODE *mkrangenode P((NODE *cpair)); -static NODE *make_for_loop P((NODE *init, NODE *cond, NODE *incr)); -static NODE *append_right P((NODE *list, NODE *new)); -static void func_install P((NODE *params, NODE *def)); -static void pop_var P((NODE *np, int freeit)); -static void pop_params P((NODE *params)); -static NODE *make_param P((char *name)); -static NODE *mk_rexp P((NODE *exp)); -static int dup_parms P((NODE *func)); -static void param_sanity P((NODE *arglist)); -static int isnoeffect P((NODETYPE t)); -static int isassignable P((NODE *n)); - -enum defref { FUNC_DEFINE, FUNC_USE }; -static void func_use P((char *name, enum defref how)); -static void check_funcs P((void)); - -static int want_assign; /* lexical scanning kludge */ -static int want_regexp; /* lexical scanning kludge */ -static int can_return; /* lexical scanning kludge */ -static int io_allowed = TRUE; /* lexical scanning kludge */ -static char *lexptr; /* pointer to next char during parsing */ -static char *lexend; -static char *lexptr_begin; /* keep track of where we were for error msgs */ -static char *lexeme; /* beginning of lexeme for debugging */ -static char *thisline = NULL; -#define YYDEBUG_LEXER_TEXT (lexeme) -static int param_counter; -static char *tokstart = NULL; -static char *tok = NULL; -static char *tokend; - -#define HASHSIZE 1021 /* this constant only used here */ -NODE *variables[HASHSIZE]; - -extern char *source; -extern int sourceline; -extern struct src *srcfiles; -extern int numfiles; -extern int errcount; -extern NODE *begin_block; -extern NODE *end_block; - -#line 89 "./awk.y" -typedef union { - long lval; - AWKNUM fval; - NODE *nodeval; - NODETYPE nodetypeval; - char *sval; - NODE *(*ptrval)(); -} YYSTYPE; -#include - -#ifndef __cplusplus -#ifndef __STDC__ -#define const -#endif -#endif - - - -#define YYFINAL 312 -#define YYFLAG -32768 -#define YYNTBASE 62 - -#define YYTRANSLATE(x) ((unsigned)(x) <= 295 ? 
yytranslate[x] : 107) - -static const char yytranslate[] = { 0, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 51, 2, 2, 54, 50, 2, 2, 55, - 56, 48, 46, 42, 47, 2, 49, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 41, 61, 43, - 2, 44, 40, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 57, 2, 58, 53, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 59, 45, 60, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, - 36, 37, 38, 39, 52 -}; - -#if YYDEBUG != 0 -static const short yyprhs[] = { 0, - 0, 4, 6, 9, 11, 14, 15, 16, 20, 21, - 25, 28, 31, 34, 36, 39, 42, 44, 46, 48, - 50, 52, 53, 61, 66, 71, 73, 77, 78, 83, - 89, 94, 96, 99, 101, 104, 106, 109, 112, 115, - 119, 121, 128, 137, 146, 157, 167, 170, 173, 180, - 185, 189, 192, 196, 197, 202, 209, 213, 216, 218, - 220, 227, 237, 239, 242, 243, 245, 246, 249, 250, - 253, 256, 259, 260, 262, 264, 268, 270, 273, 277, - 278, 280, 281, 283, 285, 289, 291, 294, 298, 302, - 303, 305, 307, 311, 313, 316, 320, 324, 325, 330, - 336, 341, 345, 349, 353, 357, 359, 362, 366, 370, - 374, 378, 384, 386, 389, 390, 395, 399, 403, 407, - 409, 412, 416, 420, 424, 430, 432, 435, 437, 441, - 445, 449, 453, 457, 461, 464, 467, 470, 474, 479, - 484, 486, 491, 493, 496, 499, 501, 503, 506, 509, - 510, 512, 514, 519, 522, 525, 528, 530, 531, 533, - 535 -}; - -static const short yyrhs[] = { 83, - 63, 83, 0, 64, 0, 63, 64, 0, 1, 0, - 63, 1, 0, 0, 0, 15, 65, 75, 0, 0, - 16, 66, 75, 0, 15, 77, 0, 16, 77, 0, - 72, 75, 0, 75, 0, 72, 77, 0, 69, 71, - 0, 4, 0, 3, 0, 68, 0, 38, 0, 39, - 0, 0, 30, 70, 67, 55, 86, 103, 83, 0, - 101, 76, 102, 104, 0, 101, 102, 104, 83, 0, - 93, 0, 93, 42, 93, 0, 0, 49, 74, 5, - 49, 0, 101, 76, 102, 104, 83, 0, 101, 102, - 104, 83, 0, 78, 0, 76, 78, 0, 1, 0, - 76, 1, 0, 82, 0, 105, 83, 0, 105, 83, - 0, 101, 102, 0, 101, 76, 102, 0, 81, 0, - 21, 55, 93, 103, 83, 78, 0, 22, 83, 78, - 21, 55, 93, 103, 83, 0, 23, 55, 4, 33, - 4, 103, 83, 78, 0, 23, 55, 88, 105, 93, - 105, 88, 103, 83, 78, 0, 23, 55, 88, 105, - 105, 88, 103, 83, 78, 0, 24, 77, 0, 25, - 77, 0, 80, 55, 92, 103, 85, 77, 0, 80, - 89, 85, 77, 0, 28, 88, 77, 0, 32, 77, - 0, 29, 88, 77, 0, 0, 19, 79, 88, 77, - 0, 20, 4, 57, 92, 58, 77, 0, 20, 4, - 77, 0, 93, 77, 0, 26, 0, 27, 0, 17, - 55, 93, 103, 83, 78, 0, 17, 55, 93, 103, - 83, 78, 18, 83, 78, 0, 13, 0, 82, 13, - 0, 0, 82, 0, 0, 43, 97, 0, 0, 44, - 93, 0, 10, 93, 0, 45, 93, 0, 0, 87, - 0, 4, 0, 87, 106, 4, 0, 1, 0, 87, - 1, 0, 87, 106, 1, 0, 0, 93, 0, 0, - 90, 0, 95, 0, 90, 106, 95, 0, 1, 0, - 90, 1, 0, 90, 1, 95, 0, 90, 106, 1, - 0, 0, 92, 0, 93, 0, 92, 106, 93, 0, - 1, 0, 92, 1, 0, 92, 1, 93, 0, 92, - 106, 1, 0, 0, 100, 11, 94, 93, 0, 55, - 92, 103, 33, 4, 0, 93, 45, 31, 99, 0, - 31, 99, 84, 0, 93, 34, 93, 0, 93, 35, - 93, 0, 93, 12, 93, 0, 73, 0, 51, 73, - 0, 93, 33, 4, 0, 93, 9, 93, 0, 93, - 43, 93, 0, 93, 44, 93, 0, 93, 40, 93, - 41, 93, 0, 97, 0, 93, 97, 0, 0, 100, - 11, 96, 95, 0, 95, 34, 95, 0, 95, 35, - 95, 0, 31, 99, 
84, 0, 73, 0, 51, 73, - 0, 95, 12, 95, 0, 95, 33, 4, 0, 95, - 9, 95, 0, 95, 40, 95, 41, 95, 0, 97, - 0, 95, 97, 0, 98, 0, 97, 53, 97, 0, - 97, 48, 97, 0, 97, 49, 97, 0, 97, 50, - 97, 0, 97, 46, 97, 0, 97, 47, 97, 0, - 100, 36, 0, 100, 37, 0, 51, 97, 0, 55, - 93, 103, 0, 38, 55, 91, 103, 0, 39, 55, - 91, 103, 0, 39, 0, 3, 55, 91, 103, 0, - 100, 0, 36, 100, 0, 37, 100, 0, 7, 0, - 8, 0, 47, 97, 0, 46, 97, 0, 0, 100, - 0, 4, 0, 4, 57, 92, 58, 0, 54, 98, - 0, 59, 83, 0, 60, 83, 0, 56, 0, 0, - 105, 0, 61, 0, 42, 83, 0 -}; - -#endif - -#if YYDEBUG != 0 -static const short yyrline[] = { 0, - 150, 158, 166, 182, 183, 184, 188, 190, 204, 206, - 220, 226, 232, 234, 236, 249, 258, 260, 262, 272, - 273, 277, 281, 296, 301, 310, 312, 321, 323, 341, - 343, 348, 354, 362, 364, 369, 370, 374, 376, 378, - 380, 382, 384, 386, 413, 417, 422, 425, 428, 430, - 450, 489, 508, 510, 515, 517, 519, 533, 538, 540, - 545, 550, 557, 559, 563, 564, 568, 570, 575, 577, - 579, 581, 586, 588, 593, 595, 597, 599, 601, 607, - 609, 614, 616, 621, 623, 629, 631, 633, 635, 640, - 642, 647, 649, 655, 657, 659, 661, 666, 669, 674, - 676, 681, 687, 689, 691, 697, 707, 715, 717, 723, - 725, 727, 729, 731, 736, 739, 740, 742, 744, 750, - 752, 754, 756, 758, 760, 762, 764, 769, 771, 773, - 775, 777, 779, 781, 783, 785, 790, 792, 794, 797, - 799, 807, 814, 815, 817, 819, 821, 824, 832, 843, - 845, 850, 852, 862, 867, 871, 875, 879, 880, 884, - 887 -}; -#endif - - -#if YYDEBUG != 0 || defined (YYERROR_VERBOSE) - -static const char * const yytname[] = { "$","error","$undefined.","FUNC_CALL", -"NAME","REGEXP","ERROR","YNUMBER","YSTRING","RELOP","APPEND_OP","ASSIGNOP","MATCHOP", -"NEWLINE","CONCAT_OP","LEX_BEGIN","LEX_END","LEX_IF","LEX_ELSE","LEX_RETURN", -"LEX_DELETE","LEX_WHILE","LEX_DO","LEX_FOR","LEX_BREAK","LEX_CONTINUE","LEX_PRINT", -"LEX_PRINTF","LEX_NEXT","LEX_EXIT","LEX_FUNCTION","LEX_GETLINE","LEX_NEXTFILE", -"LEX_IN","LEX_AND","LEX_OR","INCREMENT","DECREMENT","LEX_BUILTIN","LEX_LENGTH", -"'?'","':'","','","'<'","'>'","'|'","'+'","'-'","'*'","'/'","'%'","'!'","UNARY", -"'^'","'$'","'('","')'","'['","']'","'{'","'}'","';'","start","program","rule", -"@1","@2","func_name","lex_builtin","function_prologue","@3","function_body", -"pattern","regexp","@4","action","statements","statement_term","statement","@5", -"print","if_statement","nls","opt_nls","input_redir","output_redir","opt_param_list", -"param_list","opt_exp","opt_rexpression_list","rexpression_list","opt_expression_list", -"expression_list","exp","@6","rexp","@7","simp_exp","non_post_simp_exp","opt_variable", -"variable","l_brace","r_brace","r_paren","opt_semi","semi","comma", NULL -}; -#endif - -static const short yyr1[] = { 0, - 62, 63, 63, 63, 63, 63, 65, 64, 66, 64, - 64, 64, 64, 64, 64, 64, 67, 67, 67, 68, - 68, 70, 69, 71, 71, 72, 72, 74, 73, 75, - 75, 76, 76, 76, 76, 77, 77, 78, 78, 78, - 78, 78, 78, 78, 78, 78, 78, 78, 78, 78, - 78, 78, 78, 79, 78, 78, 78, 78, 80, 80, - 81, 81, 82, 82, 83, 83, 84, 84, 85, 85, - 85, 85, 86, 86, 87, 87, 87, 87, 87, 88, - 88, 89, 89, 90, 90, 90, 90, 90, 90, 91, - 91, 92, 92, 92, 92, 92, 92, 94, 93, 93, - 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, - 93, 93, 93, 93, 96, 95, 95, 95, 95, 95, - 95, 95, 95, 95, 95, 95, 95, 97, 97, 97, - 97, 97, 97, 97, 97, 97, 98, 98, 98, 98, - 98, 98, 98, 98, 98, 98, 98, 98, 98, 99, - 99, 100, 100, 100, 101, 102, 103, 104, 104, 105, - 106 -}; - -static const short yyr2[] = { 0, - 3, 1, 2, 1, 2, 0, 0, 3, 0, 3, - 2, 2, 2, 1, 2, 2, 1, 1, 1, 1, - 1, 0, 7, 4, 4, 1, 3, 0, 4, 5, - 4, 1, 2, 1, 2, 1, 2, 2, 2, 3, - 
1, 6, 8, 8, 10, 9, 2, 2, 6, 4, - 3, 2, 3, 0, 4, 6, 3, 2, 1, 1, - 6, 9, 1, 2, 0, 1, 0, 2, 0, 2, - 2, 2, 0, 1, 1, 3, 1, 2, 3, 0, - 1, 0, 1, 1, 3, 1, 2, 3, 3, 0, - 1, 1, 3, 1, 2, 3, 3, 0, 4, 5, - 4, 3, 3, 3, 3, 1, 2, 3, 3, 3, - 3, 5, 1, 2, 0, 4, 3, 3, 3, 1, - 2, 3, 3, 3, 5, 1, 2, 1, 3, 3, - 3, 3, 3, 3, 2, 2, 2, 3, 4, 4, - 1, 4, 1, 2, 2, 1, 1, 2, 2, 0, - 1, 1, 4, 2, 2, 2, 1, 0, 1, 1, - 2 -}; - -static const short yydefact[] = { 65, - 63, 66, 0, 64, 4, 0, 152, 146, 147, 7, - 9, 22, 150, 0, 0, 0, 141, 0, 0, 28, - 0, 0, 0, 65, 0, 2, 0, 0, 106, 14, - 26, 113, 128, 143, 0, 0, 0, 160, 0, 11, - 36, 65, 0, 12, 0, 67, 151, 144, 145, 0, - 0, 0, 0, 149, 143, 148, 0, 107, 137, 154, - 143, 94, 0, 92, 155, 5, 3, 1, 16, 0, - 13, 15, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 114, 0, 0, 0, 0, 0, 0, 98, - 135, 136, 34, 0, 54, 0, 0, 65, 0, 0, - 0, 59, 60, 80, 80, 0, 65, 0, 32, 0, - 41, 0, 0, 158, 65, 0, 0, 92, 0, 8, - 37, 10, 18, 17, 20, 21, 0, 19, 0, 102, - 0, 0, 0, 0, 95, 65, 157, 0, 0, 138, - 0, 158, 109, 105, 108, 103, 104, 0, 27, 110, - 111, 150, 133, 134, 130, 131, 132, 129, 0, 0, - 80, 0, 0, 0, 80, 47, 48, 0, 81, 0, - 52, 156, 35, 33, 158, 86, 150, 0, 0, 120, - 69, 0, 84, 126, 143, 58, 0, 39, 65, 159, - 38, 142, 153, 0, 68, 139, 140, 29, 96, 161, - 0, 97, 93, 158, 65, 0, 101, 99, 0, 0, - 0, 57, 0, 0, 152, 0, 51, 53, 65, 67, - 121, 0, 0, 0, 0, 0, 87, 0, 0, 0, - 0, 0, 0, 0, 127, 115, 40, 31, 77, 75, - 0, 0, 100, 24, 25, 112, 65, 55, 0, 65, - 0, 0, 0, 30, 119, 69, 71, 70, 72, 50, - 88, 89, 85, 124, 122, 123, 117, 118, 0, 0, - 65, 78, 0, 0, 0, 0, 0, 0, 0, 80, - 0, 0, 116, 23, 79, 76, 61, 56, 42, 0, - 65, 80, 0, 49, 125, 65, 65, 0, 0, 65, - 0, 43, 44, 65, 0, 62, 0, 46, 45, 0, - 0, 0 -}; - -static const short yydefgoto[] = { 310, - 25, 26, 39, 43, 127, 128, 27, 45, 69, 28, - 29, 57, 30, 108, 40, 109, 161, 110, 111, 2, - 3, 130, 226, 241, 242, 168, 181, 182, 116, 117, - 112, 159, 183, 270, 32, 33, 46, 34, 113, 114, - 140, 189, 42, 139 -}; - -static const short yypact[] = { -6, --32768, 0, 875,-32768,-32768, -40, -38,-32768,-32768, -7, - -7,-32768, 10, 10, 10, -31, -26, 1735, 1735,-32768, - 1715, 1735, 1131, -6, 932,-32768, -24, 72,-32768,-32768, - 1304, 205,-32768, 5, 709, 1110, 1131,-32768, -24,-32768, - 0, -6, -24,-32768, 85, 3,-32768,-32768,-32768, 1110, - 1110, 1735, 1620, 8, 106, 8, 81,-32768, 8,-32768, --32768,-32768, 37, 1250,-32768,-32768,-32768,-32768,-32768, 709, --32768,-32768, 1620, 1620, 90, 1620, 1620, 1620, 1620, 1620, - 1620, 65, 205, 1735, 1735, 1735, 1735, 1735, 1735,-32768, --32768,-32768,-32768, 50,-32768, 111, 70, -6, 93, -7, - -7,-32768,-32768, 1620, 1620, -7, -6, 758,-32768, 819, --32768, 1040, 709, 100, -6, 99, 55, 1402, 9,-32768, --32768,-32768,-32768,-32768,-32768,-32768, 109,-32768, 1735,-32768, - 99, 99, 1250, 119, 1620, -6,-32768, 133, 1180,-32768, - 758, 100, 1327, 794,-32768, 1515, 1451, 1353, 1402, 1327, - 1327, 10, 125, 125, 8, 8, 8, 8, 1620, 1620, - 1620, 42, 1620, 981, 1657,-32768,-32768, -7, 1402, -7, --32768,-32768,-32768,-32768, 100,-32768, 10, 1715, 1131,-32768, - 96, 39, 1538, 205, 117,-32768, 758,-32768, -6,-32768, --32768,-32768,-32768, 7, 205,-32768,-32768,-32768, 1402,-32768, - 166,-32768, 1402, 100, -6, 1620,-32768, 1402, 1250, -7, - 1131,-32768, 1250, 151, -12, 100,-32768,-32768, -6, 3, --32768, 37, 1620, 1620, 1620, -7, 1678, 1201, 1678, 1678, - 181, 1678, 1678, 1678, 205,-32768,-32768,-32768,-32768,-32768, - 99, 56,-32768,-32768,-32768, 1402, -6,-32768, 11, -6, - 131, 183, 1061,-32768,-32768, 96, 1402, 1402, 1402,-32768, - 1538,-32768, 1538, 635, 83,-32768, 1599, 1579, 1474, 
1678, - -6,-32768, 103, 981, -7, 981, 1620, 99, 623, 1620, - -7, 1678, 1538,-32768,-32768,-32768, 170,-32768,-32768, 1250, - -6, 1620, 99,-32768, 1538, -6, -6, 981, 99, -6, - 981,-32768,-32768, -6, 981,-32768, 981,-32768,-32768, 190, - 191,-32768 -}; - -static const short yypgoto[] = {-32768, --32768, 167,-32768,-32768,-32768,-32768,-32768,-32768,-32768,-32768, - 211,-32768, 107, -53, 315, -105,-32768,-32768,-32768, 199, - 97, -22, -62,-32768,-32768, -103,-32768,-32768, 94, -14, - -3,-32768, -202,-32768, 318, 177, -134, 95, 124, -69, - 407, -138, 420, -177 -}; - - -#define YYLAST 1790 - - -static const short yytable[] = { 31, - 142, 170, 174, 205, 228, 1, 1, 239, 63, 135, - 240, 135, 4, 7, 36, 90, 141, 207, 37, 64, - 252, 31, 119, 50, 261, 263, 264, 265, 51, 267, - 268, 269, 118, 118, 24, 174, 219, 135, 175, 227, - 91, 92, 220, 188, 37, 129, 118, 118, -83, 133, - 136, -83, 136, 38, 1, 135, 272, 210, 214, 187, - 89, 216, -73, 22, 273, 244, 193, 283, 275, 143, - 144, 204, 146, 147, 148, 149, 150, 151, 136, 295, - 136, 174, -83, -83, 1, 134, 7, 123, 124, 8, - 9, 229, 137, 145,-32768, 152, 136, 136, 211, -83, - 169, 169, 38, 285, 160, 223, 286, 47, 48, 49, - -91, -74, 55, 55, 162, 55, 61, 237, 14, 15, - 65, 68, 125, 126, 163, 55, 35, 236, 18, 19, - 24, 199, 38, 52, 71, 203, 22, 53, 121, 224, - 225, 91, 92, 131, 132, 120, 55, 165, 35, 122, - 70, 35, 91, 92, 137, 208, 209, 169, 55, 213, - 38, 169, 35, 194, 222, 201, 35, 198, 287, 243, - 289, 251, 86, 87, 88, 64, 293, 89, 55, 55, - 55, 55, 55, 55, 266, 277, 278, 296, 299, 311, - 312, 67, 303, 281, 164, 306, 249, 255, 60, 308, - 0, 309, 246, 172, 185, 0, 55, 118, 41, 41, - 0, 191, 55, 0, 0, 0, 0, 0, 0, 257, - 258, 259, 0, 55, 0, 0, 41, 55, 0, 0, - 0, 58, 200, 0, 0, 0, 0, 55, 55, 0, - 55, 55, 55, 55, 55, 55, 47, 0, 0, 279, - 84, 85, 86, 87, 88, 0, 0, 89, 0, 0, - 0, 0, 0, 55, 0, 0, 0, 0, 0, 0, - 0, 47, 55, 290, 0, 0, 169, 55, 0, 0, - 0, 0, 0, 0, 0, 238, 0, 0, 169, 0, - 0, 0, 0, 55, 0, 0, 0, 55, 41, 41, - 0, 245, 55, 55, 41, 0, 0, 55, 0, 0, - 41, 0, 0, 0, 0, 254, 0, 0, 0, 0, - 180, 185, 185, 185, 185, 44, 185, 185, 185, 0, - 0, 0, 0, 0, 0, 54, 56, 0, 59, 0, - 55, 0, 72, 274, 0, 0, 276, 0, 83, 0, - 0, 55, 55, 55, 0, 55, 0, 55, 55, 55, - 41, 55, 55, 55, 185, 0, 41, 284, 41, 59, - 0, 0, 0, 55, 0, 0, 185, 55, 0, 0, - 0, 83, 0, 0, 55, 0, 0, 298, 221, 55, - 0, 0, 301, 302, 0, 0, 305, 0, 0, 0, - 307, 153, 154, 155, 156, 157, 158, 0, 41, 0, - 0, 0, 0, 0, 166, 167, 0, 0, 0, 0, - 171, 0, 0, 0, 41, 0, 186, 184, 0, 83, - 0, 0, 0, 0, 0, 83, 0, 180, 180, 180, - 180, 0, 180, 180, 180, 0, 195, 0, 0, 0, - 83, 0, 0, 0, 115, 0, 0, 0, 0, 0, - 83, 83, 0, 83, 83, 83, 83, 83, 83, 138, - 0, 0, 0, 41, 0, 0, 212, 0, 0, 41, - 180, 0, 217, 0, 218, 0, 83, 0, 0, 115, - 0, 0, 180, 0, 0, 59, 0, 0, 0, 0, - 235, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 83, 0, 0, 0, - 83, 0, 192, 0, 248, 83, 83, 115, 0, 0, - 83, 0, 115, 190, 0, 0, 0, 196, 197, 0, - 260, 0, 0, 0, 184, 184, 184, 184, 0, 184, - 184, 184, 0, 0, 0, 0, 0, 0, 0, 0, - 115, 190, 0, 83, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 83, 83, 83, 0, 235, 0, - 235, 235, 235, 115, 235, 235, 235, 184, 0, 288, - 0, 0, 0, 0, 190, 294, 83, 0, 0, 184, - 235, 0, 0, 0, 0, 0, 115, 83, 0, 0, - 0, 0, 235, 0, 0, 247, 0, 0, 0, 250, - 0, 0, 0, 190, 0, 6, 7, 0, 256, 8, - 9, 73, 0, 0, 74, 253, 0, 0, 7, 0, - 0, 8, 9,-32768, 0, 0, 0, 271, 0, 0, - 0, 0, 0, 0, 0, 75, 76, 77, 14, 15, - 16, 17, 78, 0, 0, 80, 81, 82, 18, 19, - 14, 15, 280, 52, 0, 0, 22, 53, 0, 0, - 18, 19, 0, 38, 291, 52, 0, 0, 22, 53, - 0, 0, 0, 115, 0, 115, 297, 0, 292, 300, - 0, 0, 0, 0, 0, 
304, 0, 0, 0, 93, - 0, 6, 7, 0, 0, 8, 9, 115, 0, 0, - 115, 0, 0, 0, 115, 94, 115, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 0, 13, - 106, 0, 0, 0, 14, 15, 16, 17, 0, 0, - 0, 0, 0, 0, 18, 19, 0, 20, 173, 21, - 6, 7, 22, 23, 8, 9, 0, 24, 107, 38, - 0, 0, 0, 0, 94, 0, 95, 96, 97, 98, - 99, 100, 101, 102, 103, 104, 105, 0, 13, 106, - 0, 0, 0, 14, 15, 16, 17, 7, 0, 0, - 8, 9, 73, 18, 19,-32768, 20, 0, 21, 0, - 0, 22, 23, 0, 0, 0, 24, 107, 38, 176, - 0, 6, 7, 0, 0, 8, 9, 0, -82, 14, - 15, -82, 0, 0, 0, 0, 80, 81, 82, 18, - 19, 0, 0, 0, 52, 0, 0, 22, 53, 177, - 0, 0, 0, 0, 14, 15, 16, 17, 0, 0, - 0, 0, -82, -82, 18, 19, 0, 20, 0, 178, - 0, 0, 22, 179, -6, 5, 0, 6, 7, -82, - 0, 8, 9, 0, 0, 0, 0, -6, 0, 10, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 12, 13, 0, 0, 0, 0, - 14, 15, 16, 17, 0, 0, 0, 0, 0, 0, - 18, 19, 0, 20, 0, 21, 0, 0, 22, 23, - 0, -65, 66, 24, 6, 7, 0, 0, 8, 9, - 0, 0, 0, 0, 1, 0, 10, 11, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 12, 13, 0, 0, 0, 0, 14, 15, 16, - 17, 0, 0, 0, 0, 0, 0, 18, 19, 0, - 20, 0, 21, 6, 7, 22, 23, 8, 9, 0, - 24, 0, 0, 0, 0, 0, 0, 94, 0, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, - 0, 13, 106, 0, 0, 0, 14, 15, 16, 17, - 0, 0, 0, 0, 0, 0, 18, 19, 0, 20, - 0, 21, 0, 0, 22, 23, 0, 0, 0, 24, - 0, 38, 6, 7, 0, 0, 8, 9, 73, 0, - 0, 74, 1, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 6, 7, 0, 0, 8, 9, 0, - 0, 0, 75, 76, 77, 14, 15, 16, 17, 78, - 0, 0, 80, 81, 82, 18, 19, 0, 0, 0, - 52, 13, 0, 22, 53, 0, 14, 15, 16, 17, - 38, 0, 0, 0, 0, 0, 18, 19, 0, 20, - 62, 21, 6, 7, 22, 23, 8, 9, 0, 0, - 0, 38, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 62, 0, 6, 7, 0, 0, 8, 9, 0, - 13, 0, 0, 0, 0, 14, 15, 16, 17, 0, - 0, 0, 0, 0, 0, 18, 19, 0, 20, 0, - 21, 13, 0, 22, 23, -90, 14, 15, 16, 17, - 0, 0, 0, 0, 0, 0, 18, 19, 0, 20, - 202, 21, 6, 7, 22, 23, 8, 9, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 262, 0, 6, 7, 0, 0, 8, 9, 0, - 13, 0, 0, 0, 0, 14, 15, 16, 17, 0, - 0, 0, 0, 0, 0, 18, 19, 0, 20, 0, - 21, 177, 0, 22, 23, 0, 14, 15, 16, 17, - 0, 0, 0, 0, 0, 0, 18, 19, 0, 20, - 0, 178, 6, 7, 22, 53, 8, 9, 73, 0, - 0, 74, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 75, 76, 77, 14, 15, 16, 17, 78, - 0, 0, 80, 81, 82, 18, 19, 0, 0, 0, - 52, 0, 0, 22, 53, 137, 6, 7, 0, 0, - 8, 9, 73, 0, 0, 74, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 7, 0, 0, 8, 9,-32768, 75, 76, 77, 14, - 15, 16, 17, 78, 0, 79, 80, 81, 82, 18, - 19, 0, 0, 0, 52, 6, 7, 22, 53, 8, - 9, 73, 14, 15, 74, 0, 0, 0, 0,-32768, --32768,-32768, 18, 19, 0, 0, 0, 52, 0, 0, - 22, 53, 0, 0, 0, 75, 76, 77, 14, 15, - 16, 17, 78, 206, 0, 80, 81, 82, 18, 19, - 0, 0, 0, 52, 6, 7, 22, 53, 8, 9, - 73, 0, 0, 74, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 75, 76, 77, 14, 15, 16, - 17, 78, 0, 0, 80, 81, 82, 18, 19, 0, - 0, 0, 52, 6, 7, 22, 53, 8, 9, 73, - 0, 0, 74, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 6, 7, 0, 0, - 8, 9, 229, 75, 76, 230, 14, 15, 16, 17, - 0, 0, 0, 80, 81, 82, 18, 19, 0, 0, - 0, 52, 0, 0, 22, 53, 231, 232, 233, 14, - 15, 16, 17, 234, 282, 0, 0, 6, 7, 18, - 19, 8, 9, 73, 52, 0, 74, 22, 53, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 6, 7, 0, 0, 8, 9, 229, 75, 0, 230, - 14, 15, 16, 17, 0, 0, 0, 80, 81, 82, - 18, 19, 0, 0, 0, 52, 0, 0, 22, 53, - 231, 232, 233, 14, 15, 16, 17, 234, 0, 0, - 0, 6, 7, 18, 19, 8, 9, 229, 52, 0, - 230, 22, 53, 0, 0, 0, 0, 0, 0, 0, - 0, 6, 7, 0, 0, 8, 9, 229, 0, 0, - 230, 231, 232, 0, 14, 15, 16, 17, 0, 0, - 0, 0, 6, 7, 18, 19, 8, 9, 0, 52, - 0, 231, 22, 53, 14, 15, 16, 17, 0, 0, - 0, 0, 0, 0, 18, 19, 0, 0, 0, 52, - 13, 0, 22, 53, 0, 14, 15, 16, 17, 6, - 215, 0, 
0, 8, 9, 18, 19, 0, 20, 0, - 21, 0, 0, 22, 23, 0, 0, 0, 0, 0, - 6, 7, 0, 0, 8, 9, 0, 13, 0, 0, - 0, 0, 14, 15, 16, 17, 0, 0, 0, 0, - 0, 0, 18, 19, 0, 20, 0, 21, 177, 0, - 22, 23, 0, 14, 15, 16, 17, 6, 7, 0, - 0, 8, 9, 18, 19, 0, 20, 0, 178, 0, - 0, 22, 53, 0, 0, 0, 0, 6, 7, 0, - 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, - 14, 15, 16, 17, 0, 0, 0, 0, 0, 0, - 18, 19, 0, 20, 0, 52, 0, 0, 22, 53, - 14, 15, 16, 17, 0, 0, 0, 0, 0, 0, - 18, 19, 0, 0, 0, 52, 0, 0, 22, 53 -}; - -static const short yycheck[] = { 3, - 70, 105, 108, 142, 182, 13, 13, 1, 23, 1, - 4, 1, 13, 4, 55, 11, 70, 152, 57, 23, - 33, 25, 37, 55, 227, 228, 229, 230, 55, 232, - 233, 234, 36, 37, 59, 141, 175, 1, 108, 1, - 36, 37, 177, 113, 57, 43, 50, 51, 10, 53, - 42, 13, 42, 61, 13, 1, 1, 161, 164, 113, - 53, 165, 56, 54, 242, 204, 58, 270, 58, 73, - 74, 141, 76, 77, 78, 79, 80, 81, 42, 282, - 42, 187, 44, 45, 13, 5, 4, 3, 4, 7, - 8, 9, 56, 4, 12, 31, 42, 42, 57, 61, - 104, 105, 61, 1, 55, 10, 4, 13, 14, 15, - 56, 56, 18, 19, 4, 21, 22, 187, 36, 37, - 24, 25, 38, 39, 55, 31, 3, 11, 46, 47, - 59, 135, 61, 51, 28, 139, 54, 55, 42, 44, - 45, 36, 37, 50, 51, 39, 52, 55, 25, 43, - 27, 28, 36, 37, 56, 159, 160, 161, 64, 163, - 61, 165, 39, 55, 179, 33, 43, 49, 274, 4, - 276, 21, 48, 49, 50, 179, 280, 53, 84, 85, - 86, 87, 88, 89, 4, 55, 4, 18, 292, 0, - 0, 25, 298, 256, 98, 301, 211, 220, 22, 305, - -1, 307, 206, 107, 110, -1, 112, 211, 10, 11, - -1, 115, 118, -1, -1, -1, -1, -1, -1, 223, - 224, 225, -1, 129, -1, -1, 28, 133, -1, -1, - -1, 21, 136, -1, -1, -1, -1, 143, 144, -1, - 146, 147, 148, 149, 150, 151, 152, -1, -1, 253, - 46, 47, 48, 49, 50, -1, -1, 53, -1, -1, - -1, -1, -1, 169, -1, -1, -1, -1, -1, -1, - -1, 177, 178, 277, -1, -1, 280, 183, -1, -1, - -1, -1, -1, -1, -1, 189, -1, -1, 292, -1, - -1, -1, -1, 199, -1, -1, -1, 203, 100, 101, - -1, 205, 208, 209, 106, -1, -1, 213, -1, -1, - 112, -1, -1, -1, -1, 219, -1, -1, -1, -1, - 110, 227, 228, 229, 230, 11, 232, 233, 234, -1, - -1, -1, -1, -1, -1, 18, 19, -1, 21, -1, - 246, -1, 28, 247, -1, -1, 250, -1, 31, -1, - -1, 257, 258, 259, -1, 261, -1, 263, 264, 265, - 162, 267, 268, 269, 270, -1, 168, 271, 170, 52, - -1, -1, -1, 279, -1, -1, 282, 283, -1, -1, - -1, 64, -1, -1, 290, -1, -1, 291, 178, 295, - -1, -1, 296, 297, -1, -1, 300, -1, -1, -1, - 304, 84, 85, 86, 87, 88, 89, -1, 210, -1, - -1, -1, -1, -1, 100, 101, -1, -1, -1, -1, - 106, -1, -1, -1, 226, -1, 112, 110, -1, 112, - -1, -1, -1, -1, -1, 118, -1, 227, 228, 229, - 230, -1, 232, 233, 234, -1, 129, -1, -1, -1, - 133, -1, -1, -1, 35, -1, -1, -1, -1, -1, - 143, 144, -1, 146, 147, 148, 149, 150, 151, 63, - -1, -1, -1, 275, -1, -1, 162, -1, -1, 281, - 270, -1, 168, -1, 170, -1, 169, -1, -1, 70, - -1, -1, 282, -1, -1, 178, -1, -1, -1, -1, - 183, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 199, -1, -1, -1, - 203, -1, 116, -1, 210, 208, 209, 108, -1, -1, - 213, -1, 113, 114, -1, -1, -1, 131, 132, -1, - 226, -1, -1, -1, 227, 228, 229, 230, -1, 232, - 233, 234, -1, -1, -1, -1, -1, -1, -1, -1, - 141, 142, -1, 246, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 257, 258, 259, -1, 261, -1, - 263, 264, 265, 164, 267, 268, 269, 270, -1, 275, - -1, -1, -1, -1, 175, 281, 279, -1, -1, 282, - 283, -1, -1, -1, -1, -1, 187, 290, -1, -1, - -1, -1, 295, -1, -1, 209, -1, -1, -1, 213, - -1, -1, -1, 204, -1, 3, 4, -1, 222, 7, - 8, 9, -1, -1, 12, 216, -1, -1, 4, -1, - -1, 7, 8, 9, -1, -1, -1, 241, -1, -1, - -1, -1, -1, -1, -1, 33, 34, 35, 36, 37, - 38, 39, 40, -1, -1, 43, 44, 45, 46, 47, - 36, 37, 253, 51, -1, -1, 54, 55, -1, -1, - 46, 47, -1, 61, 
278, 51, -1, -1, 54, 55, - -1, -1, -1, 274, -1, 276, 290, -1, 279, 293, - -1, -1, -1, -1, -1, 299, -1, -1, -1, 1, - -1, 3, 4, -1, -1, 7, 8, 298, -1, -1, - 301, -1, -1, -1, 305, 17, 307, 19, 20, 21, - 22, 23, 24, 25, 26, 27, 28, 29, -1, 31, - 32, -1, -1, -1, 36, 37, 38, 39, -1, -1, - -1, -1, -1, -1, 46, 47, -1, 49, 1, 51, - 3, 4, 54, 55, 7, 8, -1, 59, 60, 61, - -1, -1, -1, -1, 17, -1, 19, 20, 21, 22, - 23, 24, 25, 26, 27, 28, 29, -1, 31, 32, - -1, -1, -1, 36, 37, 38, 39, 4, -1, -1, - 7, 8, 9, 46, 47, 12, 49, -1, 51, -1, - -1, 54, 55, -1, -1, -1, 59, 60, 61, 1, - -1, 3, 4, -1, -1, 7, 8, -1, 10, 36, - 37, 13, -1, -1, -1, -1, 43, 44, 45, 46, - 47, -1, -1, -1, 51, -1, -1, 54, 55, 31, - -1, -1, -1, -1, 36, 37, 38, 39, -1, -1, - -1, -1, 44, 45, 46, 47, -1, 49, -1, 51, - -1, -1, 54, 55, 0, 1, -1, 3, 4, 61, - -1, 7, 8, -1, -1, -1, -1, 13, -1, 15, - 16, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 30, 31, -1, -1, -1, -1, - 36, 37, 38, 39, -1, -1, -1, -1, -1, -1, - 46, 47, -1, 49, -1, 51, -1, -1, 54, 55, - -1, 0, 1, 59, 3, 4, -1, -1, 7, 8, - -1, -1, -1, -1, 13, -1, 15, 16, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 30, 31, -1, -1, -1, -1, 36, 37, 38, - 39, -1, -1, -1, -1, -1, -1, 46, 47, -1, - 49, -1, 51, 3, 4, 54, 55, 7, 8, -1, - 59, -1, -1, -1, -1, -1, -1, 17, -1, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - -1, 31, 32, -1, -1, -1, 36, 37, 38, 39, - -1, -1, -1, -1, -1, -1, 46, 47, -1, 49, - -1, 51, -1, -1, 54, 55, -1, -1, -1, 59, - -1, 61, 3, 4, -1, -1, 7, 8, 9, -1, - -1, 12, 13, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, 3, 4, -1, -1, 7, 8, -1, - -1, -1, 33, 34, 35, 36, 37, 38, 39, 40, - -1, -1, 43, 44, 45, 46, 47, -1, -1, -1, - 51, 31, -1, 54, 55, -1, 36, 37, 38, 39, - 61, -1, -1, -1, -1, -1, 46, 47, -1, 49, - 1, 51, 3, 4, 54, 55, 7, 8, -1, -1, - -1, 61, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1, -1, 3, 4, -1, -1, 7, 8, -1, - 31, -1, -1, -1, -1, 36, 37, 38, 39, -1, - -1, -1, -1, -1, -1, 46, 47, -1, 49, -1, - 51, 31, -1, 54, 55, 56, 36, 37, 38, 39, - -1, -1, -1, -1, -1, -1, 46, 47, -1, 49, - 1, 51, 3, 4, 54, 55, 7, 8, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, 1, -1, 3, 4, -1, -1, 7, 8, -1, - 31, -1, -1, -1, -1, 36, 37, 38, 39, -1, - -1, -1, -1, -1, -1, 46, 47, -1, 49, -1, - 51, 31, -1, 54, 55, -1, 36, 37, 38, 39, - -1, -1, -1, -1, -1, -1, 46, 47, -1, 49, - -1, 51, 3, 4, 54, 55, 7, 8, 9, -1, - -1, 12, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, 33, 34, 35, 36, 37, 38, 39, 40, - -1, -1, 43, 44, 45, 46, 47, -1, -1, -1, - 51, -1, -1, 54, 55, 56, 3, 4, -1, -1, - 7, 8, 9, -1, -1, 12, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 4, -1, -1, 7, 8, 9, 33, 34, 35, 36, - 37, 38, 39, 40, -1, 42, 43, 44, 45, 46, - 47, -1, -1, -1, 51, 3, 4, 54, 55, 7, - 8, 9, 36, 37, 12, -1, -1, -1, -1, 43, - 44, 45, 46, 47, -1, -1, -1, 51, -1, -1, - 54, 55, -1, -1, -1, 33, 34, 35, 36, 37, - 38, 39, 40, 41, -1, 43, 44, 45, 46, 47, - -1, -1, -1, 51, 3, 4, 54, 55, 7, 8, - 9, -1, -1, 12, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, 33, 34, 35, 36, 37, 38, - 39, 40, -1, -1, 43, 44, 45, 46, 47, -1, - -1, -1, 51, 3, 4, 54, 55, 7, 8, 9, - -1, -1, 12, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 3, 4, -1, -1, - 7, 8, 9, 33, 34, 12, 36, 37, 38, 39, - -1, -1, -1, 43, 44, 45, 46, 47, -1, -1, - -1, 51, -1, -1, 54, 55, 33, 34, 35, 36, - 37, 38, 39, 40, 41, -1, -1, 3, 4, 46, - 47, 7, 8, 9, 51, -1, 12, 54, 55, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - 3, 4, -1, -1, 7, 8, 9, 33, -1, 12, - 36, 37, 38, 
39, -1, -1, -1, 43, 44, 45, - 46, 47, -1, -1, -1, 51, -1, -1, 54, 55, - 33, 34, 35, 36, 37, 38, 39, 40, -1, -1, - -1, 3, 4, 46, 47, 7, 8, 9, 51, -1, - 12, 54, 55, -1, -1, -1, -1, -1, -1, -1, - -1, 3, 4, -1, -1, 7, 8, 9, -1, -1, - 12, 33, 34, -1, 36, 37, 38, 39, -1, -1, - -1, -1, 3, 4, 46, 47, 7, 8, -1, 51, - -1, 33, 54, 55, 36, 37, 38, 39, -1, -1, - -1, -1, -1, -1, 46, 47, -1, -1, -1, 51, - 31, -1, 54, 55, -1, 36, 37, 38, 39, 3, - 4, -1, -1, 7, 8, 46, 47, -1, 49, -1, - 51, -1, -1, 54, 55, -1, -1, -1, -1, -1, - 3, 4, -1, -1, 7, 8, -1, 31, -1, -1, - -1, -1, 36, 37, 38, 39, -1, -1, -1, -1, - -1, -1, 46, 47, -1, 49, -1, 51, 31, -1, - 54, 55, -1, 36, 37, 38, 39, 3, 4, -1, - -1, 7, 8, 46, 47, -1, 49, -1, 51, -1, - -1, 54, 55, -1, -1, -1, -1, 3, 4, -1, - -1, 7, 8, -1, -1, -1, -1, -1, -1, -1, - 36, 37, 38, 39, -1, -1, -1, -1, -1, -1, - 46, 47, -1, 49, -1, 51, -1, -1, 54, 55, - 36, 37, 38, 39, -1, -1, -1, -1, -1, -1, - 46, 47, -1, -1, -1, 51, -1, -1, 54, 55 -}; -/* -*-C-*- Note some compilers choke on comments on `#line' lines. */ -#line 3 "/usr/share/bison.simple" - -/* Skeleton output parser for bison, - Copyright (C) 1984, 1989, 1990 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* As a special exception, when this file is copied by Bison into a - Bison output file, you may use that output file without restriction. - This special exception was added by the Free Software Foundation - in version 1.24 of Bison. */ - -#ifndef alloca -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not GNU C. */ -#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__) || defined (__sparc) || defined (__sgi) -#include -#else /* not sparc */ -#if defined (MSDOS) && !defined (__TURBOC__) -#include -#else /* not MSDOS, or __TURBOC__ */ -#if defined(_AIX) -#include - #pragma alloca -#else /* not MSDOS, __TURBOC__, or _AIX */ -#ifdef __hpux -#ifdef __cplusplus -extern "C" { -void *alloca (unsigned int); -}; -#else /* not __cplusplus */ -void *alloca (); -#endif /* not __cplusplus */ -#endif /* __hpux */ -#endif /* not _AIX */ -#endif /* not MSDOS, or __TURBOC__ */ -#endif /* not sparc. */ -#endif /* not GNU C. */ -#endif /* alloca not defined. */ - -/* This is the parser code that is written into each bison parser - when the %semantic_parser declaration is not specified in the grammar. - It was written by Richard Stallman by simplifying the hairy parser - used when %semantic_parser is specified. */ - -/* Note: there must be only one dollar sign in this file. - It is replaced by the list of actions, each action - as one case of the switch. */ - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY -2 -#define YYEOF 0 -#define YYACCEPT return(0) -#define YYABORT return(1) -#define YYERROR goto yyerrlab1 -/* Like YYERROR except do call yyerror. 
- This remains here temporarily to ease the - transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ -#define YYFAIL goto yyerrlab -#define YYRECOVERING() (!!yyerrstatus) -#define YYBACKUP(token, value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { yychar = (token), yylval = (value); \ - yychar1 = YYTRANSLATE (yychar); \ - YYPOPSTACK; \ - goto yybackup; \ - } \ - else \ - { yyerror ("syntax error: cannot back up"); YYERROR; } \ -while (0) - -#define YYTERROR 1 -#define YYERRCODE 256 - -#ifndef YYPURE -#define YYLEX yylex() -#endif - -#ifdef YYPURE -#ifdef YYLSP_NEEDED -#ifdef YYLEX_PARAM -#define YYLEX yylex(&yylval, &yylloc, YYLEX_PARAM) -#else -#define YYLEX yylex(&yylval, &yylloc) -#endif -#else /* not YYLSP_NEEDED */ -#ifdef YYLEX_PARAM -#define YYLEX yylex(&yylval, YYLEX_PARAM) -#else -#define YYLEX yylex(&yylval) -#endif -#endif /* not YYLSP_NEEDED */ -#endif - -/* If nonreentrant, generate the variables here */ - -#ifndef YYPURE - -int yychar; /* the lookahead symbol */ -YYSTYPE yylval; /* the semantic value of the */ - /* lookahead symbol */ - -#ifdef YYLSP_NEEDED -YYLTYPE yylloc; /* location data for the lookahead */ - /* symbol */ -#endif - -int yynerrs; /* number of parse errors so far */ -#endif /* not YYPURE */ - -#if YYDEBUG != 0 -int yydebug; /* nonzero means print parse trace */ -/* Since this is uninitialized, it does not stop multiple parsers - from coexisting. */ -#endif - -/* YYINITDEPTH indicates the initial size of the parser's stacks */ - -#ifndef YYINITDEPTH -#define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH is the maximum size the stacks can grow to - (effective only if the built-in stack extension method is used). */ - -#if YYMAXDEPTH == 0 -#undef YYMAXDEPTH -#endif - -#ifndef YYMAXDEPTH -#define YYMAXDEPTH 10000 -#endif - -#ifndef YYPARSE_RETURN_TYPE -#define YYPARSE_RETURN_TYPE int -#endif - -/* Prevent warning if -Wstrict-prototypes. */ -#ifdef __GNUC__ -YYPARSE_RETURN_TYPE yyparse (void); -#endif - -#if __GNUC__ > 1 /* GNU C and GNU C++ define this. */ -#define __yy_memcpy(TO,FROM,COUNT) __builtin_memcpy(TO,FROM,COUNT) -#else /* not GNU C or C++ */ -#ifndef __cplusplus - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. */ -static void -__yy_memcpy (to, from, count) - char *to; - char *from; - int count; -{ - register char *f = from; - register char *t = to; - register int i = count; - - while (i-- > 0) - *t++ = *f++; -} - -#else /* __cplusplus */ - -/* This is the most reliable way to avoid incompatibilities - in available built-in functions on various systems. */ -static void -__yy_memcpy (char *to, char *from, int count) -{ - register char *f = from; - register char *t = to; - register int i = count; - - while (i-- > 0) - *t++ = *f++; -} - -#endif -#endif - -#line 196 "/usr/share/bison.simple" - -/* The user can define YYPARSE_PARAM as the name of an argument to be passed - into yyparse. The argument should have type void *. - It should actually point to an object. - Grammar actions can access the variable by casting it - to the proper pointer type. 
*/ - -#ifdef YYPARSE_PARAM -#ifdef __cplusplus -#define YYPARSE_PARAM_ARG void *YYPARSE_PARAM -#define YYPARSE_PARAM_DECL -#else /* not __cplusplus */ -#define YYPARSE_PARAM_ARG YYPARSE_PARAM -#define YYPARSE_PARAM_DECL void *YYPARSE_PARAM; -#endif /* not __cplusplus */ -#else /* not YYPARSE_PARAM */ -#define YYPARSE_PARAM_ARG -#define YYPARSE_PARAM_DECL -#endif /* not YYPARSE_PARAM */ - -YYPARSE_RETURN_TYPE -yyparse(YYPARSE_PARAM_ARG) - YYPARSE_PARAM_DECL -{ - register int yystate; - register int yyn; - register short *yyssp; - register YYSTYPE *yyvsp; - int yyerrstatus; /* number of tokens to shift before error messages enabled */ - int yychar1 = 0; /* lookahead token as an internal (translated) token number */ - - short yyssa[YYINITDEPTH]; /* the state stack */ - YYSTYPE yyvsa[YYINITDEPTH]; /* the semantic value stack */ - - short *yyss = yyssa; /* refer to the stacks thru separate pointers */ - YYSTYPE *yyvs = yyvsa; /* to allow yyoverflow to reallocate them elsewhere */ - -#ifdef YYLSP_NEEDED - YYLTYPE yylsa[YYINITDEPTH]; /* the location stack */ - YYLTYPE *yyls = yylsa; - YYLTYPE *yylsp; - -#define YYPOPSTACK (yyvsp--, yyssp--, yylsp--) -#else -#define YYPOPSTACK (yyvsp--, yyssp--) -#endif - - int yystacksize = YYINITDEPTH; - -#ifdef YYPURE - int yychar; - YYSTYPE yylval; - int yynerrs; -#ifdef YYLSP_NEEDED - YYLTYPE yylloc; -#endif -#endif - - YYSTYPE yyval; /* the variable used to return */ - /* semantic values from the action */ - /* routines */ - - int yylen; - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Starting parse\n"); -#endif - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - - yyssp = yyss - 1; - yyvsp = yyvs; -#ifdef YYLSP_NEEDED - yylsp = yyls; -#endif - -/* Push a new state, which is found in yystate . */ -/* In all cases, when you get here, the value and location stacks - have just been pushed. so pushing a state here evens the stacks. */ -yynewstate: - - *++yyssp = yystate; - - if (yyssp >= yyss + yystacksize - 1) - { - /* Give user a chance to reallocate the stack */ - /* Use copies of these so that the &'s don't force the real ones into memory. */ - YYSTYPE *yyvs1 = yyvs; - short *yyss1 = yyss; -#ifdef YYLSP_NEEDED - YYLTYPE *yyls1 = yyls; -#endif - - /* Get the current used size of the three stacks, in elements. */ - int size = yyssp - yyss + 1; - -#ifdef yyoverflow - /* Each stack pointer address is followed by the size of - the data in use in that stack, in bytes. */ -#ifdef YYLSP_NEEDED - /* This used to be a conditional around just the two extra args, - but that might be undefined if yyoverflow is a macro. */ - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yyls1, size * sizeof (*yylsp), - &yystacksize); -#else - yyoverflow("parser stack overflow", - &yyss1, size * sizeof (*yyssp), - &yyvs1, size * sizeof (*yyvsp), - &yystacksize); -#endif - - yyss = yyss1; yyvs = yyvs1; -#ifdef YYLSP_NEEDED - yyls = yyls1; -#endif -#else /* no yyoverflow */ - /* Extend the stack our own way. 
*/ - if (yystacksize >= YYMAXDEPTH) - { - yyerror("parser stack overflow"); - return 2; - } - yystacksize *= 2; - if (yystacksize > YYMAXDEPTH) - yystacksize = YYMAXDEPTH; - yyss = (short *) alloca (yystacksize * sizeof (*yyssp)); - __yy_memcpy ((char *)yyss, (char *)yyss1, size * sizeof (*yyssp)); - yyvs = (YYSTYPE *) alloca (yystacksize * sizeof (*yyvsp)); - __yy_memcpy ((char *)yyvs, (char *)yyvs1, size * sizeof (*yyvsp)); -#ifdef YYLSP_NEEDED - yyls = (YYLTYPE *) alloca (yystacksize * sizeof (*yylsp)); - __yy_memcpy ((char *)yyls, (char *)yyls1, size * sizeof (*yylsp)); -#endif -#endif /* no yyoverflow */ - - yyssp = yyss + size - 1; - yyvsp = yyvs + size - 1; -#ifdef YYLSP_NEEDED - yylsp = yyls + size - 1; -#endif - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Stack size increased to %d\n", yystacksize); -#endif - - if (yyssp >= yyss + yystacksize - 1) - YYABORT; - } - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Entering state %d\n", yystate); -#endif - - goto yybackup; - yybackup: - -/* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. */ -/* yyresume: */ - - /* First try to decide what to do without reference to lookahead token. */ - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* yychar is either YYEMPTY or YYEOF - or a valid token in external form. */ - - if (yychar == YYEMPTY) - { -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Reading a token: "); -#endif - yychar = YYLEX; - } - - /* Convert token to internal form (in yychar1) for indexing tables with */ - - if (yychar <= 0) /* This means end of input. */ - { - yychar1 = 0; - yychar = YYEOF; /* Don't call YYLEX any more */ - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Now at end of input.\n"); -#endif - } - else - { - yychar1 = YYTRANSLATE(yychar); - -#if YYDEBUG != 0 - if (yydebug) - { - fprintf (stderr, "Next token is %d (%s", yychar, yytname[yychar1]); - /* Give the individual parser a way to print the precise meaning - of a token, for further debugging info. */ -#ifdef YYPRINT - YYPRINT (stderr, yychar, yylval); -#endif - fprintf (stderr, ")\n"); - } -#endif - } - - yyn += yychar1; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != yychar1) - goto yydefault; - - yyn = yytable[yyn]; - - /* yyn is what to do for this token type in this state. - Negative => reduce, -yyn is rule number. - Positive => shift, yyn is new state. - New state is final state => don't bother to shift, - just return success. - 0, or most negative number => error. */ - - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrlab; - - if (yyn == YYFINAL) - YYACCEPT; - - /* Shift the lookahead token. */ - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Shifting token %d (%s), ", yychar, yytname[yychar1]); -#endif - - /* Discard the token being shifted unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - /* count tokens shifted since error; after three, turn off error status. */ - if (yyerrstatus) yyerrstatus--; - - yystate = yyn; - goto yynewstate; - -/* Do the default action for the current state. */ -yydefault: - - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - -/* Do a reduction. yyn is the number of a rule to reduce with. 
*/ -yyreduce: - yylen = yyr2[yyn]; - if (yylen > 0) - yyval = yyvsp[1-yylen]; /* implement default value of the action */ - -#if YYDEBUG != 0 - if (yydebug) - { - int i; - - fprintf (stderr, "Reducing via rule %d (line %d), ", - yyn, yyrline[yyn]); - - /* Print the symbols being reduced, and their result. */ - for (i = yyprhs[yyn]; yyrhs[i] > 0; i++) - fprintf (stderr, "%s ", yytname[yyrhs[i]]); - fprintf (stderr, " -> %s\n", yytname[yyr1[yyn]]); - } -#endif - - - switch (yyn) { - -case 1: -#line 151 "./awk.y" -{ - expression_value = yyvsp[-1].nodeval; - check_funcs(); - ; - break;} -case 2: -#line 159 "./awk.y" -{ - if (yyvsp[0].nodeval != NULL) - yyval.nodeval = yyvsp[0].nodeval; - else - yyval.nodeval = NULL; - yyerrok; - ; - break;} -case 3: -#line 168 "./awk.y" -{ - if (yyvsp[0].nodeval == NULL) - yyval.nodeval = yyvsp[-1].nodeval; - else if (yyvsp[-1].nodeval == NULL) - yyval.nodeval = yyvsp[0].nodeval; - else { - if (yyvsp[-1].nodeval->type != Node_rule_list) - yyvsp[-1].nodeval = node(yyvsp[-1].nodeval, Node_rule_list, - (NODE*) NULL); - yyval.nodeval = append_right(yyvsp[-1].nodeval, - node(yyvsp[0].nodeval, Node_rule_list, (NODE *) NULL)); - } - yyerrok; - ; - break;} -case 4: -#line 182 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 5: -#line 183 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 6: -#line 184 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 7: -#line 188 "./awk.y" -{ io_allowed = FALSE; ; - break;} -case 8: -#line 190 "./awk.y" -{ - if (begin_block != NULL) { - if (begin_block->type != Node_rule_list) - begin_block = node(begin_block, Node_rule_list, - (NODE *) NULL); - (void) append_right(begin_block, node( - node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval), - Node_rule_list, (NODE *) NULL) ); - } else - begin_block = node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval); - yyval.nodeval = NULL; - io_allowed = TRUE; - yyerrok; - ; - break;} -case 9: -#line 204 "./awk.y" -{ io_allowed = FALSE; ; - break;} -case 10: -#line 206 "./awk.y" -{ - if (end_block != NULL) { - if (end_block->type != Node_rule_list) - end_block = node(end_block, Node_rule_list, - (NODE *) NULL); - (void) append_right (end_block, node( - node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval), - Node_rule_list, (NODE *) NULL)); - } else - end_block = node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval); - yyval.nodeval = NULL; - io_allowed = TRUE; - yyerrok; - ; - break;} -case 11: -#line 221 "./awk.y" -{ - warning("BEGIN blocks must have an action part"); - errcount++; - yyerrok; - ; - break;} -case 12: -#line 227 "./awk.y" -{ - warning("END blocks must have an action part"); - errcount++; - yyerrok; - ; - break;} -case 13: -#line 233 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_rule_node, yyvsp[0].nodeval); yyerrok; ; - break;} -case 14: -#line 235 "./awk.y" -{ yyval.nodeval = node((NODE *) NULL, Node_rule_node, yyvsp[0].nodeval); yyerrok; ; - break;} -case 15: -#line 237 "./awk.y" -{ - yyval.nodeval = node(yyvsp[-1].nodeval, - Node_rule_node, - node(node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL), - Node_K_print, - (NODE *) NULL)); - yyerrok; - ; - break;} -case 16: -#line 250 "./awk.y" -{ - func_install(yyvsp[-1].nodeval, yyvsp[0].nodeval); - yyval.nodeval = NULL; - yyerrok; - ; - break;} -case 17: -#line 259 "./awk.y" -{ yyval.sval = yyvsp[0].sval; ; - break;} -case 18: -#line 261 "./awk.y" -{ yyval.sval = yyvsp[0].sval; ; - break;} -case 19: -#line 263 "./awk.y" -{ - yyerror("%s() is a built-in 
function, it cannot be redefined", - tokstart); - errcount++; - /* yyerrok; */ - ; - break;} -case 22: -#line 278 "./awk.y" -{ - param_counter = 0; - ; - break;} -case 23: -#line 282 "./awk.y" -{ - NODE *t; - - t = make_param(yyvsp[-4].sval); - t->flags |= FUNC; - yyval.nodeval = append_right(t, yyvsp[-2].nodeval); - can_return = TRUE; - /* check for duplicate parameter names */ - if (dup_parms(yyval.nodeval)) - errcount++; - ; - break;} -case 24: -#line 297 "./awk.y" -{ - yyval.nodeval = yyvsp[-2].nodeval; - can_return = FALSE; - ; - break;} -case 25: -#line 302 "./awk.y" -{ - yyval.nodeval = node((NODE *) NULL, Node_K_return, (NODE *) NULL); - can_return = FALSE; - ; - break;} -case 26: -#line 311 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 27: -#line 313 "./awk.y" -{ yyval.nodeval = mkrangenode(node(yyvsp[-2].nodeval, Node_cond_pair, yyvsp[0].nodeval)); ; - break;} -case 28: -#line 322 "./awk.y" -{ ++want_regexp; ; - break;} -case 29: -#line 324 "./awk.y" -{ - NODE *n; - size_t len; - - getnode(n); - n->type = Node_regex; - len = strlen(yyvsp[-1].sval); - n->re_exp = make_string(yyvsp[-1].sval, len); - n->re_reg = make_regexp(yyvsp[-1].sval, len, FALSE, TRUE); - n->re_text = NULL; - n->re_flags = CONST; - n->re_cnt = 1; - yyval.nodeval = n; - ; - break;} -case 30: -#line 342 "./awk.y" -{ yyval.nodeval = yyvsp[-3].nodeval; ; - break;} -case 31: -#line 344 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 32: -#line 349 "./awk.y" -{ - yyval.nodeval = yyvsp[0].nodeval; - if (do_lint && isnoeffect(yyval.nodeval->type)) - warning("statement may have no effect"); - ; - break;} -case 33: -#line 355 "./awk.y" -{ - if (yyvsp[-1].nodeval == NULL || yyvsp[-1].nodeval->type != Node_statement_list) - yyvsp[-1].nodeval = node(yyvsp[-1].nodeval, Node_statement_list, (NODE *) NULL); - yyval.nodeval = append_right(yyvsp[-1].nodeval, - node(yyvsp[0].nodeval, Node_statement_list, (NODE *) NULL)); - yyerrok; - ; - break;} -case 34: -#line 363 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 35: -#line 365 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 38: -#line 375 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 39: -#line 377 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 40: -#line 379 "./awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 41: -#line 381 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 42: -#line 383 "./awk.y" -{ yyval.nodeval = node(yyvsp[-3].nodeval, Node_K_while, yyvsp[0].nodeval); ; - break;} -case 43: -#line 385 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_K_do, yyvsp[-5].nodeval); ; - break;} -case 44: -#line 387 "./awk.y" -{ - /* - * Efficiency hack. Recognize the special case of - * - * for (iggy in foo) - * delete foo[iggy] - * - * and treat it as if it were - * - * delete foo - * - * Check that the body is a `delete a[i]' statement, - * and that both the loop var and array names match. 
- */ - if (yyvsp[0].nodeval->type == Node_K_delete - && yyvsp[0].nodeval->rnode != NULL - && strcmp(yyvsp[-5].sval, yyvsp[0].nodeval->rnode->var_value->vname) == 0 - && strcmp(yyvsp[-3].sval, yyvsp[0].nodeval->lnode->vname) == 0) { - yyvsp[0].nodeval->type = Node_K_delete_loop; - yyval.nodeval = yyvsp[0].nodeval; - } else { - yyval.nodeval = node(yyvsp[0].nodeval, Node_K_arrayfor, - make_for_loop(variable(yyvsp[-5].sval, CAN_FREE, Node_var), - (NODE *) NULL, variable(yyvsp[-3].sval, CAN_FREE, Node_var_array))); - } - ; - break;} -case 45: -#line 414 "./awk.y" -{ - yyval.nodeval = node(yyvsp[0].nodeval, Node_K_for, (NODE *) make_for_loop(yyvsp[-7].nodeval, yyvsp[-5].nodeval, yyvsp[-3].nodeval)); - ; - break;} -case 46: -#line 418 "./awk.y" -{ - yyval.nodeval = node(yyvsp[0].nodeval, Node_K_for, - (NODE *) make_for_loop(yyvsp[-6].nodeval, (NODE *) NULL, yyvsp[-3].nodeval)); - ; - break;} -case 47: -#line 424 "./awk.y" -{ yyval.nodeval = node((NODE *) NULL, Node_K_break, (NODE *) NULL); ; - break;} -case 48: -#line 427 "./awk.y" -{ yyval.nodeval = node((NODE *) NULL, Node_K_continue, (NODE *) NULL); ; - break;} -case 49: -#line 429 "./awk.y" -{ yyval.nodeval = node(yyvsp[-3].nodeval, yyvsp[-5].nodetypeval, yyvsp[-1].nodeval); ; - break;} -case 50: -#line 431 "./awk.y" -{ - if (yyvsp[-3].nodetypeval == Node_K_print && yyvsp[-2].nodeval == NULL) { - static int warned = FALSE; - - yyvsp[-2].nodeval = node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); - - if (do_lint && ! io_allowed && ! warned) { - warned = TRUE; - warning( - "plain `print' in BEGIN or END rule should probably be `print \"\"'"); - } - } - - yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-3].nodetypeval, yyvsp[-1].nodeval); - ; - break;} -case 51: -#line 451 "./awk.y" -{ NODETYPE type; - - if (yyvsp[-1].nodeval) { - if (yyvsp[-1].nodeval == lookup("file")) { - static int warned = FALSE; - - if (! warned) { - warned = TRUE; - warning("`next file' is obsolete; use `nextfile'"); - } - if (do_lint) - warning("`next file' is a gawk extension"); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error("`next file' is a gawk extension"); - } - if (! io_allowed) { - /* same thing */ - errcount++; - error("`next file' used in BEGIN or END action"); - } - type = Node_K_nextfile; - } else { - errcount++; - error("illegal expression after `next'"); - type = Node_K_next; /* sanity */ - } - } else { - if (! io_allowed) - yyerror("`next' used in BEGIN or END action"); - type = Node_K_next; - } - yyval.nodeval = node((NODE *) NULL, type, (NODE *) NULL); - ; - break;} -case 52: -#line 490 "./awk.y" -{ - if (do_lint) - warning("`nextfile' is a gawk extension"); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error("`nextfile' is a gawk extension"); - } - if (! io_allowed) { - /* same thing */ - errcount++; - error("`nextfile' used in BEGIN or END action"); - } - yyval.nodeval = node((NODE *) NULL, Node_K_nextfile, (NODE *) NULL); - ; - break;} -case 53: -#line 509 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_exit, (NODE *) NULL); ; - break;} -case 54: -#line 511 "./awk.y" -{ - if (! 
can_return) - yyerror("`return' used outside function context"); - ; - break;} -case 55: -#line 516 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_return, (NODE *) NULL); ; - break;} -case 56: -#line 518 "./awk.y" -{ yyval.nodeval = node(variable(yyvsp[-4].sval, CAN_FREE, Node_var_array), Node_K_delete, yyvsp[-2].nodeval); ; - break;} -case 57: -#line 520 "./awk.y" -{ - if (do_lint) - warning("`delete array' is a gawk extension"); - if (do_traditional) { - /* - * can't use yyerror, since may have overshot - * the source line - */ - errcount++; - error("`delete array' is a gawk extension"); - } - yyval.nodeval = node(variable(yyvsp[-1].sval, CAN_FREE, Node_var_array), Node_K_delete, (NODE *) NULL); - ; - break;} -case 58: -#line 534 "./awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 59: -#line 539 "./awk.y" -{ yyval.nodetypeval = yyvsp[0].nodetypeval; ; - break;} -case 60: -#line 541 "./awk.y" -{ yyval.nodetypeval = yyvsp[0].nodetypeval; ; - break;} -case 61: -#line 546 "./awk.y" -{ - yyval.nodeval = node(yyvsp[-3].nodeval, Node_K_if, - node(yyvsp[0].nodeval, Node_if_branches, (NODE *) NULL)); - ; - break;} -case 62: -#line 552 "./awk.y" -{ yyval.nodeval = node(yyvsp[-6].nodeval, Node_K_if, - node(yyvsp[-3].nodeval, Node_if_branches, yyvsp[0].nodeval)); ; - break;} -case 63: -#line 558 "./awk.y" -{ want_assign = FALSE; ; - break;} -case 67: -#line 569 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 68: -#line 571 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_input, (NODE *) NULL); ; - break;} -case 69: -#line 576 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 70: -#line 578 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_output, (NODE *) NULL); ; - break;} -case 71: -#line 580 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_append, (NODE *) NULL); ; - break;} -case 72: -#line 582 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_redirect_pipe, (NODE *) NULL); ; - break;} -case 73: -#line 587 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 74: -#line 589 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 75: -#line 594 "./awk.y" -{ yyval.nodeval = make_param(yyvsp[0].sval); ; - break;} -case 76: -#line 596 "./awk.y" -{ yyval.nodeval = append_right(yyvsp[-2].nodeval, make_param(yyvsp[0].sval)); yyerrok; ; - break;} -case 77: -#line 598 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 78: -#line 600 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 79: -#line 602 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 80: -#line 608 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 81: -#line 610 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 82: -#line 615 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 83: -#line 617 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 84: -#line 622 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL); ; - break;} -case 85: -#line 624 "./awk.y" -{ - yyval.nodeval = append_right(yyvsp[-2].nodeval, - node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL)); - yyerrok; - ; - break;} -case 86: -#line 630 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 87: -#line 632 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 88: -#line 634 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 89: -#line 636 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 90: -#line 641 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 91: -#line 643 "./awk.y" -{ 
yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 92: -#line 648 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL); ; - break;} -case 93: -#line 650 "./awk.y" -{ - yyval.nodeval = append_right(yyvsp[-2].nodeval, - node(yyvsp[0].nodeval, Node_expression_list, (NODE *) NULL)); - yyerrok; - ; - break;} -case 94: -#line 656 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 95: -#line 658 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 96: -#line 660 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 97: -#line 662 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 98: -#line 667 "./awk.y" -{ want_assign = FALSE; ; - break;} -case 99: -#line 669 "./awk.y" -{ - if (do_lint && yyvsp[0].nodeval->type == Node_regex) - warning("Regular expression on left of assignment."); - yyval.nodeval = node(yyvsp[-3].nodeval, yyvsp[-2].nodetypeval, yyvsp[0].nodeval); - ; - break;} -case 100: -#line 675 "./awk.y" -{ yyval.nodeval = node(variable(yyvsp[0].sval, CAN_FREE, Node_var_array), Node_in_array, yyvsp[-3].nodeval); ; - break;} -case 101: -#line 677 "./awk.y" -{ - yyval.nodeval = node(yyvsp[0].nodeval, Node_K_getline, - node(yyvsp[-3].nodeval, Node_redirect_pipein, (NODE *) NULL)); - ; - break;} -case 102: -#line 682 "./awk.y" -{ - if (do_lint && ! io_allowed && yyvsp[0].nodeval == NULL) - warning("non-redirected getline undefined inside BEGIN or END action"); - yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_getline, yyvsp[0].nodeval); - ; - break;} -case 103: -#line 688 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ; - break;} -case 104: -#line 690 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ; - break;} -case 105: -#line 692 "./awk.y" -{ - if (yyvsp[-2].nodeval->type == Node_regex) - warning("Regular expression on left of MATCH operator."); - yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, mk_rexp(yyvsp[0].nodeval)); - ; - break;} -case 106: -#line 698 "./awk.y" -{ - yyval.nodeval = yyvsp[0].nodeval; - if (do_lint && tokstart[0] == '*') { - /* possible C comment */ - int n = strlen(tokstart) - 1; - if (tokstart[n] == '*') - warning("regexp looks like a C comment, but is not"); - } - ; - break;} -case 107: -#line 708 "./awk.y" -{ - yyval.nodeval = node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_nomatch, - yyvsp[0].nodeval); - ; - break;} -case 108: -#line 716 "./awk.y" -{ yyval.nodeval = node(variable(yyvsp[0].sval, CAN_FREE, Node_var_array), Node_in_array, yyvsp[-2].nodeval); ; - break;} -case 109: -#line 718 "./awk.y" -{ - if (do_lint && yyvsp[0].nodeval->type == Node_regex) - warning("Regular expression on left of comparison."); - yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval); - ; - break;} -case 110: -#line 724 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_less, yyvsp[0].nodeval); ; - break;} -case 111: -#line 726 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_greater, yyvsp[0].nodeval); ; - break;} -case 112: -#line 728 "./awk.y" -{ yyval.nodeval = node(yyvsp[-4].nodeval, Node_cond_exp, node(yyvsp[-2].nodeval, Node_if_branches, yyvsp[0].nodeval));; - break;} -case 113: -#line 730 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 114: -#line 732 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_concat, yyvsp[0].nodeval); ; - break;} -case 115: -#line 737 "./awk.y" -{ want_assign = FALSE; ; - break;} -case 116: -#line 739 "./awk.y" -{ yyval.nodeval = 
node(yyvsp[-3].nodeval, yyvsp[-2].nodetypeval, yyvsp[0].nodeval); ; - break;} -case 117: -#line 741 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_and, yyvsp[0].nodeval); ; - break;} -case 118: -#line 743 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_or, yyvsp[0].nodeval); ; - break;} -case 119: -#line 745 "./awk.y" -{ - if (do_lint && ! io_allowed && yyvsp[0].nodeval == NULL) - warning("non-redirected getline undefined inside BEGIN or END action"); - yyval.nodeval = node(yyvsp[-1].nodeval, Node_K_getline, yyvsp[0].nodeval); - ; - break;} -case 120: -#line 751 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 121: -#line 753 "./awk.y" -{ yyval.nodeval = node((NODE *) NULL, Node_nomatch, yyvsp[0].nodeval); ; - break;} -case 122: -#line 755 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, mk_rexp(yyvsp[0].nodeval)); ; - break;} -case 123: -#line 757 "./awk.y" -{ yyval.nodeval = node(variable(yyvsp[0].sval, CAN_FREE, Node_var_array), Node_in_array, yyvsp[-2].nodeval); ; - break;} -case 124: -#line 759 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, yyvsp[-1].nodetypeval, yyvsp[0].nodeval); ; - break;} -case 125: -#line 761 "./awk.y" -{ yyval.nodeval = node(yyvsp[-4].nodeval, Node_cond_exp, node(yyvsp[-2].nodeval, Node_if_branches, yyvsp[0].nodeval));; - break;} -case 126: -#line 763 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 127: -#line 765 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_concat, yyvsp[0].nodeval); ; - break;} -case 129: -#line 772 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_exp, yyvsp[0].nodeval); ; - break;} -case 130: -#line 774 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_times, yyvsp[0].nodeval); ; - break;} -case 131: -#line 776 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_quotient, yyvsp[0].nodeval); ; - break;} -case 132: -#line 778 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_mod, yyvsp[0].nodeval); ; - break;} -case 133: -#line 780 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_plus, yyvsp[0].nodeval); ; - break;} -case 134: -#line 782 "./awk.y" -{ yyval.nodeval = node(yyvsp[-2].nodeval, Node_minus, yyvsp[0].nodeval); ; - break;} -case 135: -#line 784 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_postincrement, (NODE *) NULL); ; - break;} -case 136: -#line 786 "./awk.y" -{ yyval.nodeval = node(yyvsp[-1].nodeval, Node_postdecrement, (NODE *) NULL); ; - break;} -case 137: -#line 791 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_not, (NODE *) NULL); ; - break;} -case 138: -#line 793 "./awk.y" -{ yyval.nodeval = yyvsp[-1].nodeval; ; - break;} -case 139: -#line 796 "./awk.y" -{ yyval.nodeval = snode(yyvsp[-1].nodeval, Node_builtin, (int) yyvsp[-3].lval); ; - break;} -case 140: -#line 798 "./awk.y" -{ yyval.nodeval = snode(yyvsp[-1].nodeval, Node_builtin, (int) yyvsp[-3].lval); ; - break;} -case 141: -#line 800 "./awk.y" -{ - if (do_lint) - warning("call of `length' without parentheses is not portable"); - yyval.nodeval = snode((NODE *) NULL, Node_builtin, (int) yyvsp[0].lval); - if (do_posix) - warning("call of `length' without parentheses is deprecated by POSIX"); - ; - break;} -case 142: -#line 808 "./awk.y" -{ - yyval.nodeval = node(yyvsp[-1].nodeval, Node_func_call, make_string(yyvsp[-3].sval, strlen(yyvsp[-3].sval))); - func_use(yyvsp[-3].sval, FUNC_USE); - param_sanity(yyvsp[-1].nodeval); - free(yyvsp[-3].sval); - ; - break;} -case 144: -#line 816 "./awk.y" -{ yyval.nodeval = 
node(yyvsp[0].nodeval, Node_preincrement, (NODE *) NULL); ; - break;} -case 145: -#line 818 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_predecrement, (NODE *) NULL); ; - break;} -case 146: -#line 820 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 147: -#line 822 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 148: -#line 825 "./awk.y" -{ - if (yyvsp[0].nodeval->type == Node_val) { - yyvsp[0].nodeval->numbr = -(force_number(yyvsp[0].nodeval)); - yyval.nodeval = yyvsp[0].nodeval; - } else - yyval.nodeval = node(yyvsp[0].nodeval, Node_unary_minus, (NODE *) NULL); - ; - break;} -case 149: -#line 833 "./awk.y" -{ - /* - * was: $$ = $2 - * POSIX semantics: force a conversion to numeric type - */ - yyval.nodeval = node (make_number(0.0), Node_plus, yyvsp[0].nodeval); - ; - break;} -case 150: -#line 844 "./awk.y" -{ yyval.nodeval = NULL; ; - break;} -case 151: -#line 846 "./awk.y" -{ yyval.nodeval = yyvsp[0].nodeval; ; - break;} -case 152: -#line 851 "./awk.y" -{ yyval.nodeval = variable(yyvsp[0].sval, CAN_FREE, Node_var); ; - break;} -case 153: -#line 853 "./awk.y" -{ - if (yyvsp[-1].nodeval == NULL) { - fatal("invalid subscript expression"); - } else if (yyvsp[-1].nodeval->rnode == NULL) { - yyval.nodeval = node(variable(yyvsp[-3].sval, CAN_FREE, Node_var_array), Node_subscript, yyvsp[-1].nodeval->lnode); - freenode(yyvsp[-1].nodeval); - } else - yyval.nodeval = node(variable(yyvsp[-3].sval, CAN_FREE, Node_var_array), Node_subscript, yyvsp[-1].nodeval); - ; - break;} -case 154: -#line 863 "./awk.y" -{ yyval.nodeval = node(yyvsp[0].nodeval, Node_field_spec, (NODE *) NULL); ; - break;} -case 156: -#line 871 "./awk.y" -{ yyerrok; ; - break;} -case 157: -#line 875 "./awk.y" -{ yyerrok; ; - break;} -case 160: -#line 884 "./awk.y" -{ yyerrok; want_assign = FALSE; ; - break;} -case 161: -#line 887 "./awk.y" -{ yyerrok; ; - break;} -} - /* the action file gets copied in in place of this dollarsign */ -#line 498 "/usr/share/bison.simple" - - yyvsp -= yylen; - yyssp -= yylen; -#ifdef YYLSP_NEEDED - yylsp -= yylen; -#endif - -#if YYDEBUG != 0 - if (yydebug) - { - short *ssp1 = yyss - 1; - fprintf (stderr, "state stack now"); - while (ssp1 != yyssp) - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); - } -#endif - - *++yyvsp = yyval; - -#ifdef YYLSP_NEEDED - yylsp++; - if (yylen == 0) - { - yylsp->first_line = yylloc.first_line; - yylsp->first_column = yylloc.first_column; - yylsp->last_line = (yylsp-1)->last_line; - yylsp->last_column = (yylsp-1)->last_column; - yylsp->text = 0; - } - else - { - yylsp->last_line = (yylsp+yylen-1)->last_line; - yylsp->last_column = (yylsp+yylen-1)->last_column; - } -#endif - - /* Now "shift" the result of the reduction. - Determine what state that goes to, - based on the state we popped back to - and the rule number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTBASE] + *yyssp; - if (yystate >= 0 && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTBASE]; - - goto yynewstate; - -yyerrlab: /* here on detecting error */ - - if (! yyerrstatus) - /* If not already recovering from an error, report this error. */ - { - ++yynerrs; - -#ifdef YYERROR_VERBOSE - yyn = yypact[yystate]; - - if (yyn > YYFLAG && yyn < YYLAST) - { - int size = 0; - char *msg; - int x, count; - - count = 0; - /* Start X at -yyn if nec to avoid negative indexes in yycheck. */ - for (x = (yyn < 0 ? 
-yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - size += strlen(yytname[x]) + 15, count++; - msg = (char *) malloc(size + 15); - if (msg != 0) - { - strcpy(msg, "parse error"); - - if (count < 5) - { - count = 0; - for (x = (yyn < 0 ? -yyn : 0); - x < (sizeof(yytname) / sizeof(char *)); x++) - if (yycheck[x + yyn] == x) - { - strcat(msg, count == 0 ? ", expecting `" : " or `"); - strcat(msg, yytname[x]); - strcat(msg, "'"); - count++; - } - } - yyerror(msg); - free(msg); - } - else - yyerror ("parse error; also virtual memory exceeded"); - } - else -#endif /* YYERROR_VERBOSE */ - yyerror("parse error"); - } - - goto yyerrlab1; -yyerrlab1: /* here on error raised explicitly by an action */ - - if (yyerrstatus == 3) - { - /* if just tried and failed to reuse lookahead token after an error, discard it. */ - - /* return failure if at end of input */ - if (yychar == YYEOF) - YYABORT; - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Discarding token %d (%s).\n", yychar, yytname[yychar1]); -#endif - - yychar = YYEMPTY; - } - - /* Else will try to reuse lookahead token - after shifting the error token. */ - - yyerrstatus = 3; /* Each real token shifted decrements this */ - - goto yyerrhandle; - -yyerrdefault: /* current state does not do anything special for the error token. */ - -#if 0 - /* This is wrong; only states that explicitly want error tokens - should shift them. */ - yyn = yydefact[yystate]; /* If its default is to accept any token, ok. Otherwise pop it.*/ - if (yyn) goto yydefault; -#endif - -yyerrpop: /* pop the current state because it cannot handle the error token */ - - if (yyssp == yyss) YYABORT; - yyvsp--; - yystate = *--yyssp; -#ifdef YYLSP_NEEDED - yylsp--; -#endif - -#if YYDEBUG != 0 - if (yydebug) - { - short *ssp1 = yyss - 1; - fprintf (stderr, "Error: state stack now"); - while (ssp1 != yyssp) - fprintf (stderr, " %d", *++ssp1); - fprintf (stderr, "\n"); - } -#endif - -yyerrhandle: - - yyn = yypact[yystate]; - if (yyn == YYFLAG) - goto yyerrdefault; - - yyn += YYTERROR; - if (yyn < 0 || yyn > YYLAST || yycheck[yyn] != YYTERROR) - goto yyerrdefault; - - yyn = yytable[yyn]; - if (yyn < 0) - { - if (yyn == YYFLAG) - goto yyerrpop; - yyn = -yyn; - goto yyreduce; - } - else if (yyn == 0) - goto yyerrpop; - - if (yyn == YYFINAL) - YYACCEPT; - -#if YYDEBUG != 0 - if (yydebug) - fprintf(stderr, "Shifting error token, "); -#endif - - *++yyvsp = yylval; -#ifdef YYLSP_NEEDED - *++yylsp = yylloc; -#endif - - yystate = yyn; - goto yynewstate; -} -#line 890 "./awk.y" - - -struct token { - const char *operator; /* text to match */ - NODETYPE value; /* node type */ - int class; /* lexical class */ - unsigned flags; /* # of args. allowed and compatability */ -# define ARGS 0xFF /* 0, 1, 2, 3 args allowed (any combination */ -# define A(n) (1<<(n)) -# define VERSION 0xFF00 /* old awk is zero */ -# define NOT_OLD 0x0100 /* feature not in old awk */ -# define NOT_POSIX 0x0200 /* feature not in POSIX */ -# define GAWKX 0x0400 /* gawk extension */ -# define RESX 0x0800 /* Bell Labs Research extension */ - NODE *(*ptr)(); /* function that implements this keyword */ -}; - - -/* Tokentab is sorted ascii ascending order, so it can be binary searched. */ -/* Function pointers come from declarations in awk.h. 
*/ - -static struct token tokentab[] = { -{"BEGIN", Node_illegal, LEX_BEGIN, 0, 0}, -{"END", Node_illegal, LEX_END, 0, 0}, -#ifdef ARRAYDEBUG -{"adump", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_adump}, -#endif -#ifdef BITOPS -{"and", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_and}, -#endif /* BITOPS */ -{"atan2", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_atan2}, -{"break", Node_K_break, LEX_BREAK, 0, 0}, -{"close", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_close}, -#ifdef BITOPS -{"compl", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_compl}, -#endif /* BITOPS */ -{"continue", Node_K_continue, LEX_CONTINUE, 0, 0}, -{"cos", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_cos}, -{"delete", Node_K_delete, LEX_DELETE, NOT_OLD, 0}, -{"do", Node_K_do, LEX_DO, NOT_OLD, 0}, -{"else", Node_illegal, LEX_ELSE, 0, 0}, -{"exit", Node_K_exit, LEX_EXIT, 0, 0}, -{"exp", Node_builtin, LEX_BUILTIN, A(1), do_exp}, -{"fflush", Node_builtin, LEX_BUILTIN, RESX|A(0)|A(1), do_fflush}, -{"for", Node_K_for, LEX_FOR, 0, 0}, -{"func", Node_K_function, LEX_FUNCTION, NOT_POSIX|NOT_OLD, 0}, -{"function", Node_K_function, LEX_FUNCTION, NOT_OLD, 0}, -{"gensub", Node_builtin, LEX_BUILTIN, GAWKX|A(3)|A(4), do_gensub}, -{"getline", Node_K_getline, LEX_GETLINE, NOT_OLD, 0}, -{"gsub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_gsub}, -{"if", Node_K_if, LEX_IF, 0, 0}, -{"in", Node_illegal, LEX_IN, 0, 0}, -{"index", Node_builtin, LEX_BUILTIN, A(2), do_index}, -{"int", Node_builtin, LEX_BUILTIN, A(1), do_int}, -{"length", Node_builtin, LEX_LENGTH, A(0)|A(1), do_length}, -{"log", Node_builtin, LEX_BUILTIN, A(1), do_log}, -#ifdef BITOPS -{"lshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_lshift}, -#endif /* BITOPS */ -{"match", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2), do_match}, -{"next", Node_K_next, LEX_NEXT, 0, 0}, -{"nextfile", Node_K_nextfile, LEX_NEXTFILE, GAWKX, 0}, -#ifdef BITOPS -{"or", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_or}, -#endif /* BITOPS */ -{"print", Node_K_print, LEX_PRINT, 0, 0}, -{"printf", Node_K_printf, LEX_PRINTF, 0, 0}, -{"rand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0), do_rand}, -{"return", Node_K_return, LEX_RETURN, NOT_OLD, 0}, -#ifdef BITOPS -{"rshift", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_rshift}, -#endif /* BITOPS */ -{"sin", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_sin}, -{"split", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_split}, -{"sprintf", Node_builtin, LEX_BUILTIN, 0, do_sprintf}, -{"sqrt", Node_builtin, LEX_BUILTIN, A(1), do_sqrt}, -{"srand", Node_builtin, LEX_BUILTIN, NOT_OLD|A(0)|A(1), do_srand}, -#ifdef ARRAYDEBUG -{"stopme", Node_builtin, LEX_BUILTIN, GAWKX|A(0), stopme}, -#endif -{"strftime", Node_builtin, LEX_BUILTIN, GAWKX|A(0)|A(1)|A(2), do_strftime}, -#ifdef BITOPS -{"strtonum", Node_builtin, LEX_BUILTIN, GAWKX|A(1), do_strtonum}, -#endif /* BITOPS */ -{"sub", Node_builtin, LEX_BUILTIN, NOT_OLD|A(2)|A(3), do_sub}, -{"substr", Node_builtin, LEX_BUILTIN, A(2)|A(3), do_substr}, -{"system", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_system}, -{"systime", Node_builtin, LEX_BUILTIN, GAWKX|A(0), do_systime}, -{"tolower", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_tolower}, -{"toupper", Node_builtin, LEX_BUILTIN, NOT_OLD|A(1), do_toupper}, -{"while", Node_K_while, LEX_WHILE, 0, 0}, -#ifdef BITOPS -{"xor", Node_builtin, LEX_BUILTIN, GAWKX|A(2), do_xor}, -#endif /* BITOPS */ -}; - -/* yyerror --- print a syntax error message, show where */ - -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ -static void -yyerror(const char *m, ...) 
-#else -/* VARARGS0 */ -static void -yyerror(va_alist) -va_dcl -#endif -{ - va_list args; - const char *mesg = NULL; - register char *bp, *cp; - char *scan; - char buf[120]; - static char end_of_file_line[] = "(END OF FILE)"; - - errcount++; - /* Find the current line in the input file */ - if (lexptr && lexeme) { - if (thisline == NULL) { - cp = lexeme; - if (*cp == '\n') { - cp--; - mesg = "unexpected newline"; - } - for (; cp != lexptr_begin && *cp != '\n'; --cp) - continue; - if (*cp == '\n') - cp++; - thisline = cp; - } - /* NL isn't guaranteed */ - bp = lexeme; - while (bp < lexend && *bp && *bp != '\n') - bp++; - } else { - thisline = end_of_file_line; - bp = thisline + strlen(thisline); - } - msg("%.*s", (int) (bp - thisline), thisline); - bp = buf; - cp = buf + sizeof(buf) - 24; /* 24 more than longest msg. input */ - if (lexptr != NULL) { - scan = thisline; - while (bp < cp && scan < lexeme) - if (*scan++ == '\t') - *bp++ = '\t'; - else - *bp++ = ' '; - *bp++ = '^'; - *bp++ = ' '; - } -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, m); - if (mesg == NULL) - mesg = m; -#else - va_start(args); - if (mesg == NULL) - mesg = va_arg(args, char *); -#endif - strcpy(bp, mesg); - err("", buf, args); - va_end(args); -} - -/* get_src_buf --- read the next buffer of source program */ - -static char * -get_src_buf() -{ - static int samefile = FALSE; - static int nextfile = 0; - static char *buf = NULL; - static int fd; - int n; - register char *scan; - static int len = 0; - static int did_newline = FALSE; - int newfile; - struct stat sbuf; - -# define SLOP 128 /* enough space to hold most source lines */ - -again: - newfile = FALSE; - if (nextfile > numfiles) - return NULL; - - if (srcfiles[nextfile].stype == CMDLINE) { - if (len == 0) { - len = strlen(srcfiles[nextfile].val); - if (len == 0) { - /* - * Yet Another Special case: - * gawk '' /path/name - * Sigh. - */ - static int warned = FALSE; - - if (do_lint && ! warned) { - warned = TRUE; - warning("empty program text on command line"); - } - ++nextfile; - goto again; - } - sourceline = 1; - lexptr = lexptr_begin = srcfiles[nextfile].val; - lexend = lexptr + len; - } else if (! did_newline && *(lexptr-1) != '\n') { - /* - * The following goop is to ensure that the source - * ends with a newline and that the entire current - * line is available for error messages. - */ - int offset; - - did_newline = TRUE; - offset = lexptr - lexeme; - for (scan = lexeme; scan > lexptr_begin; scan--) - if (*scan == '\n') { - scan++; - break; - } - len = lexptr - scan; - emalloc(buf, char *, len+1, "get_src_buf"); - memcpy(buf, scan, len); - thisline = buf; - lexptr = buf + len; - *lexptr = '\n'; - lexeme = lexptr - offset; - lexptr_begin = buf; - lexend = lexptr + 1; - } else { - len = 0; - lexeme = lexptr = lexptr_begin = NULL; - } - if (lexptr == NULL && ++nextfile <= numfiles) - goto again; - return lexptr; - } - if (! samefile) { - source = srcfiles[nextfile].val; - if (source == NULL) { - if (buf != NULL) { - free(buf); - buf = NULL; - } - len = 0; - return lexeme = lexptr = lexptr_begin = NULL; - } - fd = pathopen(source); - if (fd <= INVALID_HANDLE) { - char *in; - - /* suppress file name and line no. 
in error mesg */ - in = source; - source = NULL; - fatal("can't open source file \"%s\" for reading (%s)", - in, strerror(errno)); - } - len = optimal_bufsize(fd, & sbuf); - newfile = TRUE; - if (buf != NULL) - free(buf); - emalloc(buf, char *, len + SLOP, "get_src_buf"); - lexptr_begin = buf + SLOP; - samefile = TRUE; - sourceline = 1; - } else { - /* - * Here, we retain the current source line (up to length SLOP) - * in the beginning of the buffer that was overallocated above - */ - int offset; - int linelen; - - offset = lexptr - lexeme; - for (scan = lexeme; scan > lexptr_begin; scan--) - if (*scan == '\n') { - scan++; - break; - } - linelen = lexptr - scan; - if (linelen > SLOP) - linelen = SLOP; - thisline = buf + SLOP - linelen; - memcpy(thisline, scan, linelen); - lexeme = buf + SLOP - offset; - lexptr_begin = thisline; - } - n = read(fd, buf + SLOP, len); - if (n == -1) - fatal("can't read sourcefile \"%s\" (%s)", - source, strerror(errno)); - if (n == 0) { - if (newfile) { - static int warned = FALSE; - - if (do_lint && ! warned) { - warned = TRUE; - warning("source file `%s' is empty", source); - } - } - if (fileno(stdin) != fd) /* safety */ - close(fd); - samefile = FALSE; - nextfile++; - if (lexeme) - *lexeme = '\0'; - len = 0; - goto again; - } - lexptr = buf + SLOP; - lexend = lexptr + n; - return buf; -} - -/* tokadd --- add a character to the token buffer */ - -#define tokadd(x) (*tok++ = (x), tok == tokend ? tokexpand() : tok) - -/* tokexpand --- grow the token buffer */ - -char * -tokexpand() -{ - static int toksize = 60; - int tokoffset; - - tokoffset = tok - tokstart; - toksize *= 2; - if (tokstart != NULL) - erealloc(tokstart, char *, toksize, "tokexpand"); - else - emalloc(tokstart, char *, toksize, "tokexpand"); - tokend = tokstart + toksize; - tok = tokstart + tokoffset; - return tok; -} - -/* nextc --- get the next input character */ - -#if DEBUG -int -nextc() -{ - int c; - - if (lexptr && lexptr < lexend) - c = (int) (unsigned char) *lexptr++; - else if (get_src_buf()) - c = (int) (unsigned char) *lexptr++; - else - c = EOF; - - return c; -} -#else -#define nextc() ((lexptr && lexptr < lexend) ? \ - ((int) (unsigned char) *lexptr++) : \ - (get_src_buf() ? ((int) (unsigned char) *lexptr++) : EOF) \ - ) -#endif - -/* pushback --- push a character back on the input */ - -#define pushback() (lexptr && lexptr > lexptr_begin ? lexptr-- : lexptr) - -/* allow_newline --- allow newline after &&, ||, ? and : */ - -static void -allow_newline() -{ - int c; - - for (;;) { - c = nextc(); - if (c == EOF) - break; - if (c == '#') { - while ((c = nextc()) != '\n' && c != EOF) - continue; - if (c == EOF) - break; - } - if (c == '\n') - sourceline++; - if (! isspace(c)) { - pushback(); - break; - } - } -} - -/* yylex --- Read the input and turn it into tokens. */ - -static int -yylex() -{ - register int c, c1; - int seen_e = FALSE; /* These are for numbers */ - int seen_point = FALSE; - int esc_seen; /* for literal strings */ - int low, mid, high; - static int did_newline = FALSE; - char *tokkey; - static int lasttok = 0, eof_warned = FALSE; - int inhex = FALSE; - - if (nextc() == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } - pushback(); -#ifdef OS2 - /* - * added for OS/2's extproc feature of cmd.exe - * (like #! 
in BSD sh) - */ - if (strncasecmp(lexptr, "extproc ", 8) == 0) { - while (*lexptr && *lexptr != '\n') - lexptr++; - } -#endif - lexeme = lexptr; - thisline = NULL; - if (want_regexp) { - int in_brack = 0; /* count brackets, [[:alnum:]] allowed */ - /* - * Counting brackets is non-trivial. [[] is ok, - * and so is [\]], with a point being that /[/]/ as a regexp - * constant has to work. - * - * Do not count [ or ] if either one is preceded by a \. - * A `[' should be counted if - * a) it is the first one so far (in_brack == 0) - * b) it is the `[' in `[:' - * A ']' should be counted if not preceded by a \, since - * it is either closing `:]' or just a plain list. - * According to POSIX, []] is how you put a ] into a set. - * Try to handle that too. - * - * The code for \ handles \[ and \]. - */ - - want_regexp = FALSE; - tok = tokstart; - for (;;) { - c = nextc(); - switch (c) { - case '[': - /* one day check for `.' and `=' too */ - if ((c1 = nextc()) == ':' || in_brack == 0) - in_brack++; - pushback(); - break; - case ']': - if (tokstart[0] == '[' - && (tok == tokstart + 1 - || (tok == tokstart + 2 - && tokstart[1] == '^'))) - /* do nothing */; - else - in_brack--; - break; - case '\\': - if ((c = nextc()) == EOF) { - yyerror("unterminated regexp ends with \\ at end of file"); - return lasttok = REGEXP; /* kludge */ - } else if (c == '\n') { - sourceline++; - continue; - } else { - tokadd('\\'); - tokadd(c); - continue; - } - break; - case '/': /* end of the regexp */ - if (in_brack > 0) - break; - - pushback(); - tokadd('\0'); - yylval.sval = tokstart; - return lasttok = REGEXP; - case '\n': - pushback(); - yyerror("unterminated regexp"); - return lasttok = REGEXP; /* kludge */ - case EOF: - yyerror("unterminated regexp at end of file"); - return lasttok = REGEXP; /* kludge */ - } - tokadd(c); - } - } -retry: - while ((c = nextc()) == ' ' || c == '\t') - continue; - - lexeme = lexptr ? lexptr - 1 : lexptr; - thisline = NULL; - tok = tokstart; - yylval.nodetypeval = Node_illegal; - - switch (c) { - case EOF: - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - - case '\n': - sourceline++; - return lasttok = NEWLINE; - - case '#': /* it's a comment */ - while ((c = nextc()) != '\n') { - if (c == EOF) { - if (lasttok != NEWLINE) { - lasttok = NEWLINE; - if (do_lint && ! eof_warned) { - warning( - "source file does not end in newline"); - eof_warned = TRUE; - } - return NEWLINE; /* fake it */ - } - return 0; - } - } - sourceline++; - return lasttok = NEWLINE; - - case '\\': -#ifdef RELAXED_CONTINUATION - /* - * This code puports to allow comments and/or whitespace - * after the `\' at the end of a line used for continuation. - * Use it at your own risk. We think it's a bad idea, which - * is why it's not on by default. - */ - if (! do_traditional) { - /* strip trailing white-space and/or comment */ - while ((c = nextc()) == ' ' || c == '\t') - continue; - if (c == '#') { - if (do_lint) - warning( - "use of `\\ #...' 
line continuation is not portable"); - while ((c = nextc()) != '\n') - if (c == EOF) - break; - } - pushback(); - } -#endif /* RELAXED_CONTINUATION */ - if (nextc() == '\n') { - sourceline++; - goto retry; - } else { - yyerror("backslash not last character on line"); - exit(1); - } - break; - - case '$': - want_assign = TRUE; - return lasttok = '$'; - - case ':': - case '?': - allow_newline(); - return lasttok = c; - - case ')': - case '(': - case ';': - case '{': - case ',': - want_assign = FALSE; - /* fall through */ - case '[': - case ']': - return lasttok = c; - - case '*': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_times; - return lasttok = ASSIGNOP; - } else if (do_posix) { - pushback(); - return lasttok = '*'; - } else if (c == '*') { - /* make ** and **= aliases for ^ and ^= */ - static int did_warn_op = FALSE, did_warn_assgn = FALSE; - - if (nextc() == '=') { - if (do_lint && ! did_warn_assgn) { - did_warn_assgn = TRUE; - warning("**= is not allowed by POSIX"); - warning("operator `**=' is not supported in old awk"); - } - yylval.nodetypeval = Node_assign_exp; - return ASSIGNOP; - } else { - pushback(); - if (do_lint && ! did_warn_op) { - did_warn_op = TRUE; - warning("** is not allowed by POSIX"); - warning("operator `**' is not supported in old awk"); - } - return lasttok = '^'; - } - } - pushback(); - return lasttok = '*'; - - case '/': - if (want_assign) { - if (nextc() == '=') { - yylval.nodetypeval = Node_assign_quotient; - return lasttok = ASSIGNOP; - } - pushback(); - } - return lasttok = '/'; - - case '%': - if (nextc() == '=') { - yylval.nodetypeval = Node_assign_mod; - return lasttok = ASSIGNOP; - } - pushback(); - return lasttok = '%'; - - case '^': - { - static int did_warn_op = FALSE, did_warn_assgn = FALSE; - - if (nextc() == '=') { - if (do_lint && ! did_warn_assgn) { - did_warn_assgn = TRUE; - warning("operator `^=' is not supported in old awk"); - } - yylval.nodetypeval = Node_assign_exp; - return lasttok = ASSIGNOP; - } - pushback(); - if (do_lint && ! did_warn_op) { - did_warn_op = TRUE; - warning("operator `^' is not supported in old awk"); - } - return lasttok = '^'; - } - - case '+': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_plus; - return lasttok = ASSIGNOP; - } - if (c == '+') - return lasttok = INCREMENT; - pushback(); - return lasttok = '+'; - - case '!': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_notequal; - return lasttok = RELOP; - } - if (c == '~') { - yylval.nodetypeval = Node_nomatch; - want_assign = FALSE; - return lasttok = MATCHOP; - } - pushback(); - return lasttok = '!'; - - case '<': - if (nextc() == '=') { - yylval.nodetypeval = Node_leq; - return lasttok = RELOP; - } - yylval.nodetypeval = Node_less; - pushback(); - return lasttok = '<'; - - case '=': - if (nextc() == '=') { - yylval.nodetypeval = Node_equal; - return lasttok = RELOP; - } - yylval.nodetypeval = Node_assign; - pushback(); - return lasttok = ASSIGNOP; - - case '>': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_geq; - return lasttok = RELOP; - } else if (c == '>') { - yylval.nodetypeval = Node_redirect_append; - return lasttok = APPEND_OP; - } - yylval.nodetypeval = Node_greater; - pushback(); - return lasttok = '>'; - - case '~': - yylval.nodetypeval = Node_match; - want_assign = FALSE; - return lasttok = MATCHOP; - - case '}': - /* - * Added did newline stuff. Easier than - * hacking the grammar. 
- */ - if (did_newline) { - did_newline = FALSE; - return lasttok = c; - } - did_newline++; - --lexptr; /* pick up } next time */ - return lasttok = NEWLINE; - - case '"': - esc_seen = FALSE; - while ((c = nextc()) != '"') { - if (c == '\n') { - pushback(); - yyerror("unterminated string"); - exit(1); - } - if (c == '\\') { - c = nextc(); - if (c == '\n') { - sourceline++; - continue; - } - esc_seen = TRUE; - tokadd('\\'); - } - if (c == EOF) { - pushback(); - yyerror("unterminated string"); - exit(1); - } - tokadd(c); - } - yylval.nodeval = make_str_node(tokstart, - tok - tokstart, esc_seen ? SCAN : 0); - yylval.nodeval->flags |= PERM; - return lasttok = YSTRING; - - case '-': - if ((c = nextc()) == '=') { - yylval.nodetypeval = Node_assign_minus; - return lasttok = ASSIGNOP; - } - if (c == '-') - return lasttok = DECREMENT; - pushback(); - return lasttok = '-'; - - case '.': - c = nextc(); - pushback(); - if (! isdigit(c)) - return lasttok = '.'; - else - c = '.'; - /* FALL THROUGH */ - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - /* It's a number */ - for (;;) { - int gotnumber = FALSE; - - tokadd(c); - switch (c) { -#ifdef BITOPS - case 'x': - case 'X': - if (do_traditional) - goto done; - if (tok == tokstart + 2) - inhex = TRUE; - break; -#endif /* BITOTS */ - case '.': - if (seen_point) { - gotnumber = TRUE; - break; - } - seen_point = TRUE; - break; - case 'e': - case 'E': - if (inhex) - break; - if (seen_e) { - gotnumber = TRUE; - break; - } - seen_e = TRUE; - if ((c = nextc()) == '-' || c == '+') - tokadd(c); - else - pushback(); - break; -#ifdef BITOPS - case 'a': - case 'A': - case 'b': - case 'B': - case 'c': - case 'C': - case 'D': - case 'd': - case 'f': - case 'F': - if (do_traditional || ! inhex) - goto done; - /* fall through */ -#endif - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - break; - default: - done: - gotnumber = TRUE; - } - if (gotnumber) - break; - c = nextc(); - } - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - tokadd('\0'); -#ifdef BITOPS - if (! do_traditional && isnondecimal(tokstart)) - yylval.nodeval = make_number(nondec2awknum(tokstart, strlen(tokstart))); - else -#endif /* BITOPS */ - yylval.nodeval = make_number(atof(tokstart)); - yylval.nodeval->flags |= PERM; - return lasttok = YNUMBER; - - case '&': - if ((c = nextc()) == '&') { - yylval.nodetypeval = Node_and; - allow_newline(); - want_assign = FALSE; - return lasttok = LEX_AND; - } - pushback(); - return lasttok = '&'; - - case '|': - if ((c = nextc()) == '|') { - yylval.nodetypeval = Node_or; - allow_newline(); - want_assign = FALSE; - return lasttok = LEX_OR; - } - pushback(); - return lasttok = '|'; - } - - if (c != '_' && ! isalpha(c)) { - yyerror("Invalid char '%c' in expression\n", c); - exit(1); - } - - /* it's some type of name-type-thing. Find its length. */ - tok = tokstart; - while (is_identchar(c)) { - tokadd(c); - c = nextc(); - } - tokadd('\0'); - emalloc(tokkey, char *, tok - tokstart, "yylex"); - memcpy(tokkey, tokstart, tok - tokstart); - if (c != EOF) - pushback(); - else if (do_lint && ! eof_warned) { - warning("source file does not end in newline"); - eof_warned = TRUE; - } - - /* See if it is a special token. 
*/ - low = 0; - high = (sizeof(tokentab) / sizeof(tokentab[0])) - 1; - while (low <= high) { - int i; - - mid = (low + high) / 2; - c = *tokstart - tokentab[mid].operator[0]; - i = c ? c : strcmp(tokstart, tokentab[mid].operator); - - if (i < 0) /* token < mid */ - high = mid - 1; - else if (i > 0) /* token > mid */ - low = mid + 1; - else { - if (do_lint) { - if (tokentab[mid].flags & GAWKX) - warning("%s() is a gawk extension", - tokentab[mid].operator); - if (tokentab[mid].flags & RESX) - warning("%s() is a Bell Labs extension", - tokentab[mid].operator); - if (tokentab[mid].flags & NOT_POSIX) - warning("POSIX does not allow %s", - tokentab[mid].operator); - } - if (do_lint_old && (tokentab[mid].flags & NOT_OLD)) - warning("%s is not supported in old awk", - tokentab[mid].operator); - if ((do_traditional && (tokentab[mid].flags & GAWKX)) - || (do_posix && (tokentab[mid].flags & NOT_POSIX))) - break; - if (tokentab[mid].class == LEX_BUILTIN - || tokentab[mid].class == LEX_LENGTH - ) - yylval.lval = mid; - else - yylval.nodetypeval = tokentab[mid].value; - - free(tokkey); - return lasttok = tokentab[mid].class; - } - } - - yylval.sval = tokkey; - if (*lexptr == '(') - return lasttok = FUNC_CALL; - else { - want_assign = TRUE; - return lasttok = NAME; - } -} - -/* node_common --- common code for allocating a new node */ - -static NODE * -node_common(op) -NODETYPE op; -{ - register NODE *r; - - getnode(r); - r->type = op; - r->flags = MALLOC; - /* if lookahead is NL, lineno is 1 too high */ - if (lexeme && *lexeme == '\n') - r->source_line = sourceline - 1; - else - r->source_line = sourceline; - r->source_file = source; - return r; -} - -/* node --- allocates a node with defined lnode and rnode. */ - -NODE * -node(left, op, right) -NODE *left, *right; -NODETYPE op; -{ - register NODE *r; - - r = node_common(op); - r->lnode = left; - r->rnode = right; - return r; -} - -/* snode --- allocate a node with defined subnode and proc for builtin - functions. Checks for arg. count and supplies defaults where - possible. */ - -static NODE * -snode(subn, op, idx) -NODETYPE op; -int idx; -NODE *subn; -{ - register NODE *r; - register NODE *n; - int nexp = 0; - int args_allowed; - - r = node_common(op); - - /* traverse expression list to see how many args. given */ - for (n = subn; n != NULL; n = n->rnode) { - nexp++; - if (nexp > 3) - break; - } - - /* check against how many args. are allowed for this builtin */ - args_allowed = tokentab[idx].flags & ARGS; - if (args_allowed && (args_allowed & A(nexp)) == 0) - fatal("%s() cannot have %d argument%c", - tokentab[idx].operator, nexp, nexp == 1 ? ' ' : 's'); - - r->proc = tokentab[idx].ptr; - - /* special case processing for a few builtins */ - /* - * FIXME: go through these to make sure that everything done - * here is really right. Move anything that's not into - * the corresponding routine. 
- */ - if (nexp == 0 && r->proc == do_length) { - subn = node(node(make_number(0.0), Node_field_spec, (NODE *) NULL), - Node_expression_list, - (NODE *) NULL); - } else if (r->proc == do_match) { - if (subn->rnode->lnode->type != Node_regex) - subn->rnode->lnode = mk_rexp(subn->rnode->lnode); - } else if (r->proc == do_sub || r->proc == do_gsub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 2) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - else if (subn->rnode->rnode->lnode->type == Node_val) { - if (do_lint) - warning("string literal as last arg of substitute"); - } else if (! isassignable(subn->rnode->rnode->lnode)) - yyerror("%s third parameter is not a changeable object", - r->proc == do_sub ? "sub" : "gsub"); - } else if (r->proc == do_gensub) { - if (subn->lnode->type != Node_regex) - subn->lnode = mk_rexp(subn->lnode); - if (nexp == 3) - append_right(subn, node(node(make_number(0.0), - Node_field_spec, - (NODE *) NULL), - Node_expression_list, - (NODE *) NULL)); - } else if (r->proc == do_split) { - if (nexp == 2) - append_right(subn, - node(FS_node, Node_expression_list, (NODE *) NULL)); - n = subn->rnode->rnode->lnode; - if (n->type != Node_regex) - subn->rnode->rnode->lnode = mk_rexp(n); - if (nexp == 2) - subn->rnode->rnode->lnode->re_flags |= FS_DFLT; - } - - r->subnode = subn; - return r; -} - -/* - * mkrangenode: - * This allocates a Node_line_range node with defined condpair and - * zeroes the trigger word to avoid the temptation of assuming that calling - * 'node( foo, Node_line_range, 0)' will properly initialize 'triggered'. - * Otherwise like node(). - */ - -static NODE * -mkrangenode(cpair) -NODE *cpair; -{ - register NODE *r; - - getnode(r); - r->type = Node_line_range; - r->condpair = cpair; - r->triggered = FALSE; - return r; -} - -/* make_for_loop --- build a for loop */ - -static NODE * -make_for_loop(init, cond, incr) -NODE *init, *cond, *incr; -{ - register FOR_LOOP_HEADER *r; - NODE *n; - - emalloc(r, FOR_LOOP_HEADER *, sizeof(FOR_LOOP_HEADER), "make_for_loop"); - getnode(n); - n->type = Node_illegal; - r->init = init; - r->cond = cond; - r->incr = incr; - n->sub.nodep.r.hd = r; - return n; -} - -/* dup_parms --- return TRUE if there are duplicate parameters */ - -static int -dup_parms(func) -NODE *func; -{ - register NODE *np; - char *fname, **names; - int count, i, j, dups; - NODE *params; - - if (func == NULL) /* error earlier */ - return TRUE; - - fname = func->param; - count = func->param_cnt; - params = func->rnode; - - if (count == 0) /* no args, no problem */ - return FALSE; - - if (params == NULL) /* error earlier */ - return TRUE; - - emalloc(names, char **, count * sizeof(char *), "dup_parms"); - - i = 0; - for (np = params; np != NULL; np = np->rnode) { - if (np->param == NULL) { /* error earlier, give up, go home */ - free(names); - return TRUE; - } - names[i++] = np->param; - } - - dups = 0; - for (i = 1; i < count; i++) { - for (j = 0; j < i; j++) { - if (strcmp(names[i], names[j]) == 0) { - dups++; - error( - "function `%s': parameter #%d, `%s', duplicates parameter #%d", - fname, i+1, names[j], j+1); - } - } - } - - free(names); - return (dups > 0 ? TRUE : FALSE); -} - -/* - * install: - * Install a name in the symbol table, even if it is already there. - * Caller must check against redefinition if that is desired. 
- */ - -NODE * -install(name, value) -char *name; -NODE *value; -{ - register NODE *hp; - register size_t len; - register int bucket; - - len = strlen(name); - bucket = hash(name, len, (unsigned long) HASHSIZE); - getnode(hp); - hp->type = Node_hashnode; - hp->hnext = variables[bucket]; - variables[bucket] = hp; - hp->hlength = len; - hp->hvalue = value; - hp->hname = name; - hp->hvalue->vname = name; - return hp->hvalue; -} - -/* lookup --- find the most recent hash node for name installed by install */ - -NODE * -lookup(name) -const char *name; -{ - register NODE *bucket; - register size_t len; - - len = strlen(name); - for (bucket = variables[hash(name, len, (unsigned long) HASHSIZE)]; - bucket != NULL; bucket = bucket->hnext) - if (bucket->hlength == len && STREQN(bucket->hname, name, len)) - return bucket->hvalue; - - return NULL; -} - -/* - * append_right: - * Add new to the rightmost branch of LIST. This uses n^2 time, so we make - * a simple attempt at optimizing it. - */ - -static NODE * -append_right(list, new) -NODE *list, *new; -{ - register NODE *oldlist; - static NODE *savefront = NULL, *savetail = NULL; - - if (list == NULL || new == NULL) - return list; - - oldlist = list; - if (savefront == oldlist) { - savetail = savetail->rnode = new; - return oldlist; - } else - savefront = oldlist; - while (list->rnode != NULL) - list = list->rnode; - savetail = list->rnode = new; - return oldlist; -} - -/* - * func_install: - * check if name is already installed; if so, it had better have Null value, - * in which case def is added as the value. Otherwise, install name with def - * as value. - */ - -static void -func_install(params, def) -NODE *params; -NODE *def; -{ - NODE *r; - NODE *n; - - /* check for function foo(foo) { ... }. bleh. */ - for (n = params->rnode; n != NULL; n = n->rnode) { - if (strcmp(n->param, params->param) == 0) - fatal("function `%s': can't use function name as parameter name", - params->param); - } - - pop_params(params->rnode); - pop_var(params, FALSE); - r = lookup(params->param); - if (r != NULL) { - fatal("function name `%s' previously defined", params->param); - } else - (void) install(params->param, node(params, Node_func, def)); - - func_use(params->param, FUNC_DEFINE); -} - -/* pop_var --- remove a variable from the symbol table */ - -static void -pop_var(np, freeit) -NODE *np; -int freeit; -{ - register NODE *bucket, **save; - register size_t len; - char *name; - - name = np->param; - len = strlen(name); - save = &(variables[hash(name, len, (unsigned long) HASHSIZE)]); - for (bucket = *save; bucket != NULL; bucket = bucket->hnext) { - if (len == bucket->hlength && STREQN(bucket->hname, name, len)) { - *save = bucket->hnext; - freenode(bucket); - if (freeit) - free(np->param); - return; - } - save = &(bucket->hnext); - } -} - -/* pop_params --- remove list of function parameters from symbol table */ - -/* - * pop parameters out of the symbol table. do this in reverse order to - * avoid reading freed memory if there were duplicated parameters. 
- */ -static void -pop_params(params) -NODE *params; -{ - if (params == NULL) - return; - pop_params(params->rnode); - pop_var(params, TRUE); -} - -/* make_param --- make NAME into a function parameter */ - -static NODE * -make_param(name) -char *name; -{ - NODE *r; - - getnode(r); - r->type = Node_param_list; - r->rnode = NULL; - r->param = name; - r->param_cnt = param_counter++; - return (install(name, r)); -} - -static struct fdesc { - char *name; - short used; - short defined; - struct fdesc *next; -} *ftable[HASHSIZE]; - -/* func_use --- track uses and definitions of functions */ - -static void -func_use(name, how) -char *name; -enum defref how; -{ - struct fdesc *fp; - int len; - int ind; - - len = strlen(name); - ind = hash(name, len, HASHSIZE); - - for (fp = ftable[ind]; fp != NULL; fp = fp->next) { - if (strcmp(fp->name, name) == 0) { - if (how == FUNC_DEFINE) - fp->defined++; - else - fp->used++; - return; - } - } - - /* not in the table, fall through to allocate a new one */ - - emalloc(fp, struct fdesc *, sizeof(struct fdesc), "func_use"); - memset(fp, '\0', sizeof(struct fdesc)); - emalloc(fp->name, char *, len + 1, "func_use"); - strcpy(fp->name, name); - if (how == FUNC_DEFINE) - fp->defined++; - else - fp->used++; - fp->next = ftable[ind]; - ftable[ind] = fp; -} - -/* check_funcs --- verify functions that are called but not defined */ - -static void -check_funcs() -{ - struct fdesc *fp, *next; - int i; - - for (i = 0; i < HASHSIZE; i++) { - for (fp = ftable[i]; fp != NULL; fp = fp->next) { -#ifdef REALLYMEAN - /* making this the default breaks old code. sigh. */ - if (fp->defined == 0) { - error( - "function `%s' called but never defined", fp->name); - errcount++; - } -#else - if (do_lint && fp->defined == 0) - warning( - "function `%s' called but never defined", fp->name); -#endif - if (do_lint && fp->used == 0) { - warning("function `%s' defined but never called", - fp->name); - } - } - } - - /* now let's free all the memory */ - for (i = 0; i < HASHSIZE; i++) { - for (fp = ftable[i]; fp != NULL; fp = next) { - next = fp->next; - free(fp->name); - free(fp); - } - } -} - -/* param_sanity --- look for parameters that are regexp constants */ - -static void -param_sanity(arglist) -NODE *arglist; -{ - NODE *argp, *arg; - int i; - - for (i = 1, argp = arglist; argp != NULL; argp = argp->rnode, i++) { - arg = argp->lnode; - if (arg->type == Node_regex) - warning("regexp constant for parameter #%d yields boolean value", i); - } -} - -/* variable --- make sure NAME is in the symbol table */ - -NODE * -variable(name, can_free, type) -char *name; -int can_free; -NODETYPE type; -{ - register NODE *r; - static int env_loaded = FALSE; - - if (! env_loaded && STREQ(name, "ENVIRON")) { - load_environ(); - env_loaded = TRUE; - } - if ((r = lookup(name)) == NULL) - r = install(name, node(Nnull_string, type, (NODE *) NULL)); - else if (can_free) - free(name); - return r; -} - -/* mk_rexp --- make a regular expression constant */ - -static NODE * -mk_rexp(exp) -NODE *exp; -{ - NODE *n; - - if (exp->type == Node_regex) - return exp; - - getnode(n); - n->type = Node_regex; - n->re_exp = exp; - n->re_text = NULL; - n->re_reg = NULL; - n->re_flags = 0; - n->re_cnt = 1; - return n; -} - -/* isnoeffect --- when used as a statement, has no side effects */ - -/* - * To be completely general, we should recursively walk the parse - * tree, to make sure that all the subexpressions also have no effect. - * Instead, we just weaken the actual warning that's printed, up above - * in the grammar. 
- */ - -static int -isnoeffect(type) -NODETYPE type; -{ - switch (type) { - case Node_times: - case Node_quotient: - case Node_mod: - case Node_plus: - case Node_minus: - case Node_subscript: - case Node_concat: - case Node_exp: - case Node_unary_minus: - case Node_field_spec: - case Node_and: - case Node_or: - case Node_equal: - case Node_notequal: - case Node_less: - case Node_greater: - case Node_leq: - case Node_geq: - case Node_match: - case Node_nomatch: - case Node_not: - case Node_val: - case Node_in_array: - case Node_NF: - case Node_NR: - case Node_FNR: - case Node_FS: - case Node_RS: - case Node_FIELDWIDTHS: - case Node_IGNORECASE: - case Node_OFS: - case Node_ORS: - case Node_OFMT: - case Node_CONVFMT: - return TRUE; - default: - break; /* keeps gcc -Wall happy */ - } - - return FALSE; -} - -/* isassignable --- can this node be assigned to? */ - -static int -isassignable(n) -register NODE *n; -{ - switch (n->type) { - case Node_var: - case Node_FIELDWIDTHS: - case Node_RS: - case Node_FS: - case Node_FNR: - case Node_NR: - case Node_NF: - case Node_IGNORECASE: - case Node_OFMT: - case Node_CONVFMT: - case Node_ORS: - case Node_OFS: - case Node_field_spec: - case Node_subscript: - return TRUE; - case Node_param_list: - return ((n->flags & FUNC) == 0); /* ok if not func name */ - default: - break; /* keeps gcc -Wall happy */ - } - return FALSE; -} - -/* for debugging */ -NODE * -stopme(tree) -NODE *tree; -{ - return tmp_number((AWKNUM) 0.0); -} diff --git a/contrib/awk/builtin.c b/contrib/awk/builtin.c deleted file mode 100644 index dcf3ac3..0000000 --- a/contrib/awk/builtin.c +++ /dev/null @@ -1,2499 +0,0 @@ -/* - * builtin.c - Builtin functions and various utility procedures - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - - -#include "awk.h" -#if defined(HAVE_FCNTL_H) -#include -#endif -#undef HUGE -#undef CHARBITS -#undef INTBITS -#include -#ifndef __FreeBSD__ -#include "random.h" - -/* can declare these, since we always use the random shipped with gawk */ -extern char *initstate P((unsigned long seed, char *state, long n)); -extern char *setstate P((char *state)); -extern long random P((void)); -extern void srandom P((unsigned long seed)); -#endif - -extern NODE **fields_arr; -extern int output_is_tty; - -static NODE *sub_common P((NODE *tree, int how_many, int backdigs)); - -#ifdef _CRAY -/* Work around a problem in conversion of doubles to exact integers. */ -#include -#define Floor(n) floor((n) * (1.0 + DBL_EPSILON)) -#define Ceil(n) ceil((n) * (1.0 + DBL_EPSILON)) - -/* Force the standard C compiler to use the library math functions. 
*/ -extern double exp(double); -double (*Exp)() = exp; -#define exp(x) (*Exp)(x) -extern double log(double); -double (*Log)() = log; -#define log(x) (*Log)(x) -#else -#define Floor(n) floor(n) -#define Ceil(n) ceil(n) -#endif - -#define DEFAULT_G_PRECISION 6 - -#ifdef GFMT_WORKAROUND -/* semi-temporary hack, mostly to gracefully handle VMS */ -static void sgfmt P((char *buf, const char *format, int alt, - int fwidth, int precision, double value)); -#endif /* GFMT_WORKAROUND */ - -/* - * Since we supply the version of random(), we know what - * value to use here. - */ -#define GAWK_RANDOM_MAX 0x7fffffffL - -static void efwrite P((const void *ptr, size_t size, size_t count, FILE *fp, - const char *from, struct redirect *rp, int flush)); - -/* efwrite --- like fwrite, but with error checking */ - -static void -efwrite(const void *ptr, - size_t size, - size_t count, - FILE *fp, - const char *from, - struct redirect *rp, - int flush) -{ - errno = 0; - if (fwrite(ptr, size, count, fp) != count) - goto wrerror; - if (flush - && ((fp == stdout && output_is_tty) - || (rp != NULL && (rp->flag & RED_NOBUF)))) { - fflush(fp); - if (ferror(fp)) - goto wrerror; - } - return; - -wrerror: - fatal(_("%s to \"%s\" failed (%s)"), from, - rp ? rp->value : _("standard output"), - errno ? strerror(errno) : _("reason unknown")); -} - -/* do_exp --- exponential function */ - -NODE * -do_exp(NODE *tree) -{ - NODE *tmp; - double d, res; - - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("exp: received non-numeric argument")); - d = force_number(tmp); - free_temp(tmp); - errno = 0; - res = exp(d); - if (errno == ERANGE) - warning(_("exp: argument %g is out of range"), d); - return tmp_number((AWKNUM) res); -} - -/* stdfile --- return fp for a standard file */ - -/* - * This function allows `fflush("/dev/stdout")' to work. - * The other files will be available via getredirect(). - * /dev/stdin is not included, since fflush is only for output. 
- */ - -static FILE * -stdfile(char *name, size_t len) -{ - if (len == 11) { - if (STREQN(name, "/dev/stderr", 11)) - return stderr; - else if (STREQN(name, "/dev/stdout", 11)) - return stdout; - } - - return NULL; -} - -/* do_fflush --- flush output, either named file or pipe or everything */ - -NODE * -do_fflush(NODE *tree) -{ - struct redirect *rp; - NODE *tmp; - FILE *fp; - int status = 0; - char *file; - - /* fflush() --- flush stdout */ - if (tree == NULL) { - status = fflush(stdout); - return tmp_number((AWKNUM) status); - } - - tmp = tree_eval(tree->lnode); - tmp = force_string(tmp); - file = tmp->stptr; - - /* fflush("") --- flush all */ - if (tmp->stlen == 0) { - status = flush_io(); - free_temp(tmp); - return tmp_number((AWKNUM) status); - } - - rp = getredirect(tmp->stptr, tmp->stlen); - status = -1; - if (rp != NULL) { - if ((rp->flag & (RED_WRITE|RED_APPEND)) == 0) { - if (rp->flag & RED_PIPE) - warning(_("fflush: cannot flush: pipe `%s' opened for reading, not writing"), - file); - else - warning(_("fflush: cannot flush: file `%s' opened for reading, not writing"), - file); - free_temp(tmp); - return tmp_number((AWKNUM) status); - } - fp = rp->fp; - if (fp != NULL) - status = fflush(fp); - } else if ((fp = stdfile(tmp->stptr, tmp->stlen)) != NULL) { - status = fflush(fp); - } else { - status = -1; - warning(_("fflush: `%s' is not an open file, pipe or co-process"), file); - } - free_temp(tmp); - return tmp_number((AWKNUM) status); -} - -/* do_index --- find index of a string */ - -NODE * -do_index(NODE *tree) -{ - NODE *s1, *s2; - register char *p1, *p2; - register size_t l1, l2; - long ret; - - - s1 = tree_eval(tree->lnode); - s2 = tree_eval(tree->rnode->lnode); - if (do_lint) { - if ((s1->flags & (STRING|STR)) == 0) - lintwarn(_("index: received non-string first argument")); - if ((s2->flags & (STRING|STR)) == 0) - lintwarn(_("index: received non-string second argument")); - } - force_string(s1); - force_string(s2); - p1 = s1->stptr; - p2 = s2->stptr; - l1 = s1->stlen; - l2 = s2->stlen; - ret = 0; - - /* IGNORECASE will already be false if posix */ - if (IGNORECASE) { - while (l1 > 0) { - if (l2 > l1) - break; - if (casetable[(unsigned char)*p1] == casetable[(unsigned char)*p2] - && (l2 == 1 || strncasecmp(p1, p2, l2) == 0)) { - ret = 1 + s1->stlen - l1; - break; - } - l1--; - p1++; - } - } else { - while (l1 > 0) { - if (l2 > l1) - break; - if (*p1 == *p2 - && (l2 == 1 || STREQN(p1, p2, l2))) { - ret = 1 + s1->stlen - l1; - break; - } - l1--; - p1++; - } - } - free_temp(s1); - free_temp(s2); - return tmp_number((AWKNUM) ret); -} - -/* double_to_int --- convert double to int, used several places */ - -double -double_to_int(double d) -{ - if (d >= 0) - d = Floor(d); - else - d = Ceil(d); - return d; -} - -/* do_int --- convert double to int for awk */ - -NODE * -do_int(NODE *tree) -{ - NODE *tmp; - double d; - - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("int: received non-numeric argument")); - d = force_number(tmp); - d = double_to_int(d); - free_temp(tmp); - return tmp_number((AWKNUM) d); -} - -/* do_length --- length of a string or $0 */ - -NODE * -do_length(NODE *tree) -{ - NODE *tmp; - size_t len; - - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (STRING|STR)) == 0) - lintwarn(_("length: received non-string argument")); - len = force_string(tmp)->stlen; - free_temp(tmp); - return tmp_number((AWKNUM) len); -} - -/* do_log --- the log function */ - -NODE * -do_log(NODE *tree) -{ - NODE *tmp; - double d, 
arg; - - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("log: received non-numeric argument")); - arg = (double) force_number(tmp); - if (arg < 0.0) - warning(_("log: received negative argument %g"), arg); - d = log(arg); - free_temp(tmp); - return tmp_number((AWKNUM) d); -} - -/* - * format_tree() formats nodes of a tree, starting with a left node, - * and accordingly to a fmt_string providing a format like in - * printf family from C library. Returns a string node which value - * is a formatted string. Called by sprintf function. - * - * It is one of the uglier parts of gawk. Thanks to Michal Jaegermann - * for taming this beast and making it compatible with ANSI C. - */ - -NODE * -format_tree( - const char *fmt_string, - int n0, - register NODE *carg, - int num_args) -{ -/* copy 'l' bytes from 's' to 'obufout' checking for space in the process */ -/* difference of pointers should be of ptrdiff_t type, but let us be kind */ -#define bchunk(s, l) if (l) { \ - while ((l) > ofre) { \ - long olen = obufout - obuf; \ - erealloc(obuf, char *, osiz * 2, "format_tree"); \ - ofre += osiz; \ - osiz *= 2; \ - obufout = obuf + olen; \ - } \ - memcpy(obufout, s, (size_t) (l)); \ - obufout += (l); \ - ofre -= (l); \ -} - -/* copy one byte from 's' to 'obufout' checking for space in the process */ -#define bchunk_one(s) { \ - if (ofre <= 0) { \ - long olen = obufout - obuf; \ - erealloc(obuf, char *, osiz * 2, "format_tree"); \ - ofre += osiz; \ - osiz *= 2; \ - obufout = obuf + olen; \ - } \ - *obufout++ = *s; \ - --ofre; \ -} - -/* Is there space for something L big in the buffer? */ -#define chksize(l) if ((l) > ofre) { \ - long olen = obufout - obuf; \ - erealloc(obuf, char *, osiz * 2, "format_tree"); \ - obufout = obuf + olen; \ - ofre += osiz; \ - osiz *= 2; \ -} - - static NODE **the_args = 0; - static size_t args_size = 0; - size_t cur_arg = 0; - - auto NODE **save_args = 0; - auto size_t save_args_size = 0; - static int call_level = 0; - - NODE *r; - int i; - int toofew = FALSE; - char *obuf, *obufout; - size_t osiz, ofre; - char *chbuf; - const char *s0, *s1; - int cs1; - NODE *arg; - long fw, prec, argnum; - int used_dollar; - int lj, alt, big, bigbig, small, have_prec, need_format; - long *cur = NULL; -#ifdef sun386 /* Can't cast unsigned (int/long) from ptr->value */ - long tmp_uval; /* on 386i 4.0.1 C compiler -- it just hangs */ -#endif - unsigned long uval; - int sgn; - int base = 0; - char cpbuf[30]; /* if we have numbers bigger than 30 */ - char *cend = &cpbuf[30];/* chars, we lose, but seems unlikely */ - char *cp; - char *fill; - double tmpval; - char signchar = FALSE; - size_t len; - int zero_flag = FALSE; - static char sp[] = " "; - static char zero_string[] = "0"; - static char lchbuf[] = "0123456789abcdef"; - static char Uchbuf[] = "0123456789ABCDEF"; - -#define INITIAL_OUT_SIZE 512 - emalloc(obuf, char *, INITIAL_OUT_SIZE, "format_tree"); - obufout = obuf; - osiz = INITIAL_OUT_SIZE; - ofre = osiz - 1; - - /* - * Icky problem. If the args make a nested call to printf/sprintf, - * we end up clobbering the static variable `the_args'. Not good. - * We don't just malloc and free the_args each time, since most of the - * time there aren't nested calls. But if this is a nested call, - * save the memory pointed to by the_args and allocate a fresh - * array. Then free it on end. 
- */ - if (++call_level > 1) { /* nested */ - save_args = the_args; - save_args_size = args_size; - - args_size = 0; /* force fresh allocation */ - } - - if (args_size == 0) { - /* allocate array */ - emalloc(the_args, NODE **, (num_args+1) * sizeof(NODE *), "format_tree"); - args_size = num_args + 1; - } else if (num_args + 1 > args_size) { - /* grow it */ - erealloc(the_args, NODE **, (num_args+1) * sizeof(NODE *), "format_tree"); - args_size = num_args + 1; - } - - - /* fill it in */ - /* - * We ignore the_args[0] since format strings use - * 1-based numbers to indicate the arguments. It's - * easiest to just convert to int and index, without - * having to remember to subtract 1. - */ - memset(the_args, '\0', num_args * sizeof(NODE *)); - for (i = 1; carg != NULL; i++, carg = carg->rnode) { - NODE *tmp; - - /* Here lies the wumpus's other brother. R.I.P. */ - tmp = tree_eval(carg->lnode); - the_args[i] = dupnode(tmp); - free_temp(tmp); - } - assert(i == num_args); - cur_arg = 1; - - /* - * Check first for use of `count$'. - * If plain argument retrieval was used earlier, choke. - * Otherwise, return the requested argument. - * If not `count$' now, but it was used earlier, choke. - * If this format is more than total number of args, choke. - * Otherwise, return the current argument. - */ -#define parse_next_arg() { \ - if (argnum > 0) { \ - if (cur_arg > 1) \ - fatal(_("must use `count$' on all formats or none")); \ - arg = the_args[argnum]; \ - } else if (used_dollar) { \ - fatal(_("must use `count$' on all formats or none")); \ - arg = 0; /* shutup the compiler */ \ - } else if (cur_arg >= num_args) { \ - arg = 0; /* shutup the compiler */ \ - toofew = TRUE; \ - break; \ - } else { \ - arg = the_args[cur_arg]; \ - cur_arg++; \ - } \ -} - - need_format = FALSE; - used_dollar = FALSE; - - s0 = s1 = fmt_string; - while (n0-- > 0) { - if (*s1 != '%') { - s1++; - continue; - } - need_format = TRUE; - bchunk(s0, s1 - s0); - s0 = s1; - cur = &fw; - fw = 0; - prec = 0; - argnum = 0; - have_prec = FALSE; - signchar = FALSE; - zero_flag = FALSE; - lj = alt = big = bigbig = small = FALSE; - fill = sp; - cp = cend; - chbuf = lchbuf; - s1++; - -retry: - if (n0-- <= 0) /* ran out early! */ - break; - - switch (cs1 = *s1++) { - case (-1): /* dummy case to allow for checking */ -check_pos: - if (cur != &fw) - break; /* reject as a valid format */ - goto retry; - case '%': - need_format = FALSE; - bchunk_one("%"); - s0 = s1; - break; - - case '0': - /* - * Only turn on zero_flag if we haven't seen - * the field width or precision yet. Otherwise, - * screws up floating point formatting. 
- */ - if (cur == & fw) - zero_flag = TRUE; - if (lj) - goto retry; - /* FALL through */ - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (cur == NULL) - break; - if (prec >= 0) - *cur = cs1 - '0'; - /* - * with a negative precision *cur is already set - * to -1, so it will remain negative, but we have - * to "eat" precision digits in any case - */ - while (n0 > 0 && *s1 >= '0' && *s1 <= '9') { - --n0; - *cur = *cur * 10 + *s1++ - '0'; - } - if (prec < 0) /* negative precision is discarded */ - have_prec = FALSE; - if (cur == &prec) - cur = NULL; - if (n0 == 0) /* badly formatted control string */ - continue; - goto retry; - case '$': - if (do_traditional) - fatal(_("`$' is not permitted in awk formats")); - if (cur == &fw) { - argnum = fw; - fw = 0; - used_dollar = TRUE; - if (argnum <= 0) - fatal(_("arg count with `$' must be > 0")); - if (argnum >= num_args) - fatal(_("arg count %d greater than total number of supplied arguments"), argnum); - } else - fatal(_("`$' not permitted after period in format")); - goto retry; - case '*': - if (cur == NULL) - break; - if (! do_traditional && ISDIGIT(*s1)) { - int val = 0; - - for (; n0 > 0 && *s1 && ISDIGIT(*s1); s1++, n0--) { - val *= 10; - val += *s1 - '0'; - } - if (*s1 != '$') { - fatal(_("no `$' supplied for positional field width or precision")); - } else { - s1++; - n0--; - } - - arg = the_args[val]; - } else { - parse_next_arg(); - } - *cur = force_number(arg); - if (*cur < 0 && cur == &fw) { - *cur = -*cur; - lj++; - } - if (cur == &prec) { - if (*cur >= 0) - have_prec = TRUE; - else - have_prec = FALSE; - cur = NULL; - } - goto retry; - case ' ': /* print ' ' or '-' */ - /* 'space' flag is ignored */ - /* if '+' already present */ - if (signchar != FALSE) - goto check_pos; - /* FALL THROUGH */ - case '+': /* print '+' or '-' */ - signchar = cs1; - goto check_pos; - case '-': - if (prec < 0) - break; - if (cur == &prec) { - prec = -1; - goto retry; - } - fill = sp; /* if left justified then other */ - lj++; /* filling is ignored */ - goto check_pos; - case '.': - if (cur != &fw) - break; - cur = ≺ - have_prec = TRUE; - goto retry; - case '#': - alt = TRUE; - goto check_pos; - case 'l': - if (big) - break; - else { - static int warned = FALSE; - - if (do_lint && ! warned) { - lintwarn(_("`l' is meaningless in awk formats; ignored")); - warned = TRUE; - } - if (do_posix) - fatal(_("`l' is not permitted in POSIX awk formats")); - } - big = TRUE; - goto retry; - case 'L': - if (bigbig) - break; - else { - static int warned = FALSE; - - if (do_lint && ! warned) { - lintwarn(_("`L' is meaningless in awk formats; ignored")); - warned = TRUE; - } - if (do_posix) - fatal(_("`L' is not permitted in POSIX awk formats")); - } - bigbig = TRUE; - goto retry; - case 'h': - if (small) - break; - else { - static int warned = FALSE; - - if (do_lint && ! warned) { - lintwarn(_("`h' is meaningless in awk formats; ignored")); - warned = TRUE; - } - if (do_posix) - fatal(_("`h' is not permitted in POSIX awk formats")); - } - small = TRUE; - goto retry; - case 'c': - need_format = FALSE; - if (zero_flag && ! 
lj) - fill = zero_string; - parse_next_arg(); - /* user input that looks numeric is numeric */ - if ((arg->flags & (MAYBE_NUM|NUMBER)) == MAYBE_NUM) - (void) force_number(arg); - if (arg->flags & NUMBER) { -#ifdef sun386 - tmp_uval = arg->numbr; - uval = (unsigned long) tmp_uval; -#else - uval = (unsigned long) arg->numbr; -#endif - cpbuf[0] = uval; - prec = 1; - cp = cpbuf; - goto pr_tail; - } - if (have_prec == FALSE) - prec = 1; - else if (prec > arg->stlen) - prec = arg->stlen; - cp = arg->stptr; - goto pr_tail; - case 's': - need_format = FALSE; - if (zero_flag && ! lj) - fill = zero_string; - parse_next_arg(); - arg = force_string(arg); - if (! have_prec || prec > arg->stlen) - prec = arg->stlen; - cp = arg->stptr; - goto pr_tail; - case 'd': - case 'i': - need_format = FALSE; - parse_next_arg(); - tmpval = force_number(arg); - - /* - * ``The result of converting a zero value with a - * precision of zero is no characters.'' - */ - if (have_prec && prec == 0 && tmpval == 0) - goto pr_tail; - - if (tmpval < 0) { - if (tmpval < LONG_MIN) - goto out_of_range; - sgn = TRUE; - uval = - (unsigned long) (long) tmpval; - } else { - /* Use !, so that NaNs are out of range. - The cast avoids a SunOS 4.1.x cc bug. */ - if (! (tmpval <= (unsigned long) ULONG_MAX)) - goto out_of_range; - sgn = FALSE; - uval = (unsigned long) tmpval; - } - do { - *--cp = (char) ('0' + uval % 10); - uval /= 10; - } while (uval > 0); - - /* add more output digits to match the precision */ - if (have_prec) { - while (cend - cp < prec) - *--cp = '0'; - } - - if (sgn) - *--cp = '-'; - else if (signchar) - *--cp = signchar; - /* - * When to fill with zeroes is of course not simple. - * First: No zero fill if left-justifying. - * Next: There seem to be two cases: - * A '0' without a precision, e.g. %06d - * A precision with no field width, e.g. %.10d - * Any other case, we don't want to fill with zeroes. - */ - if (! lj - && ((zero_flag && ! have_prec) - || (fw == 0 && have_prec))) - fill = zero_string; - if (prec > fw) - fw = prec; - prec = cend - cp; - if (fw > prec && ! lj && fill != sp - && (*cp == '-' || signchar)) { - bchunk_one(cp); - cp++; - prec--; - fw--; - } - goto pr_tail; - case 'X': - chbuf = Uchbuf; /* FALL THROUGH */ - case 'x': - base += 6; /* FALL THROUGH */ - case 'u': - base += 2; /* FALL THROUGH */ - case 'o': - base += 8; - need_format = FALSE; - parse_next_arg(); - tmpval = force_number(arg); - - /* - * ``The result of converting a zero value with a - * precision of zero is no characters.'' - * - * If I remember the ANSI C standard, though, - * it says that for octal conversions - * the precision is artificially increased - * to add an extra 0 if # is supplied. - * Indeed, in C, - * printf("%#.0o\n", 0); - * prints a single 0. - */ - if (! alt && have_prec && prec == 0 && tmpval == 0) - goto pr_tail; - - if (tmpval < 0) { - if (tmpval < LONG_MIN) - goto out_of_range; - uval = (unsigned long) (long) tmpval; - } else { - /* Use !, so that NaNs are out of range. - The cast avoids a SunOS 4.1.x cc bug. */ - if (! (tmpval <= (unsigned long) ULONG_MAX)) - goto out_of_range; - uval = (unsigned long) tmpval; - } - /* - * When to fill with zeroes is of course not simple. - * First: No zero fill if left-justifying. - * Next: There seem to be two cases: - * A '0' without a precision, e.g. %06d - * A precision with no field width, e.g. %.10d - * Any other case, we don't want to fill with zeroes. - */ - if (! lj - && ((zero_flag && ! 
have_prec) - || (fw == 0 && have_prec))) - fill = zero_string; - do { - *--cp = chbuf[uval % base]; - uval /= base; - } while (uval > 0); - - /* add more output digits to match the precision */ - if (have_prec) { - while (cend - cp < prec) - *--cp = '0'; - } - - if (alt && tmpval != 0) { - if (base == 16) { - *--cp = cs1; - *--cp = '0'; - if (fill != sp) { - bchunk(cp, 2); - cp += 2; - fw -= 2; - } - } else if (base == 8) - *--cp = '0'; - } - base = 0; - if (prec > fw) - fw = prec; - prec = cend - cp; - pr_tail: - if (! lj) { - while (fw > prec) { - bchunk_one(fill); - fw--; - } - } - bchunk(cp, (int) prec); - while (fw > prec) { - bchunk_one(fill); - fw--; - } - s0 = s1; - break; - - out_of_range: - /* out of range - emergency use of %g format */ - cs1 = 'g'; - goto format_float; - - case 'g': - case 'G': - case 'e': - case 'f': - case 'E': - need_format = FALSE; - parse_next_arg(); - tmpval = force_number(arg); - format_float: - if (! have_prec) - prec = DEFAULT_G_PRECISION; - chksize(fw + prec + 9); /* 9 == slop */ - - cp = cpbuf; - *cp++ = '%'; - if (lj) - *cp++ = '-'; - if (signchar) - *cp++ = signchar; - if (alt) - *cp++ = '#'; - if (zero_flag) - *cp++ = '0'; - strcpy(cp, "*.*"); - cp += 3; - *cp++ = cs1; - *cp = '\0'; -#ifndef GFMT_WORKAROUND - (void) sprintf(obufout, cpbuf, - (int) fw, (int) prec, (double) tmpval); -#else /* GFMT_WORKAROUND */ - if (cs1 == 'g' || cs1 == 'G') - sgfmt(obufout, cpbuf, (int) alt, - (int) fw, (int) prec, (double) tmpval); - else - (void) sprintf(obufout, cpbuf, - (int) fw, (int) prec, (double) tmpval); -#endif /* GFMT_WORKAROUND */ - len = strlen(obufout); - ofre -= len; - obufout += len; - s0 = s1; - break; - default: - break; - } - if (toofew) - fatal("%s\n\t`%s'\n\t%*s%s", - _("not enough arguments to satisfy format string"), - fmt_string, s1 - fmt_string - 2, "", - _("^ ran out for this one")); - } - if (do_lint) { - if (need_format) - lintwarn( - _("[s]printf: format specifier does not have control letter")); - if (carg != NULL) - lintwarn( - _("too many arguments supplied for format string")); - } - bchunk(s0, s1 - s0); - r = make_str_node(obuf, obufout - obuf, ALREADY_MALLOCED); - r->flags |= TEMP; - - for (i = 1; i < num_args; i++) { - unref(the_args[i]); - } - - if (call_level-- > 1) { - free(the_args); - the_args = save_args; - args_size = save_args_size; - } - - return r; -} - -/* do_sprintf --- perform sprintf */ - -NODE * -do_sprintf(NODE *tree) -{ - NODE *r; - NODE *sfmt = force_string(tree_eval(tree->lnode)); - - r = format_tree(sfmt->stptr, sfmt->stlen, tree->rnode, tree->printf_count); - free_temp(sfmt); - return r; -} - -/* do_printf --- perform printf, including redirection */ - -void -do_printf(NODE *tree) -{ - struct redirect *rp = NULL; - register FILE *fp; - - if (tree->lnode == NULL) { - if (do_traditional) { - if (do_lint) - lintwarn(_("printf: no arguments")); - return; /* bwk accepts it silently */ - } - fatal(_("printf: no arguments")); - } - - if (tree->rnode != NULL) { - int errflg; /* not used, sigh */ - - rp = redirect(tree->rnode, &errflg); - if (rp != NULL) { - fp = rp->fp; - if (fp == NULL) - return; - } else - return; - } else - fp = stdout; - tree->lnode->printf_count = tree->printf_count; - tree = do_sprintf(tree->lnode); - efwrite(tree->stptr, sizeof(char), tree->stlen, fp, "printf", rp, TRUE); - if (rp != NULL && (rp->flag & RED_TWOWAY) != 0) - fflush(rp->fp); - free_temp(tree); -} - -/* do_sqrt --- do the sqrt function */ - -NODE * -do_sqrt(NODE *tree) -{ - NODE *tmp; - double arg; - - tmp = 
tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("sqrt: received non-numeric argument")); - arg = (double) force_number(tmp); - free_temp(tmp); - if (arg < 0.0) - warning(_("sqrt: called with negative argument %g"), arg); - return tmp_number((AWKNUM) sqrt(arg)); -} - -/* do_substr --- do the substr function */ - -NODE * -do_substr(NODE *tree) -{ - NODE *t1, *t2, *t3; - NODE *r; - register size_t indx; - size_t length; - double d_index, d_length; - - t1 = force_string(tree_eval(tree->lnode)); - t2 = tree_eval(tree->rnode->lnode); - d_index = force_number(t2); - free_temp(t2); - - if (d_index < 1.0) { - if (do_lint) - lintwarn(_("substr: start index %g is invalid, using 1"), - d_index); - d_index = 1; - } - if (do_lint && double_to_int(d_index) != d_index) - lintwarn(_("substr: non-integer start index %g will be truncated"), - d_index); - - indx = d_index - 1; /* awk indices are from 1, C's are from 0 */ - - if (tree->rnode->rnode == NULL) { /* third arg. missing */ - /* use remainder of string */ - length = t1->stlen - indx; - } else { - t3 = tree_eval(tree->rnode->rnode->lnode); - d_length = force_number(t3); - free_temp(t3); - if (d_length <= 0.0) { - if (do_lint) - lintwarn(_("substr: length %g is <= 0"), d_length); - free_temp(t1); - return Nnull_string; - } - if (do_lint && double_to_int(d_length) != d_length) - lintwarn( - _("substr: non-integer length %g will be truncated"), - d_length); - length = d_length; - } - - if (t1->stlen == 0) { - if (do_lint) - lintwarn(_("substr: source string is zero length")); - free_temp(t1); - return Nnull_string; - } - if ((indx + length) > t1->stlen) { - if (do_lint) - lintwarn( - _("substr: length %d at start index %d exceeds length of first argument (%d)"), - length, indx+1, t1->stlen); - length = t1->stlen - indx; - } - if (indx >= t1->stlen) { - if (do_lint) - lintwarn(_("substr: start index %d is past end of string"), - indx+1); - free_temp(t1); - return Nnull_string; - } - r = tmp_string(t1->stptr + indx, length); - free_temp(t1); - return r; -} - -/* do_strftime --- format a time stamp */ - -NODE * -do_strftime(NODE *tree) -{ - NODE *t1, *t2, *ret; - struct tm *tm; - time_t fclock; - char *bufp; - size_t buflen, bufsize; - char buf[BUFSIZ]; - /* FIXME: One day make %d be %e, after C 99 is common. 
*/ - static char def_format[] = "%a %b %d %H:%M:%S %Z %Y"; - char *format; - int formatlen; - - /* set defaults first */ - format = def_format; /* traditional date format */ - formatlen = strlen(format); - (void) time(&fclock); /* current time of day */ - - t1 = t2 = NULL; - if (tree != NULL) { /* have args */ - if (tree->lnode != NULL) { - NODE *tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (STRING|STR)) == 0) - lintwarn(_("strftime: recieved non-string first argument")); - t1 = force_string(tmp); - format = t1->stptr; - formatlen = t1->stlen; - if (formatlen == 0) { - if (do_lint) - lintwarn(_("strftime: received empty format string")); - free_temp(t1); - return tmp_string("", 0); - } - } - - if (tree->rnode != NULL) { - t2 = tree_eval(tree->rnode->lnode); - if (do_lint && (t2->flags & (NUM|NUMBER)) == 0) - lintwarn(_("strftime: recieved non-numeric second argument")); - fclock = (time_t) force_number(t2); - free_temp(t2); - } - } - - tm = localtime(&fclock); - - bufp = buf; - bufsize = sizeof(buf); - for (;;) { - *bufp = '\0'; - buflen = strftime(bufp, bufsize, format, tm); - /* - * buflen can be zero EITHER because there's not enough - * room in the string, or because the control command - * goes to the empty string. Make a reasonable guess that - * if the buffer is 1024 times bigger than the length of the - * format string, it's not failing for lack of room. - * Thanks to Paul Eggert for pointing out this issue. - */ - if (buflen > 0 || bufsize >= 1024 * formatlen) - break; - bufsize *= 2; - if (bufp == buf) - emalloc(bufp, char *, bufsize, "do_strftime"); - else - erealloc(bufp, char *, bufsize, "do_strftime"); - } - ret = tmp_string(bufp, buflen); - if (bufp != buf) - free(bufp); - if (t1) - free_temp(t1); - return ret; -} - -/* do_systime --- get the time of day */ - -NODE * -do_systime(NODE *tree) -{ - time_t lclock; - - (void) time(&lclock); - return tmp_number((AWKNUM) lclock); -} - -/* do_mktime --- turn a time string into a timestamp */ - -NODE * -do_mktime(NODE *tree) -{ - NODE *t1; - struct tm then; - long year; - int month, day, hour, minute, second, count; - int dst = -1; /* default is unknown */ - time_t then_stamp; - char save; - - t1 = tree_eval(tree->lnode); - if (do_lint && (t1->flags & (STRING|STR)) == 0) - lintwarn(_("mktime: received non-string argument")); - t1 = force_string(t1); - - save = t1->stptr[t1->stlen]; - t1->stptr[t1->stlen] = '\0'; - - count = sscanf(t1->stptr, "%ld %d %d %d %d %d %d", - & year, & month, & day, - & hour, & minute, & second, - & dst); - - t1->stptr[t1->stlen] = save; - free_temp(t1); - - if (count < 6 - || month < month - 1 - || year < year - 1900 || year - 1900 != (int) (year - 1900)) - return tmp_number((AWKNUM) -1); - - memset(& then, '\0', sizeof(then)); - then.tm_sec = second; - then.tm_min = minute; - then.tm_hour = hour; - then.tm_mday = day; - then.tm_mon = month - 1; - then.tm_year = year - 1900; - then.tm_isdst = dst; - - then_stamp = mktime(& then); - return tmp_number((AWKNUM) then_stamp); -} - -/* do_system --- run an external command */ - -NODE * -do_system(NODE *tree) -{ - NODE *tmp; - int ret = 0; - char *cmd; - char save; - - (void) flush_io(); /* so output is synchronous with gawk's */ - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (STRING|STR)) == 0) - lintwarn(_("system: recieved non-string argument")); - cmd = force_string(tmp)->stptr; - - if (cmd && *cmd) { - /* insure arg to system is zero-terminated */ - - /* - * From: David Trueman - * To: arnold@cc.gatech.edu (Arnold Robbins) - * 
Date: Wed, 3 Nov 1993 12:49:41 -0400 - * - * It may not be necessary to save the character, but - * I'm not sure. It would normally be the field - * separator. If the parse has not yet gone beyond - * that, it could mess up (although I doubt it). If - * FIELDWIDTHS is being used, it might be the first - * character of the next field. Unless someone wants - * to check it out exhaustively, I suggest saving it - * for now... - */ - save = cmd[tmp->stlen]; - cmd[tmp->stlen] = '\0'; - - os_restore_mode(fileno(stdin)); - ret = system(cmd); - ret = (ret >> 8) & 0xff; - if ((BINMODE & 1) != 0) - os_setbinmode(fileno(stdin), O_BINARY); - - cmd[tmp->stlen] = save; - } - free_temp(tmp); - return tmp_number((AWKNUM) ret); -} - -extern NODE **fmt_list; /* declared in eval.c */ - -/* do_print --- print items, separated by OFS, terminated with ORS */ - -void -do_print(register NODE *tree) -{ - register NODE **t; - struct redirect *rp = NULL; - register FILE *fp; - int numnodes, i; - NODE *save; - NODE *tval; - - if (tree->rnode) { - int errflg; /* not used, sigh */ - - rp = redirect(tree->rnode, &errflg); - if (rp != NULL) { - fp = rp->fp; - if (fp == NULL) - return; - } else - return; - } else - fp = stdout; - - /* - * General idea is to evaluate all the expressions first and - * then print them, otherwise you get suprising behavior. - * See test/prtoeval.awk for an example program. - */ - save = tree = tree->lnode; - for (numnodes = 0; tree != NULL; tree = tree->rnode) - numnodes++; - emalloc(t, NODE **, numnodes * sizeof(NODE *), "do_print"); - - tree = save; - for (i = 0; tree != NULL; i++, tree = tree->rnode) { - NODE *n; - - /* Here lies the wumpus. R.I.P. */ - n = tree_eval(tree->lnode); - t[i] = dupnode(n); - free_temp(n); - - if ((t[i]->flags & (NUMBER|STRING)) == NUMBER) { - if (OFMTidx == CONVFMTidx) - (void) force_string(t[i]); - else { - tval = tmp_number(t[i]->numbr); - unref(t[i]); - t[i] = format_val(OFMT, OFMTidx, tval); - } - } - } - - for (i = 0; i < numnodes; i++) { - efwrite(t[i]->stptr, sizeof(char), t[i]->stlen, fp, "print", rp, FALSE); - unref(t[i]); - - if (i != numnodes - 1 && OFSlen > 0) - efwrite(OFS, sizeof(char), (size_t) OFSlen, - fp, "print", rp, FALSE); - - } - if (ORSlen > 0) - efwrite(ORS, sizeof(char), (size_t) ORSlen, fp, "print", rp, TRUE); - - if (rp != NULL && (rp->flag & RED_TWOWAY) != 0) - fflush(rp->fp); - - free(t); -} - -/* do_tolower --- lower case a string */ - -NODE * -do_tolower(NODE *tree) -{ - NODE *t1, *t2; - register unsigned char *cp, *cp2; - - t1 = tree_eval(tree->lnode); - if (do_lint && (t1->flags & (STRING|STR)) == 0) - lintwarn(_("tolower: recieved non-string argument")); - t1 = force_string(t1); - t2 = tmp_string(t1->stptr, t1->stlen); - for (cp = (unsigned char *)t2->stptr, - cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) - if (ISUPPER(*cp)) - *cp = TOLOWER(*cp); - free_temp(t1); - return t2; -} - -/* do_toupper --- upper case a string */ - -NODE * -do_toupper(NODE *tree) -{ - NODE *t1, *t2; - register unsigned char *cp, *cp2; - - t1 = tree_eval(tree->lnode); - if (do_lint && (t1->flags & (STRING|STR)) == 0) - lintwarn(_("toupper: recieved non-string argument")); - t1 = force_string(t1); - t2 = tmp_string(t1->stptr, t1->stlen); - for (cp = (unsigned char *)t2->stptr, - cp2 = (unsigned char *)(t2->stptr + t2->stlen); cp < cp2; cp++) - if (ISLOWER(*cp)) - *cp = TOUPPER(*cp); - free_temp(t1); - return t2; -} - -/* do_atan2 --- do the atan2 function */ - -NODE * -do_atan2(NODE *tree) -{ - NODE *t1, *t2; - double d1, d2; - - t1 = 
tree_eval(tree->lnode); - t2 = tree_eval(tree->rnode->lnode); - if (do_lint) { - if ((t1->flags & (NUM|NUMBER)) == 0) - lintwarn(_("atan2: received non-numeric first argument")); - if ((t2->flags & (NUM|NUMBER)) == 0) - lintwarn(_("atan2: received non-numeric second argument")); - } - d1 = force_number(t1); - d2 = force_number(t2); - free_temp(t1); - free_temp(t2); - return tmp_number((AWKNUM) atan2(d1, d2)); -} - -/* do_sin --- do the sin function */ - -NODE * -do_sin(NODE *tree) -{ - NODE *tmp; - double d; - - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("sin: received non-numeric argument")); - d = sin((double) force_number(tmp)); - free_temp(tmp); - return tmp_number((AWKNUM) d); -} - -/* do_cos --- do the cos function */ - -NODE * -do_cos(NODE *tree) -{ - NODE *tmp; - double d; - - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("cos: received non-numeric argument")); - d = cos((double) force_number(tmp)); - free_temp(tmp); - return tmp_number((AWKNUM) d); -} - -/* do_rand --- do the rand function */ - -static int firstrand = TRUE; -static char state[512]; - -/* ARGSUSED */ -NODE * -do_rand(NODE *tree) -{ - if (firstrand) { - (void) initstate((unsigned) 1, state, sizeof state); - srandom(1); - firstrand = FALSE; - } - return tmp_number((AWKNUM) random() / GAWK_RANDOM_MAX); -} - -/* do_srand --- seed the random number generator */ - -NODE * -do_srand(NODE *tree) -{ - NODE *tmp; - static long save_seed = 1; - long ret = save_seed; /* SVR4 awk srand returns previous seed */ - - if (firstrand) { - (void) initstate((unsigned) 1, state, sizeof state); - /* don't need to srandom(1), we're changing the seed below */ - firstrand = FALSE; - } else - (void) setstate(state); - - if (tree == NULL) -#ifdef __FreeBSD__ - srandom((unsigned int) (save_seed = (long) time((time_t *) 0) - ^ (getpid() << 16))); -#else - srandom((unsigned int) (save_seed = (long) time((time_t *) 0))); -#endif - else { - tmp = tree_eval(tree->lnode); - if (do_lint && (tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("srand: received non-numeric argument")); - srandom((unsigned int) (save_seed = (long) force_number(tmp))); - free_temp(tmp); - } - return tmp_number((AWKNUM) ret); -} - -/* do_match --- match a regexp, set RSTART and RLENGTH, - * optional third arg is array filled with text of - * subpatterns enclosed in parens. 
- */ - -NODE * -do_match(NODE *tree) -{ - NODE *t1, *dest, *it; - int rstart, len, ii; - AWKNUM rlength; - Regexp *rp; - regoff_t s; - char *start; - - t1 = force_string(tree_eval(tree->lnode)); - tree = tree->rnode; - rp = re_update(tree->lnode); - - dest = NULL; - if (tree->rnode != NULL) { /* 3rd optional arg for the subpatterns */ - dest = tree->rnode->lnode; - if (dest->type == Node_param_list) - dest = stack_ptr[dest->param_cnt]; - if (dest->type == Node_array_ref) - dest = dest->orig_array; - if (dest->type != Node_var && dest->type != Node_var_array) - fatal(_("match: third argument is not an array")); - dest->type = Node_var_array; - assoc_clear(dest); - } - - rstart = research(rp, t1->stptr, 0, t1->stlen, TRUE); - if (rstart >= 0) { /* match succeded */ - rstart++; /* 1-based indexing */ - rlength = REEND(rp, t1->stptr) - RESTART(rp, t1->stptr); - - /* Build the array only if the caller wants the optional subpatterns */ - if (dest != NULL) { - for (ii = 0; (s = SUBPATSTART(rp, t1->stptr, ii)) != -1; ii++) { - start = t1->stptr + s; - len = SUBPATEND(rp, t1->stptr, ii) - s; - - it = make_string(start, len); - it->flags |= MAYBE_NUM; - *assoc_lookup(dest, tmp_number((AWKNUM) (ii)), FALSE) = it; - } - } - } else { /* match failed */ - rstart = 0; - rlength = -1.0; - } - free_temp(t1); - unref(RSTART_node->var_value); - RSTART_node->var_value = make_number((AWKNUM) rstart); - unref(RLENGTH_node->var_value); - RLENGTH_node->var_value = make_number(rlength); - return tmp_number((AWKNUM) rstart); -} - -/* sub_common --- the common code (does the work) for sub, gsub, and gensub */ - -/* - * Gsub can be tricksy; particularly when handling the case of null strings. - * The following awk code was useful in debugging problems. It is too bad - * that it does not readily translate directly into the C code, below. - * - * #! /usr/local/bin/mawk -f - * - * BEGIN { - * TRUE = 1; FALSE = 0 - * print "--->", mygsub("abc", "b+", "FOO") - * print "--->", mygsub("abc", "x*", "X") - * print "--->", mygsub("abc", "b*", "X") - * print "--->", mygsub("abc", "c", "X") - * print "--->", mygsub("abc", "c+", "X") - * print "--->", mygsub("abc", "x*$", "X") - * } - * - * function mygsub(str, regex, replace, origstr, newstr, eosflag, nonzeroflag) - * { - * origstr = str; - * eosflag = nonzeroflag = FALSE - * while (match(str, regex)) { - * if (RLENGTH > 0) { # easy case - * nonzeroflag = TRUE - * if (RSTART == 1) { # match at front of string - * newstr = newstr replace - * } else { - * newstr = newstr substr(str, 1, RSTART-1) replace - * } - * str = substr(str, RSTART+RLENGTH) - * } else if (nonzeroflag) { - * # last match was non-zero in length, and at the - * # current character, we get a zero length match, - * # which we don't really want, so skip over it - * newstr = newstr substr(str, 1, 1) - * str = substr(str, 2) - * nonzeroflag = FALSE - * } else { - * # 0-length match - * if (RSTART == 1) { - * newstr = newstr replace substr(str, 1, 1) - * str = substr(str, 2) - * } else { - * return newstr str replace - * } - * } - * if (length(str) == 0) - * if (eosflag) - * break; - * else - * eosflag = TRUE - * } - * if (length(str) > 0) - * newstr = newstr str # rest of string - * - * return newstr - * } - */ - -/* - * NB: `howmany' conflicts with a SunOS 4.x macro in . 
- */ - -static NODE * -sub_common(NODE *tree, int how_many, int backdigs) -{ - register char *scan; - register char *bp, *cp; - char *buf; - size_t buflen; - register char *matchend; - register size_t len; - char *matchstart; - char *text; - size_t textlen; - char *repl; - char *replend; - size_t repllen; - int sofar; - int ampersands; - int matches = 0; - Regexp *rp; - NODE *s; /* subst. pattern */ - NODE *t; /* string to make sub. in; $0 if none given */ - NODE *tmp; - NODE **lhs = &tree; /* value not used -- just different from NULL */ - int priv = FALSE; - Func_ptr after_assign = NULL; - - int global = (how_many == -1); - long current; - int lastmatchnonzero; - - tmp = tree->lnode; - rp = re_update(tmp); - - tree = tree->rnode; - s = tree->lnode; - - tree = tree->rnode; - tmp = tree->lnode; - t = force_string(tree_eval(tmp)); - - /* do the search early to avoid work on non-match */ - if (research(rp, t->stptr, 0, t->stlen, TRUE) == -1 || - RESTART(rp, t->stptr) > t->stlen) { - free_temp(t); - return tmp_number((AWKNUM) 0.0); - } - - if (tmp->type == Node_val) - lhs = NULL; - else - lhs = get_lhs(tmp, &after_assign, FALSE); - t->flags |= STRING; - /* - * create a private copy of the string - */ - if (t->stref > 1 || (t->flags & (PERM|FIELD)) != 0) { - tmp = copynode(t); - t = tmp; - priv = TRUE; - } - text = t->stptr; - textlen = t->stlen; - buflen = textlen + 2; - - s = force_string(tree_eval(s)); - repl = s->stptr; - replend = repl + s->stlen; - repllen = replend - repl; - emalloc(buf, char *, buflen + 2, "sub_common"); - buf[buflen] = '\0'; - buf[buflen + 1] = '\0'; - ampersands = 0; - for (scan = repl; scan < replend; scan++) { - if (*scan == '&') { - repllen--; - ampersands++; - } else if (*scan == '\\') { - if (backdigs) { /* gensub, behave sanely */ - if (ISDIGIT(scan[1])) { - ampersands++; - scan++; - } else { /* \q for any q --> q */ - repllen--; - scan++; - } - } else { /* (proposed) posix '96 mode */ - if (strncmp(scan, "\\\\\\&", 4) == 0) { - /* \\\& --> \& */ - repllen -= 2; - scan += 3; - } else if (strncmp(scan, "\\\\&", 3) == 0) { - /* \\& --> \ */ - ampersands++; - repllen--; - scan += 2; - } else if (scan[1] == '&') { - /* \& --> & */ - repllen--; - scan++; - } /* else - leave alone, it goes into the output */ - } - } - } - - lastmatchnonzero = FALSE; - bp = buf; - for (current = 1;; current++) { - matches++; - matchstart = t->stptr + RESTART(rp, t->stptr); - matchend = t->stptr + REEND(rp, t->stptr); - - /* - * create the result, copying in parts of the original - * string - */ - len = matchstart - text + repllen - + ampersands * (matchend - matchstart); - sofar = bp - buf; - while (buflen < (sofar + len + 1)) { - buflen *= 2; - erealloc(buf, char *, buflen, "sub_common"); - bp = buf + sofar; - } - for (scan = text; scan < matchstart; scan++) - *bp++ = *scan; - if (global || current == how_many) { - /* - * If the current match matched the null string, - * and the last match didn't and did a replacement, - * then skip this one. - */ - if (lastmatchnonzero && matchstart == matchend) { - lastmatchnonzero = FALSE; - matches--; - goto empty; - } - /* - * If replacing all occurrences, or this is the - * match we want, copy in the replacement text, - * making substitutions as we go. 
- */ - for (scan = repl; scan < replend; scan++) - if (*scan == '&') - for (cp = matchstart; cp < matchend; cp++) - *bp++ = *cp; - else if (*scan == '\\') { - if (backdigs) { /* gensub, behave sanely */ - if (ISDIGIT(scan[1])) { - int dig = scan[1] - '0'; - char *start, *end; - - start = t->stptr - + SUBPATSTART(rp, t->stptr, dig); - end = t->stptr - + SUBPATEND(rp, t->stptr, dig); - - for (cp = start; cp < end; cp++) - *bp++ = *cp; - scan++; - } else /* \q for any q --> q */ - *bp++ = *++scan; - } else { /* posix '96 mode, bleah */ - if (strncmp(scan, "\\\\\\&", 4) == 0) { - /* \\\& --> \& */ - *bp++ = '\\'; - *bp++ = '&'; - scan += 3; - } else if (strncmp(scan, "\\\\&", 3) == 0) { - /* \\& --> \ */ - *bp++ = '\\'; - for (cp = matchstart; cp < matchend; cp++) - *bp++ = *cp; - scan += 2; - } else if (scan[1] == '&') { - /* \& --> & */ - *bp++ = '&'; - scan++; - } else - *bp++ = *scan; - } - } else - *bp++ = *scan; - if (matchstart != matchend) - lastmatchnonzero = TRUE; - } else { - /* - * don't want this match, skip over it by copying - * in current text. - */ - for (cp = matchstart; cp < matchend; cp++) - *bp++ = *cp; - } - empty: - /* catch the case of gsub(//, "blah", whatever), i.e. empty regexp */ - if (matchstart == matchend && matchend < text + textlen) { - *bp++ = *matchend; - matchend++; - } - textlen = text + textlen - matchend; - text = matchend; - - if ((current >= how_many && !global) - || ((long) textlen <= 0 && matchstart == matchend) - || research(rp, t->stptr, text - t->stptr, textlen, TRUE) == -1) - break; - - } - sofar = bp - buf; - if (buflen - sofar - textlen - 1) { - buflen = sofar + textlen + 2; - erealloc(buf, char *, buflen, "sub_common"); - bp = buf + sofar; - } - for (scan = matchend; scan < text + textlen; scan++) - *bp++ = *scan; - *bp = '\0'; - textlen = bp - buf; - free(t->stptr); - t->stptr = buf; - t->stlen = textlen; - - free_temp(s); - if (matches > 0 && lhs) { - if (priv) { - unref(*lhs); - *lhs = t; - } - if (after_assign != NULL) - (*after_assign)(); - t->flags &= ~(NUM|NUMBER); - } - return tmp_number((AWKNUM) matches); -} - -/* do_gsub --- global substitution */ - -NODE * -do_gsub(NODE *tree) -{ - return sub_common(tree, -1, FALSE); -} - -/* do_sub --- single substitution */ - -NODE * -do_sub(NODE *tree) -{ - return sub_common(tree, 1, FALSE); -} - -/* do_gensub --- fix up the tree for sub_common for the gensub function */ - -NODE * -do_gensub(NODE *tree) -{ - NODE n1, n2, n3, *t, *tmp, *target, *ret; - long how_many = 1; /* default is one substitution */ - double d; - - /* - * We have to pull out the value of the global flag, and - * build up a tree without the flag in it, turning it into the - * kind of tree that sub_common() expects. It helps to draw - * a picture of this ... - */ - n1 = *tree; - n2 = *(tree->rnode); - n1.rnode = & n2; - - t = tree_eval(n2.rnode->lnode); /* value of global flag */ - - tmp = force_string(tree_eval(n2.rnode->rnode->lnode)); /* target */ - - /* - * We make copy of the original target string, and pass that - * in to sub_common() as the target to make the substitution in. - * We will then return the result string as the return value of - * this function. 
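sub_common() above is the engine behind sub(), gsub() and gensub(): it expands & in the replacement to the matched text, honours the backslash escapes enumerated in the scanning loop, and uses the lastmatchnonzero flag to get the empty-match cases from the mygsub() comment right. A few illustrative calls, not taken from the deleted sources, with the expected output in comments:

    gawk 'BEGIN {
        s1 = "price: 15"; sub(/[0-9]+/, "<&>", s1);   print s1   # price: <15>
        s2 = "price: 15"; sub(/[0-9]+/, "<\\&>", s2); print s2   # price: <&>
        s3 = "abc";       print gsub(/x*/, "-", s3), s3          # 4 -a-b-c-
    }'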
- */ - target = make_string(tmp->stptr, tmp->stlen); - free_temp(tmp); - - n3 = *(n2.rnode->rnode); - n3.lnode = target; - n2.rnode = & n3; - - if ((t->flags & (STR|STRING)) != 0) { - if (t->stlen > 0 && (t->stptr[0] == 'g' || t->stptr[0] == 'G')) - how_many = -1; - else - how_many = 1; - } else { - d = force_number(t); - if (d > 0) - how_many = d; - else - how_many = 1; - if (d == 0) - warning(_("gensub: 3rd argument of 0 treated as 1")); - } - - free_temp(t); - - ret = sub_common(&n1, how_many, TRUE); - free_temp(ret); - - /* - * Note that we don't care what sub_common() returns, since the - * easiest thing for the programmer is to return the string, even - * if no substitutions were done. - */ - target->flags |= TEMP; - return target; -} - -#ifdef GFMT_WORKAROUND -/* - * printf's %g format [can't rely on gcvt()] - * caveat: don't use as argument to *printf()! - * 'format' string HAS to be of "*.*g" kind, or we bomb! - */ -static void -sgfmt(char *buf, /* return buffer; assumed big enough to hold result */ - const char *format, - int alt, /* use alternate form flag */ - int fwidth, /* field width in a format */ - int prec, /* indicates desired significant digits, not decimal places */ - double g) /* value to format */ -{ - char dform[40]; - register char *gpos; - register char *d, *e, *p; - int again = FALSE; - - strncpy(dform, format, sizeof dform - 1); - dform[sizeof dform - 1] = '\0'; - gpos = strrchr(dform, '.'); - - if (g == 0.0 && ! alt) { /* easy special case */ - *gpos++ = 'd'; - *gpos = '\0'; - (void) sprintf(buf, dform, fwidth, 0); - return; - } - - /* advance to location of 'g' in the format */ - while (*gpos && *gpos != 'g' && *gpos != 'G') - gpos++; - - if (prec <= 0) /* negative precision is ignored */ - prec = (prec < 0 ? DEFAULT_G_PRECISION : 1); - - if (*gpos == 'G') - again = TRUE; - /* start with 'e' format (it'll provide nice exponent) */ - *gpos = 'e'; - prec--; - (void) sprintf(buf, dform, fwidth, prec, g); - if ((e = strrchr(buf, 'e')) != NULL) { /* find exponent */ - int expn = atoi(e+1); /* fetch exponent */ - if (expn >= -4 && expn <= prec) { /* per K&R2, B1.2 */ - /* switch to 'f' format and re-do */ - *gpos = 'f'; - prec -= expn; /* decimal precision */ - (void) sprintf(buf, dform, fwidth, prec, g); - e = buf + strlen(buf); - while (*--e == ' ') - continue; - e++; - } - else if (again) - *gpos = 'E'; - - /* if 'alt' in force, then trailing zeros are not removed */ - if (! alt && (d = strrchr(buf, '.')) != NULL) { - /* throw away an excess of precision */ - for (p = e; p > d && *--p == '0'; ) - prec--; - if (d == p) - prec--; - if (prec < 0) - prec = 0; - /* and do that once again */ - again = TRUE; - } - if (again) - (void) sprintf(buf, dform, fwidth, prec, g); - } -} -#endif /* GFMT_WORKAROUND */ - -#define BITS_PER_BYTE 8 /* if not true, you lose. too bad. 
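do_gensub() above only massages the argument list for sub_common(): a third argument of "g" or "G" requests every occurrence, a positive number N selects only the Nth occurrence, 0 draws a warning and is treated as 1, and with backdigs set, \1 through \9 in the replacement refer to the corresponding subexpressions. Illustrative only, assuming a gawk 3.1-compatible binary:

    gawk 'BEGIN {
        print gensub(/([0-9]+)\.([0-9]+)/, "\\2.\\1", "g", "1.2 and 3.4")
        # -> 2.1 and 4.3   (all occurrences, subexpressions swapped)
        print gensub(/o/, "O", 2, "foo bar boo")
        # -> foO bar boo   (only the second match is replaced)
    }'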
*/ - -/* do_lshift --- perform a << operation */ - -NODE * -do_lshift(NODE *tree) -{ - NODE *s1, *s2; - unsigned long uval, ushift, res; - AWKNUM val, shift; - - s1 = tree_eval(tree->lnode); - s2 = tree_eval(tree->rnode->lnode); - val = force_number(s1); - shift = force_number(s2); - - if (do_lint) { - if ((s1->flags & (NUM|NUMBER)) == 0) - lintwarn(_("lshift: received non-numeric first argument")); - if ((s2->flags & (NUM|NUMBER)) == 0) - lintwarn(_("lshift: received non-numeric first argument")); - if (val < 0 || shift < 0) - lintwarn(_("lshift(%lf, %lf): negative values will give strange results"), val, shift); - if (double_to_int(val) != val || double_to_int(shift) != shift) - lintwarn(_("lshift(%lf, %lf): fractional values will be truncated"), val, shift); - if (shift > (sizeof(unsigned long) * BITS_PER_BYTE)) - lintwarn(_("lshift(%lf, %lf): too large shift value will give strange results"), val, shift); - } - - free_temp(s1); - free_temp(s2); - - uval = (unsigned long) val; - ushift = (unsigned long) shift; - - res = uval << ushift; - return tmp_number((AWKNUM) res); -} - -/* do_rshift --- perform a >> operation */ - -NODE * -do_rshift(NODE *tree) -{ - NODE *s1, *s2; - unsigned long uval, ushift, res; - AWKNUM val, shift; - - s1 = tree_eval(tree->lnode); - s2 = tree_eval(tree->rnode->lnode); - val = force_number(s1); - shift = force_number(s2); - - if (do_lint) { - if ((s1->flags & (NUM|NUMBER)) == 0) - lintwarn(_("rshift: received non-numeric first argument")); - if ((s2->flags & (NUM|NUMBER)) == 0) - lintwarn(_("rshift: received non-numeric first argument")); - if (val < 0 || shift < 0) - lintwarn(_("rshift(%lf, %lf): negative values will give strange results"), val, shift); - if (double_to_int(val) != val || double_to_int(shift) != shift) - lintwarn(_("rshift(%lf, %lf): fractional values will be truncated"), val, shift); - if (shift > (sizeof(unsigned long) * BITS_PER_BYTE)) - lintwarn(_("rshift(%lf, %lf): too large shift value will give strange results"), val, shift); - } - - free_temp(s1); - free_temp(s2); - - uval = (unsigned long) val; - ushift = (unsigned long) shift; - - res = uval >> ushift; - return tmp_number((AWKNUM) res); -} - -/* do_and --- perform an & operation */ - -NODE * -do_and(NODE *tree) -{ - NODE *s1, *s2; - unsigned long uleft, uright, res; - AWKNUM left, right; - - s1 = tree_eval(tree->lnode); - s2 = tree_eval(tree->rnode->lnode); - left = force_number(s1); - right = force_number(s2); - - if (do_lint) { - if ((s1->flags & (NUM|NUMBER)) == 0) - lintwarn(_("and: received non-numeric first argument")); - if ((s2->flags & (NUM|NUMBER)) == 0) - lintwarn(_("and: received non-numeric first argument")); - if (left < 0 || right < 0) - lintwarn(_("and(%lf, %lf): negative values will give strange results"), left, right); - if (double_to_int(left) != left || double_to_int(right) != right) - lintwarn(_("and(%lf, %lf): fractional values will be truncated"), left, right); - } - - free_temp(s1); - free_temp(s2); - - uleft = (unsigned long) left; - uright = (unsigned long) right; - - res = uleft & uright; - return tmp_number((AWKNUM) res); -} - -/* do_or --- perform an | operation */ - -NODE * -do_or(NODE *tree) -{ - NODE *s1, *s2; - unsigned long uleft, uright, res; - AWKNUM left, right; - - s1 = tree_eval(tree->lnode); - s2 = tree_eval(tree->rnode->lnode); - left = force_number(s1); - right = force_number(s2); - - if (do_lint) { - if ((s1->flags & (NUM|NUMBER)) == 0) - lintwarn(_("or: received non-numeric first argument")); - if ((s2->flags & (NUM|NUMBER)) == 0) - 
lintwarn(_("or: received non-numeric first argument")); - if (left < 0 || right < 0) - lintwarn(_("or(%lf, %lf): negative values will give strange results"), left, right); - if (double_to_int(left) != left || double_to_int(right) != right) - lintwarn(_("or(%lf, %lf): fractional values will be truncated"), left, right); - } - - free_temp(s1); - free_temp(s2); - - uleft = (unsigned long) left; - uright = (unsigned long) right; - - res = uleft | uright; - return tmp_number((AWKNUM) res); -} - -/* do_xor --- perform an ^ operation */ - -NODE * -do_xor(NODE *tree) -{ - NODE *s1, *s2; - unsigned long uleft, uright, res; - AWKNUM left, right; - - s1 = tree_eval(tree->lnode); - s2 = tree_eval(tree->rnode->lnode); - left = force_number(s1); - right = force_number(s2); - - if (do_lint) { - if ((s1->flags & (NUM|NUMBER)) == 0) - lintwarn(_("xor: received non-numeric first argument")); - if ((s2->flags & (NUM|NUMBER)) == 0) - lintwarn(_("xor: received non-numeric first argument")); - if (left < 0 || right < 0) - lintwarn(_("xor(%lf, %lf): negative values will give strange results"), left, right); - if (double_to_int(left) != left || double_to_int(right) != right) - lintwarn(_("xor(%lf, %lf): fractional values will be truncated"), left, right); - } - - free_temp(s1); - free_temp(s2); - - uleft = (unsigned long) left; - uright = (unsigned long) right; - - res = uleft ^ uright; - return tmp_number((AWKNUM) res); -} - -/* do_compl --- perform a ~ operation */ - -NODE * -do_compl(NODE *tree) -{ - NODE *tmp; - double d; - unsigned long uval; - - tmp = tree_eval(tree->lnode); - d = force_number(tmp); - free_temp(tmp); - - if (do_lint) { - if ((tmp->flags & (NUM|NUMBER)) == 0) - lintwarn(_("compl: received non-numeric argument")); - if (d < 0) - lintwarn(_("compl(%lf): negative value will give strange results"), d); - if (double_to_int(d) != d) - lintwarn(_("compl(%lf): fractional value will be truncated"), d); - } - - uval = (unsigned long) d; - uval = ~ uval; - return tmp_number((AWKNUM) uval); -} - -/* do_strtonum --- the strtonum function */ - -NODE * -do_strtonum(NODE *tree) -{ - NODE *tmp; - double d; - - tmp = tree_eval(tree->lnode); - - if ((tmp->flags & (NUM|NUMBER)) != 0) - d = (double) force_number(tmp); - else if (isnondecimal(tmp->stptr)) - d = nondec2awknum(tmp->stptr, tmp->stlen); - else - d = (double) force_number(tmp); - - free_temp(tmp); - return tmp_number((AWKNUM) d); -} - -/* nondec2awknum --- convert octal or hex value to double */ - -/* - * Because of awk's concatenation rules and the way awk.y:yylex() - * collects a number, this routine has to be willing to stop on the - * first invalid character. - */ - -AWKNUM -nondec2awknum(char *str, size_t len) -{ - AWKNUM retval = 0.0; - char save; - short val; - char *start = str; - - if (*str == '0' && (str[1] == 'x' || str[1] == 'X')) { - assert(len > 2); - - for (str += 2, len -= 2; len > 0; len--, str++) { - switch (*str) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - val = *str - '0'; - break; - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - val = *str - 'a' + 10; - break; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - val = *str - 'A' + 10; - break; - default: - goto done; - } - retval = (retval * 16) + val; - } - } else if (*str == '0') { - if (strchr(str, '8') != NULL || strchr(str, '9') != NULL) - goto decimal; - for (; len > 0; len--) { - if (! 
ISDIGIT(*str)) - goto done; - else if (*str == '8' || *str == '9') { - str = start; - goto decimal; - } - retval = (retval * 8) + (*str - '0'); - str++; - } - } else { -decimal: - save = str[len]; - retval = strtod(str, NULL); - str[len] = save; - } -done: - return retval; -} - -/* do_dcgettext --- handle i18n translations */ - -/* - * awk usage is - * - * str = dcgettext(string [, domain [, category]]) - * - * Default domain is TEXTDOMAIN, default category is LC_MESSAGES. - */ - -NODE * -do_dcgettext(NODE *tree) -{ - NODE *tmp, *t1, *t2; - char *string; - char *the_result; -#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT - int lc_cat = -1; - char *category, *domain; - static struct category_table { - int val; - char *name; - } cat_tab[] = { -#ifdef LC_ALL - { LC_ALL, "LC_ALL" }, -#endif /* LC_ALL */ -#ifdef LC_COLLATE - { LC_COLLATE, "LC_COLLATE" }, -#endif /* LC_COLLATE */ -#ifdef LC_CTYPE - { LC_CTYPE, "LC_CTYPE" }, -#endif /* LC_CTYPE */ -#ifdef LC_MESSAGES - { LC_MESSAGES, "LC_MESSAGES" }, -#endif /* LC_MESSAGES */ -#ifdef LC_MONETARY - { LC_MONETARY, "LC_MONETARY" }, -#endif /* LC_MONETARY */ -#ifdef LC_NUMERIC - { LC_NUMERIC, "LC_NUMERIC" }, -#endif /* LC_NUMERIC */ -#ifdef LC_RESPONSE - { LC_RESPONSE, "LC_RESPONSE" }, -#endif /* LC_RESPONSE */ -#ifdef LC_TIME - { LC_TIME, "LC_TIME" }, -#endif /* LC_TIME */ - }; -#endif /* ENABLE_NLS */ - - tmp = tree->lnode; /* first argument */ - t1 = force_string(tree_eval(tmp)); - string = t1->stptr; - - t2 = NULL; -#if ENABLE_NLS && HAVE_LC_MESSAGES && HAVE_DCGETTEXT - tree = tree->rnode; /* second argument */ - if (tree != NULL) { - tmp = tree->lnode; - t2 = force_string(tree_eval(tmp)); - domain = t2->stptr; - } else - domain = TEXTDOMAIN; - - if (tree != NULL && tree->rnode != NULL) { /* third argument */ - int low, high, i, mid; - NODE *t; - - tree = tree->rnode; - tmp = tree->lnode; - t = force_string(tree_eval(tmp)); - category = t->stptr; - - /* binary search the table */ - low = 0; - high = (sizeof(cat_tab) / sizeof(cat_tab[0])) - 1; - while (low <= high) { - mid = (low + high) / 2; - i = strcmp(category, cat_tab[mid].name); - - if (i < 0) /* category < mid */ - high = mid - 1; - else if (i > 0) /* category > mid */ - low = mid + 1; - else { - lc_cat = cat_tab[mid].val; - break; - } - } - if (lc_cat == -1) /* not there */ - fatal(_("dcgettext: `%s' is not a valid locale category"), category); - - free_temp(t); - } else - lc_cat = LC_MESSAGES; - - the_result = dcgettext(domain, string, lc_cat); -#else - the_result = string; -#endif - free_temp(t1); - if (t2 != NULL) - free_temp(t2); - - return tmp_string(the_result, strlen(the_result)); -} - -/* do_bindtextdomain --- set the directory for a text domain */ - -/* - * awk usage is - * - * binding = bindtextdomain(dir [, domain]) - * - * If dir is "", pass NULL to C version. - * Default domain is TEXTDOMAIN. 
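The bit-manipulation built-ins above (do_lshift, do_rshift, do_and, do_or, do_xor, do_compl) all truncate their operands to unsigned long before operating, and do_strtonum() routes leading-0x and leading-0 strings through nondec2awknum() so they are read as hexadecimal or octal. Roughly, for a stock gawk (the width of compl()'s result depends on the platform's unsigned long; this sample is not from the deleted sources):

    gawk 'BEGIN {
        print lshift(1, 4), rshift(16, 2)        # 16 4
        print and(6, 3), or(6, 3), xor(6, 3)     # 2 7 5
        printf "%x\n", compl(0)                  # ffffffff on 32-bit longs
        print strtonum("0x1A"), strtonum("011"), strtonum("11")   # 26 9 11
    }'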
- */ - -NODE * -do_bindtextdomain(NODE *tree) -{ - NODE *tmp, *t1, *t2; - char *directory, *domain; - char *the_result; - - t1 = t2 = NULL; - /* set defaults */ - directory = NULL; - domain = TEXTDOMAIN; - - tmp = tree->lnode; /* first argument */ - t1 = force_string(tree_eval(tmp)); - if (t1->stlen > 0) - directory = t1->stptr; - - tree = tree->rnode; /* second argument */ - if (tree != NULL) { - tmp = tree->lnode; - t2 = force_string(tree_eval(tmp)); - domain = t2->stptr; - } - - the_result = bindtextdomain(domain, directory); - - free_temp(t1); - if (t2 != NULL) - free_temp(t2); - - return tmp_string(the_result, strlen(the_result)); -} diff --git a/contrib/awk/configh.in b/contrib/awk/configh.in deleted file mode 100644 index 8c8a517..0000000 --- a/contrib/awk/configh.in +++ /dev/null @@ -1,321 +0,0 @@ -/* configh.in. Generated automatically from configure.in by autoheader. */ -/* - * acconfig.h -- configuration definitions for gawk. - */ - -/* - * Copyright (C) 1995-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - - -/* Define if on AIX 3. - System headers sometimes define this. - We just want to avoid a redefinition error message. */ -#ifndef _ALL_SOURCE -#undef _ALL_SOURCE -#endif - -/* Define if using alloca.c. */ -#undef C_ALLOCA - -/* Define if type char is unsigned and you are not using gcc. */ -#ifndef __CHAR_UNSIGNED__ -#undef __CHAR_UNSIGNED__ -#endif - -/* Define to empty if the keyword does not work. */ -#undef const - -/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. - This function is required for alloca.c support on those systems. */ -#undef CRAY_STACKSEG_END - -/* Define to the type of elements in the array set by `getgroups'. - Usually this is either `int' or `gid_t'. */ -#undef GETGROUPS_T - -/* Define if the `getpgrp' function takes no argument. */ -#undef GETPGRP_VOID - -/* Define to `int' if <sys/types.h> doesn't define. */ -#undef gid_t - -/* Define if you have alloca, as a function or macro. */ -#undef HAVE_ALLOCA - -/* Define if you have <alloca.h> and it should be used (not on Ultrix). */ -#undef HAVE_ALLOCA_H - -/* Define if you don't have vprintf but do have _doprnt. */ -#undef HAVE_DOPRNT - -/* Define if you have a working `mmap' system call. */ -#undef HAVE_MMAP - -/* Define if your struct stat has st_blksize. */ -#undef HAVE_ST_BLKSIZE - -/* Define if you have the ANSI # stringizing operator in cpp. */ -#undef HAVE_STRINGIZE - -/* Define if you have <sys/wait.h> that is POSIX.1 compatible. */ -#undef HAVE_SYS_WAIT_H - -/* Define if your struct tm has tm_zone. */ -#undef HAVE_TM_ZONE - -/* Define if you don't have tm_zone but do have the external array - tzname. */ -#undef HAVE_TZNAME - -/* Define if you have the vprintf function. 
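For reference, do_dcgettext() and do_bindtextdomain() above back gawk 3.1's runtime i18n built-ins described in their usage comments: dcgettext(string [, domain [, category]]) looks a message up in a catalog, defaulting to the TEXTDOMAIN variable and LC_MESSAGES, and bindtextdomain(directory [, domain]) says where the .mo catalogs live. A hedged sketch of how a script would have used them; the domain name and directory here are invented:

    gawk 'BEGIN {
        TEXTDOMAIN = "myprog"                       # hypothetical message domain
        bindtextdomain("/usr/local/share/locale")   # hypothetical catalog directory
        print dcgettext("hello, world")             # translated if a catalog exists,
                                                    # otherwise returned unchanged
        print dcgettext("hello, world", "myprog", "LC_MESSAGES")
    }'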
*/ -#undef HAVE_VPRINTF - -/* Define as __inline if that's what the C compiler calls it. */ -#undef inline - -/* Define if on MINIX. */ -#undef _MINIX - -/* Define to `long' if <sys/types.h> doesn't define. */ -#undef off_t - -/* Define to `int' if <sys/types.h> doesn't define. */ -#undef pid_t - -/* Define if the system does not provide POSIX.1 features except - with this defined. */ -#undef _POSIX_1_SOURCE - -/* Define if you need to in order for stat and other things to work. */ -#undef _POSIX_SOURCE - -/* Define as the return type of signal handlers (int or void). */ -#undef RETSIGTYPE - -/* Define to `unsigned' if <sys/types.h> doesn't define. */ -#undef size_t - -/* If using the C implementation of alloca, define if you know the - direction of stack growth for your system; otherwise it will be - automatically deduced at run-time. - STACK_DIRECTION > 0 => grows toward higher addresses - STACK_DIRECTION < 0 => grows toward lower addresses - STACK_DIRECTION = 0 => direction of growth unknown - */ -#undef STACK_DIRECTION - -/* Define if you have the ANSI C header files. */ -#undef STDC_HEADERS - -/* Define if you can safely include both <sys/time.h> and <time.h>. */ -#undef TIME_WITH_SYS_TIME - -/* Define if your <sys/time.h> declares struct tm. */ -#undef TM_IN_SYS_TIME - -/* Define to `int' if <sys/types.h> doesn't define. */ -#undef uid_t - -#undef REGEX_MALLOC /* use malloc instead of alloca in regex.c */ -#undef SPRINTF_RET /* return type of sprintf */ -#undef HAVE_MKTIME /* we have the mktime function */ -#undef HAVE_SOCKETS /* we have sockets on this system */ -#undef HAVE_PORTALS /* we have portals on /p on this system */ -#undef DYNAMIC /* allow dynamic addition of builtins */ -#undef STRTOD_NOT_C89 /* strtod doesn't have C89 semantics */ -#undef ssize_t /* signed version of size_t */ - -/* Define if you have the __argz_count function. */ -#undef HAVE___ARGZ_COUNT - -/* Define if you have the __argz_next function. */ -#undef HAVE___ARGZ_NEXT - -/* Define if you have the __argz_stringify function. */ -#undef HAVE___ARGZ_STRINGIFY - -/* Define if you have the alarm function. */ -#undef HAVE_ALARM - -/* Define if you have the dcgettext function. */ -#undef HAVE_DCGETTEXT - -/* Define if you have the fmod function. */ -#undef HAVE_FMOD - -/* Define if you have the getcwd function. */ -#undef HAVE_GETCWD - -/* Define if you have the getpagesize function. */ -#undef HAVE_GETPAGESIZE - -/* Define if you have the memcmp function. */ -#undef HAVE_MEMCMP - -/* Define if you have the memcpy function. */ -#undef HAVE_MEMCPY - -/* Define if you have the memset function. */ -#undef HAVE_MEMSET - -/* Define if you have the munmap function. */ -#undef HAVE_MUNMAP - -/* Define if you have the putenv function. */ -#undef HAVE_PUTENV - -/* Define if you have the setenv function. */ -#undef HAVE_SETENV - -/* Define if you have the setlocale function. */ -#undef HAVE_SETLOCALE - -/* Define if you have the stpcpy function. */ -#undef HAVE_STPCPY - -/* Define if you have the strcasecmp function. */ -#undef HAVE_STRCASECMP - -/* Define if you have the strchr function. */ -#undef HAVE_STRCHR - -/* Define if you have the strdup function. */ -#undef HAVE_STRDUP - -/* Define if you have the strerror function. */ -#undef HAVE_STRERROR - -/* Define if you have the strftime function. */ -#undef HAVE_STRFTIME - -/* Define if you have the strncasecmp function. */ -#undef HAVE_STRNCASECMP - -/* Define if you have the strtod function. */ -#undef HAVE_STRTOD - -/* Define if you have the system function. */ -#undef HAVE_SYSTEM - -/* Define if you have the tzset function. 
*/ -#undef HAVE_TZSET - -/* Define if you have the <argz.h> header file. */ -#undef HAVE_ARGZ_H - -/* Define if you have the <fcntl.h> header file. */ -#undef HAVE_FCNTL_H - -/* Define if you have the <libintl.h> header file. */ -#undef HAVE_LIBINTL_H - -/* Define if you have the <limits.h> header file. */ -#undef HAVE_LIMITS_H - -/* Define if you have the <locale.h> header file. */ -#undef HAVE_LOCALE_H - -/* Define if you have the <malloc.h> header file. */ -#undef HAVE_MALLOC_H - -/* Define if you have the <mcheck.h> header file. */ -#undef HAVE_MCHECK_H - -/* Define if you have the <memory.h> header file. */ -#undef HAVE_MEMORY_H - -/* Define if you have the <netdb.h> header file. */ -#undef HAVE_NETDB_H - -/* Define if you have the <netinet/in.h> header file. */ -#undef HAVE_NETINET_IN_H - -/* Define if you have the <nl_types.h> header file. */ -#undef HAVE_NL_TYPES_H - -/* Define if you have the <signum.h> header file. */ -#undef HAVE_SIGNUM_H - -/* Define if you have the <stdarg.h> header file. */ -#undef HAVE_STDARG_H - -/* Define if you have the <stdlib.h> header file. */ -#undef HAVE_STDLIB_H - -/* Define if you have the <string.h> header file. */ -#undef HAVE_STRING_H - -/* Define if you have the <strings.h> header file. */ -#undef HAVE_STRINGS_H - -/* Define if you have the <sys/param.h> header file. */ -#undef HAVE_SYS_PARAM_H - -/* Define if you have the <sys/socket.h> header file. */ -#undef HAVE_SYS_SOCKET_H - -/* Define if you have the <sys/time.h> header file. */ -#undef HAVE_SYS_TIME_H - -/* Define if you have the <unistd.h> header file. */ -#undef HAVE_UNISTD_H - -/* Define if you have the i library (-li). */ -#undef HAVE_LIBI - -/* Define if you have the intl library (-lintl). */ -#undef HAVE_LIBINTL - -/* Define if you have the m library (-lm). */ -#undef HAVE_LIBM - -/* Name of package */ -#undef PACKAGE - -/* Version number of package */ -#undef VERSION - -/* Number of bits in a file offset, on hosts where this is settable. */ -#undef _FILE_OFFSET_BITS - -/* Define for large files, on AIX-style hosts. */ -#undef _LARGE_FILES - -/* Define if compiler has function prototypes */ -#undef PROTOTYPES - -/* Define to 1 if you have the stpcpy function. */ -#undef HAVE_STPCPY - -/* Define if your locale.h file contains LC_MESSAGES. */ -#undef HAVE_LC_MESSAGES - -/* Define to 1 if NLS is requested. */ -#undef ENABLE_NLS - -/* Define to 1 if you have gettext and don't want to use GNU gettext. */ -#undef HAVE_GETTEXT - -/* Define as 1 if you have catgets and don't want to use GNU gettext. */ -#undef HAVE_CATGETS - - -#include <custom.h> /* overrides for stuff autoconf can't deal with */ diff --git a/contrib/awk/configure b/contrib/awk/configure deleted file mode 100755 index e4ef6d7..0000000 --- a/contrib/awk/configure +++ /dev/null @@ -1,6490 +0,0 @@ -#! /bin/sh - -# Guess values for system-dependent variables and create Makefiles. -# Generated automatically using autoconf version 2.13 -# Copyright (C) 1992, 93, 94, 95, 96 Free Software Foundation, Inc. -# -# This configure script is free software; the Free Software Foundation -# gives unlimited permission to copy, distribute and modify it. 
- -# Defaults: -ac_help= -ac_default_prefix=/usr/local -# Any additions from configure.in: -ac_help="$ac_help - --disable-dependency-tracking Speeds up one-time builds - --enable-dependency-tracking Do not reject slow dependency extractors" -ac_help="$ac_help - --enable-portals Enable /p as path prefix for portals" -ac_help="$ac_help - --disable-largefile omit support for large files" -ac_help="$ac_help - --disable-nls do not use Native Language Support" -ac_help="$ac_help - --with-included-gettext use the GNU gettext library included here" -ac_help="$ac_help - --with-catgets use catgets functions if available" - -# Initialize some variables set by options. -# The variables have the same names as the options, with -# dashes changed to underlines. -build=NONE -cache_file=./config.cache -exec_prefix=NONE -host=NONE -no_create= -nonopt=NONE -no_recursion= -prefix=NONE -program_prefix=NONE -program_suffix=NONE -program_transform_name=s,x,x, -silent= -site= -srcdir= -target=NONE -verbose= -x_includes=NONE -x_libraries=NONE -bindir='${exec_prefix}/bin' -sbindir='${exec_prefix}/sbin' -libexecdir='${exec_prefix}/libexec' -datadir='${prefix}/share' -sysconfdir='${prefix}/etc' -sharedstatedir='${prefix}/com' -localstatedir='${prefix}/var' -libdir='${exec_prefix}/lib' -includedir='${prefix}/include' -oldincludedir='/usr/include' -infodir='${prefix}/info' -mandir='${prefix}/man' - -# Initialize some other variables. -subdirs= -MFLAGS= MAKEFLAGS= -SHELL=${CONFIG_SHELL-/bin/sh} -# Maximum number of lines to put in a shell here document. -ac_max_here_lines=12 - -ac_prev= -for ac_option -do - - # If the previous option needs an argument, assign it. - if test -n "$ac_prev"; then - eval "$ac_prev=\$ac_option" - ac_prev= - continue - fi - - case "$ac_option" in - -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) ac_optarg= ;; - esac - - # Accept the important Cygnus configure options, so we can diagnose typos. - - case "$ac_option" in - - -bindir | --bindir | --bindi | --bind | --bin | --bi) - ac_prev=bindir ;; - -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) - bindir="$ac_optarg" ;; - - -build | --build | --buil | --bui | --bu) - ac_prev=build ;; - -build=* | --build=* | --buil=* | --bui=* | --bu=*) - build="$ac_optarg" ;; - - -cache-file | --cache-file | --cache-fil | --cache-fi \ - | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) - ac_prev=cache_file ;; - -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ - | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) - cache_file="$ac_optarg" ;; - - -datadir | --datadir | --datadi | --datad | --data | --dat | --da) - ac_prev=datadir ;; - -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ - | --da=*) - datadir="$ac_optarg" ;; - - -disable-* | --disable-*) - ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - eval "enable_${ac_feature}=no" ;; - - -enable-* | --enable-*) - ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. 
- if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } - fi - ac_feature=`echo $ac_feature| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "enable_${ac_feature}='$ac_optarg'" ;; - - -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ - | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ - | --exec | --exe | --ex) - ac_prev=exec_prefix ;; - -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ - | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ - | --exec=* | --exe=* | --ex=*) - exec_prefix="$ac_optarg" ;; - - -gas | --gas | --ga | --g) - # Obsolete; use --with-gas. - with_gas=yes ;; - - -help | --help | --hel | --he) - # Omit some internal or obsolete options to make the list less imposing. - # This message is too long to be a string in the A/UX 3.1 sh. - cat << EOF -Usage: configure [options] [host] -Options: [defaults in brackets after descriptions] -Configuration: - --cache-file=FILE cache test results in FILE - --help print this message - --no-create do not create output files - --quiet, --silent do not print \`checking...' messages - --version print the version of autoconf that created configure -Directory and file names: - --prefix=PREFIX install architecture-independent files in PREFIX - [$ac_default_prefix] - --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX - [same as prefix] - --bindir=DIR user executables in DIR [EPREFIX/bin] - --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] - --libexecdir=DIR program executables in DIR [EPREFIX/libexec] - --datadir=DIR read-only architecture-independent data in DIR - [PREFIX/share] - --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] - --sharedstatedir=DIR modifiable architecture-independent data in DIR - [PREFIX/com] - --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] - --libdir=DIR object code libraries in DIR [EPREFIX/lib] - --includedir=DIR C header files in DIR [PREFIX/include] - --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] - --infodir=DIR info documentation in DIR [PREFIX/info] - --mandir=DIR man documentation in DIR [PREFIX/man] - --srcdir=DIR find the sources in DIR [configure dir or ..] 
- --program-prefix=PREFIX prepend PREFIX to installed program names - --program-suffix=SUFFIX append SUFFIX to installed program names - --program-transform-name=PROGRAM - run sed PROGRAM on installed program names -EOF - cat << EOF -Host type: - --build=BUILD configure for building on BUILD [BUILD=HOST] - --host=HOST configure for HOST [guessed] - --target=TARGET configure for TARGET [TARGET=HOST] -Features and packages: - --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) - --enable-FEATURE[=ARG] include FEATURE [ARG=yes] - --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] - --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) - --x-includes=DIR X include files are in DIR - --x-libraries=DIR X library files are in DIR -EOF - if test -n "$ac_help"; then - echo "--enable and --with options recognized:$ac_help" - fi - exit 0 ;; - - -host | --host | --hos | --ho) - ac_prev=host ;; - -host=* | --host=* | --hos=* | --ho=*) - host="$ac_optarg" ;; - - -includedir | --includedir | --includedi | --included | --include \ - | --includ | --inclu | --incl | --inc) - ac_prev=includedir ;; - -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ - | --includ=* | --inclu=* | --incl=* | --inc=*) - includedir="$ac_optarg" ;; - - -infodir | --infodir | --infodi | --infod | --info | --inf) - ac_prev=infodir ;; - -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) - infodir="$ac_optarg" ;; - - -libdir | --libdir | --libdi | --libd) - ac_prev=libdir ;; - -libdir=* | --libdir=* | --libdi=* | --libd=*) - libdir="$ac_optarg" ;; - - -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ - | --libexe | --libex | --libe) - ac_prev=libexecdir ;; - -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ - | --libexe=* | --libex=* | --libe=*) - libexecdir="$ac_optarg" ;; - - -localstatedir | --localstatedir | --localstatedi | --localstated \ - | --localstate | --localstat | --localsta | --localst \ - | --locals | --local | --loca | --loc | --lo) - ac_prev=localstatedir ;; - -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ - | --localstate=* | --localstat=* | --localsta=* | --localst=* \ - | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) - localstatedir="$ac_optarg" ;; - - -mandir | --mandir | --mandi | --mand | --man | --ma | --m) - ac_prev=mandir ;; - -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) - mandir="$ac_optarg" ;; - - -nfp | --nfp | --nf) - # Obsolete; use --without-fp. 
- with_fp=no ;; - - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) - no_create=yes ;; - - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) - no_recursion=yes ;; - - -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ - | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ - | --oldin | --oldi | --old | --ol | --o) - ac_prev=oldincludedir ;; - -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ - | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ - | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) - oldincludedir="$ac_optarg" ;; - - -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) - ac_prev=prefix ;; - -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) - prefix="$ac_optarg" ;; - - -program-prefix | --program-prefix | --program-prefi | --program-pref \ - | --program-pre | --program-pr | --program-p) - ac_prev=program_prefix ;; - -program-prefix=* | --program-prefix=* | --program-prefi=* \ - | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) - program_prefix="$ac_optarg" ;; - - -program-suffix | --program-suffix | --program-suffi | --program-suff \ - | --program-suf | --program-su | --program-s) - ac_prev=program_suffix ;; - -program-suffix=* | --program-suffix=* | --program-suffi=* \ - | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) - program_suffix="$ac_optarg" ;; - - -program-transform-name | --program-transform-name \ - | --program-transform-nam | --program-transform-na \ - | --program-transform-n | --program-transform- \ - | --program-transform | --program-transfor \ - | --program-transfo | --program-transf \ - | --program-trans | --program-tran \ - | --progr-tra | --program-tr | --program-t) - ac_prev=program_transform_name ;; - -program-transform-name=* | --program-transform-name=* \ - | --program-transform-nam=* | --program-transform-na=* \ - | --program-transform-n=* | --program-transform-=* \ - | --program-transform=* | --program-transfor=* \ - | --program-transfo=* | --program-transf=* \ - | --program-trans=* | --program-tran=* \ - | --progr-tra=* | --program-tr=* | --program-t=*) - program_transform_name="$ac_optarg" ;; - - -q | -quiet | --quiet | --quie | --qui | --qu | --q \ - | -silent | --silent | --silen | --sile | --sil) - silent=yes ;; - - -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) - ac_prev=sbindir ;; - -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ - | --sbi=* | --sb=*) - sbindir="$ac_optarg" ;; - - -sharedstatedir | --sharedstatedir | --sharedstatedi \ - | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ - | --sharedst | --shareds | --shared | --share | --shar \ - | --sha | --sh) - ac_prev=sharedstatedir ;; - -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ - | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ - | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ - | --sha=* | --sh=*) - sharedstatedir="$ac_optarg" ;; - - -site | --site | --sit) - ac_prev=site ;; - -site=* | --site=* | --sit=*) - site="$ac_optarg" ;; - - -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) - ac_prev=srcdir ;; - -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) - srcdir="$ac_optarg" ;; - - -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ - | --syscon | --sysco | --sysc | --sys | 
--sy) - ac_prev=sysconfdir ;; - -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ - | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) - sysconfdir="$ac_optarg" ;; - - -target | --target | --targe | --targ | --tar | --ta | --t) - ac_prev=target ;; - -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) - target="$ac_optarg" ;; - - -v | -verbose | --verbose | --verbos | --verbo | --verb) - verbose=yes ;; - - -version | --version | --versio | --versi | --vers) - echo "configure generated by autoconf version 2.13" - exit 0 ;; - - -with-* | --with-*) - ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - case "$ac_option" in - *=*) ;; - *) ac_optarg=yes ;; - esac - eval "with_${ac_package}='$ac_optarg'" ;; - - -without-* | --without-*) - ac_package=`echo $ac_option|sed -e 's/-*without-//'` - # Reject names that are not valid shell variable names. - if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then - { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } - fi - ac_package=`echo $ac_package| sed 's/-/_/g'` - eval "with_${ac_package}=no" ;; - - --x) - # Obsolete; use --with-x. - with_x=yes ;; - - -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ - | --x-incl | --x-inc | --x-in | --x-i) - ac_prev=x_includes ;; - -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ - | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) - x_includes="$ac_optarg" ;; - - -x-libraries | --x-libraries | --x-librarie | --x-librari \ - | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) - ac_prev=x_libraries ;; - -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ - | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) - x_libraries="$ac_optarg" ;; - - -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } - ;; - - *) - if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then - echo "configure: warning: $ac_option: invalid host type" 1>&2 - fi - if test "x$nonopt" != xNONE; then - { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } - fi - nonopt="$ac_option" - ;; - - esac -done - -if test -n "$ac_prev"; then - { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } -fi - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -# File descriptor usage: -# 0 standard input -# 1 file creation -# 2 errors and warnings -# 3 some systems may open it to /dev/tty -# 4 used on the Kubota Titan -# 6 checking for... messages and results -# 5 compiler messages saved in config.log -if test "$silent" = yes; then - exec 6>/dev/null -else - exec 6>&1 -fi -exec 5>./config.log - -echo "\ -This file contains any messages produced by compilers while -running configure, to aid debugging if configure makes a mistake. -" 1>&5 - -# Strip out --no-create and --no-recursion so they do not pile up. -# Also quote any args containing shell metacharacters. 
-ac_configure_args= -for ac_arg -do - case "$ac_arg" in - -no-create | --no-create | --no-creat | --no-crea | --no-cre \ - | --no-cr | --no-c) ;; - -no-recursion | --no-recursion | --no-recursio | --no-recursi \ - | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; - *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) - ac_configure_args="$ac_configure_args '$ac_arg'" ;; - *) ac_configure_args="$ac_configure_args $ac_arg" ;; - esac -done - -# NLS nuisances. -# Only set these to C if already set. These must not be set unconditionally -# because not all systems understand e.g. LANG=C (notably SCO). -# Fixing LC_MESSAGES prevents Solaris sh from translating var values in `set'! -# Non-C LC_CTYPE values break the ctype check. -if test "${LANG+set}" = set; then LANG=C; export LANG; fi -if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi -if test "${LC_MESSAGES+set}" = set; then LC_MESSAGES=C; export LC_MESSAGES; fi -if test "${LC_CTYPE+set}" = set; then LC_CTYPE=C; export LC_CTYPE; fi - -# confdefs.h avoids OS command line length limits that DEFS can exceed. -rm -rf conftest* confdefs.h -# AIX cpp loses on an empty file, so make sure it contains at least a newline. -echo > confdefs.h - -# A filename unique to this package, relative to the directory that -# configure is in, which we can look for to find out if srcdir is correct. -ac_unique_file=awk.h - -# Find the source files, if location was not specified. -if test -z "$srcdir"; then - ac_srcdir_defaulted=yes - # Try the directory containing this script, then its parent. - ac_prog=$0 - ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` - test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. - srcdir=$ac_confdir - if test ! -r $srcdir/$ac_unique_file; then - srcdir=.. - fi -else - ac_srcdir_defaulted=no -fi -if test ! -r $srcdir/$ac_unique_file; then - if test "$ac_srcdir_defaulted" = yes; then - { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; } - else - { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } - fi -fi -srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` - -# Prefer explicitly selected file to automatically selected ones. -if test -z "$CONFIG_SITE"; then - if test "x$prefix" != xNONE; then - CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" - else - CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" - fi -fi -for ac_site_file in $CONFIG_SITE; do - if test -r "$ac_site_file"; then - echo "loading site script $ac_site_file" - . "$ac_site_file" - fi -done - -if test -r "$cache_file"; then - echo "loading cache $cache_file" - . $cache_file -else - echo "creating cache $cache_file" - > $cache_file -fi - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -ac_exeext= -ac_objext=o -if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then - # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. - if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then - ac_n= ac_c=' -' ac_t=' ' - else - ac_n=-n ac_c= ac_t= - fi -else - ac_n= ac_c='\c' ac_t= -fi - - - -ac_aux_dir= -for ac_dir in $srcdir $srcdir/.. 
$srcdir/../..; do - if test -f $ac_dir/install-sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install-sh -c" - break - elif test -f $ac_dir/install.sh; then - ac_aux_dir=$ac_dir - ac_install_sh="$ac_aux_dir/install.sh -c" - break - fi -done -if test -z "$ac_aux_dir"; then - { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; } -fi -ac_config_guess=$ac_aux_dir/config.guess -ac_config_sub=$ac_aux_dir/config.sub -ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# ./install, which can be erroneously created by make from ./install.sh. -echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 -echo "configure:570: checking for a BSD compatible install" >&5 -if test -z "$INSTALL"; then -if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" - for ac_dir in $PATH; do - # Account for people who put trailing slashes in PATH elements. - case "$ac_dir/" in - /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - if test -f $ac_dir/$ac_prog; then - if test $ac_prog = install && - grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - else - ac_cv_path_install="$ac_dir/$ac_prog -c" - break 2 - fi - fi - done - ;; - esac - done - IFS="$ac_save_IFS" - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL="$ac_cv_path_install" - else - # As a last resort, use the slow shell script. We don't cache a - # path for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the path is relative. - INSTALL="$ac_install_sh" - fi -fi -echo "$ac_t""$INSTALL" 1>&6 - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - -echo $ac_n "checking whether build environment is sane""... $ac_c" 1>&6 -echo "configure:623: checking whether build environment is sane" >&5 -# Just in case -sleep 1 -echo timestamp > conftestfile -# Do `set' in a subshell so we don't clobber the current shell's -# arguments. Must try -L first in case configure is actually a -# symlink; some systems play weird games with the mod time of symlinks -# (eg FreeBSD returns the mod time of the symlink's containing -# directory). -if ( - set X `ls -Lt $srcdir/configure conftestfile 2> /dev/null` - if test "$*" = "X"; then - # -L didn't work. 
- set X `ls -t $srcdir/configure conftestfile` - fi - if test "$*" != "X $srcdir/configure conftestfile" \ - && test "$*" != "X conftestfile $srcdir/configure"; then - - # If neither matched, then we have a broken ls. This can happen - # if, for instance, CONFIG_SHELL is bash and it inherits a - # broken ls alias from the environment. This has actually - # happened. Such a system could not be considered "sane". - { echo "configure: error: ls -t appears to fail. Make sure there is not a broken -alias in your environment" 1>&2; exit 1; } - fi - - test "$2" = conftestfile - ) -then - # Ok. - : -else - { echo "configure: error: newly created file is older than distributed files! -Check your system clock" 1>&2; exit 1; } -fi -rm -f conftest* -echo "$ac_t""yes" 1>&6 -if test "$program_transform_name" = s,x,x,; then - program_transform_name= -else - # Double any \ or $. echo might interpret backslashes. - cat <<\EOF_SED > conftestsed -s,\\,\\\\,g; s,\$,$$,g -EOF_SED - program_transform_name="`echo $program_transform_name|sed -f conftestsed`" - rm -f conftestsed -fi -test "$program_prefix" != NONE && - program_transform_name="s,^,${program_prefix},; $program_transform_name" -# Use a double $ so make ignores it. -test "$program_suffix" != NONE && - program_transform_name="s,\$\$,${program_suffix},; $program_transform_name" - -# sed with no file args requires a program. -test "$program_transform_name" = "" && program_transform_name="s,x,x," - - -test x"${MISSING+set}" = xset || \ - MISSING="\${SHELL} `CDPATH=:; cd $ac_aux_dir && pwd`/missing" -# Use eval to expand $SHELL -if eval "$MISSING --run :"; then - am_missing_run="$MISSING --run " -else - am_missing_run= - am_backtick='`' - echo "configure: warning: ${am_backtick}missing' script is too old or missing" 1>&2 -fi - -for ac_prog in gawk mawk nawk awk -do -# Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:696: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_AWK'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$AWK"; then - ac_cv_prog_AWK="$AWK" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_AWK="$ac_prog" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -AWK="$ac_cv_prog_AWK" -if test -n "$AWK"; then - echo "$ac_t""$AWK" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -test -n "$AWK" && break -done - -echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 -echo "configure:726: checking whether ${MAKE-make} sets \${MAKE}" >&5 -set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftestmake <<\EOF -all: - @echo 'ac_maketemp="${MAKE}"' -EOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. 
-eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` -if test -n "$ac_maketemp"; then - eval ac_cv_prog_make_${ac_make}_set=yes -else - eval ac_cv_prog_make_${ac_make}_set=no -fi -rm -f conftestmake -fi -if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SET_MAKE= -else - echo "$ac_t""no" 1>&6 - SET_MAKE="MAKE=${MAKE-make}" -fi - - -# Check whether --enable-dependency-tracking or --disable-dependency-tracking was given. -if test "${enable_dependency_tracking+set}" = set; then - enableval="$enable_dependency_tracking" - : -fi - -if test "x$enable_dependency_tracking" = xno; then - AMDEP="#" -else - am_depcomp="$ac_aux_dir/depcomp" - if test ! -f "$am_depcomp"; then - AMDEP="#" - else - AMDEP= - fi -fi - -if test -z "$AMDEP"; then - AMDEPBACKSLASH='\' -else - AMDEPBACKSLASH= -fi - - - - - -if test -d .deps || mkdir .deps 2> /dev/null || test -d .deps; then - DEPDIR=.deps -else - DEPDIR=_deps -fi - - -# test to see if srcdir already configured -if test "`CDPATH=:; cd $srcdir && pwd`" != "`pwd`" && - test -f $srcdir/config.status; then - { echo "configure: error: source directory already configured; run "make distclean" there first" 1>&2; exit 1; } -fi - -# Define the identity of the package. -PACKAGE=gawk -VERSION=3.1.0 -cat >> confdefs.h <> confdefs.h <> confdefs.h <<\EOF -#define HAVE_PORTALS 1 -EOF - -fi - - -for ac_prog in 'bison -y' byacc -do -# Extract the first word of "$ac_prog", so it can be a program name with args. -set dummy $ac_prog; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:861: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_YACC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$YACC"; then - ac_cv_prog_YACC="$YACC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_YACC="$ac_prog" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -YACC="$ac_cv_prog_YACC" -if test -n "$YACC"; then - echo "$ac_t""$YACC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -test -n "$YACC" && break -done -test -n "$YACC" || YACC="yacc" - -echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 -echo "configure:892: checking whether ln -s works" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - rm -f conftestdata -if ln -s X conftestdata 2>/dev/null -then - rm -f conftestdata - ac_cv_prog_LN_S="ln -s" -else - ac_cv_prog_LN_S=ln -fi -fi -LN_S="$ac_cv_prog_LN_S" -if test "$ac_cv_prog_LN_S" = "ln -s"; then - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -# Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:915: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. 
- if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="gcc" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:945: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_prog_rejected=no - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test "$ac_dir/$ac_word" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - break - fi - done - IFS="$ac_save_ifs" -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# -gt 0; then - # We chose a different compiler from the bogus one. - # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - set dummy "$ac_dir/$ac_word" "$@" - shift - ac_cv_prog_CC="$@" - fi -fi -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test -z "$CC"; then - case "`uname -s`" in - *win32* | *WIN32*) - # Extract the first word of "cl", so it can be a program name with args. -set dummy cl; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:996: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_CC="cl" - break - fi - done - IFS="$ac_save_ifs" -fi -fi -CC="$ac_cv_prog_CC" -if test -n "$CC"; then - echo "$ac_t""$CC" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - ;; - esac - fi - test -z "$CC" && { echo "configure: error: no acceptable cc found in \$PATH" 1>&2; exit 1; } -fi - -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works""... $ac_c" 1>&6 -echo "configure:1028: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) works" >&5 - -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -cat > conftest.$ac_ext << EOF - -#line 1039 "configure" -#include "confdefs.h" - -main(){return(0);} -EOF -if { (eval echo configure:1044: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - ac_cv_prog_cc_works=yes - # If we can't run a trivial program, we are probably using a cross compiler. - if (./conftest; exit) 2>/dev/null; then - ac_cv_prog_cc_cross=no - else - ac_cv_prog_cc_cross=yes - fi -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - ac_cv_prog_cc_works=no -fi -rm -fr conftest* -ac_ext=c -# CFLAGS is not in ac_cpp because -g, -O, etc. 
are not valid cpp options. -ac_cpp='$CPP $CPPFLAGS' -ac_compile='${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5' -ac_link='${CC-cc} -o conftest${ac_exeext} $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5' -cross_compiling=$ac_cv_prog_cc_cross - -echo "$ac_t""$ac_cv_prog_cc_works" 1>&6 -if test $ac_cv_prog_cc_works = no; then - { echo "configure: error: installation or configuration problem: C compiler cannot create executables." 1>&2; exit 1; } -fi -echo $ac_n "checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler""... $ac_c" 1>&6 -echo "configure:1070: checking whether the C compiler ($CC $CFLAGS $LDFLAGS) is a cross-compiler" >&5 -echo "$ac_t""$ac_cv_prog_cc_cross" 1>&6 -cross_compiling=$ac_cv_prog_cc_cross - -echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 -echo "configure:1075: checking whether we are using GNU C" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.c <&5; (eval $ac_try) 2>&5; }; } | egrep yes >/dev/null 2>&1; then - ac_cv_prog_gcc=yes -else - ac_cv_prog_gcc=no -fi -fi - -echo "$ac_t""$ac_cv_prog_gcc" 1>&6 - -if test $ac_cv_prog_gcc = yes; then - GCC=yes -else - GCC= -fi - -ac_test_CFLAGS="${CFLAGS+set}" -ac_save_CFLAGS="$CFLAGS" -CFLAGS= -echo $ac_n "checking whether ${CC-cc} accepts -g""... $ac_c" 1>&6 -echo "configure:1103: checking whether ${CC-cc} accepts -g" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_cc_g'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - echo 'void f(){}' > conftest.c -if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then - ac_cv_prog_cc_g=yes -else - ac_cv_prog_cc_g=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_prog_cc_g" 1>&6 -if test "$ac_test_CFLAGS" = set; then - CFLAGS="$ac_save_CFLAGS" -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" - else - CFLAGS="-g" - fi -else - if test "$GCC" = yes; then - CFLAGS="-O2" - else - CFLAGS= - fi -fi - - -echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1136: checking how to run the C preprocessor" >&5 -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then -if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - # This must be in double quotes, not single quotes, because CPP may get - # substituted into the Makefile and "${CC-cc}" will confuse make. - CPP="${CC-cc} -E" - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. 
- cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1157: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -E -traditional-cpp" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1174: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -nologo -E" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1191: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP=/lib/cpp -fi -rm -f conftest* -fi -rm -f conftest* -fi -rm -f conftest* - ac_cv_prog_CPP="$CPP" -fi - CPP="$ac_cv_prog_CPP" -else - ac_cv_prog_CPP="$CPP" -fi -echo "$ac_t""$CPP" 1>&6 - - - - - - - -depcc="$CC" -depcpp="$CPP" -echo $ac_n "checking dependency style of $depcc""... $ac_c" 1>&6 -echo "configure:1224: checking dependency style of $depcc" >&5 -if eval "test \"`echo '$''{'am_cv_CC_dependencies_compiler_type'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - -if test -z "$AMDEP"; then - echo '#include "conftest.h"' > conftest.c - echo 'int i;' > conftest.h - - am_cv_CC_dependencies_compiler_type=none - for depmode in `sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < "$am_depcomp"`; do - case "$depmode" in - nosideeffect) - # after this tag, mechanisms are not by side-effect, so they'll - # only be used when explicitly requested - if test "x$enable_dependency_tracking" = xyes; then - continue - else - break - fi - ;; - none) break ;; - esac - # We check with `-c' and `-o' for the sake of the "dashmstdout" - # mode. It turns out that the SunPro C++ compiler does not properly - # handle `-M -o', and we need to detect this. - if depmode="$depmode" \ - source=conftest.c object=conftest.o \ - depfile=conftest.Po tmpdepfile=conftest.TPo \ - $SHELL $am_depcomp $depcc -c conftest.c -o conftest.o >/dev/null 2>&1 && - grep conftest.h conftest.Po > /dev/null 2>&1; then - am_cv_CC_dependencies_compiler_type="$depmode" - break - fi - done - - rm -f conftest.* -else - am_cv_CC_dependencies_compiler_type=none -fi - -fi - -echo "$ac_t""$am_cv_CC_dependencies_compiler_type" 1>&6 -CCDEPMODE="depmode=$am_cv_CC_dependencies_compiler_type" - - -echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 -echo "configure:1272: checking how to run the C preprocessor" >&5 -# On Suns, sometimes $CPP names a directory. -if test -n "$CPP" && test -d "$CPP"; then - CPP= -fi -if test -z "$CPP"; then -if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - # This must be in double quotes, not single quotes, because CPP may get - # substituted into the Makefile and "${CC-cc}" will confuse make. 
- CPP="${CC-cc} -E" - # On the NeXT, cc -E runs the code through the compiler's parser, - # not just through cpp. - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1293: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -E -traditional-cpp" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1310: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP="${CC-cc} -nologo -E" - cat > conftest.$ac_ext < -Syntax Error -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1327: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - : -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CPP=/lib/cpp -fi -rm -f conftest* -fi -rm -f conftest* -fi -rm -f conftest* - ac_cv_prog_CPP="$CPP" -fi - CPP="$ac_cv_prog_CPP" -else - ac_cv_prog_CPP="$CPP" -fi -echo "$ac_t""$CPP" 1>&6 - -# Extract the first word of "ranlib", so it can be a program name with args. -set dummy ranlib; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:1354: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test -n "$RANLIB"; then - ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_prog_RANLIB="ranlib" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" -fi -fi -RANLIB="$ac_cv_prog_RANLIB" -if test -n "$RANLIB"; then - echo "$ac_t""$RANLIB" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - -# This is a hack. Different versions of install on different systems -# are just too different. Chuck it and use install-sh. -# -# If the user supplies $INSTALL, figure they know what they're doing. -if test "x$INSTALL" = "x" -then - INSTALL="$srcdir/install-sh -c" - export INSTALL -fi - -# Find a good install program. We prefer a C program (faster), -# so one script is as good as another. But avoid the broken or -# incompatible versions: -# SysV /etc/install, /usr/sbin/install -# SunOS /usr/etc/install -# IRIX /sbin/install -# AIX /bin/install -# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag -# AFS /usr/afsws/bin/install, which mishandles nonexistent args -# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" -# ./install, which can be erroneously created by make from ./install.sh. -echo $ac_n "checking for a BSD compatible install""... 
$ac_c" 1>&6 -echo "configure:1404: checking for a BSD compatible install" >&5 -if test -z "$INSTALL"; then -if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - IFS="${IFS= }"; ac_save_IFS="$IFS"; IFS=":" - for ac_dir in $PATH; do - # Account for people who put trailing slashes in PATH elements. - case "$ac_dir/" in - /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; - *) - # OSF1 and SCO ODT 3.0 have their own names for install. - # Don't use installbsd from OSF since it installs stuff as root - # by default. - for ac_prog in ginstall scoinst install; do - if test -f $ac_dir/$ac_prog; then - if test $ac_prog = install && - grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then - # AIX install. It has an incompatible calling convention. - : - else - ac_cv_path_install="$ac_dir/$ac_prog -c" - break 2 - fi - fi - done - ;; - esac - done - IFS="$ac_save_IFS" - -fi - if test "${ac_cv_path_install+set}" = set; then - INSTALL="$ac_cv_path_install" - else - # As a last resort, use the slow shell script. We don't cache a - # path for INSTALL within a source directory, because that will - # break other packages using the cache if that directory is - # removed, or if the path is relative. - INSTALL="$ac_install_sh" - fi -fi -echo "$ac_t""$INSTALL" 1>&6 - -# Use test -z because SunOS4 sh mishandles braces in ${var-val}. -# It thinks the first close brace ends the variable substitution. -test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' - -test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL_PROGRAM}' - -test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' - - -echo $ac_n "checking whether ${MAKE-make} sets \${MAKE}""... $ac_c" 1>&6 -echo "configure:1458: checking whether ${MAKE-make} sets \${MAKE}" >&5 -set dummy ${MAKE-make}; ac_make=`echo "$2" | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_prog_make_${ac_make}_set'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftestmake <<\EOF -all: - @echo 'ac_maketemp="${MAKE}"' -EOF -# GNU make sometimes prints "make[1]: Entering...", which would confuse us. -eval `${MAKE-make} -f conftestmake 2>/dev/null | grep temp=` -if test -n "$ac_maketemp"; then - eval ac_cv_prog_make_${ac_make}_set=yes -else - eval ac_cv_prog_make_${ac_make}_set=no -fi -rm -f conftestmake -fi -if eval "test \"`echo '$ac_cv_prog_make_'${ac_make}_set`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SET_MAKE= -else - echo "$ac_t""no" 1>&6 - SET_MAKE="MAKE=${MAKE-make}" -fi - - -# This is mainly for my use during testing and development. -# Yes, it's a bit of a hack. -echo $ac_n "checking for special development options""... $ac_c" 1>&6 -echo "configure:1488: checking for special development options" >&5 -if test -f $srcdir/.developing -then - # add other debug flags as appropriate, save GAWKDEBUG for emergencies - CFLAGS="$CFLAGS -DARRAYDEBUG" - # turn on compiler warnings if we're doing development - if test "$GCC" = yes - then - CFLAGS="$CFLAGS -Wall" - fi - echo "$ac_t""yes" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - -echo $ac_n "checking for AIX""... $ac_c" 1>&6 -echo "configure:1506: checking for AIX" >&5 -cat > conftest.$ac_ext <&5 | - egrep "yes" >/dev/null 2>&1; then - rm -rf conftest* - echo "$ac_t""yes" 1>&6; cat >> confdefs.h <<\EOF -#define _ALL_SOURCE 1 -EOF - -else - rm -rf conftest* - echo "$ac_t""no" 1>&6 -fi -rm -f conftest* - - -echo $ac_n "checking for POSIXized ISC""... 
$ac_c" 1>&6 -echo "configure:1530: checking for POSIXized ISC" >&5 -if test -d /etc/conf/kconfig.d && - grep _POSIX_VERSION /usr/include/sys/unistd.h >/dev/null 2>&1 -then - echo "$ac_t""yes" 1>&6 - ISC=yes # If later tests want to check for ISC. - cat >> confdefs.h <<\EOF -#define _POSIX_SOURCE 1 -EOF - - if test "$GCC" = yes; then - CC="$CC -posix" - else - CC="$CC -Xp" - fi -else - echo "$ac_t""no" 1>&6 - ISC= -fi - -ac_safe=`echo "minix/config.h" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for minix/config.h""... $ac_c" 1>&6 -echo "configure:1552: checking for minix/config.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1562: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - MINIX=yes -else - echo "$ac_t""no" 1>&6 -MINIX= -fi - -if test "$MINIX" = yes; then - cat >> confdefs.h <<\EOF -#define _POSIX_SOURCE 1 -EOF - - cat >> confdefs.h <<\EOF -#define _POSIX_1_SOURCE 2 -EOF - - cat >> confdefs.h <<\EOF -#define _MINIX 1 -EOF - -fi - - - # Check whether --enable-largefile or --disable-largefile was given. -if test "${enable_largefile+set}" = set; then - enableval="$enable_largefile" - : -fi - - if test "$enable_largefile" != no; then - - echo $ac_n "checking for special C compiler options needed for large files""... $ac_c" 1>&6 -echo "configure:1609: checking for special C compiler options needed for large files" >&5 -if eval "test \"`echo '$''{'ac_cv_sys_largefile_CC'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_sys_largefile_CC=no - if test "$GCC" != yes; then - # IRIX 6.2 and later do not support large files by default, - # so use the C compiler's -n32 option if that helps. - cat > conftest.$ac_ext < - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply "#define LARGE_OFF_T 9223372036854775807", - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -# define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; - -int main() { - -; return 0; } -EOF -if { (eval echo configure:1634: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_save_CC="$CC" - CC="$CC -n32" - cat > conftest.$ac_ext < - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply "#define LARGE_OFF_T 9223372036854775807", - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -# define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 
1 : -1]; - -int main() { - -; return 0; } -EOF -if { (eval echo configure:1659: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_sys_largefile_CC=' -n32' -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* - CC="$ac_save_CC" -fi -rm -f conftest* - fi -fi - -echo "$ac_t""$ac_cv_sys_largefile_CC" 1>&6 - if test "$ac_cv_sys_largefile_CC" != no; then - CC="$CC$ac_cv_sys_largefile_CC" - fi - - echo $ac_n "checking for _FILE_OFFSET_BITS value needed for large files""... $ac_c" 1>&6 -echo "configure:1679: checking for _FILE_OFFSET_BITS value needed for large files" >&5 -if eval "test \"`echo '$''{'ac_cv_sys_file_offset_bits'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_sys_file_offset_bits=no - cat > conftest.$ac_ext < - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply "#define LARGE_OFF_T 9223372036854775807", - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -# define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; - -int main() { - -; return 0; } -EOF -if { (eval echo configure:1701: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - cat > conftest.$ac_ext < - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply "#define LARGE_OFF_T 9223372036854775807", - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -# define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; - - -int main() { - -; return 0; } -EOF -if { (eval echo configure:1726: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_sys_file_offset_bits=64 -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_sys_file_offset_bits" 1>&6 - if test "$ac_cv_sys_file_offset_bits" != no; then - cat >> confdefs.h <&6 -echo "configure:1746: checking for _LARGE_FILES value needed for large files" >&5 -if eval "test \"`echo '$''{'ac_cv_sys_large_files'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_sys_large_files=no - cat > conftest.$ac_ext < - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply "#define LARGE_OFF_T 9223372036854775807", - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. */ -# define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; - -int main() { - -; return 0; } -EOF -if { (eval echo configure:1768: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - cat > conftest.$ac_ext < - /* Check that off_t can represent 2**63 - 1 correctly. - We can't simply "#define LARGE_OFF_T 9223372036854775807", - since some C++ compilers masquerading as C compilers - incorrectly reject 9223372036854775807. 
*/ -# define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62)) - int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721 - && LARGE_OFF_T % 2147483647 == 1) - ? 1 : -1]; - - -int main() { - -; return 0; } -EOF -if { (eval echo configure:1793: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_sys_large_files=1 -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_sys_large_files" 1>&6 - if test "$ac_cv_sys_large_files" != no; then - cat >> confdefs.h <&6 -echo "configure:1816: checking for AIX compilation hacks" >&5 -if eval "test \"`echo '$''{'gawk_cv_aix_hack'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - -if test -d /lpp/bos -then - CFLAGS="$CFLAGS -D_XOPEN_SOURCE_EXTENDED=1" - gawk_cv_aix_hack=yes -else - gawk_cv_aix_hack=no -fi - -fi -echo "$ac_t""${gawk_cv_aix_hack}" 1>&6 - - -echo $ac_n "checking for Linux/Alpha compilation hacks""... $ac_c" 1>&6 -echo "configure:1834: checking for Linux/Alpha compilation hacks" >&5 -if eval "test \"`echo '$''{'gawk_cv_linux_alpha_hack'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - -if test "Linux" = "`uname`" && test "alpha" = "`uname -m`" -then - # this isn't necessarily always true, - # the vendor's compiler is also often found - if test "$GCC" = yes - then - CFLAGS="$CFLAGS -mieee" - gawk_cv_linux_alpha_hack=yes - else - gawk_cv_linux_alpha_hack=no - fi -else - gawk_cv_linux_alpha_hack=no -fi - -fi -echo "$ac_t""${gawk_cv_linux_alpha_hack}" 1>&6 - - -if test "$ISC" = 1 # will be set by test for ISC -then - CFLAGS="$CFLAGS -D_SYSV3" -fi - - - - -echo $ac_n "checking for ${CC-cc} option to accept ANSI C""... $ac_c" 1>&6 -echo "configure:1867: checking for ${CC-cc} option to accept ANSI C" >&5 -if eval "test \"`echo '$''{'am_cv_prog_cc_stdc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - am_cv_prog_cc_stdc=no -ac_save_CC="$CC" -# Don't try gcc -ansi; that turns off useful extensions and -# breaks some systems' header files. -# AIX -qlanglvl=ansi -# Ultrix and OSF/1 -std1 -# HP-UX 10.20 and later -Ae -# HP-UX older versions -Aa -D_HPUX_SOURCE -# SVR4 -Xc -D__EXTENSIONS__ -for ac_arg in "" -qlanglvl=ansi -std1 -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" -do - CC="$ac_save_CC $ac_arg" - cat > conftest.$ac_ext < -#include -#include -#include -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) 
-{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; - -int main() { - -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - -; return 0; } -EOF -if { (eval echo configure:1921: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - am_cv_prog_cc_stdc="$ac_arg"; break -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* -done -CC="$ac_save_CC" - -fi - -if test -z "$am_cv_prog_cc_stdc"; then - echo "$ac_t""none needed" 1>&6 -else - echo "$ac_t""$am_cv_prog_cc_stdc" 1>&6 -fi -case "x$am_cv_prog_cc_stdc" in - x|xno) ;; - *) CC="$CC $am_cv_prog_cc_stdc" ;; -esac - - - -echo $ac_n "checking for function prototypes""... $ac_c" 1>&6 -echo "configure:1947: checking for function prototypes" >&5 -if test "$am_cv_prog_cc_stdc" != no; then - echo "$ac_t""yes" 1>&6 - cat >> confdefs.h <<\EOF -#define PROTOTYPES 1 -EOF - - U= ANSI2KNR= -else - echo "$ac_t""no" 1>&6 - U=_ ANSI2KNR=./ansi2knr - # Ensure some checks needed by ansi2knr itself. - echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6 -echo "configure:1960: checking for ANSI C header files" >&5 -if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -#include -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:1973: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - ac_cv_header_stdc=yes -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "memchr" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "free" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. -if test "$cross_compiling" = yes; then - : -else - cat > conftest.$ac_ext < -#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -#define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int main () { int i; for (i = 0; i < 256; i++) -if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); -exit (0); } - -EOF -if { (eval echo configure:2040: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_header_stdc=no -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_header_stdc" 1>&6 -if test $ac_cv_header_stdc = yes; then - cat >> confdefs.h <<\EOF -#define STDC_HEADERS 1 -EOF - -fi - - for ac_hdr in string.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:2067: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:2077: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - -fi - - -ALL_LINGUAS="he" -echo $ac_n "checking for working const""... $ac_c" 1>&6 -echo "configure:2108: checking for working const" >&5 -if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <j = 5; -} -{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ - const int foo = 10; -} - -; return 0; } -EOF -if { (eval echo configure:2162: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_const=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_c_const=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_c_const" 1>&6 -if test $ac_cv_c_const = no; then - cat >> confdefs.h <<\EOF -#define const -EOF - -fi - -echo $ac_n "checking for inline""... 
$ac_c" 1>&6 -echo "configure:2183: checking for inline" >&5 -if eval "test \"`echo '$''{'ac_cv_c_inline'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_c_inline=no -for ac_kw in inline __inline__ __inline; do - cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_inline=$ac_kw; break -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* -done - -fi - -echo "$ac_t""$ac_cv_c_inline" 1>&6 -case "$ac_cv_c_inline" in - inline | yes) ;; - no) cat >> confdefs.h <<\EOF -#define inline -EOF - ;; - *) cat >> confdefs.h <&6 -echo "configure:2223: checking for off_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_off_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])off_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_off_t=yes -else - rm -rf conftest* - ac_cv_type_off_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_off_t" 1>&6 -if test $ac_cv_type_off_t = no; then - cat >> confdefs.h <<\EOF -#define off_t long -EOF - -fi - -echo $ac_n "checking for size_t""... $ac_c" 1>&6 -echo "configure:2256: checking for size_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])size_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_size_t=yes -else - rm -rf conftest* - ac_cv_type_size_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_size_t" 1>&6 -if test $ac_cv_type_size_t = no; then - cat >> confdefs.h <<\EOF -#define size_t unsigned -EOF - -fi - -# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works -# for constant arguments. Useless! -echo $ac_n "checking for working alloca.h""... $ac_c" 1>&6 -echo "configure:2291: checking for working alloca.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_alloca_h'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -int main() { -char *p = alloca(2 * sizeof(int)); -; return 0; } -EOF -if { (eval echo configure:2303: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ac_cv_header_alloca_h=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_alloca_h=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_header_alloca_h" 1>&6 -if test $ac_cv_header_alloca_h = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_ALLOCA_H 1 -EOF - -fi - -echo $ac_n "checking for alloca""... 
$ac_c" 1>&6 -echo "configure:2324: checking for alloca" >&5 -if eval "test \"`echo '$''{'ac_cv_func_alloca_works'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -# define alloca _alloca -# else -# if HAVE_ALLOCA_H -# include -# else -# ifdef _AIX - #pragma alloca -# else -# ifndef alloca /* predefined by HP cc +Olibcalls */ -char *alloca (); -# endif -# endif -# endif -# endif -#endif - -int main() { -char *p = (char *) alloca(1); -; return 0; } -EOF -if { (eval echo configure:2357: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ac_cv_func_alloca_works=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_func_alloca_works=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_func_alloca_works" 1>&6 -if test $ac_cv_func_alloca_works = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_ALLOCA 1 -EOF - -fi - -if test $ac_cv_func_alloca_works = no; then - # The SVR3 libPW and SVR4 libucb both contain incompatible functions - # that cause trouble. Some versions do not even contain alloca or - # contain a buggy version. If you still want to use their alloca, - # use ar to extract alloca.o from them instead of compiling alloca.c. - ALLOCA=alloca.${ac_objext} - cat >> confdefs.h <<\EOF -#define C_ALLOCA 1 -EOF - - -echo $ac_n "checking whether alloca needs Cray hooks""... $ac_c" 1>&6 -echo "configure:2389: checking whether alloca needs Cray hooks" >&5 -if eval "test \"`echo '$''{'ac_cv_os_cray'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <&5 | - egrep "webecray" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_os_cray=yes -else - rm -rf conftest* - ac_cv_os_cray=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_os_cray" 1>&6 -if test $ac_cv_os_cray = yes; then -for ac_func in _getb67 GETB67 getb67; do - echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2419: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:2447: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - cat >> confdefs.h <&6 -fi - -done -fi - -echo $ac_n "checking stack direction for C alloca""... $ac_c" 1>&6 -echo "configure:2474: checking stack direction for C alloca" >&5 -if eval "test \"`echo '$''{'ac_cv_c_stack_direction'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_c_stack_direction=0 -else - cat > conftest.$ac_ext < addr) ? 
1 : -1; -} -main () -{ - exit (find_stack_direction() < 0); -} -EOF -if { (eval echo configure:2501: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_c_stack_direction=1 -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_c_stack_direction=-1 -fi -rm -fr conftest* -fi - -fi - -echo "$ac_t""$ac_cv_c_stack_direction" 1>&6 -cat >> confdefs.h <&6 -echo "configure:2526: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:2536: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - -for ac_func in getpagesize -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2565: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:2593: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -echo $ac_n "checking for working mmap""... $ac_c" 1>&6 -echo "configure:2618: checking for working mmap" >&5 -if eval "test \"`echo '$''{'ac_cv_func_mmap_fixed_mapped'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_func_mmap_fixed_mapped=no -else - cat > conftest.$ac_ext < -#include -#include - -/* This mess was copied from the GNU getpagesize.h. */ -#ifndef HAVE_GETPAGESIZE -# ifdef HAVE_UNISTD_H -# include -# endif - -/* Assume that all systems that can run configure have sys/param.h. 
*/ -# ifndef HAVE_SYS_PARAM_H -# define HAVE_SYS_PARAM_H 1 -# endif - -# ifdef _SC_PAGESIZE -# define getpagesize() sysconf(_SC_PAGESIZE) -# else /* no _SC_PAGESIZE */ -# ifdef HAVE_SYS_PARAM_H -# include -# ifdef EXEC_PAGESIZE -# define getpagesize() EXEC_PAGESIZE -# else /* no EXEC_PAGESIZE */ -# ifdef NBPG -# define getpagesize() NBPG * CLSIZE -# ifndef CLSIZE -# define CLSIZE 1 -# endif /* no CLSIZE */ -# else /* no NBPG */ -# ifdef NBPC -# define getpagesize() NBPC -# else /* no NBPC */ -# ifdef PAGESIZE -# define getpagesize() PAGESIZE -# endif /* PAGESIZE */ -# endif /* no NBPC */ -# endif /* no NBPG */ -# endif /* no EXEC_PAGESIZE */ -# else /* no HAVE_SYS_PARAM_H */ -# define getpagesize() 8192 /* punt totally */ -# endif /* no HAVE_SYS_PARAM_H */ -# endif /* no _SC_PAGESIZE */ - -#endif /* no HAVE_GETPAGESIZE */ - -#ifdef __cplusplus -extern "C" { void *malloc(unsigned); } -#else -char *malloc(); -#endif - -int -main() -{ - char *data, *data2, *data3; - int i, pagesize; - int fd; - - pagesize = getpagesize(); - - /* - * First, make a file with some known garbage in it. - */ - data = malloc(pagesize); - if (!data) - exit(1); - for (i = 0; i < pagesize; ++i) - *(data + i) = rand(); - umask(0); - fd = creat("conftestmmap", 0600); - if (fd < 0) - exit(1); - if (write(fd, data, pagesize) != pagesize) - exit(1); - close(fd); - - /* - * Next, try to mmap the file at a fixed address which - * already has something else allocated at it. If we can, - * also make sure that we see the same garbage. - */ - fd = open("conftestmmap", O_RDWR); - if (fd < 0) - exit(1); - data2 = malloc(2 * pagesize); - if (!data2) - exit(1); - data2 += (pagesize - ((int) data2 & (pagesize - 1))) & (pagesize - 1); - if (data2 != mmap(data2, pagesize, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED, fd, 0L)) - exit(1); - for (i = 0; i < pagesize; ++i) - if (*(data + i) != *(data2 + i)) - exit(1); - - /* - * Finally, make sure that changes to the mapped area - * do not percolate back to the file as seen by read(). - * (This is a bug on some variants of i386 svr4.0.) - */ - for (i = 0; i < pagesize; ++i) - *(data2 + i) = *(data2 + i) + 1; - data3 = malloc(pagesize); - if (!data3) - exit(1); - if (read(fd, data3, pagesize) != pagesize) - exit(1); - for (i = 0; i < pagesize; ++i) - if (*(data + i) != *(data3 + i)) - exit(1); - close(fd); - unlink("conftestmmap"); - exit(0); -} - -EOF -if { (eval echo configure:2766: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_func_mmap_fixed_mapped=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_func_mmap_fixed_mapped=no -fi -rm -fr conftest* -fi - -fi - -echo "$ac_t""$ac_cv_func_mmap_fixed_mapped" 1>&6 -if test $ac_cv_func_mmap_fixed_mapped = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_MMAP 1 -EOF - -fi - - - for ac_hdr in argz.h limits.h locale.h nl_types.h malloc.h string.h \ -unistd.h sys/param.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:2794: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:2804: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - - for ac_func in getcwd munmap putenv setenv setlocale strchr strcasecmp \ -strdup __argz_count __argz_stringify __argz_next -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2834: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:2862: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - - - if test "${ac_cv_func_stpcpy+set}" != "set"; then - for ac_func in stpcpy -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:2891: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:2919: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - - fi - if test "${ac_cv_func_stpcpy}" = "yes"; then - cat >> confdefs.h <<\EOF -#define HAVE_STPCPY 1 -EOF - - fi - - if test $ac_cv_header_locale_h = yes; then - echo $ac_n "checking for LC_MESSAGES""... $ac_c" 1>&6 -echo "configure:2953: checking for LC_MESSAGES" >&5 -if eval "test \"`echo '$''{'am_cv_val_LC_MESSAGES'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -int main() { -return LC_MESSAGES -; return 0; } -EOF -if { (eval echo configure:2965: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - am_cv_val_LC_MESSAGES=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - am_cv_val_LC_MESSAGES=no -fi -rm -f conftest* -fi - -echo "$ac_t""$am_cv_val_LC_MESSAGES" 1>&6 - if test $am_cv_val_LC_MESSAGES = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_LC_MESSAGES 1 -EOF - - fi - fi - echo $ac_n "checking whether NLS is requested""... $ac_c" 1>&6 -echo "configure:2986: checking whether NLS is requested" >&5 - # Check whether --enable-nls or --disable-nls was given. -if test "${enable_nls+set}" = set; then - enableval="$enable_nls" - USE_NLS=$enableval -else - USE_NLS=yes -fi - - echo "$ac_t""$USE_NLS" 1>&6 - - - USE_INCLUDED_LIBINTL=no - - if test "$USE_NLS" = "yes"; then - cat >> confdefs.h <<\EOF -#define ENABLE_NLS 1 -EOF - - echo $ac_n "checking whether included gettext is requested""... $ac_c" 1>&6 -echo "configure:3006: checking whether included gettext is requested" >&5 - # Check whether --with-included-gettext or --without-included-gettext was given. -if test "${with_included_gettext+set}" = set; then - withval="$with_included_gettext" - nls_cv_force_use_gnu_gettext=$withval -else - nls_cv_force_use_gnu_gettext=no -fi - - echo "$ac_t""$nls_cv_force_use_gnu_gettext" 1>&6 - - nls_cv_use_gnu_gettext="$nls_cv_force_use_gnu_gettext" - if test "$nls_cv_force_use_gnu_gettext" != "yes"; then - nls_cv_header_intl= - nls_cv_header_libgt= - CATOBJEXT=NONE - - ac_safe=`echo "libintl.h" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for libintl.h""... 
$ac_c" 1>&6 -echo "configure:3025: checking for libintl.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:3035: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - echo $ac_n "checking for gettext in libc""... $ac_c" 1>&6 -echo "configure:3052: checking for gettext in libc" >&5 -if eval "test \"`echo '$''{'gt_cv_func_gettext_libc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -int main() { -return (int) gettext ("") -; return 0; } -EOF -if { (eval echo configure:3064: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - gt_cv_func_gettext_libc=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - gt_cv_func_gettext_libc=no -fi -rm -f conftest* -fi - -echo "$ac_t""$gt_cv_func_gettext_libc" 1>&6 - - if test "$gt_cv_func_gettext_libc" != "yes"; then - echo $ac_n "checking for bindtextdomain in -lintl""... $ac_c" 1>&6 -echo "configure:3080: checking for bindtextdomain in -lintl" >&5 -ac_lib_var=`echo intl'_'bindtextdomain | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lintl $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - echo $ac_n "checking for gettext in -lintl""... $ac_c" 1>&6 -echo "configure:3115: checking for gettext in -lintl" >&5 -ac_lib_var=`echo intl'_'gettext | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lintl $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo intl | sed -e 's/[^a-zA-Z0-9_]/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -else - echo "$ac_t""no" 1>&6 -fi - - fi - - if test "$gt_cv_func_gettext_libc" = "yes" \ - || test "$ac_cv_lib_intl_gettext" = "yes"; then - cat >> confdefs.h <<\EOF -#define HAVE_GETTEXT 1 -EOF - - # Extract the first word of "msgfmt", so it can be a program name with args. -set dummy msgfmt; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:3176: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MSGFMT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MSGFMT" in - /*) - ac_cv_path_MSGFMT="$MSGFMT" # Let the user override the test with a path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test -z "`$ac_dir/$ac_word -h 2>&1 | grep 'dv '`"; then - ac_cv_path_MSGFMT="$ac_dir/$ac_word" - break - fi - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_MSGFMT" && ac_cv_path_MSGFMT="no" - ;; -esac -fi -MSGFMT="$ac_cv_path_MSGFMT" -if test -n "$MSGFMT"; then - echo "$ac_t""$MSGFMT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - if test "$MSGFMT" != "no"; then - for ac_func in dcgettext -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:3210: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:3238: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - - # Extract the first word of "gmsgfmt", so it can be a program name with args. -set dummy gmsgfmt; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3265: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_GMSGFMT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$GMSGFMT" in - /*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_GMSGFMT="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_GMSGFMT" && ac_cv_path_GMSGFMT="$MSGFMT" - ;; -esac -fi -GMSGFMT="$ac_cv_path_GMSGFMT" -if test -n "$GMSGFMT"; then - echo "$ac_t""$GMSGFMT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - # Extract the first word of "xgettext", so it can be a program name with args. -set dummy xgettext; ac_word=$2 -echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 -echo "configure:3301: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_XGETTEXT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$XGETTEXT" in - /*) - ac_cv_path_XGETTEXT="$XGETTEXT" # Let the user override the test with a path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test -z "`$ac_dir/$ac_word -h 2>&1 | grep '(HELP)'`"; then - ac_cv_path_XGETTEXT="$ac_dir/$ac_word" - break - fi - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_XGETTEXT" && ac_cv_path_XGETTEXT=":" - ;; -esac -fi -XGETTEXT="$ac_cv_path_XGETTEXT" -if test -n "$XGETTEXT"; then - echo "$ac_t""$XGETTEXT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - CATOBJEXT=.gmo - DATADIRNAME=share -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - CATOBJEXT=.mo - DATADIRNAME=lib -fi -rm -f conftest* - INSTOBJEXT=.mo - fi - fi - -else - echo "$ac_t""no" 1>&6 -fi - - - if test "$CATOBJEXT" = "NONE"; then - echo $ac_n "checking whether catgets can be used""... $ac_c" 1>&6 -echo "configure:3364: checking whether catgets can be used" >&5 - # Check whether --with-catgets or --without-catgets was given. -if test "${with_catgets+set}" = set; then - withval="$with_catgets" - nls_cv_use_catgets=$withval -else - nls_cv_use_catgets=no -fi - - echo "$ac_t""$nls_cv_use_catgets" 1>&6 - - if test "$nls_cv_use_catgets" = "yes"; then - echo $ac_n "checking for main in -li""... $ac_c" 1>&6 -echo "configure:3377: checking for main in -li" >&5 -ac_lib_var=`echo i'_'main | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-li $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo i | sed -e 's/[^a-zA-Z0-9_]/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - - echo $ac_n "checking for catgets""... $ac_c" 1>&6 -echo "configure:3420: checking for catgets" >&5 -if eval "test \"`echo '$''{'ac_cv_func_catgets'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char catgets(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_catgets) || defined (__stub___catgets) -choke me -#else -catgets(); -#endif - -; return 0; } -EOF -if { (eval echo configure:3448: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_catgets=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_catgets=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'catgets`\" = yes"; then - echo "$ac_t""yes" 1>&6 - cat >> confdefs.h <<\EOF -#define HAVE_CATGETS 1 -EOF - - INTLOBJS="\$(CATOBJS)" - # Extract the first word of "gencat", so it can be a program name with args. -set dummy gencat; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3470: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_GENCAT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$GENCAT" in - /*) - ac_cv_path_GENCAT="$GENCAT" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_GENCAT="$GENCAT" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_GENCAT="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_GENCAT" && ac_cv_path_GENCAT="no" - ;; -esac -fi -GENCAT="$ac_cv_path_GENCAT" -if test -n "$GENCAT"; then - echo "$ac_t""$GENCAT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - if test "$GENCAT" != "no"; then - # Extract the first word of "gmsgfmt", so it can be a program name with args. -set dummy gmsgfmt; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3506: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_GMSGFMT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$GMSGFMT" in - /*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a dos path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_GMSGFMT="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_GMSGFMT" && ac_cv_path_GMSGFMT="no" - ;; -esac -fi -GMSGFMT="$ac_cv_path_GMSGFMT" -if test -n "$GMSGFMT"; then - echo "$ac_t""$GMSGFMT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - if test "$GMSGFMT" = "no"; then - # Extract the first word of "msgfmt", so it can be a program name with args. -set dummy msgfmt; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3543: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_GMSGFMT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$GMSGFMT" in - /*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. 
- if test -f $ac_dir/$ac_word; then - if test -z "`$ac_dir/$ac_word -h 2>&1 | grep 'dv '`"; then - ac_cv_path_GMSGFMT="$ac_dir/$ac_word" - break - fi - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_GMSGFMT" && ac_cv_path_GMSGFMT="no" - ;; -esac -fi -GMSGFMT="$ac_cv_path_GMSGFMT" -if test -n "$GMSGFMT"; then - echo "$ac_t""$GMSGFMT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - fi - # Extract the first word of "xgettext", so it can be a program name with args. -set dummy xgettext; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3578: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_XGETTEXT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$XGETTEXT" in - /*) - ac_cv_path_XGETTEXT="$XGETTEXT" # Let the user override the test with a path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test -z "`$ac_dir/$ac_word -h 2>&1 | grep '(HELP)'`"; then - ac_cv_path_XGETTEXT="$ac_dir/$ac_word" - break - fi - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_XGETTEXT" && ac_cv_path_XGETTEXT=":" - ;; -esac -fi -XGETTEXT="$ac_cv_path_XGETTEXT" -if test -n "$XGETTEXT"; then - echo "$ac_t""$XGETTEXT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - USE_INCLUDED_LIBINTL=yes - CATOBJEXT=.cat - INSTOBJEXT=.cat - DATADIRNAME=lib - INTLDEPS='$(top_builddir)/intl/libintl.a' - INTLLIBS=$INTLDEPS - LIBS=`echo $LIBS | sed -e 's/-lintl//'` - nls_cv_header_intl=intl/libintl.h - nls_cv_header_libgt=intl/libgettext.h - fi -else - echo "$ac_t""no" 1>&6 -fi - - fi - fi - - if (test "$gt_cv_func_gettext_libc" = "yes" \ - || test "$gt_cv_func_gettext_libintl" = "yes") \ - && test "$ac_cv_func_dcgettext" = no; then - nls_cv_use_gnu_gettext=yes - elif test "$CATOBJEXT" = "NONE"; then - nls_cv_use_gnu_gettext=yes - fi - fi - - if test "$nls_cv_use_gnu_gettext" = "yes"; then - INTLOBJS="\$(GETTOBJS)" - # Extract the first word of "msgfmt", so it can be a program name with args. -set dummy msgfmt; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3640: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_MSGFMT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$MSGFMT" in - /*) - ac_cv_path_MSGFMT="$MSGFMT" # Let the user override the test with a path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test -z "`$ac_dir/$ac_word -h 2>&1 | grep 'dv '`"; then - ac_cv_path_MSGFMT="$ac_dir/$ac_word" - break - fi - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_MSGFMT" && ac_cv_path_MSGFMT="msgfmt" - ;; -esac -fi -MSGFMT="$ac_cv_path_MSGFMT" -if test -n "$MSGFMT"; then - echo "$ac_t""$MSGFMT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - # Extract the first word of "gmsgfmt", so it can be a program name with args. -set dummy gmsgfmt; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3674: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_GMSGFMT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$GMSGFMT" in - /*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a path. - ;; - ?:/*) - ac_cv_path_GMSGFMT="$GMSGFMT" # Let the user override the test with a dos path. 
- ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" - ac_dummy="$PATH" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - ac_cv_path_GMSGFMT="$ac_dir/$ac_word" - break - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_GMSGFMT" && ac_cv_path_GMSGFMT="$MSGFMT" - ;; -esac -fi -GMSGFMT="$ac_cv_path_GMSGFMT" -if test -n "$GMSGFMT"; then - echo "$ac_t""$GMSGFMT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - # Extract the first word of "xgettext", so it can be a program name with args. -set dummy xgettext; ac_word=$2 -echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 -echo "configure:3710: checking for $ac_word" >&5 -if eval "test \"`echo '$''{'ac_cv_path_XGETTEXT'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - case "$XGETTEXT" in - /*) - ac_cv_path_XGETTEXT="$XGETTEXT" # Let the user override the test with a path. - ;; - *) - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$ac_word; then - if test -z "`$ac_dir/$ac_word -h 2>&1 | grep '(HELP)'`"; then - ac_cv_path_XGETTEXT="$ac_dir/$ac_word" - break - fi - fi - done - IFS="$ac_save_ifs" - test -z "$ac_cv_path_XGETTEXT" && ac_cv_path_XGETTEXT=":" - ;; -esac -fi -XGETTEXT="$ac_cv_path_XGETTEXT" -if test -n "$XGETTEXT"; then - echo "$ac_t""$XGETTEXT" 1>&6 -else - echo "$ac_t""no" 1>&6 -fi - - - USE_INCLUDED_LIBINTL=yes - CATOBJEXT=.gmo - INSTOBJEXT=.mo - DATADIRNAME=share - INTLDEPS='$(top_builddir)/intl/libintl.a' - INTLLIBS=$INTLDEPS - LIBS=`echo $LIBS | sed -e 's/-lintl//'` - nls_cv_header_intl=intl/libintl.h - nls_cv_header_libgt=intl/libgettext.h - fi - - if test "$XGETTEXT" != ":"; then - if $XGETTEXT --omit-header /dev/null 2> /dev/null; then - : ; - else - echo "$ac_t""found xgettext program is not GNU xgettext; ignore it" 1>&6 - XGETTEXT=":" - fi - fi - - # We need to process the po/ directory. - POSUB=po - else - DATADIRNAME=share - nls_cv_header_intl=intl/libintl.h - nls_cv_header_libgt=intl/libgettext.h - fi - if test -z "$nls_cv_header_intl"; then - # Clean out junk possibly left behind by a previous configuration. - rm -f intl/libintl.h - fi - - - - - # If this is used in GNU gettext we have to set USE_NLS to `yes' - # because some of the sources are only built for this goal. - if test "$PACKAGE" = gettext; then - USE_NLS=yes - USE_INCLUDED_LIBINTL=yes - fi - - for lang in $ALL_LINGUAS; do - GMOFILES="$GMOFILES $lang.gmo" - POFILES="$POFILES $lang.po" - done - - - - - - - - - - - - - - - if test "x$CATOBJEXT" != "x"; then - if test "x$ALL_LINGUAS" = "x"; then - LINGUAS= - else - echo $ac_n "checking for catalogs to be installed""... $ac_c" 1>&6 -echo "configure:3807: checking for catalogs to be installed" >&5 - NEW_LINGUAS= - for lang in ${LINGUAS=$ALL_LINGUAS}; do - case "$ALL_LINGUAS" in - *$lang*) NEW_LINGUAS="$NEW_LINGUAS $lang" ;; - esac - done - LINGUAS=$NEW_LINGUAS - echo "$ac_t""$LINGUAS" 1>&6 - fi - - if test -n "$LINGUAS"; then - for lang in $LINGUAS; do CATALOGS="$CATALOGS $lang$CATOBJEXT"; done - fi - fi - - if test $ac_cv_header_locale_h = yes; then - INCLUDE_LOCALE_H="#include " - else - INCLUDE_LOCALE_H="\ -/* The system does not provide the header . Take care yourself. */" - fi - - - test -d intl || mkdir intl - if test "$CATOBJEXT" = ".cat"; then - ac_safe=`echo "linux/version.h" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for linux/version.h""... 
$ac_c" 1>&6 -echo "configure:3835: checking for linux/version.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:3845: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - msgformat=linux -else - echo "$ac_t""no" 1>&6 -msgformat=xopen -fi - - - sed -e '/^#/d' $srcdir/intl/$msgformat-msg.sed > intl/po2msg.sed - fi - sed -e '/^#.*[^\\]$/d' -e '/^#$/d' \ - $srcdir/intl/po2tbl.sed.in > intl/po2tbl.sed - - if test "$PACKAGE" = "gettext"; then - GT_NO="#NO#" - GT_YES= - else - GT_NO= - GT_YES="#YES#" - fi - - - - MKINSTALLDIRS= - if test -n "$ac_aux_dir"; then - MKINSTALLDIRS="$ac_aux_dir/mkinstalldirs" - fi - if test -z "$MKINSTALLDIRS"; then - MKINSTALLDIRS="\$(top_srcdir)/mkinstalldirs" - fi - - - l= - - - test -d po || mkdir po - case "$srcdir" in - .) - posrcprefix="../" ;; - /* | [A-Za-z]:*) - posrcprefix="$srcdir/" ;; - *) - posrcprefix="../$srcdir/" ;; - esac - rm -f po/POTFILES - sed -e "/^#/d" -e "/^\$/d" -e "s,.*, $posrcprefix& \\\\," -e "\$s/\(.*\) \\\\/\1/" \ - < $srcdir/po/POTFILES.in > po/POTFILES - - -echo $ac_n "checking for ANSI C header files""... $ac_c" 1>&6 -echo "configure:3910: checking for ANSI C header files" >&5 -if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -#include -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:3923: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - ac_cv_header_stdc=yes -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -if test $ac_cv_header_stdc = yes; then - # SunOS 4.x string.h does not declare mem*, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "memchr" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "free" >/dev/null 2>&1; then - : -else - rm -rf conftest* - ac_cv_header_stdc=no -fi -rm -f conftest* - -fi - -if test $ac_cv_header_stdc = yes; then - # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. -if test "$cross_compiling" = yes; then - : -else - cat > conftest.$ac_ext < -#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') -#define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) -#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) -int main () { int i; for (i = 0; i < 256; i++) -if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); -exit (0); } - -EOF -if { (eval echo configure:3990: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - : -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_header_stdc=no -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_header_stdc" 1>&6 -if test $ac_cv_header_stdc = yes; then - cat >> confdefs.h <<\EOF -#define STDC_HEADERS 1 -EOF - -fi - -echo $ac_n "checking for sys/wait.h that is POSIX.1 compatible""... $ac_c" 1>&6 -echo "configure:4014: checking for sys/wait.h that is POSIX.1 compatible" >&5 -if eval "test \"`echo '$''{'ac_cv_header_sys_wait_h'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#ifndef WEXITSTATUS -#define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) -#endif -#ifndef WIFEXITED -#define WIFEXITED(stat_val) (((stat_val) & 255) == 0) -#endif -int main() { -int s; -wait (&s); -s = WIFEXITED (s) ? WEXITSTATUS (s) : 1; -; return 0; } -EOF -if { (eval echo configure:4035: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_header_sys_wait_h=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_sys_wait_h=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_header_sys_wait_h" 1>&6 -if test $ac_cv_header_sys_wait_h = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_SYS_WAIT_H 1 -EOF - -fi - -for ac_hdr in fcntl.h limits.h locale.h libintl.h mcheck.h \ - netdb.h netinet/in.h signum.h stdarg.h string.h \ - sys/param.h sys/socket.h unistd.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:4061: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:4071: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - - -if test "$ac_cv_header_string_h" = yes -then - for ac_hdr in memory.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:4104: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:4114: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - -else - for ac_hdr in strings.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:4145: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:4155: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - -fi - -echo $ac_n "checking for pid_t""... $ac_c" 1>&6 -echo "configure:4184: checking for pid_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_pid_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])pid_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_pid_t=yes -else - rm -rf conftest* - ac_cv_type_pid_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_pid_t" 1>&6 -if test $ac_cv_type_pid_t = no; then - cat >> confdefs.h <<\EOF -#define pid_t int -EOF - -fi - -echo $ac_n "checking return type of signal handlers""... 
$ac_c" 1>&6 -echo "configure:4217: checking return type of signal handlers" >&5 -if eval "test \"`echo '$''{'ac_cv_type_signal'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#ifdef signal -#undef signal -#endif -#ifdef __cplusplus -extern "C" void (*signal (int, void (*)(int)))(int); -#else -void (*signal ()) (); -#endif - -int main() { -int i; -; return 0; } -EOF -if { (eval echo configure:4239: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_type_signal=void -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_type_signal=int -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_type_signal" 1>&6 -cat >> confdefs.h <&6 -echo "configure:4258: checking for size_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_size_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])size_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_size_t=yes -else - rm -rf conftest* - ac_cv_type_size_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_size_t" 1>&6 -if test $ac_cv_type_size_t = no; then - cat >> confdefs.h <<\EOF -#define size_t unsigned -EOF - -fi - -echo $ac_n "checking for uid_t in sys/types.h""... $ac_c" 1>&6 -echo "configure:4291: checking for uid_t in sys/types.h" >&5 -if eval "test \"`echo '$''{'ac_cv_type_uid_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "uid_t" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_uid_t=yes -else - rm -rf conftest* - ac_cv_type_uid_t=no -fi -rm -f conftest* - -fi - -echo "$ac_t""$ac_cv_type_uid_t" 1>&6 -if test $ac_cv_type_uid_t = no; then - cat >> confdefs.h <<\EOF -#define uid_t int -EOF - - cat >> confdefs.h <<\EOF -#define gid_t int -EOF - -fi - -echo $ac_n "checking type of array argument to getgroups""... $ac_c" 1>&6 -echo "configure:4325: checking type of array argument to getgroups" >&5 -if eval "test \"`echo '$''{'ac_cv_type_getgroups'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_type_getgroups=cross -else - cat > conftest.$ac_ext < -#define NGID 256 -#undef MAX -#define MAX(x, y) ((x) > (y) ? (x) : (y)) -main() -{ - gid_t gidset[NGID]; - int i, n; - union { gid_t gval; long lval; } val; - - val.lval = -1; - for (i = 0; i < NGID; i++) - gidset[i] = val.gval; - n = getgroups (sizeof (gidset) / MAX (sizeof (int), sizeof (gid_t)) - 1, - gidset); - /* Exit non-zero if getgroups seems to require an array of ints. This - happens when gid_t is short but getgroups modifies an array of ints. */ - exit ((n > 0 && gidset[n] != val.gval) ? 
1 : 0); -} - -EOF -if { (eval echo configure:4358: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_type_getgroups=gid_t -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_type_getgroups=int -fi -rm -fr conftest* -fi - -if test $ac_cv_type_getgroups = cross; then - cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "getgroups.*int.*gid_t" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_getgroups=gid_t -else - rm -rf conftest* - ac_cv_type_getgroups=int -fi -rm -f conftest* - -fi -fi - -echo "$ac_t""$ac_cv_type_getgroups" 1>&6 -cat >> confdefs.h <&6 -echo "configure:4400: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:4410: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - - echo $ac_n "checking for ssize_t""... $ac_c" 1>&6 -echo "configure:4437: checking for ssize_t" >&5 -if eval "test \"`echo '$''{'ac_cv_type_ssize_t'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#if STDC_HEADERS -#include -#include -#endif -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "(^|[^a-zA-Z_0-9])ssize_t[^a-zA-Z_0-9]" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_type_ssize_t=yes -else - rm -rf conftest* - ac_cv_type_ssize_t=no -fi -rm -f conftest* - -fi -echo "$ac_t""$ac_cv_type_ssize_t" 1>&6 -if test $ac_cv_type_ssize_t = no; then - cat >> confdefs.h <<\EOF -#define ssize_t int -EOF - -fi - - - -cat > conftest.$ac_ext < -EOF -if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | - egrep "int.*sprintf" >/dev/null 2>&1; then - rm -rf conftest* - cat >> confdefs.h <<\EOF -#define SPRINTF_RET int -EOF - -else - rm -rf conftest* - cat >> confdefs.h <<\EOF -#define SPRINTF_RET char * -EOF - -fi -rm -f conftest* - - -cat >> confdefs.h <<\EOF -#define REGEX_MALLOC 1 -EOF - -echo $ac_n "checking for vprintf""... $ac_c" 1>&6 -echo "configure:4498: checking for vprintf" >&5 -if eval "test \"`echo '$''{'ac_cv_func_vprintf'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char vprintf(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. 
*/ -#if defined (__stub_vprintf) || defined (__stub___vprintf) -choke me -#else -vprintf(); -#endif - -; return 0; } -EOF -if { (eval echo configure:4526: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_vprintf=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_vprintf=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'vprintf`\" = yes"; then - echo "$ac_t""yes" 1>&6 - cat >> confdefs.h <<\EOF -#define HAVE_VPRINTF 1 -EOF - -else - echo "$ac_t""no" 1>&6 -fi - -if test "$ac_cv_func_vprintf" != yes; then -echo $ac_n "checking for _doprnt""... $ac_c" 1>&6 -echo "configure:4550: checking for _doprnt" >&5 -if eval "test \"`echo '$''{'ac_cv_func__doprnt'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char _doprnt(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub__doprnt) || defined (__stub____doprnt) -choke me -#else -_doprnt(); -#endif - -; return 0; } -EOF -if { (eval echo configure:4578: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func__doprnt=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func__doprnt=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'_doprnt`\" = yes"; then - echo "$ac_t""yes" 1>&6 - cat >> confdefs.h <<\EOF -#define HAVE_DOPRNT 1 -EOF - -else - echo "$ac_t""no" 1>&6 -fi - -fi - - -for ac_hdr in stdlib.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 -echo "configure:4607: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:4617: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - -for ac_func in strtod -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:4646: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. 
*/ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:4674: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -echo $ac_n "checking for strtod with C89 semantics""... $ac_c" 1>&6 -echo "configure:4699: checking for strtod with C89 semantics" >&5 -if eval "test \"`echo '$''{'gawk_ac_cv_func_strtod_c89'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - gawk_ac_cv_func_strtod_c89=no -else - cat > conftest.$ac_ext < -#else -extern double strtod(); -#endif - -int -main () -{ -#if ! HAVE_STRTOD - exit(1); -#else - double d; - char *str = "0x345a"; - - d = strtod(str, 0); - if (d == 0) - exit (0); - else - exit (1); -} -EOF -if { (eval echo configure:4732: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - gawk_ac_cv_func_strtod_c89=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - gawk_ac_cv_func_strtod_c89=no -fi -rm -fr conftest* -fi - -fi - -echo "$ac_t""$gawk_ac_cv_func_strtod_c89" 1>&6 -if test $gawk_ac_cv_func_strtod_c89 = no; then - cat >> confdefs.h <<\EOF -#define STRTOD_NOT_C89 1 -EOF - -fi - -echo $ac_n "checking whether time.h and sys/time.h may both be included""... $ac_c" 1>&6 -echo "configure:4755: checking whether time.h and sys/time.h may both be included" >&5 -if eval "test \"`echo '$''{'ac_cv_header_time'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -int main() { -struct tm *tp; -; return 0; } -EOF -if { (eval echo configure:4769: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_header_time=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_time=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_header_time" 1>&6 -if test $ac_cv_header_time = yes; then - cat >> confdefs.h <<\EOF -#define TIME_WITH_SYS_TIME 1 -EOF - -fi - -for ac_hdr in sys/time.h unistd.h -do -ac_safe=`echo "$ac_hdr" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for $ac_hdr""... 
$ac_c" 1>&6 -echo "configure:4793: checking for $ac_hdr" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:4803: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_hdr=HAVE_`echo $ac_hdr | sed 'y%abcdefghijklmnopqrstuvwxyz./-%ABCDEFGHIJKLMNOPQRSTUVWXYZ___%'` - cat >> confdefs.h <&6 -fi -done - -for ac_func in alarm -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:4832: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:4860: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - -echo $ac_n "checking for working mktime""... $ac_c" 1>&6 -echo "configure:4885: checking for working mktime" >&5 -if eval "test \"`echo '$''{'ac_cv_func_working_mktime'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - ac_cv_func_working_mktime=no -else - cat > conftest.$ac_ext < -# include -#else -# if HAVE_SYS_TIME_H -# include -# else -# include -# endif -#endif - -#if HAVE_UNISTD_H -# include -#endif - -#if !HAVE_ALARM -# define alarm(X) /* empty */ -#endif - -/* Work around redefinition to rpl_putenv by other config tests. */ -#undef putenv - -static time_t time_t_max; - -/* Values we'll use to set the TZ environment variable. */ -static const char *const tz_strings[] = { - (const char *) 0, "TZ=GMT0", "TZ=JST-9", - "TZ=EST+3EDT+2,M10.1.0/00:00:00,M2.3.0/00:00:00" -}; -#define N_STRINGS (sizeof (tz_strings) / sizeof (tz_strings[0])) - -/* Fail if mktime fails to convert a date in the spring-forward gap. - Based on a problem report from Andreas Jaeger. 
*/ -static void -spring_forward_gap () -{ - /* glibc (up to about 1998-10-07) failed this test) */ - struct tm tm; - - /* Use the portable POSIX.1 specification "TZ=PST8PDT,M4.1.0,M10.5.0" - instead of "TZ=America/Vancouver" in order to detect the bug even - on systems that don't support the Olson extension, or don't have the - full zoneinfo tables installed. */ - putenv ("TZ=PST8PDT,M4.1.0,M10.5.0"); - - tm.tm_year = 98; - tm.tm_mon = 3; - tm.tm_mday = 5; - tm.tm_hour = 2; - tm.tm_min = 0; - tm.tm_sec = 0; - tm.tm_isdst = -1; - if (mktime (&tm) == (time_t)-1) - exit (1); -} - -static void -mktime_test (now) - time_t now; -{ - struct tm *lt; - if ((lt = localtime (&now)) && mktime (lt) != now) - exit (1); - now = time_t_max - now; - if ((lt = localtime (&now)) && mktime (lt) != now) - exit (1); -} - -static void -irix_6_4_bug () -{ - /* Based on code from Ariel Faigon. */ - struct tm tm; - tm.tm_year = 96; - tm.tm_mon = 3; - tm.tm_mday = 0; - tm.tm_hour = 0; - tm.tm_min = 0; - tm.tm_sec = 0; - tm.tm_isdst = -1; - mktime (&tm); - if (tm.tm_mon != 2 || tm.tm_mday != 31) - exit (1); -} - -static void -bigtime_test (j) - int j; -{ - struct tm tm; - time_t now; - tm.tm_year = tm.tm_mon = tm.tm_mday = tm.tm_hour = tm.tm_min = tm.tm_sec = j; - now = mktime (&tm); - if (now != (time_t) -1) - { - struct tm *lt = localtime (&now); - if (! (lt - && lt->tm_year == tm.tm_year - && lt->tm_mon == tm.tm_mon - && lt->tm_mday == tm.tm_mday - && lt->tm_hour == tm.tm_hour - && lt->tm_min == tm.tm_min - && lt->tm_sec == tm.tm_sec - && lt->tm_yday == tm.tm_yday - && lt->tm_wday == tm.tm_wday - && ((lt->tm_isdst < 0 ? -1 : 0 < lt->tm_isdst) - == (tm.tm_isdst < 0 ? -1 : 0 < tm.tm_isdst)))) - exit (1); - } -} - -int -main () -{ - time_t t, delta; - int i, j; - - /* This test makes some buggy mktime implementations loop. - Give up after 60 seconds; a mktime slower than that - isn't worth using anyway. */ - alarm (60); - - for (time_t_max = 1; 0 < time_t_max; time_t_max *= 2) - continue; - time_t_max--; - delta = time_t_max / 997; /* a suitable prime number */ - for (i = 0; i < N_STRINGS; i++) - { - if (tz_strings[i]) - putenv (tz_strings[i]); - - for (t = 0; t <= time_t_max - delta; t += delta) - mktime_test (t); - mktime_test ((time_t) 60 * 60); - mktime_test ((time_t) 60 * 60 * 24); - - for (j = 1; 0 < j; j *= 2) - bigtime_test (j); - bigtime_test (j - 1); - } - irix_6_4_bug (); - spring_forward_gap (); - exit (0); -} -EOF -if { (eval echo configure:5042: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_func_working_mktime=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_func_working_mktime=no -fi -rm -fr conftest* -fi - -fi - -echo "$ac_t""$ac_cv_func_working_mktime" 1>&6 -if test $ac_cv_func_working_mktime = no; then - LIBOBJS="$LIBOBJS mktime.${ac_objext}" -fi - -case "$ac_cv_func_working_mktime" in -yes) cat >> confdefs.h <<\EOF -#define HAVE_MKTIME 1 -EOF - - ;; -esac - -echo $ac_n "checking for fmod in -lm""... 
$ac_c" 1>&6 -echo "configure:5070: checking for fmod in -lm" >&5 -ac_lib_var=`echo m'_'fmod | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lm $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_lib=HAVE_LIB`echo m | sed -e 's/[^a-zA-Z0-9_]/_/g' \ - -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'` - cat >> confdefs.h <&6 -fi - -for ac_func in memset memcpy memcmp fmod setlocale strchr strerror \ - strftime strncasecmp strtod system tzset -do -echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 -echo "configure:5120: checking for $ac_func" >&5 -if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char $ac_func(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_$ac_func) || defined (__stub___$ac_func) -choke me -#else -$ac_func(); -#endif - -; return 0; } -EOF -if { (eval echo configure:5148: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_$ac_func=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_$ac_func=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then - echo "$ac_t""yes" 1>&6 - ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` - cat >> confdefs.h <&6 -fi -done - - -ac_safe=`echo "dlfcn.h" | sed 'y%./+-%__p_%'` -echo $ac_n "checking for dlfcn.h""... $ac_c" 1>&6 -echo "configure:5175: checking for dlfcn.h" >&5 -if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -EOF -ac_try="$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" -{ (eval echo configure:5185: \"$ac_try\") 1>&5; (eval $ac_try) 2>&5; } -ac_err=`grep -v '^ *+' conftest.out | grep -v "^conftest.${ac_ext}\$"` -if test -z "$ac_err"; then - rm -rf conftest* - eval "ac_cv_header_$ac_safe=yes" -else - echo "$ac_err" >&5 - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_header_$ac_safe=no" -fi -rm -f conftest* -fi -if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then - echo "$ac_t""yes" 1>&6 - echo $ac_n "checking for dlopen in -ldl""... 
$ac_c" 1>&6 -echo "configure:5202: checking for dlopen in -ldl" >&5 -ac_lib_var=`echo dl'_'dlopen | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-ldl $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - cat >> confdefs.h <<\EOF -#define DYNAMIC 1 -EOF - - LIBS="$LIBS -ldl" - if test "$GCC" = yes - then - # Add others here as appropriate, - # one day use GNU libtool. - if uname | egrep -i linux > /dev/null - then - LDFLAGS="$LDFLAGS -rdynamic" - fi - fi - -else - echo "$ac_t""no" 1>&6 -fi - -else - echo "$ac_t""no" 1>&6 -fi - - -case `(uname) 2> /dev/null` in -*VMS*|*BeOS*) - cat >> confdefs.h <<\EOF -#define GETPGRP_VOID 1 -EOF - - ;; -*) echo $ac_n "checking whether getpgrp takes no argument""... $ac_c" 1>&6 -echo "configure:5268: checking whether getpgrp takes no argument" >&5 -if eval "test \"`echo '$''{'ac_cv_func_getpgrp_void'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$cross_compiling" = yes; then - { echo "configure: error: cannot check getpgrp if cross compiling" 1>&2; exit 1; } -else - cat > conftest.$ac_ext < -#include - -int pid; -int pg1, pg2, pg3, pg4; -int ng, np, s, child; - -main() -{ - pid = getpid(); - pg1 = getpgrp(0); - pg2 = getpgrp(); - pg3 = getpgrp(pid); - pg4 = getpgrp(1); - - /* - * If all of these values are the same, it's pretty sure that - * we're on a system that ignores getpgrp's first argument. - */ - if (pg2 == pg4 && pg1 == pg3 && pg2 == pg3) - exit(0); - - child = fork(); - if (child < 0) - exit(1); - else if (child == 0) { - np = getpid(); - /* - * If this is Sys V, this will not work; pgrp will be - * set to np because setpgrp just changes a pgrp to be - * the same as the pid. - */ - setpgrp(np, pg1); - ng = getpgrp(0); /* Same result for Sys V and BSD */ - if (ng == pg1) { - exit(1); - } else { - exit(0); - } - } else { - wait(&s); - exit(s>>8); - } -} - -EOF -if { (eval echo configure:5331: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_func_getpgrp_void=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_func_getpgrp_void=no -fi -rm -fr conftest* -fi - - -fi - -echo "$ac_t""$ac_cv_func_getpgrp_void" 1>&6 -if test $ac_cv_func_getpgrp_void = yes; then - cat >> confdefs.h <<\EOF -#define GETPGRP_VOID 1 -EOF - -fi - - ;; -esac - - -gawk_have_sockets=no -# Check for system-dependent location of socket libraries - -SOCKET_LIBS= -if test "$ISC" = yes; then - SOCKET_LIBS="-lnsl_s -linet" -else - # Martyn.Johnson@cl.cam.ac.uk says this is needed for Ultrix, if the X - # libraries were built with DECnet support. And karl@cs.umb.edu says - # the Alpha needs dnet_stub (dnet does not exist). - # - # ADR: Is this needed just for sockets??? 
-# AC_CHECK_LIB(dnet, dnet_ntoa, [SOCKET_LIBS="$SOCKET_LIBS -ldnet"]) -# if test $ac_cv_lib_dnet_ntoa = no; then -# AC_CHECK_LIB(dnet_stub, dnet_ntoa, -# [SOCKET_LIBS="$SOCKET_LIBS -ldnet_stub"]) -# fi - - # msh@cis.ufl.edu says -lnsl (and -lsocket) are needed for his 386/AT, - # to get the SysV transport functions. - # chad@anasazi.com says the Pyramid MIS-ES running DC/OSx (SVR4) - # needs -lnsl. - # The nsl library prevents programs from opening the X display - # on Irix 5.2, according to dickey@clark.net. - echo $ac_n "checking for gethostbyname""... $ac_c" 1>&6 -echo "configure:5383: checking for gethostbyname" >&5 -if eval "test \"`echo '$''{'ac_cv_func_gethostbyname'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. */ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char gethostbyname(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_gethostbyname) || defined (__stub___gethostbyname) -choke me -#else -gethostbyname(); -#endif - -; return 0; } -EOF -if { (eval echo configure:5411: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_gethostbyname=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_gethostbyname=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'gethostbyname`\" = yes"; then - echo "$ac_t""yes" 1>&6 - : -else - echo "$ac_t""no" 1>&6 -fi - - if test $ac_cv_func_gethostbyname = no; then - echo $ac_n "checking for gethostbyname in -lnsl""... $ac_c" 1>&6 -echo "configure:5432: checking for gethostbyname in -lnsl" >&5 -ac_lib_var=`echo nsl'_'gethostbyname | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lnsl $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SOCKET_LIBS="$SOCKET_LIBS -lnsl" -else - echo "$ac_t""no" 1>&6 -fi - - fi - - # lieder@skyler.mavd.honeywell.com says without -lsocket, - # socket/setsockopt and other routines are undefined under SCO ODT - # 2.0. But -lsocket is broken on IRIX 5.2 (and is not necessary - # on later versions), says simon@lia.di.epfl.ch: it contains - # gethostby* variants that don't use the nameserver (or something). - # -lsocket must be given before -lnsl if both are needed. - # We assume that if connect needs -lnsl, so does gethostbyname. - echo $ac_n "checking for connect""... $ac_c" 1>&6 -echo "configure:5481: checking for connect" >&5 -if eval "test \"`echo '$''{'ac_cv_func_connect'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -/* Override any gcc2 internal prototype to avoid an error. 
*/ -/* We use char because int might match the return type of a gcc2 - builtin and then its argument prototype would still apply. */ -char connect(); - -int main() { - -/* The GNU C library defines this for functions which it implements - to always fail with ENOSYS. Some functions are actually named - something starting with __ and the normal name is an alias. */ -#if defined (__stub_connect) || defined (__stub___connect) -choke me -#else -connect(); -#endif - -; return 0; } -EOF -if { (eval echo configure:5509: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_func_connect=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_func_connect=no" -fi -rm -f conftest* -fi - -if eval "test \"`echo '$ac_cv_func_'connect`\" = yes"; then - echo "$ac_t""yes" 1>&6 - : -else - echo "$ac_t""no" 1>&6 -fi - - if test $ac_cv_func_connect = no; then - echo $ac_n "checking for connect in -lsocket""... $ac_c" 1>&6 -echo "configure:5530: checking for connect in -lsocket" >&5 -ac_lib_var=`echo socket'_'connect | sed 'y%./+-%__p_%'` -if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_save_LIBS="$LIBS" -LIBS="-lsocket $SOCKET_LIBS $LIBS" -cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=yes" -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - eval "ac_cv_lib_$ac_lib_var=no" -fi -rm -f conftest* -LIBS="$ac_save_LIBS" - -fi -if eval "test \"`echo '$ac_cv_lib_'$ac_lib_var`\" = yes"; then - echo "$ac_t""yes" 1>&6 - SOCKET_LIBS="-lsocket $SOCKET_LIBS" - gawk_have_sockets=yes -else - echo "$ac_t""no" 1>&6 -fi - - else - gawk_have_sockets=yes - fi -fi - -if test "${gawk_have_sockets}" = "yes" -then - echo $ac_n "checking where to find the socket library calls""... $ac_c" 1>&6 -echo "configure:5578: checking where to find the socket library calls" >&5 - case "${SOCKET_LIBS}" in - ?*) gawk_lib_loc="${SOCKET_LIBS}" ;; - *) gawk_lib_loc="the standard library" ;; - esac - echo "$ac_t""${gawk_lib_loc}" 1>&6 - - cat >> confdefs.h <<\EOF -#define HAVE_SOCKETS 1 -EOF - -fi - - -echo $ac_n "checking for st_blksize in struct stat""... $ac_c" 1>&6 -echo "configure:5593: checking for st_blksize in struct stat" >&5 -if eval "test \"`echo '$''{'ac_cv_struct_st_blksize'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -int main() { -struct stat s; s.st_blksize; -; return 0; } -EOF -if { (eval echo configure:5606: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_struct_st_blksize=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_struct_st_blksize=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_struct_st_blksize" 1>&6 -if test $ac_cv_struct_st_blksize = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_ST_BLKSIZE 1 -EOF - -fi - -echo $ac_n "checking whether time.h and sys/time.h may both be included""... 
$ac_c" 1>&6 -echo "configure:5627: checking whether time.h and sys/time.h may both be included" >&5 -if eval "test \"`echo '$''{'ac_cv_header_time'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -#include -int main() { -struct tm *tp; -; return 0; } -EOF -if { (eval echo configure:5641: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_header_time=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_header_time=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_header_time" 1>&6 -if test $ac_cv_header_time = yes; then - cat >> confdefs.h <<\EOF -#define TIME_WITH_SYS_TIME 1 -EOF - -fi - -echo $ac_n "checking whether struct tm is in sys/time.h or time.h""... $ac_c" 1>&6 -echo "configure:5662: checking whether struct tm is in sys/time.h or time.h" >&5 -if eval "test \"`echo '$''{'ac_cv_struct_tm'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include -int main() { -struct tm *tp; tp->tm_sec; -; return 0; } -EOF -if { (eval echo configure:5675: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_struct_tm=time.h -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_struct_tm=sys/time.h -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_struct_tm" 1>&6 -if test $ac_cv_struct_tm = sys/time.h; then - cat >> confdefs.h <<\EOF -#define TM_IN_SYS_TIME 1 -EOF - -fi - -echo $ac_n "checking for tm_zone in struct tm""... $ac_c" 1>&6 -echo "configure:5696: checking for tm_zone in struct tm" >&5 -if eval "test \"`echo '$''{'ac_cv_struct_tm_zone'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#include <$ac_cv_struct_tm> -int main() { -struct tm tm; tm.tm_zone; -; return 0; } -EOF -if { (eval echo configure:5709: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_struct_tm_zone=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_struct_tm_zone=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_struct_tm_zone" 1>&6 -if test "$ac_cv_struct_tm_zone" = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_TM_ZONE 1 -EOF - -else - echo $ac_n "checking for tzname""... $ac_c" 1>&6 -echo "configure:5729: checking for tzname" >&5 -if eval "test \"`echo '$''{'ac_cv_var_tzname'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext < -#ifndef tzname /* For SGI. */ -extern char *tzname[]; /* RS6000 and others reject char **tzname. */ -#endif -int main() { -atoi(*tzname); -; return 0; } -EOF -if { (eval echo configure:5744: \"$ac_link\") 1>&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext}; then - rm -rf conftest* - ac_cv_var_tzname=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_var_tzname=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_var_tzname" 1>&6 - if test $ac_cv_var_tzname = yes; then - cat >> confdefs.h <<\EOF -#define HAVE_TZNAME 1 -EOF - - fi -fi - - -echo $ac_n "checking whether char is unsigned""... $ac_c" 1>&6 -echo "configure:5767: checking whether char is unsigned" >&5 -if eval "test \"`echo '$''{'ac_cv_c_char_unsigned'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - if test "$GCC" = yes; then - # GCC predefines this symbol on systems where it applies. 
-cat > conftest.$ac_ext <&5 | - egrep "yes" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_c_char_unsigned=yes -else - rm -rf conftest* - ac_cv_c_char_unsigned=no -fi -rm -f conftest* - -else -if test "$cross_compiling" = yes; then - { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } -else - cat > conftest.$ac_ext <&5; (eval $ac_link) 2>&5; } && test -s conftest${ac_exeext} && (./conftest; exit) 2>/dev/null -then - ac_cv_c_char_unsigned=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -fr conftest* - ac_cv_c_char_unsigned=no -fi -rm -fr conftest* -fi - -fi -fi - -echo "$ac_t""$ac_cv_c_char_unsigned" 1>&6 -if test $ac_cv_c_char_unsigned = yes && test "$GCC" != yes; then - cat >> confdefs.h <<\EOF -#define __CHAR_UNSIGNED__ 1 -EOF - -fi - -echo $ac_n "checking for working const""... $ac_c" 1>&6 -echo "configure:5830: checking for working const" >&5 -if eval "test \"`echo '$''{'ac_cv_c_const'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <j = 5; -} -{ /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */ - const int foo = 10; -} - -; return 0; } -EOF -if { (eval echo configure:5884: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_const=yes -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 - rm -rf conftest* - ac_cv_c_const=no -fi -rm -f conftest* -fi - -echo "$ac_t""$ac_cv_c_const" 1>&6 -if test $ac_cv_c_const = no; then - cat >> confdefs.h <<\EOF -#define const -EOF - -fi - -echo $ac_n "checking for inline""... $ac_c" 1>&6 -echo "configure:5905: checking for inline" >&5 -if eval "test \"`echo '$''{'ac_cv_c_inline'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - ac_cv_c_inline=no -for ac_kw in inline __inline__ __inline; do - cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then - rm -rf conftest* - ac_cv_c_inline=$ac_kw; break -else - echo "configure: failed program was:" >&5 - cat conftest.$ac_ext >&5 -fi -rm -f conftest* -done - -fi - -echo "$ac_t""$ac_cv_c_inline" 1>&6 -case "$ac_cv_c_inline" in - inline | yes) ;; - no) cat >> confdefs.h <<\EOF -#define inline -EOF - ;; - *) cat >> confdefs.h <&6 -echo "configure:5947: checking for preprocessor stringizing operator" >&5 -if eval "test \"`echo '$''{'ac_cv_c_stringize'+set}'`\" = set"; then - echo $ac_n "(cached) $ac_c" 1>&6 -else - cat > conftest.$ac_ext <&5 | - egrep "#teststring" >/dev/null 2>&1; then - rm -rf conftest* - ac_cv_c_stringize=no -else - rm -rf conftest* - ac_cv_c_stringize=yes -fi -rm -f conftest* - -fi - -if test "${ac_cv_c_stringize}" = yes -then - cat >> confdefs.h <<\EOF -#define HAVE_STRINGIZE 1 -EOF - -fi -echo "$ac_t""${ac_cv_c_stringize}" 1>&6 - - -trap '' 1 2 15 -cat > confcache <<\EOF -# This file is a shell script that caches the results of configure -# tests run on this system so they can be shared between configure -# scripts and configure runs. It is not useful on other systems. -# If it contains results you don't want to keep, you may remove or edit it. -# -# By default, configure uses ./config.cache as the cache file, -# creating it if it does not exist already. You can give configure -# the --cache-file=FILE option to use a different cache file; that is -# what configure does when it calls configure scripts in -# subdirectories, so they share the cache. -# Giving --cache-file=/dev/null disables caching, for debugging configure. 
-# config.status only pays attention to the cache file if you give it the -# --recheck option to rerun configure. -# -EOF -# The following way of writing the cache mishandles newlines in values, -# but we know of no workaround that is simple, portable, and efficient. -# So, don't put newlines in cache variables' values. -# Ultrix sh set writes to stderr and can't be redirected directly, -# and sets the high bit in the cache file unless we assign to the vars. -(set) 2>&1 | - case `(ac_space=' '; set | grep ac_space) 2>&1` in - *ac_space=\ *) - # `set' does not quote correctly, so add quotes (double-quote substitution - # turns \\\\ into \\, and sed turns \\ into \). - sed -n \ - -e "s/'/'\\\\''/g" \ - -e "s/^\\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\\)=\\(.*\\)/\\1=\${\\1='\\2'}/p" - ;; - *) - # `set' quotes correctly as required by POSIX, so do not add quotes. - sed -n -e 's/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=${\1=\2}/p' - ;; - esac >> confcache -if cmp -s $cache_file confcache; then - : -else - if test -w $cache_file; then - echo "updating cache $cache_file" - cat confcache > $cache_file - else - echo "not updating unwritable cache $cache_file" - fi -fi -rm -f confcache - -trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 - -test "x$prefix" = xNONE && prefix=$ac_default_prefix -# Let make expand exec_prefix. -test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' - -# Any assignment to VPATH causes Sun make to only execute -# the first set of double-colon rules, so remove it if not needed. -# If there is a colon in the path, we need to keep it. -if test "x$srcdir" = x.; then - ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' -fi - -trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 - -DEFS=-DHAVE_CONFIG_H - -# Without the "./", some shells look in PATH for config.status. -: ${CONFIG_STATUS=./config.status} - -echo creating $CONFIG_STATUS -rm -f $CONFIG_STATUS -cat > $CONFIG_STATUS </dev/null | sed 1q`: -# -# $0 $ac_configure_args -# -# Compiler output produced by configure, useful for debugging -# configure, is in ./config.log if it exists. 
- -ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" -for ac_option -do - case "\$ac_option" in - -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) - echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" - exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; - -version | --version | --versio | --versi | --vers | --ver | --ve | --v) - echo "$CONFIG_STATUS generated by autoconf version 2.13" - exit 0 ;; - -help | --help | --hel | --he | --h) - echo "\$ac_cs_usage"; exit 0 ;; - *) echo "\$ac_cs_usage"; exit 1 ;; - esac -done - -ac_given_srcdir=$srcdir -ac_given_INSTALL="$INSTALL" - -trap 'rm -fr `echo "Makefile \ - awklib/Makefile \ - doc/Makefile \ - intl/Makefile \ - po/Makefile.in \ - test/Makefile config.h:configh.in" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 -EOF -cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF -$ac_vpsub -$extrasub -s%@SHELL@%$SHELL%g -s%@CFLAGS@%$CFLAGS%g -s%@CPPFLAGS@%$CPPFLAGS%g -s%@CXXFLAGS@%$CXXFLAGS%g -s%@FFLAGS@%$FFLAGS%g -s%@DEFS@%$DEFS%g -s%@LDFLAGS@%$LDFLAGS%g -s%@LIBS@%$LIBS%g -s%@exec_prefix@%$exec_prefix%g -s%@prefix@%$prefix%g -s%@program_transform_name@%$program_transform_name%g -s%@bindir@%$bindir%g -s%@sbindir@%$sbindir%g -s%@libexecdir@%$libexecdir%g -s%@datadir@%$datadir%g -s%@sysconfdir@%$sysconfdir%g -s%@sharedstatedir@%$sharedstatedir%g -s%@localstatedir@%$localstatedir%g -s%@libdir@%$libdir%g -s%@includedir@%$includedir%g -s%@oldincludedir@%$oldincludedir%g -s%@infodir@%$infodir%g -s%@mandir@%$mandir%g -s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g -s%@INSTALL_SCRIPT@%$INSTALL_SCRIPT%g -s%@INSTALL_DATA@%$INSTALL_DATA%g -s%@PACKAGE@%$PACKAGE%g -s%@VERSION@%$VERSION%g -s%@ACLOCAL@%$ACLOCAL%g -s%@AUTOCONF@%$AUTOCONF%g -s%@AUTOMAKE@%$AUTOMAKE%g -s%@AUTOHEADER@%$AUTOHEADER%g -s%@MAKEINFO@%$MAKEINFO%g -s%@AMTAR@%$AMTAR%g -s%@install_sh@%$install_sh%g -s%@AWK@%$AWK%g -s%@SET_MAKE@%$SET_MAKE%g -s%@AMDEP@%$AMDEP%g -s%@AMDEPBACKSLASH@%$AMDEPBACKSLASH%g -s%@DEPDIR@%$DEPDIR%g -s%@YACC@%$YACC%g -s%@LN_S@%$LN_S%g -s%@CC@%$CC%g -s%@CPP@%$CPP%g -s%@CCDEPMODE@%$CCDEPMODE%g -s%@RANLIB@%$RANLIB%g -s%@U@%$U%g -s%@ANSI2KNR@%$ANSI2KNR%g -s%@ALLOCA@%$ALLOCA%g -s%@USE_NLS@%$USE_NLS%g -s%@MSGFMT@%$MSGFMT%g -s%@GMSGFMT@%$GMSGFMT%g -s%@XGETTEXT@%$XGETTEXT%g -s%@GENCAT@%$GENCAT%g -s%@USE_INCLUDED_LIBINTL@%$USE_INCLUDED_LIBINTL%g -s%@CATALOGS@%$CATALOGS%g -s%@CATOBJEXT@%$CATOBJEXT%g -s%@DATADIRNAME@%$DATADIRNAME%g -s%@GMOFILES@%$GMOFILES%g -s%@INSTOBJEXT@%$INSTOBJEXT%g -s%@INTLDEPS@%$INTLDEPS%g -s%@INTLLIBS@%$INTLLIBS%g -s%@INTLOBJS@%$INTLOBJS%g -s%@POFILES@%$POFILES%g -s%@POSUB@%$POSUB%g -s%@INCLUDE_LOCALE_H@%$INCLUDE_LOCALE_H%g -s%@GT_NO@%$GT_NO%g -s%@GT_YES@%$GT_YES%g -s%@MKINSTALLDIRS@%$MKINSTALLDIRS%g -s%@l@%$l%g -s%@SOCKET_LIBS@%$SOCKET_LIBS%g - -CEOF -EOF - -cat >> $CONFIG_STATUS <<\EOF - -# Split the substitutions into bite-sized pieces for seds with -# small command number limits, like on Digital OSF/1 and HP-UX. -ac_max_sed_cmds=90 # Maximum number of lines to put in a sed script. -ac_file=1 # Number of current file. -ac_beg=1 # First line for current file. -ac_end=$ac_max_sed_cmds # Line after last line for current file. -ac_more_lines=: -ac_sed_cmds="" -while $ac_more_lines; do - if test $ac_beg -gt 1; then - sed "1,${ac_beg}d; ${ac_end}q" conftest.subs > conftest.s$ac_file - else - sed "${ac_end}q" conftest.subs > conftest.s$ac_file - fi - if test ! 
-s conftest.s$ac_file; then - ac_more_lines=false - rm -f conftest.s$ac_file - else - if test -z "$ac_sed_cmds"; then - ac_sed_cmds="sed -f conftest.s$ac_file" - else - ac_sed_cmds="$ac_sed_cmds | sed -f conftest.s$ac_file" - fi - ac_file=`expr $ac_file + 1` - ac_beg=$ac_end - ac_end=`expr $ac_end + $ac_max_sed_cmds` - fi -done -if test -z "$ac_sed_cmds"; then - ac_sed_cmds=cat -fi -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". - case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - # Adjust a relative srcdir, top_srcdir, and INSTALL for subdirectories. - - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dir_suffix. - ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dir_suffix= ac_dots= - fi - - case "$ac_given_srcdir" in - .) srcdir=. - if test -z "$ac_dots"; then top_srcdir=. - else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; - /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; - *) # Relative path. - srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" - top_srcdir="$ac_dots$ac_given_srcdir" ;; - esac - - case "$ac_given_INSTALL" in - [/$]*) INSTALL="$ac_given_INSTALL" ;; - *) INSTALL="$ac_dots$ac_given_INSTALL" ;; - esac - - echo creating "$ac_file" - rm -f "$ac_file" - configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." - case "$ac_file" in - *Makefile*) ac_comsub="1i\\ -# $configure_input" ;; - *) ac_comsub= ;; - esac - - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - sed -e "$ac_comsub -s%@configure_input@%$configure_input%g -s%@srcdir@%$srcdir%g -s%@top_srcdir@%$top_srcdir%g -s%@INSTALL@%$INSTALL%g -" $ac_file_inputs | (eval "$ac_sed_cmds") > $ac_file -fi; done -rm -f conftest.s* - -# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where -# NAME is the cpp macro being defined and VALUE is the value it is being given. -# -# ac_d sets the value in "#define NAME VALUE" lines. -ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' -ac_dB='\([ ][ ]*\)[^ ]*%\1#\2' -ac_dC='\3' -ac_dD='%g' -# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE". -ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' -ac_uB='\([ ]\)%\1#\2define\3' -ac_uC=' ' -ac_uD='\4%g' -# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE". -ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' -ac_eB='$%\1#\2define\3' -ac_eC=' ' -ac_eD='%g' - -if test "${CONFIG_HEADERS+set}" != set; then -EOF -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -fi -for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then - # Support "outfile[:infile[:infile...]]", defaulting infile="outfile.in". 
- case "$ac_file" in - *:*) ac_file_in=`echo "$ac_file"|sed 's%[^:]*:%%'` - ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; - *) ac_file_in="${ac_file}.in" ;; - esac - - echo creating $ac_file - - rm -f conftest.frag conftest.in conftest.out - ac_file_inputs=`echo $ac_file_in|sed -e "s%^%$ac_given_srcdir/%" -e "s%:% $ac_given_srcdir/%g"` - cat $ac_file_inputs > conftest.in - -EOF - -# Transform confdefs.h into a sed script conftest.vals that substitutes -# the proper values into config.h.in to produce config.h. And first: -# Protect against being on the right side of a sed subst in config.status. -# Protect against being in an unquoted here document in config.status. -rm -f conftest.vals -cat > conftest.hdr <<\EOF -s/[\\&%]/\\&/g -s%[\\$`]%\\&%g -s%#define \([A-Za-z_][A-Za-z0-9_]*\) *\(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp -s%ac_d%ac_u%gp -s%ac_u%ac_e%gp -EOF -sed -n -f conftest.hdr confdefs.h > conftest.vals -rm -f conftest.hdr - -# This sed command replaces #undef with comments. This is necessary, for -# example, in the case of _POSIX_SOURCE, which is predefined and required -# on some systems where configure will not decide to define it. -cat >> conftest.vals <<\EOF -s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% -EOF - -# Break up conftest.vals because some shells have a limit on -# the size of here documents, and old seds have small limits too. - -rm -f conftest.tail -while : -do - ac_lines=`grep -c . conftest.vals` - # grep -c gives empty output for an empty file on some AIX systems. - if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi - # Write a limited-size here document to conftest.frag. - echo ' cat > conftest.frag <> $CONFIG_STATUS - sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS - echo 'CEOF - sed -f conftest.frag conftest.in > conftest.out - rm -f conftest.in - mv conftest.out conftest.in -' >> $CONFIG_STATUS - sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail - rm -f conftest.vals - mv conftest.tail conftest.vals -done -rm -f conftest.vals - -cat >> $CONFIG_STATUS <<\EOF - rm -f conftest.frag conftest.h - echo "/* $ac_file. Generated automatically by configure. */" > conftest.h - cat conftest.in >> conftest.h - rm -f conftest.in - if cmp -s $ac_file conftest.h 2>/dev/null; then - echo "$ac_file is unchanged" - rm -f conftest.h - else - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` - if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then - # The file is in a subdirectory. - test ! -d "$ac_dir" && mkdir "$ac_dir" - fi - rm -f $ac_file - mv conftest.h $ac_file - fi -fi; done - -EOF - -cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF -srcdir=$ac_given_srcdir -while test -n "$ac_sources"; do - set $ac_dests; ac_dest=$1; shift; ac_dests=$* - set $ac_sources; ac_source=$1; shift; ac_sources=$* - - echo "linking $srcdir/$ac_source to $ac_dest" - - if test ! -r $srcdir/$ac_source; then - { echo "configure: error: $srcdir/$ac_source: File not found" 1>&2; exit 1; } - fi - rm -f $ac_dest - - # Make relative symlinks. - # Remove last slash and all that follows it. Not all systems have dirname. - ac_dest_dir=`echo $ac_dest|sed 's%/[^/][^/]*$%%'` - if test "$ac_dest_dir" != "$ac_dest" && test "$ac_dest_dir" != .; then - # The dest file is in a subdirectory. - test ! -d "$ac_dest_dir" && mkdir "$ac_dest_dir" - ac_dest_dir_suffix="/`echo $ac_dest_dir|sed 's%^\./%%'`" - # A "../" for each directory in $ac_dest_dir_suffix. 
- ac_dots=`echo $ac_dest_dir_suffix|sed 's%/[^/]*%../%g'` - else - ac_dest_dir_suffix= ac_dots= - fi - - case "$srcdir" in - [/$]*) ac_rel_source="$srcdir/$ac_source" ;; - *) ac_rel_source="$ac_dots$srcdir/$ac_source" ;; - esac - - # Make a symlink if possible; otherwise try a hard link. - if ln -s $ac_rel_source $ac_dest 2>/dev/null || - ln $srcdir/$ac_source $ac_dest; then : - else - { echo "configure: error: can not link $ac_dest to $srcdir/$ac_source" 1>&2; exit 1; } - fi -done -EOF -cat >> $CONFIG_STATUS < `echo $am_file | sed 's%:.*%%;s%[^/]*$%%'`stamp-h$am_indx - ;; - esac - am_indx=\`expr \$am_indx + 1\` - done -AMDEP="$AMDEP" -ac_aux_dir="$ac_aux_dir" - - -EOF -cat >> $CONFIG_STATUS <<\EOF -test -z "$CONFIG_HEADERS" || echo timestamp > stamp-h - -test x"$AMDEP" != x"" || -for mf in $CONFIG_FILES; do - case "$mf" in - Makefile) dirpart=.;; - */Makefile) dirpart=`echo "$mf" | sed -e 's|/[^/]*$||'`;; - *) continue;; - esac - grep '^DEP_FILES *= *[^ #]' < "$mf" > /dev/null || continue - # Extract the definition of DEP_FILES from the Makefile without - # running `make'. - DEPDIR=`sed -n -e '/^DEPDIR = / s///p' < "$mf"` - test -z "$DEPDIR" && continue - # When using ansi2knr, U may be empty or an underscore; expand it - U=`sed -n -e '/^U = / s///p' < "$mf"` - test -d "$dirpart/$DEPDIR" || mkdir "$dirpart/$DEPDIR" - # We invoke sed twice because it is the simplest approach to - # changing $(DEPDIR) to its actual value in the expansion. - for file in `sed -n -e ' - /^DEP_FILES = .*\\\\$/ { - s/^DEP_FILES = // - :loop - s/\\\\$// - p - n - /\\\\$/ b loop - p - } - /^DEP_FILES = / s/^DEP_FILES = //p' < "$mf" | \ - sed -e 's/\$(DEPDIR)/'"$DEPDIR"'/g' -e 's/\$U/'"$U"'/g'`; do - # Make sure the directory exists. - test -f "$dirpart/$file" && continue - fdir=`echo "$file" | sed -e 's|/[^/]*$||'` - $ac_aux_dir/mkinstalldirs "$dirpart/$fdir" > /dev/null 2>&1 - # echo "creating $dirpart/$file" - echo '# dummy' > "$dirpart/$file" - done -done - -case "$CONFIG_FILES" in *po/Makefile.in*) - sed -e "/POTFILES =/r po/POTFILES" po/Makefile.in > po/Makefile - esac - -exit 0 -EOF -chmod +x $CONFIG_STATUS -rm -fr confdefs* $ac_clean_files -test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 - diff --git a/contrib/awk/configure.in b/contrib/awk/configure.in deleted file mode 100644 index f3de822..0000000 --- a/contrib/awk/configure.in +++ /dev/null @@ -1,182 +0,0 @@ -dnl -dnl configure.in --- autoconf input file for gawk -dnl -dnl Copyright (C) 1995-2001 the Free Software Foundation, Inc. -dnl -dnl This file is part of GAWK, the GNU implementation of the -dnl AWK Programming Language. -dnl -dnl GAWK is free software; you can redistribute it and/or modify -dnl it under the terms of the GNU General Public License as published by -dnl the Free Software Foundation; either version 2 of the License, or -dnl (at your option) any later version. -dnl -dnl GAWK is distributed in the hope that it will be useful, -dnl but WITHOUT ANY WARRANTY; without even the implied warranty of -dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -dnl GNU General Public License for more details. -dnl -dnl You should have received a copy of the GNU General Public License -dnl along with this program; if not, write to the Free Software -dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -dnl - -dnl Process this file with autoconf to produce a configure script. 
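Another stand-alone C sketch for orientation, not part of the removed sources: it illustrates the idiom served by the getpgrp probe in the configure.in body that follows and by the GETPGRP_VOID definitions in custom.h further down. GETPGRP_VOID is the genuine autoconf macro name; the helper function and the stand-alone default are illustrative assumptions.

    /* editorial sketch -- not part of the removed gawk sources */
    #include <sys/types.h>
    #include <unistd.h>

    /* Default to the POSIX behaviour so the sketch builds on its own;
       a real build would take this from config.h instead. */
    #ifndef GETPGRP_VOID
    #define GETPGRP_VOID 1
    #endif

    static pid_t
    get_process_group(void)
    {
    #ifdef GETPGRP_VOID
        return getpgrp();           /* POSIX: no argument */
    #else
        return getpgrp(0);          /* old BSD: pid argument, 0 = this process */
    #endif
    }

    int
    main(void)
    {
        return get_process_group() > 0 ? 0 : 1;
    }

configure.in hardwires GETPGRP_VOID for VMS POSIX and BeOS and lets AC_FUNC_GETPGRP decide everywhere else; this macro is the decision that choice encodes.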
-
-AC_INIT(awk.h)
-AC_PREREQ(2.13)
-AM_INIT_AUTOMAKE(gawk, 3.1.0)
-AM_CONFIG_HEADER(config.h:configh.in)
-
-dnl Additional argument stuff
-AC_ARG_ENABLE(portals, [ --enable-portals Enable /p as path prefix for portals], AC_DEFINE(HAVE_PORTALS))
-
-dnl checks for programs
-AC_PROG_YACC
-AC_PROG_LN_S
-AC_PROG_CC
-AC_PROG_CPP
-AC_PROG_RANLIB
-
-# This is a hack. Different versions of install on different systems
-# are just too different. Chuck it and use install-sh.
-#
-# If the user supplies $INSTALL, figure they know what they're doing.
-if test "x$INSTALL" = "x"
-then
-    INSTALL="$srcdir/install-sh -c"
-    export INSTALL
-fi
-
-AC_PROG_INSTALL
-
-AC_PROG_MAKE_SET
-
-# This is mainly for my use during testing and development.
-# Yes, it's a bit of a hack.
-AC_MSG_CHECKING([for special development options])
-if test -f $srcdir/.developing
-then
-    # add other debug flags as appropriate, save GAWKDEBUG for emergencies
-    CFLAGS="$CFLAGS -DARRAYDEBUG"
-    # turn on compiler warnings if we're doing development
-    if test "$GCC" = yes
-    then
-        CFLAGS="$CFLAGS -Wall"
-    fi
-    AC_MSG_RESULT([yes])
-else
-    AC_MSG_RESULT([no])
-fi
-
-AC_SUBST(CFLAGS)
-
-dnl checks for systems
-AC_AIX
-AC_ISC_POSIX
-AC_MINIX
-AC_SYS_LARGEFILE
-GAWK_AC_AIX_TWEAK
-GAWK_AC_LINUX_ALPHA
-
-if test "$ISC" = 1 # will be set by test for ISC
-then
-dnl need -D_SYSV3 for ISC
-    CFLAGS="$CFLAGS -D_SYSV3"
-fi
-
-dnl check for C compiler for automake
-AM_PROG_CC_STDC
-AM_C_PROTOTYPES
-
-ALL_LINGUAS="he"
-AM_GNU_GETTEXT
-
-dnl checks for header files
-AC_HEADER_STDC
-AC_HEADER_SYS_WAIT
-AC_CHECK_HEADERS(fcntl.h limits.h locale.h libintl.h mcheck.h \
-    netdb.h netinet/in.h signum.h stdarg.h string.h \
-    sys/param.h sys/socket.h unistd.h)
-
-if test "$ac_cv_header_string_h" = yes
-then
-    AC_CHECK_HEADERS(memory.h)
-else
-    AC_CHECK_HEADERS(strings.h)
-fi
-
-dnl checks for typedefs
-AC_TYPE_PID_T
-AC_TYPE_SIGNAL
-AC_SIZE_T
-AC_TYPE_GETGROUPS
-GAWK_AC_TYPE_SSIZE_T
-AC_EGREP_HEADER([int.*sprintf], stdio.h,
-    AC_DEFINE(SPRINTF_RET, int),
-    AC_DEFINE(SPRINTF_RET, char *))
-
-dnl checks for functions
-AC_DEFINE(REGEX_MALLOC)
-AC_FUNC_VPRINTF
-dnl one day use [ AC_CHECK_TYPE(ssize_t, int) ]
-GAWK_AC_FUNC_STRTOD_C89
-AC_FUNC_MKTIME
-case "$ac_cv_func_working_mktime" in
-yes) AC_DEFINE(HAVE_MKTIME)
-    ;;
-esac
-
-AC_CHECK_LIB(m, fmod)
-AC_CHECK_FUNCS(memset memcpy memcmp fmod setlocale strchr strerror \
-    strftime strncasecmp strtod system tzset)
-
-dnl check for dynamic linking
-dnl This is known to be very primitive
-AC_CHECK_HEADER(dlfcn.h,
-    AC_CHECK_LIB(dl, dlopen,
-        AC_DEFINE(DYNAMIC)
-        LIBS="$LIBS -ldl"
-        if test "$GCC" = yes
-        then
-            # Add others here as appropriate,
-            # one day use GNU libtool.
-            if uname | egrep -i linux > /dev/null
-            then
-                LDFLAGS="$LDFLAGS -rdynamic"
-            fi
-        fi
-))
-
-dnl check for how to use getpgrp
-dnl have to hardwire it for VMS POSIX. Sigh.
-dnl ditto for BeOS.
-case `(uname) 2> /dev/null` in -*VMS*|*BeOS*) - AC_DEFINE(GETPGRP_VOID) - ;; -*) AC_FUNC_GETPGRP - ;; -esac - -dnl check for sockets -GAWK_AC_LIB_SOCKETS - -dnl checks for structure members -AC_STRUCT_ST_BLKSIZE -AC_HEADER_TIME -AC_STRUCT_TM -AC_STRUCT_TIMEZONE - -dnl checks for compiler characteristics -AC_C_CHAR_UNSIGNED -AC_C_CONST -AC_C_INLINE -AC_C_STRINGIZE - -AC_OUTPUT(Makefile \ - awklib/Makefile \ - doc/Makefile \ - intl/Makefile \ - po/Makefile.in \ - test/Makefile) diff --git a/contrib/awk/custom.h b/contrib/awk/custom.h deleted file mode 100644 index 75ceab2..0000000 --- a/contrib/awk/custom.h +++ /dev/null @@ -1,87 +0,0 @@ -/* - * custom.h - * - * This file is for use on systems where Autoconf isn't quite able to - * get things right. It is included after config.h in awk.h, to override - * definitions from Autoconf that are erroneous. See the manual for more - * information. - * - * If you make additions to this file for your system, please send me - * the information, to arnold@gnu.org. - */ - -/* - * Copyright (C) 1995-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -/* for MIPS RiscOS, from Nelson H. F. Beebe, beebe@math.utah.edu */ -#if defined(__host_mips) && defined(SYSTYPE_BSD43) -#undef HAVE_STRTOD -#undef HAVE_STRERROR -#endif - -/* for VMS POSIX, from Pat Rankin, rankin@eql.caltech.edu */ -#ifdef VMS_POSIX -#undef VMS -#include "vms/redirect.h" -#endif - -/* For QNX, based on submission from Michael Hunter, mphunter@qnx.com */ -#ifdef __QNX__ -#define GETPGRP_VOID 1 -#endif - -/* For Amigas, from Fred Fish, fnf@ninemoons.com */ -#ifdef __amigaos__ -#define fork vfork -#endif - -/* For BeOS, from mc@whoever.com */ -#if defined(__dest_os) && __dest_os == __be_os -#define BROKEN_STRNCASECMP -#define ELIDE_CODE -#include -#endif - -/* For Tandems, based on code from scldad@sdc.com.au */ -#ifdef TANDEM -#define tempnam(a,b) tmpnam(NULL) -#define variable(a,b,c) variabl(a,b,c) -#define srandom srand -#define random rand - -#include -#endif - -/* For 16-bit DOS */ -#if defined(MSC_VER) && defined(MSDOS) -#define NO_PROFILING 1 -#endif - -/* For MacOS X, which is almost BSD Unix */ -#ifdef __APPLE__ -#define HAVE_MKTIME 1 -#endif - -/* For ULTRIX 4.3 */ -#ifdef ultrix -#define HAVE_MKTIME 1 -#endif - diff --git a/contrib/awk/dfa.c b/contrib/awk/dfa.c deleted file mode 100644 index 51d0efd..0000000 --- a/contrib/awk/dfa.c +++ /dev/null @@ -1,2633 +0,0 @@ -/* dfa.c - deterministic extended regexp routines for GNU - Copyright 1988, 1998, 2000 Free Software Foundation, Inc. 
- - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ - -/* Written June, 1988 by Mike Haertel - Modified July, 1988 by Arthur David Olson to assist BMG speedups */ - -#include -__FBSDID("$FreeBSD$"); - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include - -#ifndef VMS -#include -#else -#include -#endif -#ifdef STDC_HEADERS -#include -#else -extern char *calloc(), *malloc(), *realloc(); -extern void free(); -#endif - -#if defined(HAVE_STRING_H) || defined(STDC_HEADERS) -#include -#undef index -#define index strchr -#else -#include -#endif - -#ifndef DEBUG /* use the same approach as regex.c */ -#undef assert -#define assert(e) -#endif /* DEBUG */ - -#ifndef isgraph -#define isgraph(C) (isprint(C) && !isspace(C)) -#endif - -#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) -#define ISALPHA(C) isalpha(C) -#define ISUPPER(C) isupper(C) -#define ISLOWER(C) islower(C) -#define ISDIGIT(C) isdigit(C) -#define ISXDIGIT(C) isxdigit(C) -#define ISSPACE(C) isspace(C) -#define ISPUNCT(C) ispunct(C) -#define ISALNUM(C) isalnum(C) -#define ISPRINT(C) isprint(C) -#define ISGRAPH(C) isgraph(C) -#define ISCNTRL(C) iscntrl(C) -#else -#define ISALPHA(C) (isascii(C) && isalpha(C)) -#define ISUPPER(C) (isascii(C) && isupper(C)) -#define ISLOWER(C) (isascii(C) && islower(C)) -#define ISDIGIT(C) (isascii(C) && isdigit(C)) -#define ISXDIGIT(C) (isascii(C) && isxdigit(C)) -#define ISSPACE(C) (isascii(C) && isspace(C)) -#define ISPUNCT(C) (isascii(C) && ispunct(C)) -#define ISALNUM(C) (isascii(C) && isalnum(C)) -#define ISPRINT(C) (isascii(C) && isprint(C)) -#define ISGRAPH(C) (isascii(C) && isgraph(C)) -#define ISCNTRL(C) (isascii(C) && iscntrl(C)) -#endif - -/* ISASCIIDIGIT differs from ISDIGIT, as follows: - - Its arg may be any int or unsigned int; it need not be an unsigned char. - - It's guaranteed to evaluate its argument exactly once. - - It's typically faster. - Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that - only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless - it's important to use the locale's definition of `digit' even when the - host does not conform to Posix. */ -#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) - -/* If we (don't) have I18N. 
*/ -/* glibc defines _ */ -#ifndef _ -# ifdef HAVE_LIBINTL_H -# include -# ifndef _ -# define _(Str) gettext (Str) -# endif -# else -# define _(Str) (Str) -# endif -#endif - -#ifndef __FreeBSD__ -#include "regex.h" -#else -#include -#endif -#include "dfa.h" - -/* HPUX, define those as macros in sys/param.h */ -#ifdef setbit -# undef setbit -#endif -#ifdef clrbit -# undef clrbit -#endif - -static void dfamust PARAMS ((struct dfa *dfa)); - -static ptr_t xcalloc PARAMS ((size_t n, size_t s)); -static ptr_t xmalloc PARAMS ((size_t n)); -static ptr_t xrealloc PARAMS ((ptr_t p, size_t n)); -#ifdef DEBUG -static void prtok PARAMS ((token t)); -#endif -static int tstbit PARAMS ((int b, charclass c)); -static void setbit PARAMS ((int b, charclass c)); -static void clrbit PARAMS ((int b, charclass c)); -static void copyset PARAMS ((charclass src, charclass dst)); -static void zeroset PARAMS ((charclass s)); -static void notset PARAMS ((charclass s)); -static int equal PARAMS ((charclass s1, charclass s2)); -static int charclass_index PARAMS ((charclass s)); -static int looking_at PARAMS ((const char *s)); -static token lex PARAMS ((void)); -static void addtok PARAMS ((token t)); -static void atom PARAMS ((void)); -static int nsubtoks PARAMS ((int tindex)); -static void copytoks PARAMS ((int tindex, int ntokens)); -static void closure PARAMS ((void)); -static void branch PARAMS ((void)); -static void regexp PARAMS ((int toplevel)); -static void copy PARAMS ((position_set *src, position_set *dst)); -static void insert PARAMS ((position p, position_set *s)); -static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m)); -static void delete PARAMS ((position p, position_set *s)); -static int state_index PARAMS ((struct dfa *d, position_set *s, - int newline, int letter)); -static void build_state PARAMS ((int s, struct dfa *d)); -static void build_state_zero PARAMS ((struct dfa *d)); -static char *icatalloc PARAMS ((char *old, char *new)); -static char *icpyalloc PARAMS ((char *string)); -static char *istrstr PARAMS ((char *lookin, char *lookfor)); -static void ifree PARAMS ((char *cp)); -static void freelist PARAMS ((char **cpp)); -static char **enlist PARAMS ((char **cpp, char *new, size_t len)); -static char **comsubs PARAMS ((char *left, char *right)); -static char **addlists PARAMS ((char **old, char **new)); -static char **inboth PARAMS ((char **left, char **right)); - -#ifdef __FreeBSD__ -static int collate_range_cmp (a, b) - int a, b; -{ - int r; - static char s[2][2]; - - if ((unsigned char)a == (unsigned char)b) - return 0; - s[0][0] = a; - s[1][0] = b; - if ((r = strcoll(s[0], s[1])) == 0) - r = (unsigned char)a - (unsigned char)b; - return r; -} -#endif - -static ptr_t -xcalloc (size_t n, size_t s) -{ - ptr_t r = calloc(n, s); - - if (!r) - dfaerror(_("Memory exhausted")); - return r; -} - -static ptr_t -xmalloc (size_t n) -{ - ptr_t r = malloc(n); - - assert(n != 0); - if (!r) - dfaerror(_("Memory exhausted")); - return r; -} - -static ptr_t -xrealloc (ptr_t p, size_t n) -{ - ptr_t r = realloc(p, n); - - assert(n != 0); - if (!r) - dfaerror(_("Memory exhausted")); - return r; -} - -#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t))) -#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) -#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t))) - -/* Reallocate an array of type t if nalloc is too small for index. 
*/ -#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \ - if ((index) >= (nalloc)) \ - { \ - while ((index) >= (nalloc)) \ - (nalloc) *= 2; \ - REALLOC(p, t, nalloc); \ - } - -#ifdef DEBUG - -static void -prtok (token t) -{ - char *s; - - if (t < 0) - fprintf(stderr, "END"); - else if (t < NOTCHAR) - fprintf(stderr, "%c", t); - else - { - switch (t) - { - case EMPTY: s = "EMPTY"; break; - case BACKREF: s = "BACKREF"; break; - case BEGLINE: s = "BEGLINE"; break; - case ENDLINE: s = "ENDLINE"; break; - case BEGWORD: s = "BEGWORD"; break; - case ENDWORD: s = "ENDWORD"; break; - case LIMWORD: s = "LIMWORD"; break; - case NOTLIMWORD: s = "NOTLIMWORD"; break; - case QMARK: s = "QMARK"; break; - case STAR: s = "STAR"; break; - case PLUS: s = "PLUS"; break; - case CAT: s = "CAT"; break; - case OR: s = "OR"; break; - case ORTOP: s = "ORTOP"; break; - case LPAREN: s = "LPAREN"; break; - case RPAREN: s = "RPAREN"; break; - default: s = "CSET"; break; - } - fprintf(stderr, "%s", s); - } -} -#endif /* DEBUG */ - -/* Stuff pertaining to charclasses. */ - -static int -tstbit (int b, charclass c) -{ - return c[b / INTBITS] & 1 << b % INTBITS; -} - -static void -setbit (int b, charclass c) -{ - c[b / INTBITS] |= 1 << b % INTBITS; -} - -static void -clrbit (int b, charclass c) -{ - c[b / INTBITS] &= ~(1 << b % INTBITS); -} - -static void -copyset (charclass src, charclass dst) -{ - int i; - - for (i = 0; i < CHARCLASS_INTS; ++i) - dst[i] = src[i]; -} - -static void -zeroset (charclass s) -{ - int i; - - for (i = 0; i < CHARCLASS_INTS; ++i) - s[i] = 0; -} - -static void -notset (charclass s) -{ - int i; - - for (i = 0; i < CHARCLASS_INTS; ++i) - s[i] = ~s[i]; -} - -static int -equal (charclass s1, charclass s2) -{ - int i; - - for (i = 0; i < CHARCLASS_INTS; ++i) - if (s1[i] != s2[i]) - return 0; - return 1; -} - -/* A pointer to the current dfa is kept here during parsing. */ -static struct dfa *dfa; - -/* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ -static int -charclass_index (charclass s) -{ - int i; - - for (i = 0; i < dfa->cindex; ++i) - if (equal(s, dfa->charclasses[i])) - return i; - REALLOC_IF_NECESSARY(dfa->charclasses, charclass, dfa->calloc, dfa->cindex); - ++dfa->cindex; - copyset(s, dfa->charclasses[i]); - return i; -} - -/* Syntax bits controlling the behavior of the lexical analyzer. */ -static reg_syntax_t syntax_bits, syntax_bits_set; - -/* Flag for case-folding letters into sets. */ -static int case_fold; - -/* End-of-line byte in data. */ -static unsigned char eolbyte; - -/* Entry point to set syntax options. */ -void -dfasyntax (reg_syntax_t bits, int fold, int eol) -{ - syntax_bits_set = 1; - syntax_bits = bits; - case_fold = fold; - eolbyte = eol; -} - -/* Lexical analyzer. All the dross that deals with the obnoxious - GNU Regex syntax bits is located here. The poor, suffering - reader is referred to the GNU Regex documentation for the - meaning of the @#%!@#%^!@ syntax bits. */ - -static char *lexptr; /* Pointer to next input character. */ -static int lexleft; /* Number of characters remaining. */ -static token lasttok; /* Previous token returned; initially END. */ -static int laststart; /* True if we're separated from beginning or (, | - only by zero-width characters. */ -static int parens; /* Count of outstanding left parens. */ -static int minrep, maxrep; /* Repeat counts for {m,n}. */ - -/* Note that characters become unsigned here. */ -#define FETCH(c, eoferr) \ - { \ - if (! 
lexleft) \ - { \ - if (eoferr != 0) \ - dfaerror (eoferr); \ - else \ - return lasttok = END; \ - } \ - (c) = (unsigned char) *lexptr++; \ - --lexleft; \ - } - -#ifdef __STDC__ -#define FUNC(F, P) static int F(int c) { return P(c); } -#else -#define FUNC(F, P) static int F(c) int c; { return P(c); } -#endif - -FUNC(is_alpha, ISALPHA) -FUNC(is_upper, ISUPPER) -FUNC(is_lower, ISLOWER) -FUNC(is_digit, ISDIGIT) -FUNC(is_xdigit, ISXDIGIT) -FUNC(is_space, ISSPACE) -FUNC(is_punct, ISPUNCT) -FUNC(is_alnum, ISALNUM) -FUNC(is_print, ISPRINT) -FUNC(is_graph, ISGRAPH) -FUNC(is_cntrl, ISCNTRL) - -static int -is_blank (int c) -{ - return (c == ' ' || c == '\t'); -} - -/* The following list maps the names of the Posix named character classes - to predicate functions that determine whether a given character is in - the class. The leading [ has already been eaten by the lexical analyzer. */ -static struct { - const char *name; - int (*pred) PARAMS ((int)); -} prednames[] = { - { ":alpha:]", is_alpha }, - { ":upper:]", is_upper }, - { ":lower:]", is_lower }, - { ":digit:]", is_digit }, - { ":xdigit:]", is_xdigit }, - { ":space:]", is_space }, - { ":punct:]", is_punct }, - { ":alnum:]", is_alnum }, - { ":print:]", is_print }, - { ":graph:]", is_graph }, - { ":cntrl:]", is_cntrl }, - { ":blank:]", is_blank }, - { 0 } -}; - -/* Return non-zero if C is a `word-constituent' byte; zero otherwise. */ -#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_') - -static int -looking_at (char const *s) -{ - size_t len; - - len = strlen(s); - if (lexleft < len) - return 0; - return strncmp(s, lexptr, len) == 0; -} - -static token -lex (void) -{ - token c, c1, c2; - int backslash = 0, invert; - charclass ccl; - int i; - char lo[2]; - char hi[2]; - - /* Basic plan: We fetch a character. If it's a backslash, - we set the backslash flag and go through the loop again. - On the plus side, this avoids having a duplicate of the - main switch inside the backslash case. On the minus side, - it means that just about every case begins with - "if (backslash) ...". */ - for (i = 0; i < 2; ++i) - { - FETCH(c, 0); - switch (c) - { - case '\\': - if (backslash) - goto normal_char; - if (lexleft == 0) - dfaerror(_("Unfinished \\ escape")); - backslash = 1; - break; - - case '^': - if (backslash) - goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS - || lasttok == END - || lasttok == LPAREN - || lasttok == OR) - return lasttok = BEGLINE; - goto normal_char; - - case '$': - if (backslash) - goto normal_char; - if (syntax_bits & RE_CONTEXT_INDEP_ANCHORS - || lexleft == 0 - || (syntax_bits & RE_NO_BK_PARENS - ? lexleft > 0 && *lexptr == ')' - : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == ')') - || (syntax_bits & RE_NO_BK_VBAR - ? 
lexleft > 0 && *lexptr == '|' - : lexleft > 1 && lexptr[0] == '\\' && lexptr[1] == '|') - || ((syntax_bits & RE_NEWLINE_ALT) - && lexleft > 0 && *lexptr == '\n')) - return lasttok = ENDLINE; - goto normal_char; - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (backslash && !(syntax_bits & RE_NO_BK_REFS)) - { - laststart = 0; - return lasttok = BACKREF; - } - goto normal_char; - - case '`': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGLINE; /* FIXME: should be beginning of string */ - goto normal_char; - - case '\'': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = ENDLINE; /* FIXME: should be end of string */ - goto normal_char; - - case '<': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = BEGWORD; - goto normal_char; - - case '>': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = ENDWORD; - goto normal_char; - - case 'b': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = LIMWORD; - goto normal_char; - - case 'B': - if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) - return lasttok = NOTLIMWORD; - goto normal_char; - - case '?': - if (syntax_bits & RE_LIMITED_OPS) - goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - return lasttok = QMARK; - - case '*': - if (backslash) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - return lasttok = STAR; - - case '+': - if (syntax_bits & RE_LIMITED_OPS) - goto normal_char; - if (backslash != ((syntax_bits & RE_BK_PLUS_QM) != 0)) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - return lasttok = PLUS; - - case '{': - if (!(syntax_bits & RE_INTERVALS)) - goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) - goto normal_char; - if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) - goto normal_char; - - if (syntax_bits & RE_NO_BK_BRACES) - { - /* Scan ahead for a valid interval; if it's not valid, - treat it as a literal '{'. */ - int lo = -1, hi = -1; - char const *p = lexptr; - char const *lim = p + lexleft; - for (; p != lim && ISASCIIDIGIT (*p); p++) - lo = (lo < 0 ? 0 : lo * 10) + *p - '0'; - if (p != lim && *p == ',') - while (++p != lim && ISASCIIDIGIT (*p)) - hi = (hi < 0 ? 0 : hi * 10) + *p - '0'; - else - hi = lo; - if (p == lim || *p != '}' - || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo)) - goto normal_char; - } - - minrep = 0; - /* Cases: - {M} - exact count - {M,} - minimum count, maximum is infinity - {M,N} - M through N */ - FETCH(c, _("unfinished repeat count")); - if (ISASCIIDIGIT (c)) - { - minrep = c - '0'; - for (;;) - { - FETCH(c, _("unfinished repeat count")); - if (! ISASCIIDIGIT (c)) - break; - minrep = 10 * minrep + c - '0'; - } - } - else - dfaerror(_("malformed repeat count")); - if (c == ',') - { - FETCH (c, _("unfinished repeat count")); - if (! ISASCIIDIGIT (c)) - maxrep = -1; - else - { - maxrep = c - '0'; - for (;;) - { - FETCH (c, _("unfinished repeat count")); - if (! 
ISASCIIDIGIT (c)) - break; - maxrep = 10 * maxrep + c - '0'; - } - if (0 <= maxrep && maxrep < minrep) - dfaerror (_("malformed repeat count")); - } - } - else - maxrep = minrep; - if (!(syntax_bits & RE_NO_BK_BRACES)) - { - if (c != '\\') - dfaerror(_("malformed repeat count")); - FETCH(c, _("unfinished repeat count")); - } - if (c != '}') - dfaerror(_("malformed repeat count")); - laststart = 0; - return lasttok = REPMN; - - case '|': - if (syntax_bits & RE_LIMITED_OPS) - goto normal_char; - if (backslash != ((syntax_bits & RE_NO_BK_VBAR) == 0)) - goto normal_char; - laststart = 1; - return lasttok = OR; - - case '\n': - if (syntax_bits & RE_LIMITED_OPS - || backslash - || !(syntax_bits & RE_NEWLINE_ALT)) - goto normal_char; - laststart = 1; - return lasttok = OR; - - case '(': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) - goto normal_char; - ++parens; - laststart = 1; - return lasttok = LPAREN; - - case ')': - if (backslash != ((syntax_bits & RE_NO_BK_PARENS) == 0)) - goto normal_char; - if (parens == 0 && syntax_bits & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - --parens; - laststart = 0; - return lasttok = RPAREN; - - case '.': - if (backslash) - goto normal_char; - zeroset(ccl); - notset(ccl); - if (!(syntax_bits & RE_DOT_NEWLINE)) - clrbit(eolbyte, ccl); - if (syntax_bits & RE_DOT_NOT_NULL) - clrbit('\0', ccl); - laststart = 0; - return lasttok = CSET + charclass_index(ccl); - - case 'w': - case 'W': - if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) - goto normal_char; - zeroset(ccl); - for (c2 = 0; c2 < NOTCHAR; ++c2) - if (IS_WORD_CONSTITUENT(c2)) - setbit(c2, ccl); - if (c == 'W') - notset(ccl); - laststart = 0; - return lasttok = CSET + charclass_index(ccl); - - case '[': - if (backslash) - goto normal_char; - zeroset(ccl); - FETCH(c, _("Unbalanced [")); - if (c == '^') - { - FETCH(c, _("Unbalanced [")); - invert = 1; - } - else - invert = 0; - do - { - /* Nobody ever said this had to be fast. :-) - Note that if we're looking at some other [:...:] - construct, we just treat it as a bunch of ordinary - characters. We can do this because we assume - regex has checked for syntax errors before - dfa is ever called. */ - if (c == '[' && (syntax_bits & RE_CHAR_CLASSES)) - for (c1 = 0; prednames[c1].name; ++c1) - if (looking_at(prednames[c1].name)) - { - int (*pred)() = prednames[c1].pred; - if (case_fold - && (pred == is_upper || pred == is_lower)) - pred = is_alpha; - - for (c2 = 0; c2 < NOTCHAR; ++c2) - if ((*pred)(c2)) - setbit(c2, ccl); - lexptr += strlen(prednames[c1].name); - lexleft -= strlen(prednames[c1].name); - FETCH(c1, _("Unbalanced [")); - goto skip; - } - if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH(c, _("Unbalanced [")); - FETCH(c1, _("Unbalanced [")); - if (c1 == '-') - { - FETCH(c2, _("Unbalanced [")); - if (c2 == ']') - { - /* In the case [x-], the - is an ordinary hyphen, - which is left in c1, the lookahead character. 
*/ - --lexptr; - ++lexleft; - c2 = c; - } - else - { - if (c2 == '\\' - && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH(c2, _("Unbalanced [")); - FETCH(c1, _("Unbalanced [")); - } - } - else - c2 = c; -#ifdef __FreeBSD__ - { token c3; - - if (collate_range_cmp(c, c2) > 0) { - FETCH(c2, "Invalid range"); - goto skip; - } - - for (c3 = 0; c3 < NOTCHAR; ++c3) - if ( collate_range_cmp(c, c3) <= 0 - && collate_range_cmp(c3, c2) <= 0 - ) { - setbit(c3, ccl); - if (case_fold) - if (ISUPPER(c3)) - setbit(tolower(c3), ccl); - else if (ISLOWER(c3)) - setbit(toupper(c3), ccl); - } - } -#else - lo[0] = c; lo[1] = '\0'; - hi[0] = c2; hi[1] = '\0'; - for (c = 0; c < NOTCHAR; c++) - { - char ch[2]; - ch[0] = c; ch[1] = '\0'; - if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0) - { - setbit (c, ccl); - if (case_fold) - { - if (ISUPPER (c)) - setbit (tolower (c), ccl); - else if (ISLOWER (c)) - setbit (toupper (c), ccl); - } - } - } -#endif - - skip: - ; - } - while ((c = c1) != ']'); - if (invert) - { - notset(ccl); - if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) - clrbit(eolbyte, ccl); - } - laststart = 0; - return lasttok = CSET + charclass_index(ccl); - - default: - normal_char: - laststart = 0; - if (case_fold && ISALPHA(c)) - { - zeroset(ccl); - setbit(c, ccl); - if (isupper(c)) - setbit(tolower(c), ccl); - else - setbit(toupper(c), ccl); - return lasttok = CSET + charclass_index(ccl); - } - return c; - } - } - - /* The above loop should consume at most a backslash - and some other character. */ - abort(); - return END; /* keeps pedantic compilers happy. */ -} - -/* Recursive descent parser for regular expressions. */ - -static token tok; /* Lookahead token. */ -static int depth; /* Current depth of a hypothetical stack - holding deferred productions. This is - used to determine the depth that will be - required of the real stack later on in - dfaanalyze(). */ - -/* Add the given token to the parse tree, maintaining the depth count and - updating the maximum depth if necessary. */ -static void -addtok (token t) -{ - REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); - dfa->tokens[dfa->tindex++] = t; - - switch (t) - { - case QMARK: - case STAR: - case PLUS: - break; - - case CAT: - case OR: - case ORTOP: - --depth; - break; - - default: - ++dfa->nleaves; - case EMPTY: - ++depth; - break; - } - if (depth > dfa->depth) - dfa->depth = depth; -} - -/* The grammar understood by the parser is as follows. - - regexp: - regexp OR branch - branch - - branch: - branch closure - closure - - closure: - closure QMARK - closure STAR - closure PLUS - atom - - atom: - - CSET - BACKREF - BEGLINE - ENDLINE - BEGWORD - ENDWORD - LIMWORD - NOTLIMWORD - - - The parser builds a parse tree in postfix form in an array of tokens. */ - -static void -atom (void) -{ - if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF - || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD - || tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD) - { - addtok(tok); - tok = lex(); - } - else if (tok == LPAREN) - { - tok = lex(); - regexp(0); - if (tok != RPAREN) - dfaerror(_("Unbalanced (")); - tok = lex(); - } - else - addtok(EMPTY); -} - -/* Return the number of tokens in the given subexpression. 
*/ -static int -nsubtoks (int tindex) -{ - int ntoks1; - - switch (dfa->tokens[tindex - 1]) - { - default: - return 1; - case QMARK: - case STAR: - case PLUS: - return 1 + nsubtoks(tindex - 1); - case CAT: - case OR: - case ORTOP: - ntoks1 = nsubtoks(tindex - 1); - return 1 + ntoks1 + nsubtoks(tindex - 1 - ntoks1); - } -} - -/* Copy the given subexpression to the top of the tree. */ -static void -copytoks (int tindex, int ntokens) -{ - int i; - - for (i = 0; i < ntokens; ++i) - addtok(dfa->tokens[tindex + i]); -} - -static void -closure (void) -{ - int tindex, ntokens, i; - - atom(); - while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN) - if (tok == REPMN) - { - ntokens = nsubtoks(dfa->tindex); - tindex = dfa->tindex - ntokens; - if (maxrep < 0) - addtok(PLUS); - if (minrep == 0) - addtok(QMARK); - for (i = 1; i < minrep; ++i) - { - copytoks(tindex, ntokens); - addtok(CAT); - } - for (; i < maxrep; ++i) - { - copytoks(tindex, ntokens); - addtok(QMARK); - addtok(CAT); - } - tok = lex(); - } - else - { - addtok(tok); - tok = lex(); - } -} - -static void -branch (void) -{ - closure(); - while (tok != RPAREN && tok != OR && tok >= 0) - { - closure(); - addtok(CAT); - } -} - -static void -regexp (int toplevel) -{ - branch(); - while (tok == OR) - { - tok = lex(); - branch(); - if (toplevel) - addtok(ORTOP); - else - addtok(OR); - } -} - -/* Main entry point for the parser. S is a string to be parsed, len is the - length of the string, so s can include NUL characters. D is a pointer to - the struct dfa to parse into. */ -void -dfaparse (char *s, size_t len, struct dfa *d) -{ - dfa = d; - lexptr = s; - lexleft = len; - lasttok = END; - laststart = 1; - parens = 0; - - if (! syntax_bits_set) - dfaerror(_("No regexp syntax bits specified")); - - tok = lex(); - depth = d->depth; - - regexp(1); - - if (tok != END) - dfaerror(_("Unbalanced )")); - - addtok(END - d->nregexps); - addtok(CAT); - - if (d->nregexps) - addtok(ORTOP); - - ++d->nregexps; -} - -/* Some primitives for operating on sets of positions. */ - -/* Copy one set to another; the destination must be large enough. */ -static void -copy (position_set *src, position_set *dst) -{ - int i; - - for (i = 0; i < src->nelem; ++i) - dst->elems[i] = src->elems[i]; - dst->nelem = src->nelem; -} - -/* Insert a position in a set. Position sets are maintained in sorted - order according to index. If position already exists in the set with - the same index then their constraints are logically or'd together. - S->elems must point to an array large enough to hold the resulting set. */ -static void -insert (position p, position_set *s) -{ - int i; - position t1, t2; - - for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i) - continue; - if (i < s->nelem && p.index == s->elems[i].index) - s->elems[i].constraint |= p.constraint; - else - { - t1 = p; - ++s->nelem; - while (i < s->nelem) - { - t2 = s->elems[i]; - s->elems[i++] = t1; - t1 = t2; - } - } -} - -/* Merge two sets of positions into a third. The result is exactly as if - the positions of both sets were inserted into an initially empty set. 
*/ -static void -merge (position_set *s1, position_set *s2, position_set *m) -{ - int i = 0, j = 0; - - m->nelem = 0; - while (i < s1->nelem && j < s2->nelem) - if (s1->elems[i].index > s2->elems[j].index) - m->elems[m->nelem++] = s1->elems[i++]; - else if (s1->elems[i].index < s2->elems[j].index) - m->elems[m->nelem++] = s2->elems[j++]; - else - { - m->elems[m->nelem] = s1->elems[i++]; - m->elems[m->nelem++].constraint |= s2->elems[j++].constraint; - } - while (i < s1->nelem) - m->elems[m->nelem++] = s1->elems[i++]; - while (j < s2->nelem) - m->elems[m->nelem++] = s2->elems[j++]; -} - -/* Delete a position from a set. */ -static void -delete (position p, position_set *s) -{ - int i; - - for (i = 0; i < s->nelem; ++i) - if (p.index == s->elems[i].index) - break; - if (i < s->nelem) - for (--s->nelem; i < s->nelem; ++i) - s->elems[i] = s->elems[i + 1]; -} - -/* Find the index of the state corresponding to the given position set with - the given preceding context, or create a new state if there is no such - state. Newline and letter tell whether we got here on a newline or - letter, respectively. */ -static int -state_index (struct dfa *d, position_set *s, int newline, int letter) -{ - int hash = 0; - int constraint; - int i, j; - - newline = newline ? 1 : 0; - letter = letter ? 1 : 0; - - for (i = 0; i < s->nelem; ++i) - hash ^= s->elems[i].index + s->elems[i].constraint; - - /* Try to find a state that exactly matches the proposed one. */ - for (i = 0; i < d->sindex; ++i) - { - if (hash != d->states[i].hash || s->nelem != d->states[i].elems.nelem - || newline != d->states[i].newline || letter != d->states[i].letter) - continue; - for (j = 0; j < s->nelem; ++j) - if (s->elems[j].constraint - != d->states[i].elems.elems[j].constraint - || s->elems[j].index != d->states[i].elems.elems[j].index) - break; - if (j == s->nelem) - return i; - } - - /* We'll have to create a new state. */ - REALLOC_IF_NECESSARY(d->states, dfa_state, d->salloc, d->sindex); - d->states[i].hash = hash; - MALLOC(d->states[i].elems.elems, position, s->nelem); - copy(s, &d->states[i].elems); - d->states[i].newline = newline; - d->states[i].letter = letter; - d->states[i].backref = 0; - d->states[i].constraint = 0; - d->states[i].first_end = 0; - for (j = 0; j < s->nelem; ++j) - if (d->tokens[s->elems[j].index] < 0) - { - constraint = s->elems[j].constraint; - if (SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 0) - || SUCCEEDS_IN_CONTEXT(constraint, newline, 0, letter, 1) - || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 0) - || SUCCEEDS_IN_CONTEXT(constraint, newline, 1, letter, 1)) - d->states[i].constraint |= constraint; - if (! d->states[i].first_end) - d->states[i].first_end = d->tokens[s->elems[j].index]; - } - else if (d->tokens[s->elems[j].index] == BACKREF) - { - d->states[i].constraint = NO_CONSTRAINT; - d->states[i].backref = 1; - } - - ++d->sindex; - - return i; -} - -/* Find the epsilon closure of a set of positions. If any position of the set - contains a symbol that matches the empty string in some context, replace - that position with the elements of its follow labeled with an appropriate - constraint. Repeat exhaustively until no funny positions are left. - S->elems must be large enough to hold the result. 
*/ -static void -epsclosure (position_set *s, struct dfa *d) -{ - int i, j; - int *visited; - position p, old; - - MALLOC(visited, int, d->tindex); - for (i = 0; i < d->tindex; ++i) - visited[i] = 0; - - for (i = 0; i < s->nelem; ++i) - if (d->tokens[s->elems[i].index] >= NOTCHAR - && d->tokens[s->elems[i].index] != BACKREF - && d->tokens[s->elems[i].index] < CSET) - { - old = s->elems[i]; - p.constraint = old.constraint; - delete(s->elems[i], s); - if (visited[old.index]) - { - --i; - continue; - } - visited[old.index] = 1; - switch (d->tokens[old.index]) - { - case BEGLINE: - p.constraint &= BEGLINE_CONSTRAINT; - break; - case ENDLINE: - p.constraint &= ENDLINE_CONSTRAINT; - break; - case BEGWORD: - p.constraint &= BEGWORD_CONSTRAINT; - break; - case ENDWORD: - p.constraint &= ENDWORD_CONSTRAINT; - break; - case LIMWORD: - p.constraint &= LIMWORD_CONSTRAINT; - break; - case NOTLIMWORD: - p.constraint &= NOTLIMWORD_CONSTRAINT; - break; - default: - break; - } - for (j = 0; j < d->follows[old.index].nelem; ++j) - { - p.index = d->follows[old.index].elems[j].index; - insert(p, s); - } - /* Force rescan to start at the beginning. */ - i = -1; - } - - free(visited); -} - -/* Perform bottom-up analysis on the parse tree, computing various functions. - Note that at this point, we're pretending constructs like \< are real - characters rather than constraints on what can follow them. - - Nullable: A node is nullable if it is at the root of a regexp that can - match the empty string. - * EMPTY leaves are nullable. - * No other leaf is nullable. - * A QMARK or STAR node is nullable. - * A PLUS node is nullable if its argument is nullable. - * A CAT node is nullable if both its arguments are nullable. - * An OR node is nullable if either argument is nullable. - - Firstpos: The firstpos of a node is the set of positions (nonempty leaves) - that could correspond to the first character of a string matching the - regexp rooted at the given node. - * EMPTY leaves have empty firstpos. - * The firstpos of a nonempty leaf is that leaf itself. - * The firstpos of a QMARK, STAR, or PLUS node is the firstpos of its - argument. - * The firstpos of a CAT node is the firstpos of the left argument, union - the firstpos of the right if the left argument is nullable. - * The firstpos of an OR node is the union of firstpos of each argument. - - Lastpos: The lastpos of a node is the set of positions that could - correspond to the last character of a string matching the regexp at - the given node. - * EMPTY leaves have empty lastpos. - * The lastpos of a nonempty leaf is that leaf itself. - * The lastpos of a QMARK, STAR, or PLUS node is the lastpos of its - argument. - * The lastpos of a CAT node is the lastpos of its right argument, union - the lastpos of the left if the right argument is nullable. - * The lastpos of an OR node is the union of the lastpos of each argument. - - Follow: The follow of a position is the set of positions that could - correspond to the character following a character matching the node in - a string matching the regexp. At this point we consider special symbols - that match the empty string in some context to be just normal characters. - Later, if we find that a special symbol is in a follow set, we will - replace it with the elements of its follow, labeled with an appropriate - constraint. - * Every node in the firstpos of the argument of a STAR or PLUS node is in - the follow of every node in the lastpos. 
- * Every node in the firstpos of the second argument of a CAT node is in - the follow of every node in the lastpos of the first argument. - - Because of the postfix representation of the parse tree, the depth-first - analysis is conveniently done by a linear scan with the aid of a stack. - Sets are stored as arrays of the elements, obeying a stack-like allocation - scheme; the number of elements in each set deeper in the stack can be - used to determine the address of a particular set's array. */ -void -dfaanalyze (struct dfa *d, int searchflag) -{ - int *nullable; /* Nullable stack. */ - int *nfirstpos; /* Element count stack for firstpos sets. */ - position *firstpos; /* Array where firstpos elements are stored. */ - int *nlastpos; /* Element count stack for lastpos sets. */ - position *lastpos; /* Array where lastpos elements are stored. */ - int *nalloc; /* Sizes of arrays allocated to follow sets. */ - position_set tmp; /* Temporary set for merging sets. */ - position_set merged; /* Result of merging sets. */ - int wants_newline; /* True if some position wants newline info. */ - int *o_nullable; - int *o_nfirst, *o_nlast; - position *o_firstpos, *o_lastpos; - int i, j; - position *pos; - -#ifdef DEBUG - fprintf(stderr, "dfaanalyze:\n"); - for (i = 0; i < d->tindex; ++i) - { - fprintf(stderr, " %d:", i); - prtok(d->tokens[i]); - } - putc('\n', stderr); -#endif - - d->searchflag = searchflag; - - MALLOC(nullable, int, d->depth); - o_nullable = nullable; - MALLOC(nfirstpos, int, d->depth); - o_nfirst = nfirstpos; - MALLOC(firstpos, position, d->nleaves); - o_firstpos = firstpos, firstpos += d->nleaves; - MALLOC(nlastpos, int, d->depth); - o_nlast = nlastpos; - MALLOC(lastpos, position, d->nleaves); - o_lastpos = lastpos, lastpos += d->nleaves; - MALLOC(nalloc, int, d->tindex); - for (i = 0; i < d->tindex; ++i) - nalloc[i] = 0; - MALLOC(merged.elems, position, d->nleaves); - - CALLOC(d->follows, position_set, d->tindex); - - for (i = 0; i < d->tindex; ++i) -#ifdef DEBUG - { /* Nonsyntactic #ifdef goo... */ -#endif - switch (d->tokens[i]) - { - case EMPTY: - /* The empty set is nullable. */ - *nullable++ = 1; - - /* The firstpos and lastpos of the empty leaf are both empty. */ - *nfirstpos++ = *nlastpos++ = 0; - break; - - case STAR: - case PLUS: - /* Every element in the firstpos of the argument is in the follow - of every element in the lastpos. */ - tmp.nelem = nfirstpos[-1]; - tmp.elems = firstpos; - pos = lastpos; - for (j = 0; j < nlastpos[-1]; ++j) - { - merge(&tmp, &d->follows[pos[j].index], &merged); - REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position, - nalloc[pos[j].index], merged.nelem - 1); - copy(&merged, &d->follows[pos[j].index]); - } - - case QMARK: - /* A QMARK or STAR node is automatically nullable. */ - if (d->tokens[i] != PLUS) - nullable[-1] = 1; - break; - - case CAT: - /* Every element in the firstpos of the second argument is in the - follow of every element in the lastpos of the first argument. */ - tmp.nelem = nfirstpos[-1]; - tmp.elems = firstpos; - pos = lastpos + nlastpos[-1]; - for (j = 0; j < nlastpos[-2]; ++j) - { - merge(&tmp, &d->follows[pos[j].index], &merged); - REALLOC_IF_NECESSARY(d->follows[pos[j].index].elems, position, - nalloc[pos[j].index], merged.nelem - 1); - copy(&merged, &d->follows[pos[j].index]); - } - - /* The firstpos of a CAT node is the firstpos of the first argument, - union that of the second argument if the first is nullable. 
*/ - if (nullable[-2]) - nfirstpos[-2] += nfirstpos[-1]; - else - firstpos += nfirstpos[-1]; - --nfirstpos; - - /* The lastpos of a CAT node is the lastpos of the second argument, - union that of the first argument if the second is nullable. */ - if (nullable[-1]) - nlastpos[-2] += nlastpos[-1]; - else - { - pos = lastpos + nlastpos[-2]; - for (j = nlastpos[-1] - 1; j >= 0; --j) - pos[j] = lastpos[j]; - lastpos += nlastpos[-2]; - nlastpos[-2] = nlastpos[-1]; - } - --nlastpos; - - /* A CAT node is nullable if both arguments are nullable. */ - nullable[-2] = nullable[-1] && nullable[-2]; - --nullable; - break; - - case OR: - case ORTOP: - /* The firstpos is the union of the firstpos of each argument. */ - nfirstpos[-2] += nfirstpos[-1]; - --nfirstpos; - - /* The lastpos is the union of the lastpos of each argument. */ - nlastpos[-2] += nlastpos[-1]; - --nlastpos; - - /* An OR node is nullable if either argument is nullable. */ - nullable[-2] = nullable[-1] || nullable[-2]; - --nullable; - break; - - default: - /* Anything else is a nonempty position. (Note that special - constructs like \< are treated as nonempty strings here; - an "epsilon closure" effectively makes them nullable later. - Backreferences have to get a real position so we can detect - transitions on them later. But they are nullable. */ - *nullable++ = d->tokens[i] == BACKREF; - - /* This position is in its own firstpos and lastpos. */ - *nfirstpos++ = *nlastpos++ = 1; - --firstpos, --lastpos; - firstpos->index = lastpos->index = i; - firstpos->constraint = lastpos->constraint = NO_CONSTRAINT; - - /* Allocate the follow set for this position. */ - nalloc[i] = 1; - MALLOC(d->follows[i].elems, position, nalloc[i]); - break; - } -#ifdef DEBUG - /* ... balance the above nonsyntactic #ifdef goo... */ - fprintf(stderr, "node %d:", i); - prtok(d->tokens[i]); - putc('\n', stderr); - fprintf(stderr, nullable[-1] ? " nullable: yes\n" : " nullable: no\n"); - fprintf(stderr, " firstpos:"); - for (j = nfirstpos[-1] - 1; j >= 0; --j) - { - fprintf(stderr, " %d:", firstpos[j].index); - prtok(d->tokens[firstpos[j].index]); - } - fprintf(stderr, "\n lastpos:"); - for (j = nlastpos[-1] - 1; j >= 0; --j) - { - fprintf(stderr, " %d:", lastpos[j].index); - prtok(d->tokens[lastpos[j].index]); - } - putc('\n', stderr); - } -#endif - - /* For each follow set that is the follow set of a real position, replace - it with its epsilon closure. */ - for (i = 0; i < d->tindex; ++i) - if (d->tokens[i] < NOTCHAR || d->tokens[i] == BACKREF - || d->tokens[i] >= CSET) - { -#ifdef DEBUG - fprintf(stderr, "follows(%d:", i); - prtok(d->tokens[i]); - fprintf(stderr, "):"); - for (j = d->follows[i].nelem - 1; j >= 0; --j) - { - fprintf(stderr, " %d:", d->follows[i].elems[j].index); - prtok(d->tokens[d->follows[i].elems[j].index]); - } - putc('\n', stderr); -#endif - copy(&d->follows[i], &merged); - epsclosure(&merged, d); - if (d->follows[i].nelem < merged.nelem) - REALLOC(d->follows[i].elems, position, merged.nelem); - copy(&merged, &d->follows[i]); - } - - /* Get the epsilon closure of the firstpos of the regexp. The result will - be the set of positions of state 0. */ - merged.nelem = 0; - for (i = 0; i < nfirstpos[-1]; ++i) - insert(firstpos[i], &merged); - epsclosure(&merged, d); - - /* Check if any of the positions of state 0 will want newline context. */ - wants_newline = 0; - for (i = 0; i < merged.nelem; ++i) - if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint)) - wants_newline = 1; - - /* Build the initial state. 
*/ - d->salloc = 1; - d->sindex = 0; - MALLOC(d->states, dfa_state, d->salloc); - state_index(d, &merged, wants_newline, 0); - - free(o_nullable); - free(o_nfirst); - free(o_firstpos); - free(o_nlast); - free(o_lastpos); - free(nalloc); - free(merged.elems); -} - -/* Find, for each character, the transition out of state s of d, and store - it in the appropriate slot of trans. - - We divide the positions of s into groups (positions can appear in more - than one group). Each group is labeled with a set of characters that - every position in the group matches (taking into account, if necessary, - preceding context information of s). For each group, find the union - of the its elements' follows. This set is the set of positions of the - new state. For each character in the group's label, set the transition - on this character to be to a state corresponding to the set's positions, - and its associated backward context information, if necessary. - - If we are building a searching matcher, we include the positions of state - 0 in every state. - - The collection of groups is constructed by building an equivalence-class - partition of the positions of s. - - For each position, find the set of characters C that it matches. Eliminate - any characters from C that fail on grounds of backward context. - - Search through the groups, looking for a group whose label L has nonempty - intersection with C. If L - C is nonempty, create a new group labeled - L - C and having the same positions as the current group, and set L to - the intersection of L and C. Insert the position in this group, set - C = C - L, and resume scanning. - - If after comparing with every group there are characters remaining in C, - create a new group labeled with the characters of C and insert this - position in that group. */ -void -dfastate (int s, struct dfa *d, int trans[]) -{ - position_set grps[NOTCHAR]; /* As many as will ever be needed. */ - charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ - int ngrps = 0; /* Number of groups actually used. */ - position pos; /* Current position being considered. */ - charclass matches; /* Set of matching characters. */ - int matchesf; /* True if matches is nonempty. */ - charclass intersect; /* Intersection with some label set. */ - int intersectf; /* True if intersect is nonempty. */ - charclass leftovers; /* Stuff in the label that didn't match. */ - int leftoversf; /* True if leftovers is nonempty. */ - static charclass letters; /* Set of characters considered letters. */ - static charclass newline; /* Set of characters that aren't newline. */ - position_set follows; /* Union of the follows of some group. */ - position_set tmp; /* Temporary space for merging sets. */ - int state; /* New state. */ - int wants_newline; /* New state wants to know newline context. */ - int state_newline; /* New state on a newline transition. */ - int wants_letter; /* New state wants to know letter context. */ - int state_letter; /* New state on a letter transition. */ - static int initialized; /* Flag for static initialization. */ - int i, j, k; - - /* Initialize the set of letters, if necessary. */ - if (! 
initialized) - { - initialized = 1; - for (i = 0; i < NOTCHAR; ++i) - if (IS_WORD_CONSTITUENT(i)) - setbit(i, letters); - setbit(eolbyte, newline); - } - - zeroset(matches); - - for (i = 0; i < d->states[s].elems.nelem; ++i) - { - pos = d->states[s].elems.elems[i]; - if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR) - setbit(d->tokens[pos.index], matches); - else if (d->tokens[pos.index] >= CSET) - copyset(d->charclasses[d->tokens[pos.index] - CSET], matches); - else - continue; - - /* Some characters may need to be eliminated from matches because - they fail in the current context. */ - if (pos.constraint != 0xFF) - { - if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, - d->states[s].newline, 1)) - clrbit(eolbyte, matches); - if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, - d->states[s].newline, 0)) - for (j = 0; j < CHARCLASS_INTS; ++j) - matches[j] &= newline[j]; - if (! MATCHES_LETTER_CONTEXT(pos.constraint, - d->states[s].letter, 1)) - for (j = 0; j < CHARCLASS_INTS; ++j) - matches[j] &= ~letters[j]; - if (! MATCHES_LETTER_CONTEXT(pos.constraint, - d->states[s].letter, 0)) - for (j = 0; j < CHARCLASS_INTS; ++j) - matches[j] &= letters[j]; - - /* If there are no characters left, there's no point in going on. */ - for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j) - continue; - if (j == CHARCLASS_INTS) - continue; - } - - for (j = 0; j < ngrps; ++j) - { - /* If matches contains a single character only, and the current - group's label doesn't contain that character, go on to the - next group. */ - if (d->tokens[pos.index] >= 0 && d->tokens[pos.index] < NOTCHAR - && !tstbit(d->tokens[pos.index], labels[j])) - continue; - - /* Check if this group's label has a nonempty intersection with - matches. */ - intersectf = 0; - for (k = 0; k < CHARCLASS_INTS; ++k) - (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0; - if (! intersectf) - continue; - - /* It does; now find the set differences both ways. */ - leftoversf = matchesf = 0; - for (k = 0; k < CHARCLASS_INTS; ++k) - { - /* Even an optimizing compiler can't know this for sure. */ - int match = matches[k], label = labels[j][k]; - - (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0; - (matches[k] = match & ~label) ? (matchesf = 1) : 0; - } - - /* If there were leftovers, create a new group labeled with them. */ - if (leftoversf) - { - copyset(leftovers, labels[ngrps]); - copyset(intersect, labels[j]); - MALLOC(grps[ngrps].elems, position, d->nleaves); - copy(&grps[j], &grps[ngrps]); - ++ngrps; - } - - /* Put the position in the current group. Note that there is no - reason to call insert() here. */ - grps[j].elems[grps[j].nelem++] = pos; - - /* If every character matching the current position has been - accounted for, we're done. */ - if (! matchesf) - break; - } - - /* If we've passed the last group, and there are still characters - unaccounted for, then we'll have to create a new group. */ - if (j == ngrps) - { - copyset(matches, labels[ngrps]); - zeroset(matches); - MALLOC(grps[ngrps].elems, position, d->nleaves); - grps[ngrps].nelem = 1; - grps[ngrps].elems[0] = pos; - ++ngrps; - } - } - - MALLOC(follows.elems, position, d->nleaves); - MALLOC(tmp.elems, position, d->nleaves); - - /* If we are a searching matcher, the default transition is to a state - containing the positions of state 0, otherwise the default transition - is to fail miserably. 
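The group-splitting bookkeeping in the loop above is easier to see on a toy alphabet. The following stand-alone sketch, with invented 8-symbol masks standing in for full charclass vectors, shows the three sets computed when a new position's match set meets an existing group label:

#include <stdio.h>

int main(void)
{
    /* Illustrative only: character sets as masks over a tiny 8-symbol alphabet. */
    unsigned label   = 0x0f;   /* an existing group's label: symbols {0,1,2,3} */
    unsigned matches = 0x3c;   /* the new position's match set: symbols {2,3,4,5} */

    unsigned intersect = label & matches;    /* {2,3}: the current group keeps this
                                                label and gains the new position */
    unsigned leftovers = label & ~matches;   /* {0,1}: split off as a new group that
                                                copies only the old positions */
    unsigned remaining = matches & ~label;   /* {4,5}: still unaccounted for; tried
                                                against later groups or used to start
                                                a fresh group of its own */

    printf("intersect=%#x leftovers=%#x remaining=%#x\n",
           intersect, leftovers, remaining);   /* 0xc, 0x3, 0x30 */
    return 0;
}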
*/ - if (d->searchflag) - { - wants_newline = 0; - wants_letter = 0; - for (i = 0; i < d->states[0].elems.nelem; ++i) - { - if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint)) - wants_newline = 1; - if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint)) - wants_letter = 1; - } - copy(&d->states[0].elems, &follows); - state = state_index(d, &follows, 0, 0); - if (wants_newline) - state_newline = state_index(d, &follows, 1, 0); - else - state_newline = state; - if (wants_letter) - state_letter = state_index(d, &follows, 0, 1); - else - state_letter = state; - for (i = 0; i < NOTCHAR; ++i) - trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state; - trans[eolbyte] = state_newline; - } - else - for (i = 0; i < NOTCHAR; ++i) - trans[i] = -1; - - for (i = 0; i < ngrps; ++i) - { - follows.nelem = 0; - - /* Find the union of the follows of the positions of the group. - This is a hideously inefficient loop. Fix it someday. */ - for (j = 0; j < grps[i].nelem; ++j) - for (k = 0; k < d->follows[grps[i].elems[j].index].nelem; ++k) - insert(d->follows[grps[i].elems[j].index].elems[k], &follows); - - /* If we are building a searching matcher, throw in the positions - of state 0 as well. */ - if (d->searchflag) - for (j = 0; j < d->states[0].elems.nelem; ++j) - insert(d->states[0].elems.elems[j], &follows); - - /* Find out if the new state will want any context information. */ - wants_newline = 0; - if (tstbit(eolbyte, labels[i])) - for (j = 0; j < follows.nelem; ++j) - if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) - wants_newline = 1; - - wants_letter = 0; - for (j = 0; j < CHARCLASS_INTS; ++j) - if (labels[i][j] & letters[j]) - break; - if (j < CHARCLASS_INTS) - for (j = 0; j < follows.nelem; ++j) - if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint)) - wants_letter = 1; - - /* Find the state(s) corresponding to the union of the follows. */ - state = state_index(d, &follows, 0, 0); - if (wants_newline) - state_newline = state_index(d, &follows, 1, 0); - else - state_newline = state; - if (wants_letter) - state_letter = state_index(d, &follows, 0, 1); - else - state_letter = state; - - /* Set the transitions for each character in the current label. */ - for (j = 0; j < CHARCLASS_INTS; ++j) - for (k = 0; k < INTBITS; ++k) - if (labels[i][j] & 1 << k) - { - int c = j * INTBITS + k; - - if (c == eolbyte) - trans[c] = state_newline; - else if (IS_WORD_CONSTITUENT(c)) - trans[c] = state_letter; - else if (c < NOTCHAR) - trans[c] = state; - } - } - - for (i = 0; i < ngrps; ++i) - free(grps[i].elems); - free(follows.elems); - free(tmp.elems); -} - -/* Some routines for manipulating a compiled dfa's transition tables. - Each state may or may not have a transition table; if it does, and it - is a non-accepting state, then d->trans[state] points to its table. - If it is an accepting state then d->fails[state] points to its table. - If it has no table at all, then d->trans[state] is NULL. - TODO: Improve this comment, get rid of the unnecessary redundancy. */ - -static void -build_state (int s, struct dfa *d) -{ - int *trans; /* The new transition table. */ - int i; - - /* Set an upper limit on the number of transition tables that will ever - exist at once. 1024 is arbitrary. The idea is that the frequently - used transition tables will be quickly rebuilt, whereas the ones that - were only needed once or twice will be cleared away. 
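The final fan-out into the byte-indexed table can also be sketched on its own: every byte goes to the plain target state, word-constituent bytes to the letter-tracking variant, and the newline byte to the newline-tracking variant. The state numbers and the isalnum()-based stand-in for IS_WORD_CONSTITUENT() below are invented for the illustration:

#include <stdio.h>
#include <ctype.h>

#define NOTCHAR 256

int main(void)
{
    int trans[NOTCHAR];
    int state = 3, state_letter = 4, state_newline = 5;   /* invented state numbers */
    int c;

    for (c = 0; c < NOTCHAR; ++c)
        trans[c] = (isalnum(c) || c == '_') ? state_letter : state;  /* stand-in for
                                                                        IS_WORD_CONSTITUENT() */
    trans['\n'] = state_newline;

    printf("trans['a'] = %d, trans['-'] = %d, trans['\\n'] = %d\n",
           trans['a'], trans['-'], trans['\n']);   /* 4, 3, 5 */
    return 0;
}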
*/ - if (d->trcount >= 1024) - { - for (i = 0; i < d->tralloc; ++i) - if (d->trans[i]) - { - free((ptr_t) d->trans[i]); - d->trans[i] = NULL; - } - else if (d->fails[i]) - { - free((ptr_t) d->fails[i]); - d->fails[i] = NULL; - } - d->trcount = 0; - } - - ++d->trcount; - - /* Set up the success bits for this state. */ - d->success[s] = 0; - if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0, - s, *d)) - d->success[s] |= 4; - if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1, - s, *d)) - d->success[s] |= 2; - if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0, - s, *d)) - d->success[s] |= 1; - - MALLOC(trans, int, NOTCHAR); - dfastate(s, d, trans); - - /* Now go through the new transition table, and make sure that the trans - and fail arrays are allocated large enough to hold a pointer for the - largest state mentioned in the table. */ - for (i = 0; i < NOTCHAR; ++i) - if (trans[i] >= d->tralloc) - { - int oldalloc = d->tralloc; - - while (trans[i] >= d->tralloc) - d->tralloc *= 2; - REALLOC(d->realtrans, int *, d->tralloc + 1); - d->trans = d->realtrans + 1; - REALLOC(d->fails, int *, d->tralloc); - REALLOC(d->success, int, d->tralloc); - REALLOC(d->newlines, int, d->tralloc); - while (oldalloc < d->tralloc) - { - d->trans[oldalloc] = NULL; - d->fails[oldalloc++] = NULL; - } - } - - /* Keep the newline transition in a special place so we can use it as - a sentinel. */ - d->newlines[s] = trans[eolbyte]; - trans[eolbyte] = -1; - - if (ACCEPTING(s, *d)) - d->fails[s] = trans; - else - d->trans[s] = trans; -} - -static void -build_state_zero (struct dfa *d) -{ - d->tralloc = 1; - d->trcount = 0; - CALLOC(d->realtrans, int *, d->tralloc + 1); - d->trans = d->realtrans + 1; - CALLOC(d->fails, int *, d->tralloc); - MALLOC(d->success, int, d->tralloc); - MALLOC(d->newlines, int, d->tralloc); - build_state(0, d); -} - -/* Search through a buffer looking for a match to the given struct dfa. - Find the first occurrence of a string matching the regexp in the buffer, - and the shortest possible version thereof. Return a pointer to the first - character after the match, or NULL if none is found. Begin points to - the beginning of the buffer, and end points to the first character after - its end. We store a newline in *end to act as a sentinel, so end had - better point somewhere valid. Newline is a flag indicating whether to - allow newlines to be in the matching string. If count is non- - NULL it points to a place we're supposed to increment every time we - see a newline. Finally, if backref is non-NULL it points to a place - where we're supposed to store a 1 if backreferencing happened and the - match needs to be verified by a backtracking matcher. Otherwise - we store a 0 in *backref. */ -char * -dfaexec (struct dfa *d, char *begin, char *end, - int newline, int *count, int *backref) -{ - register int s, s1, tmp; /* Current state. */ - register unsigned char *p; /* Current input character. */ - register int **trans, *t; /* Copy of d->trans so it can be optimized - into a register. */ - register unsigned char eol = eolbyte; /* Likewise for eolbyte. */ - static int sbit[NOTCHAR]; /* Table for anding with d->success. */ - static int sbit_init; - - if (! sbit_init) - { - int i; - - sbit_init = 1; - for (i = 0; i < NOTCHAR; ++i) - sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1; - sbit[eol] = 4; - } - - if (! 
d->tralloc) - build_state_zero(d); - - s = s1 = 0; - p = (unsigned char *) begin; - trans = d->trans; - *end = eol; - - for (;;) - { - while ((t = trans[s]) != 0) { /* hand-optimized loop */ - s1 = t[*p++]; - if ((t = trans[s1]) == 0) { - tmp = s ; s = s1 ; s1 = tmp ; /* swap */ - break; - } - s = t[*p++]; - } - - if (s >= 0 && p <= (unsigned char *) end && d->fails[s]) - { - if (d->success[s] & sbit[*p]) - { - if (backref) - *backref = (d->states[s].backref != 0); - return (char *) p; - } - - s1 = s; - s = d->fails[s][*p++]; - continue; - } - - /* If the previous character was a newline, count it. */ - if (count && (char *) p <= end && p[-1] == eol) - ++*count; - - /* Check if we've run off the end of the buffer. */ - if ((char *) p > end) - return NULL; - - if (s >= 0) - { - build_state(s, d); - trans = d->trans; - continue; - } - - if (p[-1] == eol && newline) - { - s = d->newlines[s1]; - continue; - } - - s = 0; - } -} - -/* Initialize the components of a dfa that the other routines don't - initialize for themselves. */ -void -dfainit (struct dfa *d) -{ - d->calloc = 1; - MALLOC(d->charclasses, charclass, d->calloc); - d->cindex = 0; - - d->talloc = 1; - MALLOC(d->tokens, token, d->talloc); - d->tindex = d->depth = d->nleaves = d->nregexps = 0; - - d->searchflag = 0; - d->tralloc = 0; - - d->musts = 0; - d->realtrans = 0; - d->fails = 0; - d->newlines = 0; - d->success = 0; - -} - -/* Parse and analyze a single string of the given length. */ -void -dfacomp (char *s, size_t len, struct dfa *d, int searchflag) -{ - if (case_fold) /* dummy folding in service of dfamust() */ - { - char *lcopy; - int i; - - lcopy = malloc(len); - if (!lcopy) - dfaerror(_("out of memory")); - - /* This is a kludge. */ - case_fold = 0; - for (i = 0; i < len; ++i) - if (ISUPPER ((unsigned char) s[i])) - lcopy[i] = tolower ((unsigned char) s[i]); - else - lcopy[i] = s[i]; - - dfainit(d); - dfaparse(lcopy, len, d); - free(lcopy); - dfamust(d); - d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0; - case_fold = 1; - dfaparse(s, len, d); - dfaanalyze(d, searchflag); - } - else - { - dfainit(d); - dfaparse(s, len, d); - dfamust(d); - dfaanalyze(d, searchflag); - } -} - -/* Free the storage held by the components of a dfa. */ -void -dfafree (struct dfa *d) -{ - int i; - struct dfamust *dm, *ndm; - - free((ptr_t) d->charclasses); - free((ptr_t) d->tokens); - for (i = 0; i < d->sindex; ++i) - free((ptr_t) d->states[i].elems.elems); - free((ptr_t) d->states); - for (i = 0; i < d->tindex; ++i) - if (d->follows[i].elems) - free((ptr_t) d->follows[i].elems); - free((ptr_t) d->follows); - for (i = 0; i < d->tralloc; ++i) - if (d->trans[i]) - free((ptr_t) d->trans[i]); - else if (d->fails[i]) - free((ptr_t) d->fails[i]); - if (d->realtrans) free((ptr_t) d->realtrans); - if (d->fails) free((ptr_t) d->fails); - if (d->newlines) free((ptr_t) d->newlines); - if (d->success) free((ptr_t) d->success); - for (dm = d->musts; dm; dm = ndm) - { - ndm = dm->next; - free(dm->must); - free((ptr_t) dm); - } -} - -/* Having found the postfix representation of the regular expression, - try to find a long sequence of characters that must appear in any line - containing the r.e. - Finding a "longest" sequence is beyond the scope here; - we take an easy way out and hope for the best. - (Take "(ab|a)b"--please.) 
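Putting dfacomp() and dfaexec() together, a minimal caller might look like the sketch below. It is an assumption-laden illustration rather than gawk code: it presumes the bundled GNU regex.h for reg_syntax_t and RE_SYNTAX_AWK, that the program is linked with this dfa.c, and that the default dfaerror() is acceptable; the pattern and buffer are invented.

#include <stdio.h>
#include <string.h>
#include "regex.h"   /* bundled GNU regex: reg_syntax_t, RE_SYNTAX_AWK */
#include "dfa.h"

int main(void)
{
    static char buf[] = "line one\nneedle 42 here\nline three\n";  /* "4" sits at offset 16 */
    char pat[] = "[0-9]+";
    char *end = buf + strlen(buf);   /* dfaexec() writes its newline sentinel here */
    struct dfa d;
    char *match;
    int backref;

    dfasyntax(RE_SYNTAX_AWK, 0, '\n');    /* awk syntax, no case folding, '\n' ends lines */
    dfacomp(pat, strlen(pat), &d, 1);     /* final 1: build a searching matcher */

    match = dfaexec(&d, buf, end, 0, NULL, &backref);
    if (match == NULL)
        printf("no match\n");
    else
        printf("shortest match ends at offset %ld%s\n",   /* 17 for this buffer */
               (long) (match - buf),
               backref ? " (must be verified with the regex matcher)" : "");

    dfafree(&d);
    return 0;
}

Note that dfaexec() reports the end of the first, shortest match (here the single digit "4"), and sets *backref only when the pattern contained a backreference, in which case a backtracking matcher has to confirm the hit.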
- - We do a bottom-up calculation of sequences of characters that must appear - in matches of r.e.'s represented by trees rooted at the nodes of the postfix - representation: - sequences that must appear at the left of the match ("left") - sequences that must appear at the right of the match ("right") - lists of sequences that must appear somewhere in the match ("in") - sequences that must constitute the match ("is") - - When we get to the root of the tree, we use one of the longest of its - calculated "in" sequences as our answer. The sequence we find is returned in - d->must (where "d" is the single argument passed to "dfamust"); - the length of the sequence is returned in d->mustn. - - The sequences calculated for the various types of node (in pseudo ANSI c) - are shown below. "p" is the operand of unary operators (and the left-hand - operand of binary operators); "q" is the right-hand operand of binary - operators. - - "ZERO" means "a zero-length sequence" below. - - Type left right is in - ---- ---- ----- -- -- - char c # c # c # c # c - - CSET ZERO ZERO ZERO ZERO - - STAR ZERO ZERO ZERO ZERO - - QMARK ZERO ZERO ZERO ZERO - - PLUS p->left p->right ZERO p->in - - CAT (p->is==ZERO)? (q->is==ZERO)? (p->is!=ZERO && p->in plus - p->left : q->right : q->is!=ZERO) ? q->in plus - p->is##q->left p->right##q->is p->is##q->is : p->right##q->left - ZERO - - OR longest common longest common (do p->is and substrings common to - leading trailing q->is have same p->in and q->in - (sub)sequence (sub)sequence length and - of p->left of p->right content) ? - and q->left and q->right p->is : NULL - - If there's anything else we recognize in the tree, all four sequences get set - to zero-length sequences. If there's something we don't recognize in the tree, - we just return a zero-length sequence. - - Break ties in favor of infrequent letters (choosing 'zzz' in preference to - 'aaa')? - - And. . .is it here or someplace that we might ponder "optimizations" such as - egrep 'psi|epsilon' -> egrep 'psi' - egrep 'pepsi|epsilon' -> egrep 'epsi' - (Yes, we now find "epsi" as a "string - that must occur", but we might also - simplify the *entire* r.e. being sought) - grep '[c]' -> grep 'c' - grep '(ab|a)b' -> grep 'ab' - grep 'ab*' -> grep 'a' - grep 'a*b' -> grep 'b' - - There are several issues: - - Is optimization easy (enough)? - - Does optimization actually accomplish anything, - or is the automaton you get from "psi|epsilon" (for example) - the same as the one you get from "psi" (for example)? - - Are optimizable r.e.'s likely to be used in real-life situations - (something like 'ab*' is probably unlikely; something like is - 'psi|epsilon' is likelier)? */ - -static char * -icatalloc (char *old, char *new) -{ - char *result; - size_t oldsize, newsize; - - newsize = (new == NULL) ? 
0 : strlen(new); - if (old == NULL) - oldsize = 0; - else if (newsize == 0) - return old; - else oldsize = strlen(old); - if (old == NULL) - result = (char *) malloc(newsize + 1); - else - result = (char *) realloc((void *) old, oldsize + newsize + 1); - if (result != NULL && new != NULL) - (void) strcpy(result + oldsize, new); - return result; -} - -static char * -icpyalloc (char *string) -{ - return icatalloc((char *) NULL, string); -} - -static char * -istrstr (char *lookin, char *lookfor) -{ - char *cp; - size_t len; - - len = strlen(lookfor); - for (cp = lookin; *cp != '\0'; ++cp) - if (strncmp(cp, lookfor, len) == 0) - return cp; - return NULL; -} - -static void -ifree (char *cp) -{ - if (cp != NULL) - free(cp); -} - -static void -freelist (char **cpp) -{ - int i; - - if (cpp == NULL) - return; - for (i = 0; cpp[i] != NULL; ++i) - { - free(cpp[i]); - cpp[i] = NULL; - } -} - -static char ** -enlist (char **cpp, char *new, size_t len) -{ - int i, j; - - if (cpp == NULL) - return NULL; - if ((new = icpyalloc(new)) == NULL) - { - freelist(cpp); - return NULL; - } - new[len] = '\0'; - /* Is there already something in the list that's new (or longer)? */ - for (i = 0; cpp[i] != NULL; ++i) - if (istrstr(cpp[i], new) != NULL) - { - free(new); - return cpp; - } - /* Eliminate any obsoleted strings. */ - j = 0; - while (cpp[j] != NULL) - if (istrstr(new, cpp[j]) == NULL) - ++j; - else - { - free(cpp[j]); - if (--i == j) - break; - cpp[j] = cpp[i]; - cpp[i] = NULL; - } - /* Add the new string. */ - cpp = (char **) realloc((char *) cpp, (i + 2) * sizeof *cpp); - if (cpp == NULL) - return NULL; - cpp[i] = new; - cpp[i + 1] = NULL; - return cpp; -} - -/* Given pointers to two strings, return a pointer to an allocated - list of their distinct common substrings. Return NULL if something - seems wild. */ -static char ** -comsubs (char *left, char *right) -{ - char **cpp; - char *lcp; - char *rcp; - size_t i, len; - - if (left == NULL || right == NULL) - return NULL; - cpp = (char **) malloc(sizeof *cpp); - if (cpp == NULL) - return NULL; - cpp[0] = NULL; - for (lcp = left; *lcp != '\0'; ++lcp) - { - len = 0; - rcp = index(right, *lcp); - while (rcp != NULL) - { - for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) - continue; - if (i > len) - len = i; - rcp = index(rcp + 1, *lcp); - } - if (len == 0) - continue; - if ((cpp = enlist(cpp, lcp, len)) == NULL) - break; - } - return cpp; -} - -static char ** -addlists (char **old, char **new) -{ - int i; - - if (old == NULL || new == NULL) - return NULL; - for (i = 0; new[i] != NULL; ++i) - { - old = enlist(old, new[i], strlen(new[i])); - if (old == NULL) - break; - } - return old; -} - -/* Given two lists of substrings, return a new list giving substrings - common to both. 
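What enlist() maintains is an invariant: no entry in the list is a substring of another entry. A toy version, with invented names and none of the real function's copying or length handling, makes the behaviour concrete:

#include <stdio.h>
#include <string.h>

#define LISTSZ 8

static const char *list[LISTSZ];
static int nlist;

static void enlist_toy(const char *new)
{
    int i, j;

    for (i = 0; i < nlist; ++i)          /* already covered by a longer entry? */
        if (strstr(list[i], new) != NULL)
            return;
    for (i = j = 0; i < nlist; ++i)      /* drop entries the new string covers */
        if (strstr(new, list[i]) == NULL)
            list[j++] = list[i];
    nlist = j;
    if (nlist < LISTSZ)
        list[nlist++] = new;
}

int main(void)
{
    int i;

    enlist_toy("foo");
    enlist_toy("oo");        /* discarded: substring of "foo" */
    enlist_toy("bar");
    enlist_toy("foobar");    /* supersedes both "foo" and "bar" */
    enlist_toy("qux");

    for (i = 0; i < nlist; ++i)
        printf("%s\n", list[i]);   /* prints: foobar, qux */
    return 0;
}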
*/ -static char ** -inboth (char **left, char **right) -{ - char **both; - char **temp; - int lnum, rnum; - - if (left == NULL || right == NULL) - return NULL; - both = (char **) malloc(sizeof *both); - if (both == NULL) - return NULL; - both[0] = NULL; - for (lnum = 0; left[lnum] != NULL; ++lnum) - { - for (rnum = 0; right[rnum] != NULL; ++rnum) - { - temp = comsubs(left[lnum], right[rnum]); - if (temp == NULL) - { - freelist(both); - return NULL; - } - both = addlists(both, temp); - freelist(temp); - free(temp); - if (both == NULL) - return NULL; - } - } - return both; -} - -typedef struct -{ - char **in; - char *left; - char *right; - char *is; -} must; - -static void -resetmust (must *mp) -{ - mp->left[0] = mp->right[0] = mp->is[0] = '\0'; - freelist(mp->in); -} - -static void -dfamust (struct dfa *dfa) -{ - must *musts; - must *mp; - char *result; - int ri; - int i; - int exact; - token t; - static must must0; - struct dfamust *dm; - static char empty_string[] = ""; - - result = empty_string; - exact = 0; - musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts); - if (musts == NULL) - return; - mp = musts; - for (i = 0; i <= dfa->tindex; ++i) - mp[i] = must0; - for (i = 0; i <= dfa->tindex; ++i) - { - mp[i].in = (char **) malloc(sizeof *mp[i].in); - mp[i].left = malloc(2); - mp[i].right = malloc(2); - mp[i].is = malloc(2); - if (mp[i].in == NULL || mp[i].left == NULL || - mp[i].right == NULL || mp[i].is == NULL) - goto done; - mp[i].left[0] = mp[i].right[0] = mp[i].is[0] = '\0'; - mp[i].in[0] = NULL; - } -#ifdef DEBUG - fprintf(stderr, "dfamust:\n"); - for (i = 0; i < dfa->tindex; ++i) - { - fprintf(stderr, " %d:", i); - prtok(dfa->tokens[i]); - } - putc('\n', stderr); -#endif - for (ri = 0; ri < dfa->tindex; ++ri) - { - switch (t = dfa->tokens[ri]) - { - case LPAREN: - case RPAREN: - goto done; /* "cannot happen" */ - case EMPTY: - case BEGLINE: - case ENDLINE: - case BEGWORD: - case ENDWORD: - case LIMWORD: - case NOTLIMWORD: - case BACKREF: - resetmust(mp); - break; - case STAR: - case QMARK: - if (mp <= musts) - goto done; /* "cannot happen" */ - --mp; - resetmust(mp); - break; - case OR: - case ORTOP: - if (mp < &musts[2]) - goto done; /* "cannot happen" */ - { - char **new; - must *lmp; - must *rmp; - int j, ln, rn, n; - - rmp = --mp; - lmp = --mp; - /* Guaranteed to be. Unlikely, but. . . */ - if (strcmp(lmp->is, rmp->is) != 0) - lmp->is[0] = '\0'; - /* Left side--easy */ - i = 0; - while (lmp->left[i] != '\0' && lmp->left[i] == rmp->left[i]) - ++i; - lmp->left[i] = '\0'; - /* Right side */ - ln = strlen(lmp->right); - rn = strlen(rmp->right); - n = ln; - if (n > rn) - n = rn; - for (i = 0; i < n; ++i) - if (lmp->right[ln - i - 1] != rmp->right[rn - i - 1]) - break; - for (j = 0; j < i; ++j) - lmp->right[j] = lmp->right[(ln - i) + j]; - lmp->right[j] = '\0'; - new = inboth(lmp->in, rmp->in); - if (new == NULL) - goto done; - freelist(lmp->in); - free((char *) lmp->in); - lmp->in = new; - } - break; - case PLUS: - if (mp <= musts) - goto done; /* "cannot happen" */ - --mp; - mp->is[0] = '\0'; - break; - case END: - if (mp != &musts[1]) - goto done; /* "cannot happen" */ - for (i = 0; musts[0].in[i] != NULL; ++i) - if (strlen(musts[0].in[i]) > strlen(result)) - result = musts[0].in[i]; - if (strcmp(result, musts[0].is) == 0) - exact = 1; - goto done; - case CAT: - if (mp < &musts[2]) - goto done; /* "cannot happen" */ - { - must *lmp; - must *rmp; - - rmp = --mp; - lmp = --mp; - /* In. 
Everything in left, plus everything in - right, plus catenation of - left's right and right's left. */ - lmp->in = addlists(lmp->in, rmp->in); - if (lmp->in == NULL) - goto done; - if (lmp->right[0] != '\0' && - rmp->left[0] != '\0') - { - char *tp; - - tp = icpyalloc(lmp->right); - if (tp == NULL) - goto done; - tp = icatalloc(tp, rmp->left); - if (tp == NULL) - goto done; - lmp->in = enlist(lmp->in, tp, - strlen(tp)); - free(tp); - if (lmp->in == NULL) - goto done; - } - /* Left-hand */ - if (lmp->is[0] != '\0') - { - lmp->left = icatalloc(lmp->left, - rmp->left); - if (lmp->left == NULL) - goto done; - } - /* Right-hand */ - if (rmp->is[0] == '\0') - lmp->right[0] = '\0'; - lmp->right = icatalloc(lmp->right, rmp->right); - if (lmp->right == NULL) - goto done; - /* Guaranteed to be */ - if (lmp->is[0] != '\0' && rmp->is[0] != '\0') - { - lmp->is = icatalloc(lmp->is, rmp->is); - if (lmp->is == NULL) - goto done; - } - else - lmp->is[0] = '\0'; - } - break; - default: - if (t < END) - { - /* "cannot happen" */ - goto done; - } - else if (t == '\0') - { - /* not on *my* shift */ - goto done; - } - else if (t >= CSET) - { - /* easy enough */ - resetmust(mp); - } - else - { - /* plain character */ - resetmust(mp); - mp->is[0] = mp->left[0] = mp->right[0] = t; - mp->is[1] = mp->left[1] = mp->right[1] = '\0'; - mp->in = enlist(mp->in, mp->is, (size_t)1); - if (mp->in == NULL) - goto done; - } - break; - } -#ifdef DEBUG - fprintf(stderr, " node: %d:", ri); - prtok(dfa->tokens[ri]); - fprintf(stderr, "\n in:"); - for (i = 0; mp->in[i]; ++i) - fprintf(stderr, " \"%s\"", mp->in[i]); - fprintf(stderr, "\n is: \"%s\"\n", mp->is); - fprintf(stderr, " left: \"%s\"\n", mp->left); - fprintf(stderr, " right: \"%s\"\n", mp->right); -#endif - ++mp; - } - done: - if (strlen(result)) - { - dm = (struct dfamust *) malloc(sizeof (struct dfamust)); - dm->exact = exact; - dm->must = malloc(strlen(result) + 1); - strcpy(dm->must, result); - dm->next = dfa->musts; - dfa->musts = dm; - } - mp = musts; - for (i = 0; i <= dfa->tindex; ++i) - { - freelist(mp[i].in); - ifree((char *) mp[i].in); - ifree(mp[i].left); - ifree(mp[i].right); - ifree(mp[i].is); - } - free((char *) mp); -} diff --git a/contrib/awk/dfa.h b/contrib/awk/dfa.h deleted file mode 100644 index f2fef4b..0000000 --- a/contrib/awk/dfa.h +++ /dev/null @@ -1,372 +0,0 @@ -/* dfa.h - declarations for GNU deterministic regexp compiler - Copyright (C) 1988, 1998 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ - -/* Written June, 1988 by Mike Haertel */ - -/* FIXME: - 2. We should not export so much of the DFA internals. - In addition to clobbering modularity, we eat up valuable - name space. 
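Leaving aside the "in" list bookkeeping, the CAT case above boils down to three conditional concatenations. The stand-alone sketch below uses an invented struct with fixed-size buffers purely for illustration; the example right-hand operand is meant to behave like "ba[rz]", whose trailing character set contributes nothing to the must strings:

#include <stdio.h>
#include <string.h>

struct must_toy {
    char left[32];    /* longest string known to start every match */
    char right[32];   /* longest string known to end every match   */
    char is[32];      /* non-empty only if the match is exactly this string */
};

static struct must_toy cat_must(struct must_toy p, struct must_toy q)
{
    struct must_toy r;

    /* left: p's left, extended by q's left only when p matches exactly p.is */
    strcpy(r.left, p.left);
    if (p.is[0] != '\0')
        strcat(r.left, q.left);

    /* right: q's right, preceded by p's right only when q matches exactly q.is */
    if (q.is[0] != '\0') {
        strcpy(r.right, p.right);
        strcat(r.right, q.right);
    } else
        strcpy(r.right, q.right);

    /* is: known exactly only if both sides are known exactly */
    if (p.is[0] != '\0' && q.is[0] != '\0') {
        strcpy(r.is, p.is);
        strcat(r.is, q.is);
    } else
        r.is[0] = '\0';
    return r;
}

int main(void)
{
    struct must_toy p = { "foo", "foo", "foo" };   /* the literal "foo" */
    struct must_toy q = { "ba",  "",    ""    };   /* something like "ba[rz]" */
    struct must_toy r = cat_must(p, q);

    printf("left=\"%s\" right=\"%s\" is=\"%s\"\n",
           r.left, r.right, r.is);   /* left="fooba" right="" is="" */
    return 0;
}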
*/ - -# undef PARAMS -#if __STDC__ -# ifndef _PTR_T -# define _PTR_T - typedef void * ptr_t; -# endif -# define PARAMS(x) x -#else -# ifndef _PTR_T -# define _PTR_T - typedef char * ptr_t; -# endif -# define PARAMS(x) () -#endif - -/* Number of bits in an unsigned char. */ -#ifndef CHARBITS -#define CHARBITS 8 -#endif - -/* First integer value that is greater than any character code. */ -#define NOTCHAR (1 << CHARBITS) - -/* INTBITS need not be exact, just a lower bound. */ -#ifndef INTBITS -#define INTBITS (CHARBITS * sizeof (int)) -#endif - -/* Number of ints required to hold a bit for every character. */ -#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS) - -/* Sets of unsigned characters are stored as bit vectors in arrays of ints. */ -typedef int charclass[CHARCLASS_INTS]; - -/* The regexp is parsed into an array of tokens in postfix form. Some tokens - are operators and others are terminal symbols. Most (but not all) of these - codes are returned by the lexical analyzer. */ - -typedef enum -{ - END = -1, /* END is a terminal symbol that matches the - end of input; any value of END or less in - the parse tree is such a symbol. Accepting - states of the DFA are those that would have - a transition on END. */ - - /* Ordinary character values are terminal symbols that match themselves. */ - - EMPTY = NOTCHAR, /* EMPTY is a terminal symbol that matches - the empty string. */ - - BACKREF, /* BACKREF is generated by \; it - it not completely handled. If the scanner - detects a transition on backref, it returns - a kind of "semi-success" indicating that - the match will have to be verified with - a backtracking matcher. */ - - BEGLINE, /* BEGLINE is a terminal symbol that matches - the empty string if it is at the beginning - of a line. */ - - ENDLINE, /* ENDLINE is a terminal symbol that matches - the empty string if it is at the end of - a line. */ - - BEGWORD, /* BEGWORD is a terminal symbol that matches - the empty string if it is at the beginning - of a word. */ - - ENDWORD, /* ENDWORD is a terminal symbol that matches - the empty string if it is at the end of - a word. */ - - LIMWORD, /* LIMWORD is a terminal symbol that matches - the empty string if it is at the beginning - or the end of a word. */ - - NOTLIMWORD, /* NOTLIMWORD is a terminal symbol that - matches the empty string if it is not at - the beginning or end of a word. */ - - QMARK, /* QMARK is an operator of one argument that - matches zero or one occurences of its - argument. */ - - STAR, /* STAR is an operator of one argument that - matches the Kleene closure (zero or more - occurrences) of its argument. */ - - PLUS, /* PLUS is an operator of one argument that - matches the positive closure (one or more - occurrences) of its argument. */ - - REPMN, /* REPMN is a lexical token corresponding - to the {m,n} construct. REPMN never - appears in the compiled token vector. */ - - CAT, /* CAT is an operator of two arguments that - matches the concatenation of its - arguments. CAT is never returned by the - lexical analyzer. */ - - OR, /* OR is an operator of two arguments that - matches either of its arguments. */ - - ORTOP, /* OR at the toplevel in the parse tree. - This is used for a boyer-moore heuristic. */ - - LPAREN, /* LPAREN never appears in the parse tree, - it is only a lexeme. */ - - RPAREN, /* RPAREN never appears in the parse tree. */ - - CSET /* CSET and (and any value greater) is a - terminal symbol that matches any of a - class of characters. 
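The charclass representation is just a 256-bit vector spread over an array of ints. The helpers below are stand-ins for the setbit/clrbit/tstbit macros that dfa.c defines elsewhere (they are not the originals), built on the same NOTCHAR/INTBITS/CHARCLASS_INTS definitions as this header:

#include <stdio.h>
#include <string.h>

#define CHARBITS 8
#define NOTCHAR (1 << CHARBITS)
#define INTBITS (CHARBITS * sizeof (int))
#define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS)

typedef int charclass[CHARCLASS_INTS];

/* Stand-ins for the bit helpers used throughout dfa.c. */
static void zeroset(charclass s)       { memset(s, 0, sizeof (charclass)); }
static void setbit(int b, charclass s) { s[b / INTBITS] |= 1 << b % INTBITS; }
static void clrbit(int b, charclass s) { s[b / INTBITS] &= ~(1 << b % INTBITS); }
static int  tstbit(int b, charclass s) { return (s[b / INTBITS] >> b % INTBITS) & 1; }

int main(void)
{
    charclass digits;
    int c;

    zeroset(digits);
    for (c = '0'; c <= '9'; ++c)   /* the class a [0-9] bracket expression builds */
        setbit(c, digits);
    clrbit('5', digits);

    printf("'7': %d, '5': %d, 'a': %d\n",
           tstbit('7', digits), tstbit('5', digits), tstbit('a', digits));  /* 1, 0, 0 */
    return 0;
}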
*/ -} token; - -/* Sets are stored in an array in the compiled dfa; the index of the - array corresponding to a given set token is given by SET_INDEX(t). */ -#define SET_INDEX(t) ((t) - CSET) - -/* Sometimes characters can only be matched depending on the surrounding - context. Such context decisions depend on what the previous character - was, and the value of the current (lookahead) character. Context - dependent constraints are encoded as 8 bit integers. Each bit that - is set indicates that the constraint succeeds in the corresponding - context. - - bit 7 - previous and current are newlines - bit 6 - previous was newline, current isn't - bit 5 - previous wasn't newline, current is - bit 4 - neither previous nor current is a newline - bit 3 - previous and current are word-constituents - bit 2 - previous was word-constituent, current isn't - bit 1 - previous wasn't word-constituent, current is - bit 0 - neither previous nor current is word-constituent - - Word-constituent characters are those that satisfy isalnum(). - - The macro SUCCEEDS_IN_CONTEXT determines whether a a given constraint - succeeds in a particular context. Prevn is true if the previous character - was a newline, currn is true if the lookahead character is a newline. - Prevl and currl similarly depend upon whether the previous and current - characters are word-constituent letters. */ -#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \ - ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4)) -#define MATCHES_LETTER_CONTEXT(constraint, prevl, currl) \ - ((constraint) & 1 << (((prevl) ? 2 : 0) + ((currl) ? 1 : 0))) -#define SUCCEEDS_IN_CONTEXT(constraint, prevn, currn, prevl, currl) \ - (MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \ - && MATCHES_LETTER_CONTEXT(constraint, prevl, currl)) - -/* The following macros give information about what a constraint depends on. */ -#define PREV_NEWLINE_DEPENDENT(constraint) \ - (((constraint) & 0xc0) >> 2 != ((constraint) & 0x30)) -#define PREV_LETTER_DEPENDENT(constraint) \ - (((constraint) & 0x0c) >> 2 != ((constraint) & 0x03)) - -/* Tokens that match the empty string subject to some constraint actually - work by applying that constraint to determine what may follow them, - taking into account what has gone before. The following values are - the constraints corresponding to the special tokens previously defined. */ -#define NO_CONSTRAINT 0xff -#define BEGLINE_CONSTRAINT 0xcf -#define ENDLINE_CONSTRAINT 0xaf -#define BEGWORD_CONSTRAINT 0xf2 -#define ENDWORD_CONSTRAINT 0xf4 -#define LIMWORD_CONSTRAINT 0xf6 -#define NOTLIMWORD_CONSTRAINT 0xf9 - -/* States of the recognizer correspond to sets of positions in the parse - tree, together with the constraints under which they may be matched. - So a position is encoded as an index into the parse tree together with - a constraint. */ -typedef struct -{ - unsigned index; /* Index into the parse array. */ - unsigned constraint; /* Constraint for matching this position. */ -} position; - -/* Sets of positions are stored as arrays. */ -typedef struct -{ - position *elems; /* Elements of this position set. */ - int nelem; /* Number of elements in this set. */ -} position_set; - -/* A state of the dfa consists of a set of positions, some flags, - and the token value of the lowest-numbered position of the state that - contains an END token. */ -typedef struct -{ - int hash; /* Hash of the positions of this state. */ - position_set elems; /* Positions this state could match. 
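The bit layout can be verified mechanically: BEGLINE_CONSTRAINT (0xcf) should succeed exactly when the previous character was a newline, regardless of the current one. A small self-contained check, with the macro and constant copied from this header and only the printing loop added:

#include <stdio.h>

#define MATCHES_NEWLINE_CONTEXT(constraint, prevn, currn) \
  ((constraint) & 1 << (((prevn) ? 2 : 0) + ((currn) ? 1 : 0) + 4))
#define BEGLINE_CONSTRAINT 0xcf

int main(void)
{
    int prevn, currn;

    for (prevn = 0; prevn <= 1; ++prevn)
        for (currn = 0; currn <= 1; ++currn)
            printf("prev newline=%d, curr newline=%d -> %s\n", prevn, currn,
                   MATCHES_NEWLINE_CONTEXT(BEGLINE_CONSTRAINT, prevn, currn)
                   ? "matches" : "fails");
    /* Only the prevn=1 rows match: '^' succeeds right after a newline. */
    return 0;
}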
*/ - char newline; /* True if previous state matched newline. */ - char letter; /* True if previous state matched a letter. */ - char backref; /* True if this state matches a \. */ - unsigned char constraint; /* Constraint for this state to accept. */ - int first_end; /* Token value of the first END in elems. */ -} dfa_state; - -/* Element of a list of strings, at least one of which is known to - appear in any R.E. matching the DFA. */ -struct dfamust -{ - int exact; - char *must; - struct dfamust *next; -}; - -/* A compiled regular expression. */ -struct dfa -{ - /* Stuff built by the scanner. */ - charclass *charclasses; /* Array of character sets for CSET tokens. */ - int cindex; /* Index for adding new charclasses. */ - int calloc; /* Number of charclasses currently allocated. */ - - /* Stuff built by the parser. */ - token *tokens; /* Postfix parse array. */ - int tindex; /* Index for adding new tokens. */ - int talloc; /* Number of tokens currently allocated. */ - int depth; /* Depth required of an evaluation stack - used for depth-first traversal of the - parse tree. */ - int nleaves; /* Number of leaves on the parse tree. */ - int nregexps; /* Count of parallel regexps being built - with dfaparse(). */ - - /* Stuff owned by the state builder. */ - dfa_state *states; /* States of the dfa. */ - int sindex; /* Index for adding new states. */ - int salloc; /* Number of states currently allocated. */ - - /* Stuff built by the structure analyzer. */ - position_set *follows; /* Array of follow sets, indexed by position - index. The follow of a position is the set - of positions containing characters that - could conceivably follow a character - matching the given position in a string - matching the regexp. Allocated to the - maximum possible position index. */ - int searchflag; /* True if we are supposed to build a searching - as opposed to an exact matcher. A searching - matcher finds the first and shortest string - matching a regexp anywhere in the buffer, - whereas an exact matcher finds the longest - string matching, but anchored to the - beginning of the buffer. */ - - /* Stuff owned by the executor. */ - int tralloc; /* Number of transition tables that have - slots so far. */ - int trcount; /* Number of transition tables that have - actually been built. */ - int **trans; /* Transition tables for states that can - never accept. If the transitions for a - state have not yet been computed, or the - state could possibly accept, its entry in - this table is NULL. */ - int **realtrans; /* Trans always points to realtrans + 1; this - is so trans[-1] can contain NULL. */ - int **fails; /* Transition tables after failing to accept - on a state that potentially could do so. */ - int *success; /* Table of acceptance conditions used in - dfaexec and computed in build_state. */ - int *newlines; /* Transitions on newlines. The entry for a - newline in any transition table is always - -1 so we can count lines without wasting - too many cycles. The transition for a - newline is stored separately and handled - as a special case. Newline is also used - as a sentinel at the end of the buffer. */ - struct dfamust *musts; /* List of strings, at least one of which - is known to appear in any r.e. matching - the dfa. */ -}; - -/* Some macros for user access to dfa internals. */ - -/* ACCEPTING returns true if s could possibly be an accepting state of r. */ -#define ACCEPTING(s, r) ((r).states[s].constraint) - -/* ACCEPTS_IN_CONTEXT returns true if the given state accepts in the - specified context. 
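The realtrans/trans pairing described above is a small trick worth spelling out: one extra slot is allocated and trans points one element past it, so the failure state, numbered -1, can be looked up like any other state and always yields NULL. In isolation, with invented sizes and plain calloc() in place of the CALLOC wrapper:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    int tralloc = 4;
    int **realtrans, **trans;

    realtrans = calloc(tralloc + 1, sizeof *realtrans);  /* slot 0 is the sentinel */
    if (realtrans == NULL)
        return 1;
    trans = realtrans + 1;       /* states 0..tralloc-1 live at trans[0..] */

    /* trans[-1] is realtrans[0], which stays NULL, so the executor's inner
       loop can index with the failure state -1 and fall out cleanly instead
       of needing a bounds check on every transition. */
    printf("trans[-1] == NULL? %s\n", trans[-1] == NULL ? "yes" : "no");

    free(realtrans);
    return 0;
}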
*/ -#define ACCEPTS_IN_CONTEXT(prevn, currn, prevl, currl, state, dfa) \ - SUCCEEDS_IN_CONTEXT((dfa).states[state].constraint, \ - prevn, currn, prevl, currl) - -/* FIRST_MATCHING_REGEXP returns the index number of the first of parallel - regexps that a given state could accept. Parallel regexps are numbered - starting at 1. */ -#define FIRST_MATCHING_REGEXP(state, dfa) (-(dfa).states[state].first_end) - -/* Entry points. */ - -/* dfasyntax() takes three arguments; the first sets the syntax bits described - earlier in this file, the second sets the case-folding flag, and the - third specifies the line terminator. */ -extern void dfasyntax PARAMS ((reg_syntax_t, int, int)); - -/* Compile the given string of the given length into the given struct dfa. - Final argument is a flag specifying whether to build a searching or an - exact matcher. */ -extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int)); - -/* Execute the given struct dfa on the buffer of characters. The - first char * points to the beginning, and the second points to the - first character after the end of the buffer, which must be a writable - place so a sentinel end-of-buffer marker can be stored there. The - second-to-last argument is a flag telling whether to allow newlines to - be part of a string matching the regexp. The next-to-last argument, - if non-NULL, points to a place to increment every time we see a - newline. The final argument, if non-NULL, points to a flag that will - be set if further examination by a backtracking matcher is needed in - order to verify backreferencing; otherwise the flag will be cleared. - Returns NULL if no match is found, or a pointer to the first - character after the first & shortest matching string in the buffer. */ -extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *)); - -/* Free the storage held by the components of a struct dfa. */ -extern void dfafree PARAMS ((struct dfa *)); - -/* Entry points for people who know what they're doing. */ - -/* Initialize the components of a struct dfa. */ -extern void dfainit PARAMS ((struct dfa *)); - -/* Incrementally parse a string of given length into a struct dfa. */ -extern void dfaparse PARAMS ((char *, size_t, struct dfa *)); - -/* Analyze a parsed regexp; second argument tells whether to build a searching - or an exact matcher. */ -extern void dfaanalyze PARAMS ((struct dfa *, int)); - -/* Compute, for each possible character, the transitions out of a given - state, storing them in an array of integers. */ -extern void dfastate PARAMS ((int, struct dfa *, int [])); - -/* Error handling. */ - -/* dfaerror() is called by the regexp routines whenever an error occurs. It - takes a single argument, a NUL-terminated string describing the error. - The default dfaerror() prints the error message to stderr and exits. - The user can provide a different dfafree() if so desired. */ -extern void dfaerror PARAMS ((const char *)); diff --git a/contrib/awk/doc/ChangeLog b/contrib/awk/doc/ChangeLog deleted file mode 100644 index 79dac76..0000000 --- a/contrib/awk/doc/ChangeLog +++ /dev/null @@ -1,212 +0,0 @@ -Sun Jun 3 13:04:44 2001 Arnold D. Robbins - - * Release 3.1.0: Release tar file made. And there was - rejoicing. - -Mon May 14 19:57:31 2001 Arnold D. Robbins - - * gawk.texi, gawkinet.texi: Versions for distribution - put in place. - * gawk.1, awkcard.in: Minor edits for consistency of - usage, formatting. - -Wed Nov 22 14:57:59 2000 Arnold D. 
Robbins - - * gawk.texi, gawk.1, awkcard.in: Removed all documentation - of abort. - -Sun Aug 13 11:23:50 2000 Arnold D. Robbins - - * gawk.texi, gawk.1, awkcard.in: documented sort function - and optional third argument to match. - -Sun Aug 13 00:40:41 2000 Arnold D. Robbins - - * gawk.texi: hardwired publisher info. - * publisher.texi: Removed. Not needed any more. - * gawkinet.texi: Added title page stuff. - -Thu Jul 5 21:05:57 2000 Arnold D. Robbins - - * gawk.texi: moved to use of @command, @option everywhere - appropriate. Removed all @page and @group in anticipation - of re-page breaking. Updated stuff for install-info. - Added FDL. - -Tue Nov 10 11:42:26 1998 Arnold D. Robbins - - * publisher.texi: new file with publisher related info. - * Makefile.in: updated dvi and postscript targets to make - them lots smarter about not reformatting if need be. - -Mon Aug 7 15:23:00 2000 Arnold D. Robbins - - * Release 3.0.6: Release tar file made. - -Sun Jun 25 15:08:19 2000 Arnold D. Robbins - - * Release 3.0.5: Release tar file made. - -Wed May 17 19:04:54 2000 Arnold D. Robbins - - * gawk.texi, gawk.1, awkcard.in: Documented %u. Ooops. - -Tue May 2 11:44:13 2000 Arnold D. Robbins - - * texinfo.tex: Updated to version 1999-10-01.07. - * gawk.texi: Redid page breaking for new texinfo.tex. - -Thu Apr 6 12:32:49 2000 Arnold D. Robbins - - * gawk.texi: Change info dir file entry to `(gawk)' from - `(gawk.info)'. - * Makefile.in [$(infodir)/gawk.info]: Fix grep test is - accordance with above. - -Sun Feb 13 15:36:32 2000 Paul Eggert - - * gawk.texi: Mention that arithmetic is done in double - precision floating point, and point to Goldberg's paper for - people who want to know more. Fix some other minor floating - point discussion issues. - -Wed Nov 3 17:04:35 1999 Arnold D. Robbins - - * gawk.1: Lots of troff ``lint'' from Paul Eggert. Not all - of his changes, just the ones I thought worth doing. - -Mon Oct 11 16:53:54 1999 Arnold D. Robbins - - * Makefile.in (gawk.dvi): Put $(srcdir) first in TEXINPUTS, - and also just use texi2dvi, don't run texindex and tex - manually. Doing so is no longer necessary. - -Mon Aug 9 13:06:01 1999 Arnold D. Robbins - - * gawk.texi: New node `Array Efficiency' on the best use - of subscripting to avoid memory bloat. - -Thu Jul 29 23:15:34 1999 Arnold D. Robbins - - * Makefile.in ($(infodir)/gawk.info): Removed loop around - $(INSTALL_DATA), since there's only one Info file to install, - install it directly. - -Wed Jun 30 16:14:36 1999 Arnold D. Robbins - - * Release 3.0.4: Release tar file made. This time for sure. - -Wed Oct 7 21:59:33 1998 Arnold D. Robbins - - * texinfo.tex: Updated to version 2.227, from Texinfo 3.12. - -Sun Oct 19 12:26:08 1997 Arnold D. Robbins - - * ALL: change references to arnold@gnu.ai.mit.edu to arnold@gnu.org. - -Tue Sep 23 10:31:17 1997 Arnold D. Robbins - - * texinfo.tex: Updated to version 2.218, from Texinfo 3.11. - -Fri Jul 4 08:19:00 1997 Arnold D. Robbins - - * Makefile.in ($(infodir)/gawk.info): Don't make dependent upon - gawk.info, in case installed one is newer. Instead, check that - an installed gawk.info exists and is identical to current one. - If so, just exit; otherwise do the install. - -Wed Jul 2 14:55:12 1997 Arnold D. Robbins - - * Makefile.in ($(infodir)/gawk.info): typo fix. - -Thu May 15 12:49:08 1997 Arnold D. Robbins - - * Release 3.0.3: Release tar file made. - -Fri Apr 18 07:55:47 1997 Arnold D. Robbins - - * BETA Release 3.0.34: Release tar file made. - -Sun Apr 13 15:39:20 1997 Arnold D. 
Robbins - - * Makefile.in ($(infodir)/gawk.info): exit 0 in case install-info - fails. - -Thu Jan 2 23:17:53 1997 Fred Fish - - * Makefile.in (awkcard.tr): Use ':' chars to separate parts of - sed command, since $(srcdir) may expand to something with '/' - characters in it, which confuses sed terribly. - * gawk.texi (Amiga Installation): Note change of configuration - from "m68k-cbm-amigados" to "m68k-amigaos". Point ftp users - towards current ADE distribution and not obsolete Aminet - "gcc" distribution. Change "FreshFish" to "Geek Gadgets". - -Wed Dec 25 11:25:22 1996 Arnold D. Robbins - - * Release 3.0.2: Release tar file made. - -Wed Dec 25 11:17:32 1996 Arnold D. Robbins - - * Makefile.in ($(mandir)/igawk$(manext),$(mandir)/gawk$(manext)): - remove chmod command; let $(INSTALL_DATA) use -m. - -Tue Dec 17 22:38:28 1996 Arnold D. Robbins - - * Makefile.in (gawk.info,gawk.dvi,postscript): run makeinfo, TeX, - and/or troff against files in $(srcdir). Thanks to Ulrich Drepper. - ($(infodir)/gawk.info): use --info-dir to install-info, not - --infodir. - -Tue Dec 10 23:09:26 1996 Arnold D. Robbins - - * Release 3.0.1: Release tar file made. - -Mon Dec 9 12:48:54 1996 Arnold D. Robbins - - * no.colors: new file from Michal for old troffs. - * Makefile.in [AWKCARD]: changes to parameterize old/new troff. - -Sun Dec 1 15:04:56 1996 Arnold D. Robbins - - * texinfo.tex: Updated to version 2.193, from Karl Berry. - -Tue Nov 26 22:57:15 1996 Arnold D. Robbins - - * Makefile.in ($(infodir)/gawk.info): Change option in call - to `install-info' to `--info-dir' from `--infodir'. - -Mon Nov 4 13:30:39 1996 Arnold D. Robbins - - * Makefile.in: updates for reference card. - (ad.block, awkcard.in, cardfonts, colors, macros, setter.outline): - new files for reference card. - -Wed Oct 16 12:43:02 1996 Arnold D. Robbins - - * texinfo.tex: Updated to version 2.185, from texinfo-3.9 dist. - -Sun Aug 11 23:12:08 1996 Arnold D. Robbins - - * Makefile.in ($(infodir)/gawk.info): correct use of - $(INSTALL_DATA) and remove chmod command. - -Thu Jul 11 22:06:50 1996 Arnold D. Robbins - - * Makefile.in ($(mandir)/gawk.$(ext), $(mandir)/igawk.$(ext)): - made dependant on files in $(srcdir). - -Fri Mar 15 06:45:35 1996 Arnold D. Robbins - - * Makefile.in (clean): add `*~' to list of files to be removed. - -Thu Jan 25 23:40:15 1996 Arnold D. Robbins - - * Makefile.in (dvi): run texindex and tex an extra time. - This gets the cross references right. Sigh. - -Wed Jan 24 11:51:54 1996 Arnold D. Robbins - - * Makefile.in (maintainer-clean): - Depend on distclean, not the other way around. - Output warning message as per GNU standards. diff --git a/contrib/awk/doc/Makefile.am b/contrib/awk/doc/Makefile.am deleted file mode 100644 index 3a9e4b4..0000000 --- a/contrib/awk/doc/Makefile.am +++ /dev/null @@ -1,80 +0,0 @@ -# -# doc/Makefile.am --- automake input file for gawk -# -# Copyright (C) 2000, 2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - -## process this file with automake to produce Makefile.in - -info_TEXINFOS = gawk.texi gawkinet.texi - -man_MANS = gawk.1 igawk.1 - -EXTRA_DIST = ChangeLog README.card ad.block setter.outline \ - awkcard.in awkforai.txt texinfo.tex cardfonts \ - macros colors no.colors $(man_MANS) \ - uf002331.eps uf002331.jpg lflashlight.eps rflashlight.eps \ - statist.jpg statist.eps - -MAKEINFO = @MAKEINFO@ --no-split - -TROFF = groff -t -Tps -SEDME = sed -e "s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. (all)) show/" \ - -e "s/^\/level0 save def/\/level0 save def 30 -48 translate/" - -CARDSRC = $(srcdir)/macros $(srcdir)/cardfonts $(srcdir)/colors awkcard.tr -CARDSRC_N = $(srcdir)/macros $(srcdir)/cardfonts $(srcdir)/no.colors awkcard.tr -CARDFILES= $(CARDSRC) ad.block awkcard.in setter.outline - -# Use this if your troff can correctly handle macros from 'colors' file -AWKCARD = awkcard.ps - -# Uncomment the following definition of AWKCARD if your troff can produce -# Postscript but still has troubles with macros from 'colors'. As this -# is not groff you will have to change TROFF macro as well. Do not forget -# to ensure that awkcard.tr is processed by tbl. -#AWKCARD = awkcard.nc - -postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD) - -gawk.ps: gawk.dvi - dvips -o gawk.ps gawk.dvi - -gawkinet.ps: gawkinet.dvi - dvips -o gawkinet.ps gawkinet.dvi - -gawk.1.ps: gawk.1 - -groff -man $(srcdir)/gawk.1 > gawk.1.ps - -igawk.1.ps: igawk.1 - -groff -man $(srcdir)/igawk.1 > igawk.1.ps - -awkcard.tr: awkcard.in - sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr - -awkcard.ps: $(CARDFILES) - $(TROFF) $(CARDSRC) | $(SEDME) | cat $(srcdir)/setter.outline - > awkcard.ps - -awkcard.nc: $(CARDFILES) - $(TROFF) $(CARDSRC_N) | $(SEDME) | cat $(srcdir)/setter.outline - > awkcard.ps && touch awkcard.nc - -clean: - rm -f *.ps *~ awkcard.nc - diff --git a/contrib/awk/doc/Makefile.in b/contrib/awk/doc/Makefile.in deleted file mode 100644 index 555450d..0000000 --- a/contrib/awk/doc/Makefile.in +++ /dev/null @@ -1,463 +0,0 @@ -# Makefile.in generated automatically by automake 1.4a from Makefile.am - -# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000 -# Free Software Foundation, Inc. -# This Makefile.in is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -SHELL = @SHELL@ - -srcdir = @srcdir@ -top_srcdir = @top_srcdir@ -VPATH = @srcdir@ -prefix = @prefix@ -exec_prefix = @exec_prefix@ - -bindir = @bindir@ -sbindir = @sbindir@ -libexecdir = @libexecdir@ -datadir = @datadir@ -sysconfdir = @sysconfdir@ -sharedstatedir = @sharedstatedir@ -localstatedir = @localstatedir@ -libdir = @libdir@ -infodir = @infodir@ -mandir = @mandir@ -includedir = @includedir@ -oldincludedir = /usr/include - -pkgdatadir = $(datadir)/@PACKAGE@ -pkglibdir = $(libdir)/@PACKAGE@ -pkgincludedir = $(includedir)/@PACKAGE@ - -top_builddir = .. 
- -ACLOCAL = @ACLOCAL@ -AUTOCONF = @AUTOCONF@ -AUTOMAKE = @AUTOMAKE@ -AUTOHEADER = @AUTOHEADER@ - -INSTALL = @INSTALL@ -INSTALL_PROGRAM = @INSTALL_PROGRAM@ -INSTALL_DATA = @INSTALL_DATA@ -INSTALL_SCRIPT = @INSTALL_SCRIPT@ -INSTALL_STRIP_FLAG = -transform = @program_transform_name@ - -NORMAL_INSTALL = : -PRE_INSTALL = : -POST_INSTALL = : -NORMAL_UNINSTALL = : -PRE_UNINSTALL = : -POST_UNINSTALL = : - -@SET_MAKE@ -AMDEP = @AMDEP@ -AMTAR = @AMTAR@ -AWK = @AWK@ -CATALOGS = @CATALOGS@ -CATOBJEXT = @CATOBJEXT@ -CC = @CC@ -CFLAGS = @CFLAGS@ -CPP = @CPP@ -CXX = @CXX@ -CXXCPP = @CXXCPP@ -DATADIRNAME = @DATADIRNAME@ -DEPDIR = @DEPDIR@ -GENCAT = @GENCAT@ -GMOFILES = @GMOFILES@ -GMSGFMT = @GMSGFMT@ -GT_NO = @GT_NO@ -GT_YES = @GT_YES@ -INCLUDE_LOCALE_H = @INCLUDE_LOCALE_H@ -INSTOBJEXT = @INSTOBJEXT@ -INTLDEPS = @INTLDEPS@ -INTLLIBS = @INTLLIBS@ -INTLOBJS = @INTLOBJS@ -LN_S = @LN_S@ -MKINSTALLDIRS = @MKINSTALLDIRS@ -MSGFMT = @MSGFMT@ -PACKAGE = @PACKAGE@ -POFILES = @POFILES@ -POSUB = @POSUB@ -RANLIB = @RANLIB@ -SOCKET_LIBS = @SOCKET_LIBS@ -U = @U@ -USE_INCLUDED_LIBINTL = @USE_INCLUDED_LIBINTL@ -USE_NLS = @USE_NLS@ -VERSION = @VERSION@ -YACC = @YACC@ -install_sh = @install_sh@ -l = @l@ - -# -# doc/Makefile.am --- automake input file for gawk -# -# Copyright (C) 2000, 2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - - -info_TEXINFOS = gawk.texi gawkinet.texi - -man_MANS = gawk.1 igawk.1 - -EXTRA_DIST = ChangeLog README.card ad.block setter.outline \ - awkcard.in awkforai.txt texinfo.tex cardfonts \ - macros colors no.colors $(man_MANS) \ - uf002331.eps uf002331.jpg lflashlight.eps rflashlight.eps \ - statist.jpg statist.eps - - -MAKEINFO = @MAKEINFO@ --no-split - -TROFF = groff -t -Tps -SEDME = sed -e "s/^level0 restore/level0 restore flashme 100 72 moveto (Copyright `date '+%m-%d-%y %T'`, FSF, Inc. 
(all)) show/" \ - -e "s/^\/level0 save def/\/level0 save def 30 -48 translate/" - - -CARDSRC = $(srcdir)/macros $(srcdir)/cardfonts $(srcdir)/colors awkcard.tr -CARDSRC_N = $(srcdir)/macros $(srcdir)/cardfonts $(srcdir)/no.colors awkcard.tr -CARDFILES = $(CARDSRC) ad.block awkcard.in setter.outline - -# Use this if your troff can correctly handle macros from 'colors' file -AWKCARD = awkcard.ps -subdir = doc -mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs -CONFIG_HEADER = ../config.h -CONFIG_CLEAN_FILES = -DIST_SOURCES = -TEXI2DVI = texi2dvi -INFO_DEPS = gawk.info gawkinet.info -DVIS = gawk.dvi gawkinet.dvi -TEXINFOS = gawk.texi gawkinet.texi -man1dir = $(mandir)/man1 -MANS = $(man_MANS) - -NROFF = nroff -DIST_COMMON = ChangeLog Makefile.am Makefile.in texinfo.tex - - -DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) - -GZIP_ENV = --best -all: all-redirect -.SUFFIXES: -.SUFFIXES: .dvi .info .ps .texi .texinfo .txi -$(srcdir)/Makefile.in: Makefile.am $(top_srcdir)/configure.in $(ACLOCAL_M4) - cd $(top_srcdir) && $(AUTOMAKE) --gnu doc/Makefile - -Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status - cd $(top_builddir) \ - && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status - - -gawk.info: gawk.texi -gawk.dvi: gawk.texi - - -gawkinet.info: gawkinet.texi -gawkinet.dvi: gawkinet.texi - - -DVIPS = dvips - -.texi.info: - @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] - cd $(srcdir) \ - && $(MAKEINFO) `echo $< | sed 's,.*/,,'` - -.texi.dvi: - TEXINPUTS=$(srcdir):$$TEXINPUTS \ - MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $< - -.texi: - @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] - cd $(srcdir) \ - && $(MAKEINFO) `echo $< | sed 's,.*/,,'` - -.texinfo.info: - @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] - cd $(srcdir) \ - && $(MAKEINFO) `echo $< | sed 's,.*/,,'` - -.texinfo: - @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] - cd $(srcdir) \ - && $(MAKEINFO) `echo $< | sed 's,.*/,,'` - -.texinfo.dvi: - TEXINPUTS=$(srcdir):$$TEXINPUTS \ - MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $< - -.txi.info: - @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] - cd $(srcdir) \ - && $(MAKEINFO) `echo $< | sed 's,.*/,,'` - -.txi.dvi: - TEXINPUTS=$(srcdir):$$TEXINPUTS \ - MAKEINFO='$(MAKEINFO) -I $(srcdir)' $(TEXI2DVI) $< - -.txi: - @cd $(srcdir) && rm -f $@ $@-[0-9] $@-[0-9][0-9] - cd $(srcdir) \ - && $(MAKEINFO) `echo $< | sed 's,.*/,,'` -.dvi.ps: - $(DVIPS) $< -o $@ - -install-info-am: $(INFO_DEPS) - @$(NORMAL_INSTALL) - $(mkinstalldirs) $(DESTDIR)$(infodir) - @list='$(INFO_DEPS)'; \ - for file in $$list; do \ - d=$(srcdir); \ - for ifile in `CDPATH=: && cd $$d && echo $$file $$file-[0-9] $$file-[0-9][0-9]`; do \ - if test -f $$d/$$ifile; then \ - echo " $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile"; \ - $(INSTALL_DATA) $$d/$$ifile $(DESTDIR)$(infodir)/$$ifile; \ - else : ; fi; \ - done; \ - done - @$(POST_INSTALL) - @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \ - list='$(INFO_DEPS)'; \ - for file in $$list; do \ - echo " install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file";\ - install-info --info-dir=$(DESTDIR)$(infodir) $(DESTDIR)$(infodir)/$$file || :;\ - done; \ - else : ; fi - -uninstall-info: - $(PRE_UNINSTALL) - @if $(SHELL) -c 'install-info --version | sed 1q | fgrep -s -v -i debian' >/dev/null 2>&1; then \ - list='$(INFO_DEPS)'; \ - for file in $$list; do \ - echo " install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file"; 
\ - install-info --info-dir=$(DESTDIR)$(infodir) --remove $(DESTDIR)$(infodir)/$$file; \ - done; \ - else :; fi - @$(NORMAL_UNINSTALL) - @list='$(INFO_DEPS)'; \ - for file in $$list; do \ - (if cd $(DESTDIR)$(infodir); then \ - echo " rm -f $$file $$file-[0-9] $$file-[0-9][0-9])"; \ - rm -f $$file $$file-[0-9] $$file-[0-9][0-9]; \ - else :; fi); \ - done - -dist-info: $(INFO_DEPS) - list='$(INFO_DEPS)'; \ - for base in $$list; do \ - d=$(srcdir); \ - for file in `CDPATH=: && cd $$d && eval echo $$base*`; do \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file; \ - done; \ - done - -mostlyclean-aminfo: - -rm -f gawk.aux gawk.cp gawk.cps gawk.dvi gawk.fn gawk.fns gawk.pgs \ - gawk.ky gawk.kys gawk.ps gawk.log gawk.pg gawk.toc gawk.tp \ - gawk.tps gawk.vr gawk.vrs gawk.op gawk.tr gawk.cv gawk.cn \ - gawk.cm gawk.ov gawkinet.aux gawkinet.cp gawkinet.cps \ - gawkinet.dvi gawkinet.fn gawkinet.fns gawkinet.pgs \ - gawkinet.ky gawkinet.kys gawkinet.ps gawkinet.log gawkinet.pg \ - gawkinet.toc gawkinet.tp gawkinet.tps gawkinet.vr \ - gawkinet.vrs gawkinet.op gawkinet.tr gawkinet.cv gawkinet.cn \ - gawkinet.cm gawkinet.ov - -clean-aminfo: - -distclean-aminfo: - -maintainer-clean-aminfo: - cd $(srcdir) && for i in $(INFO_DEPS); do \ - rm -f $$i; \ - if test "`echo $$i-[0-9]*`" != "$$i-[0-9]*"; then \ - rm -f $$i-[0-9]*; \ - fi; \ - done - -install-man1: - $(mkinstalldirs) $(DESTDIR)$(man1dir) - @list='$(man1_MANS)'; \ - l2='$(man_MANS)'; for i in $$l2; do \ - case "$$i" in \ - *.1*) list="$$list $$i" ;; \ - esac; \ - done; \ - for i in $$list; do \ - if test -f $(srcdir)/$$i; then file=$(srcdir)/$$i; \ - else file=$$i; fi; \ - ext=`echo $$i | sed -e 's/^.*\\.//'`; \ - inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \ - inst=`echo $$inst | sed -e 's/^.*\///'`; \ - inst=`echo $$inst | sed '$(transform)'`.$$ext; \ - echo " $(INSTALL_DATA) $$file $(DESTDIR)$(man1dir)/$$inst"; \ - $(INSTALL_DATA) $$file $(DESTDIR)$(man1dir)/$$inst; \ - done - -uninstall-man1: - @list='$(man1_MANS)'; \ - l2='$(man_MANS)'; for i in $$l2; do \ - case "$$i" in \ - *.1*) list="$$list $$i" ;; \ - esac; \ - done; \ - for i in $$list; do \ - ext=`echo $$i | sed -e 's/^.*\\.//'`; \ - inst=`echo $$i | sed -e 's/\\.[0-9a-z]*$$//'`; \ - inst=`echo $$inst | sed -e 's/^.*\///'`; \ - inst=`echo $$inst | sed '$(transform)'`.$$ext; \ - echo " rm -f $(DESTDIR)$(man1dir)/$$inst"; \ - rm -f $(DESTDIR)$(man1dir)/$$inst; \ - done -install-man: $(MANS) - @$(NORMAL_INSTALL) - $(MAKE) $(AM_MAKEFLAGS) install-man1 -uninstall-man: - @$(NORMAL_UNINSTALL) - $(MAKE) $(AM_MAKEFLAGS) uninstall-man1 -tags: TAGS -TAGS: - - -distdir = $(top_builddir)/$(PACKAGE)-$(VERSION)/$(subdir) - -distdir: $(DISTFILES) - @for file in $(DISTFILES); do \ - d=$(srcdir); \ - if test -d $$d/$$file; then \ - cp -pR $$d/$$file $(distdir) \ - || exit 1; \ - else \ - test -f $(distdir)/$$file \ - || cp -p $$d/$$file $(distdir)/$$file \ - || exit 1; \ - fi; \ - done - $(MAKE) $(AM_MAKEFLAGS) top_distdir="$(top_distdir)" distdir="$(distdir)" dist-info -info-am: $(INFO_DEPS) -info: info-am -dvi-am: $(DVIS) -dvi: dvi-am -check-am: all-am -check: check-am -installcheck-am: -installcheck: installcheck-am -install-exec-am: -install-exec: install-exec-am - -install-data-am: install-info-am install-man -install-data: install-data-am - -install-am: all-am - @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am -install: install-am -uninstall-am: uninstall-info uninstall-man -uninstall: uninstall-am -all-am: Makefile $(INFO_DEPS) $(MANS) -all-redirect: all-am 
-install-strip: - $(MAKE) $(AM_MAKEFLAGS) INSTALL_STRIP_FLAG=-s install -installdirs: - $(mkinstalldirs) $(DESTDIR)$(infodir) $(DESTDIR)$(mandir)/man1 - - -mostlyclean-generic: - -clean-generic: - -distclean-generic: - -rm -f Makefile $(CONFIG_CLEAN_FILES) - -rm -f config.cache config.log stamp-h stamp-h[0-9]* - -maintainer-clean-generic: - -rm -f Makefile.in -mostlyclean-am: mostlyclean-aminfo mostlyclean-generic - -mostlyclean: mostlyclean-am - -clean-am: clean-aminfo clean-generic mostlyclean-am - -clean: clean-am - -distclean-am: distclean-aminfo distclean-generic clean-am - -distclean: distclean-am - -maintainer-clean-am: maintainer-clean-aminfo maintainer-clean-generic \ - distclean-am - @echo "This command is intended for maintainers to use;" - @echo "it deletes files that may require special tools to rebuild." - -maintainer-clean: maintainer-clean-am - -.PHONY: install-info-am uninstall-info mostlyclean-aminfo \ -distclean-aminfo clean-aminfo maintainer-clean-aminfo install-man1 \ -uninstall-man1 install-man uninstall-man tags distdir info-am info \ -dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ -install-exec install-data-am install-data install-am install \ -uninstall-am uninstall all-redirect all-am all install-strip \ -installdirs mostlyclean-generic distclean-generic clean-generic \ -maintainer-clean-generic clean mostlyclean distclean maintainer-clean - - -# Uncomment the following definition of AWKCARD if your troff can produce -# Postscript but still has troubles with macros from 'colors'. As this -# is not groff you will have to change TROFF macro as well. Do not forget -# to ensure that awkcard.tr is processed by tbl. -#AWKCARD = awkcard.nc - -postscript: gawk.ps gawkinet.ps gawk.1.ps igawk.1.ps $(AWKCARD) - -gawk.ps: gawk.dvi - dvips -o gawk.ps gawk.dvi - -gawkinet.ps: gawkinet.dvi - dvips -o gawkinet.ps gawkinet.dvi - -gawk.1.ps: gawk.1 - -groff -man $(srcdir)/gawk.1 > gawk.1.ps - -igawk.1.ps: igawk.1 - -groff -man $(srcdir)/igawk.1 > igawk.1.ps - -awkcard.tr: awkcard.in - sed 's:SRCDIR:$(srcdir):' < $(srcdir)/awkcard.in > awkcard.tr - -awkcard.ps: $(CARDFILES) - $(TROFF) $(CARDSRC) | $(SEDME) | cat $(srcdir)/setter.outline - > awkcard.ps - -awkcard.nc: $(CARDFILES) - $(TROFF) $(CARDSRC_N) | $(SEDME) | cat $(srcdir)/setter.outline - > awkcard.ps && touch awkcard.nc - -clean: - rm -f *.ps *~ awkcard.nc - -# Tell versions [3.59,3.63) of GNU make to not export all variables. -# Otherwise a system limit (for SysV at least) may be exceeded. -.NOEXPORT: diff --git a/contrib/awk/doc/README.card b/contrib/awk/doc/README.card deleted file mode 100644 index ef77cda..0000000 --- a/contrib/awk/doc/README.card +++ /dev/null @@ -1,19 +0,0 @@ -Mon Dec 9 12:45:48 EST 1996 - -The AWK reference card included here requires a modern version of troff -(ditroff). GNU Troff (groff) is known to work. - -If your troff is able to produce Postscript but does not know how to -properly use the macros from `colors' file then try to uncomment in -Makefile the defintion which sets AWKCARD to awkcard.nc (no colors). -This will definitely require changes to the TROFF macro and you have to -ensure that the tbl preprocessor is called. For example, the following -modifications on NeXT: - -TROFF = tbl -SEDME = ptroff -t | sed -e \ - "s/^level0 restore/level0 restore flashme 100 72 moveto\ - (Copyright `date`, FSF, Inc. (all)) show/" \ - -e "s/^\/level0 save def/\/level0 save def 30 -48 translate/" - -will produce a correctly formatted, albeit monochromatic, reference card. 
diff --git a/contrib/awk/doc/ad.block b/contrib/awk/doc/ad.block deleted file mode 100644 index ee99a5a..0000000 --- a/contrib/awk/doc/ad.block +++ /dev/null @@ -1,48 +0,0 @@ -.\" AWK Reference Card --- Arnold Robbins, arnold@gnu.org -.\" This file is the Ad block (included in cover) -.\" -.\" Copyright (C) 1996, 1998, 2000, 2001 Free Software Foundation, Inc. -.\" -.\" Permission is granted to make and distribute verbatim copies of -.\" this reference card provided the copyright notice and this permission -.\" notice are preserved on all copies. -.\" -.\" Permission is granted to process this file through troff and print the -.\" results, provided the printed document carries copying permission -.\" notice identical to this one except for the removal of this paragraph -.\" (this paragraph not being relevant to the printed reference card). -.\" -.\" Permission is granted to copy and distribute modified versions of this -.\" reference card under the conditions for verbatim copying, provided that -.\" the entire resulting derived work is distributed under the terms of a -.\" permission notice identical to this one. -.\" -.\" Permission is granted to copy and distribute translations of this -.\" reference card into another language, under the above conditions for -.\" modified versions, except that this permission notice may be stated in -.\" a translation approved by the Foundation. -.\" -.ft HB -.ps 10 -.vs 12 -.ES -.nf -.ce 7 -\*(CBFree Software Foundation, Inc. -.ft H -59 Temple Place \(em Suite 330 -Boston, MA 02111-1307 USA -Phone: +1-617-542-5942 -Fax (including Japan): +1-617-542-2652 -E-mail: gnu@gnu.org -URL: http://www.gnu.org - -.ce 5 -.ft HB -\*(CGSource Distributions on CD-ROM -Deluxe Distributions -Emacs, Gawk, Make and GDB Manuals -Emacs and GDB References\*(CX -.EB "\f(HBOTHER FSF PRODUCTS:\*(FR" -.ps -.vs diff --git a/contrib/awk/doc/awk.1 b/contrib/awk/doc/awk.1 deleted file mode 100644 index 807f79f..0000000 --- a/contrib/awk/doc/awk.1 +++ /dev/null @@ -1,2628 +0,0 @@ -.\" $FreeBSD$ -.ds PX \s-1POSIX\s+1 -.ds UX \s-1UNIX\s+1 -.ds AN \s-1ANSI\s+1 -.TH GAWK 1 "Apr 28 1999" "Free Software Foundation" "Utility Commands" -.SH NAME -gawk \- pattern scanning and processing language -.SH SYNOPSIS -.B gawk -[ POSIX or GNU style options ] -.B \-f -.I program-file -[ -.B \-\^\- -] file .\^.\^. -.br -.B gawk -[ POSIX or GNU style options ] -[ -.B \-\^\- -] -.I program-text -file .\^.\^. -.SH DESCRIPTION -.I Gawk -is the GNU Project's implementation of the AWK programming language. -It conforms to the definition of the language in -the \*(PX 1003.2 Command Language And Utilities Standard. -This version in turn is based on the description in -.IR "The AWK Programming Language" , -by Aho, Kernighan, and Weinberger, -with the additional features found in the System V Release 4 version -of \*(UX -.IR awk . -.I Gawk -also provides more recent Bell Labs -.I awk -extensions, and some GNU-specific extensions. -.PP -The command line consists of options to -.I gawk -itself, the AWK program text (if not supplied via the -.B \-f -or -.B \-\^\-file -options), and values to be made -available in the -.B ARGC -and -.B ARGV -pre-defined AWK variables. -.SH OPTION FORMAT -.PP -.I Gawk -options may be either the traditional \*(PX one letter options, -or the GNU style long options. \*(PX options start with a single ``\-'', -while long options start with ``\-\^\-''. -Long options are provided for both GNU-specific features and -for \*(PX mandated features. 
-.PP -Following the \*(PX standard, -.IR gawk -specific -options are supplied via arguments to the -.B \-W -option. Multiple -.B \-W -options may be supplied -Each -.B \-W -option has a corresponding long option, as detailed below. -Arguments to long options are either joined with the option -by an -.B = -sign, with no intervening spaces, or they may be provided in the -next command line argument. -Long options may be abbreviated, as long as the abbreviation -remains unique. -.SH OPTIONS -.PP -.I Gawk -accepts the following options. -.TP -.PD 0 -.BI \-F " fs" -.TP -.PD -.BI \-\^\-field-separator " fs" -Use -.I fs -for the input field separator (the value of the -.B FS -predefined -variable). -.TP -.PD 0 -\fB\-v\fI var\fB\^=\^\fIval\fR -.TP -.PD -\fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR -Assign the value -.IR val , -to the variable -.IR var , -before execution of the program begins. -Such variable values are available to the -.B BEGIN -block of an AWK program. -.TP -.PD 0 -.BI \-f " program-file" -.TP -.PD -.BI \-\^\-file " program-file" -Read the AWK program source from the file -.IR program-file , -instead of from the first command line argument. -Multiple -.B \-f -(or -.BR \-\^\-file ) -options may be used. -.TP -.PD 0 -.BI \-mf " NNN" -.TP -.PD -.BI \-mr " NNN" -Set various memory limits to the value -.IR NNN . -The -.B f -flag sets the maximum number of fields, and the -.B r -flag sets the maximum record size. These two flags and the -.B \-m -option are from the Bell Labs research version of \*(UX -.IR awk . -They are ignored by -.IR gawk , -since -.I gawk -has no pre-defined limits. -.TP -.PD 0 -.B "\-W traditional" -.TP -.PD 0 -.B "\-W compat" -.TP -.PD 0 -.B \-\^\-traditional -.TP -.PD -.B \-\^\-compat -Run in -.I compatibility -mode. In compatibility mode, -.I gawk -behaves identically to \*(UX -.IR awk ; -none of the GNU-specific extensions are recognized. -The use of -.B \-\^\-traditional -is preferred over the other forms of this option. -See -.BR "GNU EXTENSIONS" , -below, for more information. -.TP -.PD 0 -.B "\-W copyleft" -.TP -.PD 0 -.B "\-W copyright" -.TP -.PD 0 -.B \-\^\-copyleft -.TP -.PD -.B \-\^\-copyright -Print the short version of the GNU copyright information message on -the standard output, and exits successfully. -.TP -.PD 0 -.B "\-W help" -.TP -.PD 0 -.B "\-W usage" -.TP -.PD 0 -.B \-\^\-help -.TP -.PD -.B \-\^\-usage -Print a relatively short summary of the available options on -the standard output. -(Per the -.IR "GNU Coding Standards" , -these options cause an immediate, successful exit.) -.TP -.PD 0 -.B "\-W lint" -.TP -.PD -.B \-\^\-lint -Provide warnings about constructs that are -dubious or non-portable to other AWK implementations. -.TP -.PD 0 -.B "\-W lint\-old" -.TP -.PD -.B \-\^\-lint\-old -Provide warnings about constructs that are -not portable to the original version of Unix -.IR awk . -.ig -.\" This option is left undocumented, on purpose. -.TP -.PD 0 -.B "\-W nostalgia" -.TP -.PD -.B \-\^\-nostalgia -Provide a moment of nostalgia for long time -.I awk -users. -.. -.TP -.PD 0 -.B "\-W posix" -.TP -.PD -.B \-\^\-posix -This turns on -.I compatibility -mode, with the following additional restrictions: -.RS -.TP \w'\(bu'u+1n -\(bu -.B \ex -escape sequences are not recognized. -.TP -\(bu -Only space and tab act as field separators when -.B FS -is set to a single space, newline does not. -.TP -\(bu -The synonym -.B func -for the keyword -.B function -is not recognized. 
-.TP -\(bu -The operators -.B ** -and -.B **= -cannot be used in place of -.B ^ -and -.BR ^= . -.TP -\(bu -The -.B fflush() -function is not available. -.RE -.TP -.PD 0 -.B "\-W re\-interval" -.TP -.PD -.B \-\^\-re\-interval -Enable the use of -.I "interval expressions" -in regular expression matching -(see -.BR "Regular Expressions" , -below). -Interval expressions were not traditionally available in the -AWK language. The POSIX standard added them, to make -.I awk -and -.I egrep -consistent with each other. -However, their use is likely -to break old AWK programs, so -.I gawk -only provides them if they are requested with this option, or when -.B \-\^\-posix -is specified. -.TP -.PD 0 -.BI "\-W source " program-text -.TP -.PD -.BI \-\^\-source " program-text" -Use -.I program-text -as AWK program source code. -This option allows the easy intermixing of library functions (used via the -.B \-f -and -.B \-\^\-file -options) with source code entered on the command line. -It is intended primarily for medium to large AWK programs used -in shell scripts. -.TP -.PD 0 -.B "\-W version" -.TP -.PD -.B \-\^\-version -Print version information for this particular copy of -.I gawk -on the standard output. -This is useful mainly for knowing if the current copy of -.I gawk -on your system -is up to date with respect to whatever the Free Software Foundation -is distributing. -This is also useful when reporting bugs. -(Per the -.IR "GNU Coding Standards" , -these options cause an immediate, successful exit.) -.TP -.B \-\^\- -Signal the end of options. This is useful to allow further arguments to the -AWK program itself to start with a ``\-''. -This is mainly for consistency with the argument parsing convention used -by most other \*(PX programs. -.PP -In compatibility mode, -any other options are flagged as illegal, but are otherwise ignored. -In normal operation, as long as program text has been supplied, unknown -options are passed on to the AWK program in the -.B ARGV -array for processing. This is particularly useful for running AWK -programs via the ``#!'' executable interpreter mechanism. -.SH AWK PROGRAM EXECUTION -.PP -An AWK program consists of a sequence of pattern-action statements -and optional function definitions. -.RS -.PP -\fIpattern\fB { \fIaction statements\fB }\fR -.br -\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR -.RE -.PP -.I Gawk -first reads the program source from the -.IR program-file (s) -if specified, -from arguments to -.BR \-\^\-source , -or from the first non-option argument on the command line. -The -.B \-f -and -.B \-\^\-source -options may be used multiple times on the command line. -.I Gawk -will read the program text as if all the -.IR program-file s -and command line source texts -had been concatenated together. This is useful for building libraries -of AWK functions, without having to include them in each new AWK -program that uses them. It also provides the ability to mix library -functions with command line programs. -.PP -The environment variable -.B AWKPATH -specifies a search path to use when finding source files named with -the -.B \-f -option. If this variable does not exist, the default path is -\fB".:/usr/local/share/awk"\fR. -(The actual directory may vary, depending upon how -.I gawk -was built and installed.) -If a file name given to the -.B \-f -option contains a ``/'' character, no path search is performed. -.PP -.I Gawk -executes AWK programs in the following order. 
-First, -all variable assignments specified via the -.B \-v -option are performed. -Next, -.I gawk -compiles the program into an internal form. -Then, -.I gawk -executes the code in the -.B BEGIN -block(s) (if any), -and then proceeds to read -each file named in the -.B ARGV -array. -If there are no files named on the command line, -.I gawk -reads the standard input. -.PP -If a filename on the command line has the form -.IB var = val -it is treated as a variable assignment. The variable -.I var -will be assigned the value -.IR val . -(This happens after any -.B BEGIN -block(s) have been run.) -Command line variable assignment -is most useful for dynamically assigning values to the variables -AWK uses to control how input is broken into fields and records. It -is also useful for controlling state if multiple passes are needed over -a single data file. -.PP -If the value of a particular element of -.B ARGV -is empty (\fB""\fR), -.I gawk -skips over it. -.PP -For each record in the input, -.I gawk -tests to see if it matches any -.I pattern -in the AWK program. -For each pattern that the record matches, the associated -.I action -is executed. -The patterns are tested in the order they occur in the program. -.PP -Finally, after all the input is exhausted, -.I gawk -executes the code in the -.B END -block(s) (if any). -.SH VARIABLES, RECORDS AND FIELDS -AWK variables are dynamic; they come into existence when they are -first used. Their values are either floating-point numbers or strings, -or both, -depending upon how they are used. AWK also has one dimensional -arrays; arrays with multiple dimensions may be simulated. -Several pre-defined variables are set as a program -runs; these will be described as needed and summarized below. -.SS Records -Normally, records are separated by newline characters. You can control how -records are separated by assigning values to the built-in variable -.BR RS . -If -.B RS -is any single character, that character separates records. -Otherwise, -.B RS -is a regular expression. Text in the input that matches this -regular expression will separate the record. -However, in compatibility mode, -only the first character of its string -value is used for separating records. -If -.B RS -is set to the null string, then records are separated by -blank lines. -When -.B RS -is set to the null string, the newline character always acts as -a field separator, in addition to whatever value -.B FS -may have. -.SS Fields -.PP -As each input record is read, -.I gawk -splits the record into -.IR fields , -using the value of the -.B FS -variable as the field separator. -If -.B FS -is a single character, fields are separated by that character. -If -.B FS -is the null string, then each individual character becomes a -separate field. -Otherwise, -.B FS -is expected to be a full regular expression. -In the special case that -.B FS -is a single space, fields are separated -by runs of spaces and/or tabs and/or newlines. -(But see the discussion of -.BR \-\-posix , -below). -Note that the value of -.B IGNORECASE -(see below) will also affect how fields are split when -.B FS -is a regular expression, and how records are separated when -.B RS -is a regular expression. -.PP -If the -.B FIELDWIDTHS -variable is set to a space separated list of numbers, each field is -expected to have fixed width, and -.I gawk -will split up the record using the specified widths. The value of -.B FS -is ignored. 
-Assigning a new value to -.B FS -overrides the use of -.BR FIELDWIDTHS , -and restores the default behavior. -.PP -Each field in the input record may be referenced by its position, -.BR $1 , -.BR $2 , -and so on. -.B $0 -is the whole record. The value of a field may be assigned to as well. -Fields need not be referenced by constants: -.RS -.PP -.ft B -n = 5 -.br -print $n -.ft R -.RE -.PP -prints the fifth field in the input record. -The variable -.B NF -is set to the total number of fields in the input record. -.PP -References to non-existent fields (i.e. fields after -.BR $NF ) -produce the null-string. However, assigning to a non-existent field -(e.g., -.BR "$(NF+2) = 5" ) -will increase the value of -.BR NF , -create any intervening fields with the null string as their value, and -cause the value of -.B $0 -to be recomputed, with the fields being separated by the value of -.BR OFS . -References to negative numbered fields cause a fatal error. -Decrementing -.B NF -causes the values of fields past the new value to be lost, and the value of -.B $0 -to be recomputed, with the fields being separated by the value of -.BR OFS . -.SS Built-in Variables -.PP -.IR Gawk 's -built-in variables are: -.PP -.TP \w'\fBFIELDWIDTHS\fR'u+1n -.B ARGC -The number of command line arguments (does not include options to -.IR gawk , -or the program source). -.TP -.B ARGIND -The index in -.B ARGV -of the current file being processed. -.TP -.B ARGV -Array of command line arguments. The array is indexed from -0 to -.B ARGC -\- 1. -Dynamically changing the contents of -.B ARGV -can control the files used for data. -.TP -.B CONVFMT -The conversion format for numbers, \fB"%.6g"\fR, by default. -.TP -.B ENVIRON -An array containing the values of the current environment. -The array is indexed by the environment variables, each element being -the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be -.BR /home/arnold ). -Changing this array does not affect the environment seen by programs which -.I gawk -spawns via redirection or the -.B system() -function. -(This may change in a future version of -.IR gawk .) -.\" but don't hold your breath... -.TP -.B ERRNO -If a system error occurs either doing a redirection for -.BR getline , -during a read for -.BR getline , -or during a -.BR close() , -then -.B ERRNO -will contain -a string describing the error. -.TP -.B FIELDWIDTHS -A white-space separated list of fieldwidths. When set, -.I gawk -parses the input into fields of fixed width, instead of using the -value of the -.B FS -variable as the field separator. -The fixed field width facility is still experimental; the -semantics may change as -.I gawk -evolves over time. -.TP -.B FILENAME -The name of the current input file. -If no files are specified on the command line, the value of -.B FILENAME -is ``\-''. -However, -.B FILENAME -is undefined inside the -.B BEGIN -block. -.TP -.B FNR -The input record number in the current input file. -.TP -.B FS -The input field separator, a space by default. See -.BR Fields , -above. -.TP -.B IGNORECASE -Controls the case-sensitivity of all regular expression -and string operations. 
If -.B IGNORECASE -has a non-zero value, then string comparisons and -pattern matching in rules, -field splitting with -.BR FS , -record separating with -.BR RS , -regular expression -matching with -.B ~ -and -.BR !~ , -and the -.BR gensub() , -.BR gsub() , -.BR index() , -.BR match() , -.BR split() , -and -.B sub() -pre-defined functions will all ignore case when doing regular expression -operations. Thus, if -.B IGNORECASE -is not equal to zero, -.B /aB/ -matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, -and \fB"AB"\fP. -As with all AWK variables, the initial value of -.B IGNORECASE -is zero, so all regular expression and string -operations are normally case-sensitive. -Under Unix, the full ISO 8859-1 Latin-1 character set is used -when ignoring case. -.B NOTE: -In versions of -.I gawk -prior to 3.0, -.B IGNORECASE -only affected regular expression operations. It now affects string -comparisons as well. -.TP -.B NF -The number of fields in the current input record. -.TP -.B NR -The total number of input records seen so far. -.TP -.B OFMT -The output format for numbers, \fB"%.6g"\fR, by default. -.TP -.B OFS -The output field separator, a space by default. -.TP -.B ORS -The output record separator, by default a newline. -.TP -.B RS -The input record separator, by default a newline. -.TP -.B RT -The record terminator. -.I Gawk -sets -.B RT -to the input text that matched the character or regular expression -specified by -.BR RS . -.TP -.B RSTART -The index of the first character matched by -.BR match() ; -0 if no match. -.TP -.B RLENGTH -The length of the string matched by -.BR match() ; -\-1 if no match. -.TP -.B SUBSEP -The character used to separate multiple subscripts in array -elements, by default \fB"\e034"\fR. -.SS Arrays -.PP -Arrays are subscripted with an expression between square brackets -.RB ( [ " and " ] ). -If the expression is an expression list -.RI ( expr ", " expr " ...)" -then the array subscript is a string consisting of the -concatenation of the (string) value of each expression, -separated by the value of the -.B SUBSEP -variable. -This facility is used to simulate multiply dimensioned -arrays. For example: -.PP -.RS -.ft B -i = "A";\^ j = "B";\^ k = "C" -.br -x[i, j, k] = "hello, world\en" -.ft R -.RE -.PP -assigns the string \fB"hello, world\en"\fR to the element of the array -.B x -which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in AWK -are associative, i.e. indexed by string values. -.PP -The special operator -.B in -may be used in an -.B if -or -.B while -statement to see if an array has an index consisting of a particular -value. -.PP -.RS -.ft B -.nf -if (val in array) - print array[val] -.fi -.ft -.RE -.PP -If the array has multiple subscripts, use -.BR "(i, j) in array" . -.PP -The -.B in -construct may also be used in a -.B for -loop to iterate over all the elements of an array. -.PP -An element may be deleted from an array using the -.B delete -statement. -The -.B delete -statement may also be used to delete the entire contents of an array, -just by specifying the array name without a subscript. -.SS Variable Typing And Conversion -.PP -Variables and fields -may be (floating point) numbers, or strings, or both. How the -value of a variable is interpreted depends upon its context. If used in -a numeric expression, it will be treated as a number, if used as a string -it will be treated as a string. 
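A minimal awk sketch of the array subscripting, membership test, delete statement, and type coercion just described (added for illustration only; not part of the original gawk.1 text being removed):

    # Sketch: multi-dimensional subscripts, membership test, delete, and coercion.
    BEGIN {
        x["A", "B"] = "hello"        # subscript is "A" SUBSEP "B"
        if (("A", "B") in x)         # membership test for a multi-subscript element
            print x["A", "B"]        # prints "hello"
        delete x                     # removes the entire contents of the array

        s = "3.14 is pi"
        n = s + 0                    # force numeric treatment: n is 3.14
        t = n ""                     # force string treatment via concatenation
        print n, t
    }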
-.PP -To force a variable to be treated as a number, add 0 to it; to force it -to be treated as a string, concatenate it with the null string. -.PP -When a string must be converted to a number, the conversion is accomplished -using -.IR atof (3). -A number is converted to a string by using the value of -.B CONVFMT -as a format string for -.IR sprintf (3), -with the numeric value of the variable as the argument. -However, even though all numbers in AWK are floating-point, -integral values are -.I always -converted as integers. Thus, given -.PP -.RS -.ft B -.nf -CONVFMT = "%2.2f" -a = 12 -b = a "" -.fi -.ft R -.RE -.PP -the variable -.B b -has a string value of \fB"12"\fR and not \fB"12.00"\fR. -.PP -.I Gawk -performs comparisons as follows: -If two variables are numeric, they are compared numerically. -If one value is numeric and the other has a string value that is a -``numeric string,'' then comparisons are also done numerically. -Otherwise, the numeric value is converted to a string and a string -comparison is performed. -Two strings are compared, of course, as strings. -According to the \*(PX standard, even if two strings are -numeric strings, a numeric comparison is performed. However, this is -clearly incorrect, and -.I gawk -does not do this. -.PP -Note that string constants, such as \fB"57"\fP, are -.I not -numeric strings, they are string constants. The idea of ``numeric string'' -only applies to fields, -.B getline -input, -.BR FILENAME , -.B ARGV -elements, -.B ENVIRON -elements and the elements of an array created by -.B split() -that are numeric strings. -The basic idea is that -.IR "user input" , -and only user input, that looks numeric, -should be treated that way. -.PP -Uninitialized variables have the numeric value 0 and the string value "" -(the null, or empty, string). -.SH PATTERNS AND ACTIONS -AWK is a line oriented language. The pattern comes first, and then the -action. Action statements are enclosed in -.B { -and -.BR } . -Either the pattern may be missing, or the action may be missing, but, -of course, not both. If the pattern is missing, the action will be -executed for every single record of input. -A missing action is equivalent to -.RS -.PP -.B "{ print }" -.RE -.PP -which prints the entire record. -.PP -Comments begin with the ``#'' character, and continue until the -end of the line. -Blank lines may be used to separate statements. -Normally, a statement ends with a newline, however, this is not the -case for lines ending in -a ``,'', -.BR { , -.BR ? , -.BR : , -.BR && , -or -.BR || . -Lines ending in -.B do -or -.B else -also have their statements automatically continued on the following line. -In other cases, a line can be continued by ending it with a ``\e'', -in which case the newline will be ignored. -.PP -Multiple statements may -be put on one line by separating them with a ``;''. -This applies to both the statements within the action part of a -pattern-action pair (the usual case), -and to the pattern-action statements themselves. -.SS Patterns -AWK patterns may be one of the following: -.PP -.RS -.nf -.B BEGIN -.B END -.BI / "regular expression" / -.I "relational expression" -.IB pattern " && " pattern -.IB pattern " || " pattern -.IB pattern " ? " pattern " : " pattern -.BI ( pattern ) -.BI ! " pattern" -.IB pattern1 ", " pattern2 -.fi -.RE -.PP -.B BEGIN -and -.B END -are two special kinds of patterns which are not tested against -the input. 
-The action parts of all -.B BEGIN -patterns are merged as if all the statements had -been written in a single -.B BEGIN -block. They are executed before any -of the input is read. Similarly, all the -.B END -blocks are merged, -and executed when all the input is exhausted (or when an -.B exit -statement is executed). -.B BEGIN -and -.B END -patterns cannot be combined with other patterns in pattern expressions. -.B BEGIN -and -.B END -patterns cannot have missing action parts. -.PP -For -.BI / "regular expression" / -patterns, the associated statement is executed for each input record that matches -the regular expression. -Regular expressions are the same as those in -.IR egrep (1), -and are summarized below. -.PP -A -.I "relational expression" -may use any of the operators defined below in the section on actions. -These generally test whether certain fields match certain regular expressions. -.PP -The -.BR && , -.BR || , -and -.B ! -operators are logical AND, logical OR, and logical NOT, respectively, as in C. -They do short-circuit evaluation, also as in C, and are used for combining -more primitive pattern expressions. As in most languages, parentheses -may be used to change the order of evaluation. -.PP -The -.B ?\^: -operator is like the same operator in C. If the first pattern is true -then the pattern used for testing is the second pattern, otherwise it is -the third. Only one of the second and third patterns is evaluated. -.PP -The -.IB pattern1 ", " pattern2 -form of an expression is called a -.IR "range pattern" . -It matches all input records starting with a record that matches -.IR pattern1 , -and continuing until a record that matches -.IR pattern2 , -inclusive. It does not combine with any other sort of pattern expression. -.SS Regular Expressions -Regular expressions are the extended kind found in -.IR egrep . -They are composed of characters as follows: -.TP \w'\fB[^\fIabc...\fB]\fR'u+2n -.I c -matches the non-metacharacter -.IR c . -.TP -.I \ec -matches the literal character -.IR c . -.TP -.B . -matches any character -.I including -newline. -.TP -.B ^ -matches the beginning of a string. -.TP -.B $ -matches the end of a string. -.TP -.BI [ abc... ] -character list, matches any of the characters -.IR abc... . -.TP -.BI [^ abc... ] -negated character list, matches any character except -.IR abc... . -.TP -.IB r1 | r2 -alternation: matches either -.I r1 -or -.IR r2 . -.TP -.I r1r2 -concatenation: matches -.IR r1 , -and then -.IR r2 . -.TP -.IB r + -matches one or more -.IR r 's. -.TP -.IB r * -matches zero or more -.IR r 's. -.TP -.IB r ? -matches zero or one -.IR r 's. -.TP -.BI ( r ) -grouping: matches -.IR r . -.TP -.PD 0 -.IB r { n } -.TP -.PD 0 -.IB r { n ,} -.TP -.PD -.IB r { n , m } -One or two numbers inside braces denote an -.IR "interval expression" . -If there is one number in the braces, the preceding regexp -.I r -is repeated -.I n -times. If there are two numbers separated by a comma, -.I r -is repeated -.I n -to -.I m -times. -If there is one number followed by a comma, then -.I r -is repeated at least -.I n -times. -.sp .5 -Interval expressions are only available if either -.B \-\^\-posix -or -.B \-\^\-re\-interval -is specified on the command line. -.TP -.B \ey -matches the empty string at either the beginning or the -end of a word. -.TP -.B \eB -matches the empty string within a word. -.TP -.B \e< -matches the empty string at the beginning of a word. -.TP -.B \e> -matches the empty string at the end of a word. 
-.TP -.B \ew -matches any word-constituent character (letter, digit, or underscore). -.TP -.B \eW -matches any character that is not word-constituent. -.TP -.B \e` -matches the empty string at the beginning of a buffer (string). -.TP -.B \e' -matches the empty string at the end of a buffer. -.PP -The escape sequences that are valid in string constants (see below) -are also legal in regular expressions. -.PP -.I "Character classes" -are a new feature introduced in the POSIX standard. -A character class is a special notation for describing -lists of characters that have a specific attribute, but where the -actual characters themselves can vary from country to country and/or -from character set to character set. For example, the notion of what -is an alphabetic character differs in the USA and in France. -.PP -A character class is only valid in a regexp -.I inside -the brackets of a character list. Character classes consist of -.BR [: , -a keyword denoting the class, and -.BR :] . -Here are the character -classes defined by the POSIX standard. -.TP -.B [:alnum:] -Alphanumeric characters. -.TP -.B [:alpha:] -Alphabetic characters. -.TP -.B [:blank:] -Space or tab characters. -.TP -.B [:cntrl:] -Control characters. -.TP -.B [:digit:] -Numeric characters. -.TP -.B [:graph:] -Characters that are both printable and visible. -(A space is printable, but not visible, while an -.B a -is both.) -.TP -.B [:lower:] -Lower-case alphabetic characters. -.TP -.B [:print:] -Printable characters (characters that are not control characters.) -.TP -.B [:punct:] -Punctuation characters (characters that are not letter, digits, -control characters, or space characters). -.TP -.B [:space:] -Space characters (such as space, tab, and formfeed, to name a few). -.TP -.B [:upper:] -Upper-case alphabetic characters. -.TP -.B [:xdigit:] -Characters that are hexadecimal digits. -.PP -For example, before the POSIX standard, to match alphanumeric -characters, you would have had to write -.BR /[A\-Za\-z0\-9]/ . -If your character set had other alphabetic characters in it, this would not -match them. With the POSIX character classes, you can write -.BR /[[:alnum:]]/ , -and this will match -.I all -the alphabetic and numeric characters in your character set. -.PP -Two additional special sequences can appear in character lists. -These apply to non-ASCII character sets, which can have single symbols -(called -.IR "collating elements" ) -that are represented with more than one -character, as well as several characters that are equivalent for -.IR collating , -or sorting, purposes. (E.g., in French, a plain ``e'' -and a grave-accented e\` are equivalent.) -.TP -Collating Symbols -A collating symbols is a multi-character collating element enclosed in -.B [. -and -.BR .] . -For example, if -.B ch -is a collating element, then -.B [[.ch.]] -is a regexp that matches this collating element, while -.B [ch] -is a regexp that matches either -.B c -or -.BR h . -.TP -Equivalence Classes -An equivalence class is a locale-specific name for a list of -characters that are equivalent. The name is enclosed in -.B [= -and -.BR =] . -For example, the name -.B e -might be used to represent all of -``e,'' ``e\`,'' and ``e\`.'' -In this case, -.B [[=e=]] -is a regexp -that matches any of -.BR e , -.BR e\' , -or -.BR e\` . -.PP -These features are very valuable in non-English speaking locales. 
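A small sketch using the POSIX character classes and the gawk-specific word-boundary operators described above (illustrative only; not part of the original gawk.1 text; field and variable names are arbitrary):

    # Sketch: POSIX character classes and gawk's word-boundary operators.
    $2 ~ /^[[:alpha:]]+$/ { alpha_lines++ }    # second field is entirely alphabetic
    /\<error\>/           { err_lines++ }      # "error" as a whole word (gawk extension)
    END { print alpha_lines + 0, err_lines + 0 }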
-The library functions that -.I gawk -uses for regular expression matching -currently only recognize POSIX character classes; they do not recognize -collating symbols or equivalence classes. -.PP -The -.BR \ey , -.BR \eB , -.BR \e< , -.BR \e> , -.BR \ew , -.BR \eW , -.BR \e` , -and -.B \e' -operators are specific to -.IR gawk ; -they are extensions based on facilities in the GNU regexp libraries. -.PP -The various command line options -control how -.I gawk -interprets characters in regexps. -.TP -No options -In the default case, -.I gawk -provide all the facilities of -POSIX regexps and the GNU regexp operators described above. -However, interval expressions are not supported. -.TP -.B \-\^\-posix -Only POSIX regexps are supported, the GNU operators are not special. -(E.g., -.B \ew -matches a literal -.BR w ). -Interval expressions are allowed. -.TP -.B \-\^\-traditional -Traditional Unix -.I awk -regexps are matched. The GNU operators -are not special, interval expressions are not available, and neither -are the POSIX character classes -.RB ( [[:alnum:]] -and so on). -Characters described by octal and hexadecimal escape sequences are -treated literally, even if they represent regexp metacharacters. -.TP -.B \-\^\-re\-interval -Allow interval expressions in regexps, even if -.B \-\^\-traditional -has been provided. -.SS Actions -Action statements are enclosed in braces, -.B { -and -.BR } . -Action statements consist of the usual assignment, conditional, and looping -statements found in most languages. The operators, control statements, -and input/output statements -available are patterned after those in C. -.SS Operators -.PP -The operators in AWK, in order of decreasing precedence, are -.PP -.TP "\w'\fB*= /= %= ^=\fR'u+1n" -.BR ( \&... ) -Grouping -.TP -.B $ -Field reference. -.TP -.B "++ \-\^\-" -Increment and decrement, both prefix and postfix. -.TP -.B ^ -Exponentiation (\fB**\fR may also be used, and \fB**=\fR for -the assignment operator). -.TP -.B "+ \- !" -Unary plus, unary minus, and logical negation. -.TP -.B "* / %" -Multiplication, division, and modulus. -.TP -.B "+ \-" -Addition and subtraction. -.TP -.I space -String concatenation. -.TP -.PD 0 -.B "< >" -.TP -.PD 0 -.B "<= >=" -.TP -.PD -.B "!= ==" -The regular relational operators. -.TP -.B "~ !~" -Regular expression match, negated match. -.B NOTE: -Do not use a constant regular expression -.RB ( /foo/ ) -on the left-hand side of a -.B ~ -or -.BR !~ . -Only use one on the right-hand side. The expression -.BI "/foo/ ~ " exp -has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR. -This is usually -.I not -what was intended. -.TP -.B in -Array membership. -.TP -.B && -Logical AND. -.TP -.B || -Logical OR. -.TP -.B ?: -The C conditional expression. This has the form -.IB expr1 " ? " expr2 " : " expr3\c -\&. If -.I expr1 -is true, the value of the expression is -.IR expr2 , -otherwise it is -.IR expr3 . -Only one of -.I expr2 -and -.I expr3 -is evaluated. -.TP -.PD 0 -.B "= += \-=" -.TP -.PD -.B "*= /= %= ^=" -Assignment. Both absolute assignment -.BI ( var " = " value ) -and operator-assignment (the other forms) are supported. 
-.SS Control Statements -.PP -The control statements are -as follows: -.PP -.RS -.nf -\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR] -\fBwhile (\fIcondition\fB) \fIstatement \fR -\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR -\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR -\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR -\fBbreak\fR -\fBcontinue\fR -\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR -\fBdelete \fIarray\^\fR -\fBexit\fR [ \fIexpression\fR ] -\fB{ \fIstatements \fB} -.fi -.RE -.SS "I/O Statements" -.PP -The input/output statements are as follows: -.PP -.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n" -.BI close( file ) -Close file (or pipe, see below). -.TP -.B getline -Set -.B $0 -from next input record; set -.BR NF , -.BR NR , -.BR FNR . -.TP -.BI "getline <" file -Set -.B $0 -from next record of -.IR file ; -set -.BR NF . -.TP -.BI getline " var" -Set -.I var -from next input record; set -.BR NR , -.BR FNR . -.TP -.BI getline " var" " <" file -Set -.I var -from next record of -.IR file . -.TP -.B next -Stop processing the current input record. The next input record -is read and processing starts over with the first pattern in the -AWK program. If the end of the input data is reached, the -.B END -block(s), if any, are executed. -.TP -.B "nextfile" -Stop processing the current input file. The next input record read -comes from the next input file. -.B FILENAME -and -.B ARGIND -are updated, -.B FNR -is reset to 1, and processing starts over with the first pattern in the -AWK program. If the end of the input data is reached, the -.B END -block(s), if any, are executed. -.B NOTE: -Earlier versions of gawk used -.BR "next file" , -as two words. While this usage is still recognized, it generates a -warning message and will eventually be removed. -.TP -.B print -Prints the current record. -The output record is terminated with the value of the -.B ORS -variable. -.TP -.BI print " expr-list" -Prints expressions. -Each expression is separated by the value of the -.B OFS -variable. -The output record is terminated with the value of the -.B ORS -variable. -.TP -.BI print " expr-list" " >" file -Prints expressions on -.IR file . -Each expression is separated by the value of the -.B OFS -variable. The output record is terminated with the value of the -.B ORS -variable. -.TP -.BI printf " fmt, expr-list" -Format and print. -.TP -.BI printf " fmt, expr-list" " >" file -Format and print on -.IR file . -.TP -.BI system( cmd-line ) -Execute the command -.IR cmd-line , -and return the exit status. -(This may not be available on non-\*(PX systems.) -.TP -\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR -Flush any buffers associated with the open output file or pipe -.IR file . -If -.I file -is missing, then standard output is flushed. -If -.I file -is the null string, -then all open output files and pipes -have their buffers flushed. -.PP -Other input/output redirections are also allowed. For -.B print -and -.BR printf , -.BI >> file -appends output to the -.IR file , -while -.BI | " command" -writes on a pipe. -In a similar fashion, -.IB command " | getline" -pipes into -.BR getline . -The -.BR getline -command will return 0 on end of file, and \-1 on an error. -.SS The \fIprintf\fP\^ Statement -.PP -The AWK versions of the -.B printf -statement and -.B sprintf() -function -(see below) -accept the following conversion specification formats: -.TP -.B %c -An \s-1ASCII\s+1 character. -If the argument used for -.B %c -is numeric, it is treated as a character and printed. 
-Otherwise, the argument is assumed to be a string, and the only first -character of that string is printed. -.TP -.PD 0 -.B %d -.TP -.PD -.B %i -A decimal number (the integer part). -.TP -.PD 0 -.B %e -.TP -.PD -.B %E -A floating point number of the form -.BR [\-]d.dddddde[+\^\-]dd . -The -.B %E -format uses -.B E -instead of -.BR e . -.TP -.B %f -A floating point number of the form -.BR [\-]ddd.dddddd . -.TP -.PD 0 -.B %g -.TP -.PD -.B %G -Use -.B %e -or -.B %f -conversion, whichever is shorter, with nonsignificant zeros suppressed. -The -.B %G -format uses -.B %E -instead of -.BR %e . -.TP -.B %o -An unsigned octal number (again, an integer). -.TP -.B %s -A character string. -.TP -.PD 0 -.B %x -.TP -.PD -.B %X -An unsigned hexadecimal number (an integer). -.The -.B %X -format uses -.B ABCDEF -instead of -.BR abcdef . -.TP -.B %% -A single -.B % -character; no argument is converted. -.PP -There are optional, additional parameters that may lie between the -.B % -and the control letter: -.TP -.B \- -The expression should be left-justified within its field. -.TP -.I space -For numeric conversions, prefix positive values with a space, and -negative values with a minus sign. -.TP -.B + -The plus sign, used before the width modifier (see below), -says to always supply a sign for numeric conversions, even if the data -to be formatted is positive. The -.B + -overrides the space modifier. -.TP -.B # -Use an ``alternate form'' for certain control letters. -For -.BR %o , -supply a leading zero. -For -.BR %x , -and -.BR %X , -supply a leading -.BR 0x -or -.BR 0X -for -a nonzero result. -For -.BR %e , -.BR %E , -and -.BR %f , -the result will always contain a -decimal point. -For -.BR %g , -and -.BR %G , -trailing zeros are not removed from the result. -.TP -.B 0 -A leading -.B 0 -(zero) acts as a flag, that indicates output should be -padded with zeroes instead of spaces. -This applies even to non-numeric output formats. -This flag only has an effect when the field width is wider than the -value to be printed. -.TP -.I width -The field should be padded to this width. The field is normally padded -with spaces. If the -.B 0 -flag has been used, it is padded with zeroes. -.TP -.BI \&. prec -A number that specifies the precision to use when printing. -For the -.BR %e , -.BR %E , -and -.BR %f -formats, this specifies the -number of digits you want printed to the right of the decimal point. -For the -.BR %g , -and -.B %G -formats, it specifies the maximum number -of significant digits. For the -.BR %d , -.BR %o , -.BR %i , -.BR %u , -.BR %x , -and -.B %X -formats, it specifies the minimum number of -digits to print. For a string, it specifies the maximum number of -characters from the string that should be printed. -.PP -The dynamic -.I width -and -.I prec -capabilities of the \*(AN C -.B printf() -routines are supported. -A -.B * -in place of either the -.B width -or -.B prec -specifications will cause their values to be taken from -the argument list to -.B printf -or -.BR sprintf() . -.SS Special File Names -.PP -When doing I/O redirection from either -.B print -or -.B printf -into a file, -or via -.B getline -from a file, -.I gawk -recognizes certain special filenames internally. These filenames -allow access to open file descriptors inherited from -.IR gawk 's -parent process (usually the shell). -Other special filenames provide access to information about the running -.B gawk -process. 
-The filenames are: -.TP \w'\fB/dev/stdout\fR'u+1n -.B /dev/pid -Reading this file returns the process ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/ppid -Reading this file returns the parent process ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/pgrpid -Reading this file returns the process group ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/user -Reading this file returns a single record terminated with a newline. -The fields are separated with spaces. -.B $1 -is the value of the -.IR getuid (2) -system call, -.B $2 -is the value of the -.IR geteuid (2) -system call, -.B $3 -is the value of the -.IR getgid (2) -system call, and -.B $4 -is the value of the -.IR getegid (2) -system call. -If there are any additional fields, they are the group IDs returned by -.IR getgroups (2). -Multiple groups may not be supported on all systems. -.TP -.B /dev/stdin -The standard input. -.TP -.B /dev/stdout -The standard output. -.TP -.B /dev/stderr -The standard error output. -.TP -.BI /dev/fd/\^ n -The file associated with the open file descriptor -.IR n . -.PP -These are particularly useful for error messages. For example: -.PP -.RS -.ft B -print "You blew it!" > "/dev/stderr" -.ft R -.RE -.PP -whereas you would otherwise have to use -.PP -.RS -.ft B -print "You blew it!" | "cat 1>&2" -.ft R -.RE -.PP -These file names may also be used on the command line to name data files. -.SS Numeric Functions -.PP -AWK has the following pre-defined arithmetic functions: -.PP -.TP \w'\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR'u+1n -.BI atan2( y , " x" ) -returns the arctangent of -.I y/x -in radians. -.TP -.BI cos( expr ) -returns the cosine of -.IR expr , -which is in radians. -.TP -.BI exp( expr ) -the exponential function. -.TP -.BI int( expr ) -truncates to integer. -.TP -.BI log( expr ) -the natural logarithm function. -.TP -.B rand() -returns a random number between 0 and 1. -.TP -.BI sin( expr ) -returns the sine of -.IR expr , -which is in radians. -.TP -.BI sqrt( expr ) -the square root function. -.TP -\&\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR -uses -.I expr -as a new seed for the random number generator. If no -.I expr -is provided, the time of day will be used. -The return value is the previous seed for the random -number generator. -.SS String Functions -.PP -.I Gawk -has the following pre-defined string functions: -.PP -.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" -\fBgensub(\fIr\fB, \fIs\fB, \fIh \fR[\fB, \fIt\fR]\fB)\fR -search the target string -.I t -for matches of the regular expression -.IR r . -If -.I h -is a string beginning with -.B g -or -.BR G , -then replace all matches of -.I r -with -.IR s . -Otherwise, -.I h -is a number indicating which match of -.I r -to replace. -If no -.I t -is supplied, -.B $0 -is used instead. -Within the replacement text -.IR s , -the sequence -.BI \e n\fR, -where -.I n -is a digit from 1 to 9, may be used to indicate just the text that -matched the -.IR n 'th -parenthesized subexpression. The sequence -.B \e0 -represents the entire matched text, as does the character -.BR & . -Unlike -.B sub() -and -.BR gsub() , -the modified string is returned as the result of the function, -and the original target string is -.I not -changed. 
-.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" -\fBgsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR -for each substring matching the regular expression -.I r -in the string -.IR t , -substitute the string -.IR s , -and return the number of substitutions. -If -.I t -is not supplied, use -.BR $0 . -An -.B & -in the replacement text is replaced with the text that was actually matched. -Use -.B \e& -to get a literal -.BR & . -See -.I "AWK Language Programming" -for a fuller discussion of the rules for -.BR &'s -and backslashes in the replacement text of -.BR sub() , -.BR gsub() , -and -.BR gensub() . -.TP -.BI index( s , " t" ) -returns the index of the string -.I t -in the string -.IR s , -or 0 if -.I t -is not present. -.TP -\fBlength(\fR[\fIs\fR]\fB) -returns the length of the string -.IR s , -or the length of -.B $0 -if -.I s -is not supplied. -.TP -.BI match( s , " r" ) -returns the position in -.I s -where the regular expression -.I r -occurs, or 0 if -.I r -is not present, and sets the values of -.B RSTART -and -.BR RLENGTH . -.TP -\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR]\fB)\fR -splits the string -.I s -into the array -.I a -on the regular expression -.IR r , -and returns the number of fields. If -.I r -is omitted, -.B FS -is used instead. -The array -.I a -is cleared first. -Splitting behaves identically to field splitting, described above. -.TP -.BI sprintf( fmt , " expr-list" ) -prints -.I expr-list -according to -.IR fmt , -and returns the resulting string. -.TP -\fBsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR -just like -.BR gsub() , -but only the first matching substring is replaced. -.TP -\fBsubstr(\fIs\fB, \fIi \fR[\fB, \fIn\fR]\fB)\fR -returns the at most -.IR n -character -substring of -.I s -starting at -.IR i . -If -.I n -is omitted, the rest of -.I s -is used. -.TP -.BI tolower( str ) -returns a copy of the string -.IR str , -with all the upper-case characters in -.I str -translated to their corresponding lower-case counterparts. -Non-alphabetic characters are left unchanged. -.TP -.BI toupper( str ) -returns a copy of the string -.IR str , -with all the lower-case characters in -.I str -translated to their corresponding upper-case counterparts. -Non-alphabetic characters are left unchanged. -.SS Time Functions -.PP -Since one of the primary uses of AWK programs is processing log files -that contain time stamp information, -.I gawk -provides the following two functions for obtaining time stamps and -formatting them. -.PP -.TP "\w'\fBsystime()\fR'u+1n" -.B systime() -returns the current time of day as the number of seconds since the Epoch -(Midnight UTC, January 1, 1970 on \*(PX systems). -.TP -\fBstrftime(\fR[\fIformat \fR[\fB, \fItimestamp\fR]]\fB)\fR -formats -.I timestamp -according to the specification in -.IR format. -The -.I timestamp -should be of the same form as returned by -.BR systime() . -If -.I timestamp -is missing, the current time of day is used. -If -.I format -is missing, a default format equivalent to the output of -.IR date (1) -will be used. -See the specification for the -.B strftime() -function in \*(AN C for the format conversions that are -guaranteed to be available. -A public-domain version of -.IR strftime (3) -and a man page for it come with -.IR gawk ; -if that version was used to build -.IR gawk , -then all of the conversions described in that man page are available to -.IR gawk. -.SS String Constants -.PP -String constants in AWK are sequences of characters enclosed -between double quotes (\fB"\fR). 
Within strings, certain -.I "escape sequences" -are recognized, as in C. These are: -.PP -.TP \w'\fB\e\^\fIddd\fR'u+1n -.B \e\e -A literal backslash. -.TP -.B \ea -The ``alert'' character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character. -.TP -.B \eb -backspace. -.TP -.B \ef -form-feed. -.TP -.B \en -newline. -.TP -.B \er -carriage return. -.TP -.B \et -horizontal tab. -.TP -.B \ev -vertical tab. -.TP -.BI \ex "\^hex digits" -The character represented by the string of hexadecimal digits following -the -.BR \ex . -As in \*(AN C, all following hexadecimal digits are considered part of -the escape sequence. -(This feature should tell us something about language design by committee.) -E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. -.TP -.BI \e ddd -The character represented by the 1-, 2-, or 3-digit sequence of octal -digits. E.g. \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. -.TP -.BI \e c -The literal character -.IR c\^ . -.PP -The escape sequences may also be used inside constant regular expressions -(e.g., -.B "/[\ \et\ef\en\er\ev]/" -matches whitespace characters). -.PP -In compatibility mode, the characters represented by octal and -hexadecimal escape sequences are treated literally when used in -regexp constants. Thus, -.B /a\e52b/ -is equivalent to -.BR /a\e*b/ . -.SH FUNCTIONS -Functions in AWK are defined as follows: -.PP -.RS -\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR -.RE -.PP -Functions are executed when they are called from within expressions -in either patterns or actions. Actual parameters supplied in the function -call are used to instantiate the formal parameters declared in the function. -Arrays are passed by reference, other variables are passed by value. -.PP -Since functions were not originally part of the AWK language, the provision -for local variables is rather clumsy: They are declared as extra parameters -in the parameter list. The convention is to separate local variables from -real parameters by extra spaces in the parameter list. For example: -.PP -.RS -.ft B -.nf -function f(p, q, a, b) # a & b are local -{ - \&..... -} - -/abc/ { ... ; f(1, 2) ; ... } -.fi -.ft R -.RE -.PP -The left parenthesis in a function call is required -to immediately follow the function name, -without any intervening white space. -This is to avoid a syntactic ambiguity with the concatenation operator. -This restriction does not apply to the built-in functions listed above. -.PP -Functions may call each other and may be recursive. -Function parameters used as local variables are initialized -to the null string and the number zero upon function invocation. -.PP -Use -.BI return " expr" -to return a value from a function. The return value is undefined if no -value is provided, or if the function returns by ``falling off'' the -end. -.PP -If -.B \-\^\-lint -has been provided, -.I gawk -will warn about calls to undefined functions at parse time, -instead of at run time. -Calling an undefined function at run time is a fatal error. -.PP -The word -.B func -may be used in place of -.BR function . 
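A minimal sketch of a user-defined function following the conventions described above; the name join_fields is hypothetical (illustrative only; not part of the original gawk.1 text):

    # Sketch: a user-defined function; i and s (after the extra spaces)
    # are local variables, per the convention described above.
    function join_fields(first, last,    i, s)
    {
        s = $first
        for (i = first + 1; i <= last; i++)
            s = s OFS $i
        return s
    }

    { print join_fields(1, NF) }

Local parameters i and s are initialized to the null string and zero on each call, and the value is handed back with return.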
-.SH EXAMPLES -.nf -Print and sort the login names of all users: - -.ft B - BEGIN { FS = ":" } - { print $1 | "sort" } - -.ft R -Count lines in a file: - -.ft B - { nlines++ } - END { print nlines } - -.ft R -Precede each line by its number in the file: - -.ft B - { print FNR, $0 } - -.ft R -Concatenate and line number (a variation on a theme): - -.ft B - { print NR, $0 } -.ft R -.fi -.SH SEE ALSO -.IR egrep (1), -.IR getpid (2), -.IR getppid (2), -.IR getpgrp (2), -.IR getuid (2), -.IR geteuid (2), -.IR getgid (2), -.IR getegid (2), -.IR getgroups (2) -.PP -.IR "The AWK Programming Language" , -Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger, -Addison-Wesley, 1988. ISBN 0-201-07981-X. -.PP -.IR "AWK Language Programming" , -Edition 1.0, published by the Free Software Foundation, 1995. -.SH POSIX COMPATIBILITY -A primary goal for -.I gawk -is compatibility with the \*(PX standard, as well as with the -latest version of \*(UX -.IR awk . -To this end, -.I gawk -incorporates the following user visible -features which are not described in the AWK book, -but are part of the Bell Labs version of -.IR awk , -and are in the \*(PX standard. -.PP -The -.B \-v -option for assigning variables before program execution starts is new. -The book indicates that command line variable assignment happens when -.I awk -would otherwise open the argument as a file, which is after the -.B BEGIN -block is executed. However, in earlier implementations, when such an -assignment appeared before any file names, the assignment would happen -.I before -the -.B BEGIN -block was run. Applications came to depend on this ``feature.'' -When -.I awk -was changed to match its documentation, this option was added to -accommodate applications that depended upon the old behavior. -(This feature was agreed upon by both the AT&T and GNU developers.) -.PP -The -.B \-W -option for implementation specific features is from the \*(PX standard. -.PP -When processing arguments, -.I gawk -uses the special option ``\fB\-\^\-\fP'' to signal the end of -arguments. -In compatibility mode, it will warn about, but otherwise ignore, -undefined options. -In normal operation, such arguments are passed on to the AWK program for -it to process. -.PP -The AWK book does not define the return value of -.BR srand() . -The \*(PX standard -has it return the seed it was using, to allow keeping track -of random number sequences. Therefore -.B srand() -in -.I gawk -also returns its current seed. -.PP -Other new features are: -The use of multiple -.B \-f -options (from MKS -.IR awk ); -the -.B ENVIRON -array; the -.BR \ea , -and -.BR \ev -escape sequences (done originally in -.I gawk -and fed back into AT&T's); the -.B tolower() -and -.B toupper() -built-in functions (from AT&T); and the \*(AN C conversion specifications in -.B printf -(done first in AT&T's version). -.SH GNU EXTENSIONS -.I Gawk -has a number of extensions to \*(PX -.IR awk . -They are described in this section. All the extensions described here -can be disabled by -invoking -.I gawk -with the -.B \-\^\-traditional -option. -.PP -The following features of -.I gawk -are not available in -\*(PX -.IR awk . -.RS -.TP \w'\(bu'u+1n -\(bu -The -.B \ex -escape sequence. -(Disabled with -.BR \-\^\-posix .) -.TP \w'\(bu'u+1n -\(bu -The -.B fflush() -function. -(Disabled with -.BR \-\^\-posix .) -.TP -\(bu -The -.BR systime(), -.BR strftime(), -and -.B gensub() -functions. -.TP -\(bu -The special file names available for I/O redirection are not recognized. 
-.TP -\(bu -The -.BR ARGIND , -.BR ERRNO , -and -.B RT -variables are not special. -.TP -\(bu -The -.B IGNORECASE -variable and its side-effects are not available. -.TP -\(bu -The -.B FIELDWIDTHS -variable and fixed-width field splitting. -.TP -\(bu -The use of -.B RS -as a regular expression. -.TP -\(bu -The ability to split out individual characters using the null string -as the value of -.BR FS , -and as the third argument to -.BR split() . -.TP -\(bu -No path search is performed for files named via the -.B \-f -option. Therefore the -.B AWKPATH -environment variable is not special. -.TP -\(bu -The use of -.B "nextfile" -to abandon processing of the current input file. -.TP -\(bu -The use of -.BI delete " array" -to delete the entire contents of an array. -.RE -.PP -The AWK book does not define the return value of the -.B close() -function. -.IR Gawk\^ 's -.B close() -returns the value from -.IR fclose (3), -or -.IR pclose (3), -when closing a file or pipe, respectively. -.PP -When -.I gawk -is invoked with the -.B \-\^\-traditional -option, -if the -.I fs -argument to the -.B \-F -option is ``t'', then -.B FS -will be set to the tab character. -Note that typing -.B "gawk \-F\et \&..." -simply causes the shell to quote the ``t,'', and does not pass -``\et'' to the -.B \-F -option. -Since this is a rather ugly special case, it is not the default behavior. -This behavior also does not occur if -.B \-\^\-posix -has been specified. -To really get a tab character as the field separator, it is best to use -quotes: -.BR "gawk \-F'\et' \&..." . -.ig -.PP -If -.I gawk -was compiled for debugging, it will -accept the following additional options: -.TP -.PD 0 -.B \-Wparsedebug -.TP -.PD -.B \-\^\-parsedebug -Turn on -.IR yacc (1) -or -.IR bison (1) -debugging output during program parsing. -This option should only be of interest to the -.I gawk -maintainers, and may not even be compiled into -.IR gawk . -.. -.SH HISTORICAL FEATURES -There are two features of historical AWK implementations that -.I gawk -supports. -First, it is possible to call the -.B length() -built-in function not only with no argument, but even without parentheses! -Thus, -.RS -.PP -.ft B -a = length # Holy Algol 60, Batman! -.ft R -.RE -.PP -is the same as either of -.RS -.PP -.ft B -a = length() -.br -a = length($0) -.ft R -.RE -.PP -This feature is marked as ``deprecated'' in the \*(PX standard, and -.I gawk -will issue a warning about its use if -.B \-\^\-lint -is specified on the command line. -.PP -The other feature is the use of either the -.B continue -or the -.B break -statements outside the body of a -.BR while , -.BR for , -or -.B do -loop. Traditional AWK implementations have treated such usage as -equivalent to the -.B next -statement. -.I Gawk -will support this usage if -.B \-\^\-traditional -has been specified. -.SH ENVIRONMENT -If -.B POSIXLY_CORRECT -exists in the environment, then -.I gawk -behaves exactly as if -.B \-\^\-posix -had been specified on the command line. -If -.B \-\^\-lint -has been specified, -.I gawk -will issue a warning message to this effect. -.PP -The -.B AWKPATH -environment variable can be used to provide a list of directories that -.I gawk -will search when looking for files named via the -.B \-f -and -.B \-\^\-file -options. -.SH BUGS -The -.B \-F -option is not necessary given the command line variable assignment feature; -it remains only for backwards compatibility. 
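As a quick sketch of that equivalence (the data file name data.tsv is hypothetical), the following two invocations split fields identically:

    gawk -F'\t' '{ print $2 }' data.tsv
    gawk -v FS='\t' '{ print $2 }' data.tsv

Both set FS to a tab before any input is read; the single quotes keep the shell from stripping the backslash, as discussed above.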
-.PP -If your system actually has support for -.B /dev/fd -and the associated -.BR /dev/stdin , -.BR /dev/stdout , -and -.B /dev/stderr -files, you may get different output from -.I gawk -than you would get on a system without those files. When -.I gawk -interprets these files internally, it synchronizes output to the standard -output with output to -.BR /dev/stdout , -while on a system with those files, the output is actually to different -open files. -Caveat Emptor. -.PP -Syntactically invalid single character programs tend to overflow -the parse stack, generating a rather unhelpful message. Such programs -are surprisingly difficult to diagnose in the completely general case, -and the effort to do so really is not worth it. -.SH VERSION INFORMATION -This man page documents -.IR gawk , -version 3.0.4. -.SH AUTHORS -The original version of \*(UX -.I awk -was designed and implemented by Alfred Aho, -Peter Weinberger, and Brian Kernighan of AT&T Bell Labs. Brian Kernighan -continues to maintain and enhance it. -.PP -Paul Rubin and Jay Fenlason, -of the Free Software Foundation, wrote -.IR gawk , -to be compatible with the original version of -.I awk -distributed in Seventh Edition \*(UX. -John Woods contributed a number of bug fixes. -David Trueman, with contributions -from Arnold Robbins, made -.I gawk -compatible with the new version of \*(UX -.IR awk . -Arnold Robbins is the current maintainer. -.PP -The initial DOS port was done by Conrad Kwok and Scott Garfinkle. -Scott Deifik is the current DOS maintainer. Pat Rankin did the -port to VMS, and Michal Jaegermann did the port to the Atari ST. -The port to OS/2 was done by Kai Uwe Rommel, with contributions and -help from Darrel Hankerson. Fred Fish supplied support for the Amiga. -.SH BUG REPORTS -If you find a bug in -.IR gawk , -please send electronic mail to -.BR bug-gnu-utils@gnu.org , -.I with -a carbon copy to -.BR arnold@gnu.org . -Please include your operating system and its revision, the version of -.IR gawk , -what C compiler you used to compile it, and a test program -and data that are as small as possible for reproducing the problem. -.PP -Before sending a bug report, please do two things. First, verify that -you have the latest version of -.IR gawk . -Many bugs (usually subtle ones) are fixed at each release, and if -yours is out of date, the problem may already have been solved. -Second, please read this man page and the reference manual carefully to -be sure that what you think is a bug really is, instead of just a quirk -in the language. -.PP -Whatever you do, do -.B NOT -post a bug report in -.BR comp.lang.awk . -While the -.I gawk -developers occasionally read this newsgroup, posting bug reports there -is an unreliable way to report bugs. Instead, please use the electronic mail -addresses given above. -.SH ACKNOWLEDGEMENTS -Brian Kernighan of Bell Labs -provided valuable assistance during testing and debugging. -We thank him. -.SH COPYING PERMISSIONS -Copyright \(co) 1996,97,98,99 Free Software Foundation, Inc. -.PP -Permission is granted to make and distribute verbatim copies of -this manual page provided the copyright notice and this permission -notice are preserved on all copies. -.ig -Permission is granted to process this file through troff and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual page). -.. 
-.PP -Permission is granted to copy and distribute modified versions of this -manual page under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. -.PP -Permission is granted to copy and distribute translations of this -manual page into another language, under the above conditions for -modified versions, except that this permission notice may be stated in -a translation approved by the Foundation. diff --git a/contrib/awk/doc/awkcard.in b/contrib/awk/doc/awkcard.in deleted file mode 100644 index 43f73fb..0000000 --- a/contrib/awk/doc/awkcard.in +++ /dev/null @@ -1,1883 +0,0 @@ -.\" AWK Reference Card --- Arnold Robbins, arnold@gnu.org -.\" -.\" Copyright (C) 1996-2001 Free Software Foundation, Inc. -.\" -.\" Permission is granted to make and distribute verbatim copies of -.\" this reference card provided the copyright notice and this permission -.\" notice are preserved on all copies. -.\" -.\" Permission is granted to process this file through troff and print the -.\" results, provided the printed document carries copying permission -.\" notice identical to this one except for the removal of this paragraph -.\" (this paragraph not being relevant to the printed reference card). -.\" -.\" Permission is granted to copy and distribute modified versions of this -.\" reference card under the conditions for verbatim copying, provided that -.\" the entire resulting derived work is distributed under the terms of a -.\" permission notice identical to this one. -.\" -.\" Permission is granted to copy and distribute translations of this -.\" reference card into another language, under the above conditions for -.\" modified versions, except that this permission notice may be stated in -.\" a translation approved by the Foundation. -.\" -.\" Strings to save typing -.ds AK \*(FCawk\*(FR -.ds GK \*(FCgawk\*(FR -.ds NK Bell Labs \*(FCawk\*(FR -.ds MK \*(FCmawk\*(FR -.\" -.\" -.de TD\" tab defaults -.ta .2i .78i 1i 1.2i 1.4i 1.7i -.. -.de TE -.TD -.. - -.sp -.ce -\*(CD\f(HB\s+8AWK REFERENCE\s0\*(FR -.sp -.\" --- Table Of Contents -.ta 2.4i 2.6iR -.lc . -.ES -.in +.2i -.nf -\*(FRAction Statements 7 -Arrays 11 -Awk Program Execution 4 -Bit Manipulation Functions (\*(GK) 16 -Bug Reports 2 -Closing Redirections 12 -Command Line Arguments (standard) 2 -Command Line Arguments (\*(GK) 3 -Command Line Arguments (\*(MK) 4 -Conversions And Comparisons 9 -Copying Permissions 18 -Definitions 2 -Dynamic Extensions (\*(GK) 18 -Environment Variables (\*(GK) 18 -Escape Sequences 8 -Expressions 11 -Fields 6 -FTP/HTTP Information 18 -Historical Features (\*(GK) 18 -Input Control 12 -Internationalization (\*(GK) 16 -Lines And Statements 5 -Localization (\*(GK) 17 -Numeric Functions 14 -Output Control 12 -Pattern Elements 7 -POSIX Character Classes (\*(GK) 6 -Printf Formats 13 -Records 6 -Regular Expressions 5 -Special Filenames 14 -String Functions 15 -Time Functions (\*(GK) 16 -User-defined Functions 17 -Variables 8\*(CX -.in -.2i -.EB "\s+2\f(HBCONTENTS\*(FR\s0" -.sp .4 -.TD -.fi -\*(CD\*(FRArnold Robbins wrote this reference card. -We thank -Brian Kernighan and Michael Brennan who reviewed it. -.sp .4 -.SL -.sp .4 -.so SRCDIR/ad.block -.\" a subtlety here; this line changes color. We rely on it -.\" also to provide a blank line. 
-\*(CD -.SL -.nf -\*(FR\(co Copyright 1996-2001, Free Software Foundation -59 Temple Place \(em Suite 330 -Boston, MA 02111-1307 USA -.nf -.BT - - -.\" -.\" -.\" --- Definitions -.fi -.ES -\*(CDThis card describes POSIX AWK, as well as the three -freely available \*(AK implementations -(see \fHFTP Information\fP below). -\*(CLCommon extensions (in two or more versions) are printed in light blue. -\*(CBFeatures specific to just one version\(emusually GNU AWK (\*(GK)\(emare -printed in dark blue. -\*(CRExceptions and deprecated features are printed in red. -\*(CDFeatures mandated by POSIX are printed in black. -.sp .5 -Several type faces are used to clarify the meaning: -.br -.nr IN \w'\(bu ' -\(bu \*(FC\*(CN\fP is used for computer input. -.br -.fi -.in +\n(INu -.ti -\n(INu -\(bu\|\^\*(FI\*(IN\fP is used for emphasis, to indicate user input and for syntactic -placeholders, such as \*(FIvariable\fP or \*(FIaction\fP. -.in -\n(INu -.br -\(bu \*(RN is used for explanatory text. -.sp .5 -\*(FInumber\fP \- a floating point number as in ANSI C, such as -\*(FC3\*(FR, -\*(FC2.3\*(FR, -\*(FC.4\*(FR, -\*(FC1.4e2\*(FR -or -\*(FC4.1E5\*(FR. -\*(CBNumbers may also be given in octal or hexadecimal: e.g., -\*(FC011\*(FR or \*(FC0x11\*(FR.\*(CD -.sp .5 -\*(FIescape sequences\fP \- a special sequence of characters beginning -with a backslash, used to describe otherwise unprintable characters. -(See \fHEscape Sequences\fP below.) -.sp .5 -\*(FIstring\fP \- a group of characters enclosed in double quotes. -Strings may contain \*(FIescape sequences\*(FR. -.sp .5 -\*(FIregexp\fP \- a regular expression, either a regexp constant -enclosed in forward slashes, or a dynamic regexp computed at run-time. -Regexp constants may contain \*(FIescape sequences\*(FR. -.sp .5 -\*(FIname\fP \- a variable, array or function name. -.sp .5 -\*(FIentry\fP(\*(FIN\fP) \- entry \*(FIentry\fP in section \*(FIN\fP of the -UNIX reference manual. -.sp .5 -\*(FIpattern\fP \- an expression describing an input record to be matched. -.sp .5 -\*(FIaction\fP \- statements to execute when an input record is matched. -.sp .5 -\*(FIrule\fP \- a pattern-action pair, where the pattern or action may -be missing.\*(CX -.EB "\s+2\f(HBDEFINITIONS\*(FR\s0" - -.\" -.\" -.\" --- Command Line Arguments -.ES -.fi -\*(CDCommand line arguments control setting the field separator, -setting variables before the \*(FCBEGIN\fP rule is run, and -the location of AWK program source code. -Implementation-specific command line arguments change -the behavior of the running interpreter. -.sp .5 -.TS -expand; -l lw(2.2i). -\*(FC\-F \*(FIfs\*(FR use \*(FIfs\fP for the input field separator. -\*(FC\-v\*(FI var\*(FC\^=\^\*(FIval\*(FR T{ -assign the value \*(FIval\*(FR to the variable \*(FIvar\*(FR -before execution of the program begins. Such -variable values are available to the \*(FCBEGIN\fP rule. -T} -\*(FC\-f \*(FIprog-file\*(FR T{ -read the AWK program source from the file -\*(FIprog-file\*(FR, instead of from the first command -line argument. Multiple \*(FC\-f\*(FR options may be used. -T} -\*(FC\-\^\-\*(FR signal the end of options. -.TE -.sp .5 -.fi -\*(CLThe following options are accepted by both \*(NK and \*(GK -\*(CR(ignored by \*(GK, not in \*(MK).\*(CL -.sp .5 -.nf -.TS -expand, tab(%); -l lw(2.2i). 
-\*(FC\-mf \*(FIval\*(FR%set the maximum number of fields to \*(FIval\fP -\*(FC\-mr \*(FIval\*(FR%set the maximum record size to \*(FIval\fP\*(CX -.TE -.EB "\s+2\f(HBCOMMAND LINE ARGUMENTS (standard)\*(FR\s0" - -.\" --- Bug Reports -.ES -.fi -\*(CDIf you find a bug in this reference card, please report it via electronic -mail to \*(FCbug-gawk@gnu.org\*(FR.\*(CX -.EB "\s+2\f(HBBUG REPORTS\*(FR\s0" - -.BT - -.\" -.\" -.\" --- Command Line Arguments (gawk) -.ES -.fi -\*(CDThe following options are specific to \*(GK. -You may also use ``\*(FC\-W \*(FIoption\*(FR'' -for full POSIX compliance. -Long options may abbreviated as long as the abbreviation -remains unique. -.sp .5 -.ig -.\" This option is left undocumented, on purpose. -\*(FC\-\^\-nostalgia\*(FR%T{ -provide a moment of nostalgia for -long time \*(AK users. -T} -.. -.TS -expand, tab(%); -l lw(1.3i). -\*(FC\-\^\-assign \*(FIvar\*(FC\^=\^\*(FIval\*(FR%just like \*(FC\-v\fP. -\*(FC\-\^\-field-separator \*(FIfs\*(FR%just like \*(FC\-F\fP. -\*(FC\-\^\-file \*(FIprog-file%\*(FRjust like \*(FC\-f\fP. -.TE -.TS -expand, tab(%); -ls -l lw(2.2i). -\*(FC\-\^\-compat\*(FR, \*(FC\-\^\-traditional\*(FR -%T{ -disable \*(GK-specific extensions -(the use of \*(FC\-\^\-traditional\*(FR is preferred). -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-copyleft\*(FR, \*(FC\-\^\-copyright\*(FR -%T{ -print the short version of the GNU -copyright information on \*(FCstdout\*(FR. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-dump-variables\*(FR[\*(FC=\*(FIfile\*(FR] -%T{ -Print a sorted list of global variables, -their types and final values to -\*(FIfile\*(FR. -If no \*(FIfile\*(FR -is provided, \*(FCgawk\*(FR -uses \*(FCawkvars.out\*(FR. -T} -\*(FC\-\^\-gen\-po\*(FR%T{ -process the program and print a GNU \*(FCgettext\*(FR -format \*(FC\&.po\*(FR format file on standard output, -containing the text of all strings that were marked -for localization. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-help\*(FR, \*(FC\-\^\-usage\*(FR -%T{ -print a short summary of the available -options on \*(FCstdout\*(FR, then exit zero. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-lint\*(FR[\*(FC=fatal\*(FR] -%T{ -warn about constructs that are dubious -or non-portable to other \*(AKs. -With an optional argument of \*(FCfatal\*(FR, -lint warnings become fatal errors. -T} -.T& -l lw(2.2i). -\*(FC\-\^\-lint\-old\*(FR%T{ -warn about constructs that are not -portable to the original version of -Unix \*(AK. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-non\-decimal\-data\*(FR -%T{ -recognize octal and hexadecimal values in input data. -\*(FIUse this option with great caution!\*(FR -T} -.T& -l lw(2.2i). -\*(FC\-\^\-posix\*(FR%T{ -disable common and GNU extensions. -Enable \*(FIinterval expressions\*(FR in regular -expression matching (see \fHRegular -Expressions\fP below). -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-profile\*(FR[\*(FC=\*(FIprof_file\*(FR] -%T{ -send profiling data to \*(FIprof_file\*(FR -(default: \*(FCawkprof.out\*(FR). -With \*(FIgawk\*(FR, -the profile is just a ``pretty printed'' version of the program. -With \*(FIpgawk\*(FR, -the profile contains execution counts in the left margin -of each statement in the program. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-re\-interval\*(FR -%T{ -enable \*(FIinterval expressions\*(FR in regular -expression matching (see \fHRegular -Expressions\fP below). Useful if -\*(FC\-\^\-posix\*(FR is not specified. -T} -.T& -ls -l lw(2.2i). -\*(FC\-\^\-source '\*(FItext\*(FC'\*(FR -%use \*(FItext\*(FR as AWK program source code. 
-\*(FC\-\^\-version\*(FR%T{ -print version information on \*(FCstdout\fP -and exit zero. -T} -.TE -.sp .5 -.fi -In compatibility mode, -any other options are flagged as invalid, but are otherwise ignored. -In normal operation, as long as program text has been supplied, unknown -options are passed on to the AWK program in -\*(FCARGV\*(FR -for processing. This is most useful for running AWK -programs via the \*(FC#!\*(FR executable interpreter mechanism. -.sp .5 -\*(FIpgawk\fP accepts two signals. -\*(FCSIGUSR1\fP causes it to dump a profile and function call stack to the -profile file. It then continues to run. -\*(FCSIGHUP\fP -causes it to dump the profile and function call stack and then exit.\*(CB -.EB "\s+2\f(HBCOMMAND LINE ARGUMENTS (\*(GK\f(HB)\*(FR\s0" - -.BT - -.\" -.\" -.\" --- Command Line Arguments (mawk) -.ES -.fi -\*(CDThe following options are specific to \*(MK. -.sp .5 -.fi -.TS -expand; -l lw(1.8i). -\*(FC\-W dump\*(FR T{ -print an assembly listing of the program to -\*(FCstdout\fP and exit zero. -T} -\*(FC\-W exec \*(FIfile\*(FR T{ -read program text from \*(FIfile\fP. No other -options are processed. Useful with \*(FC#!\fP. -T} -\*(FC\-W interactive\*(FR T{ -unbuffer \*(FCstdout\fP and line buffer \*(FCstdin\fP. -Lines are always records, ignoring \*(FCRS\fP. -T} -\*(FC\-W posix_space\*(FR T{ -\*(FC\en\*(FR separates fields when \*(FCRS = "\^"\fP. -T} -\*(FC\-W sprintf=\*(FInum\*(FR T{ -adjust the size of \*(MK's internal -\*(FCsprintf\*(FR buffer. -T} -\*(FC\-W version\*(FR T{ -print version and copyright on -\*(FCstdout\fP and limit information on \*(FCstderr\fP -and exit zero. -T} -.TE -.sp .5 -.fi -The options may be abbreviated using just the first letter, e.g., -\*(FC\-We\*(FR, -\*(FC\-Wv\*(FR -and so on.\*(CB -.EB "\s+2\f(HBCOMMAND LINE ARGUMENTS (\*(MK\f(HB)\*(FR\s0" - -.\" --- Awk Program Execution -.ES -.fi -\*(CDAWK programs are a sequence of pattern-action statements -and optional function definitions. -.sp .5 - \*(FIpattern\*(FC { \*(FIaction statements\*(FC }\*(FR -.br - \*(FCfunction \*(FIname\*(FC(\*(FIparameter list\*(FC) { \*(FIstatements\*(FC }\*(FR -.sp .5 -\*(AK first reads the program source from the -\*(FIprog-file\*(FR(s), if specified, -\*(CBfrom arguments to \*(FC\-\^\-source\*(FR,\*(CD -or from the first non-option argument on the command line. -The program text is read as if all the \*(FIprog-file\*(FR(s) -\*(CBand command line -source texts\*(CD had been concatenated. -.sp .5 -AWK programs execute in the following order. -First, all variable assignments specified via the \*(FC\-v\fP -option are performed. -Next, \*(AK executes the code in the -\*(FCBEGIN\fP rules(s), if any, and then proceeds to read -the files \*(FC1\fP through \*(FCARGC \- 1\fP in the \*(FCARGV\fP array. -(Adjusting \*(FCARGC\fP and \*(FCARGV\fP thus provides control over -the input files that will be processed.) -If there are no files named on the command line, -\*(AK reads the standard input. -.sp .5 -If a command line argument has the form -\*(FIvar\*(FC=\*(FIval\*(FR, -it is treated as a variable assignment. The variable -\*(FIvar\fP will be assigned the value \*(FIval\*(FR. -(This happens after any \*(FCBEGIN\fP rule(s) have been run.) -... delete this paragraph if no space -Command line variable assignment -is most useful for dynamically assigning values to the variables -\*(AK uses to control how input is broken into fields and records. It -is also useful for controlling state if multiple passes are needed over -a single data file. 
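For instance, a two-pass computation over one file can be arranged entirely on the command line (the file name data is hypothetical); the assignment pass=2 is processed only after the first reading of the file is finished:

    awk 'pass == 1 { total += $1 }
         pass == 2 { print $1 / total }' pass=1 data pass=2 data

The file is read twice: the first pass accumulates a total, and the second prints each value as a fraction of that total.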
-.sp .5 -If the value of a particular element of \*(FCARGV\fP is empty -(\*(FC"\^"\*(FR), \*(AK skips over it. -.sp .5 -For each record in the input, \*(AK tests to see if it matches any -\*(FIpattern\fP in the AWK program. -For each pattern that the record matches, the associated -\*(FIaction\fP is executed. -The patterns are tested in the order they occur in the program. -.sp .5 -Finally, after all the input is exhausted, -\*(AK executes the code in the \*(FCEND\fP rule(s), if any. -.sp .5 -If a program only has a \*(FCBEGIN\fP rule, no input files are processed. -If a program only has an \*(FCEND\fP rule, the input will be read. -\*(CX -.EB "\s+2\f(HBAWK PROGRAM EXECUTION\*(FR\s0" - - -.BT - -.\" --- Lines And Statements -.ES -.fi -\*(CDAWK is a line-oriented language. The pattern comes first, and then the -action. Action statements are enclosed in \*(FC{\fP and \*(FC}\*(FR. -Either the pattern or the action may be missing, but -not both. If the pattern is missing, the action is -executed for every input record. -A missing action is equivalent to -.sp .5 - \*(FC{ print }\fP -.sp .5 -which prints the entire record. -.sp .5 -Comments begin with the \*(FC#\*(FR character, and continue until the -end of the line. -Normally, a statement ends with a newline, but lines ending in -a ``,'', -\*(FC{\*(FR, -\*(CB\*(FC?\*(FR, -\*(FC:\*(FR,\*(CD -\*(FC&&\*(FR -or -\*(FC||\*(FR -are automatically continued. -Lines ending in \*(FCdo\fP or \*(FCelse\fP -also have their statements automatically continued on the following line. -In other cases, a line can be continued by ending it with a ``\e'', -in which case the newline is ignored. However, a ``\e'' after a -\*(FC#\*(FR is not special. -.sp .5 -Multiple statements may be put on one line by separating them with a ``;''. -This applies to both the statements within the action part of a -pattern-action pair (the usual case) -and to the pattern-action statements themselves.\*(CX -.EB "\s+2\f(HBLINES AND STATEMENTS\*(FR\s0" - - - -.\" --- Regular Expressions -.ES -.fi -\*(CDRegular expressions are the extended kind originally defined by -\*(FCegrep\fP. -\*(CBAdditional GNU regexp operators are supported by \*(GK. -A \*(FIword-constituent\fP character is a letter, digit, or -underscore (\*(FC_\fP).\*(CD -.sp .5 -.TS -center, tab(~); -cp8 sp8 -cp8 sp8 -lp8|lp8. -.\" .vs 10 -_ -Summary of Regular Expressions -In Decreasing Precedence -_ -\*(FC(\^\*(FIr\*(FC)\*(FR~regular expression (for grouping) -\*(FIc\*(FR~if non-special char, matches itself -\*(FC\e\*(FI\^c\*(FR~turn off special meaning of \*(FIc\fP -\*(FC^\*(FR~beginning of string (note: \*(FInot\fP line) -\*(FC$\*(FR~end of string (note: \*(FInot\fP line) -\*(FC.\*(FR~any single character, including newline -\*(FC[\*(FR...\*(FC]\*(FR~any one character in ... or range -\*(FC[^\*(FR...\*(FC]\*(FR~any one character not in ... 
or range -\*(CB\*(FC\ey\*(FR~word boundary -\*(FC\eB\*(FR~middle of a word -\*(FC\e<\*(FR~beginning of a word -\*(FC\e>\*(FR~end of a word -\*(FC\ew\*(FR~any word-constituent character -\*(FC\eW\*(FR~any non-word-constituent character -\*(FC\e`\*(FR~beginning of a string -\*(FC\e'\*(FR~end of a string\*(CD -\*(FIr\*(FC*\*(FR~zero or more occurrences of \*(FIr\*(FR -\*(FIr\*(FC+\*(FR~one or more occurrences of \*(FIr\*(FR -\*(FIr\*(FC?\*(FR~zero or one occurrences of \*(FIr\*(FR -\*(FIr\*(FC{\*(FIn\*(FC,\*(FIm\*(FC}\*(FR~\*(FIn\fP to \*(FIm\fP occurrences of \*(FIr\*(FR \*(CR(POSIX: see note below)\*(CD -\*(FIr1\*(FC|\|\*(FIr2\*(FR~\*(FIr1\*(FR or \*(FIr2\*(FR -.TE -.sp .5 -.fi -\*(CRThe \*(FIr\*(FC{\*(FIn\*(FC,\*(FIm\*(FC}\*(FR notation is called an -\*(FIinterval expression\fP. POSIX mandates it for AWK regexps, but -most \*(AKs don't implement it. \*(CBUse \*(FC\-\^\-re\-interval\*(FR -or \*(FC\-\^\-posix\*(FR to enable -this feature in \*(GK.\*(CX -.EB "\s+2\f(HBREGULAR EXPRESSIONS\*(FR\s0" - - -.BT - -.\" --- POSIX Character Classes (gawk) -.ES -.fi -\*(CDIn regular expressions, within character ranges -(\*(FC[\*(FR...\*(FC]\*(FR), -the notation \*(FC[[:\*(FIclass\*(FC:]]\*(FR defines character classes: -.sp .5 -.TS -center, tab(~); -lp8 lp8 lp8 lp8. -\*(FCalnum\*(FR~alphanumeric~\*(FClower\*(FR~lower-case -\*(FCalpha\*(FR~alphabetic~\*(FCprint\*(FR~printable -\*(FCblank\*(FR~space or tab~\*(FCpunct\*(FR~punctuation -\*(FCcntrl\*(FR~control~\*(FCspace\*(FR~whitespace -\*(FCdigit\*(FR~decimal~\*(FCupper\*(FR~upper-case -\*(FCgraph\*(FR~non-spaces~\*(FCxdigit\*(FR~hexadecimal -.TE -.fi -.sp .5 -Recognition of these character classes is disabled -when \*(FC\-\-traditional\*(FR is supplied.\*(CB -.EB "\s+2\f(HBPOSIX CHARACTER CLASSES (\*(GK\f(HB)\*(FR\s0" - -.\" --- Records -.ES -.fi -\*(CDNormally, records are separated by newline characters. -Assigning values to the built-in variable \*(FCRS\*(FR -controls how records are separated. -If \*(FCRS\fP is any single character, that character separates records. -\*(CLOtherwise, \*(FCRS\fP is a regular expression. -\*(CR(Not \*(NK.)\*(CL -Text in the input that matches this -regular expression separates the record. -\*(CB\*(GK sets \*(FCRT\*(FR to the value of the -input text that matched the regular expression. -The value of \*(FCIGNORECASE\fP -also affects how records are separated when -\*(FCRS\fP is a regular expression.\*(CD -If \*(FCRS\fP is set to the null string, -then records are separated by one or more blank lines. -When \*(FCRS\fP is set to the null string, -the newline character always acts as -a field separator, in addition to whatever value -\*(FCFS\fP may have. -\*(CB\*(MK does not apply exceptional rules to \*(FCFS\fP -when \*(FCRS = "\^"\fP.\*(CX -.EB "\s+2\f(HBRECORDS\*(FR\s0" - -.\" --- Fields -.ES -.fi -\*(CDAs each input record is read, \*(AK splits the record into -\*(FIfields\*(FR, using the value of the \*(FCFS\fP -variable as the field separator. -If \*(FCFS\fP is a single character, -fields are separated by that character. -\*(CLIf \*(FCFS\fP is the null string, -then each individual character becomes a separate field.\*(CD -Otherwise, \*(FCFS\fP is expected to be a full regular expression. -In the special case that \*(FCFS\fP -is a single space, fields are separated -by runs of spaces and/or tabs -\*(CLand/or newlines\*(CD. -Leading and trailing whitespace are ignored. 
-\*(CBThe value of \*(FCIGNORECASE\fP -also affects how fields are split when -\*(FCFS\fP is a regular expression.\*(CD -.sp .5 -\*(CBIf the \*(FCFIELDWIDTHS\fP -variable is set to a space-separated list of numbers, each field is -expected to have a fixed width, and \*(GK -splits up the record using the specified widths. -The value of \*(FCFS\fP is ignored. -Assigning a new value to \*(FCFS\fP -overrides the use of \*(FCFIELDWIDTHS\*(FR, -and restores the default behavior.\*(CD -.sp .5 -Each field in the input record may be referenced by its position, -\*(FC$1\*(FR, \*(FC$2\*(FR and so on. -\*(FC$0\fP is the whole record. -Fields may also be assigned new values. -.sp .5 -The variable \*(FCNF\fP -is set to the total number of fields in the input record. -.sp .5 -References to non-existent fields (i.e., fields after \*(FC$NF\*(FR) -produce the null-string. However, assigning to a non-existent field -(e.g., \*(FC$(NF+2) = 5\*(FR) increases the value of -\*(FCNF\*(FR, creates any intervening fields with the null string as their value, -and causes the value of \*(FC$0\fP -to be recomputed with the fields being separated by the -value of \*(FCOFS\*(FR. -References to negative numbered fields cause a fatal error. -Decreasing the value of \*(FCNF\fP causes the trailing fields to be lost -\*(CR(not \*(NK).\*(CX -.EB "\s+2\f(HBFIELDS\*(FR\s0" - -.BT - -.\" --- Pattern Elements -.ES -.fi -\*(CDAWK patterns may be one of the following. -.sp .5 -.nf - \*(FCBEGIN - END - \*(FIexpression - pat1\*(FC,\*(FIpat2\*(FR -.sp .5 -.fi -\*(FCBEGIN\fP and \*(FCEND\fP are special patterns that provide start-up -and clean-up actions respectively. They must have actions. There can -be multiple \*(FCBEGIN\fP and \*(FCEND\fP rules; they are merged and -executed as if there had just been one large rule. They may occur anywhere -in a program, including different source files. -.sp .5 -Expression patterns can be any expression, as described -under \fHExpressions\fP. -.sp .5 -The \*(FIpat1\*(FC,\*(FIpat2\*(FR pattern -is called a \*(FIrange pattern\*(FR. -It matches all input records starting with a record that matches -\*(FIpat1\*(FR, and continuing until a record that matches -\*(FIpat2\*(FR, inclusive. -It does not combine with any other pattern expression.\*(CX -.EB "\s+2\f(HBPATTERN ELEMENTS\*(FR\s0" - - -.\" --- Action Statements -.ES -.fi -.in +.2i -.ti -.2i -\*(CD\*(FCbreak\*(FR -.br -break out of the nearest enclosing \*(FCdo\*(FR, \*(FCfor\*(FR, -or \*(FCwhile\*(FR loop. -.ti -.2i -\*(FCcontinue\*(FR -.br -skip the rest of the loop body. -Evaluate the \*(FIcondition\*(FR -part of the nearest enclosing \*(FCdo\*(FR or \*(FCwhile\*(FR loop, -or go to the \*(FIincr\*(FR part of a \*(FCfor\*(FR loop. -.ti -.2i -\*(FCdelete \*(FIarray\^\*(FC[\^\*(FIindex\^\*(FC]\*(FR -.br -delete element \*(FIindex\*(FR from array \*(FIarray\*(FR. -.ti -.2i -\*(CL\*(FCdelete \*(FIarray\^\*(FR -.br -delete all elements from array \*(FIarray\*(FR.\*(CD -.ti -.2i -\*(FCdo \*(FIstatement \*(FCwhile (\*(FIcondition\*(FC)\*(FR -.br -execute \*(FIstatement\*(FR while \*(FIcondition\*(FR is true. -The \*(FIstatement\*(FR is always executed at least once. -.ti -.2i -\*(FCexit\*(FR [ \*(FIexpression\*(FR ] -.br -terminate input record processing. -Execute the \*(FCEND\*(FR rule(s) if present. -If present, \*(FIexpression\*(FR becomes \*(AK's return value. -.ti -.2i -\*(FCfor (\*(FIinit\*(FC; \*(FIcond\*(FC; \*(FIincr\*(FC) \*(FIstatement\*(FR -.br -execute \*(FIinit\*(FR. -Evaluate \*(FIcond\*(FR. -If it is true, execute \*(FIstatement\*(FR. 
-Execute \*(FIincr\*(FR before going back to the top to -re-evaluate \*(FIcond\*(FR. -Any of the three may be omitted. -A missing \*(FIcond\*(FR is considered to be true. -.ti -.2i -\*(FCfor (\*(FIvar \*(FCin\*(FI array\*(FC) \*(FIstatement\*(FR -.br -execute \*(FIstatement\*(FR once for each subscript in \*(FIarray\*(FR, -with \*(FIvar\*(FR set to a different subscript each time through -the loop. -.ti -.2i -\*(CD\*(FCif (\*(FIcondition\*(FC) \*(FIstatement1\*(FR [ \*(FCelse\*(FI statement2\*(FR ] -.br -if \*(FIcondition\*(FR is true, execute \*(FIstatement1\*(FR, -otherwise execute \*(FIstatement2\*(FR. Each \*(FCelse\*(FR -matches the closest \*(FCif\*(FR. -.ti -.2i -\*(FCnext\*(FR see \fHInput Control.\fP -.ti -.2i -\*(CL\*(FCnextfile\*(FR \*(CR(not \*(MK) \*(CLsee \fHInput Control.\fP\*(CD -.ti -.2i -\*(FCwhile (\*(FIcondition\*(FC) \*(FIstatement \*(FR -.br -while \*(FIcondition\*(FR is true, execute \*(FIstatement\*(FR. -.ti -.2i -\*(FC{ \*(FIstatements \*(FC}\*(FR -.br -a list of statements enclosed in braces can be used anywhere -that a single statement would otherwise be used.\*(CX -.in -.2i -.EB "\s+2\f(HBACTION STATEMENTS\*(FR\s0" - - -.BT - -.\" --- Escape Sequences -.ES -.fi -\*(CDWithin strings constants (\*(FC"..."\fP) and regexp -constants (\*(FC/.../\fP), escape sequences may be used to -generate otherwise unprintable characters. This table lists -the available escape sequences. -.sp .5 -.TS -center, tab(~); -lp8 lp8 lp8 lp8. -\*(FC\ea\fP~alert (bell)~\*(FC\er\fP~carriage return -\*(FC\eb\fP~backspace~\*(FC\et\fP~horizontal tab -\*(FC\ef\fP~form feed~\*(FC\ev\fP~vertical tab -\*(FC\en\fP~newline~\*(FC\e\e\fP~backslash -\*(FC\e\*(FIddd\*(FR~octal value \*(FIddd\fP~\*(CL\*(FC\ex\*(FIhh\*(FR~hex value \*(FIhh\fP\*(CD -\*(FC\e"\fP~double quote~\*(FC\e/\fP~forward slash\*(CX -.TE -.EB "\s+2\f(HBESCAPE SEQUENCES\*(FR\s0" -.sp .7 -.\" --- Variables -.ES -.fi -.TS -expand; -l lw(2i). -\*(CD\*(FCARGC\fP T{ -number of command line arguments. -T} -\*(CB\*(FCARGIND\fP T{ -index in \*(FCARGV\fP of current data file.\*(CD -T} -\*(FCARGV\fP T{ -array of command line arguments. Indexed from -0 to \*(FCARGC\fP \- 1. Dynamically changing the -contents of \*(FCARGV\fP can control the files used -for data. -T} -\*(CL\*(FCBINMODE\fP T{ -controls ``binary'' mode for all file I/O. Values of 1, 2, or 3, -indicate input, output, or all files, respectively, should use binary -I/O. \*(CR(Not \*(NK.) \*(CLApplies only to non-POSIX systems. -\*(CBFor \*(GK, string values of \*(FC"r"\fP, or \*(FC"w"\fP specify -that input files, or output files, respectively, should use binary I/O. -String values of \*(FC"rw"\fP or \*(FC"wr"\fP specify that all files -should use binary I/O. Any other string value is treated as \*(FC"rw"\fP, -but generates a warning message.\*(CD -T} -\*(FCCONVFMT\fP T{ -conversion format for numbers, default value -is \*(FC"%.6g"\*(FR. -T} -\*(FCENVIRON\fP T{ -array containing the current environment. -The array is indexed by the environment -variables, each element being the value of -that variable. -T} -\*(CB\*(FCERRNO\fP T{ -string describing the error if a -\*(FCgetline\*(FR -redirection or read -fails, or if -\*(FCclose()\*(FR fails. -T} -\*(FCFIELDWIDTHS\fP T{ -white-space separated list of fieldwidths. Used -to parse the input into fields of fixed width, -instead of the value of \*(FCFS\fP.\*(CD -T} -\*(FCFILENAME\fP T{ -name of the current input file. If no files given -on the command line, \*(FCFILENAME\fP is ``\-''. 
-\*(FCFILENAME\fP is undefined inside the \*(FCBEGIN\fP rule -(unless set by \*(FCgetline\fP). -T} -\*(FCFNR\fP T{ -record number in current input file. -T} -\*(FCFS\fP T{ -input field separator, a space by default -(see \fHFields\fP above). -T} -\*(CB\*(FCIGNORECASE\fP T{ -if non-zero, all regular expression and string -operations ignore case. -Array subscripting and \*(FCasort()\*(FR are \*(FInot\*(FR affected. -T} -\*(CB\*(FCLINT\fP T{ -provides dynamic control of the \*(FC\-\^\-lint\fP -option from within an AWK program. -When true, \*(GK -prints lint warnings. -When assigned the string value \*(FC"fatal"\*(FR, -lint warnings become fatal errors, exactly like -\*(FC\-\-lint=fatal\*(FR. -Any other true value just prints warnings.\*(CD -T} -\*(FCNF\fP T{ -number of fields in the current input record. -T} -\*(FCNR\fP T{ -total number of input records seen so far. -T} -\*(FCOFMT\fP T{ -output format for numbers, \*(FC"%.6g"\*(FR, by default. -\*(CROld versions of \*(AK used this for number -to string conversion.\*(CX -T} -.TE -.EB "\s+2\f(HBVARIABLES\*(FR\s0" -.BT - -.\" --- Variables (continued) -.ES -.fi -.TS -expand; -l lw(2i). -\*(CD\*(FCOFS\fP T{ -output field separator, a space by default. -T} -\*(FCORS\fP T{ -output record separator, a newline by default. -T} -\*(CB\*(FCPROCINFO\fP T{ -elements of this array provide access to info -about the running AWK program. See -\*(AM for details.\*(CD -T} -\*(FCRLENGTH\fP T{ -length of the string matched by \*(FCmatch()\*(FR; -\-1 if no match. -T} -\*(FCRS\fP T{ -input record separator, a newline by default -(see \fHRecords\fP above). -T} -\*(FCRSTART\fP T{ -index of the first character matched by -\*(FCmatch()\*(FR; 0 if no match. -T} -\*(CB\*(FCRT\fP T{ -record terminator. \*(GK sets \*(FCRT\fP to the input -text that matched the character or regular -expression specified by \*(FCRS\*(FR.\*(CD -T} -\*(FCSUBSEP\fP T{ -character(s) used to separate multiple subscripts -in array elements, by default \*(FC"\e034"\*(FR. (See -\fHArrays\fP below). -T} -\*(CB\*(FCTEXTDOMAIN\fP T{ -the application's text domain for internationalization; -used to find the localized -translations for the program's strings.\*(CX -T} -.TE -.EB "\s+2\f(HBVARIABLES (continued)\*(FR\s0" - -.\" --- Conversions and Comparisons -.ES -.fi -\*(CDVariables and fields may be (floating point) numbers, strings or both. -Context determines how the value of a variable is interpreted. If used in -a numeric expression, it will be treated as a number, if used as a string -it will be treated as a string. -.sp .5 -To force a variable to be treated as a number, add 0 to it; to force it -to be treated as a string, concatenate it with the null string. -.sp .5 -When a string must be converted to a number, the conversion is accomplished -using \*(FIstrtod\*(FR(3). -A number is converted to a string by using the value of \*(FCCONVFMT\fP -as a format string for \*(FIsprintf\*(FR(3), -with the numeric value of the variable as the argument. -However, even though all numbers in AWK are floating-point, -integral values are \*(FIalways\fP converted as integers. -.sp .5 -Comparisons are performed as follows: -If two variables are numeric, they are compared numerically. -If one value is numeric and the other has a string value that is a -``numeric string,'' then comparisons are also done numerically. -Otherwise, the numeric value is converted to a string, and a string -comparison is performed. -Two strings are compared, of course, as strings. 
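A minimal sketch of these rules (the variable n and the values shown are illustrative):

    BEGIN {
        n = "3.14"
        print n + 0          # adding 0 forces numeric treatment
        print n ""           # concatenating "" forces string treatment
        print (10 < 9)       # numeric comparison: 0 (false)
        print ("10" < "9")   # string comparison:  1 (true)
    }

The last two lines show why the distinction matters: as numbers, 10 is not less than 9, but as strings "10" sorts before "9".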
-.sp .5 -Note that string constants, such as \*(FC"57"\fP, are \*(FInot\fP -numeric strings, they are string constants. The idea of ``numeric string'' -only applies to fields, \*(FCgetline\fP input, -\*(FCFILENAME\*(FR, \*(FCARGV\fP elements, \*(FCENVIRON\fP -elements and the elements of an array created by -\*(FCsplit()\fP that are numeric strings. -The basic idea is that \*(FIuser input\*(FR, -and only user input, that looks numeric, -should be treated that way. -\*(CRNote that the POSIX standard applies the concept of -``numeric string'' everywhere, even to string constants. -However, this is -clearly incorrect, and none of the three free \*(AK\*(FRs do this.\*(CD -(Fortunately, this is fixed in the next version of the standard.) -.sp .5 -Uninitialized variables have the numeric value 0 and the string value -\*(FC"\^"\fP -(the null, or empty, string).\*(CX -.EB "\s+2\f(HBCONVERSIONS AND COMPARISONS\*(FR\s0" - -.BT - -.ES -\*(CX -.sp 61 -.EB "\s+2\f(HBNOTES\*(FR\s0" - -.BT - -.\" --- Arrays -.ES -.fi -\*(CDAn array subscript is an expression between square brackets -(\*(FC[ \*(FRand \*(FC]\*(FR). -If the expression is a list -(\*(FIexpr\*(FC, \*(FIexpr \*(FR...), -then the subscript is a string consisting of the -concatenation of the (string) value of each expression, -separated by the value of the \*(FCSUBSEP\fP variable. -This simulates multi-dimensional -arrays. For example: -.nf -.sp .5 - \*(FCi = "A";\^ j = "B";\^ k = "C" - x[i, j, k] = "hello, world\en"\*(FR -.sp .5 -.fi -assigns \*(FC"hello, world\en"\*(FR to the element of the array -\*(FCx\fP -indexed by the string \*(FC"A\e034B\e034C"\*(FR. All arrays in AWK -are associative, i.e., indexed by string values. -.sp .5 -Use the special operator \*(FCin\fP in an \*(FCif\fP -or \*(FCwhile\fP statement to see if a particular value is -an array index. -.sp .5 -.nf - \*(FCif (val in array) - print array[val]\*(FR -.sp .5 -.fi -If the array has multiple subscripts, use -\*(FC(i, j) in array\*(FR. -.sp .5 -Use the \*(FCin\fP construct in a \*(FCfor\fP -loop to iterate over all the elements of an array. -.sp .5 -Use the \*(FCdelete\fP statement to delete an -element from an array. -\*(CLSpecifying just the array name without a subscript in -the \*(FCdelete\fP -statement deletes the entire contents of an array.\*(CX -.EB "\s+2\f(HBARRAYS\*(FR\s0" - -.\" --- Expressions -.ES -.fi -\*(CDExpressions are used as patterns, for controlling conditional action -statements, and to produce parameter values when calling functions. -Expressions may also be used as simple statements, -particularly if they have side-effects such as assignment. -Expressions mix \*(FIoperands\fP and \*(FIoperators\fP. Operands are -constants, fields, variables, array elements, and the return -values from function calls (both built-in and user-defined). -.sp .5 -Regexp constants (\*(FC/\*(FIpat\*(FC/\*(FR), when used as simple expressions, -i.e., not used on the right-hand side of -\*(FC~\fP and \*(FC!~\fP, or as arguments to the -\*(CB\*(FCgensub()\fP,\*(CD -\*(FCgsub()\fP, -\*(FCmatch()\fP, -\*(FCsplit()\fP, -and -\*(FCsub()\fP, -functions, mean \*(FC$0 ~ /\*(FIpat\*(FC/\*(FR. -.sp .5 -The AWK operators, in order of decreasing precedence, are: -.sp .5 -.fi -.TS -expand; -l lw(1.8i). 
-\*(FC(\&...)\*(FR grouping -\*(FC$\fP field reference -\*(FC++ \-\^\-\fP T{ -increment and decrement, -prefix and postfix -T} -\*(FC^\fP \*(CL\*(FC**\*(FR\*(CD exponentiation -\*(FC+ \- !\fP unary plus, unary minus, and logical negation -\*(FC* / %\fP multiplication, division, and modulus -\*(FC+ \-\fP addition and subtraction -\*(FIspace\fP string concatenation -\*(FC< >\fP less than, greater than -\*(FC<= >=\fP less than or equal, greater than or equal -\*(FC!= ==\fP not equal, equal -\*(FC~ !~\fP regular expression match, negated match -\*(FCin\fP array membership -\*(FC&&\fP logical AND, short circuit -\*(FC||\fP logical OR, short circuit -\*(FC?\^:\fP in-line conditional expression -.T& -l s -l lw(1.8i). -\*(FC=\0+=\0\-=\0*=\0/=\0%=\0^=\0\*(CL**=\*(CD\fP - assignment operators\*(CX -.TE -.EB "\s+2\f(HBEXPRESSIONS\*(FR\s0" - -.BT - -.\" --- Input Control -.ES -.fi -.TS -expand; -l lw(1.8i). -\*(FCgetline\fP T{ -set \*(FC$0\fP from next record; -set \*(FCNF\*(FR, \*(FCNR\*(FR, \*(FCFNR\*(FR. -T} -\*(FCgetline < \*(FIfile\*(FR set \*(FC$0\fP from next record of \*(FIfile\*(FR; set \*(FCNF\*(FR. -\*(FCgetline \*(FIv\*(FR T{ -set \*(FIv\fP from next input record; -set \*(FCNR\*(FR, \*(FCFNR\*(FR. -T} -\*(FCgetline \*(FIv \*(FC< \*(FIfile\*(FR set \*(FIv\fP from next record of \*(FIfile\*(FR. -\*(FIcmd \*(FC| getline\*(FR pipe into \*(FCgetline\*(FR; set \*(FC$0\*(FR, \*(FCNF\*(FR. -\*(FIcmd \*(FC| getline \*(FIv\*(FR pipe into \*(FCgetline\*(FR; set \*(FIv\*(FR. -\*(CB\*(FIcmd \*(FC|& getline\*(FR co-process pipe into \*(FCgetline\*(FR; set \*(FC$0\*(FR, \*(FCNF\*(FR. -.TE -.fi -.in +.2i -.ti -.2i -\*(FIcmd \*(FC|& getline \*(FIv\*(FR -.br -co-process pipe into \*(FCgetline\*(FR; set \*(FIv\*(FR. -.ti -.2i -\*(FCnext\fP -.br -stop processing the current input -record. Read next input record and -start over with the first pattern in the -program. Upon end of the input data, -execute any \*(FCEND\fP rule(s). -.br -.ti -.2i -\*(CL\*(FCnextfile\fP -.br -stop processing the current input file. -The next input record comes from the -next input file. \*(FCFILENAME\fP \*(CBand -\*(FCARGIND\fP\*(CL are updated, \*(FCFNR\fP is reset to 1, -and processing starts over with the first -pattern in the AWK program. Upon end -of input data, execute any \*(FCEND\fP rule(s). -\*(CREarlier versions of \*(GK used -\*(FCnext file\*(FR, as two words. -This usage is no longer supported. -\*(CR\*(MK does not currently support \*(FCnextfile\*(FR.\*(CD -.in -.2i -.sp .5 -.fi -\*(FCgetline\*(FR returns 0 on end of file and \-1 on an error. -\*(CBUpon an error, \*(FCERRNO\*(FR contains a string describing -the problem.\*(CX -.EB "\s+2\f(HBINPUT CONTROL\*(FR\s0" - -.\" --- Output Control -.ES -.fi -.in +.2i -.ti -.2i -\*(CL\*(FCfflush(\*(FR[\*(FIfile\^\*(FR]\*(FC)\*(FR -.br -flush any buffers associated -with the open output file or pipe \*(FIfile\*(FR.\*(CD -\*(CBIf no \*(FIfile\fP, then flush standard output. -If \*(FIfile\fP is null, then flush all open output files and pipes -\*(CR(not \*(NK)\*(CD. -.ti -.2i -\*(FCprint\fP -.br -print the current record. Terminate output record -with \*(FCORS\fP. -.ti -.2i -\*(FCprint \*(FIexpr-list\*(FR -.br -print expressions. Each expression is separated -by the value of \*(FCOFS\fP. Terminate the output record -with \*(FCORS\fP. -.ti -.2i -\*(FCprintf \*(FIfmt\*(FC, \*(FIexpr-list\*(FR -.br -format and print (see \fHPrintf Formats\fP below). 
-.ti -.2i -\*(FCsystem(\*(FIcmd\*(FC)\*(FR -.br -execute the command \*(FIcmd\*(FR, -and return the exit status -\*(CR(may not be available on non-POSIX systems)\*(CD. -.sp .5 -.in -.2i -I/O redirections may be used with both \*(FCprint\fP and \*(FCprintf\fP. -.sp .5 -.in +.2i -.ti -.2i -\*(CD\*(FCprint "hello" > \*(FIfile\*(FR -.br -print data to \*(FIfile\fP. The first time the file is written to, it -is truncated. Subsequent commands append data. -.ti -.2i -\*(FCprint "hello" >> \*(FIfile\*(FR -.br -append data to \*(FIfile\fP. The previous contents of \*(FIfile\*(FR are not lost. -.ti -.2i -\*(FCprint "hello" | \*(FIcmd\*(FR -.br -print data down a pipeline to \*(FIcmd\*(FR. -.ti -.2i -\*(CB\*(FCprint "hello" |& \*(FIcmd\*(FR -.br -print data down a pipeline to co-process \*(FIcmd\*(FR.\*(CX -.in -.2i -.EB "\s+2\f(HBOUTPUT CONTROL\*(FR\s0" - -.ES -.fi -.in +.2i -.ti -.2i -\*(CD\*(FCclose(\*(FIfile\*(FC)\*(FR -.br -close input or output file, pipe \*(CBor co-process.\*(CD -.ti -.2i -\*(CB\*(FCclose(\*(FIcommand\*(FC, \*(FIhow\*(FC)\*(FR -.br -close one end of co-process pipe. -Use \*(FC"to"\*(FR for the write end, or -\*(FC"from"\*(FR for the read end.\*(CD -.in -.2i -.sp .5 -On success, \*(FCclose()\*(FR returns zero for a file, or the exit status for a process. -It returns \-1 if \*(FIfile\*(FR -was never opened, or -if there was a system problem. -\*(CB\*(FCERRNO\*(FR describes -the error.\*(CX -.EB "\s+2\f(HBCLOSING REDIRECTIONS\*(FR\s0" - -.BT - -.\" --- Printf Formats -.ES -.fi -\*(CDThe \*(FCprintf\fP statement and -\*(FCsprintf()\fP function -accept the following conversion specification formats: -.sp .5 -.nf -\*(FC%c\fP an \s-1ASCII\s+1 character -\*(FC%d\fP a decimal number (the integer part) -\*(FC%i\fP a decimal number (the integer part) -\*(FC%e\fP a floating point number of the form - \*(FC[\-]d.dddddde[+\^\-]dd\*(FR -\*(FC%E\fP like \*(FC%e\fP, but use \*(FCE\fP instead of \*(FCe\*(FR -\*(FC%f\fP a floating point number of the form - \*(FC[\-]ddd.dddddd\*(FR -\*(FC%g\fP use \*(FC%e\fP or \*(FC%f\fP, whichever is shorter, with - nonsignificant zeros suppressed -\*(FC%G\fP like \*(FC%g\fP, but use \*(FC%E\fP instead of \*(FC%e\*(FR -\*(FC%o\fP an unsigned octal integer -\*(FC%u\fP an unsigned decimal integer -\*(FC%s\fP a character string -\*(FC%x\fP an unsigned hexadecimal integer -\*(FC%X\fP like \*(FC%x\fP, but use \*(FCABCDEF\fP for 10\(en15 -\*(FC%%\fP A literal \*(FC%\fP; no argument is converted -.sp .5 -.fi -Optional, additional parameters may lie between the \*(FC%\fP -and the control letter: -.sp .5 -.TS -expand; -l lw(2.2i). -\*(CB\*(FIcount\*(FC$\*(FR T{ -use the -\*(FIcount\*(FR'th -argument at this point in the formatting -(a \*(FIpositional specifier\*(FR). -Use in translated versions of -format strings, not in the original text of an AWK program.\*(CD -T} -\*(FC\-\fP T{ -left-justify the expression within its field. -T} -\*(FIspace\fP T{ -for numeric conversions, prefix positive values -with a space and negative values with a -minus sign. -T} -\*(FC+\fP T{ -used before the \*(FIwidth\fP modifier means to always -supply a sign for numeric conversions, even if -the data to be formatted is positive. The \*(FC+\fP -overrides the space modifier. -T} -\*(FC#\fP T{ -use an ``alternate form'' for some control letters. -T} - \*(FC%o\*(FR T{ -supply a leading zero. -T} - \*(FC%x\*(FR, \*(FC%X\*(FR T{ -supply a leading \*(FC0x\*(FR or \*(FC0X\*(FR for a nonzero result. -T} - \*(FC%e\*(FR, \*(FC%E\*(FR, \*(FC%f\*(FR T{ -the result always has a decimal point. 
-T} - \*(FC%g\*(FR, \*(FC%G\*(FR T{ -trailing zeros are not removed. -T} -\*(FC0\fP T{ -a leading zero acts as a flag, indicating output -should be padded with zeros instead of spaces. -This applies even to non-numeric output formats. -Only has an effect when the field width is wider -than the value to be printed. -T} -\*(FIwidth\fP T{ -pad the field to this width. The field is normally -padded with spaces. If the \*(FC0\fP flag has been used, -pad with zeros. -T} -\*(FC.\*(FIprec\*(FR T{ -precision. -The meaning of the \*(FIprec\*(FR varies by control letter: -T} - \*(FC%d\*(FR, \*(FC%o\*(FR, \*(FC%i\*(FR, - \*(FC%u\*(FR, \*(FC%x\*(FR, \*(FC%X\fP T{ -the minimum number of digits to print. -T} - \*(FC%e\*(FR, \*(FC%E\*(FR, \*(FC%f\*(FR T{ -the number of digits to print to the right of the decimal point. -T} - \*(FC%g\*(FR, \*(FC%G\fP T{ -the maximum number of significant digits. -T} - \*(FC%s\fP T{ -the maximum number of characters to print. -T} -.TE -.sp .5 -.fi -The dynamic \*(FIwidth\fP and \*(FIprec\fP capabilities of the ANSI C -\*(FCprintf()\fP routines are supported. -A \*(FC*\fP in place of either the \*(FIwidth\fP or \*(FIprec\fP -specifications causes their values to be taken from -the argument list to \*(FCprintf\fP or \*(FCsprintf()\*(FR. -\*(CBUse \*(FC*\*(FIn\*(FC$\*(FR to use positional specifiers -with a dynamic width or precision.\*(CX -.EB "\s+2\f(HBPRINTF FORMATS\*(FR\s0" - - -.BT - -.\" --- Special Filenames -.ES -.fi -\*(CDWhen doing I/O redirection from either \*(FCprint\fP -or \*(FCprintf\fP into a file or via \*(FCgetline\fP -from a file, all three implementations of \*(FCawk\fP -recognize certain special filenames internally. These filenames -allow access to open file descriptors inherited from the -parent process (usually the shell). -These filenames may also be used on the command line to name data files. -The filenames are: -.sp .5 -.TS -expand; -l lw(2i). -\*(FC"\-"\fP standard input -\*(FC/dev/stdin\fP standard input \*(CR(not \*(MK)\*(CD -\*(FC/dev/stdout\fP standard output -\*(FC/dev/stderr\fP standard error output -.TE -.sp .5 -.fi -\*(CBThe following names are specific to \*(GK. -.sp .5 -.in +.2i -.ti -.2i -\*(FC/dev/fd/\^\*(FIn\*(FR -.br -File associated with the open file descriptor \*(FIn\*(FR. -.ti -.2i -\*(FC/inet/tcp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR -.br -File for TCP/IP connection on local port \*(FIlport\*(FR to -remote host \*(FIrhost\*(FR on remote port \*(FIrport\*(FR. -Use a port of \*(FC0\*(FR to have the system pick a port. -Usable only with the \*(FC|&\*(FR two-way I/O operator. -.ti -.2i -\*(FC/inet/udp/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR -.br -Similar, but use UDP/IP instead of TCP/IP. -.ti -.2i -\*(CR\*(FC/inet/raw/\*(FIlport\*(FC/\*(FIrhost\*(FC/\*(FIrport\*(FR -.br -.\" Similar, but use raw IP sockets. -Reserved for future use.\*(CB -.in -.2i -.sp .5 -.fi -Other special filenames provide access to information about the running -\*(FCgawk\fP process. -Reading from these files returns a single record. -The filenames and what they return are:\*(FR -.sp .5 -.TS -expand; -l lw(2i). -\*(FC/dev/pid\fP process ID of current process -\*(FC/dev/ppid\fP parent process ID of current process -\*(FC/dev/pgrpid\fP process group ID of current process -\*(FC/dev/user\fP T{ -.nf -a single newline-terminated record. -The fields are separated with spaces. 
-\*(FC$1\fP is the return value of \*(FIgetuid\*(FR(2), -\*(FC$2\fP is the return value of \*(FIgeteuid\*(FR(2), -\*(FC$3\fP is the return value of \*(FIgetgid\*(FR(2) , and -\*(FC$4\fP is the return value of \*(FIgetegid\*(FR(2). -.fi -Any additional fields are the group IDs returned -by \*(FIgetgroups\*(FR(2). Multiple groups may not be -supported on all systems. -T} -.TE -.sp .5 -.fi -\*(CRThese filenames are now obsolete. -Use the \*(FCPROCINFO\fP array to obtain the information they provide.\*(CL -.EB "\s+2\f(HBSPECIAL FILENAMES\*(FR\s0" - -.\" --- Builtin Numeric Functions -.ES -.fi -.TS -expand; -l lw(2i). -\*(CD\*(FCatan2(\*(FIy\*(FC, \*(FIx\*(FC)\*(FR the arctangent of \*(FIy/x\fP in radians. -\*(FCcos(\*(FIexpr\*(FC)\*(FR the cosine of \*(FIexpr\fP, which is in radians. -\*(FCexp(\*(FIexpr\*(FC)\*(FR the exponential function (\*(FIe \*(FC^ \*(FIx\*(FR). -\*(FCint(\*(FIexpr\*(FC)\*(FR truncates to integer. -\*(FClog(\*(FIexpr\*(FC)\*(FR the natural logarithm function (base \*(FIe\^\*(FR). -\*(FCrand()\fP a random number between 0 and 1. -\*(FCsin(\*(FIexpr\*(FC)\*(FR the sine of \*(FIexpr\fP, which is in radians. -\*(FCsqrt(\*(FIexpr\*(FC)\*(FR the square root function. -\&\*(FCsrand(\*(FR[\*(FIexpr\^\*(FR]\*(FC)\*(FR T{ -uses \*(FIexpr\fP as a new seed for the random number -generator. If no \*(FIexpr\fP, the time of day is used. -Returns previous seed for the random number -generator.\*(CX -T} -.TE -.EB "\s+2\f(HBNUMERIC FUNCTIONS\*(FR\s0" - - -.BT - -.\" --- Builtin String Functions -.ES -.fi -.in +.2i -.ti -.2i -\*(CB\*(FCasort(\*(FIs\*(FC \*(FR[\*(FC, \*(FId\*(FR]\*(FC)\*(FR -.br -sorts the source array \*(FIs\*(FR, replacing the indices with numeric -values 1 through \*(FIn\*(FR (the number of elements in the array), -and returns the number of elements. -If destination \*(FId\*(FR is supplied, \*(FIs\*(FR is copied to \*(FId\*(FR, -\*(FId\*(FR is sorted, and \*(FIs\*(FR is unchanged.\*(CD -.ti -.2i -\*(CB\*(FCgensub(\*(FIr\*(FC, \*(FIs\*(FC, \*(FIh \*(FR[\*(FC, \*(FIt\*(FR]\*(FC)\*(FR -.br -search the target string -\*(FIt\fP for matches of the regular expression \*(FIr\*(FR. If -\*(FIh\fP is a string beginning with \*(FCg\fP or \*(FCG\*(FR, -replace all matches of \*(FIr\fP with \*(FIs\*(FR. Otherwise, \*(FIh\fP -is a number indicating which match of \*(FIr\fP to replace. -If \*(FIt\fP is not supplied, \*(FC$0\fP is used instead. Within the -replacement text \*(FIs\*(FR, the sequence \*(FC\e\*(FIn\*(FR, -where \*(FIn\fP is a digit from 1 to 9, may be used to indicate just -the text that matched the \*(FIn\*(FRth parenthesized subexpression. -The sequence \*(FC\e0\fP represents the entire matched text, as does -the character \*(FC&\*(FR. Unlike \*(FCsub()\fP and \*(FCgsub()\*(FR, -the modified string is returned as the result of the function, -and the original target string is \*(FInot\fP changed.\*(CD -.ti -.2i -\*(FCgsub(\*(FIr\*(FC, \*(FIs \*(FR[\*(FC, \*(FIt\*(FR]\*(FC)\*(FR -.br -for each substring matching the -regular expression \*(FIr\fP in the string \*(FIt\*(FR, substitute the -string \*(FIs\*(FR, and return the number of substitutions. If -\*(FIt\fP is not supplied, use \*(FC$0\*(FR. An \*(FC&\fP in the -replacement text is replaced with the text that was actually matched. -Use \*(FC\e&\fP to get a literal \*(FC&\*(FR. 
See \*(AM -for a fuller discussion of the rules for \*(FC&\*(FR's and backslashes -in the replacement text of \*(CB\*(FCgensub()\*(FR,\*(CD \*(FCsub()\*(FR -and \*(FCgsub()\*(FR -.ti -.2i -\*(FCindex(\*(FIs\*(FC, \*(FIt\*(FC)\*(FR -.br -returns the index of the string -\*(FIt\fP in the string \*(FIs\*(FR, or 0 if \*(FIt\fP is not present. -.ti -.2i -\*(FClength(\*(FR[\*(FIs\*(FR]\*(FC)\*(FR -.br -returns the length of the string -\*(FIs\*(FR, or the length of \*(FC$0\fP if \*(FIs\fP is not supplied. -.ti -.2i -\*(FCmatch(\*(FIs\*(FC, \*(FIr \*(CB\*(FR[\*(FC, \*(FIa\*(FR]\*(CD\*(FC)\*(FR -.br -returns the position in -\*(FIs\fP where the regular expression \*(FIr\fP occurs, or 0 if -\*(FIr\fP is not present, and sets the values of variables -\*(FCRSTART\fP -and \*(FCRLENGTH\*(FR. -\*(CBIf \*(FIa\*(FR is supplied, the text matching all of \*(FIr\*(FR -is placed in \*(FIa\*(FC[0]\*(FR. If there were parenthesized -subexpressions, the matching texts are placed -in \*(FIa\*(FC[1]\*(FR, \*(FIa\*(FC[2]\*(FR, and so on.\*(CD -.ti -.2i -\*(FCsplit(\*(FIs\*(FC, \*(FIa \*(FR[\*(FC, \*(FIr\*(FR]\*(FC)\*(FR -.br -splits the string -\*(FIs\fP into the array \*(FIa\fP using the regular expression \*(FIr\*(FR, -and returns the number of fields. If \*(FIr\fP is omitted, \*(FCFS\fP -is used instead. The array \*(FIa\fP is cleared first. -Splitting behaves identically to field splitting. -(See \fHFields\fP, above.) -.ti -.2i -\*(FCsprintf(\*(FIfmt\*(FC, \*(FIexpr-list\*(FC)\*(FR -.br -prints \*(FIexpr-list\fP -according to \*(FIfmt\*(FR, and returns the resulting string. -.ti -.2i -\*(CB\*(FCstrtonum(\*(FIs\*(FC)\*(FR -.br -examines \*(FIs\*(FR, and returns its numeric value. -If \*(FIs\*(FR begins with a leading \*(FC0\*(FR, -\*(FCstrtonum()\*(FR assumes that \*(FIs\*(FR -is an octal number. -If \*(FIs\*(FR begins with a leading \*(FC0x\*(FR -or \*(FC0X\*(FR, \*(FCstrtonum()\*(FR assumes that -\*(FIs\*(FR is a hexadecimal number.\*(CD -.ti -.2i -\*(FCsub(\*(FIr\*(FC, \*(FIs \*(FR[\*(FC, \*(FIt\*(FR]\*(FC)\*(FR -.br -just like -\*(FCgsub()\*(FR, but only the first matching substring is replaced. -.ti -.2i -\*(FCsubstr(\*(FIs\*(FC, \*(FIi \*(FR[\*(FC, \*(FIn\*(FR]\*(FC)\*(FR -.br -returns the at most -\*(FIn\*(FR-character substring of \*(FIs\fP starting at \*(FIi\*(FR. -If \*(FIn\fP is omitted, the rest of \*(FIs\fP is used. -.ti -.2i -\*(FCtolower(\*(FIstr\*(FC)\*(FR -.br -returns a copy of the string \*(FIstr\*(FR, -with all the upper-case characters in \*(FIstr\fP translated to their -corresponding lower-case counterparts. Non-alphabetic characters are -left unchanged.\*(CX -.in -.2i -.EB "\s+2\f(HBSTRING FUNCTIONS\*(FR\s0" - -.BT - -.\" --- Builtin String Functions -.ES -.fi -.in +.2i -.ti -.2i -\*(CD\*(FCtoupper(\*(FIstr\*(FC)\*(FR -.br -returns a copy of the string \*(FIstr\*(FR, -with all the lower-case characters in \*(FIstr\fP translated to their -corresponding upper-case counterparts. Non-alphabetic characters are -left unchanged.\*(CX -.in -.2i -.EB "\s+2\f(HBSTRING FUNCTIONS (continued)\*(FR\s0" - -.\" --- Builtin Time Functions -.ES -.fi -\*(CD\*(GK -provides the following functions for obtaining time stamps and -formatting them. -.sp .5 -.fi -.in +.2i -.ti -.2i -\*(FCmktime(\*(FIdatespec\*(FC)\*(FR -.br -turns \*(FIdatespec\fP into a time -stamp of the same form as returned by \*(FCsystime()\*(FR. -The \*(FIdatespec\fP is a string of the form -\*(FC"\*(FIYYYY MM DD HH MM SS[ DST]\*(FC"\*(FR. 
-.ti -.2i -\*(FCstrftime(\*(FR[\*(FIformat \*(FR[\*(FC, \*(FItimestamp\*(FR]]\*(FC)\*(FR -.br -formats \*(FItimestamp\fP -according to the specification in \*(FIformat\*(FR. The -\*(FItimestamp\fP should be of the same form as returned by -\*(FCsystime()\*(FR. -If \*(FItimestamp\fP is missing, the current time of day is used. If -\*(FIformat\fP is missing, a default format equivalent to the output -of \*(FIdate\*(FR(1) is used. -.ti -.2i -\*(FCsystime()\fP -.br -returns the current time of day as the number of -seconds since the Epoch.\*(CB -.in -.2i -.EB "\s+2\f(HBTIME FUNCTIONS (\*(GK\f(HB)\*(FR\s0" - -.\" --- Builtin Bit Manipulation Functions -.ES -.fi -\*(CD\*(GK -provides the following functions for doing bitwise operations. -.sp .5 -.fi -.in +.2i -.ti -.2i -\*(FCand(\*(FIv1\*(FC, \*(FIv2\*(FC)\*(FR -.br -returns the bitwise AND of the values provided by -\*(FIv1\*(FR and \*(FIv2\*(FR. -.ti -.2i -\*(FCcompl(\*(FIval\*(FC)\*(FR -.br -returns the bitwise complement of -\*(FIval\*(FR. -.ti -.2i -\*(FClshift(\*(FIval\*(FC, \*(FIcount\*(FC)\*(FR -.br -returns the value of \*(FIval\*(FR, -shifted left by \*(FIcount\*(FR bits. -.ti -.2i -\*(FCor(\*(FIv1\*(FC, \*(FIv2\*(FC)\*(FR -.br -returns the bitwise OR of the values provided by -\*(FIv1\*(FR and \*(FIv2\*(FR. -.ti -.2i -\*(FCrshift(\*(FIval\*(FC, \*(FIcount\*(FC)\*(FR -.br -returns the value of \*(FIval\*(FR, -shifted right by \*(FIcount\*(FR bits. -.ti -.2i -\*(FCxor(\*(FIv1\*(FC, \*(FIv2\*(FC)\*(FR -.br -teturns the bitwise XOR of the values provided by -\*(FIv1\*(FR and \*(FIv2\*(FR.\*(CB -.in -.2i -.EB "\s+2\f(HBBIT MANIPULATION FUNCTIONS (\*(GK\f(HB)\*(FR\s0" - -.\" --- Builtin Internationalizatin Functions -.ES -.fi -\*(CD\*(GK -provides the following functions for runtime message translation. -.in +.2i -.sp .5 -.ti -.2i -\*(FCbindtextdomain(\*(FIdirectory \*(FR[\*(FC, \*(FIdomain\*(FR]\*(FC)\*(FR -.br -specifies the directory where \*(GK looks for the \*(FC\&.mo\*(FR -files, in case they -will not or cannot be placed in the ``standard'' locations -(e.g., during testing.) -It returns the directory where \*(FIdomain\*(FR is ``bound.'' -.sp .5 -The default \*(FIdomain\*(FR is the value of \*(FCTEXTDOMAIN\*(FR. -When \*(FIdirectory\*(FR is the null string (\*(FC"\^"\*(FR), -\*(FCbindtextdomain()\*(FR returns the current binding for the -given \*(FIdomain\*(FR. -.ti -.2i -\*(FCdcgettext(\*(FIstring \*(FR[\*(FC, \*(FIdomain \*(FR[\*(FC, \*(FIcategory\*(FR]]\*(FC)\*(FR -.br -returns the translation of \*(FIstring\*(FR in text domain -\*(FIdomain\*(FR for locale category \*(FIcategory\*(FR. -The default value for \*(FIdomain\*(FR is the current value of \*(FCTEXTDOMAIN\*(FR. -The default value for \*(FIcategory\*(FR is \*(FC"LC_MESSAGES"\*(FR. -.sp .5 -If you supply a value for \*(FIcategory\*(FR, it must be a string equal to -one of the known locale categories. -You must also supply a text domain. Use \*(FCTEXTDOMAIN\*(FR -to use the current domain.\*(CB -.in -.2i -.EB "\s+2\f(HBINTERNATIONALIZATION (\*(GK\f(HB)\*(FR\s0" - -.BT - -.\" --- User-defined Functions -.ES -.fi -\*(CDFunctions in AWK are defined as follows: -.sp .5 -.nf - \*(FCfunction \*(FIname\*(FC(\*(FIparameter list\*(FC) - { - \*(FIstatements - \*(FC}\*(FR -.sp .5 -.fi -Functions are executed when they are called from within expressions -in either patterns or actions. Actual parameters supplied in the function -call instantiate the formal parameters declared in the function. -Arrays are passed by reference, other variables are passed by value. 
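As a minimal sketch of this rule (the function and variable names are invented for illustration), fill() updates its array argument in the caller, while its scalar argument is only a copy; the extra parameter i serves as a local variable, a convention described just below:

    function fill(arr, count,    i)
    {
        for (i = 1; i <= count; i++)
            arr[i] = i * i
        count = 0                      # changes only the local copy
    }

    BEGIN {
        n = 3
        fill(sq, n)
        print n, sq[1], sq[2], sq[3]   # prints: 3 1 4 9
    }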
-.sp .5 -Local variables are declared as extra parameters -in the parameter list. The convention is to separate local variables from -real parameters by extra spaces in the parameter list. For example: -.sp .5 -.nf - \*(FC# a and b are local - function f(p, q, a, b) - { - \&..... - } -.sp .3 - /abc/ { ... ; f(1, 2) ; ... }\*(FR -.fi -.sp .5 -The left parenthesis in a function call is required -to immediately follow the function name -without any intervening white space. -This is to avoid a syntactic ambiguity with the concatenation operator. -This restriction does not apply to the built-in functions. -.sp .5 -Functions may call each other and may be recursive. -Function parameters used as local variables are initialized -to the null string and the number zero upon function invocation. -.sp .5 -Use \*(FCreturn\fP to return a value from a function. The return value -is undefined if no value is provided, or if the function returns by -``falling off'' the end. -.sp .5 -\*(CLThe word -\*(FCfunc\fP -may be used in place of -\*(FCfunction\*(FR. -\*(CRNote: This usage is deprecated.\*(CX -.EB "\s+2\f(HBUSER-DEFINED FUNCTIONS\*(FR\s0" - -.\" --- Localization -.ES -.fi -\*(CDThere are several steps involved in producing and running a localizable -\*(AK program. -.sp .5 -1. Add a \*(FCBEGIN\*(FR action to assign a value to the -\*(FCTEXTDOMAIN\*(FR variable to set the text domain for -your program. -.sp .5 -.ti +5n -\*(FCBEGIN { TEXTDOMAIN = "myprog" }\*(FR -.sp .5 -This allows \*(GK to find the \*(FC\&.mo\*(FR -file associated with your program. -Without this step, \*(GK uses the \*(FCmessages\*(FR text domain, -which probably won't work. -.sp .5 -2. Mark all strings that should be translated with leading underscores. -.sp .5 -3. Use the \*(FCdcgettext()\*(FR -and/or \*(FCbindtextdomain()\*(FR -functions in your program, as necessary or appropriate. -.sp .5 -4. Run -.sp .5 -.ti +5n -\*(FCgawk \-\^\-gen\-po \-f myprog.awk > myprog.po\*(FR -.sp .5 -to generate a \*(FC\&.po\*(FR -file for your program. -.sp .5 -5. Provide appropriate translations, and build and install a corresponding -\*(FC\&.mo\*(FR file. -.sp .5 -The internationalization features are described in full detail in \*(AM.\*(CB -.EB "\s+2\f(HBLOCALIZATION (\*(GK\f(HB)\*(FR\s0" - - -.BT - -.\" --- Extensions -.ES -.fi -.in +.2i -.ti -.2i -\*(CD\*(FCextension(\*(FIlib\*(FC, \*(FIfunc\*(FC)\*(FR -.br -dynamically load the shared library -\*(FIlib\*(FR -and call -\*(FIfunc\*(FR -in it to initialize the library. -This adds new built-in functions to \*(GK. -It returns the value returned by -\*(FIfunc\*(FR.\*(CB -.in -.2i -.EB "\s+2\f(HBDYNAMIC EXTENSIONS (\*(GK\f(HB)\*(FR\s0" - -.\" --- Environment Variables -.ES -.fi -\*(CDThe environment variable \*(FCAWKPATH\fP specifies a search path to use -when finding source files named with the \*(FC\-f\fP -option. -The default path is -\*(FC".:/usr/local/share/awk"\*(FR, -if this variable does not exist. -(The actual directory may vary, -depending upon how \*(GK was built and installed.) -If a file name given to the \*(FC\-f\fP option contains a ``/'' character, -no path search is performed. -.sp .5 -If \*(FCPOSIXLY_CORRECT\fP exists in the environment, then \*(GK -behaves exactly as if \*(FC\-\^\-posix\fP had been specified on the -command line.\*(CB -.EB "\s+2\f(HBENVIRONMENT VARIABLES (\*(GK\f(HB)\*(FR\s0" - -.\" --- Historical Features -.ES -.fi -\*(CD\*(GK supports two features of historical AWK implementations. 
-First, it is possible to call the \*(FClength()\fP -built-in function not only with no argument, but even without parentheses. -This feature is marked as ``deprecated'' in the POSIX standard, and \*(GK -issues a warning about its use if \*(FC\-\^\-lint\fP -is specified on the command line. -.sp .5 -The other feature is the use of \*(FCcontinue\fP -or \*(FCbreak\fP statements outside the body of a -\*(FCwhile\*(FR, \*(FCfor\*(FR, or \*(FCdo\fP loop. -Historical AWK implementations have treated such usage as -equivalent to the \*(FCnext\fP statement. -\*(GK supports this usage if \*(FC\-\^\-traditional\fP -is specified.\*(CB -.EB "\s+2\f(HBHISTORICAL FEATURES (\*(GK\f(HB)\*(FR\s0" - - -.\" --- FTP Information -.ES -.nf -\*(CDHost: \*(FCgnudist.gnu.org\*(FR -File: \*(FC/gnu/gawk/gawk-3.1.0.tar.gz\fP -.in +.2i -.fi -GNU \*(AK (\*(GK). There may be a later version. -.in -.2i -.nf -.sp .5 -\*(FChttp://cm.bell-labs.com/who/bwk/awk.tar.gz\fP -.in +.2i -.fi -\*(NK. This version requires an ANSI C compiler; -GCC (the GNU C compiler) works well. -.in -.2i -.nf -.sp .5 -Host: \*(FCftp.whidbey.net\*(FR -File: \*(FC/pub/brennan/mawk1.3.3.tar.gz\fP -.in +.2i -.fi -Michael Brennan's \*(MK. There may be a newer version.\*(CX -.in -.2i -.EB "\s+2\f(HBFTP/HTTP INFORMATION\*(FR\s0" - -.\" --- Copying Permissions -.ES -.fi -\*(CDCopyright \(co 1996-2001 Free Software Foundation, Inc. -.sp .5 -Permission is granted to make and distribute verbatim copies of this -reference card provided the copyright notice and this permission notice -are preserved on all copies. -.sp .5 -Permission is granted to copy and distribute modified versions of this -reference card under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. -.sp .5 -Permission is granted to copy and distribute translations of this -reference card into another language, under the above conditions for -modified versions, except that this permission notice may be stated in a -translation approved by the Foundation.\*(CX -.EB "\s+2\f(HBCOPYING PERMISSIONS\*(FR\s0" -.BT diff --git a/contrib/awk/doc/awkforai.txt b/contrib/awk/doc/awkforai.txt deleted file mode 100644 index 3fca320..0000000 --- a/contrib/awk/doc/awkforai.txt +++ /dev/null @@ -1,150 +0,0 @@ -Draft for ACM SIGPLAN Patterns (Language Trends) - -1996 - -Why GAWK for AI? - -Ronald P. Loui - -Most people are surprised when I tell them what language we use in our -undergraduate AI programming class. That's understandable. We use -GAWK. GAWK, Gnu's version of Aho, Weinberger, and Kernighan's old -pattern scanning language isn't even viewed as a programming language by -most people. Like PERL and TCL, most prefer to view it as a "scripting -language." It has no objects; it is not functional; it does no built-in -logic programming. Their surprise turns to puzzlement when I confide -that (a) while the students are allowed to use any language they want; -(b) with a single exception, the best work consistently results from -those working in GAWK. (footnote: The exception was a PASCAL -programmer who is now an NSF graduate fellow getting a Ph.D. in -mathematics at Harvard.) Programmers in C, C++, and LISP haven't even -been close (we have not seen work in PROLOG or JAVA). - -Why GAWK? - -There are some quick answers that have to do with the pragmatics of -undergraduate programming. 
Then there are more instructive answers that -might be valuable to those who debate programming paradigms or to those -who study the history of AI languages. And there are some deep -philosophical answers that expose the nature of reasoning and symbolic -AI. I think the answers, especially the last ones, can be even more -surprising than the observed effectiveness of GAWK for AI. - -First it must be confessed that PERL programmers can cobble together AI -projects well, too. Most of GAWK's attractiveness is reproduced in -PERL, and the success of PERL forebodes some of the success of GAWK. -Both are powerful string-processing languages that allow the programmer -to exploit many of the features of a UNIX environment. Both provide -powerful constructions for manipulating a wide variety of data in -reasonably efficient ways. Both are interpreted, which can reduce -development time. Both have short learning curves. The GAWK manual can -be consumed in a single lab session and the language can be mastered by -the next morning by the average student. GAWK's automatic -initialization, implicit coercion, I/O support and lack of pointers -forgive many of the mistakes that young programmers are likely to make. -Those who have seen C but not mastered it are happy to see that GAWK -retains some of the same sensibilities while adding what must be -regarded as spoonsful of syntactic sugar. Some will argue that -PERL has superior functionality, but for quick AI applications, the -additional functionality is rarely missed. In fact, PERL's terse syntax -is not friendly when regular expressions begin to proliferate and -strings contain fragments of HTML, WWW addresses, or shell commands. -PERL provides new ways of doing things, but not necessarily ways of -doing new things. - -In the end, despite minor difference, both PERL and GAWK minimize -programmer time. Neither really provides the programmer the setting in -which to worry about minimizing run-time. - -There are further simple answers. Probably the best is the fact that -increasingly, undergraduate AI programming is involving the Web. Oren -Etzioni (University of Washington, Seattle) has for a while been arguing -that the "softbot" is replacing the mechanical engineers' robot as the -most glamorous AI testbed. If the artifact whose behavior needs to be -controlled in an intelligent way is the software agent, then a language -that is well-suited to controlling the software environment is the -appropriate language. That would imply a scripting language. If the -robot is KAREL, then the right language is "turn left; turn right." If -the robot is Netscape, then the right language is something that can -generate "netscape -remote 'openURL(http://cs.wustl.edu/~loui)'" with -elan. - -Of course, there are deeper answers. Jon Bentley found two pearls in -GAWK: its regular expressions and its associative arrays. GAWK asks -the programmer to use the file system for data organization and the -operating system for debugging tools and subroutine libraries. There is -no issue of user-interface. This forces the programmer to return to the -question of what the program does, not how it looks. There is no time -spent programming a binsort when the data can be shipped to /bin/sort -in no time. (footnote: I am reminded of my IBM colleague Ben Grosof's -advice for Palo Alto: Don't worry about whether it's highway 101 or 280. -Don't worry if you have to head south for an entrance to go north. Just -get on the highway as quickly as possible.) 
- -There are some similarities between GAWK and LISP that are illuminating. -Both provided a powerful uniform data structure (the associative array -implemented as a hash table for GAWK and the S-expression, or list of -lists, for LISP). Both were well-supported in their environments (GAWK -being a child of UNIX, and LISP being the heart of lisp machines). Both -have trivial syntax and find their power in the programmer's willingness -to use the simple blocks to build a complex approach. - -Deeper still, is the nature of AI programming. AI is about -functionality and exploratory programming. It is about bottom-up design -and the building of ambitions as greater behaviors can be demonstrated. -Woe be to the top-down AI programmer who finds that the bottom-level -refinements, "this subroutine parses the sentence," cannot actually be -implemented. Woe be to the programmer who perfects the data structures -for that heapsort when the whole approach to the high-level problem -needs to be rethought, and the code is sent to the junkheap the next day. - -AI programming requires high-level thinking. There have always been a few -gifted programmers who can write high-level programs in assembly language. -Most however need the ambient abstraction to have a higher floor. - -Now for the surprising philosophical answers. First, AI has discovered -that brute-force combinatorics, as an approach to generating intelligent -behavior, does not often provide the solution. Chess, neural nets, and -genetic programming show the limits of brute computation. The -alternative is clever program organization. (footnote: One might add -that the former are the AI approaches that work, but that is easily -dismissed: those are the AI approaches that work in general, precisely -because cleverness is problem-specific.) So AI programmers always want -to maximize the content of their program, not optimize the efficiency -of an approach. They want minds, not insects. Instead of enumerating -large search spaces, they define ways of reducing search, ways of -bringing different knowledge to the task. A language that maximizes -what the programmer can attempt rather than one that provides tremendous -control over how to attempt it, will be the AI choice in the end. - -Second, inference is merely the expansion of notation. No matter whether -the logic that underlies an AI program is fuzzy, probabilistic, deontic, -defeasible, or deductive, the logic merely defines how strings can be -transformed into other strings. A language that provides the best -support for string processing in the end provides the best support for -logic, for the exploration of various logics, and for most forms of -symbolic processing that AI might choose to call "reasoning" instead of -"logic." The implication is that PROLOG, which saves the AI programmer -from having to write a unifier, saves perhaps two dozen lines of GAWK -code at the expense of strongly biasing the logic and representational -expressiveness of any approach. - -I view these last two points as news not only to the programming language -community, but also to much of the AI community that has not reflected on -the past decade's lessons. - -In the puny language, GAWK, which Aho, Weinberger, and Kernighan thought -not much more important than grep or sed, I find lessons in AI's trends, -AI's history, and the foundations of AI. 
What I have found not only -surprising but also hopeful, is that when I have approached the AI -people who still enjoy programming, some of them are not the least bit -surprised. - - -R. Loui (loui@ai.wustl.edu) is Associate Professor of Computer Science, -at Washington University in St. Louis. He has published in AI Journal, -Computational Intelligence, ACM SIGART, AI Magazine, AI and Law, the ACM -Computing Surveys Symposium on AI, Cognitive Science, Minds and -Machines, Journal of Philosophy, and is on this year's program -committees for AAAI (National AI conference) and KR (Knowledge -Representation and Reasoning). diff --git a/contrib/awk/doc/cardfonts b/contrib/awk/doc/cardfonts deleted file mode 100644 index dc44ce1..0000000 --- a/contrib/awk/doc/cardfonts +++ /dev/null @@ -1,37 +0,0 @@ -.\" AWK Reference Card --- Arnold Robbins, arnold@gnu.org -.\" cardfonts --- this file sets the fonts to use for the reference card -.\" -.\" Copyright (C) 1996 Free Software Foundation, Inc. -.\" -.\" Permission is granted to make and distribute verbatim copies of -.\" this reference card provided the copyright notice and this permission -.\" notice are preserved on all copies. -.\" -.\" Permission is granted to process this file through troff and print the -.\" results, provided the printed document carries copying permission -.\" notice identical to this one except for the removal of this paragraph -.\" (this paragraph not being relevant to the printed reference card). -.\" -.\" Permission is granted to copy and distribute modified versions of this -.\" reference card under the conditions for verbatim copying, provided that -.\" the entire resulting derived work is distributed under the terms of a -.\" permission notice identical to this one. -.\" -.\" Permission is granted to copy and distribute translations of this -.\" reference card into another language, under the above conditions for -.\" modified versions, except that this permission notice may be stated in -.\" a translation approved by the Foundation. -.\" -.ig -Strings for inline font change. -FR - font roman -FI - font italic -FC - font courier -.. -.ds FR \fR -.ds FI \fI -.ds FC \f(CB -.ds RN Times Roman -.ds IN Times Italic -.ds CN Courier Bold -.ds AM \fIGAWK: Effective AWK Programming\fP diff --git a/contrib/awk/doc/colors b/contrib/awk/doc/colors deleted file mode 100644 index 933d25e..0000000 --- a/contrib/awk/doc/colors +++ /dev/null @@ -1,39 +0,0 @@ -.\" AWK Reference Card --- Arnold Robbins, arnold@gnu.org -.\" This file sets the colors to use. -.\" -.\" Copyright (C) 1996,97,99 Free Software Foundation, Inc. -.\" -.\" Permission is granted to make and distribute verbatim copies of -.\" this reference card provided the copyright notice and this permission -.\" notice are preserved on all copies. -.\" -.\" Permission is granted to process this file through troff and print the -.\" results, provided the printed document carries copying permission -.\" notice identical to this one except for the removal of this paragraph -.\" (this paragraph not being relevant to the printed reference card). -.\" -.\" Permission is granted to copy and distribute modified versions of this -.\" reference card under the conditions for verbatim copying, provided that -.\" the entire resulting derived work is distributed under the terms of a -.\" permission notice identical to this one. 
-.\" -.\" Permission is granted to copy and distribute translations of this -.\" reference card into another language, under the above conditions for -.\" modified versions, except that this permission notice may be stated in -.\" a translation approved by the Foundation. -.\" -.ig -Strings for inline color change. -CR - color red -CG - color green -CL - color light blue -CB - color blue -CD - color dark, i.e. black -CX - color boX, i.e. for the surrounding boxes (red for now) -.. -.ds CR \X'ps: exec 0 .96 .65 0 setcmykcolor' -.ds CG \X'ps: exec 1.0 0 .51 .43 setcmykcolor' -.ds CL \X'ps: exec .69 .34 0 0 setcmykcolor' -.ds CB \X'ps: exec 1 .72 0 .06 setcmykcolor' -.ds CD \X'ps: exec 1 1 1 1 setcmykcolor' -.ds CX \*(CG diff --git a/contrib/awk/doc/gawk.1 b/contrib/awk/doc/gawk.1 deleted file mode 100644 index 3e3c62b..0000000 --- a/contrib/awk/doc/gawk.1 +++ /dev/null @@ -1,3322 +0,0 @@ -.ds PX \s-1POSIX\s+1 -.ds UX \s-1UNIX\s+1 -.ds AN \s-1ANSI\s+1 -.ds GN \s-1GNU\s+1 -.ds AK \s-1AWK\s+1 -.ds EP \fIGAWK: Effective AWK Programming\fP -.if !\n(.g \{\ -. if !\w|\*(lq| \{\ -. ds lq `` -. if \w'\(lq' .ds lq "\(lq -. \} -. if !\w|\*(rq| \{\ -. ds rq '' -. if \w'\(rq' .ds rq "\(rq -. \} -.\} -.TH GAWK 1 "May 29 2001" "Free Software Foundation" "Utility Commands" -.SH NAME -gawk \- pattern scanning and processing language -.SH SYNOPSIS -.B gawk -[ \*(PX or \*(GN style options ] -.B \-f -.I program-file -[ -.B \-\^\- -] file .\|.\|. -.br -.B gawk -[ \*(PX or \*(GN style options ] -[ -.B \-\^\- -] -.I program-text -file .\|.\|. -.sp -.B pgawk -[ \*(PX or \*(GN style options ] -.B \-f -.I program-file -[ -.B \-\^\- -] file .\|.\|. -.br -.B pgawk -[ \*(PX or \*(GN style options ] -[ -.B \-\^\- -] -.I program-text -file .\|.\|. -.SH DESCRIPTION -.I Gawk -is the \*(GN Project's implementation of the \*(AK programming language. -It conforms to the definition of the language in -the \*(PX 1003.2 Command Language And Utilities Standard. -This version in turn is based on the description in -.IR "The AWK Programming Language" , -by Aho, Kernighan, and Weinberger, -with the additional features found in the System V Release 4 version -of \*(UX -.IR awk . -.I Gawk -also provides more recent Bell Laboratories -.I awk -extensions, and a number of \*(GN-specific extensions. -.PP -.I Pgawk -is the profiling version of -.IR gawk . -It is identical in every way to -.IR gawk , -except that programs run more slowly, -and it automatically produces an execution profile in the file -.B awkprof.out -when done. -See the -.B \-\^\-profile -option, below. -.PP -The command line consists of options to -.I gawk -itself, the \*(AK program text (if not supplied via the -.B \-f -or -.B \-\^\-file -options), and values to be made -available in the -.B ARGC -and -.B ARGV -pre-defined \*(AK variables. -.SH OPTION FORMAT -.PP -.I Gawk -options may be either traditional \*(PX one letter options, -or \*(GN style long options. \*(PX options start with a single \*(lq\-\*(rq, -while long options start with \*(lq\-\^\-\*(rq. -Long options are provided for both \*(GN-specific features and -for \*(PX-mandated features. -.PP -Following the \*(PX standard, -.IR gawk -specific -options are supplied via arguments to the -.B \-W -option. Multiple -.B \-W -options may be supplied -Each -.B \-W -option has a corresponding long option, as detailed below. -Arguments to long options are either joined with the option -by an -.B = -sign, with no intervening spaces, or they may be provided in the -next command line argument. 
-Long options may be abbreviated, as long as the abbreviation -remains unique. -.SH OPTIONS -.PP -.I Gawk -accepts the following options, listed alphabetically. -.TP -.PD 0 -.BI \-F " fs" -.TP -.PD -.BI \-\^\-field-separator " fs" -Use -.I fs -for the input field separator (the value of the -.B FS -predefined -variable). -.TP -.PD 0 -\fB\-v\fI var\fB\^=\^\fIval\fR -.TP -.PD -\fB\-\^\-assign \fIvar\fB\^=\^\fIval\fR -Assign the value -.I val -to the variable -.IR var , -before execution of the program begins. -Such variable values are available to the -.B BEGIN -block of an \*(AK program. -.TP -.PD 0 -.BI \-f " program-file" -.TP -.PD -.BI \-\^\-file " program-file" -Read the \*(AK program source from the file -.IR program-file , -instead of from the first command line argument. -Multiple -.B \-f -(or -.BR \-\^\-file ) -options may be used. -.TP -.PD 0 -.BI \-mf " NNN" -.TP -.PD -.BI \-mr " NNN" -Set various memory limits to the value -.IR NNN . -The -.B f -flag sets the maximum number of fields, and the -.B r -flag sets the maximum record size. These two flags and the -.B \-m -option are from the Bell Laboratories research version of \*(UX -.IR awk . -They are ignored by -.IR gawk , -since -.I gawk -has no pre-defined limits. -.TP -.PD 0 -.B "\-W compat" -.TP -.PD 0 -.B "\-W traditional" -.TP -.PD 0 -.B \-\^\-compat -.TP -.PD -.B \-\^\-traditional -Run in -.I compatibility -mode. In compatibility mode, -.I gawk -behaves identically to \*(UX -.IR awk ; -none of the \*(GN-specific extensions are recognized. -The use of -.B \-\^\-traditional -is preferred over the other forms of this option. -See -.BR "GNU EXTENSIONS" , -below, for more information. -.TP -.PD 0 -.B "\-W copyleft" -.TP -.PD 0 -.B "\-W copyright" -.TP -.PD 0 -.B \-\^\-copyleft -.TP -.PD -.B \-\^\-copyright -Print the short version of the \*(GN copyright information message on -the standard output and exit successfully. -.TP -.PD 0 -\fB\-W dump-variables\fR[\fB=\fIfile\fR] -.TP -.PD -\fB\-\^\-dump-variables\fR[\fB=\fIfile\fR] -Print a sorted list of global variables, their types and final values to -.IR file . -If no -.I file -is provided, -.I gawk -uses a file named -.I awkvars.out -in the current directory. -.sp .5 -Having a list of all the global variables is a good way to look for -typographical errors in your programs. -You would also use this option if you have a large program with a lot of -functions, and you want to be sure that your functions don't -inadvertently use global variables that you meant to be local. -(This is a particularly easy mistake to make with simple variable -names like -.BR i , -.BR j , -and so on.) -.TP -.PD 0 -.B "\-W help" -.TP -.PD 0 -.B "\-W usage" -.TP -.PD 0 -.B \-\^\-help -.TP -.PD -.B \-\^\-usage -Print a relatively short summary of the available options on -the standard output. -(Per the -.IR "GNU Coding Standards" , -these options cause an immediate, successful exit.) -.TP -.PD 0 -.BR "\-W lint" [ =fatal ] -.TP -.PD -.BR \-\^\-lint [ =fatal ] -Provide warnings about constructs that are -dubious or non-portable to other \*(AK implementations. -With an optional argument of -.BR fatal , -lint warnings become fatal errors. -This may be drastic, but its use will certainly encourage the -development of cleaner \*(AK programs. -.TP -.PD 0 -.B "\-W lint\-old" -.TP -.PD -.B \-\^\-lint\-old -Provide warnings about constructs that are -not portable to the original version of Unix -.IR awk . 
-.TP -.PD 0 -.B "\-W gen\-po" -.TP -.PD -.B \-\^\-gen\-po -Scan and parse the \*(AK program, and generate a \*(GN -.B \&.po -format file on standard output with entries for all localizable -strings in the program. The program itself is not executed. -See the \*(GN -.I gettext -distribution for more information on -.B \&.po -files. -.TP -.PD 0 -.B "\-W non\-decimal\-data" -.TP -.PD -.B "\-\^\-non\-decimal\-data" -Recognize octal and hexadecimal values in input data. -.I "Use this option with great caution!" -.ig -.\" This option is left undocumented, on purpose. -.TP -.PD 0 -.B "\-W nostalgia" -.TP -.PD -.B \-\^\-nostalgia -Provide a moment of nostalgia for long time -.I awk -users. -.. -.TP -.PD 0 -.B "\-W posix" -.TP -.PD -.B \-\^\-posix -This turns on -.I compatibility -mode, with the following additional restrictions: -.RS -.TP "\w'\(bu'u+1n" -\(bu -.B \ex -escape sequences are not recognized. -.TP -\(bu -Only space and tab act as field separators when -.B FS -is set to a single space, newline does not. -.TP -\(bu -You cannot continue lines after -.B ? -and -.BR : . -.TP -\(bu -The synonym -.B func -for the keyword -.B function -is not recognized. -.TP -\(bu -The operators -.B ** -and -.B **= -cannot be used in place of -.B ^ -and -.BR ^= . -.TP -\(bu -The -.B fflush() -function is not available. -.RE -.TP -.PD 0 -\fB\-W profile\fR[\fB=\fIprof_file\fR] -.TP -.PD -\fB\-\^\-profile\fR[\fB=\fIprof_file\fR] -Send profiling data to -.IR prof_file . -The default is -.BR awkprof.out . -When run with -.IR gawk , -the profile is just a \*(lqpretty printed\*(rq version of the program. -When run with -.IR pgawk , -the profile contains execution counts of each statement in the program -in the left margin and function call counts for each user-defined function. -.TP -.PD 0 -.B "\-W re\-interval" -.TP -.PD -.B \-\^\-re\-interval -Enable the use of -.I "interval expressions" -in regular expression matching -(see -.BR "Regular Expressions" , -below). -Interval expressions were not traditionally available in the -\*(AK language. The \*(PX standard added them, to make -.I awk -and -.I egrep -consistent with each other. -However, their use is likely -to break old \*(AK programs, so -.I gawk -only provides them if they are requested with this option, or when -.B \-\^\-posix -is specified. -.TP -.PD 0 -.BI "\-W source " program-text -.TP -.PD -.BI \-\^\-source " program-text" -Use -.I program-text -as \*(AK program source code. -This option allows the easy intermixing of library functions (used via the -.B \-f -and -.B \-\^\-file -options) with source code entered on the command line. -It is intended primarily for medium to large \*(AK programs used -in shell scripts. -.TP -.PD 0 -.B "\-W version" -.TP -.PD -.B \-\^\-version -Print version information for this particular copy of -.I gawk -on the standard output. -This is useful mainly for knowing if the current copy of -.I gawk -on your system -is up to date with respect to whatever the Free Software Foundation -is distributing. -This is also useful when reporting bugs. -(Per the -.IR "GNU Coding Standards" , -these options cause an immediate, successful exit.) -.TP -.PD 0 -.B \-\^\- -Signal the end of options. This is useful to allow further arguments to the -\*(AK program itself to start with a \*(lq\-\*(rq. -This is mainly for consistency with the argument parsing convention used -by most other \*(PX programs. -.PP -In compatibility mode, -any other options are flagged as invalid, but are otherwise ignored. 
-In normal operation, as long as program text has been supplied, unknown -options are passed on to the \*(AK program in the -.B ARGV -array for processing. This is particularly useful for running \*(AK -programs via the \*(lq#!\*(rq executable interpreter mechanism. -.SH AWK PROGRAM EXECUTION -.PP -An \*(AK program consists of a sequence of pattern-action statements -and optional function definitions. -.RS -.PP -\fIpattern\fB { \fIaction statements\fB }\fR -.br -\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements\fB }\fR -.RE -.PP -.I Gawk -first reads the program source from the -.IR program-file (s) -if specified, -from arguments to -.BR \-\^\-source , -or from the first non-option argument on the command line. -The -.B \-f -and -.B \-\^\-source -options may be used multiple times on the command line. -.I Gawk -reads the program text as if all the -.IR program-file s -and command line source texts -had been concatenated together. This is useful for building libraries -of \*(AK functions, without having to include them in each new \*(AK -program that uses them. It also provides the ability to mix library -functions with command line programs. -.PP -The environment variable -.B AWKPATH -specifies a search path to use when finding source files named with -the -.B \-f -option. If this variable does not exist, the default path is -\fB".:/usr/local/share/awk"\fR. -(The actual directory may vary, depending upon how -.I gawk -was built and installed.) -If a file name given to the -.B \-f -option contains a \*(lq/\*(rq character, no path search is performed. -.PP -.I Gawk -executes \*(AK programs in the following order. -First, -all variable assignments specified via the -.B \-v -option are performed. -Next, -.I gawk -compiles the program into an internal form. -Then, -.I gawk -executes the code in the -.B BEGIN -block(s) (if any), -and then proceeds to read -each file named in the -.B ARGV -array. -If there are no files named on the command line, -.I gawk -reads the standard input. -.PP -If a filename on the command line has the form -.IB var = val -it is treated as a variable assignment. The variable -.I var -will be assigned the value -.IR val . -(This happens after any -.B BEGIN -block(s) have been run.) -Command line variable assignment -is most useful for dynamically assigning values to the variables -\*(AK uses to control how input is broken into fields and records. -It is also useful for controlling state if multiple passes are needed over -a single data file. -.PP -If the value of a particular element of -.B ARGV -is empty (\fB""\fR), -.I gawk -skips over it. -.PP -For each record in the input, -.I gawk -tests to see if it matches any -.I pattern -in the \*(AK program. -For each pattern that the record matches, the associated -.I action -is executed. -The patterns are tested in the order they occur in the program. -.PP -Finally, after all the input is exhausted, -.I gawk -executes the code in the -.B END -block(s) (if any). -.SH VARIABLES, RECORDS AND FIELDS -\*(AK variables are dynamic; they come into existence when they are -first used. Their values are either floating-point numbers or strings, -or both, -depending upon how they are used. \*(AK also has one dimensional -arrays; arrays with multiple dimensions may be simulated. -Several pre-defined variables are set as a program -runs; these will be described as needed and summarized below. -.SS Records -Normally, records are separated by newline characters. 
You can control how -records are separated by assigning values to the built-in variable -.BR RS . -If -.B RS -is any single character, that character separates records. -Otherwise, -.B RS -is a regular expression. Text in the input that matches this -regular expression separates the record. -However, in compatibility mode, -only the first character of its string -value is used for separating records. -If -.B RS -is set to the null string, then records are separated by -blank lines. -When -.B RS -is set to the null string, the newline character always acts as -a field separator, in addition to whatever value -.B FS -may have. -.SS Fields -.PP -As each input record is read, -.I gawk -splits the record into -.IR fields , -using the value of the -.B FS -variable as the field separator. -If -.B FS -is a single character, fields are separated by that character. -If -.B FS -is the null string, then each individual character becomes a -separate field. -Otherwise, -.B FS -is expected to be a full regular expression. -In the special case that -.B FS -is a single space, fields are separated -by runs of spaces and/or tabs and/or newlines. -(But see the discussion of -.BR \-\^\-posix , -below). -.B NOTE: -The value of -.B IGNORECASE -(see below) also affects how fields are split when -.B FS -is a regular expression, and how records are separated when -.B RS -is a regular expression. -.PP -If the -.B FIELDWIDTHS -variable is set to a space separated list of numbers, each field is -expected to have fixed width, and -.I gawk -splits up the record using the specified widths. The value of -.B FS -is ignored. -Assigning a new value to -.B FS -overrides the use of -.BR FIELDWIDTHS , -and restores the default behavior. -.PP -Each field in the input record may be referenced by its position, -.BR $1 , -.BR $2 , -and so on. -.B $0 -is the whole record. -Fields need not be referenced by constants: -.RS -.PP -.ft B -n = 5 -.br -print $n -.ft R -.RE -.PP -prints the fifth field in the input record. -.PP -The variable -.B NF -is set to the total number of fields in the input record. -.PP -References to non-existent fields (i.e. fields after -.BR $NF ) -produce the null-string. However, assigning to a non-existent field -(e.g., -.BR "$(NF+2) = 5" ) -increases the value of -.BR NF , -creates any intervening fields with the null string as their value, and -causes the value of -.B $0 -to be recomputed, with the fields being separated by the value of -.BR OFS . -References to negative numbered fields cause a fatal error. -Decrementing -.B NF -causes the values of fields past the new value to be lost, and the value of -.B $0 -to be recomputed, with the fields being separated by the value of -.BR OFS . -.PP -Assigning a value to an existing field -causes the whole record to be rebuilt when -.B $0 -is referenced. -Similarly, assigning a value to -.B $0 -causes the record to be resplit, creating new -values for the fields. -.SS Built-in Variables -.PP -.IR Gawk\^ "'s" -built-in variables are: -.PP -.TP "\w'\fBFIELDWIDTHS\fR'u+1n" -.B ARGC -The number of command line arguments (does not include options to -.IR gawk , -or the program source). -.TP -.B ARGIND -The index in -.B ARGV -of the current file being processed. -.TP -.B ARGV -Array of command line arguments. The array is indexed from -0 to -.B ARGC -\- 1. -Dynamically changing the contents of -.B ARGV -can control the files used for data. -.TP -.B BINMODE -On non-POSIX systems, specifies use of \*(lqbinary\*(rq mode for all file I/O. 
-Numeric values of 1, 2, or 3, specify that input files, output files, or -all files, respectively, should use binary I/O. -String values of \fB"r"\fR, or \fB"w"\fR specify that input files, or output files, -respectively, should use binary I/O. -String values of \fB"rw"\fR or \fB"wr"\fR specify that all files -should use binary I/O. -Any other string value is treated as \fB"rw"\fR, but generates a warning message. -.TP -.B CONVFMT -The conversion format for numbers, \fB"%.6g"\fR, by default. -.TP -.B ENVIRON -An array containing the values of the current environment. -The array is indexed by the environment variables, each element being -the value of that variable (e.g., \fBENVIRON["HOME"]\fP might be -.BR /home/arnold ). -Changing this array does not affect the environment seen by programs which -.I gawk -spawns via redirection or the -.B system() -function. -.TP -.B ERRNO -If a system error occurs either doing a redirection for -.BR getline , -during a read for -.BR getline , -or during a -.BR close() , -then -.B ERRNO -will contain -a string describing the error. -The value is subject to translation in non-English locales. -.TP -.B FIELDWIDTHS -A white-space separated list of fieldwidths. When set, -.I gawk -parses the input into fields of fixed width, instead of using the -value of the -.B FS -variable as the field separator. -.TP -.B FILENAME -The name of the current input file. -If no files are specified on the command line, the value of -.B FILENAME -is \*(lq\-\*(rq. -However, -.B FILENAME -is undefined inside the -.B BEGIN -block -(unless set by -.BR getline ). -.TP -.B FNR -The input record number in the current input file. -.TP -.B FS -The input field separator, a space by default. See -.BR Fields , -above. -.TP -.B IGNORECASE -Controls the case-sensitivity of all regular expression -and string operations. If -.B IGNORECASE -has a non-zero value, then string comparisons and -pattern matching in rules, -field splitting with -.BR FS , -record separating with -.BR RS , -regular expression -matching with -.B ~ -and -.BR !~ , -and the -.BR gensub() , -.BR gsub() , -.BR index() , -.BR match() , -.BR split() , -and -.B sub() -built-in functions all ignore case when doing regular expression -operations. -.B NOTE: -Array subscripting is -.I not -affected, nor is the -.B asort() -function. -.sp .5 -Thus, if -.B IGNORECASE -is not equal to zero, -.B /aB/ -matches all of the strings \fB"ab"\fP, \fB"aB"\fP, \fB"Ab"\fP, -and \fB"AB"\fP. -As with all \*(AK variables, the initial value of -.B IGNORECASE -is zero, so all regular expression and string -operations are normally case-sensitive. -Under Unix, the full ISO 8859-1 Latin-1 character set is used -when ignoring case. -.TP -.B LINT -Provides dynamic control of the -.B \-\^\-lint -option from within an \*(AK program. -When true, -.I gawk -prints lint warnings. When false, it does not. -When assigned the string value \fB"fatal"\fP, -lint warnings become fatal errors, exactly like -.BR \-\^\-lint=fatal . -Any other true value just prints warnings. -.TP -.B NF -The number of fields in the current input record. -.TP -.B NR -The total number of input records seen so far. -.TP -.B OFMT -The output format for numbers, \fB"%.6g"\fR, by default. -.TP -.B OFS -The output field separator, a space by default. -.TP -.B ORS -The output record separator, by default a newline. -.TP -.B PROCINFO -The elements of this array provide access to information about the -running \*(AK program. 
-On some systems, -there may be elements in the array, \fB"group1"\fP through -\fB"group\fIn\fB"\fR for some -.IR n , -which is the number of supplementary groups that the process has. -Use the -.B in -operator to test for these elements. -The following elements are guaranteed to be available: -.RS -.TP \w'\fBPROCINFO["pgrpid"]\fR'u+1n -\fBPROCINFO["egid"]\fP -the value of the -.IR getegid (2) -system call. -.TP -\fBPROCINFO["euid"]\fP -the value of the -.IR geteuid (2) -system call. -.TP -\fBPROCINFO["FS"]\fP -\fB"FS"\fP if field splitting with -.B FS -is in effect, or \fB"FIELDWIDTHS"\fP if field splitting with -.B FIELDWIDTHS -is in effect. -.TP -\fBPROCINFO["gid"]\fP -the value of the -.IR getgid (2) -system call. -.TP -\fBPROCINFO["pgrpid"]\fP -the process group ID of the current process. -.TP -\fBPROCINFO["pid"]\fP -the process ID of the current process. -.TP -\fBPROCINFO["ppid"]\fP -the parent process ID of the current process. -.TP -\fBPROCINFO["uid"]\fP -the value of the -.IR getuid (2) -system call. -.RE -.TP -.B RS -The input record separator, by default a newline. -.TP -.B RT -The record terminator. -.I Gawk -sets -.B RT -to the input text that matched the character or regular expression -specified by -.BR RS . -.TP -.B RSTART -The index of the first character matched by -.BR match() ; -0 if no match. -.TP -.B RLENGTH -The length of the string matched by -.BR match() ; -\-1 if no match. -.TP -.B SUBSEP -The character used to separate multiple subscripts in array -elements, by default \fB"\e034"\fR. -.TP -.B TEXTDOMAIN -The text domain of the \*(AK program; used to find the localized -translations for the program's strings. -.SS Arrays -.PP -Arrays are subscripted with an expression between square brackets -.RB ( [ " and " ] ). -If the expression is an expression list -.RI ( expr ", " expr " .\|.\|.)" -then the array subscript is a string consisting of the -concatenation of the (string) value of each expression, -separated by the value of the -.B SUBSEP -variable. -This facility is used to simulate multiply dimensioned -arrays. For example: -.PP -.RS -.ft B -i = "A";\^ j = "B";\^ k = "C" -.br -x[i, j, k] = "hello, world\en" -.ft R -.RE -.PP -assigns the string \fB"hello, world\en"\fR to the element of the array -.B x -which is indexed by the string \fB"A\e034B\e034C"\fR. All arrays in \*(AK -are associative, i.e. indexed by string values. -.PP -The special operator -.B in -may be used in an -.B if -or -.B while -statement to see if an array has an index consisting of a particular -value. -.PP -.RS -.ft B -.nf -if (val in array) - print array[val] -.fi -.ft -.RE -.PP -If the array has multiple subscripts, use -.BR "(i, j) in array" . -.PP -The -.B in -construct may also be used in a -.B for -loop to iterate over all the elements of an array. -.PP -An element may be deleted from an array using the -.B delete -statement. -The -.B delete -statement may also be used to delete the entire contents of an array, -just by specifying the array name without a subscript. -.SS Variable Typing And Conversion -.PP -Variables and fields -may be (floating point) numbers, or strings, or both. How the -value of a variable is interpreted depends upon its context. If used in -a numeric expression, it will be treated as a number, if used as a string -it will be treated as a string. -.PP -To force a variable to be treated as a number, add 0 to it; to force it -to be treated as a string, concatenate it with the null string. 
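A minimal sketch of these two coercions (variable names are illustrative only):

    {
        n = $1 + 0    # adding 0 forces the first field to a numeric value
        s = $1 ""     # concatenating the null string forces a string value
        printf("as number: %g   as string: |%s|\n", n, s)
    }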
-.PP -When a string must be converted to a number, the conversion is accomplished -using -.IR strtod (3). -A number is converted to a string by using the value of -.B CONVFMT -as a format string for -.IR sprintf (3), -with the numeric value of the variable as the argument. -However, even though all numbers in \*(AK are floating-point, -integral values are -.I always -converted as integers. Thus, given -.PP -.RS -.ft B -.nf -CONVFMT = "%2.2f" -a = 12 -b = a "" -.fi -.ft R -.RE -.PP -the variable -.B b -has a string value of \fB"12"\fR and not \fB"12.00"\fR. -.PP -.I Gawk -performs comparisons as follows: -If two variables are numeric, they are compared numerically. -If one value is numeric and the other has a string value that is a -\*(lqnumeric string,\*(rq then comparisons are also done numerically. -Otherwise, the numeric value is converted to a string and a string -comparison is performed. -Two strings are compared, of course, as strings. -Note that the POSIX standard applies the concept of -\*(lqnumeric string\*(rq everywhere, even to string constants. -However, this is -clearly incorrect, and -.I gawk -does not do this. -(Fortunately, this is fixed in the next version of the standard.) -.PP -Note that string constants, such as \fB"57"\fP, are -.I not -numeric strings, they are string constants. -The idea of \*(lqnumeric string\*(rq -only applies to fields, -.B getline -input, -.BR FILENAME , -.B ARGV -elements, -.B ENVIRON -elements and the elements of an array created by -.B split() -that are numeric strings. -The basic idea is that -.IR "user input" , -and only user input, that looks numeric, -should be treated that way. -.PP -Uninitialized variables have the numeric value 0 and the string value "" -(the null, or empty, string). -.SS Octal and Hexadecimal Constants -Starting with version 3.1 of -.I gawk , -you may use C-style octal and hexadecimal constants in your AWK -program source code. -For example, the octal value -.B 011 -is equal to decimal -.BR 9 , -and the hexadecimal value -.B 0x11 -is equal to decimal 17. -.SS String Constants -.PP -String constants in \*(AK are sequences of characters enclosed -between double quotes (\fB"\fR). Within strings, certain -.I "escape sequences" -are recognized, as in C. These are: -.PP -.TP "\w'\fB\e\^\fIddd\fR'u+1n" -.B \e\e -A literal backslash. -.TP -.B \ea -The \*(lqalert\*(rq character; usually the \s-1ASCII\s+1 \s-1BEL\s+1 character. -.TP -.B \eb -backspace. -.TP -.B \ef -form-feed. -.TP -.B \en -newline. -.TP -.B \er -carriage return. -.TP -.B \et -horizontal tab. -.TP -.B \ev -vertical tab. -.TP -.BI \ex "\^hex digits" -The character represented by the string of hexadecimal digits following -the -.BR \ex . -As in \*(AN C, all following hexadecimal digits are considered part of -the escape sequence. -(This feature should tell us something about language design by committee.) -E.g., \fB"\ex1B"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. -.TP -.BI \e ddd -The character represented by the 1-, 2-, or 3-digit sequence of octal -digits. -E.g., \fB"\e033"\fR is the \s-1ASCII\s+1 \s-1ESC\s+1 (escape) character. -.TP -.BI \e c -The literal character -.IR c\^ . -.PP -The escape sequences may also be used inside constant regular expressions -(e.g., -.B "/[\ \et\ef\en\er\ev]/" -matches whitespace characters). -.PP -In compatibility mode, the characters represented by octal and -hexadecimal escape sequences are treated literally when used in -regular expression constants. Thus, -.B /a\e52b/ -is equivalent to -.BR /a\e*b/ . 
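As a small illustration of the comparison rules above, assume an input record whose first field is the literal text 010 (say, echo 010 piped into gawk); all three tests below succeed:

    {
        if ($1 == 10)   print "numeric: the field is a numeric string equal to 10"
        if ($1 != "10") print "string: \"010\" and the string constant \"10\" differ"
        if ("57" < "6") print "two string constants always compare as strings"
    }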
-.SH PATTERNS AND ACTIONS -\*(AK is a line-oriented language. The pattern comes first, and then the -action. Action statements are enclosed in -.B { -and -.BR } . -Either the pattern may be missing, or the action may be missing, but, -of course, not both. If the pattern is missing, the action is -executed for every single record of input. -A missing action is equivalent to -.RS -.PP -.B "{ print }" -.RE -.PP -which prints the entire record. -.PP -Comments begin with the \*(lq#\*(rq character, and continue until the -end of the line. -Blank lines may be used to separate statements. -Normally, a statement ends with a newline, however, this is not the -case for lines ending in -a \*(lq,\*(rq, -.BR { , -.BR ? , -.BR : , -.BR && , -or -.BR || . -Lines ending in -.B do -or -.B else -also have their statements automatically continued on the following line. -In other cases, a line can be continued by ending it with a \*(lq\e\*(rq, -in which case the newline will be ignored. -.PP -Multiple statements may -be put on one line by separating them with a \*(lq;\*(rq. -This applies to both the statements within the action part of a -pattern-action pair (the usual case), -and to the pattern-action statements themselves. -.SS Patterns -\*(AK patterns may be one of the following: -.PP -.RS -.nf -.B BEGIN -.B END -.BI / "regular expression" / -.I "relational expression" -.IB pattern " && " pattern -.IB pattern " || " pattern -.IB pattern " ? " pattern " : " pattern -.BI ( pattern ) -.BI ! " pattern" -.IB pattern1 ", " pattern2 -.fi -.RE -.PP -.B BEGIN -and -.B END -are two special kinds of patterns which are not tested against -the input. -The action parts of all -.B BEGIN -patterns are merged as if all the statements had -been written in a single -.B BEGIN -block. They are executed before any -of the input is read. Similarly, all the -.B END -blocks are merged, -and executed when all the input is exhausted (or when an -.B exit -statement is executed). -.B BEGIN -and -.B END -patterns cannot be combined with other patterns in pattern expressions. -.B BEGIN -and -.B END -patterns cannot have missing action parts. -.PP -For -.BI / "regular expression" / -patterns, the associated statement is executed for each input record that matches -the regular expression. -Regular expressions are the same as those in -.IR egrep (1), -and are summarized below. -.PP -A -.I "relational expression" -may use any of the operators defined below in the section on actions. -These generally test whether certain fields match certain regular expressions. -.PP -The -.BR && , -.BR || , -and -.B ! -operators are logical AND, logical OR, and logical NOT, respectively, as in C. -They do short-circuit evaluation, also as in C, and are used for combining -more primitive pattern expressions. As in most languages, parentheses -may be used to change the order of evaluation. -.PP -The -.B ?\^: -operator is like the same operator in C. If the first pattern is true -then the pattern used for testing is the second pattern, otherwise it is -the third. Only one of the second and third patterns is evaluated. -.PP -The -.IB pattern1 ", " pattern2 -form of an expression is called a -.IR "range pattern" . -It matches all input records starting with a record that matches -.IR pattern1 , -and continuing until a record that matches -.IR pattern2 , -inclusive. It does not combine with any other sort of pattern expression. -.SS Regular Expressions -Regular expressions are the extended kind found in -.IR egrep . 
-They are composed of characters as follows: -.TP "\w'\fB[^\fIabc.\|.\|.\fB]\fR'u+2n" -.I c -matches the non-metacharacter -.IR c . -.TP -.I \ec -matches the literal character -.IR c . -.TP -.B . -matches any character -.I including -newline. -.TP -.B ^ -matches the beginning of a string. -.TP -.B $ -matches the end of a string. -.TP -.BI [ abc.\|.\|. ] -character list, matches any of the characters -.IR abc.\|.\|. . -.TP -.BI [^ abc.\|.\|. ] -negated character list, matches any character except -.IR abc.\|.\|. . -.TP -.IB r1 | r2 -alternation: matches either -.I r1 -or -.IR r2 . -.TP -.I r1r2 -concatenation: matches -.IR r1 , -and then -.IR r2 . -.TP -.IB r\^ + -matches one or more -.IR r\^ "'s." -.TP -.IB r * -matches zero or more -.IR r\^ "'s." -.TP -.IB r\^ ? -matches zero or one -.IR r\^ "'s." -.TP -.BI ( r ) -grouping: matches -.IR r . -.TP -.PD 0 -.IB r { n } -.TP -.PD 0 -.IB r { n ,} -.TP -.PD -.IB r { n , m } -One or two numbers inside braces denote an -.IR "interval expression" . -If there is one number in the braces, the preceding regular expression -.I r -is repeated -.I n -times. If there are two numbers separated by a comma, -.I r -is repeated -.I n -to -.I m -times. -If there is one number followed by a comma, then -.I r -is repeated at least -.I n -times. -.sp .5 -Interval expressions are only available if either -.B \-\^\-posix -or -.B \-\^\-re\-interval -is specified on the command line. -.TP -.B \ey -matches the empty string at either the beginning or the -end of a word. -.TP -.B \eB -matches the empty string within a word. -.TP -.B \e< -matches the empty string at the beginning of a word. -.TP -.B \e> -matches the empty string at the end of a word. -.TP -.B \ew -matches any word-constituent character (letter, digit, or underscore). -.TP -.B \eW -matches any character that is not word-constituent. -.TP -.B \e` -matches the empty string at the beginning of a buffer (string). -.TP -.B \e' -matches the empty string at the end of a buffer. -.PP -The escape sequences that are valid in string constants (see below) -are also valid in regular expressions. -.PP -.I "Character classes" -are a new feature introduced in the \*(PX standard. -A character class is a special notation for describing -lists of characters that have a specific attribute, but where the -actual characters themselves can vary from country to country and/or -from character set to character set. For example, the notion of what -is an alphabetic character differs in the USA and in France. -.PP -A character class is only valid in a regular expression -.I inside -the brackets of a character list. Character classes consist of -.BR [: , -a keyword denoting the class, and -.BR :] . -The character -classes defined by the \*(PX standard are: -.TP "\w'\fB[:alnum:]\fR'u+2n" -.B [:alnum:] -Alphanumeric characters. -.TP -.B [:alpha:] -Alphabetic characters. -.TP -.B [:blank:] -Space or tab characters. -.TP -.B [:cntrl:] -Control characters. -.TP -.B [:digit:] -Numeric characters. -.TP -.B [:graph:] -Characters that are both printable and visible. -(A space is printable, but not visible, while an -.B a -is both.) -.TP -.B [:lower:] -Lower-case alphabetic characters. -.TP -.B [:print:] -Printable characters (characters that are not control characters.) -.TP -.B [:punct:] -Punctuation characters (characters that are not letter, digits, -control characters, or space characters). -.TP -.B [:space:] -Space characters (such as space, tab, and formfeed, to name a few). -.TP -.B [:upper:] -Upper-case alphabetic characters. 
-.TP -.B [:xdigit:] -Characters that are hexadecimal digits. -.PP -For example, before the \*(PX standard, to match alphanumeric -characters, you would have had to write -.BR /[A\-Za\-z0\-9]/ . -If your character set had other alphabetic characters in it, this would not -match them, and if your character set collated differently from -\s-1ASCII\s+1, this might not even match the -\s-1ASCII\s+1 alphanumeric characters. -With the \*(PX character classes, you can write -.BR /[[:alnum:]]/ , -and this matches -the alphabetic and numeric characters in your character set. -.PP -Two additional special sequences can appear in character lists. -These apply to non-\s-1ASCII\s+1 character sets, which can have single symbols -(called -.IR "collating elements" ) -that are represented with more than one -character, as well as several characters that are equivalent for -.IR collating , -or sorting, purposes. (E.g., in French, a plain \*(lqe\*(rq -and a grave-accented e\` are equivalent.) -.TP -Collating Symbols -A collating symbol is a multi-character collating element enclosed in -.B [. -and -.BR .] . -For example, if -.B ch -is a collating element, then -.B [[.ch.]] -is a regular expression that matches this collating element, while -.B [ch] -is a regular expression that matches either -.B c -or -.BR h . -.TP -Equivalence Classes -An equivalence class is a locale-specific name for a list of -characters that are equivalent. The name is enclosed in -.B [= -and -.BR =] . -For example, the name -.B e -might be used to represent all of -\*(lqe,\*(rq \*(lqe\h'-\w:e:u'\`,\*(rq and \*(lqe\h'-\w:e:u'\`.\*(rq -In this case, -.B [[=e=]] -is a regular expression -that matches any of -.BR e , -....BR "e\'" , -.BR "e\h'-\w:e:u'\'" , -or -....BR "e\`" . -.BR "e\h'-\w:e:u'\`" . -.PP -These features are very valuable in non-English speaking locales. -The library functions that -.I gawk -uses for regular expression matching -currently only recognize \*(PX character classes; they do not recognize -collating symbols or equivalence classes. -.PP -The -.BR \ey , -.BR \eB , -.BR \e< , -.BR \e> , -.BR \ew , -.BR \eW , -.BR \e` , -and -.B \e' -operators are specific to -.IR gawk ; -they are extensions based on facilities in the \*(GN regular expression libraries. -.PP -The various command line options -control how -.I gawk -interprets characters in regular expressions. -.TP -No options -In the default case, -.I gawk -provide all the facilities of -\*(PX regular expressions and the \*(GN regular expression operators described above. -However, interval expressions are not supported. -.TP -.B \-\^\-posix -Only \*(PX regular expressions are supported, the \*(GN operators are not special. -(E.g., -.B \ew -matches a literal -.BR w ). -Interval expressions are allowed. -.TP -.B \-\^\-traditional -Traditional Unix -.I awk -regular expressions are matched. The \*(GN operators -are not special, interval expressions are not available, and neither -are the \*(PX character classes -.RB ( [[:alnum:]] -and so on). -Characters described by octal and hexadecimal escape sequences are -treated literally, even if they represent regular expression metacharacters. -.TP -.B \-\^\-re\-interval -Allow interval expressions in regular expressions, even if -.B \-\^\-traditional -has been provided. -.SS Actions -Action statements are enclosed in braces, -.B { -and -.BR } . -Action statements consist of the usual assignment, conditional, and looping -statements found in most languages. 
The operators, control statements, -and input/output statements -available are patterned after those in C. -.SS Operators -.PP -The operators in \*(AK, in order of decreasing precedence, are -.PP -.TP "\w'\fB*= /= %= ^=\fR'u+1n" -.BR ( \&.\|.\|. ) -Grouping -.TP -.B $ -Field reference. -.TP -.B "++ \-\^\-" -Increment and decrement, both prefix and postfix. -.TP -.B ^ -Exponentiation (\fB**\fR may also be used, and \fB**=\fR for -the assignment operator). -.TP -.B "+ \- !" -Unary plus, unary minus, and logical negation. -.TP -.B "* / %" -Multiplication, division, and modulus. -.TP -.B "+ \-" -Addition and subtraction. -.TP -.I space -String concatenation. -.TP -.PD 0 -.B "< >" -.TP -.PD 0 -.B "<= >=" -.TP -.PD -.B "!= ==" -The regular relational operators. -.TP -.B "~ !~" -Regular expression match, negated match. -.B NOTE: -Do not use a constant regular expression -.RB ( /foo/ ) -on the left-hand side of a -.B ~ -or -.BR !~ . -Only use one on the right-hand side. The expression -.BI "/foo/ ~ " exp -has the same meaning as \fB(($0 ~ /foo/) ~ \fIexp\fB)\fR. -This is usually -.I not -what was intended. -.TP -.B in -Array membership. -.TP -.B && -Logical AND. -.TP -.B || -Logical OR. -.TP -.B ?: -The C conditional expression. This has the form -.IB expr1 " ? " expr2 " : " expr3\c -\&. -If -.I expr1 -is true, the value of the expression is -.IR expr2 , -otherwise it is -.IR expr3 . -Only one of -.I expr2 -and -.I expr3 -is evaluated. -.TP -.PD 0 -.B "= += \-=" -.TP -.PD -.B "*= /= %= ^=" -Assignment. Both absolute assignment -.BI ( var " = " value ) -and operator-assignment (the other forms) are supported. -.SS Control Statements -.PP -The control statements are -as follows: -.PP -.RS -.nf -\fBif (\fIcondition\fB) \fIstatement\fR [ \fBelse\fI statement \fR] -\fBwhile (\fIcondition\fB) \fIstatement \fR -\fBdo \fIstatement \fBwhile (\fIcondition\fB)\fR -\fBfor (\fIexpr1\fB; \fIexpr2\fB; \fIexpr3\fB) \fIstatement\fR -\fBfor (\fIvar \fBin\fI array\fB) \fIstatement\fR -\fBbreak\fR -\fBcontinue\fR -\fBdelete \fIarray\^\fB[\^\fIindex\^\fB]\fR -\fBdelete \fIarray\^\fR -\fBexit\fR [ \fIexpression\fR ] -\fB{ \fIstatements \fB} -.fi -.RE -.SS "I/O Statements" -.PP -The input/output statements are as follows: -.PP -.TP "\w'\fBprintf \fIfmt, expr-list\fR'u+1n" -\fBclose(\fIfile \fR[\fB, \fIhow\fR]\fB)\fR -Close file, pipe or co-process. -The optional -.I how -should only be used when closing one end of a -two-way pipe to a co-process. -It must be a string value, either -\fB"to"\fR or \fB"from"\fR. -.TP -.B getline -Set -.B $0 -from next input record; set -.BR NF , -.BR NR , -.BR FNR . -.TP -.BI "getline <" file -Set -.B $0 -from next record of -.IR file ; -set -.BR NF . -.TP -.BI getline " var" -Set -.I var -from next input record; set -.BR NR , -.BR FNR . -.TP -.BI getline " var" " <" file -Set -.I var -from next record of -.IR file . -.TP -\fIcommand\fB | getline \fR[\fIvar\fR] -Run -.I command -piping the output either into -.B $0 -or -.IR var , -as above. -.TP -\fIcommand\fB |& getline \fR[\fIvar\fR] -Run -.I command -as a co-process -piping the output either into -.B $0 -or -.IR var , -as above. -Co-processes are a -.I gawk -extension. -.TP -.B next -Stop processing the current input record. The next input record -is read and processing starts over with the first pattern in the -\*(AK program. If the end of the input data is reached, the -.B END -block(s), if any, are executed. -.TP -.B "nextfile" -Stop processing the current input file. The next input record read -comes from the next input file. 
-.B FILENAME -and -.B ARGIND -are updated, -.B FNR -is reset to 1, and processing starts over with the first pattern in the -\*(AK program. If the end of the input data is reached, the -.B END -block(s), if any, are executed. -.TP -.B print -Prints the current record. -The output record is terminated with the value of the -.B ORS -variable. -.TP -.BI print " expr-list" -Prints expressions. -Each expression is separated by the value of the -.B OFS -variable. -The output record is terminated with the value of the -.B ORS -variable. -.TP -.BI print " expr-list" " >" file -Prints expressions on -.IR file . -Each expression is separated by the value of the -.B OFS -variable. The output record is terminated with the value of the -.B ORS -variable. -.TP -.BI printf " fmt, expr-list" -Format and print. -.TP -.BI printf " fmt, expr-list" " >" file -Format and print on -.IR file . -.TP -.BI system( cmd-line ) -Execute the command -.IR cmd-line , -and return the exit status. -(This may not be available on non-\*(PX systems.) -.TP -\&\fBfflush(\fR[\fIfile\^\fR]\fB)\fR -Flush any buffers associated with the open output file or pipe -.IR file . -If -.I file -is missing, then standard output is flushed. -If -.I file -is the null string, -then all open output files and pipes -have their buffers flushed. -.PP -Additional output redirections are allowed for -.B print -and -.BR printf . -.TP -.BI "print .\|.\|. >>" " file" -appends output to the -.IR file . -.TP -.BI "print .\|.\|. |" " command" -writes on a pipe. -.TP -.BI "print .\|.\|. |&" " command" -sends data to a co-process. -.PP -The -.BR getline -command returns 0 on end of file and \-1 on an error. -Upon an error, -.B ERRNO -contains a string describing the problem. -.PP -.B NOTE: -If using a pipe or co-process to -.BR getline , -or from -.B print -or -.B printf -within a loop, you -.I must -use -.B close() -to create new instances of the command. -\*(AK does not automatically close pipes or co-processes when -they return EOF. -.SS The \fIprintf\fP\^ Statement -.PP -The \*(AK versions of the -.B printf -statement and -.B sprintf() -function -(see below) -accept the following conversion specification formats: -.TP "\w'\fB%g\fR, \fB%G\fR'u+2n" -.B %c -An \s-1ASCII\s+1 character. -If the argument used for -.B %c -is numeric, it is treated as a character and printed. -Otherwise, the argument is assumed to be a string, and only the first -character of that string is printed. -.TP -.BR "%d" "," " %i" -A decimal number (the integer part). -.TP -.B %e , " %E" -A floating point number of the form -.BR [\-]d.dddddde[+\^\-]dd . -The -.B %E -format uses -.B E -instead of -.BR e . -.TP -.B %f -A floating point number of the form -.BR [\-]ddd.dddddd . -.TP -.B %g , " %G" -Use -.B %e -or -.B %f -conversion, whichever is shorter, with nonsignificant zeros suppressed. -The -.B %G -format uses -.B %E -instead of -.BR %e . -.TP -.B %o -An unsigned octal number (also an integer). -.TP -.PD -.B %u -An unsigned decimal number (again, an integer). -.TP -.B %s -A character string. -.TP -.B %x , " %X" -An unsigned hexadecimal number (an integer). -The -.B %X -format uses -.B ABCDEF -instead of -.BR abcdef . -.TP -.B %% -A single -.B % -character; no argument is converted. -.PP -Optional, additional parameters may lie between the -.B % -and the control letter: -.TP -.IB count $ -Use the -.IR count "'th" -argument at this point in the formatting.
-This is called a -.I "positional specifier" -and -is intended primarily for use in translated versions of -format strings, not in the original text of an AWK program. -It is a -.I gawk -extension. -.TP -.B \- -The expression should be left-justified within its field. -.TP -.I space -For numeric conversions, prefix positive values with a space, and -negative values with a minus sign. -.TP -.B + -The plus sign, used before the width modifier (see below), -says to always supply a sign for numeric conversions, even if the data -to be formatted is positive. The -.B + -overrides the space modifier. -.TP -.B # -Use an \*(lqalternate form\*(rq for certain control letters. -For -.BR %o , -supply a leading zero. -For -.BR %x , -and -.BR %X , -supply a leading -.BR 0x -or -.BR 0X -for -a nonzero result. -For -.BR %e , -.BR %E , -and -.BR %f , -the result always contains a -decimal point. -For -.BR %g , -and -.BR %G , -trailing zeros are not removed from the result. -.TP -.B 0 -A leading -.B 0 -(zero) acts as a flag, that indicates output should be -padded with zeroes instead of spaces. -This applies even to non-numeric output formats. -This flag only has an effect when the field width is wider than the -value to be printed. -.TP -.I width -The field should be padded to this width. The field is normally padded -with spaces. If the -.B 0 -flag has been used, it is padded with zeroes. -.TP -.BI \&. prec -A number that specifies the precision to use when printing. -For the -.BR %e , -.BR %E , -and -.BR %f -formats, this specifies the -number of digits you want printed to the right of the decimal point. -For the -.BR %g , -and -.B %G -formats, it specifies the maximum number -of significant digits. For the -.BR %d , -.BR %o , -.BR %i , -.BR %u , -.BR %x , -and -.B %X -formats, it specifies the minimum number of -digits to print. For -.BR %s , -it specifies the maximum number of -characters from the string that should be printed. -.PP -The dynamic -.I width -and -.I prec -capabilities of the \*(AN C -.B printf() -routines are supported. -A -.B * -in place of either the -.B width -or -.B prec -specifications causes their values to be taken from -the argument list to -.B printf -or -.BR sprintf() . -To use a positional specifier with a dynamic width or precision, -supply the -.IB count $ -after the -.B * -in the format string. -For example, \fB"%3$*2$.*1$s"\fP. -.SS Special File Names -.PP -When doing I/O redirection from either -.B print -or -.B printf -into a file, -or via -.B getline -from a file, -.I gawk -recognizes certain special filenames internally. These filenames -allow access to open file descriptors inherited from -.IR gawk\^ "'s" -parent process (usually the shell). -These file names may also be used on the command line to name data files. -The filenames are: -.TP "\w'\fB/dev/stdout\fR'u+1n" -.B /dev/stdin -The standard input. -.TP -.B /dev/stdout -The standard output. -.TP -.B /dev/stderr -The standard error output. -.TP -.BI /dev/fd/\^ n -The file associated with the open file descriptor -.IR n . -.PP -These are particularly useful for error messages. For example: -.PP -.RS -.ft B -print "You blew it!" > "/dev/stderr" -.ft R -.RE -.PP -whereas you would otherwise have to use -.PP -.RS -.ft B -print "You blew it!" | "cat 1>&2" -.ft R -.RE -.PP -The following special filenames may be used with the -.B |& -co-process operator for creating TCP/IP network connections. 
-.TP "\w'\fB/inet/tcp/\fIlport\fB/\fIrhost\fB/\fIrport\fR'u+2n" -.BI /inet/tcp/ lport / rhost / rport -File for TCP/IP connection on local port -.I lport -to -remote host -.I rhost -on remote port -.IR rport . -Use a port of -.B 0 -to have the system pick a port. -.TP -.BI /inet/udp/ lport / rhost / rport -Similar, but use UDP/IP instead of TCP/IP. -.TP -.BI /inet/raw/ lport / rhost / rport -.\" Similar, but use raw IP sockets. -Reserved for future use. -.PP -Other special filenames provide access to information about the running -.I gawk -process. -.B "These filenames are now obsolete." -Use the -.B PROCINFO -array to obtain the information they provide. -The filenames are: -.TP "\w'\fB/dev/stdout\fR'u+1n" -.B /dev/pid -Reading this file returns the process ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/ppid -Reading this file returns the parent process ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/pgrpid -Reading this file returns the process group ID of the current process, -in decimal, terminated with a newline. -.TP -.B /dev/user -Reading this file returns a single record terminated with a newline. -The fields are separated with spaces. -.B $1 -is the value of the -.IR getuid (2) -system call, -.B $2 -is the value of the -.IR geteuid (2) -system call, -.B $3 -is the value of the -.IR getgid (2) -system call, and -.B $4 -is the value of the -.IR getegid (2) -system call. -If there are any additional fields, they are the group IDs returned by -.IR getgroups (2). -Multiple groups may not be supported on all systems. -.SS Numeric Functions -.PP -\*(AK has the following built-in arithmetic functions: -.PP -.TP "\w'\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR'u+1n" -.BI atan2( y , " x" ) -Returns the arctangent of -.I y/x -in radians. -.TP -.BI cos( expr ) -Returns the cosine of -.IR expr , -which is in radians. -.TP -.BI exp( expr ) -The exponential function. -.TP -.BI int( expr ) -Truncates to integer. -.TP -.BI log( expr ) -The natural logarithm function. -.TP -.B rand() -Returns a random number between 0 and 1. -.TP -.BI sin( expr ) -Returns the sine of -.IR expr , -which is in radians. -.TP -.BI sqrt( expr ) -The square root function. -.TP -\&\fBsrand(\fR[\fIexpr\^\fR]\fB)\fR -Uses -.I expr -as a new seed for the random number generator. If no -.I expr -is provided, the time of day is used. -The return value is the previous seed for the random -number generator. -.SS String Functions -.PP -.I Gawk -has the following built-in string functions: -.PP -.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" -\fBasort(\fIs \fR[\fB, \fId\fR]\fB)\fR -Returns the number of elements in the source -array -.IR s . -The contents of -.I s -are sorted using -.IR gawk\^ "'s" -normal rules for -comparing values, and the indexes of the -sorted values of -.I s -are replaced with sequential -integers starting with 1. If the optional -destination array -.I d -is specified, then -.I s -is first duplicated into -.IR d , -and then -.I d -is sorted, leaving the indexes of the -source array -.I s -unchanged. -.TP -\fBgensub(\fIr\fB, \fIs\fB, \fIh \fR[\fB, \fIt\fR]\fB)\fR -Search the target string -.I t -for matches of the regular expression -.IR r . -If -.I h -is a string beginning with -.B g -or -.BR G , -then replace all matches of -.I r -with -.IR s . -Otherwise, -.I h -is a number indicating which match of -.I r -to replace. -If -.I t -is not supplied, -.B $0 -is used instead. 
-Within the replacement text -.IR s , -the sequence -.BI \e n\fR, -where -.I n -is a digit from 1 to 9, may be used to indicate just the text that -matched the -.IR n 'th -parenthesized subexpression. The sequence -.B \e0 -represents the entire matched text, as does the character -.BR & . -Unlike -.B sub() -and -.BR gsub() , -the modified string is returned as the result of the function, -and the original target string is -.I not -changed. -.TP "\w'\fBsprintf(\^\fIfmt\fB\^, \fIexpr-list\^\fB)\fR'u+1n" -\fBgsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR -For each substring matching the regular expression -.I r -in the string -.IR t , -substitute the string -.IR s , -and return the number of substitutions. -If -.I t -is not supplied, use -.BR $0 . -An -.B & -in the replacement text is replaced with the text that was actually matched. -Use -.B \e& -to get a literal -.BR & . -(This must be typed as \fB"\e\e&"\fP; -see \*(EP -for a fuller discussion of the rules for -.BR &'s -and backslashes in the replacement text of -.BR sub() , -.BR gsub() , -and -.BR gensub() .) -.TP -.BI index( s , " t" ) -Returns the index of the string -.I t -in the string -.IR s , -or 0 if -.I t -is not present. -.TP -\fBlength(\fR[\fIs\fR]\fB) -Returns the length of the string -.IR s , -or the length of -.B $0 -if -.I s -is not supplied. -.TP -\fBmatch(\fIs\fB, \fIr \fR[\fB, \fIa\fR]\fB)\fR -Returns the position in -.I s -where the regular expression -.I r -occurs, or 0 if -.I r -is not present, and sets the values of -.B RSTART -and -.BR RLENGTH . -Note that the argument order is the same as for the -.B ~ -operator: -.IB str " ~" -.IR re . -.ft R -If array -.I a -is provided, -.I a -is cleared and then elements 1 through -.I n -are filled with the portions of -.I s -that match the corresponding parenthesized -subexpression in -.IR r . -The 0'th element of -.I a -contains the portion -of -.I s -matched by the entire regular expression -.IR r . -.TP -\fBsplit(\fIs\fB, \fIa \fR[\fB, \fIr\fR]\fB)\fR -Splits the string -.I s -into the array -.I a -on the regular expression -.IR r , -and returns the number of fields. If -.I r -is omitted, -.B FS -is used instead. -The array -.I a -is cleared first. -Splitting behaves identically to field splitting, described above. -.TP -.BI sprintf( fmt , " expr-list" ) -Prints -.I expr-list -according to -.IR fmt , -and returns the resulting string. -.TP -.BI strtonum( str ) -Examines -.IR str , -and returns its numeric value. -If -.I str -begins -with a leading -.BR 0 , -.B strtonum() -assumes that -.I str -is an octal number. -If -.I str -begins -with a leading -.B 0x -or -.BR 0X , -.B strtonum() -assumes that -.I str -is a hexadecimal number. -.TP -\fBsub(\fIr\fB, \fIs \fR[\fB, \fIt\fR]\fB)\fR -Just like -.BR gsub() , -but only the first matching substring is replaced. -.TP -\fBsubstr(\fIs\fB, \fIi \fR[\fB, \fIn\fR]\fB)\fR -Returns the at most -.IR n -character -substring of -.I s -starting at -.IR i . -If -.I n -is omitted, the rest of -.I s -is used. -.TP -.BI tolower( str ) -Returns a copy of the string -.IR str , -with all the upper-case characters in -.I str -translated to their corresponding lower-case counterparts. -Non-alphabetic characters are left unchanged. -.TP -.BI toupper( str ) -Returns a copy of the string -.IR str , -with all the lower-case characters in -.I str -translated to their corresponding upper-case counterparts. -Non-alphabetic characters are left unchanged. 
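The string functions above are easiest to see working together in a short program. The sketch below is not part of the original manual page; the sample strings, the array names parts and pieces, and the results shown in the comments are invented purely for illustration, and it assumes only behavior documented in the entries above (the return value of gsub(), the optional third argument to match(), and split()).

    # illustrative only: gsub(), match() with an array, and split()
    BEGIN {
        s = "foo bar foo baz"
        n = gsub(/foo/, "FOO", s)          # n = 2; s is now "FOO bar FOO baz"
        print n, s
        if (match(s, /(ba)(r)/, parts))    # third argument is the gawk extension
            print RSTART, RLENGTH, parts[1], parts[2]   # prints: 5 3 ba r
        m = split("a:b:c", pieces, ":")    # m = 3
        print m, pieces[1], pieces[3]      # prints: 3 a c
    }

Because all of the work happens in a BEGIN rule, the program needs no input and can be run as, say, gawk -f strfuncs.awk (the file name is arbitrary).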
-.SS Time Functions -Since one of the primary uses of \*(AK programs is processing log files -that contain time stamp information, -.I gawk -provides the following functions for obtaining time stamps and -formatting them. -.PP -.TP "\w'\fBsystime()\fR'u+1n" -\fBmktime(\fIdatespec\fB)\fR -Turns -.I datespec -into a time stamp of the same form as returned by -.BR systime() . -The -.I datespec -is a string of the form -.IR "YYYY MM DD HH MM SS[ DST]" . -The contents of the string are six or seven numbers representing respectively -the full year including century, -the month from 1 to 12, -the day of the month from 1 to 31, -the hour of the day from 0 to 23, -the minute from 0 to 59, -and the second from 0 to 60, -and an optional daylight saving flag. -The values of these numbers need not be within the ranges specified; -for example, an hour of \-1 means 1 hour before midnight. -The origin-zero Gregorian calendar is assumed, -with year 0 preceding year 1 and year \-1 preceding year 0. -The time is assumed to be in the local timezone. -If the daylight saving flag is positive, -the time is assumed to be daylight saving time; -if zero, the time is assumed to be standard time; -and if negative (the default), -.B mktime() -attempts to determine whether daylight saving time is in effect -for the specified time. -If -.I datespec -does not contain enough elements or if the resulting time -is out of range, -.B mktime() -returns \-1. -.TP -\fBstrftime(\fR[\fIformat \fR[\fB, \fItimestamp\fR]]\fB)\fR -Formats -.I timestamp -according to the specification in -.IR format. -The -.I timestamp -should be of the same form as returned by -.BR systime() . -If -.I timestamp -is missing, the current time of day is used. -If -.I format -is missing, a default format equivalent to the output of -.IR date (1) -is used. -See the specification for the -.B strftime() -function in \*(AN C for the format conversions that are -guaranteed to be available. -A public-domain version of -.IR strftime (3) -and a man page for it come with -.IR gawk ; -if that version was used to build -.IR gawk , -then all of the conversions described in that man page are available to -.IR gawk. -.TP -.B systime() -Returns the current time of day as the number of seconds since the Epoch -(1970-01-01 00:00:00 UTC on \*(PX systems). -.SS Bit Manipulation Functions -Starting with version 3.1 of -.IR gawk , -the following bit manipulation functions are available. -They work by converting double-precision floating point -values to -.B "unsigned long" -integers, doing the operation, and then converting the -result back to floating point. -The functions are: -.TP "\w'\fBrshift(\fIval\fB, \fIcount\fB)\fR'u+2n" -\fBand(\fIv1\fB, \fIv2\fB)\fR -Return the bitwise AND of the values provided by -.I v1 -and -.IR v2 . -.TP -\fBcompl(\fIval\fB)\fR -Return the bitwise complement of -.IR val . -.TP -\fBlshift(\fIval\fB, \fIcount\fB)\fR -Return the value of -.IR val , -shifted left by -.I count -bits. -.TP -\fBor(\fIv1\fB, \fIv2\fB)\fR -Return the bitwise OR of the values provided by -.I v1 -and -.IR v2 . -.TP -\fBrshift(\fIval\fB, \fIcount\fB)\fR -Return the value of -.IR val , -shifted right by -.I count -bits. -.TP -\fBxor(\fIv1\fB, \fIv2\fB)\fR -Return the bitwise XOR of the values provided by -.I v1 -and -.IR v2 . -.PP -.SS Internationalization Functions -Starting with version 3.1 of -.IR gawk , -the following functions may be used from within your AWK program for -translating strings at run-time. -For full details, see \*(EP.
-.TP -\fBbindtextdomain(\fIdirectory \fR[\fB, \fIdomain\fR]\fB)\fR -Specifies the directory where -.I gawk -looks for the -.B \&.mo -files, in case they -will not or cannot be placed in the ``standard'' locations -(e.g., during testing). -It returns the directory where -.I domain -is ``bound.'' -.sp .5 -The default -.I domain -is the value of -.BR TEXTDOMAIN . -If -.I directory -is the null string (\fB""\fR), then -.B bindtextdomain() -returns the current binding for the -given -.IR domain . -.TP -\fBdcgettext(\fIstring \fR[\fB, \fIdomain \fR[\fB, \fIcategory\fR]]\fB)\fR -Returns the translation of -.I string -in -text domain -.I domain -for locale category -.IR category . -The default value for -.I domain -is the current value of -.BR TEXTDOMAIN . -The default value for -.I category -is \fB"LC_MESSAGES"\fR. -.sp .5 -If you supply a value for -.IR category , -it must be a string equal to -one of the known locale categories described -in \*(EP. -You must also supply a text domain. Use -.B TEXTDOMAIN -if you want to use the current domain. -.SH USER-DEFINED FUNCTIONS -Functions in \*(AK are defined as follows: -.PP -.RS -\fBfunction \fIname\fB(\fIparameter list\fB) { \fIstatements \fB}\fR -.RE -.PP -Functions are executed when they are called from within expressions -in either patterns or actions. Actual parameters supplied in the function -call are used to instantiate the formal parameters declared in the function. -Arrays are passed by reference, other variables are passed by value. -.PP -Since functions were not originally part of the \*(AK language, the provision -for local variables is rather clumsy: They are declared as extra parameters -in the parameter list. The convention is to separate local variables from -real parameters by extra spaces in the parameter list. For example: -.PP -.RS -.ft B -.nf -function f(p, q, a, b) # a and b are local -{ - \&.\|.\|. -} - -/abc/ { .\|.\|. ; f(1, 2) ; .\|.\|. } -.fi -.ft R -.RE -.PP -The left parenthesis in a function call is required -to immediately follow the function name, -without any intervening white space. -This is to avoid a syntactic ambiguity with the concatenation operator. -This restriction does not apply to the built-in functions listed above. -.PP -Functions may call each other and may be recursive. -Function parameters used as local variables are initialized -to the null string and the number zero upon function invocation. -.PP -Use -.BI return " expr" -to return a value from a function. The return value is undefined if no -value is provided, or if the function returns by \*(lqfalling off\*(rq the -end. -.PP -If -.B \-\^\-lint -has been provided, -.I gawk -warns about calls to undefined functions at parse time, -instead of at run time. -Calling an undefined function at run time is a fatal error. -.PP -The word -.B func -may be used in place of -.BR function . -.SH DYNAMICALLY LOADING NEW FUNCTIONS -Beginning with version 3.1 of -.IR gawk , -you can dynamically add new built-in functions to the running -.I gawk -interpreter. -The full details are beyond the scope of this manual page; -see \*(EP for the details. -.PP -.TP 8 -\fBextension(\fIobject\fB, \fIfunction\fB)\fR -Dynamically link the shared object file named by -.IR object , -and invoke -.I function -in that object, to perform initialization. -These should both be provided as strings. -Returns the value returned by -.IR function . -.PP -.ft B -This function is provided and documented in \*(EP, -but everything about this feature is likely to change -in the next release. 
-We STRONGLY recommend that you do not use this feature -for anything that you aren't willing to redo. -.ft R -.SH SIGNALS -.I pgawk -accepts two signals. -.B SIGUSR1 -causes it to dump a profile and function call stack to the -profile file, which is either -.BR awkprof.out , -or whatever file was named with the -.B \-\^\-profile -option. It then continues to run. -.B SIGHUP -causes it to dump the profile and function call stack and then exit. -.SH EXAMPLES -.nf -Print and sort the login names of all users: - -.ft B - BEGIN { FS = ":" } - { print $1 | "sort" } - -.ft R -Count lines in a file: - -.ft B - { nlines++ } - END { print nlines } - -.ft R -Precede each line by its number in the file: - -.ft B - { print FNR, $0 } - -.ft R -Concatenate and line number (a variation on a theme): - -.ft B - { print NR, $0 } -.ft R -.fi -.SH INTERNATIONALIZATION -.PP -String constants are sequences of characters enclosed in double -quotes. In non-English speaking environments, it is possible to mark -strings in the \*(AK program as requiring translation to the native -natural language. Such strings are marked in the \*(AK program with -a leading underscore (\*(lq_\*(rq). For example, -.sp -.RS -.ft B -gawk 'BEGIN { print "hello, world" }' -.RE -.sp -.ft R -always prints -.BR "hello, world" . -But, -.sp -.RS -.ft B -gawk 'BEGIN { print _"hello, world" }' -.RE -.sp -.ft R -might print -.B "bonjour, monde" -in France. -.PP -There are several steps involved in producing and running a localizable -\*(AK program. -.TP "\w'4.'u+2n" -1. -Add a -.B BEGIN -action to assign a value to the -.B TEXTDOMAIN -variable to set the text domain to a name associated with your program. -.sp -.ti +5n -.ft B -BEGIN { TEXTDOMAIN = "myprog" } -.ft R -.sp -This allows -.I gawk -to find the -.B \&.mo -file associated with your program. -Without this step, -.I gawk -uses the -.B messages -text domain, -which likely does not contain translations for your program. -.TP -2. -Mark all strings that should be translated with leading underscores. -.TP -3. -If necessary, use the -.B dcgettext() -and/or -.B bindtextdomain() -functions in your program, as appropriate. -.TP -4. -Run -.B "gawk \-\^\-gen\-po \-f myprog.awk > myprog.po" -to generate a -.B \&.po -file for your program. -.TP -5. -Provide appropriate translations, and build and install a corresponding -.B \&.mo -file. -.PP -The internationalization features are described in full detail in \*(EP. -.SH POSIX COMPATIBILITY -A primary goal for -.I gawk -is compatibility with the \*(PX standard, as well as with the -latest version of \*(UX -.IR awk . -To this end, -.I gawk -incorporates the following user visible -features which are not described in the \*(AK book, -but are part of the Bell Laboratories version of -.IR awk , -and are in the \*(PX standard. -.PP -The book indicates that command line variable assignment happens when -.I awk -would otherwise open the argument as a file, which is after the -.B BEGIN -block is executed. However, in earlier implementations, when such an -assignment appeared before any file names, the assignment would happen -.I before -the -.B BEGIN -block was run. Applications came to depend on this \*(lqfeature.\*(rq -When -.I awk -was changed to match its documentation, the -.B \-v -option for assigning variables before program execution was added to -accommodate applications that depended upon the old behavior. -(This feature was agreed upon by both the Bell Laboratories and the \*(GN developers.) 
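The distinction drawn in the preceding paragraph between the \-v option and a plain command-line assignment may be easier to see with a few invocations. These are a minimal sketch, not taken from the manual page; the variable name x and the file names file1 and file2 are invented for illustration.

    gawk -v x=hello 'BEGIN { print x }'
        # the -v assignment happens before the BEGIN block runs,
        # so this prints "hello"

    gawk 'BEGIN { print x }' x=hello
        # without -v the assignment has not yet happened when BEGIN runs,
        # so this prints an empty line

    gawk '{ print FILENAME, x }' x=1 file1 x=2 file2
        # plain assignments take effect in argument order: records read
        # from file1 see x as 1, records read from file2 see x as 2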
-.PP -The -.B \-W -option for implementation specific features is from the \*(PX standard. -.PP -When processing arguments, -.I gawk -uses the special option \*(lq\-\^\-\*(rq to signal the end of -arguments. -In compatibility mode, it warns about but otherwise ignores -undefined options. -In normal operation, such arguments are passed on to the \*(AK program for -it to process. -.PP -The \*(AK book does not define the return value of -.BR srand() . -The \*(PX standard -has it return the seed it was using, to allow keeping track -of random number sequences. Therefore -.B srand() -in -.I gawk -also returns its current seed. -.PP -Other new features are: -The use of multiple -.B \-f -options (from MKS -.IR awk ); -the -.B ENVIRON -array; the -.BR \ea , -and -.BR \ev -escape sequences (done originally in -.I gawk -and fed back into the Bell Laboratories version); the -.B tolower() -and -.B toupper() -built-in functions (from the Bell Laboratories version); and the \*(AN C conversion specifications in -.B printf -(done first in the Bell Laboratories version). -.SH HISTORICAL FEATURES -There are two features of historical \*(AK implementations that -.I gawk -supports. -First, it is possible to call the -.B length() -built-in function not only with no argument, but even without parentheses! -Thus, -.RS -.PP -.ft B -a = length # Holy Algol 60, Batman! -.ft R -.RE -.PP -is the same as either of -.RS -.PP -.ft B -a = length() -.br -a = length($0) -.ft R -.RE -.PP -This feature is marked as \*(lqdeprecated\*(rq in the \*(PX standard, and -.I gawk -issues a warning about its use if -.B \-\^\-lint -is specified on the command line. -.PP -The other feature is the use of either the -.B continue -or the -.B break -statements outside the body of a -.BR while , -.BR for , -or -.B do -loop. Traditional \*(AK implementations have treated such usage as -equivalent to the -.B next -statement. -.I Gawk -supports this usage if -.B \-\^\-traditional -has been specified. -.SH GNU EXTENSIONS -.I Gawk -has a number of extensions to \*(PX -.IR awk . -They are described in this section. All the extensions described here -can be disabled by -invoking -.I gawk -with the -.B \-\^\-traditional -option. -.PP -The following features of -.I gawk -are not available in -\*(PX -.IR awk . -.\" Environment vars and startup stuff -.TP "\w'\(bu'u+1n" -\(bu -No path search is performed for files named via the -.B \-f -option. Therefore the -.B AWKPATH -environment variable is not special. -.\" POSIX and language recognition issues -.TP -\(bu -The -.B \ex -escape sequence. -(Disabled with -.BR \-\^\-posix .) -.TP -\(bu -The -.B fflush() -function. -(Disabled with -.BR \-\^\-posix .) -.TP -\(bu -The ability to continue lines after -.B ? -and -.BR : . -(Disabled with -.BR \-\^\-posix .) -.TP -\(bu -Octal and hexadecimal constants in AWK programs. -.\" Special variables -.TP -\(bu -The -.BR ARGIND , -.BR BINMODE , -.BR ERRNO , -.BR LINT , -.B RT -and -.B TEXTDOMAIN -variables are not special. -.TP -\(bu -The -.B IGNORECASE -variable and its side-effects are not available. -.TP -\(bu -The -.B FIELDWIDTHS -variable and fixed-width field splitting. -.TP -\(bu -The -.B PROCINFO -array is not available. -.\" I/O stuff -.TP -\(bu -The use of -.B RS -as a regular expression. -.TP -\(bu -The special file names available for I/O redirection are not recognized. -.TP -\(bu -The -.B |& -operator for creating co-processes. 
-.\" Changes to standard awk functions -.TP -\(bu -The ability to split out individual characters using the null string -as the value of -.BR FS , -and as the third argument to -.BR split() . -.TP -\(bu -The optional second argument to the -.B close() -function. -.TP -\(bu -The optional third argument to the -.B match() -function. -.TP -\(bu -The ability to use positional specifiers with -.B printf -and -.BR sprintf() . -.\" New keywords or changes to keywords -.TP -\(bu -The use of -.BI delete " array" -to delete the entire contents of an array. -.TP -\(bu -The use of -.B "nextfile" -to abandon processing of the current input file. -.\" New functions -.TP -\(bu -The -.BR and() , -.BR asort() , -.BR bindtextdomain() , -.BR compl() , -.BR dcgettext() , -.BR gensub() , -.BR lshift() , -.BR mktime() , -.BR or() , -.BR rshift() , -.BR strftime() , -.BR strtonum() , -.B systime() -and -.B xor() -functions. -.\" I18N stuff -.TP -\(bu -Localizable strings. -.\" Extending gawk -.TP -\(bu -Adding new built-in functions dynamically with the -.B extension() -function. -.PP -The \*(AK book does not define the return value of the -.B close() -function. -.IR Gawk\^ "'s" -.B close() -returns the value from -.IR fclose (3), -or -.IR pclose (3), -when closing an output file or pipe, respectively. -It returns the process's exit status when closing an input pipe. -The return value is \-1 if the named file, pipe -or co-process was not opened with a redirection. -.PP -When -.I gawk -is invoked with the -.B \-\^\-traditional -option, -if the -.I fs -argument to the -.B \-F -option is \*(lqt\*(rq, then -.B FS -is set to the tab character. -Note that typing -.B "gawk \-F\et \&.\|.\|." -simply causes the shell to quote the \*(lqt,\*(rq, and does not pass -\*(lq\et\*(rq to the -.B \-F -option. -Since this is a rather ugly special case, it is not the default behavior. -This behavior also does not occur if -.B \-\^\-posix -has been specified. -To really get a tab character as the field separator, it is best to use -single quotes: -.BR "gawk \-F'\et' \&.\|.\|." . -.ig -.PP -If -.I gawk -was compiled for debugging, it -accepts the following additional options: -.TP -.PD 0 -.B \-Wparsedebug -.TP -.PD -.B \-\^\-parsedebug -Turn on -.IR yacc (1) -or -.IR bison (1) -debugging output during program parsing. -This option should only be of interest to the -.I gawk -maintainers, and may not even be compiled into -.IR gawk . -.. -.SH ENVIRONMENT VARIABLES -The -.B AWKPATH -environment variable can be used to provide a list of directories that -.I gawk -searches when looking for files named via the -.B \-f -and -.B \-\^\-file -options. -.PP -If -.B POSIXLY_CORRECT -exists in the environment, then -.I gawk -behaves exactly as if -.B \-\^\-posix -had been specified on the command line. -If -.B \-\^\-lint -has been specified, -.I gawk -issues a warning message to this effect. -.SH SEE ALSO -.IR egrep (1), -.IR getpid (2), -.IR getppid (2), -.IR getpgrp (2), -.IR getuid (2), -.IR geteuid (2), -.IR getgid (2), -.IR getegid (2), -.IR getgroups (2) -.PP -.IR "The AWK Programming Language" , -Alfred V. Aho, Brian W. Kernighan, Peter J. Weinberger, -Addison-Wesley, 1988. ISBN 0-201-07981-X. -.PP -\*(EP, -Edition 3.0, published by the Free Software Foundation, 2001. -.SH BUGS -The -.B \-F -option is not necessary given the command line variable assignment feature; -it remains only for backwards compatibility. -.PP -Syntactically invalid single character programs tend to overflow -the parse stack, generating a rather unhelpful message. 
Such programs -are surprisingly difficult to diagnose in the completely general case, -and the effort to do so really is not worth it. -.ig -.PP -.I Gawk -suffers from ``feeping creaturism.'' -It's too bad -.I perl -is so inelegant. -.. -.SH AUTHORS -The original version of \*(UX -.I awk -was designed and implemented by Alfred Aho, -Peter Weinberger, and Brian Kernighan of Bell Laboratories. Brian Kernighan -continues to maintain and enhance it. -.PP -Paul Rubin and Jay Fenlason, -of the Free Software Foundation, wrote -.IR gawk , -to be compatible with the original version of -.I awk -distributed in Seventh Edition \*(UX. -John Woods contributed a number of bug fixes. -David Trueman, with contributions -from Arnold Robbins, made -.I gawk -compatible with the new version of \*(UX -.IR awk . -Arnold Robbins is the current maintainer. -.PP -The initial DOS port was done by Conrad Kwok and Scott Garfinkle. -Scott Deifik is the current DOS maintainer. Pat Rankin did the -port to VMS, and Michal Jaegermann did the port to the Atari ST. -The port to OS/2 was done by Kai Uwe Rommel, with contributions and -help from Darrel Hankerson. Fred Fish supplied support for the Amiga, -Stephen Davies provided the Tandem port, -and Martin Brown provided the BeOS port. -.SH VERSION INFORMATION -This man page documents -.IR gawk , -version 3.1.0. -.SH BUG REPORTS -If you find a bug in -.IR gawk , -please send electronic mail to -.BR bug-gawk@gnu.org . -Please include your operating system and its revision, the version of -.I gawk -(from -.BR "gawk \-\^\-version" ), -what C compiler you used to compile it, and a test program -and data that are as small as possible for reproducing the problem. -.PP -Before sending a bug report, please do two things. First, verify that -you have the latest version of -.IR gawk . -Many bugs (usually subtle ones) are fixed at each release, and if -yours is out of date, the problem may already have been solved. -Second, please read this man page and the reference manual carefully to -be sure that what you think is a bug really is, instead of just a quirk -in the language. -.PP -Whatever you do, do -.B NOT -post a bug report in -.BR comp.lang.awk . -While the -.I gawk -developers occasionally read this newsgroup, posting bug reports there -is an unreliable way to report bugs. Instead, please use the electronic mail -addresses given above. -.SH ACKNOWLEDGEMENTS -Brian Kernighan of Bell Laboratories -provided valuable assistance during testing and debugging. -We thank him. -.SH COPYING PERMISSIONS -Copyright \(co 1989, 1991\-2001 Free Software Foundation, Inc. -.PP -Permission is granted to make and distribute verbatim copies of -this manual page provided the copyright notice and this permission -notice are preserved on all copies. -.ig -Permission is granted to process this file through troff and print the -results, provided the printed document carries copying permission -notice identical to this one except for the removal of this paragraph -(this paragraph not being relevant to the printed manual page). -.. -.PP -Permission is granted to copy and distribute modified versions of this -manual page under the conditions for verbatim copying, provided that -the entire resulting derived work is distributed under the terms of a -permission notice identical to this one. 
-.PP -Permission is granted to copy and distribute translations of this -manual page into another language, under the above conditions for -modified versions, except that this permission notice may be stated in -a translation approved by the Foundation. diff --git a/contrib/awk/doc/gawk.texi b/contrib/awk/doc/gawk.texi deleted file mode 100644 index 808ef6e..0000000 --- a/contrib/awk/doc/gawk.texi +++ /dev/null @@ -1,26169 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header (This is for running Texinfo on a region.) -@setfilename gawk.info -@settitle The GNU Awk User's Guide -@c %**end of header (This is for running Texinfo on a region.) - -@dircategory GNU Packages -@direntry -* Gawk: (gawk). A text scanning and processing language. -@end direntry -@dircategory Individual utilities -@direntry -* awk: (gawk)Invoking gawk. Text scanning and processing. -@end direntry - -@c @set xref-automatic-section-title - -@c The following information should be updated here only! -@c This sets the edition of the document, the version of gawk it -@c applies to and all the info about who's publishing this edition - -@c These apply across the board. -@set UPDATE-MONTH March, 2001 -@set VERSION 3.1 -@set PATCHLEVEL 0 - -@set FSF - -@set TITLE GAWK: Effective AWK Programming -@set SUBTITLE A User's Guide for GNU Awk -@set EDITION 3 - -@iftex -@set DOCUMENT book -@set CHAPTER chapter -@set APPENDIX appendix -@set SECTION section -@set SUBSECTION subsection -@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}} -@end iftex -@ifinfo -@set DOCUMENT Info file -@set CHAPTER major node -@set APPENDIX major node -@set SECTION minor node -@set SUBSECTION node -@set DARKCORNER (d.c.) -@end ifinfo -@ifhtml -@set DOCUMENT Web page -@set CHAPTER chapter -@set APPENDIX appendix -@set SECTION section -@set SUBSECTION subsection -@set DARKCORNER (d.c.) -@end ifhtml - -@c some special symbols -@iftex -@set LEQ @math{@leq} -@end iftex -@ifnottex -@set LEQ <= -@end ifnottex - -@set FN file name -@set FFN File Name -@set DF data file -@set DDF Data File -@set PVERSION version - -@ignore -Some comments on the layout for TeX. -1. Use at least texinfo.tex 2000-09-06.09 -2. I have done A LOT of work to make this look good. There are `@page' commands - and use of `@group ... @end group' in a number of places. If you muck - with anything, it's your responsibility not to break the layout. -@end ignore - -@c merge the function and variable indexes into the concept index -@ifinfo -@synindex fn cp -@synindex vr cp -@end ifinfo -@iftex -@syncodeindex fn cp -@syncodeindex vr cp -@end iftex - -@c If "finalout" is commented out, the printed output will show -@c black boxes that mark lines that are too long. Thus, it is -@c unwise to comment it out when running a master in case there are -@c overfulls which are deemed okay. - -@iftex -@finalout -@end iftex - -@c Comment out the "smallbook" for technical review. Saves -@c considerable paper. Remember to turn it back on *before* -@c starting the page-breaking work. -@smallbook - -@ifinfo -This file documents @command{awk}, a program that you can use to select -particular records in a file and perform operations upon them. - -This is Edition @value{EDITION} of @cite{@value{TITLE}: @value{SUBTITLE}}, -for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation of AWK. - -Copyright (C) 1989, 1991, 1992, 1993, 1996-2001 Free Software Foundation, Inc. 
- -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.1 or -any later version published by the Free Software Foundation; with the -Invariant Sections being ``GNU General Public License'', the Front-Cover -texts being (a) (see below), and with the Back-Cover Texts being (b) -(see below). A copy of the license is included in the section entitled -``GNU Free Documentation License''. - -@enumerate a -@item -``A GNU Manual'' - -@item -``You have freedom to copy and modify this GNU Manual, like GNU -software. Copies published by the Free Software Foundation raise -funds for GNU development.'' -@end enumerate -@end ifinfo - -@c Uncomment this for the release. Leaving it off saves paper -@c during editing and review. -@setchapternewpage odd - -@titlepage -@title @value{TITLE} -@subtitle @value{SUBTITLE} -@subtitle Edition @value{EDITION} -@subtitle @value{UPDATE-MONTH} -@author Arnold D. Robbins - -@c Include the Distribution inside the titlepage environment so -@c that headings are turned off. Headings on and off do not work. - -@page -@vskip 0pt plus 1filll -@ignore -The programs and applications presented in this book have been -included for their instructional value. They have been tested with care -but are not guaranteed for any particular purpose. The publisher does not -offer any warranties or representations, nor does it accept any -liabilities with respect to the programs or applications. -So there. -@sp 2 -UNIX is a registered trademark of The Open Group in the United States and other countries. @* -Microsoft, MS and MS-DOS are registered trademarks, and Windows is a -trademark of Microsoft Corporation in the United States and other -countries. @* -Atari, 520ST, 1040ST, TT, STE, Mega and Falcon are registered trademarks -or trademarks of Atari Corporation. @* -DEC, Digital, OpenVMS, ULTRIX and VMS are trademarks of Digital Equipment -Corporation. @* -@end ignore -``To boldly go where no man has gone before'' is a -Registered Trademark of Paramount Pictures Corporation. @* -@c sorry, i couldn't resist -@sp 3 -Copyright @copyright{} 1989, 1991, 1992, 1993, 1996-2001 Free Software Foundation, Inc. -@sp 2 - -This is Edition @value{EDITION} of @cite{@value{TITLE}: @value{SUBTITLE}}, -for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU -implementation of AWK. - -@sp 2 -Published by: -@sp 1 - -Free Software Foundation @* -59 Temple Place --- Suite 330 @* -Boston, MA 02111-1307 USA @* -Phone: +1-617-542-5942 @* -Fax: +1-617-542-2652 @* -Email: @email{gnu@@gnu.org} @* -URL: @uref{http://www.gnu.org/} @* - -@c This one is correct for gawk 3.1.0 from the FSF -ISBN 1-882114-28-0 @* - -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.1 or -any later version published by the Free Software Foundation; with the -Invariant Sections being ``GNU General Public License'', the Front-Cover -texts being (a) (see below), and with the Back-Cover Texts being (b) -(see below). A copy of the license is included in the section entitled -``GNU Free Documentation License''. - -@enumerate a -@item -``A GNU Manual'' - -@item -``You have freedom to copy and modify this GNU Manual, like GNU -software. Copies published by the Free Software Foundation raise -funds for GNU development.'' -@end enumerate -@sp 2 -Cover art by Etienne Suvasa. -@end titlepage - -@c Thanks to Bob Chassell for directions on doing dedications. 
-@iftex -@headings off -@page -@w{ } -@sp 9 -@center @i{To Miriam, for making me complete.} -@sp 1 -@center @i{To Chana, for the joy you bring us.} -@sp 1 -@center @i{To Rivka, for the exponential increase.} -@sp 1 -@center @i{To Nachum, for the added dimension.} -@sp 1 -@center @i{To Malka, for the new beginning.} -@w{ } -@page -@w{ } -@page -@headings on -@end iftex - -@iftex -@headings off -@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @| -@oddheading @| @| @strong{@thischapter}@ @ @ @thispage -@end iftex - -@ifinfo -@node Top, Foreword, (dir), (dir) -@top General Introduction -@c Preface node should come right after the Top -@c node, in `unnumbered' sections, then the chapter, `What is gawk'. -@c Licensing nodes are appendices, they're not central to AWK. - -This file documents @command{awk}, a program that you can use to select -particular records in a file and perform operations upon them. - -This is Edition @value{EDITION} of @cite{@value{TITLE}: @value{SUBTITLE}}, -for the @value{VERSION}.@value{PATCHLEVEL} version of the GNU implementation -of AWK. - -@end ifinfo - -@menu -* Foreword:: Some nice words about this - @value{DOCUMENT}. -* Preface:: What this @value{DOCUMENT} is about; brief - history and acknowledgments. -* Getting Started:: A basic introduction to using - @command{awk}. How to run an @command{awk} - program. Command-line syntax. -* Regexp:: All about matching things using regular - expressions. -* Reading Files:: How to read files and manipulate fields. -* Printing:: How to print using @command{awk}. Describes - the @code{print} and @code{printf} - statements. Also describes redirection of - output. -* Expressions:: Expressions are the basic building blocks - of statements. -* Patterns and Actions:: Overviews of patterns and actions. -* Arrays:: The description and use of arrays. Also - includes array-oriented control statements. -* Functions:: Built-in and user-defined functions. -* Internationalization:: Getting @command{gawk} to speak your - language. -* Advanced Features:: Stuff for advanced users, specific to - @command{gawk}. -* Invoking Gawk:: How to run @command{gawk}. -* Library Functions:: A Library of @command{awk} Functions. -* Sample Programs:: Many @command{awk} programs with complete - explanations. -* Language History:: The evolution of the @command{awk} - language. -* Installation:: Installing @command{gawk} under various - operating systems. -* Notes:: Notes about @command{gawk} extensions and - possible future work. -* Basic Concepts:: A very quick intoduction to programming - concepts. -* Glossary:: An explanation of some unfamiliar terms. -* Copying:: Your right to copy and distribute - @command{gawk}. -* GNU Free Documentation License:: The license for this @value{DOCUMENT}. -* Index:: Concept and Variable Index. - -@detailmenu -* History:: The history of @command{gawk} and - @command{awk}. -* Names:: What name to use to find @command{awk}. -* This Manual:: Using this @value{DOCUMENT}. Includes - sample input files that you can use. -* Conventions:: Typographical Conventions. -* Manual History:: Brief history of the GNU project and this - @value{DOCUMENT}. -* How To Contribute:: Helping to save the world. -* Acknowledgments:: Acknowledgments. -* Running gawk:: How to run @command{gawk} programs; - includes command-line syntax. -* One-shot:: Running a short throw-away @command{awk} - program. -* Read Terminal:: Using no input files (input from terminal - instead). -* Long:: Putting permanent @command{awk} programs in - files. 
-* Executable Scripts:: Making self-contained @command{awk} - programs. -* Comments:: Adding documentation to @command{gawk} - programs. -* Quoting:: More discussion of shell quoting issues. -* Sample Data Files:: Sample data files for use in the - @command{awk} programs illustrated in this - @value{DOCUMENT}. -* Very Simple:: A very simple example. -* Two Rules:: A less simple one-line example using two - rules. -* More Complex:: A more complex example. -* Statements/Lines:: Subdividing or combining statements into - lines. -* Other Features:: Other Features of @command{awk}. -* When:: When to use @command{gawk} and when to use - other things. -* Regexp Usage:: How to Use Regular Expressions. -* Escape Sequences:: How to write non-printing characters. -* Regexp Operators:: Regular Expression Operators. -* Character Lists:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. -* Leftmost Longest:: How much text matches. -* Computed Regexps:: Using Dynamic Regexps. -* Records:: Controlling how data is split into records. -* Fields:: An introduction to fields. -* Non-Constant Fields:: Non-constant Field Numbers. -* Changing Fields:: Changing the Contents of a Field. -* Field Separators:: The field separator and how to change it. -* Regexp Field Splitting:: Using regexps as the field separator. -* Single Character Fields:: Making each character a separate field. -* Command Line Field Separator:: Setting @code{FS} from the command-line. -* Field Splitting Summary:: Some final points and a summary table. -* Constant Size:: Reading constant width data. -* Multiple Line:: Reading multi-line records. -* Getline:: Reading files under explicit program - control using the @code{getline} function. -* Plain Getline:: Using @code{getline} with no arguments. -* Getline/Variable:: Using @code{getline} into a variable. -* Getline/File:: Using @code{getline} from a file. -* Getline/Variable/File:: Using @code{getline} into a variable from a - file. -* Getline/Pipe:: Using @code{getline} from a pipe. -* Getline/Variable/Pipe:: Using @code{getline} into a variable from a - pipe. -* Getline/Coprocess:: Using @code{getline} from a coprocess. -* Getline/Variable/Coprocess:: Using @code{getline} into a variable from a - coprocess. -* Getline Notes:: Important things to know about - @code{getline}. -* Getline Summary:: Summary of @code{getline} Variants. -* Print:: The @code{print} statement. -* Print Examples:: Simple examples of @code{print} statements. -* Output Separators:: The output separators and how to change - them. -* OFMT:: Controlling Numeric Output With - @code{print}. -* Printf:: The @code{printf} statement. -* Basic Printf:: Syntax of the @code{printf} statement. -* Control Letters:: Format-control letters. -* Format Modifiers:: Format-specification modifiers. -* Printf Examples:: Several examples. -* Redirection:: How to redirect output to multiple files - and pipes. -* Special Files:: File name interpretation in @command{gawk}. - @command{gawk} allows access to inherited - file descriptors. -* Special FD:: Special files for I/O. -* Special Process:: Special files for process information. -* Special Network:: Special files for network communications. -* Special Caveats:: Things to watch out for. -* Close Files And Pipes:: Closing Input and Output Files and Pipes. -* Constants:: String, numeric and regexp constants. -* Scalar Constants:: Numeric and string constants. 
-* Non-decimal-numbers:: What are octal and hex numbers. -* Regexp Constants:: Regular Expression constants. -* Using Constant Regexps:: When and how to use a regexp constant. -* Variables:: Variables give names to values for later - use. -* Using Variables:: Using variables in your programs. -* Assignment Options:: Setting variables on the command-line and a - summary of command-line syntax. This is an - advanced method of input. -* Conversion:: The conversion of strings to numbers and - vice versa. -* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, - etc.) -* Concatenation:: Concatenating strings. -* Assignment Ops:: Changing the value of a variable or a - field. -* Increment Ops:: Incrementing the numeric value of a - variable. -* Truth Values:: What is ``true'' and what is ``false''. -* Typing and Comparison:: How variables acquire types and how this - affects comparison of numbers and strings - with @samp{<}, etc. -* Boolean Ops:: Combining comparison expressions using - boolean operators @samp{||} (``or''), - @samp{&&} (``and'') and @samp{!} (``not''). -* Conditional Exp:: Conditional expressions select between two - subexpressions under control of a third - subexpression. -* Function Calls:: A function call is an expression. -* Precedence:: How various operators nest. -* Pattern Overview:: What goes into a pattern. -* Regexp Patterns:: Using regexps as patterns. -* Expression Patterns:: Any expression can be used as a pattern. -* Ranges:: Pairs of patterns specify record ranges. -* BEGIN/END:: Specifying initialization and cleanup - rules. -* Using BEGIN/END:: How and why to use BEGIN/END rules. -* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. -* Empty:: The empty pattern, which matches every - record. -* Using Shell Variables:: How to use shell variables with - @command{awk}. -* Action Overview:: What goes into an action. -* Statements:: Describes the various control statements in - detail. -* If Statement:: Conditionally execute some @command{awk} - statements. -* While Statement:: Loop until some condition is satisfied. -* Do Statement:: Do specified action while looping until - some condition is satisfied. -* For Statement:: Another looping statement, that provides - initialization and increment clauses. -* Break Statement:: Immediately exit the innermost enclosing - loop. -* Continue Statement:: Skip to the end of the innermost enclosing - loop. -* Next Statement:: Stop processing the current input record. -* Nextfile Statement:: Stop processing the current file. -* Exit Statement:: Stop execution of @command{awk}. -* Built-in Variables:: Summarizes the built-in variables. -* User-modified:: Built-in variables that you change to - control @command{awk}. -* Auto-set:: Built-in variables where @command{awk} - gives you information. -* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}. -* Array Intro:: Introduction to Arrays -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Array Example:: Basic Example of an Array -* Scanning an Array:: A variation of the @code{for} statement. It - loops through the indices of an array's - existing elements. -* Delete:: The @code{delete} statement removes an - element from an array. -* Numeric Array Subscripts:: How to use numbers as subscripts in - @command{awk}. -* Uninitialized Subscripts:: Using Uninitialized variables as - subscripts. -* Multi-dimensional:: Emulating multidimensional arrays in - @command{awk}. 
-* Multi-scanning:: Scanning multidimensional arrays. -* Array Sorting:: Sorting array values and indices. -* Built-in:: Summarizes the built-in functions. -* Calling Built-in:: How to call built-in functions. -* Numeric Functions:: Functions that work with numbers, including - @code{int}, @code{sin} and @code{rand}. -* String Functions:: Functions for string manipulation, such as - @code{split}, @code{match} and - @code{sprintf}. -* Gory Details:: More than you want to know about @samp{\} - and @samp{&} with @code{sub}, @code{gsub}, - and @code{gensub}. -* I/O Functions:: Functions for files and shell commands. -* Time Functions:: Functions for dealing with timestamps. -* Bitwise Functions:: Functions for bitwise operations. -* I18N Functions:: Functions for string translation. -* User-defined:: Describes User-defined functions in detail. -* Definition Syntax:: How to write definitions and what they - mean. -* Function Example:: An example function definition and what it - does. -* Function Caveats:: Things to watch out for. -* Return Statement:: Specifying the value a function returns. -* Dynamic Typing:: How variable types can change at runtime. -* I18N and L10N:: Internationalization and Localization. -* Explaining gettext:: How GNU @code{gettext} works. -* Programmer i18n:: Features for the programmer. -* Translator i18n:: Features for the translator. -* String Extraction:: Extracting marked strings. -* Printf Ordering:: Rearranging @code{printf} arguments. -* I18N Portability:: @command{awk}-level portability issues. -* I18N Example:: A simple i18n example. -* Gawk I18N:: @command{gawk} is also internationalized. -* Non-decimal Data:: Allowing non-decimal input data. -* Two-way I/O:: Two-way communications with another - process. -* TCP/IP Networking:: Using @command{gawk} for network - programming. -* Portal Files:: Using @command{gawk} with BSD portals. -* Profiling:: Profiling your @command{awk} programs. -* Command Line:: How to run @command{awk}. -* Options:: Command-line options and their meanings. -* Other Arguments:: Input file names and variable assignments. -* AWKPATH Variable:: Searching directories for @command{awk} - programs. -* Obsolete:: Obsolete Options and/or features. -* Undocumented:: Undocumented Options and Features. -* Known Bugs:: Known Bugs in @command{gawk}. -* Library Names:: How to best name private global variables - in library functions. -* General Functions:: Functions that are of general use. -* Nextfile Function:: Two implementations of a @code{nextfile} - function. -* Assert Function:: A function for assertions in @command{awk} - programs. -* Round Function:: A function for rounding if @code{sprintf} - does not do it correctly. -* Cliff Random Function:: The Cliff Random Number Generator. -* Ordinal Functions:: Functions for using characters as numbers - and vice versa. -* Join Function:: A function to join an array into a string. -* Gettimeofday Function:: A function to get formatted times. -* Data File Management:: Functions for managing command-line data - files. -* Filetrans Function:: A function for handling data file - transitions. -* Rewind Function:: A function for rereading the current file. -* File Checking:: Checking that data files are readable. -* Ignoring Assigns:: Treating assignments as file names. -* Getopt Function:: A function for processing command-line - arguments. -* Passwd Functions:: Functions for getting user information. -* Group Functions:: Functions for getting group information. 
-* Running Examples:: How to run these examples. -* Clones:: Clones of common utilities. -* Cut Program:: The @command{cut} utility. -* Egrep Program:: The @command{egrep} utility. -* Id Program:: The @command{id} utility. -* Split Program:: The @command{split} utility. -* Tee Program:: The @command{tee} utility. -* Uniq Program:: The @command{uniq} utility. -* Wc Program:: The @command{wc} utility. -* Miscellaneous Programs:: Some interesting @command{awk} programs. -* Dupword Program:: Finding duplicated words in a document. -* Alarm Program:: An alarm clock. -* Translate Program:: A program similar to the @command{tr} - utility. -* Labels Program:: Printing mailing labels. -* Word Sorting:: A program to produce a word usage count. -* History Sorting:: Eliminating duplicate entries from a - history file. -* Extract Program:: Pulling out programs from Texinfo source - files. -* Simple Sed:: A Simple Stream Editor. -* Igawk Program:: A wrapper for @command{awk} that includes - files. -* V7/SVR3.1:: The major changes between V7 and System V - Release 3.1. -* SVR4:: Minor changes between System V Releases 3.1 - and 4. -* POSIX:: New features from the POSIX standard. -* BTL:: New features from the Bell Laboratories - version of @command{awk}. -* POSIX/GNU:: The extensions in @command{gawk} not in - POSIX @command{awk}. -* Contributors:: The major contributors to @command{gawk}. -* Gawk Distribution:: What is in the @command{gawk} distribution. -* Getting:: How to get the distribution. -* Extracting:: How to extract the distribution. -* Distribution contents:: What is in the distribution. -* Unix Installation:: Installing @command{gawk} under various - versions of Unix. -* Quick Installation:: Compiling @command{gawk} under Unix. -* Additional Configuration Options:: Other compile-time options. -* Configuration Philosophy:: How it's all supposed to work. -* Non-Unix Installation:: Installation on Other Operating Systems. -* Amiga Installation:: Installing @command{gawk} on an Amiga. -* BeOS Installation:: Installing @command{gawk} on BeOS. -* PC Installation:: Installing and Compiling @command{gawk} on - MS-DOS and OS/2. -* PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling @command{gawk} for MS-DOS, Win32, - and OS/2. -* PC Using:: Running @command{gawk} on MS-DOS, Win32 and - OS/2. -* VMS Installation:: Installing @command{gawk} on VMS. -* VMS Compilation:: How to compile @command{gawk} under VMS. -* VMS Installation Details:: How to install @command{gawk} under VMS. -* VMS Running:: How to run @command{gawk} under VMS. -* VMS POSIX:: Alternate instructions for VMS POSIX. -* Unsupported:: Systems whose ports are no longer - supported. -* Atari Installation:: Installing @command{gawk} on the Atari ST. -* Atari Compiling:: Compiling @command{gawk} on Atari. -* Atari Using:: Running @command{gawk} on Atari. -* Tandem Installation:: Installing @command{gawk} on a Tandem. -* Bugs:: Reporting Problems and Bugs. -* Other Versions:: Other freely available @command{awk} - implementations. -* Compatibility Mode:: How to disable certain @command{gawk} - extensions. -* Additions:: Making Additions To @command{gawk}. -* Adding Code:: Adding code to the main body of - @command{gawk}. -* New Ports:: Porting @command{gawk} to a new operating - system. -* Dynamic Extensions:: Adding new built-in functions to - @command{gawk}. -* Internals:: A brief look at some @command{gawk} - internals. -* Sample Library:: A example of new functions. 
-* Internal File Description:: What the new functions will do. -* Internal File Ops:: The code for internal file operations. -* Using Internal File Ops:: How to use an external extension. -* Future Extensions:: New features that may be implemented one - day. -* Basic High Level:: The high level view. -* Basic Data Typing:: A very quick intro to data types. -* Floating Point Issues:: Stuff to know about floating-point numbers. -@end detailmenu -@end menu - -@c dedication for Info file -@ifinfo -@center To Miriam, for making me complete. -@sp 1 -@center To Chana, for the joy you bring us. -@sp 1 -@center To Rivka, for the exponential increase. -@sp 1 -@center To Nachum, for the added dimension. -@sp 1 -@center To Malka, for the new beginning. -@end ifinfo - -@summarycontents -@contents - -@node Foreword, Preface, Top, Top -@unnumbered Foreword - -Arnold Robbins and I are good friends. We were introduced 11 years ago -by circumstances---and our favorite programming language, AWK. -The circumstances started a couple of years -earlier. I was working at a new job and noticed an unplugged -Unix computer sitting in the corner. No one knew how to use it, -and neither did I. However, -a couple of days later it was running, and -I was @code{root} and the one-and-only user. -That day, I began the transition from statistician to Unix programmer. - -On one of many trips to the library or bookstore in search of -books on Unix, I found the gray AWK book, a.k.a. Aho, Kernighan and -Weinberger, @cite{The AWK Programming Language}, Addison-Wesley, -1988. AWK's simple programming paradigm---find a pattern in the -input and then perform an action---often reduced complex or tedious -data manipulations to few lines of code. I was excited to try my -hand at programming in AWK. - -Alas, the @command{awk} on my computer was a limited version of the -language described in the AWK book. I discovered that my computer -had ``old @command{awk}'' and the AWK book described ``new @command{awk}.'' -I learned that this was typical; the old version refused to step -aside or relinquish its name. If a system had a new @command{awk}, it was -invariably called @command{nawk}, and few systems had it. -The best way to get a new @command{awk} was to @command{ftp} the source code for -@command{gawk} from @code{prep.ai.mit.edu}. @command{gawk} was a version of -new @command{awk} written by David Trueman and Arnold, and available under -the GNU General Public License. - -(Incidentally, -it's no longer difficult to find a new @command{awk}. @command{gawk} ships with -Linux, and you can download binaries or source code for almost -any system; my wife uses @command{gawk} on her VMS box.) - -My Unix system started out unplugged from the wall; it certainly was not -plugged into a network. So, oblivious to the existence of @command{gawk} -and the Unix community in general, and desiring a new @command{awk}, I wrote -my own, called @command{mawk}. -Before I was finished I knew about @command{gawk}, -but it was too late to stop, so I eventually posted -to a @code{comp.sources} newsgroup. - -A few days after my posting, I got a friendly email -from Arnold introducing -himself. He suggested we share design and algorithms and -attached a draft of the POSIX standard so -that I could update @command{mawk} to support language extensions added -after publication of the AWK book. - -Frankly, if our roles had -been reversed, I would not have been so open and we probably would -have never met. I'm glad we did meet. 
-He is an AWK expert's AWK expert and a genuinely nice person. -Arnold contributes significant amounts of his -expertise and time to the Free Software Foundation. - -This book is the @command{gawk} reference manual, but at its core it -is a book about AWK programming that -will appeal to a wide audience. -It is a definitive reference to the AWK language as defined by the -1987 Bell Labs release and codified in the 1992 POSIX Utilities -standard. - -On the other hand, the novice AWK programmer can study -a wealth of practical programs that emphasize -the power of AWK's basic idioms: -data driven control-flow, pattern matching with regular expressions, -and associative arrays. -Those looking for something new can try out @command{gawk}'s -interface to network protocols via special @file{/inet} files. - -The programs in this book make clear that an AWK program is -typically much smaller and faster to develop than -a counterpart written in C. -Consequently, there is often a payoff to prototype an -algorithm or design in AWK to get it running quickly and expose -problems early. Often, the interpreted performance is adequate -and the AWK prototype becomes the product. - -The new @command{pgawk} (profiling @command{gawk}), produces -program execution counts. -I recently experimented with an algorithm that for -@math{n} lines of input, exhibited -@tex -$\sim\! Cn^2$ -@end tex -@ifnottex -~ C n^2 -@end ifnottex -performance, while -theory predicted -@tex -$\sim\! Cn\log n$ -@end tex -@ifnottex -~ C n log n -@end ifnottex -behavior. A few minutes poring -over the @file{awkprof.out} profile pinpointed the problem to -a single line of code. @command{pgawk} is a welcome addition to -my programmer's toolbox. - -Arnold has distilled over a decade of experience writing and -using AWK programs, and developing @command{gawk}, into this book. If you use -AWK or want to learn how, then read this book. - -@display -Michael Brennan -Author of @command{mawk} -@end display - -@node Preface, Getting Started, Foreword, Top -@unnumbered Preface -@c I saw a comment somewhere that the preface should describe the book itself, -@c and the introduction should describe what the book covers. -@c -@c 12/2000: Chuck wants the preface & intro combined. - -Several kinds of tasks occur repeatedly -when working with text files. -You might want to extract certain lines and discard the rest. -Or you may need to make changes wherever certain patterns appear, -but leave the rest of the file alone. -Writing single-use programs for these tasks in languages such as C, C++ or Pascal -is time-consuming and inconvenient. -Such jobs are often easier with @command{awk}. -The @command{awk} utility interprets a special-purpose programming language -that makes it easy to handle simple data-reformatting jobs. - -The GNU implementation of @command{awk} is called @command{gawk}; it is fully -compatible with the System V Release 4 version of -@command{awk}. @command{gawk} is also compatible with the POSIX -specification of the @command{awk} language. This means that all -properly written @command{awk} programs should work with @command{gawk}. -Thus, we usually don't distinguish between @command{gawk} and other -@command{awk} implementations. 
- -@cindex uses of @command{awk} -@cindex applications of @command{awk} -Using @command{awk} allows you to: - -@itemize @bullet -@item -Manage small, personal databases - -@item -Generate reports - -@item -Validate data - -@item -Produce indexes and perform other document preparation tasks - -@item -Experiment with algorithms that you can adapt later to other computer -languages. -@end itemize - -@cindex uses of @command{gawk} -In addition, -@command{gawk} -provides facilities that make it easy to: - -@itemize @bullet -@item -Extract bits and pieces of data for processing - -@item -Sort data - -@item -Perform simple network communications. -@end itemize - -This @value{DOCUMENT} teaches you about the @command{awk} language and -how you can use it effectively. You should already be familiar with basic -system commands, such as @command{cat} and @command{ls},@footnote{These commands -are available on POSIX-compliant systems, as well as on traditional Unix -based systems. If you are using some other operating system, you still need to -be familiar with the ideas of I/O redirection and pipes.} as well as basic shell -facilities, such as Input/Output (I/O) redirection and pipes. - -Implementations of the @command{awk} language are available for many -different computing environments. This @value{DOCUMENT}, while describing -the @command{awk} language in general, also describes the particular -implementation of @command{awk} called @command{gawk} (which stands for -``GNU awk''). @command{gawk} runs on a broad range of Unix systems, -ranging from 80386 PC-based computers, up through large-scale systems, -such as Crays. @command{gawk} has also been ported to Mac OS X, -MS-DOS, Microsoft Windows (all versions) and OS/2 PC's, Atari and Amiga -micro-computers, BeOS, Tandem D20, and VMS. - -@menu -* History:: The history of @command{gawk} and - @command{awk}. -* Names:: What name to use to find @command{awk}. -* This Manual:: Using this @value{DOCUMENT}. Includes sample - input files that you can use. -* Conventions:: Typographical Conventions. -* Manual History:: Brief history of the GNU project and this - @value{DOCUMENT}. -* How To Contribute:: Helping to save the world. -* Acknowledgments:: Acknowledgments. -@end menu - -@node History, Names, Preface, Preface -@unnumberedsec History of @command{awk} and @command{gawk} -@cindex recipe for a programming language -@cindex programming language, recipe for -@center Recipe For A Programming Language - -@multitable {2 parts} {1 part @code{egrep}} {1 part @code{snobol}} -@item @tab 1 part @code{egrep} @tab 1 part @code{snobol} -@item @tab 2 parts @code{ed} @tab 3 parts C -@end multitable - -@quotation -Blend all parts well using @code{lex} and @code{yacc}. -Document minimally and release. - -After eight years, add another part @code{egrep} and two -more parts C. Document very well and release. -@end quotation - -@cindex acronym -@cindex history of @command{awk} -@cindex Aho, Alfred -@cindex Weinberger, Peter -@cindex Kernighan, Brian -@cindex old @command{awk} -@cindex new @command{awk} -The name @command{awk} comes from the initials of its designers: Alfred V.@: -Aho, Peter J.@: Weinberger and Brian W.@: Kernighan. The original version of -@command{awk} was written in 1977 at AT&T Bell Laboratories. -In 1985, a new version made the programming -language more powerful, introducing user-defined functions, multiple input -streams, and computed regular expressions. -This new version became widely available with Unix System V -Release 3.1 (SVR3.1). 
-The version in SVR4 added some new features and cleaned -up the behavior in some of the ``dark corners'' of the language. -The specification for @command{awk} in the POSIX Command Language -and Utilities standard further clarified the language. -Both the @command{gawk} designers and the original Bell Laboratories @command{awk} -designers provided feedback for the POSIX specification. - -@cindex Rubin, Paul -@cindex Fenlason, Jay -@cindex Trueman, David -Paul Rubin wrote the GNU implementation, @command{gawk}, in 1986. -Jay Fenlason completed it, with advice from Richard Stallman. John Woods -contributed parts of the code as well. In 1988 and 1989, David Trueman, with -help from me, thoroughly reworked @command{gawk} for compatibility -with the newer @command{awk}. -Circa 1995, I became the primary maintainer. -Current development focuses on bug fixes, -performance improvements, standards compliance, and occasionally, new features. - -In May of 1997, J@"urgen Kahrs felt the need for network access -from @command{awk}, and with a little help from me, set about adding -features to do this for @command{gawk}. At that time, he also -wrote the bulk of -@cite{TCP/IP Internetworking with @command{gawk}} -(a separate document, available as part of the @command{gawk} distribution). -His code finally became part of the main @command{gawk} distribution -with @command{gawk} @value{PVERSION} 3.1. - -@xref{Contributors, ,Major Contributors to @command{gawk}}, -for a complete list of those who made important contributions to @command{gawk}. - -@node Names, This Manual, History, Preface -@section A Rose by Any Other Name - -@cindex old @command{awk} vs. new @command{awk} -@cindex new @command{awk} vs. old @command{awk} -The @command{awk} language has evolved over the years. Full details are -provided in @ref{Language History, ,The Evolution of the @command{awk} Language}. -The language described in this @value{DOCUMENT} -is often referred to as ``new @command{awk}'' (@command{nawk}). - -Because of this, many systems have multiple -versions of @command{awk}. -Some systems have an @command{awk} utility that implements the -original version of the @command{awk} language and a @command{nawk} utility -for the new -version. -Others have an @command{oawk} for the ``old @command{awk}'' -language and plain @command{awk} for the new one. Still others only -have one version, which is usually the new one.@footnote{Often, these systems -use @command{gawk} for their @command{awk} implementation!} - -All in all, this makes it difficult for you to know which version of -@command{awk} you should run when writing your programs. The best advice -I can give here is to check your local documentation. Look for @command{awk}, -@command{oawk}, and @command{nawk}, as well as for @command{gawk}. -It is likely that you already -have some version of new @command{awk} on your system, which is what -you should use when running your programs. (Of course, if you're reading -this @value{DOCUMENT}, chances are good that you have @command{gawk}!) - -Throughout this @value{DOCUMENT}, whenever we refer to a language feature -that should be available in any complete implementation of POSIX @command{awk}, -we simply use the term @command{awk}. When referring to a feature that is -specific to the GNU implementation, we use the term @command{gawk}. 
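-
-Incidentally, if you want to know whether the @command{awk} on your
-system is really @command{gawk}, asking it for its version is a quick
-check.  (The exact banner depends upon which release is installed;
-only the first line of the output is shown here.)
-
-@example
-$ awk --version
-@print{} GNU Awk 3.1.0
-@end example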
- -@node This Manual, Conventions, Names, Preface -@section Using This Book -@cindex book, using this -@cindex using this book -@cindex language, @command{awk} -@cindex program, @command{awk} -@ignore -@cindex @command{awk} language -@cindex @command{awk} program -@end ignore -@cindex Brandon, Dick -@cindex sex, comparisons with -@quotation -@i{Documentation is like sex: when it is good, it is very, very good; and -when it is bad, it is better than nothing.}@* -Dick Brandon -@end quotation - -The term @command{awk} refers to a particular program as well as to the language you -use to tell this program what to do. When we need to be careful, we call -the program ``the @command{awk} utility'' and the language ``the @command{awk} -language.'' -This @value{DOCUMENT} explains -both the @command{awk} language and how to run the @command{awk} utility. -The term @dfn{@command{awk} program} refers to a program written by you in -the @command{awk} programming language. - -Primarily, this @value{DOCUMENT} explains the features of @command{awk}, -as defined in the POSIX standard. It does so in the context of the -@command{gawk} implementation. While doing so, it also -attempts to describe important differences between @command{gawk} -and other @command{awk} implementations.@footnote{All such differences -appear in the index under the heading ``differences between @command{gawk} and -@command{awk}.''} Finally, any @command{gawk} features that are not in -the POSIX standard for @command{awk} are noted. - -@ifnotinfo -This @value{DOCUMENT} has the difficult task of being both a tutorial and a reference. -If you are a novice, feel free to skip over details that seem too complex. -You should also ignore the many cross references; they are for the -expert user and for the online Info version of the document. -@end ifnotinfo - -There are -subsections labelled -as @strong{Advanced Notes} -scattered throughout the @value{DOCUMENT}. -They add a more complete explanation of points that are relevant, but not likely -to be of interest on first reading. -All appear in the index, under the heading ``advanced notes.'' - -Most of the time, the examples use complete @command{awk} programs. -In some of the more advanced sections, only the part of the @command{awk} -program that illustrates the concept currently being described is shown. - -While this @value{DOCUMENT} is aimed principally at people who have not been -exposed -to @command{awk}, there is a lot of information here that even the @command{awk} -expert should find useful. In particular, the description of POSIX -@command{awk} and the example programs in -@ref{Library Functions, ,A Library of @command{awk} Functions}, and in -@ref{Sample Programs, ,Practical @command{awk} Programs}, -should be of interest. - -@ref{Getting Started, ,Getting Started with @command{awk}}, -provides the essentials you need to know to begin using @command{awk}. - -@ref{Regexp, ,Regular Expressions}, -introduces regular expressions in general, and in particular the flavors -supported by POSIX @command{awk} and @command{gawk}. - -@ref{Reading Files, , Reading Input Files}, -describes how @command{awk} reads your data. -It introduces the concepts of records and fields, as well -as the @code{getline} command. -I/O redirection is first described here. - -@ref{Printing, , Printing Output}, -describes how @command{awk} programs can produce output with -@code{print} and @code{printf}. 
- -@ref{Expressions}, -describes expressions, which are the basic building blocks -for getting most things done in a program. - -@ref{Patterns and Actions, ,Patterns Actions and Variables}, -describes how to write patterns for matching records, actions for -doing something when a record is matched, and the built-in variables -@command{awk} and @command{gawk} use. - -@ref{Arrays, ,Arrays in @command{awk}}, -covers @command{awk}'s one-and-only data structure: associative arrays. -Deleting array elements and whole arrays is also described, as well as -sorting arrays in @command{gawk}. - -@ref{Functions}, -describes the built-in functions @command{awk} and -@command{gawk} provide for you, as well as how to define -your own functions. - -@ref{Internationalization, ,Internationalization with @command{gawk}}, -describes special features in @command{gawk} for translating program -messages into different languages at runtime. - -@ref{Advanced Features, ,Advanced Features of @command{gawk}}, -describes a number of @command{gawk}-specific advanced features. -Of particular note -are the abilities to have two-way communications with another process, -perform TCP/IP networking, and -profile your @command{awk} programs. - -@ref{Invoking Gawk, ,Running @command{awk} and @command{gawk}}, -describes how to run @command{gawk}, the meaning of its -command-line options, and how it finds @command{awk} -program source files. - -@ref{Library Functions, ,A Library of @command{awk} Functions}, and -@ref{Sample Programs, ,Practical @command{awk} Programs}, -provide many sample @command{awk} programs. -Reading them allows you to see @command{awk} being used -for solving real problems. - -@ref{Language History, ,The Evolution of the @command{awk} Language}, -describes how the @command{awk} language has evolved since it was -first released to present. It also describes how @command{gawk} -has acquired features over time. - -@ref{Installation, ,Installing @command{gawk}}, -describes how to get @command{gawk}, how to compile it -under Unix, and how to compile and use it on different -non-Unix systems. It also describes how to report bugs -in @command{gawk} and where to get three other freely -available implementations of @command{awk}. - -@ref{Notes, ,Implementation Notes}, -describes how to disable @command{gawk}'s extensions, as -well as how to contribute new code to @command{gawk}, -how to write extension libraries, and some possible -future directions for @command{gawk} development. - -@ref{Basic Concepts, ,Basic Programming Concepts}, -provides some very cursory background material for those who -are completely unfamiliar with computer programming. -Also centralized there is a discussion of some of the issues -involved in using floating-point numbers. - -The -@ref{Glossary}, -defines most, if not all, the significant terms used -throughout the book. -If you find terms that you aren't familiar with, try looking them up. - -@ref{Copying, ,GNU General Public License}, and -@ref{GNU Free Documentation License}, -present the licenses that cover the @command{gawk} source code, -and this @value{DOCUMENT}, respectively. - -@node Conventions, Manual History, This Manual, Preface -@section Typographical Conventions - -@cindex Texinfo -This @value{DOCUMENT} is written using Texinfo, the GNU documentation -formatting language. -A single Texinfo source file is used to produce both the printed and online -versions of the documentation. 
-@iftex -Because of this, the typographical conventions -are slightly different than in other books you may have read. -@end iftex -@ifnottex -This @value{SECTION} briefly documents the typographical conventions used in Texinfo. -@end ifnottex - -Examples you would type at the command-line are preceded by the common -shell primary and secondary prompts, @samp{$} and @samp{>}. -Output from the command is preceded by the glyph ``@print{}''. -This typically represents the command's standard output. -Error messages, and other output on the command's standard error, are preceded -by the glyph ``@error{}''. For example: - -@example -$ echo hi on stdout -@print{} hi on stdout -$ echo hello on stderr 1>&2 -@error{} hello on stderr -@end example - -@iftex -In the text, command names appear in @code{this font}, while code segments -appear in the same font and quoted, @samp{like this}. Some things are -emphasized @emph{like this}, and if a point needs to be made -strongly, it is done @strong{like this}. The first occurrence of -a new term is usually its @dfn{definition} and appears in the same -font as the previous occurrence of ``definition'' in this sentence. -@value{FN}s are indicated like this: @file{/path/to/ourfile}. -@end iftex - -Characters that you type at the keyboard look @kbd{like this}. In particular, -there are special characters called ``control characters.'' These are -characters that you type by holding down both the @kbd{CONTROL} key and -another key, at the same time. For example, a @kbd{Ctrl-d} is typed -by first pressing and holding the @kbd{CONTROL} key, next -pressing the @kbd{d} key and finally releasing both keys. - -@c fakenode --- for prepinfo -@subsubheading Dark Corners -@cindex Kernighan, Brian -@quotation -@i{Dark corners are basically fractal --- no matter how much -you illuminate, there's always a smaller but darker one.}@* -Brian Kernighan -@end quotation - -@cindex d.c., see ``dark corner'' -@cindex dark corner -Until the POSIX standard (and @cite{The Gawk Manual}), -many features of @command{awk} were either poorly documented or not -documented at all. Descriptions of such features -(often called ``dark corners'') are noted in this @value{DOCUMENT} with -@iftex -the picture of a flashlight in the margin, as shown here. -@value{DARKCORNER} -@end iftex -@ifnottex -``(d.c.)''. -@end ifnottex -They also appear in the index under the heading ``dark corner.'' - -As noted by the opening quote, though, any -coverage of dark corners -is, by definition, something that is incomplete. - -@node Manual History, How To Contribute, Conventions, Preface -@unnumberedsec The GNU Project and This Book -@cindex Torvalds, Linus -@cindex sex, comparisons with -@quotation -@i{Software is like sex: it's better when it's free.}@* -Linus Torvalds -@end quotation - -@cindex FSF -@cindex Free Software Foundation -@cindex Stallman, Richard -The Free Software Foundation (FSF) is a non-profit organization dedicated -to the production and distribution of freely distributable software. -It was founded by Richard M.@: Stallman, the author of the original -Emacs editor. GNU Emacs is the most widely used version of Emacs today. - -@cindex GNU Project -@cindex GPL -@cindex General Public License -@cindex GNU General Public License -@cindex online documentation -@cindex documentation, online -The GNU@footnote{GNU stands for ``GNU's not Unix.''} -Project is an ongoing effort on the part of the Free Software -Foundation to create a complete, freely distributable, POSIX-compliant -computing environment. 
-The FSF uses the ``GNU General Public License'' (GPL) to ensure that -their software's -source code is always available to the end user. A -copy of the GPL is included -@ifnotinfo -in this @value{DOCUMENT} -@end ifnotinfo -for your reference -(@pxref{Copying, ,GNU General Public License}). -The GPL applies to the C language source code for @command{gawk}. -To find out more about the FSF and the GNU Project online, -see @uref{http://www.gnu.org, the GNU Project's home page}. -This @value{DOCUMENT} may also be read from -@uref{http://www.gnu.org/manual/gawk/, their web site}. - -A shell, an editor (Emacs), highly portable optimizing C, C++, and -Objective-C compilers, a symbolic debugger and dozens of large and -small utilities (such as @command{gawk}), have all been completed and are -freely available. The GNU operating -system kernel (the HURD), has been released but is still in an early -stage of development. - -@cindex Linux -@cindex GNU/Linux -@cindex BSD-based operating systems -@cindex NetBSD -@cindex FreeBSD -@cindex OpenBSD -Until the GNU operating system is more fully developed, you should -consider using GNU/Linux, a freely distributable, Unix-like operating -system for Intel 80386, DEC Alpha, Sun SPARC, IBM S/390, and other -systems.@footnote{The terminology ``GNU/Linux'' is explained -in the @ref{Glossary}.} -There are -many books on GNU/Linux. One that is freely available is @cite{Linux -Installation and Getting Started}, by Matt Welsh. -Many GNU/Linux distributions are often available in computer stores or -bundled on CD-ROMs with books about Linux. -(There are three other freely available, Unix-like operating systems for -80386 and other systems: NetBSD, FreeBSD, and OpenBSD. All are based on the -4.4-Lite Berkeley Software Distribution, and they use recent versions -of @command{gawk} for their versions of @command{awk}.) - -@ifnotinfo -The @value{DOCUMENT} you are reading now is actually free---at least, the -information in it is free to anyone. The machine readable -source code for the @value{DOCUMENT} comes with @command{gawk}; anyone -may take this @value{DOCUMENT} to a copying machine and make as many -copies of it as they like. (Take a moment to check the Free Documentation -License; see @ref{GNU Free Documentation License}.) - -Although you could just print it out yourself, bound books are much -easier to read and use. Furthermore, -the proceeds from sales of this book go back to the FSF -to help fund development of more free software. -@end ifnotinfo - -@ignore -@cindex Close, Diane -The @value{DOCUMENT} itself has gone through several previous, -preliminary editions. -Paul Rubin wrote the very first draft of @cite{The GAWK Manual}; -it was around 40 pages in size. -Diane Close and Richard Stallman improved it, yielding the -version which I started working with in the fall of 1988. -It was around 90 pages long and barely described the original, ``old'' -version of @command{awk}. After substantial revision, the first version of -the @cite{The GAWK Manual} to be released was Edition 0.11 Beta in -October of 1989. The manual then underwent more substantial revision -for Edition 0.13 of December 1991. -David Trueman, Pat Rankin and Michal Jaegermann contributed sections -of the manual for Edition 0.13. -That edition was published by the -FSF as a bound book early in 1992. Since then there were several -minor revisions, notably Edition 0.14 of November 1992 that was published -by the FSF in January of 1993 and Edition 0.16 of August 1993. 
- -Edition 1.0 of @cite{GAWK: The GNU Awk User's Guide} represented a significant re-working -of @cite{The GAWK Manual}, with much additional material. -The FSF and I agreed that I was now the primary author. -@c I also felt that the manual needed a more descriptive title. - -In January 1996, SSC published Edition 1.0 under the title @cite{Effective AWK Programming}. -In February 1997, they published Edition 1.0.3 which had minor changes -as a ``second edition.'' -In 1999, the FSF published this same version as Edition 2 -of @cite{GAWK: The GNU Awk User's Guide}. - -Edition @value{EDITION} maintains the basic structure of Edition 1.0, -but with significant additional material, reflecting the host of new features -in @command{gawk} @value{PVERSION} @value{VERSION}. -Of particular note is -@ref{Array Sorting, ,Sorting Array Values and Indices with @command{gawk}}, -@ref{Bitwise Functions, ,Using @command{gawk}'s Bit Manipulation Functions}, -@ref{Internationalization, ,Internationalization with @command{gawk}}, -@ref{Advanced Features, ,Advanced Features of @command{gawk}}, -and -@ref{Dynamic Extensions, ,Adding New Built-in Functions to @command{gawk}}. -@end ignore - -@cindex Close, Diane -The @value{DOCUMENT} itself has gone through a number of previous editions. -Paul Rubin wrote the very first draft of @cite{The GAWK Manual}; -it was around 40 pages in size. -Diane Close and Richard Stallman improved it, yielding a -version that was -around 90 pages long and barely described the original, ``old'' -version of @command{awk}. - -I started working with that version in the fall of 1988. -As work on it progressed, -the FSF published several preliminary versions (numbered 0.@var{x}). -In 1996, Edition 1.0 was released with @command{gawk} 3.0.0. -The FSF published the first two editions under -the title @cite{The GNU Awk User's Guide}. - -This edition maintains the basic structure of Edition 1.0, -but with significant additional material, reflecting the host of new features -in @command{gawk} @value{PVERSION} @value{VERSION}. -Of particular note is -@ref{Array Sorting, ,Sorting Array Values and Indices with @command{gawk}}, -as well as -@ref{Bitwise Functions, ,Using @command{gawk}'s Bit Manipulation Functions}, -@ref{Internationalization, ,Internationalization with @command{gawk}}, -and also -@ref{Advanced Features, ,Advanced Features of @command{gawk}}, -and -@ref{Dynamic Extensions, ,Adding New Built-in Functions to @command{gawk}}. - -@cite{@value{TITLE}} will undoubtedly continue to evolve. -An electronic version -comes with the @command{gawk} distribution from the FSF. -If you find an error in this @value{DOCUMENT}, please report it! -@xref{Bugs, ,Reporting Problems and Bugs}, for information on submitting -problem reports electronically, or write to me in care of the publisher. - -@node How To Contribute, Acknowledgments, Manual History, Preface -@unnumberedsec How to Contribute - -As the maintainer of GNU @command{awk}, -I am starting a collection of publicly available @command{awk} -programs. -For more information, -see @uref{ftp://ftp.freefriends.org/arnold/Awkstuff}. -If you have written an interesting @command{awk} program, or have written a -@command{gawk} extension that you would like to -share with the rest of the world, please contact me (@email{arnold@@gnu.org}). -Making things available on the Internet helps keep the -@command{gawk} distribution down to manageable size. 
- -@node Acknowledgments, , How To Contribute, Preface -@unnumberedsec Acknowledgments - -The initial draft of @cite{The GAWK Manual} had the following acknowledgments: - -@quotation -Many people need to be thanked for their assistance in producing this -manual. Jay Fenlason contributed many ideas and sample programs. Richard -Mlynarik and Robert Chassell gave helpful comments on drafts of this -manual. The paper @cite{A Supplemental Document for @command{awk}} by John W.@: -Pierce of the Chemistry Department at UC San Diego, pinpointed several -issues relevant both to @command{awk} implementation and to this manual, that -would otherwise have escaped us. -@end quotation - -@cindex Stallman, Richard -I would like to acknowledge Richard M.@: Stallman, for his vision of a -better world and for his courage in founding the FSF and starting the -GNU project. - -The following people (in alphabetical order) -provided helpful comments on various -versions of this book, up to and including this edition. -Rick Adams, -Nelson H.F. Beebe, -Karl Berry, -Dr.@: Michael Brennan, -Rich Burridge, -Claire Coutier, -Diane Close, -Scott Deifik, -Christopher (``Topher'') Eliot, -Jeffrey Friedl, -Dr.@: Darrel Hankerson, -Michal Jaegermann, -Dr.@: Richard J.@: LeBlanc, -Michael Lijewski, -Pat Rankin, -Miriam Robbins, -Mary Sheehan, -and -Chuck Toporek. - -@cindex Berry, Karl -@cindex Chassell, Robert J.@: -@cindex Texinfo -Robert J.@: Chassell provided much valuable advice on -the use of Texinfo. -He also deserves special thanks for -convincing me @emph{not} to title this @value{DOCUMENT} -@cite{How To Gawk Politely}. -Karl Berry helped significantly with the @TeX{} part of Texinfo. - -@cindex Hartholz, Marshall -@cindex Hartholz, Elaine -@cindex Schreiber, Bert -@cindex Schreiber, Rita -I would like to thank Marshall and Elaine Hartholz of Seattle and -Dr.@: Bert and Rita Schreiber of Detroit for large amounts of quiet vacation -time in their homes, which allowed me to make significant progress on -this @value{DOCUMENT} and on @command{gawk} itself. - -@cindex Hughes, Phil -Phil Hughes of SSC -contributed in a very important way by loaning me his laptop GNU/Linux -system, not once, but twice, which allowed me to do a lot of work while -away from home. - -@cindex Trueman, David -David Trueman deserves special credit; he has done a yeoman job -of evolving @command{gawk} so that it performs well and without bugs. -Although he is no longer involved with @command{gawk}, -working with him on this project was a significant pleasure. - -@cindex Drepper, Ulrich -@cindex GNITS mailing list -The intrepid members of the GNITS mailing list, and most notably Ulrich -Drepper, provided invaluable help and feedback for the design of the -internationalization features. - -@cindex Beebe, Nelson -@cindex Brown, Martin -@cindex Deifik, Scott -@cindex Hankerson, Darrel -@cindex Jaegermann, Michal -@cindex Kahrs, J@"urgen -@cindex Rankin, Pat -@cindex Rommel, Kai Uwe -@cindex Zaretskii, Eli -Nelson Beebe, -Martin Brown, -Scott Deifik, -Darrel Hankerson, -Michal Jaegermann, -J@"urgen Kahrs, -Pat Rankin, -Kai Uwe Rommel, -and Eli Zaretskii -(in alphabetical order) -are long-time members of the -@command{gawk} ``crack portability team.'' Without their hard work and -help, @command{gawk} would not be nearly the fine program it is today. It -has been and continues to be a pleasure working with this team of fine -people. 
- -@cindex Kernighan, Brian -David and I would like to thank Brian Kernighan of Bell Laboratories for -invaluable assistance during the testing and debugging of @command{gawk}, and for -help in clarifying numerous points about the language. We could not have -done nearly as good a job on either @command{gawk} or its documentation without -his help. - -Chuck Toporek, Mary Sheehan, and Claire Coutier of O'Reilly & Associates contributed -significant editorial help for this @value{DOCUMENT} for the -3.1 release of @command{gawk}. - -@cindex Robbins, Miriam -@cindex Robbins, Jean -@cindex Robbins, Harry -@cindex G-d -I must thank my wonderful wife, Miriam, for her patience through -the many versions of this project, for her proof-reading, -and for sharing me with the computer. -I would like to thank my parents for their love, and for the grace with -which they raised and educated me. -Finally, I also must acknowledge my gratitude to G-d, for the many opportunities -He has sent my way, as well as for the gifts He has given me with which to -take advantage of those opportunities. -@sp 2 -@noindent -Arnold Robbins @* -Nof Ayalon @* -ISRAEL @* -March, 2001 - -@ignore -@c Try this -@iftex -@page -@headings off -@majorheading I@ @ @ @ The @command{awk} Language and @command{gawk} -Part I describes the @command{awk} language and @command{gawk} program in detail. -It starts with the basics, and continues through all of the features of @command{awk} -and @command{gawk}. It contains the following chapters: - -@itemize @bullet -@item -@ref{Getting Started, ,Getting Started with @command{awk}}. - -@item -@ref{Regexp, ,Regular Expressions}. - -@item -@ref{Reading Files, , Reading Input Files}. - -@item -@ref{Printing, , Printing Output}. - -@item -@ref{Expressions}. - -@item -@ref{Patterns and Actions, ,Patterns Actions and Variables}. - -@item -@ref{Arrays, ,Arrays in @command{awk}}. - -@item -@ref{Functions}. - -@item -@ref{Internationalization, ,Internationalization with @command{gawk}}. - -@item -@ref{Advanced Features, ,Advanced Features of @command{gawk}}. - -@item -@ref{Invoking Gawk, ,Running @command{awk} and @command{gawk}}. -@end itemize - -@page -@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @| -@oddheading @| @| @strong{@thischapter}@ @ @ @thispage -@end iftex -@end ignore - -@node Getting Started, Regexp, Preface, Top -@chapter Getting Started with @command{awk} -@cindex script, definition of -@cindex rule, definition of -@cindex program, definition of -@cindex basic function of @command{awk} - -The basic function of @command{awk} is to search files for lines (or other -units of text) that contain certain patterns. When a line matches one -of the patterns, @command{awk} performs specified actions on that line. -@command{awk} keeps processing input lines in this way until it reaches -the end of the input files. - -@cindex data-driven languages -@cindex procedural languages -@cindex language, data-driven -@cindex language, procedural -Programs in @command{awk} are different from programs in most other languages, -because @command{awk} programs are @dfn{data-driven}; that is, you describe -the data you want to work with and then what to do when you find it. -Most other languages are @dfn{procedural}; you have to describe, in great -detail, every step the program is to take. When working with procedural -languages, it is usually much -harder to clearly describe the data your program will process. -For this reason, @command{awk} programs are often refreshingly easy to -write and read. 
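-
-For example, the following one-rule program is completely data-driven:
-the rule names the data of interest (lines containing the string
-@samp{gold}) and the action to take (print the line); there is no
-explicit loop over the input, because @command{awk} reads the records
-for you.  (The input @value{FN} @file{ledger} is purely illustrative.)
-
-@example
-awk '/gold/ @{ print @}' ledger
-@end example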
- -@cindex program, definition of -@cindex rule, definition of -When you run @command{awk}, you specify an @command{awk} @dfn{program} that -tells @command{awk} what to do. The program consists of a series of -@dfn{rules}. (It may also contain @dfn{function definitions}, -an advanced feature that we will ignore for now. -@xref{User-defined, ,User-Defined Functions}.) Each rule specifies one -pattern to search for and one action to perform -upon finding the pattern. - -Syntactically, a rule consists of a pattern followed by an action. The -action is enclosed in curly braces to separate it from the pattern. -Newlines usually separate rules. Therefore, an @command{awk} -program looks like this: - -@example -@var{pattern} @{ @var{action} @} -@var{pattern} @{ @var{action} @} -@dots{} -@end example - -@menu -* Running gawk:: How to run @command{gawk} programs; includes - command-line syntax. -* Sample Data Files:: Sample data files for use in the @command{awk} - programs illustrated in this @value{DOCUMENT}. -* Very Simple:: A very simple example. -* Two Rules:: A less simple one-line example using two - rules. -* More Complex:: A more complex example. -* Statements/Lines:: Subdividing or combining statements into - lines. -* Other Features:: Other Features of @command{awk}. -* When:: When to use @command{gawk} and when to use - other things. -@end menu - -@node Running gawk, Sample Data Files, Getting Started, Getting Started -@section How to Run @command{awk} Programs - -@cindex command-line formats -@cindex running @command{awk} programs -There are several ways to run an @command{awk} program. If the program is -short, it is easiest to include it in the command that runs @command{awk}, -like this: - -@example -awk '@var{program}' @var{input-file1} @var{input-file2} @dots{} -@end example - -When the program is long, it is usually more convenient to put it in a file -and run it with a command like this: - -@example -awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{} -@end example - -This @value{SECTION} discusses both mechanisms, along with several -variations of each. - -@menu -* One-shot:: Running a short throw-away @command{awk} - program. -* Read Terminal:: Using no input files (input from terminal - instead). -* Long:: Putting permanent @command{awk} programs in - files. -* Executable Scripts:: Making self-contained @command{awk} programs. -* Comments:: Adding documentation to @command{gawk} - programs. -* Quoting:: More discussion of shell quoting issues. -@end menu - -@node One-shot, Read Terminal, Running gawk, Running gawk -@subsection One-Shot Throw-Away @command{awk} Programs - -Once you are familiar with @command{awk}, you will often type in simple -programs the moment you want to use them. Then you can write the -program as the first argument of the @command{awk} command, like this: - -@example -awk '@var{program}' @var{input-file1} @var{input-file2} @dots{} -@end example - -@noindent -where @var{program} consists of a series of @var{patterns} and -@var{actions}, as described earlier. - -@cindex single quotes, why needed -This command format instructs the @dfn{shell}, or command interpreter, -to start @command{awk} and use the @var{program} to process records in the -input file(s). There are single quotes around @var{program} so -the shell won't interpret any @command{awk} characters as special shell -characters. 
The quotes also cause the shell to treat all of @var{program} as -a single argument for @command{awk}, and allow @var{program} to be more -than one line long. - -This format is also useful for running short or medium-sized @command{awk} -programs from shell scripts, because it avoids the need for a separate -file for the @command{awk} program. A self-contained shell script is more -reliable because there are no other files to misplace. - -@ref{Very Simple, ,Some Simple Examples}, -@ifnotinfo -later in this @value{CHAPTER}, -@end ifnotinfo -presents several short, -self-contained programs. - -@c Removed for gawk 3.1, doesn't really add anything here. -@ignore -As an interesting side point, the command - -@example -awk '/foo/' @var{files} @dots{} -@end example - -@noindent -is essentially the same as - -@cindex @command{egrep} utility -@example -egrep foo @var{files} @dots{} -@end example -@end ignore - -@node Read Terminal, Long, One-shot, Running gawk -@subsection Running @command{awk} Without Input Files - -@cindex standard input -@cindex input, standard -You can also run @command{awk} without any input files. If you type the -following command line: - -@example -awk '@var{program}' -@end example - -@noindent -@command{awk} applies the @var{program} to the @dfn{standard input}, -which usually means whatever you type on the terminal. This continues -until you indicate end-of-file by typing @kbd{Ctrl-d}. -(On other operating systems, the end-of-file character may be different. -For example, on OS/2 and MS-DOS, it is @kbd{Ctrl-z}.) - -As an example, the following program prints a friendly piece of advice -(from Douglas Adams's @cite{The Hitchhiker's Guide to the Galaxy}), -to keep you from worrying about the complexities of computer programming. -(@code{BEGIN} is a feature we haven't discussed yet.): - -@example -$ awk "BEGIN @{ print \"Don't Panic!\" @}" -@print{} Don't Panic! -@end example - -@cindex quoting, shell -@cindex shell quoting -This program does not read any input. The @samp{\} before each of the -inner double quotes is necessary because of the shell's quoting -rules---in particular because it mixes both single quotes and -double quotes.@footnote{Although we generally recommend the use of single -quotes around the program text, double quotes are needed here in order to -put the single quote into the message.} - -This next simple @command{awk} program -emulates the @command{cat} utility; it copies whatever you type at the -keyboard to its standard output. (Why this works is explained shortly.) - -@example -$ awk '@{ print @}' -Now is the time for all good men -@print{} Now is the time for all good men -to come to the aid of their country. -@print{} to come to the aid of their country. -Four score and seven years ago, ... -@print{} Four score and seven years ago, ... -What, me worry? -@print{} What, me worry? -@kbd{Ctrl-d} -@end example - -@node Long, Executable Scripts, Read Terminal, Running gawk -@subsection Running Long Programs - -@cindex running long programs -@cindex @code{-f} option -@cindex command-line option, @code{-f} -@cindex program file -@cindex file, @command{awk} program -Sometimes your @command{awk} programs can be very long. In this case, it is -more convenient to put the program into a separate file. 
In order to tell -@command{awk} to use that file for its program, you type: - -@example -awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{} -@end example - -The @option{-f} instructs the @command{awk} utility to get the @command{awk} program -from the file @var{source-file}. Any @value{FN} can be used for -@var{source-file}. For example, you could put the program: - -@example -BEGIN @{ print "Don't Panic!" @} -@end example - -@noindent -into the file @file{advice}. Then this command: - -@example -awk -f advice -@end example - -@noindent -does the same thing as this one: - -@example -awk "BEGIN @{ print \"Don't Panic!\" @}" -@end example - -@cindex quoting, shell -@cindex shell quoting -@noindent -This was explained earlier -(@pxref{Read Terminal, ,Running @command{awk} Without Input Files}). -Note that you don't usually need single quotes around the @value{FN} that you -specify with @option{-f}, because most @value{FN}s don't contain any of the shell's -special characters. Notice that in @file{advice}, the @command{awk} -program did not have single quotes around it. The quotes are only needed -for programs that are provided on the @command{awk} command line. - -If you want to identify your @command{awk} program files clearly as such, -you can add the extension @file{.awk} to the @value{FN}. This doesn't -affect the execution of the @command{awk} program but it does make -``housekeeping'' easier. - -@node Executable Scripts, Comments, Long, Running gawk -@subsection Executable @command{awk} Programs -@cindex executable scripts -@cindex scripts, executable -@cindex self-contained programs -@cindex program, self-contained -@cindex @code{#!} (executable scripts) - -Once you have learned @command{awk}, you may want to write self-contained -@command{awk} scripts, using the @samp{#!} script mechanism. You can do -this on many Unix systems@footnote{The @samp{#!} mechanism works on -Linux systems, -systems derived from the 4.4-Lite Berkeley Software Distribution, -and most commercial Unix systems.} as well as on the GNU system. -For example, you could update the file @file{advice} to look like this: - -@example -#! /bin/awk -f - -BEGIN @{ print "Don't Panic!" @} -@end example - -@noindent -After making this file executable (with the @command{chmod} utility), -simply type @samp{advice} -at the shell and the system arranges to run @command{awk}@footnote{The -line beginning with @samp{#!} lists the full @value{FN} of an interpreter -to run and an optional initial command-line argument to pass to that -interpreter. The operating system then runs the interpreter with the given -argument and the full argument list of the executed program. The first argument -in the list is the full @value{FN} of the @command{awk} program. The rest of the -argument list is either options to @command{awk}, or @value{DF}s, -or both.} as if you had -typed @samp{awk -f advice}: - -@example -$ chmod +x advice -$ advice -@print{} Don't Panic! -@end example - -@noindent -Self-contained @command{awk} scripts are useful when you want to write a -program that users can invoke without their having to know that the program is -written in @command{awk}. - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Portability Issues with @samp{#!} -@cindex advanced notes - -Some systems limit the length of the interpreter name to 32 characters. -Often, this can be dealt with by using a symbolic link. - -You should not put more than one argument on the @samp{#!} -line after the path to @command{awk}. It does not work. 
The operating system -treats the rest of the line as a single argument and passes it to @command{awk}. -Doing this leads to confusing behavior---most likely a usage diagnostic -of some sort from @command{awk}. - -@cindex portability issues -Finally, -the value of @code{ARGV[0]} -(@pxref{Built-in Variables}) -varies depending upon your operating system. -Some systems put @samp{awk} there, some put the full pathname -of @command{awk} (such as @file{/bin/awk}), and some put the name -of your script (@samp{advice}). Don't rely on the value of @code{ARGV[0]} -to provide your script name. - -@node Comments, Quoting, Executable Scripts, Running gawk -@subsection Comments in @command{awk} Programs -@cindex @code{#} (comment) -@cindex comments -@cindex use of comments -@cindex documenting @command{awk} programs -@cindex programs, documenting - -A @dfn{comment} is some text that is included in a program for the sake -of human readers; it is not really an executable part of the program. Comments -can explain what the program does and how it works. Nearly all -programming languages have provisions for comments, as programs are -typically hard to understand without them. - -In the @command{awk} language, a comment starts with the sharp sign -character (@samp{#}) and continues to the end of the line. -The @samp{#} does not have to be the first character on the line. The -@command{awk} language ignores the rest of a line following a sharp sign. -For example, we could have put the following into @file{advice}: - -@example -# This program prints a nice friendly message. It helps -# keep novice users from being afraid of the computer. -BEGIN @{ print "Don't Panic!" @} -@end example - -You can put comment lines into keyboard-composed throw-away @command{awk} -programs, but this usually isn't very useful; the purpose of a -comment is to help you or another person understand the program -when reading it at a later time. - -@cindex quoting, shell -@cindex shell quoting -@strong{Caution:} As mentioned in -@ref{One-shot, ,One-Shot Throw-Away @command{awk} Programs}, -you can enclose small to medium programs in single quotes, in order to keep -your shell scripts self-contained. When doing so, @emph{don't} put -an apostrophe (i.e., a single quote) into a comment (or anywhere else -in your program). The shell interprets the quote as the closing -quote for the entire program. As a result, usually the shell -prints a message about mismatched quotes, and if @command{awk} actually -runs, it will probably print strange messages about syntax errors. -For example, look at the following: - -@example -$ awk '@{ print "hello" @} # let's be cute' -> -@end example - -The shell sees that the first two quotes match, and that -a new quoted object begins at the end of the command-line. -It therefore prompts with the secondary prompt, waiting for more input. -With Unix @command{awk}, closing the quoted string produces this result: - -@example -$ awk '@{ print "hello" @} # let's be cute' -> ' -@error{} awk: can't open file be -@error{} source line number 1 -@end example - -Putting a backslash before the single quote in @samp{let's} wouldn't help, -since backslashes are not special inside single quotes. -The next @value{SUBSECTION} describes the shell's quoting rules. 
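-
-In the meantime, a simple way to sidestep the problem entirely is to
-keep any program whose comments need an apostrophe in a separate file
-(@pxref{Long, ,Running Long Programs}); the shell's quoting rules then
-never come into play:
-
-@example
-# It's safe to use an apostrophe here, since the shell
-# never sees the contents of a program file.
-BEGIN @{ print "Don't Panic!" @}
-@end example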
- -@node Quoting, , Comments, Running gawk -@subsection Shell Quoting Issues -@c the indexing here is purposely different, until we -@c get a way to mark the defining instance for an index entry -@cindex quoting rules, shell -@cindex shell quoting rules - -For short to medium length @command{awk} programs, it is most convenient -to enter the program on the @command{awk} command line. -This is best done by enclosing the entire program in single quotes. -This is true whether you are entering the program interactively at -the shell prompt, or writing it as part of a larger shell script: - -@example -awk '@var{program text}' @var{input-file1} @var{input-file2} @dots{} -@end example - -@cindex @command{csh} utility -Once you are working with the shell, it is helpful to have a basic -knowledge of shell quoting rules. The following rules apply only to -POSIX-compliant, Bourne-style shells (such as @command{bash}, the GNU Bourne-Again -Shell). If you use @command{csh}, you're on your own. - -@itemize @bullet -@item -Quoted items can be concatenated with nonquoted items as well as with other -quoted items. The shell turns everything into one argument for -the command. - -@item -Preceding any single character with a backslash (@samp{\}) quotes -that character. The shell removes the backslash and passes the quoted -character on to the command. - -@item -Single quotes protect everything between the opening and closing quotes. -The shell does no interpretation of the quoted text, passing it on verbatim -to the command. -It is @emph{impossible} to embed a single quote inside single-quoted text. -Refer back to -@ref{Comments, ,Comments in @command{awk} Programs}, -for an example showing what happens if you try. - -@item -Double quotes protect most things between the opening and closing quotes. -The shell does at least variable and command substitution on the quoted text. -Different shells may do additional kinds of processing on double-quoted text. - -Since certain characters within double-quoted text are processed by the shell, -they must be @dfn{escaped} within the text. Of note are the characters -@samp{$}, @samp{`}, @samp{\} and @samp{"}, all of which must be preceded by -a backslash within double-quoted text if they are to be passed on literally -to the program. (The leading backslash is stripped first.) -Thus, the example seen -@ifnotinfo -previously -@end ifnotinfo -in @ref{Read Terminal, ,Running @command{awk} Without Input Files}, -is applicable: - -@example -$ awk "BEGIN @{ print \"Don't Panic!\" @}" -@print{} Don't Panic! -@end example - -Note that the single quote is not special within double quotes. - -@item -Null strings are removed when they occur as part of a non-null -command-line argument, while explicit non-null objects are kept. -For example, to specify that the field separator @code{FS} should -be set to the null string, use: - -@example -awk -F "" '@var{program}' @var{files} # correct -@end example - -@noindent -Don't use this: - -@example -awk -F"" '@var{program}' @var{files} # wrong! -@end example - -@noindent -In the second case, @command{awk} will attempt to use the text of the program -as the value of @code{FS}, and the first @value{FN} as the text of the program! -This results in syntax errors at best, and confusing behavior at worst. -@end itemize - -@cindex shell quoting, tricks -Mixing single and double quotes is difficult. 
You have to resort -to shell quoting tricks, like this: - -@example -$ awk 'BEGIN @{ print "Here is a single quote <'"'"'>" @}' -@print{} Here is a single quote <'> -@end example - -@noindent -This program consists of three concatenated quoted strings. The first and the -third are single-quoted, the second is double-quoted. - -This can be ``simplified'' to: - -@example -$ awk 'BEGIN @{ print "Here is a single quote <'\''>" @}' -@print{} Here is a single quote <'> -@end example - -@noindent -Judge for yourself which of these two is the more readable. - -Another option is to use double quotes, escaping the embedded, @command{awk}-level -double quotes: - -@example -$ awk "BEGIN @{ print \"Here is a single quote <'>\" @}" -@print{} Here is a single quote <'> -@end example - -@noindent -This option is also painful, because double quotes, backslashes, and dollar signs -are very common in @command{awk} programs. - -If you really need both single and double quotes in your @command{awk} -program, it is probably best to move it into a separate file, where -the shell won't be part of the picture, and you can say what you mean. - -@node Sample Data Files, Very Simple, Running gawk, Getting Started -@section @value{DDF}s for the Examples -@c For gawk >= 3.2, update these data files. No-one has such slow modems! - -@cindex input file, sample -@cindex sample input files -@cindex @file{BBS-list} file -Many of the examples in this @value{DOCUMENT} take their input from two sample -@value{DF}s. The first, called @file{BBS-list}, represents a list of -computer bulletin board systems together with information about those systems. -The second @value{DF}, called @file{inventory-shipped}, contains -information about monthly shipments. In both files, -each line is considered to be one @dfn{record}. - -In the file @file{BBS-list}, each record contains the name of a computer -bulletin board, its phone number, the board's baud rate(s), and a code for -the number of hours it is operational. An @samp{A} in the last column -means the board operates 24 hours a day. A @samp{B} in the last -column means the board only operates on evening and weekend hours. -A @samp{C} means the board operates only on weekends: - -@c 2e: Update the baud rates to reflect today's faster modems -@example -@c system if test ! -d eg ; then mkdir eg ; fi -@c system if test ! -d eg/lib ; then mkdir eg/lib ; fi -@c system if test ! -d eg/data ; then mkdir eg/data ; fi -@c system if test ! -d eg/prog ; then mkdir eg/prog ; fi -@c system if test ! -d eg/misc ; then mkdir eg/misc ; fi -@c file eg/data/BBS-list -aardvark 555-5553 1200/300 B -alpo-net 555-3412 2400/1200/300 A -barfly 555-7685 1200/300 A -bites 555-1675 2400/1200/300 A -camelot 555-0542 300 C -core 555-2912 1200/300 C -fooey 555-1234 2400/1200/300 B -foot 555-6699 1200/300 B -macfoo 555-6480 1200/300 A -sdace 555-3430 2400/1200/300 A -sabafoo 555-2127 1200/300 C -@c endfile -@end example - -@cindex @file{inventory-shipped} file -The second @value{DF}, called @file{inventory-shipped}, represents -information about shipments during the year. -Each record contains the month, the number -of green crates shipped, the number of red boxes shipped, the number of -orange bags shipped, and the number of blue packages shipped, -respectively. There are 16 entries, covering the 12 months of last year -and the first four months of the current year. 
- -@example -@c file eg/data/inventory-shipped -Jan 13 25 15 115 -Feb 15 32 24 226 -Mar 15 24 34 228 -Apr 31 52 63 420 -May 16 34 29 208 -Jun 31 42 75 492 -Jul 24 34 67 436 -Aug 15 34 47 316 -Sep 13 55 37 277 -Oct 29 54 68 525 -Nov 20 87 82 577 -Dec 17 35 61 401 - -Jan 21 36 64 620 -Feb 26 58 80 652 -Mar 24 75 70 495 -Apr 21 70 74 514 -@c endfile -@end example - -@ifinfo -If you are reading this in GNU Emacs using Info, you can copy the regions -of text showing these sample files into your own test files. This way you -can try out the examples shown in the remainder of this document. You do -this by using the command @kbd{M-x write-region} to copy text from the Info -file into a file for use with @command{awk} -(@xref{Misc File Ops, , Miscellaneous File Operations, emacs, GNU Emacs Manual}, -for more information). Using this information, create your own -@file{BBS-list} and @file{inventory-shipped} files and practice what you -learn in this @value{DOCUMENT}. - -@cindex Texinfo -If you are using the stand-alone version of Info, -see @ref{Extract Program, ,Extracting Programs from Texinfo Source Files}, -for an @command{awk} program that extracts these @value{DF}s from -@file{gawk.texi}, the Texinfo source file for this Info file. -@end ifinfo - -@node Very Simple, Two Rules, Sample Data Files, Getting Started -@section Some Simple Examples - -The following command runs a simple @command{awk} program that searches the -input file @file{BBS-list} for the character string @samp{foo}. (A -string of characters is usually called a @dfn{string}. -The term @dfn{string} is based on similar usage in English, such -as ``a string of pearls,'' or, ``a string of cars in a train.''): - -@example -awk '/foo/ @{ print $0 @}' BBS-list -@end example - -@noindent -When lines containing @samp{foo} are found, they are printed because -@w{@samp{print $0}} means print the current line. (Just @samp{print} by -itself means the same thing, so we could have written that -instead.) - -You will notice that slashes (@samp{/}) surround the string @samp{foo} -in the @command{awk} program. The slashes indicate that @samp{foo} -is the pattern to search for. This type of pattern is called a -@dfn{regular expression}, which is covered in more detail later -(@pxref{Regexp, ,Regular Expressions}). -The pattern is allowed to match parts of words. -There are -single quotes around the @command{awk} program so that the shell won't -interpret any of it as special shell characters. - -Here is what this program prints: - -@example -$ awk '/foo/ @{ print $0 @}' BBS-list -@print{} fooey 555-1234 2400/1200/300 B -@print{} foot 555-6699 1200/300 B -@print{} macfoo 555-6480 1200/300 A -@print{} sabafoo 555-2127 1200/300 C -@end example - -@cindex action, default -@cindex pattern, default -@cindex default action -@cindex default pattern -In an @command{awk} rule, either the pattern or the action can be omitted, -but not both. If the pattern is omitted, then the action is performed -for @emph{every} input line. If the action is omitted, the default -action is to print all lines that match the pattern. - -@cindex empty action -@cindex action, empty -Thus, we could leave out the action (the @code{print} statement and the curly -braces) in the above example and the result would be the same: all -lines matching the pattern @samp{foo} are printed. By comparison, -omitting the @code{print} statement but retaining the curly braces makes an -empty action that does nothing (i.e., no lines are printed). 
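To make the difference concrete, here is a small sketch of the three
variants, run against @file{BBS-list}:

@example
awk '/foo/' BBS-list
awk '/foo/ @{ print $0 @}' BBS-list
awk '/foo/ @{ @}' BBS-list
@end example

@noindent
The first two commands produce identical output: every line containing
@samp{foo}. The third command, whose action is an empty pair of braces,
prints nothing at all.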
- -@cindex one-liners -Many practical @command{awk} programs are just a line or two. Following is a -collection of useful, short programs to get you started. Some of these -programs contain constructs that haven't been covered yet. (The description -of the program will give you a good idea of what is going on, but please -read the rest of the @value{DOCUMENT} to become an @command{awk} expert!) -Most of the examples use a @value{DF} named @file{data}. This is just a -placeholder; if you use these programs yourself, substitute -your own @value{FN}s for @file{data}. -For future reference, note that there is often more than -one way to do things in @command{awk}. At some point, you may want -to look back at these examples and see if -you can come up with different ways to do the same things shown here: - -@itemize @bullet -@item -Print the length of the longest input line: - -@example -awk '@{ if (length($0) > max) max = length($0) @} - END @{ print max @}' data -@end example - -@item -Print every line that is longer than 80 characters: - -@example -awk 'length($0) > 80' data -@end example - -The sole rule has a relational expression as its pattern and it has no -action---so the default action, printing the record, is used. - -@cindex @command{expand} utility -@item -Print the length of the longest line in @file{data}: - -@example -expand data | awk '@{ if (x < length()) x = length() @} - END @{ print "maximum line length is " x @}' -@end example - -The input is processed by the @command{expand} utility to change tabs -into spaces, so the widths compared are actually the right-margin columns. - -@item -Print every line that has at least one field: - -@example -awk 'NF > 0' data -@end example - -This is an easy way to delete blank lines from a file (or rather, to -create a new file similar to the old file but from which the blank lines -have been removed). - -@item -Print seven random numbers from 0 to 100, inclusive: - -@example -awk 'BEGIN @{ for (i = 1; i <= 7; i++) - print int(101 * rand()) @}' -@end example - -@item -Print the total number of bytes used by @var{files}: - -@example -ls -l @var{files} | awk '@{ x += $5 @} - END @{ print "total bytes: " x @}' -@end example - -@item -Print the total number of kilobytes used by @var{files}: - -@c Don't use \ continuation, not discussed yet -@example -ls -l @var{files} | awk '@{ x += $5 @} - END @{ print "total K-bytes: " (x + 1023)/1024 @}' -@end example - -@item -Print a sorted list of the login names of all users: - -@example -awk -F: '@{ print $1 @}' /etc/passwd | sort -@end example - -@item -Count lines in a file: - -@example -awk 'END @{ print NR @}' data -@end example - -@item -Print the even-numbered lines in the @value{DF}: - -@example -awk 'NR % 2 == 0' data -@end example - -If you use the expression @samp{NR % 2 == 1} instead, -it would print the odd-numbered lines. -@end itemize - -@node Two Rules, More Complex, Very Simple, Getting Started -@section An Example with Two Rules -@cindex how @command{awk} works - -The @command{awk} utility reads the input files one line at a -time. For each line, @command{awk} tries the patterns of each of the rules. -If several patterns match, then several actions are run in the order in -which they appear in the @command{awk} program. If no patterns match, then -no actions are run. - -After processing all the rules that match the line (and perhaps there are none), -@command{awk} reads the next line. 
(However, -@pxref{Next Statement, ,The @code{next} Statement}, -and also @pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). -This continues until the end of the file is reached. -For example, the following @command{awk} program contains two rules: - -@example -/12/ @{ print $0 @} -/21/ @{ print $0 @} -@end example - -@noindent -The first rule has the string @samp{12} as the -pattern and @samp{print $0} as the action. The second rule has the -string @samp{21} as the pattern and also has @samp{print $0} as the -action. Each rule's action is enclosed in its own pair of braces. - -This program prints every line that contains the string -@samp{12} @emph{or} the string @samp{21}. If a line contains both -strings, it is printed twice, once by each rule. - -This is what happens if we run this program on our two sample @value{DF}s, -@file{BBS-list} and @file{inventory-shipped}, as shown here: - -@example -$ awk '/12/ @{ print $0 @} -> /21/ @{ print $0 @}' BBS-list inventory-shipped -@print{} aardvark 555-5553 1200/300 B -@print{} alpo-net 555-3412 2400/1200/300 A -@print{} barfly 555-7685 1200/300 A -@print{} bites 555-1675 2400/1200/300 A -@print{} core 555-2912 1200/300 C -@print{} fooey 555-1234 2400/1200/300 B -@print{} foot 555-6699 1200/300 B -@print{} macfoo 555-6480 1200/300 A -@print{} sdace 555-3430 2400/1200/300 A -@print{} sabafoo 555-2127 1200/300 C -@print{} sabafoo 555-2127 1200/300 C -@print{} Jan 21 36 64 620 -@print{} Apr 21 70 74 514 -@end example - -@noindent -Note how the line beginning with @samp{sabafoo} -in @file{BBS-list} was printed twice, once for each rule. - -@node More Complex, Statements/Lines, Two Rules, Getting Started -@section A More Complex Example - -Now that we've mastered some simple tasks, let's look at -what typical @command{awk} -programs do. This example shows how @command{awk} can be used to -summarize, select, and rearrange the output of another utility. It uses -features that haven't been covered yet, so don't worry if you don't -understand all the details: - -@example -ls -l | awk '$6 == "Nov" @{ sum += $5 @} - END @{ print sum @}' -@end example - -@cindex @command{csh} utility -@cindex @command{csh}, backslash continuation -@cindex backslash continuation, in @command{csh} -@cindex @command{ls} utility -This command prints the total number of bytes in all the files in the -current directory that were last modified in November (of any year). -@footnote{In the C shell (@command{csh}), you need to type -a semicolon and then a backslash at the end of the first line; see -@ref{Statements/Lines, ,@command{awk} Statements Versus Lines}, for an -explanation as to why. In a POSIX-compliant shell, such as the Bourne -shell or @command{bash}, you can type the example as shown. If the command -@samp{echo $path} produces an empty output line, you are most likely -using a POSIX-compliant shell. Otherwise, you are probably using the -C shell or a shell derived from it.} -The @w{@samp{ls -l}} part of this example is a system command that gives -you a listing of the files in a directory, including each file's size and the date -the file was last modified. 
Its output looks like this: - -@example --rw-r--r-- 1 arnold user 1933 Nov 7 13:05 Makefile --rw-r--r-- 1 arnold user 10809 Nov 7 13:03 awk.h --rw-r--r-- 1 arnold user 983 Apr 13 12:14 awk.tab.h --rw-r--r-- 1 arnold user 31869 Jun 15 12:20 awk.y --rw-r--r-- 1 arnold user 22414 Nov 7 13:03 awk1.c --rw-r--r-- 1 arnold user 37455 Nov 7 13:03 awk2.c --rw-r--r-- 1 arnold user 27511 Dec 9 13:07 awk3.c --rw-r--r-- 1 arnold user 7989 Nov 7 13:03 awk4.c -@end example - -@noindent -The first field contains read-write permissions, the second field contains -the number of links to the file, and the third field identifies the owner of -the file. The fourth field identifies the group of the file. -The fifth field contains the size of the file in bytes. The -sixth, seventh and eighth fields contain the month, day, and time, -respectively, that the file was last modified. Finally, the ninth field -contains the name of the file.@footnote{On some -very old systems, you may need to use @samp{ls -lg} to get this output.} - -@cindex automatic initialization -@cindex initialization, automatic -The @samp{$6 == "Nov"} in our @command{awk} program is an expression that -tests whether the sixth field of the output from @w{@samp{ls -l}} -matches the string @samp{Nov}. Each time a line has the string -@samp{Nov} for its sixth field, the action @samp{sum += $5} is -performed. This adds the fifth field (the file's size) to the variable -@code{sum}. As a result, when @command{awk} has finished reading all the -input lines, @code{sum} is the total of the sizes of the files whose -lines matched the pattern. (This works because @command{awk} variables -are automatically initialized to zero.) - -After the last line of output from @command{ls} has been processed, the -@code{END} rule executes and prints the value of @code{sum}. -In this example, the value of @code{sum} is 140963. - -These more advanced @command{awk} techniques are covered in later sections -(@pxref{Action Overview, ,Actions}). Before you can move on to more -advanced @command{awk} programming, you have to know how @command{awk} interprets -your input and displays your output. By manipulating fields and using -@code{print} statements, you can produce some very useful and impressive -looking reports. - -@node Statements/Lines, Other Features, More Complex, Getting Started -@section @command{awk} Statements Versus Lines -@cindex line break -@cindex newline - -Most often, each line in an @command{awk} program is a separate statement or -separate rule, like this: - -@example -awk '/12/ @{ print $0 @} - /21/ @{ print $0 @}' BBS-list inventory-shipped -@end example - -However, @command{gawk} ignores newlines after any of the following -symbols and keywords: - -@example -, @{ ? : || && do else -@end example - -@noindent -A newline at any other point is considered the end of the -statement.@footnote{The @samp{?} and @samp{:} referred to here is the -three-operand conditional expression described in -@ref{Conditional Exp, ,Conditional Expressions}. -Splitting lines after @samp{?} and @samp{:} is a minor @command{gawk} -extension; if @option{--posix} is specified -(@pxref{Options, , Command-Line Options}), then this extension is disabled.} - -@cindex backslash continuation -@cindex continuation of lines -@cindex line continuation -If you would like to split a single statement into two lines at a point -where a newline would terminate it, you can @dfn{continue} it by ending the -first line with a backslash character (@samp{\}). 
The backslash must be -the final character on the line in order to be recognized as a continuation -character. A backslash is allowed anywhere in the statement, even -in the middle of a string or regular expression. For example: - -@example -awk '/This regular expression is too long, so continue it\ - on the next line/ @{ print $1 @}' -@end example - -@noindent -@cindex portability issues -We have generally not used backslash continuation in the sample programs -in this @value{DOCUMENT}. In @command{gawk}, there is no limit on the -length of a line, so backslash continuation is never strictly necessary; -it just makes programs more readable. For this same reason, as well as -for clarity, we have kept most statements short in the sample programs -presented throughout the @value{DOCUMENT}. Backslash continuation is -most useful when your @command{awk} program is in a separate source file -instead of entered from the command line. You should also note that -many @command{awk} implementations are more particular about where you -may use backslash continuation. For example, they may not allow you to -split a string constant using backslash continuation. Thus, for maximum -portability of your @command{awk} programs, it is best not to split your -lines in the middle of a regular expression or a string. -@c 10/2000: gawk, mawk, and current bell labs awk allow it, -@c solaris 2.7 nawk does not. Solaris /usr/xpg4/bin/awk does though! sigh. - -@cindex @command{csh} utility -@cindex @command{csh}, backslash continuation -@cindex backslash continuation, in @command{csh} -@strong{Caution:} @emph{Backslash continuation does not work as described -above with the C shell.} It works for @command{awk} programs in files and -for one-shot programs, @emph{provided} you are using a POSIX-compliant -shell, such as the Unix Bourne shell or @command{bash}. But the C shell behaves -differently! There, you must use two backslashes in a row, followed by -a newline. Note also that when using the C shell, @emph{every} newline -in your awk program must be escaped with a backslash. To illustrate: - -@example -% awk 'BEGIN @{ \ -? print \\ -? "hello, world" \ -? @}' -@print{} hello, world -@end example - -@noindent -Here, the @samp{%} and @samp{?} are the C shell's primary and secondary -prompts, analogous to the standard shell's @samp{$} and @samp{>}. - -Compare the previous example to how it is done with a POSIX-compliant shell: - -@example -$ awk 'BEGIN @{ -> print \ -> "hello, world" -> @}' -@print{} hello, world -@end example - -@command{awk} is a line-oriented language. Each rule's action has to -begin on the same line as the pattern. To have the pattern and action -on separate lines, you @emph{must} use backslash continuation; there -is no other way. - -@cindex backslash continuation, and comments -@cindex comments and backslash continuation -Another thing to keep in mind is that backslash continuation and -comments do not mix. As soon as @command{awk} sees the @samp{#} that -starts a comment, it ignores @emph{everything} on the rest of the -line. For example: - -@example -$ gawk 'BEGIN @{ print "dont panic" # a friendly \ -> BEGIN rule -> @}' -@error{} gawk: cmd. line:2: BEGIN rule -@error{} gawk: cmd. line:2: ^ parse error -@end example - -@noindent -In this case, it looks like the backslash would continue the comment onto the -next line. However, the backslash-newline combination is never even -noticed because it is ``hidden'' inside the comment. Thus, the -@code{BEGIN} is noted as a syntax error. 
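One way to avoid this problem is to keep each comment entirely on its
own line and rely on the newlines that @command{awk} already ignores
(such as the one after @samp{@{}), rather than trying to continue the
comment itself. A minimal sketch of the previous example, rewritten this
way:

@example
$ gawk 'BEGIN @{    # a friendly BEGIN rule
>     print "dont panic"
> @}'
@print{} dont panic
@end example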
- -@cindex multiple statements on one line -When @command{awk} statements within one rule are short, you might want to put -more than one of them on a line. This is accomplished by separating the statements -with a semicolon (@samp{;}). -This also applies to the rules themselves. -Thus, the program shown at the start of this @value{SECTION} -could also be written this way: - -@example -/12/ @{ print $0 @} ; /21/ @{ print $0 @} -@end example - -@noindent -@strong{Note:} The requirement that states that rules on the same line must be -separated with a semicolon was not in the original @command{awk} -language; it was added for consistency with the treatment of statements -within an action. - -@node Other Features, When, Statements/Lines, Getting Started -@section Other Features of @command{awk} - -The @command{awk} language provides a number of predefined, or -@dfn{built-in}, variables that your programs can use to get information -from @command{awk}. There are other variables your program can set -as well to control how @command{awk} processes your data. - -In addition, @command{awk} provides a number of built-in functions for doing -common computational and string related operations. -@command{gawk} provides built-in functions for working with timestamps, -performing bit manipulation, and for runtime string translation. - -As we develop our presentation of the @command{awk} language, we introduce -most of the variables and many of the functions. They are defined -systematically in @ref{Built-in Variables}, and -@ref{Built-in, ,Built-in Functions}. - -@node When, , Other Features, Getting Started -@section When to Use @command{awk} - -@cindex uses of @command{awk} -@cindex applications of @command{awk} -Now that you've seen some of what @command{awk} can do, -you might wonder how @command{awk} could be useful for you. By using -utility programs, advanced patterns, field separators, arithmetic -statements, and other selection criteria, you can produce much more -complex output. The @command{awk} language is very useful for producing -reports from large amounts of raw data, such as summarizing information -from the output of other utility programs like @command{ls}. -(@xref{More Complex, ,A More Complex Example}.) - -Programs written with @command{awk} are usually much smaller than they would -be in other languages. This makes @command{awk} programs easy to compose and -use. Often, @command{awk} programs can be quickly composed at your terminal, -used once, and thrown away. Because @command{awk} programs are interpreted, you -can avoid the (usually lengthy) compilation part of the typical -edit-compile-test-debug cycle of software development. - -Complex programs have been written in @command{awk}, including a complete -retargetable assembler for eight-bit microprocessors (@pxref{Glossary}, for -more information), and a microcode assembler for a special purpose Prolog -computer. However, @command{awk}'s capabilities are strained by tasks of -such complexity. - -If you find yourself writing @command{awk} scripts of more than, say, a few -hundred lines, you might consider using a different programming -language. Emacs Lisp is a good choice if you need sophisticated string -or pattern matching capabilities. The shell is also good at string and -pattern matching; in addition, it allows powerful use of the system -utilities. More conventional languages, such as C, C++, and Java, offer -better facilities for system programming and for managing the complexity -of large programs. 
Programs in these languages may require more lines -of source code than the equivalent @command{awk} programs, but they are -easier to maintain and usually run more efficiently. - -@node Regexp, Reading Files, Getting Started, Top -@chapter Regular Expressions -@cindex pattern, regular expressions -@cindex regexp -@cindex regular expression -@cindex regular expressions as patterns - -A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a -set of strings. -Because regular expressions are such a fundamental part of @command{awk} -programming, their format and use deserve a separate @value{CHAPTER}. - -A regular expression enclosed in slashes (@samp{/}) -is an @command{awk} pattern that matches every input record whose text -belongs to that set. -The simplest regular expression is a sequence of letters, numbers, or -both. Such a regexp matches any string that contains that sequence. -Thus, the regexp @samp{foo} matches any string containing @samp{foo}. -Therefore, the pattern @code{/foo/} matches any input record containing -the three characters @samp{foo} @emph{anywhere} in the record. Other -kinds of regexps let you specify more complicated classes of strings. - -@ifnotinfo -Initially, the examples in this @value{CHAPTER} are simple. -As we explain more about how -regular expressions work, we will present more complicated instances. -@end ifnotinfo - -@menu -* Regexp Usage:: How to Use Regular Expressions. -* Escape Sequences:: How to write non-printing characters. -* Regexp Operators:: Regular Expression Operators. -* Character Lists:: What can go between @samp{[...]}. -* GNU Regexp Operators:: Operators specific to GNU software. -* Case-sensitivity:: How to do case-insensitive matching. -* Leftmost Longest:: How much text matches. -* Computed Regexps:: Using Dynamic Regexps. -@end menu - -@node Regexp Usage, Escape Sequences, Regexp, Regexp -@section How to Use Regular Expressions - -A regular expression can be used as a pattern by enclosing it in -slashes. Then the regular expression is tested against the -entire text of each record. (Normally, it only needs -to match some part of the text in order to succeed.) For example, the -following prints the second field of each record that contains the string -@samp{foo} anywhere in it: - -@example -$ awk '/foo/ @{ print $2 @}' BBS-list -@print{} 555-1234 -@print{} 555-6699 -@print{} 555-6480 -@print{} 555-2127 -@end example - -@cindex regexp operators -@cindex string-matching operators -@cindex operators, string-matching -@cindex operators, regexp matching -@cindex @code{~} operator -@cindex @code{!~} operator -Regular expressions can also be used in matching expressions. These -expressions allow you to specify the string to match against; it need -not be the entire current input record. The two operators @samp{~} -and @samp{!~} perform regular expression comparisons. Expressions -using these operators can be used as patterns, or in @code{if}, -@code{while}, @code{for}, and @code{do} statements. -(@xref{Statements, ,Control Statements in Actions}.) -For example: - -@example -@var{exp} ~ /@var{regexp}/ -@end example - -@noindent -is true if the expression @var{exp} (taken as a string) -matches @var{regexp}. 
The following example matches, or selects, -all input records with the uppercase letter @samp{J} somewhere in the -first field: - -@example -$ awk '$1 ~ /J/' inventory-shipped -@print{} Jan 13 25 15 115 -@print{} Jun 31 42 75 492 -@print{} Jul 24 34 67 436 -@print{} Jan 21 36 64 620 -@end example - -So does this: - -@example -awk '@{ if ($1 ~ /J/) print @}' inventory-shipped -@end example - -This next example is true if the expression @var{exp} -(taken as a character string) -does @emph{not} match @var{regexp}: - -@example -@var{exp} !~ /@var{regexp}/ -@end example - -The following example matches, -or selects, all input records whose first field @emph{does not} contain -the uppercase letter @samp{J}: - -@example -$ awk '$1 !~ /J/' inventory-shipped -@print{} Feb 15 32 24 226 -@print{} Mar 15 24 34 228 -@print{} Apr 31 52 63 420 -@print{} May 16 34 29 208 -@dots{} -@end example - -@cindex regexp constant -When a regexp is enclosed in slashes, such as @code{/foo/}, we call it -a @dfn{regexp constant}, much like @code{5.27} is a numeric constant and -@code{"foo"} is a string constant. - -@node Escape Sequences, Regexp Operators, Regexp Usage, Regexp -@section Escape Sequences - -@cindex escape sequence notation -Some characters cannot be included literally in string constants -(@code{"foo"}) or regexp constants (@code{/foo/}). -Instead, they should be represented with @dfn{escape sequences}, -which are character sequences beginning with a backslash (@samp{\}). -One use of an escape sequence is to include a double quote character in -a string constant. Because a plain double quote ends the string, you -must use @samp{\"} to represent an actual double quote character as a -part of the string. For example: - -@example -$ awk 'BEGIN @{ print "He said \"hi!\" to her." @}' -@print{} He said "hi!" to her. -@end example - -The backslash character itself is another character that cannot be -included normally; you must write @samp{\\} to put one backslash in the -string or regexp. Thus, the string whose contents are the two characters -@samp{"} and @samp{\} must be written @code{"\"\\"}. - -Another use of backslash is to represent unprintable characters -such as tab or newline. While there is nothing to stop you from entering most -unprintable characters directly in a string constant or regexp constant, -they may look ugly. - -The following table lists -all the escape sequences used in @command{awk} and -what they represent. Unless noted otherwise, all these escape -sequences apply to both string constants and regexp constants: - -@table @code -@item \\ -A literal backslash, @samp{\}. - -@cindex @command{awk} language, V.4 version -@cindex @code{\a} escape sequence -@item \a -The ``alert'' character, @kbd{Ctrl-g}, ASCII code 7 (BEL). -(This usually makes some sort of audible noise.) - -@cindex @code{\b} escape sequence -@item \b -Backspace, @kbd{Ctrl-h}, ASCII code 8 (BS). - -@cindex @code{\f} escape sequence -@item \f -Formfeed, @kbd{Ctrl-l}, ASCII code 12 (FF). - -@cindex @code{\n} escape sequence -@item \n -Newline, @kbd{Ctrl-j}, ASCII code 10 (LF). - -@cindex @code{\r} escape sequence -@item \r -Carriage return, @kbd{Ctrl-m}, ASCII code 13 (CR). - -@cindex @code{\t} escape sequence -@item \t -Horizontal tab, @kbd{Ctrl-i}, ASCII code 9 (HT). - -@cindex @command{awk} language, V.4 version -@cindex @code{\v} escape sequence -@item \v -Vertical tab, @kbd{Ctrl-k}, ASCII code 11 (VT). 
- -@cindex @code{\}@var{nnn} escape sequence (octal) -@item \@var{nnn} -The octal value @var{nnn}, where @var{nnn} stands for 1 to 3 digits -between @samp{0} and @samp{7}. For example, the code for the ASCII ESC -(escape) character is @samp{\033}. - -@cindex @code{\x} escape sequence -@cindex @command{awk} language, V.4 version -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@item \x@var{hh}@dots{} -The hexadecimal value @var{hh}, where @var{hh} stands for a sequence -of hexadecimal digits (@samp{0} through @samp{9}, and either @samp{A} -through @samp{F} or @samp{a} through @samp{f}). Like the same construct -in ISO C, the escape sequence continues until the first non-hexadecimal -digit is seen. However, using more than two hexadecimal digits produces -undefined results. (The @samp{\x} escape sequence is not allowed in -POSIX @command{awk}.) - -@cindex @code{\/} escape sequence -@item \/ -A literal slash (necessary for regexp constants only). -This expression is used when you want to write a regexp -constant that contains a slash. Because the regexp is delimited by -slashes, you need to escape the slash that is part of the pattern, -in order to tell @command{awk} to keep processing the rest of the regexp. - -@cindex @code{\"} escape sequence -@item \" -A literal double quote (necessary for string constants only). -This expression is used when you want to write a string -constant that contains a double quote. Because the string is delimited by -double quotes, you need to escape the quote that is part of the string, -in order to tell @command{awk} to keep processing the rest of the string. -@end table - -In @command{gawk}, a number of additional two-character sequences that begin -with a backslash have special meaning in regexps. -@xref{GNU Regexp Operators, ,@command{gawk}-Specific Regexp Operators}. - -In a regexp, a backslash before any character that is not in the above table -and not listed in -@ref{GNU Regexp Operators, ,@command{gawk}-Specific Regexp Operators}, -means that the next character should be taken literally, even if it would -normally be a regexp operator. For example, @code{/a\+b/} matches the three -characters @samp{a+b}. - -@cindex portability issues -For complete portability, do not use a backslash before any character not -shown in the table above. - -To summarize: - -@itemize @bullet -@item -The escape sequences in the table above are always processed first, -for both string constants and regexp constants. This happens very early, -as soon as @command{awk} reads your program. - -@item -@command{gawk} processes both regexp constants and dynamic regexps -(@pxref{Computed Regexps, ,Using Dynamic Regexps}), -for the special operators listed in -@ref{GNU Regexp Operators, ,@command{gawk}-Specific Regexp Operators}. - -@item -A backslash before any other character means to treat that character -literally. -@end itemize - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Backslash Before Regular Characters -@cindex advanced notes - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -If you place a backslash in a string constant before something that is -not one of the characters listed above, POSIX @command{awk} purposely -leaves what happens as undefined. There are two choices: - -@cindex automatic warnings -@cindex warnings, automatic -@table @asis -@item Strip the backslash out -This is what Unix @command{awk} and @command{gawk} both do. -For example, @code{"a\qc"} is the same as @code{"aqc"}. 
-(Because this is such an easy bug to both introduce and to miss, -@command{gawk} warns you about it.) -Consider @samp{FS = @w{"[ \t]+\|[ \t]+"}} to use vertical bars -surrounded by whitespace as the field separator. There should be -two backslashes in the string, @samp{FS = @w{"[ \t]+\\|[ \t]+"}}.) -@c I did this! This is why I added the warning. - -@item Leave the backslash alone -Some other @command{awk} implementations do this. -In such implementations, @code{"a\qc"} is the same as if you had typed -@code{"a\\qc"}. -@end table - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Escape Sequences for Metacharacters -@cindex advanced notes - -Suppose you use an octal or hexadecimal -escape to represent a regexp metacharacter -(@pxref{Regexp Operators, , Regular Expression Operators}). -Does @command{awk} treat the character as a literal character or as a regexp -operator? - -@cindex dark corner -Historically, such characters were taken literally. -@value{DARKCORNER} -However, the POSIX standard indicates that they should be treated -as real metacharacters, which is what @command{gawk} does. -In compatibility mode (@pxref{Options, ,Command-Line Options}), -@command{gawk} treats the characters represented by octal and hexadecimal -escape sequences literally when used in regexp constants. Thus, -@code{/a\52b/} is equivalent to @code{/a\*b/}. - -@node Regexp Operators, Character Lists, Escape Sequences, Regexp -@section Regular Expression Operators -@cindex metacharacters -@cindex regular expression metacharacters -@cindex regexp operators - -You can combine regular expressions with special characters, -called @dfn{regular expression operators} or @dfn{metacharacters}, to -increase the power and versatility of regular expressions. - -The escape sequences described -@ifnotinfo -earlier -@end ifnotinfo -in @ref{Escape Sequences}, -are valid inside a regexp. They are introduced by a @samp{\}, and -are recognized and converted into the corresponding real characters as -the very first step in processing regexps. - -Here is a list of metacharacters. All characters that are not escape -sequences and that are not listed in the table stand for themselves: - -@table @code -@item \ -This is used to suppress the special meaning of a character when -matching. For example, @samp{\$} -matches the character @samp{$}. - -@cindex anchors in regexps -@cindex regexp, anchors -@cindex Texinfo -@item ^ -This matches the beginning of a string. For example, @samp{^@@chapter} -matches @samp{@@chapter} at the beginning of a string, and can be used -to identify chapter beginnings in Texinfo source files. -The @samp{^} is known as an @dfn{anchor}, because it anchors the pattern to -match only at the beginning of the string. - -It is important to realize that @samp{^} does not match the beginning of -a line embedded in a string. -The condition is not true in the following example: - -@example -if ("line1\nLINE 2" ~ /^L/) @dots{} -@end example - -@item $ -This is similar to @samp{^} but it matches only at the end of a string. -For example, @samp{p$} -matches a record that ends with a @samp{p}. The @samp{$} is an anchor -and does not match the end of a line embedded in a string. -The condition is not true in the following example: - -@example -if ("line1\nLINE 2" ~ /1$/) @dots{} -@end example - -@item . -This matches any single character, -@emph{including} the newline character. For example, @samp{.P} -matches any single character followed by a @samp{P} in a string. 
Using -concatenation, we can make a regular expression such as @samp{U.A}, that -matches any three-character sequence that begins with @samp{U} and ends -with @samp{A}. - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -In strict POSIX mode (@pxref{Options, ,Command-Line Options}), -@samp{.} does not match the @sc{nul} -character, which is a character with all bits equal to zero. -Otherwise, @sc{nul} is just another character. Other versions of @command{awk} -may not be able to match the @sc{nul} character. - -@cindex character list -@cindex character set (regexp component) -@cindex character class -@cindex bracket expression -@item [@dots{}] -This is called a @dfn{character list}.@footnote{In other literature, -you may see a character list referred to as either a -@dfn{character set}, a @dfn{character class} or a @dfn{bracket expression}.} -It matches any @emph{one} of the characters that are enclosed in -the square brackets. For example, @samp{[MVX]} matches any one of -the characters @samp{M}, @samp{V}, or @samp{X}, in a string. A full -discussion of what can be inside the square brackets of a character list -is given in -@ref{Character Lists, ,Using Character Lists}. - -@cindex complemented character list -@cindex character list, complemented -@item [^ @dots{}] -This is a @dfn{complemented character list}. The first character after -the @samp{[} @emph{must} be a @samp{^}. It matches any characters -@emph{except} those in the square brackets. For example, @samp{[^awk]} -matches any character that is not an @samp{a}, a @samp{w}, -or a @samp{k}. - -@item | -This is the @dfn{alternation operator} and it is used to specify -alternatives. -The @samp{|} has the lowest precedence of all the regular -expression operators. -For example, @samp{^P|[[:digit:]]} -matches any string that matches either @samp{^P} or @samp{[[:digit:]]}. This -means it matches any string that starts with @samp{P} or contains a digit. - -The alternation applies to the largest possible regexps on either side. - -@cindex Texinfo -@item (@dots{}) -Parentheses are used for grouping in regular expressions, similar to -arithmetic. They can be used to concatenate regular expressions -containing the alternation operator, @samp{|}. For example, -@samp{@@(samp|code)\@{[^@}]+\@}} matches both @samp{@@code@{foo@}} and -@samp{@@samp@{bar@}}. -(These are Texinfo formatting control sequences.) - -@item * -This symbol means that the preceding regular expression should be -repeated as many times as necessary to find a match. For example, @samp{ph*} -applies the @samp{*} symbol to the preceding @samp{h} and looks for matches -of one @samp{p} followed by any number of @samp{h}s. This also matches -just @samp{p} if no @samp{h}s are present. - -The @samp{*} repeats the @emph{smallest} possible preceding expression. -(Use parentheses if you want to repeat a larger expression.) It finds -as many repetitions as possible. For example, -@samp{awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample} -prints every record in @file{sample} containing a string of the form -@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on. -Notice the escaping of the parentheses by preceding them -with backslashes. - -@item + -This symbol is similar to @samp{*} except that the preceding expression must be -matched at least once. This means that @samp{wh+y} -would match @samp{why} and @samp{whhy}, but not @samp{wy}, whereas -@samp{wh*y} would match all three of these strings. 
-The following is a simpler -way of writing the last @samp{*} example: - -@example -awk '/\(c[ad]+r x\)/ @{ print @}' sample -@end example - -@item ? -This symbol is similar to @samp{*} except that the preceding expression can be -matched either once or not at all. For example, @samp{fe?d} -matches @samp{fed} and @samp{fd}, but nothing else. - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@cindex interval expressions -@item @{@var{n}@} -@itemx @{@var{n},@} -@itemx @{@var{n},@var{m}@} -One or two numbers inside braces denote an @dfn{interval expression}. -If there is one number in the braces, the preceding regexp is repeated -@var{n} times. -If there are two numbers separated by a comma, the preceding regexp is -repeated @var{n} to @var{m} times. -If there is one number followed by a comma, then the preceding regexp -is repeated at least @var{n} times: - -@table @code -@item wh@{3@}y -Matches @samp{whhhy}, but not @samp{why} or @samp{whhhhy}. - -@item wh@{3,5@}y -Matches @samp{whhhy}, @samp{whhhhy}, or @samp{whhhhhy}, only. - -@item wh@{2,@}y -Matches @samp{whhy} or @samp{whhhy}, and so on. -@end table - -Interval expressions were not traditionally available in @command{awk}. -They were added as part of the POSIX standard to make @command{awk} -and @command{egrep} consistent with each other. - -However, because old programs may use @samp{@{} and @samp{@}} in regexp -constants, by default @command{gawk} does @emph{not} match interval expressions -in regexps. If either @option{--posix} or @option{--re-interval} are specified -(@pxref{Options, , Command-Line Options}), then interval expressions -are allowed in regexps. - -For new programs that use @samp{@{} and @samp{@}} in regexp constants, -it is good practice to always escape them with a backslash. Then the -regexp constants are valid and work the way you want them to, using -any version of @command{awk}.@footnote{Use two backslashes if you're -using a string constant with a regexp operator or function.} -@end table - -@cindex precedence, regexp operators -@cindex regexp operators, precedence of -In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators, -as well as the braces @samp{@{} and @samp{@}}, -have -the highest precedence, followed by concatenation, and finally by @samp{|}. -As in arithmetic, parentheses can change how operators are grouped. - -In POSIX @command{awk} and @command{gawk}, the @samp{*}, @samp{+}, and @samp{?} operators -stand for themselves when there is nothing in the regexp that precedes them. -For example, @samp{/+/} matches a literal plus sign. However, many other versions of -@command{awk} treat such a usage as a syntax error. - -If @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -POSIX character classes and interval expressions are not available in -regular expressions. - -@node Character Lists, GNU Regexp Operators, Regexp Operators, Regexp -@section Using Character Lists - -Within a character list, a @dfn{range expression} consists of two -characters separated by a hyphen. It matches any single character that -sorts between the two characters, using the locale's -collating sequence and character set. For example, in the default C -locale, @samp{[a-dx-z]} is equivalent to @samp{[abcdxyz]}. Many locales -sort characters in dictionary order, and in these locales, -@samp{[a-dx-z]} is typically not equivalent to @samp{[abcdxyz]}; instead it -might be equivalent to @samp{[aBbCcDdxXyYz]}, for example. 
To obtain -the traditional interpretation of bracket expressions, you can use the C -locale by setting the @env{LC_ALL} environment variable to the value -@samp{C}. - -To include one of the characters @samp{\}, @samp{]}, @samp{-}, or @samp{^} in a -character list, put a @samp{\} in front of it. For example: - -@example -[d\]] -@end example - -@noindent -matches either @samp{d} or @samp{]}. - -@cindex @command{egrep} utility -This treatment of @samp{\} in character lists -is compatible with other @command{awk} -implementations and is also mandated by POSIX. -The regular expressions in @command{awk} are a superset -of the POSIX specification for Extended Regular Expressions (EREs). -POSIX EREs are based on the regular expressions accepted by the -traditional @command{egrep} utility. - -@cindex character class -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@dfn{Character classes} are a new feature introduced in the POSIX standard. -A character class is a special notation for describing -lists of characters that have a specific attribute, but the -actual characters can vary from country to country and/or -from character set to character set. For example, the notion of what -is an alphabetic character differs between the United States and France. - -A character class is only valid in a regexp @emph{inside} the -brackets of a character list. Character classes consist of @samp{[:}, -a keyword denoting the class, and @samp{:]}. Here are the character -classes defined by the POSIX standard: - -@c the regular table is commented out while trying out the multitable. -@c leave it here in case we need to go back, but make sure the text -@c still corresponds! - -@ignore -@table @code -@item [:alnum:] -Alphanumeric characters. - -@item [:alpha:] -Alphabetic characters. - -@item [:blank:] -Space and tab characters. - -@item [:cntrl:] -Control characters. - -@item [:digit:] -Numeric characters. - -@item [:graph:] -Characters that are printable and visible. -(A space is printable but not visible, whereas an @samp{a} is both.) - -@item [:lower:] -Lowercase alphabetic characters. - -@item [:print:] -Printable characters (characters that are not control characters). - -@item [:punct:] -Punctuation characters (characters that are not letters, digits, -control characters, or space characters). - -@item [:space:] -Space characters (such as space, tab, and formfeed, to name a few). - -@item [:upper:] -Uppercase alphabetic characters. - -@item [:xdigit:] -Characters that are hexadecimal digits. -@end table -@end ignore - -@multitable {@code{[:xdigit:]}} {Characters that are both printable and visible. (A space is} -@item @code{[:alnum:]} @tab Alphanumeric characters. -@item @code{[:alpha:]} @tab Alphabetic characters. -@item @code{[:blank:]} @tab Space and tab characters. -@item @code{[:cntrl:]} @tab Control characters. -@item @code{[:digit:]} @tab Numeric characters. -@item @code{[:graph:]} @tab Characters that are both printable and visible. -(A space is printable but not visible, whereas an @samp{a} is both.) -@item @code{[:lower:]} @tab Lowercase alphabetic characters. -@item @code{[:print:]} @tab Printable characters (characters that are not control characters). -@item @code{[:punct:]} @tab Punctuation characters (characters that are not letters, digits, -control characters, or space characters). -@item @code{[:space:]} @tab Space characters (such as space, tab, and formfeed, to name a few). -@item @code{[:upper:]} @tab Uppercase alphabetic characters. 
-@item @code{[:xdigit:]} @tab Characters that are hexadecimal digits. -@end multitable - -For example, before the POSIX standard, you had to write @code{/[A-Za-z0-9]/} -to match alphanumeric characters. If your -character set had other alphabetic characters in it, this would not -match them, and if your character set collated differently from -ASCII, this might not even match the ASCII alphanumeric characters. -With the POSIX character classes, you can write -@code{/[[:alnum:]]/} to match the alphabetic -and numeric characters in your character set. - -@cindex collating elements -Two additional special sequences can appear in character lists. -These apply to non-ASCII character sets, which can have single symbols -(called @dfn{collating elements}) that are represented with more than one -character. They can also have several characters that are equivalent for -@dfn{collating}, or sorting, purposes. (For example, in French, a plain ``e'' -and a grave-accented ``@`e'' are equivalent.) - -@table @asis -@cindex collating symbols -@item Collating Symbols -A @dfn{collating symbol} is a multicharacter collating element enclosed between -@samp{[.} and @samp{.]}. For example, if @samp{ch} is a collating element, -then @code{[[.ch.]]} is a regexp that matches this collating element, whereas -@code{[ch]} is a regexp that matches either @samp{c} or @samp{h}. - -@cindex equivalence classes -@item Equivalence Classes -An @dfn{equivalence class} is a locale-specific name for a list of -characters that are equal. The name is enclosed between -@samp{[=} and @samp{=]}. -For example, the name @samp{e} might be used to represent all of -``e,'' ``@`e,'' and ``@'e.'' In this case, @code{[[=e=]]} is a regexp -that matches any of @samp{e}, @samp{@'e}, or @samp{@`e}. -@end table - -These features are very valuable in non-English speaking locales. - -@strong{Caution:} The library functions that @command{gawk} uses for regular -expression matching currently only recognize POSIX character classes; -they do not recognize collating symbols or equivalence classes. -@c maybe one day ... - -@node GNU Regexp Operators, Case-sensitivity, Character Lists, Regexp -@section @command{gawk}-Specific Regexp Operators - -@c This section adapted (long ago) from the regex-0.12 manual - -@cindex regexp operators, GNU specific -@cindex word, regexp definition of -GNU software that deals with regular expressions provides a number of -additional regexp operators. These operators are described in this -@value{SECTION} and are specific to @command{gawk}; -they are not available in other @command{awk} implementations. -Most of the additional operators deal with word matching. -For our purposes, a @dfn{word} is a sequence of one or more letters, digits, -or underscores (@samp{_}): - -@table @code -@cindex @code{\w} regexp operator -@item \w -Matches any word-constituent character---that is, it matches any -letter, digit, or underscore. Think of it as short-hand for -@w{@code{[[:alnum:]_]}}. - -@cindex @code{\W} regexp operator -@item \W -Matches any character that is not word-constituent. -Think of it as short-hand for -@w{@code{[^[:alnum:]_]}}. - -@cindex @code{\<} regexp operator -@item \< -Matches the empty string at the beginning of a word. -For example, @code{/\} regexp operator -@item \> -Matches the empty string at the end of a word. -For example, @code{/stow\>/} matches @samp{stow} but not @samp{stowaway}. 
- -@cindex @code{\y} regexp operator -@cindex word boundaries, matching -@item \y -Matches the empty string at either the beginning or the -end of a word (i.e., the word boundar@strong{y}). For example, @samp{\yballs?\y} -matches either @samp{ball} or @samp{balls}, as a separate word. - -@cindex @code{\B} regexp operator -@item \B -Matches the empty string that occurs between two -word-constituent characters. For example, -@code{/\Brat\B/} matches @samp{crate} but it does not match @samp{dirty rat}. -@samp{\B} is essentially the opposite of @samp{\y}. -@end table - -@cindex buffer matching operators -There are two other operators that work on buffers. In Emacs, a -@dfn{buffer} is, naturally, an Emacs buffer. For other programs, -@command{gawk}'s regexp library routines consider the entire -string to match as the buffer. - -@table @code -@item \` -@cindex @code{\`} regexp operator -Matches the empty string at the -beginning of a buffer (string). - -@cindex @code{\'} regexp operator -@item \' -Matches the empty string at the -end of a buffer (string). -@end table - -Because @samp{^} and @samp{$} always work in terms of the beginning -and end of strings, these operators don't add any new capabilities -for @command{awk}. They are provided for compatibility with other -GNU software. - -In other GNU software, the word-boundary operator is @samp{\b}. However, -that conflicts with the @command{awk} language's definition of @samp{\b} -as backspace, so @command{gawk} uses a different letter. -An alternative method would have been to require two backslashes in the -GNU operators, but this was deemed too confusing. The current -method of using @samp{\y} for the GNU @samp{\b} appears to be the -lesser of two evils. - -@c NOTE!!! Keep this in sync with the same table in the summary appendix! -@c -@c Should really do this with file inclusion. -@cindex regexp, effect of command-line options -The various command-line options -(@pxref{Options, ,Command-Line Options}) -control how @command{gawk} interprets characters in regexps: - -@table @asis -@item No options -In the default case, @command{gawk} provides all the facilities of -POSIX regexps and the -@ifnotinfo -previously described -GNU regexp operators. -@end ifnotinfo -@ifnottex -GNU regexp operators described -in @ref{Regexp Operators, ,Regular Expression Operators}. -@end ifnottex -However, interval expressions are not supported. - -@item @code{--posix} -Only POSIX regexps are supported; the GNU operators are not special -(e.g., @samp{\w} matches a literal @samp{w}). Interval expressions -are allowed. - -@item @code{--traditional} -Traditional Unix @command{awk} regexps are matched. The GNU operators -are not special, interval expressions are not available, nor -are the POSIX character classes (@code{[[:alnum:]]} and so on). -Characters described by octal and hexadecimal escape sequences are -treated literally, even if they represent regexp metacharacters. - -@item @code{--re-interval} -Allow interval expressions in regexps, even if @option{--traditional} -has been provided. -@end table - -@node Case-sensitivity, Leftmost Longest, GNU Regexp Operators, Regexp -@section Case Sensitivity in Matching - -@cindex case sensitivity -@cindex ignoring case -Case is normally significant in regular expressions, both when matching -ordinary characters (i.e., not metacharacters) and inside character -sets. Thus, a @samp{w} in a regular expression matches only a lowercase -@samp{w} and not an uppercase @samp{W}. 
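For example, the following sketch (the input words are only for
illustration) shows that a lowercase @samp{w} in a regexp does not
match an uppercase @samp{W} in the data:

@example
$ echo Walk | awk '/w/ @{ print "matched" @}'
$ echo walk | awk '/w/ @{ print "matched" @}'
@print{} matched
@end example

@noindent
The first command produces no output at all.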
- -The simplest way to do a case-independent match is to use a character -list---for example, @samp{[Ww]}. However, this can be cumbersome if -you need to use it often and it can make the regular expressions harder -to read. There are two alternatives that you might prefer. - -One way to perform a case-insensitive match at a particular point in the -program is to convert the data to a single case, using the -@code{tolower} or @code{toupper} built-in string functions (which we -haven't discussed yet; -@pxref{String Functions, ,String Manipulation Functions}). -For example: - -@example -tolower($1) ~ /foo/ @{ @dots{} @} -@end example - -@noindent -converts the first field to lowercase before matching against it. -This works in any POSIX-compliant @command{awk}. - -@cindex differences between @command{gawk} and @command{awk} -@cindex @code{~} operator -@cindex @code{!~} operator -@cindex @code{IGNORECASE} variable -Another method, specific to @command{gawk}, is to set the variable -@code{IGNORECASE} to a nonzero value (@pxref{Built-in Variables}). -When @code{IGNORECASE} is not zero, @emph{all} regexp and string -operations ignore case. Changing the value of -@code{IGNORECASE} dynamically controls the case sensitivity of the -program as it runs. Case is significant by default because -@code{IGNORECASE} (like most variables) is initialized to zero: - -@example -x = "aB" -if (x ~ /ab/) @dots{} # this test will fail - -IGNORECASE = 1 -if (x ~ /ab/) @dots{} # now it will succeed -@end example - -In general, you cannot use @code{IGNORECASE} to make certain rules -case-insensitive and other rules case-sensitive, because there is no -straightforward way -to set @code{IGNORECASE} just for the pattern of -a particular rule.@footnote{Experienced C and C++ programmers will note -that it is possible, using something like -@samp{IGNORECASE = 1 && /foObAr/ @{ @dots{} @}} -and -@samp{IGNORECASE = 0 || /foobar/ @{ @dots{} @}}. -However, this is somewhat obscure and we don't recommend it.} -To do this, use either character lists or @code{tolower}. However, one -thing you can do with @code{IGNORECASE} only is dynamically turn -case-sensitivity on or off for all the rules at once. - -@code{IGNORECASE} can be set on the command line or in a @code{BEGIN} rule -(@pxref{Other Arguments, ,Other Command-Line Arguments}; also -@pxref{Using BEGIN/END, ,Startup and Cleanup Actions}). -Setting @code{IGNORECASE} from the command line is a way to make -a program case-insensitive without having to edit it. - -Prior to @command{gawk} 3.0, the value of @code{IGNORECASE} -affected regexp operations only. It did not affect string comparison -with @samp{==}, @samp{!=}, and so on. -Beginning with @value{PVERSION} 3.0, both regexp and string comparison -operations are also affected by @code{IGNORECASE}. - -@cindex ISO 8859-1 -@cindex ISO Latin-1 -Beginning with @command{gawk} 3.0, -the equivalences between upper- -and lowercase characters are based on the ISO-8859-1 (ISO Latin-1) -character set. This character set is a superset of the traditional 128 -ASCII characters, that also provides a number of characters suitable -for use with European languages. - -The value of @code{IGNORECASE} has no effect if @command{gawk} is in -compatibility mode (@pxref{Options, ,Command-Line Options}). -Case is always significant in compatibility mode. - -@node Leftmost Longest, Computed Regexps, Case-sensitivity, Regexp -@section How Much Text Matches? 
-
-@cindex leftmost longest match
-@cindex matching, leftmost longest
-Consider the following:
-
-@example
-echo aaaabcd | awk '@{ sub(/a+/, ""); print @}'
-@end example
-
-This example uses the @code{sub} function (which we haven't discussed yet;
-@pxref{String Functions, ,String Manipulation Functions})
-to make a change to the input record. Here, the regexp @code{/a+/}
-indicates ``one or more @samp{a} characters,'' and the replacement
-text is @samp{}.
-
-The input contains four @samp{a} characters.
-@command{awk} (and POSIX) regular expressions always match
-the leftmost, @emph{longest} sequence of input characters that can
-match. Thus, all four @samp{a} characters are
-replaced with @samp{} in this example:
-
-@example
-$ echo aaaabcd | awk '@{ sub(/a+/, ""); print @}'
-@print{} bcd
-@end example
-
-For simple match/no-match tests, this is not so important. But when doing
-text matching and substitutions with the @code{match}, @code{sub}, @code{gsub},
-and @code{gensub} functions, it is very important.
-@ifinfo
-@xref{String Functions, ,String Manipulation Functions},
-for more information on these functions.
-@end ifinfo
-Understanding this principle is also important for regexp-based record
-and field splitting (@pxref{Records, ,How Input Is Split into Records},
-and also @pxref{Field Separators, ,Specifying How Fields Are Separated}).
-
-@node Computed Regexps, , Leftmost Longest, Regexp
-@section Using Dynamic Regexps
-
-@cindex computed regular expressions
-@cindex regular expressions, computed
-@cindex dynamic regular expressions
-@cindex regexp, dynamic
-@cindex @code{~} operator
-@cindex @code{!~} operator
-The righthand side of a @samp{~} or @samp{!~} operator need not be a
-regexp constant (i.e., a string of characters between slashes). It may
-be any expression. The expression is evaluated and converted to a string
-if necessary; the contents of the string are used as the
-regexp. A regexp that is computed in this way is called a @dfn{dynamic
-regexp}:
-
-@example
-BEGIN @{ digits_regexp = "[[:digit:]]+" @}
-$0 ~ digits_regexp @{ print @}
-@end example
-
-@noindent
-This sets @code{digits_regexp} to a regexp that describes one or more digits,
-and tests whether the input record matches this regexp.
-
-@strong{Caution:} When using the @samp{~} and @samp{!~}
-operators, there is a difference between a regexp constant
-enclosed in slashes and a string constant enclosed in double quotes.
-If you are going to use a string constant, you have to understand that
-the string is, in essence, scanned @emph{twice}: the first time when
-@command{awk} reads your program, and the second time when it goes to
-match the string on the lefthand side of the operator with the pattern
-on the right. This is true of any string-valued expression (such as
-@code{digits_regexp} shown previously), not just string constants.
-
-@cindex regexp constants, difference between slashes and quotes
-What difference does it make if the string is
-scanned twice? The answer has to do with escape sequences, and particularly
-with backslashes. To get a backslash into a regular expression inside a
-string, you have to type two backslashes.
-
-For example, @code{/\*/} is a regexp constant for a literal @samp{*}.
-Only one backslash is needed. To do the same thing with a string,
-you have to type @code{"\\*"}. The first backslash escapes the
-second one so that the string actually contains the
-two characters @samp{\} and @samp{*}.
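-
-As a quick sketch of the effect, both of the following one-liners
-match a literal @samp{*} in their input; note the doubled backslash
-in the string-constant version:
-
-@example
-$ echo 'a*b' | awk '$0 ~ /\*/'
-@print{} a*b
-$ echo 'a*b' | awk '$0 ~ "\\*"'
-@print{} a*b
-@end example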
- -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -Given that you can use both regexp and string constants to describe -regular expressions, which should you use? The answer is ``regexp -constants,'' for several reasons: - -@itemize @bullet -@item -String constants are more complicated to write and -more difficult to read. Using regexp constants makes your programs -less error-prone. Not understanding the difference between the two -kinds of constants is a common source of errors. - -@item -It is more efficient to use regexp constants. @command{awk} can note -that you have supplied a regexp, and store it internally in a form that -makes pattern matching more efficient. When using a string constant, -@command{awk} must first convert the string into this internal form and -then perform the pattern matching. - -@item -Using regexp constants is better form; it shows clearly that you -intend a regexp match. -@end itemize - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Using @code{\n} in Character Lists of Dynamic Regexps -@cindex advanced notes -@cindex dynamic regular expressions with embedded newlines -@cindex regexp, dynamic, with embedded newlines -@cindex newlines, embedded in dynamic regexps -@cindex embedded newlines, in dynamic regexps - -Some commercial versions of @command{awk} do not allow the newline -character to be used inside a character list for a dynamic regexp: - -@example -$ awk '$0 ~ "[ \t\n]"' -@error{} awk: newline in character class [ -@error{} ]... -@error{} source line number 1 -@error{} context is -@error{} >>> <<< -@end example - -But a newline in a regexp constant works with no problem: - -@example -$ awk '$0 ~ /[ \t\n]/' -here is a sample line -@print{} here is a sample line -@kbd{Ctrl-d} -@end example - -@command{gawk} does not have this problem, and it isn't likely to -occur often in practice, but it's worth noting for future reference. - -@node Reading Files, Printing, Regexp, Top -@chapter Reading Input Files - -@cindex reading files -@cindex input -@cindex standard input -@cindex @code{FILENAME} variable -In the typical @command{awk} program, all input is read either from the -standard input (by default, this is the keyboard but often it is a pipe from another -command), or from files whose names you specify on the @command{awk} -command line. If you specify input files, @command{awk} reads them -in order, processing all the data from one before going on to the next. -The name of the current input file can be found in the built-in variable -@code{FILENAME} -(@pxref{Built-in Variables}). - -The input is read in units called @dfn{records}, and is processed by the -rules of your program one record at a time. -By default, each record is one line. Each -record is automatically split into chunks called @dfn{fields}. -This makes it more convenient for programs to work on the parts of a record. - -On rare occasions, you may need to use the @code{getline} command. -The @code{getline} command is valuable, both because it -can do explicit input from any number of files, and because the files -used with it do not have to be named on the @command{awk} command line -(@pxref{Getline, ,Explicit Input with @code{getline}}). - -@menu -* Records:: Controlling how data is split into records. -* Fields:: An introduction to fields. -* Non-Constant Fields:: Non-constant Field Numbers. -* Changing Fields:: Changing the Contents of a Field. -* Field Separators:: The field separator and how to change it. 
-* Constant Size:: Reading constant width data. -* Multiple Line:: Reading multi-line records. -* Getline:: Reading files under explicit program control - using the @code{getline} function. -@end menu - -@node Records, Fields, Reading Files, Reading Files -@section How Input Is Split into Records - -@cindex number of records, @code{NR}, @code{FNR} -@cindex @code{NR} variable -@cindex @code{FNR} variable -The @command{awk} utility divides the input for your @command{awk} -program into records and fields. -@command{awk} keeps track of the number of records that have -been read -so far -from the current input file. This value is stored in a -built-in variable called @code{FNR}. It is reset to zero when a new -file is started. Another built-in variable, @code{NR}, is the total -number of input records read so far from all @value{DF}s. It starts at zero, -but is never automatically reset to zero. - -@cindex record separator, @code{RS} -@cindex changing the record separator -@cindex record, definition of -@cindex @code{RS} variable -Records are separated by a character called the @dfn{record separator}. -By default, the record separator is the newline character. -This is why records are, by default, single lines. -A different character can be used for the record separator by -assigning the character to the built-in variable @code{RS}. - -Like any other variable, -the value of @code{RS} can be changed in the @command{awk} program -with the assignment operator, @samp{=} -(@pxref{Assignment Ops, ,Assignment Expressions}). -The new record-separator character should be enclosed in quotation marks, -which indicate a string constant. Often the right time to do this is -at the beginning of execution, before any input is processed, -so that the very first record is read with the proper separator. -To do this, use the special @code{BEGIN} pattern -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). -For example: - -@example -awk 'BEGIN @{ RS = "/" @} - @{ print $0 @}' BBS-list -@end example - -@noindent -changes the value of @code{RS} to @code{"/"}, before reading any input. -This is a string whose first character is a slash; as a result, records -are separated by slashes. Then the input file is read, and the second -rule in the @command{awk} program (the action with no pattern) prints each -record. Because each @code{print} statement adds a newline at the end of -its output, the effect of this @command{awk} program is to copy the input -with each slash changed to a newline. Here are the results of running -the program on @file{BBS-list}: - -@example -$ awk 'BEGIN @{ RS = "/" @} -> @{ print $0 @}' BBS-list -@print{} aardvark 555-5553 1200 -@print{} 300 B -@print{} alpo-net 555-3412 2400 -@print{} 1200 -@print{} 300 A -@print{} barfly 555-7685 1200 -@print{} 300 A -@print{} bites 555-1675 2400 -@print{} 1200 -@print{} 300 A -@print{} camelot 555-0542 300 C -@print{} core 555-2912 1200 -@print{} 300 C -@print{} fooey 555-1234 2400 -@print{} 1200 -@print{} 300 B -@print{} foot 555-6699 1200 -@print{} 300 B -@print{} macfoo 555-6480 1200 -@print{} 300 A -@print{} sdace 555-3430 2400 -@print{} 1200 -@print{} 300 A -@print{} sabafoo 555-2127 1200 -@print{} 300 C -@print{} -@end example - -@noindent -Note that the entry for the @samp{camelot} BBS is not split. 
-In the original @value{DF} -(@pxref{Sample Data Files, ,@value{DDF}s for the Examples}), -the line looks like this: - -@example -camelot 555-0542 300 C -@end example - -@noindent -It has one baud rate only, so there are no slashes in the record, -unlike the others which have two or more baud rates. -In fact, this record is treated as part of the record -for the @samp{core} BBS; the newline separating them in the output -is the original newline in the @value{DF}, not the one added by -@command{awk} when it printed the record! - -Another way to change the record separator is on the command line, -using the variable-assignment feature -(@pxref{Other Arguments, ,Other Command-Line Arguments}): - -@example -awk '@{ print $0 @}' RS="/" BBS-list -@end example - -@noindent -This sets @code{RS} to @samp{/} before processing @file{BBS-list}. - -Using an unusual character such as @samp{/} for the record separator -produces correct behavior in the vast majority of cases. However, -the following (extreme) pipeline prints a surprising @samp{1}: - -@example -$ echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}' -@print{} 1 -@end example - -There is one field, consisting of a newline. The value of the built-in -variable @code{NF} is the number of fields in the current record. - -@cindex dark corner -Reaching the end of an input file terminates the current input record, -even if the last character in the file is not the character in @code{RS}. -@value{DARKCORNER} - -@cindex empty string -The empty string @code{""} (a string without any characters) -has a special meaning -as the value of @code{RS}. It means that records are separated -by one or more blank lines and nothing else. -@xref{Multiple Line, ,Multiple-Line Records}, for more details. - -If you change the value of @code{RS} in the middle of an @command{awk} run, -the new value is used to delimit subsequent records, but the record -currently being processed, as well as records already processed, are not -affected. - -@cindex @code{RT} variable -@cindex record terminator, @code{RT} -@cindex terminator, record -@cindex differences between @command{gawk} and @command{awk} -@cindex regular expressions as record separators -After the end of the record has been determined, @command{gawk} -sets the variable @code{RT} to the text in the input that matched -@code{RS}. -When using @command{gawk}, -the value of @code{RS} is not limited to a one-character -string. It can be any regular expression -(@pxref{Regexp, ,Regular Expressions}). -In general, each record -ends at the next string that matches the regular expression; the next -record starts at the end of the matching string. This general rule is -actually at work in the usual case, where @code{RS} contains just a -newline: a record ends at the beginning of the next matching string (the -next newline in the input) and the following record starts just after -the end of this string (at the first character of the following line). -The newline, because it matches @code{RS}, is not part of either record. - -When @code{RS} is a single character, @code{RT} -contains the same single character. However, when @code{RS} is a -regular expression, @code{RT} contains -the actual input text that matched the regular expression. - -The following example illustrates both of these features. 
-It sets @code{RS} equal to a regular expression that -matches either a newline or a series of one or more uppercase letters -with optional leading and/or trailing whitespace: - -@example -$ echo record 1 AAAA record 2 BBBB record 3 | -> gawk 'BEGIN @{ RS = "\n|( *[[:upper:]]+ *)" @} -> @{ print "Record =", $0, "and RT =", RT @}' -@print{} Record = record 1 and RT = AAAA -@print{} Record = record 2 and RT = BBBB -@print{} Record = record 3 and RT = -@print{} -@end example - -@noindent -The final line of output has an extra blank line. This is because the -value of @code{RT} is a newline, and the @code{print} statement -supplies its own terminating newline. -@xref{Simple Sed, ,A Simple Stream Editor}, for a more useful example -of @code{RS} as a regexp and @code{RT}. - -@cindex differences between @command{gawk} and @command{awk} -The use of @code{RS} as a regular expression and the @code{RT} -variable are @command{gawk} extensions; they are not available in -compatibility mode -(@pxref{Options, ,Command-Line Options}). -In compatibility mode, only the first character of the value of -@code{RS} is used to determine the end of the record. - -@c fakenode --- for prepinfo -@subheading Advanced Notes: @code{RS = "\0"} Is Not Portable -@cindex advanced notes -@cindex portability issues - -There are times when you might want to treat an entire @value{DF} as a -single record. The only way to make this happen is to give @code{RS} -a value that you know doesn't occur in the input file. This is hard -to do in a general way, such that a program always works for arbitrary -input files. -@c can you say `understatement' boys and girls? - -You might think that for text files, the @sc{nul} character, which -consists of a character with all bits equal to zero, is a good -value to use for @code{RS} in this case: - -@example -BEGIN @{ RS = "\0" @} # whole file becomes one record? -@end example - -@cindex differences between @command{gawk} and @command{awk} -@command{gawk} in fact accepts this, and uses the @sc{nul} -character for the record separator. -However, this usage is @emph{not} portable -to other @command{awk} implementations. - -@cindex dark corner -All other @command{awk} implementations@footnote{At least that we know -about.} store strings internally as C-style strings. C strings use the -@sc{nul} character as the string terminator. In effect, this means that -@samp{RS = "\0"} is the same as @samp{RS = ""}. -@value{DARKCORNER} - -The best way to treat a whole file as a single record is to -simply read the file in, one record at a time, concatenating each -record onto the end of the previous ones. - -@node Fields, Non-Constant Fields, Records, Reading Files -@section Examining Fields - -@cindex examining fields -@cindex fields -@cindex accessing fields -When @command{awk} reads an input record, the record is -automatically separated or @dfn{parsed} by the interpreter into chunks -called @dfn{fields}. By default, fields are separated by @dfn{whitespace}, -like words in a line. -Whitespace in @command{awk} means any string of one or more spaces, -tabs, or newlines;@footnote{In POSIX @command{awk}, newlines are not -considered whitespace for separating fields.} other characters, such as -formfeed, vertical tab, etc.@: that are -considered whitespace by other languages, are @emph{not} considered -whitespace by @command{awk}. - -The purpose of fields is to make it more convenient for you to refer to -these pieces of the record. 
You don't have to use them---you can -operate on the whole record if you want---but fields are what make -simple @command{awk} programs so powerful. - -@cindex @code{$} field operator -@cindex field operator @code{$} -A dollar-sign (@samp{$}) is used -to refer to a field in an @command{awk} program, -followed by the number of the field you want. Thus, @code{$1} -refers to the first field, @code{$2} to the second, and so on. -(Unlike the Unix shells, the field numbers are not limited to single digits. -@code{$127} is the one hundred and twenty-seventh field in the record.) -For example, suppose the following is a line of input: - -@example -This seems like a pretty nice example. -@end example - -@noindent -Here the first field, or @code{$1}, is @samp{This}, the second field, or -@code{$2}, is @samp{seems}, and so on. Note that the last field, -@code{$7}, is @samp{example.}. Because there is no space between the -@samp{e} and the @samp{.}, the period is considered part of the seventh -field. - -@cindex @code{NF} variable -@cindex number of fields, @code{NF} -@code{NF} is a built-in variable whose value is the number of fields -in the current record. @command{awk} automatically updates the value -of @code{NF} each time it reads a record. No matter how many fields -there are, the last field in a record can be represented by @code{$NF}. -So, @code{$NF} is the same as @code{$7}, which is @samp{example.}. -If you try to reference a field beyond the last -one (such as @code{$8} when the record has only seven fields), you get -the empty string. (If used in a numeric operation, you get zero.) - -The use of @code{$0}, which looks like a reference to the ``zeroth'' field, is -a special case: it represents the whole input record -when you are not interested in specific fields. -Here are some more examples: - -@example -$ awk '$1 ~ /foo/ @{ print $0 @}' BBS-list -@print{} fooey 555-1234 2400/1200/300 B -@print{} foot 555-6699 1200/300 B -@print{} macfoo 555-6480 1200/300 A -@print{} sabafoo 555-2127 1200/300 C -@end example - -@noindent -This example prints each record in the file @file{BBS-list} whose first -field contains the string @samp{foo}. The operator @samp{~} is called a -@dfn{matching operator} -(@pxref{Regexp Usage, , How to Use Regular Expressions}); -it tests whether a string (here, the field @code{$1}) matches a given regular -expression. - -By contrast, the following example -looks for @samp{foo} in @emph{the entire record} and prints the first -field and the last field for each matching input record: - -@example -$ awk '/foo/ @{ print $1, $NF @}' BBS-list -@print{} fooey B -@print{} foot B -@print{} macfoo A -@print{} sabafoo C -@end example - -@node Non-Constant Fields, Changing Fields, Fields, Reading Files -@section Non-Constant Field Numbers - -The number of a field does not need to be a constant. Any expression in -the @command{awk} language can be used after a @samp{$} to refer to a -field. The value of the expression specifies the field number. If the -value is a string, rather than a number, it is converted to a number. -Consider this example: - -@example -awk '@{ print $NR @}' -@end example - -@noindent -Recall that @code{NR} is the number of records read so far: one in the -first record, two in the second, etc. So this example prints the first -field of the first record, the second field of the second record, and so -on. For the twentieth record, field number 20 is printed; most likely, -the record has fewer than 20 fields, so this prints a blank line. 
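-
-A more typical use of a computed field number (shown here only as a
-small sketch) is to print the next-to-last field of each record:
-
-@example
-$ echo 'a b c d' | awk '@{ print $(NF-1) @}'
-@print{} c
-@end example
-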
-Here is another example of using expressions as field numbers:
-
-@example
-awk '@{ print $(2*2) @}' BBS-list
-@end example
-
-@command{awk} evaluates the expression @samp{(2*2)} and uses
-its value as the number of the field to print. The @samp{*} sign
-represents multiplication, so the expression @samp{2*2} evaluates to four.
-The parentheses are used so that the multiplication is done before the
-@samp{$} operation; they are necessary whenever there is a binary
-operator in the field-number expression. This example, then, prints the
-hours of operation (the fourth field) for every line of the file
-@file{BBS-list}. (All of the @command{awk} operators are listed, in
-order of decreasing precedence, in
-@ref{Precedence, , Operator Precedence (How Operators Nest)}.)
-
-If the field number you compute is zero, you get the entire record.
-Thus, @samp{$(2-2)} has the same value as @code{$0}. Negative field
-numbers are not allowed; trying to reference one usually terminates
-the program. (The POSIX standard does not define
-what happens when you reference a negative field number. @command{gawk}
-notices this and terminates your program. Other @command{awk}
-implementations may behave differently.)
-
-As mentioned in @ref{Fields, ,Examining Fields},
-@command{awk} stores the current record's number of fields in the built-in
-variable @code{NF} (also @pxref{Built-in Variables}). The expression
-@code{$NF} is not a special feature---it is the direct consequence of
-evaluating @code{NF} and using its value as a field number.
-
-@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
-@section Changing the Contents of a Field
-
-@cindex fields, changing contents of
-@cindex changing contents of a field
-@cindex assignment to fields
-The contents of a field, as seen by @command{awk}, can be changed within an
-@command{awk} program; this changes what @command{awk} perceives as the
-current input record. (The actual input is untouched; @command{awk} @emph{never}
-modifies the input file.)
-Consider this example and its output:
-
-@example
-$ awk '@{ nboxes = $3 ; $3 = $3 - 10
-> print nboxes, $3 @}' inventory-shipped
-@print{} 13 3
-@print{} 15 5
-@print{} 15 5
-@dots{}
-@end example
-
-@noindent
-The program first saves the original value of field three in the variable
-@code{nboxes}.
-The @samp{-} sign represents subtraction, so this program reassigns
-field three, @code{$3}, as the original value of field three minus ten:
-@samp{$3 - 10}. (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
-Then it prints the original and new values for field three.
-(Someone in the warehouse made a consistent mistake while inventorying
-the red boxes.)
-
-For this to work, the text in field @code{$3} must make sense
-as a number; the string of characters must be converted to a number
-for the computer to do arithmetic on it. The number resulting
-from the subtraction is converted back to a string of characters that
-then becomes field three.
-@xref{Conversion, ,Conversion of Strings and Numbers}.
-
-When the value of a field is changed (as perceived by @command{awk}), the
-text of the input record is recalculated to contain the new field where
-the old one was. In other words, @code{$0} changes to reflect the altered
-field. Thus, this program
-prints a copy of the input file, with 10 subtracted from the second
-field of each line:
-
-@example
-$ awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
-@print{} Jan 3 25 15 115
-@print{} Feb 5 32 24 226
-@print{} Mar 5 24 34 228
-@dots{}
-@end example
-
-It is also possible to assign contents to fields that are out
-of range. For example:
-
-@example
-$ awk '@{ $6 = ($5 + $4 + $3 + $2)
-> print $6 @}' inventory-shipped
-@print{} 168
-@print{} 297
-@print{} 301
-@dots{}
-@end example
-
-@noindent
-We've just created @code{$6}, whose value is the sum of fields
-@code{$2}, @code{$3}, @code{$4}, and @code{$5}. The @samp{+} sign
-represents addition. For the file @file{inventory-shipped}, @code{$6}
-represents the total number of parcels shipped for a particular month.
-
-Creating a new field changes @command{awk}'s internal copy of the current
-input record, which is the value of @code{$0}. Thus, if you do @samp{print $0}
-after adding a field, the record printed includes the new field, with
-the appropriate number of field separators between it and the previously
-existing fields.
-
-This recomputation affects and is affected by
-@code{NF} (the number of fields; @pxref{Fields, ,Examining Fields}).
-It is also affected by a feature that has not been discussed yet:
-the @dfn{output field separator}, @code{OFS},
-used to separate the fields (@pxref{Output Separators}).
-For example, the value of @code{NF} is set to the number of the highest
-field you create.
-
-Note, however, that merely @emph{referencing} an out-of-range field
-does @emph{not} change the value of either @code{$0} or @code{NF}.
-Referencing an out-of-range field only produces an empty string. For
-example:
-
-@example
-if ($(NF+1) != "")
- print "can't happen"
-else
- print "everything is normal"
-@end example
-
-@noindent
-should print @samp{everything is normal}, because @code{NF+1} is certain
-to be out of range. (@xref{If Statement, ,The @code{if}-@code{else} Statement},
-for more information about @command{awk}'s @code{if-else} statements.
-@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions},
-for more information about the @samp{!=} operator.)
-
-It is important to note that making an assignment to an existing field
-changes the
-value of @code{$0} but does not change the value of @code{NF},
-even when you assign the empty string to a field. For example:
-
-@example
-$ echo a b c d | awk '@{ OFS = ":"; $2 = ""
-> print $0; print NF @}'
-@print{} a::c:d
-@print{} 4
-@end example
-
-@noindent
-The field is still there; it just has an empty value, denoted by
-the two colons between @samp{a} and @samp{c}.
-This example shows what happens if you create a new field:
-
-@example
-$ echo a b c d | awk '@{ OFS = ":"; $2 = ""; $6 = "new"
-> print $0; print NF @}'
-@print{} a::c:d::new
-@print{} 6
-@end example
-
-@noindent
-The intervening field, @code{$5}, is created with an empty value
-(indicated by the second pair of adjacent colons),
-and @code{NF} is updated with the value six.
-
-@c FIXME: Verify that this is in POSIX
-@cindex dark corner
-Decrementing @code{NF} throws away the values of the fields
-after the new value of @code{NF} and recomputes @code{$0}.
-@value{DARKCORNER}
-Here is an example:
-
-@example
-$ echo a b c d e f | awk '@{ print "NF =", NF;
-> NF = 3; print $0 @}'
-@print{} NF = 6
-@print{} a b c
-@end example
-
-@cindex portability issues
-@strong{Caution:} Some versions of @command{awk} don't
-rebuild @code{$0} when @code{NF} is decremented.
Caveat emptor. - -@node Field Separators, Constant Size, Changing Fields, Reading Files -@section Specifying How Fields Are Separated - -@menu -* Regexp Field Splitting:: Using regexps as the field separator. -* Single Character Fields:: Making each character a separate field. -* Command Line Field Separator:: Setting @code{FS} from the command-line. -* Field Splitting Summary:: Some final points and a summary table. -@end menu - -@cindex @code{FS} variable -@cindex fields, separating -@cindex field separator, @code{FS} -The @dfn{field separator}, which is either a single character or a regular -expression, controls the way @command{awk} splits an input record into fields. -@command{awk} scans the input record for character sequences that -match the separator; the fields themselves are the text between the matches. - -In the examples that follow, we use the bullet symbol (@bullet{}) to -represent spaces in the output. -If the field separator is @samp{oo}, then the following line: - -@example -moo goo gai pan -@end example - -@noindent -is split into three fields: @samp{m}, @samp{@bullet{}g}, and -@samp{@bullet{}gai@bullet{}pan}. -Note the leading spaces in the values of the second and third fields. - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -The field separator is represented by the built-in variable @code{FS}. -Shell programmers take note: @command{awk} does @emph{not} use the -name @code{IFS} that is used by the POSIX-compliant shells (such as -the Unix Bourne shell, @command{sh}, or @command{bash}). - -The value of @code{FS} can be changed in the @command{awk} program with the -assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}). -Often the right time to do this is at the beginning of execution -before any input has been processed, so that the very first record -is read with the proper separator. To do this, use the special -@code{BEGIN} pattern -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). -For example, here we set the value of @code{FS} to the string -@code{","}: - -@example -awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}' -@end example - -@noindent -Given the input line: - -@example -John Q. Smith, 29 Oak St., Walamazoo, MI 42139 -@end example - -@noindent -this @command{awk} program extracts and prints the string -@samp{@bullet{}29@bullet{}Oak@bullet{}St.}. - -@cindex field separator, choice of -@cindex regular expressions as field separators -Sometimes the input data contains separator characters that don't -separate fields the way you thought they would. For instance, the -person's name in the example we just used might have a title or -suffix attached, such as: - -@example -John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139 -@end example - -@noindent -The same program would extract @samp{@bullet{}LXIX}, instead of -@samp{@bullet{}29@bullet{}Oak@bullet{}St.}. -If you were expecting the program to print the -address, you would be surprised. The moral is to choose your data layout and -separator characters carefully to prevent such problems. -(If the data is not in a form that is easy to process, perhaps you -can massage it first with a separate @command{awk} program.) - -Fields are normally separated by whitespace sequences -(spaces, tabs, and newlines), not by single spaces. Two spaces in a row do not -delimit an empty field. The default value of the field separator @code{FS} -is a string containing a single space, @w{@code{" "}}. 
If @command{awk} -interpreted this value in the usual way, each space character would separate -fields, so two spaces in a row would make an empty field between them. -The reason this does not happen is that a single space as the value of -@code{FS} is a special case---it is taken to specify the default manner -of delimiting fields. - -If @code{FS} is any other single character, such as @code{","}, then -each occurrence of that character separates two fields. Two consecutive -occurrences delimit an empty field. If the character occurs at the -beginning or the end of the line, that too delimits an empty field. The -space character is the only single character that does not follow these -rules. - -@node Regexp Field Splitting, Single Character Fields, Field Separators, Field Separators -@subsection Using Regular Expressions to Separate Fields - -The previous @value{SUBSECTION} -discussed the use of single characters or simple strings as the -value of @code{FS}. -More generally, the value of @code{FS} may be a string containing any -regular expression. In this case, each match in the record for the regular -expression separates fields. For example, the assignment: - -@example -FS = ", \t" -@end example - -@noindent -makes every area of an input line that consists of a comma followed by a -space and a tab into a field separator. -@ifinfo -(@samp{\t} -is an @dfn{escape sequence} that stands for a tab; -@pxref{Escape Sequences}, -for the complete list of similar escape sequences.) -@end ifinfo - -For a less trivial example of a regular expression, try using -single spaces to separate fields the way single commas are used. -@code{FS} can be set to @w{@code{"[@ ]"}} (left bracket, space, right -bracket). This regular expression matches a single space and nothing else -(@pxref{Regexp, ,Regular Expressions}). - -There is an important difference between the two cases of @samp{FS = @w{" "}} -(a single space) and @samp{FS = @w{"[ \t\n]+"}} -(a regular expression matching one or more spaces, tabs, or newlines). -For both values of @code{FS}, fields are separated by @dfn{runs} -(multiple adjacent occurrences) of spaces, tabs, -and/or newlines. However, when the value of @code{FS} is @w{@code{" "}}, -@command{awk} first strips leading and trailing whitespace from -the record and then decides where the fields are. -For example, the following pipeline prints @samp{b}: - -@example -$ echo ' a b c d ' | awk '@{ print $2 @}' -@print{} b -@end example - -@noindent -However, this pipeline prints @samp{a} (note the extra spaces around -each letter): - -@example -$ echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t\n]+" @} -> @{ print $2 @}' -@print{} a -@end example - -@noindent -@cindex null string -@cindex empty string -In this case, the first field is @dfn{null} or empty. - -The stripping of leading and trailing whitespace also comes into -play whenever @code{$0} is recomputed. For instance, study this pipeline: - -@example -$ echo ' a b c d' | awk '@{ print; $2 = $2; print @}' -@print{} a b c d -@print{} a b c d -@end example - -@noindent -The first @code{print} statement prints the record as it was read, -with leading whitespace intact. The assignment to @code{$2} rebuilds -@code{$0} by concatenating @code{$1} through @code{$NF} together, -separated by the value of @code{OFS}. Because the leading whitespace -was ignored when finding @code{$1}, it is not part of the new @code{$0}. -Finally, the last @code{print} statement prints the new @code{$0}. 
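-
-To make the single-space regexp described earlier concrete, here is a
-small sketch. With two spaces between @samp{a} and @samp{b}, the
-default field splitting finds two fields, while @code{FS = "[ ]"}
-produces an empty field in between:
-
-@example
-$ echo 'a  b' | awk '@{ print NF @}'
-@print{} 2
-$ echo 'a  b' | awk 'BEGIN @{ FS = "[ ]" @} ; @{ print NF @}'
-@print{} 3
-@end example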
- -@node Single Character Fields, Command Line Field Separator, Regexp Field Splitting, Field Separators -@subsection Making Each Character a Separate Field - -@cindex differences between @command{gawk} and @command{awk} -@cindex single-character fields -There are times when you may want to examine each character -of a record separately. This can be done in @command{gawk} by -simply assigning the null string (@code{""}) to @code{FS}. In this case, -each individual character in the record becomes a separate field. -For example: - -@example -$ echo a b | gawk 'BEGIN @{ FS = "" @} -> @{ -> for (i = 1; i <= NF; i = i + 1) -> print "Field", i, "is", $i -> @}' -@print{} Field 1 is a -@print{} Field 2 is -@print{} Field 3 is b -@end example - -@cindex dark corner -Traditionally, the behavior of @code{FS} equal to @code{""} was not defined. -In this case, most versions of Unix @command{awk} simply treat the entire record -as only having one field. -@value{DARKCORNER} -In compatibility mode -(@pxref{Options, ,Command-Line Options}), -if @code{FS} is the null string, then @command{gawk} also -behaves this way. - -@node Command Line Field Separator, Field Splitting Summary, Single Character Fields, Field Separators -@subsection Setting @code{FS} from the Command Line -@cindex @code{-F} option -@cindex command-line option, @code{-F} -@cindex field separator, on command line -@cindex command line, setting @code{FS} on - -@code{FS} can be set on the command line. Use the @option{-F} option to -do so. For example: - -@example -awk -F, '@var{program}' @var{input-files} -@end example - -@noindent -sets @code{FS} to the @samp{,} character. Notice that the option uses -a capital @samp{F} instead of a lowercase @option{-f}, which specifies a file -containing an @command{awk} program. Case is significant in command-line -options: -the @option{-F} and @option{-f} options have nothing to do with each other. -You can use both options at the same time to set the @code{FS} variable -@emph{and} get an @command{awk} program from a file. - -The value used for the argument to @option{-F} is processed in exactly the -same way as assignments to the built-in variable @code{FS}. -Any special characters in the field separator must be escaped -appropriately. For example, to use a @samp{\} as the field separator -on the command line, you would have to type: - -@example -# same as FS = "\\" -awk -F\\\\ '@dots{}' files @dots{} -@end example - -@noindent -Because @samp{\} is used for quoting in the shell, @command{awk} sees -@samp{-F\\}. Then @command{awk} processes the @samp{\\} for escape -characters (@pxref{Escape Sequences}), finally yielding -a single @samp{\} to use for the field separator. - -@cindex historical features -As a special case, in compatibility mode -(@pxref{Options, ,Command-Line Options}), -if the argument to @option{-F} is @samp{t}, then @code{FS} is set to -the tab character. If you type @samp{-F\t} at the -shell, without any quotes, the @samp{\} gets deleted, so @command{awk} -figures that you really want your fields to be separated with tabs and -not @samp{t}s. Use @samp{-v FS="t"} or @samp{-F"[t]"} on the command line -if you really do want to separate your fields with @samp{t}s. - -For example, let's use an @command{awk} program file called @file{baud.awk} -that contains the pattern @code{/300/} and the action @samp{print $1}: - -@example -/300/ @{ print $1 @} -@end example - -Let's also set @code{FS} to be the @samp{-} character and run the -program on the file @file{BBS-list}. 
The following command prints a -list of the names of the bulletin boards that operate at 300 baud and -the first three digits of their phone numbers: - -@c tweaked to make the tex output look better in @smallbook -@example -$ awk -F- -f baud.awk BBS-list -@print{} aardvark 555 -@print{} alpo -@print{} barfly 555 -@print{} bites 555 -@print{} camelot 555 -@print{} core 555 -@print{} fooey 555 -@print{} foot 555 -@print{} macfoo 555 -@print{} sdace 555 -@print{} sabafoo 555 -@end example - -@noindent -Note the second line of output. The second line -in the original file looked like this: - -@example -alpo-net 555-3412 2400/1200/300 A -@end example - -The @samp{-} as part of the system's name was used as the field -separator, instead of the @samp{-} in the phone number that was -originally intended. This demonstrates why you have to be careful in -choosing your field and record separators. - -Perhaps the most common use of a single character as the field -separator occurs when processing the Unix system password file. -On many Unix systems, each user has a separate entry in the system password -file, one line per user. The information in these lines is separated -by colons. The first field is the user's logon name and the second is -the user's (encrypted or shadow) password. A password file entry might look -like this: - -@cindex Robbins, Arnold -@example -arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/bash -@end example - -The following program searches the system password file and prints -the entries for users who have no password: - -@example -awk -F: '$2 == ""' /etc/passwd -@end example - -@node Field Splitting Summary, , Command Line Field Separator, Field Separators -@subsection Field Splitting Summary - -The following -table -summarizes how fields are split, based on the -value of @code{FS}. (@samp{==} means ``is equal to.'') - -@table @code -@item FS == " " -Fields are separated by runs of whitespace. Leading and trailing -whitespace are ignored. This is the default. - -@item FS == @var{any other single character} -Fields are separated by each occurrence of the character. Multiple -successive occurrences delimit empty fields, as do leading and -trailing occurrences. -The character can even be a regexp metacharacter; it does not need -to be escaped. - -@item FS == @var{regexp} -Fields are separated by occurrences of characters that match @var{regexp}. -Leading and trailing matches of @var{regexp} delimit empty fields. - -@item FS == "" -Each individual character in the record becomes a separate field. -(This is a @command{gawk} extension; it is not specified by the -POSIX standard.) -@end table - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Changing @code{FS} Does Not Affect the Fields - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -According to the POSIX standard, @command{awk} is supposed to behave -as if each record is split into fields at the time it is read. -In particular, this means that if you change the value of @code{FS} -after a record is read, the value of the fields (i.e., how they were split) -should reflect the old value of @code{FS}, not the new one. - -@cindex dark corner -@cindex @command{sed} utility -@cindex stream editor -However, many implementations of @command{awk} do not work this way. Instead, -they defer splitting the fields until a field is actually -referenced. The fields are split -using the @emph{current} value of @code{FS}! -@value{DARKCORNER} -This behavior can be difficult -to diagnose. 
The following example illustrates the difference -between the two methods. -(The @command{sed}@footnote{The @command{sed} utility is a ``stream editor.'' -Its behavior is also defined by the POSIX standard.} -command prints just the first line of @file{/etc/passwd}.) - -@example -sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}' -@end example - -@noindent -which usually prints: - -@example -root -@end example - -@noindent -on an incorrect implementation of @command{awk}, while @command{gawk} -prints something like: - -@example -root:nSijPlPhZZwgE:0:0:Root:/: -@end example - -@node Constant Size, Multiple Line, Field Separators, Reading Files -@section Reading Fixed-Width Data - -@ifnotinfo -@strong{Note:} This @value{SECTION} discusses an advanced -feature of @command{gawk}. If you are a novice @command{awk} user, -you might want to skip it on the first reading. -@end ifnotinfo - -@ifinfo -(This @value{SECTION} discusses an advanced feature of @command{awk}. -If you are a novice @command{awk} user, you might want to skip it on -the first reading.) -@end ifinfo - -@command{gawk} @value{PVERSION} 2.13 introduced a facility for dealing with -fixed-width fields with no distinctive field separator. For example, -data of this nature arises in the input for old Fortran programs where -numbers are run together, or in the output of programs that did not -anticipate the use of their output as input for other programs. - -An example of the latter is a table where all the columns are lined up by -the use of a variable number of spaces and @emph{empty fields are just -spaces}. Clearly, @command{awk}'s normal field splitting based on @code{FS} -does not work well in this case. Although a portable @command{awk} program -can use a series of @code{substr} calls on @code{$0} -(@pxref{String Functions, ,String Manipulation Functions}), -this is awkward and inefficient for a large number of fields. - -@cindex fatal errors -@cindex @command{w} utility -The splitting of an input record into fixed-width fields is specified by -assigning a string containing space-separated numbers to the built-in -variable @code{FIELDWIDTHS}. Each number specifies the width of the field, -@emph{including} columns between fields. If you want to ignore the columns -between fields, you can specify the width as a separate field that is -subsequently ignored. -It is a fatal error to supply a field width that is not a positive number. -The following data is the output of the Unix @command{w} utility. It is useful -to illustrate the use of @code{FIELDWIDTHS}: - -@example -@group - 10:06pm up 21 days, 14:04, 23 users -User tty login@ idle JCPU PCPU what -hzuo ttyV0 8:58pm 9 5 vi p24.tex -hzang ttyV3 6:37pm 50 -csh -eklye ttyV5 9:53pm 7 1 em thes.tex -dportein ttyV6 8:17pm 1:47 -csh -gierd ttyD3 10:00pm 1 elm -dave ttyD4 9:47pm 4 4 w -brent ttyp0 26Jun91 4:46 26:46 4:41 bash -dave ttyq4 26Jun9115days 46 46 wnewmail -@end group -@end example - -The following program takes the above input, converts the idle time to -number of seconds, and prints out the first two fields and the calculated -idle time. - -@strong{Note:} -This program uses a number of @command{awk} features that -haven't been introduced yet. 
- -@example -BEGIN @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @} -NR > 2 @{ - idle = $4 - sub(/^ */, "", idle) # strip leading spaces - if (idle == "") - idle = 0 - if (idle ~ /:/) @{ - split(idle, t, ":") - idle = t[1] * 60 + t[2] - @} - if (idle ~ /days/) - idle *= 24 * 60 * 60 - - print $1, $2, idle -@} -@end example - -Running the program on the data produces the following results: - -@example -hzuo ttyV0 0 -hzang ttyV3 50 -eklye ttyV5 0 -dportein ttyV6 107 -gierd ttyD3 1 -dave ttyD4 0 -brent ttyp0 286 -dave ttyq4 1296000 -@end example - -Another (possibly more practical) example of fixed-width input data -is the input from a deck of balloting cards. In some parts of -the United States, voters mark their choices by punching holes in computer -cards. These cards are then processed to count the votes for any particular -candidate or on any particular issue. Because a voter may choose not to -vote on some issue, any column on the card may be empty. An @command{awk} -program for processing such data could use the @code{FIELDWIDTHS} feature -to simplify reading the data. (Of course, getting @command{gawk} to run on -a system with card readers is another story!) - -@ignore -Exercise: Write a ballot card reading program -@end ignore - -Assigning a value to @code{FS} causes @command{gawk} to return to using -@code{FS} for field splitting. Use @samp{FS = FS} to make this happen, -without having to know the current value of @code{FS}. -In order to tell which kind of field splitting is in effect, -use @code{PROCINFO["FS"]} -(@pxref{Auto-set, ,Built-in Variables That Convey Information}). -The value is @code{"FS"} if regular field splitting is being used, -or it is @code{"FIELDWIDTHS"} if fixed-width field splitting is being used: - -@example -if (PROCINFO["FS"] == "FS") - @var{regular field splitting} @dots{} -else - @var{fixed-width field splitting} @dots{} -@end example - -This information is useful when writing a function -that needs to temporarily change @code{FS} or @code{FIELDWIDTHS}, -read some records, and then restore the original settings -(@pxref{Passwd Functions, ,Reading the User Database}, -for an example of such a function). - -@node Multiple Line, Getline, Constant Size, Reading Files -@section Multiple-Line Records - -@cindex multiple line records -@cindex input, multiple line records -@cindex reading files, multiple line records -@cindex records, multiple line -In some databases, a single line cannot conveniently hold all the -information in one entry. In such cases, you can use multiline -records. The first step in doing this is to choose your data format. - -One technique is to use an unusual character or string to separate -records. For example, you could use the formfeed character (written -@samp{\f} in @command{awk}, as in C) to separate them, making each record -a page of the file. To do this, just set the variable @code{RS} to -@code{"\f"} (a string containing the formfeed character). Any -other character could equally well be used, as long as it won't be part -of the data in a record. - -Another technique is to have blank lines separate records. By a special -dispensation, an empty string as the value of @code{RS} indicates that -records are separated by one or more blank lines. When @code{RS} is set -to the empty string, each record always ends at the first blank line -encountered. The next record doesn't start until the first non-blank -line that follows. No matter how many blank lines appear in a row, they -all act as one record separator. 
-(Blank lines must be completely empty; lines that contain only -whitespace do not count.) - -@cindex leftmost longest match -@cindex matching, leftmost longest -You can achieve the same effect as @samp{RS = ""} by assigning the -string @code{"\n\n+"} to @code{RS}. This regexp matches the newline -at the end of the record and one or more blank lines after the record. -In addition, a regular expression always matches the longest possible -sequence when there is a choice -(@pxref{Leftmost Longest, ,How Much Text Matches?}). -So the next record doesn't start until -the first non-blank line that follows---no matter how many blank lines -appear in a row, they are considered one record separator. - -@cindex dark corner -There is an important difference between @samp{RS = ""} and -@samp{RS = "\n\n+"}. In the first case, leading newlines in the input -@value{DF} are ignored, and if a file ends without extra blank lines -after the last record, the final newline is removed from the record. -In the second case, this special processing is not done. -@value{DARKCORNER} - -Now that the input is separated into records, the second step is to -separate the fields in the record. One way to do this is to divide each -of the lines into fields in the normal manner. This happens by default -as the result of a special feature. When @code{RS} is set to the empty -string, the newline character @emph{always} acts as a field separator. -This is in addition to whatever field separations result from @code{FS}. - -The original motivation for this special exception was probably to provide -useful behavior in the default case (i.e., @code{FS} is equal -to @w{@code{" "}}). This feature can be a problem if you really don't -want the newline character to separate fields, because there is no way to -prevent it. However, you can work around this by using the @code{split} -function to break up the record manually -(@pxref{String Functions, ,String Manipulation Functions}). - -Another way to separate fields is to -put each field on a separate line: to do this, just set the -variable @code{FS} to the string @code{"\n"}. (This simple regular -expression matches a single newline.) -A practical example of a @value{DF} organized this way might be a mailing -list, where each entry is separated by blank lines. Consider a mailing -list in a file named @file{addresses}, that looks like this: - -@example -Jane Doe -123 Main Street -Anywhere, SE 12345-6789 - -John Smith -456 Tree-lined Avenue -Smallville, MW 98765-4321 -@dots{} -@end example - -@noindent -A simple program to process this file is as follows: - -@example -# addrs.awk --- simple mailing list program - -# Records are separated by blank lines. -# Each line is one field. -BEGIN @{ RS = "" ; FS = "\n" @} - -@{ - print "Name is:", $1 - print "Address is:", $2 - print "City and State are:", $3 - print "" -@} -@end example - -Running the program produces the following output: - -@example -$ awk -f addrs.awk addresses -@print{} Name is: Jane Doe -@print{} Address is: 123 Main Street -@print{} City and State are: Anywhere, SE 12345-6789 -@print{} -@print{} Name is: John Smith -@print{} Address is: 456 Tree-lined Avenue -@print{} City and State are: Smallville, MW 98765-4321 -@print{} -@dots{} -@end example - -@xref{Labels Program, ,Printing Mailing Labels}, for a more realistic -program that deals with address lists. -The following -table -summarizes how records are split, based on the -value of -@ifinfo -@code{RS}. 
-(@samp{==} means ``is equal to.'') -@end ifinfo -@ifnotinfo -@code{RS}: -@end ifnotinfo - -@table @code -@item RS == "\n" -Records are separated by the newline character (@samp{\n}). In effect, -every line in the @value{DF} is a separate record, including blank lines. -This is the default. - -@item RS == @var{any single character} -Records are separated by each occurrence of the character. Multiple -successive occurrences delimit empty records. - -@item RS == "" -Records are separated by runs of blank lines. The newline character -always serves as a field separator, in addition to whatever value -@code{FS} may have. Leading and trailing newlines in a file are ignored. - -@item RS == @var{regexp} -Records are separated by occurrences of characters that match @var{regexp}. -Leading and trailing matches of @var{regexp} delimit empty records. -(This is a @command{gawk} extension, it is not specified by the -POSIX standard.) -@end table - -@cindex @code{RT} variable -In all cases, @command{gawk} sets @code{RT} to the input text that matched the -value specified by @code{RS}. - -@node Getline, , Multiple Line, Reading Files -@section Explicit Input with @code{getline} - -@cindex @code{getline} built-in function -@cindex input, explicit -@cindex explicit input -@cindex input, @code{getline} command -@cindex reading files, @code{getline} command -So far we have been getting our input data from @command{awk}'s main -input stream---either the standard input (usually your terminal, sometimes -the output from another program) or from the -files specified on the command line. The @command{awk} language has a -special built-in command called @code{getline} that -can be used to read input under your explicit control. - -The @code{getline} command is used in several different ways and should -@emph{not} be used by beginners. -The examples that follow the explanation of the @code{getline} command -include material that has not been covered yet. Therefore, come back -and study the @code{getline} command @emph{after} you have reviewed the -rest of this @value{DOCUMENT} and have a good knowledge of how @command{awk} works. - -@cindex @code{ERRNO} variable -@cindex differences between @command{gawk} and @command{awk} -@cindex @code{getline}, return values -The @code{getline} command returns one if it finds a record and zero if -the end of the file is encountered. If there is some error in getting -a record, such as a file that cannot be opened, then @code{getline} -returns @minus{}1. In this case, @command{gawk} sets the variable -@code{ERRNO} to a string describing the error that occurred. - -In the following examples, @var{command} stands for a string value that -represents a shell command. - -@menu -* Plain Getline:: Using @code{getline} with no arguments. -* Getline/Variable:: Using @code{getline} into a variable. -* Getline/File:: Using @code{getline} from a file. -* Getline/Variable/File:: Using @code{getline} into a variable from a - file. -* Getline/Pipe:: Using @code{getline} from a pipe. -* Getline/Variable/Pipe:: Using @code{getline} into a variable from a - pipe. -* Getline/Coprocess:: Using @code{getline} from a coprocess. -* Getline/Variable/Coprocess:: Using @code{getline} into a variable from a - coprocess. -* Getline Notes:: Important things to know about @code{getline}. -* Getline Summary:: Summary of @code{getline} Variants. 
-@end menu - -@node Plain Getline, Getline/Variable, Getline, Getline -@subsection Using @code{getline} with No Arguments - -The @code{getline} command can be used without arguments to read input -from the current input file. All it does in this case is read the next -input record and split it up into fields. This is useful if you've -finished processing the current record, but want to do some special -processing @emph{right now} on the next record. Here's an -example: - -@example -@{ - if ((t = index($0, "/*")) != 0) @{ - # value of `tmp' will be "" if t is 1 - tmp = substr($0, 1, t - 1) - u = index(substr($0, t + 2), "*/") - while (u == 0) @{ - if (getline <= 0) @{ - m = "unexpected EOF or error" - m = (m ": " ERRNO) - print m > "/dev/stderr" - exit - @} - t = -1 - u = index($0, "*/") - @} - # substr expression will be "" if */ - # occurred at end of line - $0 = tmp substr($0, u + 2) - @} - print $0 -@} -@end example - -This @command{awk} program deletes all C-style comments (@samp{/* @dots{} -*/}) from the input. By replacing the @samp{print $0} with other -statements, you could perform more complicated processing on the -decommented input, such as searching for matches of a regular -expression. (This program has a subtle problem---it does not work if one -comment ends and another begins on the same line.) - -@ignore -Exercise, -write a program that does handle multiple comments on the line. -@end ignore - -This form of the @code{getline} command sets @code{NF}, -@code{NR}, @code{FNR}, and the value of @code{$0}. - -@strong{Note:} The new value of @code{$0} is used to test -the patterns of any subsequent rules. The original value -of @code{$0} that triggered the rule that executed @code{getline} -is lost. -By contrast, the @code{next} statement reads a new record -but immediately begins processing it normally, starting with the first -rule in the program. @xref{Next Statement, ,The @code{next} Statement}. - -@node Getline/Variable, Getline/File, Plain Getline, Getline -@subsection Using @code{getline} into a Variable - -You can use @samp{getline @var{var}} to read the next record from -@command{awk}'s input into the variable @var{var}. No other processing is -done. -For example, suppose the next line is a comment or a special string, -and you want to read it without triggering -any rules. This form of @code{getline} allows you to read that line -and store it in a variable so that the main -read-a-line-and-check-each-rule loop of @command{awk} never sees it. -The following example swaps every two lines of input. -The program is as follows: - -@example -@{ - if ((getline tmp) > 0) @{ - print tmp - print $0 - @} else - print $0 -@} -@end example - -@noindent -It takes the following list: - -@example -wan -tew -free -phore -@end example - -@noindent -and produces these results: - -@example -tew -wan -phore -free -@end example - -The @code{getline} command used in this way sets only the variables -@code{NR} and @code{FNR} (and of course, @var{var}). The record is not -split into fields, so the values of the fields (including @code{$0}) and -the value of @code{NF} do not change. - -@node Getline/File, Getline/Variable/File, Getline/Variable, Getline -@subsection Using @code{getline} from a File - -@cindex input redirection -@cindex redirection of input -@cindex @code{<} I/O operator -Use @samp{getline < @var{file}} to read the next record from @var{file}. -Here @var{file} is a string-valued expression that -specifies the @value{FN}. 
@samp{< @var{file}} is called a @dfn{redirection} -because it directs input to come from a different place. -For example, the following -program reads its input record from the file @file{secondary.input} when it -encounters a first field with a value equal to 10 in the current input -file: - -@example -@{ - if ($1 == 10) @{ - getline < "secondary.input" - print - @} else - print -@} -@end example - -Because the main input stream is not used, the values of @code{NR} and -@code{FNR} are not changed. However, the record it reads is split into fields in -the normal manner, so the values of @code{$0} and the other fields are -changed, resulting in a new value of @code{NF}. - -@c Thanks to Paul Eggert for initial wording here -According to POSIX, @samp{getline < @var{expression}} is ambiguous if -@var{expression} contains unparenthesized operators other than -@samp{$}; for example, @samp{getline < dir "/" file} is ambiguous -because the concatenation operator is not parenthesized. You should -write it as @samp{getline < (dir "/" file)} if you want your program -to be portable to other @command{awk} implementations. -(It happens that @command{gawk} gets it right, but you should not -rely on this. Parentheses make it easier to read.) - -@node Getline/Variable/File, Getline/Pipe, Getline/File, Getline -@subsection Using @code{getline} into a Variable from a File - -Use @samp{getline @var{var} < @var{file}} to read input -from the file -@var{file}, and put it in the variable @var{var}. As above, @var{file} -is a string-valued expression that specifies the file from which to read. - -In this version of @code{getline}, none of the built-in variables are -changed and the record is not split into fields. The only variable -changed is @var{var}. -For example, the following program copies all the input files to the -output, except for records that say @w{@samp{@@include @var{filename}}}. -Such a record is replaced by the contents of the file -@var{filename}: - -@example -@{ - if (NF == 2 && $1 == "@@include") @{ - while ((getline line < $2) > 0) - print line - close($2) - @} else - print -@} -@end example - -Note here how the name of the extra input file is not built into -the program; it is taken directly from the data, from the second field on -the @samp{@@include} line. - -The @code{close} function is called to ensure that if two identical -@samp{@@include} lines appear in the input, the entire specified file is -included twice. -@xref{Close Files And Pipes, ,Closing Input and Output Redirections}. - -One deficiency of this program is that it does not process nested -@samp{@@include} statements -(i.e., @samp{@@include} statements in included files) -the way a true macro preprocessor would. -@xref{Igawk Program, ,An Easy Way to Use Library Functions}, for a program -that does handle nested @samp{@@include} statements. - -@node Getline/Pipe, Getline/Variable/Pipe, Getline/Variable/File, Getline -@subsection Using @code{getline} from a Pipe - -@cindex @code{|} I/O operator -@cindex input pipeline -@cindex pipeline, input -The output of a command can also be piped into @code{getline}, using -@samp{@var{command} | getline}. In -this case, the string @var{command} is run as a shell command and its output -is piped into @command{awk} to be used as input. This form of @code{getline} -reads one record at a time from the pipe. 
-For example, the following program copies its input to its output, except for -lines that begin with @samp{@@execute}, which are replaced by the output -produced by running the rest of the line as a shell command: - -@example -@{ - if ($1 == "@@execute") @{ - tmp = substr($0, 10) - while ((tmp | getline) > 0) - print - close(tmp) - @} else - print -@} -@end example - -@noindent -The @code{close} function is called to ensure that if two identical -@samp{@@execute} lines appear in the input, the command is run for -each one. -@ifnottex -@xref{Close Files And Pipes, ,Closing Input and Output Redirections}. -@end ifnottex -@c Exercise!! -@c This example is unrealistic, since you could just use system -Given the input: - -@example -foo -bar -baz -@@execute who -bletch -@end example - -@noindent -the program might produce: - -@cindex Robbins, Bill -@cindex Robbins, Miriam -@cindex Robbins, Arnold -@example -foo -bar -baz -arnold ttyv0 Jul 13 14:22 -miriam ttyp0 Jul 13 14:23 (murphy:0) -bill ttyp1 Jul 13 14:23 (murphy:0) -bletch -@end example - -@noindent -Notice that this program ran the command @command{who} and printed the result. -(If you try this program yourself, you will of course get different results, -depending upon who is logged in on your system.) - -This variation of @code{getline} splits the record into fields, sets the -value of @code{NF} and recomputes the value of @code{$0}. The values of -@code{NR} and @code{FNR} are not changed. - -@c Thanks to Paul Eggert for initial wording here -According to POSIX, @samp{@var{expression} | getline} is ambiguous if -@var{expression} contains unparenthesized operators other than -@samp{$}---for example, @samp{@w{"echo "} "date" | getline} is ambiguous -because the concatenation operator is not parenthesized. You should -write it as @samp{(@w{"echo "} "date") | getline} if you want your program -to be portable to other @command{awk} implementations. -@ifinfo -(It happens that @command{gawk} gets it right, but you should not -rely on this. Parentheses make it easier to read, anyway.) -@end ifinfo - -@node Getline/Variable/Pipe, Getline/Coprocess, Getline/Pipe, Getline -@subsection Using @code{getline} into a Variable from a Pipe - -When you use @samp{@var{command} | getline @var{var}}, the -output of @var{command} is sent through a pipe to -@code{getline} and into the variable @var{var}. For example, the -following program reads the current date and time into the variable -@code{current_time}, using the @command{date} utility, and then -prints it: - -@example -BEGIN @{ - "date" | getline current_time - close("date") - print "Report printed on " current_time -@} -@end example - -In this version of @code{getline}, none of the built-in variables are -changed and the record is not split into fields. - -@ifinfo -@c Thanks to Paul Eggert for initial wording here -According to POSIX, @samp{@var{expression} | getline @var{var}} is ambiguous if -@var{expression} contains unparenthesized operators other than -@samp{$}; for example, @samp{@w{"echo "} "date" | getline @var{var}} is ambiguous -because the concatenation operator is not parenthesized. You should -write it as @samp{(@w{"echo "} "date") | getline @var{var}} if you want your -program to be portable to other @command{awk} implementations. -(It happens that @command{gawk} gets it right, but you should not -rely on this. Parentheses make it easier to read, anyway.) 
-@end ifinfo
-
-@node Getline/Coprocess, Getline/Variable/Coprocess, Getline/Variable/Pipe, Getline
-@subsection Using @code{getline} from a Coprocess
-@cindex coprocess
-@cindex @code{|&} I/O operator
-@cindex differences between @command{gawk} and @command{awk}
-
-Input into @code{getline} from a pipe is a one-way operation.
-The command that is started with @samp{@var{command} | getline} only
-sends data @emph{to} your @command{awk} program.
-
-On occasion, you might want to send data to another program
-for processing and then read the results back.
-@command{gawk} allows you to start a @dfn{coprocess}, with which two-way
-communications are possible. This is done with the @samp{|&}
-operator.
-Typically, you write data to the coprocess first, and then
-read results back, as shown in the following:
-
-@example
-print "@var{some query}" |& "db_server"
-"db_server" |& getline
-@end example
-
-@noindent
-which sends a query to @command{db_server} and then reads the results.
-
-The values of @code{NR} and
-@code{FNR} are not changed,
-because the main input stream is not used.
-However, the record is split into fields in
-the normal manner, thus changing the values of @code{$0}, the other fields,
-and of @code{NF}.
-
-Coprocesses are an advanced feature. They are discussed here only because
-this is the @value{SECTION} on @code{getline}.
-@xref{Two-way I/O, ,Two-Way Communications with Another Process},
-where coprocesses are discussed in more detail.
-
-@node Getline/Variable/Coprocess, Getline Notes, Getline/Coprocess, Getline
-@subsection Using @code{getline} into a Variable from a Coprocess
-
-When you use @samp{@var{command} |& getline @var{var}}, the output from
-the coprocess @var{command} is sent through a two-way pipe to @code{getline}
-and into the variable @var{var}.
-
-In this version of @code{getline}, none of the built-in variables are
-changed and the record is not split into fields. The only variable
-changed is @var{var}.
-
-@ifinfo
-Coprocesses are an advanced feature. They are discussed here only because
-this is the @value{SECTION} on @code{getline}.
-@xref{Two-way I/O, ,Two-Way Communications with Another Process},
-where coprocesses are discussed in more detail.
-@end ifinfo
-
-@node Getline Notes, Getline Summary, Getline/Variable/Coprocess, Getline
-@subsection Points About @code{getline} to Remember
-Here are some miscellaneous points about @code{getline} that
-you should bear in mind:
-
-@itemize @bullet
-@item
-When @code{getline} changes the value of @code{$0} and @code{NF},
-@command{awk} does @emph{not} automatically jump to the start of the
-program and start testing the new record against every pattern.
-However, the new record is tested against any subsequent rules.
-
-@cindex differences between @command{gawk} and @command{awk}
-@cindex limitations
-@cindex implementation limits
-@item
-Many @command{awk} implementations limit the number of pipelines that an @command{awk}
-program may have open to just one. In @command{gawk}, there is no such limit.
-You can open as many pipelines (and coprocesses) as the underlying operating
-system permits.
-
-@cindex side effects
-@cindex @code{FILENAME} variable
-@cindex dark corner
-@cindex @code{getline}, setting @code{FILENAME}
-@cindex @code{FILENAME}, being set by @code{getline}
-@item
-An interesting side effect occurs if you use @code{getline} without a
-redirection inside a @code{BEGIN} rule.
Because an unredirected @code{getline} -reads from the command-line @value{DF}s, the first @code{getline} command -causes @command{awk} to set the value of @code{FILENAME}. Normally, -@code{FILENAME} does not have a value inside @code{BEGIN} rules, because you -have not yet started to process the command-line @value{DF}s. -@value{DARKCORNER} -(@xref{BEGIN/END, , The @code{BEGIN} and @code{END} Special Patterns}, -also @pxref{Auto-set, ,Built-in Variables That Convey Information}.) -@end itemize - -@node Getline Summary, , Getline Notes, Getline -@subsection Summary of @code{getline} Variants - -The following table summarizes the eight variants of @code{getline}, -listing which built-in variables are set by each one. - -@multitable {@var{command} @code{|& getline} @var{var}} {1234567890123456789012345678901234567890} -@item @code{getline} @tab Sets @code{$0}, @code{NF}, @code{FNR} and @code{NR} - -@item @code{getline} @var{var} @tab Sets @var{var}, @code{FNR} and @code{NR} - -@item @code{getline <} @var{file} @tab Sets @code{$0} and @code{NF} - -@item @code{getline @var{var} < @var{file}} @tab Sets @var{var} - -@item @var{command} @code{| getline} @tab Sets @code{$0} and @code{NF} - -@item @var{command} @code{| getline} @var{var} @tab Sets @var{var} - -@item @var{command} @code{|& getline} @tab Sets @code{$0} and @code{NF} -(this is a @command{gawk} extension) - -@item @var{command} @code{|& getline} @var{var} @tab Sets @var{var} -(this is a @command{gawk} extension) -@end multitable - -@node Printing, Expressions, Reading Files, Top -@chapter Printing Output - -@cindex printing -@cindex output -One of the most common programming actions is to @dfn{print} or output, -some or all of the input. Use the @code{print} statement -for simple output, and the @code{printf} statement -for fancier formatting. -The @code{print} statement is not limited when -computing @emph{which} values to print. However, with two exceptions, -you cannot specify @emph{how} to print them---how many -columns, whether to use exponential notation or not, and so on. -(For the exceptions, @pxref{Output Separators}, and -@ref{OFMT, ,Controlling Numeric Output with @code{print}}.) -For that, you need the @code{printf} statement -(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}). - -Besides basic and formatted printing, this @value{CHAPTER} -also covers I/O redirections to files and pipes, introduces -the special @value{FN}s that @command{gawk} processes internally, -and discusses the @code{close} built-in function. - -@menu -* Print:: The @code{print} statement. -* Print Examples:: Simple examples of @code{print} statements. -* Output Separators:: The output separators and how to change them. -* OFMT:: Controlling Numeric Output With @code{print}. -* Printf:: The @code{printf} statement. -* Redirection:: How to redirect output to multiple files and - pipes. -* Special Files:: File name interpretation in @command{gawk}. - @command{gawk} allows access to inherited file - descriptors. -* Close Files And Pipes:: Closing Input and Output Files and Pipes. -@end menu - -@node Print, Print Examples, Printing, Printing -@section The @code{print} Statement -@cindex @code{print} statement - -The @code{print} statement is used to produce output with simple, standardized -formatting. Specify only the strings or numbers to print, in a -list separated by commas. They are output, separated by single spaces, -followed by a newline. 
The statement looks like this: - -@example -print @var{item1}, @var{item2}, @dots{} -@end example - -@noindent -The entire list of items may be optionally enclosed in parentheses. The -parentheses are necessary if any of the item expressions uses the @samp{>} -relational operator; otherwise it could be confused with a redirection -(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}). - -The items to print can be constant strings or numbers, fields of the -current record (such as @code{$1}), variables, or any @command{awk} -expression. Numeric values are converted to strings and then printed. - -The simple statement @samp{print} with no items is equivalent to -@samp{print $0}: it prints the entire current record. To print a blank -line, use @samp{print ""}, where @code{""} is the empty string. -To print a fixed piece of text, use a string constant, such as -@w{@code{"Don't Panic"}}, as one item. If you forget to use the -double quote characters, your text is taken as an @command{awk} -expression and you will probably get an error. Keep in mind that a -space is printed between any two items. - -@node Print Examples, Output Separators, Print, Printing -@section Examples of @code{print} Statements - -Each @code{print} statement makes at least one line of output. However, it -isn't limited to only one line. If an item value is a string that contains a -newline, the newline is output along with the rest of the string. A -single @code{print} statement can make any number of lines this way. - -The following is an example of printing a string that contains embedded newlines -(the @samp{\n} is an escape sequence, used to represent the newline -character; @pxref{Escape Sequences}): - -@example -$ awk 'BEGIN @{ print "line one\nline two\nline three" @}' -@print{} line one -@print{} line two -@print{} line three -@end example - -The next example, which is run on the @file{inventory-shipped} file, -prints the first two fields of each input record, with a space between -them: - -@example -$ awk '@{ print $1, $2 @}' inventory-shipped -@print{} Jan 13 -@print{} Feb 15 -@print{} Mar 15 -@dots{} -@end example - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -A common mistake in using the @code{print} statement is to omit the comma -between two items. This often has the effect of making the items run -together in the output, with no space. The reason for this is that -juxtaposing two string expressions in @command{awk} means to concatenate -them. Here is the same program, without the comma: - -@example -$ awk '@{ print $1 $2 @}' inventory-shipped -@print{} Jan13 -@print{} Feb15 -@print{} Mar15 -@dots{} -@end example - -To someone unfamiliar with the @file{inventory-shipped} file, neither -example's output makes much sense. A heading line at the beginning -would make it clearer. Let's add some headings to our table of months -(@code{$1}) and green crates shipped (@code{$2}). We do this using the -@code{BEGIN} pattern -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}) -so that the headings are only printed once: - -@example -awk 'BEGIN @{ print "Month Crates" - print "----- ------" @} - @{ print $1, $2 @}' inventory-shipped -@end example - -@noindent -When run, the program prints the following: - -@example -Month Crates ------ ------ -Jan 13 -Feb 15 -Mar 15 -@dots{} -@end example - -@noindent -The only problem, however, is that the headings and the table data -don't line up! 
We can fix this by printing some spaces between the -two fields: - -@example -@group -awk 'BEGIN @{ print "Month Crates" - print "----- ------" @} - @{ print $1, " ", $2 @}' inventory-shipped -@end group -@end example - -Lining up columns this way can get pretty -complicated when there are many columns to fix. Counting spaces for two -or three columns is simple, but any more than this can take up -a lot of time. This is why the @code{printf} statement was -created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}); -one of its specialties is lining up columns of data. - -@cindex line continuation -@strong{Note:} You can continue either a @code{print} or -@code{printf} statement simply by putting a newline after any comma -(@pxref{Statements/Lines, ,@command{awk} Statements Versus Lines}). - -@node Output Separators, OFMT, Print Examples, Printing -@section Output Separators - -@cindex output field separator, @code{OFS} -@cindex output record separator, @code{ORS} -@cindex @code{OFS} variable -@cindex @code{ORS} variable -As mentioned previously, a @code{print} statement contains a list -of items separated by commas. In the output, the items are normally -separated by single spaces. However, this doesn't need to be the case; -a single space is only the default. Any string of -characters may be used as the @dfn{output field separator} by setting the -built-in variable @code{OFS}. The initial value of this variable -is the string @w{@code{" "}}---that is, a single space. - -The output from an entire @code{print} statement is called an -@dfn{output record}. Each @code{print} statement outputs one output -record, and then outputs a string called the @dfn{output record separator} -(or @code{ORS}). The initial -value of @code{ORS} is the string @code{"\n"}; i.e., a newline -character. Thus, each @code{print} statement normally makes a separate line. - -In order to change how output fields and records are separated, assign -new values to the variables @code{OFS} and @code{ORS}. The usual -place to do this is in the @code{BEGIN} rule -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), so -that it happens before any input is processed. It can also be done -with assignments on the command line, before the names of the input -files, or using the @option{-v} command-line option -(@pxref{Options, ,Command-Line Options}). -The following example prints the first and second fields of each input -record, separated by a semicolon, with a blank line added after each -newline: - -@ignore -Exercise, -Rewrite the -@example -awk 'BEGIN @{ print "Month Crates" - print "----- ------" @} - @{ print $1, " ", $2 @}' inventory-shipped -@end example -program by using a new value of @code{OFS}. -@end ignore - -@example -$ awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @} -> @{ print $1, $2 @}' BBS-list -@print{} aardvark;555-5553 -@print{} -@print{} alpo-net;555-3412 -@print{} -@print{} barfly;555-7685 -@dots{} -@end example - -If the value of @code{ORS} does not contain a newline, the program's output -is run together on a single line. - -@node OFMT, Printf, Output Separators, Printing -@section Controlling Numeric Output with @code{print} -@cindex @code{OFMT} variable -@cindex numeric output format -@cindex format, numeric output -@cindex output format specifier, @code{OFMT} -When the @code{print} statement is used to print numeric values, -@command{awk} internally converts the number to a string of characters -and prints that string. 
@command{awk} uses the @code{sprintf} function -to do this conversion -(@pxref{String Functions, ,String Manipulation Functions}). -For now, it suffices to say that the @code{sprintf} -function accepts a @dfn{format specification} that tells it how to format -numbers (or strings), and that there are a number of different ways in which -numbers can be formatted. The different format specifications are discussed -more fully in -@ref{Control Letters, , Format-Control Letters}. - -The built-in variable @code{OFMT} contains the default format specification -that @code{print} uses with @code{sprintf} when it wants to convert a -number to a string for printing. -The default value of @code{OFMT} is @code{"%.6g"}. -The way @code{print} prints numbers can be changed -by supplying different format specifications -as the value of @code{OFMT}, as shown in the following example: - -@example -$ awk 'BEGIN @{ -> OFMT = "%.0f" # print numbers as integers (rounds) -> print 17.23, 17.54 @}' -@print{} 17 18 -@end example - -@noindent -@cindex dark corner -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -According to the POSIX standard, @command{awk}'s behavior is undefined -if @code{OFMT} contains anything but a floating-point conversion specification. -@value{DARKCORNER} - -@node Printf, Redirection, OFMT, Printing -@section Using @code{printf} Statements for Fancier Printing -@cindex formatted output -@cindex output, formatted -@cindex @code{printf} statement - -For more precise control over the output format than what is -normally provided by @code{print}, use @code{printf}. -@code{printf} can be used to -specify the width to use for each item, as well as various -formatting choices for numbers (such as what output base to use, whether to -print an exponent, whether to print a sign, and how many digits to print -after the decimal point). This is done by supplying a string, called -the @dfn{format string}, that controls how and where to print the other -arguments. - -@menu -* Basic Printf:: Syntax of the @code{printf} statement. -* Control Letters:: Format-control letters. -* Format Modifiers:: Format-specification modifiers. -* Printf Examples:: Several examples. -@end menu - -@node Basic Printf, Control Letters, Printf, Printf -@subsection Introduction to the @code{printf} Statement - -@cindex @code{printf} statement, syntax of -A simple @code{printf} statement looks like this: - -@example -printf @var{format}, @var{item1}, @var{item2}, @dots{} -@end example - -@noindent -The entire list of arguments may optionally be enclosed in parentheses. The -parentheses are necessary if any of the item expressions use the @samp{>} -relational operator; otherwise it can be confused with a redirection -(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}). - -@cindex format string -The difference between @code{printf} and @code{print} is the @var{format} -argument. This is an expression whose value is taken as a string; it -specifies how to output each of the other arguments. It is called the -@dfn{format string}. - -The format string is very similar to that in the ISO C library function -@code{printf}. Most of @var{format} is text to output verbatim. -Scattered among this text are @dfn{format specifiers}---one per item. -Each format specifier says to output the next item in the argument list -at that place in the format. - -The @code{printf} statement does not automatically append a newline -to its output. It outputs only what the format string specifies. 
-So if a newline is needed, you must include one in the format string. -The output separator variables @code{OFS} and @code{ORS} have no effect -on @code{printf} statements. For example: - -@example -$ awk 'BEGIN @{ -> ORS = "\nOUCH!\n"; OFS = "+" -> msg = "Dont Panic!" -> printf "%s\n", msg -> @}' -@print{} Dont Panic! -@end example - -@noindent -Here, neither the @samp{+} nor the @samp{OUCH} appear when -the message is printed. - -@node Control Letters, Format Modifiers, Basic Printf, Printf -@subsection Format-Control Letters -@cindex @code{printf}, format-control characters -@cindex format specifier, @code{printf} - -A format specifier starts with the character @samp{%} and ends with -a @dfn{format-control letter}---it tells the @code{printf} statement -how to output one item. The format-control letter specifies what @emph{kind} -of value to print. The rest of the format specifier is made up of -optional @dfn{modifiers} that control @emph{how} to print the value, such as -the field width. Here is a list of the format-control letters: - -@table @code -@item %c -This prints a number as an ASCII character; thus, @samp{printf "%c", -65} outputs the letter @samp{A}. (The output for a string value is -the first character of the string.) - -@item %d@r{,} %i -These are equivalent; they both print a decimal integer. -(The @samp{%i} specification is for compatibility with ISO C.) - -@item %e@r{,} %E -These print a number in scientific (exponential) notation; -for example: - -@example -printf "%4.3e\n", 1950 -@end example - -@noindent -prints @samp{1.950e+03}, with a total of four significant figures, three of -which follow the decimal point. -(The @samp{4.3} represents two modifiers, -discussed in the next @value{SUBSECTION}.) -@samp{%E} uses @samp{E} instead of @samp{e} in the output. - -@item %f -This prints a number in floating-point notation. -For example: - -@example -printf "%4.3f", 1950 -@end example - -@noindent -prints @samp{1950.000}, with a total of four significant figures, three of -which follow the decimal point. -(The @samp{4.3} represents two modifiers, -discussed in the next @value{SUBSECTION}.) - -@item %g@r{,} %G -These print a number in either scientific notation or in floating-point -notation, whichever uses fewer characters; if the result is printed in -scientific notation, @samp{%G} uses @samp{E} instead of @samp{e}. - -@item %o -This prints an unsigned octal integer. - -@item %s -This prints a string. - -@item %u -This prints an unsigned decimal integer. -(This format is of marginal use, because all numbers in @command{awk} -are floating-point; it is provided primarily for compatibility with C.) - -@item %x@r{,} %X -These print an unsigned hexadecimal integer; -@samp{%X} uses the letters @samp{A} through @samp{F} -instead of @samp{a} through @samp{f}. - -@item %% -This isn't a format-control letter but it does have meaning---the -sequence @samp{%%} outputs one @samp{%}; it does not consume an -argument and it ignores any modifiers. -@end table - -@cindex dark corner -@strong{Note:} -When using the integer format-control letters for values that are outside -the range of a C @code{long} integer, @command{gawk} switches to the -@samp{%g} format specifier. Other versions of @command{awk} may print -invalid values or do something else entirely. 
-@value{DARKCORNER} - -@node Format Modifiers, Printf Examples, Control Letters, Printf -@subsection Modifiers for @code{printf} Formats - -@cindex @code{printf}, modifiers -@cindex modifiers (in format specifiers) -A format specification can also include @dfn{modifiers} that can control -how much of the item's value is printed, as well as how much space it gets. -The modifiers come between the @samp{%} and the format-control letter. -We will use the bullet symbol ``@bullet{}'' in the following examples to -represent -spaces in the output. Here are the possible modifiers, in the order in -which they may appear: - -@table @code -@cindex differences between @command{gawk} and @command{awk} -@cindex @code{printf}, positional specifier -@cindex positional specifier, @code{printf} -@item @var{N}$ -An integer constant followed by a @samp{$} is a @dfn{positional specifier}. -Normally, format specifications are applied to arguments in the order -given in the format string. With a positional specifier, the format -specification is applied to a specific argument, instead of what -would be the next argument in the list. Positional specifiers begin -counting with one: - -@example -printf "%s %s\n", "don't", "panic" -printf "%2$s %1$s\n", "panic", "don't" -@end example - -@noindent -prints the famous friendly message twice. - -At first glance, this feature doesn't seem to be of much use. -It is in fact a @command{gawk} extension, intended for use in translating -messages at runtime. -@xref{Printf Ordering, , Rearranging @code{printf} Arguments}, -which describes how and why to use positional specifiers. -For now, we will not use them. - -@item - -The minus sign, used before the width modifier (see further on in -this table), -says to left-justify -the argument within its specified width. Normally, the argument -is printed right-justified in the specified width. Thus: - -@example -printf "%-4s", "foo" -@end example - -@noindent -prints @samp{foo@bullet{}}. - -@item @var{space} -For numeric conversions, prefix positive values with a space and -negative values with a minus sign. - -@item + -The plus sign, used before the width modifier (see further on in -this table), -says to always supply a sign for numeric conversions, even if the data -to format is positive. The @samp{+} overrides the space modifier. - -@item # -Use an ``alternate form'' for certain control letters. -For @samp{%o}, supply a leading zero. -For @samp{%x} and @samp{%X}, supply a leading @samp{0x} or @samp{0X} for -a nonzero result. -For @samp{%e}, @samp{%E}, and @samp{%f}, the result always contains a -decimal point. -For @samp{%g} and @samp{%G}, trailing zeros are not removed from the result. - -@cindex dark corner -@item 0 -A leading @samp{0} (zero) acts as a flag that indicates that output should be -padded with zeros instead of spaces. -This applies even to non-numeric output formats. -@value{DARKCORNER} -This flag only has an effect when the field width is wider than the -value to print. - -@item @var{width} -This is a number specifying the desired minimum width of a field. Inserting any -number between the @samp{%} sign and the format-control character forces the -field to expand to this width. The default way to do this is to -pad with spaces on the left. For example: - -@example -printf "%4s", "foo" -@end example - -@noindent -prints @samp{@bullet{}foo}. - -The value of @var{width} is a minimum width, not a maximum. If the item -value requires more than @var{width} characters, it can be as wide as -necessary. 
Thus, the following: - -@example -printf "%4s", "foobar" -@end example - -@noindent -prints @samp{foobar}. - -Preceding the @var{width} with a minus sign causes the output to be -padded with spaces on the right, instead of on the left. - -@item .@var{prec} -A period followed by an integer constant -specifies the precision to use when printing. -The meaning of the precision varies by control letter: - -@table @asis -@item @code{%e}, @code{%E}, @code{%f} -Number of digits to the right of the decimal point. - -@item @code{%g}, @code{%G} -Maximum number of significant digits. - -@item @code{%d}, @code{%i}, @code{%o}, @code{%u}, @code{%x}, @code{%X} -Minimum number of digits to print. - -@item @code{%s} -Maximum number of characters from the string that should print. -@end table - -Thus, the following: - -@example -printf "%.4s", "foobar" -@end example - -@noindent -prints @samp{foob}. -@end table - -The C library @code{printf}'s dynamic @var{width} and @var{prec} -capability (for example, @code{"%*.*s"}) is supported. Instead of -supplying explicit @var{width} and/or @var{prec} values in the format -string, they are passed in the argument list. For example: - -@example -w = 5 -p = 3 -s = "abcdefg" -printf "%*.*s\n", w, p, s -@end example - -@noindent -is exactly equivalent to: - -@example -s = "abcdefg" -printf "%5.3s\n", s -@end example - -@noindent -Both programs output @samp{@w{@bullet{}@bullet{}abc}}. -Earlier versions of @command{awk} did not support this capability. -If you must use such a version, you may simulate this feature by using -concatenation to build up the format string, like so: - -@example -w = 5 -p = 3 -s = "abcdefg" -printf "%" w "." p "s\n", s -@end example - -@noindent -This is not particularly easy to read but it does work. - -@cindex fatal errors -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@cindex lint checks -C programmers may be used to supplying additional -@samp{l}, @samp{L}, and @samp{h} -modifiers in @code{printf} format strings. These are not valid in @command{awk}. -Most @command{awk} implementations silently ignore these modifiers. -If @option{--lint} is provided on the command line -(@pxref{Options, ,Command-Line Options}), -@command{gawk} warns about their use. If @option{--posix} is supplied, -their use is a fatal error. - -@node Printf Examples, , Format Modifiers, Printf -@subsection Examples Using @code{printf} - -The following is a simple example of -how to use @code{printf} to make an aligned table: - -@example -awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@end example - -@noindent -This command -prints the names of the bulletin boards (@code{$1}) in the file -@file{BBS-list} as a string of 10 characters that are left-justified. It also -prints the phone numbers (@code{$2}) next on the line. This -produces an aligned two-column table of names and phone numbers, -as shown here: - -@example -$ awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@print{} aardvark 555-5553 -@print{} alpo-net 555-3412 -@print{} barfly 555-7685 -@print{} bites 555-1675 -@print{} camelot 555-0542 -@print{} core 555-2912 -@print{} fooey 555-1234 -@print{} foot 555-6699 -@print{} macfoo 555-6480 -@print{} sdace 555-3430 -@print{} sabafoo 555-2127 -@end example - -In this case, the phone numbers had to be printed as strings because -the numbers are separated by a dash. Printing the phone numbers as -numbers would have produced just the first three digits: @samp{555}. -This would have been pretty confusing. 
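-For instance, here is a rough sketch of the mistaken version, using
-@samp{%d} for the second field (the exact output depends on the contents
-of your copy of @file{BBS-list}, but each number is reduced to the
-digits before the dash):
-
-@example
-$ awk '@{ printf "%-10s %d\n", $1, $2 @}' BBS-list
-@print{} aardvark   555
-@print{} alpo-net   555
-@print{} barfly     555
-@dots{}
-@end example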
- -It wasn't necessary to specify a width for the phone numbers because -they are last on their lines. They don't need to have spaces -after them. - -The table could be made to look even nicer by adding headings to the -tops of the columns. This is done using the @code{BEGIN} pattern -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}) -so that the headers are only printed once, at the beginning of -the @command{awk} program: - -@example -awk 'BEGIN @{ print "Name Number" - print "---- ------" @} - @{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@end example - -The above example mixed @code{print} and @code{printf} statements in -the same program. Using just @code{printf} statements can produce the -same results: - -@example -awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number" - printf "%-10s %s\n", "----", "------" @} - @{ printf "%-10s %s\n", $1, $2 @}' BBS-list -@end example - -@noindent -Printing each column heading with the same format specification -used for the column elements ensures that the headings -are aligned just like the columns. - -The fact that the same format specification is used three times can be -emphasized by storing it in a variable, like this: - -@example -awk 'BEGIN @{ format = "%-10s %s\n" - printf format, "Name", "Number" - printf format, "----", "------" @} - @{ printf format, $1, $2 @}' BBS-list -@end example - -@c !!! exercise -At this point, it would be a worthwhile exercise to use the -@code{printf} statement to line up the headings and table data for the -@file{inventory-shipped} example that was covered earlier in the @value{SECTION} -on the @code{print} statement -(@pxref{Print, ,The @code{print} Statement}). - -@node Redirection, Special Files, Printf, Printing -@section Redirecting Output of @code{print} and @code{printf} - -@cindex output redirection -@cindex redirection of output -So far, the output from @code{print} and @code{printf} has gone -to the standard -output, usually the terminal. Both @code{print} and @code{printf} can -also send their output to other places. -This is called @dfn{redirection}. - -A redirection appears after the @code{print} or @code{printf} statement. -Redirections in @command{awk} are written just like redirections in shell -commands, except that they are written inside the @command{awk} program. - -There are four forms of output redirection: output to a file, output -appended to a file, output through a pipe to another command, and output -to a coprocess. They are all shown for the @code{print} statement, -but they work identically for @code{printf}: - -@table @code -@cindex @code{>} I/O operator -@item print @var{items} > @var{output-file} -This type of redirection prints the items into the output file named -@var{output-file}. The @value{FN} @var{output-file} can be any -expression. Its value is changed to a string and then used as a -@value{FN} (@pxref{Expressions}). - -When this type of redirection is used, the @var{output-file} is erased -before the first output is written to it. Subsequent writes to the same -@var{output-file} do not erase @var{output-file}, but append to it. -(This is different from how you use redirections in shell scripts.) -If @var{output-file} does not exist, it is created. 
For example, here -is how an @command{awk} program can write a list of BBS names to one -file named @file{name-list}, and a list of phone numbers to another file -named @file{phone-list}: - -@example -$ awk '@{ print $2 > "phone-list" -> print $1 > "name-list" @}' BBS-list -$ cat phone-list -@print{} 555-5553 -@print{} 555-3412 -@dots{} -$ cat name-list -@print{} aardvark -@print{} alpo-net -@dots{} -@end example - -@noindent -Each output file contains one name or number per line. - -@cindex @code{>>} I/O operator -@item print @var{items} >> @var{output-file} -This type of redirection prints the items into the pre-existing output file -named @var{output-file}. The difference between this and the -single-@samp{>} redirection is that the old contents (if any) of -@var{output-file} are not erased. Instead, the @command{awk} output is -appended to the file. -If @var{output-file} does not exist, then it is created. - -@cindex @code{|} I/O operator -@cindex pipes for output -@cindex output, piping -@item print @var{items} | @var{command} -It is also possible to send output to another program through a pipe -instead of into a file. This type of redirection opens a pipe to -@var{command}, and writes the values of @var{items} through this pipe -to another process created to execute @var{command}. - -The redirection argument @var{command} is actually an @command{awk} -expression. Its value is converted to a string whose contents give -the shell command to be run. For example, the following produces two -files, one unsorted list of BBS names, and one list sorted in reverse -alphabetical order: - -@ignore -10/2000: -This isn't the best style, since COMMAND is assigned for each -record. It's done to avoid overfull hboxes in TeX. Leave it -alone for now and let's hope no-one notices. -@end ignore - -@example -awk '@{ print $1 > "names.unsorted" - command = "sort -r > names.sorted" - print $1 | command @}' BBS-list -@end example - -The unsorted list is written with an ordinary redirection, while -the sorted list is written by piping through the @command{sort} utility. - -The next example uses redirection to mail a message to the mailing -list @samp{bug-system}. This might be useful when trouble is encountered -in an @command{awk} script run periodically for system maintenance: - -@example -report = "mail bug-system" -print "Awk script failed:", $0 | report -m = ("at record number " FNR " of " FILENAME) -print m | report -close(report) -@end example - -The message is built using string concatenation and saved in the variable -@code{m}. It is then sent down the pipeline to the @command{mail} program. -(The parentheses group the items to concatenate---see -@ref{Concatenation, ,String Concatenation}.) - -The @code{close} function is called here because it's a good idea to close -the pipe as soon as all the intended output has been sent to it. -@xref{Close Files And Pipes, ,Closing Input and Output Redirections}, -for more information on this. - -This example also illustrates the use of a variable to represent -a @var{file} or @var{command}---it is not necessary to always -use a string constant. Using a variable is generally a good idea, -because @command{awk} requires that the string value be spelled identically -every time. - -@cindex coprocess -@cindex @code{|&} I/O operator -@cindex differences between @command{gawk} and @command{awk} -@item print @var{items} |& @var{command} -This type of redirection prints the items to the input of @var{command}. 
-The difference between this and the -single-@samp{|} redirection is that the output from @var{command} -can be read with @code{getline}. -Thus @var{command} is a @dfn{coprocess}, that works together with, -but subsidiary to, the @command{awk} program. - -This feature is a @command{gawk} extension, and is not available in -POSIX @command{awk}. -@xref{Two-way I/O, ,Two-Way Communications with Another Process}, -for a more complete discussion. -@end table - -Redirecting output using @samp{>}, @samp{>>}, @samp{|}, or @samp{|&} -asks the system to open a file, pipe, or coprocess, only if the particular -@var{file} or @var{command} you specify has not already been written -to by your program or if it has been closed since it was last written to. - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -It is a common error to use @samp{>} redirection for the first @code{print} -to a file, and then to use @samp{>>} for subsequent output: - -@example -# clear the file -print "Don't panic" > "guide.txt" -@dots{} -# append -print "Avoid improbability generators" >> "guide.txt" -@end example - -@noindent -This is indeed how redirections must be used from the shell. But in -@command{awk}, it isn't necessary. In this kind of case, a program should -use @samp{>} for all the @code{print} statements, since the output file -is only opened once. - -@cindex differences between @command{gawk} and @command{awk} -@cindex limitations -@cindex implementation limits -@ifnotinfo -As mentioned earlier -(@pxref{Getline Notes, ,Points About @code{getline} to Remember}), -many -@end ifnotinfo -@ifnottex -Many -@end ifnottex -@command{awk} implementations limit the number of pipelines that an @command{awk} -program may have open to just one! In @command{gawk}, there is no such limit. -@command{gawk} allows a program to -open as many pipelines as the underlying operating system permits. - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Piping into @command{sh} -@cindex advanced notes -@cindex shell, piping commands into -@cindex piping commands into the shell - -A particularly powerful way to use redirection is to build command lines, -and pipe them into the shell, @command{sh}. For example, suppose you -have a list of files brought over from a system where all the @value{FN}s -are stored in uppercase, and you wish to rename them to have names in -all lowercase. The following program is both simple and efficient: - -@cindex @command{mv} utility -@example -@{ printf("mv %s %s\n", $0, tolower($0)) | "sh" @} - -END @{ close("sh") @} -@end example - -The @code{tolower} function returns its argument string with all -uppercase characters converted to lowercase -(@pxref{String Functions, ,String Manipulation Functions}). -The program builds up a list of command lines, -using the @command{mv} utility to rename the files. -It then sends the list to the shell for execution. - -@node Special Files, Close Files And Pipes, Redirection, Printing -@section Special @value{FFN}s in @command{gawk} - -@command{gawk} provides a number of special @value{FN}s that it interprets -internally. These @value{FN}s provide access to standard file descriptors, -process-related information, and TCP/IP networking. - -@menu -* Special FD:: Special files for I/O. -* Special Process:: Special files for process information. -* Special Network:: Special files for network communications. -* Special Caveats:: Things to watch out for. 
-@end menu - -@node Special FD, Special Process, Special Files, Special Files -@subsection Special Files for Standard Descriptors -@cindex standard input -@cindex standard output -@cindex standard error output -@cindex file descriptors - -Running programs conventionally have three input and output streams -already available to them for reading and writing. These are known as -the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error -output}. These streams are, by default, connected to your terminal, but -they are often redirected with the shell, via the @samp{<}, @samp{<<}, -@samp{>}, @samp{>>}, @samp{>&}, and @samp{|} operators. Standard error -is typically used for writing error messages; the reason there are two separate -streams, standard output, and standard error, is so that they can be -redirected separately. - -@cindex differences between @command{gawk} and @command{awk} -In other implementations of @command{awk}, the only way to write an error -message to standard error in an @command{awk} program is as follows: - -@example -print "Serious error detected!" | "cat 1>&2" -@end example - -@noindent -This works by opening a pipeline to a shell command that can access the -standard error stream that it inherits from the @command{awk} process. -This is far from elegant, and it is also inefficient, because it requires a -separate process. So people writing @command{awk} programs often -don't do this. Instead, they send the error messages to the -terminal, like this: - -@example -print "Serious error detected!" > "/dev/tty" -@end example - -@noindent -This usually has the same effect but not always: although the -standard error stream is usually the terminal, it can be redirected; when -that happens, writing to the terminal is not correct. In fact, if -@command{awk} is run from a background job, it may not have a terminal at all. -Then opening @file{/dev/tty} fails. - -@command{gawk} provides special @value{FN}s for accessing the three standard -streams, as well as any other inherited open files. If the @value{FN} matches -one of these special names when @command{gawk} redirects input or output, -then it directly uses the stream that the @value{FN} stands for. -(These special @value{FN}s work for all operating systems that @command{gawk} -has been ported to, not just those that are POSIX-compliant.): - -@cindex @file{/dev/stdin} special file -@cindex @file{/dev/stdout} special file -@cindex @file{/dev/stderr} special file -@cindex @file{/dev/fd} special files -@table @file -@item /dev/stdin -The standard input (file descriptor 0). - -@item /dev/stdout -The standard output (file descriptor 1). - -@item /dev/stderr -The standard error output (file descriptor 2). - -@item /dev/fd/@var{N} -The file associated with file descriptor @var{N}. Such a file must -be opened by the program initiating the @command{awk} execution (typically -the shell). Unless special pains are taken in the shell from which -@command{gawk} is invoked, only descriptors 0, 1, and 2 are available. -@end table - -The @value{FN}s @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr} -are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2}, -respectively. However, they are more self-explanatory. -The proper way to write an error message in a @command{gawk} program -is to use @file{/dev/stderr}, like this: - -@example -print "Serious error detected!" 
> "/dev/stderr" -@end example - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -Note the use of quotes around the @value{FN}. -Like any other redirection, the value must be a string. -It is a common error to omit the quotes, which leads -to confusing results. -@c Exercise: What does it do? :-) - -@node Special Process, Special Network, Special FD, Special Files -@subsection Special Files for Process-Related Information - -@command{gawk} also provides special @value{FN}s that give access to information -about the running @command{gawk} process. Each of these ``files'' provides -a single record of information. To read them more than once, they must -first be closed with the @code{close} function -(@pxref{Close Files And Pipes, ,Closing Input and Output Redirections}). -The @value{FN}s are: - -@cindex process information -@cindex @file{/dev/pid} special file -@cindex @file{/dev/pgrpid} special file -@cindex @file{/dev/ppid} special file -@cindex @file{/dev/user} special file -@table @file -@item /dev/pid -Reading this file returns the process ID of the current process, -in decimal form, terminated with a newline. - -@item /dev/ppid -Reading this file returns the parent process ID of the current process, -in decimal form, terminated with a newline. - -@item /dev/pgrpid -Reading this file returns the process group ID of the current process, -in decimal form, terminated with a newline. - -@item /dev/user -Reading this file returns a single record terminated with a newline. -The fields are separated with spaces. The fields represent the -following information: - -@table @code -@item $1 -The return value of the @code{getuid} system call -(the real user ID number). - -@item $2 -The return value of the @code{geteuid} system call -(the effective user ID number). - -@item $3 -The return value of the @code{getgid} system call -(the real group ID number). - -@item $4 -The return value of the @code{getegid} system call -(the effective group ID number). -@end table - -If there are any additional fields, they are the group IDs returned by -the @code{getgroups} system call. -(Multiple groups may not be supported on all systems.) -@end table - -These special @value{FN}s may be used on the command line as @value{DF}s, -as well as for I/O redirections within an @command{awk} program. -They may not be used as source files with the @option{-f} option. - -@cindex automatic warnings -@cindex warnings, automatic -@strong{Note:} -The special files that provide process-related information are now considered -obsolete and will disappear entirely -in the next release of @command{gawk}. -@command{gawk} prints a warning message every time you use one of -these files. -To obtain process-related information, use the @code{PROCINFO} array. -@xref{Auto-set, ,Built-in Variables That Convey Information}. - -@node Special Network, Special Caveats, Special Process, Special Files -@subsection Special Files for Network Communications - -Starting with @value{PVERSION} 3.1 of @command{gawk}, @command{awk} programs -can open a two-way -TCP/IP connection, acting as either a client or server. -This is done using a special @value{FN} of the form: - -@example -@file{/inet/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}} -@end example - -The @var{protocol} is one of @samp{tcp}, @samp{udp}, or @samp{raw}, -and the other fields represent the other essential pieces of information -for making a networking connection. 
-These @value{FN}s are used with the @samp{|&} operator for communicating
-with a coprocess
-(@pxref{Two-way I/O, ,Two-Way Communications with Another Process}).
-This is an advanced feature, mentioned here only for completeness.
-Full discussion is delayed until
-@ref{TCP/IP Networking, ,Using @command{gawk} for Network Programming}.
-
-@node Special Caveats, , Special Network, Special Files
-@subsection Special @value{FFN} Caveats
-
-Here is a list of things to bear in mind when using the
-special @value{FN}s that @command{gawk} provides.
-
-@itemize @bullet
-@item
-Recognition of these special @value{FN}s is disabled if @command{gawk} is in
-compatibility mode (@pxref{Options, ,Command-Line Options}).
-
-@cindex automatic warnings
-@cindex warnings, automatic
-@item
-@ifnottex
-The
-@end ifnottex
-@ifnotinfo
-As mentioned earlier, the
-@end ifnotinfo
-special files that provide process-related information are now considered
-obsolete and will disappear entirely
-in the next release of @command{gawk}.
-@command{gawk} prints a warning message every time you use one of
-these files.
-@ifnottex
-To obtain process-related information, use the @code{PROCINFO} array.
-@xref{Built-in Variables}.
-@end ifnottex
-
-@item
-Starting with @value{PVERSION} 3.1, @command{gawk} @emph{always}
-interprets these special @value{FN}s.@footnote{Older versions of
-@command{gawk} would only interpret these names internally if the system
-did not actually have a @file{/dev/fd} directory or any of the other
-above listed special files. Usually this didn't make a difference,
-but sometimes it did; thus, it was decided to make @command{gawk}'s
-behavior consistent on all systems and to have it always interpret
-the special @value{FN}s itself.}
-For example, using @samp{/dev/fd/4}
-for output actually writes on file descriptor 4, and not on a new
-file descriptor that is @code{dup}'ed from file descriptor 4. Most of
-the time this does not matter; however, it is important to @emph{not}
-close any of the files related to file descriptors 0, 1, and 2.
-Doing so results in unpredictable behavior.
-@end itemize
-
-@node Close Files And Pipes, , Special Files, Printing
-@section Closing Input and Output Redirections
-@cindex closing input files and pipes
-@cindex closing output files and pipes
-@cindex closing coprocesses
-@cindex coprocess
-@cindex @code{close} built-in function
-
-If the same @value{FN} or the same shell command is used with @code{getline}
-more than once during the execution of an @command{awk} program
-(@pxref{Getline, ,Explicit Input with @code{getline}}),
-the file is opened (or the command is executed) the first time only.
-At that time, the first record of input is read from that file or command.
-The next time the same file or command is used with @code{getline},
-another record is read from it, and so on.
-
-Similarly, when a file or pipe is opened for output, the @value{FN} or
-command associated with it is remembered by @command{awk}, and subsequent
-writes to the same file or command are appended to the previous writes.
-The file or pipe stays open until @command{awk} exits.
-
-This implies that special steps are necessary in order to read the same
-file again from the beginning, or to rerun a shell command (rather than
-reading more output from the same command).
The @code{close} function -makes these things possible: - -@example -close(@var{filename}) -@end example - -@noindent -or: - -@example -close(@var{command}) -@end example - -The argument @var{filename} or @var{command} can be any expression. Its -value must @emph{exactly} match the string that was used to open the file or -start the command (spaces and other ``irrelevant'' characters -included). For example, if you open a pipe with this: - -@example -"sort -r names" | getline foo -@end example - -@noindent -then you must close it with this: - -@example -close("sort -r names") -@end example - -Once this function call is executed, the next @code{getline} from that -file or command, or the next @code{print} or @code{printf} to that -file or command, reopens the file or reruns the command. -Because the expression that you use to close a file or pipeline must -exactly match the expression used to open the file or run the command, -it is good practice to use a variable to store the @value{FN} or command. -The previous example becomes the following: - -@example -sortcom = "sort -r names" -sortcom | getline foo -@dots{} -close(sortcom) -@end example - -@noindent -This helps avoid hard-to-find typographical errors in your @command{awk} -programs. Here are some of the reasons for closing an output file: - -@itemize @bullet -@item -To write a file and read it back later on in the same @command{awk} -program. Close the file after writing it, then -begin reading it with @code{getline}. - -@item -To write numerous files, successively, in the same @command{awk} -program. If the files aren't closed, eventually @command{awk} may exceed a -system limit on the number of open files in one process. It is best to -close each one when the program has finished writing it. - -@item -To make a command finish. When output is redirected through a pipe, -the command reading the pipe normally continues to try to read input -as long as the pipe is open. Often this means the command cannot -really do its work until the pipe is closed. For example, if -output is redirected to the @command{mail} program, the message is not -actually sent until the pipe is closed. - -@item -To run the same program a second time, with the same arguments. -This is not the same thing as giving more input to the first run! - -For example, suppose a program pipes output to the @command{mail} program. -If it outputs several lines redirected to this pipe without closing -it, they make a single message of several lines. By contrast, if the -program closes the pipe after each line of output, then each line makes -a separate message. -@end itemize - -@cindex differences between @command{gawk} and @command{awk} -@cindex portability issues -If you use more files than the system allows you to have open, -@command{gawk} attempts to multiplex the available open files among -your @value{DF}s. @command{gawk}'s ability to do this depends upon the -facilities of your operating system, so it may not always work. It is -therefore both good practice and good portability advice to always -use @code{close} on your files when you are done with them. -In fact, if you are using a lot of pipes, it is essential that -you close commands when done. For example, consider something like this: - -@example -@{ - @dots{} - command = ("grep " $1 " /some/file | my_prog -q " $3) - while ((command | getline) > 0) @{ - @var{process output of} command - @} - # need close(command) here -@} -@end example - -This example creates a new pipeline based on data in @emph{each} record. 
-Without the call to @code{close} indicated in the comment, @command{awk} -creates child processes to run the commands, until it eventually -runs out of file descriptors for more pipelines. - -Even though each command has finished (as indicated by the end-of-file -return status from @code{getline}), the child process is not -terminated;@footnote{The technical terminology is rather morbid. -The finished child is called a ``zombie,'' and cleaning up after -it is referred to as ``reaping.''} -@c Good old UNIX: give the marketing guys fits, that's the ticket -more importantly, the file descriptor for the pipe -is not closed and released until @code{close} is called or -@command{awk} exits. - -@code{close} will silently do nothing if given an argument that -does not represent a file, pipe or coprocess that was opened with -a redirection. - -When using the @samp{|&} operator to communicate with a coprocess, -it is occasionally useful to be able to close one end of the two-way -pipe without closing the other. -This is done by supplying a second argument to @code{close}. -As in any other call to @code{close}, -the first argument is the name of the command or special file used -to start the coprocess. -The second argument should be a string, with either of the values -@code{"to"} or @code{"from"}. Case does not matter. -As this is an advanced feature, a more complete discussion is -delayed until -@ref{Two-way I/O, ,Two-Way Communications with Another Process}, -which discusses it in more detail and gives an example. - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Using @code{close}'s Return Value -@cindex advanced notes -@cindex dark corner -@cindex differences between @command{gawk} and @command{awk} -@cindex @code{close}, return value -@cindex return value from @code{close} - -In many versions of Unix @command{awk}, the @code{close} function -is actually a statement. It is a syntax error to try and use the return -value from @code{close}: -@value{DARKCORNER} - -@example -command = "@dots{}" -command | getline info -retval = close(command) # syntax error in most Unix awks -@end example - -@command{gawk} treats @code{close} as a function. -The return value is @minus{}1 if the argument names something -that was never opened with a redirection, or if there is -a system problem closing the file or process. -In these cases, @command{gawk} sets the built-in variable -@code{ERRNO} to a string describing the problem. - -In @command{gawk}, -when closing a pipe or coprocess, -the return value is the exit status of the command. -Otherwise, it is the return value from the system's @code{close} or -@code{fclose} C functions when closing input or output -files, respectively. -This value is zero if the close succeeds, or @minus{}1 if -it fails. - -The return value for closing a pipeline is particularly useful. -It allows you to get the output from a command as well as its -exit status. - -For POSIX-compliant systems, -if the exit status is a number above 128, then the program -was terminated by a signal. Subtract 128 to get the signal number: - -@example -exit_val = close(command) -if (exit_val > 128) - print command, "died with signal", exit_val - 128 -else - print command, "exited with code", exit_val -@end example - -Currently, in @command{gawk}, this only works for commands -piping into @code{getline}. For commands piped into -from @code{print} or @code{printf}, the -return value from @code{close} is that of the library's -@code{pclose} function. 
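-
-For instance, here is a small sketch of the two-way case just described.
-Closing the @code{"to"} end lets the coprocess (here, @command{sort}) see
-end-of-file on its input, write its results, and exit; the final
-@code{close} then returns its exit status:
-
-@example
-BEGIN @{
-    command = "sort"
-    print "dog" |& command
-    print "ant" |& command
-    print "cat" |& command
-    close(command, "to")       # sort now sees end-of-file on its input
-    while ((command |& getline line) > 0)
-        print "sorted:", line
-    status = close(command)    # exit status of the sort command
-@}
-@end example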
- -@node Expressions, Patterns and Actions, Printing, Top -@chapter Expressions -@cindex expression - -Expressions are the basic building blocks of @command{awk} patterns -and actions. An expression evaluates to a value that you can print, test, -or pass to a function. Additionally, an expression -can assign a new value to a variable or a field by using an assignment operator. - -An expression can serve as a pattern or action statement on its own. -Most other kinds of -statements contain one or more expressions that specify the data on which to -operate. As in other languages, expressions in @command{awk} include -variables, array references, constants, and function calls, as well as -combinations of these with various operators. - -@menu -* Constants:: String, numeric and regexp constants. -* Using Constant Regexps:: When and how to use a regexp constant. -* Variables:: Variables give names to values for later use. -* Conversion:: The conversion of strings to numbers and vice - versa. -* Arithmetic Ops:: Arithmetic operations (@samp{+}, @samp{-}, - etc.) -* Concatenation:: Concatenating strings. -* Assignment Ops:: Changing the value of a variable or a field. -* Increment Ops:: Incrementing the numeric value of a variable. -* Truth Values:: What is ``true'' and what is ``false''. -* Typing and Comparison:: How variables acquire types and how this - affects comparison of numbers and strings with - @samp{<}, etc. -* Boolean Ops:: Combining comparison expressions using boolean - operators @samp{||} (``or''), @samp{&&} - (``and'') and @samp{!} (``not''). -* Conditional Exp:: Conditional expressions select between two - subexpressions under control of a third - subexpression. -* Function Calls:: A function call is an expression. -* Precedence:: How various operators nest. -@end menu - -@node Constants, Using Constant Regexps, Expressions, Expressions -@section Constant Expressions -@cindex constants, types of - -The simplest type of expression is the @dfn{constant}, which always has -the same value. There are three types of constants: numeric, -string, and regular expression. - -Each is used in the appropriate context when you need a data -value that isn't going to change. Numeric constants can -have different forms, but are stored identically internally. - -@menu -* Scalar Constants:: Numeric and string constants. -* Non-decimal-numbers:: What are octal and hex numbers. -* Regexp Constants:: Regular Expression constants. -@end menu - -@node Scalar Constants, Non-decimal-numbers, Constants, Constants -@subsection Numeric and String Constants - -@cindex numeric constant -@cindex numeric value -A @dfn{numeric constant} stands for a number. This number can be an -integer, a decimal fraction, or a number in scientific (exponential) -notation.@footnote{The internal representation of all numbers, -including integers, uses double-precision -floating-point numbers. -On most modern systems, these are in IEEE 754 standard format.} -Here are some examples of numeric constants that all -have the same value: - -@example -105 -1.05e+2 -1050e-1 -@end example - -@cindex string constants -A string constant consists of a sequence of characters enclosed in -double quote marks. For example: - -@example -"parrot" -@end example - -@noindent -@cindex differences between @command{gawk} and @command{awk} -represents the string whose contents are @samp{parrot}. 
Strings in -@command{gawk} can be of any length, and they can contain any of the possible -eight-bit ASCII characters including ASCII @sc{nul} (character code zero). -Other @command{awk} -implementations may have difficulty with some character codes. - -@node Non-decimal-numbers, Regexp Constants, Scalar Constants, Constants -@subsection Octal and Hexadecimal Numbers -@cindex octal numbers -@cindex hexadecimal numbers -@cindex numbers, octal -@cindex numbers, hexadecimal - -In @command{awk}, all numbers are in decimal; i.e., base 10. Many other -programming languages allow you to specify numbers in other bases, often -octal (base 8) and hexadecimal (base 16). -In octal, the numbers go 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, etc.. -Just as @samp{11} in decimal is 1 times 10 plus 1, so -@samp{11} in octal is 1 times 8, plus 1. This equals nine in decimal. -In hexadecimal, there are 16 digits. Since the everyday decimal -number system only has ten digits (@samp{0}---@samp{9}), the letters -@samp{a} through @samp{f} are used to represent the rest. -(Case in the letters is usually irrelevant; hexadecimal @samp{a} and @samp{A} -have the same value.) -Thus, @samp{11} in -hexadecimal is 1 times 16 plus 1, which equals 17 in decimal. - -Just by looking at plain @samp{11}, you can't tell what base it's in. -So, in C, C++, and other languages derived from C, -@c such as PERL, but we won't mention that.... -there is a special notation to help signify the base. -Octal numbers start with a leading @samp{0}, -and hexadecimal numbers start with a leading @samp{0x} or @samp{0X}: - -@table @code -@item 11 -Decimal 11. - -@item 011 -Octal 11, decimal value 9. - -@item 0x11 -Hexadecimal 11, decimal value 17. -@end table - -This example shows the difference: - -@example -$ gawk 'BEGIN @{ printf "%d, %d, %d\n", 011, 11, 0x11 @}' -@print{} 9, 11, 17 -@end example - -Being able to use octal and hexadecimal constants in your programs is most -useful when working with data that cannot be represented conveniently as -characters or as regular numbers, such as binary data of various sorts. - -@command{gawk} allows the use of octal and hexadecimal -constants in your program text. However, such numbers in the input data -are not treated differently; doing so by default would break old -programs. -(If you really need to do this, use the @option{--non-decimal-data} -command-line option, -@pxref{Non-decimal Data, ,Allowing Non-Decimal Input Data}.) -If you have octal or hexadecimal data, -you can use the @code{strtonum} function -(@pxref{String Functions, ,String Manipulation Functions}) -to convert the data into a number. -Most of the time, you will want to use octal or hexadecimal constants -when working with the built-in bit manipulation functions; -see @ref{Bitwise Functions, ,Using @command{gawk}'s Bit Manipulation Functions}, -for more information. - -Unlike some early C implementations, @samp{8} and @samp{9} are not valid -in octal constants; e.g., @command{gawk} treats @samp{018} as decimal 18. - -@example -$ gawk 'BEGIN @{ print "021 is", 021 ; print 018 @}' -@print{} 021 is 17 -@print{} 18 -@end example - -Octal and hexadecimal source code constants are a @command{gawk} extension. -If @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -they are not available. 
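-
-For instance, the following sketch uses a hexadecimal source constant as a
-bit mask with @command{gawk}'s @code{and} function, and @code{strtonum} for
-data that arrives in octal or hexadecimal form:
-
-@example
-BEGIN @{
-    mask = 0x0f                  # hexadecimal constant in the program text
-    print and(0x2b, mask)        # prints 11
-    print strtonum("0x11")       # data converted explicitly: prints 17
-    print strtonum("011")        # prints 9
-@}
-@end example
-
-Remember that @samp{0x2b} and @samp{0x0f} here are constants in the program
-text; the same characters appearing in the input data are plain strings
-unless converted with @code{strtonum} or the @option{--non-decimal-data}
-option.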
- -@c fakenode --- for prepinfo -@subheading Advanced Notes: A Constant's Base Does Not Affect Its Value -@cindex advanced notes - -Once a numeric constant has -been converted internally into a number, -@command{gawk} no longer remembers -what the original form of the constant was; the internal value is -always used. This has particular consequences for conversion of -numbers to strings: - -@example -$ gawk 'BEGIN @{ printf "0x11 is <%s>\n", 0x11 @}' -@print{} 0x11 is <17> -@end example - -@node Regexp Constants, , Non-decimal-numbers, Constants -@subsection Regular Expression Constants - -@cindex @code{~} operator -@cindex @code{!~} operator -A regexp constant is a regular expression description enclosed in -slashes, such as @code{@w{/^beginning and end$/}}. Most regexps used in -@command{awk} programs are constant, but the @samp{~} and @samp{!~} -matching operators can also match computed or ``dynamic'' regexps -(which are just ordinary strings or variables that contain a regexp). - -@node Using Constant Regexps, Variables, Constants, Expressions -@section Using Regular Expression Constants - -@cindex dark corner -When used on the righthand side of the @samp{~} or @samp{!~} -operators, a regexp constant merely stands for the regexp that is to be -matched. -However, regexp constants (such as @code{/foo/}) may be used like simple expressions. -When a -regexp constant appears by itself, it has the same meaning as if it appeared -in a pattern, i.e.; @samp{($0 ~ /foo/)} -@value{DARKCORNER} -@xref{Expression Patterns, ,Expressions as Patterns}. -This means that the following two code segments: - -@example -if ($0 ~ /barfly/ || $0 ~ /camelot/) - print "found" -@end example - -@noindent -and: - -@example -if (/barfly/ || /camelot/) - print "found" -@end example - -@noindent -are exactly equivalent. -One rather bizarre consequence of this rule is that the following -Boolean expression is valid, but does not do what the user probably -intended: - -@example -# note that /foo/ is on the left of the ~ -if (/foo/ ~ $1) print "found foo" -@end example - -@cindex automatic warnings -@cindex warnings, automatic -@noindent -This code is ``obviously'' testing @code{$1} for a match against the regexp -@code{/foo/}. But in fact, the expression @samp{/foo/ ~ $1} actually means -@samp{($0 ~ /foo/) ~ $1}. In other words, first match the input record -against the regexp @code{/foo/}. The result is either zero or one, -depending upon the success or failure of the match. That result -is then matched against the first field in the record. -Because it is unlikely that you would ever really want to make this kind of -test, @command{gawk} issues a warning when it sees this construct in -a program. -Another consequence of this rule is that the assignment statement: - -@example -matches = /foo/ -@end example - -@noindent -assigns either zero or one to the variable @code{matches}, depending -upon the contents of the current input record. -This feature of the language has never been well documented until the -POSIX specification. - -@cindex differences between @command{gawk} and @command{awk} -@cindex dark corner -Constant regular expressions are also used as the first argument for -the @code{gensub}, @code{sub}, and @code{gsub} functions, and as the -second argument of the @code{match} function -(@pxref{String Functions, ,String Manipulation Functions}). 
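-
-For instance, this rule (a small sketch) passes a regexp constant directly
-as the first argument to @code{gsub} to trim trailing whitespace from each
-input line:
-
-@example
-@{
-    n = gsub(/[ \t]+$/, "")      # regexp constant as the first argument
-    if (n > 0)
-        print "trimmed trailing whitespace on line", NR
-@}
-@end example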
-Modern implementations of @command{awk}, including @command{gawk}, allow -the third argument of @code{split} to be a regexp constant, but some -older implementations do not. -@value{DARKCORNER} -This can lead to confusion when attempting to use regexp constants -as arguments to user defined functions -(@pxref{User-defined, ,User-Defined Functions}). -For example: - -@example -function mysub(pat, repl, str, global) -@{ - if (global) - gsub(pat, repl, str) - else - sub(pat, repl, str) - return str -@} - -@{ - @dots{} - text = "hi! hi yourself!" - mysub(/hi/, "howdy", text, 1) - @dots{} -@} -@end example - -@cindex automatic warnings -@cindex warnings, automatic -In this example, the programmer wants to pass a regexp constant to the -user-defined function @code{mysub}, which in turn passes it on to -either @code{sub} or @code{gsub}. However, what really happens is that -the @code{pat} parameter is either one or zero, depending upon whether -or not @code{$0} matches @code{/hi/}. -@command{gawk} issues a warning when it sees a regexp constant used as -a parameter to a user-defined function, since passing a truth value in -this way is probably not what was intended. - -@node Variables, Conversion, Using Constant Regexps, Expressions -@section Variables - -Variables are ways of storing values at one point in your program for -use later in another part of your program. They can be manipulated -entirely within the program text, and they can also be assigned values -on the @command{awk} command line. - -@menu -* Using Variables:: Using variables in your programs. -* Assignment Options:: Setting variables on the command-line and a - summary of command-line syntax. This is an - advanced method of input. -@end menu - -@node Using Variables, Assignment Options, Variables, Variables -@subsection Using Variables in a Program - -@cindex variables, user-defined -@cindex user-defined variables -Variables let you give names to values and refer to them later. Variables -have already been used in many of the examples. The name of a variable -must be a sequence of letters, digits, or underscores, and it may not begin -with a digit. Case is significant in variable names; @code{a} and @code{A} -are distinct variables. - -A variable name is a valid expression by itself; it represents the -variable's current value. Variables are given new values with -@dfn{assignment operators}, @dfn{increment operators}, and -@dfn{decrement operators}. -@xref{Assignment Ops, ,Assignment Expressions}. -@c NEXT ED: Can also be changed by sub, gsub, split - -A few variables have special built-in meanings, such as @code{FS} (the -field separator), and @code{NF} (the number of fields in the current input -record). @xref{Built-in Variables}, for a list of the built-in variables. -These built-in variables can be used and assigned just like all other -variables, but their values are also used or changed automatically by -@command{awk}. All built-in variables' names are entirely uppercase. - -Variables in @command{awk} can be assigned either numeric or string values. -The kind of value a variable holds can change over the life of a program. -By default, variables are initialized to the empty string, which -is zero if converted to a number. There is no need to -``initialize'' each variable explicitly in @command{awk}, -which is what you would do in C and in most other traditional languages. 
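-
-For example, the following two-rule sketch relies on that default; neither
-@code{count} nor @code{total} is assigned a value before its first use
-(the sketch assumes a numeric first field):
-
-@example
-     @{ count++; total += $1 @}   # both start out as "" (zero as a number)
-END  @{ print "records:", count, "sum of first field:", total @}
-@end example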
- -@node Assignment Options, , Using Variables, Variables -@subsection Assigning Variables on the Command Line - -Any @command{awk} variable can be set by including a @dfn{variable assignment} -among the arguments on the command line when @command{awk} is invoked -(@pxref{Other Arguments, ,Other Command-Line Arguments}). -Such an assignment has the following form: - -@example -@var{variable}=@var{text} -@end example - -@noindent -With it, a variable is set either at the beginning of the -@command{awk} run or in between input files. -When the assignment is preceded with the @option{-v} option, -as in the following: - -@example --v @var{variable}=@var{text} -@end example - -@noindent -the variable is set at the very beginning, even before the -@code{BEGIN} rules are run. The @option{-v} option and its assignment -must precede all the @value{FN} arguments, as well as the program text. -(@xref{Options, ,Command-Line Options}, for more information about -the @option{-v} option.) -Otherwise, the variable assignment is performed at a time determined by -its position among the input file arguments---after the processing of the -preceding input file argument. For example: - -@example -awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list -@end example - -@noindent -prints the value of field number @code{n} for all input records. Before -the first file is read, the command line sets the variable @code{n} -equal to four. This causes the fourth field to be printed in lines from -the file @file{inventory-shipped}. After the first file has finished, -but before the second file is started, @code{n} is set to two, so that the -second field is printed in lines from @file{BBS-list}: - -@example -$ awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list -@print{} 15 -@print{} 24 -@dots{} -@print{} 555-5553 -@print{} 555-3412 -@dots{} -@end example - -@cindex dark corner -Command-line arguments are made available for explicit examination by -the @command{awk} program in an array named @code{ARGV} -(@pxref{ARGC and ARGV, ,Using @code{ARGC} and @code{ARGV}}). -@command{awk} processes the values of command-line assignments for escape -sequences -@value{DARKCORNER} -(@pxref{Escape Sequences}). - -@node Conversion, Arithmetic Ops, Variables, Expressions -@section Conversion of Strings and Numbers - -@cindex conversion of strings and numbers -Strings are converted to numbers and numbers are converted to strings, if the context -of the @command{awk} program demands it. For example, if the value of -either @code{foo} or @code{bar} in the expression @samp{foo + bar} -happens to be a string, it is converted to a number before the addition -is performed. If numeric values appear in string concatenation, they -are converted to strings. Consider the following: - -@example -two = 2; three = 3 -print (two three) + 4 -@end example - -@noindent -This prints the (numeric) value 27. The numeric values of -the variables @code{two} and @code{three} are converted to strings and -concatenated together. The resulting string is converted back to the -number 23, to which four is then added. - -@cindex null string -@cindex empty string -@cindex type conversion -If, for some reason, you need to force a number to be converted to a -string, concatenate the empty string, @code{""}, with that number. -To force a string to be converted to a number, add zero to that string. 
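-
-For example (a small sketch):
-
-@example
-BEGIN @{
-    num = 11
-    str = num ""             # concatenating "" yields the string "11"
-    print (str == "11")      # string comparison: prints 1
-    place = "3rd place"
-    print place + 0          # adding zero yields the number 3
-@}
-@end example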
-A string is converted to a number by interpreting any numeric prefix -of the string as numerals: -@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"} -has a numeric value of 25. -Strings that can't be interpreted as valid numbers convert to zero. - -@cindex @code{CONVFMT} variable -The exact manner in which numbers are converted into strings is controlled -by the @command{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}). -Numbers are converted using the @code{sprintf} function -with @code{CONVFMT} as the format -specifier -(@pxref{String Functions, ,String Manipulation Functions}). - -@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with -at least six significant digits. For some applications, you might want to -change it to specify more precision. -On most modern machines, -17 digits is enough to capture a floating-point number's -value exactly, -most of the time.@footnote{Pathological cases can require up to -752 digits (!), but we doubt that you need to worry about this.} - -@cindex dark corner -Strange results can occur if you set @code{CONVFMT} to a string that doesn't -tell @code{sprintf} how to format floating-point numbers in a useful way. -For example, if you forget the @samp{%} in the format, @command{awk} converts -all numbers to the same constant string. -As a special case, if a number is an integer, then the result of converting -it to a string is @emph{always} an integer, no matter what the value of -@code{CONVFMT} may be. Given the following code fragment: - -@example -CONVFMT = "%2.2f" -a = 12 -b = a "" -@end example - -@noindent -@code{b} has the value @code{"12"}, not @code{"12.00"}. -@value{DARKCORNER} - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@cindex @code{OFMT} variable -Prior to the POSIX standard, @command{awk} used the value -of @code{OFMT} for converting numbers to strings. @code{OFMT} -specifies the output format to use when printing numbers with @code{print}. -@code{CONVFMT} was introduced in order to separate the semantics of -conversion from the semantics of printing. Both @code{CONVFMT} and -@code{OFMT} have the same default value: @code{"%.6g"}. In the vast majority -of cases, old @command{awk} programs do not change their behavior. -However, these semantics for @code{OFMT} are something to keep in mind if you must -port your new style program to older implementations of @command{awk}. -We recommend -that instead of changing your programs, just port @command{gawk} itself. -@xref{Print, ,The @code{print} Statement}, -for more information on the @code{print} statement. - -@node Arithmetic Ops, Concatenation, Conversion, Expressions -@section Arithmetic Operators -@cindex arithmetic operators -@cindex operators, arithmetic -@cindex addition -@cindex subtraction -@cindex multiplication -@cindex division -@cindex remainder -@cindex quotient -@cindex exponentiation - -The @command{awk} language uses the common arithmetic operators when -evaluating expressions. All of these arithmetic operators follow normal -precedence rules and work as you would expect them to. 
-
-The following example uses a file named @file{grades}, which contains
-a list of student names as well as three test scores per student (it's
-a small class):
-
-@example
-Pat 100 97 58
-Sandy 84 72 93
-Chris 72 92 89
-@end example
-
-@noindent
-This program takes the file @file{grades} and prints the average
-of the scores:
-
-@example
-$ awk '@{ sum = $2 + $3 + $4 ; avg = sum / 3
-> print $1, avg @}' grades
-@print{} Pat 85
-@print{} Sandy 83
-@print{} Chris 84.3333
-@end example
-
-The following list provides the arithmetic operators in @command{awk}, in order from
-the highest precedence to the lowest:
-
-@table @code
-@item - @var{x}
-Negation.
-
-@item + @var{x}
-Unary plus; the expression is converted to a number.
-
-@cindex @command{awk} language, POSIX version
-@cindex POSIX @command{awk}
-@item @var{x} ^ @var{y}
-@itemx @var{x} ** @var{y}
-Exponentiation; @var{x} raised to the @var{y} power. @samp{2 ^ 3} has
-the value eight; the character sequence @samp{**} is equivalent to
-@samp{^}.
-
-@item @var{x} * @var{y}
-Multiplication.
-
-@cindex common mistakes
-@cindex mistakes, common
-@cindex errors, common
-@item @var{x} / @var{y}
-Division; because all numbers in @command{awk} are floating-point
-numbers, the result is @emph{not} rounded to an integer---@samp{3 / 4} has
-the value 0.75. (It is a common mistake, especially for C programmers,
-to forget that @emph{all} numbers in @command{awk} are floating-point,
-and that division of integer-looking constants produces a real number,
-not an integer.)
-
-@item @var{x} % @var{y}
-Remainder; further discussion is provided in the text, just
-after this list.
-
-@item @var{x} + @var{y}
-Addition.
-
-@item @var{x} - @var{y}
-Subtraction.
-@end table
-
-Unary plus and minus have the same precedence,
-multiplication, division, and remainder have the same precedence, and
-addition and subtraction have the same precedence.
-
-@cindex differences between @command{gawk} and @command{awk}
-When computing the remainder of @code{@var{x} % @var{y}},
-the quotient is rounded toward zero to an integer and
-multiplied by @var{y}. This result is subtracted from @var{x};
-this operation is sometimes known as ``trunc-mod.'' The following
-relation always holds:
-
-@example
-b * int(a / b) + (a % b) == a
-@end example
-
-One possibly undesirable effect of this definition of remainder is that
-@code{@var{x} % @var{y}} is negative if @var{x} is negative. Thus:
-
-@example
--17 % 8 = -1
-@end example
-
-In other @command{awk} implementations, the signedness of the remainder
-may be machine dependent.
-@c !!! what does posix say?
-
-@cindex portability issues
-@strong{Note:}
-The POSIX standard only specifies the use of @samp{^}
-for exponentiation.
-For maximum portability, do not use the @samp{**} operator.
-
-@node Concatenation, Assignment Ops, Arithmetic Ops, Expressions
-@section String Concatenation
-@cindex Kernighan, Brian
-@quotation
-@i{It seemed like a good idea at the time.}@*
-Brian Kernighan
-@end quotation
-
-@cindex string operators
-@cindex operators, string
-@cindex concatenation
-There is only one string operation: concatenation. It does not have a
-specific operator to represent it. Instead, concatenation is performed by
-writing expressions next to one another, with no operator. For example:
-
-@example
-$ awk '@{ print "Field number one: " $1 @}' BBS-list
-@print{} Field number one: aardvark
-@print{} Field number one: alpo-net
-@dots{}
-@end example
-
-Without the space in the string constant after the @samp{:}, the line
-runs together.
For example: - -@example -$ awk '@{ print "Field number one:" $1 @}' BBS-list -@print{} Field number one:aardvark -@print{} Field number one:alpo-net -@dots{} -@end example - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -Because string concatenation does not have an explicit operator, it is -often necessary to insure that it happens at the right time by using -parentheses to enclose the items to concatenate. For example, the -following code fragment does not concatenate @code{file} and @code{name} -as you might expect: - -@example -file = "file" -name = "name" -print "something meaningful" > file name -@end example - -@noindent -It is necessary to use the following: - -@example -print "something meaningful" > (file name) -@end example - -@cindex order of evaluation, concatenation -@cindex concatenation evaluation order -@cindex evaluation, order of -@cindex side effects -Parentheses should be used around concatenation in all but the -most common contexts, such as on the righthand side of @samp{=}. -Be careful about the kinds of expressions used in string concatenation. -In particular, the order of evaluation of expressions used for concatenation -is undefined in the @command{awk} language. Consider this example: - -@example -BEGIN @{ - a = "don't" - print (a " " (a = "panic")) -@} -@end example - -@noindent -It is not defined whether the assignment to @code{a} happens -before or after the value of @code{a} is retrieved for producing the -concatenated value. The result could be either @samp{don't panic}, -or @samp{panic panic}. -@c see test/nasty.awk for a worse example -The precedence of concatenation, when mixed with other operators, is often -counter-intuitive. Consider this example: - -@ignore -> To: bug-gnu-utils@@gnu.org -> CC: arnold@gnu.org -> Subject: gawk 3.0.4 bug with {print -12 " " -24} -> From: Russell Schulz -> Date: Tue, 8 Feb 2000 19:56:08 -0700 -> -> gawk 3.0.4 on NT gives me: -> -> prompt> cat bad.awk -> BEGIN { print -12 " " -24; } -> -> prompt> gawk -f bad.awk -> -12-24 -> -> when I would expect -> -> -12 -24 -> -> I have not investigated the source, or other implementations. The -> bug is there on my NT and DOS versions 2.15.6 . -@end ignore - -@example -$ awk 'BEGIN @{ print -12 " " -24 @}' -@print{} -12-24 -@end example - -This ``obviously'' is concatenating @minus{}12, a space, and @minus{}24. -But where did the space disappear to? -The answer lies in the combination of operator precedences and -@command{awk}'s automatic conversion rules. To get the desired result, -write the program in the following manner: - -@example -$ awk 'BEGIN @{ print -12 " " (-24) @}' -@print{} -12 -24 -@end example - -This forces @command{awk} to treat the @samp{-} on the @samp{-24} as unary. -Otherwise, it's parsed as follows: - -@display - @minus{}12 (@code{"@ "} @minus{} 24) -@result{} @minus{}12 (0 @minus{} 24) -@result{} @minus{}12 (@minus{}24) -@result{} @minus{}12@minus{}24 -@end display - -As mentioned earlier, -when doing concatenation, @emph{parenthesize}. Otherwise, -you're never quite sure what you'll get. - -@node Assignment Ops, Increment Ops, Concatenation, Expressions -@section Assignment Expressions -@cindex assignment operators -@cindex operators, assignment -@cindex expression, assignment - -@cindex @code{=} operator -An @dfn{assignment} is an expression that stores a (usually different) -value into a variable. 
For example, let's assign the value one to the variable -@code{z}: - -@example -z = 1 -@end example - -After this expression is executed, the variable @code{z} has the value one. -Whatever old value @code{z} had before the assignment is forgotten. - -Assignments can also store string values. For example, the -following stores -the value @code{"this food is good"} in the variable @code{message}: - -@example -thing = "food" -predicate = "good" -message = "this " thing " is " predicate -@end example - -@noindent -@cindex side effects -This also illustrates string concatenation. -The @samp{=} sign is called an @dfn{assignment operator}. It is the -simplest assignment operator because the value of the righthand -operand is stored unchanged. -Most operators (addition, concatenation, and so on) have no effect -except to compute a value. If the value isn't used, there's no reason to -use the operator. An assignment operator is different; it does -produce a value, but even if you ignore it, the assignment still -makes itself felt through the alteration of the variable. We call this -a @dfn{side effect}. - -@cindex lvalue -@cindex rvalue -The lefthand operand of an assignment need not be a variable -(@pxref{Variables}); it can also be a field -(@pxref{Changing Fields, ,Changing the Contents of a Field}) or -an array element (@pxref{Arrays, ,Arrays in @command{awk}}). -These are all called @dfn{lvalues}, -which means they can appear on the lefthand side of an assignment operator. -The righthand operand may be any expression; it produces the new value -that the assignment stores in the specified variable, field, or array -element. (Such values are called @dfn{rvalues}). - -@cindex types of variables -It is important to note that variables do @emph{not} have permanent types. -A variable's type is simply the type of whatever value it happens -to hold at the moment. In the following program fragment, the variable -@code{foo} has a numeric value at first, and a string value later on: - -@example -foo = 1 -print foo -foo = "bar" -print foo -@end example - -@noindent -When the second assignment gives @code{foo} a string value, the fact that -it previously had a numeric value is forgotten. - -String values that do not begin with a digit have a numeric value of -zero. After executing the following code, the value of @code{foo} is five: - -@example -foo = "a string" -foo = foo + 5 -@end example - -@noindent -@strong{Note:} Using a variable as a number and then later as a string -can be confusing and is poor programming style. The previous two examples -illustrate how @command{awk} works, @emph{not} how you should write your -own programs! - -An assignment is an expression, so it has a value---the same value that -is assigned. Thus, @samp{z = 1} is an expression with the value one. -One consequence of this is that you can write multiple assignments together, -such as: - -@example -x = y = z = 5 -@end example - -@noindent -This example stores the value five in all three variables -(@code{x}, @code{y}, and @code{z}). -It does so because the -value of @samp{z = 5}, which is five, is stored into @code{y} and then -the value of @samp{y = z = 5}, which is five, is stored into @code{x}. - -Assignments may be used anywhere an expression is called for. For -example, it is valid to write @samp{x != (y = 1)} to set @code{y} to one, -and then test whether @code{x} equals one. But this style tends to make -programs hard to read; such nesting of assignments should be avoided, -except perhaps in a one-shot program. 
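-
-The lefthand side of an assignment need not be a simple variable, as
-described above; here is a small sketch with an array element and a
-field as assignment targets:
-
-@example
-@{
-    count[$1]++             # an array element is an lvalue
-    $1 = toupper($1)        # so is a field; assigning to it rebuilds $0
-    print
-@}
-@end example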
- -Aside from @samp{=}, there are several other assignment operators that -do arithmetic with the old value of the variable. For example, the -operator @samp{+=} computes a new value by adding the righthand value -to the old value of the variable. Thus, the following assignment adds -five to the value of @code{foo}: - -@example -foo += 5 -@end example - -@noindent -This is equivalent to the following: - -@example -foo = foo + 5 -@end example - -@noindent -Use whichever makes the meaning of your program clearer. - -There are situations where using @samp{+=} (or any assignment operator) -is @emph{not} the same as simply repeating the lefthand operand in the -righthand expression. For example: - -@cindex Rankin, Pat -@example -# Thanks to Pat Rankin for this example -BEGIN @{ - foo[rand()] += 5 - for (x in foo) - print x, foo[x] - - bar[rand()] = bar[rand()] + 5 - for (x in bar) - print x, bar[x] -@} -@end example - -@noindent -The indices of @code{bar} are practically guaranteed to be different, because -@code{rand} returns different values each time it is called. -(Arrays and the @code{rand} function haven't been covered yet. -@xref{Arrays, ,Arrays in @command{awk}}, -and see @ref{Numeric Functions}, for more information). -This example illustrates an important fact about assignment -operators: the lefthand expression is only evaluated @emph{once}. -It is up to the implementation as to which expression is evaluated -first, the lefthand or the righthand. -Consider this example: - -@example -i = 1 -a[i += 2] = i + 1 -@end example - -@noindent -The value of @code{a[3]} could be either two or four. - -Here is a table of the arithmetic assignment operators. In each -case, the righthand operand is an expression whose value is converted -to a number. - -@ignore -@table @code -@item @var{lvalue} += @var{increment} -Adds @var{increment} to the value of @var{lvalue}. - -@item @var{lvalue} -= @var{decrement} -Subtracts @var{decrement} from the value of @var{lvalue}. - -@item @var{lvalue} *= @var{coefficient} -Multiplies the value of @var{lvalue} by @var{coefficient}. - -@item @var{lvalue} /= @var{divisor} -Divides the value of @var{lvalue} by @var{divisor}. - -@item @var{lvalue} %= @var{modulus} -Sets @var{lvalue} to its remainder by @var{modulus}. - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@item @var{lvalue} ^= @var{power} -@itemx @var{lvalue} **= @var{power} -Raises @var{lvalue} to the power @var{power}. -(Only the @samp{^=} operator is specified by POSIX.) -@end table -@end ignore - -@cindex @code{+=} operator -@cindex @code{-=} operator -@cindex @code{*=} operator -@cindex @code{/=} operator -@cindex @code{%=} operator -@cindex @code{^=} operator -@cindex @code{**=} operator -@multitable {@var{lvalue} *= @var{coefficient}} {Subtracts @var{decrement} from the value of @var{lvalue}.} -@item @var{lvalue} @code{+=} @var{increment} @tab Adds @var{increment} to the value of @var{lvalue}. - -@item @var{lvalue} @code{-=} @var{decrement} @tab Subtracts @var{decrement} from the value of @var{lvalue}. - -@item @var{lvalue} @code{*=} @var{coefficient} @tab Multiplies the value of @var{lvalue} by @var{coefficient}. - -@item @var{lvalue} @code{/=} @var{divisor} @tab Divides the value of @var{lvalue} by @var{divisor}. - -@item @var{lvalue} @code{%=} @var{modulus} @tab Sets @var{lvalue} to its remainder by @var{modulus}. 
- -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@item @var{lvalue} @code{^=} @var{power} @tab -@item @var{lvalue} @code{**=} @var{power} @tab Raises @var{lvalue} to the power @var{power}. -@end multitable - -@cindex portability issues -@strong{Note:} -Only the @samp{^=} operator is specified by POSIX. -For maximum portability, do not use the @samp{**=} operator. - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Syntactic Ambiguities Between @samp{/=} and Regular Expressions -@cindex advanced notes - -@c derived from email from "Nelson H. F. Beebe" -@c Date: Mon, 1 Sep 1997 13:38:35 -0600 (MDT) - -@cindex dark corner -@cindex ambiguity, syntactic: @code{/=} operator vs. @code{/=@dots{}/} regexp constant -@cindex syntactic ambiguity: @code{/=} operator vs. @code{/=@dots{}/} regexp constant -@cindex @code{/=} operator vs. @code{/=@dots{}/} regexp constant -There is a syntactic ambiguity between the @samp{/=} assignment -operator and regexp constants whose first character is an @samp{=}. -@value{DARKCORNER} -This is most notable in commercial @command{awk} versions. -For example: - -@example -$ awk /==/ /dev/null -@error{} awk: syntax error at source line 1 -@error{} context is -@error{} >>> /= <<< -@error{} awk: bailing out at source line 1 -@end example - -@noindent -A workaround is: - -@example -awk '/[=]=/' /dev/null -@end example - -@command{gawk} does not have this problem, -nor do the other -freely-available versions described in -@ref{Other Versions, , Other Freely Available @command{awk} Implementations}. - -@node Increment Ops, Truth Values, Assignment Ops, Expressions -@section Increment and Decrement Operators - -@cindex increment operators -@cindex operators, increment -@dfn{Increment} and @dfn{decrement operators} increase or decrease the value of -a variable by one. An assignment operator can do the same thing, so -the increment operators add no power to the @command{awk} language; however they -are convenient abbreviations for very common operations. - -@cindex side effects -The operator used for adding one is written @samp{++}. It can be used to increment -a variable either before or after taking its value. -To pre-increment a variable @code{v}, write @samp{++v}. This adds -one to the value of @code{v}---that new value is also the value of the -expression. (The assignment expression @samp{v += 1} is completely -equivalent.) -Writing the @samp{++} after the variable specifies post-increment. This -increments the variable value just the same; the difference is that the -value of the increment expression itself is the variable's @emph{old} -value. Thus, if @code{foo} has the value four, then the expression @samp{foo++} -has the value four, but it changes the value of @code{foo} to five. -In other words, the operator returns the old value of the variable, -but with the side effect of incrementing it. - -The post-increment @samp{foo++} is nearly the same as writing @samp{(foo -+= 1) - 1}. It is not perfectly equivalent because all numbers in -@command{awk} are floating-point---in floating-point, @samp{foo + 1 - 1} does -not necessarily equal @code{foo}. But the difference is minute as -long as you stick to numbers that are fairly small (less than 10e12). - -Fields and array elements are incremented -just like variables. (Use @samp{$(i++)} when you want to do a field reference -and a variable increment at the same time. The parentheses are necessary -because of the precedence of the field reference operator @samp{$}.) 
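-
-For example (a small sketch):
-
-@example
-BEGIN @{
-    foo = 4
-    print foo++       # post-increment: prints 4, then foo is 5
-    print ++foo       # pre-increment: foo becomes 6, prints 6
-@}
-@end example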
- -@cindex decrement operators -@cindex operators, decrement -The decrement operator @samp{--} works just like @samp{++}, except that -it subtracts one instead of adding it. As with @samp{++}, it can be used before -the lvalue to pre-decrement or after it to post-decrement. -Following is a summary of increment and decrement expressions: - -@table @code -@cindex @code{++} operator -@item ++@var{lvalue} -This expression increments @var{lvalue}, and the new value becomes the -value of the expression. - -@item @var{lvalue}++ -This expression increments @var{lvalue}, but -the value of the expression is the @emph{old} value of @var{lvalue}. - -@cindex @code{--} operator -@item --@var{lvalue} -This expression is -like @samp{++@var{lvalue}}, but instead of adding, it subtracts. It -decrements @var{lvalue} and delivers the value that is the result. - -@item @var{lvalue}-- -This expression is -like @samp{@var{lvalue}++}, but instead of adding, it subtracts. It -decrements @var{lvalue}. The value of the expression is the @emph{old} -value of @var{lvalue}. -@end table - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Operator Evaluation Order -@cindex advanced notes -@cindex precedence -@cindex operator precedence -@cindex portability issues -@cindex evaluation, order of -@cindex Marx, Groucho -@quotation -@i{Doctor, doctor! It hurts when I do this!@* -So don't do that!}@* -Groucho Marx -@end quotation - -@noindent -What happens for something like the following? - -@example -b = 6 -print b += b++ -@end example - -@noindent -Or something even stranger? - -@example -b = 6 -b += ++b + b++ -print b -@end example - -@cindex side effects -In other words, when do the various side effects prescribed by the -postfix operators (@samp{b++}) take effect? -When side effects happen is @dfn{implementation defined}. -In other words, it is up to the particular version of @command{awk}. -The result for the first example may be 12 or 13, and for the second, it -may be 22 or 23. - -In short, doing things like this is not recommended and definitely -not anything that you can rely upon for portability. -You should avoid such things in your own programs. -@c You'll sleep better at night and be able to look at yourself -@c in the mirror in the morning. - -@node Truth Values, Typing and Comparison, Increment Ops, Expressions -@section True and False in @command{awk} -@cindex truth values -@cindex logical true -@cindex logical false - -@cindex null string -@cindex empty string -Many programming languages have a special representation for the concepts -of ``true'' and ``false.'' Such languages usually use the special -constants @code{true} and @code{false}, or perhaps their uppercase -equivalents. -However, @command{awk} is different. -It borrows a very simple concept of true and -false from C. In @command{awk}, any nonzero numeric value @emph{or} any -non-empty string value is true. Any other value (zero or the null -string @code{""}) is false. The following program prints @samp{A strange -truth value} three times: - -@example -BEGIN @{ - if (3.1415927) - print "A strange truth value" - if ("Four Score And Seven Years Ago") - print "A strange truth value" - if (j = 57) - print "A strange truth value" -@} -@end example - -@cindex dark corner -There is a surprising consequence of the ``nonzero or non-null'' rule: -the string constant @code{"0"} is actually true, because it is non-null. 
-@value{DARKCORNER} - -@node Typing and Comparison, Boolean Ops, Truth Values, Expressions -@section Variable Typing and Comparison Expressions -@cindex comparison expressions -@cindex expression, comparison -@cindex expression, matching -@cindex relational operators -@cindex operators, relational -@cindex regexp operators -@cindex variable typing -@cindex types of variables -@quotation -@i{The Guide is definitive. Reality is frequently inaccurate.}@* -The Hitchhiker's Guide to the Galaxy -@end quotation - -Unlike other programming languages, @command{awk} variables do not have a -fixed type. Instead, they can be either a number or a string, depending -upon the value that is assigned to them. - -@cindex numeric string -The 1992 POSIX standard introduced -the concept of a @dfn{numeric string}, which is simply a string that looks -like a number---for example, @code{@w{" +2"}}. This concept is used -for determining the type of a variable. -The type of the variable is important because the types of two variables -determine how they are compared. -In @command{gawk}, variable typing follows these rules: - -@itemize @bullet -@item -A numeric constant or the result of a numeric operation has the @var{numeric} -attribute. - -@item -A string constant or the result of a string operation has the @var{string} -attribute. - -@item -Fields, @code{getline} input, @code{FILENAME}, @code{ARGV} elements, -@code{ENVIRON} elements, and the -elements of an array created by @code{split} that are numeric strings -have the @var{strnum} attribute. Otherwise, they have the @var{string} -attribute. -Uninitialized variables also have the @var{strnum} attribute. - -@item -Attributes propagate across assignments but are not changed by -any use. -@c (Although a use may cause the entity to acquire an additional -@c value such that it has both a numeric and string value, this leaves the -@c attribute unchanged.) -@c This is important but not relevant -@end itemize - -The last rule is particularly important. In the following program, -@code{a} has numeric type, even though it is later used in a string -operation: - -@example -BEGIN @{ - a = 12.345 - b = a " is a cute number" - print b -@} -@end example - -When two operands are compared, either string comparison or numeric comparison -may be used. This depends upon the attributes of the operands, according to the -following symmetric matrix: - -@c thanks to Karl Berry, kb@cs.umb.edu, for major help with TeX tables -@tex -\centerline{ -\vbox{\bigskip % space above the table (about 1 linespace) -% Because we have vertical rules, we can't let TeX insert interline space -% in its usual way. -\offinterlineskip -% -% Define the table template. & separates columns, and \cr ends the -% template (and each row). # is replaced by the text of that entry on -% each row. The template for the first column breaks down like this: -% \strut -- a way to make each line have the height and depth -% of a normal line of type, since we turned off interline spacing. -% \hfil -- infinite glue; has the effect of right-justifying in this case. -% # -- replaced by the text (for instance, `STRNUM', in the last row). -% \quad -- about the width of an `M'. Just separates the columns. -% -% The second column (\vrule#) is what generates the vertical rule that -% spans table rows. -% -% The doubled && before the next entry means `repeat the following -% template as many times as necessary on each line' -- in our case, twice. -% -% The template itself, \quad#\hfil, left-justifies with a little space before. 
-% -\halign{\strut\hfil#\quad&\vrule#&&\quad#\hfil\cr - &&STRING &NUMERIC &STRNUM\cr -% The \omit tells TeX to skip inserting the template for this column on -% this particular row. In this case, we only want a little extra space -% to separate the heading row from the rule below it. the depth 2pt -- -% `\vrule depth 2pt' is that little space. -\omit &depth 2pt\cr -% This is the horizontal rule below the heading. Since it has nothing to -% do with the columns of the table, we use \noalign to get it in there. -\noalign{\hrule} -% Like above, this time a little more space. -\omit &depth 4pt\cr -% The remaining rows have nothing special about them. -STRING &&string &string &string\cr -NUMERIC &&string &numeric &numeric\cr -STRNUM &&string &numeric &numeric\cr -}}} -@end tex -@ifnottex -@display - +---------------------------------------------- - | STRING NUMERIC STRNUM ---------+---------------------------------------------- - | -STRING | string string string - | -NUMERIC | string numeric numeric - | -STRNUM | string numeric numeric ---------+---------------------------------------------- -@end display -@end ifnottex - -The basic idea is that user input that looks numeric---and @emph{only} -user input---should be treated as numeric, even though it is actually -made of characters and is therefore also a string. -Thus, for example, the string constant @w{@code{" +3.14"}} -is a string, even though it looks numeric, -and is @emph{never} treated as number for comparison -purposes. - -In short, when one operand is a ``pure'' string, such as a string -constant, then a string comparison is performed. Otherwise, a -numeric comparison is performed.@footnote{The POSIX standard is under -revision. The revised standard's rules for typing and comparison are -the same as just described for @command{gawk}.} - -@dfn{Comparison expressions} compare strings or numbers for -relationships such as equality. They are written using @dfn{relational -operators}, which are a superset of those in C. Here is a table of -them: - -@cindex relational operators -@cindex operators, relational -@cindex @code{<} operator -@cindex @code{<=} operator -@cindex @code{>} operator -@cindex @code{>=} operator -@cindex @code{==} operator -@cindex @code{!=} operator -@cindex @code{~} operator -@cindex @code{!~} operator -@cindex @code{in} operator -@table @code -@item @var{x} < @var{y} -True if @var{x} is less than @var{y}. - -@item @var{x} <= @var{y} -True if @var{x} is less than or equal to @var{y}. - -@item @var{x} > @var{y} -True if @var{x} is greater than @var{y}. - -@item @var{x} >= @var{y} -True if @var{x} is greater than or equal to @var{y}. - -@item @var{x} == @var{y} -True if @var{x} is equal to @var{y}. - -@item @var{x} != @var{y} -True if @var{x} is not equal to @var{y}. - -@item @var{x} ~ @var{y} -True if the string @var{x} matches the regexp denoted by @var{y}. - -@item @var{x} !~ @var{y} -True if the string @var{x} does not match the regexp denoted by @var{y}. - -@item @var{subscript} in @var{array} -True if the array @var{array} has an element with the subscript @var{subscript}. -@end table - -Comparison expressions have the value one if true and zero if false. -When comparing operands of mixed types, numeric operands are converted -to strings using the value of @code{CONVFMT} -(@pxref{Conversion, ,Conversion of Strings and Numbers}). - -Strings are compared -by comparing the first character of each, then the second character of each, -and so on. Thus, @code{"10"} is less than @code{"9"}. 
If there are two -strings where one is a prefix of the other, the shorter string is less than -the longer one. Thus, @code{"abc"} is less than @code{"abcd"}. - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -It is very easy to accidentally mistype the @samp{==} operator and -leave off one of the @samp{=} characters. The result is still valid @command{awk} -code, but the program does not do what is intended: - -@example -if (a = b) # oops! should be a == b - @dots{} -else - @dots{} -@end example - -@noindent -Unless @code{b} happens to be zero or the null string, the @code{if} -part of the test always succeeds. Because the operators are -so similar, this kind of error is very difficult to spot when -scanning the source code. - -The following table of expressions illustrates the kind of comparison -@command{gawk} performs, as well as what the result of the comparison is: - -@table @code -@item 1.5 <= 2.0 -numeric comparison (true) - -@item "abc" >= "xyz" -string comparison (false) - -@item 1.5 != " +2" -string comparison (true) - -@item "1e2" < "3" -string comparison (true) - -@item a = 2; b = "2" -@itemx a == b -string comparison (true) - -@item a = 2; b = " +2" -@item a == b -string comparison (false) -@end table - -In the next example: - -@example -$ echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}' -@print{} false -@end example - -@cindex comparisons, string vs. regexp -@cindex string comparison vs. regexp comparison -@cindex regexp comparison vs. string comparison -@noindent -the result is @samp{false} because both @code{$1} and @code{$2} -are user input. They are numeric strings---therefore both have -the @var{strnum} attribute, dictating a numeric comparison. -The purpose of the comparison rules and the use of numeric strings is -to attempt to produce the behavior that is ``least surprising,'' while -still ``doing the right thing.'' -String comparisons and regular expression comparisons are very different. -For example: - -@example -x == "foo" -@end example - -@noindent -has the value one, or is true if the variable @code{x} -is precisely @samp{foo}. By contrast: - -@example -x ~ /foo/ -@end example - -@noindent -has the value one if @code{x} contains @samp{foo}, such as -@code{"Oh, what a fool am I!"}. - -The righthand operand of the @samp{~} and @samp{!~} operators may be -either a regexp constant (@code{/@dots{}/}) or an ordinary -expression. In the latter case, the value of the expression as a string is used as a -dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}; also -@pxref{Computed Regexps, ,Using Dynamic Regexps}). - -@cindex regexp as expression -In modern implementations of @command{awk}, a constant regular -expression in slashes by itself is also an expression. The regexp -@code{/@var{regexp}/} is an abbreviation for the following comparison expression: - -@example -$0 ~ /@var{regexp}/ -@end example - -One special place where @code{/foo/} is @emph{not} an abbreviation for -@samp{$0 ~ /foo/} is when it is the righthand operand of @samp{~} or -@samp{!~}. -@xref{Using Constant Regexps, ,Using Regular Expression Constants}, -where this is discussed in more detail. 
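-
-To summarize with a small sketch: the string constant below is compared
-as a string, while the field, being a numeric string, is compared as a
-number:
-
-@example
-$ gawk 'BEGIN @{ print (" +3.14" == 3.14) @}'
-@print{} 0
-$ echo " +3.14" | gawk '@{ print ($1 == 3.14) @}'
-@print{} 1
-@end example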
- -@node Boolean Ops, Conditional Exp, Typing and Comparison, Expressions -@section Boolean Expressions -@cindex expression, boolean -@cindex boolean expressions -@cindex operators, boolean -@cindex boolean operators -@cindex logical operators -@cindex operators, logical -@cindex short-circuit operators -@cindex operators, short-circuit -@cindex AND logical operator -@cindex OR logical operator -@cindex NOT logical operator -@cindex @code{&&} operator -@cindex @code{||} operator -@cindex @code{!} operator - -A @dfn{Boolean expression} is a combination of comparison expressions or -matching expressions, using the Boolean operators ``or'' -(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with -parentheses to control nesting. The truth value of the Boolean expression is -computed by combining the truth values of the component expressions. -Boolean expressions are also referred to as @dfn{logical expressions}. -The terms are equivalent. - -Boolean expressions can be used wherever comparison and matching -expressions can be used. They can be used in @code{if}, @code{while}, -@code{do}, and @code{for} statements -(@pxref{Statements, ,Control Statements in Actions}). -They have numeric values (one if true, zero if false), that come into play -if the result of the Boolean expression is stored in a variable or -used in arithmetic. - -In addition, every Boolean expression is also a valid pattern, so -you can use one as a pattern to control the execution of rules. -The Boolean operators are: - -@table @code -@item @var{boolean1} && @var{boolean2} -True if both @var{boolean1} and @var{boolean2} are true. For example, -the following statement prints the current input record if it contains -both @samp{2400} and @samp{foo}: - -@example -if ($0 ~ /2400/ && $0 ~ /foo/) print -@end example - -@cindex side effects -The subexpression @var{boolean2} is evaluated only if @var{boolean1} -is true. This can make a difference when @var{boolean2} contains -expressions that have side effects. In the case of @samp{$0 ~ /foo/ && -($2 == bar++)}, the variable @code{bar} is not incremented if there is -no substring @samp{foo} in the record. - -@item @var{boolean1} || @var{boolean2} -True if at least one of @var{boolean1} or @var{boolean2} is true. -For example, the following statement prints all records in the input -that contain @emph{either} @samp{2400} or -@samp{foo} or both: - -@example -if ($0 ~ /2400/ || $0 ~ /foo/) print -@end example - -The subexpression @var{boolean2} is evaluated only if @var{boolean1} -is false. This can make a difference when @var{boolean2} contains -expressions that have side effects. - -@item ! @var{boolean} -True if @var{boolean} is false. For example, -the following program prints @samp{no home!} in -the unusual event that the @env{HOME} environment -variable is not defined: - -@example -BEGIN @{ if (! ("HOME" in ENVIRON)) - print "no home!" @} -@end example - -(The @code{in} operator is described in -@ref{Reference to Elements, ,Referring to an Array Element}.) -@end table - -The @samp{&&} and @samp{||} operators are called @dfn{short-circuit} -operators because of the way they work. Evaluation of the full expression -is ``short-circuited'' if the result can be determined part way through -its evaluation. - -@cindex line continuation -Statements that use @samp{&&} or @samp{||} can be continued simply -by putting a newline after them. 
But you cannot put a newline in front -of either of these operators without using backslash continuation -(@pxref{Statements/Lines, ,@command{awk} Statements Versus Lines}). - -@cindex flag variables -The actual value of an expression using the @samp{!} operator is -either one or zero, depending upon the truth value of the expression it -is applied to. -The @samp{!} operator is often useful for changing the sense of a flag -variable from false to true and back again. For example, the following -program is one way to print lines in between special bracketing lines: - -@example -$1 == "START" @{ interested = ! interested; next @} -interested == 1 @{ print @} -$1 == "END" @{ interested = ! interested; next @} -@end example - -@noindent -The variable @code{interested}, as with all @command{awk} variables, starts -out initialized to zero, which is also false. When a line is seen whose -first field is @samp{START}, the value of @code{interested} is toggled -to true, using @samp{!}. The next rule prints lines as long as -@code{interested} is true. When a line is seen whose first field is -@samp{END}, @code{interested} is toggled back to false. - -@ignore -Scott Deifik points out that this program isn't robust against -bogus input data, but the point is to illustrate the use of `!', -so we'll leave well enough alone. -@end ignore - -@strong{Note:} The @code{next} statement is discussed in -@ref{Next Statement, ,The @code{next} Statement}. -@code{next} tells @command{awk} to skip the rest of the rules, get the -next record, and start processing the rules over again at the top. -The reason it's there is to avoid printing the bracketing -@samp{START} and @samp{END} lines. - -@node Conditional Exp, Function Calls, Boolean Ops, Expressions -@section Conditional Expressions -@cindex conditional expression -@cindex expression, conditional - -A @dfn{conditional expression} is a special kind of expression that has -three operands. It allows you to use one expression's value to select -one of two other expressions. -The conditional expression is the same as in the C language, -as shown here: - -@example -@var{selector} ? @var{if-true-exp} : @var{if-false-exp} -@end example - -@noindent -There are three subexpressions. The first, @var{selector}, is always -computed first. If it is ``true'' (not zero or not null), then -@var{if-true-exp} is computed next and its value becomes the value of -the whole expression. Otherwise, @var{if-false-exp} is computed next -and its value becomes the value of the whole expression. -For example, the following expression produces the absolute value of @code{x}: - -@example -x >= 0 ? x : -x -@end example - -@cindex side effects -Each time the conditional expression is computed, only one of -@var{if-true-exp} and @var{if-false-exp} is used; the other is ignored. -This is important when the expressions have side effects. For example, -this conditional expression examines element @code{i} of either array -@code{a} or array @code{b}, and increments @code{i}: - -@example -x == y ? a[i++] : b[i++] -@end example - -@noindent -This is guaranteed to increment @code{i} exactly once, because each time -only one of the two increment expressions is executed -and the other is not. -@xref{Arrays, ,Arrays in @command{awk}}, -for more information about arrays. - -@cindex differences between @command{gawk} and @command{awk} -@cindex line continuation -As a minor @command{gawk} extension, -a statement that uses @samp{?:} can be continued simply -by putting a newline after either character. 
-However, putting a newline in front -of either character does not work without using backslash continuation -(@pxref{Statements/Lines, ,@command{awk} Statements Versus Lines}). -If @option{--posix} is specified -(@pxref{Options, , Command-Line Options}), then this extension is disabled. - -@node Function Calls, Precedence, Conditional Exp, Expressions -@section Function Calls -@cindex function call -@cindex calling a function - -A @dfn{function} is a name for a particular calculation. -This enables you to -ask for it by name at any point in the program. For -example, the function @code{sqrt} computes the square root of a number. - -A fixed set of functions are @dfn{built-in}, which means they are -available in every @command{awk} program. The @code{sqrt} function is one -of these. @xref{Built-in, ,Built-in Functions}, for a list of built-in -functions and their descriptions. In addition, you can define -functions for use in your program. -@xref{User-defined, ,User-Defined Functions}, -for instructions on how to do this. - -@cindex arguments in function call -The way to use a function is with a @dfn{function call} expression, -which consists of the function name followed immediately by a list of -@dfn{arguments} in parentheses. The arguments are expressions that -provide the raw materials for the function's calculations. -When there is more than one argument, they are separated by commas. If -there are no arguments, just write @samp{()} after the function name. -The following examples show function calls with and without arguments: - -@example -sqrt(x^2 + y^2) @i{one argument} -atan2(y, x) @i{two arguments} -rand() @i{no arguments} -@end example - -@strong{Caution:} -Do not put any space between the function name and the open-parenthesis! -A user-defined function name looks just like the name of a -variable---a space would make the expression look like concatenation of -a variable with an expression inside parentheses. - -With built-in functions, space before the parenthesis is harmless, but -it is best not to get into the habit of using space to avoid mistakes -with user-defined functions. Each function expects a particular number -of arguments. For example, the @code{sqrt} function must be called with -a single argument: the number to take the square root of: - -@example -sqrt(@var{argument}) -@end example - -Some of the built-in functions have one or -more optional arguments. -If those arguments are not supplied, the functions -use a reasonable default value. -@xref{Built-in, ,Built-in Functions}, for full details. If arguments -are omitted in calls to user-defined functions, then those arguments are -treated as local variables and initialized to the empty string -(@pxref{User-defined, ,User-Defined Functions}). - -@cindex side effects -Like every other expression, the function call has a value, which is -computed by the function based on the arguments you give it. In this -example, the value of @samp{sqrt(@var{argument})} is the square root of -@var{argument}. A function can also have side effects, such as assigning -values to certain variables or doing I/O. 
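-
-As a small sketch of a call that is used both for its value and for its
-side effect, @code{sub} returns the number of substitutions it performed
-and, at the same time, modifies the record it operates on:
-
-@example
-$ echo "hello hello" | awk '@{ n = sub(/hello/, "goodbye")
->                              print n, $0 @}'
-@print{} 1 goodbye hello
-@end example
-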
-The following program reads numbers, one number per line, and prints the -square root of each one: - -@example -$ awk '@{ print "The square root of", $1, "is", sqrt($1) @}' -1 -@print{} The square root of 1 is 1 -3 -@print{} The square root of 3 is 1.73205 -5 -@print{} The square root of 5 is 2.23607 -@kbd{Ctrl-d} -@end example - -@node Precedence, , Function Calls, Expressions -@section Operator Precedence (How Operators Nest) -@cindex precedence -@cindex operator precedence - -@dfn{Operator precedence} determines how operators are grouped when -different operators appear close by in one expression. For example, -@samp{*} has higher precedence than @samp{+}; thus, @samp{a + b * c} -means to multiply @code{b} and @code{c}, and then add @code{a} to the -product (i.e., @samp{a + (b * c)}). - -The normal precedence of the operators can be overruled by using parentheses. -Think of the precedence rules as saying where the -parentheses are assumed to be. In -fact, it is wise to always use parentheses whenever there is an unusual -combination of operators, because other people who read the program may -not remember what the precedence is in this case. -Even experienced programmers occasionally forget the exact rules, -which leads to mistakes. -Explicit parentheses help prevent -any such mistakes. - -When operators of equal precedence are used together, the leftmost -operator groups first, except for the assignment, conditional, and -exponentiation operators, which group in the opposite order. -Thus, @samp{a - b + c} groups as @samp{(a - b) + c} and -@samp{a = b = c} groups as @samp{a = (b = c)}. - -The precedence of prefix unary operators does not matter as long as only -unary operators are involved, because there is only one way to interpret -them: innermost first. Thus, @samp{$++i} means @samp{$(++i)} and -@samp{++$x} means @samp{++($x)}. However, when another operator follows -the operand, then the precedence of the unary operators can matter. -@samp{$x^2} means @samp{($x)^2}, but @samp{-x^2} means -@samp{-(x^2)}, because @samp{-} has lower precedence than @samp{^}, -whereas @samp{$} has higher precedence. -This table presents @command{awk}'s operators, in order of highest -precedence to lowest: - -@page - -@cindex @code{$} field operator -@cindex @code{+} operator -@cindex @code{-} operator -@cindex @code{!} operator -@cindex @code{*} operator -@cindex @code{/} operator -@cindex @code{%} operator -@cindex @code{^} operator -@cindex @code{**} operator -@cindex @code{++} operator -@cindex @code{--} operator -@cindex @code{<} operator -@cindex @code{<=} operator -@cindex @code{==} operator -@cindex @code{!=} operator -@cindex @code{>} operator -@cindex @code{>=} operator -@cindex @code{>>} I/O operator -@cindex @code{|} I/O operator -@cindex @code{|&} I/O operator -@cindex @code{~} operator -@cindex @code{!~} operator -@cindex @code{in} operator -@cindex @code{&&} operator -@cindex @code{||} operator -@cindex @code{?:} operator -@cindex @code{+=} operator -@cindex @code{-=} operator -@cindex @code{*=} operator -@cindex @code{/=} operator -@cindex @code{%=} operator -@cindex @code{^=} operator -@cindex @code{**=} operator -@c use @code in the items, looks better in TeX w/o all the quotes -@table @code -@item (@dots{}) -Grouping. - -@item $ -Field. - -@item ++ -- -Increment, decrement. - -@item ^ ** -Exponentiation. These operators group right-to-left. - -@item + - ! -Unary plus, minus, logical ``not.'' - -@item * / % -Multiplication, division, modulus. - -@item + - -Addition, subtraction. 
- -@item @r{String Concatenation} -No special symbol is used to indicate concatenation. -The operands are simply written side by side -(@pxref{Concatenation, ,String Concatenation}). - -@item < <= == != -@itemx > >= >> | |& -Relational and redirection. -The relational operators and the redirections have the same precedence -level. Characters such as @samp{>} serve both as relationals and as -redirections; the context distinguishes between the two meanings. - -Note that the I/O redirection operators in @code{print} and @code{printf} -statements belong to the statement level, not to expressions. The -redirection does not produce an expression that could be the operand of -another operator. As a result, it does not make sense to use a -redirection operator near another operator of lower precedence without -parentheses. Such combinations (for example @samp{print foo > a ? b : c}), -result in syntax errors. -The correct way to write this statement is @samp{print foo > (a ? b : c)}. - -@item ~ !~ -Matching, non-matching. - -@item in -Array membership. - -@item && -Logical ``and''. - -@item || -Logical ``or''. - -@item ?: -Conditional. This operator groups right-to-left. - -@item = += -= *= -@itemx /= %= ^= **= -Assignment. These operators group right-to-left. -@end table - -@cindex portability issues -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@strong{Note:} -The @samp{|&}, @samp{**}, and @samp{**=} operators are not specified by POSIX. -For maximum portability, do not use them. - -@node Patterns and Actions, Arrays, Expressions, Top -@chapter Patterns, Actions, and Variables -@cindex pattern, definition of - -As you have already seen, each @command{awk} statement consists of -a pattern with an associated action. This @value{CHAPTER} describes how -you build patterns and actions, what kinds of things you can do within -actions, and @command{awk}'s built-in variables. - -The pattern-action rules and the statements available for use -within actions form the core of @command{awk} programming. -In a sense, everything covered -up to here has been the foundation -that programs are built on top of. Now it's time to start -building something useful. - -@menu -* Pattern Overview:: What goes into a pattern. -* Using Shell Variables:: How to use shell variables with @command{awk}. -* Action Overview:: What goes into an action. -* Statements:: Describes the various control statements in - detail. -* Built-in Variables:: Summarizes the built-in variables. -@end menu - -@node Pattern Overview, Using Shell Variables, Patterns and Actions, Patterns and Actions -@section Pattern Elements - -@menu -* Regexp Patterns:: Using regexps as patterns. -* Expression Patterns:: Any expression can be used as a pattern. -* Ranges:: Pairs of patterns specify record ranges. -* BEGIN/END:: Specifying initialization and cleanup rules. -* Empty:: The empty pattern, which matches every record. -@end menu - -@cindex patterns, types of -Patterns in @command{awk} control the execution of rules---a rule is -executed when its pattern matches the current input record. -The following is a summary of the types of patterns in @command{awk}: - -@table @code -@item /@var{regular expression}/ -A regular expression. It matches when the text of the -input record fits the regular expression. -(@xref{Regexp, ,Regular Expressions}.) - -@item @var{expression} -A single expression. It matches when its value -is nonzero (if a number) or non-null (if a string). -(@xref{Expression Patterns, ,Expressions as Patterns}.) 
- -@item @var{pat1}, @var{pat2} -A pair of patterns separated by a comma, specifying a range of records. -The range includes both the initial record that matches @var{pat1} and -the final record that matches @var{pat2}. -(@xref{Ranges, ,Specifying Record Ranges with Patterns}.) - -@item BEGIN -@itemx END -Special patterns for you to supply startup or cleanup actions for your -@command{awk} program. -(@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}.) - -@item @var{empty} -The empty pattern matches every input record. -(@xref{Empty, ,The Empty Pattern}.) -@end table - -@node Regexp Patterns, Expression Patterns, Pattern Overview, Pattern Overview -@subsection Regular Expressions as Patterns - -Regular expressions are one of the first kinds of patterns presented -in this book. -This kind of pattern is simply a regexp constant in the pattern part of -a rule. Its meaning is @samp{$0 ~ /@var{pattern}/}. -The pattern matches when the input record matches the regexp. -For example: - -@example -/foo|bar|baz/ @{ buzzwords++ @} -END @{ print buzzwords, "buzzwords seen" @} -@end example - -@node Expression Patterns, Ranges, Regexp Patterns, Pattern Overview -@subsection Expressions as Patterns - -Any @command{awk} expression is valid as an @command{awk} pattern. -The pattern matches if the expression's value is nonzero (if a -number) or non-null (if a string). -The expression is reevaluated each time the rule is tested against a new -input record. If the expression uses fields such as @code{$1}, the -value depends directly on the new input record's text; otherwise it -depends on only what has happened so far in the execution of the -@command{awk} program. - -Comparison expressions, using the comparison operators described in -@ref{Typing and Comparison, ,Variable Typing and Comparison Expressions}, -are a very common kind of pattern. -Regexp matching and non-matching are also very common expressions. -The left operand of the @samp{~} and @samp{!~} operators is a string. -The right operand is either a constant regular expression enclosed in -slashes (@code{/@var{regexp}/}), or any expression whose string value -is used as a dynamic regular expression -(@pxref{Computed Regexps, , Using Dynamic Regexps}). -The following example prints the second field of each input record -whose first field is precisely @samp{foo}: - -@example -$ awk '$1 == "foo" @{ print $2 @}' BBS-list -@end example - -@noindent -(There is no output, because there is no BBS site with the exact name @samp{foo}.) -Contrast this with the following regular expression match, which -accepts any record with a first field that contains @samp{foo}: - -@example -$ awk '$1 ~ /foo/ @{ print $2 @}' BBS-list -@print{} 555-1234 -@print{} 555-6699 -@print{} 555-6480 -@print{} 555-2127 -@end example - -A regexp constant as a pattern is also a special case of an expression -pattern. The expression @code{/foo/} has the value one if @samp{foo} -appears in the current input record. Thus, as a pattern, @code{/foo/} -matches any record containing @samp{foo}. - -Boolean expressions are also commonly used as patterns. -Whether the pattern -matches an input record depends on whether its subexpressions match. 
-For example, the following command prints all the records in -@file{BBS-list} that contain both @samp{2400} and @samp{foo}: - -@example -$ awk '/2400/ && /foo/' BBS-list -@print{} fooey 555-1234 2400/1200/300 B -@end example - -The following command prints all records in -@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo} -(or both, of course): - -@example -$ awk '/2400/ || /foo/' BBS-list -@print{} alpo-net 555-3412 2400/1200/300 A -@print{} bites 555-1675 2400/1200/300 A -@print{} fooey 555-1234 2400/1200/300 B -@print{} foot 555-6699 1200/300 B -@print{} macfoo 555-6480 1200/300 A -@print{} sdace 555-3430 2400/1200/300 A -@print{} sabafoo 555-2127 1200/300 C -@end example - -The following command prints all records in -@file{BBS-list} that do @emph{not} contain the string @samp{foo}: - -@example -$ awk '! /foo/' BBS-list -@print{} aardvark 555-5553 1200/300 B -@print{} alpo-net 555-3412 2400/1200/300 A -@print{} barfly 555-7685 1200/300 A -@print{} bites 555-1675 2400/1200/300 A -@print{} camelot 555-0542 300 C -@print{} core 555-2912 1200/300 C -@print{} sdace 555-3430 2400/1200/300 A -@end example - -The subexpressions of a Boolean operator in a pattern can be constant regular -expressions, comparisons, or any other @command{awk} expressions. Range -patterns are not expressions, so they cannot appear inside Boolean -patterns. Likewise, the special patterns @code{BEGIN} and @code{END}, -which never match any input record, are not expressions and cannot -appear inside Boolean patterns. - -@node Ranges, BEGIN/END, Expression Patterns, Pattern Overview -@subsection Specifying Record Ranges with Patterns - -@cindex range pattern -@cindex pattern, range -@cindex matching ranges of lines -A @dfn{range pattern} is made of two patterns separated by a comma, in -the form @samp{@var{begpat}, @var{endpat}}. It is used to match ranges of -consecutive input records. The first pattern, @var{begpat}, controls -where the range begins, while @var{endpat} controls where -the pattern ends. For example, the following: - -@example -awk '$1 == "on", $1 == "off"' myfile -@end example - -@noindent -prints every record in @file{myfile} between @samp{on}/@samp{off} pairs, inclusive. - -A range pattern starts out by matching @var{begpat} against every -input record. When a record matches @var{begpat}, the range pattern is -@dfn{turned on} and the range pattern matches this record as well. As long as -the range pattern stays turned on, it automatically matches every input -record read. The range pattern also matches @var{endpat} against every -input record; when this succeeds, the range pattern is turned off again -for the following record. Then the range pattern goes back to checking -@var{begpat} against each record. - -The record that turns on the range pattern and the one that turns it -off both match the range pattern. If you don't want to operate on -these records, you can write @code{if} statements in the rule's action -to distinguish them from the records you are interested in. - -It is possible for a pattern to be turned on and off by the same -record. If the record satisfies both conditions, then the action is -executed for just that record. -For example, suppose there is text between two identical markers (say -the @samp{%} symbol), each on its own line, that should be ignored. -A first attempt would be to -combine a range pattern that describes the delimited text with the -@code{next} statement -(not discussed yet, @pxref{Next Statement, , The @code{next} Statement}). 
-This causes @command{awk} to skip any further processing of the current -record and start over again with the next input record. Such a program -looks like this: - -@example -/^%$/,/^%$/ @{ next @} - @{ print @} -@end example - -@noindent -@cindex skipping lines between markers -@cindex flag variables -This program fails because the range pattern is both turned on and turned off -by the first line, which just has a @samp{%} on it. To accomplish this task, -write the program in the following manner, using a flag: - -@cindex @code{!} operator -@example -/^%$/ @{ skip = ! skip; next @} -skip == 1 @{ next @} # skip lines with `skip' set -@end example - -In a range pattern, the comma (@samp{,}) has the lowest precedence of -all the operators (i.e., it is evaluated last). Thus, the following -program attempts to combine a range pattern with another simpler test: - -@example -echo Yes | awk '/1/,/2/ || /Yes/' -@end example - -The intent of this program is @samp{(/1/,/2/) || /Yes/}. -However, @command{awk} interprets this as @samp{/1/, (/2/ || /Yes/)}. -This cannot be changed or worked around; range patterns do not combine -with other patterns: - -@example -$ echo yes | gawk '(/1/,/2/) || /Yes/' -@error{} gawk: cmd. line:1: (/1/,/2/) || /Yes/ -@error{} gawk: cmd. line:1: ^ parse error -@error{} gawk: cmd. line:2: (/1/,/2/) || /Yes/ -@error{} gawk: cmd. line:2: ^ unexpected newline -@end example - -@node BEGIN/END, Empty, Ranges, Pattern Overview -@subsection The @code{BEGIN} and @code{END} Special Patterns - -@cindex @code{BEGIN} special pattern -@cindex pattern, @code{BEGIN} -@cindex @code{END} special pattern -@cindex pattern, @code{END} -@cindex blocks, @code{BEGIN} and @code{END} -All the patterns described so far are for matching input records. -The @code{BEGIN} and @code{END} special patterns are different. -They supply startup and cleanup actions for @command{awk} programs. -@code{BEGIN} and @code{END} rules must have actions; there is no default -action for these rules because there is no current record when they run. -@code{BEGIN} and @code{END} rules are often referred to as -``@code{BEGIN} and @code{END} blocks'' by long-time @command{awk} -programmers. - -@menu -* Using BEGIN/END:: How and why to use BEGIN/END rules. -* I/O And BEGIN/END:: I/O issues in BEGIN/END rules. -@end menu - -@node Using BEGIN/END, I/O And BEGIN/END, BEGIN/END, BEGIN/END -@subsubsection Startup and Cleanup Actions - -A @code{BEGIN} rule is executed once only, before the first input record -is read. Likewise, an @code{END} rule is executed once only, after all the -input is read. For example: - -@example -$ awk ' -> BEGIN @{ print "Analysis of \"foo\"" @} -> /foo/ @{ ++n @} -> END @{ print "\"foo\" appears", n, "times." @}' BBS-list -@print{} Analysis of "foo" -@print{} "foo" appears 4 times. -@end example - -This program finds the number of records in the input file @file{BBS-list} -that contain the string @samp{foo}. The @code{BEGIN} rule prints a title -for the report. There is no need to use the @code{BEGIN} rule to -initialize the counter @code{n} to zero, since @command{awk} does this -automatically (@pxref{Variables}). -The second rule increments the variable @code{n} every time a -record containing the pattern @samp{foo} is read. The @code{END} rule -prints the value of @code{n} at the end of the run. - -The special patterns @code{BEGIN} and @code{END} cannot be used in ranges -or with Boolean operators (indeed, they cannot be used with any operators). 
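-
-Another common use of a @code{BEGIN} rule is to perform setup that must
-happen before the first record is read, such as assigning to @code{FS}.
-A minimal sketch (the password file is simply a convenient source of
-colon-separated data):
-
-@example
-$ awk 'BEGIN @{ FS = ":" @}
->      @{ print $1 @}' /etc/passwd
-@end example
-
-@noindent
-Because the assignment happens in the @code{BEGIN} rule, it takes effect
-before any input is split into fields.
-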
-An @command{awk} program may have multiple @code{BEGIN} and/or @code{END} -rules. They are executed in the order in which they appear: all the @code{BEGIN} -rules at startup and all the @code{END} rules at termination. -@code{BEGIN} and @code{END} rules may be intermixed with other rules. -This feature was added in the 1987 version of @command{awk} and is included -in the POSIX standard. -The original (1978) version of @command{awk} -required the @code{BEGIN} rule to be placed at the beginning of the -program, the @code{END} rule to be placed at the end, and only allowed one of -each. -This is no longer required, but it is a good idea to follow this template -in terms of program organization and readability. - -Multiple @code{BEGIN} and @code{END} rules are useful for writing -library functions, because each library file can have its own @code{BEGIN} and/or -@code{END} rule to do its own initialization and/or cleanup. -The order in which library functions are named on the command line -controls the order in which their @code{BEGIN} and @code{END} rules are -executed. Therefore you have to be careful when writing such rules in -library files so that the order in which they are executed doesn't matter. -@xref{Options, ,Command-Line Options}, for more information on -using library functions. -@xref{Library Functions, ,A Library of @command{awk} Functions}, -for a number of useful library functions. - -If an @command{awk} program only has a @code{BEGIN} rule and no -other rules, then the program exits after the @code{BEGIN} rule is -run.@footnote{The original version of @command{awk} used to keep -reading and ignoring input until end of file was seen.} However, if an -@code{END} rule exists, then the input is read, even if there are -no other rules in the program. This is necessary in case the @code{END} -rule checks the @code{FNR} and @code{NR} variables. - -@node I/O And BEGIN/END, , Using BEGIN/END, BEGIN/END -@subsubsection Input/Output from @code{BEGIN} and @code{END} Rules - -@cindex I/O, from @code{BEGIN} and @code{END} -There are several (sometimes subtle) points to remember when doing I/O -from a @code{BEGIN} or @code{END} rule. -The first has to do with the value of @code{$0} in a @code{BEGIN} -rule. Because @code{BEGIN} rules are executed before any input is read, -there simply is no input record, and therefore no fields, when -executing @code{BEGIN} rules. References to @code{$0} and the fields -yield a null string or zero, depending upon the context. One way -to give @code{$0} a real value is to execute a @code{getline} command -without a variable (@pxref{Getline, ,Explicit Input with @code{getline}}). -Another way is to simply assign a value to @code{$0}. - -@cindex differences between @command{gawk} and @command{awk} -The second point is similar to the first but from the other direction. -Traditionally, due largely to implementation issues, @code{$0} and -@code{NF} were @emph{undefined} inside an @code{END} rule. -The POSIX standard specifies that @code{NF} is available in an @code{END} -rule. It contains the number of fields from the last input record. -Most probably due to an oversight, the standard does not say that @code{$0} -is also preserved, although logically one would think that it should be. -In fact, @command{gawk} does preserve the value of @code{$0} for use in -@code{END} rules. Be aware, however, that Unix @command{awk}, and possibly -other implementations, do not. - -The third point follows from the first two. 
The meaning of @samp{print} -inside a @code{BEGIN} or @code{END} rule is the same as always: -@samp{print $0}. If @code{$0} is the null string, then this prints an -empty line. Many long-time @command{awk} programmers use an unadorned -@samp{print} in @code{BEGIN} and @code{END} rules, to mean @samp{@w{print ""}}, -relying on @code{$0} being null. Although one might generally get away with -this in @code{BEGIN} rules, it is a very bad idea in @code{END} rules, -at least in @command{gawk}. It is also poor style, since if an empty -line is needed in the output, the program should print one explicitly. - -Finally, the @code{next} and @code{nextfile} statements are not allowed -in a @code{BEGIN} rule, because the implicit -read-a-record-and-match-against-the-rules loop has not started yet. Similarly, those statements -are not valid in an @code{END} rule, since all the input has been read. -(@xref{Next Statement, ,The @code{next} Statement}, and see -@ref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}.) - -@node Empty, , BEGIN/END, Pattern Overview -@subsection The Empty Pattern - -@cindex empty pattern -@cindex pattern, empty -An empty (i.e., non-existent) pattern is considered to match @emph{every} -input record. For example, the program: - -@example -awk '@{ print $1 @}' BBS-list -@end example - -@noindent -prints the first field of every record. - -@node Using Shell Variables, Action Overview, Pattern Overview, Patterns and Actions -@section Using Shell Variables in Programs -@cindex shell variables, using in @command{awk} programs -@cindex using shell variables in @command{awk} programs -@cindex shell and @command{awk} interaction - -@command{awk} programs are often used as components in larger -programs written in shell. -For example, it is very common to use a shell variable to -hold a pattern that the @command{awk} program searches for. -There are two ways to get the value of the shell variable -into the body of the @command{awk} program. - -The most common method is to use shell quoting to substitute -the variable's value into the program inside the script. -For example, in the following program: - -@example -echo -n "Enter search pattern: " -read pattern -awk "/$pattern/ "'@{ nmatches++ @} - END @{ print nmatches, "found" @}' /path/to/data -@end example - -@noindent -the @command{awk} program consists of two pieces of quoted text -that are concatenated together to form the program. -The first part is double-quoted, which allows substitution of -the @code{pattern} variable inside the quotes. -The second part is single-quoted. - -Variable substitution via quoting works, but can be potentially -messy. It requires a good understanding of the shell's quoting rules -(@pxref{Quoting, ,Shell Quoting Issues}), -and it's often difficult to correctly -match up the quotes when reading the program. - -A better method is to use @command{awk}'s variable assignment feature -(@pxref{Assignment Options, ,Assigning Variables on the Command Line}) -to assign the shell variable's value to an @command{awk} variable. -Then use dynamic regexps to match the pattern -(@pxref{Computed Regexps, ,Using Dynamic Regexps}). -The following shows how to redo the -previous example using this technique: - -@example -echo -n "Enter search pattern: " -read pattern -awk -v pat="$pattern" '$0 ~ pat @{ nmatches++ @} - END @{ print nmatches, "found" @}' /path/to/data -@end example - -@noindent -Now, the @command{awk} program is just one single-quoted string.
-The assignment @samp{-v pat="$pattern"} still requires double quotes, -in case there is whitespace in the value of @code{$pattern}. -The @command{awk} variable @code{pat} could be named @code{pattern} -too, but that would be more confusing. Using a variable also -provides more flexibility, since the variable can be used anywhere inside -the program---for printing, as an array subscript, or for any other -use---without requiring the quoting tricks at every point in the program. - -@node Action Overview, Statements, Using Shell Variables, Patterns and Actions -@section Actions -@cindex action, definition of -@cindex curly braces -@cindex action, curly braces -@cindex action, separating statements - -An @command{awk} program or script consists of a series of -rules and function definitions interspersed. (Functions are -described later. @xref{User-defined, ,User-Defined Functions}.) -A rule contains a pattern and an action, either of which (but not -both) may be omitted. The purpose of the @dfn{action} is to tell -@command{awk} what to do once a match for the pattern is found. Thus, -in outline, an @command{awk} program generally looks like this: - -@example -@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]} -@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]} -@dots{} -function @var{name}(@var{args}) @{ @dots{} @} -@dots{} -@end example - -An action consists of one or more @command{awk} @dfn{statements}, enclosed -in curly braces (@samp{@{} and @samp{@}}). Each statement specifies one -thing to do. The statements are separated by newlines or semicolons. -The curly braces around an action must be used even if the action -contains only one statement, or if it contains no statements at -all. However, if you omit the action entirely, omit the curly braces as -well. An omitted action is equivalent to @samp{@{ print $0 @}}: - -@example -/foo/ @{ @} @i{match @code{foo}, do nothing --- empty action} -/foo/ @i{match @code{foo}, print the record --- omitted action} -@end example - -The following types of statements are supported in @command{awk}: - -@itemize @bullet -@cindex side effects -@item -Expressions, which can call functions or assign values to variables -(@pxref{Expressions}). Executing -this kind of statement simply computes the value of the expression. -This is useful when the expression has side effects -(@pxref{Assignment Ops, ,Assignment Expressions}). - -@item -Control statements, which specify the control flow of @command{awk} -programs. The @command{awk} language gives you C-like constructs -(@code{if}, @code{for}, @code{while}, and @code{do}) as well as a few -special ones (@pxref{Statements, ,Control Statements in Actions}). - -@item -Compound statements, which consist of one or more statements enclosed in -curly braces. A compound statement is used in order to put several -statements together in the body of an @code{if}, @code{while}, @code{do}, -or @code{for} statement. - -@item -Input statements using the @code{getline} command -(@pxref{Getline, ,Explicit Input with @code{getline}}), the @code{next} -statement (@pxref{Next Statement, ,The @code{next} Statement}), -and the @code{nextfile} statement -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). - -@item -Output statements, such as @code{print} and @code{printf}. -@xref{Printing, ,Printing Output}. - -@item -Deletion statements for deleting array elements. -@xref{Delete, ,The @code{delete} Statement}. 
-@end itemize - -@node Statements, Built-in Variables, Action Overview, Patterns and Actions -@section Control Statements in Actions -@cindex control statement - -@dfn{Control statements}, such as @code{if}, @code{while}, and so on, -control the flow of execution in @command{awk} programs. Most of the -control statements in @command{awk} are patterned on similar statements in C. - -@cindex compound statement -@cindex statement, compound -All the control statements start with special keywords, such as @code{if} -and @code{while}, to distinguish them from simple expressions. -Many control statements contain other statements. For example, the -@code{if} statement contains another statement that may or may not be -executed. The contained statement is called the @dfn{body}. -To include more than one statement in the body, group them into a -single @dfn{compound statement} with curly braces, separating them with -newlines or semicolons. - -@menu -* If Statement:: Conditionally execute some @command{awk} - statements. -* While Statement:: Loop until some condition is satisfied. -* Do Statement:: Do specified action while looping until some - condition is satisfied. -* For Statement:: Another looping statement, that provides - initialization and increment clauses. -* Break Statement:: Immediately exit the innermost enclosing loop. -* Continue Statement:: Skip to the end of the innermost enclosing - loop. -* Next Statement:: Stop processing the current input record. -* Nextfile Statement:: Stop processing the current file. -* Exit Statement:: Stop execution of @command{awk}. -@end menu - -@node If Statement, While Statement, Statements, Statements -@subsection The @code{if}-@code{else} Statement - -@cindex @code{if}-@code{else} statement -The @code{if}-@code{else} statement is @command{awk}'s decision-making -statement. It looks like this: - -@example -if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]} -@end example - -@noindent -The @var{condition} is an expression that controls what the rest of the -statement does. If the @var{condition} is true, @var{then-body} is -executed; otherwise, @var{else-body} is executed. -The @code{else} part of the statement is -optional. The condition is considered false if its value is zero or -the null string; otherwise the condition is true. -Refer to the following: - -@example -if (x % 2 == 0) - print "x is even" -else - print "x is odd" -@end example - -In this example, if the expression @samp{x % 2 == 0} is true (that is, -if the value of @code{x} is evenly divisible by two), then the first -@code{print} statement is executed; otherwise the second @code{print} -statement is executed. -If the @code{else} keyword appears on the same line as @var{then-body} and -@var{then-body} is not a compound statement (i.e., not surrounded by -curly braces), then a semicolon must separate @var{then-body} from -the @code{else}. -To illustrate this, the previous example can be rewritten as: - -@example -if (x % 2 == 0) print "x is even"; else - print "x is odd" -@end example - -@noindent -If the @samp{;} is left out, @command{awk} can't interpret the statement and -it produces a syntax error. Don't actually write programs this way, -because a human reader might fail to see the @code{else} if it is not -the first thing on its line. 
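-
-An @var{else-body} may itself be another @code{if}-@code{else} statement,
-which is the usual way to write a multiway test.  A brief sketch:
-
-@example
-if (x < 0)
-    print "x is negative"
-else if (x == 0)
-    print "x is zero"
-else
-    print "x is positive"
-@end example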
- -@node While Statement, Do Statement, If Statement, Statements -@subsection The @code{while} Statement -@cindex @code{while} statement -@cindex loop -@cindex body of a loop - -In programming, a @dfn{loop} is a part of a program that can -be executed two or more times in succession. -The @code{while} statement is the simplest looping statement in -@command{awk}. It repeatedly executes a statement as long as a condition is -true. For example: - -@example -while (@var{condition}) - @var{body} -@end example - -@noindent -@var{body} is a statement called the @dfn{body} of the loop, -and @var{condition} is an expression that controls how long the loop -keeps running. -The first thing the @code{while} statement does is test the @var{condition}. -If the @var{condition} is true, it executes the statement @var{body}. -@ifinfo -(The @var{condition} is true when the value -is not zero and not a null string.) -@end ifinfo -After @var{body} has been executed, -@var{condition} is tested again, and if it is still true, @var{body} is -executed again. This process repeats until the @var{condition} is no longer -true. If the @var{condition} is initially false, the body of the loop is -never executed and @command{awk} continues with the statement following -the loop. -This example prints the first three fields of each record, one per line: - -@example -awk '@{ i = 1 - while (i <= 3) @{ - print $i - i++ - @} -@}' inventory-shipped -@end example - -@noindent -The body of this loop is a compound statement enclosed in braces, -containing two statements. -The loop works in the following manner: first, the value of @code{i} is set to one. -Then, the @code{while} statement tests whether @code{i} is less than or equal to -three. This is true when @code{i} equals one, so the @code{i}-th -field is printed. Then the @samp{i++} increments the value of @code{i} -and the loop repeats. The loop terminates when @code{i} reaches four. - -A newline is not required between the condition and the -body; however using one makes the program clearer unless the body is a -compound statement or else is very simple. The newline after the open-brace -that begins the compound statement is not required either, but the -program is harder to read without it. - -@node Do Statement, For Statement, While Statement, Statements -@subsection The @code{do}-@code{while} Statement -@cindex @code{do}-@code{while} statement - -The @code{do} loop is a variation of the @code{while} looping statement. -The @code{do} loop executes the @var{body} once and then repeats the -@var{body} as long as the @var{condition} is true. It looks like this: - -@example -do - @var{body} -while (@var{condition}) -@end example - -Even if the @var{condition} is false at the start, the @var{body} is -executed at least once (and only once, unless executing @var{body} -makes @var{condition} true). Contrast this with the corresponding -@code{while} statement: - -@example -while (@var{condition}) - @var{body} -@end example - -@noindent -This statement does not execute @var{body} even once if the @var{condition} -is false to begin with. -The following is an example of a @code{do} statement: - -@example -@{ i = 1 - do @{ - print $0 - i++ - @} while (i <= 10) -@} -@end example - -@noindent -This program prints each input record ten times. However, it isn't a very -realistic example, since in this case an ordinary @code{while} would do -just as well. This situation reflects actual experience; only -occasionally is there a real use for a @code{do} statement. 
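-
-To see the difference concretely, the following sketch (the starting
-value is arbitrary) runs the @code{do} body once even though its
-condition is false from the start, while the equivalent @code{while}
-loop never runs at all:
-
-@example
-$ awk 'BEGIN @{
->     i = 10
->     do @{ print "do ran once, i =", i; i++ @} while (i < 5)
->     while (i < 5) print "while never runs"
-> @}'
-@print{} do ran once, i = 10
-@end example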
- -@node For Statement, Break Statement, Do Statement, Statements -@subsection The @code{for} Statement -@cindex @code{for} statement - -The @code{for} statement makes it more convenient to count iterations of a -loop. The general form of the @code{for} statement looks like this: - -@example -for (@var{initialization}; @var{condition}; @var{increment}) - @var{body} -@end example - -@noindent -The @var{initialization}, @var{condition}, and @var{increment} parts are -arbitrary @command{awk} expressions, and @var{body} stands for any -@command{awk} statement. - -The @code{for} statement starts by executing @var{initialization}. -Then, as long -as the @var{condition} is true, it repeatedly executes @var{body} and then -@var{increment}. Typically, @var{initialization} sets a variable to -either zero or one, @var{increment} adds one to it, and @var{condition} -compares it against the desired number of iterations. -For example: - -@example -awk '@{ for (i = 1; i <= 3; i++) - print $i -@}' inventory-shipped -@end example - -@noindent -This prints the first three fields of each input record, with one field per -line. - -It isn't possible to -set more than one variable in the -@var{initialization} part without using a multiple assignment statement -such as @samp{x = y = 0}. This makes sense only if all the initial values -are equal. (But it is possible to initialize additional variables by writing -their assignments as separate statements preceding the @code{for} loop.) - -@cindex comma operator, not supported -The same is true of the @var{increment} part. Incrementing additional -variables requires separate statements at the end of the loop. -The C compound expression, using C's comma operator, is useful in -this context but it is not supported in @command{awk}. - -Most often, @var{increment} is an increment expression, as in the previous -example. But this is not required; it can be any expression -whatsoever. For example, the following statement prints all the powers of two -between 1 and 100: - -@example -for (i = 1; i <= 100; i *= 2) - print i -@end example - -If there is nothing to be done, any of the three expressions in the -parentheses following the @code{for} keyword may be omitted. Thus, -@w{@samp{for (; x > 0;)}} is equivalent to @w{@samp{while (x > 0)}}. If the -@var{condition} is omitted, it is treated as true, effectively -yielding an @dfn{infinite loop} (i.e., a loop that never terminates). - -In most cases, a @code{for} loop is an abbreviation for a @code{while} -loop, as shown here: - -@example -@var{initialization} -while (@var{condition}) @{ - @var{body} - @var{increment} -@} -@end example - -@noindent -The only exception is when the @code{continue} statement -(@pxref{Continue Statement, ,The @code{continue} Statement}) is used -inside the loop. Changing a @code{for} statement to a @code{while} -statement in this way can change the effect of the @code{continue} -statement inside the loop. - -The @command{awk} language has a @code{for} statement in addition to a -@code{while} statement because a @code{for} loop is often both less work to -type and more natural to think of. Counting the number of iterations is -very common in loops. It can be easier to think of this counting as part -of looping rather than as something to do inside the loop. 
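-
-Because @command{awk} has no comma operator, a loop that steps a second
-variable can be sketched by initializing that variable before the loop
-and updating it inside the body:
-
-@example
-j = 10
-for (i = 1; i <= 5; i++) @{
-    print i, j
-    j--
-@}
-@end example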
- -@ifinfo -@cindex @code{in} operator -There is an alternate version of the @code{for} loop, for iterating over -all the indices of an array: - -@example -for (i in array) - @var{do something with} array[i] -@end example - -@noindent -@xref{Scanning an Array, ,Scanning All Elements of an Array}, -for more information on this version of the @code{for} loop. -@end ifinfo - -@node Break Statement, Continue Statement, For Statement, Statements -@subsection The @code{break} Statement -@cindex @code{break} statement -@cindex loops, exiting - -The @code{break} statement jumps out of the innermost @code{for}, -@code{while}, or @code{do} loop that encloses it. The following example -finds the smallest divisor of any integer, and also identifies prime -numbers: - -@example -# find smallest divisor of num -@{ - num = $1 - for (div = 2; div*div <= num; div++) - if (num % div == 0) - break - if (num % div == 0) - printf "Smallest divisor of %d is %d\n", num, div - else - printf "%d is prime\n", num -@} -@end example - -When the remainder is zero in the first @code{if} statement, @command{awk} -immediately @dfn{breaks out} of the containing @code{for} loop. This means -that @command{awk} proceeds immediately to the statement following the loop -and continues processing. (This is very different from the @code{exit} -statement, which stops the entire @command{awk} program. -@xref{Exit Statement, ,The @code{exit} Statement}.) - -The following program illustrates how the @var{condition} of a @code{for} -or @code{while} statement could be replaced with a @code{break} inside -an @code{if}: - -@example -# find smallest divisor of num -@{ - num = $1 - for (div = 2; ; div++) @{ - if (num % div == 0) @{ - printf "Smallest divisor of %d is %d\n", num, div - break - @} - if (div*div > num) @{ - printf "%d is prime\n", num - break - @} - @} -@} -@end example - -@cindex @code{break}, outside of loops -@cindex historical features -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@cindex dark corner -The @code{break} statement has no meaning when -used outside the body of a loop. However, although it was never documented, -historical implementations of @command{awk} treated the @code{break} -statement outside of a loop as if it were a @code{next} statement -(@pxref{Next Statement, ,The @code{next} Statement}). -Recent versions of Unix @command{awk} no longer allow this usage. -@command{gawk} supports this use of @code{break} only -if @option{--traditional} -has been specified on the command line -(@pxref{Options, ,Command-Line Options}). -Otherwise, it is treated as an error, since the POSIX standard -specifies that @code{break} should only be used inside the body of a -loop. -@value{DARKCORNER} - -@node Continue Statement, Next Statement, Break Statement, Statements -@subsection The @code{continue} Statement - -@cindex @code{continue} statement -As with @code{break}, the @code{continue} statement is used only inside -@code{for}, @code{while}, and @code{do} loops. It skips -over the rest of the loop body, causing the next cycle around the loop -to begin immediately. Contrast this with @code{break}, which jumps out -of the loop altogether. - -The @code{continue} statement in a @code{for} loop directs @command{awk} to -skip the rest of the body of the loop and resume execution with the -increment-expression of the @code{for} statement.
The following program -illustrates this fact: - -@example -BEGIN @{ - for (x = 0; x <= 20; x++) @{ - if (x == 5) - continue - printf "%d ", x - @} - print "" -@} -@end example - -@noindent -This program prints all the numbers from 0 to 20---except for five, for -which the @code{printf} is skipped. Because the increment @samp{x++} -is not skipped, @code{x} does not remain stuck at five. Contrast the -@code{for} loop from the previous example with the following @code{while} loop: - -@example -BEGIN @{ - x = 0 - while (x <= 20) @{ - if (x == 5) - continue - printf "%d ", x - x++ - @} - print "" -@} -@end example - -@noindent -This program loops forever once @code{x} reaches five. - -@cindex @code{continue}, outside of loops -@cindex historical features -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@cindex dark corner -The @code{continue} statement has no meaning when used outside the body of -a loop. Historical versions of @command{awk} treated a @code{continue} -statement outside a loop the same way they treated a @code{break} -statement outside a loop: as if it were a @code{next} -statement -(@pxref{Next Statement, ,The @code{next} Statement}). -Recent versions of Unix @command{awk} no longer work this way, and -@command{gawk} allows it only if @option{--traditional} is specified on -the command line (@pxref{Options, ,Command-Line Options}). Just like the -@code{break} statement, the POSIX standard specifies that @code{continue} -should only be used inside the body of a loop. -@value{DARKCORNER} - -@node Next Statement, Nextfile Statement, Continue Statement, Statements -@subsection The @code{next} Statement -@cindex @code{next} statement - -The @code{next} statement forces @command{awk} to immediately stop processing -the current record and go on to the next record. This means that no -further rules are executed for the current record, and the rest of the -current rule's action isn't executed. - -Contrast this with the effect of the @code{getline} function -(@pxref{Getline, ,Explicit Input with @code{getline}}). That also causes -@command{awk} to read the next record immediately, but it does not alter the -flow of control in any way (i.e., the rest of the current action executes -with a new input record). - -At the highest level, @command{awk} program execution is a loop that reads -an input record and then tests each rule's pattern against it. If you -think of this loop as a @code{for} statement whose body contains the -rules, then the @code{next} statement is analogous to a @code{continue} -statement. It skips to the end of the body of this implicit loop and -executes the increment (which reads another record). - -For example, suppose an @command{awk} program works only on records -with four fields, and it shouldn't fail when given bad input. To avoid -complicating the rest of the program, write a ``weed out'' rule near -the beginning, in the following manner: - -@example -NF != 4 @{ - err = sprintf("%s:%d: skipped: NF != 4\n", FILENAME, FNR) - print err > "/dev/stderr" - next -@} -@end example - -@noindent -Because of the @code{next} statement, -the program's subsequent rules won't see the bad record. The error -message is redirected to the standard error output stream, as error -messages should be. -@xref{Special Files, ,Special @value{FFN}s in @command{gawk}}. 
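-
-As a sketch of the effect (@file{sample.dat} is a hypothetical file),
-only the records with exactly four fields reach the rules that follow
-the ``weed out'' rule:
-
-@example
-$ cat sample.dat
-@print{} a b c d
-@print{} oops
-@print{} w x y z
-$ awk 'NF != 4 @{ next @}
->      @{ print $4 @}' sample.dat
-@print{} d
-@print{} z
-@end example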
- -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@cindex @code{next}, inside a user-defined function -According to the POSIX standard, the behavior is undefined if -the @code{next} statement is used in a @code{BEGIN} or @code{END} rule. -@command{gawk} treats it as a syntax error. -Although POSIX permits it, -some other @command{awk} implementations don't allow the @code{next} -statement inside function bodies -(@pxref{User-defined, ,User-Defined Functions}). -Just as with any other @code{next} statement, a @code{next} statement inside a -function body reads the next record and starts processing it with the -first rule in the program. -If the @code{next} statement causes the end of the input to be reached, -then the code in any @code{END} rules is executed. -@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}. - -@node Nextfile Statement, Exit Statement, Next Statement, Statements -@subsection Using @command{gawk}'s @code{nextfile} Statement -@cindex @code{nextfile} statement -@cindex differences between @command{gawk} and @command{awk} - -@command{gawk} provides the @code{nextfile} statement, -which is similar to the @code{next} statement. -However, instead of abandoning processing of the current record, the -@code{nextfile} statement instructs @command{gawk} to stop processing the -current @value{DF}. - -The @code{nextfile} statement is a @command{gawk} extension. -In most other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -@code{nextfile} is not special. - -Upon execution of the @code{nextfile} statement, @code{FILENAME} is -updated to the name of the next @value{DF} listed on the command line, -@code{FNR} is reset to one, @code{ARGIND} is incremented, and processing -starts over with the first rule in the program. -(@code{ARGIND} hasn't been introduced yet. @xref{Built-in Variables}.) -If the @code{nextfile} statement causes the end of the input to be reached, -then the code in any @code{END} rules is executed. -@xref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}. - -The @code{nextfile} statement is useful when there are many @value{DF}s -to process but it isn't necessary to process every record in every file. -Normally, in order to move on to the next @value{DF}, a program -has to continue scanning the unwanted records. The @code{nextfile} -statement accomplishes this much more efficiently. - -While one might think that @samp{close(FILENAME)} would accomplish -the same as @code{nextfile}, this isn't true. @code{close} is -reserved for closing files, pipes, and coprocesses that are -opened with redirections. It is not related to the main processing that -@command{awk} does with the files listed in @code{ARGV}. - -If it's necessary to use an @command{awk} version that doesn't support -@code{nextfile}, see -@ref{Nextfile Function, ,Implementing @code{nextfile} as a Function}, -for a user-defined function that simulates the @code{nextfile} -statement. - -@cindex @code{nextfile}, inside a user-defined function -The current version of the Bell Laboratories @command{awk} -(@pxref{Other Versions, ,Other Freely Available @command{awk} Implementations}) -also supports @code{nextfile}. However, it doesn't allow the @code{nextfile} -statement inside function bodies -(@pxref{User-defined, ,User-Defined Functions}). 
-@command{gawk} does; a @code{nextfile} inside a -function body reads the next record and starts processing it with the -first rule in the program, just as any other @code{nextfile} statement. - -@cindex @code{next file} statement -@strong{Caution:} Versions of @command{gawk} prior to 3.0 used two -words (@samp{next file}) for the @code{nextfile} statement. -In @value{PVERSION} 3.0, this was changed -to one word, because the treatment of @samp{file} was -inconsistent. When it appeared after @code{next}, @samp{file} was a keyword; -otherwise, it was a regular identifier. The old usage is no longer -accepted; @samp{next file} generates a syntax error. - -@node Exit Statement, , Nextfile Statement, Statements -@subsection The @code{exit} Statement - -@cindex @code{exit} statement -The @code{exit} statement causes @command{awk} to immediately stop -executing the current rule and to stop processing input; any remaining input -is ignored. The @code{exit} statement is written as follows: - -@example -exit @r{[}@var{return code}@r{]} -@end example - -When an @code{exit} statement is executed from a @code{BEGIN} rule, the -program stops processing everything immediately. No input records are -read. However, if an @code{END} rule is present, -as part of executing the @code{exit} statement, -the @code{END} rule is executed -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). -If @code{exit} is used as part of an @code{END} rule, it causes -the program to stop immediately. - -An @code{exit} statement that is not part of a @code{BEGIN} or @code{END} -rule stops the execution of any further automatic rules for the current -record, skips reading any remaining input records, and executes the -@code{END} rule if there is one. - -In such a case, -if you don't want the @code{END} rule to do its job, set a variable -to nonzero before the @code{exit} statement and check that variable in -the @code{END} rule. -@xref{Assert Function, ,Assertions}, -for an example that does this. - -@cindex dark corner -If an argument is supplied to @code{exit}, its value is used as the exit -status code for the @command{awk} process. If no argument is supplied, -@code{exit} returns status zero (success). In the case where an argument -is supplied to a first @code{exit} statement, and then @code{exit} is -called a second time from an @code{END} rule with no argument, -@command{awk} uses the previously supplied exit value. -@value{DARKCORNER} - -@cindex conventions, programming -@cindex programming conventions -For example, suppose an error condition occurs that is difficult or -impossible to handle. Conventionally, programs report this by -exiting with a nonzero status. An @command{awk} program can do this -using an @code{exit} statement with a nonzero argument, as shown -in the following example: - -@example -BEGIN @{ - if (("date" | getline date_now) <= 0) @{ - print "Can't get system date" > "/dev/stderr" - exit 1 - @} - print "current date is", date_now - close("date") -@} -@end example - -@node Built-in Variables, , Statements, Patterns and Actions -@section Built-in Variables -@cindex built-in variables - -Most @command{awk} variables are available for you to use for your own -purposes; they never change unless your program assigns values to -them, and they never affect anything unless your program examines them. -However, a few variables in @command{awk} have special built-in meanings. 
-@command{awk} examines some of these automatically, so that they enable you -to tell @command{awk} how to do certain things. Others are set -automatically by @command{awk}, so that they carry information from the -internal workings of @command{awk} to your program. - -This @value{SECTION} documents all the built-in variables of -@command{gawk}, most of which are also documented in the chapters -describing their areas of activity. - -@menu -* User-modified:: Built-in variables that you change to control - @command{awk}. -* Auto-set:: Built-in variables where @command{awk} gives - you information. -* ARGC and ARGV:: Ways to use @code{ARGC} and @code{ARGV}. -@end menu - -@node User-modified, Auto-set, Built-in Variables, Built-in Variables -@subsection Built-in Variables That Control @command{awk} -@cindex built-in variables, user modifiable - -The following is an alphabetical list of variables that you can change to -control how @command{awk} does certain things. The variables that are -specific to @command{gawk} are marked with a pound sign (@samp{#}). - -@table @code -@cindex @code{BINMODE} variable -@cindex binary I/O -@cindex I/O, binary -@cindex differences between @command{gawk} and @command{awk} -@item BINMODE # -On non-POSIX systems, this variable specifies use of ``binary'' mode for all I/O. -Numeric values of one, two, or three, specify that input files, output files, or -all files, respectively, should use binary I/O. -Alternatively, -string values of @code{"r"} or @code{"w"} specify that input files and -output files, respectively, should use binary I/O. -A string value of @code{"rw"} or @code{"wr"} indicates that all -files should use binary I/O. -Any other string value is equivalent to @code{"rw"}, but @command{gawk} -generates a warning message. -@code{BINMODE} is described in more detail in -@ref{PC Using, ,Using @command{gawk} on PC Operating Systems}. - -This variable is a @command{gawk} extension. -In other @command{awk} implementations -(except @command{mawk}, -@pxref{Other Versions, , Other Freely Available @command{awk} Implementations}), -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -it is not special. - -@cindex @code{CONVFMT} variable -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@item CONVFMT -This string controls conversion of numbers to -strings (@pxref{Conversion, ,Conversion of Strings and Numbers}). -It works by being passed, in effect, as the first argument to the -@code{sprintf} function -(@pxref{String Functions, ,String Manipulation Functions}). -Its default value is @code{"%.6g"}. -@code{CONVFMT} was introduced by the POSIX standard. - -@cindex @code{FIELDWIDTHS} variable -@item FIELDWIDTHS # -This is a space-separated list of columns that tells @command{gawk} -how to split input with fixed columnar boundaries. -Assigning a value to @code{FIELDWIDTHS} -overrides the use of @code{FS} for field splitting. -@xref{Constant Size, ,Reading Fixed-Width Data}, for more information. - -If @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), then @code{FIELDWIDTHS} -has no special meaning, and field-splitting operations occur based -exclusively on the value of @code{FS}. - -@cindex @code{FS} variable -@item FS -This is the input field separator -(@pxref{Field Separators, ,Specifying How Fields Are Separated}). -The value is a single-character string or a multi-character regular -expression that matches the separations between fields in an input -record. 
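-
-As an illustrative sketch (the sample data here is invented), @code{FS}
-may be set to a regular expression so that a comma followed by any
-amount of whitespace counts as a single separator:
-
-@example
-$ echo 'apple, banana,cherry' |
-> awk 'BEGIN @{ FS = ", *" @} @{ print $2 @}'
-@print{} banana
-@end example
-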
If the value is the null string (@code{""}), then each -character in the record becomes a separate field. -(This behavior is a @command{gawk} extension. POSIX @command{awk} does not -specify the behavior when @code{FS} is the null string.) -@c NEXT ED: Mark as common extension - -The default value is @w{@code{" "}}, a string consisting of a single -space. As a special exception, this value means that any -sequence of spaces, tabs, and/or newlines is a single separator.@footnote{In -POSIX @command{awk}, newline does not count as whitespace.} It also causes -spaces, tabs, and newlines at the beginning and end of a record to be ignored. - -You can set the value of @code{FS} on the command line using the -@option{-F} option: - -@example -awk -F, '@var{program}' @var{input-files} -@end example - -If @command{gawk} is using @code{FIELDWIDTHS} for field splitting, -assigning a value to @code{FS} causes @command{gawk} to return to -the normal, @code{FS}-based field splitting. An easy way to do this -is to simply say @samp{FS = FS}, perhaps with an explanatory comment. - -@cindex @code{IGNORECASE} variable -@item IGNORECASE # -If @code{IGNORECASE} is nonzero or non-null, then all string comparisons -and all regular expression matching are case-independent. Thus, regexp -matching with @samp{~} and @samp{!~}, as well as the @code{gensub}, -@code{gsub}, @code{index}, @code{match}, @code{split}, and @code{sub} -functions, record termination with @code{RS}, and field splitting with -@code{FS}, all ignore case when doing their particular regexp operations. -However, the value of @code{IGNORECASE} does @emph{not} affect array subscripting. -@xref{Case-sensitivity, ,Case Sensitivity in Matching}. - -If @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -then @code{IGNORECASE} has no special meaning. Thus, string -and regexp operations are always case-sensitive. - -@cindex @code{LINT} variable -@cindex differences between @command{gawk} and @command{awk} -@cindex lint checks -@item LINT # -When this variable is true (nonzero or non-null), @command{gawk} -behaves as if the @option{--lint} command-line option is in effect. -(@pxref{Options, ,Command-Line Options}). -With a value of @code{"fatal"}, lint warnings become fatal errors. -Any other true value prints non-fatal warnings. -Assigning a false value to @code{LINT} turns off the lint warnings. - -This variable is a @command{gawk} extension. It is not special -in other @command{awk} implementations. Unlike the other special variables, -changing @code{LINT} does affect the production of lint warnings, -even if @command{gawk} is in compatibility mode. Much as -the @option{--lint} and @option{--traditional} options independently -control different aspects of @command{gawk}'s behavior, the control -of lint warnings during program execution is independent of the flavor -of @command{awk} being executed. - -@cindex @code{OFMT} variable -@item OFMT -This string controls conversion of numbers to -strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for -printing with the @code{print} statement. It works by being passed -as the first argument to the @code{sprintf} function -(@pxref{String Functions, ,String Manipulation Functions}). -Its default value is @code{"%.6g"}. Earlier versions of @command{awk} -also used @code{OFMT} to specify the format for converting numbers to -strings in general expressions; this is now done by @code{CONVFMT}. 
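-
-The following fragment is a small sketch of the division of labor; the
-format strings and the value are chosen only for illustration.
-@code{OFMT} governs how @code{print} formats numbers, while
-@code{CONVFMT} governs conversions in other contexts, such as
-concatenation:
-
-@example
-$ awk 'BEGIN @{
->     OFMT = "%.2f"; CONVFMT = "%.4f"
->     x = 3.14159265
->     print x        # formatted with OFMT
->     print (x "")   # concatenation uses CONVFMT
-> @}'
-@print{} 3.14
-@print{} 3.1416
-@end example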
- -@cindex @code{OFS} variable -@item OFS -This is the output field separator (@pxref{Output Separators}). It is -output between the fields printed by a @code{print} statement. Its -default value is @w{@code{" "}}, a string consisting of a single space. - -@cindex @code{ORS} variable -@item ORS -This is the output record separator. It is output at the end of every -@code{print} statement. Its default value is @code{"\n"}, the newline -character. (@xref{Output Separators}.) - -@cindex @code{RS} variable -@item RS -This is @command{awk}'s input record separator. Its default value is a string -containing a single newline character, which means that an input record -consists of a single line of text. -It can also be the null string, in which case records are separated by -runs of blank lines. -If it is a regexp, records are separated by -matches of the regexp in the input text. -(@xref{Records, ,How Input Is Split into Records}.) - -The ability for @code{RS} to be a regular expression -is a @command{gawk} extension. -In most other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -just the first character of @code{RS}'s value is used. - -@cindex @code{SUBSEP} variable -@item SUBSEP -This is the subscript separator. It has the default value of -@code{"\034"} and is used to separate the parts of the indices of a -multidimensional array. Thus, the expression @code{@w{foo["A", "B"]}} -really accesses @code{foo["A\034B"]} -(@pxref{Multi-dimensional, ,Multidimensional Arrays}). - -@cindex @code{TEXTDOMAIN} variable -@cindex internationalization -@item TEXTDOMAIN # -This variable is used for internationalization of programs at the -@command{awk} level. It sets the default text domain for specially -marked string constants in the source text, as well as for the -@code{dcgettext} and @code{bindtextdomain} functions -(@pxref{Internationalization, ,Internationalization with @command{gawk}}). -The default value of @code{TEXTDOMAIN} is @code{"messages"}. - -This variable is a @command{gawk} extension. -In other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -it is not special. -@end table - -@node Auto-set, ARGC and ARGV, User-modified, Built-in Variables -@subsection Built-in Variables That Convey Information -@cindex built-in variables, convey information - -The following is an alphabetical list of variables that @command{awk} -sets automatically on certain occasions in order to provide -information to your program. The variables that are specific to -@command{gawk} are marked with an asterisk (@samp{*}). - -@table @code -@cindex @code{ARGC} variable -@cindex @code{ARGV} variable -@item ARGC@r{,} ARGV -The command-line arguments available to @command{awk} programs are stored in -an array called @code{ARGV}. @code{ARGC} is the number of command-line -arguments present. @xref{Other Arguments, ,Other Command-Line Arguments}. -Unlike most @command{awk} arrays, -@code{ARGV} is indexed from 0 to @code{ARGC} @minus{} 1. -In the following example: - -@example -$ awk 'BEGIN @{ -> for (i = 0; i < ARGC; i++) -> print ARGV[i] -> @}' inventory-shipped BBS-list -@print{} awk -@print{} inventory-shipped -@print{} BBS-list -@end example - -@noindent -@code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]} -contains @code{"inventory-shipped"} and @code{ARGV[2]} contains -@code{"BBS-list"}. 
The value of @code{ARGC} is three, one more than the -index of the last element in @code{ARGV}, because the elements are numbered -from zero. - -@cindex conventions, programming -@cindex programming conventions -The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing -the array from 0 to @code{ARGC} @minus{} 1, are derived from the C language's -method of accessing command-line arguments. - -The value of @code{ARGV[0]} can vary from system to system. -Also, you should note that the program text is @emph{not} included in -@code{ARGV}, nor are any of @command{awk}'s command-line options. -@xref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}, for information -about how @command{awk} uses these variables. - -@cindex @code{ARGIND} variable -@item ARGIND # -This is the index in @code{ARGV} of the current file being processed. -Every time @command{gawk} opens a new @value{DF} for processing, it sets -@code{ARGIND} to the index in @code{ARGV} of the @value{FN}. -When @command{gawk} is processing the input files, -@samp{FILENAME == ARGV[ARGIND]} is always true. - -This variable is useful in file processing; it allows you to tell how far -along you are in the list of @value{DF}s as well as to distinguish between -successive instances of the same @value{FN} on the command line. - -While you can change the value of @code{ARGIND} within your @command{awk} -program, @command{gawk} automatically sets it to a new value when the -next file is opened. - -This variable is a @command{gawk} extension. -In other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -it is not special. - -@cindex @code{ENVIRON} variable -@item ENVIRON -An associative array that contains the values of the environment. The array -indices are the environment variable names; the elements are the values of -the particular environment variables. For example, -@code{ENVIRON["HOME"]} might be @file{/home/arnold}. Changing this array -does not affect the environment passed on to any programs that -@command{awk} may spawn via redirection or the @code{system} function. -@c (In a future version of @command{gawk}, it may do so.) - -Some operating systems may not have environment variables. -On such systems, the @code{ENVIRON} array is empty (except for -@w{@code{ENVIRON["AWKPATH"]}}, -@pxref{AWKPATH Variable, ,The @env{AWKPATH} Environment Variable}). - -@cindex @code{ERRNO} variable -@item ERRNO # -If a system error occurs during a redirection for @code{getline}, -during a read for @code{getline}, or during a @code{close} operation, -then @code{ERRNO} contains a string describing the error. - -This variable is a @command{gawk} extension. -In other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -it is not special. - -@cindex dark corner -@cindex @code{FILENAME} variable -@item FILENAME -This is the name of the file that @command{awk} is currently reading. -When no @value{DF}s are listed on the command line, @command{awk} reads -from the standard input and @code{FILENAME} is set to @code{"-"}. -@code{FILENAME} is changed each time a new file is read -(@pxref{Reading Files, ,Reading Input Files}). -Inside a @code{BEGIN} rule, the value of @code{FILENAME} is -@code{""}, since there are no input files being processed -yet.@footnote{Some early implementations of Unix @command{awk} initialized -@code{FILENAME} to @code{"-"}, even if there were @value{DF}s to be -processed. 
This behavior was incorrect and should not be relied -upon in your programs.} -@value{DARKCORNER} -Note though, that using @code{getline} -(@pxref{Getline, ,Explicit Input with @code{getline}}) -inside a @code{BEGIN} rule can give -@code{FILENAME} a value. - -@cindex @code{FNR} variable -@item FNR -This is the current record number in the current file. @code{FNR} is -incremented each time a new record is read -(@pxref{Getline, ,Explicit Input with @code{getline}}). It is reinitialized -to zero each time a new input file is started. - -@cindex @code{NF} variable -@item NF -This is the number of fields in the current input record. -@code{NF} is set each time a new record is read, when a new field is -created or when @code{$0} changes (@pxref{Fields, ,Examining Fields}). - -@cindex @code{NR} variable -@item NR -This is the number of input records @command{awk} has processed since -the beginning of the program's execution -(@pxref{Records, ,How Input Is Split into Records}). -@code{NR} is incremented each time a new record is read. - -@cindex @code{PROCINFO} variable -@item PROCINFO # -The elements of this array provide access to information about the -running @command{awk} program. -The following elements (listed alphabetically) -are guaranteed to be available: - -@table @code -@item PROCINFO["egid"] -The value of the @code{getegid} system call. - -@item PROCINFO["euid"] -The value of the @code{geteuid} system call. - -@item PROCINFO["FS"] -This is -@code{"FS"} if field splitting with @code{FS} is in effect, or it is -@code{"FIELDWIDTHS"} if field splitting with @code{FIELDWIDTHS} is in effect. - -@item PROCINFO["gid"] -The value of the @code{getgid} system call. - -@item PROCINFO["pgrpid"] -The process group ID of the current process. - -@item PROCINFO["pid"] -The process ID of the current process. - -@item PROCINFO["ppid"] -The parent process ID of the current process. - -@item PROCINFO["uid"] -The value of the @code{getuid} system call. -@end table - -On some systems, there may be elements in the array, @code{"group1"} -through @code{"group@var{N}"} for some @var{N}. @var{N} is the number of -supplementary groups that the process has. Use the @code{in} operator -to test for these elements -(@pxref{Reference to Elements, , Referring to an Array Element}). - -This array is a @command{gawk} extension. -In other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -it is not special. - -@cindex @code{RLENGTH} variable -@item RLENGTH -This is the length of the substring matched by the -@code{match} function -(@pxref{String Functions, ,String Manipulation Functions}). -@code{RLENGTH} is set by invoking the @code{match} function. Its value -is the length of the matched string, or @minus{}1 if no match is found. - -@cindex @code{RSTART} variable -@item RSTART -This is the start-index in characters of the substring that is matched by the -@code{match} function -(@pxref{String Functions, ,String Manipulation Functions}). -@code{RSTART} is set by invoking the @code{match} function. Its value -is the position of the string where the matched substring starts, or zero -if no match was found. - -@cindex @code{RT} variable -@item RT # -This is set each time a record is read. It contains the input text -that matched the text denoted by @code{RS}, the record separator. - -This variable is a @command{gawk} extension. 
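-
-For example (an illustrative sketch with made-up input), when @code{RS}
-is a regular expression, @code{RT} reports the text that actually
-terminated each record:
-
-@example
-$ printf 'aXXbYc' |
-> gawk 'BEGIN @{ RS = "[XY]+" @}
->       @{ print NR, $0, "(RT = \"" RT "\")" @}'
-@print{} 1 a (RT = "XX")
-@print{} 2 b (RT = "Y")
-@print{} 3 c (RT = "")
-@end example
-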
-In other @command{awk} implementations, -or if @command{gawk} is in compatibility mode -(@pxref{Options, ,Command-Line Options}), -it is not special. -@end table - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Changing @code{NR} and @code{FNR} -@cindex advanced notes -@cindex dark corner -@command{awk} increments @code{NR} and @code{FNR} -each time it reads a record, instead of setting them to the absolute -value of the number of records read. This means that a program can -change these variables and their new values are incremented for -each record. -@value{DARKCORNER} -This is demonstrated in the following example: - -@example -$ echo '1 -> 2 -> 3 -> 4' | awk 'NR == 2 @{ NR = 17 @} -> @{ print NR @}' -@print{} 1 -@print{} 17 -@print{} 18 -@print{} 19 -@end example - -@noindent -Before @code{FNR} was added to the @command{awk} language -(@pxref{V7/SVR3.1, ,Major Changes Between V7 and SVR3.1}), -many @command{awk} programs used this feature to track the number of -records in a file by resetting @code{NR} to zero when @code{FILENAME} -changed. - -@node ARGC and ARGV, , Auto-set, Built-in Variables -@subsection Using @code{ARGC} and @code{ARGV} - -@ref{Auto-set, ,Built-in Variables That Convey Information}, -presented the following program describing the information contained in @code{ARGC} -and @code{ARGV}: - -@example -$ awk 'BEGIN @{ -> for (i = 0; i < ARGC; i++) -> print ARGV[i] -> @}' inventory-shipped BBS-list -@print{} awk -@print{} inventory-shipped -@print{} BBS-list -@end example - -@noindent -In this example, @code{ARGV[0]} contains @samp{awk}, @code{ARGV[1]} -contains @samp{inventory-shipped}, and @code{ARGV[2]} contains -@samp{BBS-list}. -Notice that the @command{awk} program is not entered in @code{ARGV}. The -other special command-line options, with their arguments, are also not -entered. This includes variable assignments done with the @option{-v} -option (@pxref{Options, ,Command-Line Options}). -Normal variable assignments on the command line @emph{are} -treated as arguments and do show up in the @code{ARGV} array: - -@example -$ cat showargs.awk -@print{} BEGIN @{ -@print{} printf "A=%d, B=%d\n", A, B -@print{} for (i = 0; i < ARGC; i++) -@print{} printf "\tARGV[%d] = %s\n", i, ARGV[i] -@print{} @} -@print{} END @{ printf "A=%d, B=%d\n", A, B @} -$ awk -v A=1 -f showargs.awk B=2 /dev/null -@print{} A=1, B=0 -@print{} ARGV[0] = awk -@print{} ARGV[1] = B=2 -@print{} ARGV[2] = /dev/null -@print{} A=1, B=2 -@end example - -A program can alter @code{ARGC} and the elements of @code{ARGV}. -Each time @command{awk} reaches the end of an input file, it uses the next -element of @code{ARGV} as the name of the next input file. By storing a -different string there, a program can change which files are read. -Use @code{"-"} to represent the standard input. Storing -additional elements and incrementing @code{ARGC} causes -additional files to be read. - -If the value of @code{ARGC} is decreased, that eliminates input files -from the end of the list. By recording the old value of @code{ARGC} -elsewhere, a program can treat the eliminated arguments as -something other than @value{FN}s. - -To eliminate a file from the middle of the list, store the null string -(@code{""}) into @code{ARGV} in place of the file's name. As a -special feature, @command{awk} ignores @value{FN}s that have been -replaced with the null string. -Another option is to -use the @code{delete} statement to remove elements from -@code{ARGV} (@pxref{Delete, ,The @code{delete} Statement}). 
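-
-As a simple sketch (the @value{FN}s here are purely illustrative), a
-@code{BEGIN} rule might drop one file from the list and append another:
-
-@example
-BEGIN @{
-    for (i = 1; i < ARGC; i++)
-        if (ARGV[i] == "skipme.txt")
-            ARGV[i] = ""          # ignored, as if deleted
-    ARGV[ARGC++] = "extra.txt"    # one more file to read
-@}
-@end example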
- -All of these actions are typically done in the @code{BEGIN} rule, -before actual processing of the input begins. -@xref{Split Program, ,Splitting a Large File into Pieces}, and see -@ref{Tee Program, ,Duplicating Output into Multiple Files}, for examples -of each way of removing elements from @code{ARGV}. -The following fragment processes @code{ARGV} in order to examine, and -then remove, command-line options: -@c NEXT ED: Add xref to rewind() function - -@example -BEGIN @{ - for (i = 1; i < ARGC; i++) @{ - if (ARGV[i] == "-v") - verbose = 1 - else if (ARGV[i] == "-d") - debug = 1 - else if (ARGV[i] ~ /^-/) @{ - e = sprintf("%s: unrecognized option -- %c", - ARGV[0], substr(ARGV[i], 2, 1)) - print e > "/dev/stderr" - @} else - break - delete ARGV[i] - @} -@} -@end example - -To actually get the options into the @command{awk} program, -end the @command{awk} options with @option{--} and then supply -the @command{awk} program's options, in the following manner: - -@example -awk -f myprog -- -v -d file1 file2 @dots{} -@end example - -@cindex differences between @command{gawk} and @command{awk} -This is not necessary in @command{gawk}. Unless @option{--posix} has -been specified, @command{gawk} silently puts any unrecognized options -into @code{ARGV} for the @command{awk} program to deal with. As soon -as it sees an unknown option, @command{gawk} stops looking for other -options that it might otherwise recognize. The previous example with -@command{gawk} would be: - -@example -gawk -f myprog -d -v file1 file2 @dots{} -@end example - -@noindent -Because @option{-d} is not a valid @command{gawk} option, -it and the following @option{-v} -are passed on to the @command{awk} program. - -@node Arrays, Functions, Patterns and Actions, Top -@chapter Arrays in @command{awk} - -An @dfn{array} is a table of values called @dfn{elements}. The -elements of an array are distinguished by their indices. @dfn{Indices} -may be either numbers or strings. - -This @value{CHAPTER} describes how arrays work in @command{awk}, -how to use array elements, how to scan through every element in an array, -and how to remove array elements. -It also describes how @command{awk} simulates multidimensional -arrays, as well as some of the less obvious points about array usage. -The @value{CHAPTER} finishes with a discussion of @command{gawk}'s facility -for sorting an array based on its indices. - -@cindex names, use of -@cindex namespace issues in @command{awk} -@command{awk} maintains a single set -of names that may be used for naming variables, arrays, and functions -(@pxref{User-defined, ,User-Defined Functions}). -Thus, you cannot have a variable and an array with the same name in the -same @command{awk} program. - -@menu -* Array Intro:: Introduction to Arrays -* Reference to Elements:: How to examine one element of an array. -* Assigning Elements:: How to change an element of an array. -* Array Example:: Basic Example of an Array -* Scanning an Array:: A variation of the @code{for} statement. It - loops through the indices of an array's - existing elements. -* Delete:: The @code{delete} statement removes an element - from an array. -* Numeric Array Subscripts:: How to use numbers as subscripts in - @command{awk}. -* Uninitialized Subscripts:: Using Uninitialized variables as subscripts. -* Multi-dimensional:: Emulating multidimensional arrays in - @command{awk}. -* Multi-scanning:: Scanning multidimensional arrays. -* Array Sorting:: Sorting array values and indices.
-@end menu - -@node Array Intro, Reference to Elements, Arrays, Arrays -@section Introduction to Arrays - -@cindex arrays -The @command{awk} language provides one-dimensional arrays -for storing groups of related strings or numbers. -Every @command{awk} array must have a name. Array names have the same -syntax as variable names; any valid variable name would also be a valid -array name. But one name cannot be used in both ways (as an array and -as a variable) in the same @command{awk} program. - -Arrays in @command{awk} superficially resemble arrays in other programming -languages, but there are fundamental differences. In @command{awk}, it -isn't necessary to specify the size of an array before starting to use it. -Additionally, any number or string in @command{awk}, not just consecutive integers, -may be used as an array index. - -In most other languages, arrays must be @dfn{declared} before use, -including a specification of -how many elements or components they contain. In such languages, the -declaration causes a contiguous block of memory to be allocated for that -many elements. Usually, an index in the array must be a positive integer. -For example, the index zero specifies the first element in the array, which is -actually stored at the beginning of the block of memory. Index one -specifies the second element, which is stored in memory right after the -first element, and so on. It is impossible to add more elements to the -array, because it has room only for as many elements as given in -the declaration. -(Some languages allow arbitrary starting and ending -indices---e.g., @samp{15 .. 27}---but the size of the array is still fixed when -the array is declared.) - -A contiguous array of four elements might look like the following example, -conceptually, if the element values are 8, @code{"foo"}, -@code{""}, and 30: - -@c NEXT ED: Use real images here -@iftex -@c from Karl Berry, much thanks for the help. -@tex -\bigskip % space above the table (about 1 linespace) -\offinterlineskip -\newdimen\width \width = 1.5cm -\newdimen\hwidth \hwidth = 4\width \advance\hwidth by 2pt % 5 * 0.4pt -\centerline{\vbox{ -\halign{\strut\hfil\ignorespaces#&&\vrule#&\hbox to\width{\hfil#\unskip\hfil}\cr -\noalign{\hrule width\hwidth} - &&{\tt 8} &&{\tt "foo"} &&{\tt ""} &&{\tt 30} &&\quad Value\cr -\noalign{\hrule width\hwidth} -\noalign{\smallskip} - &\omit&0&\omit &1 &\omit&2 &\omit&3 &\omit&\quad Index\cr -} -}} -@end tex -@end iftex -@ifinfo -@example -+---------+---------+--------+---------+ -| 8 | "foo" | "" | 30 | @r{Value} -+---------+---------+--------+---------+ - 0 1 2 3 @r{Index} -@end example -@end ifinfo - -@noindent -Only the values are stored; the indices are implicit from the order of -the values. 8 is the value at index zero, because 8 appears in the -position with zero elements before it. - -@cindex arrays, definition of -@cindex associative arrays -@cindex arrays, associative -Arrays in @command{awk} are different---they are @dfn{associative}. This means -that each array is a collection of pairs: an index, and its corresponding -array element value: - -@example -@r{Element} 3 @r{Value} 30 -@r{Element} 1 @r{Value} "foo" -@r{Element} 0 @r{Value} 8 -@r{Element} 2 @r{Value} "" -@end example - -@noindent -The pairs are shown in jumbled order because their order is irrelevant. - -One advantage of associative arrays is that new pairs can be added -at any time. For example, suppose a tenth element is added to the array -whose value is @w{@code{"number ten"}}. 
The result is: - -@example -@r{Element} 10 @r{Value} "number ten" -@r{Element} 3 @r{Value} 30 -@r{Element} 1 @r{Value} "foo" -@r{Element} 0 @r{Value} 8 -@r{Element} 2 @r{Value} "" -@end example - -@noindent -@cindex sparse arrays -@cindex arrays, sparse -Now the array is @dfn{sparse}, which just means some indices are missing. -It has elements 0--3 and 10, but doesn't have elements 4, 5, 6, 7, 8, or 9. - -Another consequence of associative arrays is that the indices don't -have to be positive integers. Any number, or even a string, can be -an index. For example, the following is an array that translates words from -English into French: - -@example -@r{Element} "dog" @r{Value} "chien" -@r{Element} "cat" @r{Value} "chat" -@r{Element} "one" @r{Value} "un" -@r{Element} 1 @r{Value} "un" -@end example - -@noindent -Here we decided to translate the number one in both spelled-out and -numeric form---thus illustrating that a single array can have both -numbers and strings as indices. -In fact, array subscripts are always strings; this is discussed -in more detail in -@ref{Numeric Array Subscripts, ,Using Numbers to Subscript Arrays}. -Here, the number @code{1} isn't double-quoted, since @command{awk} -automatically converts it to a string. - -@cindex arrays, subscripts, and @code{IGNORECASE} -@cindex @code{IGNORECASE}, and array subscripts -@cindex @code{IGNORECASE} variable -The value of @code{IGNORECASE} has no effect upon array subscripting. -The identical string value used to store an array element must be used -to retrieve it. -When @command{awk} creates an array (e.g., with the @code{split} -built-in function), -that array's indices are consecutive integers starting at one. -(@xref{String Functions, ,String Manipulation Functions}.) - -@command{awk}'s arrays are efficient---the time to access an element -is independent of the number of elements in the array. - -@node Reference to Elements, Assigning Elements, Array Intro, Arrays -@section Referring to an Array Element -@cindex array reference -@cindex element of array -@cindex reference to array - -The principal way to use an array is to refer to one of its elements. -An array reference is an expression as follows: - -@example -@var{array}[@var{index}] -@end example - -@noindent -Here, @var{array} is the name of an array. The expression @var{index} is -the index of the desired element of the array. - -The value of the array reference is the current value of that array -element. For example, @code{foo[4.3]} is an expression for the element -of array @code{foo} at index @samp{4.3}. - -A reference to an array element that has no recorded value yields a value of -@code{""}, the null string. This includes elements -that have not been assigned any value as well as elements that have been -deleted (@pxref{Delete, ,The @code{delete} Statement}). Such a reference -automatically creates that array element, with the null string as its value. -(In some cases, this is unfortunate, because it might waste memory inside -@command{awk}.) - -@cindex arrays, presence of elements -@cindex arrays, the @code{in} operator -To determine whether an element exists in an array at a certain index, use -the following expression: - -@example -@var{index} in @var{array} -@end example - -@cindex side effects -@noindent -This expression tests whether or not the particular index exists, -without the side effect of creating that element if it is not present. 
-The expression has the value one (true) if @code{@var{array}[@var{index}]} -exists and zero (false) if it does not exist. -For example, this statement tests whether the array @code{frequencies} -contains the index @samp{2}: - -@example -if (2 in frequencies) - print "Subscript 2 is present." -@end example - -Note that this is @emph{not} a test of whether the array -@code{frequencies} contains an element whose @emph{value} is two. -There is no way to do that except to scan all the elements. Also, this -@emph{does not} create @code{frequencies[2]}, while the following -(incorrect) alternative does: - -@example -if (frequencies[2] != "") - print "Subscript 2 is present." -@end example - -@node Assigning Elements, Array Example, Reference to Elements, Arrays -@section Assigning Array Elements -@cindex array assignment -@cindex element assignment - -Array elements can be assigned values just like -@command{awk} variables: - -@example -@var{array}[@var{subscript}] = @var{value} -@end example - -@noindent -@var{array} is the name of an array. The expression -@var{subscript} is the index of the element of the array that is -assigned a value. The expression @var{value} is the value to -assign to that element of the array. - -@node Array Example, Scanning an Array, Assigning Elements, Arrays -@section Basic Array Example - -The following program takes a list of lines, each beginning with a line -number, and prints them out in order of line number. The line numbers -are not in order when they are first read---instead they -are scrambled. This program sorts the lines by making an array using -the line numbers as subscripts. The program then prints out the lines -in sorted order of their numbers. It is a very simple program and gets -confused upon encountering repeated numbers, gaps, or lines that don't -begin with a number: - -@example -@c file eg/misc/arraymax.awk -@{ - if ($1 > max) - max = $1 - arr[$1] = $0 -@} - -END @{ - for (x = 1; x <= max; x++) - print arr[x] -@} -@c endfile -@end example - -The first rule keeps track of the largest line number seen so far; -it also stores each line into the array @code{arr}, at an index that -is the line's number. -The second rule runs after all the input has been read, to print out -all the lines. -When this program is run with the following input: - -@example -@c file eg/misc/arraymax.data -5 I am the Five man -2 Who are you? The new number two! -4 . . . And four on the floor -1 Who is number one? -3 I three you. -@c endfile -@end example - -@noindent -its output is: - -@example -1 Who is number one? -2 Who are you? The new number two! -3 I three you. -4 . . . And four on the floor -5 I am the Five man -@end example - -If a line number is repeated, the last line with a given number overrides -the others. -Gaps in the line numbers can be handled with an easy improvement to the -program's @code{END} rule, as follows: - -@example -END @{ - for (x = 1; x <= max; x++) - if (x in arr) - print arr[x] -@} -@end example - -@node Scanning an Array, Delete, Array Example, Arrays -@section Scanning All Elements of an Array -@cindex @code{for (x in @dots{})} statement -@cindex arrays, special @code{for} statement -@cindex scanning an array -@cindex @code{in} operator - -In programs that use arrays, it is often necessary to use a loop that -executes once for each element of an array. 
In other languages, where -arrays are contiguous and indices are limited to positive integers, -this is easy: all the valid indices can be found by counting from -the lowest index up to the highest. This technique won't do the job -in @command{awk}, because any number or string can be an array index. -So @command{awk} has a special kind of @code{for} statement for scanning -an array: - -@example -for (@var{var} in @var{array}) - @var{body} -@end example - -@noindent -This loop executes @var{body} once for each index in @var{array} that the -program has previously used, with the variable @var{var} set to that index. - -The following program uses this form of the @code{for} statement. The -first rule scans the input records and notes which words appear (at -least once) in the input, by storing a one into the array @code{used} with -the word as index. The second rule scans the elements of @code{used} to -find all the distinct words that appear in the input. It prints each -word that is more than 10 characters long and also prints the number of -such words. -@xref{String Functions, ,String Manipulation Functions}, -for more information on the built-in function @code{length}. - -@example -# Record a 1 for each word that is used at least once -@{ - for (i = 1; i <= NF; i++) - used[$i] = 1 -@} - -# Find number of distinct words more than 10 characters long -END @{ - for (x in used) - if (length(x) > 10) @{ - ++num_long_words - print x - @} - print num_long_words, "words longer than 10 characters" -@} -@end example - -@noindent -@xref{Word Sorting, ,Generating Word Usage Counts}, -for a more detailed example of this type. - -The order in which elements of the array are accessed by this statement -is determined by the internal arrangement of the array elements within -@command{awk} and cannot be controlled or changed. This can lead to -problems if new elements are added to @var{array} by statements in -the loop body; it is not predictable whether or not the @code{for} loop will -reach them. Similarly, changing @var{var} inside the loop may produce -strange results. It is best to avoid such things. - -@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays -@section The @code{delete} Statement -@cindex @code{delete} statement -@cindex deleting elements of arrays -@cindex removing elements of arrays -@cindex arrays, deleting an element - -To remove an individual element of an array, use the @code{delete} -statement: - -@example -delete @var{array}[@var{index}] -@end example - -Once an array element has been deleted, any value the element once -had is no longer available. It is as if the element had never -been referred to or had been given a value. -The following is an example of deleting elements in an array: - -@example -for (i in frequencies) - delete frequencies[i] -@end example - -@noindent -This example removes all the elements from the array @code{frequencies}. -Once an element is deleted, a subsequent @code{for} statement to scan the array -does not report that element and the @code{in} operator to check for -the presence of that element returns zero (i.e., false): - -@example -delete foo[4] -if (4 in foo) - print "This will never be printed" -@end example - -It is important to note that deleting an element is @emph{not} the -same as assigning it a null value (the empty string, @code{""}). 
-For example: - -@example -foo[4] = "" -if (4 in foo) - print "This is printed, even though foo[4] is empty" -@end example - -@cindex lint checks -It is not an error to delete an element that does not exist. -If @option{--lint} is provided on the command line -(@pxref{Options, ,Command-Line Options}), -@command{gawk} issues a warning message when an element that -is not in the array is deleted. - -@cindex arrays, deleting entire contents -@cindex deleting entire arrays -@cindex differences between @command{gawk} and @command{awk} -All the elements of an array may be deleted with a single statement -by leaving off the subscript in the @code{delete} statement, -as follows: - -@example -delete @var{array} -@end example - -This ability is a @command{gawk} extension; it is not available in -compatibility mode (@pxref{Options, ,Command-Line Options}). - -Using this version of the @code{delete} statement is about three times -more efficient than the equivalent loop that deletes each element one -at a time. - -@cindex portability issues -@cindex Brennan, Michael -The following statement provides a portable but non-obvious way to clear -out an array:@footnote{Thanks to Michael Brennan for pointing this out.} - -@example -split("", array) -@end example - -The @code{split} function -(@pxref{String Functions, ,String Manipulation Functions}) -clears out the target array first. This call asks it to split -apart the null string. Because there is no data to split out, the -function simply clears the array and then returns. - -@strong{Caution:} Deleting an array does not change its type; you cannot -delete an array and then use the array's name as a scalar -(i.e., a regular variable). For example, the following does not work: - -@example -a[1] = 3; delete a; a = 3 -@end example - -@node Numeric Array Subscripts, Uninitialized Subscripts, Delete, Arrays -@section Using Numbers to Subscript Arrays - -@cindex conversions, during subscripting -@cindex numbers, used as subscripts -@cindex @code{CONVFMT} variable -An important aspect about arrays to remember is that @emph{array subscripts -are always strings}. When a numeric value is used as a subscript, -it is converted to a string value before being used for subscripting -(@pxref{Conversion, ,Conversion of Strings and Numbers}). -This means that the value of the built-in variable @code{CONVFMT} can -affect how your program accesses elements of an array. For example: - -@example -xyz = 12.153 -data[xyz] = 1 -CONVFMT = "%2.2f" -if (xyz in data) - printf "%s is in data\n", xyz -else - printf "%s is not in data\n", xyz -@end example - -@noindent -This prints @samp{12.15 is not in data}. The first statement gives -@code{xyz} a numeric value. Assigning to -@code{data[xyz]} subscripts @code{data} with the string value @code{"12.153"} -(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}). -Thus, the array element @code{data["12.153"]} is assigned the value one. -The program then changes -the value of @code{CONVFMT}. The test @samp{(xyz in data)} generates a new -string value from @code{xyz}---this time @code{"12.15"}---because the value of -@code{CONVFMT} only allows two significant digits. This test fails, -since @code{"12.15"} is a different string from @code{"12.153"}. - -According to the rules for conversions -(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer -values are always converted to strings as integers, no matter what the -value of @code{CONVFMT} may happen to be. 
So the usual case of -the following works: - -@example -for (i = 1; i <= maxsub; i++) - @i{do something with} array[i] -@end example - -The ``integer values always convert to strings as integers'' rule -has an additional consequence for array indexing. -Octal and hexadecimal constants -(@pxref{Non-decimal-numbers, ,Octal and Hexadecimal Numbers}) -are converted internally into numbers and their original form -is forgotten. -This means, for example, that -@code{array[17]}, -@code{array[021]}, -and -@code{array[0x11]} -all refer to the same element! - -As with many things in @command{awk}, the majority of the time -things work as one would expect them to. But it is useful to have a precise -knowledge of the actual rules which sometimes can have a subtle -effect on your programs. - -@node Uninitialized Subscripts, Multi-dimensional, Numeric Array Subscripts, Arrays -@section Using Uninitialized Variables as Subscripts - -@cindex uninitialized variables, as array subscripts -@cindex arrays, subscripts, uninitialized variables -Suppose it's necessary to write a program -to print the input data in reverse order. -A reasonable attempt to do so (with some test -data) might look like this: - -@example -$ echo 'line 1 -> line 2 -> line 3' | awk '@{ l[lines] = $0; ++lines @} -> END @{ -> for (i = lines-1; i >= 0; --i) -> print l[i] -> @}' -@print{} line 3 -@print{} line 2 -@end example - -Unfortunately, the very first line of input data did not come out in the -output! - -At first glance, this program should have worked. The variable @code{lines} -is uninitialized, and uninitialized variables have the numeric value zero. -So, @command{awk} should have printed the value of @code{l[0]}. - -The issue here is that subscripts for @command{awk} arrays are @emph{always} -strings. Uninitialized variables, when used as strings, have the -value @code{""}, not zero. Thus, @samp{line 1} ends up stored in -@code{l[""]}. -The following version of the program works correctly: - -@example -@{ l[lines++] = $0 @} -END @{ - for (i = lines - 1; i >= 0; --i) - print l[i] -@} -@end example - -Here, the @samp{++} forces @code{lines} to be numeric, thus making -the ``old value'' numeric zero. This is then converted to @code{"0"} -as the array subscript. - -@cindex null string, as array subscript -@cindex dark corner -@cindex lint checks -Even though it is somewhat unusual, the null string -(@code{""}) is a valid array subscript. -@value{DARKCORNER} -@command{gawk} warns about the use of the null string as a subscript -if @option{--lint} is provided -on the command line (@pxref{Options, ,Command-Line Options}). - -@node Multi-dimensional, Multi-scanning, Uninitialized Subscripts, Arrays -@section Multidimensional Arrays - -@cindex subscripts in arrays -@cindex arrays, multidimensional subscripts -@cindex multidimensional subscripts -A multidimensional array is an array in which an element is identified -by a sequence of indices instead of a single index. For example, a -two-dimensional array requires two indices. The usual way (in most -languages, including @command{awk}) to refer to an element of a -two-dimensional array named @code{grid} is with -@code{grid[@var{x},@var{y}]}. - -@cindex @code{SUBSEP} variable -Multidimensional arrays are supported in @command{awk} through -concatenation of indices into one string. -@command{awk} converts the indices into strings -(@pxref{Conversion, ,Conversion of Strings and Numbers}) and -concatenates them together, with a separator between them. 
This creates -a single string that describes the values of the separate indices. The -combined string is used as a single index into an ordinary, -one-dimensional array. The separator used is the value of the built-in -variable @code{SUBSEP}. - -For example, suppose we evaluate the expression @samp{foo[5,12] = "value"} -when the value of @code{SUBSEP} is @code{"@@"}. The numbers 5 and 12 are -converted to strings and -concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus, -the array element @code{foo["5@@12"]} is set to @code{"value"}. - -Once the element's value is stored, @command{awk} has no record of whether -it was stored with a single index or a sequence of indices. The two -expressions @samp{foo[5,12]} and @w{@samp{foo[5 SUBSEP 12]}} are always -equivalent. - -The default value of @code{SUBSEP} is the string @code{"\034"}, -which contains a non-printing character that is unlikely to appear in an -@command{awk} program or in most input data. -The usefulness of choosing an unlikely character comes from the fact -that index values that contain a string matching @code{SUBSEP} can lead to -combined strings that are ambiguous. Suppose that @code{SUBSEP} is -@code{"@@"}; then @w{@samp{foo["a@@b", "c"]}} and @w{@samp{foo["a", -"b@@c"]}} are indistinguishable because both are actually -stored as @samp{foo["a@@b@@c"]}. - -To test whether a particular index sequence exists in a -``multidimensional'' array, use the same operator (@samp{in}) that is -used for single dimensional arrays. Write the whole sequence of indices -in parentheses, separated by commas, as the left operand: - -@example -(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array} -@end example - -The following example treats its input as a two-dimensional array of -fields; it rotates this array 90 degrees clockwise and prints the -result. It assumes that all lines have the same number of -elements. - -@example -@{ - if (max_nf < NF) - max_nf = NF - max_nr = NR - for (x = 1; x <= NF; x++) - vector[x, NR] = $x -@} - -END @{ - for (x = 1; x <= max_nf; x++) @{ - for (y = max_nr; y >= 1; --y) - printf("%s ", vector[x, y]) - printf("\n") - @} -@} -@end example - -@noindent -When given the input: - -@example -1 2 3 4 5 6 -2 3 4 5 6 1 -3 4 5 6 1 2 -4 5 6 1 2 3 -@end example - -@noindent -the program produces the following output: - -@example -4 3 2 1 -5 4 3 2 -6 5 4 3 -1 6 5 4 -2 1 6 5 -3 2 1 6 -@end example - -@node Multi-scanning, Array Sorting, Multi-dimensional, Arrays -@section Scanning Multidimensional Arrays - -There is no special @code{for} statement for scanning a -``multidimensional'' array. There cannot be one, because in truth there -are no multidimensional arrays or elements---there is only a -multidimensional @emph{way of accessing} an array. - -However, if your program has an array that is always accessed as -multidimensional, you can get the effect of scanning it by combining -the scanning @code{for} statement -(@pxref{Scanning an Array, ,Scanning All Elements of an Array}) with the -built-in @code{split} function -(@pxref{String Functions, ,String Manipulation Functions}). -It works in the following manner: - -@example -for (combined in array) @{ - split(combined, separate, SUBSEP) - @dots{} -@} -@end example - -@noindent -This sets the variable @code{combined} to -each concatenated combined index in the array, and splits it -into the individual indices by breaking it apart where the value of -@code{SUBSEP} appears. The individual indices then become the elements of -the array @code{separate}. 
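-
-Here is a small concrete sketch of the technique (the array name and
-data are invented); it totals a two-dimensional @code{sales} array by
-its first index:
-
-@example
-BEGIN @{
-    sales["east", "jan"] = 100
-    sales["east", "feb"] = 200
-    sales["west", "jan"] = 300
-    for (combined in sales) @{
-        split(combined, separate, SUBSEP)
-        total[separate[1]] += sales[combined]
-    @}
-    for (region in total)
-        print region, total[region]
-@}
-@end example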
- -Thus, if a value is previously stored in @code{array[1, "foo"]}; then -an element with index @code{"1\034foo"} exists in @code{array}. (Recall -that the default value of @code{SUBSEP} is the character with code 034.) -Sooner or later, the @code{for} statement finds that index and does an -iteration with the variable @code{combined} set to @code{"1\034foo"}. -Then the @code{split} function is called as follows: - -@example -split("1\034foo", separate, "\034") -@end example - -@noindent -The result is to set @code{separate[1]} to @code{"1"} and -@code{separate[2]} to @code{"foo"}. Presto! The original sequence of -separate indices is recovered. - -@node Array Sorting, , Multi-scanning, Arrays -@section Sorting Array Values and Indices with @command{gawk} - -@cindex arrays, sorting -@cindex @code{asort} built-in function -The order in which an array is scanned with a @samp{for (i in array)} -loop is essentially arbitrary. -In most @command{awk} implementations, sorting an array requires -writing a @code{sort} function. -While this can be educational for exploring different sorting algorithms, -usually that's not the point of the program. -@command{gawk} provides the built-in @code{asort} function -(@pxref{String Functions, ,String Manipulation Functions}) -that sorts an array. For example: - -@example -@var{populate the array} data -n = asort(data) -for (i = 1; i <= n; i++) - @var{do something with} data[i] -@end example - -After the call to @code{asort}, the array @code{data} is indexed from 1 -to some number @var{n}, the total number of elements in @code{data}. -(This count is @code{asort}'s return value.) -@code{data[1]} @value{LEQ} @code{data[2]} @value{LEQ} @code{data[3]}, and so on. -The comparison of array elements is done -using @command{gawk}'s usual comparison rules -(@pxref{Typing and Comparison, ,Variable Typing and Comparison Expressions}). - -@cindex side effects -An important side effect of calling @code{asort} is that -@emph{the array's original indices are irrevocably lost}. -As this isn't always desirable, @code{asort} accepts a -second argument: - -@example -@var{populate the array} source -n = asort(source, dest) -for (i = 1; i <= n; i++) - @var{do something with} dest[i] -@end example - -In this case, @command{gawk} copies the @code{source} array into the -@code{dest} array and then sorts @code{dest}, destroying its indices. -However, the @code{source} array is not affected. - -Often, what's needed is to sort on the values of the @emph{indices} -instead of the values of the elements. To do this, use a helper array -to hold the sorted index values, and then access the original array's -elements. It works in the following way: - -@example -@var{populate the array} data -# copy indices -j = 1 -for (i in data) @{ - ind[j] = i # index value becomes element value - j++ -@} -n = asort(ind) # index values are now sorted -for (i = 1; i <= n; i++) - @var{do something with} data[ind[i]] -@end example - -Sorting the array by replacing the indices provides maximal flexibility. -To traverse the elements in decreasing order, use a loop that goes from -@var{n} down to 1, either over the elements or over the indices. - -@cindex reference counting -Copying array indices and elements isn't expensive in terms of memory. -Internally, @command{gawk} maintains @dfn{reference counts} to data. -For example, when @code{asort} copies the first array to the second one, -there is only one copy of the original array elements' data, even though -both arrays use the values. 
Similarly, when copying the indices from -@code{data} to @code{ind}, there is only one copy of the actual index -strings. - -@cindex arrays, sorting and @code{IGNORECASE} -@cindex @code{IGNORECASE}, and array sorting -@cindex @code{IGNORECASE} variable -As with array subscripts, the value of @code{IGNORECASE} -does not affect array sorting. - -@node Functions, Internationalization, Arrays, Top -@chapter Functions - -This @value{CHAPTER} describes @command{awk}'s built-in functions, -which fall into three categories: numeric, string, and I/O. -@command{gawk} provides additional groups of functions -to work with values that represent time, do -bit manipulation, and to internationalize and localize programs. - -Besides the built-in functions, @command{awk} has provisions for -writing new functions that the rest of a program can use. -The second half of this @value{CHAPTER} describes these -@dfn{user-defined} functions. - -@menu -* Built-in:: Summarizes the built-in functions. -* User-defined:: Describes User-defined functions in detail. -@end menu - -@node Built-in, User-defined, Functions, Functions -@section Built-in Functions - -@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!! -@cindex built-in functions -@dfn{Built-in} functions are always available for -your @command{awk} program to call. This @value{SECTION} defines all -the built-in -functions in @command{awk}; some of these are mentioned in other sections -but are summarized here for your convenience. - -@menu -* Calling Built-in:: How to call built-in functions. -* Numeric Functions:: Functions that work with numbers, including - @code{int}, @code{sin} and @code{rand}. -* String Functions:: Functions for string manipulation, such as - @code{split}, @code{match} and @code{sprintf}. -* I/O Functions:: Functions for files and shell commands. -* Time Functions:: Functions for dealing with timestamps. -* Bitwise Functions:: Functions for bitwise operations. -* I18N Functions:: Functions for string translation. -@end menu - -@node Calling Built-in, Numeric Functions, Built-in, Built-in -@subsection Calling Built-in Functions - -To call one of @command{awk}'s built-in functions, write the name of -the function followed -by arguments in parentheses. For example, @samp{atan2(y + z, 1)} -is a call to the function @code{atan2}, and has two arguments. - -@cindex conventions, programming -@cindex programming conventions -Whitespace is ignored between the built-in function name and the -open parenthesis, and it is good practice to avoid using whitespace -there. User-defined functions do not permit whitespace in this way, and -it is easier to avoid mistakes by following a simple -convention that always works---no whitespace after a function name. - -@cindex fatal errors -@cindex differences between @command{gawk} and @command{awk} -Each built-in function accepts a certain number of arguments. -In some cases, arguments can be omitted. The defaults for omitted -arguments vary from function to function and are described under the -individual functions. In some @command{awk} implementations, extra -arguments given to built-in functions are ignored. However, in @command{gawk}, -it is a fatal error to give extra arguments to a built-in function. - -When a function is called, expressions that create the function's actual -parameters are evaluated completely before the call is performed. 
-For example, in the following code fragment: - -@example -i = 4 -j = sqrt(i++) -@end example - -@cindex evaluation, order of -@cindex order of evaluation -@noindent -the variable @code{i} is incremented to the value five before @code{sqrt} -is called with a value of four for its actual parameter. -The order of evaluation of the expressions used for the function's -parameters is undefined. Thus, avoid writing programs that -assume that parameters are evaluated from left to right or from -right to left. For example: - -@example -i = 5 -j = atan2(i++, i *= 2) -@end example - -If the order of evaluation is left to right, then @code{i} first becomes -six, and then 12, and @code{atan2} is called with the two arguments 6 -and 12. But if the order of evaluation is right to left, @code{i} -first becomes 10, then 11, and @code{atan2} is called with the -two arguments 11 and 10. - -@node Numeric Functions, String Functions, Calling Built-in, Built-in -@subsection Numeric Functions - -The following list describes all of -the built-in functions that work with numbers. -Optional parameters are enclosed in square brackets ([ and ]): - -@table @code -@item int(@var{x}) -@cindex @code{int} built-in function -This returns the nearest integer to @var{x}, located between @var{x} and zero and -truncated toward zero. - -For example, @code{int(3)} is three, @code{int(3.9)} is three, @code{int(-3.9)} -is @minus{}3, and @code{int(-3)} is @minus{}3 as well. - -@item sqrt(@var{x}) -@cindex @code{sqrt} built-in function -This returns the positive square root of @var{x}. -@command{gawk} reports an error -if @var{x} is negative. Thus, @code{sqrt(4)} is two. - -@item exp(@var{x}) -@cindex @code{exp} built-in function -This returns the exponential of @var{x} (@code{e ^ @var{x}}) or reports -an error if @var{x} is out of range. The range of values @var{x} can have -depends on your machine's floating-point representation. - -@item log(@var{x}) -@cindex @code{log} built-in function -This returns the natural logarithm of @var{x}, if @var{x} is positive; -otherwise, it reports an error. - -@item sin(@var{x}) -@cindex @code{sin} built-in function -This returns the sine of @var{x}, with @var{x} in radians. - -@item cos(@var{x}) -@cindex @code{cos} built-in function -This returns the cosine of @var{x}, with @var{x} in radians. - -@item atan2(@var{y}, @var{x}) -@cindex @code{atan2} built-in function -This returns the arctangent of @code{@var{y} / @var{x}} in radians. - -@item rand() -@cindex @code{rand} built-in function -This returns a random number. The values of @code{rand} are -uniformly distributed between zero and one. -The value is never zero and never one.@footnote{The C version of @code{rand} -is known to produce fairly poor sequences of random numbers. -However, nothing requires that an @command{awk} implementation use the C -@code{rand} to implement the @command{awk} version of @code{rand}. -In fact, @command{gawk} uses the BSD @code{random} function, which is -considerably better than @code{rand}, to produce random numbers.} - -Often random integers are needed instead. Following is a user-defined function -that can be used to obtain a random non-negative integer less than @var{n}: - -@example -function randint(n) @{ - return int(n * rand()) -@} -@end example - -@noindent -The multiplication produces a random number greater than zero and less -than @code{n}. Using @code{int}, this result is made into -an integer between zero and @code{n} @minus{} 1, inclusive. 
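-
-A sketch of how @code{randint} might be called (the range of 100 is an
-arbitrary choice); calling @code{srand} first makes each run produce
-different numbers:
-
-@example
-BEGIN @{
-    srand()
-    for (i = 1; i <= 3; i++)
-        print randint(100)
-@}
-
-function randint(n) @{
-    return int(n * rand())
-@}
-@end example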
- -The following example uses a similar function to produce random integers -between one and @var{n}. This program prints a new random number for -each input record. - -@example -# Function to roll a simulated die. -function roll(n) @{ return 1 + int(rand() * n) @} - -# Roll 3 six-sided dice and -# print total number of points. -@{ - printf("%d points\n", - roll(6)+roll(6)+roll(6)) -@} -@end example - -@cindex seed for random numbers -@cindex random numbers, seed of -@c MAWK uses a different seed each time. -@strong{Caution:} In most @command{awk} implementations, including @command{gawk}, -@code{rand} starts generating numbers from the same -starting number, or @dfn{seed}, each time you run @command{awk}. Thus, -a program generates the same results each time you run it. -The numbers are random within one @command{awk} run but predictable -from run to run. This is convenient for debugging, but if you want -a program to do different things each time it is used, you must change -the seed to a value that is different in each run. To do this, -use @code{srand}. - -@item srand(@r{[}@var{x}@r{]}) -@cindex @code{srand} built-in function -The function @code{srand} sets the starting point, or seed, -for generating random numbers to the value @var{x}. - -Each seed value leads to a particular sequence of random -numbers.@footnote{Computer generated random numbers really are not truly -random. They are technically known as ``pseudo-random.'' This means -that while the numbers in a sequence appear to be random, you can in -fact generate the same sequence of random numbers over and over again.} -Thus, if the seed is set to the same value a second time, -the same sequence of random numbers is produced again. - -Different @command{awk} implementations use different random number -generators internally. Don't expect the same @command{awk} program -to produce the same series of random numbers when executed by -different versions of @command{awk}. - -If the argument @var{x} is omitted, as in @samp{srand()}, then the current -date and time of day are used for a seed. This is the way to get random -numbers that are truly unpredictable. - -The return value of @code{srand} is the previous seed. This makes it -easy to keep track of the seeds in case you need to consistently reproduce -sequences of random numbers. -@end table - -@node String Functions, I/O Functions, Numeric Functions, Built-in -@subsection String Manipulation Functions - -The functions in this @value{SECTION} look at or change the text of one or more -strings. -Optional parameters are enclosed in square brackets ([ and ]). -Those functions that are -specific to @command{gawk} are marked with a pound sign (@samp{#}): - -@menu -* Gory Details:: More than you want to know about @samp{\} and - @samp{&} with @code{sub}, @code{gsub}, and - @code{gensub}. -@end menu - -@table @code -@item asort(@var{source} @r{[}, @var{dest}@r{]}) # -@cindex @code{asort} built-in function -@code{asort} is a @command{gawk}-specific extension, returning the number of -elements in the array @var{source}. The contents of @var{source} are -sorted using @command{gawk}'s normal rules for comparing values, and the indices -of the sorted values of @var{source} are replaced with sequential -integers starting with one. If the optional array @var{dest} is specified, -then @var{source} is duplicated into @var{dest}. @var{dest} is then -sorted, leaving the indices of @var{source} unchanged. 
-For example, if the contents of @code{a} are as follows: - -@example -a["last"] = "de" -a["first"] = "sac" -a["middle"] = "cul" -@end example - -@noindent -A call to @code{asort}: - -@example -asort(a) -@end example - -@noindent -results in the following contents of @code{a}: - -@example -a[1] = "cul" -a[2] = "de" -a[3] = "sac" -@end example - -@cindex differences between @command{gawk} and @command{awk} -The @code{asort} function is described in more detail in -@ref{Array Sorting, ,Sorting Array Values and Indices with @command{gawk}}. -@code{asort} is a @command{gawk} extension; it is not available -in compatibility mode (@pxref{Options, ,Command-Line Options}). - -@item index(@var{in}, @var{find}) -@cindex @code{index} built-in function -This searches the string @var{in} for the first occurrence of the string -@var{find}, and returns the position in characters where that occurrence -begins in the string @var{in}. Consider the following example: - -@example -$ awk 'BEGIN @{ print index("peanut", "an") @}' -@print{} 3 -@end example - -@noindent -If @var{find} is not found, @code{index} returns zero. -(Remember that string indices in @command{awk} start at one.) - -@item length(@r{[}@var{string}@r{]}) -@cindex @code{length} built-in function -This returns the number of characters in @var{string}. If -@var{string} is a number, the length of the digit string representing -that number is returned. For example, @code{length("abcde")} is 5. By -contrast, @code{length(15 * 35)} works out to 3. In this example, 15 * 35 = -525, and 525 is then converted to the string @code{"525"}, which has -three characters. - -If no argument is supplied, @code{length} returns the length of @code{$0}. - -@cindex historical features -@cindex portability issues -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -@strong{Note:} -In older versions of @command{awk}, the @code{length} function could -be called -without any parentheses. Doing so is marked as ``deprecated'' in the -POSIX standard. This means that while a program can do this, -it is a feature that can eventually be removed from a future -version of the standard. Therefore, for programs to be maximally portable, -always supply the parentheses. - -@item match(@var{string}, @var{regexp} @r{[}, @var{array}@r{]}) -@cindex @code{match} built-in function -The @code{match} function searches @var{string} for the -longest leftmost substring matched by the regular expression, -@var{regexp}. It returns the character position, or @dfn{index}, -where that substring begins (one, if it starts at the beginning of -@var{string}). If no match is found, it returns zero. - -The order of the first two arguments is backwards from most other string -functions that work with regular expressions, such as -@code{sub} and @code{gsub}. It might help to remember that -for @code{match}, the order is the same as for the @samp{~} operator: -@samp{@var{string} ~ @var{regexp}}. - -@cindex @code{RSTART} variable -@cindex @code{RLENGTH} variable -The @code{match} function sets the built-in variable @code{RSTART} to -the index. It also sets the built-in variable @code{RLENGTH} to the -length in characters of the matched substring. If no match is found, -@code{RSTART} is set to zero, and @code{RLENGTH} to @minus{}1. 
- -For example: - -@example -@c file eg/misc/findpat.awk -@{ - if ($1 == "FIND") - regex = $2 - else @{ - where = match($0, regex) - if (where != 0) - print "Match of", regex, "found at", - where, "in", $0 - @} -@} -@c endfile -@end example - -@noindent -This program looks for lines that match the regular expression stored in -the variable @code{regex}. This regular expression can be changed. If the -first word on a line is @samp{FIND}, @code{regex} is changed to be the -second word on that line. Therefore, if given: - -@example -@c file eg/misc/findpat.data -FIND ru+n -My program runs -but not very quickly -FIND Melvin -JF+KM -This line is property of Reality Engineering Co. -Melvin was here. -@c endfile -@end example - -@noindent -@command{awk} prints: - -@example -Match of ru+n found at 12 in My program runs -Match of Melvin found at 1 in Melvin was here. -@end example - -@cindex differences between @command{gawk} and @command{awk} -If @var{array} is present, it is cleared, and then the 0'th element -of @var{array} is set to the entire portion of @var{string} -matched by @var{regexp}. If @var{regexp} contains parentheses, -the integer-indexed elements of @var{array} are set to contain the -portion of @var{string} matching the corresponding parenthesized -sub-expression. -For example: - -@example -$ echo foooobazbarrrrr | -> gawk '@{ match($0, /(fo+).+(ba*r)/, arr) -> print arr[1], arr[2] @}' -@print{} foooo barrrrr -@end example - -@cindex fatal errors -The @var{array} argument to @code{match} is a -@command{gawk} extension. In compatibility mode -(@pxref{Options, ,Command-Line Options}), -using a third argument is a fatal error. - -@item split(@var{string}, @var{array} @r{[}, @var{fieldsep}@r{]}) -@cindex @code{split} built-in function -This function divides @var{string} into pieces separated by @var{fieldsep}, -and stores the pieces in @var{array}. The first piece is stored in -@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so -forth. The string value of the third argument, @var{fieldsep}, is -a regexp describing where to split @var{string} (much as @code{FS} can -be a regexp describing where to split input records). If -the @var{fieldsep} is omitted, the value of @code{FS} is used. -@code{split} returns the number of elements created. -If @var{string} does not match @var{fieldsep}, @var{array} is empty -and @code{split} returns zero. - -The @code{split} function splits strings into pieces in a -manner similar to the way input lines are split into fields. For example: - -@example -split("cul-de-sac", a, "-") -@end example - -@noindent -splits the string @samp{cul-de-sac} into three fields using @samp{-} as the -separator. It sets the contents of the array @code{a} as follows: - -@example -a[1] = "cul" -a[2] = "de" -a[3] = "sac" -@end example - -@noindent -The value returned by this call to @code{split} is three. - -@cindex differences between @command{gawk} and @command{awk} -As with input field-splitting, when the value of @var{fieldsep} is -@w{@code{" "}}, leading and trailing whitespace is ignored and the elements -are separated by runs of whitespace. -Also as with input field-splitting, if @var{fieldsep} is the null string, each -individual character in the string is split into its own array element. -(This is a @command{gawk}-specific extension.) - -@cindex dark corner -Modern implementations of @command{awk}, including @command{gawk}, allow -the third argument to be a regexp constant (@code{/abc/}) as well as a -string. 
-@value{DARKCORNER} -The POSIX standard allows this as well. - -Before splitting the string, @code{split} deletes any previously existing -elements in the array @var{array}. -If @var{string} does not match @var{fieldsep} at all, @var{array} has -one element only. The value of that element is the original @var{string}. - -@item sprintf(@var{format}, @var{expression1}, @dots{}) -@cindex @code{sprintf} built-in function -This returns (without printing) the string that @code{printf} would -have printed out with the same arguments -(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}). -For example: - -@example -pival = sprintf("pi = %.2f (approx.)", 22/7) -@end example - -@noindent -assigns the string @w{@code{"pi = 3.14 (approx.)"}} to the variable @code{pival}. - -@cindex @code{strtonum} built-in function -@item strtonum(@var{str}) # -Examines @var{str} and returns its numeric value. If @var{str} -begins with a leading @samp{0}, @code{strtonum} assumes that @var{str} -is an octal number. If @var{str} begins with a leading @samp{0x} or -@samp{0X}, @code{strtonum} assumes that @var{str} is a hexadecimal number. -For example: - -@example -$ echo 0x11 | -> gawk '@{ printf "%d\n", strtonum($1) @}' -@print{} 17 -@end example - -Using the @code{strtonum} function is @emph{not} the same as adding zero -to a string value; the automatic coercion of strings to numbers -works only for decimal data, not for octal or hexadecimal.@footnote{Unless -you use the @option{--non-decimal-data} option, which isn't recommended. -@xref{Non-decimal Data, ,Allowing Non-Decimal Input Data}, for more information.} - -@cindex differences between @command{gawk} and @command{awk} -@code{strtonum} is a @command{gawk} extension; it is not available -in compatibility mode (@pxref{Options, ,Command-Line Options}). - -@item sub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]}) -@cindex @code{sub} built-in function -The @code{sub} function alters the value of @var{target}. -It searches this value, which is treated as a string, for the -leftmost longest substring matched by the regular expression @var{regexp}. -Then the entire string is -changed by replacing the matched text with @var{replacement}. -The modified string becomes the new value of @var{target}. - -This function is peculiar because @var{target} is not simply -used to compute a value, and not just any expression will do---it -must be a variable, field, or array element so that @code{sub} can -store a modified value there. If this argument is omitted, then the -default is to use and alter @code{$0}. -For example: - -@example -str = "water, water, everywhere" -sub(/at/, "ith", str) -@end example - -@noindent -sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the -leftmost longest occurrence of @samp{at} with @samp{ith}. - -The @code{sub} function returns the number of substitutions made (either -one or zero). - -If the special character @samp{&} appears in @var{replacement}, it -stands for the precise substring that was matched by @var{regexp}. (If -the regexp can match more than one string, then this precise substring -may vary.) For example: - -@example -@{ sub(/candidate/, "& and his wife"); print @} -@end example - -@noindent -changes the first occurrence of @samp{candidate} to @samp{candidate -and his wife} on each input line. 
-Here is another example: - -@example -$ awk 'BEGIN @{ -> str = "daabaaa" -> sub(/a+/, "C&C", str) -> print str -> @}' -@print{} dCaaCbaaa -@end example - -@noindent -This shows how @samp{&} can represent a non-constant string and also -illustrates the ``leftmost, longest'' rule in regexp matching -(@pxref{Leftmost Longest, ,How Much Text Matches?}). - -The effect of this special character (@samp{&}) can be turned off by putting a -backslash before it in the string. As usual, to insert one backslash in -the string, you must write two backslashes. Therefore, write @samp{\\&} -in a string constant to include a literal @samp{&} in the replacement. -For example, following is shown how to replace the first @samp{|} on each line with -an @samp{&}: - -@example -@{ sub(/\|/, "\\&"); print @} -@end example - -@cindex @code{sub}, third argument of -@cindex @code{gsub}, third argument of -As mentioned, the third argument to @code{sub} must -be a variable, field or array reference. -Some versions of @command{awk} allow the third argument to -be an expression that is not an lvalue. In such a case, @code{sub} -still searches for the pattern and returns zero or one, but the result of -the substitution (if any) is thrown away because there is no place -to put it. Such versions of @command{awk} accept expressions -such as the following: - -@example -sub(/USA/, "United States", "the USA and Canada") -@end example - -@noindent -@cindex fatal errors -For historical compatibility, @command{gawk} accepts erroneous code, -such as in the previous example. However, using any other non-changeable -object as the third parameter causes a fatal error and your program -will not run. - -Finally, if the @var{regexp} is not a regexp constant, it is converted into a -string, and then the value of that string is treated as the regexp to match. - -@item gsub(@var{regexp}, @var{replacement} @r{[}, @var{target}@r{]}) -@cindex @code{gsub} built-in function -This is similar to the @code{sub} function, except @code{gsub} replaces -@emph{all} of the longest, leftmost, @emph{non-overlapping} matching -substrings it can find. The @samp{g} in @code{gsub} stands for -``global,'' which means replace everywhere. For example: - -@example -@{ gsub(/Britain/, "United Kingdom"); print @} -@end example - -@noindent -replaces all occurrences of the string @samp{Britain} with @samp{United -Kingdom} for all input records. - -The @code{gsub} function returns the number of substitutions made. If -the variable to search and alter (@var{target}) is -omitted, then the entire input record (@code{$0}) is used. -As in @code{sub}, the characters @samp{&} and @samp{\} are special, -and the third argument must be assignable. - -@item gensub(@var{regexp}, @var{replacement}, @var{how} @r{[}, @var{target}@r{]}) # -@cindex @code{gensub} built-in function -@code{gensub} is a general substitution function. Like @code{sub} and -@code{gsub}, it searches the target string @var{target} for matches of -the regular expression @var{regexp}. Unlike @code{sub} and @code{gsub}, -the modified string is returned as the result of the function and the -original target string is @emph{not} changed. If @var{how} is a string -beginning with @samp{g} or @samp{G}, then it replaces all matches of -@var{regexp} with @var{replacement}. Otherwise, @var{how} is treated -as a number that indicates which match of @var{regexp} to replace. If -no @var{target} is supplied, @code{$0} is used. 
- -@code{gensub} provides an additional feature that is not available -in @code{sub} or @code{gsub}: the ability to specify components of a -regexp in the replacement text. This is done by using parentheses in -the regexp to mark the components and then specifying @samp{\@var{N}} -in the replacement text, where @var{N} is a digit from 1 to 9. -For example: - -@example -$ gawk ' -> BEGIN @{ -> a = "abc def" -> b = gensub(/(.+) (.+)/, "\\2 \\1", "g", a) -> print b -> @}' -@print{} def abc -@end example - -@noindent -As with @code{sub}, you must type two backslashes in order -to get one into the string. - -In the replacement text, the sequence @samp{\0} represents the entire -matched text, as does the character @samp{&}. - -The following example shows how you can use the third argument to control -which match of the regexp should be changed: - -@example -$ echo a b c a b c | -> gawk '@{ print gensub(/a/, "AA", 2) @}' -@print{} a b c AA b c -@end example - -In this case, @code{$0} is used as the default target string. -@code{gensub} returns the new string as its result, which is -passed directly to @code{print} for printing. - -@cindex automatic warnings -@cindex warnings, automatic -If the @var{how} argument is a string that does not begin with @samp{g} or -@samp{G}, or if it is a number that is less than or equal to zero, only one -substitution is performed. If @var{how} is zero, @command{gawk} issues -a warning message. - -If @var{regexp} does not match @var{target}, @code{gensub}'s return value -is the original unchanged value of @var{target}. - -@cindex differences between @command{gawk} and @command{awk} -@code{gensub} is a @command{gawk} extension; it is not available -in compatibility mode (@pxref{Options, ,Command-Line Options}). - -@item substr(@var{string}, @var{start} @r{[}, @var{length}@r{]}) -@cindex @code{substr} built-in function -This returns a @var{length}-character-long substring of @var{string}, -starting at character number @var{start}. The first character of a -string is character number one.@footnote{This is different from -C and C++, where the first character is number zero.} -For example, @code{substr("washington", 5, 3)} returns @code{"ing"}. - -If @var{length} is not present, this function returns the whole suffix of -@var{string} that begins at character number @var{start}. For example, -@code{substr("washington", 5)} returns @code{"ington"}. The whole -suffix is also returned -if @var{length} is greater than the number of characters remaining -in the string, counting from character number @var{start}. - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -The string returned by @code{substr} @emph{cannot} be -assigned. Thus, it is a mistake to attempt to change a portion of -a string, as shown in the following example: - -@example -string = "abcdef" -# try to get "abCDEf", won't work -substr(string, 3, 3) = "CDE" -@end example - -@noindent -It is also a mistake to use @code{substr} as the third argument -of @code{sub} or @code{gsub}: - -@example -gsub(/xyz/, "pdq", substr($0, 5, 20)) # WRONG -@end example - -@cindex portability issues -(Some commercial versions of @command{awk} do in fact let you use -@code{substr} this way, but doing so is not portable.) 
- -If you need to replace bits and pieces of a string, combine @code{substr} -with string concatenation, in the following manner: - -@example -string = "abcdef" -@dots{} -string = substr(string, 1, 2) "CDE" substr(string, 6) -@end example - -@cindex case conversion -@cindex conversion of case -@item tolower(@var{string}) -@cindex @code{tolower} built-in function -This returns a copy of @var{string}, with each uppercase character -in the string replaced with its corresponding lowercase character. -Non-alphabetic characters are left unchanged. For example, -@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}. - -@item toupper(@var{string}) -@cindex @code{toupper} built-in function -This returns a copy of @var{string}, with each lowercase character -in the string replaced with its corresponding uppercase character. -Non-alphabetic characters are left unchanged. For example, -@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}. -@end table - -@node Gory Details, , String Functions, String Functions -@subsubsection More About @samp{\} and @samp{&} with @code{sub}, @code{gsub}, and @code{gensub} - -@cindex escape processing, @code{sub} et. al. -@cindex @code{sub}, escape processing -@cindex @code{gsub}, escape processing -@cindex @code{gensub}, escape processing -When using @code{sub}, @code{gsub}, or @code{gensub}, and trying to get literal -backslashes and ampersands into the replacement text, you need to remember -that there are several levels of @dfn{escape processing} going on. - -First, there is the @dfn{lexical} level, which is when @command{awk} reads -your program -and builds an internal copy of it that can be executed. -Then there is the runtime level, which is when @command{awk} actually scans the -replacement string to determine what to generate. - -At both levels, @command{awk} looks for a defined set of characters that -can come after a backslash. At the lexical level, it looks for the -escape sequences listed in @ref{Escape Sequences}. -Thus, for every @samp{\} that @command{awk} processes at the runtime -level, type two backslashes at the lexical level. -When a character that is not valid for an escape sequence follows the -@samp{\}, Unix @command{awk} and @command{gawk} both simply remove the initial -@samp{\} and put the next character into the string. Thus, for -example, @code{"a\qb"} is treated as @code{"aqb"}. - -At the runtime level, the various functions handle sequences of -@samp{\} and @samp{&} differently. The situation is (sadly) somewhat complex. -Historically, the @code{sub} and @code{gsub} functions treated the two -character sequence @samp{\&} specially; this sequence was replaced in -the generated text with a single @samp{&}. Any other @samp{\} within -the @var{replacement} string that did not precede an @samp{&} was passed -through unchanged. To illustrate with a table: - -@c Thank to Karl Berry for help with the TeX stuff. -@tex -\vbox{\bigskip -% This table has lots of &'s and \'s, so unspecialize them. -\catcode`\& = \other \catcode`\\ = \other -% But then we need character for escape and tab. -@catcode`! = 4 -@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr - You type!@code{sub} sees!@code{sub} generates@cr -@hrulefill!@hrulefill!@hrulefill@cr - @code{\&}! @code{&}!the matched text@cr - @code{\\&}! @code{\&}!a literal @samp{&}@cr - @code{\\\&}! @code{\&}!a literal @samp{&}@cr -@code{\\\\&}! @code{\\&}!a literal @samp{\&}@cr -@code{\\\\\&}! @code{\\&}!a literal @samp{\&}@cr -@code{\\\\\\&}! @code{\\\&}!a literal @samp{\\&}@cr - @code{\\q}! 
@code{\q}!a literal @samp{\q}@cr -} -@bigskip} -@end tex -@ifnottex -@display - You type @code{sub} sees @code{sub} generates - -------- ---------- --------------- - @code{\&} @code{&} the matched text - @code{\\&} @code{\&} a literal @samp{&} - @code{\\\&} @code{\&} a literal @samp{&} - @code{\\\\&} @code{\\&} a literal @samp{\&} - @code{\\\\\&} @code{\\&} a literal @samp{\&} -@code{\\\\\\&} @code{\\\&} a literal @samp{\\&} - @code{\\q} @code{\q} a literal @samp{\q} -@end display -@end ifnottex - -@noindent -This table shows both the lexical-level processing, where -an odd number of backslashes becomes an even number at the runtime level, -as well as the runtime processing done by @code{sub}. -(For the sake of simplicity, the rest of the tables below only show the -case of even numbers of backslashes entered at the lexical level.) - -The problem with the historical approach is that there is no way to get -a literal @samp{\} followed by the matched text. - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -The 1992 POSIX standard attempted to fix this problem. The standard -says that @code{sub} and @code{gsub} look for either a @samp{\} or an @samp{&} -after the @samp{\}. If either one follows a @samp{\}, that character is -output literally. The interpretation of @samp{\} and @samp{&} then becomes: - -@c thanks to Karl Berry for formatting this table -@tex -\vbox{\bigskip -% This table has lots of &'s and \'s, so unspecialize them. -\catcode`\& = \other \catcode`\\ = \other -% But then we need character for escape and tab. -@catcode`! = 4 -@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr - You type!@code{sub} sees!@code{sub} generates@cr -@hrulefill!@hrulefill!@hrulefill@cr - @code{&}! @code{&}!the matched text@cr - @code{\\&}! @code{\&}!a literal @samp{&}@cr -@code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr -@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr -} -@bigskip} -@end tex -@ifnottex -@display - You type @code{sub} sees @code{sub} generates - -------- ---------- --------------- - @code{&} @code{&} the matched text - @code{\\&} @code{\&} a literal @samp{&} - @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text -@code{\\\\\\&} @code{\\\&} a literal @samp{\&} -@end display -@end ifnottex - -@noindent -This appears to solve the problem. -Unfortunately, the phrasing of the standard is unusual. It -says, in effect, that @samp{\} turns off the special meaning of any -following character, but for anything other than @samp{\} and @samp{&}, -such special meaning is undefined. This wording leads to two problems: - -@itemize @bullet -@item -Backslashes must now be doubled in the @var{replacement} string, breaking -historical @command{awk} programs. - -@item -To make sure that an @command{awk} program is portable, @emph{every} character -in the @var{replacement} string must be preceded with a -backslash.@footnote{This consequence was certainly unintended.} -@c I can say that, 'cause I was involved in making this change -@end itemize - -The POSIX standard is under revision. -Because of the problems just listed, proposed text for the revised standard -reverts to rules that correspond more closely to the original existing -practice. The proposed rules have special cases that make it possible -to produce a @samp{\} preceding the matched text: - -@tex -\vbox{\bigskip -% This table has lots of &'s and \'s, so unspecialize them. -\catcode`\& = \other \catcode`\\ = \other -% But then we need character for escape and tab. -@catcode`! 
= 4 -@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr - You type!@code{sub} sees!@code{sub} generates@cr -@hrulefill!@hrulefill!@hrulefill@cr -@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr -@code{\\\\&}! @code{\\&}!a literal @samp{\}, followed by the matched text@cr - @code{\\&}! @code{\&}!a literal @samp{&}@cr - @code{\\q}! @code{\q}!a literal @samp{\q}@cr -} -@bigskip} -@end tex -@ifinfo -@display - You type @code{sub} sees @code{sub} generates - -------- ---------- --------------- -@code{\\\\\\&} @code{\\\&} a literal @samp{\&} - @code{\\\\&} @code{\\&} a literal @samp{\}, followed by the matched text - @code{\\&} @code{\&} a literal @samp{&} - @code{\\q} @code{\q} a literal @samp{\q} -@end display -@end ifinfo - -In a nutshell, at the runtime level, there are now three special sequences -of characters (@samp{\\\&}, @samp{\\&} and @samp{\&}) whereas historically -there was only one. However, as in the historical case, any @samp{\} that -is not part of one of these three sequences is not special and appears -in the output literally. - -@command{gawk} 3.0 and 3.1 follow these proposed POSIX rules for @code{sub} and -@code{gsub}. -@c As much as we think it's a lousy idea. You win some, you lose some. Sigh. -Whether these proposed rules will actually become codified into the -standard is unknown at this point. Subsequent @command{gawk} releases will -track the standard and implement whatever the final version specifies; -this @value{DOCUMENT} will be updated as -well.@footnote{As this @value{DOCUMENT} was being finalized, -we learned that the POSIX standard will not use these rules. -However, it was too late to change @command{gawk} for the 3.1 release. -@command{gawk} behaves as described here.} - -The rules for @code{gensub} are considerably simpler. At the runtime -level, whenever @command{gawk} sees a @samp{\}, if the following character -is a digit, then the text that matched the corresponding parenthesized -subexpression is placed in the generated output. Otherwise, -no matter what the character after the @samp{\} is, it -appears in the generated text and the @samp{\} does not: - -@tex -\vbox{\bigskip -% This table has lots of &'s and \'s, so unspecialize them. -\catcode`\& = \other \catcode`\\ = \other -% But then we need character for escape and tab. -@catcode`! = 4 -@halign{@hfil#!@qquad@hfil#!@qquad#@hfil@cr - You type!@code{gensub} sees!@code{gensub} generates@cr -@hrulefill!@hrulefill!@hrulefill@cr - @code{&}! @code{&}!the matched text@cr - @code{\\&}! @code{\&}!a literal @samp{&}@cr - @code{\\\\}! @code{\\}!a literal @samp{\}@cr - @code{\\\\&}! @code{\\&}!a literal @samp{\}, then the matched text@cr -@code{\\\\\\&}! @code{\\\&}!a literal @samp{\&}@cr - @code{\\q}! @code{\q}!a literal @samp{q}@cr -} -@bigskip} -@end tex -@ifnottex -@display - You type @code{gensub} sees @code{gensub} generates - -------- ------------- ------------------ - @code{&} @code{&} the matched text - @code{\\&} @code{\&} a literal @samp{&} - @code{\\\\} @code{\\} a literal @samp{\} - @code{\\\\&} @code{\\&} a literal @samp{\}, then the matched text -@code{\\\\\\&} @code{\\\&} a literal @samp{\&} - @code{\\q} @code{\q} a literal @samp{q} -@end display -@end ifnottex - -Because of the complexity of the lexical and runtime level processing -and the special cases for @code{sub} and @code{gsub}, -we recommend the use of @command{gawk} and @code{gensub} when you have -to do substitutions. 
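-As a compact illustration of the runtime rules just described (a small
-sketch, assuming the proposed POSIX behavior that @command{gawk} 3.0 and 3.1
-follow), all three special sequences can be seen in a single @code{sub} call:
-
-@example
-$ gawk 'BEGIN @{
->   str = "X"
->   sub(/X/, "[& \\& \\\\&]", str)
->   print str
-> @}'
-@print{} [X & \X]
-@end example
-
-@noindent
-In the replacement text as typed, @samp{&} produces the matched text,
-@samp{\\&} produces a literal @samp{&}, and @samp{\\\\&} produces a literal
-@samp{\} followed by the matched text. Per the @code{gensub} table above,
-@code{gensub(/X/, "[& \\& \\\\&]", 1, str)} returns the same string.
-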
- -@c fakenode --- for prepinfo -@subheading Advanced Notes: Matching the Null String -@cindex advanced notes -@cindex matching, the null string - -In @command{awk}, the @samp{*} operator can match the null string. -This is particularly important for the @code{sub}, @code{gsub}, -and @code{gensub} functions. For example: - -@example -$ echo abc | awk '@{ gsub(/m*/, "X"); print @}' -@print{} XaXbXcX -@end example - -@noindent -Although this makes a certain amount of sense, it can be surprising. - -@node I/O Functions, Time Functions, String Functions, Built-in -@subsection Input/Output Functions - -The following functions relate to Input/Output (I/O). -Optional parameters are enclosed in square brackets ([ and ]): - -@table @code -@item close(@var{filename} @r{[}, @var{how}@r{]}) -@cindex @code{close} built-in function -Close the file @var{filename} for input or output. Alternatively, the -argument may be a shell command that was used for creating a coprocess, or -for redirecting to or from a pipe; then the coprocess or pipe is closed. -@xref{Close Files And Pipes, ,Closing Input and Output Redirections}, -for more information. - -When closing a coprocess, it is occasionally useful to first close -one end of the two-way pipe, and then to close the other. This is done -by providing a second argument to @code{close}. This second argument -should be one of the two string values @code{"to"} or @code{"from"}, -indicating which end of the pipe to close. Case in the string does -not matter. -@xref{Two-way I/O, ,Two-Way Communications with Another Process}, -which discusses this feature in more detail and gives an example. - -@item fflush(@r{[}@var{filename}@r{]}) -@cindex @code{fflush} built-in function -@cindex portability issues -@cindex flushing buffers -@cindex buffers, flushing -@cindex buffering output -@cindex output, buffering -Flush any buffered output associated with @var{filename}, which is either a -file opened for writing or a shell command for redirecting output to -a pipe or coprocess. - -Many utility programs @dfn{buffer} their output; i.e., they save information -to write to a disk file or terminal in memory, until there is enough -for it to be worthwhile to send the data to the output device. -This is often more efficient than writing -every little bit of information as soon as it is ready. However, sometimes -it is necessary to force a program to @dfn{flush} its buffers; that is, -write the information to its destination, even if a buffer is not full. -This is the purpose of the @code{fflush} function---@command{gawk} also -buffers its output and the @code{fflush} function forces -@command{gawk} to flush its buffers. - -@code{fflush} was added to the Bell Laboratories research -version of @command{awk} in 1994; it is not part of the POSIX standard and is -not available if @option{--posix} has been specified on the -command line (@pxref{Options, ,Command-Line Options}). - -@command{gawk} extends the @code{fflush} function in two ways. The first -is to allow no argument at all. In this case, the buffer for the -standard output is flushed. The second is to allow the null string -(@w{@code{""}}) as the argument. In this case, the buffers for -@emph{all} open output files and pipes are flushed. - -@cindex automatic warnings -@cindex warnings, automatic -@code{fflush} returns zero if the buffer is successfully flushed; -otherwise it returns @minus{}1. -In the case where all buffers are flushed, the return value is zero -only if all buffers were flushed successfully. 
Otherwise, it is -@minus{}1, and @command{gawk} warns about the @var{filename} that had the problem. - -@command{gawk} also issues a warning message if you attempt to flush -a file or pipe that was opened for reading (such as with @code{getline}), -or if @var{filename} is not an open file, pipe, or coprocess. -In such a case, @code{fflush} returns @minus{}1 as well. - -@item system(@var{command}) -@cindex @code{system} built-in function -@cindex interaction, @command{awk} and other programs -The @code{system} function allows the user to execute operating system -commands and then return to the @command{awk} program. The @code{system} -function executes the command given by the string @var{command}. -It returns the status returned by the command that was executed as -its value. - -For example, if the following fragment of code is put in your @command{awk} -program: - -@example -END @{ - system("date | mail -s 'awk run done' root") -@} -@end example - -@noindent -the system administrator is sent mail when the @command{awk} program -finishes processing input and begins its end-of-input processing. - -Note that redirecting @code{print} or @code{printf} into a pipe is often -enough to accomplish your task. If you need to run many commands, it -is more efficient to simply print them down a pipeline to the shell: - -@example -while (@var{more stuff to do}) - print @var{command} | "/bin/sh" -close("/bin/sh") -@end example - -@noindent -@cindex fatal errors -However, if your @command{awk} -program is interactive, @code{system} is useful for cranking up large -self-contained programs, such as a shell or an editor. -Some operating systems cannot implement the @code{system} function. -@code{system} causes a fatal error if it is not supported. -@end table - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Interactive Versus Non-Interactive Buffering -@cindex advanced notes -@cindex buffering, interactive vs. non-interactive -@cindex buffering, non-interactive vs. interactive -@cindex interactive buffering vs. non-interactive -@cindex non-interactive buffering vs. interactive - -As a side point, buffering issues can be even more confusing, depending -upon whether your program is @dfn{interactive}; i.e., communicating -with a user sitting at a keyboard.@footnote{A program is interactive -if the standard output is connected -to a terminal device.} - -@c Thanks to Walter.Mecky@dresdnerbank.de for this example, and for -@c motivating me to write this section. -Interactive programs generally @dfn{line buffer} their output; i.e., they -write out every line. Non-interactive programs wait until they have -a full buffer, which may be many lines of output. -Here is an example of the difference: - -@example -$ awk '@{ print $1 + $2 @}' -1 1 -@print{} 2 -2 3 -@print{} 5 -@kbd{Ctrl-d} -@end example - -@noindent -Each line of output is printed immediately. Compare that behavior -with this example: - -@example -$ awk '@{ print $1 + $2 @}' | cat -1 1 -2 3 -@kbd{Ctrl-d} -@print{} 2 -@print{} 5 -@end example - -@noindent -Here, no output is printed until after the @kbd{Ctrl-d} is typed, because -it is all buffered and sent down the pipe to @command{cat} in one shot. - -@c fakenode --- for prepinfo -@subheading Advanced Notes: Controlling Output Buffering with @code{system} -@cindex advanced notes -@cindex flushing buffers -@cindex buffers, flushing -@cindex buffering output -@cindex output, buffering - -The @code{fflush} function provides explicit control over output buffering for -individual files and pipes. 
However, its use is not portable to many other -@command{awk} implementations. An alternative method to flush output -buffers is to call @code{system} with a null string as its argument: - -@example -system("") # flush output -@end example - -@noindent -@command{gawk} treats this use of the @code{system} function as a special -case and is smart enough not to run a shell (or other command -interpreter) with the empty command. Therefore, with @command{gawk}, this -idiom is not only useful, it is also efficient. While this method should work -with other @command{awk} implementations, it does not necessarily avoid -starting an unnecessary shell. (Other implementations may only -flush the buffer associated with the standard output and not necessarily -all buffered output.) - -If you think about what a programmer expects, it makes sense that -@code{system} should flush any pending output. The following program: - -@example -BEGIN @{ - print "first print" - system("echo system echo") - print "second print" -@} -@end example - -@noindent -must print: - -@example -first print -system echo -second print -@end example - -@noindent -and not: - -@example -system echo -first print -second print -@end example - -If @command{awk} did not flush its buffers before calling @code{system}, the -latter (undesirable) output is what you see. - -@node Time Functions, Bitwise Functions, I/O Functions, Built-in -@subsection Using @command{gawk}'s Timestamp Functions - -@cindex timestamps -@cindex time of day -A common use for @command{awk} programs is the processing of log files -containing timestamp information, indicating when a -particular log record was written. Many programs log their timestamp -in the form returned by the @code{time} system call, which is the -number of seconds since a particular epoch. On POSIX-compliant systems, -it is the number of seconds since -1970-01-01 00:00:00 UTC, not counting leap seconds.@footnote{@xref{Glossary}, -especially the entries for ``Epoch'' and ``UTC.''} -All known POSIX-compliant systems support timestamps from 0 through -@math{2^31 - 1}, which is sufficient to represent times through -2038-01-19 03:14:07 UTC. Many systems support a wider range of timestamps, -including negative timestamps that represent times before the -epoch. - -In order to make it easier to process such log files and to produce -useful reports, @command{gawk} provides the following functions for -working with timestamps. They are @command{gawk} extensions; they are -not specified in the POSIX standard, nor are they in any other known -version of @command{awk}.@footnote{The GNU @command{date} utility can -also do many of the things described here. It's use may be preferable -for simple time-related operations in shell scripts.} -Optional parameters are enclosed in square brackets ([ and ]): - -@table @code -@item systime() -@cindex @code{systime} built-in function -This function returns the current time as the number of seconds since -the system epoch. On POSIX systems, this is the number of seconds -since 1970-01-01 00:00:00 UTC, not counting leap seconds. -It may be a different number on -other systems. - -@item mktime(@var{datespec}) -@cindex @code{mktime} built-in function -This function turns @var{datespec} into a timestamp in the same form -as is returned by @code{systime}. It is similar to the function of the -same name in ISO C. The argument, @var{datespec}, is a string of the form -@w{@code{"@var{YYYY} @var{MM} @var{DD} @var{HH} @var{MM} @var{SS} [@var{DST}]"}}. 
-The string consists of six or seven numbers representing, respectively, -the full year including century, the month from 1 to 12, the day of the month -from 1 to 31, the hour of the day from 0 to 23, the minute from 0 to -59, the second from 0 to 60,@footnote{Occasionally there are -minutes in a year with a leap second, which is why the -seconds can go up to 60.} -and an optional daylight savings flag. - -The values of these numbers need not be within the ranges specified; -for example, an hour of @minus{}1 means 1 hour before midnight. -The origin-zero Gregorian calendar is assumed, with year 0 preceding -year 1 and year @minus{}1 preceding year 0. -The time is assumed to be in the local timezone. -If the daylight savings flag is positive, the time is assumed to be -daylight savings time; if zero, the time is assumed to be standard -time; and if negative (the default), @code{mktime} attempts to determine -whether daylight savings time is in effect for the specified time. - -If @var{datespec} does not contain enough elements or if the resulting time -is out of range, @code{mktime} returns @minus{}1. - -@item strftime(@r{[}@var{format} @r{[}, @var{timestamp}@r{]]}) -@cindex @code{strftime} built-in function -This function returns a string. It is similar to the function of the -same name in ISO C. The time specified by @var{timestamp} is used to -produce a string, based on the contents of the @var{format} string. -The @var{timestamp} is in the same format as the value returned by the -@code{systime} function. If no @var{timestamp} argument is supplied, -@command{gawk} uses the current time of day as the timestamp. -If no @var{format} argument is supplied, @code{strftime} uses -@code{@w{"%a %b %d %H:%M:%S %Z %Y"}}. This format string produces -output that is (almost) equivalent to that of the @command{date} utility. -(Versions of @command{gawk} prior to 3.0 require the @var{format} argument.) -@end table - -The @code{systime} function allows you to compare a timestamp from a -log file with the current time of day. In particular, it is easy to -determine how long ago a particular record was logged. It also allows -you to produce log records using the ``seconds since the epoch'' format. - -@cindex converting dates to timestamps -@cindex dates, converting to timestamps -@cindex timestamps, converting from dates -The @code{mktime} function allows you to convert a textual representation -of a date and time into a timestamp. This makes it easy to do before/after -comparisons of dates and times, particularly when dealing with date and -time data coming from an external source, such as a log file. - -The @code{strftime} function allows you to easily turn a timestamp -into human-readable information. It is similar in nature to the @code{sprintf} -function -(@pxref{String Functions, ,String Manipulation Functions}), -in that it copies non-format specification characters verbatim to the -returned string, while substituting date and time values for format -specifications in the @var{format} string. - -@code{strftime} is guaranteed by the 1999 ISO C standard@footnote{As this -is a recent standard, not every system's @code{strftime} necessarily -supports all of the conversions listed here.} -to support the following date format specifications: - -@cindex format specifier, @code{strftime} -@table @code -@item %a -The locale's abbreviated weekday name. - -@item %A -The locale's full weekday name. - -@item %b -The locale's abbreviated month name. - -@item %B -The locale's full month name. 
- -@item %c -The locale's ``appropriate'' date and time representation. -(This is @samp{%A %B %d %T %Y} in the @code{"C"} locale.) - -@item %C -The century. This is the year divided by 100 and truncated to the next -lower integer. - -@item %d -The day of the month as a decimal number (01--31). - -@item %D -Equivalent to specifying @samp{%m/%d/%y}. - -@item %e -The day of the month, padded with a space if it is only one digit. - -@item %F -Equivalent to specifying @samp{%Y-%m-%d}. -This is the ISO 8601 date format. - -@item %g -The year modulo 100 of the ISO week number, as a decimal number (00--99). -For example, January 1, 1993, is in week 53 of 1992. Thus, the year -of its ISO week number is 1992, even though its year is 1993. -Similarly, December 31, 1973, is in week 1 of 1974. Thus, the year -of its ISO week number is 1974, even though its year is 1973. - -@item %G -The full year of the ISO week number, as a decimal number. - -@item %h -Equivalent to @samp{%b}. - -@item %H -The hour (24-hour clock) as a decimal number (00--23). - -@item %I -The hour (12-hour clock) as a decimal number (01--12). - -@item %j -The day of the year as a decimal number (001--366). - -@item %m -The month as a decimal number (01--12). - -@item %M -The minute as a decimal number (00--59). - -@item %n -A newline character (ASCII LF). - -@item %p -The locale's equivalent of the AM/PM designations associated -with a 12-hour clock. - -@item %r -The locale's 12-hour clock time. -(This is @samp{%I:%M:%S %p} in the @code{"C"} locale.) - -@item %R -Equivalent to specifying @samp{%H:%M}. - -@item %S -The second as a decimal number (00--60). - -@item %t -A tab character. - -@item %T -Equivalent to specifying @samp{%H:%M:%S}. - -@item %u -The weekday as a decimal number (1--7). Monday is day one. - -@item %U -The week number of the year (the first Sunday as the first day of week one) -as a decimal number (00--53). - -@cindex ISO 8601 -@item %V -The week number of the year (the first Monday as the first -day of week one) as a decimal number (01--53). -The method for determining the week number is as specified by ISO 8601. -(To wit: if the week containing January 1 has four or more days in the -new year, then it is week one, otherwise it is week 53 of the previous year -and the next week is week one.) - -@item %w -The weekday as a decimal number (0--6). Sunday is day zero. - -@item %W -The week number of the year (the first Monday as the first day of week one) -as a decimal number (00--53). - -@item %x -The locale's ``appropriate'' date representation. -(This is @samp{%A %B %d %Y} in the @code{"C"} locale.) - -@item %X -The locale's ``appropriate'' time representation. -(This is @samp{%T} in the @code{"C"} locale.) - -@item %y -The year modulo 100 as a decimal number (00--99). - -@item %Y -The full year as a decimal number (e.g., 1995). - -@cindex RFC 822 -@cindex RFC 1036 -@item %z -The timezone offset in a +HHMM format (e.g., the format necessary to -produce RFC 822/RFC 1036 date headers). - -@item %Z -The time zone name or abbreviation; no characters if -no time zone is determinable. - -@item %Ec %EC %Ex %EX %Ey %EY %Od %Oe %OH -@itemx %OI %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy -These are ``alternate representations'' for the specifications -that use only the second letter (@samp{%c}, @samp{%C}, -and so on).@footnote{If you don't understand any of this, don't worry about -it; these facilities are meant to make it easier to ``internationalize'' -programs. 
-Other internationalization features are described in -@ref{Internationalization, ,Internationalization with @command{gawk}}.} -(These facilitate compliance with the POSIX @command{date} utility.) - -@item %% -A literal @samp{%}. -@end table - -If a conversion specifier is not one of the above, the behavior is -undefined.@footnote{This is because ISO C leaves the -behavior of the C version of @code{strftime} undefined and @command{gawk} -uses the system's version of @code{strftime} if it's there. -Typically, the conversion specifier either does not appear in the -returned string or it appears literally.} - -@cindex locale, definition of -Informally, a @dfn{locale} is the geographic place in which a program -is meant to run. For example, a common way to abbreviate the date -September 4, 1991 in the United States is ``9/4/91.'' -In many countries in Europe, however, it is abbreviated ``4.9.91.'' -Thus, the @samp{%x} specification in a @code{"US"} locale might produce -@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce -@samp{4.9.91}. The ISO C standard defines a default @code{"C"} -locale, which is an environment that is typical of what most C programmers -are used to. - -A public-domain C version of @code{strftime} is supplied with @command{gawk} -for systems that are not yet fully standards-compliant. -It supports all of the just listed format specifications. -If that version is -used to compile @command{gawk} (@pxref{Installation, ,Installing @command{gawk}}), -then the following additional format specifications are available: - -@table @code -@item %k -The hour (24-hour clock) as a decimal number (0--23). -Single digit numbers are padded with a space. - -@item %l -The hour (12-hour clock) as a decimal number (1--12). -Single digit numbers are padded with a space. - -@item %N -The ``Emperor/Era'' name. -Equivalent to @code{%C}. - -@item %o -The ``Emperor/Era'' year. -Equivalent to @code{%y}. - -@item %s -The time as a decimal timestamp in seconds since the epoch. - -@item %v -The date in VMS format (e.g., @samp{20-JUN-1991}). -@end table - -Additionally, the alternate representations are recognized but their -normal representations are used. - -This example is an @command{awk} implementation of the POSIX -@command{date} utility. Normally, the @command{date} utility prints the -current date and time of day in a well-known format. However, if you -provide an argument to it that begins with a @samp{+}, @command{date} -copies non-format specifier characters to the standard output and -interprets the current time according to the format specifiers in -the string. For example: - -@example -$ date '+Today is %A, %B %d, %Y.' -@print{} Today is Thursday, September 14, 2000. -@end example - -Here is the @command{gawk} version of the @command{date} utility. -It has a shell ``wrapper'' to handle the @option{-u} option, -which requires that @command{date} run as if the time zone -is set to UTC: - -@example -#! /bin/sh -# -# date --- approximate the P1003.2 'date' command - -case $1 in --u) TZ=UTC0 # use UTC - export TZ - shift ;; -esac - -@c FIXME: One day, change %d to %e, when C 99 is common. 
-gawk 'BEGIN @{ - format = "%a %b %d %H:%M:%S %Z %Y" - exitval = 0 - - if (ARGC > 2) - exitval = 1 - else if (ARGC == 2) @{ - format = ARGV[1] - if (format ~ /^\+/) - format = substr(format, 2) # remove leading + - @} - print strftime(format) - exit exitval -@}' "$@@" -@end example - -@node Bitwise Functions, I18N Functions, Time Functions, Built-in -@subsection Using @command{gawk}'s Bit Manipulation Functions -@cindex bitwise operations -@quotation -@i{I can explain it for you, but I can't understand it for you.}@* -Anonymous -@end quotation - -@cindex AND bitwise operation -@cindex OR bitwise operation -@cindex XOR bitwise operation -Many languages provide the ability to perform @dfn{bitwise} operations -on two integer numbers. In other words, the operation is performed on -each successive pair of bits in the operands. -Three common operations are bitwise AND, OR, and XOR. -The operations are described by the following table: - -@ifnottex -@display - Bit Operator - | AND | OR | XOR - |---+---+---+---+---+--- -Operands | 0 | 1 | 0 | 1 | 0 | 1 -----------+---+---+---+---+---+--- - 0 | 0 0 | 0 1 | 0 1 - 1 | 0 1 | 1 1 | 1 0 -@end display -@end ifnottex -@tex -\centerline{ -\vbox{\bigskip % space above the table (about 1 linespace) -% Because we have vertical rules, we can't let TeX insert interline space -% in its usual way. -\offinterlineskip -\halign{\strut\hfil#\quad\hfil % operands - &\vrule#&\quad#\quad % rule, 0 (of and) - &\vrule#&\quad#\quad % rule, 1 (of and) - &\vrule# % rule between and and or - &\quad#\quad % 0 (of or) - &\vrule#&\quad#\quad % rule, 1 (of of) - &\vrule# % rule between or and xor - &\quad#\quad % 0 of xor - &\vrule#&\quad#\quad % rule, 1 of xor - \cr -&\omit&\multispan{11}\hfil\bf Bit operator\hfil\cr -\noalign{\smallskip} -& &\multispan3\hfil AND\hfil&&\multispan3\hfil OR\hfil - &&\multispan3\hfil XOR\hfil\cr -\bf Operands&&0&&1&&0&&1&&0&&1\cr -\noalign{\hrule} -\omit&height 2pt&&\omit&&&&\omit&&&&\omit\cr -\noalign{\hrule height0pt}% without this the rule does not extend; why? -0&&0&\omit&0&&0&\omit&1&&0&\omit&1\cr -1&&0&\omit&1&&1&\omit&1&&1&\omit&0\cr -}}} -@end tex - -@cindex bitwise complement -@cindex complement, bitwise -As you can see, the result of an AND operation is 1 only when @emph{both} -bits are 1. -The result of an OR operation is 1 if @emph{either} bit is 1. -The result of an XOR operation is 1 if either bit is 1, -but not both. -The next operation is the @dfn{complement}; the complement of 1 is 0 and -the complement of 0 is 1. Thus, this operation ``flips'' all the bits -of a given value. - -@cindex bitwise shift -@cindex left shift, bitwise -@cindex right shift, bitwise -@cindex shift, bitwise -Finally, two other common operations are to shift the bits left or right. -For example, if you have a bit string @samp{10111001} and you shift it -right by three bits, you end up with @samp{00010111}.@footnote{This example -shows that 0's come in on the left side. For @command{gawk}, this is -always true, but in some languages, it's possible to have the left side -fill with 1's. Caveat emptor.} -@c Purposely decided to use 0's and 1's here. 2/2001. -If you start over -again with @samp{10111001} and shift it left by three bits, you end up -with @samp{11001000}. -@command{gawk} provides built-in functions that implement the -bitwise operations just described. They are: - -@ignore -@table @code -@cindex @code{and} built-in function -@item and(@var{v1}, @var{v2}) -Return the bitwise AND of the values provided by @var{v1} and @var{v2}. 
- -@cindex @code{or} built-in function -@item or(@var{v1}, @var{v2}) -Return the bitwise OR of the values provided by @var{v1} and @var{v2}. - -@cindex @code{xor} built-in function -@item xor(@var{v1}, @var{v2}) -Return the bitwise XOR of the values provided by @var{v1} and @var{v2}. - -@cindex @code{compl} built-in function -@item compl(@var{val}) -Return the bitwise complement of @var{val}. - -@cindex @code{lshift} built-in function -@item lshift(@var{val}, @var{count}) -Return the value of @var{val}, shifted left by @var{count} bits. - -@cindex @code{rshift} built-in function -@item rshift(@var{val}, @var{count}) -Return the value of @var{val}, shifted right by @var{count} bits. -@end table -@end ignore - -@multitable {@code{rshift(@var{val}, @var{count})}} {Return the value of @var{val}, shifted right by @var{count} bits.} -@cindex @code{and} built-in function -@item @code{and(@var{v1}, @var{v2})} -@tab Return the bitwise AND of the values provided by @var{v1} and @var{v2}. - -@cindex @code{or} built-in function -@item @code{or(@var{v1}, @var{v2})} -@tab Return the bitwise OR of the values provided by @var{v1} and @var{v2}. - -@cindex @code{xor} built-in function -@item @code{xor(@var{v1}, @var{v2})} -@tab Return the bitwise XOR of the values provided by @var{v1} and @var{v2}. - -@cindex @code{compl} built-in function -@item @code{compl(@var{val})} -@tab Return the bitwise complement of @var{val}. - -@cindex @code{lshift} built-in function -@item @code{lshift(@var{val}, @var{count})} -@tab Return the value of @var{val}, shifted left by @var{count} bits. - -@cindex @code{rshift} built-in function -@item @code{rshift(@var{val}, @var{count})} -@tab Return the value of @var{val}, shifted right by @var{count} bits. -@end multitable - -For all of these functions, first the double-precision floating-point value is -converted to a C @code{unsigned long}, then the bitwise operation is -performed and then the result is converted back into a C @code{double}. (If -you don't understand this paragraph, don't worry about it.) - -Here is a user-defined function -(@pxref{User-defined, ,User-Defined Functions}) -that illustrates the use of these functions: - -@cindex @code{bits2str} user-defined function -@cindex @code{testbits.awk} program -@smallexample -@group -@c file eg/lib/bits2str.awk -# bits2str --- turn a byte into readable 1's and 0's - -function bits2str(bits, data, mask) -@{ - if (bits == 0) - return "0" - - mask = 1 - for (; bits != 0; bits = rshift(bits, 1)) - data = (and(bits, mask) ? "1" : "0") data - - while ((length(data) % 8) != 0) - data = "0" data - - return data -@} -@c endfile -@end group - -@c this is a hack to make testbits.awk self-contained -@ignore -@c file eg/prog/testbits.awk -# bits2str --- turn a byte into readable 1's and 0's - -function bits2str(bits, data, mask) -@{ - if (bits == 0) - return "0" - - mask = 1 - for (; bits != 0; bits = rshift(bits, 1)) - data = (and(bits, mask) ? 
"1" : "0") data - - while ((length(data) % 8) != 0) - data = "0" data - - return data -@} -@c endfile -@end ignore -@c file eg/prog/testbits.awk -BEGIN @{ - printf "123 = %s\n", bits2str(123) - printf "0123 = %s\n", bits2str(0123) - printf "0x99 = %s\n", bits2str(0x99) - comp = compl(0x99) - printf "compl(0x99) = %#x = %s\n", comp, bits2str(comp) - shift = lshift(0x99, 2) - printf "lshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift) - shift = rshift(0x99, 2) - printf "rshift(0x99, 2) = %#x = %s\n", shift, bits2str(shift) -@} -@c endfile -@end smallexample - -@noindent -This program produces the following output when run: - -@smallexample -$ gawk -f testbits.awk -@print{} 123 = 01111011 -@print{} 0123 = 01010011 -@print{} 0x99 = 10011001 -@print{} compl(0x99) = 0xffffff66 = 11111111111111111111111101100110 -@print{} lshift(0x99, 2) = 0x264 = 0000001001100100 -@print{} rshift(0x99, 2) = 0x26 = 00100110 -@end smallexample - -The @code{bits2str} function turns a binary number into a string. -The number @code{1} represents a binary value where the rightmost bit -is set to 1. Using this mask, -the function repeatedly checks the rightmost bit. -AND-ing the mask with the value indicates whether the -rightmost bit is 1 or not. If so, a @code{"1"} is concatenated onto the front -of the string. -Otherwise, a @code{"0"} is added. -The value is then shifted right by one bit and the loop continues -until there are no more 1 bits. - -If the initial value is zero it returns a simple @code{"0"}. -Otherwise, at the end, it pads the value with zeros to represent multiples -of eight-bit quantities. This is typical in modern computers. - -The main code in the @code{BEGIN} rule shows the difference between the -decimal and octal values for the same numbers -(@pxref{Non-decimal-numbers, ,Octal and Hexadecimal Numbers}), -and then demonstrates the -results of the @code{compl}, @code{lshift}, and @code{rshift} functions. - -@node I18N Functions, , Bitwise Functions, Built-in -@subsection Using @command{gawk}'s String Translation Functions - -@command{gawk} provides facilities for internationalizing @command{awk} programs. -These include the functions described in the following list. -The description here is purposely brief. -@xref{Internationalization, ,Internationalization with @command{gawk}}, -for the full story. -Optional parameters are enclosed in square brackets ([ and ]): - -@table @code -@cindex @code{dcgettext} built-in function -@item dcgettext(@var{string} @r{[}, @var{domain} @r{[}, @var{category}@r{]]}) -This function returns the translation of @var{string} in -text domain @var{domain} for locale category @var{category}. -The default value for @var{domain} is the current value of @code{TEXTDOMAIN}. -The default value for @var{category} is @code{"LC_MESSAGES"}. - -@cindex @code{bindtextdomain} built-in function -@item bindtextdomain(@var{directory} @r{[}, @var{domain}@r{]}) -This function allows you to specify the directory where -@command{gawk} will look for message translation files, in case they -will not or cannot be placed in the ``standard'' locations -(e.g., during testing). -It returns the directory where @var{domain} is ``bound.'' - -The default @var{domain} is the value of @code{TEXTDOMAIN}. -If @var{directory} is the null string (@code{""}), then -@code{bindtextdomain} returns the current binding for the -given @var{domain}. 
-@end table - -@node User-defined, , Built-in, Functions -@section User-Defined Functions - -@cindex user-defined functions -@cindex function, user-defined -Complicated @command{awk} programs can often be simplified by defining -your own functions. User-defined functions can be called just like -built-in ones (@pxref{Function Calls}), but it is up to you to define -them; i.e., to tell @command{awk} what they should do. - -@menu -* Definition Syntax:: How to write definitions and what they mean. -* Function Example:: An example function definition and what it - does. -* Function Caveats:: Things to watch out for. -* Return Statement:: Specifying the value a function returns. -* Dynamic Typing:: How variable types can change at runtime. -@end menu - -@node Definition Syntax, Function Example, User-defined, User-defined -@subsection Function Definition Syntax -@cindex defining functions -@cindex function definition - -Definitions of functions can appear anywhere between the rules of an -@command{awk} program. Thus, the general form of an @command{awk} program is -extended to include sequences of rules @emph{and} user-defined function -definitions. -There is no need to put the definition of a function -before all uses of the function. This is because @command{awk} reads the -entire program before starting to execute any of it. - -The definition of a function named @var{name} looks like this: -@c NEXT ED: put [ ] around parameter list - -@example -function @var{name}(@var{parameter-list}) -@{ - @var{body-of-function} -@} -@end example - -@cindex names, use of -@cindex namespace issues in @command{awk} -@noindent -@var{name} is the name of the function to define. A valid function -name is like a valid variable name: a sequence of letters, digits, and -underscores, that doesn't start with a digit. -Within a single @command{awk} program, any particular name can only be -used as a variable, array, or function. - -@c NEXT ED: parameter-list is an OPTIONAL list of ... -@var{parameter-list} is a list of the function's arguments and local -variable names, separated by commas. When the function is called, -the argument names are used to hold the argument values given in -the call. The local variables are initialized to the empty string. -A function cannot have two parameters with the same name, nor may it -have a parameter with the same name as the function itself. - -The @var{body-of-function} consists of @command{awk} statements. It is the -most important part of the definition, because it says what the function -should actually @emph{do}. The argument names exist to give the body a -way to talk about the arguments; local variables exist to give the body -places to keep temporary values. - -Argument names are not distinguished syntactically from local variable -names. Instead, the number of arguments supplied when the function is -called determines how many argument variables there are. Thus, if three -argument values are given, the first three names in @var{parameter-list} -are arguments and the rest are local variables. - -It follows that if the number of arguments is not the same in all calls -to the function, some of the names in @var{parameter-list} may be -arguments on some occasions and local variables on others. Another -way to think of this is that omitted arguments default to the -null string. 
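To make this concrete, here is a small sketch; the function name @code{pad} and its parameter names are invented for this example. The function is normally called with two arguments, so the third name, @code{filler}, serves as a local variable and starts out as the null string:

@example
function pad(str, len,    filler)
@{
    if (filler == "")          # not supplied by the caller, so it is null
        filler = " "
    while (length(str) < len)
        str = str filler
    return str
@}

@{ print pad($1, 10) @}        # only two arguments; filler stays local
@end example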
- -@cindex conventions, programming -@cindex programming conventions -Usually when you write a function, you know how many names you intend to -use for arguments and how many you intend to use as local variables. It is -conventional to place some extra space between the arguments and -the local variables, in order to document how your function is supposed to be used. - -@cindex variable shadowing -During execution of the function body, the arguments and local variable -values hide or @dfn{shadow} any variables of the same names used in the -rest of the program. The shadowed variables are not accessible in the -function definition, because there is no way to name them while their -names have been taken away for the local variables. All other variables -used in the @command{awk} program can be referenced or set normally in the -function's body. - -The arguments and local variables last only as long as the function body -is executing. Once the body finishes, you can once again access the -variables that were shadowed while the function was running. - -@cindex recursive function -@cindex function, recursive -The function body can contain expressions that call functions. They -can even call this function, either directly or by way of another -function. When this happens, we say the function is @dfn{recursive}. -The act of a function calling itself is called @dfn{recursion}. - -@cindex @command{awk} language, POSIX version -@cindex POSIX @command{awk} -In many @command{awk} implementations, including @command{gawk}, -the keyword @code{function} may be -abbreviated @code{func}. However, POSIX only specifies the use of -the keyword @code{function}. This actually has some practical implications. -If @command{gawk} is in POSIX-compatibility mode -(@pxref{Options, ,Command-Line Options}), then the following -statement does @emph{not} define a function: - -@example -func foo() @{ a = sqrt($1) ; print a @} -@end example - -@noindent -Instead it defines a rule that, for each record, concatenates the value -of the variable @samp{func} with the return value of the function @samp{foo}. -If the resulting string is non-null, the action is executed. -This is probably not what is desired. (@command{awk} accepts this input as -syntactically valid, because functions may be used before they are defined -in @command{awk} programs.) -@c NEXT ED: This won't actually run, since foo() is undefined ... - -@cindex portability issues -To ensure that your @command{awk} programs are portable, always use the -keyword @code{function} when defining a function. - -@node Function Example, Function Caveats, Definition Syntax, User-defined -@subsection Function Definition Examples - -Here is an example of a user-defined function, called @code{myprint}, that -takes a number and prints it in a specific format: - -@example -function myprint(num) -@{ - printf "%6.3g\n", num -@} -@end example - -@noindent -To illustrate, here is an @command{awk} rule that uses our @code{myprint} -function: - -@example -$3 > 0 @{ myprint($3) @} -@end example - -@noindent -This program prints, in our special format, all the third fields that -contain a positive number in our input. 
Therefore, when given the following: - -@example - 1.2 3.4 5.6 7.8 - 9.10 11.12 -13.14 15.16 -17.18 19.20 21.22 23.24 -@end example - -@noindent -this program, using our function to format the results, prints: - -@example - 5.6 - 21.2 -@end example - -@page -This function deletes all the elements in an array: - -@example -function delarray(a, i) -@{ - for (i in a) - delete a[i] -@} -@end example - -When working with arrays, it is often necessary to delete all the elements -in an array and start over with a new list of elements -(@pxref{Delete, ,The @code{delete} Statement}). -Instead of having -to repeat this loop everywhere that you need to clear out -an array, your program can just call @code{delarray}. -(This guarantees portability. The use of @samp{delete @var{array}} to delete -the contents of an entire array is a non-standard extension.) - -The following is an example of a recursive function. It takes a string -as an input parameter and returns the string in backwards order. -Recursive functions must always have a test that stops the recursion. -In this case, the recursion terminates when the starting position -is zero; i.e., when there are no more characters left in the string. - -@example -function rev(str, start) -@{ - if (start == 0) - return "" - - return (substr(str, start, 1) rev(str, start - 1)) -@} -@end example - -If this function is in a file named @file{rev.awk}, it can be tested -this way: - -@example -$ echo "Don't Panic!" | -> gawk --source '@{ print rev($0, length($0)) @}' -f rev.awk -@print{} !cinaP t'noD -@end example - -The C @code{ctime} function takes a timestamp and returns it in a string, -formatted in a well-known fashion. -The following example uses the built-in @code{strftime} function -(@pxref{Time Functions, ,Using @command{gawk}'s Timestamp Functions}) -to create an @command{awk} version of @code{ctime}: - -@c FIXME: One day, change %d to %e, when C 99 is common. -@example -@c file eg/lib/ctime.awk -# ctime.awk -# -# awk version of C ctime(3) function - -function ctime(ts, format) -@{ - format = "%a %b %d %H:%M:%S %Z %Y" - if (ts == 0) - ts = systime() # use current time as default - return strftime(format, ts) -@} -@c endfile -@end example - -@node Function Caveats, Return Statement, Function Example, User-defined -@subsection Calling User-Defined Functions - -@cindex calling a function -@cindex function call -@dfn{Calling a function} means causing the function to run and do its job. -A function call is an expression and its value is the value returned by -the function. - -A function call consists of the function name followed by the arguments -in parentheses. @command{awk} expressions are what you write in the -call for the arguments. Each time the call is executed, these -expressions are evaluated, and the values are the actual arguments. For -example, here is a call to @code{foo} with three arguments (the first -being a string concatenation): - -@example -foo(x y, "lose", 4 * z) -@end example - -@strong{Caution:} Whitespace characters (spaces and tabs) are not allowed -between the function name and the open-parenthesis of the argument list. -If you write whitespace by mistake, @command{awk} might think that you mean -to concatenate a variable with an expression in parentheses. However, it -notices that you used a function name and not a variable name, and reports -an error. - -@cindex call by value -When a function is called, it is given a @emph{copy} of the values of -its arguments. This is known as @dfn{call by value}. 
The caller may use -a variable as the expression for the argument, but the called function -does not know this---it only knows what value the argument had. For -example, if you write the following code: - -@example -foo = "bar" -z = myfunc(foo) -@end example - -@noindent -then you should not think of the argument to @code{myfunc} as being -``the variable @code{foo}.'' Instead, think of the argument as the -string value @code{"bar"}. -If the function @code{myfunc} alters the values of its local variables, -this has no effect on any other variables. Thus, if @code{myfunc} -does this: - -@example -function myfunc(str) -@{ - print str - str = "zzz" - print str -@} -@end example - -@noindent -to change its first argument variable @code{str}, it @emph{does not} -change the value of @code{foo} in the caller. The role of @code{foo} in -calling @code{myfunc} ended when its value (@code{"bar"}) was computed. -If @code{str} also exists outside of @code{myfunc}, the function body -cannot alter this outer value, because it is shadowed during the -execution of @code{myfunc} and cannot be seen or changed from there. - -@cindex call by reference -However, when arrays are the parameters to functions, they are @emph{not} -copied. Instead, the array itself is made available for direct manipulation -by the function. This is usually called @dfn{call by reference}. -Changes made to an array parameter inside the body of a function @emph{are} -visible outside that function. - -@strong{Note:} Changing an array parameter inside a function -can be very dangerous if you do not watch what you are doing. -For example: - -@example -function changeit(array, ind, nvalue) -@{ - array[ind] = nvalue -@} - -BEGIN @{ - a[1] = 1; a[2] = 2; a[3] = 3 - changeit(a, 2, "two") - printf "a[1] = %s, a[2] = %s, a[3] = %s\n", - a[1], a[2], a[3] -@} -@end example - -@noindent -This program prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because -@code{changeit} stores @code{"two"} in the second element of @code{a}. - -@cindex undefined functions -@cindex functions, undefined -Some @command{awk} implementations allow you to call a function that -has not been defined. They only report a problem at runtime when the -program actually tries to call the function. For example: - -@example -BEGIN @{ - if (0) - foo() - else - bar() -@} -function bar() @{ @dots{} @} -# note that `foo' is not defined -@end example - -@noindent -Because the @samp{if} statement will never be true, it is not really a -problem that @code{foo} has not been defined. Usually though, it is a -problem if a program calls an undefined function. - -@cindex lint checks -If @option{--lint} is specified -(@pxref{Options, ,Command-Line Options}), -@command{gawk} reports calls to undefined functions. - -@cindex portability issues -Some @command{awk} implementations generate a runtime -error if you use the @code{next} statement -(@pxref{Next Statement, , The @code{next} Statement}) -inside a user-defined function. -@command{gawk} does not have this limitation. - -@node Return Statement, Dynamic Typing, Function Caveats, User-defined -@subsection The @code{return} Statement -@cindex @code{return} statement - -The body of a user-defined function can contain a @code{return} statement. -This statement returns control to the calling part of the @command{awk} program. It -can also be used to return a value for use in the rest of the @command{awk} -program. It looks like this: - -@example -return @r{[}@var{expression}@r{]} -@end example - -The @var{expression} part is optional. 
If it is omitted, then the returned -value is undefined, and therefore, unpredictable. - -A @code{return} statement with no value expression is assumed at the end of -every function definition. So if control reaches the end of the function -body, then the function returns an unpredictable value. @command{awk} -does @emph{not} warn you if you use the return value of such a function. - -Sometimes, you want to write a function for what it does, not for -what it returns. Such a function corresponds to a @code{void} function -in C or to a @code{procedure} in Pascal. Thus, it may be appropriate to not -return any value; simply bear in mind that if you use the return -value of such a function, you do so at your own risk. - -The following is an example of a user-defined function that returns a value -for the largest number among the elements of an array: - -@example -function maxelt(vec, i, ret) -@{ - for (i in vec) @{ - if (ret == "" || vec[i] > ret) - ret = vec[i] - @} - return ret -@} -@end example - -@cindex conventions, programming -@cindex programming conventions -@noindent -You call @code{maxelt} with one argument, which is an array name. The local -variables @code{i} and @code{ret} are not intended to be arguments; -while there is nothing to stop you from passing two or three arguments -to @code{maxelt}, the results would be strange. The extra space before -@code{i} in the function parameter list indicates that @code{i} and -@code{ret} are not supposed to be arguments. This is a convention that -you should follow when you define functions. - -The following program uses the @code{maxelt} function. It loads an -array, calls @code{maxelt}, and then reports the maximum number in that -array: - -@example -function maxelt(vec, i, ret) -@{ - for (i in vec) @{ - if (ret == "" || vec[i] > ret) - ret = vec[i] - @} - return ret -@} - -# Load all fields of each record into nums. -@{ - for(i = 1; i <= NF; i++) - nums[NR, i] = $i -@} - -END @{ - print maxelt(nums) -@} -@end example - -Given the following input: - -@example - 1 5 23 8 16 -44 3 5 2 8 26 -256 291 1396 2962 100 --6 467 998 1101 -99385 11 0 225 -@end example - -@noindent -the program reports (predictably) that @code{99385} is the largest number -in the array. - -@node Dynamic Typing, , Return Statement, User-defined -@subsection Functions and Their Effect on Variable Typing - -@command{awk} is a very fluid language. -It is possible that @command{awk} can't tell if an identifier -represents a regular variable or an array until runtime. -Here is an annotated sample program: - -@example -function foo(a) -@{ - a[1] = 1 # parameter is an array -@} - -BEGIN @{ - b = 1 - foo(b) # invalid: fatal type mismatch - - foo(x) # x uninitialized, becomes an array dynamically - x = 1 # now not allowed, runtime error -@} -@end example - -Usually, such things aren't a big issue, but it's worth -being aware of them. - -@node Internationalization, Advanced Features, Functions, Top -@chapter Internationalization with @command{gawk} - -Once upon a time, computer makers -wrote software that only worked in English. -Eventually, hardware and software vendors noticed that if their -systems worked in the native languages of non-English-speaking -countries, they were able to sell more systems. -As a result, internationalization and localization -of programs and software systems became a common practice. 
- -@cindex internationalization features in @command{gawk} -Until recently, the ability to provide internationalization -was largely restricted to programs written in C and C++. -This @value{CHAPTER} describes the underlying library @command{gawk} -uses for internationalization, as well as how -@command{gawk} makes internationalization -features available at the @command{awk} program level. -Having internationalization available at the @command{awk} level -gives software developers additional flexibility---they are no -longer required to write in C when internationalization is -a requirement. - -@menu -* I18N and L10N:: Internationalization and Localization. -* Explaining gettext:: How GNU @code{gettext} works. -* Programmer i18n:: Features for the programmer. -* Translator i18n:: Features for the translator. -* I18N Example:: A simple i18n example. -* Gawk I18N:: @command{gawk} is also internationalized. -@end menu - -@node I18N and L10N, Explaining gettext, Internationalization, Internationalization -@section Internationalization and Localization - -@cindex internationalization -@cindex localization -@dfn{Internationalization} means writing (or modifying) a program once, -in such a way that it can use multiple languages without requiring -further source code changes. -@dfn{Localization} means providing the data necessary for an -internationalized program to work in a particular language. -Most typically, these terms refer to features such as the language -used for printing error messages, the language used to read -responses, and information related to how numerical and -monetary values are printed and read. - -@node Explaining gettext, Programmer i18n, I18N and L10N, Internationalization -@section GNU @code{gettext} - -@cindex @code{gettext}, how it works -@cindex internationalizing a program -The facilities in GNU @code{gettext} focus on messages; strings printed -by a program, either directly or via formatting with @code{printf} or -@code{sprintf}.@footnote{For some operating systems, the @command{gawk} -port doesn't support GNU @code{gettext}. This applies most notably to -the PC operating systems. As such, these features are not available -if you are using one of those operating systems. Sorry.} - -When using GNU @code{gettext}, each application has its own -@dfn{text domain}. This is a unique name such as @samp{kpilot} or @samp{gawk}, -that identifies the application. -A complete application may have multiple components---programs written -in C or C++, as well as scripts written in @command{sh} or @command{awk}. -All of the components use the same text domain. - -To make the discussion concrete, assume we're writing an application -named @command{guide}. Internationalization consists of the -following steps, in this order: - -@enumerate -@item -The programmer goes -through the source for all of @command{guide}'s components -and marks each string that is a candidate for translation. -For example, @code{"`-F': option required"} is a good candidate for translation. -A table with strings of option names is not (e.g., @command{gawk}'s -@option{--profile} option should remain the same, no matter what the local -language). - -@cindex @code{textdomain} C library function -@item -The programmer indicates the application's text domain -(@code{"guide"}) to the @code{gettext} library, -by calling the @code{textdomain} function. 
- -@item -Messages from the application are extracted from the source code and -collected into a Portable Object file (@file{guide.po}), -which lists the strings and their translations. -The translations are initially empty. -The original (usually English) messages serve as the key for -lookup of the translations. - -@cindex portable object files (@code{gettext}) -@item -For each language with a translator, @file{guide.po} -is copied and translations are created and shipped with the application. - -@cindex message object files (@code{gettext}) -@item -Each language's @file{.po} file is converted into a binary -message object (@file{.mo}) file. -A message object file contains the original messages and their -translations in a binary format that allows fast lookup of translations -at runtime. - -@item -When @command{guide} is built and installed, the binary translation files -are installed in a standard place. - -@cindex @code{bindtextdomain} C library function -@item -For testing and development, it is possible to tell @code{gettext} -to use @file{.mo} files in a different directory than the standard -one by using the @code{bindtextdomain} function. - -@item -At runtime, @command{guide} looks up each string via a call -to @code{gettext}. The returned string is the translated string -if available, or the original string if not. - -@item -If necessary, it is possible to access messages from a different -text domain than the one belonging to the application, without -having to switch the application's default text domain back -and forth. -@end enumerate - -@cindex @code{gettext} C library function -In C (or C++), the string marking and dynamic translation lookup -are accomplished by wrapping each string in a call to @code{gettext}: - -@example -printf(gettext("Don't Panic!\n")); -@end example - -The tools that extract messages from source code pull out all -strings enclosed in calls to @code{gettext}. - -@cindex @code{_} C macro (@code{gettext}) -The GNU @code{gettext} developers, recognizing that typing -@samp{gettext} over and over again is both painful and ugly to look -at, use the macro @samp{_} (an underscore) to make things easier: - -@example -/* In the standard header file: */ -#define _(str) gettext(str) - -/* In the program text: */ -printf(_("Don't Panic!\n")); -@end example - -@cindex locale categories -@noindent -This reduces the typing overhead to just three extra characters per string -and is considerably easier to read as well. -There are locale @dfn{categories} -for different types of locale-related information. -The defined locale categories that @code{gettext} knows about are: - -@table @code -@cindex @code{LC_MESSAGES} locale category -@item LC_MESSAGES -Text messages. This is the default category for @code{gettext} -operations, but it is possible to supply a different one explicitly, -if necessary. (It is almost never necessary to supply a different category.) - -@cindex @code{LC_COLLATE} locale category -@item LC_COLLATE -Text collation information; i.e., how different characters -and/or groups of characters sort in a given language. - -@cindex @code{LC_CTYPE} locale category -@item LC_CTYPE -Character type information (alphabetic, digit, upper- or lowercase, and -so on). -This information is accessed via the -POSIX character classes in regular expressions, -such as @code{/[[:alnum:]]/} -(@pxref{Regexp Operators, ,Regular Expression Operators}). 
- -@cindex @code{LC_MONETARY} locale category -@item LC_MONETARY -Monetary information, such as the currency symbol, and whether the -symbol goes before or after a number. - -@cindex @code{LC_NUMERIC} locale category -@item LC_NUMERIC -Numeric information, such as which characters to use for the decimal -point and the thousands separator.@footnote{Americans -use a comma every three decimal places and a period for the decimal -point, while many Europeans do exactly the opposite: -@code{1,234.56} vs.@: @code{1.234,56}.} - -@cindex @code{LC_RESPONSE} locale category -@item LC_RESPONSE -Response information, such as how ``yes'' and ``no'' appear in the -local language, and possibly other information as well. - -@cindex @code{LC_TIME} locale category -@item LC_TIME -Time and date related information, such as 12- or 24-hour clock, month printed -before or after day in a date, local month abbreviations, and so on. - -@cindex @code{LC_ALL} locale category -@item LC_ALL -All of the above. (Not too useful in the context of @code{gettext}.) -@end table - -@node Programmer i18n, Translator i18n, Explaining gettext, Internationalization -@section Internationalizing @command{awk} Programs - -@command{gawk} provides the following variables and functions for -internationalization: - -@table @code -@cindex @code{TEXTDOMAIN} variable -@item TEXTDOMAIN -This variable indicates the application's text domain. -For compatibility with GNU @code{gettext}, the default -value is @code{"messages"}. - -@cindex internationalization, marked strings -@cindex marked strings for internationalization -@item _"your message here" -String constants marked with a leading underscore -are candidates for translation at runtime. -String constants without a leading underscore are not translated. - -@cindex @code{dcgettext} built-in function -@item dcgettext(@var{string} @r{[}, @var{domain} @r{[}, @var{category}@r{]]}) -This built-in function returns the translation of @var{string} in -text domain @var{domain} for locale category @var{category}. -The default value for @var{domain} is the current value of @code{TEXTDOMAIN}. -The default value for @var{category} is @code{"LC_MESSAGES"}. - -If you supply a value for @var{category}, it must be a string equal to -one of the known locale categories described in -@ifnotinfo -the previous @value{SECTION}. -@end ifnotinfo -@ifinfo -@ref{Explaining gettext, ,GNU @code{gettext}}. -@end ifinfo -You must also supply a text domain. Use @code{TEXTDOMAIN} if -you want to use the current domain. - -@strong{Caution:} The order of arguments to the @command{awk} version -of the @code{dcgettext} function is purposely different from the order for -the C version. The @command{awk} version's order was -chosen to be simple and to allow for reasonable @command{awk}-style -default arguments. - -@cindex @code{bindtextdomain} built-in function -@item bindtextdomain(@var{directory} @r{[}, @var{domain}@r{]}) -This built-in function allows you to specify the directory where -@code{gettext} looks for @file{.mo} files, in case they -will not or cannot be placed in the standard locations -(e.g., during testing). -It returns the directory where @var{domain} is ``bound.'' - -The default @var{domain} is the value of @code{TEXTDOMAIN}. -If @var{directory} is the null string (@code{""}), then -@code{bindtextdomain} returns the current binding for the -given @var{domain}. 
-@end table - -To use these facilities in your @command{awk} program, follow the steps -outlined in -@ifnotinfo -the previous @value{SECTION}, -@end ifnotinfo -@ifinfo -@ref{Explaining gettext, ,GNU @code{gettext}}, -@end ifinfo -like so: - -@enumerate -@item -Set the variable @code{TEXTDOMAIN} to the text domain of -your program. This is best done in a @code{BEGIN} rule -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), -or it can also be done via the @option{-v} command-line -option (@pxref{Options, ,Command-Line Options}): - -@example -BEGIN @{ - TEXTDOMAIN = "guide" - @dots{} -@} -@end example - -@item -Mark all translatable strings with a leading underscore (@samp{_}) -character. It @emph{must} be adjacent to the opening -quote of the string. For example: - -@example -print _"hello, world" -x = _"you goofed" -printf(_"Number of users is %d\n", nusers) -@end example - -@item -If you are creating strings dynamically, you can -still translate them, using the @code{dcgettext} -built-in function. - -@example -message = nusers " users logged in" -message = dcgettext(message, "adminprog") -print message -@end example - -Here, the call to @code{dcgettext} supplies a different -text domain (@code{"adminprog"}) in which to find the -message, but it uses the default @code{"LC_MESSAGES"} category. - -@item -During development, you might want to put the @file{.mo} -file in a private directory for testing. This is done -with the @code{bindtextdomain} built-in function: - -@example -BEGIN @{ - TEXTDOMAIN = "guide" # our text domain - if (Testing) @{ - # where to find our files - bindtextdomain("testdir") - # joe is in charge of adminprog - bindtextdomain("../joe/testdir", "adminprog") - @} - @dots{} -@} -@end example - -@end enumerate - -@xref{I18N Example, ,A Simple Internationalization Example}, -for an example program showing the steps necessary to create -and use translations from @command{awk}. - -@node Translator i18n, I18N Example, Programmer i18n, Internationalization -@section Translating @command{awk} Programs - -Once a program's translatable strings have been marked, they must -be extracted to create the initial @file{.po} file. -As part of translation, it is often helpful to rearrange the order -in which arguments to @code{printf} are output. - -@command{gawk}'s @option{--gen-po} command-line option extracts -the messages and is discussed next. -After that, @code{printf}'s ability to -rearrange the order for @code{printf} arguments at runtime -is covered. - -@menu -* String Extraction:: Extracting marked strings. -* Printf Ordering:: Rearranging @code{printf} arguments. -* I18N Portability:: @command{awk}-level portability issues. -@end menu - -@node String Extraction, Printf Ordering, Translator i18n, Translator i18n -@subsection Extracting Marked Strings -@cindex string extraction (internationalization) -@cindex marked string extraction (internationalization) -@cindex extraction, of marked strings (internationalization) - -@cindex @code{--gen-po} option -@cindex command-line option, @code{--gen-po} -Once your @command{awk} program is working, and all the strings have -been marked and you've set (and perhaps bound) the text domain, -it is time to produce translations. -First, use the @option{--gen-po} command-line option to create -the initial @file{.po} file: - -@example -$ gawk --gen-po -f guide.awk > guide.po -@end example - -@cindex @code{xgettext} utility -When run with @option{--gen-po}, @command{gawk} does not execute your -program. 
Instead, it parses it as usual and prints all marked strings -to standard output in the format of a GNU @code{gettext} Portable Object -file. Also included in the output are any constant strings that -appear as the first argument to @code{dcgettext}.@footnote{Eventually, -the @command{xgettext} utility that comes with GNU @code{gettext} will be -taught to automatically run @samp{gawk --gen-po} for @file{.awk} files, -freeing the translator from having to do it manually.} -@xref{I18N Example, ,A Simple Internationalization Example}, -for the full list of steps to go through to create and test -translations for @command{guide}. - -@node Printf Ordering, I18N Portability, String Extraction, Translator i18n -@subsection Rearranging @code{printf} Arguments - -@cindex @code{printf}, positional specifier -@cindex positional specifier, @code{printf} -Format strings for @code{printf} and @code{sprintf} -(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}) -present a special problem for translation. -Consider the following:@footnote{This example is borrowed -from the GNU @code{gettext} manual.} - -@c line broken here only for smallbook format -@example -printf(_"String `%s' has %d characters\n", - string, length(string)) -@end example - -A possible German translation for this might be: - -@example -"%d Zeichen lang ist die Zeichenkette `%s'\n" -@end example - -The problem should be obvious: the order of the format -specifications is different from the original! -Even though @code{gettext} can return the translated string -at runtime, -it cannot change the argument order in the call to @code{printf}. - -To solve this problem, @code{printf} format specifiers may have -an additional optional element, which we call a @dfn{positional specifier}. -For example: - -@example -"%2$d Zeichen lang ist die Zeichenkette `%1$s'\n" -@end example - -Here, the positional specifier consists of an integer count, which indicates which -argument to use, and a @samp{$}. Counts are one-based, and the -format string itself is @emph{not} included. Thus, in the following -example, @samp{string} is the first argument and @samp{length(string)} is the second. - -@example -$ gawk 'BEGIN @{ -> string = "Dont Panic" -> printf _"%2$d characters live in \"%1$s\"\n", -> string, length(string) -> @}' -@print{} 10 characters live in "Dont Panic" -@end example - -If present, positional specifiers come first in the format specification, -before the flags, the field width, and/or the precision. - -Positional specifiers can be used with the dynamic field width and -precision capability: - -@example -$ gawk 'BEGIN @{ -> printf("%*.*s\n", 10, 20, "hello") -> printf("%3$*2$.*1$s\n", 20, 10, "hello") -> @}' -@print{} hello -@print{} hello -@end example - -@noindent -@strong{Note:} When using @samp{*} with a positional specifier, the @samp{*} -comes first, then the integer position, and then the @samp{$}. -This is somewhat counter-intuitive. - -@cindex @code{printf}, mixing positional specifiers with regular formats -@cindex positional specifiers, mixing with regular formats (@code{printf}) -@cindex format specifiers, mixing regular with positional specifiers (@code{printf}) -@command{gawk} does not allow you to mix regular format specifiers -and those with positional specifiers in the same string: - -@smallexample -$ gawk 'BEGIN @{ printf _"%d %3$s\n", 1, 2, "hi" @}' -@error{} gawk: cmd.
line:1: fatal: must use `count$' on all formats or none -@end smallexample - -@strong{Note:} There are some pathological cases that @command{gawk} may fail to -diagnose. In such cases, the output may not be what you expect. -It's still a bad idea to try mixing them, even if @command{gawk} -doesn't detect it. - -Although positional specifiers can be used directly in @command{awk} programs, -their primary purpose is to help in producing correct translations of -format strings into languages different from the one in which the program -is first written. - -@node I18N Portability, , Printf Ordering, Translator i18n -@subsection @command{awk} Portability Issues - -@cindex portability issues -@cindex portability issues, internationalization of @command{awk} programs -@cindex internationalization of @command{awk} programs, portability issues -@command{gawk}'s internationalization features were purposely chosen to -have as little impact as possible on the portability of @command{awk} -programs that use them to other versions of @command{awk}. -Consider this program: - -@example -BEGIN @{ - TEXTDOMAIN = "guide" - if (Test_Guide) # set with -v - bindtextdomain("/test/guide/messages") - print _"don't panic!" -@} -@end example - -@noindent -As written, it won't work on other versions of @command{awk}. -However, it is actually almost portable, requiring very little -change. - -@itemize @bullet -@item -Assignments to @code{TEXTDOMAIN} won't have any effect, -since @code{TEXTDOMAIN} is not special in other @command{awk} implementations. - -@item -Non-GNU versions of @command{awk} treat marked strings -as the concatenation of a variable named @code{_} with the string -following it.@footnote{This is good fodder for an ``Obfuscated -@command{awk}'' contest.} Typically, the variable @code{_} has -the null string (@code{""}) as its value, leaving the original string constant as -the result. - -@item -By defining ``dummy'' functions to replace @code{dcgettext} -and @code{bindtextdomain}, the @command{awk} program can be made to run, but -all the messages are output in the original language. -For example: - -@cindex @code{bindtextdomain} user-defined function -@cindex @code{dcgettext} user-defined function -@example -@c file eg/lib/libintl.awk -function bindtextdomain(dir, domain) -@{ - return dir -@} - -function dcgettext(string, domain, category) -@{ - return string -@} -@c endfile -@end example - -@item -The use of positional specifications in @code{printf} or -@code{sprintf} is @emph{not} portable. -To support @code{gettext} at the C level, many systems' C versions of -@code{sprintf} do support positional specifiers. But it works only if -enough arguments are supplied in the function call. Many versions of -@command{awk} pass @code{printf} formats and arguments unchanged to the -underlying C library version of @code{sprintf}, but only one format and -argument at a time. What happens if a positional specification is -used is anybody's guess. -However, since the positional specifications are primarily for use in -@emph{translated} format strings, and since non-GNU @command{awk}s never -retrieve the translated string, this should not be a problem in practice. 
-@end itemize - -@node I18N Example, Gawk I18N, Translator i18n, Internationalization -@section A Simple Internationalization Example - -Now let's look at a step-by-step example of how to internationalize and -localize a simple @command{awk} program, using @file{guide.awk} as our -original source: - -@example -@c file eg/prog/guide.awk -BEGIN @{ - TEXTDOMAIN = "guide" - bindtextdomain(".") # for testing - print _"Don't Panic" - print _"The Answer Is", 42 - print "Pardon me, Zaphod who?" -@} -@c endfile -@end example - -@noindent -Run @samp{gawk --gen-po} to create the @file{.po} file: - -@example -$ gawk --gen-po -f guide.awk > guide.po -@end example - -@noindent -This produces: - -@example -@c file eg/data/guide.po -#: guide.awk:4 -msgid "Don't Panic" -msgstr "" - -#: guide.awk:5 -msgid "The Answer Is" -msgstr "" - -@c endfile -@end example - -This original portable object file is saved and reused for each language -into which the application is translated. The @code{msgid} -is the original string and the @code{msgstr} is the translation. - -@strong{Note:} Strings not marked with a leading underscore do not -appear in the @file{guide.po} file. - -Next, the messages must be translated. -Here is a translation to a hypothetical dialect of English, -called ``Mellow'':@footnote{Perhaps it would be better if it were -called ``Hippy.'' Ah, well.} - -@example -@group -$ cp guide.po guide-mellow.po -@var{Add translations to} guide-mellow.po @dots{} -@end group -@end example - -@noindent -Following are the translations: - -@example -@c file eg/data/guide-mellow.po -#: guide.awk:4 -msgid "Don't Panic" -msgstr "Hey man, relax!" - -#: guide.awk:5 -msgid "The Answer Is" -msgstr "Like, the scoop is" - -@c endfile -@end example - -@cindex Linux -@cindex GNU/Linux -The next step is to make the directory to hold the binary message object -file and then to create the @file{guide.mo} file. -The directory layout shown here is standard for GNU @code{gettext} on -GNU/Linux systems. Other versions of @code{gettext} may use a different -layout: - -@example -$ mkdir en_US en_US/LC_MESSAGES -@end example - -@cindex @command{msgfmt} utility -The @command{msgfmt} utility does the conversion from human-readable -@file{.po} file to machine-readable @file{.mo} file. -By default, @command{msgfmt} creates a file named @file{messages}. -This file must be renamed and placed in the proper directory so that -@command{gawk} can find it: - -@example -$ msgfmt guide-mellow.po -$ mv messages en_US/LC_MESSAGES/guide.mo -@end example - -Finally, we run the program to test it: - -@example -$ gawk -f guide.awk -@print{} Hey man, relax! -@print{} Like, the scoop is 42 -@print{} Pardon me, Zaphod who? -@end example - -If the two replacement functions for @code{dcgettext} -and @code{bindtextdomain} -(@pxref{I18N Portability, ,@command{awk} Portability Issues}) -are in a file named @file{libintl.awk}, -then we can run @file{guide.awk} unchanged as follows: - -@example -$ gawk --posix -f guide.awk -f libintl.awk -@print{} Don't Panic -@print{} The Answer Is 42 -@print{} Pardon me, Zaphod who? -@end example - -@node Gawk I18N, , I18N Example, Internationalization -@section @command{gawk} Can Speak Your Language - -As of @value{PVERSION} 3.1, @command{gawk} itself has been internationalized -using the GNU @code{gettext} package. -@ifinfo -(GNU @code{gettext} is described in -complete detail in -@ref{Top}.) -@end ifinfo -@ifnotinfo -(GNU @code{gettext} is described in -complete detail in -@cite{GNU gettext tools}.) 
-@end ifnotinfo -As of this writing, the latest version of GNU @code{gettext} is -@uref{ftp://gnudist.gnu.org/gnu/gettext/gettext-0.10.37.tar.gz, @value{PVERSION} 0.10.37}. - -If a translation of @command{gawk}'s messages exists, -then @command{gawk} produces usage messages, warnings, -and fatal errors in the local language. - -@cindex @code{--with-included-gettext} configuration option -@cindex configuration option, @code{--with-included-gettext} -On systems that do not use @value{PVERSION} 2 (or later) of the GNU C library, you should -configure @command{gawk} with the @option{--with-included-gettext} option -before compiling and installing it. -@xref{Additional Configuration Options}, -for more information. - -@node Advanced Features, Invoking Gawk, Internationalization, Top -@chapter Advanced Features of @command{gawk} -@cindex advanced features -@cindex features, advanced -@ignore -Contributed by: Peter Langston - - Found in Steve English's "signature" line: - -"Write documentation as if whoever reads it is a violent psychopath -who knows where you live." -@end ignore -@quotation -@i{Write documentation as if whoever reads it is -a violent psychopath who knows where you live.}@* -Steve English, as quoted by Peter Langston -@end quotation - -This @value{CHAPTER} discusses advanced features in @command{gawk}. -It's a bit of a ``grab bag'' of items that are otherwise unrelated -to each other. -First, a command-line option allows @command{gawk} to recognize -non-decimal numbers in input data, not just in @command{awk} -programs. Next, two-way I/O, discussed briefly in earlier parts of this -@value{DOCUMENT}, is described in full detail, along with the basics -of TCP/IP networking and BSD portal files. Finally, @command{gawk} -can @dfn{profile} an @command{awk} program, making it possible to tune -it for performance. - -@ref{Dynamic Extensions, ,Adding New Built-in Functions to @command{gawk}}, -discusses the ability to dynamically add new built-in functions to -@command{gawk}. As this feature is still immature and likely to change, -its description is relegated to an appendix. - -@menu -* Non-decimal Data:: Allowing non-decimal input data. -* Two-way I/O:: Two-way communications with another process. -* TCP/IP Networking:: Using @command{gawk} for network programming. -* Portal Files:: Using @command{gawk} with BSD portals. -* Profiling:: Profiling your @command{awk} programs. -@end menu - -@node Non-decimal Data, Two-way I/O, Advanced Features, Advanced Features -@section Allowing Non-Decimal Input Data -@cindex @code{--non-decimal-data} option -@cindex command-line option, @code{--non-decimal-data} - -If you run @command{gawk} with the @option{--non-decimal-data} option, -you can have non-decimal constants in your input data: - -@c line break here for small book format -@example -$ echo 0123 123 0x123 | -> gawk --non-decimal-data '@{ printf "%d, %d, %d\n", -> $1, $2, $3 @}' -@print{} 83, 123, 291 -@end example - -For this feature to work, write your program so that -@command{gawk} treats your data as numeric: - -@example -$ echo 0123 123 0x123 | gawk '@{ print $1, $2, $3 @}' -@print{} 0123 123 0x123 -@end example - -@noindent -The @code{print} statement treats its expressions as strings. -Although the fields can act as numbers when necessary, -they are still strings, so @code{print} does not try to treat them -numerically. You may need to add zero to a field to force it to -be treated as a number. 
For example: - -@example -$ echo 0123 123 0x123 | gawk --non-decimal-data ' -> @{ print $1, $2, $3 -> print $1 + 0, $2 + 0, $3 + 0 @}' -@print{} 0123 123 0x123 -@print{} 83 123 291 -@end example - -Because it is common to have decimal data with leading zeros, and because -using it could lead to surprising results, the default is to leave this -facility disabled. If you want it, you must explicitly request it. - -@cindex conventions, programming -@cindex programming conventions -@strong{Caution:} -@emph{Use of this option is not recommended.} -It can break old programs very badly. -Instead, use the @code{strtonum} function to convert your data -(@pxref{Non-decimal-numbers, ,Octal and Hexadecimal Numbers}). -This makes your programs easier to write and easier to read, and -leads to less surprising results. - -@node Two-way I/O, TCP/IP Networking, Non-decimal Data, Advanced Features -@section Two-Way Communications with Another Process -@cindex Brennan, Michael -@cindex sex, programmer attractiveness -@smallexample -@c Path: cssun.mathcs.emory.edu!gatech!newsxfer3.itd.umich.edu!news-peer.sprintlink.net!news-sea-19.sprintlink.net!news-in-west.sprintlink.net!news.sprintlink.net!Sprint!204.94.52.5!news.whidbey.com!brennan -From: brennan@@whidbey.com (Mike Brennan) -Newsgroups: comp.lang.awk -Subject: Re: Learn the SECRET to Attract Women Easily -Date: 4 Aug 1997 17:34:46 GMT -@c Organization: WhidbeyNet -@c Lines: 12 -Message-ID: <5s53rm$eca@@news.whidbey.com> -@c References: <5s20dn$2e1@chronicle.concentric.net> -@c Reply-To: brennan@whidbey.com -@c NNTP-Posting-Host: asn202.whidbey.com -@c X-Newsreader: slrn (0.9.4.1 UNIX) -@c Xref: cssun.mathcs.emory.edu comp.lang.awk:5403 - -On 3 Aug 1997 13:17:43 GMT, Want More Dates??? - wrote: ->Learn the SECRET to Attract Women Easily -> ->The SCENT(tm) Pheromone Sex Attractant For Men to Attract Women - -The scent of awk programmers is a lot more attractive to women than -the scent of perl programmers. --- -Mike Brennan -@c brennan@@whidbey.com -@end smallexample - -It is often useful to be able to -send data to a separate program for -processing and then read the result. This can always be -done with temporary files: - -@example -# write the data for processing -tempfile = ("/tmp/mydata." PROCINFO["pid"]) -while (@var{not done with data}) - print @var{data} | ("subprogram > " tempfile) -close("subprogram > " tempfile) - -# read the results, remove tempfile when done -while ((getline newdata < tempfile) > 0) - @var{process} newdata @var{appropriately} -close(tempfile) -system("rm " tempfile) -@end example - -@noindent -This works, but not elegantly. - -@cindex coprocess -@cindex two-way I/O -@cindex I/O, two-way -@cindex @code{|&} I/O operator -@cindex @command{csh} utility -Starting with @value{PVERSION} 3.1 of @command{gawk}, it is possible to -open a @emph{two-way} pipe to another process. The second process is -termed a @dfn{coprocess}, since it runs in parallel with @command{gawk}. -The two-way connection is created using the new @samp{|&} operator -(borrowed from the Korn Shell, @command{ksh}):@footnote{This is very -different from the same operator in the C shell, @command{csh}.} - -@example -do @{ - print @var{data} |& "subprogram" - "subprogram" |& getline results -@} while (@var{data left to process}) -close("subprogram") -@end example - -The first time an I/O operation is executed using the @samp{|&} -operator, @command{gawk} creates a two-way pipeline to a child process -that runs the other program. 
Output created with @code{print} -or @code{printf} is written to the program's standard input, and -output from the program's standard output can be read by the @command{gawk} -program using @code{getline}. -As is the case with processes started by @samp{|}, the subprogram -can be any program, or pipeline of programs, that can be started by -the shell. - -There are some cautionary items to be aware of: - -@itemize @bullet -@item -As the code inside @command{gawk} currently stands, the coprocess's -standard error goes to the same place that the parent @command{gawk}'s -standard error goes. It is not possible to read the child's -standard error separately. - -@cindex deadlock -@item -I/O buffering may be a problem. @command{gawk} automatically -flushes all output down the pipe to the child process. -However, if the coprocess does not flush its output, -@command{gawk} may hang when doing a @code{getline} in order to read -the coprocess's results. This could lead to a situation -known as @dfn{deadlock}, where each process is waiting for the -other one to do something. -@end itemize - -It is possible to close just one end of the two-way pipe to -a coprocess, by supplying a second argument to the @code{close} -function of either @code{"to"} or @code{"from"} -(@pxref{Close Files And Pipes, ,Closing Input and Output Redirections}). -These strings tell @command{gawk} to close the end of the pipe -that sends data to the process or the end that reads from it, -respectively. - -This is particularly necessary in order to use -the system @command{sort} utility as part of a coprocess; -@command{sort} must read @emph{all} of its input -data before it can produce any output. -The @command{sort} program does not receive an end-of-file indication -until @command{gawk} closes the write end of the pipe. - -When you have finished writing data to the @command{sort} -utility, you can close the @code{"to"} end of the pipe, and -then start reading sorted data via @code{getline}. -For example: - -@example -BEGIN @{ - command = "LC_ALL=C sort" - n = split("abcdefghijklmnopqrstuvwxyz", a, "") - - for (i = n; i > 0; i--) - print a[i] |& command - close(command, "to") - - while ((command |& getline line) > 0) - print "got", line - close(command) -@} -@end example - -This program writes the letters of the alphabet in reverse order, one -per line, down the two-way pipe to @command{sort}. It then closes the -write end of the pipe, so that @command{sort} receives an end-of-file -indication. This causes @command{sort} to sort the data and write the -sorted data back to the @command{gawk} program. Once all of the data -has been read, @command{gawk} terminates the coprocess and exits. - -As a side note, the assignment @samp{LC_ALL=C} in the @command{sort} -command ensures traditional Unix (ASCII) sorting from @command{sort}. 
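As a sketch of how this pattern might be packaged for reuse, the coprocess can be hidden inside a user-defined function that sorts the values of an array through the system @command{sort} utility. The function name @code{sort_array} and its parameter names are invented for this example; they are not part of @command{gawk}:

@example
function sort_array(unsorted, sorted,    command, i, n, line)
@{
    command = "LC_ALL=C sort"
    for (i in sorted)
        delete sorted[i]             # start with an empty result array
    for (i in unsorted)
        print unsorted[i] |& command
    close(command, "to")             # sort now sees end-of-file
    n = 0
    while ((command |& getline line) > 0)
        sorted[++n] = line           # collect the sorted values in order
    close(command)                   # terminate the coprocess
    return n                         # how many elements came back
@}
@end example

A caller fills @code{unsorted}, calls @code{sort_array(unsorted, result)}, and then steps through @code{result[1]} up to @code{result[n]}, where @code{n} is the function's return value.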
- -@node TCP/IP Networking, Portal Files, Two-way I/O, Advanced Features -@section Using @command{gawk} for Network Programming -@cindex networking, TCP/IP -@cindex TCP/IP networking -@cindex @file{/inet} special files -@cindex @code{EMISTERED} -@quotation -@code{EMISTERED}: @i{A host is a host from coast to coast,@* -and no-one can talk to host that's close,@* -unless the host that isn't close@* -is busy hung or dead.} -@end quotation - -In addition to being able to open a two-way pipeline to a coprocess -on the same system -(@pxref{Two-way I/O, ,Two-Way Communications with Another Process}), -it is possible to make a two-way connection to -another process on another system across an IP networking connection. - -You can think of this as just a @emph{very long} two-way pipeline to -a coprocess. -The way @command{gawk} decides that you want to use TCP/IP networking is -by recognizing special @value{FN}s that begin with @samp{/inet/}. - -The full syntax of the special @value{FN} is -@file{/inet/@var{protocol}/@var{local-port}/@var{remote-host}/@var{remote-port}}. -The meaning of the components are: - -@table @var -@item protocol -The protocol to use over IP. This must be either @samp{tcp}, -@samp{udp}, or @samp{raw}, for a TCP, UDP, or raw IP connection, -respectively. The use of TCP is recommended for most applications. - -@strong{Caution:} The use of raw sockets is not currently supported -in @value{PVERSION} 3.1 of @command{gawk}. - -@item local-port -@cindex @code{getservbyname} C library function -The local TCP or UDP port number to use. Use a port number of @samp{0} -when you want the system to pick a port. This is what you should do -when writing a TCP or UDP client. -You may also use a well-known service name, such as @samp{smtp} -or @samp{http}, in which case @command{gawk} attempts to determine -the pre-defined port number using the C @code{getservbyname} function. - -@item remote-host -The IP address or fully-qualified domain name of the Internet -host to which you want to connect. - -@item remote-port -The TCP or UDP port number to use on the given @var{remote-host}. -Again, use @samp{0} if you don't care, or else a well-known -service name. -@end table - -Consider the following very simple example: - -@example -BEGIN @{ - Service = "/inet/tcp/0/localhost/daytime" - Service |& getline - print $0 - close(Service) -@} -@end example - -This program reads the current date and time from the local system's -TCP @samp{daytime} server. -It then prints the results and closes the connection. - -Because this topic is extensive, the use of @command{gawk} for -TCP/IP programming is documented separately. -@ifinfo -@xref{Top}, -@end ifinfo -@ifnotinfo -See @cite{TCP/IP Internetworking with @command{gawk}}, -which comes as part of the @command{gawk} distribution, -@end ifnotinfo -for a much more complete introduction and discussion, as well as -extensive examples. 
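Although the details belong to that separate document, the same special @value{FN} syntax also covers the server side of a connection: supply a real local port number and use @samp{0} for both the remote host and the remote port. The following minimal sketch waits for a single client, sends it a greeting, reads one line back, and closes the connection. The port number 8080 is an arbitrary choice for illustration:

@example
BEGIN @{
    Service = "/inet/tcp/8080/0/0"   # listen on local TCP port 8080
    print "Hello from gawk" |& Service
    Service |& getline answer        # read one line from the client
    print "the client said:", answer
    close(Service)
@}
@end example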
- -@node Portal Files, Profiling, TCP/IP Networking, Advanced Features -@section Using @command{gawk} with BSD Portals -@cindex portal files -@cindex BSD portal files -@cindex TCP/IP networking -@cindex @file{/p} special files -@cindex @code{--enable-portals} configuration option -@cindex configuration option, @code{--enable-portals} -@cindex BSD-based operating systems - -Similar to the @file{/inet} special files, if @command{gawk} -is configured with the @option{--enable-portals} option -(@pxref{Quick Installation, , Compiling @command{gawk} for Unix}), -then @command{gawk} treats -files whose pathnames begin with @code{/p} as 4.4 BSD-style portals. - -When used with the @samp{|&} operator, @command{gawk} opens the file -for two-way communications. The operating system's portal mechanism -then manages creating the process associated with the portal and -the corresponding communications with the portal's process. - -@node Profiling, , Portal Files, Advanced Features -@section Profiling Your @command{awk} Programs -@cindex profiling @command{awk} programs -@cindex @command{pgawk} program - -Beginning with @value{PVERSION} 3.1 of @command{gawk}, you may produce execution -traces of your @command{awk} programs. -This is done with a specially compiled version of @command{gawk}, -called @command{pgawk} (``profiling @command{gawk}''). - -@cindex @file{awkprof.out} profiling output file -@cindex profiling output file (@file{awkprof.out}) -@command{pgawk} is identical in every way to @command{gawk}, except that when -it has finished running, it creates a profile of your program in a file -named @file{awkprof.out}. -Because it is profiling, it also executes up to 45 percent slower than -@command{gawk} normally does. - -As shown in the following example, -the @option{--profile} option can be used to change the name of the file -where @command{pgawk} will write the profile: - -@example -$ pgawk --profile=myprog.prof -f myprog.awk data1 data2 -@end example - -@noindent -In the above example, @command{pgawk} places the profile in -@file{myprog.prof} instead of in @file{awkprof.out}. - -Regular @command{gawk} also accepts this option. When called with just -@option{--profile}, @command{gawk} ``pretty prints'' the program into -@file{awkprof.out}, without any execution counts. You may supply an -option to @option{--profile} to change the @value{FN}. Here is a sample -session showing a simple @command{awk} program, its input data, and the -results from running @command{pgawk}. First, the @command{awk} program: - -@example -BEGIN @{ print "First BEGIN rule" @} - -END @{ print "First END rule" @} - -/foo/ @{ - print "matched /foo/, gosh" - for (i = 1; i <= 3; i++) - sing() -@} - -@{ - if (/foo/) - print "if is true" - else - print "else is true" -@} - -BEGIN @{ print "Second BEGIN rule" @} - -END @{ print "Second END rule" @} - -function sing( dummy) -@{ - print "I gotta be me!" -@} -@end example - -Following is the input data: - -@example -foo -bar -baz -foo -junk -@end example - -Here is the @file{awkprof.out} that results from running @command{pgawk} -on this program and data. 
(This example also illustrates that @command{awk} -programmers sometimes have to work late.): - -@cindex blocks, @code{BEGIN} and @code{END} -@example - # gawk profile, created Sun Aug 13 00:00:15 2000 - - # BEGIN block(s) - - BEGIN @{ - 1 print "First BEGIN rule" - 1 print "Second BEGIN rule" - @} - - # Rule(s) - - 5 /foo/ @{ # 2 - 2 print "matched /foo/, gosh" - 6 for (i = 1; i <= 3; i++) @{ - 6 sing() - @} - @} - - 5 @{ - 5 if (/foo/) @{ # 2 - 2 print "if is true" - 3 @} else @{ - 3 print "else is true" - @} - @} - - # END block(s) - - END @{ - 1 print "First END rule" - 1 print "Second END rule" - @} - - # Functions, listed alphabetically - - 6 function sing(dummy) - @{ - 6 print "I gotta be me!" - @} -@end example - -The previous example illustrates many of the basic rules for profiling output. -The rules are as follows: - -@itemize @bullet -@item -The program is printed in the order @code{BEGIN} rule, -pattern/action rules, @code{END} rule and functions, listed -alphabetically. -Multiple @code{BEGIN} and @code{END} rules are merged together. - -@item -Pattern-action rules have two counts. -The first count, to the left of the rule, shows how many times -the rule's pattern was @emph{tested}. -The second count, to the right of the rule's opening left brace -in a comment, -shows how many times the rule's action was @emph{executed}. -The difference between the two indicates how many times the rule's -pattern evaluated to false. - -@item -Similarly, -the count for an @code{if}-@code{else} statement shows how many times -the condition was tested. -To the right of the opening left brace for the @code{if}'s body -is a count showing how many times the condition was true. -The count for the @code{else} -indicates how many times the test failed. - -@item -The count for a loop header (such as @code{for} -or @code{while}) shows how many times the loop test was executed. -(Because of this, you can't just look at the count on the first -statement in a rule to determine how many times the rule was executed. -If the first statement is a loop, the count is misleading.) - -@item -For user-defined functions, the count next to the @code{function} -keyword indicates how many times the function was called. -The counts next to the statements in the body show how many times -those statements were executed. - -@item -The layout uses ``K&R'' style using tabs. -Braces are used everywhere, even when -the body of an @code{if}, @code{else}, or loop is only a single statement. - -@item -Parentheses are used only where needed, as indicated by the structure -of the program and the precedence rules. -@c extra verbiage here satisfies the copyeditor. ugh. -For example, @samp{(3 + 5) * 4} means add three plus five, then multiply -the total by four. However, @samp{3 + 5 * 4} has no parentheses, and -means @samp{3 + (5 * 4)}. - -@item -All string concatenations are parenthesized too. -(This could be made a bit smarter.) - -@item -Parentheses are used around the arguments to @code{print} -and @code{printf} only when -the @code{print} or @code{printf} statement is followed by a redirection. -Similarly, if -the target of a redirection isn't a scalar, it gets parenthesized. - -@item -@command{pgawk} supplies leading comments in -front of the @code{BEGIN} and @code{END} rules, -the pattern/action rules, and the functions. - -@end itemize - -The profiled version of your program may not look exactly like what you -typed when you wrote it. 
This is because @command{pgawk} creates the -profiled version by ``pretty printing'' its internal representation of -the program. The advantage to this is that @command{pgawk} can produce -a standard representation. The disadvantage is that all source code -comments are lost, as are the distinctions among multiple @code{BEGIN} -and @code{END} rules. Also, things such as: - -@example -/foo/ -@end example - -@noindent -come out as: - -@example -/foo/ @{ - print $0 -@} -@end example - -@noindent -which is correct, but possibly surprising. - -@cindex dynamic profiling -@cindex profiling, dynamic -Besides creating profiles when a program has completed, -@command{pgawk} can produce a profile while it is running. -This is useful if your @command{awk} program goes into an -infinite loop and you want to see what has been executed. -To use this feature, run @command{pgawk} in the background: - -@example -$ pgawk -f myprog & -[1] 13992 -@end example - -@cindex @command{kill} command -@cindex @code{SIGUSR1} signal -@cindex @code{USR1} signal -@cindex signals, @code{SIGUSR1} -@noindent -The shell prints a job number and process ID number, in this case, 13992. -Use the @command{kill} command to send the @code{USR1} signal -to @command{pgawk}: - -@example -$ kill -USR1 13992 -@end example - -@noindent -As usual, the profiled version of the program is written to -@file{awkprof.out}, or to a different file if you use the @option{--profile} -option. - -Along with the regular profile, as shown earlier, the profile -includes a trace of any active functions: - -@example -# Function Call Stack: - -# 3. baz -# 2. bar -# 1. foo -# -- main -- -@end example - -You may send @command{pgawk} the @code{USR1} signal as many times as you like. -Each time, the profile and function call trace are appended to the output -profile file. - -@cindex @code{SIGHUP} signal -@cindex @code{HUP} signal -@cindex signals, @code{SIGHUP} -If you use the @code{HUP} signal instead of the @code{USR1} signal, -@command{pgawk} produces the profile and the function call trace, and then exits. - -@node Invoking Gawk, Library Functions, Advanced Features, Top -@chapter Running @command{awk} and @command{gawk} - -This @value{CHAPTER} covers how to run awk, both POSIX-standard -and @command{gawk}-specific command-line options, and what -@command{awk} and -@command{gawk} do with non-option arguments. -It then proceeds to cover how @command{gawk} searches for source files, -obsolete options and/or features, and known bugs in @command{gawk}. -This @value{CHAPTER} rounds out the discussion of @command{awk} -as a program and as a language. - -While a number of the options and features described here were -discussed in passing earlier in the book, this @value{CHAPTER} provides the -full details. - -@menu -* Command Line:: How to run @command{awk}. -* Options:: Command-line options and their meanings. -* Other Arguments:: Input file names and variable assignments. -* AWKPATH Variable:: Searching directories for @command{awk} - programs. -* Obsolete:: Obsolete Options and/or features. -* Undocumented:: Undocumented Options and Features. -* Known Bugs:: Known Bugs in @command{gawk}. -@end menu - -@node Command Line, Options, Invoking Gawk, Invoking Gawk -@section Invoking @command{awk} -@cindex command line -@cindex invocation of @command{gawk} -@cindex arguments, command-line -@cindex options, command-line -@cindex long options -@cindex options, long - -There are two ways to run @command{awk}---with an explicit program or with -one or more program files. 
Here are templates for both of them; items -enclosed in [@dots{}] in these templates are optional: - -@example -awk @r{[@var{options}]} -f progfile @r{[@code{--}]} @var{file} @dots{} -awk @r{[@var{options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{} -@end example - -Besides traditional one-letter POSIX-style options, @command{gawk} also -supports GNU long options. - -@cindex empty program -@cindex dark corner -@cindex lint checks -It is possible to invoke @command{awk} with an empty program: - -@example -awk '' datafile1 datafile2 -@end example - -@noindent -Doing so makes little sense though; @command{awk} exits -silently when given an empty program. -@value{DARKCORNER} -If @option{--lint} has -been specified on the command-line, @command{gawk} issues a -warning that the program is empty. - -@node Options, Other Arguments, Command Line, Invoking Gawk -@section Command-Line Options - -Options begin with a dash and consist of a single character. -GNU-style long options consist of two dashes and a keyword. -The keyword can be abbreviated, as long as the abbreviation allows the option -to be uniquely identified. If the option takes an argument, then the -keyword is either immediately followed by an equals sign (@samp{=}) and the -argument's value, or the keyword and the argument's value are separated -by whitespace. -If a particular option with a value is given more than once, it is the -last value that counts. - -Each long option for @command{gawk} has a corresponding -POSIX-style option. -The long and short options are -interchangeable in all contexts. -The options and their meanings are as follows: - -@table @code -@item -F @var{fs} -@itemx --field-separator @var{fs} -@cindex @code{-F} option -@cindex command-line option, @code{-F} -@cindex @code{--field-separator} option -@cindex command-line option, @code{--field-separator} -Sets the @code{FS} variable to @var{fs} -(@pxref{Field Separators, ,Specifying How Fields Are Separated}). - -@item -f @var{source-file} -@itemx --file @var{source-file} -@cindex @code{-f} option -@cindex command-line option, @code{-f} -@cindex @code{--file} option -@cindex command-line option, @code{--file} -Indicates that the @command{awk} program is to be found in @var{source-file} -instead of in the first non-option argument. - -@item -v @var{var}=@var{val} -@itemx --assign @var{var}=@var{val} -@cindex @code{-v} option -@cindex command-line option, @code{-v} -@cindex @code{--assign} option -@cindex command-line option, @code{--assign} -Sets the variable @var{var} to the value @var{val} @emph{before} -execution of the program begins. Such variable values are available -inside the @code{BEGIN} rule -(@pxref{Other Arguments, ,Other Command-Line Arguments}). - -The @option{-v} option can only set one variable, but it can be used -more than once, setting another variable each time, like this: -@samp{awk @w{-v foo=1} @w{-v bar=2} @dots{}}. - -@strong{Caution:} Using @option{-v} to set the values of the built-in -variables may lead to surprising results. @command{awk} will reset the -values of those variables as it needs to, possibly ignoring any -predefined value you may have given. - -@item -mf @var{N} -@itemx -mr @var{N} -@cindex @code{-mf} option -@cindex command-line option, @code{-mf} -@cindex @code{-mr} option -@cindex command-line option, @code{-mr} -Set various memory limits to the value @var{N}. The @samp{f} flag sets -the maximum number of fields and the @samp{r} flag sets the maximum -record size. 
These two flags and the @option{-m} option are from the -Bell Laboratories research version of Unix @command{awk}. They are provided -for compatibility but otherwise ignored by -@command{gawk}, since @command{gawk} has no predefined limits. -(The Bell Laboratories @command{awk} no longer needs these options; -it continues to accept them to avoid breaking old programs.) - -@item -W @var{gawk-opt} -@cindex @code{-W} option -@cindex command-line option, @code{-W} -Following the POSIX standard, implementation-specific -options are supplied as arguments to the @option{-W} option. These options -also have corresponding GNU-style long options. -Note that the long options may be abbreviated, as long as -the abbreviations remain unique. -The full list of @command{gawk}-specific options is provided next. - -@item -- -Signals the end of the command-line options. The following arguments -are not treated as options even if they begin with @samp{-}. This -interpretation of @option{--} follows the POSIX argument parsing -conventions. - -This is useful if you have @value{FN}s that start with @samp{-}, -or in shell scripts, if you have @value{FN}s that will be specified -by the user that could start with @samp{-}. -@end table - -The previous list described options mandated by the POSIX standard, -as well as options available in the Bell Laboratories version of @command{awk}. -The following list describes @command{gawk}-specific options: - -@table @code -@item -W compat -@itemx -W traditional -@itemx --compat -@itemx --traditional -@cindex @code{--compat} option -@cindex command-line option, @code{--compat} -@cindex @code{--traditional} option -@cindex command-line option, @code{--traditional} -@cindex compatibility mode -Specifies @dfn{compatibility mode}, in which the GNU extensions to -the @command{awk} language are disabled, so that @command{gawk} behaves just -like the Bell Laboratories research version of Unix @command{awk}. -@option{--traditional} is the preferred form of this option. -@xref{POSIX/GNU, ,Extensions in @command{gawk} Not in POSIX @command{awk}}, -which summarizes the extensions. Also see -@ref{Compatibility Mode, ,Downward Compatibility and Debugging}. - -@item -W copyright -@itemx --copyright -@cindex @code{--copyright} option -@cindex command-line option, @code{--copyright} -Print the short version of the General Public License and then exit. - -@item -W copyleft -@itemx --copyleft -@cindex @code{--copyleft} option -@cindex command-line option, @code{--copyleft} -Just like @option{--copyright}. -This option may disappear in a future version of @command{gawk}. - -@cindex @code{--dump-variables} option -@cindex command-line option, @code{--dump-variables} -@cindex @file{awkvars.out} global variable list output file -@item -W dump-variables@r{[}=@var{file}@r{]} -@itemx --dump-variables@r{[}=@var{file}@r{]} -Print a sorted list of global variables, their types, and final values -to @var{file}. If no @var{file} is provided, @command{gawk} prints this -list to a file named @file{awkvars.out} in the current directory. - -@cindex common mistakes -@cindex mistakes, common -@cindex errors, common -Having a list of all the global variables is a good way to look for -typographical errors in your programs. -You would also use this option if you have a large program with a lot of -functions, and you want to be sure that your functions don't -inadvertently use global variables that you meant to be local. 
-(This is a particularly easy mistake to make with simple variable -names like @code{i}, @code{j}, and so on.) - -@item -W gen-po -@itemx --gen-po -@cindex @code{--gen-po} option -@cindex command-line option, @code{--gen-po} -Analyze the source program and -generate a GNU @code{gettext} Portable Object file on standard -output for all string constants that have been marked for translation. -@xref{Internationalization, ,Internationalization with @command{gawk}}, -for information about this option. - -@item -W help -@itemx -W usage -@itemx --help -@itemx --usage -@cindex @code{--help} option -@cindex command-line option, @code{--help} -@cindex @code{--usage} option -@cindex command-line option, @code{--usage} -Print a ``usage'' message summarizing the short and long style options -that @command{gawk} accepts and then exit. - -@item -W lint@r{[}=fatal@r{]} -@itemx --lint@r{[}=fatal@r{]} -@cindex @code{--lint} option -@cindex command-line option, @code{--lint} -@cindex lint checks -@cindex fatal errors -Warn about constructs that are dubious or non-portable to -other @command{awk} implementations. -Some warnings are issued when @command{gawk} first reads your program. Others -are issued at runtime, as your program executes. -With an optional argument of @samp{fatal}, -lint warnings become fatal errors. -This may be drastic but its use will certainly encourage the -development of cleaner @command{awk} programs. - -@item -W lint-old -@itemx --lint-old -@cindex @code{--lint-old} option -@cindex command-line option, @code{--lint-old} -@cindex lint checks -Warn about constructs that are not available in the original version of -@command{awk} from Version 7 Unix -(@pxref{V7/SVR3.1, ,Major Changes Between V7 and SVR3.1}). - -@item -W non-decimal-data -@itemx --non-decimal-data -@cindex @code{--non-decimal-data} option -@cindex command-line option, @code{--non-decimal-data} -Enable automatic interpretation of octal and hexadecimal -values in input data -(@pxref{Non-decimal Data, ,Allowing Non-Decimal Input Data}). - -@strong{Caution:} This option can severely break old programs. -Use with care. - -@item -W posix -@itemx --posix -@cindex @code{--posix} option -@cindex command-line option, @code{--posix} -@cindex POSIX mode -Operate in strict POSIX mode. This disables all @command{gawk} -extensions (just like @option{--traditional}) and adds the following additional -restrictions: - -@c IMPORTANT! Keep this list in sync with the one in node POSIX - -@itemize @bullet -@item -@code{\x} escape sequences are not recognized -(@pxref{Escape Sequences}). - -@item -Newlines do not act as whitespace to separate fields when @code{FS} is -equal to a single space -(@pxref{Fields, , Examining Fields}). - -@item -Newlines are not allowed after @samp{?} or @samp{:} -(@pxref{Conditional Exp, ,Conditional Expressions}). - -@item -The synonym @code{func} for the keyword @code{function} is not -recognized (@pxref{Definition Syntax, ,Function Definition Syntax}). - -@item -The @samp{**} and @samp{**=} operators cannot be used in -place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators}, -and also @pxref{Assignment Ops, ,Assignment Expressions}). - -@item -Specifying @samp{-Ft} on the command-line does not set the value -of @code{FS} to be a single tab character -(@pxref{Field Separators, ,Specifying How Fields Are Separated}). - -@item -The @code{fflush} built-in function is not supported -(@pxref{I/O Functions, ,Input/Output Functions}). 
-@end itemize - -@cindex automatic warnings -@cindex warnings, automatic -If you supply both @option{--traditional} and @option{--posix} on the -command-line, @option{--posix} takes precedence. @command{gawk} -also issues a warning if both options are supplied. - -@item -W profile@r{[}=@var{file}@r{]} -@itemx --profile@r{[}=@var{file}@r{]} -@cindex @code{--profile} option -@cindex command-line option, @code{--profile} -Enable profiling of @command{awk} programs -(@pxref{Profiling, ,Profiling Your @command{awk} Programs}). -By default, profiles are created in a file named @file{awkprof.out}. -The optional @var{file} argument allows you to specify a different -@value{FN} for the profile file. - -When run with @command{gawk}, the profile is just a ``pretty printed'' version -of the program. When run with @command{pgawk}, the profile contains execution -counts for each statement in the program in the left margin, and function -call counts for each function. - -@item -W re-interval -@itemx --re-interval -@cindex @code{--re-interval} option -@cindex command-line option, @code{--re-interval} -Allow interval expressions -(@pxref{Regexp Operators, , Regular Expression Operators}) -in regexps. -Because interval expressions were traditionally not available in @command{awk}, -@command{gawk} does not provide them by default. This prevents old @command{awk} -programs from breaking. - -@item -W source @var{program-text} -@itemx --source @var{program-text} -@cindex @code{--source} option -@cindex command-line option, @code{--source} -Program source code is taken from the @var{program-text}. This option -allows you to mix source code in files with source -code that you enter on the command-line. This is particularly useful -when you have library functions that you want to use from your command-line -programs (@pxref{AWKPATH Variable, ,The @env{AWKPATH} Environment Variable}). - -@item -W version -@itemx --version -@cindex @code{--version} option -@cindex command-line option, @code{--version} -Print version information for this particular copy of @command{gawk}. -This allows you to determine if your copy of @command{gawk} is up to date -with respect to whatever the Free Software Foundation is currently -distributing. -It is also useful for bug reports -(@pxref{Bugs, , Reporting Problems and Bugs}). -@end table - -As long as program text has been supplied, -any other options are flagged as invalid with a warning message but -are otherwise ignored. - -In compatibility mode, as a special case, if the value of @var{fs} supplied -to the @option{-F} option is @samp{t}, then @code{FS} is set to the tab -character (@code{"\t"}). This is only true for @option{--traditional} and not -for @option{--posix} -(@pxref{Field Separators, ,Specifying How Fields Are Separated}). - -The @option{-f} option may be used more than once on the command-line. -If it is, @command{awk} reads its program source from all of the named files, as -if they had been concatenated together into one big file. This is -useful for creating libraries of @command{awk} functions. These functions -can be written once and then retrieved from a standard place, instead -of having to be included into each individual program. -(As mentioned in -@ref{Definition Syntax, ,Function Definition Syntax}, -function names must be unique.) - -Library functions can still be used, even if the program is entered at the terminal, -by specifying @samp{-f /dev/tty}. After typing your program, -type @kbd{Ctrl-d} (the end-of-file character) to terminate it. 
-(You may also use @samp{-f -} to read program source from the standard -input but then you will not be able to also use the standard input as a -source of data.) - -Because it is clumsy using the standard @command{awk} mechanisms to mix source -file and command-line @command{awk} programs, @command{gawk} provides the -@option{--source} option. This does not require you to pre-empt the standard -input for your source code; it allows you to easily mix command-line -and library source code -(@pxref{AWKPATH Variable, ,The @env{AWKPATH} Environment Variable}). - -If no @option{-f} or @option{--source} option is specified, then @command{gawk} -uses the first non-option command-line argument as the text of the -program source code. - -@cindex @code{POSIXLY_CORRECT} environment variable -@cindex environment variable, @code{POSIXLY_CORRECT} -@cindex lint checks -If the environment variable @env{POSIXLY_CORRECT} exists, -then @command{gawk} behaves in strict POSIX mode, exactly as if -you had supplied the @option{--posix} command-line option. -Many GNU programs look for this environment variable to turn on -strict POSIX mode. If @option{--lint} is supplied on the command-line -and @command{gawk} turns on POSIX mode because of @env{POSIXLY_CORRECT}, -then it issues a warning message indicating that POSIX -mode is in effect. -You would typically set this variable in your shell's startup file. -For a Bourne-compatible shell (such as @command{bash}), you would add these -lines to the @file{.profile} file in your home directory: - -@example -POSIXLY_CORRECT=true -export POSIXLY_CORRECT -@end example - -@cindex @command{csh} utility -For a @command{csh} compatible -shell,@footnote{Not recommended.} -you would add this line to the @file{.login} file in your home directory: - -@example -setenv POSIXLY_CORRECT true -@end example - -Having @env{POSIXLY_CORRECT} set is not recommended for daily use, -but it is good for testing the portability of your programs to other -environments. - -@node Other Arguments, AWKPATH Variable, Options, Invoking Gawk -@section Other Command-Line Arguments - -Any additional arguments on the command-line are normally treated as -input files to be processed in the order specified. However, an -argument that has the form @code{@var{var}=@var{value}}, assigns -the value @var{value} to the variable @var{var}---it does not specify a -file at all. -(This was discussed earlier in -@ref{Assignment Options, ,Assigning Variables on the Command Line}.) - -@cindex @code{ARGIND} variable -@cindex @code{ARGV} variable -All these arguments are made available to your @command{awk} program in the -@code{ARGV} array (@pxref{Built-in Variables}). Command-line options -and the program text (if present) are omitted from @code{ARGV}. -All other arguments, including variable assignments, are -included. As each element of @code{ARGV} is processed, @command{gawk} -sets the variable @code{ARGIND} to the index in @code{ARGV} of the -current element. - -The distinction between @value{FN} arguments and variable-assignment -arguments is made when @command{awk} is about to open the next input file. -At that point in execution, it checks the @value{FN} to see whether -it is really a variable assignment; if so, @command{awk} sets the variable -instead of reading a file. - -Therefore, the variables actually receive the given values after all -previously specified files have been read. 
In particular, the values of -variables assigned in this fashion are @emph{not} available inside a -@code{BEGIN} rule -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}), -because such rules are run before @command{awk} begins scanning the argument list. - -@cindex dark corner -The variable values given on the command-line are processed for escape -sequences (@pxref{Escape Sequences}). -@value{DARKCORNER} - -In some earlier implementations of @command{awk}, when a variable assignment -occurred before any @value{FN}s, the assignment would happen @emph{before} -the @code{BEGIN} rule was executed. @command{awk}'s behavior was thus -inconsistent; some command-line assignments were available inside the -@code{BEGIN} rule, while others were not. Unfortunately, -some applications came to depend -upon this ``feature.'' When @command{awk} was changed to be more consistent, -the @option{-v} option was added to accommodate applications that depended -upon the old behavior. - -The variable assignment feature is most useful for assigning to variables -such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and -output formats before scanning the @value{DF}s. It is also useful for -controlling state if multiple passes are needed over a @value{DF}. For -example: - -@cindex multiple passes over data -@cindex passes, multiple -@example -awk 'pass == 1 @{ @var{pass 1 stuff} @} - pass == 2 @{ @var{pass 2 stuff} @}' pass=1 mydata pass=2 mydata -@end example - -Given the variable assignment feature, the @option{-F} option for setting -the value of @code{FS} is not -strictly necessary. It remains for historical compatibility. - -@node AWKPATH Variable, Obsolete, Other Arguments, Invoking Gawk -@section The @env{AWKPATH} Environment Variable -@cindex @env{AWKPATH} environment variable -@cindex environment variable, @env{AWKPATH} -@cindex search path -@cindex directory search -@cindex path, search -@cindex search path, for source files -@cindex differences between @command{gawk} and @command{awk} -@ifinfo -The previous @value{SECTION} described how @command{awk} program files can be named -on the command-line with the @option{-f} option. -@end ifinfo -In most @command{awk} -implementations, you must supply a precise path name for each program -file, unless the file is in the current directory. -But in @command{gawk}, if the @value{FN} supplied to the @option{-f} option -does not contain a @samp{/}, then @command{gawk} searches a list of -directories (called the @dfn{search path}), one by one, looking for a -file with the specified name. - -The search path is a string consisting of directory names -separated by colons. @command{gawk} gets its search path from the -@env{AWKPATH} environment variable. If that variable does not exist, -@command{gawk} uses a default path, which is -@samp{.:/usr/local/share/awk}.@footnote{Your version of @command{gawk} -may use a different directory; it -will depend upon how @command{gawk} was built and installed. The actual -directory is the value of @samp{$(datadir)} generated when -@command{gawk} was configured. You probably don't need to worry about this -though.} (Programs written for use by -system administrators should use an @env{AWKPATH} variable that -does not include the current directory, @file{.}.) - -The search path feature is particularly useful for building libraries -of useful @command{awk} functions. 
The library files can be placed in a -standard directory in the default path and then specified on -the command-line with a short @value{FN}. Otherwise, the full @value{FN} -would have to be typed for each file. - -By using both the @option{--source} and @option{-f} options, your command-line -@command{awk} programs can use facilities in @command{awk} library files. -@xref{Library Functions, , A Library of @command{awk} Functions}. -Path searching is not done if @command{gawk} is in compatibility mode. -This is true for both @option{--traditional} and @option{--posix}. -@xref{Options, ,Command-Line Options}. - -@strong{Note:} If you want files in the current directory to be found, -you must include the current directory in the path, either by including -@file{.} explicitly in the path or by writing a null entry in the -path. (A null entry is indicated by starting or ending the path with a -colon or by placing two colons next to each other (@samp{::}).) If the -current directory is not included in the path, then files cannot be -found in the current directory. This path search mechanism is identical -to the shell's. -@c someday, @cite{The Bourne Again Shell}.... - -Starting with @value{PVERSION} 3.0, if @env{AWKPATH} is not defined in the -environment, @command{gawk} places its default search path into -@code{ENVIRON["AWKPATH"]}. This makes it easy to determine -the actual search path that @command{gawk} will use -from within an @command{awk} program. - -While you can change @code{ENVIRON["AWKPATH"]} within your @command{awk} -program, this has no effect on the running program's behavior. This makes -sense: the @env{AWKPATH} environment variable is used to find the program -source files. Once your program is running, all the files have been -found, and @command{gawk} no longer needs to use @env{AWKPATH}. - -@node Obsolete, Undocumented, AWKPATH Variable, Invoking Gawk -@section Obsolete Options and/or Features - -@cindex deprecated options -@cindex obsolete options -@cindex deprecated features -@cindex obsolete features -This @value{SECTION} describes features and/or command-line options from -previous releases of @command{gawk} that are either not available in the -current version or that are still supported but deprecated (meaning that -they will @emph{not} be in the next release). - -@c update this section for each release! - -For @value{PVERSION} @value{VERSION} of @command{gawk}, there are no -deprecated command-line options -@c or other deprecated features -from the previous version of @command{gawk}. -The use of @samp{next file} (two words) for @code{nextfile} was deprecated -in @command{gawk} 3.0 but still worked. Starting with @value{PVERSION} 3.1, the -two word usage is no longer accepted. - -The process-related special files described in -@ref{Special Process, ,Special Files for Process-Related Information}, -work as described, but -are now considered deprecated. -@command{gawk} prints a warning message every time they are used. -(Use @code{PROCINFO} instead; see -@ref{Auto-set, ,Built-in Variables That Convey Information}.) -They will be removed from the next release of @command{gawk}. - -@ignore -This @value{SECTION} -is thus essentially a place holder, -in case some option becomes obsolete in a future version of @command{gawk}. 
-@end ignore - -@node Undocumented, Known Bugs, Obsolete, Invoking Gawk -@section Undocumented Options and Features -@cindex undocumented features -@cindex features, undocumented -@cindex Skywalker, Luke -@cindex Kenobi, Obi-Wan -@cindex Jedi knights -@cindex Knights, jedi -@quotation -@i{Use the Source, Luke!}@* -Obi-Wan -@end quotation - -This @value{SECTION} intentionally left -blank. - -@ignore -@c If these came out in the Info file or TeX document, then they wouldn't -@c be undocumented, would they? - -@command{gawk} has one undocumented option: - -@table @code -@item -W nostalgia -@itemx --nostalgia -Print the message @code{"awk: bailing out near line 1"} and dump core. -This option was inspired by the common behavior of very early versions of -Unix @command{awk} and by a t--shirt. -The message is @emph{not} subject to translation in non-English locales. -@c so there! nyah, nyah. -@end table - -Early versions of @command{awk} used to not require any separator (either -a newline or @samp{;}) between the rules in @command{awk} programs. Thus, -it was common to see one-line programs like: - -@example -awk '@{ sum += $1 @} END @{ print sum @}' -@end example - -@command{gawk} actually supports this but it is purposely undocumented -because it is considered bad style. The correct way to write such a program -is either - -@example -awk '@{ sum += $1 @} ; END @{ print sum @}' -@end example - -@noindent -or - -@example -awk '@{ sum += $1 @} - END @{ print sum @}' data -@end example - -@noindent -@xref{Statements/Lines, ,@command{awk} Statements Versus Lines}, for a fuller -explanation. - -You can insert newlines after the @samp{;} in @code{for} loops. -This seems to have been a long-undocumented feature in Unix @command{awk}. - -If the environment variable @env{WHINY_USERS} exists -when @command{gawk} is run, -then the associative @code{for} loop will go through the array -indices in sorted order. -The comparison used for sorting is simple string comparison; -any non-English or non-ASCII locales are not taken into account. -@code{IGNORECASE} does not affect the comparison either. - -@end ignore - -@node Known Bugs, , Undocumented, Invoking Gawk -@section Known Bugs in @command{gawk} -@cindex bugs, known in @command{gawk} -@cindex known bugs - -@itemize @bullet -@item -The @option{-F} option for changing the value of @code{FS} -(@pxref{Options, ,Command-Line Options}) -is not necessary given the command-line variable -assignment feature; it remains only for backwards compatibility. - -@item -Syntactically invalid single character programs tend to overflow -the parse stack, generating a rather unhelpful message. Such programs -are surprisingly difficult to diagnose in the completely general case -and the effort to do so really is not worth it. -@end itemize - -@ignore -@c Try this -@iftex -@page -@headings off -@majorheading II@ @ @ Using @command{awk} and @command{gawk} -Part II shows how to use @command{awk} and @command{gawk} for problem solving. -There is lots of code here for you to read and learn from. -It contains the following chapters: - -@itemize @bullet -@item -@ref{Library Functions, ,A Library of @command{awk} Functions}. - -@item -@ref{Sample Programs, ,Practical @command{awk} Programs}. 
- -@end itemize - -@page -@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @| -@oddheading @| @| @strong{@thischapter}@ @ @ @thispage -@end iftex -@end ignore - -@node Library Functions, Sample Programs, Invoking Gawk, Top -@chapter A Library of @command{awk} Functions - -@ref{User-defined, ,User-Defined Functions}, describes how to write -your own @command{awk} functions. Writing functions is important, because -it allows you to encapsulate algorithms and program tasks in a single -place. It simplifies programming, making program development more -manageable, and making programs more readable. - -One valuable way to learn a new programming language is to @emph{read} -programs in that language. To that end, this @value{CHAPTER} -and @ref{Sample Programs, ,Practical @command{awk} Programs}, -provide a good-sized body of code for you to read, -and hopefully, to learn from. - -@c 2e: USE TEXINFO-2 FUNCTION DEFINITION STUFF!!!!!!!!!!!!! -This @value{CHAPTER} presents a library of useful @command{awk} functions. -Many of the sample programs presented later in this @value{DOCUMENT} -use these functions. -The functions are presented here in a progression from simple to complex. - -@cindex Texinfo -@ref{Extract Program, ,Extracting Programs from Texinfo Source Files}, -presents a program that you can use to extract the source code for -these example library functions and programs from the Texinfo source -for this @value{DOCUMENT}. -(This has already been done as part of the @command{gawk} distribution.) - -If you have written one or more useful, general purpose @command{awk} functions -and would like to contribute them to the author's collection of @command{awk} -programs, see -@ref{How To Contribute, ,How to Contribute}, for more information. - -@cindex portability issues -The programs in this @value{CHAPTER} and in -@ref{Sample Programs, ,Practical @command{awk} Programs}, -freely use features that are @command{gawk}-specific. -It is straightforward to rewrite these programs for -different implementations of @command{awk}. - -Diagnostic error messages are sent to @file{/dev/stderr}. -Use @samp{| "cat 1>&2"} instead of @samp{> "/dev/stderr"}, if your system -does not have a @file{/dev/stderr} or if you cannot use @command{gawk}. - -A number of programs use @code{nextfile} -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}) -to skip any remaining input in the input file. -@ref{Nextfile Function, ,Implementing @code{nextfile} as a Function}, -shows you how to write a function that does the same thing. - -@c 12/2000: Thanks to Nelson Beebe for pointing out the output issue. -Finally, some of the programs choose to ignore upper- and lowercase -distinctions in their input. They do so by assigning one to @code{IGNORECASE}. -You can achieve almost the same effect@footnote{The effects are -not identical. Output of the transformed -record will be in all lowercase, while @code{IGNORECASE} preserves the original -contents of the input record.} by adding the following rule to the -beginning of the program: - -@example -# ignore case -@{ $0 = tolower($0) @} -@end example - -@noindent -Also, verify that all regexp and string constants used in -comparisons only use lowercase letters. - -@menu -* Library Names:: How to best name private global variables in - library functions. -* General Functions:: Functions that are of general use. -* Data File Management:: Functions for managing command-line data - files. 
-* Getopt Function:: A function for processing command-line - arguments. -* Passwd Functions:: Functions for getting user information. -* Group Functions:: Functions for getting group information. -@end menu - -@node Library Names, General Functions, Library Functions, Library Functions -@section Naming Library Function Global Variables - -@cindex names, use of -@cindex namespace issues in @command{awk} -@cindex documenting @command{awk} programs -@cindex programs, documenting -Due to the way the @command{awk} language evolved, variables are either -@dfn{global} (usable by the entire program) or @dfn{local} (usable just by -a specific function). There is no intermediate state analogous to -@code{static} variables in C. - -Library functions often need to have global variables that they can use to -preserve state information between calls to the function---for example, -@code{getopt}'s variable @code{_opti} -(@pxref{Getopt Function, ,Processing Command-Line Options}). -Such variables are called @dfn{private}, since the only functions that need to -use them are the ones in the library. - -When writing a library function, you should try to choose names for your -private variables that will not conflict with any variables used by -either another library function or a user's main program. For example, a -name like @samp{i} or @samp{j} is not a good choice, because user programs -often use variable names like these for their own purposes. - -@cindex conventions, programming -@cindex programming conventions -The example programs shown in this @value{CHAPTER} all start the names of their -private variables with an underscore (@samp{_}). Users generally don't use -leading underscores in their variable names, so this convention immediately -decreases the chances that the variable name will be accidentally shared -with the user's program. - -In addition, several of the library functions use a prefix that helps -indicate what function or set of functions use the variables---for example, -@code{_pw_byname} in the user database routines -(@pxref{Passwd Functions, ,Reading the User Database}). -This convention is recommended, since it even further decreases the -chance of inadvertent conflict among variable names. Note that this -convention is used equally well for variable names and for private -function names as well.@footnote{While all the library routines could have -been rewritten to use this convention, this was not done, in order to -show how my own @command{awk} programming style has evolved, and to -provide some basis for this discussion.} - -As a final note on variable naming, if a function makes global variables -available for use by a main program, it is a good convention to start that -variable's name with a capital letter---for -example, @code{getopt}'s @code{Opterr} and @code{Optind} variables -(@pxref{Getopt Function, ,Processing Command-Line Options}). -The leading capital letter indicates that it is global, while the fact that -the variable name is not all capital letters indicates that the variable is -not one of @command{awk}'s built-in variables, such as @code{FS}. 
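Putting these conventions together, the skeleton of a library file might
look like the following sketch.  The names are purely illustrative (there
is no such library); the point is only how the private and user-visible
names are spelled:

@example
# mylib.awk --- illustrate naming conventions (hypothetical library)

function mylib_save(item)
@{
    _mylib_count++                      # private state: `_' plus a prefix
    _mylib_data[_mylib_count] = item    # private array, same prefix
    Mylib_saved = _mylib_count          # user-visible global: leading capital
@}
@end example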
- -It is also important that @emph{all} variables in library -functions that do not need to save state are, in fact, declared -local.@footnote{@command{gawk}'s @option{--dump-variables} command-line -option is useful for verifying this.} If this is not done, the variable -could accidentally be used in the user's program, leading to bugs that -are very difficult to track down: - -@example -function lib_func(x, y, l1, l2) -@{ - @dots{} - @var{use variable} some_var # some_var should be local - @dots{} # but is not by oversight -@} -@end example - -@cindex Tcl -A different convention, common in the Tcl community, is to use a single -associative array to hold the values needed by the library function(s), or -``package.'' This significantly decreases the number of actual global names -in use. For example, the functions described in -@ref{Passwd Functions, , Reading the User Database}, -might have used array elements @code{@w{PW_data["inited"]}}, @code{@w{PW_data["total"]}}, -@code{@w{PW_data["count"]}}, and @code{@w{PW_data["awklib"]}}, instead of -@code{@w{_pw_inited}}, @code{@w{_pw_awklib}}, @code{@w{_pw_total}}, -and @code{@w{_pw_count}}. - -The conventions presented in this @value{SECTION} are exactly -that: conventions. You are not required to write your programs this -way---we merely recommend that you do so. - -@node General Functions, Data File Management, Library Names, Library Functions -@section General Programming - -This @value{SECTION} presents a number of functions that are of general -programming use. - -@menu -* Nextfile Function:: Two implementations of a @code{nextfile} - function. -* Assert Function:: A function for assertions in @command{awk} - programs. -* Round Function:: A function for rounding if @code{sprintf} does - not do it correctly. -* Cliff Random Function:: The Cliff Random Number Generator. -* Ordinal Functions:: Functions for using characters as numbers and - vice versa. -* Join Function:: A function to join an array into a string. -* Gettimeofday Function:: A function to get formatted times. -@end menu - -@node Nextfile Function, Assert Function, General Functions, General Functions -@subsection Implementing @code{nextfile} as a Function - -@cindex skipping input files -@cindex input files, skipping -The @code{nextfile} statement presented in -@ref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}, -is a @command{gawk}-specific extension---it is not available in most other -implementations of @command{awk}. This @value{SECTION} shows two versions of a -@code{nextfile} function that you can use to simulate @command{gawk}'s -@code{nextfile} statement if you cannot use @command{gawk}. - -A first attempt at writing a @code{nextfile} function is as follows: - -@example -# nextfile --- skip remaining records in current file -# this should be read in before the "main" awk program - -function nextfile() @{ _abandon_ = FILENAME; next @} -_abandon_ == FILENAME @{ next @} -@end example - -@cindex conventions, programming -@cindex programming conventions -Because it supplies a rule that must be executed first, this file should -be included before the main program. This rule compares the current -@value{DF}'s name (which is always in the @code{FILENAME} variable) to -a private variable named @code{_abandon_}. If the @value{FN} matches, -then the action part of the rule executes a @code{next} statement to -go on to the next record. (The use of @samp{_} in the variable name is -a convention. 
It is discussed more fully in -@ref{Library Names, , Naming Library Function Global Variables}.) - -The use of the @code{next} statement effectively creates a loop that reads -all the records from the current @value{DF}. -The end of the file is eventually reached and -a new @value{DF} is opened, changing the value of @code{FILENAME}. -Once this happens, the comparison of @code{_abandon_} to @code{FILENAME} -fails and execution continues with the first rule of the ``real'' program. - -The @code{nextfile} function itself simply sets the value of @code{_abandon_} -and then executes a @code{next} statement to start the -loop. -@ignore -@c If the function can't be used on other versions of awk, this whole -@c section is pointless, no? Sigh. -@footnote{@command{gawk} is the only known @command{awk} implementation -that allows you to -execute @code{next} from within a function body. Some other workaround -is necessary if you are not using @command{gawk}.} -@end ignore - -@cindex @code{nextfile} user-defined function -This initial version has a subtle problem. -If the same @value{DF} is listed @emph{twice} on the commandline, -one right after the other -or even with just a variable assignment between them, -this code skips right through the file, a second time, even though -it should stop when it gets to the end of the first occurrence. -A second version of @code{nextfile} that remedies this problem -is shown here: - -@example -@c file eg/lib/nextfile.awk -# nextfile --- skip remaining records in current file -# correctly handle successive occurrences of the same file -@c endfile -@ignore -@c file eg/lib/nextfile.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May, 1993 - -@c endfile -@end ignore -@c file eg/lib/nextfile.awk -# this should be read in before the "main" awk program - -function nextfile() @{ _abandon_ = FILENAME; next @} - -_abandon_ == FILENAME @{ - if (FNR == 1) - _abandon_ = "" - else - next -@} -@c endfile -@end example - -The @code{nextfile} function has not changed. It makes @code{_abandon_} -equal to the current @value{FN} and then executes a @code{next} statement. -The @code{next} statement reads the next record and increments @code{FNR} -so that @code{FNR} is guaranteed to have a value of at least two. -However, if @code{nextfile} is called for the last record in the file, -then @command{awk} closes the current @value{DF} and moves on to the next -one. Upon doing so, @code{FILENAME} is set to the name of the new file -and @code{FNR} is reset to one. If this next file is the same as -the previous one, @code{_abandon_} is still equal to @code{FILENAME}. -However, @code{FNR} is equal to one, telling us that this is a new -occurrence of the file and not the one we were reading when the -@code{nextfile} function was executed. In that case, @code{_abandon_} -is reset to the empty string, so that further executions of this rule -fail (until the next time that @code{nextfile} is called). - -If @code{FNR} is not one, then we are still in the original @value{DF} -and the program executes a @code{next} statement to skip through it. - -An important question to ask at this point is: given that the -functionality of @code{nextfile} can be provided with a library file, -why is it built into @command{gawk}? Adding -features for little reason leads to larger, slower programs that are -harder to maintain. -The answer is that building @code{nextfile} into @command{gawk} provides -significant gains in efficiency. 
If the @code{nextfile} function is executed -at the beginning of a large @value{DF}, @command{awk} still has to scan the entire -file, splitting it up into records, -@c at least conceptually -just to skip over it. The built-in -@code{nextfile} can simply close the file immediately and proceed to the -next one, which saves a lot of time. This is particularly important in -@command{awk}, because @command{awk} programs are generally I/O-bound (i.e., -they spend most of their time doing input and output, instead of performing -computations). - -@node Assert Function, Round Function, Nextfile Function, General Functions -@subsection Assertions - -@cindex assertions -@cindex @code{assert} C library function -When writing large programs, it is often useful to know -that a condition or set of conditions is true. Before proceeding with a -particular computation, you make a statement about what you believe to be -the case. Such a statement is known as an -@dfn{assertion}. The C language provides an @code{} header file -and corresponding @code{assert} macro that the programmer can use to make -assertions. If an assertion fails, the @code{assert} macro arranges to -print a diagnostic message describing the condition that should have -been true but was not, and then it kills the program. In C, using -@code{assert} looks this: - -@example -#include - -int myfunc(int a, double b) -@{ - assert(a <= 5 && b >= 17.1); - @dots{} -@} -@end example - -If the assertion fails, the program prints a message similar to this: - -@example -prog.c:5: assertion failed: a <= 5 && b >= 17.1 -@end example - -@cindex @code{assert} user-defined function -The C language makes it possible to turn the condition into a string for use -in printing the diagnostic message. This is not possible in @command{awk}, so -this @code{assert} function also requires a string version of the condition -that is being tested. -Following is the function: - -@example -@c file eg/lib/assert.awk -# assert --- assert that a condition is true. Otherwise exit. -@c endfile -@ignore -@c file eg/lib/assert.awk - -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May, 1993 - -@c endfile -@end ignore -@c file eg/lib/assert.awk -function assert(condition, string) -@{ - if (! condition) @{ - printf("%s:%d: assertion failed: %s\n", - FILENAME, FNR, string) > "/dev/stderr" - _assert_exit = 1 - exit 1 - @} -@} - -@group -END @{ - if (_assert_exit) - exit 1 -@} -@end group -@c endfile -@end example - -The @code{assert} function tests the @code{condition} parameter. If it -is false, it prints a message to standard error, using the @code{string} -parameter to describe the failed condition. It then sets the variable -@code{_assert_exit} to one and executes the @code{exit} statement. -The @code{exit} statement jumps to the @code{END} rule. If the @code{END} -rules finds @code{_assert_exit} to be true, it then exits immediately. - -The purpose of the test in the @code{END} rule is to -keep any other @code{END} rules from running. When an assertion fails, the -program should exit immediately. -If no assertions fail, then @code{_assert_exit} is still -false when the @code{END} rule is run normally, and the rest of the -program's @code{END} rules execute. -For all of this to work correctly, @file{assert.awk} must be the -first source file read by @command{awk}. 
-The function can be used in a program in the following way: - -@example -function myfunc(a, b) -@{ - assert(a <= 5 && b >= 17.1, "a <= 5 && b >= 17.1") - @dots{} -@} -@end example - -@noindent -If the assertion fails, you see a message similar to the following: - -@example -mydata:1357: assertion failed: a <= 5 && b >= 17.1 -@end example - -There is a small problem with this version of @code{assert}. -An @code{END} rule is automatically added -to the program calling @code{assert}. Normally, if a program consists -of just a @code{BEGIN} rule, the input files and/or standard input are -not read. However, now that the program has an @code{END} rule, @command{awk} -attempts to read the input @value{DF}s or standard input -(@pxref{Using BEGIN/END, , Startup and Cleanup Actions}), -most likely causing the program to hang as it waits for input. - -There is a simple workaround to this: -make sure the @code{BEGIN} rule always ends -with an @code{exit} statement. - -@node Round Function, Cliff Random Function, Assert Function, General Functions -@subsection Rounding Numbers - -@cindex rounding -The way @code{printf} and @code{sprintf} -(@pxref{Printf, , Using @code{printf} Statements for Fancier Printing}) -perform rounding often depends upon the system's C @code{sprintf} -subroutine. On many machines, @code{sprintf} rounding is ``unbiased,'' -which means it doesn't always round a trailing @samp{.5} up, contrary -to naive expectations. In unbiased rounding, @samp{.5} rounds to even, -rather than always up, so 1.5 rounds to 2 but 4.5 rounds to 4. This means -that if you are using a format that does rounding (e.g., @code{"%.0f"}), -you should check what your system does. The following function does -traditional rounding; it might be useful if your awk's @code{printf} -does unbiased rounding: - -@cindex @code{round} user-defined function -@example -@c file eg/lib/round.awk -# round --- do normal rounding -@c endfile -@ignore -@c file eg/lib/round.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# August, 1996 - -@c endfile -@end ignore -@c file eg/lib/round.awk -function round(x, ival, aval, fraction) -@{ - ival = int(x) # integer part, int() truncates - - # see if fractional part - if (ival == x) # no fraction - return x - - if (x < 0) @{ - aval = -x # absolute value - ival = int(aval) - fraction = aval - ival - if (fraction >= .5) - return int(x) - 1 # -2.5 --> -3 - else - return int(x) # -2.3 --> -2 - @} else @{ - fraction = x - ival - if (fraction >= .5) - return ival + 1 - else - return ival - @} -@} - -# test harness -@{ print $0, round($0) @} -@c endfile -@end example - -@node Cliff Random Function, Ordinal Functions, Round Function, General Functions -@subsection The Cliff Random Number Generator -@cindex random numbers, Cliff -@cindex Cliff random numbers - -The Cliff random number -generator@footnote{@uref{http://mathworld.wolfram.com/CliffRandomNumberGenerator.hmtl}} -is a very simple random number generator that ``passes the noise sphere test -for randomness by showing no structure.'' -It is easily programmed, in less than 10 lines of @command{awk} code: - -@cindex @code{cliff_rand} user-defined function -@example -@c file eg/lib/cliff_rand.awk -# cliff_rand.awk --- generate Cliff random numbers -@c endfile -@ignore -@c file eg/lib/cliff_rand.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# December 2000 - -@c endfile -@end ignore -@c file eg/lib/cliff_rand.awk -BEGIN @{ _cliff_seed = 0.1 @} - -function cliff_rand() -@{ - _cliff_seed = (100 * log(_cliff_seed)) % 1 - if 
(_cliff_seed < 0) - _cliff_seed = - _cliff_seed - return _cliff_seed -@} -@c endfile -@end example - -This algorithm requires an initial ``seed'' of 0.1. Each new value -uses the current seed as input for the calculation. -If the built-in @code{rand} function -(@pxref{Numeric Functions}) -isn't random enough, you might try using this function instead. - -@node Ordinal Functions, Join Function, Cliff Random Function, General Functions -@subsection Translating Between Characters and Numbers - -@cindex numeric character values -@cindex values of characters as numbers -One commercial implementation of @command{awk} supplies a built-in function, -@code{ord}, which takes a character and returns the numeric value for that -character in the machine's character set. If the string passed to -@code{ord} has more than one character, only the first one is used. - -The inverse of this function is @code{chr} (from the function of the same -name in Pascal), which takes a number and returns the corresponding character. -Both functions are written very nicely in @command{awk}; there is no real -reason to build them into the @command{awk} interpreter: - -@cindex @code{ord} user-defined function -@cindex @code{chr} user-defined function -@example -@c file eg/lib/ord.awk -# ord.awk --- do ord and chr - -# Global identifiers: -# _ord_: numerical values indexed by characters -# _ord_init: function to initialize _ord_ -@c endfile -@ignore -@c file eg/lib/ord.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# 16 January, 1992 -# 20 July, 1992, revised - -@c endfile -@end ignore -@c file eg/lib/ord.awk -BEGIN @{ _ord_init() @} - -function _ord_init( low, high, i, t) -@{ - low = sprintf("%c", 7) # BEL is ascii 7 - if (low == "\a") @{ # regular ascii - low = 0 - high = 127 - @} else if (sprintf("%c", 128 + 7) == "\a") @{ - # ascii, mark parity - low = 128 - high = 255 - @} else @{ # ebcdic(!) - low = 0 - high = 255 - @} - - for (i = low; i <= high; i++) @{ - t = sprintf("%c", i) - _ord_[t] = i - @} -@} -@c endfile -@end example - -@cindex character sets (machine character encodings) -@cindex character encodings -@cindex ASCII -@cindex EBCDIC -@cindex mark parity -Some explanation of the numbers used by @code{chr} is worthwhile. -The most prominent character set in use today is ASCII. Although an -eight-bit byte can hold 256 distinct values (from 0 to 255), ASCII only -defines characters that use the values from 0 to 127.@footnote{ASCII -has been extended in many countries to use the values from 128 to 255 -for country-specific characters. If your system uses these extensions, -you can simplify @code{_ord_init} to simply loop from 0 to 255.} -In the now distant past, -at least one minicomputer manufacturer -@c Pr1me, blech -used ASCII, but with mark parity, meaning that the leftmost bit in the byte -is always 1. This means that on those systems, characters -have numeric values from 128 to 255. -Finally, large mainframe systems use the EBCDIC character set, which -uses all 256 values. 
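If you are curious which of these cases applies on a particular machine, the @code{sprintf} probe that @code{_ord_init} uses can also be run on its own. The following is only an illustrative sketch, not part of @file{ord.awk}:

@example
# charset-probe.awk --- report which case _ord_init would choose (sketch)
BEGIN @{
    if (sprintf("%c", 7) == "\a")
        print "plain ASCII: character values 0 to 127"
    else if (sprintf("%c", 128 + 7) == "\a")
        print "ASCII with mark parity: character values 128 to 255"
    else
        print "presumably EBCDIC: character values 0 to 255"
@}
@end example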
-While there are other character sets in use on some older systems, -they are not really worth worrying about: - -@example -@c file eg/lib/ord.awk -function ord(str, c) -@{ - # only first character is of interest - c = substr(str, 1, 1) - return _ord_[c] -@} - -function chr(c) -@{ - # force c to be numeric by adding 0 - return sprintf("%c", c + 0) -@} -@c endfile - -#### test code #### -# BEGIN \ -# @{ -# for (;;) @{ -# printf("enter a character: ") -# if (getline var <= 0) -# break -# printf("ord(%s) = %d\n", var, ord(var)) -# @} -# @} -@c endfile -@end example - -An obvious improvement to these functions is to move the code for the -@code{@w{_ord_init}} function into the body of the @code{BEGIN} rule. It was -written this way initially for ease of development. -There is a ``test program'' in a @code{BEGIN} rule, to test the -function. It is commented out for production use. - -@node Join Function, Gettimeofday Function, Ordinal Functions, General Functions -@subsection Merging an Array into a String - -@cindex merging strings -When doing string processing, it is often useful to be able to join -all the strings in an array into one long string. The following function, -@code{join}, accomplishes this task. It is used later in several of -the application programs -(@pxref{Sample Programs, ,Practical @command{awk} Programs}). - -Good function design is important; this function needs to be general but it -should also have a reasonable default behavior. It is called with an array -as well as the beginning and ending indices of the elements in the array to be -merged. This assumes that the array indices are numeric---a reasonable -assumption since the array was likely created with @code{split} -(@pxref{String Functions, ,String Manipulation Functions}): - -@cindex @code{join} user-defined function -@example -@c file eg/lib/join.awk -# join.awk --- join an array into a string -@c endfile -@ignore -@c file eg/lib/join.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/lib/join.awk -function join(array, start, end, sep, result, i) -@{ - if (sep == "") - sep = " " - else if (sep == SUBSEP) # magic value - sep = "" - result = array[start] - for (i = start + 1; i <= end; i++) - result = result sep array[i] - return result -@} -@c endfile -@end example - -An optional additional argument is the separator to use when joining the -strings back together. If the caller supplies a non-empty value, -@code{join} uses it; if it is not supplied, it has a null -value. In this case, @code{join} uses a single blank as a default -separator for the strings. If the value is equal to @code{SUBSEP}, -then @code{join} joins the strings with no separator between them. -@code{SUBSEP} serves as a ``magic'' value to indicate that there should -be no separation between the component strings.@footnote{It would -be nice if @command{awk} had an assignment operator for concatenation. -The lack of an explicit operator for concatenation makes string operations -more difficult than they really need to be.} - -@node Gettimeofday Function, , Join Function, General Functions -@subsection Managing the Time of Day - -@cindex formatted timestamps -@cindex timestamps, formatted -The @code{systime} and @code{strftime} functions described in -@ref{Time Functions, ,Using @command{gawk}'s Timestamp Functions}, -provide the minimum functionality necessary for dealing with the time of day -in human readable form. 
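For example, a single formatted timestamp can be produced directly. The following minimal sketch assumes @command{gawk}, since @code{systime} and @code{strftime} are @command{gawk} extensions:

@example
# print the current date and time, roughly ISO 8601 style (sketch)
BEGIN @{ print strftime("%Y-%m-%d %H:%M:%S", systime()) @}
@end example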
While @code{strftime} is extensive, the control -formats are not necessarily easy to remember or intuitively obvious when -reading a program. - -The following function, @code{gettimeofday}, populates a user-supplied array -with preformatted time information. It returns a string with the current -time formatted in the same way as the @command{date} utility: - -@cindex @code{gettimeofday} user-defined function -@example -@c file eg/lib/gettime.awk -# gettimeofday.awk --- get the time of day in a usable format -@c endfile -@ignore -@c file eg/lib/gettime.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain, May 1993 -# -@c endfile -@end ignore -@c file eg/lib/gettime.awk - -# Returns a string in the format of output of date(1) -# Populates the array argument time with individual values: -# time["second"] -- seconds (0 - 59) -# time["minute"] -- minutes (0 - 59) -# time["hour"] -- hours (0 - 23) -# time["althour"] -- hours (0 - 12) -# time["monthday"] -- day of month (1 - 31) -# time["month"] -- month of year (1 - 12) -# time["monthname"] -- name of the month -# time["shortmonth"] -- short name of the month -# time["year"] -- year modulo 100 (0 - 99) -# time["fullyear"] -- full year -# time["weekday"] -- day of week (Sunday = 0) -# time["altweekday"] -- day of week (Monday = 0) -# time["dayname"] -- name of weekday -# time["shortdayname"] -- short name of weekday -# time["yearday"] -- day of year (0 - 365) -# time["timezone"] -- abbreviation of timezone name -# time["ampm"] -- AM or PM designation -# time["weeknum"] -- week number, Sunday first day -# time["altweeknum"] -- week number, Monday first day - -function gettimeofday(time, ret, now, i) -@{ - # get time once, avoids unnecessary system calls - now = systime() - - # return date(1)-style output - ret = strftime("%a %b %d %H:%M:%S %Z %Y", now) - - # clear out target array - delete time - - # fill in values, force numeric values to be - # numeric by adding 0 - time["second"] = strftime("%S", now) + 0 - time["minute"] = strftime("%M", now) + 0 - time["hour"] = strftime("%H", now) + 0 - time["althour"] = strftime("%I", now) + 0 - time["monthday"] = strftime("%d", now) + 0 - time["month"] = strftime("%m", now) + 0 - time["monthname"] = strftime("%B", now) - time["shortmonth"] = strftime("%b", now) - time["year"] = strftime("%y", now) + 0 - time["fullyear"] = strftime("%Y", now) + 0 - time["weekday"] = strftime("%w", now) + 0 - time["altweekday"] = strftime("%u", now) + 0 - time["dayname"] = strftime("%A", now) - time["shortdayname"] = strftime("%a", now) - time["yearday"] = strftime("%j", now) + 0 - time["timezone"] = strftime("%Z", now) - time["ampm"] = strftime("%p", now) - time["weeknum"] = strftime("%U", now) + 0 - time["altweeknum"] = strftime("%W", now) + 0 - - return ret -@} -@c endfile -@end example - -The string indices are easier to use and read than the various formats -required by @code{strftime}. The @code{alarm} program presented in -@ref{Alarm Program, ,An Alarm Clock Program}, -uses this function. -A more general design for the @code{gettimeofday} function would have -allowed the user to supply an optional timestamp value to use instead -of the current time. - -@node Data File Management, Getopt Function, General Functions, Library Functions -@section @value{DDF} Management - -This @value{SECTION} presents functions that are useful for managing -command-line datafiles. - -@menu -* Filetrans Function:: A function for handling data file transitions. -* Rewind Function:: A function for rereading the current file. 
-* File Checking:: Checking that data files are readable. -* Ignoring Assigns:: Treating assignments as file names. -@end menu - -@node Filetrans Function, Rewind Function, Data File Management, Data File Management -@subsection Noting @value{DDF} Boundaries - -@cindex per file initialization and cleanup -The @code{BEGIN} and @code{END} rules are each executed exactly once, at -the beginning and end of your @command{awk} program, respectively -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). -We (the @command{gawk} authors) once had a user who mistakenly thought that the -@code{BEGIN} rule is executed at the beginning of each @value{DF} and the -@code{END} rule is executed at the end of each @value{DF}. When informed -that this was not the case, the user requested that we add new special -patterns to @command{gawk}, named @code{BEGIN_FILE} and @code{END_FILE}, that -would have the desired behavior. He even supplied us the code to do so. - -Adding these special patterns to @command{gawk} wasn't necessary; -the job can be done cleanly in @command{awk} itself, as illustrated -by the following library program. -It arranges to call two user-supplied functions, @code{beginfile} and -@code{endfile}, at the beginning and end of each @value{DF}. -Besides solving the problem in only nine(!) lines of code, it does so -@emph{portably}; this works with any implementation of @command{awk}: - -@example -# transfile.awk -# -# Give the user a hook for filename transitions -# -# The user must supply functions beginfile() and endfile() -# that each take the name of the file being started or -# finished, respectively. -@c # -@c # Arnold Robbins, arnold@@gnu.org, Public Domain -@c # January 1992 - -FILENAME != _oldfilename \ -@{ - if (_oldfilename != "") - endfile(_oldfilename) - _oldfilename = FILENAME - beginfile(FILENAME) -@} - -END @{ endfile(FILENAME) @} -@end example - -This file must be loaded before the user's ``main'' program, so that the -rule it supplies is executed first. - -This rule relies on @command{awk}'s @code{FILENAME} variable that -automatically changes for each new @value{DF}. The current @value{FN} is -saved in a private variable, @code{_oldfilename}. If @code{FILENAME} does -not equal @code{_oldfilename}, then a new @value{DF} is being processed and -it is necessary to call @code{endfile} for the old file. Because -@code{endfile} should only be called if a file has been processed, the -program first checks to make sure that @code{_oldfilename} is not the null -string. The program then assigns the current @value{FN} to -@code{_oldfilename} and calls @code{beginfile} for the file. -Because, like all @command{awk} variables, @code{_oldfilename} is -initialized to the null string, this rule executes correctly even for the -first @value{DF}. - -The program also supplies an @code{END} rule to do the final processing for -the last file. Because this @code{END} rule comes before any @code{END} rules -supplied in the ``main'' program, @code{endfile} is called first. Once -again the value of multiple @code{BEGIN} and @code{END} rules should be clear. - -@cindex @code{beginfile} user-defined function -@cindex @code{endfile} user-defined function -This version has same problem as the first version of @code{nextfile} -(@pxref{Nextfile Function, ,Implementing @code{nextfile} as a Function}). 
-If the same @value{DF} occurs twice in a row on the command line, then -@code{endfile} and @code{beginfile} are not executed at the end of the -first pass and at the beginning of the second pass. -The following version solves the problem: - -@example -@c file eg/lib/ftrans.awk -# ftrans.awk --- handle data file transitions -# -# user supplies beginfile() and endfile() functions -@c endfile -@ignore -@c file eg/lib/ftrans.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# November 1992 - -@c endfile -@end ignore -@c file eg/lib/ftrans.awk -FNR == 1 @{ - if (_filename_ != "") - endfile(_filename_) - _filename_ = FILENAME - beginfile(FILENAME) -@} - -END @{ endfile(_filename_) @} -@c endfile -@end example - -@ref{Wc Program, ,Counting Things}, -shows how this library function can be used and -how it simplifies writing the main program. - -@node Rewind Function, File Checking, Filetrans Function, Data File Management -@subsection Rereading the Current File - -Another request for a new built-in function was for a @code{rewind} -function that would make it possible to reread the current file. -The requesting user didn't want to have to use @code{getline} -(@pxref{Getline, , Explicit Input with @code{getline}}) -inside a loop. - -However, as long as you are not in the @code{END} rule, it is -quite easy to arrange to immediately close the current input file -and then start over with it from the top. -For lack of a better name, we'll call it @code{rewind}: - -@cindex @code{rewind} user-defined function -@example -@c file eg/lib/rewind.awk -# rewind.awk --- rewind the current file and start over -@c endfile -@ignore -@c file eg/lib/rewind.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# September 2000 - -@c endfile -@end ignore -@c file eg/lib/rewind.awk -function rewind( i) -@{ - # shift remaining arguments up - for (i = ARGC; i > ARGIND; i--) - ARGV[i] = ARGV[i-1] - - # make sure gawk knows to keep going - ARGC++ - - # make current file next to get done - ARGV[ARGIND+1] = FILENAME - - # do it - nextfile -@} -@c endfile -@end example - -This code relies on the @code{ARGIND} variable -(@pxref{Auto-set, ,Built-in Variables That Convey Information}), -which is specific to @command{gawk}. -If you are not using -@command{gawk}, you can use ideas presented in -@iftex -the previous @value{SECTION} -@end iftex -@ifnottex -@ref{Filetrans Function, ,Noting @value{DDF} Boundaries}, -@end ifnottex -to either update @code{ARGIND} on your own -or modify this code as appropriate. - -The @code{rewind} function also relies on the @code{nextfile} keyword -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). -@xref{Nextfile Function, ,Implementing @code{nextfile} as a Function}, -for a function version of @code{nextfile}. - -@node File Checking, Ignoring Assigns, Rewind Function, Data File Management -@subsection Checking for Readable @value{DDF}s - -@cindex fatal errors -@cindex readable @value{DF}s, checking -@cindex non-readable @value{DF}s, skipping -@cindex @value{DF}s, non-readable, skipping -@cindex @value{DF}s, readable, checking -Normally, if you give @command{awk} a @value{DF} that isn't readable, -it stops with a fatal error. There are times when you -might want to just ignore such files and keep going. 
You can -do this by prepending the following program to your @command{awk} -program: - -@cindex @code{readable.awk} program -@example -@c file eg/lib/readable.awk -# readable.awk --- library file to skip over unreadable files -@c endfile -@ignore -@c file eg/lib/readable.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# October 2000 - -@c endfile -@end ignore -@c file eg/lib/readable.awk -BEGIN @{ - for (i = 1; i < ARGC; i++) @{ - if (ARGV[i] ~ /^[A-Za-z_][A-Za-z0-9_]*=.*/ \ - || ARGV[i] == "-") - continue # assignment or standard input - else if ((getline junk < ARGV[i]) < 0) # unreadable - delete ARGV[i] - else - close(ARGV[i]) - @} -@} -@c endfile -@end example - -@cindex fatal errors -In @command{gawk}, the @code{getline} won't be fatal (unless -@option{--posix} is in force). -Removing the element from @code{ARGV} with @code{delete} -skips the file (since it's no longer in the list). - -@c This doesn't handle /dev/stdin etc. Not worth the hassle to mention or fix. - -@node Ignoring Assigns, , File Checking, Data File Management -@subsection Treating Assignments as @value{FFN}s - -Occasionally, you might not want @command{awk} to process command-line -variable assignments -(@pxref{Assignment Options, ,Assigning Variables on the Command Line}). -In particular, if you have @value{FN}s that contain an @samp{=} character, -@command{awk} treats the @value{FN} as an assignment, and does not process it. - -Some users have suggested an additional command-line option for @command{gawk} -to disable command-line assignments. However, some simple programming with -a library file does the trick: - -@cindex @code{noassign.awk} program -@example -@c file eg/lib/noassign.awk -# noassign.awk --- library file to avoid the need for a -# special option that disables command-line assignments -@c endfile -@ignore -@c file eg/lib/noassign.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# October 1999 - -@c endfile -@end ignore -@c file eg/lib/noassign.awk -function disable_assigns(argc, argv, i) -@{ - for (i = 1; i < argc; i++) - if (argv[i] ~ /^[A-Za-z_][A-Za-z_0-9]*=.*/) - argv[i] = ("./" argv[i]) -@} - -BEGIN @{ - if (No_command_assign) - disable_assigns(ARGC, ARGV) -@} -@c endfile -@end example - -You then run your program this way: - -@example -awk -v No_command_assign=1 -f noassign.awk -f yourprog.awk * -@end example - -The function works by looping through the arguments. -It prepends @samp{./} to -any argument that matches the form -of a variable assignment, turning that argument into a @value{FN}. - -The use of @code{No_command_assign} allows you to disable command-line -assignments at invocation time, by giving the variable a true value. -When not set, it is initially zero (i.e., false), so the command-line arguments -are left alone. - -@node Getopt Function, Passwd Functions, Data File Management, Library Functions -@section Processing Command-Line Options - -@cindex @code{getopt} C library function -@cindex processing arguments -@cindex argument processing -Most utilities on POSIX compatible systems take options, or ``switches,'' on -the command line that can be used to change the way a program behaves. -@command{awk} is an example of such a program -(@pxref{Options, ,Command-Line Options}). -Often, options take @dfn{arguments}; i.e., data that the program needs to -correctly obey the command-line option. For example, @command{awk}'s -@option{-F} option requires a string to use as the field separator. 
-The first occurrence on the command line of either @option{--} or a -string that does not begin with @samp{-} ends the options. - -Modern Unix systems provide a C function named @code{getopt} for processing -command-line arguments. The programmer provides a string describing the -one-letter options. If an option requires an argument, it is followed in the -string with a colon. @code{getopt} is also passed the -count and values of the command-line arguments and is called in a loop. -@code{getopt} processes the command-line arguments for option letters. -Each time around the loop, it returns a single character representing the -next option letter that it finds, or @samp{?} if it finds an invalid option. -When it returns @minus{}1, there are no options left on the command line. - -When using @code{getopt}, options that do not take arguments can be -grouped together. Furthermore, options that take arguments require that the -argument is present. The argument can immediately follow the option letter -or it can be a separate command-line argument. - -Given a hypothetical program that takes -three command-line options, @option{-a}, @option{-b}, and @option{-c}, where -@option{-b} requires an argument, all of the following are valid ways of -invoking the program: - -@example -prog -a -b foo -c data1 data2 data3 -prog -ac -bfoo -- data1 data2 data3 -prog -acbfoo data1 data2 data3 -@end example - -Notice that when the argument is grouped with its option, the rest of -the argument is considered to be the option's argument. -In this example, @option{-acbfoo} indicates that all of the -@option{-a}, @option{-b}, and @option{-c} options were supplied, -and that @samp{foo} is the argument to the @option{-b} option. - -@code{getopt} provides four external variables that the programmer can use: - -@table @code -@item optind -The index in the argument value array (@code{argv}) where the first -non-option command-line argument can be found. - -@item optarg -The string value of the argument to an option. - -@item opterr -Usually @code{getopt} prints an error message when it finds an invalid -option. Setting @code{opterr} to zero disables this feature. (An -application might want to print its own error message.) - -@item optopt -The letter representing the command-line option. -@c While not usually documented, most versions supply this variable. -@end table - -The following C fragment shows how @code{getopt} might process command-line -arguments for @command{awk}: - -@example -int -main(int argc, char *argv[]) -@{ - @dots{} - /* print our own message */ - opterr = 0; - while ((c = getopt(argc, argv, "v:f:F:W:")) != -1) @{ - switch (c) @{ - case 'f': /* file */ - @dots{} - break; - case 'F': /* field separator */ - @dots{} - break; - case 'v': /* variable assignment */ - @dots{} - break; - case 'W': /* extension */ - @dots{} - break; - case '?': - default: - usage(); - break; - @} - @} - @dots{} -@} -@end example - -As a side point, @command{gawk} actually uses the GNU @code{getopt_long} -function to process both normal and GNU-style long options -(@pxref{Options, ,Command-Line Options}). - -The abstraction provided by @code{getopt} is very useful and is quite -handy in @command{awk} programs as well. Following is an @command{awk} -version of @code{getopt}. This function highlights one of the -greatest weaknesses in @command{awk}, which is that it is very poor at -manipulating single characters. 
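For instance, simply visiting the characters of a string one at a time takes a loop along the following lines (an illustrative sketch, not part of the @code{getopt} implementation):

@example
# visit each character of str in turn (sketch)
function walk_chars(str,    i, c)
@{
    for (i = 1; i <= length(str); i++) @{
        c = substr(str, i, 1)
        # ... work with c here ...
    @}
@}
@end example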
Repeated calls to @code{substr} are -necessary for accessing individual characters -(@pxref{String Functions, ,String Manipulation Functions}).@footnote{This -function was written before @command{gawk} acquired the ability to -split strings into single characters using @code{""} as the separator. -We have left it alone, since using @code{substr} is more portable.} - -The discussion that follows walks through the code a bit at a time: - -@example -@c file eg/lib/getopt.awk -# getopt.awk --- do C library getopt(3) function in awk -@c endfile -@ignore -@c file eg/lib/getopt.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# -# Initial version: March, 1991 -# Revised: May, 1993 - -@c endfile -@end ignore -@c file eg/lib/getopt.awk -# External variables: -# Optind -- index in ARGV of first non-option argument -# Optarg -- string value of argument to current option -# Opterr -- if nonzero, print our own diagnostic -# Optopt -- current option letter - -# Returns: -# -1 at end of options -# ? for unrecognized option -# a character representing the current option - -# Private Data: -# _opti -- index in multi-flag option, e.g., -abc -@c endfile -@end example - -The function starts out with -a list of the global variables it uses, -what the return values are, what they mean, and any global variables that -are ``private'' to this library function. Such documentation is essential -for any program, and particularly for library functions. - -The @code{getopt} function first checks that it was indeed called with a string of options -(the @code{options} parameter). If @code{options} has a zero length, -@code{getopt} immediately returns @minus{}1: - -@cindex @code{getopt} user-defined function -@example -@c file eg/lib/getopt.awk -function getopt(argc, argv, options, thisopt, i) -@{ - if (length(options) == 0) # no options given - return -1 - -@group - if (argv[Optind] == "--") @{ # all done - Optind++ - _opti = 0 - return -1 -@end group - @} else if (argv[Optind] !~ /^-[^: \t\n\f\r\v\b]/) @{ - _opti = 0 - return -1 - @} -@c endfile -@end example - -The next thing to check for is the end of the options. A @option{--} -ends the command-line options, as does any command-line argument that -does not begin with a @samp{-}. @code{Optind} is used to step through -the array of command-line arguments; it retains its value across calls -to @code{getopt}, because it is a global variable. - -The regular expression that is used, @code{@w{/^-[^: \t\n\f\r\v\b]/}}, is -perhaps a bit of overkill; it checks for a @samp{-} followed by anything -that is not whitespace and not a colon. -If the current command-line argument does not match this pattern, -it is not an option, and it ends option processing. - -@example -@c file eg/lib/getopt.awk - if (_opti == 0) - _opti = 2 - thisopt = substr(argv[Optind], _opti, 1) - Optopt = thisopt - i = index(options, thisopt) - if (i == 0) @{ - if (Opterr) - printf("%c -- invalid option\n", - thisopt) > "/dev/stderr" - if (_opti >= length(argv[Optind])) @{ - Optind++ - _opti = 0 - @} else - _opti++ - return "?" - @} -@c endfile -@end example - -The @code{_opti} variable tracks the position in the current command-line -argument (@code{argv[Optind]}). If multiple options are -grouped together with one @samp{-} (e.g., @option{-abx}), it is necessary -to return them to the user one at a time. - -If @code{_opti} is equal to zero, it is set to two, which is the index in -the string of the next character to look at (we skip the @samp{-}, which -is at position one). 
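To make the indexing concrete, consider a hypothetical grouped argument @samp{-abx}: on successive calls @code{_opti} is 2, then 3, then 4, so @code{substr} picks off @samp{a}, @samp{b}, and finally @samp{x}. A standalone sketch of just that stepping:

@example
# illustration only --- how _opti steps through "-abx"
BEGIN @{
    arg = "-abx"
    for (i = 2; i <= length(arg); i++)
        print substr(arg, i, 1)    # prints a, b, x on separate lines
@}
@end example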
The variable @code{thisopt} holds the character, -obtained with @code{substr}. It is saved in @code{Optopt} for the main -program to use. - -If @code{thisopt} is not in the @code{options} string, then it is an -invalid option. If @code{Opterr} is nonzero, @code{getopt} prints an error -message on the standard error that is similar to the message from the C -version of @code{getopt}. - -Because the option is invalid, it is necessary to skip it and move on to the -next option character. If @code{_opti} is greater than or equal to the -length of the current command-line argument, it is necessary to move on -to the next argument, so @code{Optind} is incremented and @code{_opti} is reset -to zero. Otherwise, @code{Optind} is left alone and @code{_opti} is merely -incremented. - -In any case, because the option is invalid, @code{getopt} returns @samp{?}. -The main program can examine @code{Optopt} if it needs to know what the -invalid option letter actually is. Continuing on: - -@example -@c file eg/lib/getopt.awk - if (substr(options, i + 1, 1) == ":") @{ - # get option argument - if (length(substr(argv[Optind], _opti + 1)) > 0) - Optarg = substr(argv[Optind], _opti + 1) - else - Optarg = argv[++Optind] - _opti = 0 - @} else - Optarg = "" -@c endfile -@end example - -If the option requires an argument, the option letter is followed by a colon -in the @code{options} string. If there are remaining characters in the -current command-line argument (@code{argv[Optind]}), then the rest of that -string is assigned to @code{Optarg}. Otherwise, the next command-line -argument is used (@samp{-xFOO} vs.@: @samp{@w{-x FOO}}). In either case, -@code{_opti} is reset to zero, because there are no more characters left to -examine in the current command-line argument. Continuing: - -@example -@c file eg/lib/getopt.awk - if (_opti == 0 || _opti >= length(argv[Optind])) @{ - Optind++ - _opti = 0 - @} else - _opti++ - return thisopt -@} -@c endfile -@end example - -Finally, if @code{_opti} is either zero or greater than the length of the -current command-line argument, it means this element in @code{argv} is -through being processed, so @code{Optind} is incremented to point to the -next element in @code{argv}. If neither condition is true, then only -@code{_opti} is incremented, so that the next option letter can be processed -on the next call to @code{getopt}. - -The @code{BEGIN} rule initializes both @code{Opterr} and @code{Optind} to one. -@code{Opterr} is set to one, since the default behavior is for @code{getopt} -to print a diagnostic message upon seeing an invalid option. @code{Optind} -is set to one, since there's no reason to look at the program name, which is -in @code{ARGV[0]}: - -@example -@c file eg/lib/getopt.awk -BEGIN @{ - Opterr = 1 # default is to diagnose - Optind = 1 # skip ARGV[0] - - # test program - if (_getopt_test) @{ - while ((_go_c = getopt(ARGC, ARGV, "ab:cd")) != -1) - printf("c = <%c>, optarg = <%s>\n", - _go_c, Optarg) - printf("non-option arguments:\n") - for (; Optind < ARGC; Optind++) - printf("\tARGV[%d] = <%s>\n", - Optind, ARGV[Optind]) - @} -@} -@c endfile -@end example - -The rest of the @code{BEGIN} rule is a simple test program. 
Here is the -result of two sample runs of the test program: - -@example -$ awk -f getopt.awk -v _getopt_test=1 -- -a -cbARG bax -x -@print{} c = <a>, optarg = <> -@print{} c = <c>, optarg = <> -@print{} c = <b>, optarg = <ARG> -@print{} non-option arguments: -@print{} ARGV[3] = <bax> -@print{} ARGV[4] = <-x> - -$ awk -f getopt.awk -v _getopt_test=1 -- -a -x -- xyz abc -@print{} c = <a>, optarg = <> -@error{} x -- invalid option -@print{} c = <?>, optarg = <> -@print{} non-option arguments: -@print{} ARGV[4] = <xyz> -@print{} ARGV[5] = <abc> -@end example - -In both runs, -the first @option{--} terminates the arguments to @command{awk}, so that it does -not try to interpret the @option{-a}, etc., as its own options. -Several of the sample programs presented in -@ref{Sample Programs, ,Practical @command{awk} Programs}, -use @code{getopt} to process their arguments. - -@node Passwd Functions, Group Functions, Getopt Function, Library Functions -@section Reading the User Database - -The @code{PROCINFO} array -(@pxref{Built-in Variables}) -provides access to the current user's real and effective user and group id -numbers, and if available, the user's supplementary group set. -However, because these are numbers, they do not provide very useful -information to the average user. There needs to be some way to find the -user information associated with the user and group numbers. This -@value{SECTION} presents a suite of functions for retrieving information from the -user database. @xref{Group Functions, ,Reading the Group Database}, -for a similar suite that retrieves information from the group database. - -@cindex @code{getpwent} C library function -@cindex user information -@cindex login information -@cindex account information -@cindex password file -The POSIX standard does not define the file where user information is -kept. Instead, it provides the @code{<pwd.h>} header file -and several C language subroutines for obtaining user information. -The primary function is @code{getpwent}, for ``get password entry.'' -The ``password'' comes from the original user database file, -@file{/etc/passwd}, which stores user information, along with the -encrypted passwords (hence the name). - -@cindex @command{pwcat} program -While an @command{awk} program could simply read @file{/etc/passwd} -directly, this file may not contain complete information about the -system's set of users.@footnote{It is often the case that password -information is stored in a network database.} To be sure you are able to -produce a readable and complete version of the user database, it is necessary -to write a small C program that calls @code{getpwent}. @code{getpwent} -is defined as returning a pointer to a @code{struct passwd}. Each time it -is called, it returns the next entry in the database. When there are -no more entries, it returns @code{NULL}, the null pointer. When this -happens, the C program should call @code{endpwent} to close the database. -Following is @command{pwcat}, a C program that ``cats'' the password database. - -@c Use old style function header for portability to old systems (SunOS, HP/UX). 
- -@example -@c file eg/lib/pwcat.c -/* - * pwcat.c - * - * Generate a printable version of the password database - */ -@c endfile -@ignore -@c file eg/lib/pwcat.c -/* - * Arnold Robbins, arnold@@gnu.org, May 1993 - * Public Domain - */ - -@c endfile -@end ignore -@c file eg/lib/pwcat.c -#include <stdio.h> -#include <pwd.h> - -int -main(argc, argv) -int argc; -char **argv; -@{ - struct passwd *p; - - while ((p = getpwent()) != NULL) - printf("%s:%s:%d:%d:%s:%s:%s\n", - p->pw_name, p->pw_passwd, p->pw_uid, - p->pw_gid, p->pw_gecos, p->pw_dir, p->pw_shell); - - endpwent(); - exit(0); -@} -@c endfile -@end example - -If you don't understand C, don't worry about it. -The output from @command{pwcat} is the user database, in the traditional -@file{/etc/passwd} format of colon-separated fields. The fields are: - -@ignore -@table @asis -@item Login name -The user's login name. - -@item Encrypted password -The user's encrypted password. This may not be available on some systems. - -@item User-ID -The user's numeric user-id number. - -@item Group-ID -The user's numeric group-id number. - -@item Full name -The user's full name, and perhaps other information associated with the -user. - -@item Home directory -The user's login (or ``home'') directory (familiar to shell programmers as -@code{$HOME}). - -@item Login shell -The program that is run when the user logs in. This is usually a -shell, such as @command{bash}. -@end table -@end ignore - -@multitable {Encrypted password} {1234567890123456789012345678901234567890123456} -@item Login name @tab The user's login name. - -@item Encrypted password @tab The user's encrypted password. This may not be available on some systems. - -@item User-ID @tab The user's numeric user-id number. - -@item Group-ID @tab The user's numeric group-id number. - -@item Full name @tab The user's full name, and perhaps other information associated with the -user. - -@item Home directory @tab The user's login (or ``home'') directory (familiar to shell programmers as -@code{$HOME}). - -@item Login shell @tab The program that is run when the user logs in. This is usually a -shell, such as @command{bash}. -@end multitable - -A few lines representative of @command{pwcat}'s output are as follows: - -@cindex Jacobs, Andrew -@cindex Robbins, Arnold -@cindex Robbins, Miriam -@example -$ pwcat -@print{} root:3Ov02d5VaUPB6:0:1:Operator:/:/bin/sh -@print{} nobody:*:65534:65534::/: -@print{} daemon:*:1:1::/: -@print{} sys:*:2:2::/:/bin/csh -@print{} bin:*:3:3::/bin: -@print{} arnold:xyzzy:2076:10:Arnold Robbins:/home/arnold:/bin/sh -@print{} miriam:yxaay:112:10:Miriam Robbins:/home/miriam:/bin/sh -@print{} andy:abcca2:113:10:Andy Jacobs:/home/andy:/bin/sh -@dots{} -@end example - -With that introduction, following is a group of functions for getting user -information. There are several functions here, corresponding to the C -functions of the same names: - -@c Exercise: simplify all these functions that return values. -@c Answer: return foo[key] returns "" if key not there, no need to check with `in'. 
- -@cindex @code{_pw_init} user-defined function -@example -@c file eg/lib/passwdawk.in -# passwd.awk --- access password file information -@c endfile -@ignore -@c file eg/lib/passwdawk.in -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 -# Revised October 2000 - -@c endfile -@end ignore -@c file eg/lib/passwdawk.in -BEGIN @{ - # tailor this to suit your system - _pw_awklib = "/usr/local/libexec/awk/" -@} - -function _pw_init( oldfs, oldrs, olddol0, pwcat, using_fw) -@{ - if (_pw_inited) - return - - oldfs = FS - oldrs = RS - olddol0 = $0 - using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") - FS = ":" - RS = "\n" - - pwcat = _pw_awklib "pwcat" - while ((pwcat | getline) > 0) @{ - _pw_byname[$1] = $0 - _pw_byuid[$3] = $0 - _pw_bycount[++_pw_total] = $0 - @} - close(pwcat) - _pw_count = 0 - _pw_inited = 1 - FS = oldfs - if (using_fw) - FIELDWIDTHS = FIELDWIDTHS - RS = oldrs - $0 = olddol0 -@} -@c endfile -@end example - -The @code{BEGIN} rule sets a private variable to the directory where -@command{pwcat} is stored. Because it is used to help out an @command{awk} library -routine, we have chosen to put it in @file{/usr/local/libexec/awk}; -however, you might want it to be in a different directory on your system. - -The function @code{_pw_init} keeps three copies of the user information -in three associative arrays. The arrays are indexed by username -(@code{_pw_byname}), by user-id number (@code{_pw_byuid}), and by order of -occurrence (@code{_pw_bycount}). -The variable @code{_pw_inited} is used for efficiency; @code{_pw_init} -needs only to be called once. - -Because this function uses @code{getline} to read information from -@command{pwcat}, it first saves the values of @code{FS}, @code{RS}, and @code{$0}. -It notes in the variable @code{using_fw} whether field splitting -with @code{FIELDWIDTHS} is in effect or not. -Doing so is necessary, since these functions could be called -from anywhere within a user's program, and the user may have his -or her -own way of splitting records and fields. - -The @code{using_fw} variable checks @code{PROCINFO["FS"]}, which -is @code{"FIELDWIDTHS"} if field splitting is being done with -@code{FIELDWIDTHS}. This makes it possible to restore the correct -field-splitting mechanism later. The test can only be true for -@command{gawk}. It is false if using @code{FS} or on some other -@command{awk} implementation. - -The main part of the function uses a loop to read database lines, split -the line into fields, and then store the line into each array as necessary. -When the loop is done, @code{@w{_pw_init}} cleans up by closing the pipeline, -setting @code{@w{_pw_inited}} to one, and restoring @code{FS} (and @code{FIELDWIDTHS} -if necessary), @code{RS}, and @code{$0}. -The use of @code{@w{_pw_count}} is explained shortly. - -@c NEXT ED: All of these functions don't need the ... in ... test. Just -@c return the array element, which will be "" if not already there. Duh. -The @code{getpwnam} function takes a username as a string argument. If that -user is in the database, it returns the appropriate line. Otherwise it -returns the null string: - -@cindex @code{getpwnam} user-defined function -@example -@group -@c file eg/lib/passwdawk.in -function getpwnam(name) -@{ - _pw_init() - if (name in _pw_byname) - return _pw_byname[name] - return "" -@} -@c endfile -@end group -@end example - -Similarly, -the @code{getpwuid} function takes a user-id number argument. If that -user number is in the database, it returns the appropriate line. 
Otherwise it -returns the null string: - -@cindex @code{getpwuid} user-defined function -@example -@c file eg/lib/passwdawk.in -function getpwuid(uid) -@{ - _pw_init() - if (uid in _pw_byuid) - return _pw_byuid[uid] - return "" -@} -@c endfile -@end example - -The @code{getpwent} function simply steps through the database, one entry at -a time. It uses @code{_pw_count} to track its current position in the -@code{_pw_bycount} array: - -@cindex @code{getpwent} user-defined function -@example -@c file eg/lib/passwdawk.in -function getpwent() -@{ - _pw_init() - if (_pw_count < _pw_total) - return _pw_bycount[++_pw_count] - return "" -@} -@c endfile -@end example - -The @code{@w{endpwent}} function resets @code{@w{_pw_count}} to zero, so that -subsequent calls to @code{getpwent} start over again: - -@cindex @code{endpwent} user-defined function -@example -@c file eg/lib/passwdawk.in -function endpwent() -@{ - _pw_count = 0 -@} -@c endfile -@end example - -A conscious design decision in this suite is that each subroutine calls -@code{@w{_pw_init}} to initialize the database arrays. The overhead of running -a separate process to generate the user database, and the I/O to scan it, -are only incurred if the user's main program actually calls one of these -functions. If this library file is loaded along with a user's program, but -none of the routines are ever called, then there is no extra runtime overhead. -(The alternative is to move the body of @code{@w{_pw_init}} into a -@code{BEGIN} rule, which always runs @command{pwcat}. This simplifies the -code but runs an extra process that may never be needed.) - -In turn, calling @code{_pw_init} is not too expensive, because the -@code{_pw_inited} variable keeps the program from reading the data more than -once. If you are worried about squeezing every last cycle out of your -@command{awk} program, the check of @code{_pw_inited} could be moved out of -@code{_pw_init} and duplicated in all the other functions. In practice, -this is not necessary, since most @command{awk} programs are I/O-bound, and it -clutters up the code. - -The @command{id} program in @ref{Id Program, ,Printing out User Information}, -uses these functions. - -@node Group Functions, , Passwd Functions, Library Functions -@section Reading the Group Database - -@cindex @code{getgrent} C library function -@cindex group information -@cindex account information -@cindex group file -Much of the discussion presented in -@ref{Passwd Functions, ,Reading the User Database}, -applies to the group database as well. Although there has traditionally -been a well-known file (@file{/etc/group}) in a well-known format, the POSIX -standard only provides a set of C library routines -(@code{<grp.h>} and @code{getgrent}) -for accessing the information. -Even though this file may exist, it likely does not have -complete information. Therefore, as with the user database, it is necessary -to have a small C program that generates the group database as its output. 
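(Before looking at that C program, here is a brief illustration of the user database routines just presented. It is only a sketch: it assumes the library file is saved as @file{passwd.awk}, that @command{pwcat} is installed in the directory named in its @code{BEGIN} rule, and that the @file{homes.awk} @value{FN} is made up for the example.)

@example
# homes.awk --- print the home directory of each user named
#               on the command line (illustrative sketch)
# run with:  awk -f passwd.awk -f homes.awk arnold miriam
BEGIN @{
    for (i = 1; i < ARGC; i++) @{
        pwent = getpwnam(ARGV[i])
        if (pwent == "") @{
            print ARGV[i] ": no such user" > "/dev/stderr"
            continue
        @}
        split(pwent, f, ":")
        print ARGV[i] " -> " f[6]    # sixth field is the home directory
    @}
@}
@end example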
- -@cindex @command{grcat} program -@command{grcat}, a C program that ``cats'' the group database, -is as follows: - -@example -@c file eg/lib/grcat.c -/* - * grcat.c - * - * Generate a printable version of the group database - */ -@c endfile -@ignore -@c file eg/lib/grcat.c -/* - * Arnold Robbins, arnold@@gnu.org, May 1993 - * Public Domain - */ - -@c endfile -@end ignore -@c file eg/lib/grcat.c -#include <stdio.h> -#include <grp.h> - -int -main(argc, argv) -int argc; -char **argv; -@{ - struct group *g; - int i; - - while ((g = getgrent()) != NULL) @{ - printf("%s:%s:%d:", g->gr_name, g->gr_passwd, - g->gr_gid); - for (i = 0; g->gr_mem[i] != NULL; i++) @{ - printf("%s", g->gr_mem[i]); -@group - if (g->gr_mem[i+1] != NULL) - putchar(','); - @} -@end group - putchar('\n'); - @} - endgrent(); - exit(0); -@} -@c endfile -@end example - -Each line in the group database represents one group. The fields are -separated with colons and represent the following information: - -@ignore -@table @asis -@item Group Name -The name of the group. - -@item Group Password -The encrypted group password. In practice, this field is never used. It is -usually empty or set to @samp{*}. - -@item Group ID Number -The numeric group-id number. This number should be unique within the file. - -@item Group Member List -A comma-separated list of usernames. These users are members of the group. -Modern Unix systems allow users to be members of several groups -simultaneously. If your system does, then there are elements -@code{"group1"} through @code{"group@var{N}"} in @code{PROCINFO} -for those group-id numbers. -(Note that @code{PROCINFO} is a @command{gawk} extension; -@pxref{Built-in Variables}.) -@end table -@end ignore - -@multitable {Encrypted password} {1234567890123456789012345678901234567890123456} -@item Group name @tab The group's name. - -@item Group password @tab The group's encrypted password. In practice, this field is never used; -it is usually empty or set to @samp{*}. - -@item Group-ID @tab -The group's numeric group-id number; this number should be unique within the file. - -@item Group member list @tab -A comma-separated list of usernames. These users are members of the group. -Modern Unix systems allow users to be members of several groups -simultaneously. If your system does, then there are elements -@code{"group1"} through @code{"group@var{N}"} in @code{PROCINFO} -for those group-id numbers. -(Note that @code{PROCINFO} is a @command{gawk} extension; -@pxref{Built-in Variables}.) -@end multitable - -Here is what running @command{grcat} might produce: - -@example -$ grcat -@print{} wheel:*:0:arnold -@print{} nogroup:*:65534: -@print{} daemon:*:1: -@print{} kmem:*:2: -@print{} staff:*:10:arnold,miriam,andy -@print{} other:*:20: -@dots{} -@end example - -Here are the functions for obtaining information from the group database. 
-There are several, modeled after the C library functions of the same names: - -@cindex @code{_gr_init} user-defined function -@example -@c file eg/lib/groupawk.in -# group.awk --- functions for dealing with the group file -@c endfile -@ignore -@c file eg/lib/groupawk.in -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 -# Revised October 2000 - -@c endfile -@end ignore -@c line break on _gr_init for smallbook -@c file eg/lib/groupawk.in -BEGIN \ -@{ - # Change to suit your system - _gr_awklib = "/usr/local/libexec/awk/" -@} - -function _gr_init( oldfs, oldrs, olddol0, grcat, - using_fw, n, a, i) -@{ - if (_gr_inited) - return - - oldfs = FS - oldrs = RS - olddol0 = $0 - using_fw = (PROCINFO["FS"] == "FIELDWIDTHS") - FS = ":" - RS = "\n" - - grcat = _gr_awklib "grcat" - while ((grcat | getline) > 0) @{ - if ($1 in _gr_byname) - _gr_byname[$1] = _gr_byname[$1] "," $4 - else - _gr_byname[$1] = $0 - if ($3 in _gr_bygid) - _gr_bygid[$3] = _gr_bygid[$3] "," $4 - else - _gr_bygid[$3] = $0 - - n = split($4, a, "[ \t]*,[ \t]*") - for (i = 1; i <= n; i++) - if (a[i] in _gr_groupsbyuser) - _gr_groupsbyuser[a[i]] = \ - _gr_groupsbyuser[a[i]] " " $1 - else - _gr_groupsbyuser[a[i]] = $1 - - _gr_bycount[++_gr_count] = $0 - @} - close(grcat) - _gr_count = 0 - _gr_inited++ - FS = oldfs - if (using_fw) - FIELDWIDTHS = FIELDWIDTHS - RS = oldrs - $0 = olddol0 -@} -@c endfile -@end example - -The @code{BEGIN} rule sets a private variable to the directory where -@command{grcat} is stored. Because it is used to help out an @command{awk} library -routine, we have chosen to put it in @file{/usr/local/libexec/awk}. You might -want it to be in a different directory on your system. - -These routines follow the same general outline as the user database routines -(@pxref{Passwd Functions, ,Reading the User Database}). -The @code{@w{_gr_inited}} variable is used to -ensure that the database is scanned no more than once. -The @code{@w{_gr_init}} function first saves @code{FS}, @code{FIELDWIDTHS}, @code{RS}, and -@code{$0}, and then sets @code{FS} and @code{RS} to the correct values for -scanning the group information. - -The group information is stored is several associative arrays. -The arrays are indexed by group name (@code{@w{_gr_byname}}), by group-id number -(@code{@w{_gr_bygid}}), and by position in the database (@code{@w{_gr_bycount}}). -There is an additional array indexed by username (@code{@w{_gr_groupsbyuser}}), -which is a space-separated list of groups that each user belongs to. - -Unlike the user database, it is possible to have multiple records in the -database for the same group. This is common when a group has a large number -of members. A pair of such entries might look like the following: - -@example -tvpeople:*:101:johnny,jay,arsenio -tvpeople:*:101:david,conan,tom,joan -@end example - -For this reason, @code{_gr_init} looks to see if a group name or -group-id number is already seen. If it is, then the usernames are -simply concatenated onto the previous list of users. (There is actually a -subtle problem with the code just presented. Suppose that -the first time there were no names. This code adds the names with -a leading comma. It also doesn't check that there is a @code{$4}.) - -Finally, @code{_gr_init} closes the pipeline to @command{grcat}, restores -@code{FS} (and @code{FIELDWIDTHS} if necessary), @code{RS}, and @code{$0}, -initializes @code{_gr_count} to zero -(it is used later), and makes @code{_gr_inited} nonzero. 
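The subtle problem noted above (a spurious leading comma when an earlier record for the group listed no members) could be avoided with a slightly more careful merge. Here is one possible sketch; the @code{_gr_merge} helper is not part of the distributed library file:

@example
# _gr_merge --- append a member list to an existing entry without
#               introducing a leading comma (illustrative sketch)
function _gr_merge(old, members)
@{
    if (members == "")      # nothing to add (also covers a missing $4)
        return old
    if (substr(old, length(old), 1) == ":")   # entry has no members yet
        return old members
    return old "," members
@}
@end example

With such a helper, the two concatenating assignments in @code{_gr_init} would become @code{_gr_byname[$1] = _gr_merge(_gr_byname[$1], $4)} and @code{_gr_bygid[$3] = _gr_merge(_gr_bygid[$3], $4)}.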
- -The @code{getgrnam} function takes a group name as its argument, and if that -group exists, it is returned. Otherwise, @code{getgrnam} returns the null -string: - -@cindex @code{getgrnam} user-defined function -@example -@c file eg/lib/groupawk.in -function getgrnam(group) -@{ - _gr_init() - if (group in _gr_byname) - return _gr_byname[group] - return "" -@} -@c endfile -@end example - -The @code{getgrgid} function is similar, it takes a numeric group-id and -looks up the information associated with that group-id: - -@cindex @code{getgrgid} user-defined function -@example -@c file eg/lib/groupawk.in -function getgrgid(gid) -@{ - _gr_init() - if (gid in _gr_bygid) - return _gr_bygid[gid] - return "" -@} -@c endfile -@end example - -The @code{getgruser} function does not have a C counterpart. It takes a -username and returns the list of groups that have the user as a member: - -@cindex @code{getgruser} user-defined function -@example -@c file eg/lib/groupawk.in -function getgruser(user) -@{ - _gr_init() - if (user in _gr_groupsbyuser) - return _gr_groupsbyuser[user] - return "" -@} -@c endfile -@end example - -The @code{getgrent} function steps through the database one entry at a time. -It uses @code{_gr_count} to track its position in the list: - -@cindex @code{getgrent} user-defined function -@example -@c file eg/lib/groupawk.in -function getgrent() -@{ - _gr_init() - if (++_gr_count in _gr_bycount) - return _gr_bycount[_gr_count] - return "" -@} -@c endfile -@end example - -The @code{endgrent} function resets @code{_gr_count} to zero so that @code{getgrent} can -start over again: - -@cindex @code{endgrent} user-defined function -@example -@c file eg/lib/groupawk.in -function endgrent() -@{ - _gr_count = 0 -@} -@c endfile -@end example - -As with the user database routines, each function calls @code{_gr_init} to -initialize the arrays. Doing so only incurs the extra overhead of running -@command{grcat} if these functions are used (as opposed to moving the body of -@code{_gr_init} into a @code{BEGIN} rule). - -Most of the work is in scanning the database and building the various -associative arrays. The functions that the user calls are themselves very -simple, relying on @command{awk}'s associative arrays to do work. - -The @command{id} program in @ref{Id Program, ,Printing out User Information}, -uses these functions. - -@node Sample Programs, Language History, Library Functions, Top -@chapter Practical @command{awk} Programs - -@ref{Library Functions, ,A Library of @command{awk} Functions}, -presents the idea that reading programs in a language contributes to -learning that language. This @value{CHAPTER} continues that theme, -presenting a potpourri of @command{awk} programs for your reading -enjoyment. -@ifnotinfo -There are three sections. -The first describes how to run the programs presented -in this @value{CHAPTER}. - -The second presents @command{awk} -versions of several common POSIX utilities. -These are programs that you are hopefully already familiar with, -and therefore, whose problems are understood. -By reimplementing these programs in @command{awk}, -you can focus on the @command{awk}-related aspects of solving -the programming problem. - -The third is a grab bag of interesting programs. -These solve a number of different data-manipulation and management -problems. Many of the programs are short, which emphasizes @command{awk}'s -ability to do a lot in just a few lines of code. 
-@end ifnotinfo - -Many of these programs use the library functions presented in -@ref{Library Functions, ,A Library of @command{awk} Functions}. - -@menu -* Running Examples:: How to run these examples. -* Clones:: Clones of common utilities. -* Miscellaneous Programs:: Some interesting @command{awk} programs. -@end menu - -@node Running Examples, Clones, Sample Programs, Sample Programs -@section Running the Example Programs - -To run a given program, you would typically do something like this: - -@example -awk -f @var{program} -- @var{options} @var{files} -@end example - -@noindent -Here, @var{program} is the name of the @command{awk} program (such as -@file{cut.awk}), @var{options} are any command-line options for the -program that start with a @samp{-}, and @var{files} are the actual @value{DF}s. - -If your system supports the @samp{#!} executable interpreter mechanism -(@pxref{Executable Scripts, , Executable @command{awk} Programs}), -you can instead run your program directly: - -@example -cut.awk -c1-8 myfiles > results -@end example - -If your @command{awk} is not @command{gawk}, you may instead need to use this: - -@example -cut.awk -- -c1-8 myfiles > results -@end example - -@node Clones, Miscellaneous Programs, Running Examples, Sample Programs -@section Reinventing Wheels for Fun and Profit - -This @value{SECTION} presents a number of POSIX utilities that are implemented in -@command{awk}. Reinventing these programs in @command{awk} is often enjoyable, -because the algorithms can be very clearly expressed, and the code is usually -very concise and simple. This is true because @command{awk} does so much for you. - -It should be noted that these programs are not necessarily intended to -replace the installed versions on your system. Instead, their -purpose is to illustrate @command{awk} language programming for ``real world'' -tasks. - -The programs are presented in alphabetical order. - -@menu -* Cut Program:: The @command{cut} utility. -* Egrep Program:: The @command{egrep} utility. -* Id Program:: The @command{id} utility. -* Split Program:: The @command{split} utility. -* Tee Program:: The @command{tee} utility. -* Uniq Program:: The @command{uniq} utility. -* Wc Program:: The @command{wc} utility. -@end menu - -@node Cut Program, Egrep Program, Clones, Clones -@subsection Cutting out Fields and Columns - -@cindex @command{cut} utility -The @command{cut} utility selects, or ``cuts,'' characters or fields -from its standard input and sends them to its standard output. -Fields are separated by tabs by default, -but you may supply a command-line option to change the field -@dfn{delimiter} (i.e., the field separator character). @command{cut}'s -definition of fields is less general than @command{awk}'s. - -A common use of @command{cut} might be to pull out just the login name of -logged-on users from the output of @command{who}. For example, the following -pipeline generates a sorted, unique list of the logged-on users: - -@example -who | cut -c1-8 | sort | uniq -@end example - -The options for @command{cut} are: - -@table @code -@item -c @var{list} -Use @var{list} as the list of characters to cut out. Items within the list -may be separated by commas, and ranges of characters can be separated with -dashes. The list @samp{1-8,15,22-35} specifies characters 1 through -8, 15, and 22 through 35. - -@item -f @var{list} -Use @var{list} as the list of fields to cut out. - -@item -d @var{delim} -Use @var{delim} as the field separator character instead of the tab -character. 
- -@item -s -Suppress printing of lines that do not contain the field delimiter. -@end table - -The @command{awk} implementation of @command{cut} uses the @code{getopt} library -function (@pxref{Getopt Function, ,Processing Command-Line Options}) -and the @code{join} library function -(@pxref{Join Function, ,Merging an Array into a String}). - -The program begins with a comment describing the options, the library -functions needed, and a @code{usage} function that prints out a usage -message and exits. @code{usage} is called if invalid arguments are -supplied: - -@cindex @code{cut.awk} program -@example -@c file eg/prog/cut.awk -# cut.awk --- implement cut in awk -@c endfile -@ignore -@c file eg/prog/cut.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/prog/cut.awk -# Options: -# -f list Cut fields -# -d c Field delimiter character -# -c list Cut characters -# -# -s Suppress lines without the delimiter -# -# Requires getopt and join library functions - -@group -function usage( e1, e2) -@{ - e1 = "usage: cut [-f list] [-d c] [-s] [files...]" - e2 = "usage: cut [-c list] [files...]" - print e1 > "/dev/stderr" - print e2 > "/dev/stderr" - exit 1 -@} -@end group -@c endfile -@end example - -@noindent -The variables @code{e1} and @code{e2} are used so that the function -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex - -Next comes a @code{BEGIN} rule that parses the command-line options. -It sets @code{FS} to a single tab character, because that is @command{cut}'s -default field separator. The output field separator is also set to be the -same as the input field separator. Then @code{getopt} is used to step -through the command-line options. One or the other of the variables -@code{by_fields} or @code{by_chars} is set to true, to indicate that -processing should be done by fields or by characters, respectively. -When cutting by characters, the output field separator is set to the null -string. - -@example -@c file eg/prog/cut.awk -BEGIN \ -@{ - FS = "\t" # default - OFS = FS - while ((c = getopt(ARGC, ARGV, "sf:c:d:")) != -1) @{ - if (c == "f") @{ - by_fields = 1 - fieldlist = Optarg - @} else if (c == "c") @{ - by_chars = 1 - fieldlist = Optarg - OFS = "" - @} else if (c == "d") @{ - if (length(Optarg) > 1) @{ - printf("Using first character of %s" \ - " for delimiter\n", Optarg) > "/dev/stderr" - Optarg = substr(Optarg, 1, 1) - @} - FS = Optarg - OFS = FS - if (FS == " ") # defeat awk semantics - FS = "[ ]" - @} else if (c == "s") - suppress++ - else - usage() - @} - - for (i = 1; i < Optind; i++) - ARGV[i] = "" -@c endfile -@end example - -Special care is taken when the field delimiter is a space. Using -a single space (@code{@w{" "}}) for the value of @code{FS} is -incorrect---@command{awk} would separate fields with runs of spaces, -tabs, and/or newlines, and we want them to be separated with individual -spaces. Also, note that after @code{getopt} is through, we have to -clear out all the elements of @code{ARGV} from 1 to @code{Optind}, -so that @command{awk} does not try to process the command-line options -as @value{FN}s. - -After dealing with the command-line options, the program verifies that the -options make sense. Only one or the other of @option{-c} and @option{-f} -should be used, and both require a field list. 
Then the program calls -either @code{set_fieldlist} or @code{set_charlist} to pull apart the -list of fields or characters: - -@example -@c file eg/prog/cut.awk - if (by_fields && by_chars) - usage() - - if (by_fields == 0 && by_chars == 0) - by_fields = 1 # default - - if (fieldlist == "") @{ - print "cut: needs list for -c or -f" > "/dev/stderr" - exit 1 - @} - - if (by_fields) - set_fieldlist() - else - set_charlist() -@} -@c endfile -@end example - -@code{set_fieldlist} is used to split the field list apart at the commas, -and into an array. Then, for each element of the array, it looks to -see if it is actually a range, and if so, splits it apart. The range -is verified to make sure the first number is smaller than the second. -Each number in the list is added to the @code{flist} array, which -simply lists the fields that will be printed. Normal field splitting -is used. The program lets @command{awk} handle the job of doing the -field splitting: - -@example -@c file eg/prog/cut.awk -function set_fieldlist( n, m, i, j, k, f, g) -@{ - n = split(fieldlist, f, ",") - j = 1 # index in flist - for (i = 1; i <= n; i++) @{ - if (index(f[i], "-") != 0) @{ # a range - m = split(f[i], g, "-") -@group - if (m != 2 || g[1] >= g[2]) @{ - printf("bad field list: %s\n", - f[i]) > "/dev/stderr" - exit 1 - @} -@end group - for (k = g[1]; k <= g[2]; k++) - flist[j++] = k - @} else - flist[j++] = f[i] - @} - nfields = j - 1 -@} -@c endfile -@end example - -The @code{set_charlist} function is more complicated than @code{set_fieldlist}. -The idea here is to use @command{gawk}'s @code{FIELDWIDTHS} variable -(@pxref{Constant Size, ,Reading Fixed-Width Data}), -which describes constant width input. When using a character list, that is -exactly what we have. - -Setting up @code{FIELDWIDTHS} is more complicated than simply listing the -fields that need to be printed. We have to keep track of the fields to -print and also the intervening characters that have to be skipped. -For example, suppose you wanted characters 1 through 8, 15, and -22 through 35. You would use @samp{-c 1-8,15,22-35}. The necessary value -for @code{FIELDWIDTHS} is @code{@w{"8 6 1 6 14"}}. This yields five -fields, and the fields to print -are @code{$1}, @code{$3}, and @code{$5}. -The intermediate fields are @dfn{filler}, -which is stuff in between the desired data. -@code{flist} lists the fields to print, and @code{t} tracks the -complete field list, including filler fields: - -@example -@c file eg/prog/cut.awk -function set_charlist( field, i, j, f, g, t, - filler, last, len) -@{ - field = 1 # count total fields - n = split(fieldlist, f, ",") - j = 1 # index in flist - for (i = 1; i <= n; i++) @{ - if (index(f[i], "-") != 0) @{ # range - m = split(f[i], g, "-") - if (m != 2 || g[1] >= g[2]) @{ - printf("bad character list: %s\n", - f[i]) > "/dev/stderr" - exit 1 - @} - len = g[2] - g[1] + 1 - if (g[1] > 1) # compute length of filler - filler = g[1] - last - 1 - else - filler = 0 -@group - if (filler) - t[field++] = filler -@end group - t[field++] = len # length of field - last = g[2] - flist[j++] = field - 1 - @} else @{ - if (f[i] > 1) - filler = f[i] - last - 1 - else - filler = 0 - if (filler) - t[field++] = filler - t[field++] = 1 - last = f[i] - flist[j++] = field - 1 - @} - @} - FIELDWIDTHS = join(t, 1, field - 1) - nfields = j - 1 -@} -@c endfile -@end example - -Next is the rule that actually processes the data. If the @option{-s} option -is given, then @code{suppress} is true. 
The first @code{if} statement -makes sure that the input record does have the field separator. If -@command{cut} is processing fields, @code{suppress} is true, and the field -separator character is not in the record, then the record is skipped. - -If the record is valid, then @command{gawk} has split the data -into fields, either using the character in @code{FS} or using fixed-length -fields and @code{FIELDWIDTHS}. The loop goes through the list of fields -that should be printed. The corresponding field is printed if it contains data. -If the next field also has data, then the separator character is -written out between the fields: - -@example -@c file eg/prog/cut.awk -@{ - if (by_fields && suppress && index($0, FS) != 0) - next - - for (i = 1; i <= nfields; i++) @{ - if ($flist[i] != "") @{ - printf "%s", $flist[i] - if (i < nfields && $flist[i+1] != "") - printf "%s", OFS - @} - @} - print "" -@} -@c endfile -@end example - -This version of @command{cut} relies on @command{gawk}'s @code{FIELDWIDTHS} -variable to do the character-based cutting. While it is possible in -other @command{awk} implementations to use @code{substr} -(@pxref{String Functions, ,String Manipulation Functions}), -it is also extremely painful. -The @code{FIELDWIDTHS} variable supplies an elegant solution to the problem -of picking the input line apart by characters. - -@c Exercise: Rewrite using split with "". - -@node Egrep Program, Id Program, Cut Program, Clones -@subsection Searching for Regular Expressions in Files - -@cindex @command{egrep} utility -The @command{egrep} utility searches files for patterns. It uses regular -expressions that are almost identical to those available in @command{awk} -(@pxref{Regexp, ,Regular Expressions}). -It is used in the following manner: - -@example -egrep @r{[} @var{options} @r{]} '@var{pattern}' @var{files} @dots{} -@end example - -The @var{pattern} is a regular expression. In typical usage, the regular -expression is quoted to prevent the shell from expanding any of the -special characters as @value{FN} wildcards. Normally, @command{egrep} -prints the lines that matched. If multiple @value{FN}s are provided on -the command line, each output line is preceded by the name of the file -and a colon. - -The options to @command{egrep} are as follows: - -@table @code -@item -c -Print out a count of the lines that matched the pattern, instead of the -lines themselves. - -@item -s -Be silent. No output is produced and the exit value indicates whether -the pattern was matched. - -@item -v -Invert the sense of the test. @command{egrep} prints the lines that do -@emph{not} match the pattern and exits successfully if the pattern is not -matched. - -@item -i -Ignore case distinctions in both the pattern and the input data. - -@item -l -Only print (list) the names of the files that matched, not the lines that matched. - -@item -e @var{pattern} -Use @var{pattern} as the regexp to match. The purpose of the @option{-e} -option is to allow patterns that start with a @samp{-}. -@end table - -This version uses the @code{getopt} library function -(@pxref{Getopt Function, ,Processing Command-Line Options}) -and the file transition library program -(@pxref{Filetrans Function, ,Noting @value{DDF} Boundaries}). - -The program begins with a descriptive comment and then a @code{BEGIN} rule -that processes the command-line arguments with @code{getopt}. 
The @option{-i} -(ignore case) option is particularly easy with @command{gawk}; we just use the -@code{IGNORECASE} built-in variable -(@pxref{Built-in Variables}): - -@cindex @code{egrep.awk} program -@example -@c file eg/prog/egrep.awk -# egrep.awk --- simulate egrep in awk -@c endfile -@ignore -@c file eg/prog/egrep.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/prog/egrep.awk -# Options: -# -c count of lines -# -s silent - use exit value -# -v invert test, success if no match -# -i ignore case -# -l print filenames only -# -e argument is pattern -# -# Requires getopt and file transition library functions - -BEGIN @{ - while ((c = getopt(ARGC, ARGV, "ce:svil")) != -1) @{ - if (c == "c") - count_only++ - else if (c == "s") - no_print++ - else if (c == "v") - invert++ - else if (c == "i") - IGNORECASE = 1 - else if (c == "l") - filenames_only++ - else if (c == "e") - pattern = Optarg - else - usage() - @} -@c endfile -@end example - -Next comes the code that handles the @command{egrep}-specific behavior. If no -pattern is supplied with @option{-e}, the first non-option on the -command line is used. The @command{awk} command-line arguments up to @code{ARGV[Optind]} -are cleared, so that @command{awk} won't try to process them as files. If no -files are specified, the standard input is used, and if multiple files are -specified, we make sure to note this so that the @value{FN}s can precede the -matched lines in the output: - -@example -@c file eg/prog/egrep.awk - if (pattern == "") - pattern = ARGV[Optind++] - - for (i = 1; i < Optind; i++) - ARGV[i] = "" - if (Optind >= ARGC) @{ - ARGV[1] = "-" - ARGC = 2 - @} else if (ARGC - Optind > 1) - do_filenames++ - -# if (IGNORECASE) -# pattern = tolower(pattern) -@} -@c endfile -@end example - -The last two lines are commented out, since they are not needed in -@command{gawk}. They should be uncommented if you have to use another version -of @command{awk}. - -The next set of lines should be uncommented if you are not using -@command{gawk}. This rule translates all the characters in the input line -into lowercase if the @option{-i} option is specified.@footnote{It -also introduces a subtle bug; -if a match happens, we output the translated line, not the original.} -The rule is -commented out since it is not necessary with @command{gawk}: - -@c Exercise: Fix this, w/array and new line as key to original line - -@example -@c file eg/prog/egrep.awk -#@{ -# if (IGNORECASE) -# $0 = tolower($0) -#@} -@c endfile -@end example - -The @code{beginfile} function is called by the rule in @file{ftrans.awk} -when each new file is processed. In this case, it is very simple; all it -does is initialize a variable @code{fcount} to zero. @code{fcount} tracks -how many lines in the current file matched the pattern. -(Naming the parameter @code{junk} shows we know that @code{beginfile} -is called with a parameter, but that we're not interested in its value.): - -@example -@c file eg/prog/egrep.awk -function beginfile(junk) -@{ - fcount = 0 -@} -@c endfile -@end example - -The @code{endfile} function is called after each file has been processed. -It affects the output only when the user wants a count of the number of lines that -matched. @code{no_print} is true only if the exit status is desired. -@code{count_only} is true if line counts are desired. @command{egrep} -therefore only prints line counts if printing and counting are enabled. 
-The output format must be adjusted depending upon the number of files to -process. Finally, @code{fcount} is added to @code{total}, so that we -know how many lines altogether matched the pattern: - -@example -@c file eg/prog/egrep.awk -function endfile(file) -@{ - if (! no_print && count_only) - if (do_filenames) - print file ":" fcount - else - print fcount - - total += fcount -@} -@c endfile -@end example - -The following rule does most of the work of matching lines. The variable -@code{matches} is true if the line matched the pattern. If the user -wants lines that did not match, the sense of @code{matches} is inverted -using the @samp{!} operator. @code{fcount} is incremented with the value of -@code{matches}, which is either one or zero, depending upon a -successful or unsuccessful match. If the line does not match, the -@code{next} statement just moves on to the next record. - -A number of additional tests are made, but they are only done if we -are not counting lines. First, if the user only wants exit status -(@code{no_print} is true), then it is enough to know that @emph{one} -line in this file matched, and we can skip on to the next file with -@code{nextfile}. Similarly, if we are only printing @value{FN}s, we can -print the @value{FN}, and then skip to the next file with @code{nextfile}. -Finally, each line is printed, with a leading @value{FN} and colon -if necessary: - -@cindex @code{!} operator -@example -@c file eg/prog/egrep.awk -@{ - matches = ($0 ~ pattern) - if (invert) - matches = ! matches - - fcount += matches # 1 or 0 - - if (! matches) - next - - if (! count_only) @{ - if (no_print) - nextfile - - if (filenames_only) @{ - print FILENAME - nextfile - @} - - if (do_filenames) - print FILENAME ":" $0 - else - print - @} -@} -@c endfile -@end example - -The @code{END} rule takes care of producing the correct exit status. If -there are no matches, the exit status is one, otherwise it is zero: - -@example -@c file eg/prog/egrep.awk -END \ -@{ - if (total == 0) - exit 1 - exit 0 -@} -@c endfile -@end example - -The @code{usage} function prints a usage message in case of invalid options, -and then exits: - -@example -@c file eg/prog/egrep.awk -function usage( e) -@{ - e = "Usage: egrep [-csvil] [-e pat] [files ...]" - e = e "\n\tegrep [-csvil] pat [files ...]" - print e > "/dev/stderr" - exit 1 -@} -@c endfile -@end example - -The variable @code{e} is used so that the function fits nicely -on the printed page. - -@cindex backslash continuation -Just a note on programming style: you may have noticed that the @code{END} -rule uses backslash continuation, with the open brace on a line by -itself. This is so that it more closely resembles the way functions -are written. Many of the examples -in this @value{CHAPTER} -use this style. You can decide for yourself if you like writing -your @code{BEGIN} and @code{END} rules this way -or not. - -@node Id Program, Split Program, Egrep Program, Clones -@subsection Printing out User Information - -@cindex @command{id} utility -The @command{id} utility lists a user's real and effective user-id numbers, -real and effective group-id numbers, and the user's group set, if any. -@command{id} only prints the effective user-id and group-id if they are -different from the real ones. If possible, @command{id} also supplies the -corresponding user and group names. 
The output might look like this: - -@example -$ id -@print{} uid=2076(arnold) gid=10(staff) groups=10(staff),4(tty) -@end example - -This information is part of what is provided by @command{gawk}'s -@code{PROCINFO} array (@pxref{Built-in Variables}). -However, the @command{id} utility provides a more palatable output than just -individual numbers. - -Here is a simple version of @command{id} written in @command{awk}. -It uses the user database library functions -(@pxref{Passwd Functions, ,Reading the User Database}) -and the group database library functions -(@pxref{Group Functions, ,Reading the Group Database}): - -The program is fairly straightforward. All the work is done in the -@code{BEGIN} rule. The user and group ID numbers are obtained from -@code{PROCINFO}. -The code is repetitive. The entry in the user database for the real user-id -number is split into parts at the @samp{:}. The name is the first field. -Similar code is used for the effective user-id number and the group -numbers. - -@cindex @code{id.awk} program -@example -@c file eg/prog/id.awk -# id.awk --- implement id in awk -# -# Requires user and group library functions -@c endfile -@ignore -@c file eg/prog/id.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 -# Revised February 1996 - -@c endfile -@end ignore -@c file eg/prog/id.awk -# output is: -# uid=12(foo) euid=34(bar) gid=3(baz) \ -# egid=5(blat) groups=9(nine),2(two),1(one) - -@group -BEGIN \ -@{ - uid = PROCINFO["uid"] - euid = PROCINFO["euid"] - gid = PROCINFO["gid"] - egid = PROCINFO["egid"] -@end group - - printf("uid=%d", uid) - pw = getpwuid(uid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} - - if (euid != uid) @{ - printf(" euid=%d", euid) - pw = getpwuid(euid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} - @} - - printf(" gid=%d", gid) - pw = getgrgid(gid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} - - if (egid != gid) @{ - printf(" egid=%d", egid) - pw = getgrgid(egid) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} - @} - - for (i = 1; ("group" i) in PROCINFO; i++) @{ - if (i == 1) - printf(" groups=") - group = PROCINFO["group" i] - printf("%d", group) - pw = getgrgid(group) - if (pw != "") @{ - split(pw, a, ":") - printf("(%s)", a[1]) - @} - if (("group" (i+1)) in PROCINFO) - printf(",") - @} - - print "" -@} -@c endfile -@end example - -@cindex @code{in} operator -The test in the @code{for} loop is worth noting. -Any supplementary groups in the @code{PROCINFO} array have the -indices @code{"group1"} through @code{"group@var{N}"} for some -@var{N}; i.e., the total number of supplementary groups. -The problem is, we don't know in advance how many of these groups -there are. - -This loop works by starting at one, concatenating the value with -@code{"group"}, and then using @code{in} to see if that value is -in the array. Eventually, @code{i} is incremented past -the last group in the array and the loop exits. - -The loop is also correct if there are @emph{no} supplementary -groups; then the condition is false the first time it's -tested, and the loop body never executes. - -@c exercise!!! -@ignore -The POSIX version of @command{id} takes arguments that control which -information is printed. Modify this version to accept the same -arguments and perform in the same way. 
-@end ignore - -@node Split Program, Tee Program, Id Program, Clones -@subsection Splitting a Large File into Pieces - -@cindex @code{split} utility -The @code{split} program splits large text files into smaller pieces. -The usage is as follows: - -@example -split @r{[}-@var{count}@r{]} file @r{[} @var{prefix} @r{]} -@end example - -By default, -the output files are named @file{xaa}, @file{xab}, and so on. Each file has -1000 lines in it, with the likely exception of the last file. To change the -number of lines in each file, supply a number on the command line -preceded with a minus; e.g., @samp{-500} for files with 500 lines in them -instead of 1000. To change the name of the output files to something like -@file{myfileaa}, @file{myfileab}, and so on, supply an additional -argument that specifies the @value{FN} prefix. - -Here is a version of @code{split} in @command{awk}. It uses the @code{ord} and -@code{chr} functions presented in -@ref{Ordinal Functions, ,Translating Between Characters and Numbers}. - -The program first sets its defaults, and then tests to make sure there are -not too many arguments. It then looks at each argument in turn. The -first argument could be a minus followed by a number. If it is, this happens -to look like a negative number, so it is made positive, and that is the -count of lines. The data @value{FN} is skipped over and the final argument -is used as the prefix for the output @value{FN}s: - -@cindex @code{split.awk} program -@example -@c file eg/prog/split.awk -# split.awk --- do split in awk -# -# Requires ord and chr library functions -@c endfile -@ignore -@c file eg/prog/split.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/prog/split.awk -# usage: split [-num] [file] [outname] - -BEGIN @{ - outfile = "x" # default - count = 1000 - if (ARGC > 4) - usage() - - i = 1 - if (ARGV[i] ~ /^-[0-9]+$/) @{ - count = -ARGV[i] - ARGV[i] = "" - i++ - @} - # test argv in case reading from stdin instead of file - if (i in ARGV) - i++ # skip data file name - if (i in ARGV) @{ - outfile = ARGV[i] - ARGV[i] = "" - @} - - s1 = s2 = "a" - out = (outfile s1 s2) -@} -@c endfile -@end example - -The next rule does most of the work. @code{tcount} (temporary count) tracks -how many lines have been printed to the output file so far. If it is greater -than @code{count}, it is time to close the current file and start a new one. -@code{s1} and @code{s2} track the current suffixes for the @value{FN}. If -they are both @samp{z}, the file is just too big. Otherwise, @code{s1} -moves to the next letter in the alphabet and @code{s2} starts over again at -@samp{a}: - -@c else on separate line here for page breaking -@example -@c file eg/prog/split.awk -@{ - if (++tcount > count) @{ - close(out) - if (s2 == "z") @{ - if (s1 == "z") @{ - printf("split: %s is too large to split\n", - FILENAME) > "/dev/stderr" - exit 1 - @} - s1 = chr(ord(s1) + 1) - s2 = "a" - @} -@group - else - s2 = chr(ord(s2) + 1) -@end group - out = (outfile s1 s2) - tcount = 1 - @} - print > out -@} -@c endfile -@end example - -@c Exercise: do this with just awk builtin functions, index("abc..."), substr, etc. 
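The @code{ord} and @code{chr} functions are not strictly necessary here.
As an illustrative sketch (it is not part of @file{split.awk}), a suffix
letter can also be advanced using only the @code{index} and @code{substr}
built-in functions, by looking the letter up in a string containing the
alphabet and taking the character that follows it; the calling rule would
still have to check for @samp{z} before advancing, exactly as the rule
above does:

@example
# hypothetical helper: advance a lowercase suffix letter using
# only index() and substr(); assumes l is a letter below "z"
function nextletter(l,    letters)
@{
    letters = "abcdefghijklmnopqrstuvwxyz"
    return substr(letters, index(letters, l) + 1, 1)
@}
@end example

@noindent
With such a helper, the two suffix updates in the rule above could be
written as @samp{s1 = nextletter(s1)} and @samp{s2 = nextletter(s2)}.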
- -@noindent -The @code{usage} function simply prints an error message and exits: - -@example -@c file eg/prog/split.awk -function usage( e) -@{ - e = "usage: split [-num] [file] [outname]" - print e > "/dev/stderr" - exit 1 -@} -@c endfile -@end example - -@noindent -The variable @code{e} is used so that the function -fits nicely on the -@ifinfo -screen. -@end ifinfo -@ifnotinfo -page. -@end ifnotinfo - -This program is a bit sloppy; it relies on @command{awk} to close the last file -for it automatically, instead of doing it in an @code{END} rule. -It also assumes that letters are contiguous in the character set, -which isn't true for EBCDIC systems. -@c BFD... - -@node Tee Program, Uniq Program, Split Program, Clones -@subsection Duplicating Output into Multiple Files - -@cindex @code{tee} utility -The @code{tee} program is known as a ``pipe fitting.'' @code{tee} copies -its standard input to its standard output and also duplicates it to the -files named on the command line. Its usage is as follows: - -@example -tee @r{[}-a@r{]} file @dots{} -@end example - -The @option{-a} option tells @code{tee} to append to the named files, instead of -truncating them and starting over. - -The @code{BEGIN} rule first makes a copy of all the command-line arguments -into an array named @code{copy}. -@code{ARGV[0]} is not copied, since it is not needed. -@code{tee} cannot use @code{ARGV} directly, since @command{awk} attempts to -process each @value{FN} in @code{ARGV} as input data. - -@cindex flag variables -If the first argument is @option{-a}, then the flag variable -@code{append} is set to true, and both @code{ARGV[1]} and -@code{copy[1]} are deleted. If @code{ARGC} is less than two, then no -@value{FN}s were supplied and @code{tee} prints a usage message and exits. -Finally, @command{awk} is forced to read the standard input by setting -@code{ARGV[1]} to @code{"-"} and @code{ARGC} to two: - -@c NEXT ED: Add more leading commentary in this program -@cindex @code{tee.awk} program -@example -@c file eg/prog/tee.awk -# tee.awk --- tee in awk -@c endfile -@ignore -@c file eg/prog/tee.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 -# Revised December 1995 - -@c endfile -@end ignore -@c file eg/prog/tee.awk -BEGIN \ -@{ - for (i = 1; i < ARGC; i++) - copy[i] = ARGV[i] - - if (ARGV[1] == "-a") @{ - append = 1 - delete ARGV[1] - delete copy[1] - ARGC-- - @} - if (ARGC < 2) @{ - print "usage: tee [-a] file ..." > "/dev/stderr" - exit 1 - @} - ARGV[1] = "-" - ARGC = 2 -@} -@c endfile -@end example - -The single rule does all the work. Since there is no pattern, it is -executed for each line of input. The body of the rule simply prints the -line into each file on the command line, and then to the standard output: - -@example -@c file eg/prog/tee.awk -@{ - # moving the if outside the loop makes it run faster - if (append) - for (i in copy) - print >> copy[i] - else - for (i in copy) - print > copy[i] - print -@} -@c endfile -@end example - -@noindent -It is also possible to write the loop this way: - -@example -for (i in copy) - if (append) - print >> copy[i] - else - print > copy[i] -@end example - -@noindent -This is more concise but it is also less efficient. The @samp{if} is -tested for each record and for each output file. By duplicating the loop -body, the @samp{if} is only tested once for each input record. 
If there are -@var{N} input records and @var{M} output files, the first method only -executes @var{N} @samp{if} statements, while the second executes -@var{N}@code{*}@var{M} @samp{if} statements. - -Finally, the @code{END} rule cleans up by closing all the output files: - -@example -@c file eg/prog/tee.awk -END \ -@{ - for (i in copy) - close(copy[i]) -@} -@c endfile -@end example - -@node Uniq Program, Wc Program, Tee Program, Clones -@subsection Printing Non-Duplicated Lines of Text - -@cindex @command{uniq} utility -The @command{uniq} utility reads sorted lines of data on its standard -input, and by default removes duplicate lines. In other words, it only -prints unique lines---hence the name. @command{uniq} has a number of -options. The usage is as follows: - -@example -uniq @r{[}-udc @r{[}-@var{n}@r{]]} @r{[}+@var{n}@r{]} @r{[} @var{input file} @r{[} @var{output file} @r{]]} -@end example - -The option meanings are: - -@table @code -@item -d -Only print repeated lines. - -@item -u -Only print non-repeated lines. - -@item -c -Count lines. This option overrides @option{-d} and @option{-u}. Both repeated -and non-repeated lines are counted. - -@item -@var{n} -Skip @var{n} fields before comparing lines. The definition of fields -is similar to @command{awk}'s default: non-whitespace characters separated -by runs of spaces and/or tabs. - -@item +@var{n} -Skip @var{n} characters before comparing lines. Any fields specified with -@samp{-@var{n}} are skipped first. - -@item @var{input file} -Data is read from the input file named on the command line, instead of from -the standard input. - -@item @var{output file} -The generated output is sent to the named output file, instead of to the -standard output. -@end table - -Normally @command{uniq} behaves as if both the @option{-d} and -@option{-u} options are provided. - -@command{uniq} uses the -@code{getopt} library function -(@pxref{Getopt Function, ,Processing Command-Line Options}) -and the @code{join} library function -(@pxref{Join Function, ,Merging an Array into a String}). - -The program begins with a @code{usage} function and then a brief outline of -the options and their meanings in a comment. -The @code{BEGIN} rule deals with the command-line arguments and options. It -uses a trick to get @code{getopt} to handle options of the form @samp{-25}, -treating such an option as the option letter @samp{2} with an argument of -@samp{5}. If indeed two or more digits are supplied (@code{Optarg} looks -like a number), @code{Optarg} is -concatenated with the option digit and then the result is added to zero to make -it into a number. If there is only one digit in the option, then -@code{Optarg} is not needed. @code{Optind} must be decremented so that -@code{getopt} processes it next time. This code is admittedly a bit -tricky. - -If no options are supplied, then the default is taken, to print both -repeated and non-repeated lines. The output file, if provided, is assigned -to @code{outputfile}. Early on, @code{outputfile} is initialized to the -standard output, @file{/dev/stdout}: - -@cindex @code{uniq.awk} program -@example -@c file eg/prog/uniq.awk -@group -# uniq.awk --- do uniq in awk -# -# Requires getopt and join library functions -@end group -@c endfile -@ignore -@c file eg/prog/uniq.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/prog/uniq.awk -function usage( e) -@{ - e = "Usage: uniq [-udc [-n]] [+n] [ in [ out ]]" - print e > "/dev/stderr" - exit 1 -@} - -# -c count lines. 
overrides -d and -u -# -d only repeated lines -# -u only non-repeated lines -# -n skip n fields -# +n skip n characters, skip fields first - -BEGIN \ -@{ - count = 1 - outputfile = "/dev/stdout" - opts = "udc0:1:2:3:4:5:6:7:8:9:" - while ((c = getopt(ARGC, ARGV, opts)) != -1) @{ - if (c == "u") - non_repeated_only++ - else if (c == "d") - repeated_only++ - else if (c == "c") - do_count++ - else if (index("0123456789", c) != 0) @{ - # getopt requires args to options - # this messes us up for things like -5 - if (Optarg ~ /^[0-9]+$/) - fcount = (c Optarg) + 0 - else @{ - fcount = c + 0 - Optind-- - @} - @} else - usage() - @} - - if (ARGV[Optind] ~ /^\+[0-9]+$/) @{ - charcount = substr(ARGV[Optind], 2) + 0 - Optind++ - @} - - for (i = 1; i < Optind; i++) - ARGV[i] = "" - - if (repeated_only == 0 && non_repeated_only == 0) - repeated_only = non_repeated_only = 1 - - if (ARGC - Optind == 2) @{ - outputfile = ARGV[ARGC - 1] - ARGV[ARGC - 1] = "" - @} -@} -@c endfile -@end example - -The following function, @code{are_equal}, compares the current line, -@code{$0}, to the -previous line, @code{last}. It handles skipping fields and characters. -If no field count and no character count are specified, @code{are_equal} -simply returns one or zero depending upon the result of a simple string -comparison of @code{last} and @code{$0}. Otherwise, things get more -complicated. -If fields have to be skipped, each line is broken into an array using -@code{split} -(@pxref{String Functions, ,String Manipulation Functions}); -the desired fields are then joined back into a line using @code{join}. -The joined lines are stored in @code{clast} and @code{cline}. -If no fields are skipped, @code{clast} and @code{cline} are set to -@code{last} and @code{$0}, respectively. -Finally, if characters are skipped, @code{substr} is used to strip off the -leading @code{charcount} characters in @code{clast} and @code{cline}. The -two strings are then compared and @code{are_equal} returns the result: - -@example -@c file eg/prog/uniq.awk -function are_equal( n, m, clast, cline, alast, aline) -@{ - if (fcount == 0 && charcount == 0) - return (last == $0) - - if (fcount > 0) @{ - n = split(last, alast) - m = split($0, aline) - clast = join(alast, fcount+1, n) - cline = join(aline, fcount+1, m) - @} else @{ - clast = last - cline = $0 - @} - if (charcount) @{ - clast = substr(clast, charcount + 1) - cline = substr(cline, charcount + 1) - @} - - return (clast == cline) -@} -@c endfile -@end example - -The following two rules are the body of the program. The first one is -executed only for the very first line of data. It sets @code{last} equal to -@code{$0}, so that subsequent lines of text have something to be compared to. - -The second rule does the work. The variable @code{equal} is one or zero, -depending upon the results of @code{are_equal}'s comparison. If @command{uniq} -is counting repeated lines, and the lines are equal, then it increments the @code{count} variable. -Otherwise it prints the line and resets @code{count}, -since the two lines are not equal. - -If @command{uniq} is not counting, and if the lines are equal, @code{count} is incremented. -Nothing is printed, since the point is to remove duplicates. -Otherwise, if @command{uniq} is counting repeated lines and more than -one line is seen, or if @command{uniq} is counting non-repeated lines -and only one line is seen, then the line is printed, and @code{count} -is reset. 
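As a concrete illustration of the counting behavior just described (the
file and its contents here are invented), suppose a sorted file named
@file{data} contains these lines:

@example
apple
apple
banana
@end example

@noindent
Then, assuming the program is saved in @file{uniq.awk}, running it with
the @option{-c} option counts each group of identical lines:

@example
$ awk -f uniq.awk -- -c data
@print{}    2 apple
@print{}    1 banana
@end example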
- -Finally, similar logic is used in the @code{END} rule to print the final -line of input data: - -@example -@c file eg/prog/uniq.awk -NR == 1 @{ - last = $0 - next -@} - -@{ - equal = are_equal() - - if (do_count) @{ # overrides -d and -u - if (equal) - count++ - else @{ - printf("%4d %s\n", count, last) > outputfile - last = $0 - count = 1 # reset - @} - next - @} - - if (equal) - count++ - else @{ - if ((repeated_only && count > 1) || - (non_repeated_only && count == 1)) - print last > outputfile - last = $0 - count = 1 - @} -@} - -END @{ - if (do_count) - printf("%4d %s\n", count, last) > outputfile - else if ((repeated_only && count > 1) || - (non_repeated_only && count == 1)) - print last > outputfile -@} -@c endfile -@end example - -@node Wc Program, , Uniq Program, Clones -@subsection Counting Things - -@cindex @command{wc} utility -The @command{wc} (word count) utility counts lines, words, and characters in -one or more input files. Its usage is as follows: - -@example -wc @r{[}-lwc@r{]} @r{[} @var{files} @dots{} @r{]} -@end example - -If no files are specified on the command line, @command{wc} reads its standard -input. If there are multiple files, it also prints total counts for all -the files. The options and their meanings are shown in the following list: - -@table @code -@item -l -Only count lines. - -@item -w -Only count words. -A ``word'' is a contiguous sequence of non-whitespace characters, separated -by spaces and/or tabs. Happily, this is the normal way @command{awk} separates -fields in its input data. - -@item -c -Only count characters. -@end table - -Implementing @command{wc} in @command{awk} is particularly elegant, -since @command{awk} does a lot of the work for us; it splits lines into -words (i.e., fields) and counts them, it counts lines (i.e., records), -and it can easily tell us how long a line is. - -This uses the @code{getopt} library function -(@pxref{Getopt Function, ,Processing Command-Line Options}) -and the file transition functions -(@pxref{Filetrans Function, ,Noting @value{DDF} Boundaries}). - -This version has one notable difference from traditional versions of -@command{wc}: it always prints the counts in the order lines, words, -and characters. Traditional versions note the order of the @option{-l}, -@option{-w}, and @option{-c} options on the command line, and print the -counts in that order. - -The @code{BEGIN} rule does the argument processing. The variable -@code{print_total} is true if more than one file is named on the -command line: - -@cindex @code{wc.awk} program -@example -@c file eg/prog/wc.awk -# wc.awk --- count lines, words, characters -@c endfile -@ignore -@c file eg/prog/wc.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 -@c endfile -@end ignore -@c file eg/prog/wc.awk - -# Options: -# -l only count lines -# -w only count words -# -c only count characters -# -# Default is to count lines, words, characters -# -# Requires getopt and file transition library functions - -BEGIN @{ - # let getopt print a message about - # invalid options. we ignore them - while ((c = getopt(ARGC, ARGV, "lwc")) != -1) @{ - if (c == "l") - do_lines = 1 - else if (c == "w") - do_words = 1 - else if (c == "c") - do_chars = 1 - @} - for (i = 1; i < Optind; i++) - ARGV[i] = "" - - # if no options, do all - if (! do_lines && ! do_words && ! 
do_chars) - do_lines = do_words = do_chars = 1 - - print_total = (ARGC - i > 2) -@} -@c endfile -@end example - -The @code{beginfile} function is simple; it just resets the counts of lines, -words, and characters to zero, and saves the current @value{FN} in -@code{fname}: - -@c NEXT ED: make it lines = words = chars = 0 -@example -@c file eg/prog/wc.awk -function beginfile(file) -@{ - chars = lines = words = 0 - fname = FILENAME -@} -@c endfile -@end example - -The @code{endfile} function adds the current file's numbers to the running -totals of lines, words, and characters. It then prints out those numbers -for the file that was just read. It relies on @code{beginfile} to reset the -numbers for the following @value{DF}: - -@c NEXT ED: make order for += be lines, words, chars -@example -@c file eg/prog/wc.awk -function endfile(file) -@{ - tchars += chars - tlines += lines - twords += words - if (do_lines) - printf "\t%d", lines -@group - if (do_words) - printf "\t%d", words -@end group - if (do_chars) - printf "\t%d", chars - printf "\t%s\n", fname -@} -@c endfile -@end example - -There is one rule that is executed for each line. It adds the length of -the record, plus one, to @code{chars}. Adding one plus the record length -is needed because the newline character separating records (the value -of @code{RS}) is not part of the record itself, and thus not included -in its length. Next, @code{lines} is incremented for each line read, -and @code{words} is incremented by the value of @code{NF}, which is the -number of ``words'' on this line:@footnote{@command{wc} can't just use -the value of @code{FNR} in @code{endfile}. If you examine the code in -@ref{Filetrans Function, ,Noting @value{DDF} Boundaries}, -you will see that @code{FNR} has already been reset by the time -@code{endfile} is called.} -@c ONE DAY: make the above an exercise, instead of giving away the answer. - -@example -@c file eg/prog/wc.awk -# do per line -@{ - chars += length($0) + 1 # get newline - lines++ - words += NF -@} -@c endfile -@end example - -Finally, the @code{END} rule simply prints the totals for all the files. - -@example -@c file eg/prog/wc.awk -END @{ - if (print_total) @{ - if (do_lines) - printf "\t%d", tlines - if (do_words) - printf "\t%d", twords - if (do_chars) - printf "\t%d", tchars - print "\ttotal" - @} -@} -@c endfile -@end example - -@node Miscellaneous Programs, , Clones, Sample Programs -@section A Grab Bag of @command{awk} Programs - -This @value{SECTION} is a large ``grab bag'' of miscellaneous programs. -We hope you find them both interesting and enjoyable. - -@menu -* Dupword Program:: Finding duplicated words in a document. -* Alarm Program:: An alarm clock. -* Translate Program:: A program similar to the @command{tr} utility. -* Labels Program:: Printing mailing labels. -* Word Sorting:: A program to produce a word usage count. -* History Sorting:: Eliminating duplicate entries from a history - file. -* Extract Program:: Pulling out programs from Texinfo source - files. -* Simple Sed:: A Simple Stream Editor. -* Igawk Program:: A wrapper for @command{awk} that includes - files. -@end menu - -@node Dupword Program, Alarm Program, Miscellaneous Programs, Miscellaneous Programs -@subsection Finding Duplicated Words in a Document - -A common error when writing large amounts of prose is to accidentally -duplicate words. 
Typically you will see this in text as something like ``the -the program does the following @dots{}.'' When the text is online, often -the duplicated words occur at the end of one line and the beginning of -another, making them very difficult to spot. -@c as here! - -This program, @file{dupword.awk}, scans through a file one line at a time -and looks for adjacent occurrences of the same word. It also saves the last -word on a line (in the variable @code{prev}) for comparison with the first -word on the next line. - -@cindex Texinfo -The first two statements make sure that the line is all lowercase, -so that, for example, ``The'' and ``the'' compare equal to each other. -The next statement replaces non-alphanumeric and non-whitespace characters -with spaces, so that punctuation does not affect the comparison either. -The characters are replaced with spaces so that formatting controls -don't create nonsense words (e.g., the Texinfo @samp{@@code@{NF@}} -becomes @samp{codeNF} if punctuation is simply deleted). The record is -then re-split into fields, yielding just the actual words on the line, -and insuring that there are no empty fields. - -If there are no fields left after removing all the punctuation, the -current record is skipped. Otherwise, the program loops through each -word, comparing it to the previous one: - -@cindex @code{dupword.awk} program -@example -@c file eg/prog/dupword.awk -# dupword.awk --- find duplicate words in text -@c endfile -@ignore -@c file eg/prog/dupword.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# December 1991 -# Revised October 2000 - -@c endfile -@end ignore -@c file eg/prog/dupword.awk -@{ - $0 = tolower($0) - gsub(/[^[:alnum:][:blank:]]/, " "); - $0 = $0 # re-split - if (NF == 0) - next - if ($1 == prev) - printf("%s:%d: duplicate %s\n", - FILENAME, FNR, $1) - for (i = 2; i <= NF; i++) - if ($i == $(i-1)) - printf("%s:%d: duplicate %s\n", - FILENAME, FNR, $i) - prev = $NF -@} -@c endfile -@end example - -@node Alarm Program, Translate Program, Dupword Program, Miscellaneous Programs -@subsection An Alarm Clock Program -@cindex insomnia, cure for -@cindex Robbins, Arnold -@quotation -@i{Nothing cures insomnia like a ringing alarm clock.}@* -Arnold Robbins -@end quotation - -The following program is a simple ``alarm clock'' program. -You give it a time of day and an optional message. At the specified time, -it prints the message on the standard output. In addition, you can give it -the number of times to repeat the message as well as a delay between -repetitions. - -This program uses the @code{gettimeofday} function from -@ref{Gettimeofday Function, ,Managing the Time of Day}. - -All the work is done in the @code{BEGIN} rule. The first part is argument -checking and setting of defaults: the delay, the count, and the message to -print. If the user supplied a message without the ASCII BEL -character (known as the ``alert'' character, @code{"\a"}), then it is added to -the message. (On many systems, printing the ASCII BEL generates some sort -of audible alert. 
Thus when the alarm goes off, the system calls attention -to itself in case the user is not looking at their computer or terminal.): - -@cindex @code{alarm.awk} program -@example -@c file eg/prog/alarm.awk -# alarm.awk --- set an alarm -# -# Requires gettimeofday library function -@c endfile -@ignore -@c file eg/prog/alarm.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/prog/alarm.awk -# usage: alarm time [ "message" [ count [ delay ] ] ] - -BEGIN \ -@{ - # Initial argument sanity checking - usage1 = "usage: alarm time ['message' [count [delay]]]" - usage2 = sprintf("\t(%s) time ::= hh:mm", ARGV[1]) - - if (ARGC < 2) @{ - print usage1 > "/dev/stderr" - print usage2 > "/dev/stderr" - exit 1 - @} else if (ARGC == 5) @{ - delay = ARGV[4] + 0 - count = ARGV[3] + 0 - message = ARGV[2] - @} else if (ARGC == 4) @{ - count = ARGV[3] + 0 - message = ARGV[2] - @} else if (ARGC == 3) @{ - message = ARGV[2] - @} else if (ARGV[1] !~ /[0-9]?[0-9]:[0-9][0-9]/) @{ - print usage1 > "/dev/stderr" - print usage2 > "/dev/stderr" - exit 1 - @} - - # set defaults for once we reach the desired time - if (delay == 0) - delay = 180 # 3 minutes -@group - if (count == 0) - count = 5 -@end group - if (message == "") - message = sprintf("\aIt is now %s!\a", ARGV[1]) - else if (index(message, "\a") == 0) - message = "\a" message "\a" -@c endfile -@end example - -The next @value{SECTION} of code turns the alarm time into hours and minutes, -converts it (if necessary) to a 24-hour clock, and then turns that -time into a count of the seconds since midnight. Next it turns the current -time into a count of seconds since midnight. The difference between the two -is how long to wait before setting off the alarm: - -@example -@c file eg/prog/alarm.awk - # split up alarm time - split(ARGV[1], atime, ":") - hour = atime[1] + 0 # force numeric - minute = atime[2] + 0 # force numeric - - # get current broken down time - gettimeofday(now) - - # if time given is 12-hour hours and it's after that - # hour, e.g., `alarm 5:30' at 9 a.m. means 5:30 p.m., - # then add 12 to real hour - if (hour < 12 && now["hour"] > hour) - hour += 12 - - # set target time in seconds since midnight - target = (hour * 60 * 60) + (minute * 60) - - # get current time in seconds since midnight - current = (now["hour"] * 60 * 60) + \ - (now["minute"] * 60) + now["second"] - - # how long to sleep for - naptime = target - current - if (naptime <= 0) @{ - print "time is in the past!" > "/dev/stderr" - exit 1 - @} -@c endfile -@end example - -@cindex @command{sleep} utility -Finally, the program uses the @code{system} function -(@pxref{I/O Functions, ,Input/Output Functions}) -to call the @command{sleep} utility. The @command{sleep} utility simply pauses -for the given number of seconds. If the exit status is not zero, -the program assumes that @command{sleep} was interrupted and exits. If -@command{sleep} exited with an OK status (zero), then the program prints the -message in a loop, again using @command{sleep} to delay for however many -seconds are necessary: - -@example -@c file eg/prog/alarm.awk - # zzzzzz..... go away if interrupted - if (system(sprintf("sleep %d", naptime)) != 0) - exit 1 - - # time to notify! 
- command = sprintf("sleep %d", delay) - for (i = 1; i <= count; i++) @{ - print message - # if sleep command interrupted, go away - if (system(command) != 0) - break - @} - - exit 0 -@} -@c endfile -@end example - -@node Translate Program, Labels Program, Alarm Program, Miscellaneous Programs -@subsection Transliterating Characters - -@cindex @command{tr} utility -The system @command{tr} utility transliterates characters. For example, it is -often used to map uppercase letters into lowercase for further processing: - -@example -@var{generate data} | tr 'A-Z' 'a-z' | @var{process data} @dots{} -@end example - -@command{tr} requires two lists of characters.@footnote{On some older -System V systems, -@command{tr} may require that the lists be written as -range expressions enclosed in square brackets (@samp{[a-z]}) and quoted, -to prevent the shell from attempting a @value{FN} expansion. This is -not a feature.} When processing the input, the first character in the -first list is replaced with the first character in the second list, -the second character in the first list is replaced with the second -character in the second list, and so on. If there are more characters -in the ``from'' list than in the ``to'' list, the last character of the -``to'' list is used for the remaining characters in the ``from'' list. - -Some time ago, -@c early or mid-1989! -a user proposed that a transliteration function should -be added to @command{gawk}. -@c Wishing to avoid gratuitous new features, -@c at least theoretically -The following program was written to -prove that character transliteration could be done with a user-level -function. This program is not as complete as the system @command{tr} utility -but it does most of the job. - -The @command{translate} program demonstrates one of the few weaknesses -of standard @command{awk}: dealing with individual characters is very -painful, requiring repeated use of the @code{substr}, @code{index}, -and @code{gsub} built-in functions -(@pxref{String Functions, ,String Manipulation Functions}).@footnote{This -program was written before @command{gawk} acquired the ability to -split each character in a string into separate array elements.} -@c Exercise: How might you use this new feature to simplify the program? - -There are two functions. The first, @code{stranslate}, takes three -arguments: - -@table @code -@item from -A list of characters to translate from. - -@item to -A list of characters to translate to. - -@item target -The string to do the translation on. -@end table - -Associative arrays make the translation part fairly easy. @code{t_ar} holds -the ``to'' characters, indexed by the ``from'' characters. Then a simple -loop goes through @code{from}, one character at a time. For each character -in @code{from}, if the character appears in @code{target}, @code{gsub} -is used to change it to the corresponding @code{to} character. - -The @code{translate} function simply calls @code{stranslate} using @code{$0} -as the target. The main program sets two global variables, @code{FROM} and -@code{TO}, from the command line, and then changes @code{ARGV} so that -@command{awk} reads from the standard input. 
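As a concrete illustration of how the translation table is built (the
character lists here are invented), suppose @code{stranslate} is called
with @code{from} set to @code{"abc"} and @code{to} set to @code{"xy"}.
The first loop fills in the table for the characters that pair up, and
the second loop reuses the last ``to'' character for the leftover
``from'' character:

@example
t_ar["a"] = "x"
t_ar["b"] = "y"
t_ar["c"] = "y"    # "to" is shorter, so its last character is reused
@end example

@noindent
A line such as @samp{cab} would therefore be translated to @samp{yxy}.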
- -Finally, the processing rule simply calls @code{translate} for each record: - -@cindex @code{translate.awk} program -@example -@c file eg/prog/translate.awk -# translate.awk --- do tr-like stuff -@c endfile -@ignore -@c file eg/prog/translate.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# August 1989 - -@c endfile -@end ignore -@c file eg/prog/translate.awk -# Bugs: does not handle things like: tr A-Z a-z, it has -# to be spelled out. However, if `to' is shorter than `from', -# the last character in `to' is used for the rest of `from'. - -function stranslate(from, to, target, lf, lt, t_ar, i, c) -@{ - lf = length(from) - lt = length(to) - for (i = 1; i <= lt; i++) - t_ar[substr(from, i, 1)] = substr(to, i, 1) - if (lt < lf) - for (; i <= lf; i++) - t_ar[substr(from, i, 1)] = substr(to, lt, 1) - for (i = 1; i <= lf; i++) @{ - c = substr(from, i, 1) - if (index(target, c) > 0) - gsub(c, t_ar[c], target) - @} - return target -@} - -function translate(from, to) -@{ - return $0 = stranslate(from, to, $0) -@} - -# main program -BEGIN @{ -@group - if (ARGC < 3) @{ - print "usage: translate from to" > "/dev/stderr" - exit - @} -@end group - FROM = ARGV[1] - TO = ARGV[2] - ARGC = 2 - ARGV[1] = "-" -@} - -@{ - translate(FROM, TO) - print -@} -@c endfile -@end example - -While it is possible to do character transliteration in a user-level -function, it is not necessarily efficient, and we (the @command{gawk} -authors) started to consider adding a built-in function. However, -shortly after writing this program, we learned that the System V Release 4 -@command{awk} had added the @code{toupper} and @code{tolower} functions -(@pxref{String Functions, ,String Manipulation Functions}). -These functions handle the vast majority of the -cases where character transliteration is necessary, and so we chose to -simply add those functions to @command{gawk} as well and then leave well -enough alone. - -An obvious improvement to this program would be to set up the -@code{t_ar} array only once, in a @code{BEGIN} rule. However, this -assumes that the ``from'' and ``to'' lists -will never change throughout the lifetime of the program. - -@node Labels Program, Word Sorting, Translate Program, Miscellaneous Programs -@subsection Printing Mailing Labels - -Here is a ``real world''@footnote{``Real world'' is defined as -``a program actually used to get something done.''} -program. This -script reads lists of names and -addresses and generates mailing labels. Each page of labels has 20 labels -on it, two across and ten down. The addresses are guaranteed to be no more -than five lines of data. Each address is separated from the next by a blank -line. - -The basic idea is to read 20 labels worth of data. Each line of each label -is stored in the @code{line} array. The single rule takes care of filling -the @code{line} array and printing the page when 20 labels have been read. - -The @code{BEGIN} rule simply sets @code{RS} to the empty string, so that -@command{awk} splits records at blank lines -(@pxref{Records, ,How Input Is Split into Records}). -It sets @code{MAXLINES} to 100, since 100 is the maximum number -of lines on the page (20 * 5 = 100). - -Most of the work is done in the @code{printpage} function. -The label lines are stored sequentially in the @code{line} array. But they -have to print horizontally; @code{line[1]} next to @code{line[6]}, -@code{line[2]} next to @code{line[7]}, and so on. Two loops are used to -accomplish this. 
The outer loop, controlled by @code{i}, steps through -every 10 lines of data; this is each row of labels. The inner loop, -controlled by @code{j}, goes through the lines within the row. -As @code{j} goes from 0 to 4, @samp{i+j} is the @code{j}'th line in -the row, and @samp{i+j+5} is the entry next to it. The output ends up -looking something like this: - -@example -line 1 line 6 -line 2 line 7 -line 3 line 8 -line 4 line 9 -line 5 line 10 -@dots{} -@end example - -As a final note, an extra blank line is printed at lines 21 and 61, to keep -the output lined up on the labels. This is dependent on the particular -brand of labels in use when the program was written. You will also note -that there are two blank lines at the top and two blank lines at the bottom. - -The @code{END} rule arranges to flush the final page of labels; there may -not have been an even multiple of 20 labels in the data: - -@cindex @code{labels.awk} program -@example -@c file eg/prog/labels.awk -# labels.awk --- print mailing labels -@c endfile -@ignore -@c file eg/prog/labels.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# June 1992 -@c endfile -@end ignore -@c file eg/prog/labels.awk - -# Each label is 5 lines of data that may have blank lines. -# The label sheets have 2 blank lines at the top and 2 at -# the bottom. - -BEGIN @{ RS = "" ; MAXLINES = 100 @} - -function printpage( i, j) -@{ - if (Nlines <= 0) - return - - printf "\n\n" # header - - for (i = 1; i <= Nlines; i += 10) @{ - if (i == 21 || i == 61) - print "" - for (j = 0; j < 5; j++) @{ - if (i + j > MAXLINES) - break - printf " %-41s %s\n", line[i+j], line[i+j+5] - @} - print "" - @} - - printf "\n\n" # footer - - for (i in line) - line[i] = "" -@} - -# main rule -@{ - if (Count >= 20) @{ - printpage() - Count = 0 - Nlines = 0 - @} - n = split($0, a, "\n") - for (i = 1; i <= n; i++) - line[++Nlines] = a[i] - for (; i <= 5; i++) - line[++Nlines] = "" - Count++ -@} - -END \ -@{ - printpage() -@} -@c endfile -@end example - -@node Word Sorting, History Sorting, Labels Program, Miscellaneous Programs -@subsection Generating Word Usage Counts - -@c NEXT ED: Rewrite this whole section and example -The following @command{awk} program prints -the number of occurrences of each word in its input. It illustrates the -associative nature of @command{awk} arrays by using strings as subscripts. It -also demonstrates the @samp{for @var{index} in @var{array}} mechanism. -Finally, it shows how @command{awk} is used in conjunction with other -utility programs to do a useful task of some complexity with a minimum of -effort. Some explanations follow the program listing: - -@example -# Print list of word frequencies -@{ - for (i = 1; i <= NF; i++) - freq[$i]++ -@} - -END @{ - for (word in freq) - printf "%s\t%d\n", word, freq[word] -@} -@end example - -@c Exercise: Use asort() here - -This program has two rules. The -first rule, because it has an empty pattern, is executed for every input line. -It uses @command{awk}'s field-accessing mechanism -(@pxref{Fields, ,Examining Fields}) to pick out the individual words from -the line, and the built-in variable @code{NF} (@pxref{Built-in Variables}) -to know how many fields are available. -For each input word, it increments an element of the array @code{freq} to -reflect that the word has been seen an additional time. - -The second rule, because it has the pattern @code{END}, is not executed -until the input has been exhausted. 
It prints out the contents of the -@code{freq} table that has been built up inside the first action. -This program has several problems that would prevent it from being -useful by itself on real text files: - -@itemize @bullet -@item -Words are detected using the @command{awk} convention that fields are -separated just by whitespace. Other characters in the input (except -newlines) don't have any special meaning to @command{awk}. This means that -punctuation characters count as part of words. - -@item -The @command{awk} language considers upper- and lowercase characters to be -distinct. Therefore, ``bartender'' and ``Bartender'' are not treated -as the same word. This is undesirable, since in normal text, words -are capitalized if they begin sentences, and a frequency analyzer should not -be sensitive to capitalization. - -@item -The output does not come out in any useful order. You're more likely to be -interested in which words occur most frequently or in having an alphabetized -table of how frequently each word occurs. -@end itemize - -@cindex @command{sort} utility -The way to solve these problems is to use some of @command{awk}'s more advanced -features. First, we use @code{tolower} to remove -case distinctions. Next, we use @code{gsub} to remove punctuation -characters. Finally, we use the system @command{sort} utility to process the -output of the @command{awk} script. Here is the new version of -the program: - -@cindex @code{wordfreq.awk} program -@example -@c file eg/prog/wordfreq.awk -# wordfreq.awk --- print list of word frequencies - -@{ - $0 = tolower($0) # remove case distinctions - # remove punctuation - gsub(/[^[:alnum:]_[:blank:]]/, "", $0) - for (i = 1; i <= NF; i++) - freq[$i]++ -@} - -END @{ - for (word in freq) - printf "%s\t%d\n", word, freq[word] -@} -@c endfile -@end example - -Assuming we have saved this program in a file named @file{wordfreq.awk}, -and that the data is in @file{file1}, the following pipeline: - -@example -awk -f wordfreq.awk file1 | sort +1 -nr -@end example - -@noindent -produces a table of the words appearing in @file{file1} in order of -decreasing frequency. The @command{awk} program suitably massages the -data and produces a word frequency table, which is not ordered. - -The @command{awk} script's output is then sorted by the @command{sort} -utility and printed on the terminal. The options given to @command{sort} -specify a sort that uses the second field of each input line (skipping -one field), that the sort keys should be treated as numeric quantities -(otherwise @samp{15} would come before @samp{5}), and that the sorting -should be done in descending (reverse) order. - -The @command{sort} could even be done from within the program, by changing -the @code{END} action to: - -@example -@c file eg/prog/wordfreq.awk -END @{ - sort = "sort +1 -nr" - for (word in freq) - printf "%s\t%d\n", word, freq[word] | sort - close(sort) -@} -@c endfile -@end example - -This way of sorting must be used on systems that do not -have true pipes at the command-line (or batch-file) level. -See the general operating system documentation for more information on how -to use the @command{sort} program. - -@node History Sorting, Extract Program, Word Sorting, Miscellaneous Programs -@subsection Removing Duplicates from Unsorted Text - -The @command{uniq} program -(@pxref{Uniq Program, ,Printing Non-Duplicated Lines of Text}), -removes duplicate lines from @emph{sorted} data. 
- -Suppose, however, you need to remove duplicate lines from a @value{DF} but -that you want to preserve the order the lines are in. A good example of -this might be a shell history file. The history file keeps a copy of all -the commands you have entered, and it is not unusual to repeat a command -several times in a row. Occasionally you might want to compact the history -by removing duplicate entries. Yet it is desirable to maintain the order -of the original commands. - -This simple program does the job. It uses two arrays. The @code{data} -array is indexed by the text of each line. -For each line, @code{data[$0]} is incremented. -If a particular line has not -been seen before, then @code{data[$0]} is zero. -In this case, the text of the line is stored in @code{lines[count]}. -Each element of @code{lines} is a unique command, and the indices of -@code{lines} indicate the order in which those lines are encountered. -The @code{END} rule simply prints out the lines, in order: - -@cindex Rakitzis, Byron -@cindex @code{histsort.awk} program -@example -@c file eg/prog/histsort.awk -# histsort.awk --- compact a shell history file -# Thanks to Byron Rakitzis for the general idea -@c endfile -@ignore -@c file eg/prog/histsort.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 - -@c endfile -@end ignore -@c file eg/prog/histsort.awk -@group -@{ - if (data[$0]++ == 0) - lines[++count] = $0 -@} -@end group - -END @{ - for (i = 1; i <= count; i++) - print lines[i] -@} -@c endfile -@end example - -This program also provides a foundation for generating other useful -information. For example, using the following @code{print} statement in the -@code{END} rule indicates how often a particular command is used: - -@example -print data[lines[i]], lines[i] -@end example - -This works because @code{data[$0]} is incremented each time a line is -seen. - -@node Extract Program, Simple Sed, History Sorting, Miscellaneous Programs -@subsection Extracting Programs from Texinfo Source Files - -@ifnotinfo -Both this chapter and the previous chapter -(@ref{Library Functions, ,A Library of @command{awk} Functions}) -present a large number of @command{awk} programs. -@end ifnotinfo -@ifinfo -The nodes -@ref{Library Functions, ,A Library of @command{awk} Functions}, -and @ref{Sample Programs, ,Practical @command{awk} Programs}, -are the top level nodes for a large number of @command{awk} programs. -@end ifinfo -If you want to experiment with these programs, it is tedious to have to type -them in by hand. Here we present a program that can extract parts of a -Texinfo input file into separate files. - -@cindex Texinfo -This @value{DOCUMENT} is written in Texinfo, the GNU project's document -formatting -language. -A single Texinfo source file can be used to produce both -printed and online documentation. -@ifnotinfo -Texinfo is fully documented in the book -@cite{Texinfo---The GNU Documentation Format}, -available from the Free Software Foundation. -@end ifnotinfo -@ifinfo -The Texinfo language is described fully, starting with -@ref{Top}. -@end ifinfo - -For our purposes, it is enough to know three things about Texinfo input -files: - -@itemize @bullet -@item -The ``at'' symbol (@samp{@@}) is special in Texinfo, much as -the backslash (@samp{\}) is in C -or @command{awk}. Literal @samp{@@} symbols are represented in Texinfo source -files as @samp{@@@@}. - -@item -Comments start with either @samp{@@c} or @samp{@@comment}. 
-The file extraction program works by using special comments that start -at the beginning of a line. - -@item -Lines containing @samp{@@group} and @samp{@@end group} commands bracket -example text that should not be split across a page boundary. -(Unfortunately, @TeX{} isn't always smart enough to do things exactly right -and we have to give it some help.) -@end itemize - -The following program, @file{extract.awk}, reads through a Texinfo source -file and does two things, based on the special comments. -Upon seeing @samp{@w{@@c system @dots{}}}, -it runs a command, by extracting the command text from the -control line and passing it on to the @code{system} function -(@pxref{I/O Functions, ,Input/Output Functions}). -Upon seeing @samp{@@c file @var{filename}}, each subsequent line is sent to -the file @var{filename}, until @samp{@@c endfile} is encountered. -The rules in @file{extract.awk} match either @samp{@@c} or -@samp{@@comment} by letting the @samp{omment} part be optional. -Lines containing @samp{@@group} and @samp{@@end group} are simply removed. -@file{extract.awk} uses the @code{join} library function -(@pxref{Join Function, ,Merging an Array into a String}). - -The example programs in the online Texinfo source for @cite{@value{TITLE}} -(@file{gawk.texi}) have all been bracketed inside @samp{file} and -@samp{endfile} lines. The @command{gawk} distribution uses a copy of -@file{extract.awk} to extract the sample programs and install many -of them in a standard directory where @command{gawk} can find them. -The Texinfo file looks something like this: - -@example -@dots{} -This program has a @@code@{BEGIN@} rule, -that prints a nice message: - -@@example -@@c file examples/messages.awk -BEGIN @@@{ print "Don't panic!" @@@} -@@c end file -@@end example - -It also prints some final advice: - -@@example -@@c file examples/messages.awk -END @@@{ print "Always avoid bored archeologists!" @@@} -@@c end file -@@end example -@dots{} -@end example - -@file{extract.awk} begins by setting @code{IGNORECASE} to one, so that -mixed upper- and lowercase letters in the directives won't matter. - -The first rule handles calling @code{system}, checking that a command is -given (@code{NF} is at least three) and also checking that the command -exits with a zero exit status, signifying OK: - -@cindex @code{extract.awk} program -@example -@c file eg/prog/extract.awk -# extract.awk --- extract files and run programs -# from texinfo files -@c endfile -@ignore -@c file eg/prog/extract.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# May 1993 -# Revised September 2000 - -@c endfile -@end ignore -@c file eg/prog/extract.awk -BEGIN @{ IGNORECASE = 1 @} - -/^@@c(omment)?[ \t]+system/ \ -@{ - if (NF < 3) @{ - e = (FILENAME ":" FNR) - e = (e ": badly formed `system' line") - print e > "/dev/stderr" - next - @} - $1 = "" - $2 = "" - stat = system($0) - if (stat != 0) @{ - e = (FILENAME ":" FNR) - e = (e ": warning: system returned " stat) - print e > "/dev/stderr" - @} -@} -@c endfile -@end example - -@noindent -The variable @code{e} is used so that the function -fits nicely on the -@ifnotinfo -page. -@end ifnotinfo -@ifnottex -screen. -@end ifnottex - -The second rule handles moving data into files. It verifies that a -@value{FN} is given in the directive. If the file named is not the -current file, then the current file is closed. Keeping the current file -open until a new file is encountered allows the use of the @samp{>} -redirection for printing the contents, keeping open file management -simple. 
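The behavior being relied upon deserves a small, self-contained
illustration (the @value{FN} here is made up): with @samp{>}, a file is
truncated only when it is first opened, and subsequent @code{print}
statements append to the still-open file until @code{close} is called:

@example
BEGIN @{
    print "first"  > "out.txt"   # opens (and truncates) out.txt
    print "second" > "out.txt"   # file is still open; this appends
    close("out.txt")             # a later '>' would truncate again
@}
@end example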
- -The @samp{for} loop does the work. It reads lines using @code{getline} -(@pxref{Getline, ,Explicit Input with @code{getline}}). -For an unexpected end of file, it calls the @code{@w{unexpected_eof}} -function. If the line is an ``endfile'' line, then it breaks out of -the loop. -If the line is an @samp{@@group} or @samp{@@end group} line, then it -ignores it and goes on to the next line. -Similarly, comments within examples are also ignored. - -Most of the work is in the following few lines. If the line has no @samp{@@} -symbols, the program can print it directly. -Otherwise, each leading @samp{@@} must be stripped off. -To remove the @samp{@@} symbols, the line is split into separate elements of -the array @code{a}, using the @code{split} function -(@pxref{String Functions, ,String Manipulation Functions}). -The @samp{@@} symbol is used as the separator character. -Each element of @code{a} that is empty indicates two successive @samp{@@} -symbols in the original line. For each two empty elements (@samp{@@@@} in -the original file), we have to add a single @samp{@@} symbol back in. - -When the processing of the array is finished, @code{join} is called with the -value of @code{SUBSEP}, to rejoin the pieces back into a single -line. That line is then printed to the output file: - -@example -@c file eg/prog/extract.awk -/^@@c(omment)?[ \t]+file/ \ -@{ - if (NF != 3) @{ - e = (FILENAME ":" FNR ": badly formed `file' line") - print e > "/dev/stderr" - next - @} - if ($3 != curfile) @{ - if (curfile != "") - close(curfile) - curfile = $3 - @} - - for (;;) @{ - if ((getline line) <= 0) - unexpected_eof() - if (line ~ /^@@c(omment)?[ \t]+endfile/) - break - else if (line ~ /^@@(end[ \t]+)?group/) - continue - else if (line ~ /^@@c(omment+)?[ \t]+/) - continue - if (index(line, "@@") == 0) @{ - print line > curfile - continue - @} - n = split(line, a, "@@") - # if a[1] == "", means leading @@, - # don't add one back in. - for (i = 2; i <= n; i++) @{ - if (a[i] == "") @{ # was an @@@@ - a[i] = "@@" - if (a[i+1] == "") - i++ - @} - @} - print join(a, 1, n, SUBSEP) > curfile - @} -@} -@c endfile -@end example - -An important thing to note is the use of the @samp{>} redirection. -Output done with @samp{>} only opens the file once; it stays open and -subsequent output is appended to the file -(@pxref{Redirection, , Redirecting Output of @code{print} and @code{printf}}). -This makes it easy to mix program text and explanatory prose for the same -sample source file (as has been done here!) without any hassle. The file is -only closed when a new data @value{FN} is encountered or at the end of the -input file. - -Finally, the function @code{@w{unexpected_eof}} prints an appropriate -error message and then exits. -The @code{END} rule handles the final cleanup, closing the open file: - -@c function lb put on same line for page breaking. sigh -@example -@c file eg/prog/extract.awk -@group -function unexpected_eof() @{ - printf("%s:%d: unexpected EOF or error\n", - FILENAME, FNR) > "/dev/stderr" - exit 1 -@} -@end group - -END @{ - if (curfile) - close(curfile) -@} -@c endfile -@end example - -@node Simple Sed, Igawk Program, Extract Program, Miscellaneous Programs -@subsection A Simple Stream Editor - -@cindex @command{sed} utility -@cindex stream editor -The @command{sed} utility is a ``stream editor,'' a program that reads a -stream of data, makes changes to it, and passes it on. -It is often used to make global changes to a large file or to a stream -of data generated by a pipeline of commands. 
-While @command{sed} is a complicated program in its own right, its most common -use is to perform global substitutions in the middle of a pipeline: - -@example -command1 < orig.data | sed 's/old/new/g' | command2 > result -@end example - -Here, @samp{s/old/new/g} tells @command{sed} to look for the regexp -@samp{old} on each input line and globally replace it with the text -@samp{new}, (i.e., all the occurrences on a line). This is similar to -@command{awk}'s @code{gsub} function -(@pxref{String Functions, ,String Manipulation Functions}). - -The following program, @file{awksed.awk}, accepts at least two command-line -arguments: the pattern to look for and the text to replace it with. Any -additional arguments are treated as data @value{FN}s to process. If none -are provided, the standard input is used: - -@cindex Brennan, Michael -@cindex @command{awksed.awk} program -@cindex simple stream editor -@cindex stream editor, simple -@example -@c file eg/prog/awksed.awk -# awksed.awk --- do s/foo/bar/g using just print -# Thanks to Michael Brennan for the idea -@c endfile -@ignore -@c file eg/prog/awksed.awk -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# August 1995 - -@c endfile -@end ignore -@c file eg/prog/awksed.awk -function usage() -@{ - print "usage: awksed pat repl [files...]" > "/dev/stderr" - exit 1 -@} - -BEGIN @{ - # validate arguments - if (ARGC < 3) - usage() - - RS = ARGV[1] - ORS = ARGV[2] - - # don't use arguments as files - ARGV[1] = ARGV[2] = "" -@} - -@group -# look ma, no hands! -@{ - if (RT == "") - printf "%s", $0 - else - print -@} -@end group -@c endfile -@end example - -The program relies on @command{gawk}'s ability to have @code{RS} be a regexp, -as well as on the setting of @code{RT} to the actual text that terminates the -record (@pxref{Records, ,How Input Is Split into Records}). - -The idea is to have @code{RS} be the pattern to look for. @command{gawk} -automatically sets @code{$0} to the text between matches of the pattern. -This is text that we want to keep, unmodified. Then, by setting @code{ORS} -to the replacement text, a simple @code{print} statement outputs the -text we want to keep, followed by the replacement text. - -There is one wrinkle to this scheme, which is what to do if the last record -doesn't end with text that matches @code{RS}. Using a @code{print} -statement unconditionally prints the replacement text, which is not correct. -However, if the file did not end in text that matches @code{RS}, @code{RT} -is set to the null string. In this case, we can print @code{$0} using -@code{printf} -(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}). - -The @code{BEGIN} rule handles the setup, checking for the right number -of arguments and calling @code{usage} if there is a problem. Then it sets -@code{RS} and @code{ORS} from the command-line arguments and sets -@code{ARGV[1]} and @code{ARGV[2]} to the null string, so that they are -not treated as @value{FN}s -(@pxref{ARGC and ARGV, , Using @code{ARGC} and @code{ARGV}}). - -The @code{usage} function prints an error message and exits. -Finally, the single rule handles the printing scheme outlined above, -using @code{print} or @code{printf} as appropriate, depending upon the -value of @code{RT}. - -@ignore -Exercise, compare the performance of this version with the more -straightforward: - -BEGIN { - pat = ARGV[1] - repl = ARGV[2] - ARGV[1] = ARGV[2] = "" -} - -{ gsub(pat, repl); print } - -Exercise: what are the advantages and disadvantages of this version vs. sed? 
- Advantage: egrep regexps - speed (?) - Disadvantage: no & in replacement text - -Others? -@end ignore - -@node Igawk Program, , Simple Sed, Miscellaneous Programs -@subsection An Easy Way to Use Library Functions - -Using library functions in @command{awk} can be very beneficial. It -encourages code reuse and the writing of general functions. Programs are -smaller and therefore clearer. -However, using library functions is only easy when writing @command{awk} -programs; it is painful when running them, requiring multiple @option{-f} -options. If @command{gawk} is unavailable, then so too is the @env{AWKPATH} -environment variable and the ability to put @command{awk} functions into a -library directory (@pxref{Options, ,Command-Line Options}). -It would be nice to be able to write programs in the following manner: - -@example -# library functions -@@include getopt.awk -@@include join.awk -@dots{} - -# main program -BEGIN @{ - while ((c = getopt(ARGC, ARGV, "a:b:cde")) != -1) - @dots{} - @dots{} -@} -@end example - -The following program, @file{igawk.sh}, provides this service. -It simulates @command{gawk}'s searching of the @env{AWKPATH} variable -and also allows @dfn{nested} includes; i.e., a file that is included -with @samp{@@include} can contain further @samp{@@include} statements. -@command{igawk} makes an effort to only include files once, so that nested -includes don't accidentally include a library function twice. - -@command{igawk} should behave just like @command{gawk} externally. This -means it should accept all of @command{gawk}'s command-line arguments, -including the ability to have multiple source files specified via -@option{-f}, and the ability to mix command-line and library source files. - -The program is written using the POSIX Shell (@command{sh}) command language. -The way the program works is as follows: - -@enumerate -@item -Loop through the arguments, saving anything that doesn't represent -@command{awk} source code for later, when the expanded program is run. - -@item -For any arguments that do represent @command{awk} text, put the arguments into -a temporary file that will be expanded. There are two cases: - -@enumerate a -@item -Literal text, provided with @option{--source} or @option{--source=}. This -text is just echoed directly. The @command{echo} program automatically -supplies a trailing newline. - -@item -Source @value{FN}s provided with @option{-f}. We use a neat trick and echo -@samp{@@include @var{filename}} into the temporary file. Since the file -inclusion program works the way @command{gawk} does, this gets the text -of the file included into the program at the correct point. -@end enumerate - -@item -Run an @command{awk} program (naturally) over the temporary file to expand -@samp{@@include} statements. The expanded program is placed in a second -temporary file. - -@item -Run the expanded program with @command{gawk} and any other original command-line -arguments that the user supplied (such as the data @value{FN}s). -@end enumerate - -The initial part of the program turns on shell tracing if the first -argument is @samp{debug}. Otherwise, a shell @code{trap} statement -arranges to clean up any temporary files on program exit or upon an -interrupt. - -@c 2e: For the temp file handling, go with Darrel's ig=${TMP:-/tmp}/igs.$$ -@c 2e: or something as similar as possible. - -The next part loops through all the command-line arguments. -There are several cases of interest: - -@table @code -@item -- -This ends the arguments to @command{igawk}. 
Anything else should be passed on -to the user's @command{awk} program without being evaluated. - -@item -W -This indicates that the next option is specific to @command{gawk}. To make -argument processing easier, the @option{-W} is appended to the front of the -remaining arguments and the loop continues. (This is an @command{sh} -programming trick. Don't worry about it if you are not familiar with -@command{sh}.) - -@item -v@r{,} -F -These are saved and passed on to @command{gawk}. - -@item -f@r{,} --file@r{,} --file=@r{,} -Wfile= -The @value{FN} is saved to the temporary file @file{/tmp/ig.s.$$} with an -@samp{@@include} statement. -The @command{sed} utility is used to remove the leading option part of the -argument (e.g., @samp{--file=}). - -@item --source@r{,} --source=@r{,} -Wsource= -The source text is echoed into @file{/tmp/ig.s.$$}. - -@item --version@r{,} -Wversion -@command{igawk} prints its version number, runs @samp{gawk --version} -to get the @command{gawk} version information, and then exits. -@end table - -If none of the @option{-f}, @option{--file}, @option{-Wfile}, @option{--source}, -or @option{-Wsource} arguments are supplied, then the first non-option argument -should be the @command{awk} program. If there are no command-line -arguments left, @command{igawk} prints an error message and exits. -Otherwise, the first argument is echoed into @file{/tmp/ig.s.$$}. -In any case, after the arguments have been processed, -@file{/tmp/ig.s.$$} contains the complete text of the original @command{awk} -program. - -@cindex @command{sed} utility -@cindex stream editor -The @samp{$$} in @command{sh} represents the current process ID number. -It is often used in shell programs to generate unique temporary @value{FN}s. -This allows multiple users to run @command{igawk} without worrying -that the temporary @value{FN}s will clash. -The program is as follows: - -@cindex @code{igawk.sh} program -@example -@c file eg/prog/igawk.sh -#! /bin/sh -# igawk --- like gawk but do @@include processing -@c endfile -@ignore -@c file eg/prog/igawk.sh -# -# Arnold Robbins, arnold@@gnu.org, Public Domain -# July 1993 - -@c endfile -@end ignore -@c file eg/prog/igawk.sh -if [ "$1" = debug ] -then - set -x - shift -else - # cleanup on exit, hangup, interrupt, quit, termination - trap 'rm -f /tmp/ig.[se].$$' 0 1 2 3 15 -fi - -while [ $# -ne 0 ] # loop over arguments -do - case $1 in - --) shift; break;; - - -W) shift - set -- -W"$@@" - continue;; - - -[vF]) opts="$opts $1 '$2'" - shift;; - - -[vF]*) opts="$opts '$1'" ;; - - -f) echo @@include "$2" >> /tmp/ig.s.$$ - shift;; - - -f*) f=`echo "$1" | sed 's/-f//'` - echo @@include "$f" >> /tmp/ig.s.$$ ;; - - -?file=*) # -Wfile or --file - f=`echo "$1" | sed 's/-.file=//'` - echo @@include "$f" >> /tmp/ig.s.$$ ;; - - -?file) # get arg, $2 - echo @@include "$2" >> /tmp/ig.s.$$ - shift;; - - -?source=*) # -Wsource or --source - t=`echo "$1" | sed 's/-.source=//'` - echo "$t" >> /tmp/ig.s.$$ ;; - - -?source) # get arg, $2 - echo "$2" >> /tmp/ig.s.$$ - shift;; - - -?version) - echo igawk: version 1.0 1>&2 - gawk --version - exit 0 ;; - - -[W-]*) opts="$opts '$1'" ;; - - *) break;; - esac - shift -done - -if [ ! -s /tmp/ig.s.$$ ] -then -@group - if [ -z "$1" ] - then - echo igawk: no program! 
1>&2 - exit 1 -@end group - else - echo "$1" > /tmp/ig.s.$$ - shift - fi -fi - -# at this point, /tmp/ig.s.$$ has the program -@c endfile -@end example - -The @command{awk} program to process @samp{@@include} directives -reads through the program, one line at a time, using @code{getline} -(@pxref{Getline, ,Explicit Input with @code{getline}}). The input -@value{FN}s and @samp{@@include} statements are managed using a stack. -As each @samp{@@include} is encountered, the current @value{FN} is -``pushed'' onto the stack and the file named in the @samp{@@include} -directive becomes the current @value{FN}. As each file is finished, -the stack is ``popped,'' and the previous input file becomes the current -input file again. The process is started by making the original file -the first one on the stack. - -The @code{pathto} function does the work of finding the full path to -a file. It simulates @command{gawk}'s behavior when searching the -@env{AWKPATH} environment variable -(@pxref{AWKPATH Variable, ,The @env{AWKPATH} Environment Variable}). -If a @value{FN} has a @samp{/} in it, no path search is done. Otherwise, -the @value{FN} is concatenated with the name of each directory in -the path, and an attempt is made to open the generated @value{FN}. -The only way to test if a file can be read in @command{awk} is to go -ahead and try to read it with @code{getline}; this is what @code{pathto} -does.@footnote{On some very old versions of @command{awk}, the test -@samp{getline junk < t} can loop forever if the file exists but is empty. -Caveat emptor.} If the file can be read, it is closed and the @value{FN} -is returned: - -@ignore -An alternative way to test for the file's existence would be to call -@samp{system("test -r " t)}, which uses the @command{test} utility to -see if the file exists and is readable. The disadvantage to this method -is that it requires creating an extra process and can thus be slightly -slower. -@end ignore - -@example -@c file eg/prog/igawk.sh -gawk -- ' -# process @@include directives - -function pathto(file, i, t, junk) -@{ - if (index(file, "/") != 0) - return file - - for (i = 1; i <= ndirs; i++) @{ - t = (pathlist[i] "/" file) -@group - if ((getline junk < t) > 0) @{ - # found it - close(t) - return t - @} -@end group - @} - return "" -@} -@c endfile -@end example - -The main program is contained inside one @code{BEGIN} rule. The first thing it -does is set up the @code{pathlist} array that @code{pathto} uses. After -splitting the path on @samp{:}, null elements are replaced with @code{"."}, -which represents the current directory: - -@example -@c file eg/prog/igawk.sh -BEGIN @{ - path = ENVIRON["AWKPATH"] - ndirs = split(path, pathlist, ":") - for (i = 1; i <= ndirs; i++) @{ - if (pathlist[i] == "") - pathlist[i] = "." - @} -@c endfile -@end example - -The stack is initialized with @code{ARGV[1]}, which will be @file{/tmp/ig.s.$$}. -The main loop comes next. Input lines are read in succession. Lines that -do not start with @samp{@@include} are printed verbatim. -If the line does start with @samp{@@include}, the @value{FN} is in @code{$2}. -@code{pathto} is called to generate the full path. If it cannot, then we -print an error message and continue. - -The next thing to check is if the file is included already. The -@code{processed} array is indexed by the full @value{FN} of each included -file and it tracks this information for us. If the file is -seen again, a warning message is printed. 
Otherwise, the new @value{FN} is -pushed onto the stack and processing continues. - -Finally, when @code{getline} encounters the end of the input file, the file -is closed and the stack is popped. When @code{stackptr} is less than zero, -the program is done: - -@example -@c file eg/prog/igawk.sh - stackptr = 0 - input[stackptr] = ARGV[1] # ARGV[1] is first file - - for (; stackptr >= 0; stackptr--) @{ - while ((getline < input[stackptr]) > 0) @{ - if (tolower($1) != "@@include") @{ - print - continue - @} - fpath = pathto($2) -@group - if (fpath == "") @{ - printf("igawk:%s:%d: cannot find %s\n", - input[stackptr], FNR, $2) > "/dev/stderr" - continue - @} -@end group - if (! (fpath in processed)) @{ - processed[fpath] = input[stackptr] - input[++stackptr] = fpath # push onto stack - @} else - print $2, "included in", input[stackptr], - "already included in", - processed[fpath] > "/dev/stderr" - @} - close(input[stackptr]) - @} -@}' /tmp/ig.s.$$ > /tmp/ig.e.$$ -@c endfile -@end example - -The last step is to call @command{gawk} with the expanded program, -along with the original -options and command-line arguments that the user supplied. @command{gawk}'s -exit status is passed back on to @command{igawk}'s calling program: - -@c this causes more problems than it solves, so leave it out. -@ignore -The special file @file{/dev/null} is passed as a @value{DF} to @command{gawk} -to handle an interesting case. Suppose that the user's program only has -a @code{BEGIN} rule and there are no @value{DF}s to read. -The program should exit without reading any @value{DF}s. -However, suppose that an included library file defines an @code{END} -rule of its own. In this case, @command{gawk} will hang, reading standard -input. In order to avoid this, @file{/dev/null} is explicitly added to the -command-line. Reading from @file{/dev/null} always returns an immediate -end of file indication. - -@c Hmm. Add /dev/null if $# is 0? Still messes up ARGV. Sigh. -@end ignore - -@example -@c file eg/prog/igawk.sh -eval gawk -f /tmp/ig.e.$$ $opts -- "$@@" - -exit $? -@c endfile -@end example - -This version of @command{igawk} represents my third attempt at this program. -There are three key simplifications that make the program work better: - -@itemize @bullet -@item -Using @samp{@@include} even for the files named with @option{-f} makes building -the initial collected @command{awk} program much simpler; all the -@samp{@@include} processing can be done once. - -@item -The @code{pathto} function doesn't try to save the line read with -@code{getline} when testing for the file's accessibility. Trying to save -this line for use with the main program complicates things considerably. -@c what problem does this engender though - exercise -@c answer, reading from "-" or /dev/stdin - -@item -Using a @code{getline} loop in the @code{BEGIN} rule does it all in one -place. It is not necessary to call out to a separate loop for processing -nested @samp{@@include} statements. -@end itemize - -Also, this program illustrates that it is often worthwhile to combine -@command{sh} and @command{awk} programming together. You can usually -accomplish quite a lot, without having to resort to low-level programming -in C or C++, and it is frequently easier to do certain kinds of string -and argument manipulation using the shell than it is in @command{awk}. - -Finally, @command{igawk} shows that it is not always necessary to add new -features to a program; they can often be layered on top. 
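Assuming the finished script is installed as @command{igawk} somewhere
in your search path, it is invoked exactly as @command{gawk} would be;
for example (the program and @value{DF} names here are hypothetical):

@example
igawk -f myprog.awk data1 data2
@end example

@noindent
where @file{myprog.awk} may freely contain @samp{@@include} lines.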
With @command{igawk}, -there is no real reason to build @samp{@@include} processing into -@command{gawk} itself. - -@cindex search path -@cindex directory search -@cindex path, search -@cindex search path, for source files -As an additional example of this, consider the idea of having two -files in a directory in the search path: - -@table @file -@item default.awk -This file contains a set of default library functions, such -as @code{getopt} and @code{assert}. - -@item site.awk -This file contains library functions that are specific to a site or -installation; i.e., locally developed functions. -Having a separate file allows @file{default.awk} to change with -new @command{gawk} releases, without requiring the system administrator to -update it each time by adding the local functions. -@end table - -One user -@c Karl Berry, karl@ileaf.com, 10/95 -suggested that @command{gawk} be modified to automatically read these files -upon startup. Instead, it would be very simple to modify @command{igawk} -to do this. Since @command{igawk} can process nested @samp{@@include} -directives, @file{default.awk} could simply contain @samp{@@include} -statements for the desired library functions. - -@c Exercise: make this change - -@ignore -@c Try this -@iftex -@page -@headings off -@majorheading III@ @ @ Appendixes -Part III provides the appendixes, the Glossary, and two licenses that cover -the @command{gawk} source code and this @value{DOCUMENT}, respectively. -It contains the following appendixes: - -@itemize @bullet -@item -@ref{Language History, ,The Evolution of the @command{awk} Language}. - -@item -@ref{Installation, ,Installing @command{gawk}}. - -@item -@ref{Notes, ,Implementation Notes}. - -@item -@ref{Basic Concepts, ,Basic Programming Concepts}. - -@item -@ref{Glossary}. - -@item -@ref{Copying, ,GNU General Public License}. - -@item -@ref{GNU Free Documentation License}. -@end itemize - -@page -@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @| -@oddheading @| @| @strong{@thischapter}@ @ @ @thispage -@end iftex -@end ignore - -@node Language History, Installation, Sample Programs, Top -@appendix The Evolution of the @command{awk} Language - -This @value{DOCUMENT} describes the GNU implementation of @command{awk}, which follows -the POSIX specification. -Many long-time @command{awk} users learned @command{awk} programming -with the original @command{awk} implementation in Version 7 Unix. -(This implementation was the basis for @command{awk} in Berkeley Unix, -through 4.3--Reno. Subsequent versions of Berkeley Unix, and systems -derived from 4.4BSD--Lite, use various versions of @command{gawk} -for their @command{awk}.) -This @value{CHAPTER} briefly describes the -evolution of the @command{awk} language, with cross references to other parts -of the @value{DOCUMENT} where you can find more information. - -@menu -* V7/SVR3.1:: The major changes between V7 and System V - Release 3.1. -* SVR4:: Minor changes between System V Releases 3.1 - and 4. -* POSIX:: New features from the POSIX standard. -* BTL:: New features from the Bell Laboratories - version of @command{awk}. -* POSIX/GNU:: The extensions in @command{gawk} not in POSIX - @command{awk}. -* Contributors:: The major contributors to @command{gawk}. 
-@end menu - -@node V7/SVR3.1, SVR4, Language History, Language History -@appendixsec Major Changes Between V7 and SVR3.1 - -The @command{awk} language evolved considerably between the release of -Version 7 Unix (1978) and the new version that was first made generally available in -System V Release 3.1 (1987). This @value{SECTION} summarizes the changes, with -cross-references to further details: - -@itemize @bullet -@item -The requirement for @samp{;} to separate rules on a line -(@pxref{Statements/Lines, ,@command{awk} Statements Versus Lines}). - -@item -User-defined functions and the @code{return} statement -(@pxref{User-defined, ,User-Defined Functions}). - -@item -The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}). - -@item -The @code{do}-@code{while} statement -(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}). - -@item -The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand}, and -@code{srand} (@pxref{Numeric Functions}). - -@item -The built-in functions @code{gsub}, @code{sub}, and @code{match} -(@pxref{String Functions, ,String Manipulation Functions}). - -@item -The built-in functions @code{close} and @code{system} -(@pxref{I/O Functions, ,Input/Output Functions}). - -@item -The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART}, -and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}). - -@item -The conditional expression using the ternary operator @samp{?:} -(@pxref{Conditional Exp, ,Conditional Expressions}). - -@item -The exponentiation operator @samp{^} -(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator -form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}). - -@item -C-compatible operator precedence, which breaks some old @command{awk} -programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}). - -@item -Regexps as the value of @code{FS} -(@pxref{Field Separators, ,Specifying How Fields Are Separated}) and as the -third argument to the @code{split} function -(@pxref{String Functions, ,String Manipulation Functions}). - -@item -Dynamic regexps as operands of the @samp{~} and @samp{!~} operators -(@pxref{Regexp Usage, ,How to Use Regular Expressions}). - -@item -The escape sequences @samp{\b}, @samp{\f}, and @samp{\r} -(@pxref{Escape Sequences}). -(Some vendors have updated their old versions of @command{awk} to -recognize @samp{\b}, @samp{\f}, and @samp{\r}, but this is not -something you can rely on.) - -@item -Redirection of input for the @code{getline} function -(@pxref{Getline, ,Explicit Input with @code{getline}}). - -@item -Multiple @code{BEGIN} and @code{END} rules -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). - -@item -Multidimensional arrays -(@pxref{Multi-dimensional, ,Multidimensional Arrays}). -@end itemize - -@node SVR4, POSIX, V7/SVR3.1, Language History -@appendixsec Changes Between SVR3.1 and SVR4 - -@cindex @command{awk} language, V.4 version -The System V Release 4 (1989) version of Unix @command{awk} added these features -(some of which originated in @command{gawk}): - -@itemize @bullet -@item -The @code{ENVIRON} variable (@pxref{Built-in Variables}). -@c gawk and MKS awk - -@item -Multiple @option{-f} options on the command line -(@pxref{Options, ,Command-Line Options}). -@c MKS awk - -@item -The @option{-v} option for assigning variables before program execution begins -(@pxref{Options, ,Command-Line Options}). 
-@c GNU, Bell Laboratories & MKS together - -@item -The @option{--} option for terminating command-line options. - -@item -The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences -(@pxref{Escape Sequences}). -@c GNU, for ANSI C compat - -@item -A defined return value for the @code{srand} built-in function -(@pxref{Numeric Functions}). - -@item -The @code{toupper} and @code{tolower} built-in string functions -for case translation -(@pxref{String Functions, ,String Manipulation Functions}). - -@item -A cleaner specification for the @samp{%c} format-control letter in the -@code{printf} function -(@pxref{Control Letters, ,Format-Control Letters}). - -@item -The ability to dynamically pass the field width and precision (@code{"%*.*d"}) -in the argument list of the @code{printf} function -(@pxref{Control Letters, ,Format-Control Letters}). - -@item -The use of regexp constants, such as @code{/foo/}, as expressions, where -they are equivalent to using the matching operator, as in @samp{$0 ~ /foo/} -(@pxref{Using Constant Regexps, ,Using Regular Expression Constants}). - -@item -Processing of escape sequences inside command-line variable assignments -(@pxref{Assignment Options, ,Assigning Variables on the Command Line}). -@end itemize - -@node POSIX, BTL, SVR4, Language History -@appendixsec Changes Between SVR4 and POSIX @command{awk} - -The POSIX Command Language and Utilities standard for @command{awk} (1992) -introduced the following changes into the language: - -@itemize @bullet -@item -The use of @option{-W} for implementation-specific options -(@pxref{Options, ,Command-Line Options}). - -@item -The use of @code{CONVFMT} for controlling the conversion of numbers -to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}). - -@item -The concept of a numeric string and tighter comparison rules to go -with it (@pxref{Typing and Comparison, ,Variable Typing and Comparison Expressions}). - -@item -More complete documentation of many of the previously undocumented -features of the language. -@end itemize - -The following common extensions are not permitted by the POSIX -standard: - -@c IMPORTANT! Keep this list in sync with the one in node Options - -@itemize @bullet -@item -@code{\x} escape sequences are not recognized -(@pxref{Escape Sequences}). - -@item -Newlines do not act as whitespace to separate fields when @code{FS} is -equal to a single space -(@pxref{Fields, ,Examining Fields}). - -@item -Newlines are not allowed after @samp{?} or @samp{:} -(@pxref{Conditional Exp, ,Conditional Expressions}). - -@item -The synonym @code{func} for the keyword @code{function} is not -recognized (@pxref{Definition Syntax, ,Function Definition Syntax}). - -@item -The operators @samp{**} and @samp{**=} cannot be used in -place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators}, -and @ref{Assignment Ops, ,Assignment Expressions}). - -@item -Specifying @samp{-Ft} on the command line does not set the value -of @code{FS} to be a single tab character -(@pxref{Field Separators, ,Specifying How Fields Are Separated}). - -@item -The @code{fflush} built-in function is not supported -(@pxref{I/O Functions, ,Input/Output Functions}). 
-@end itemize - -@node BTL, POSIX/GNU, POSIX, Language History -@appendixsec Extensions in the Bell Laboratories @command{awk} - -@cindex extensions, Bell Laboratories @command{awk} -@cindex Kernighan, Brian -Brian Kernighan, one of the original designers of Unix @command{awk}, -has made his version available via his home page -(@pxref{Other Versions, ,Other Freely Available @command{awk} Implementations}). -This @value{SECTION} describes extensions in his version of @command{awk} that are -not in POSIX @command{awk}. - -@itemize @bullet -@item -The @samp{-mf @var{N}} and @samp{-mr @var{N}} command-line options -to set the maximum number of fields and the maximum -record size, respectively -(@pxref{Options, ,Command-Line Options}). -As a side note, his @command{awk} no longer needs these options; -it continues to accept them to avoid breaking old programs. - -@item -The @code{fflush} built-in function for flushing buffered output -(@pxref{I/O Functions, ,Input/Output Functions}). - -@item -The @samp{**} and @samp{**=} operators -(@pxref{Arithmetic Ops, ,Arithmetic Operators} -and -@ref{Assignment Ops, ,Assignment Expressions}). - -@item -The use of @code{func} as an abbreviation for @code{function} -(@pxref{Definition Syntax, ,Function Definition Syntax}). - -@ignore -@item -The @code{SYMTAB} array, that allows access to @command{awk}'s internal symbol -table. This feature is not documented, largely because -it is somewhat shakily implemented. For instance, you cannot access arrays -or array elements through it. -@end ignore -@end itemize - -The Bell Laboratories @command{awk} also incorporates the following extensions, -originally developed for @command{gawk}: - -@itemize @bullet -@item -The @samp{\x} escape sequence -(@pxref{Escape Sequences}). - -@item -The @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr} -special files -(@pxref{Special Files, ,Special @value{FFN}s in @command{gawk}}). - -@item -The ability for @code{FS} and for the third -argument to @code{split} to be null strings -(@pxref{Single Character Fields, , Making Each Character a Separate Field}). - -@item -The @code{nextfile} statement -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). - -@item -The ability to delete all of an array at once with @samp{delete @var{array}} -(@pxref{Delete, ,The @code{delete} Statement}). -@end itemize - -@node POSIX/GNU, Contributors, BTL, Language History -@appendixsec Extensions in @command{gawk} Not in POSIX @command{awk} - -@ignore -I've tried to follow this general order, esp. for the 3.0 and 3.1 sections: - variables - special files - language changes (e.g., hex constants) - differences in standard awk functions - new gawk functions - new keywords - new command-line options - new ports -Within each category, be alphabetical. -@end ignore - -@cindex compatibility mode -The GNU implementation, @command{gawk}, adds a large number of features. -This @value{SECTION} lists them in the order they were added to @command{gawk}. -They can all be disabled with either the @option{--traditional} or -@option{--posix} options -(@pxref{Options, ,Command-Line Options}). - -Version 2.10 of @command{gawk} introduced the following features: - -@itemize @bullet -@item -The @env{AWKPATH} environment variable for specifying a path search for -the @option{-f} command-line option -(@pxref{Options, ,Command-Line Options}). - -@item -The @code{IGNORECASE} variable and its effects -(@pxref{Case-sensitivity, ,Case Sensitivity in Matching}). 
- -@item -The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr} and -@file{/dev/fd/@var{N}} special @value{FN}s -(@pxref{Special Files, ,Special @value{FFN}s in @command{gawk}}). -@end itemize - -Version 2.13 of @command{gawk} introduced the following features: - -@itemize @bullet -@item -The @code{FIELDWIDTHS} variable and its effects -(@pxref{Constant Size, ,Reading Fixed-Width Data}). - -@item -The @code{systime} and @code{strftime} built-in functions for obtaining -and printing timestamps -(@pxref{Time Functions, ,Using @command{gawk}'s Timestamp Functions}). - -@item -The @option{-W lint} option to provide error and portability checking -for both the source code and at runtime -(@pxref{Options, ,Command-Line Options}). - -@item -The @option{-W compat} option to turn off the GNU extensions -(@pxref{Options, ,Command-Line Options}). - -@item -The @option{-W posix} option for full POSIX compliance -(@pxref{Options, ,Command-Line Options}). -@end itemize - -Version 2.14 of @command{gawk} introduced the following feature: - -@itemize @bullet -@item -The @code{next file} statement for skipping to the next @value{DF} -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). -@end itemize - -Version 2.15 of @command{gawk} introduced the following features: - -@itemize @bullet -@item -The @code{ARGIND} variable, which tracks the movement of @code{FILENAME} -through @code{ARGV} (@pxref{Built-in Variables}). - -@item -The @code{ERRNO} variable, which contains the system error message when -@code{getline} returns @minus{}1 or when @code{close} fails -(@pxref{Built-in Variables}). - -@item -The @file{/dev/pid}, @file{/dev/ppid}, @file{/dev/pgrpid}, and -@file{/dev/user} @value{FN} interpretation -(@pxref{Special Files, ,Special @value{FFN}s in @command{gawk}}). - -@item -The ability to delete all of an array at once with @samp{delete @var{array}} -(@pxref{Delete, ,The @code{delete} Statement}). - -@item -The ability to use GNU-style long-named options that start with @option{--} -(@pxref{Options, ,Command-Line Options}). - -@item -The @option{--source} option for mixing command-line and library -file source code -(@pxref{Options, ,Command-Line Options}). -@end itemize - -Version 3.0 of @command{gawk} introduced the following features: - -@itemize @bullet -@item -@code{IGNORECASE} changed, now applying to string comparison as well -as regexp operations -(@pxref{Case-sensitivity, ,Case Sensitivity in Matching}). - -@item -The @code{RT} variable that contains the input text that -matched @code{RS} -(@pxref{Records, ,How Input Is Split into Records}). - -@item -Full support for both POSIX and GNU regexps -(@pxref{Regexp, , Regular Expressions}). - -@item -The @code{gensub} function for more powerful text manipulation -(@pxref{String Functions, ,String Manipulation Functions}). - -@item -The @code{strftime} function acquired a default time format, -allowing it to be called with no arguments -(@pxref{Time Functions, ,Using @command{gawk}'s Timestamp Functions}). - -@item -The ability for @code{FS} and for the third -argument to @code{split} to be null strings -(@pxref{Single Character Fields, , Making Each Character a Separate Field}). - -@item -The ability for @code{RS} to be a regexp -(@pxref{Records, ,How Input Is Split into Records}). - -@item -The @code{next file} statement became @code{nextfile} -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). 
- -@item -The @option{--lint-old} option to -warn about constructs that are not available in -the original Version 7 Unix version of @command{awk} -(@pxref{V7/SVR3.1, ,Major Changes Between V7 and SVR3.1}). - -@item -The @option{-m} option and the @code{fflush} function from the -Bell Laboratories research version of @command{awk} -(@pxref{Options, ,Command-Line Options}; also -@pxref{I/O Functions, ,Input/Output Functions}). - -@item -The @option{--re-interval} option to provide interval expressions in regexps -(@pxref{Regexp Operators, , Regular Expression Operators}). - -@item -The @option{--traditional} option was added as a better name for -@option{--compat} (@pxref{Options, ,Command-Line Options}). - -@item -The use of GNU Autoconf to control the configuration process -(@pxref{Quick Installation, , Compiling @command{gawk} for Unix}). - -@item -Amiga support -(@pxref{Amiga Installation, ,Installing @command{gawk} on an Amiga}). - -@end itemize - -Version 3.1 of @command{gawk} introduced the following features: - -@itemize @bullet -@item -The @code{BINMODE} special variable for non-POSIX systems, -which allows binary I/O for input and/or output files -(@pxref{PC Using, ,Using @command{gawk} on PC Operating Systems}). - -@item -The @code{LINT} special variable, which dynamically controls lint warnings -(@pxref{Built-in Variables}). - -@item -The @code{PROCINFO} array for providing process-related information -(@pxref{Built-in Variables}). - -@item -The @code{TEXTDOMAIN} special variable for setting an application's -internationalization text domain -(@pxref{Built-in Variables}, -and -@ref{Internationalization, ,Internationalization with @command{gawk}}). - -@item -The ability to use octal and hexadecimal constants in @command{awk} -program source code -(@pxref{Non-decimal-numbers, ,Octal and Hexadecimal Numbers}). - -@item -The @samp{|&} operator for two-way I/O to a coprocess -(@pxref{Two-way I/O, ,Two-Way Communications with Another Process}). - -@item -The @file{/inet} special files for TCP/IP networking using @samp{|&} -(@pxref{TCP/IP Networking, , Using @command{gawk} for Network Programming}). - -@item -The optional second argument to @code{close} that allows closing one end -of a two-way pipe to a coprocess -(@pxref{Two-way I/O, ,Two-Way Communications with Another Process}). - -@item -The optional third argument to the @code{match} function -for capturing text-matching subexpressions within a regexp -(@pxref{String Functions, , String Manipulation Functions}). - -@item -Positional specifiers in @code{printf} formats for -making translations easier -(@pxref{Printf Ordering, , Rearranging @code{printf} Arguments}). - -@item -The @code{asort} function for sorting arrays -(@pxref{Array Sorting, ,Sorting Array Values and Indices with @command{gawk}}). - -@item -The @code{bindtextdomain} and @code{dcgettext} functions -for internationalization -(@pxref{Programmer i18n, ,Internationalizing @command{awk} Programs}). - -@item -The @code{extension} built-in function and the ability to add -new built-in functions dynamically -(@pxref{Dynamic Extensions, , Adding New Built-in Functions to @command{gawk}}). - -@item -The @code{mktime} built-in function for creating timestamps -(@pxref{Time Functions, ,Using @command{gawk}'s Timestamp Functions}). - -@item -The -@code{and}, -@code{or}, -@code{xor}, -@code{compl}, -@code{lshift}, -@code{rshift}, -and -@code{strtonum} built-in -functions -(@pxref{Bitwise Functions, ,Using @command{gawk}'s Bit Manipulation Functions}). 
- -@item -@cindex @code{next file} statement -The support for @samp{next file} as two words was removed completely -(@pxref{Nextfile Statement, ,Using @command{gawk}'s @code{nextfile} Statement}). - -@item -The @option{--dump-variables} option to print a list of all global variables -(@pxref{Options, ,Command-Line Options}). - -@item -The @option{--gen-po} command-line option and the use of a leading -underscore to mark strings that should be translated -(@pxref{String Extraction, ,Extracting Marked Strings}). - -@item -The @option{--non-decimal-data} option to allow non-decimal -input data -(@pxref{Non-decimal Data, ,Allowing Non-Decimal Input Data}). - -@item -The @option{--profile} option and @command{pgawk}, the -profiling version of @command{gawk}, for producing execution -profiles of @command{awk} programs -(@pxref{Profiling, ,Profiling Your @command{awk} Programs}). - -@item -The @option{--enable-portals} configuration option to enable special treatment of -pathnames that begin with @file{/p} as BSD portals -(@pxref{Portal Files, , Using @command{gawk} with BSD Portals}). - -@item -The use of GNU Automake to help in standardizing the configuration process -(@pxref{Quick Installation, , Compiling @command{gawk} for Unix}). - -@item -The use of GNU @code{gettext} for @command{gawk}'s own message output -(@pxref{Gawk I18N, ,@command{gawk} Can Speak Your Language}). - -@item -BeOS support -(@pxref{BeOS Installation, , Installing @command{gawk} on BeOS}). - -@item -Tandem support -(@pxref{Tandem Installation, ,Installing @command{gawk} on a Tandem}). - -@item -The Atari port became officially unsupported -(@pxref{Atari Installation, ,Installing @command{gawk} on the Atari ST}). - -@item -The source code now uses new-style function definitions, with -@command{ansi2knr} to convert the code on systems with old compilers. - -@end itemize - -@c XXX ADD MORE STUFF HERE - -@node Contributors, , POSIX/GNU, Language History -@appendixsec Major Contributors to @command{gawk} -@cindex contributors to @command{gawk} -@quotation -@i{Always give credit where credit is due.}@* -Anonymous -@end quotation - -This @value{SECTION} names the major contributors to @command{gawk} -and/or this @value{DOCUMENT}, in approximate chronological order: - -@itemize @bullet -@item -@cindex Aho, Alfred -@cindex Weinberger, Peter -@cindex Kernighan, Brian -Dr.@: Alfred V.@: Aho, -Dr.@: Peter J.@: Weinberger, and -Dr.@: Brian W.@: Kernighan, all of Bell Laboratories, -designed and implemented Unix @command{awk}, -from which @command{gawk} gets the majority of its feature set. - -@item -@cindex Rubin, Paul -Paul Rubin -did the initial design and implementation in 1986, and wrote -the first draft (around 40 pages) of this @value{DOCUMENT}. - -@item -@cindex Fenlason, Jay -Jay Fenlason -finished the initial implementation. - -@item -@cindex Close, Diane -Diane Close -revised the first draft of this @value{DOCUMENT}, bringing it -to around 90 pages. - -@item -@cindex Stallman, Richard -Richard Stallman -helped finish the implementation and the initial draft of this -@value{DOCUMENT}. -He is also the founder of the FSF and the GNU project. - -@item -@cindex Woods, John -John Woods -contributed parts of the code (mostly fixes) in -the initial version of @command{gawk}. - -@item -@cindex Trueman, David -In 1988, -David Trueman -took over primary maintenance of @command{gawk}, -making it compatible with ``new'' @command{awk}, and -greatly improving its performance. 
- -@item -@cindex Rankin, Pat -Pat Rankin -provided the VMS port and its documentation. - -@item -@cindex Kwok, Conrad -@cindex Garfinkle, Scott -@cindex Williams, Kent -Conrad Kwok, -Scott Garfinkle, -and -Kent Williams -did the initial ports to MS-DOS with various versions of MSC. - -@item -@cindex Peterson, Hal -Hal Peterson -provided help in porting @command{gawk} to Cray systems. - -@item -@cindex Rommel, Kai Uwe -Kai Uwe Rommel -provided the port to OS/2 and its documentation. - -@item -@cindex Jaegermann, Michal -Michal Jaegermann -provided the port to Atari systems and its documentation. -He continues to provide portability checking with DEC Alpha -systems, and has done a lot of work to make sure @command{gawk} -works on non-32-bit systems. - -@item -@cindex Fish, Fred -Fred Fish -provided the port to Amiga systems and its documentation. - -@item -@cindex Deifik, Scott -Scott Deifik -currently maintains the MS-DOS port. - -@item -@cindex Grigera, Juan -Juan Grigera -maintains the port to Win32 systems. - -@item -@cindex Hankerson, Darrel -Dr.@: Darrel Hankerson -acts as coordinator for the various ports to different PC platforms -and creates binary distributions for various PC operating systems. -He is also instrumental in keeping the documentation up to date for -the various PC platforms. - -@item -@cindex Zoulas, Christos -Christos Zoulas -provided the @code{extension} -built-in function for dynamically adding new modules. - -@item -@cindex Kahrs, J@"urgen -J@"urgen Kahrs -contributed the initial version of the TCP/IP networking -code and documentation, and motivated the inclusion of the @samp{|&} operator. - -@item -@cindex Davies, Stephen -Stephen Davies -provided the port to Tandem systems and its documentation. - -@item -@cindex Brown, Martin -Martin Brown -provided the port to BeOS and its documentation. - -@item -@cindex Peters, Arno -Arno Peters -did the initial work to convert @command{gawk} to use -GNU Automake and @code{gettext}. - -@item -@cindex Broder, Alan J.@: -Alan J.@: Broder -provided the initial version of the @code{asort} function -as well as the code for the new optional third argument to the @code{match} function. - -@item -@cindex Robbins, Arnold -Arnold Robbins -has been working on @command{gawk} since 1988, at first -helping David Trueman, and as the primary maintainer since around 1994. -@end itemize - -@node Installation, Notes, Language History, Top -@appendix Installing @command{gawk} - -@cindex Linux -@cindex GNU/Linux -This appendix provides instructions for installing @command{gawk} on the -various platforms that are supported by the developers. The primary -developer supports GNU/Linux (and Unix), whereas the other ports are -contributed. -@xref{Bugs, , Reporting Problems and Bugs}, -for the electronic mail addresses of the people who did -the respective ports. - -@menu -* Gawk Distribution:: What is in the @command{gawk} distribution. -* Unix Installation:: Installing @command{gawk} under various - versions of Unix. -* Non-Unix Installation:: Installation on Other Operating Systems. -* Unsupported:: Systems whose ports are no longer supported. -* Bugs:: Reporting Problems and Bugs. -* Other Versions:: Other freely available @command{awk} - implementations. -@end menu - -@node Gawk Distribution, Unix Installation, Installation, Installation -@appendixsec The @command{gawk} Distribution - -This @value{SECTION} describes how to get the @command{gawk} -distribution, how to extract it, and then what is in the various files and -subdirectories. 
- -@menu -* Getting:: How to get the distribution. -* Extracting:: How to extract the distribution. -* Distribution contents:: What is in the distribution. -@end menu - -@node Getting, Extracting, Gawk Distribution, Gawk Distribution -@appendixsubsec Getting the @command{gawk} Distribution -@cindex getting @command{gawk} -@cindex anonymous @command{ftp} -@cindex @command{ftp}, anonymous -@cindex source code, @command{gawk} -@cindex @command{gawk}, source code -There are three ways to get GNU software: - -@itemize @bullet -@item -Copy it from someone else who already has it. - -@cindex FSF -@cindex Free Software Foundation -@item -Order @command{gawk} directly from the Free Software Foundation. -Software distributions are available for Unix, MS-DOS, and VMS, on -tape and CD-ROM. Their address is: - -@display -Free Software Foundation -59 Temple Place, Suite 330 -Boston, MA 02111-1307 USA -Phone: +1-617-542-5942 -Fax (including Japan): +1-617-542-2652 -Email: @email{gnu@@gnu.org} -URL: @uref{http://www.gnu.org/} -@end display - -@noindent -Ordering from the FSF directly contributes to the support of the foundation -and to the production of more free software. - -@item -Retrieve @command{gawk} by using anonymous @command{ftp} to the Internet host -@code{gnudist.gnu.org}, in the directory @file{/gnu/gawk}. -@end itemize - -The GNU software archive is mirrored around the world. -The up-to-date list of mirror sites is available from -@uref{http://www.gnu.org/order/ftp.html, the main FSF web site}. -Try to use one of the mirrors; they -will be less busy, and you can usually find one closer to your site. - -@node Extracting, Distribution contents, Getting, Gawk Distribution -@appendixsubsec Extracting the Distribution -@command{gawk} is distributed as a @code{tar} file compressed with the -GNU Zip program, @code{gzip}. - -Once you have the distribution (for example, -@file{gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz}), -use @code{gzip} to expand the -file and then use @code{tar} to extract it. You can use the following -pipeline to produce the @command{gawk} distribution: - -@example -# Under System V, add 'o' to the tar options -gzip -d -c gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz | tar -xvpf - -@end example - -@noindent -This creates a directory named @file{gawk-@value{VERSION}.@value{PATCHLEVEL}} -in the current directory. - -The distribution @value{FN} is of the form -@file{gawk-@var{V}.@var{R}.@var{P}.tar.gz}. -The @var{V} represents the major version of @command{gawk}, -the @var{R} represents the current release of version @var{V}, and -the @var{P} represents a @dfn{patch level}, meaning that minor bugs have -been fixed in the release. The current patch level is @value{PATCHLEVEL}, -but when retrieving distributions, you should get the version with the highest -version, release, and patch level. (Note, however, that patch levels greater than -or equal to 80 denote ``beta'' or non-production software; you might not want -to retrieve such a version unless you don't mind experimenting.) -If you are not on a Unix system, you need to make other arrangements -for getting and extracting the @command{gawk} distribution. You should consult -a local expert. 
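As an aside for Unix-like systems: if your @command{tar} understands
the @option{-z} option (GNU @command{tar} does), the separate
@command{gzip} step shown above is unnecessary:

@example
tar -xzvpf gawk-@value{VERSION}.@value{PATCHLEVEL}.tar.gz
@end example

@noindent
Either way, you end up with the same
@file{gawk-@value{VERSION}.@value{PATCHLEVEL}} directory.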
- -@node Distribution contents, , Extracting, Gawk Distribution -@appendixsubsec Contents of the @command{gawk} Distribution - -The @command{gawk} distribution has a number of C source files, -documentation files, -subdirectories, and files related to the configuration process -(@pxref{Unix Installation, ,Compiling and Installing @command{gawk} on Unix}), -as well as several subdirectories related to different non-Unix -operating systems: - -@table @asis -@item Various @samp{.c}, @samp{.y}, and @samp{.h} files: -These files are the actual @command{gawk} source code. -@end table - -@table @file -@item README -@itemx README_d/README.* -Descriptive files: @file{README} for @command{gawk} under Unix and the -rest for the various hardware and software combinations. - -@item INSTALL -A file providing an overview of the configuration and installation process. - -@item ChangeLog -A detailed list of source code changes as bugs are fixed or improvements made. - -@item NEWS -A list of changes to @command{gawk} since the last release or patch. - -@item COPYING -The GNU General Public License. - -@item FUTURES -A brief list of features and changes being contemplated for future -releases, with some indication of the time frame for the feature, based -on its difficulty. - -@item LIMITATIONS -A list of those factors that limit @command{gawk}'s performance. -Most of these depend on the hardware or operating system software, and -are not limits in @command{gawk} itself. - -@item POSIX.STD -A description of one area where the POSIX standard for @command{awk} is -incorrect as well as how @command{gawk} handles the problem. - -@cindex artificial intelligence, using @command{gawk} -@cindex AI programming, using @command{gawk} -@item doc/awkforai.txt -A short article describing why @command{gawk} is a good language for -AI (Artificial Intelligence) programming. - -@item doc/README.card -@itemx doc/ad.block -@itemx doc/awkcard.in -@itemx doc/cardfonts -@itemx doc/colors -@itemx doc/macros -@itemx doc/no.colors -@itemx doc/setter.outline -The @command{troff} source for a five-color @command{awk} reference card. -A modern version of @command{troff} such as GNU @command{troff} (@command{groff}) is -needed to produce the color version. See the file @file{README.card} -for instructions if you have an older @command{troff}. - -@item doc/gawk.1 -The @command{troff} source for a manual page describing @command{gawk}. -This is distributed for the convenience of Unix users. - -@cindex Texinfo -@item doc/gawk.texi -The Texinfo source file for this @value{DOCUMENT}. -It should be processed with @TeX{} to produce a printed document, and -with @command{makeinfo} to produce an Info or HTML file. - -@item doc/gawk.info -The generated Info file for this @value{DOCUMENT}. - -@item doc/gawkinet.texi -The Texinfo source file for -@ifinfo -@xref{Top}. -@end ifinfo -@ifnotinfo -@cite{TCP/IP Internetworking with @command{gawk}}. -@end ifnotinfo -It should be processed with @TeX{} to produce a printed document and -with @command{makeinfo} to produce an Info or HTML file. - -@item doc/gawkinet.info -The generated Info file for -@cite{TCP/IP Internetworking with @command{gawk}}. - -@item doc/igawk.1 -The @command{troff} source for a manual page describing the @command{igawk} -program presented in -@ref{Igawk Program, ,An Easy Way to Use Library Functions}. - -@item doc/Makefile.in -The input file used during the configuration process to generate the -actual @file{Makefile} for creating the documentation. 
- -@item Makefile.am -@itemx */Makefile.am -Files used by the GNU @command{automake} software for generating -the @file{Makefile.in} files used by @command{autoconf} and -@command{configure}. - -@item Makefile.in -@itemx acconfig.h -@itemx acinclude.m4 -@itemx aclocal.m4 -@itemx configh.in -@itemx configure.in -@itemx configure -@itemx custom.h -@itemx missing_d/* -@itemx m4/* -These files and subdirectories are used when configuring @command{gawk} -for various Unix systems. They are explained in -@ref{Unix Installation, ,Compiling and Installing @command{gawk} on Unix}. - -@item intl/* -@itemx po/* -The @file{intl} directory provides the GNU @code{gettext} library, which implements -@command{gawk}'s internationalization features, while the @file{po} library -contains message translations. - -@item awklib/extract.awk -@itemx awklib/Makefile.am -@itemx awklib/Makefile.in -@itemx awklib/eg/* -The @file{awklib} directory contains a copy of @file{extract.awk} -(@pxref{Extract Program, ,Extracting Programs from Texinfo Source Files}), -which can be used to extract the sample programs from the Texinfo -source file for this @value{DOCUMENT}. It also contains a @file{Makefile.in} file, which -@command{configure} uses to generate a @file{Makefile}. -@file{Makefile.am} is used by GNU Automake to create @file{Makefile.in}. -The library functions from -@ref{Library Functions, , A Library of @command{awk} Functions}, -and the @command{igawk} program from -@ref{Igawk Program, , An Easy Way to Use Library Functions}, -are included as ready-to-use files in the @command{gawk} distribution. -They are installed as part of the installation process. -The rest of the programs in this @value{DOCUMENT} are available in appropriate -subdirectories of @file{awklib/eg}. - -@item unsupported/atari/* -Files needed for building @command{gawk} on an Atari ST -(@pxref{Atari Installation, ,Installing @command{gawk} on the Atari ST}, for details). - -@item unsupported/tandem/* -Files needed for building @command{gawk} on a Tandem -(@pxref{Tandem Installation, ,Installing @command{gawk} on a Tandem}, for details). - -@item posix/* -Files needed for building @command{gawk} on POSIX-compliant systems. - -@item pc/* -Files needed for building @command{gawk} under MS-DOS, MS Windows and OS/2 -(@pxref{PC Installation, ,Installation on PC Operating Systems}, for details). - -@item vms/* -Files needed for building @command{gawk} under VMS -(@pxref{VMS Installation, ,How to Compile and Install @command{gawk} on VMS}, for details). - -@item test/* -A test suite for -@command{gawk}. You can use @samp{make check} from the top-level @command{gawk} -directory to run your version of @command{gawk} against the test suite. -If @command{gawk} successfully passes @samp{make check}, then you can -be confident of a successful port. -@end table - -@node Unix Installation, Non-Unix Installation, Gawk Distribution, Installation -@appendixsec Compiling and Installing @command{gawk} on Unix - -Usually, you can compile and install @command{gawk} by typing only two -commands. However, if you use an unusual system, you may need -to configure @command{gawk} for your system yourself. - -@menu -* Quick Installation:: Compiling @command{gawk} under Unix. -* Additional Configuration Options:: Other compile-time options. -* Configuration Philosophy:: How it's all supposed to work. 
-@end menu - -@node Quick Installation, Additional Configuration Options, Unix Installation, Unix Installation -@appendixsubsec Compiling @command{gawk} for Unix - -@cindex installation, unix -After you have extracted the @command{gawk} distribution, @command{cd} -to @file{gawk-@value{VERSION}.@value{PATCHLEVEL}}. Like most GNU software, -@command{gawk} is configured -automatically for your Unix system by running the @command{configure} program. -This program is a Bourne shell script that is generated automatically using -GNU @command{autoconf}. -@ifnotinfo -(The @command{autoconf} software is -described fully in -@cite{Autoconf---Generating Automatic Configuration Scripts}, -which is available from the Free Software Foundation.) -@end ifnotinfo -@ifinfo -(The @command{autoconf} software is described fully starting with -@ref{Top}.) -@end ifinfo - -To configure @command{gawk}, simply run @command{configure}: - -@example -sh ./configure -@end example - -This produces a @file{Makefile} and @file{config.h} tailored to your system. -The @file{config.h} file describes various facts about your system. -You might want to edit the @file{Makefile} to -change the @code{CFLAGS} variable, which controls -the command-line options that are passed to the C compiler (such as -optimization levels or compiling for debugging). - -Alternatively, you can add your own values for most @command{make} -variables on the command line, such as @code{CC} and @code{CFLAGS}, when -running @command{configure}: - -@example -CC=cc CFLAGS=-g sh ./configure -@end example - -@noindent -See the file @file{INSTALL} in the @command{gawk} distribution for -all the details. - -After you have run @command{configure} and possibly edited the @file{Makefile}, -type: - -@example -make -@end example - -@noindent -Shortly thereafter, you should have an executable version of @command{gawk}. -That's all there is to it! -To verify that @command{gawk} is working properly, -run @samp{make check}. All of the tests should succeed. -If these steps do not work, or if any of the tests fail, -check the files in the @file{README_d} directory to see if you've -found a known problem. If the failure is not described there, -please send in a bug report -(@pxref{Bugs, ,Reporting Problems and Bugs}.) - -@node Additional Configuration Options, Configuration Philosophy, Quick Installation, Unix Installation -@appendixsubsec Additional Configuration Options - -There are several additional options you may use on the @command{configure} -command line when compiling @command{gawk} from scratch. - -@table @code -@cindex @code{--enable-portals} configuration option -@cindex configuration option, @code{--enable-portals} -@item --enable-portals -This option causes @command{gawk} to treat pathnames that begin -with @file{/p} as BSD portal files when doing two-way I/O with -the @samp{|&} operator -(@pxref{Portal Files, , Using @command{gawk} with BSD Portals}). - -@cindex Linux -@cindex GNU/Linux -@cindex @code{--with-included-gettext} configuration option -@cindex configuration option, @code{--with-included-gettext} -@item --with-included-gettext -Use the version of the @code{gettext} library that comes with @command{gawk}. -This option should be used on systems that do @emph{not} use @value{PVERSION} 2 (or later) -of the GNU C library. -All known modern GNU/Linux systems use Glibc 2. Use this option on any other system. 
- -@cindex @code{--disable-nls} configuration option -@cindex configuration option, @code{--disable-nls} -@item --disable-nls -Disable all message translation facilities. -This is usually not desirable, but it may bring you some slight performance -improvement. -You should also use this option if @option{--with-included-gettext} -doesn't work on your system. -@end table - -@node Configuration Philosophy, , Additional Configuration Options, Unix Installation -@appendixsubsec The Configuration Process - -@cindex configuring @command{gawk} -This @value{SECTION} is of interest only if you know something about using the -C language and the Unix operating system. - -The source code for @command{gawk} generally attempts to adhere to formal -standards wherever possible. This means that @command{gawk} uses library -routines that are specified by the ISO C standard and by the POSIX -operating system interface standard. When using an ISO C compiler, -function prototypes are used to help improve the compile-time checking. - -Many Unix systems do not support all of either the ISO or the -POSIX standards. The @file{missing_d} subdirectory in the @command{gawk} -distribution contains replacement versions of those functions that are -most likely to be missing. - -The @file{config.h} file that @command{configure} creates contains -definitions that describe features of the particular operating system -where you are attempting to compile @command{gawk}. The three things -described by this file are: what header files are available, so that -they can be correctly included, what (supposedly) standard functions -are actually available in your C libraries, and various miscellaneous -facts about your variant of Unix. For example, there may not be an -@code{st_blksize} element in the @code{stat} structure. In this case, -@samp{HAVE_ST_BLKSIZE} is undefined. - -@cindex @code{custom.h} configuration file -It is possible for your C compiler to lie to @command{configure}. It may -do so by not exiting with an error when a library function is not -available. To get around this, edit the file @file{custom.h}. -Use an @samp{#ifdef} that is appropriate for your system, and either -@code{#define} any constants that @command{configure} should have defined but -didn't, or @code{#undef} any constants that @command{configure} defined and -should not have. @file{custom.h} is automatically included by -@file{config.h}. - -It is also possible that the @command{configure} program generated by -@command{autoconf} will not work on your system in some other fashion. -If you do have a problem, the file @file{configure.in} is the input for -@command{autoconf}. You may be able to change this file and generate a -new version of @command{configure} that works on your system -(@pxref{Bugs, ,Reporting Problems and Bugs}, -for information on how to report problems in configuring @command{gawk}). -The same mechanism may be used to send in updates to @file{configure.in} -and/or @file{custom.h}. - -@node Non-Unix Installation, Unsupported, Unix Installation, Installation -@appendixsec Installation on Other Operating Systems - -This @value{SECTION} describes how to install @command{gawk} on -various non-Unix systems. - -@menu -* Amiga Installation:: Installing @command{gawk} on an Amiga. -* BeOS Installation:: Installing @command{gawk} on BeOS. -* PC Installation:: Installing and Compiling @command{gawk} on - MS-DOS and OS/2. -* VMS Installation:: Installing @command{gawk} on VMS. 
-@end menu - -@node Amiga Installation, BeOS Installation, Non-Unix Installation, Non-Unix Installation -@appendixsubsec Installing @command{gawk} on an Amiga - -@cindex amiga -@cindex installation, amiga -You can install @command{gawk} on an Amiga system using a Unix emulation -environment, available via anonymous @command{ftp} from -@code{ftp.ninemoons.com} in the directory @file{pub/ade/current}. -This includes a shell based on @command{pdksh}. The primary component of -this environment is a Unix emulation library, @file{ixemul.lib}. -@c could really use more background here, who wrote this, etc. - -A more complete distribution for the Amiga is available on -the Geek Gadgets CD-ROM, available from: - -@display -CRONUS -1840 E. Warner Road #105-265 -Tempe, AZ 85284 USA -US Toll Free: (800) 804-0833 -Phone: +1-602-491-0442 -FAX: +1-602-491-0048 -Email: @email{info@@ninemoons.com} -WWW: @uref{http://www.ninemoons.com} -Anonymous @command{ftp} site: @code{ftp.ninemoons.com} -@end display - -Once you have the distribution, you can configure @command{gawk} simply by -running @command{configure}: - -@example -configure -v m68k-amigaos -@end example - -Then run @command{make} and you should be all set! -If these steps do not work, please send in a bug report -(@pxref{Bugs, ,Reporting Problems and Bugs}). - -@node BeOS Installation, PC Installation, Amiga Installation, Non-Unix Installation -@appendixsubsec Installing @command{gawk} on BeOS -@cindex BeOS -@cindex installation, beos - -@c From email contributed by Martin Brown, mc@whoever.com -Since BeOS DR9, all the tools that you should need to build @code{gawk} are -included with BeOS. The process is basically identical to the Unix process -of running @command{configure} and then @command{make}. Full instructions are given below. - -You can compile @command{gawk} under BeOS by extracting the standard sources -and running @command{configure}. You @emph{must} specify the location -prefix for the installation directory. For BeOS DR9 and beyond, the best directory to -use is @file{/boot/home/config}, so the @command{configure} command is: - -@example -configure --prefix=/boot/home/config -@end example - -This installs the compiled application into @file{/boot/home/config/bin}, -which is already specified in the standard @env{PATH}. - -Once the configuration process is completed, you can run @command{make}, -and then @samp{make install}: - -@example -$ make -@dots{} -$ make install -@end example - -BeOS uses @command{bash} as its shell; thus, you use @command{gawk} the same way you would -under Unix. -If these steps do not work, please send in a bug report -(@pxref{Bugs, ,Reporting Problems and Bugs}). - -@c Rewritten by Scott Deifik -@c and Darrel Hankerson - -@node PC Installation, VMS Installation, BeOS Installation, Non-Unix Installation -@appendixsubsec Installation on PC Operating Systems - -@cindex installation, pc operating systems -This @value{SECTION} covers installation and usage of @command{gawk} on x86 machines -running DOS, any version of Windows, or OS/2. -In this @value{SECTION}, the term ``Win32'' -refers to any of Windows-95/98/ME/NT/2000. - -The limitations of DOS (and DOS shells under Windows or OS/2) has meant -that various ``DOS extenders'' are often used with programs such as -@command{gawk}. The varying capabilities of Microsoft Windows 3.1 -and Win32 can add to the confusion. For an overview of the -considerations, please refer to @file{README_d/README.pc} in the -distribution. 
- -@menu -* PC Binary Installation:: Installing a prepared distribution. -* PC Compiling:: Compiling @command{gawk} for MS-DOS, Win32, - and OS/2. -* PC Using:: Running @command{gawk} on MS-DOS, Win32 and - OS/2. -@end menu - -@node PC Binary Installation, PC Compiling, PC Installation, PC Installation -@appendixsubsubsec Installing a Prepared Distribution for PC Systems - -If you have received a binary distribution prepared by the DOS -maintainers, then @command{gawk} and the necessary support files appear -under the @file{gnu} directory, with executables in @file{gnu/bin}, -libraries in @file{gnu/lib/awk}, and manual pages under @file{gnu/man}. -This is designed for easy installation to a @file{/gnu} directory on your -drive---however, the files can be installed anywhere provided @env{AWKPATH} is -set properly. Regardless of the installation directory, the first line of -@file{igawk.cmd} and @file{igawk.bat} (in @file{gnu/bin}) may need to be -edited. - -The binary distribution contains a separate file describing the -contents. In particular, it may include more than one version of the -@command{gawk} executable. OS/2 binary distributions may have a -different arrangement, but installation is similar. - -@node PC Compiling, PC Using, PC Binary Installation, PC Installation -@appendixsubsubsec Compiling @command{gawk} for PC Operating Systems - -@command{gawk} can be compiled for MS-DOS, Win32, and OS/2 using the GNU -development tools from DJ Delorie (DJGPP; MS-DOS only) or Eberhard -Mattes (EMX; MS-DOS, Win32 and OS/2). Microsoft Visual C/C++ can be used -to build a Win32 version, and Microsoft C/C++ can be -used to build 16-bit versions for MS-DOS and OS/2. The file -@file{README_d/README.pc} in the @command{gawk} distribution contains -additional notes, and @file{pc/Makefile} contains important information on -compilation options. - -To build @command{gawk}, copy the files in the @file{pc} directory -(@emph{except} for @file{ChangeLog}) to the directory with the rest of -the @command{gawk} sources. The @file{Makefile} contains a configuration -section with comments and may need to be edited in order to work with -your @command{make} utility. - -The @file{Makefile} contains a number of targets for building various MS-DOS, -Win32, and OS/2 versions. A list of targets is printed if the @command{make} -command is given without a target. As an example, to build @command{gawk} -using the DJGPP tools, enter @samp{make djgpp}. - -Using @command{make} to run the standard tests and to install @command{gawk} -requires additional Unix-like tools, including @command{sh}, @command{sed}, and -@command{cp}. In order to run the tests, the @file{test/*.ok} files may need to -be converted so that they have the usual DOS-style end-of-line markers. Most -of the tests work properly with Stewartson's shell along with the -companion utilities or appropriate GNU utilities. However, some editing of -@file{test/Makefile} is required. It is recommended that you copy the file -@file{pc/Makefile.tst} over the file @file{test/Makefile} as a -replacement. Details can be found in @file{README_d/README.pc} -and in the file @file{pc/Makefile.tst}. 
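Putting the pieces together, once the @file{pc} files have been copied into the main source directory and the configuration section of @file{Makefile} has been adjusted, a DJGPP-based build and test setup under a DOS-style shell looks roughly like the following sketch.  It is only a sketch; @file{README_d/README.pc} remains the authoritative reference:

@example
make djgpp
copy pc\Makefile.tst test\Makefile
@end example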
- -@node PC Using, , PC Compiling, PC Installation -@appendixsubsubsec Using @command{gawk} on PC Operating Systems - -@cindex search path -@cindex directory search -@cindex path, search -@cindex search path, for source files -The OS/2 and MS-DOS versions of @command{gawk} search for program files as -described in @ref{AWKPATH Variable, ,The @env{AWKPATH} Environment Variable}. -However, semicolons (rather than colons) separate elements -in the @env{AWKPATH} variable. If @env{AWKPATH} is not set or is empty, -then the default search path is @code{@w{".;c:/lib/awk;c:/gnu/lib/awk"}}. - -An @command{sh}-like shell (as opposed to @command{command.com} under MS-DOS -or @command{cmd.exe} under OS/2) may be useful for @command{awk} programming. -Ian Stewartson has written an excellent shell for MS-DOS and OS/2, -Daisuke Aoyama has ported GNU @command{bash} to MS-DOS using the DJGPP tools, -and several shells are available for OS/2, including @command{ksh}. The file -@file{README_d/README.pc} in the @command{gawk} distribution contains -information on these shells. Users of Stewartson's shell on DOS should -examine its documentation for handling command lines; in particular, -the setting for @command{gawk} in the shell configuration may need to be -changed and the @code{ignoretype} option may also be of interest. - -@cindex @code{BINMODE} variable -Under OS/2 and DOS, @command{gawk} (and many other text programs) silently -translate end-of-line @code{"\r\n"} to @code{"\n"} on input and @code{"\n"} -to @code{"\r\n"} on output. A special @code{BINMODE} variable allows -control over these translations and is interpreted as follows. - -@itemize @bullet -@item -If @code{BINMODE} is @samp{"r"}, or -@code{(BINMODE & 1)} is nonzero, then -binary mode is set on read (i.e., no translations on reads). - -@item -If @code{BINMODE} is @code{"w"}, or -@code{(BINMODE & 2)} is nonzero, then -binary mode is set on write (i.e., no translations on writes). - -@item -If @code{BINMODE} is @code{"rw"} or @code{"wr"}, -binary mode is set for both read and write -(same as @code{(BINMODE & 3)}). - -@item -@code{BINMODE=@var{non-null-string}} is -the same as @samp{BINMODE=3} (i.e., no translations on -reads or writes). However, @command{gawk} issues a warning -message if the string is not one of @code{"rw"} or @code{"wr"}. -@end itemize - -@noindent -The modes for standard input and standard output are set one time -only (after the -command line is read, but before processing any of the @command{awk} program). -Setting @code{BINMODE} for standard input or -standard output is accomplished by using an -appropriate @samp{-v BINMODE=@var{N}} option on the command line. -@code{BINMODE} is set at the time a file or pipe is opened and cannot be -changed mid-stream. - -The name @code{BINMODE} was chosen to match @command{mawk} -(@pxref{Other Versions, , Other Freely Available @command{awk} Implementations}). -Both @command{mawk} and @command{gawk} handle @code{BINMODE} similarly; however, -@command{mawk} adds a @samp{-W BINMODE=@var{N}} option and an environment -variable that can set @code{BINMODE}, @code{RS}, and @code{ORS}. The -files @file{binmode[1-3].awk} (under @file{gnu/lib/awk} in some of the -prepared distributions) have been chosen to match @command{mawk}'s @samp{-W -BINMODE=@var{N}} option. These can be changed or discarded; in particular, -the setting of @code{RS} giving the fewest ``surprises'' is open to debate. 
-@command{mawk} uses @samp{RS = "\r\n"} if binary mode is set on read, which is -appropriate for files with the DOS-style end-of-line. - -To Illustrate, the following examples set binary mode on writes for standard -output and other files, and set @code{ORS} as the ``usual'' DOS-style -end-of-line: - -@example -gawk -v BINMODE=2 -v ORS="\r\n" @dots{} -@end example - -@noindent -or: - -@example -gawk -v BINMODE=w -f binmode2.awk @dots{} -@end example - -@noindent -These give the same result as the @samp{-W BINMODE=2} option in -@command{mawk}. -The following changes the record separator to @code{"\r\n"} and sets binary -mode on reads, but does not affect the mode on standard input: - -@example -gawk -v RS="\r\n" --source "BEGIN @{ BINMODE = 1 @}" @dots{} -@end example - -@noindent -or: - -@example -gawk -f binmode1.awk @dots{} -@end example - -@noindent -With proper quoting, in the first example the setting of @code{RS} can be -moved into the @code{BEGIN} rule. - -@node VMS Installation, , PC Installation, Non-Unix Installation -@appendixsubsec How to Compile and Install @command{gawk} on VMS - -@c based on material from Pat Rankin - -@cindex installation, vms -This @value{SUBSECTION} describes how to compile and install @command{gawk} under VMS. - -@menu -* VMS Compilation:: How to compile @command{gawk} under VMS. -* VMS Installation Details:: How to install @command{gawk} under VMS. -* VMS Running:: How to run @command{gawk} under VMS. -* VMS POSIX:: Alternate instructions for VMS POSIX. -@end menu - -@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation -@appendixsubsubsec Compiling @command{gawk} on VMS - -To compile @command{gawk} under VMS, there is a @code{DCL} command procedure that -issues all the necessary @code{CC} and @code{LINK} commands. There is -also a @file{Makefile} for use with the @code{MMS} utility. From the source -directory, use either: - -@example -$ @@[.VMS]VMSBUILD.COM -@end example - -@noindent -or: - -@example -$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK -@end example - -Depending upon which C compiler you are using, follow one of the sets -of instructions in this table: - -@table @asis -@item VAX C V3.x -Use either @file{vmsbuild.com} or @file{descrip.mms} as is. These use -@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0. - -@item VAX C V2.x -You must have Version 2.3 or 2.4; older ones won't work. Edit either -@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them. -For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters. -Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h}) -and comment out or delete the two lines @samp{#define __STDC__ 0} and -@samp{#define VAXC_BUILTINS} near the end. - -@item GNU C -Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different -from those for VAX C V2.x but equally straightforward. No changes to -@file{config.h} are needed. - -@item DEC C -Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments. -No changes to @file{config.h} are needed. -@end table - -@command{gawk} has been tested under VAX/VMS 5.5-1 using VAX C V3.2, and -GNU C 1.40 and 2.3. It should work without modifications for VMS V4.6 and up. - -@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation -@appendixsubsubsec Installing @command{gawk} on VMS - -To install @command{gawk}, all you need is a ``foreign'' command, which is -a @code{DCL} symbol whose value begins with a dollar sign. 
For example: - -@example -$ GAWK :== $disk1:[gnubin]GAWK -@end example - -@noindent -Substitute the actual location of @command{gawk.exe} for -@samp{$disk1:[gnubin]}. The symbol should be placed in the -@file{login.com} of any user who wants to run @command{gawk}, -so that it is defined every time the user logs on. -Alternatively, the symbol may be placed in the system-wide -@file{sylogin.com} procedure, which allows all users -to run @command{gawk}. - -Optionally, the help entry can be loaded into a VMS help library: - -@example -$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP -@end example - -@noindent -(You may want to substitute a site-specific help library rather than -the standard VMS library @samp{HELPLIB}.) After loading the help text, -the command: - -@example -$ HELP GAWK -@end example - -@noindent -provides information about both the @command{gawk} implementation and the -@command{awk} programming language. - -The logical name @samp{AWK_LIBRARY} can designate a default location -for @command{awk} program files. For the @option{-f} option, if the specified -@value{FN} has no device or directory path information in it, @command{gawk} -looks in the current directory first, then in the directory specified -by the translation of @samp{AWK_LIBRARY} if the file is not found. -If, after searching in both directories, the file still is not found, -@command{gawk} appends the suffix @samp{.awk} to the filename and retries -the file search. If @samp{AWK_LIBRARY} is not defined, that -portion of the file search fails benignly. - -@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation -@appendixsubsubsec Running @command{gawk} on VMS - -Command-line parsing and quoting conventions are significantly different -on VMS, so examples in this @value{DOCUMENT} or from other sources often need minor -changes. They @emph{are} minor though, and all @command{awk} programs -should run correctly. - -Here are a couple of trivial tests: - -@example -$ gawk -- "BEGIN @{print ""Hello, World!""@}" -$ gawk -"W" version -! could also be -"W version" or "-W version" -@end example - -@noindent -Note that uppercase and mixed-case text must be quoted. - -The VMS port of @command{gawk} includes a @code{DCL}-style interface in addition -to the original shell-style interface (see the help entry for details). -One side effect of dual command-line parsing is that if there is only a -single parameter (as in the quoted string program above), the command -becomes ambiguous. To work around this, the normally optional @option{--} -flag is required to force Unix style rather than @code{DCL} parsing. If any -other dash-type options (or multiple parameters such as @value{DF}s to -process) are present, there is no ambiguity and @option{--} can be omitted. - -@cindex search path -@cindex directory search -@cindex path, search -@cindex search path, for source files -The default search path, when looking for @command{awk} program files specified -by the @option{-f} option, is @code{"SYS$DISK:[],AWK_LIBRARY:"}. The logical -name @samp{AWKPATH} can be used to override this default. The format -of @samp{AWKPATH} is a comma-separated list of directory specifications. -When defining it, the value should be quoted so that it retains a single -translation and not a multitranslation @code{RMS} searchlist. 
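For example, a definition along the following lines gives @command{gawk} a private program directory to search before falling back on @samp{AWK_LIBRARY}.  (The first directory specification here is only a placeholder; substitute wherever you actually keep your @command{awk} programs.)  Note the quotes, which keep the value as a single translation rather than a searchlist:

@example
$ DEFINE AWKPATH "DISK1:[USER.AWKPROGS],AWK_LIBRARY:"
@end example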
- -@node VMS POSIX, , VMS Running, VMS Installation -@appendixsubsubsec Building and Using @command{gawk} on VMS POSIX - -Ignore the instructions above, although @file{vms/gawk.hlp} should still -be made available in a help library. The source tree should be unpacked -into a container file subsystem rather than into the ordinary VMS filesystem. -Make sure that the two scripts, @file{configure} and -@file{vms/posix-cc.sh}, are executable; use @samp{chmod +x} on them if -necessary. Then execute the following two commands: - -@example -psx> CC=vms/posix-cc.sh configure -psx> make CC=c89 gawk -@end example - -@noindent -The first command constructs files @file{config.h} and @file{Makefile} out -of templates, using a script to make the C compiler fit @command{configure}'s -expectations. The second command compiles and links @command{gawk} using -the C compiler directly; ignore any warnings from @command{make} about being -unable to redefine @code{CC}. @command{configure} takes a very long -time to execute, but at least it provides incremental feedback as it runs. - -This has been tested with VAX/VMS V6.2, VMS POSIX V2.0, and DEC C V5.2. - -Once built, @command{gawk} works like any other shell utility. Unlike -the normal VMS port of @command{gawk}, no special command-line manipulation is -needed in the VMS POSIX environment. - -@node Unsupported, Bugs, Non-Unix Installation, Installation -@appendixsec Unsupported Operating System Ports - -This section describes systems for which -the @command{gawk} port is no longer supported. - -@menu -* Atari Installation:: Installing @command{gawk} on the Atari ST. -* Tandem Installation:: Installing @command{gawk} on a Tandem. -@end menu - -@node Atari Installation, Tandem Installation, Unsupported, Unsupported -@appendixsubsec Installing @command{gawk} on the Atari ST - -The Atari port is no longer supported. It is -included for those who might want to use it, but it is no longer being -actively maintained. - -@c based on material from Michal Jaegermann -@cindex atari -@cindex installation, atari -There are no substantial differences when installing @command{gawk} on -various Atari models. Compiled @command{gawk} executables do not require -a large amount of memory with most @command{awk} programs, and should run on all -Motorola processor-based models (referred to below simply as ST, even if -that is not strictly accurate). - -In order to use @command{gawk}, you need to have a shell, either text or -graphics, that does not map all the characters of a command line to -uppercase. Maintaining case distinction in option flags is very -important (@pxref{Options, ,Command-Line Options}). -These days this is the default and it may only be a problem for some -very old machines. If your system does not preserve the case of option -flags, you need to upgrade your tools. Support for I/O -redirection is necessary to make it easy to import @command{awk} programs -from other environments. Pipes are nice to have but not vital. - -@menu -* Atari Compiling:: Compiling @command{gawk} on Atari. -* Atari Using:: Running @command{gawk} on Atari. -@end menu - -@node Atari Compiling, Atari Using, Atari Installation, Atari Installation -@appendixsubsubsec Compiling @command{gawk} on the Atari ST - -A proper compilation of @command{gawk} sources when @code{sizeof(int)} -differs from @code{sizeof(void *)} requires an ISO C compiler. An initial -port was done with @command{gcc}. You may actually prefer executables -where @code{int}s are four bytes wide but the other variant works as well.
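To see why prototypes matter when @code{int}s and pointers have different sizes, consider the classic failure mode sketched below.  (This is not @command{gawk} code, merely an illustration.)  Without a declaration in scope, a pre-ISO compiler assumes that @code{malloc} returns an @code{int}, so a 32-bit pointer squeezed through a 16-bit @code{int} comes back corrupted; an ISO C compiler with @code{<stdlib.h>} included avoids the problem:

@example
char *p;

p = (char *) malloc(100);   /* no prototype in scope: the returned */
                            /* pointer is silently truncated       */
@end example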
- -You may need quite a bit of memory when trying to recompile the @command{gawk} -sources, as some source files (@file{regex.c} in particular) are quite -big. If you run out of memory compiling such a file, try reducing the -optimization level for this particular file, which may help. - -@cindex Linux -@cindex GNU/Linux -With a reasonable shell (@command{bash} will do), you have a pretty good chance -that the @command{configure} utility will succeed, and in particular if -you run GNU/Linux, MiNT or a similar operating system. Otherwise -sample versions of @file{config.h} and @file{Makefile.st} are given in the -@file{atari} subdirectory and can be edited and copied to the -corresponding files in the main source directory. Even if -@command{configure} produces something, it might be advisable to compare -its results with the sample versions and possibly make adjustments. - -Some @command{gawk} source code fragments depend on a preprocessor define -@samp{atarist}. This basically assumes the TOS environment with @command{gcc}. -Modify these sections as appropriate if they are not right for your -environment. Also see the remarks about @env{AWKPATH} and @code{envsep} in -@ref{Atari Using, ,Running @command{gawk} on the Atari ST}. - -As shipped, the sample @file{config.h} claims that the @code{system} -function is missing from the libraries, which is not true, and an -alternative implementation of this function is provided in -@file{unsupported/atari/system.c}. -Depending upon your particular combination of -shell and operating system, you might want to change the file to indicate -that @code{system} is available. - -@node Atari Using, , Atari Compiling, Atari Installation -@appendixsubsubsec Running @command{gawk} on the Atari ST - -An executable version of @command{gawk} should be placed, as usual, -anywhere in your @env{PATH} where your shell can find it. - -While executing, the Atari version of @command{gawk} creates a number of temporary files. When -using @command{gcc} libraries for TOS, @command{gawk} looks for either of -the environment variables, @env{TEMP} or @env{TMPDIR}, in that order. -If either one is found, its value is assumed to be a directory for -temporary files. This directory must exist, and if you can spare the -memory, it is a good idea to put it on a RAM drive. If neither -@env{TEMP} nor @env{TMPDIR} are found, then @command{gawk} uses the -current directory for its temporary files. - -The ST version of @command{gawk} searches for its program files, as described in -@ref{AWKPATH Variable, ,The @env{AWKPATH} Environment Variable}. -The default value for the @env{AWKPATH} variable is taken from -@code{DEFPATH} defined in @file{Makefile}. The sample @command{gcc}/TOS -@file{Makefile} for the ST in the distribution sets @code{DEFPATH} to -@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}. The search path can be -modified by explicitly setting @env{AWKPATH} to whatever you want. -Note that colons cannot be used on the ST to separate elements in the -@env{AWKPATH} variable, since they have another reserved meaning. -Instead, you must use a comma to separate elements in the path. When -recompiling, the separating character can be modified by initializing -the @code{envsep} variable in @file{unsupported/atari/gawkmisc.atr} to another -value. - -Although @command{awk} allows great flexibility in doing I/O redirections -from within a program, this facility should be used with care on the ST -running under TOS. 
In some circumstances, the OS routines for file-handle -pool processing lose track of certain events, causing the -computer to crash and requiring a reboot. Often a warm reboot is -sufficient. Fortunately, this happens infrequently and in rather -esoteric situations. In particular, avoid having one part of an -@command{awk} program using @code{print} statements explicitly redirected -to @file{/dev/stdout}, while other @code{print} statements use the -default standard output, and a calling shell has redirected standard -output to a file. -@c 10/2000: Is this still true, now that gawk does /dev/stdout internally? - -When @command{gawk} is compiled with the ST version of @command{gcc} and its -usual libraries, it accepts both @samp{/} and @samp{\} as path separators. -While this is convenient, it should be remembered that this removes one -technically valid character (@samp{/}) from your @value{FN}. -It may also create problems for external programs called via the @code{system} -function, which may not support this convention. Whenever it is possible -that a file created by @command{gawk} will be used by some other program, -use only backslashes. Also remember that in @command{awk}, backslashes in -strings have to be doubled in order to get literal backslashes -(@pxref{Escape Sequences}). - -@node Tandem Installation, , Atari Installation, Unsupported -@appendixsubsec Installing @command{gawk} on a Tandem -@cindex tandem -@cindex installation, tandem - -The Tandem port is only minimally supported. -The port's contributor no longer has access to a Tandem system. - -@c This section based on README.Tandem by Stephen Davies (scldad@sdc.com.au) -The Tandem port was done on a Cyclone machine running D20. -The port is pretty clean and all facilities seem to work except for -the I/O piping facilities -(@pxref{Getline/Pipe, , Using @code{getline} from a Pipe}, -@ref{Getline/Variable/Pipe, ,Using @code{getline} into a Variable from a Pipe}, -and -@ref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}), -which is just too foreign a concept for Tandem. - -To build a Tandem executable from source, download all of the files so -that the @value{FN}s on the Tandem box conform to the restrictions of D20. -For example, @file{array.c} becomes @file{ARRAYC}, and @file{awk.h} -becomes @file{AWKH}. The totally Tandem-specific files are in the -@file{tandem} ``subvolume'' (@file{unsupported/tandem} in the @command{gawk} -distribution) and should be copied to the main source directory before -building @command{gawk}. - -The file @file{compit} can then be used to compile and bind an executable. -Alas, there is no @command{configure} or @command{make}. - -Usage is the same as for Unix, except that D20 requires all @samp{@{} and -@samp{@}} characters to be escaped with @samp{~} on the command line -(but @emph{not} in script files). Also, the standard Tandem syntax for -@samp{/in filename,out filename/} must be used instead of the usual -Unix @samp{<} and @samp{>} for file redirection. (Redirection options -on @code{getline}, @code{print} etc., are supported.) - -The @samp{-mr @var{val}} option -(@pxref{Options, ,Command-Line Options}) -has been ``stolen'' to enable Tandem users to process fixed-length -records with no ``end-of-line'' character. That is, @samp{-mr 74} tells -@command{gawk} to read the input file as fixed 74-byte records. 
- -@node Bugs, Other Versions, Unsupported, Installation -@appendixsec Reporting Problems and Bugs -@cindex archeologists -@quotation -@i{There is nothing more dangerous than a bored archeologist.}@* -The Hitchhiker's Guide to the Galaxy -@end quotation -@c the radio show, not the book. :-) - -@cindex bug reports -@cindex problem reports -@cindex reporting bugs -@cindex reporting problems -If you have problems with @command{gawk} or think that you have found a bug, -please report it to the developers; we cannot promise to do anything, -but we might well want to fix it. - -Before reporting a bug, make sure you have actually found a real bug. -Carefully reread the documentation and see if it really says you can do -what you're trying to do. If it's not clear whether you should be able -to do something or not, report that too; it's a bug in the documentation! - -Before reporting a bug or trying to fix it yourself, try to isolate it -to the smallest possible @command{awk} program and input @value{DF} that -reproduces the problem. Then send us the program and @value{DF}, -some idea of what kind of Unix system you're using, -the compiler you used to compile @command{gawk}, and the exact results -@command{gawk} gave you. Also say what you expected to occur; this helps -us decide whether the problem is really in the documentation. - -@cindex @code{bug-gawk@@gnu.org} bug reporting address -@cindex email address for bug reports, @code{bug-gawk@@gnu.org} -@cindex bug reports, email address, @code{bug-gawk@@gnu.org} -Once you have a precise problem, send email to @email{bug-gawk@@gnu.org}. - -@cindex Robbins, Arnold -Please include the version number of @command{gawk} you are using. -You can get this information with the command @samp{gawk --version}. -Using this address automatically sends a carbon copy of your -mail to me. If necessary, I can be reached directly at -@email{arnold@@gnu.org}. The bug reporting address is preferred since the -email list is archived at the GNU Project. -@emph{All email should be in English, since that is my native language.} - -@cindex @code{comp.lang.awk} Usenet news group -@strong{Caution:} Do @emph{not} try to report bugs in @command{gawk} by -posting to the Usenet/Internet newsgroup @code{comp.lang.awk}. -While the @command{gawk} developers do occasionally read this newsgroup, -there is no guarantee that we will see your posting. The steps described -above are the officially recognized ways of reporting bugs. - -Non-bug suggestions are always welcome as well. If you have questions -about things that are unclear in the documentation or are just obscure -features, ask me; I will try to help you out, although I -may not have the time to fix the problem. You can send me electronic -mail at the Internet address noted previously. - -If you find bugs in one of the non-Unix ports of @command{gawk}, please send -an electronic mail message to the person who maintains that port. They -are named in the following list, as well as in the @file{README} file in the @command{gawk} -distribution. Information in the @file{README} file should be considered -authoritative if it conflicts with this @value{DOCUMENT}. - -The people maintaining the non-Unix ports of @command{gawk} are -as follows: - -@ignore -@table @asis -@cindex Fish, Fred -@item Amiga -Fred Fish, @email{fnf@@ninemoons.com}. - -@cindex Brown, Martin -@item BeOS -Martin Brown, @email{mc@@whoever.com}.
- -@cindex Deifik, Scott -@cindex Hankerson, Darrel -@item MS-DOS -Scott Deifik, @email{scottd@@amgen.com} and -Darrel Hankerson, @email{hankedr@@mail.auburn.edu}. - -@cindex Grigera, Juan -@item MS-Windows -Juan Grigera, @email{juan@@biophnet.unlp.edu.ar}. - -@cindex Rommel, Kai Uwe -@item OS/2 -Kai Uwe Rommel, @email{rommel@@ars.de}. - -@cindex Davies, Stephen -@item Tandem -Stephen Davies, @email{scldad@@sdc.com.au}. - -@cindex Rankin, Pat -@item VMS -Pat Rankin, @email{rankin@@eql.caltech.edu}. -@end table -@end ignore - -@multitable {MS-Windows} {123456789012345678901234567890123456789001234567890} -@cindex Fish, Fred -@item Amiga @tab Fred Fish, @email{fnf@@ninemoons.com}. - -@cindex Brown, Martin -@item BeOS @tab Martin Brown, @email{mc@@whoever.com}. - -@cindex Deifik, Scott -@cindex Hankerson, Darrel -@item MS-DOS @tab Scott Deifik, @email{scottd@@amgen.com} and -Darrel Hankerson, @email{hankedr@@mail.auburn.edu}. - -@cindex Grigera, Juan -@item MS-Windows @tab Juan Grigera, @email{juan@@biophnet.unlp.edu.ar}. - -@cindex Rommel, Kai Uwe -@item OS/2 @tab Kai Uwe Rommel, @email{rommel@@ars.de}. - -@cindex Davies, Stephen -@item Tandem @tab Stephen Davies, @email{scldad@@sdc.com.au}. - -@cindex Rankin, Pat -@item VMS @tab Pat Rankin, @email{rankin@@eql.caltech.edu}. -@end multitable - -If your bug is also reproducible under Unix, please send a copy of your -report to the @email{bug-gawk@@gnu.org} email list as well. - -@node Other Versions, , Bugs, Installation -@appendixsec Other Freely Available @command{awk} Implementations -@cindex other @command{awk} implementations -@ignore -From: emory!amc.com!brennan (Michael Brennan) -Subject: C++ comments in awk programs -To: arnold@gnu.ai.mit.edu (Arnold Robbins) -Date: Wed, 4 Sep 1996 08:11:48 -0700 (PDT) - -@end ignore -@cindex Brennan, Michael -@quotation -@i{It's kind of fun to put comments like this in your awk code.}@* -@ @ @ @ @ @ @code{// Do C++ comments work? answer: yes! of course}@* -Michael Brennan -@end quotation - -There are three other freely available @command{awk} implementations. -This @value{SECTION} briefly describes where to get them: - -@table @asis -@cindex Kernighan, Brian -@cindex Unix @command{awk}, source code -@cindex source code, Unix @command{awk} -@item Unix @command{awk} -Brian Kernighan has made his implementation of -@command{awk} freely available. -You can retrieve this version via the World Wide Web from -his home page.@footnote{@uref{http://cm.bell-labs.com/who/bwk}} -It is available in several archive formats: - -@table @asis -@item Shell archive -@uref{http://cm.bell-labs.com/who/bwk/awk.shar} - -@item Compressed @command{tar} file -@uref{http://cm.bell-labs.com/who/bwk/awk.tar.gz} - -@item Zip file -@uref{http://cm.bell-labs.com/who/bwk/awk.zip} -@end table - -This version requires an ISO C (1990 standard) compiler; -the C compiler from -GCC (the GNU Compiler Collection) -works quite nicely. - -@xref{BTL, ,Extensions in the Bell Laboratories @command{awk}}, -for a list of extensions in this @command{awk} that are not in POSIX @command{awk}. - -@cindex GPL -@cindex General Public License -@cindex GNU General Public License -@cindex Brennan, Michael -@cindex @command{mawk}, source code -@cindex source code, @command{mawk} -@item @command{mawk} -Michael Brennan has written an independent implementation of @command{awk}, -called @command{mawk}. It is available under the GPL -(@pxref{Copying, ,GNU General Public License}), -just as @command{gawk} is. 
- -You can get it via anonymous @command{ftp} to the host -@code{@w{ftp.whidbey.net}}. Change directory to @file{/pub/brennan}. -Use ``binary'' or ``image'' mode, and retrieve @file{mawk1.3.3.tar.gz} -(or the latest version that is there). - -@command{gunzip} may be used to decompress this file. Installation -is similar to @command{gawk}'s -(@pxref{Unix Installation, , Compiling and Installing @command{gawk} on Unix}). - -@cindex extensions, @command{mawk} -@command{mawk} has the following extensions that are not in POSIX @command{awk}: - -@itemize @bullet -@item -The @code{fflush} built-in function for flushing buffered output -(@pxref{I/O Functions, ,Input/Output Functions}). - -@item -The @samp{**} and @samp{**=} operators -(@pxref{Arithmetic Ops, ,Arithmetic Operators} -and also see -@ref{Assignment Ops, ,Assignment Expressions}). - -@item -The use of @code{func} as an abbreviation for @code{function} -(@pxref{Definition Syntax, ,Function Definition Syntax}). - -@item -The @samp{\x} escape sequence -(@pxref{Escape Sequences}). - -@item -The @file{/dev/stdout}, and @file{/dev/stderr} -special files -(@pxref{Special Files, ,Special @value{FFN}s in @command{gawk}}). -Use @code{"-"} instead of @code{"/dev/stdin"} with @command{mawk}. - -@item -The ability for @code{FS} and for the third -argument to @code{split} to be null strings -(@pxref{Single Character Fields, , Making Each Character a Separate Field}). - -@item -The ability to delete all of an array at once with @samp{delete @var{array}} -(@pxref{Delete, ,The @code{delete} Statement}). - -@item -The ability for @code{RS} to be a regexp -(@pxref{Records, ,How Input Is Split into Records}). - -@item -The @code{BINMODE} special variable for non-Unix operating systems -(@pxref{PC Using, ,Using @command{gawk} on PC Operating Systems}). -@end itemize - -The next version of @command{mawk} will support @code{nextfile}. - -@cindex Sumner, Andrew -@cindex @command{awka} compiler for @command{awk} programs -@cindex @command{awka}, source code -@cindex source code, @command{awka} -@item @command{awka} -Written by Andrew Sumner, -@command{awka} translates @command{awk} programs into C, compiles them, -and links them with a library of functions that provides the core -@command{awk} functionality. -It also has a number of extensions. - -@cindex GPL -@cindex General Public License -@cindex GNU General Public License -@cindex LGPL -@cindex Lesser General Public License -@cindex GNU Lesser General Public License -The @command{awk} translator is released under the GPL, and the library -is under the LGPL. - -@ignore -To get @command{awka}, go to its home page at -Go to @uref{http://awka.sourceforge.net}. -@end ignore -To get @command{awka}, go to @uref{http://awka.sourceforge.net}. -You can reach Andrew Sumner at @email{andrew_sumner@@bigfoot.com}. -@end table - -@node Notes, Basic Concepts, Installation, Top -@appendix Implementation Notes - -This appendix contains information mainly of interest to implementors and -maintainers of @command{gawk}. Everything in it applies specifically to -@command{gawk} and not to other implementations. - -@menu -* Compatibility Mode:: How to disable certain @command{gawk} - extensions. -* Additions:: Making Additions To @command{gawk}. -* Dynamic Extensions:: Adding new built-in functions to - @command{gawk}. -* Future Extensions:: New features that may be implemented one day. 
-@end menu - -@node Compatibility Mode, Additions, Notes, Notes -@appendixsec Downward Compatibility and Debugging - -@xref{POSIX/GNU, ,Extensions in @command{gawk} Not in POSIX @command{awk}}, -for a summary of the GNU extensions to the @command{awk} language and program. -All of these features can be turned off by invoking @command{gawk} with the -@option{--traditional} option or with the @option{--posix} option. - -If @command{gawk} is compiled for debugging with @samp{-DDEBUG}, then there -is one more option available on the command line: - -@table @code -@item -W parsedebug -@itemx --parsedebug -Print out the parse stack information as the program is being parsed. -@end table - -This option is intended only for serious @command{gawk} developers -and not for the casual user. It probably has not even been compiled into -your version of @command{gawk}, since it slows down execution. - -@node Additions, Dynamic Extensions, Compatibility Mode, Notes -@appendixsec Making Additions to @command{gawk} - -If you find that you want to enhance @command{gawk} in a significant -fashion, you are perfectly free to do so. That is the point of having -free software; the source code is available and you are free to change -it as you want (@pxref{Copying, ,GNU General Public License}). - -This @value{SECTION} discusses the ways you might want to change @command{gawk} -as well as any considerations you should bear in mind. - -@menu -* Adding Code:: Adding code to the main body of - @command{gawk}. -* New Ports:: Porting @command{gawk} to a new operating - system. -@end menu - -@node Adding Code, New Ports, Additions, Additions -@appendixsubsec Adding New Features - -@cindex adding new features -@cindex features, adding to @command{gawk} -You are free to add any new features you like to @command{gawk}. -However, if you want your changes to be incorporated into the @command{gawk} -distribution, there are several steps that you need to take in order to -make it possible for me to include your changes: - -@enumerate 1 -@item -Before building the new feature into @command{gawk} itself, -consider writing it as an extension module -(@pxref{Dynamic Extensions, ,Adding New Built-in Functions to @command{gawk}}). -If that's not possible, continue with the rest of the steps in this list. - -@item -Get the latest version. -It is much easier for me to integrate changes if they are relative to -the most recent distributed version of @command{gawk}. If your version of -@command{gawk} is very old, I may not be able to integrate them at all. -(@xref{Getting, ,Getting the @command{gawk} Distribution}, -for information on getting the latest version of @command{gawk}.) - -@item -@ifnotinfo -Follow the @cite{GNU Coding Standards}. -@end ifnotinfo -@ifinfo -See @inforef{Top, , Version, standards, GNU Coding Standards}. -@end ifinfo -This document describes how GNU software should be written. If you haven't -read it, please do so, preferably @emph{before} starting to modify @command{gawk}. -(The @cite{GNU Coding Standards} are available from -the GNU Project's -@command{ftp} -site, at -@uref{ftp://gnudist.gnu.org/gnu/GNUInfo/standards.text}. -Texinfo, Info, and DVI versions are also available.) - -@cindex @command{gawk}, coding style -@cindex coding style used in @command{gawk} -@item -Use the @command{gawk} coding style. -The C code for @command{gawk} follows the instructions in the -@cite{GNU Coding Standards}, with minor exceptions. 
The code is formatted -using the traditional ``K&R'' style, particularly as regards to the placement -of braces and the use of tabs. In brief, the coding rules for @command{gawk} -are as follows: - -@itemize @bullet -@item -Use ANSI/ISO style (prototype) function headers when defining functions. - -@item -Put the name of the function at the beginning of its own line. - -@item -Put the return type of the function, even if it is @code{int}, on the -line above the line with the name and arguments of the function. - -@item -Put spaces around parentheses used in control structures -(@code{if}, @code{while}, @code{for}, @code{do}, @code{switch}, -and @code{return}). - -@item -Do not put spaces in front of parentheses used in function calls. - -@item -Put spaces around all C operators and after commas in function calls. - -@item -Do not use the comma operator to produce multiple side effects, except -in @code{for} loop initialization and increment parts, and in macro bodies. - -@item -Use real tabs for indenting, not spaces. - -@item -Use the ``K&R'' brace layout style. - -@item -Use comparisons against @code{NULL} and @code{'\0'} in the conditions of -@code{if}, @code{while}, and @code{for} statements, as well as in the @code{case}s -of @code{switch} statements, instead of just the -plain pointer or character value. - -@item -Use the @code{TRUE}, @code{FALSE} and @code{NULL} symbolic constants -and the character constant @code{'\0'} where appropriate, instead of @code{1} -and @code{0}. - -@item -Use the @code{ISALPHA}, @code{ISDIGIT}, etc.@: macros, instead of the -traditional lowercase versions; these macros are better behaved for -non-ASCII character sets. - -@item -Provide one-line descriptive comments for each function. - -@item -Do not use @samp{#elif}. Many older Unix C compilers cannot handle it. - -@item -Do not use the @code{alloca} function for allocating memory off the stack. -Its use causes more portability trouble than is worth the minor benefit of not having -to free the storage. Instead, use @code{malloc} and @code{free}. -@end itemize - -@strong{Note:} -If I have to reformat your code to follow the coding style used in -@command{gawk}, I may not bother to integrate your changes at all. - -@item -Be prepared to sign the appropriate paperwork. -In order for the FSF to distribute your changes, you must either place -those changes in the public domain and submit a signed statement to that -effect, or assign the copyright in your changes to the FSF. -Both of these actions are easy to do and @emph{many} people have done so -already. If you have questions, please contact me -(@pxref{Bugs, , Reporting Problems and Bugs}), -or @email{gnu@@gnu.org}. - -@cindex Texinfo -@item -Update the documentation. -Along with your new code, please supply new sections and/or chapters -for this @value{DOCUMENT}. If at all possible, please use real -Texinfo, instead of just supplying unformatted ASCII text (although -even that is better than no documentation at all). -Conventions to be followed in @cite{@value{TITLE}} are provided -after the @samp{@@bye} at the end of the Texinfo source file. -If possible, please update the @command{man} page as well. - -You will also have to sign paperwork for your documentation changes. - -@item -Submit changes as context diffs or unified diffs. -Use @samp{diff -c -r -N} or @samp{diff -u -r -N} to compare -the original @command{gawk} source tree with your version. -(I find context diffs to be more readable but unified diffs are -more compact.) 
-I recommend using the GNU version of @command{diff}.
-Send the output produced by either run of @command{diff} to me when you
-submit your changes.
-(@xref{Bugs, , Reporting Problems and Bugs}, for the electronic mail
-information.)
-
-Using this format makes it easy for me to apply your changes to the
-master version of the @command{gawk} source code (using @code{patch}).
-If I have to apply the changes manually, using a text editor, I may
-not do so, particularly if there are lots of changes.
-
-@item
-Include an entry for the @file{ChangeLog} file with your submission.
-This helps further minimize the amount of work I have to do,
-making it easier for me to accept patches.
-@end enumerate
-
-Although this sounds like a lot of work, please remember that while you
-may write the new code, I have to maintain it and support it. If it
-isn't possible for me to do that with a minimum of extra work, then I
-probably will not.
-
-@node New Ports, , Adding Code, Additions
-@appendixsubsec Porting @command{gawk} to a New Operating System
-
-@cindex porting @command{gawk}
-If you want to port @command{gawk} to a new operating system, there are
-several steps to follow:
-
-@enumerate 1
-@item
-Follow the guidelines in
-@ifinfo
-@ref{Adding Code, ,Adding New Features},
-@end ifinfo
-@ifnotinfo
-the previous @value{SECTION}
-@end ifnotinfo
-concerning coding style, submission of diffs, and so on.
-
-@item
-When doing a port, bear in mind that your code must co-exist peacefully
-with the rest of @command{gawk} and the other ports. Avoid gratuitous
-changes to the system-independent parts of the code. If at all possible,
-avoid sprinkling @samp{#ifdef}s just for your port throughout the
-code.
-
-@cindex GPL
-@cindex General Public License
-@cindex GNU General Public License
-If the changes needed for a particular system affect too much of the
-code, I probably will not accept them. In such a case, you can, of course,
-distribute your changes on your own, as long as you comply
-with the GPL
-(@pxref{Copying, ,GNU General Public License}).
-
-@item
-A number of the files that come with @command{gawk} are maintained by other
-people at the Free Software Foundation. Thus, you should not change them
-unless it is for a very good reason; i.e., changes are not out of the
-question, but changes to these files are scrutinized extra carefully.
-The files are @file{getopt.h}, @file{getopt.c},
-@file{getopt1.c}, @file{regex.h}, @file{regex.c}, @file{dfa.h},
-@file{dfa.c}, @file{install-sh}, and @file{mkinstalldirs}.
-
-@item
-Be willing to continue to maintain the port.
-Non-Unix operating systems are supported by volunteers who maintain
-the code needed to compile and run @command{gawk} on their systems. If no one
-volunteers to maintain a port, it becomes unsupported and it may
-be necessary to remove it from the distribution.
-
-@item
-Supply an appropriate @file{gawkmisc.???} file.
-Each port has its own @file{gawkmisc.???} that implements certain
-operating-system-specific functions. This is cleaner than a plethora of
-@samp{#ifdef}s scattered throughout the code. The @file{gawkmisc.c} in
-the main source directory includes the appropriate
-@file{gawkmisc.???} file from each subdirectory.
-Be sure to update it as well.
-
-Each port's @file{gawkmisc.???} file has a suffix reminiscent of the machine
-or operating system for the port---for example, @file{pc/gawkmisc.pc} and
-@file{vms/gawkmisc.vms}.
-The use of separate suffixes, instead of plain
-@file{gawkmisc.c}, makes it possible to move files from a port's subdirectory
-into the main subdirectory, without accidentally destroying the real
-@file{gawkmisc.c} file. (Currently, this is only an issue for the
-PC operating system ports.)
-
-@item
-Supply a @file{Makefile} as well as any other C source and header files that are
-necessary for your operating system. All your code should be in a
-separate subdirectory, with a name that is the same as, or reminiscent
-of, either your operating system or the computer system. If possible,
-try to structure things so that it is not necessary to move files out
-of the subdirectory into the main source directory. If that is not
-possible, then be sure to avoid using names for your files that
-duplicate the names of files in the main source directory.
-
-@item
-Update the documentation.
-Please write a section (or sections) for this @value{DOCUMENT} describing the
-installation and compilation steps needed to compile and/or install
-@command{gawk} for your system.
-
-@item
-Be prepared to sign the appropriate paperwork.
-In order for the FSF to distribute your code, you must either place
-your code in the public domain and submit a signed statement to that
-effect, or assign the copyright in your code to the FSF.
-@ifinfo
-Both of these actions are easy to do and @emph{many} people have done so
-already. If you have questions, please contact me, or
-@email{gnu@@gnu.org}.
-@end ifinfo
-@end enumerate
-
-Following these steps makes it much easier to integrate your changes
-into @command{gawk} and have them co-exist happily with other
-operating systems' code that is already there.
-
-In the code that you supply and maintain, feel free to use a
-coding style and brace layout that suits your taste.
-
-@node Dynamic Extensions, Future Extensions, Additions, Notes
-@appendixsec Adding New Built-in Functions to @command{gawk}
-@cindex Robinson, Will
-@cindex robot, the
-@cindex Lost In Space
-@quotation
-@i{Danger Will Robinson! Danger!!@*
-Warning! Warning!}@*
-The Robot
-@end quotation
-
-@cindex Linux
-@cindex GNU/Linux
-Beginning with @command{gawk} 3.1, it is possible to add new built-in
-functions to @command{gawk} using dynamically loaded libraries. This
-facility is available on systems (such as GNU/Linux) that support
-the @code{dlopen} and @code{dlsym} functions.
-This @value{SECTION} describes how to write and use dynamically
-loaded extensions for @command{gawk}.
-Experience with programming in
-C or C++ is necessary when reading this @value{SECTION}.
-
-@strong{Caution:} The facilities described in this @value{SECTION}
-are very much subject to change in the next @command{gawk} release.
-Be aware that you may have to re-do everything, perhaps from scratch,
-upon the next release.
-
-@menu
-* Internals:: A brief look at some @command{gawk} internals.
-* Sample Library:: An example of new functions.
-@end menu
-
-@node Internals, Sample Library, Dynamic Extensions, Dynamic Extensions
-@appendixsubsec A Minimal Introduction to @command{gawk} Internals
-
-The truth is that @command{gawk} was not designed for simple extensibility.
-The facilities for adding functions using shared libraries work, but
-are something of a ``bag on the side.'' Thus, this tour is
-brief and simplistic; would-be @command{gawk} hackers are encouraged to
-spend some time reading the source code before trying to write
-extensions based on the material presented here.
Of particular note -are the files @file{awk.h}, @file{builtin.c}, and @file{eval.c}. -Reading @file{awk.y} in order to see how the parse tree is built -would also be of use. - -With the disclaimers out of the way, the following types, structure -members, functions, and macros are declared in @file{awk.h} and are of -use when writing extensions. The next @value{SECTION} -shows how they are used: - -@table @code -@cindex @code{AWKNUM} internal type -@cindex internal type, @code{AWKNUM} -@item AWKNUM -An @code{AWKNUM} is the internal type of @command{awk} -floating-point numbers. Typically, it is a C @code{double}. - -@cindex @code{NODE} internal type -@cindex internal type, @code{NODE} -@item NODE -Just about everything is done using objects of type @code{NODE}. -These contain both strings and numbers, as well as variables and arrays. - -@cindex @code{force_number} internal function -@cindex internal function, @code{force_number} -@item AWKNUM force_number(NODE *n) -This macro forces a value to be numeric. It returns the actual -numeric value contained in the node. -It may end up calling an internal @command{gawk} function. - -@cindex @code{force_string} internal function -@cindex internal function, @code{force_string} -@item void force_string(NODE *n) -This macro guarantees that a @code{NODE}'s string value is current. -It may end up calling an internal @command{gawk} function. -It also guarantees that the string is zero-terminated. - -@cindex @code{param_cnt} internal variable -@cindex internal variable, @code{param_cnt} -@item n->param_cnt -The number of parameters actually passed in a function call at runtime. - -@cindex @code{stptr} internal variable -@cindex @code{stlen} internal variable -@cindex internal variable, @code{stptr} -@cindex internal variable, @code{stlen} -@item n->stptr -@itemx n->stlen -The data and length of a @code{NODE}'s string value, respectively. -The string is @emph{not} guaranteed to be zero-terminated. -If you need to pass the string value to a C library function, save -the value in @code{n->stptr[n->stlen]}, assign @code{'\0'} to it, -call the routine, and then restore the value. - -@cindex @code{type} internal variable -@cindex internal variable, @code{type} -@item n->type -The type of the @code{NODE}. This is a C @code{enum}. Values should -be either @code{Node_var} or @code{Node_var_array} for function -parameters. - -@cindex @code{vname} internal variable -@cindex internal variable, @code{vname} -@item n->vname -The ``variable name'' of a node. This is not of much use inside -externally written extensions. - -@cindex @code{assoc_clear} internal function -@cindex internal function, @code{assoc_clear} -@item void assoc_clear(NODE *n) -Clears the associative array pointed to by @code{n}. -Make sure that @samp{n->type == Node_var_array} first. - -@cindex @code{assoc_lookup} internal function -@cindex internal function, @code{assoc_lookup} -@item NODE **assoc_lookup(NODE *symbol, NODE *subs, int reference) -Finds, and installs if necessary, array elements. -@code{symbol} is the array, @code{subs} is the subscript. -This is usually a value created with @code{tmp_string} (see below). -@code{reference} should be @code{TRUE} if it is an error to use the -value before it is created. Typically, @code{FALSE} is the -correct value to use from extension functions. 
- -@cindex @code{make_string} internal function -@cindex internal function, @code{make_string} -@item NODE *make_string(char *s, size_t len) -Take a C string and turn it into a pointer to a @code{NODE} that -can be stored appropriately. This is permanent storage; understanding -of @command{gawk} memory management is helpful. - -@cindex @code{make_number} internal function -@cindex internal function, @code{make_number} -@item NODE *make_number(AWKNUM val) -Take an @code{AWKNUM} and turn it into a pointer to a @code{NODE} that -can be stored appropriately. This is permanent storage; understanding -of @command{gawk} memory management is helpful. - -@cindex @code{tmp_string} internal function -@item NODE *tmp_string(char *s, size_t len); -@cindex internal function, @code{tmp_string} -Take a C string and turn it into a pointer to a @code{NODE} that -can be stored appropriately. This is temporary storage; understanding -of @command{gawk} memory management is helpful. - -@cindex @code{tmp_number} internal function -@item NODE *tmp_number(AWKNUM val) -@cindex internal function, @code{tmp_number} -Take an @code{AWKNUM} and turn it into a pointer to a @code{NODE} that -can be stored appropriately. This is temporary storage; -understanding of @command{gawk} memory management is helpful. - -@cindex @code{dupnode} internal function -@cindex internal function, @code{dupnode} -@item NODE *dupnode(NODE *n) -Duplicate a node. In most cases, this increments an internal -reference count instead of actually duplicating the entire @code{NODE}; -understanding of @command{gawk} memory management is helpful. - -@cindex @code{free_temp} internal macro -@cindex internal macro, @code{free_temp} -@item void free_temp(NODE *n) -This macro releases the memory associated with a @code{NODE} -allocated with @code{tmp_string} or @code{tmp_number}. -Understanding of @command{gawk} memory management is helpful. - -@cindex @code{make_builtin} internal function -@cindex internal function, @code{make_builtin} -@item void make_builtin(char *name, NODE *(*func)(NODE *), int count) -Register a C function pointed to by @code{func} as new built-in -function @code{name}. @code{name} is a regular C string. @code{count} -is the maximum number of arguments that the function takes. -The function should be written in the following manner: - -@example -/* do_xxx --- do xxx function for gawk */ - -NODE * -do_xxx(NODE *tree) -@{ - @dots{} -@} -@end example - -@cindex @code{get_argument} internal function -@cindex internal function, @code{get_argument} -@item NODE *get_argument(NODE *tree, int i) -This function is called from within a C extension function to get -the @code{i}'th argument from the function call. -The first argument is argument zero. - -@cindex @code{set_value} internal function -@item void set_value(NODE *tree) -@cindex internal function, @code{set_value} -This function is called from within a C extension function to set -the return value from the extension function. This value is -what the @command{awk} program sees as the return value from the -new @command{awk} function. - -@cindex @code{update_ERRNO} internal function -@item void update_ERRNO(void) -@cindex internal function, @code{update_ERRNO} -This function is called from within a C extension function to set -the value of @command{gawk}'s @code{ERRNO} variable, based on the current -value of the C @code{errno} variable. -It is provided as a convenience. 
-@end table - -An argument that is supposed to be an array needs to be handled with -some extra code, in case the array being passed in is actually -from a function parameter. -The following ``boiler plate'' code shows how to do this: - -@smallexample -NODE *the_arg; - -the_arg = get_argument(tree, 2); /* assume need 3rd arg, 0-based */ - -/* if a parameter, get it off the stack */ -if (the_arg->type == Node_param_list) - the_arg = stack_ptr[the_arg->param_cnt]; - -/* parameter referenced an array, get it */ -if (the_arg->type == Node_array_ref) - the_arg = the_arg->orig_array; - -/* check type */ -if (the_arg->type != Node_var && the_arg->type != Node_var_array) - fatal("newfunc: third argument is not an array"); - -/* force it to be an array, if necessary, clear it */ -the_arg->type = Node_var_array; -assoc_clear(the_arg); -@end smallexample - -Again, you should spend time studying the @command{gawk} internals; -don't just blindly copy this code. - -@node Sample Library, , Internals, Dynamic Extensions -@appendixsubsec Directory and File Operation Built-ins - -Two useful functions that are not in @command{awk} are @code{chdir} -(so that an @command{awk} program can change its directory) and -@code{stat} (so that an @command{awk} program can gather information about -a file). -This @value{SECTION} implements these functions for @command{gawk} in an -external extension library. - -@menu -* Internal File Description:: What the new functions will do. -* Internal File Ops:: The code for internal file operations. -* Using Internal File Ops:: How to use an external extension. -@end menu - -@node Internal File Description, Internal File Ops, Sample Library, Sample Library -@appendixsubsubsec Using @code{chdir} and @code{stat} - -This @value{SECTION} shows how to use the new functions at the @command{awk} -level once they've been integrated into the running @command{gawk} -interpreter. -Using @code{chdir} is very straightforward. It takes one argument, -the new directory to change to: - -@example -@dots{} -newdir = "/home/arnold/funstuff" -ret = chdir(newdir) -if (ret < 0) @{ - printf("could not change to %s: %s\n", - newdir, ERRNO) > "/dev/stderr" - exit 1 -@} -@dots{} -@end example - -The return value is negative if the @code{chdir} failed, -and @code{ERRNO} -(@pxref{Built-in Variables}) -is set to a string indicating the error. - -Using @code{stat} is a bit more complicated. -The C @code{stat} function fills in a structure that has a fair -amount of information. -The right way to model this in @command{awk} is to fill in an associative -array with the appropriate information: - -@c broke printf for page breaking -@example -file = "/home/arnold/.profile" -fdata[1] = "x" # force `fdata' to be an array -ret = stat(file, fdata) -if (ret < 0) @{ - printf("could not stat %s: %s\n", - file, ERRNO) > "/dev/stderr" - exit 1 -@} -printf("size of %s is %d bytes\n", file, fdata["size"]) -@end example - -The @code{stat} function always clears the data array, even if -the @code{stat} fails. It fills in the following elements: - -@table @code -@item "name" -The name of the file that was @code{stat}'ed. - -@item "dev" -@itemx "ino" -The file's device and inode numbers, respectively. - -@item "mode" -The file's mode, as a numeric value. This includes both the file's -type and its permissions. - -@item "nlink" -The number of hard links (directory entries) the file has. - -@item "uid" -@itemx "gid" -The numeric user and group ID numbers of the file's owner. - -@item "size" -The size in bytes of the file. 
- -@item "blocks" -The number of disk blocks the file actually occupies. This may not -be a function of the file's size if the file has holes. - -@item "atime" -@itemx "mtime" -@itemx "ctime" -The file's last access, modification, and inode update times, -respectively. These are numeric timestamps, suitable for formatting -with @code{strftime} -(@pxref{Built-in, ,Built-in Functions}). - -@item "pmode" -The file's ``printable mode.'' This is a string representation of -the file's type and permissions, such as what is produced by -@samp{ls -l}---for example, @code{"drwxr-xr-x"}. - -@item "type" -A printable string representation of the file's type. The value -is one of the following: - -@table @code -@item "blockdev" -@itemx "chardev" -The file is a block or character device (``special file''). - -@ignore -@item "door" -The file is a Solaris ``door'' (special file used for -interprocess communications). -@end ignore - -@item "directory" -The file is a directory. - -@item "fifo" -The file is a named-pipe (also known as a FIFO). - -@item "file" -The file is just a regular file. - -@item "socket" -The file is an @code{AF_UNIX} (``Unix domain'') socket in the -filesystem. - -@item "symlink" -The file is a symbolic link. -@end table -@end table - -Several additional elements may be present depending upon the operating -system and the type of the file. You can test for them in your @command{awk} -program by using the @code{in} operator -(@pxref{Reference to Elements, ,Referring to an Array Element}): - -@table @code -@item "blksize" -The preferred block size for I/O to the file. This field is not -present on all POSIX-like systems in the C @code{stat} structure. - -@item "linkval" -If the file is a symbolic link, this element is the name of the -file the link points to (i.e., the value of the link). - -@item "rdev" -@itemx "major" -@itemx "minor" -If the file is a block or character device file, then these values -represent the numeric device number and the major and minor components -of that number, respectively. -@end table - -@node Internal File Ops, Using Internal File Ops, Internal File Description, Sample Library -@appendixsubsubsec C Code for @code{chdir} and @code{stat} - -@cindex Linux -@cindex GNU/Linux -Here is the C code for these extensions. They were written for -GNU/Linux. The code needs some more work for complete portability -to other POSIX-compliant systems:@footnote{This version is edited -slightly for presentation. The complete version can be found in -@file{extension/filefuncs.c} in the @command{gawk} distribution.} - -@c break line for page breaking -@example -#include "awk.h" - -#include - -/* do_chdir --- provide dynamically loaded - chdir() builtin for gawk */ - -static NODE * -do_chdir(tree) -NODE *tree; -@{ - NODE *newdir; - int ret = -1; - - newdir = get_argument(tree, 0); -@end example - -The file includes the @code{"awk.h"} header file for definitions -for the @command{gawk} internals. It includes @code{} -for access to the @code{major} and @code{minor} macros. - -@cindex conventions, programming -@cindex programming conventions -By convention, for an @command{awk} function @code{foo}, the function that -implements it is called @samp{do_foo}. The function should take -a @samp{NODE *} argument, usually called @code{tree}, that -represents the argument list to the function. The @code{newdir} -variable represents the new directory to change to, retrieved -with @code{get_argument}. Note that the first argument is -numbered zero. 
- -This code actually accomplishes the @code{chdir}. It first forces -the argument to be a string and passes the string value to the -@code{chdir} system call. If the @code{chdir} fails, @code{ERRNO} -is updated. -The result of @code{force_string} has to be freed with @code{free_temp}: - -@example - if (newdir != NULL) @{ - (void) force_string(newdir); - ret = chdir(newdir->stptr); - if (ret < 0) - update_ERRNO(); - - free_temp(newdir); - @} -@end example - -Finally, the function returns the return value to the @command{awk} level, -using @code{set_value}. Then it must return a value from the call to -the new built-in (this value ignored by the interpreter): - -@example - /* Set the return value */ - set_value(tmp_number((AWKNUM) ret)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -@} -@end example - -The @code{stat} built-in is more involved. First comes a function -that turns a numeric mode into a printable representation -(e.g., 644 becomes @samp{-rw-r--r--}). This is omitted here for brevity: - -@c break line for page breaking -@example -/* format_mode --- turn a stat mode field - into something readable */ - -static char * -format_mode(fmode) -unsigned long fmode; -@{ - @dots{} -@} -@end example - -Next comes the actual @code{do_stat} function itself. First come the -variable declarations and argument checking: - -@ignore -Changed message for page breaking. Used to be: - "stat: called with incorrect number of arguments (%d), should be 2", -@end ignore -@example -/* do_stat --- provide a stat() function for gawk */ - -static NODE * -do_stat(tree) -NODE *tree; -@{ - NODE *file, *array; - struct stat sbuf; - int ret; - char *msg; - NODE **aptr; - char *pmode; /* printable mode */ - char *type = "unknown"; - - /* check arg count */ - if (tree->param_cnt != 2) - fatal( - "stat: called with %d arguments, should be 2", - tree->param_cnt); -@end example - -Then comes the actual work. First, we get the arguments. -Then, we always clear the array. To get the file information, -we use @code{lstat}, in case the file is a symbolic link. -If there's an error, we set @code{ERRNO} and return: - -@c comment made multiline for page breaking -@example - /* - * directory is first arg, - * array to hold results is second - */ - file = get_argument(tree, 0); - array = get_argument(tree, 1); - - /* empty out the array */ - assoc_clear(array); - - /* lstat the file, if error, set ERRNO and return */ - (void) force_string(file); - ret = lstat(file->stptr, & sbuf); - if (ret < 0) @{ - update_ERRNO(); - - set_value(tmp_number((AWKNUM) ret)); - - free_temp(file); - return tmp_number((AWKNUM) 0); - @} -@end example - -Now comes the tedious part: filling in the array. 
Only a few of the -calls are shown here, since they all follow the same pattern: - -@example - /* fill in the array */ - aptr = assoc_lookup(array, tmp_string("name", 4), FALSE); - *aptr = dupnode(file); - - aptr = assoc_lookup(array, tmp_string("mode", 4), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_mode); - - aptr = assoc_lookup(array, tmp_string("pmode", 5), FALSE); - pmode = format_mode(sbuf.st_mode); - *aptr = make_string(pmode, strlen(pmode)); -@end example - -When done, we free the temporary value containing the @value{FN}, -set the return value, and return: - -@example - free_temp(file); - - /* Set the return value */ - set_value(tmp_number((AWKNUM) ret)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -@} -@end example - -@cindex conventions, programming -@cindex programming conventions -Finally, it's necessary to provide the ``glue'' that loads the -new function(s) into @command{gawk}. By convention, each library has -a routine named @code{dlload} that does the job: - -@example -/* dlload --- load new builtins in this library */ - -NODE * -dlload(tree, dl) -NODE *tree; -void *dl; -@{ - make_builtin("chdir", do_chdir, 1); - make_builtin("stat", do_stat, 2); - return tmp_number((AWKNUM) 0); -@} -@end example - -And that's it! As an exercise, consider adding functions to -implement system calls such as @code{chown}, @code{chmod}, and @code{umask}. - -@node Using Internal File Ops, , Internal File Ops, Sample Library -@appendixsubsubsec Integrating the Extensions - -@cindex Linux -@cindex GNU/Linux -Now that the code is written, it must be possible to add it at -runtime to the running @command{gawk} interpreter. First, the -code must be compiled. Assuming that the functions are in -a file named @file{filefuncs.c}, and @var{idir} is the location -of the @command{gawk} include files, -the following steps create -a GNU/Linux shared library: - -@example -$ gcc -shared -DHAVE_CONFIG_H -c -O -g -I@var{idir} filefuncs.c -$ ld -o filefuncs.so -shared filefuncs.o -@end example - -@cindex @code{extension} built-in function -Once the library exists, it is loaded by calling the @code{extension} -built-in function. -This function takes two arguments: the name of the -library to load and the name of a function to call when the library -is first loaded. This function adds the new functions to @command{gawk}. 
-It returns the value returned by the initialization function -within the shared library: - -@example -# file testff.awk -BEGIN @{ - extension("./filefuncs.so", "dlload") - - chdir(".") # no-op - - data[1] = 1 # force `data' to be an array - print "Info for testff.awk" - ret = stat("testff.awk", data) - print "ret =", ret - for (i in data) - printf "data[\"%s\"] = %s\n", i, data[i] - print "testff.awk modified:", - strftime("%m %d %y %H:%M:%S", data["mtime"]) -@} -@end example - -Here are the results of running the program: - -@example -$ gawk -f testff.awk -@print{} Info for testff.awk -@print{} ret = 0 -@print{} data["blksize"] = 4096 -@print{} data["mtime"] = 932361936 -@print{} data["mode"] = 33188 -@print{} data["type"] = file -@print{} data["dev"] = 2065 -@print{} data["gid"] = 10 -@print{} data["ino"] = 878597 -@print{} data["ctime"] = 971431797 -@print{} data["blocks"] = 2 -@print{} data["nlink"] = 1 -@print{} data["name"] = testff.awk -@print{} data["atime"] = 971608519 -@print{} data["pmode"] = -rw-r--r-- -@print{} data["size"] = 607 -@print{} data["uid"] = 2076 -@print{} testff.awk modified: 07 19 99 08:25:36 -@end example - -@node Future Extensions, , Dynamic Extensions, Notes -@appendixsec Probable Future Extensions -@ignore -From emory!scalpel.netlabs.com!lwall Tue Oct 31 12:43:17 1995 -Return-Path: -Message-Id: <9510311732.AA28472@scalpel.netlabs.com> -To: arnold@skeeve.atl.ga.us (Arnold D. Robbins) -Subject: Re: May I quote you? -In-Reply-To: Your message of "Tue, 31 Oct 95 09:11:00 EST." - -Date: Tue, 31 Oct 95 09:32:46 -0800 -From: Larry Wall - -: Greetings. I am working on the release of gawk 3.0. Part of it will be a -: thoroughly updated manual. One of the sections deals with planned future -: extensions and enhancements. I have the following at the beginning -: of it: -: -: @cindex PERL -: @cindex Wall, Larry -: @display -: @i{AWK is a language similar to PERL, only considerably more elegant.} @* -: Arnold Robbins -: @sp 1 -: @i{Hey!} @* -: Larry Wall -: @end display -: -: Before I actually release this for publication, I wanted to get your -: permission to quote you. (Hopefully, in the spirit of much of GNU, the -: implied humor is visible... :-) - -I think that would be fine. - -Larry -@end ignore -@cindex PERL -@cindex Wall, Larry -@cindex Robbins, Arnold -@quotation -@i{AWK is a language similar to PERL, only considerably more elegant.}@* -Arnold Robbins - -@i{Hey!}@* -Larry Wall -@end quotation - -This @value{SECTION} briefly lists extensions and possible improvements -that indicate the directions we are -currently considering for @command{gawk}. The file @file{FUTURES} in the -@command{gawk} distribution lists these extensions as well. - -Following is a list of probable future changes visible at the -@command{awk} language level: - -@c these are ordered by likelihood -@table @asis -@item Loadable Module Interface -It is not clear that the @command{awk}-level interface to the -modules facility is as good as it should be. The interface needs to be -redesigned, particularly taking namespace issues into account, as -well as possibly including issues such as library search path order -and versioning. - -@item @code{RECLEN} variable for fixed length records -Along with @code{FIELDWIDTHS}, this would speed up the processing of -fixed-length records. -@code{PROCINFO["RS"]} would be @code{"RS"} or @code{"RECLEN"}, -depending upon which kind of record processing is in effect. 
- -@item Additional @code{printf} specifiers -The 1999 ISO C standard added a number of additional @code{printf} -format specifiers. These should be evaluated for possible inclusion -in @command{gawk}. - -@ignore -@item A @samp{%'d} flag -Add @samp{%'d} for putting in commas in formatting numeric values. -@end ignore - -@item Databases -It may be possible to map a GDBM/NDBM/SDBM file into an @command{awk} array. - -@item Large Character Sets -It would be nice if @command{gawk} could handle UTF-8 and other -character sets that are larger than eight bits. - -@item More @code{lint} warnings -There are more things that could be checked for portability. -@end table - -Following is a list of probable improvements that will make @command{gawk}'s -source code easier to work with: - -@table @asis -@item Loadable Module Mechanics -The current extension mechanism works -(@pxref{Dynamic Extensions, ,Adding New Built-in Functions to @command{gawk}}), -but is rather primitive. It requires a fair amount of manual work -to create and integrate a loadable module. -Nor is the current mechanism as portable as might be desired. -The GNU @command{libtool} package provides a number of features that -would make using loadable modules much easier. -@command{gawk} should be changed to use @command{libtool}. - -@item Loadable Module Internals -The API to its internals that @command{gawk} ``exports'' should be revised. -Too many things are needlessly exposed. A new API should be designed -and implemented to make module writing easier. - -@item Better Array Subscript Management -@command{gawk}'s management of array subscript storage could use revamping, -so that using the same value to index multiple arrays only -stores one copy of the index value. - -@item Integrating the DBUG Library -Integrating Fred Fish's DBUG library would be helpful during development, -but it's a lot of work to do. -@end table - -Following is a list of probable improvements that will make @command{gawk} -perform better: - -@table @asis -@item An Improved Version of @code{dfa} -The @code{dfa} pattern matcher from GNU @command{grep} has some -problems. Either a new version or a fixed one will deal with some -important regexp matching issues. - -@c NEXT ED: remove this item. awka and mawk do these respectively -@item Compilation of @command{awk} programs -@command{gawk} uses a Bison (YACC-like) -parser to convert the script given it into a syntax tree; the syntax -tree is then executed by a simple recursive evaluator. This method incurs -a lot of overhead, since the recursive evaluator performs many procedure -calls to do even the simplest things. - -It should be possible for @command{gawk} to convert the script's parse tree -into a C program which the user would then compile, using the normal -C compiler and a special @command{gawk} library to provide all the needed -functions (regexps, fields, associative arrays, type coercion, and so on). - -An easier possibility might be for an intermediate phase of @command{gawk} to -convert the parse tree into a linear byte code form like the one used -in GNU Emacs Lisp. The recursive evaluator would then be replaced by -a straight line byte code interpreter that would be intermediate in speed -between running a compiled program and doing what @command{gawk} does -now. -@end table - -Finally, -the programs in the test suite could use documenting in this @value{DOCUMENT}. - -@xref{Additions, ,Making Additions to @command{gawk}}, -if you are interested in tackling any of these projects. 
- -@node Basic Concepts, Glossary, Notes, Top -@appendix Basic Programming Concepts -@cindex basic programming concepts -@cindex programming concepts, basic - -This @value{APPENDIX} attempts to define some of the basic concepts -and terms that are used throughout the rest of this @value{DOCUMENT}. -As this @value{DOCUMENT} is specifically about @command{awk}, -and not about computer programming in general, the coverage here -is by necessity fairly cursory and simplistic. -(If you need more background, there are many -other introductory texts that you should refer to instead.) - -@menu -* Basic High Level:: The high level view. -* Basic Data Typing:: A very quick intro to data types. -* Floating Point Issues:: Stuff to know about floating-point numbers. -@end menu - -@node Basic High Level, Basic Data Typing, Basic Concepts, Basic Concepts -@appendixsec What a Program Does - -@cindex processing data -At the most basic level, the job of a program is to process -some input data and produce results. - -@c NEXT ED: Use real images here -@iftex -@tex -\expandafter\ifx\csname graph\endcsname\relax \csname newbox\endcsname\graph\fi -\expandafter\ifx\csname graphtemp\endcsname\relax \csname newdimen\endcsname\graphtemp\fi -\setbox\graph=\vtop{\vskip 0pt\hbox{% - \special{pn 20}% - \special{pa 2425 200}% - \special{pa 2850 200}% - \special{fp}% - \special{sh 1.000}% - \special{pn 20}% - \special{pa 2750 175}% - \special{pa 2850 200}% - \special{pa 2750 225}% - \special{pa 2750 175}% - \special{fp}% - \special{pn 20}% - \special{pa 850 200}% - \special{pa 1250 200}% - \special{fp}% - \special{sh 1.000}% - \special{pn 20}% - \special{pa 1150 175}% - \special{pa 1250 200}% - \special{pa 1150 225}% - \special{pa 1150 175}% - \special{fp}% - \special{pn 20}% - \special{pa 2950 400}% - \special{pa 3650 400}% - \special{pa 3650 0}% - \special{pa 2950 0}% - \special{pa 2950 400}% - \special{fp}% - \special{pn 10}% - \special{ar 1800 200 450 200 0 6.28319}% - \graphtemp=.5ex\advance\graphtemp by 0.200in - \rlap{\kern 3.300in\lower\graphtemp\hbox to 0pt{\hss Results\hss}}% - \graphtemp=.5ex\advance\graphtemp by 0.200in - \rlap{\kern 1.800in\lower\graphtemp\hbox to 0pt{\hss Program\hss}}% - \special{pn 10}% - \special{pa 0 400}% - \special{pa 700 400}% - \special{pa 700 0}% - \special{pa 0 0}% - \special{pa 0 400}% - \special{fp}% - \graphtemp=.5ex\advance\graphtemp by 0.200in - \rlap{\kern 0.350in\lower\graphtemp\hbox to 0pt{\hss Data\hss}}% - \hbox{\vrule depth0.400in width0pt height 0pt}% - \kern 3.650in - }% -}% -\centerline{\box\graph} -@end tex -@end iftex -@ifnottex -@example - _______ -+------+ / \ +---------+ -| Data | -----> < Program > -----> | Results | -+------+ \_______/ +---------+ -@end example -@end ifnottex - -@cindex compiled programs -@cindex programs, compiled -@cindex interpreted programs -@cindex programs, interpreted -The ``program'' in the figure can be either a compiled -program@footnote{Compiled programs are typically written -in lower-level languages such as C, C++, Fortran, or Ada, -and then translated, or @dfn{compiled}, into a form that -the computer can execute directly.} -(such as @command{ls}), -or it may be @dfn{interpreted}. In the latter case, a machine-executable -program such as @command{awk} reads your program, and then uses the -instructions in your program to process the data. 
- -@cindex programming, basic steps -When you write a program, it usually consists -of the following, very basic set of steps: - -@c NEXT ED: Use real images here -@iftex -@tex -\expandafter\ifx\csname graph\endcsname\relax \csname newbox\endcsname\graph\fi -\expandafter\ifx\csname graphtemp\endcsname\relax \csname newdimen\endcsname\graphtemp\fi -\setbox\graph=\vtop{\vskip 0pt\hbox{% - \graphtemp=.5ex\advance\graphtemp by 0.600in - \rlap{\kern 2.800in\lower\graphtemp\hbox to 0pt{\hss Yes\hss}}% - \graphtemp=.5ex\advance\graphtemp by 0.100in - \rlap{\kern 3.300in\lower\graphtemp\hbox to 0pt{\hss No\hss}}% - \special{pn 8}% - \special{pa 2100 1000}% - \special{pa 1600 1000}% - \special{pa 1600 1000}% - \special{pa 1600 300}% - \special{fp}% - \special{sh 1.000}% - \special{pn 8}% - \special{pa 1575 400}% - \special{pa 1600 300}% - \special{pa 1625 400}% - \special{pa 1575 400}% - \special{fp}% - \special{pn 8}% - \special{pa 2600 500}% - \special{pa 2600 900}% - \special{fp}% - \special{sh 1.000}% - \special{pn 8}% - \special{pa 2625 800}% - \special{pa 2600 900}% - \special{pa 2575 800}% - \special{pa 2625 800}% - \special{fp}% - \special{pn 8}% - \special{pa 3200 200}% - \special{pa 4000 200}% - \special{fp}% - \special{sh 1.000}% - \special{pn 8}% - \special{pa 3900 175}% - \special{pa 4000 200}% - \special{pa 3900 225}% - \special{pa 3900 175}% - \special{fp}% - \special{pn 8}% - \special{pa 1400 200}% - \special{pa 2100 200}% - \special{fp}% - \special{sh 1.000}% - \special{pn 8}% - \special{pa 2000 175}% - \special{pa 2100 200}% - \special{pa 2000 225}% - \special{pa 2000 175}% - \special{fp}% - \special{pn 8}% - \special{ar 2600 1000 400 100 0 6.28319}% - \graphtemp=.5ex\advance\graphtemp by 1.000in - \rlap{\kern 2.600in\lower\graphtemp\hbox to 0pt{\hss Process\hss}}% - \special{pn 8}% - \special{pa 2200 400}% - \special{pa 3100 400}% - \special{pa 3100 0}% - \special{pa 2200 0}% - \special{pa 2200 400}% - \special{fp}% - \graphtemp=.5ex\advance\graphtemp by 0.200in - \rlap{\kern 2.688in\lower\graphtemp\hbox to 0pt{\hss More Data?\hss}}% - \special{pn 8}% - \special{ar 650 200 650 200 0 6.28319}% - \graphtemp=.5ex\advance\graphtemp by 0.200in - \rlap{\kern 0.613in\lower\graphtemp\hbox to 0pt{\hss Initialization\hss}}% - \special{pn 8}% - \special{ar 0 200 0 0 0 6.28319}% - \special{pn 8}% - \special{ar 4550 200 450 100 0 6.28319}% - \graphtemp=.5ex\advance\graphtemp by 0.200in - \rlap{\kern 4.600in\lower\graphtemp\hbox to 0pt{\hss Clean Up\hss}}% - \hbox{\vrule depth1.100in width0pt height 0pt}% - \kern 5.000in - }% -}% -\centerline{\box\graph} -@end tex -@end iftex -@ifnottex -@example - ______ -+----------------+ / More \ No +----------+ -| Initialization | -------> < Data > -------> | Clean Up | -+----------------+ ^ \ ? / +----------+ - | +--+-+ - | | Yes - | | - | V - | +---------+ - +-----+ Process | - +---------+ -@end example -@end ifnottex - -@table @asis -@item Initialization -These are the things you do before actually starting to process -data, such as checking arguments, initializing any data you need -to work with, and so on. -This step corresponds to @command{awk}'s @code{BEGIN} rule -(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}). - -If you were baking a cake, this might consist of laying out all the -mixing bowls and the baking pan, and making sure you have all the -ingredients that you need. - -@item Processing -This is where the actual work is done. Your program reads data, -one logical chunk at a time, and processes it as appropriate. 
-
-In most programming languages, you have to manually manage the reading
-of data, checking to see if there is more each time you read a chunk.
-@command{awk}'s pattern-action paradigm
-(@pxref{Getting Started, ,Getting Started with @command{awk}})
-handles the mechanics of this for you.
-
-In baking a cake, the processing corresponds to the actual labor:
-breaking eggs, mixing the flour, water, and other ingredients, and then putting the cake
-into the oven.
-
-@item Clean Up
-Once you've processed all the data, you may have things you need to
-do before exiting.
-This step corresponds to @command{awk}'s @code{END} rule
-(@pxref{BEGIN/END, ,The @code{BEGIN} and @code{END} Special Patterns}).
-
-After the cake comes out of the oven, you still have to wrap it in
-plastic wrap to keep anyone from tasting it, as well as wash
-the mixing bowls and other utensils.
-@end table
-
-@cindex algorithm, definition of
-An @dfn{algorithm} is a detailed set of instructions necessary to accomplish
-a task, or process data. It is much the same as a recipe for baking
-a cake. Programs implement algorithms. Often, it is up to you to design
-the algorithm and implement it, simultaneously.
-
-@cindex record, definition of
-@cindex fields, definition of
-The ``logical chunks'' we talked about previously are called @dfn{records},
-similar to the records a company keeps on employees, a school keeps for
-students, or a doctor keeps for patients.
-Each record has many component parts, such as first and last names,
-date of birth, address, and so on. The component parts are referred
-to as the @dfn{fields} of the record.
-
-The act of reading data is termed @dfn{input}, and that of
-generating results, not too surprisingly, is termed @dfn{output}.
-They are often referred to together as ``Input/Output,''
-and even more often, as ``I/O'' for short.
-(You will also see ``input'' and ``output'' used as verbs.)
-
-@cindex data-driven languages
-@cindex language, data-driven
-@command{awk} manages the reading of data for you, as well as
-breaking it up into records and fields. Your program's job is to
-tell @command{awk} what to do with the data. You do this by describing
-@dfn{patterns} in the data to look for, and @dfn{actions} to execute
-when those patterns are seen. This @dfn{data-driven} nature of
-@command{awk} programs usually makes them both easier to write
-and easier to read.
-
-@node Basic Data Typing, Floating Point Issues, Basic High Level, Basic Concepts
-@appendixsec Data Values in a Computer
-
-@cindex variable, definition of
-In a program,
-you keep track of information and values in things called @dfn{variables}.
-A variable is just a name for a given value, such as @code{first_name},
-@code{last_name}, @code{address}, and so on.
-@command{awk} has several pre-defined variables, and it has
-special names to refer to the current input record
-and the fields of the record.
-You may also group multiple
-associated values under one name, as an array.
-
-@cindex values, numeric
-@cindex values, string
-@cindex scalar, definition of
-Data, particularly in @command{awk}, consists of either numeric
-values, such as 42 or 3.1415927, or string values.
-String values are essentially anything that's not a number, such as a name.
-Strings are sometimes referred to as @dfn{character data}, since they
-store the individual characters that comprise them.
-Individual variables, as well as numeric and string variables, are
-referred to as @dfn{scalar} values.
-Groups of values, such as arrays, are not scalars.
- -@cindex integer, definition of -@cindex floating-point, definition of -Within computers, there are two kinds of numeric values: @dfn{integers}, -and @dfn{floating-point}. -In school, integer values were referred to as ``whole'' numbers---that is, -numbers without any fractional part, such as 1, 42, or @minus{}17. -The advantage to integer numbers is that they represent values exactly. -The disadvantage is that their range is limited. On most modern systems, -this range is @minus{}2,147,483,648 to 2,147,483,647. - -@cindex unsigned integers -@cindex integer, unsigned -Integer values come in two flavors: @dfn{signed} and @dfn{unsigned}. -Signed values may be negative or positive, with the range of values just -described. -Unsigned values are always positive. On most modern systems, -the range is from 0 to 4,294,967,295. - -@cindex double-precision floating-point, definition of -@cindex single-precision floating-point, definition of -Floating-point numbers represent what are called ``real'' numbers; i.e., -those that do have a fractional part, such as 3.1415927. -The advantage to floating-point numbers is that they -can represent a much larger range of values. -The disadvantage is that there are numbers that they cannot represent -exactly. -@command{awk} uses @dfn{double-precision} floating-point numbers, which -can hold more digits than @dfn{single-precision} -floating-point numbers. -Floating-point issues are discussed more fully in -@ref{Floating Point Issues, ,Floating-Point Number Caveats}. - -At the very lowest level, computers store values as groups of binary digits, -or @dfn{bits}. Modern computers group bits into groups of eight, called @dfn{bytes}. -Advanced applications sometimes have to manipulate bits directly, -and @command{gawk} provides functions for doing so. - -@cindex null string, definition of -@cindex empty string, definition of -While you are probably used to the idea of a number without a value (i.e., zero), -it takes a bit more getting used to the idea of zero-length character data. -Nevertheless, such a thing exists. -It is called the @dfn{null string}. -The null string is character data that has no value. -In other words, it is empty. It is written in @command{awk} programs -like this: @code{""}. - -Humans are used to working in decimal; i.e., base 10. In base 10, -numbers go from 0 to 9, and then ``roll over'' into the next -column. (Remember grade school? 42 is 4 times 10 plus 2.) - -There are other number bases though. Computers commonly use base 2 -or @dfn{binary}, base 8 or @dfn{octal}, and base 16 or @dfn{hexadecimal}. -In binary, each column represents two times the value in the column to -its right. Each column may contain either a 0 or a 1. -Thus, binary 1010 represents 1 times 8, plus 0 times 4, plus 1 times 2, -plus 0 times 1, or decimal 10. -Octal and hexadecimal are discussed more in -@ref{Non-decimal-numbers, ,Octal and Hexadecimal Numbers}. - -Programs are written in programming languages. -Hundreds, if not thousands, of programming languages exist. -One of the most popular is the C programming language. -The C language had a very strong influence on the design of -the @command{awk} language. - -@cindex Kernighan, Brian -@cindex Ritchie, Dennis -There have been several versions of C. The first is often referred to -as ``K&R'' C, after the initials of Brian Kernighan and Dennis Ritchie, -the authors of the first book on C. (Dennis Ritchie created the language, -and Brian Kernighan was one of the creators of @command{awk}.) 
- -In the mid-1980's, an effort began to produce an international standard -for C. This work culminated in 1989, with the production of the ANSI -standard for C. This standard became an ISO standard in 1990. -Where it makes sense, POSIX @command{awk} is compatible with 1990 ISO C. - -In 1999, a revised ISO C standard was approved and released. -Future versions of @command{gawk} will be as compatible as possible -with this standard. - -@node Floating Point Issues, , Basic Data Typing, Basic Concepts -@appendixsec Floating-Point Number Caveats - -As mentioned earlier, floating-point numbers represent what are called -``real'' numbers; i.e., those that have a fractional part. @command{awk} -uses double-precision floating-point numbers to represent all -numeric values. This @value{SECTION} describes some of the issues -involved in using floating-point numbers. - -There is a very nice paper on floating-point arithmetic by -David Goldberg, @cite{What Every -Computer Scientist Should Know About Floating-point Arithmetic}, -@cite{ACM Computing Surveys} @strong{23}, 1 (1991-03), -5-48.@footnote{@uref{http://www.validgh.com/goldberg/paper.ps}} -This is worth reading if you are interested in the details, -but it does require a background in Computer Science. - -Internally, @command{awk} keeps both the numeric value -(double-precision floating-point) and the string value for a variable. -Separately, @command{awk} keeps -track of what type the variable has -(@pxref{Typing and Comparison, ,Variable Typing and Comparison Expressions}), -which plays a role in how variables are used in comparisons. - -It is important to note that the string value for a number may not -reflect the full value (all the digits) that the numeric value -actually contains. -The following program (@file{values.awk}) illustrates this: - -@example -@{ - $1 = $2 + $3 - # see it for what it is - printf("$1 = %.12g\n", $1) - # use CONVFMT - a = "<" $1 ">" - print "a =", a -@group - # use OFMT - print "$1 =", $1 -@end group -@} -@end example - -@noindent -This program shows the full value of the sum of @code{$2} and @code{$3} -using @code{printf}, and then prints the string values obtained -from both automatic conversion (via @code{CONVFMT}) and -from printing (via @code{OFMT}). - -Here is what happens when the program is run: - -@example -$ echo 2 3.654321 1.2345678 | awk -f values.awk -@print{} $1 = 4.8888888 -@print{} a = <4.88889> -@print{} $1 = 4.88889 -@end example - -This makes it clear that the full numeric value is different from -what the default string representations show. - -@code{CONVFMT}'s default value is @code{"%.6g"}, which yields a value with -at least six significant digits. For some applications, you might want to -change it to specify more precision. -On most modern machines, most of the time, -17 digits is enough to capture a floating-point number's -value exactly.@footnote{Pathological cases can require up to -752 digits (!), but we doubt that you need to worry about this.} - -@cindex floating-point, precision issues -Unlike numbers in the abstract sense (such as what you studied in high school -or college math), numbers stored in computers are limited in certain ways. -They cannot represent an infinite number of digits, nor can they always -represent things exactly. -In particular, -floating-point numbers cannot -always represent values exactly. 
Here is an example: - -@example -$ awk '@{ printf("%010d\n", $1 * 100) @}' -515.79 -@print{} 0000051579 -515.80 -@print{} 0000051579 -515.81 -@print{} 0000051580 -515.82 -@print{} 0000051582 -@kbd{Ctrl-d} -@end example - -@noindent -This shows that some values can be represented exactly, -whereas others are only approximated. This is not a ``bug'' -in @command{awk}, but simply an artifact of how computers -represent numbers. - -@cindex negative zero -@cindex positive zero -@cindex zero, negative vs.@: positive -@cindex floating-point, positive and negative values for zero -Another peculiarity of floating-point numbers on modern systems -is that they often have more than one representation for the number zero! -In particular, it is possible to represent ``minus zero'' as well as -regular, or ``positive'' zero. - -This example shows that negative and positive zero are distinct values -when stored internally, but that they are in fact equal to each other, -as well as to ``regular'' zero: - -@smallexample -$ gawk 'BEGIN @{ mz = -0 ; pz = 0 -> printf "-0 = %g, +0 = %g, (-0 == +0) -> %d\n", mz, pz, mz == pz -> printf "mz == 0 -> %d, pz == 0 -> %d\n", mz == 0, pz == 0 -> @}' -@print{} -0 = -0, +0 = 0, (-0 == +0) -> 1 -@print{} mz == 0 -> 1, pz == 0 -> 1 -@end smallexample - -It helps to keep this in mind should you process numeric data -that contains negative zero values; the fact that the zero is negative -is noted and can affect comparisons. - -@node Glossary, Copying, Basic Concepts, Top -@unnumbered Glossary - -@table @asis -@item Action -A series of @command{awk} statements attached to a rule. If the rule's -pattern matches an input record, @command{awk} executes the -rule's action. Actions are always enclosed in curly braces. -(@xref{Action Overview, ,Actions}.) - -@cindex Spencer, Henry -@cindex @command{sed} utility -@cindex amazing @command{awk} assembler (@command{aaa}) -@item Amazing @command{awk} Assembler -Henry Spencer at the University of Toronto wrote a retargetable assembler -completely as @command{sed} and @command{awk} scripts. It is thousands -of lines long, including machine descriptions for several eight-bit -microcomputers. It is a good example of a program that would have been -better written in another language. -You can get it from @uref{ftp://ftp.freefriends.org/arnold/Awkstuff/aaa.tgz}. - -@cindex amazingly workable formatter (@command{awf}) -@cindex @command{awf} (amazingly workable formatter) program -@item Amazingly Workable Formatter (@command{awf}) -Henry Spencer at the University of Toronto wrote a formatter that accepts -a large subset of the @samp{nroff -ms} and @samp{nroff -man} formatting -commands, using @command{awk} and @command{sh}. -It is available over the Internet -from @uref{ftp://ftp.freefriends.org/arnold/Awkstuff/awf.tgz}. - -@item Anchor -The regexp metacharacters @samp{^} and @samp{$}, which force the match -to the beginning or end of the string, respectively. - -@cindex ANSI -@item ANSI -The American National Standards Institute. This organization produces -many standards, among them the standards for the C and C++ programming -languages. -These standards often become international standards as well. See also -``ISO.'' - -@item Array -A grouping of multiple values under the same name. -Most languages just provide sequential arrays. -@command{awk} provides associative arrays. - -@item Assertion -A statement in a program that a condition is true at this point in the program. -Useful for reasoning about how a program is supposed to behave. 
- -@item Assignment -An @command{awk} expression that changes the value of some @command{awk} -variable or data object. An object that you can assign to is called an -@dfn{lvalue}. The assigned values are called @dfn{rvalues}. -@xref{Assignment Ops, ,Assignment Expressions}. - -@item Associative Array -Arrays in which the indices may be numbers or strings, not just -sequential integers in a fixed range. - -@item @command{awk} Language -The language in which @command{awk} programs are written. - -@item @command{awk} Program -An @command{awk} program consists of a series of @dfn{patterns} and -@dfn{actions}, collectively known as @dfn{rules}. For each input record -given to the program, the program's rules are all processed in turn. -@command{awk} programs may also contain function definitions. - -@item @command{awk} Script -Another name for an @command{awk} program. - -@item Bash -The GNU version of the standard shell -@iftex -(the @b{B}ourne-@b{A}gain @b{SH}ell). -@end iftex -@ifnottex -(the Bourne-Again SHell). -@end ifnottex -See also ``Bourne Shell.'' - -@item BBS -See ``Bulletin Board System.'' - -@item Bit -Short for ``Binary Digit.'' -All values in computer memory ultimately reduce to binary digits: values -that are either zero or one. -Groups of bits may be interpreted differently---as integers, -floating-point numbers, character data, addresses of other -memory objects, or other data. -@command{awk} lets you work with floating-point numbers and strings. -@command{gawk} lets you manipulate bit values with the built-in -functions described in -@ref{Bitwise Functions, ,Using @command{gawk}'s Bit Manipulation Functions}. - -Computers are often defined by how many bits they use to represent integer -values. Typical systems are 32-bit systems, but 64-bit systems are -becoming increasingly popular, and 16-bit systems are waning in -popularity. - -@item Boolean Expression -Named after the English mathematician Boole. See also ``Logical Expression.'' - -@item Bourne Shell -The standard shell (@file{/bin/sh}) on Unix and Unix-like systems, -originally written by Steven R.@: Bourne. -Many shells (@command{bash}, @command{ksh}, @command{pdksh}, @command{zsh}) are -generally upwardly compatible with the Bourne shell. - -@item Built-in Function -The @command{awk} language provides built-in functions that perform various -numerical, I/O-related, and string computations. Examples are -@code{sqrt} (for the square root of a number) and @code{substr} (for a -substring of a string). -@command{gawk} provides functions for timestamp management, bit manipulation, -and runtime string translation. -(@xref{Built-in, ,Built-in Functions}.) - -@item Built-in Variable -@code{ARGC}, -@code{ARGV}, -@code{CONVFMT}, -@code{ENVIRON}, -@code{FILENAME}, -@code{FNR}, -@code{FS}, -@code{NF}, -@code{NR}, -@code{OFMT}, -@code{OFS}, -@code{ORS}, -@code{RLENGTH}, -@code{RSTART}, -@code{RS}, -and -@code{SUBSEP} -are the variables that have special meaning to @command{awk}. -In addition, -@code{ARGIND}, -@code{BINMODE}, -@code{ERRNO}, -@code{FIELDWIDTHS}, -@code{IGNORECASE}, -@code{LINT}, -@code{PROCINFO}, -@code{RT}, -and -@code{TEXTDOMAIN} -are the variables that have special meaning to @command{gawk}. -Changing some of them affects @command{awk}'s running environment. -(@xref{Built-in Variables}.) - -@item Braces -See ``Curly Braces.'' - -@item Bulletin Board System -A computer system allowing users to log in and read and/or leave messages -for other users of the system, much like leaving paper notes on a bulletin -board. 
- -@item C -The system programming language that most GNU software is written in. The -@command{awk} programming language has C-like syntax, and this @value{DOCUMENT} -points out similarities between @command{awk} and C when appropriate. - -In general, @command{gawk} attempts to be as similar to the 1990 version -of ISO C as makes sense. Future versions of @command{gawk} may adopt features -from the newer 1999 standard, as appropriate. - -@item C++ -A popular object-oriented programming language derived from C. - -@cindex ISO 8859-1 -@cindex ISO Latin-1 -@cindex character sets (machine character encodings) -@item Character Set -The set of numeric codes used by a computer system to represent the -characters (letters, numbers, punctuation, etc.) of a particular country -or place. The most common character set in use today is ASCII (American -Standard Code for Information Interchange). Many European -countries use an extension of ASCII known as ISO-8859-1 (ISO Latin-1). - -@cindex @command{chem} utility -@item CHEM -A preprocessor for @command{pic} that reads descriptions of molecules -and produces @command{pic} input for drawing them. -It was written in @command{awk} -by Brian Kernighan and Jon Bentley, and is available from -@uref{http://cm.bell-labs.com/netlib/typesetting/chem.gz}. - -@item Coprocess -A subordinate program with which two-way communications is possible. - -@cindex compiled programs -@item Compiler -A program that translates human-readable source code into -machine-executable object code. The object code is then executed -directly by the computer. -See also ``Interpreter.'' - -@item Compound Statement -A series of @command{awk} statements, enclosed in curly braces. Compound -statements may be nested. -(@xref{Statements, ,Control Statements in Actions}.) - -@item Concatenation -Concatenating two strings means sticking them together, one after another, -producing a new string. For example, the string @samp{foo} concatenated with -the string @samp{bar} gives the string @samp{foobar}. -(@xref{Concatenation, ,String Concatenation}.) - -@item Conditional Expression -An expression using the @samp{?:} ternary operator, such as -@samp{@var{expr1} ? @var{expr2} : @var{expr3}}. The expression -@var{expr1} is evaluated; if the result is true, the value of the whole -expression is the value of @var{expr2}; otherwise the value is -@var{expr3}. In either case, only one of @var{expr2} and @var{expr3} -is evaluated. (@xref{Conditional Exp, ,Conditional Expressions}.) - -@item Comparison Expression -A relation that is either true or false, such as @samp{(a < b)}. -Comparison expressions are used in @code{if}, @code{while}, @code{do}, -and @code{for} -statements, and in patterns to select which input records to process. -(@xref{Typing and Comparison, ,Variable Typing and Comparison Expressions}.) - -@item Curly Braces -The characters @samp{@{} and @samp{@}}. Curly braces are used in -@command{awk} for delimiting actions, compound statements, and function -bodies. - -@cindex dark corner -@item Dark Corner -An area in the language where specifications often were (or still -are) not clear, leading to unexpected or undesirable behavior. 
-Such areas are marked in this @value{DOCUMENT} with -@iftex -the picture of a flashlight in the margin -@end iftex -@ifnottex -``(d.c.)'' in the text -@end ifnottex -and are indexed under the heading ``dark corner.'' - -@item Data Driven -A description of @command{awk} programs, where you specify the data you -are interested in processing, and what to do when that data is seen. - -@item Data Objects -These are numbers and strings of characters. Numbers are converted into -strings and vice versa, as needed. -(@xref{Conversion, ,Conversion of Strings and Numbers}.) - -@item Deadlock -The situation in which two communicating processes are each waiting -for the other to perform an action. - -@item Double-Precision -An internal representation of numbers that can have fractional parts. -Double-precision numbers keep track of more digits than do single-precision -numbers, but operations on them are sometimes more expensive. This is the way -@command{awk} stores numeric values. It is the C type @code{double}. - -@item Dynamic Regular Expression -A dynamic regular expression is a regular expression written as an -ordinary expression. It could be a string constant, such as -@code{"foo"}, but it may also be an expression whose value can vary. -(@xref{Computed Regexps, , Using Dynamic Regexps}.) - -@item Environment -A collection of strings, of the form @var{name@code{=}val}, that each -program has available to it. Users generally place values into the -environment in order to provide information to various programs. Typical -examples are the environment variables @env{HOME} and @env{PATH}. - -@item Empty String -See ``Null String.'' - -@cindex epoch, definition of -@item Epoch -The date used as the ``beginning of time'' for timestamps. -Time values in Unix systems are represented as seconds since the epoch, -with library functions available for converting these values into -standard date and time formats. - -The epoch on Unix and POSIX systems is 1970-01-01 00:00:00 UTC. -See also ``GMT'' and ``UTC.'' - -@item Escape Sequences -A special sequence of characters used for describing non-printing -characters, such as @samp{\n} for newline or @samp{\033} for the ASCII -ESC (Escape) character. (@xref{Escape Sequences}.) - -@item FDL -See ``Free Documentation License.'' - -@item Field -When @command{awk} reads an input record, it splits the record into pieces -separated by whitespace (or by a separator regexp that you can -change by setting the built-in variable @code{FS}). Such pieces are -called fields. If the pieces are of fixed length, you can use the built-in -variable @code{FIELDWIDTHS} to describe their lengths. -(@xref{Field Separators, ,Specifying How Fields Are Separated}, -and -@ref{Constant Size, ,Reading Fixed-Width Data}.) - -@item Flag -A variable whose truth value indicates the existence or non-existence -of some condition. - -@item Floating-Point Number -Often referred to in mathematical terms as a ``rational'' or real number, -this is just a number that can have a fractional part. -See also ``Double-Precision'' and ``Single-Precision.'' - -@item Format -Format strings are used to control the appearance of output in the -@code{strftime} and @code{sprintf} functions, and are used in the -@code{printf} statement as well. Also, data conversions from numbers to strings -are controlled by the format string contained in the built-in variable -@code{CONVFMT}. (@xref{Control Letters, ,Format-Control Letters}.) 
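A minimal sketch of both uses of format strings described in the entry above: an explicit format given to @code{printf}, and the implicit number-to-string conversion controlled by @code{CONVFMT} (the particular formats shown are only illustrative):

@example
BEGIN @{
    printf "%.2f\n", 3.14159    # explicit format string: prints 3.14
    CONVFMT = "%.2g"
    s = (2 / 3) ""              # number-to-string conversion uses CONVFMT
    print s                     # prints 0.67
@}
@end example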
- -@item Free Documentation License -This document describes the terms under which this @value{DOCUMENT} -is published and may be copied. (@xref{GNU Free Documentation License}.) - -@item Function -A specialized group of statements used to encapsulate general -or program-specific tasks. @command{awk} has a number of built-in -functions, and also allows you to define your own. -(@xref{Functions}.) - -@item FSF -See ``Free Software Foundation.'' - -@cindex FSF -@cindex Free Software Foundation -@cindex Stallman, Richard -@item Free Software Foundation -A non-profit organization dedicated -to the production and distribution of freely distributable software. -It was founded by Richard M.@: Stallman, the author of the original -Emacs editor. GNU Emacs is the most widely used version of Emacs today. - -@item @command{gawk} -The GNU implementation of @command{awk}. - -@cindex GPL -@cindex General Public License -@cindex GNU General Public License -@item General Public License -This document describes the terms under which @command{gawk} and its source -code may be distributed. (@xref{Copying, ,GNU General Public License}.) - -@item GMT -``Greenwich Mean Time.'' -This is the old term for UTC. -It is the time of day used as the epoch for Unix and POSIX systems. -See also ``Epoch'' and ``UTC.'' - -@cindex FSF -@cindex Free Software Foundation -@cindex GNU Project -@item GNU -``GNU's not Unix''. An on-going project of the Free Software Foundation -to create a complete, freely distributable, POSIX-compliant computing -environment. - -@item GNU/Linux -A variant of the GNU system using the Linux kernel, instead of the -Free Software Foundation's Hurd kernel. -Linux is a stable, efficient, full-featured clone of Unix that has -been ported to a variety of architectures. -It is most popular on PC-class systems, but runs well on a variety of -other systems too. -The Linux kernel source code is available under the terms of the GNU General -Public License, which is perhaps its most important aspect. - -@item GPL -See ``General Public License.'' - -@item Hexadecimal -Base 16 notation, where the digits are @code{0}--@code{9} and -@code{A}--@code{F}, with @samp{A} -representing 10, @samp{B} representing 11, and so on, up to @samp{F} for 15. -Hexadecimal numbers are written in C using a leading @samp{0x}, -to indicate their base. Thus, @code{0x12} is 18 (1 times 16 plus 2). - -@item I/O -Abbreviation for ``Input/Output,'' the act of moving data into and/or -out of a running program. - -@item Input Record -A single chunk of data that is read in by @command{awk}. Usually, an @command{awk} input -record consists of one line of text. -(@xref{Records, ,How Input Is Split into Records}.) - -@item Integer -A whole number, i.e., a number that does not have a fractional part. - -@item Internationalization -The process of writing or modifying a program so -that it can use multiple languages without requiring -further source code changes. - -@cindex interpreted programs -@item Interpreter -A program that reads human-readable source code directly, and uses -the instructions in it to process data and produce results. -@command{awk} is typically (but not always) implemented as an interpreter. -See also ``Compiler.'' - -@item Interval Expression -A component of a regular expression that lets you specify repeated matches of -some part of the regexp. Interval expressions were not traditionally available -in @command{awk} programs. - -@cindex ISO -@item ISO -The International Standards Organization. 
-This organization produces international standards for many things, including -programming languages, such as C and C++. -In the computer arena, important standards like those for C, C++, and POSIX -become both American national and ISO international standards simultaneously. -This @value{DOCUMENT} refers to Standard C as ``ISO C'' throughout. - -@item Keyword -In the @command{awk} language, a keyword is a word that has special -meaning. Keywords are reserved and may not be used as variable names. - -@command{gawk}'s keywords are: -@code{BEGIN}, -@code{END}, -@code{if}, -@code{else}, -@code{while}, -@code{do@dots{}while}, -@code{for}, -@code{for@dots{}in}, -@code{break}, -@code{continue}, -@code{delete}, -@code{next}, -@code{nextfile}, -@code{function}, -@code{func}, -and -@code{exit}. - -@cindex LGPL -@cindex Lesser General Public License -@cindex GNU Lesser General Public License -@item Lesser General Public License -This document describes the terms under which binary library archives -or shared objects, -and their source code may be distributed. - -@item Linux -See ``GNU/Linux.'' - -@item LGPL -See ``Lesser General Public License.'' - -@item Localization -The process of providing the data necessary for an -internationalized program to work in a particular language. - -@item Logical Expression -An expression using the operators for logic, AND, OR, and NOT, written -@samp{&&}, @samp{||}, and @samp{!} in @command{awk}. Often called Boolean -expressions, after the mathematician who pioneered this kind of -mathematical logic. - -@item Lvalue -An expression that can appear on the left side of an assignment -operator. In most languages, lvalues can be variables or array -elements. In @command{awk}, a field designator can also be used as an -lvalue. - -@item Matching -The act of testing a string against a regular expression. If the -regexp describes the contents of the string, it is said to @dfn{match} it. - -@item Metacharacters -Characters used within a regexp that do not stand for themselves. -Instead, they denote regular expression operations, such as repetition, -grouping, or alternation. - -@item Null String -A string with no characters in it. It is represented explicitly in -@command{awk} programs by placing two double quote characters next to -each other (@code{""}). It can appear in input data by having two successive -occurrences of the field separator appear next to each other. - -@item Number -A numeric-valued data object. Modern @command{awk} implementations use -double-precision floating-point to represent numbers. -Very old @command{awk} implementations use single-precision floating-point. - -@item Octal -Base-eight notation, where the digits are @code{0}--@code{7}. -Octal numbers are written in C using a leading @samp{0}, -to indicate their base. Thus, @code{013} is 11 (one times 8 plus 3). - -@cindex P1003.2 POSIX standard -@item P1003.2 -See ``POSIX.'' - -@item Pattern -Patterns tell @command{awk} which input records are interesting to which -rules. - -A pattern is an arbitrary conditional expression against which input is -tested. If the condition is satisfied, the pattern is said to @dfn{match} -the input record. A typical pattern might compare the input record against -a regular expression. (@xref{Pattern Overview, ,Pattern Elements}.) - -@item POSIX -The name for a series of standards -@c being developed by the IEEE -that specify a Portable Operating System interface. The ``IX'' denotes -the Unix heritage of these standards. 
The main standard of interest for -@command{awk} users is -@cite{IEEE Standard for Information Technology, Standard 1003.2-1992, -Portable Operating System Interface (POSIX) Part 2: Shell and Utilities}. -Informally, this standard is often referred to as simply ``P1003.2.'' - -@item Precedence -The order in which operations are performed when operators are used -without explicit parentheses. - -@item Private -Variables and/or functions that are meant for use exclusively by library -functions and not for the main @command{awk} program. Special care must be -taken when naming such variables and functions. -(@xref{Library Names, , Naming Library Function Global Variables}.) - -@item Range (of input lines) -A sequence of consecutive lines from the input file(s). A pattern -can specify ranges of input lines for @command{awk} to process or it can -specify single lines. (@xref{Pattern Overview, ,Pattern Elements}.) - -@item Recursion -When a function calls itself, either directly or indirectly. -If this isn't clear, refer to the entry for ``recursion.'' - -@item Redirection -Redirection means performing input from something other than the standard input -stream, or performing output to something other than the standard output stream. - -You can redirect the output of the @code{print} and @code{printf} statements -to a file or a system command, using the @samp{>}, @samp{>>}, @samp{|}, and @samp{|&} -operators. You can redirect input to the @code{getline} statement using -the @samp{<}, @samp{|}, and @samp{|&} operators. -(@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}, -and @ref{Getline, ,Explicit Input with @code{getline}}.) - -@item Regexp -Short for @dfn{regular expression}. A regexp is a pattern that denotes a -set of strings, possibly an infinite set. For example, the regexp -@samp{R.*xp} matches any string starting with the letter @samp{R} -and ending with the letters @samp{xp}. In @command{awk}, regexps are -used in patterns and in conditional expressions. Regexps may contain -escape sequences. (@xref{Regexp, ,Regular Expressions}.) - -@item Regular Expression -See ``regexp.'' - -@item Regular Expression Constant -A regular expression constant is a regular expression written within -slashes, such as @code{/foo/}. This regular expression is chosen -when you write the @command{awk} program and cannot be changed during -its execution. (@xref{Regexp Usage, ,How to Use Regular Expressions}.) - -@item Rule -A segment of an @command{awk} program that specifies how to process single -input records. A rule consists of a @dfn{pattern} and an @dfn{action}. -@command{awk} reads an input record; then, for each rule, if the input record -satisfies the rule's pattern, @command{awk} executes the rule's action. -Otherwise, the rule does nothing for that input record. - -@item Rvalue -A value that can appear on the right side of an assignment operator. -In @command{awk}, essentially every expression has a value. These values -are rvalues. - -@item Scalar -A single value, be it a number or a string. -Regular variables are scalars; arrays and functions are not. - -@item Search Path -In @command{gawk}, a list of directories to search for @command{awk} program source files. -In the shell, a list of directories to search for executable programs. - -@item Seed -The initial value, or starting point, for a sequence of random numbers. - -@item @command{sed} -See ``Stream Editor.'' - -@item Shell -The command interpreter for Unix and POSIX-compliant systems. 
-The shell works both interactively, and as a programming language -for batch files, or shell scripts. - -@item Short-Circuit -The nature of the @command{awk} logical operators @samp{&&} and @samp{||}. -If the value of the entire expression is determinable from evaluating just -the lefthand side of these operators, the righthand side is not -evaluated. -(@xref{Boolean Ops, ,Boolean Expressions}.) - -@item Side Effect -A side effect occurs when an expression has an effect aside from merely -producing a value. Assignment expressions, increment and decrement -expressions, and function calls have side effects. -(@xref{Assignment Ops, ,Assignment Expressions}.) - -@item Single-Precision -An internal representation of numbers that can have fractional parts. -Single-precision numbers keep track of fewer digits than do double-precision -numbers, but operations on them are sometimes less expensive in terms of CPU time. -This is the type used by some very old versions of @command{awk} to store -numeric values. It is the C type @code{float}. - -@item Space -The character generated by hitting the space bar on the keyboard. - -@item Special File -A @value{FN} interpreted internally by @command{gawk}, instead of being handed -directly to the underlying operating system---for example, @file{/dev/stderr}. -(@xref{Special Files, ,Special @value{FFN}s in @command{gawk}}.) - -@item Stream Editor -A program that reads records from an input stream and processes them one -or more at a time. This is in contrast with batch programs, which may -expect to read their input files in entirety before starting to do -anything, as well as with interactive programs which require input from the -user. - -@item String -A datum consisting of a sequence of characters, such as @samp{I am a -string}. Constant strings are written with double quotes in the -@command{awk} language and may contain escape sequences. -(@xref{Escape Sequences}.) - -@item Tab -The character generated by hitting the @kbd{TAB} key on the keyboard. -It usually expands to up to eight spaces upon output. - -@item Text Domain -A unique name that identifies an application. -Used for grouping messages that are translated at runtime -into the local language. - -@item Timestamp -A value in the ``seconds since the epoch'' format used by Unix -and POSIX systems. Used for the @command{gawk} functions -@code{mktime}, @code{strftime}, and @code{systime}. -See also ``Epoch'' and ``UTC.'' - -@cindex Linux -@cindex GNU/Linux -@cindex Unix -@cindex BSD-based operating systems -@cindex NetBSD -@cindex FreeBSD -@cindex OpenBSD -@item Unix -A computer operating system originally developed in the early 1970's at -AT&T Bell Laboratories. It initially became popular in universities around -the world and later moved into commercial environments as a software -development system and network server system. There are many commercial -versions of Unix, as well as several work-alike systems whose source code -is freely available (such as GNU/Linux, NetBSD, FreeBSD, and OpenBSD). - -@item UTC -The accepted abbreviation for ``Universal Coordinated Time.'' -This is standard time in Greenwich, England, which is used as a -reference time for day and date calculations. -See also ``Epoch'' and ``GMT.'' - -@item Whitespace -A sequence of space, tab, or newline characters occurring inside an input -record or a string. 
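As a minimal sketch of the default whitespace-driven field splitting mentioned in the ``Field'' and ``Whitespace'' entries:

@example
$ echo "  alpha   beta  gamma " | awk '@{ print NF, $2 @}'
@end example

This prints @samp{3 beta}; leading and trailing whitespace is discarded, and each run of spaces or tabs separates two adjacent fields.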
-@end table - -@node Copying, GNU Free Documentation License, Glossary, Top -@unnumbered GNU General Public License -@center Version 2, June 1991 - -@display -Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc. -59 Temple Place, Suite 330, Boston, MA 02111, USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display - -@c fakenode --- for prepinfo -@unnumberedsec Preamble - - The licenses for most software are designed to take away your -freedom to share and change it. By contrast, the GNU General Public -License is intended to guarantee your freedom to share and change free -software---to make sure the software is free for all its users. This -General Public License applies to most of the Free Software -Foundation's software and to any other program whose authors commit to -using it. (Some other Free Software Foundation software is covered by -the GNU Library General Public License instead.) You can apply it to -your programs, too. - - When we speak of free software, we are referring to freedom, not -price. Our General Public Licenses are designed to make sure that you -have the freedom to distribute copies of free software (and charge for -this service if you wish), that you receive source code or can get it -if you want it, that you can change the software or use pieces of it -in new free programs; and that you know you can do these things. - - To protect your rights, we need to make restrictions that forbid -anyone to deny you these rights or to ask you to surrender the rights. -These restrictions translate to certain responsibilities for you if you -distribute copies of the software, or if you modify it. - - For example, if you distribute copies of such a program, whether -gratis or for a fee, you must give the recipients all the rights that -you have. You must make sure that they, too, receive or can get the -source code. And you must show them these terms so they know their -rights. - - We protect your rights with two steps: (1) copyright the software, and -(2) offer you this license which gives you legal permission to copy, -distribute and/or modify the software. - - Also, for each author's protection and ours, we want to make certain -that everyone understands that there is no warranty for this free -software. If the software is modified by someone else and passed on, we -want its recipients to know that what they have is not the original, so -that any problems introduced by others will not reflect on the original -authors' reputations. - - Finally, any free program is threatened constantly by software -patents. We wish to avoid the danger that redistributors of a free -program will individually obtain patent licenses, in effect making the -program proprietary. To prevent this, we have made it clear that any -patent must be licensed for everyone's free use or not licensed at all. - - The precise terms and conditions for copying, distribution and -modification follow. - -@ifnotinfo -@c fakenode --- for prepinfo -@unnumberedsec Terms and Conditions for Copying, Distribution and Modification -@end ifnotinfo -@ifinfo -@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -@end ifinfo - -@enumerate 0 -@item -This License applies to any program or other work which contains -a notice placed by the copyright holder saying it may be distributed -under the terms of this General Public License. 
The ``Program'', below, -refers to any such program or work, and a ``work based on the Program'' -means either the Program or any derivative work under copyright law: -that is to say, a work containing the Program or a portion of it, -either verbatim or with modifications and/or translated into another -language. (Hereinafter, translation is included without limitation in -the term ``modification''.) Each licensee is addressed as ``you''. - -Activities other than copying, distribution and modification are not -covered by this License; they are outside its scope. The act of -running the Program is not restricted, and the output from the Program -is covered only if its contents constitute a work based on the -Program (independent of having been made by running the Program). -Whether that is true depends on what the Program does. - -@item -You may copy and distribute verbatim copies of the Program's -source code as you receive it, in any medium, provided that you -conspicuously and appropriately publish on each copy an appropriate -copyright notice and disclaimer of warranty; keep intact all the -notices that refer to this License and to the absence of any warranty; -and give any other recipients of the Program a copy of this License -along with the Program. - -You may charge a fee for the physical act of transferring a copy, and -you may at your option offer warranty protection in exchange for a fee. - -@item -You may modify your copy or copies of the Program or any portion -of it, thus forming a work based on the Program, and copy and -distribute such modifications or work under the terms of Section 1 -above, provided that you also meet all of these conditions: - -@enumerate a -@item -You must cause the modified files to carry prominent notices -stating that you changed the files and the date of any change. - -@item -You must cause any work that you distribute or publish, that in -whole or in part contains or is derived from the Program or any -part thereof, to be licensed as a whole at no charge to all third -parties under the terms of this License. - -@item -If the modified program normally reads commands interactively -when run, you must cause it, when started running for such -interactive use in the most ordinary way, to print or display an -announcement including an appropriate copyright notice and a -notice that there is no warranty (or else, saying that you provide -a warranty) and that users may redistribute the program under -these conditions, and telling the user how to view a copy of this -License. (Exception: if the Program itself is interactive but -does not normally print such an announcement, your work based on -the Program is not required to print an announcement.) -@end enumerate - -These requirements apply to the modified work as a whole. If -identifiable sections of that work are not derived from the Program, -and can be reasonably considered independent and separate works in -themselves, then this License, and its terms, do not apply to those -sections when you distribute them as separate works. But when you -distribute the same sections as part of a whole which is a work based -on the Program, the distribution of the whole must be on the terms of -this License, whose permissions for other licensees extend to the -entire whole, and thus to each and every part regardless of who wrote it. 
- -Thus, it is not the intent of this section to claim rights or contest -your rights to work written entirely by you; rather, the intent is to -exercise the right to control the distribution of derivative or -collective works based on the Program. - -In addition, mere aggregation of another work not based on the Program -with the Program (or with a work based on the Program) on a volume of -a storage or distribution medium does not bring the other work under -the scope of this License. - -@item -You may copy and distribute the Program (or a work based on it, -under Section 2) in object code or executable form under the terms of -Sections 1 and 2 above provided that you also do one of the following: - -@enumerate a -@item -Accompany it with the complete corresponding machine-readable -source code, which must be distributed under the terms of Sections -1 and 2 above on a medium customarily used for software interchange; or, - -@item -Accompany it with a written offer, valid for at least three -years, to give any third party, for a charge no more than your -cost of physically performing source distribution, a complete -machine-readable copy of the corresponding source code, to be -distributed under the terms of Sections 1 and 2 above on a medium -customarily used for software interchange; or, - -@item -Accompany it with the information you received as to the offer -to distribute corresponding source code. (This alternative is -allowed only for noncommercial distribution and only if you -received the program in object code or executable form with such -an offer, in accord with Subsection b above.) -@end enumerate - -The source code for a work means the preferred form of the work for -making modifications to it. For an executable work, complete source -code means all the source code for all modules it contains, plus any -associated interface definition files, plus the scripts used to -control compilation and installation of the executable. However, as a -special exception, the source code distributed need not include -anything that is normally distributed (in either source or binary -form) with the major components (compiler, kernel, and so on) of the -operating system on which the executable runs, unless that component -itself accompanies the executable. - -If distribution of executable or object code is made by offering -access to copy from a designated place, then offering equivalent -access to copy the source code from the same place counts as -distribution of the source code, even though third parties are not -compelled to copy the source along with the object code. - -@item -You may not copy, modify, sublicense, or distribute the Program -except as expressly provided under this License. Any attempt -otherwise to copy, modify, sublicense or distribute the Program is -void, and will automatically terminate your rights under this License. -However, parties who have received copies, or rights, from you under -this License will not have their licenses terminated so long as such -parties remain in full compliance. - -@item -You are not required to accept this License, since you have not -signed it. However, nothing else grants you permission to modify or -distribute the Program or its derivative works. These actions are -prohibited by law if you do not accept this License. 
Therefore, by -modifying or distributing the Program (or any work based on the -Program), you indicate your acceptance of this License to do so, and -all its terms and conditions for copying, distributing or modifying -the Program or works based on it. - -@item -Each time you redistribute the Program (or any work based on the -Program), the recipient automatically receives a license from the -original licensor to copy, distribute or modify the Program subject to -these terms and conditions. You may not impose any further -restrictions on the recipients' exercise of the rights granted herein. -You are not responsible for enforcing compliance by third parties to -this License. - -@item -If, as a consequence of a court judgment or allegation of patent -infringement or for any other reason (not limited to patent issues), -conditions are imposed on you (whether by court order, agreement or -otherwise) that contradict the conditions of this License, they do not -excuse you from the conditions of this License. If you cannot -distribute so as to satisfy simultaneously your obligations under this -License and any other pertinent obligations, then as a consequence you -may not distribute the Program at all. For example, if a patent -license would not permit royalty-free redistribution of the Program by -all those who receive copies directly or indirectly through you, then -the only way you could satisfy both it and this License would be to -refrain entirely from distribution of the Program. - -If any portion of this section is held invalid or unenforceable under -any particular circumstance, the balance of the section is intended to -apply and the section as a whole is intended to apply in other -circumstances. - -It is not the purpose of this section to induce you to infringe any -patents or other property right claims or to contest validity of any -such claims; this section has the sole purpose of protecting the -integrity of the free software distribution system, which is -implemented by public license practices. Many people have made -generous contributions to the wide range of software distributed -through that system in reliance on consistent application of that -system; it is up to the author/donor to decide if he or she is willing -to distribute software through any other system and a licensee cannot -impose that choice. - -This section is intended to make thoroughly clear what is believed to -be a consequence of the rest of this License. - -@item -If the distribution and/or use of the Program is restricted in -certain countries either by patents or by copyrighted interfaces, the -original copyright holder who places the Program under this License -may add an explicit geographical distribution limitation excluding -those countries, so that distribution is permitted only in or among -countries not thus excluded. In such case, this License incorporates -the limitation as if written in the body of this License. - -@item -The Free Software Foundation may publish revised and/or new versions -of the General Public License from time to time. Such new versions will -be similar in spirit to the present version, but may differ in detail to -address new problems or concerns. - -Each version is given a distinguishing version number. If the Program -specifies a version number of this License which applies to it and ``any -later version'', you have the option of following the terms and conditions -either of that version or of any later version published by the Free -Software Foundation. 
If the Program does not specify a version number of -this License, you may choose any version ever published by the Free Software -Foundation. - -@item -If you wish to incorporate parts of the Program into other free -programs whose distribution conditions are different, write to the author -to ask for permission. For software which is copyrighted by the Free -Software Foundation, write to the Free Software Foundation; we sometimes -make exceptions for this. Our decision will be guided by the two goals -of preserving the free status of all derivatives of our free software and -of promoting the sharing and reuse of software generally. - -@ifnotinfo -@c fakenode --- for prepinfo -@heading NO WARRANTY -@end ifnotinfo -@ifinfo -@center NO WARRANTY -@end ifinfo - -@item -BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY -FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW@. EXCEPT WHEN -OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES -PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED -OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE@. THE ENTIRE RISK AS -TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU@. SHOULD THE -PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, -REPAIR OR CORRECTION. - -@item -IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING -WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR -REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, -INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING -OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED -TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY -YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER -PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE -POSSIBILITY OF SUCH DAMAGES. -@end enumerate - -@ifnotinfo -@c fakenode --- for prepinfo -@heading END OF TERMS AND CONDITIONS -@end ifnotinfo -@ifinfo -@center END OF TERMS AND CONDITIONS -@end ifinfo - -@page -@c fakenode --- for prepinfo -@unnumberedsec How to Apply These Terms to Your New Programs - - If you develop a new program, and you want it to be of the greatest -possible use to the public, the best way to achieve this is to make it -free software which everyone can redistribute and change under these terms. - - To do so, attach the following notices to the program. It is safest -to attach them to the start of each source file to most effectively -convey the exclusion of warranty; and each file should have at least -the ``copyright'' line and a pointer to where the full notice is found. - -@smallexample -@var{one line to give the program's name and an idea of what it does.} -Copyright (C) @var{year} @var{name of author} - -This program is free software; you can redistribute it and/or -modify it under the terms of the GNU General Public License -as published by the Free Software Foundation; either version 2 -of the License, or (at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE@. See the -GNU General Public License for more details. 
- -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. -@end smallexample - -Also add information on how to contact you by electronic and paper mail. - -If the program is interactive, make it output a short notice like this -when it starts in an interactive mode: - -@smallexample -Gnomovision version 69, Copyright (C) @var{year} @var{name of author} -Gnomovision comes with ABSOLUTELY NO WARRANTY; for details -type `show w'. This is free software, and you are welcome -to redistribute it under certain conditions; type `show c' -for details. -@end smallexample - -The hypothetical commands @samp{show w} and @samp{show c} should show -the appropriate parts of the General Public License. Of course, the -commands you use may be called something other than @samp{show w} and -@samp{show c}; they could even be mouse-clicks or menu items---whatever -suits your program. - -You should also get your employer (if you work as a programmer) or your -school, if any, to sign a ``copyright disclaimer'' for the program, if -necessary. Here is a sample; alter the names: - -@smallexample -@group -Yoyodyne, Inc., hereby disclaims all copyright -interest in the program `Gnomovision' -(which makes passes at compilers) written -by James Hacker. - -@var{signature of Ty Coon}, 1 April 1989 -Ty Coon, President of Vice -@end group -@end smallexample - -This General Public License does not permit incorporating your program into -proprietary programs. If your program is a subroutine library, you may -consider it more useful to permit linking proprietary applications with the -library. If this is what you want to do, use the GNU Lesser General -Public License instead of this License. - -@node GNU Free Documentation License, Index, Copying, Top -@unnumbered GNU Free Documentation License -@center Version 1.1, March 2000 -@cindex FDL -@cindex Free Documentation License -@cindex GNU Free Documentation License - -@display -Copyright (C) 2000 Free Software Foundation, Inc. -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display -@sp 1 -@enumerate 0 -@item -PREAMBLE - -The purpose of this License is to make a manual, textbook, or other -written document ``free'' in the sense of freedom: to assure everyone -the effective freedom to copy and redistribute it, with or without -modifying it, either commercially or noncommercially. Secondarily, -this License preserves for the author and publisher a way to get -credit for their work, while not being considered responsible for -modifications made by others. - -This License is a kind of ``copyleft'', which means that derivative -works of the document must themselves be free in the same sense. It -complements the GNU General Public License, which is a copyleft -license designed for free software. - -We have designed this License in order to use it for manuals for free -software, because free software needs free documentation: a free -program should come with manuals providing the same freedoms that the -software does. But this License is not limited to software manuals; -it can be used for any textual work, regardless of subject matter or -whether it is published as a printed book. We recommend this License -principally for works whose purpose is instruction or reference. 
- -@sp 1 -@item -APPLICABILITY AND DEFINITIONS - -This License applies to any manual or other work that contains a -notice placed by the copyright holder saying it can be distributed -under the terms of this License. The ``Document'', below, refers to any -such manual or work. Any member of the public is a licensee, and is -addressed as ``you''. - -A ``Modified Version'' of the Document means any work containing the -Document or a portion of it, either copied verbatim, or with -modifications and/or translated into another language. - -A ``Secondary Section'' is a named appendix or a front-matter section of -the Document that deals exclusively with the relationship of the -publishers or authors of the Document to the Document's overall subject -(or to related matters) and contains nothing that could fall directly -within that overall subject. (For example, if the Document is in part a -textbook of mathematics, a Secondary Section may not explain any -mathematics.) The relationship could be a matter of historical -connection with the subject or with related matters, or of legal, -commercial, philosophical, ethical or political position regarding -them. - -The ``Invariant Sections'' are certain Secondary Sections whose titles -are designated, as being those of Invariant Sections, in the notice -that says that the Document is released under this License. - -The ``Cover Texts'' are certain short passages of text that are listed, -as Front-Cover Texts or Back-Cover Texts, in the notice that says that -the Document is released under this License. - -A ``Transparent'' copy of the Document means a machine-readable copy, -represented in a format whose specification is available to the -general public, whose contents can be viewed and edited directly and -straightforwardly with generic text editors or (for images composed of -pixels) generic paint programs or (for drawings) some widely available -drawing editor, and that is suitable for input to text formatters or -for automatic translation to a variety of formats suitable for input -to text formatters. A copy made in an otherwise Transparent file -format whose markup has been designed to thwart or discourage -subsequent modification by readers is not Transparent. A copy that is -not ``Transparent'' is called ``Opaque''. - -Examples of suitable formats for Transparent copies include plain -ASCII without markup, Texinfo input format, LaTeX input format, SGML -or XML using a publicly available DTD, and standard-conforming simple -HTML designed for human modification. Opaque formats include -PostScript, PDF, proprietary formats that can be read and edited only -by proprietary word processors, SGML or XML for which the DTD and/or -processing tools are not generally available, and the -machine-generated HTML produced by some word processors for output -purposes only. - -The ``Title Page'' means, for a printed book, the title page itself, -plus such following pages as are needed to hold, legibly, the material -this License requires to appear in the title page. For works in -formats which do not have any title page as such, ``Title Page'' means -the text near the most prominent appearance of the work's title, -preceding the beginning of the body of the text. 
-@sp 1 -@item -VERBATIM COPYING - -You may copy and distribute the Document in any medium, either -commercially or noncommercially, provided that this License, the -copyright notices, and the license notice saying this License applies -to the Document are reproduced in all copies, and that you add no other -conditions whatsoever to those of this License. You may not use -technical measures to obstruct or control the reading or further -copying of the copies you make or distribute. However, you may accept -compensation in exchange for copies. If you distribute a large enough -number of copies you must also follow the conditions in section 3. - -You may also lend copies, under the same conditions stated above, and -you may publicly display copies. -@sp 1 -@item -COPYING IN QUANTITY - -If you publish printed copies of the Document numbering more than 100, -and the Document's license notice requires Cover Texts, you must enclose -the copies in covers that carry, clearly and legibly, all these Cover -Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on -the back cover. Both covers must also clearly and legibly identify -you as the publisher of these copies. The front cover must present -the full title with all words of the title equally prominent and -visible. You may add other material on the covers in addition. -Copying with changes limited to the covers, as long as they preserve -the title of the Document and satisfy these conditions, can be treated -as verbatim copying in other respects. - -If the required texts for either cover are too voluminous to fit -legibly, you should put the first ones listed (as many as fit -reasonably) on the actual cover, and continue the rest onto adjacent -pages. - -If you publish or distribute Opaque copies of the Document numbering -more than 100, you must either include a machine-readable Transparent -copy along with each Opaque copy, or state in or with each Opaque copy -a publicly-accessible computer-network location containing a complete -Transparent copy of the Document, free of added material, which the -general network-using public has access to download anonymously at no -charge using public-standard network protocols. If you use the latter -option, you must take reasonably prudent steps, when you begin -distribution of Opaque copies in quantity, to ensure that this -Transparent copy will remain thus accessible at the stated location -until at least one year after the last time you distribute an Opaque -copy (directly or through your agents or retailers) of that edition to -the public. - -It is requested, but not required, that you contact the authors of the -Document well before redistributing any large number of copies, to give -them a chance to provide you with an updated version of the Document. -@sp 1 -@item -MODIFICATIONS - -You may copy and distribute a Modified Version of the Document under -the conditions of sections 2 and 3 above, provided that you release -the Modified Version under precisely this License, with the Modified -Version filling the role of the Document, thus licensing distribution -and modification of the Modified Version to whoever possesses a copy -of it. In addition, you must do these things in the Modified Version: - -@enumerate A -@item -Use in the Title Page (and on the covers, if any) a title distinct -from that of the Document, and from those of previous versions -(which should, if there were any, be listed in the History section -of the Document). 
You may use the same title as a previous version -if the original publisher of that version gives permission. - -@item -List on the Title Page, as authors, one or more persons or entities -responsible for authorship of the modifications in the Modified -Version, together with at least five of the principal authors of the -Document (all of its principal authors, if it has less than five). - -@item -State on the Title page the name of the publisher of the -Modified Version, as the publisher. - -@item -Preserve all the copyright notices of the Document. - -@item -Add an appropriate copyright notice for your modifications -adjacent to the other copyright notices. - -@item -Include, immediately after the copyright notices, a license notice -giving the public permission to use the Modified Version under the -terms of this License, in the form shown in the Addendum below. - -@item -Preserve in that license notice the full lists of Invariant Sections -and required Cover Texts given in the Document's license notice. - -@item -Include an unaltered copy of this License. - -@item -Preserve the section entitled ``History'', and its title, and add to -it an item stating at least the title, year, new authors, and -publisher of the Modified Version as given on the Title Page. If -there is no section entitled ``History'' in the Document, create one -stating the title, year, authors, and publisher of the Document as -given on its Title Page, then add an item describing the Modified -Version as stated in the previous sentence. - -@item -Preserve the network location, if any, given in the Document for -public access to a Transparent copy of the Document, and likewise -the network locations given in the Document for previous versions -it was based on. These may be placed in the ``History'' section. -You may omit a network location for a work that was published at -least four years before the Document itself, or if the original -publisher of the version it refers to gives permission. - -@item -In any section entitled ``Acknowledgements'' or ``Dedications'', -preserve the section's title, and preserve in the section all the -substance and tone of each of the contributor acknowledgements -and/or dedications given therein. - -@item -Preserve all the Invariant Sections of the Document, -unaltered in their text and in their titles. Section numbers -or the equivalent are not considered part of the section titles. - -@item -Delete any section entitled ``Endorsements''. Such a section -may not be included in the Modified Version. - -@item -Do not retitle any existing section as ``Endorsements'' -or to conflict in title with any Invariant Section. -@end enumerate - -If the Modified Version includes new front-matter sections or -appendices that qualify as Secondary Sections and contain no material -copied from the Document, you may at your option designate some or all -of these sections as invariant. To do this, add their titles to the -list of Invariant Sections in the Modified Version's license notice. -These titles must be distinct from any other section titles. - -You may add a section entitled ``Endorsements'', provided it contains -nothing but endorsements of your Modified Version by various -parties--for example, statements of peer review or that the text has -been approved by an organization as the authoritative definition of a -standard. 
- -You may add a passage of up to five words as a Front-Cover Text, and a -passage of up to 25 words as a Back-Cover Text, to the end of the list -of Cover Texts in the Modified Version. Only one passage of -Front-Cover Text and one of Back-Cover Text may be added by (or -through arrangements made by) any one entity. If the Document already -includes a cover text for the same cover, previously added by you or -by arrangement made by the same entity you are acting on behalf of, -you may not add another; but you may replace the old one, on explicit -permission from the previous publisher that added the old one. - -The author(s) and publisher(s) of the Document do not by this License -give permission to use their names for publicity for or to assert or -imply endorsement of any Modified Version. -@sp 1 -@item -COMBINING DOCUMENTS - -You may combine the Document with other documents released under this -License, under the terms defined in section 4 above for modified -versions, provided that you include in the combination all of the -Invariant Sections of all of the original documents, unmodified, and -list them all as Invariant Sections of your combined work in its -license notice. - -The combined work need only contain one copy of this License, and -multiple identical Invariant Sections may be replaced with a single -copy. If there are multiple Invariant Sections with the same name but -different contents, make the title of each such section unique by -adding at the end of it, in parentheses, the name of the original -author or publisher of that section if known, or else a unique number. -Make the same adjustment to the section titles in the list of -Invariant Sections in the license notice of the combined work. - -In the combination, you must combine any sections entitled ``History'' -in the various original documents, forming one section entitled -``History''; likewise combine any sections entitled ``Acknowledgements'', -and any sections entitled ``Dedications''. You must delete all sections -entitled ``Endorsements.'' -@sp 1 -@item -COLLECTIONS OF DOCUMENTS - -You may make a collection consisting of the Document and other documents -released under this License, and replace the individual copies of this -License in the various documents with a single copy that is included in -the collection, provided that you follow the rules of this License for -verbatim copying of each of the documents in all other respects. - -You may extract a single document from such a collection, and distribute -it individually under this License, provided you insert a copy of this -License into the extracted document, and follow this License in all -other respects regarding verbatim copying of that document. -@sp 1 -@item -AGGREGATION WITH INDEPENDENT WORKS - -A compilation of the Document or its derivatives with other separate -and independent documents or works, in or on a volume of a storage or -distribution medium, does not as a whole count as a Modified Version -of the Document, provided no compilation copyright is claimed for the -compilation. Such a compilation is called an ``aggregate'', and this -License does not apply to the other self-contained works thus compiled -with the Document, on account of their being thus compiled, if they -are not themselves derivative works of the Document. 
- -If the Cover Text requirement of section 3 is applicable to these -copies of the Document, then if the Document is less than one quarter -of the entire aggregate, the Document's Cover Texts may be placed on -covers that surround only the Document within the aggregate. -Otherwise they must appear on covers around the whole aggregate. -@sp 1 -@item -TRANSLATION - -Translation is considered a kind of modification, so you may -distribute translations of the Document under the terms of section 4. -Replacing Invariant Sections with translations requires special -permission from their copyright holders, but you may include -translations of some or all Invariant Sections in addition to the -original versions of these Invariant Sections. You may include a -translation of this License provided that you also include the -original English version of this License. In case of a disagreement -between the translation and the original English version of this -License, the original English version will prevail. -@sp 1 -@item -TERMINATION - -You may not copy, modify, sublicense, or distribute the Document except -as expressly provided for under this License. Any other attempt to -copy, modify, sublicense or distribute the Document is void, and will -automatically terminate your rights under this License. However, -parties who have received copies, or rights, from you under this -License will not have their licenses terminated so long as such -parties remain in full compliance. -@sp 1 -@item -FUTURE REVISIONS OF THIS LICENSE - -The Free Software Foundation may publish new, revised versions -of the GNU Free Documentation License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. See -@uref{http://www.gnu.org/copyleft/}. - -Each version of the License is given a distinguishing version number. -If the Document specifies that a particular numbered version of this -License ``or any later version'' applies to it, you have the option of -following the terms and conditions either of that specified version or -of any later version that has been published (not as a draft) by the -Free Software Foundation. If the Document does not specify a version -number of this License, you may choose any version ever published (not -as a draft) by the Free Software Foundation. - -@end enumerate - -@c fakenode --- for prepinfo -@unnumberedsec ADDENDUM: How to use this License for your documents - -To use this License in a document you have written, include a copy of -the License in the document and put the following copyright and -license notices just after the title page: - -@smallexample -@group - - Copyright (C) @var{year} @var{your name}. - Permission is granted to copy, distribute and/or modify this document - under the terms of the GNU Free Documentation License, Version 1.1 - or any later version published by the Free Software Foundation; - with the Invariant Sections being @var{list their titles}, with the - Front-Cover Texts being @var{list}, and with the Back-Cover Texts being @var{list}. - A copy of the license is included in the section entitled ``GNU - Free Documentation License''. -@end group -@end smallexample -If you have no Invariant Sections, write ``with no Invariant Sections'' -instead of saying which ones are invariant. If you have no -Front-Cover Texts, write ``no Front-Cover Texts'' instead of -``Front-Cover Texts being @var{list}''; likewise for Back-Cover Texts. 
- -If your document contains nontrivial examples of program code, we -recommend releasing these examples in parallel under your choice of -free software license, such as the GNU General Public License, -to permit their use in free software. - -@node Index, , GNU Free Documentation License, Top -@unnumbered Index -@printindex cp - -@bye - -Unresolved Issues: ------------------- -1. From ADR. - - Robert J. Chassell points out that awk programs should have some indication - of how to use them. It would be useful to perhaps have a "programming - style" section of the manual that would include this and other tips. - -2. The default AWKPATH search path should be configurable via `configure' - The default and how this changes needs to be documented. - -Consistency issues: - /.../ regexps are in @code, not @samp - ".." strings are in @code, not @samp - no @print before @dots - values of expressions in the text (@code{x} has the value 15), - should be in roman, not @code - Use tab and not TAB - Use ESC and not ESCAPE - Use space and not blank to describe the space bar's character - The term "blank" is thus basically reserved for "blank lines" etc. - To make dark corners work, the @value{DARKCORNER} has to be outside - closing `.' of a sentence and after (pxref{...}). This is - a change from earlier versions. - " " should have an @w{} around it - Use "non-" everywhere - Use @command{ftp} when talking about anonymous ftp - Use uppercase and lowercase, not "upper-case" and "lower-case" - or "upper case" and "lower case" - Use "single precision" and "double precision", not "single-precision" or "double-precision" - Use alphanumeric, not alpha-numeric - Use POSIX-compliant, not POSIX compliant - Use --foo, not -Wfoo when describing long options - Use "Bell Laboratories", but not "Bell Labs". - Use "behavior" instead of "behaviour". - Use "zeros" instead of "zeroes". - Use "nonzero" not "non-zero". - Use "runtime" not "run time" or "run-time". - Use "command-line" not "command line". - Use "online" not "on-line". - Use "whitespace" not "white space". - Use "Input/Output", not "input/output". Also "I/O", not "i/o". - Use "lefthand"/"righthand", not "left-hand"/"right-hand". - Use "workaround", not "work-around". - Use "startup"/"cleanup", not "start-up"/"clean-up" - Use @code{do}, and not @code{do}-@code{while}, except where - actually discussing the do-while. - The words "a", "and", "as", "between", "for", "from", "in", "of", - "on", "that", "the", "to", "with", and "without", - should not be capitalized in @chapter, @section etc. - "Into" and "How" should. - Search for @dfn; make sure important items are also indexed. - "e.g." should always be followed by a comma. - "i.e." should always be followed by a comma. - The numbers zero through ten should be spelled out, except when - talking about file descriptor numbers. > 10 and < 0, it's - ok to use numbers. - In tables, put command-line options in @code, while in the text, - put them in @option. - When using @strong, use "Note:" or "Caution:" with colons and - not exclamation points. Do not surround the paragraphs - with @quotation ... @end quotation. - For most cases, do NOT put a comma before "and", "or" or "but". - But exercise taste with this rule. - Don't show the awk command with a program in quotes when it's - just the program. I.e. - - { - .... - } - - not - awk '{ - ... - }' - - Do show it when showing command-line arguments, data files, etc, even - if there is no output shown. - - Use numbered lists only to show a sequential series of steps. 
- - Use @code{xxx} for the xxx operator in indexing statements, not @samp. - -Date: Wed, 13 Apr 94 15:20:52 -0400 -From: rms@gnu.org (Richard Stallman) -To: gnu-prog@gnu.org -Subject: A reminder: no pathnames in GNU - -It's a GNU convention to use the term "file name" for the name of a -file, never "pathname". We use the term "path" for search paths, -which are lists of file names. Using it for a single file name as -well is potentially confusing to users. - -So please check any documentation you maintain, if you think you might -have used "pathname". - -Note that "file name" should be two words when it appears as ordinary -text. It's ok as one word when it's a metasyntactic variable, though. - ------------------------- -ORA uses filename, thus the macro. - -Suggestions: ------------- -Enhance FIELDWIDTHS with some way to indicate "the rest of the record". -E.g., a length of 0 or -1 or something. May be "n"? - -Make FIELDWIDTHS be an array? - -% Next edition: -% 1. Talk about common extensions, those in nawk, gawk, mawk -% 2. Use @code{foo} for variables and @code{foo()} for functions -% 3. Standardize the error messages from the functions and programs -% in Chapters 12 and 13. -% 4. Nuke the BBS stuff and use something that won't be obsolete -% 5. Reorg chapters 5 & 7 like so: -%Chapter 5: -% - Constants, Variables, and Conversions -% + Constant Expressions -% + Using Regular Expression Constants -% + Variables -% + Conversion of Strings and Numbers -% - Operators -% + Arithmetic Operators -% + String Concatenation -% + Assignment Expressions -% + Increment and Decrement Operators -% - Truth Values and Conditions -% + True and False in Awk -% + Boolean Expressions -% + Conditional Expressions -% - Function Calls -% - Operator Precedence -% -%Chapter 7: -% - Array Basics -% + Introduction to Arrays -% + Referring to an Array Element -% + Assigning Array Elements -% + Basic Array Example -% + Scanning All Elements of an Array -% - The delete Statement -% - Using Numbers to Subscript Arrays -% - Using Uninitialized Variables as Subscripts -% - Multidimensional Arrays -% + Scanning Multidimensional Arrays -% - Sorting Array Values and Indices with gawk diff --git a/contrib/awk/doc/gawkinet.texi b/contrib/awk/doc/gawkinet.texi deleted file mode 100644 index 2ffb581..0000000 --- a/contrib/awk/doc/gawkinet.texi +++ /dev/null @@ -1,5075 +0,0 @@ -\input texinfo @c -*-texinfo-*- -@c %**start of header (This is for running Texinfo on a region.) -@setfilename gawkinet.info -@settitle TCP/IP Internetworking With @command{gawk} -@c %**end of header (This is for running Texinfo on a region.) - -@c inside ifinfo for older versions of texinfo.tex -@ifinfo -@dircategory GNU Packages -@direntry -* Gawkinet: (gawkinet). TCP/IP Internetworking With @command{gawk}. -@end direntry -@end ifinfo - -@iftex -@set DOCUMENT book -@set CHAPTER chapter -@set SECTION section -@set DARKCORNER @inmargin{@image{lflashlight,1cm}, @image{rflashlight,1cm}} -@end iftex -@ifinfo -@set DOCUMENT Info file -@set CHAPTER major node -@set SECTION node -@set DARKCORNER (d.c.) -@end ifinfo -@ifhtml -@set DOCUMENT web page -@set CHAPTER chapter -@set SECTION section -@set DARKCORNER (d.c.) 
-@end ifhtml - -@set FSF - -@set FN file name -@set FFN File Name - -@c merge the function and variable indexes into the concept index -@ifinfo -@synindex fn cp -@synindex vr cp -@end ifinfo -@iftex -@syncodeindex fn cp -@syncodeindex vr cp -@end iftex - -@c If "finalout" is commented out, the printed output will show -@c black boxes that mark lines that are too long. Thus, it is -@c unwise to comment it out when running a master in case there are -@c overfulls which are deemed okay. - -@iftex -@finalout -@end iftex - -@smallbook - -@c Special files are described in chapter 6 Printing Output under -@c 6.7 Special File Names in gawk. I think the networking does not -@c fit into that chapter, thus this separate document. At over 50 -@c pages, I think this is the right decision. ADR. - -@set TITLE TCP/IP Internetworking With @command{gawk} -@set EDITION 1.1 -@set UPDATE-MONTH March, 2001 -@c gawk versions: -@set VERSION 3.1 -@set PATCHLEVEL 0 - -@ifinfo -This file documents the networking features in GNU @command{awk}. - -This is Edition @value{EDITION} of @cite{@value{TITLE}}, -for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU -implementation of AWK. - -Copyright (C) 2000, 2001 Free Software Foundation, Inc. - -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.1 or -any later version published by the Free Software Foundation; with the -Invariant Sections being ``GNU General Public License'', the Front-Cover -texts being (a) (see below), and with the Back-Cover Texts being (b) -(see below). A copy of the license is included in the section entitled -``GNU Free Documentation License''. - -@enumerate a -@item -``A GNU Manual'' - -@item -``You have freedom to copy and modify this GNU Manual, like GNU -software. Copies published by the Free Software Foundation raise -funds for GNU development.'' -@end enumerate -@end ifinfo - -@setchapternewpage odd - -@titlepage -@title @value{TITLE} -@subtitle Edition @value{EDITION} -@subtitle @value{UPDATE-MONTH} -@author J@"urgen Kahrs -@author with Arnold D. Robbins - -@c Include the Distribution inside the titlepage environment so -@c that headings are turned off. Headings on and off do not work. - -@page -@vskip 0pt plus 1filll -Copyright @copyright{} 2000, 2001 Free Software Foundation, Inc. -@sp 1 -@b{User Friendly} Copyright @copyright{} 2000 J.D.@: ``Iliad'' Frazier. -Reprinted by permission. -@sp 2 - -This is Edition @value{EDITION} of @cite{@value{TITLE}}, -for the @value{VERSION}.@value{PATCHLEVEL} (or later) version of the GNU -implementation of AWK. - -@sp 2 -Published by: -@sp 1 - -Free Software Foundation @* -59 Temple Place --- Suite 330 @* -Boston, MA 02111-1307 USA @* -Phone: +1-617-542-5942 @* -Fax: +1-617-542-2652 @* -Email: @email{gnu@@gnu.org} @* -URL: @uref{http://www.gnu.org/} @* - -ISBN 1-882114-93-0 @* - -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.1 or -any later version published by the Free Software Foundation; with the -Invariant Sections being ``GNU General Public License'', the Front-Cover -texts being (a) (see below), and with the Back-Cover Texts being (b) -(see below). A copy of the license is included in the section entitled -``GNU Free Documentation License''. - -@enumerate a -@item -``A GNU Manual'' - -@item -``You have freedom to copy and modify this GNU Manual, like GNU -software. 
Copies published by the Free Software Foundation raise -funds for GNU development.'' -@end enumerate -@c @sp 2 -@c Cover art by ?????. -@end titlepage - -@iftex -@headings off -@evenheading @thispage@ @ @ @strong{@value{TITLE}} @| @| -@oddheading @| @| @strong{@thischapter}@ @ @ @thispage -@end iftex - -@ifinfo -@node Top, Preface, (dir), (dir) -@top General Introduction -@comment node-name, next, previous, up - -This file documents the networking features in GNU Awk (@command{gawk}) -version 3.1 and later. -@end ifinfo - -@menu -* Preface:: About this document. -* Introduction:: About networkiing. -* Using Networking:: Some examples. -* Some Applications and Techniques:: More extended examples. -* Links:: Where to find the stuff mentioned in this - document. -* GNU Free Documentation License:: The license for this document. -* Index:: The index. - -@detailmenu -* Stream Communications:: Sending data streams. -* Datagram Communications:: Sending self-contained messages. -* The TCP/IP Protocols:: How these models work in the Internet. -* Basic Protocols:: The basic protocols. -* Ports:: The idea behind ports. -* Making Connections:: Making TCP/IP connections. -* Gawk Special Files:: How to do @command{gawk} networking. -* Special File Fields:: The fields in the special file name. -* Comparing Protocols:: Differences between the protocols. -* File /inet/tcp:: The TCP special file. -* File /inet/udp:: The UDB special file. -* File /inet/raw:: The RAW special file. -* TCP Connecting:: Making a TCP connection. -* Troubleshooting:: Troubleshooting TCP/IP connections. -* Interacting:: Interacting with a service. -* Setting Up:: Setting up a service. -* Email:: Reading email. -* Web page:: Reading a Web page. -* Primitive Service:: A primitive Web service. -* Interacting Service:: A Web service with interaction. -* CGI Lib:: A simple CGI library. -* Simple Server:: A simple Web server. -* Caveats:: Network programming caveats. -* Challenges:: Where to go from here. -* PANIC:: An Emergency Web Server. -* GETURL:: Retrieving Web Pages. -* REMCONF:: Remote Configuration Of Embedded Systems. -* URLCHK:: Look For Changed Web Pages. -* WEBGRAB:: Extract Links From A Page. -* STATIST:: Graphing A Statistical Distribution. -* MAZE:: Walking Through A Maze In Virtual Reality. -* MOBAGWHO:: A Simple Mobile Agent. -* STOXPRED:: Stock Market Prediction As A Service. -* PROTBASE:: Searching Through A Protein Database. -@end detailmenu -@end menu - -@contents - -@node Preface, Introduction, Top, Top -@unnumbered Preface - -In May of 1997, J@"urgen Kahrs felt the need for network access -from @command{awk}, and, with a little help from me, set about adding -features to do this for @command{gawk}. At that time, he -wrote the bulk of this @value{DOCUMENT}. - -The code and documentation were added to the @command{gawk} 3.1 development -tree, and languished somewhat until I could finally get -down to some serious work on that version of @command{gawk}. -This finally happened in the middle of 2000. - -Meantime, J@"urgen wrote an article about the Internet special -files and @samp{|&} operator for @cite{Linux Journal}, and made a -networking patch for the production versions of @command{gawk} -available from his home page. -In August of 2000 (for @command{gawk} 3.0.6), this patch -also made it to the main GNU @command{ftp} distribution site. - -For release with @command{gawk}, I edited J@"urgen's prose -for English grammar and style, as he is not a native English -speaker. 
I also -rearranged the material somewhat for what I felt was a better order of -presentation, and (re)wrote some of the introductory material. - -The majority of this document and the code are his work, and the -high quality and interesting ideas speak for themselves. It is my -hope that these features will be of significant value to the @command{awk} -community. - -@sp 1 -@noindent -Arnold Robbins @* -Nof Ayalon, ISRAEL @* -March, 2001 - -@node Introduction, Using Networking, Preface, Top -@chapter Networking Concepts - -This @value{CHAPTER} provides a (necessarily) brief introduction to -computer networking concepts. For many applications of @command{gawk} -to TCP/IP networking, we hope that this is enough. For more -advanced tasks, you will need deeper background, and it may be necessary -to switch to lower-level programming in C or C++. - -There are two real-life models for the way computers send messages -to each other over a network. While the analogies are not perfect, -they are close enough to convey the major concepts. -These two models are the phone system (reliable byte-stream communications), -and the postal system (best-effort datagrams). - -@menu -* Stream Communications:: Sending data streams. -* Datagram Communications:: Sending self-contained messages. -* The TCP/IP Protocols:: How these models work in the Internet. -* Making Connections:: Making TCP/IP connections. -@end menu - -@node Stream Communications, Datagram Communications, Introduction, Introduction -@section Reliable Byte-streams (Phone Calls) - -When you make a phone call, the following steps occur: - -@enumerate -@item -You dial a number. - -@item -The phone system connects to the called party, telling -them there is an incoming call. (Their phone rings.) - -@item -The other party answers the call, or, in the case of a -computer network, refuses to answer the call. - -@item -Assuming the other party answers, the connection between -you is now a @dfn{duplex} (two-way), @dfn{reliable} (no data lost), -sequenced (data comes out in the order sent) data stream. - -@item -You and your friend may now talk freely, with the phone system -moving the data (your voices) from one end to the other. -From your point of view, you have a direct end-to-end -connection with the person on the other end. -@end enumerate - -The same steps occur in a duplex reliable computer networking connection. -There is considerably more overhead in setting up the communications, -but once it's done, data moves in both directions, reliably, in sequence. - -@node Datagram Communications, The TCP/IP Protocols, Stream Communications, Introduction -@section Best-effort Datagrams (Mailed Letters) - -Suppose you mail three different documents to your office on the -other side of the country on two different days. Doing so -entails the following. - -@enumerate -@item -Each document travels in its own envelope. - -@item -Each envelope contains both the sender and the -recipient address. - -@item -Each envelope may travel a different route to its destination. - -@item -The envelopes may arrive in a different order from the one -in which they were sent. - -@item -One or more may get lost in the mail. -(Although, fortunately, this does not occur very often.) - -@item -In a computer network, one or more @dfn{packets} -may also arrive multiple times. (This doesn't happen -with the postal system!)
- -@end enumerate - -The important characteristics of datagram communications, like -those of the postal system, are thus: - -@itemize @bullet -@item -Delivery is ``best effort;'' the data may never get there. - -@item -Each message is self-contained, including the source and -destination addresses. - -@item -Delivery is @emph{not} sequenced; packets may arrive out -of order, and/or multiple times. - -@item -Unlike the phone system, overhead is considerably lower. -It is not necessary to set up the call first. -@end itemize - -The price the user pays for the lower overhead of datagram communications -is exactly the lower reliability; it is often necessary for user-level -protocols that use datagram communications to add their own reliability -features on top of the basic communications. - -@node The TCP/IP Protocols, Making Connections, Datagram Communications, Introduction -@section The Internet Protocols - -The Internet Protocol Suite (usually referred to as just TCP/IP)@footnote{ -It should be noted that although the Internet seems to have conquered the -world, there are other networking protocol suites in existence and in use.} -consists of a number of different protocols at different levels or ``layers.'' -For our purposes, three protocols provide the fundamental communications -mechanisms. All other defined protocols are referred to as user-level -protocols (e.g., HTTP, used later in this @value{DOCUMENT}). - -@menu -* Basic Protocols:: The basic protocols. -* Ports:: The idea behind ports. -@end menu - -@node Basic Protocols, Ports, The TCP/IP Protocols, The TCP/IP Protocols -@subsection The Basic Internet Protocols - -@table @asis -@item IP -The Internet Protocol. This protocol is almost never used directly by -applications. It provides the basic packet delivery and routing infrastructure -of the Internet. Much like the phone company's switching centers or the Post -Office's trucks, it is not of much day-to-day interest to the regular user -(or programmer). -It happens to be a best-effort datagram protocol. - -@item UDP -The User Datagram Protocol. This is a best-effort datagram protocol. -It provides a small amount of extra reliability over IP, and adds -the notion of @dfn{ports}, described in @ref{Ports, ,TCP and UDP Ports}. - -@item TCP -The Transmission Control Protocol. This is a duplex, reliable, sequenced -byte-stream protocol, again layered on top of IP, and also providing the -notion of ports. This is the protocol that you will most likely use -when using @command{gawk} for network programming. -@end table - -All other user-level protocols use either TCP or UDP to do their basic -communications. Examples are SMTP (Simple Mail Transfer Protocol), -FTP (File Transfer Protocol) and HTTP (HyperText Transfer Protocol). -@cindex SMTP -@cindex FTP -@cindex HTTP - -@node Ports, , Basic Protocols, The TCP/IP Protocols -@subsection TCP and UDP Ports - -In the postal system, the address on an envelope indicates a physical -location, such as a residence or office building. But there may be -more than one person at the location; thus you have to further qualify -the recipient by putting a person or company name on the envelope. - -In the phone system, one phone number may represent an entire company, -in which case you need a person's extension number in order to -reach that individual directly. Or, when you call a home, you have to -say, ``May I please speak to ...'' before talking to the person directly. - -IP networking provides the concept of addressing.
An IP address represents -a particular computer, but no more. In order to reach the mail service -on a system, or the FTP or WWW service on a system, you have to have some -way to further specify which service you want. In the Internet Protocol suite, -this is done with @dfn{port numbers}, which represent the services, much -like an extension number used with a phone number. - -Port numbers are 16-bit integers. Unix and Unix-like systems reserve ports -below 1024 for ``well known'' services, such as SMTP, FTP, and HTTP. -Numbers above 1024 may be used by any application, although there is no -promise made that a particular port number is always available. - -@node Making Connections, , The TCP/IP Protocols, Introduction -@section Making TCP/IP Connections (And Some Terminology) - -Two terms come up repeatedly when discussing networking: -@dfn{client} and @dfn{server}. For now, we'll discuss these terms -at the @dfn{connection level}, when first establishing connections -between two processes on different systems over a network. -(Once the connection is established, the higher level, or -@dfn{application level} protocols, -such as HTTP or FTP, determine who is the client and who is the -server. Often, it turns out that the client and server are the -same in both roles.) - -@cindex server -The @dfn{server} is the system providing the service, such as the -web server or email server. It is the @dfn{host} (system) which -is @emph{connected to} in a transaction. -For this to work though, the server must be expecting connections. -Much as there has to be someone at the office building to answer -the phone@footnote{In the days before voice mail systems!}, the -server process (usually) has to be started first and waiting -for a connection. - -@cindex client -The @dfn{client} is the system requesting the service. -It is the system @emph{initiating the connection} in a transaction. -(Just as when you pick up the phone to call an office or store.) - -In the TCP/IP framework, each end of a connection is represented by a pair -of (@var{address}, @var{port}) pairs. For the duration of the connection, -the ports in use at each end are unique, and cannot be used simultaneously -by other processes on the same system. (Only after closing a connection -can a new one be built up on the same port. This is contrary to the usual -behavior of fully developed web servers which have to avoid situations -in which they are not reachable. We have to pay this price in order to -enjoy the benefits of a simple communication paradigm in @command{gawk}.) - -@cindex blocking -@cindex synchronous communications -Furthermore, once the connection is established, communications -are @dfn{synchronous}. I.e., each end waits on the other to finish -transmitting, before replying. This is much like two people in a phone -conversation. While both could talk simultaneously, doing so usually -doesn't work too well. - -In the case of TCP, the synchronicity is enforced by the protocol when -sending data. Data writes @dfn{block} until the data have been received on the -other end. For both TCP and UDP, data reads block until there is incoming -data waiting to be read. This is summarized in the following table, -where an ``X'' indicates that the given action blocks. 
- -@ifnottex -@multitable {Protocol} {Reads} {Writes} -@item TCP @tab X @tab X -@item UDP @tab X @tab -@item RAW @tab X @tab -@end multitable -@end ifnottex -@tex -\centerline{ -\vbox{\bigskip % space above the table (about 1 linespace) -% Because we have vertical rules, we can't let TeX insert interline space -% in its usual way. -\offinterlineskip -\halign{\hfil\strut# &\vrule #& \hfil#\hfil& \hfil#\hfil\cr -Protocol&&\quad Reads\quad &Writes\cr -\noalign{\hrule} -\omit&height 2pt\cr -\noalign{\hrule height0pt}% without this the rule does not extend; why? -TCP&&X&X\cr -UDP&&X&\cr -RAW&&X&\cr -}}} -@end tex - -@node Using Networking, Some Applications and Techniques, Introduction, Top -@comment node-name, next, previous, up -@chapter Networking With @command{gawk} - -@cindex network -The @command{awk} programming language was originally developed as a -pattern-matching language for writing short programs to perform -data manipulation tasks. -@command{awk}'s strength is the manipulation of textual data -that is stored in files. -It was never meant to be used for networking purposes. -To exploit its features in a -networking context, it's necessary to use an access mode for network connections -that resembles the access of files as closely as possible. - -@cindex Perl -@cindex Python -@cindex Tcl/Tk -@command{awk} is also meant to be a prototyping language. It is used -to demonstrate feasibility and to play with features and user interfaces. -This can be done with file-like handling of network -connections. -@command{gawk} trades away many of the advanced features of the TCP/IP family -of protocols for the convenience of simple connection handling. -The advanced -features are available when programming in C or Perl. In fact, the -network programming -in this @value{CHAPTER} -is very similar to what is described in books like -@cite{Internet Programming with Python}, -@cite{Advanced Perl Programming}, -or -@cite{Web Client Programming with Perl}. -But it's done here without first having to learn object-oriented ideology, underlying -languages such as Tcl/Tk, Perl, Python, or all of the libraries necessary to -extend these languages before they are ready for the Internet. - -This @value{CHAPTER} demonstrates how to use the TCP protocol. The -other protocols are much less important for most users (UDP) or even -intractable (RAW). - -@menu -* Gawk Special Files:: How to do @command{gawk} networking. -* TCP Connecting:: Making a TCP connection. -* Troubleshooting:: Troubleshooting TCP/IP connections. -* Interacting:: Interacting with a service. -* Setting Up:: Setting up a service. -* Email:: Reading email. -* Web page:: Reading a Web page. -* Primitive Service:: A primitive Web service. -* Interacting Service:: A Web service with interaction. -* Simple Server:: A simple Web server. -* Caveats:: Network programming caveats. -* Challenges:: Where to go from here. -@end menu - -@node Gawk Special Files, TCP Connecting, Using Networking, Using Networking -@comment node-name, next, previous, up -@section @command{gawk} Networking Mechanisms -@cindex network - -The @samp{|&} operator introduced in @command{gawk} 3.1 for use in -communicating with a @dfn{co-process} is described in -@ref{Two-way I/O, ,Two-way Communications With Another Process, gawk, GAWK: Effective AWK Programming}. -It shows how to do two-way I/O to a -separate process, sending it data with @code{print} or @code{printf} and -reading data with @code{getline}. If you haven't read it already, you should -detour there to do so.
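As a quick reminder of how the plain co-process case works, here is a minimal
sketch (an illustration added here, assuming a @command{sort} co-process); the
two-argument @code{close()} is what tells the co-process that its input is
finished:

@example
# Sketch: two-way I/O with an ordinary co-process (here, sort)
BEGIN @{
    Coprocess = "sort"
    print "zebra" |& Coprocess
    print "apple" |& Coprocess
    close(Coprocess, "to")       # close the write side; sort now sees EOF
    while ((Coprocess |& getline Line) > 0)
        print "sorted:", Line
    close(Coprocess)
@}
@end example

Exactly the same pattern, with a special file name in place of @code{"sort"},
is what the networking examples below use.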
- -@command{gawk} transparently extends the two-way I/O mechanism to simple networking through -the use of special @value{FN}s. When a ``co-process'' is started that matches -the special files we are about to describe, @command{gawk} creates the appropriate network -connection, and then two-way I/O proceeds as usual. - -At the C, C++ (and basic Perl) level, networking is accomplished -via @dfn{sockets}, an Application Programming Interface (API) originally -developed at the University of California at Berkeley that is now used -almost universally for TCP/IP networking. -Socket level programming, while fairly straightforward, requires paying -attention to a number of details, as well as using binary data. It is not -well-suited for use from a high-level language like @command{awk}. -The special files provided in @command{gawk} hide the details from -the programmer, making things much simpler and easier to use. -@c Who sez we can't toot our own horn occasionally? - -The special @value{FN} for network access is made up of several fields, all -of them mandatory, none of them optional: - -@example -/inet/@var{protocol}/@var{localport}/@var{hostname}/@var{remoteport} -@end example - -The @file{/inet/} field is, of course, constant when accessing the network. -The @var{localport} and @var{remoteport} fields do not have a meaning -when used with @file{/inet/raw} because ``ports'' only apply to -TCP and UDP. So, when using @file{/inet/raw}, the port fields always have -to be @samp{0}. - -@menu -* Special File Fields:: The fields in the special file name. -* Comparing Protocols:: Differences between the protocols. -@end menu - -@node Special File Fields, Comparing Protocols, Gawk Special Files, Gawk Special Files -@subsection The Fields of the Special @value{FFN} -This @value{SECTION} explains the meaning of all the other fields, -as well as the range of values and the defaults. -All of the fields are mandatory. To let the system pick a value, -or if the field doesn't apply to the protocol, specify it as @samp{0}. - -@table @var -@item protocol -Determines which member of the TCP/IP -family of protocols is selected to transport the data across the -network. There are three possible values (always written in lowercase): -@samp{tcp}, @samp{udp}, and @samp{raw}. The exact meaning of each is -explained later in this @value{SECTION}. - -@item localport -Determines which port on the local -machine is used to communicate across the network. It has no meaning -with @file{/inet/raw} and must therefore be @samp{0}. Application level clients -usually use @samp{0} to indicate they do not care which local port is -used---instead they specify a remote port to connect to. It is vital for -application level servers to use a number different from @samp{0} here -because their service has to be available at a specific publicly-known -port number. It is possible to use a name from @file{/etc/services} here. - -@item hostname -Determines which remote host is to -be at the other end of the connection. Application level servers must fill -this field with a @samp{0} to indicate their being open for all other hosts -to connect to them and enforce connection level server behavior this way. -It is not possible for an application level server to restrict its -availability to one remote host by entering a host name here. -Application level clients must enter a name different from @samp{0}. -The name can be either symbolic -(e.g., @samp{jpl-devvax.jpl.nasa.gov}) or numeric (e.g., @samp{128.149.1.143}). 
- -@item remoteport -Determines which port on the remote -machine is used to communicate across the network. It has no meaning -with @file{/inet/raw} and must therefore be 0. -For @file{/inet/tcp} and @file{/inet/udp}, -application level clients @emph{must} use a number -other than @samp{0} to indicate which port on the remote machine -they want to connect to. Application level servers must not fill this field with -a @samp{0}. Instead they specify a local port for clients to connect to. -It is possible to use a name from @file{/etc/services} here. -@end table - -Experts in network programming will notice that the usual -client/server asymmetry found at the level of the socket API is not visible -here. This is for the sake of simplicity of the high-level concept. If this -asymmetry is necessary for your application, -use another language. -For @command{gawk}, it is -more important to enable users to write a client program with a minimum -of code. What happens when first accessing a network connection is seen -in the following pseudo-code: - -@smallexample -if ((name of remote host given) && (other side accepts connection)) @{ - rendez-vous successful; transmit with getline or print -@} else @{ - if ((other side did not accept) && (localport == 0)) - exit unsuccessful - if (TCP) @{ - set up a server accepting connections - this means waiting for the client on the other side to connect - @} else - ready -@} -@end smallexample - -The exact behavior of this algorithm depends on the values of the -fields of the special @value{FN}. When in doubt, the following table -gives you the combinations of values and their meaning. If this -table is too complicated, focus on the three lines printed in -@strong{bold}. All the examples in -@ref{Using Networking, ,Networking With @command{gawk}}, -use only the -patterns printed in bold letters. - -@multitable {12345678901234} {123456} {123456} {1234567} {1234567890123456789012345} -@item @sc{protocol} @tab @sc{local port} @tab @sc{host name} -@tab @sc{remote port} @tab @sc{Resulting connection level behavior} -@item @strong{tcp} @tab @strong{0} @tab @strong{x} @tab @strong{x} @tab - @strong{Dedicated client, fails if immediately connecting to a - server on the other side fails} -@item udp @tab 0 @tab x @tab x @tab Dedicated client -@item raw @tab 0 @tab x @tab 0 @tab Dedicated client, works only as @code{root} -@item @strong{tcp, udp} @tab @strong{x} @tab @strong{x} @tab @strong{x} @tab - @strong{Client, switches to dedicated server if necessary} -@item @strong{tcp, udp} @tab @strong{x} @tab @strong{0} @tab @strong{0} @tab - @strong{Dedicated server} -@item raw @tab 0 @tab 0 @tab 0 @tab Dedicated server, works only as @code{root} -@item tcp, udp, raw @tab x @tab x @tab 0 @tab Invalid -@item tcp, udp, raw @tab 0 @tab 0 @tab x @tab Invalid -@item tcp, udp, raw @tab x @tab 0 @tab x @tab Invalid -@item tcp, udp @tab 0 @tab 0 @tab 0 @tab Invalid -@item tcp, udp @tab 0 @tab x @tab 0 @tab Invalid -@item raw @tab x @tab 0 @tab 0 @tab Invalid -@item raw @tab 0 @tab x @tab x @tab Invalid -@item raw @tab x @tab x @tab x @tab Invalid -@end multitable - -In general, TCP is the preferred mechanism to use. It is the simplest -protocol to understand and to use. Use the others only if circumstances -demand low-overhead. - -@node Comparing Protocols, , Special File Fields, Gawk Special Files -@subsection Comparing Protocols - -This @value{SECTION} develops a pair of programs (sender and receiver) -that do nothing but send a timestamp from one machine to another. 
The -sender and the receiver are implemented with each of the three protocols -available and demonstrate the differences between them. - -@menu -* File /inet/tcp:: The TCP special file. -* File /inet/udp:: The UDB special file. -* File /inet/raw:: The RAW special file. -@end menu - -@node File /inet/tcp, File /inet/udp, Comparing Protocols, Comparing Protocols -@subsubsection @file{/inet/tcp} -@cindex @file{/inet/tcp} special files -@cindex TCP -Once again, always use TCP. -(Use UDP when low-overhead is a necessity, and use RAW for -network experimentation.) -The first example is the sender -program: - -@example -# Server -BEGIN @{ - print strftime() |& "/inet/tcp/8888/0/0" - close("/inet/tcp/8888/0/0") -@} -@end example - -The receiver is very simple: - -@example -# Client -BEGIN @{ - "/inet/tcp/0/localhost/8888" |& getline - print $0 - close("/inet/tcp/0/localhost/8888") -@} -@end example - -TCP guarantees that the bytes arrive at the receiving end in exactly -the same order that they were sent. No byte is lost -(except for broken connections), doubled, or out of order. Some -overhead is necessary to accomplish this, but this is the price to pay for -a reliable service. -It does matter which side starts first. The sender/server has to be started -first, and it waits for the receiver to read a line. - -@node File /inet/udp, File /inet/raw, File /inet/tcp, Comparing Protocols -@subsubsection @file{/inet/udp} -@cindex @file{/inet/udp} special files -@cindex UDP -The server and client programs that use UDP are almost identical to their TCP counterparts; -only the @var{protocol} has changed. As before, it does matter which side -starts first. The receiving side blocks and waits for the sender. -In this case, the receiver/client has to be started first: - -@page -@example -# Server -BEGIN @{ - print strftime() |& "/inet/udp/8888/0/0" - close("/inet/udp/8888/0/0") -@} -@end example - -The receiver is almost identical to the TCP receiver: - -@example -# Client -BEGIN @{ - "/inet/udp/0/localhost/8888" |& getline - print $0 - close("/inet/udp/0/localhost/8888") -@} -@end example - -UDP cannot guarantee that the datagrams at the receiving end will arrive in exactly -the same order they were sent. Some datagrams could be -lost, some doubled, and some out of order. But no overhead is necessary to -accomplish this. This unreliable behavior is good enough for tasks -such as data acquisition, logging, and even stateless services like NFS. - -@node File /inet/raw, , File /inet/udp, Comparing Protocols -@subsubsection @file{/inet/raw} -@cindex @file{/inet/raw} special files -@cindex RAW - -This is an IP-level protocol. Only @code{root} is allowed to access this -special file. It is meant to be the basis for implementing -and experimenting with transport level protocols.@footnote{This special file -is reserved, but not otherwise currently implemented.} -In the most general case, -the sender has to supply the encapsulating header bytes in front of the -packet and the receiver has to strip the additional bytes from the message. - -@cindex dark corner -RAW receivers cannot receive packets sent with TCP or UDP because the -operating system does not deliver the packets to a RAW receiver. The -operating system knows about some of the protocols on top of IP -and decides on its own which packet to deliver to which process. -@value{DARKCORNER} -Therefore, the UDP receiver must be used for receiving UDP -datagrams sent with the RAW sender. This is a dark corner, not only of -@command{gawk}, but also of TCP/IP. 
- -@cindex SPAK utility -For extended experimentation with protocols, look into -the approach implemented in a tool called SPAK. -This tool reflects the hierarchical layering of protocols (encapsulation) -in the way data streams are piped out of one program into the next one. -It shows which protocol is based on which other (lower-level) protocol -by looking at the command-line ordering of the program calls. -Cleverly thought out, SPAK is much better than @command{gawk}'s -@file{/inet} for learning the meaning of each and every bit in the -protocol headers. - -The next example uses the RAW protocol to emulate -the behavior of UDP. The sender program is the same as above, but with some -additional bytes that fill the places of the UDP fields: - -@example -@group -BEGIN @{ - Message = "Hello world\n" - SourcePort = 0 - DestinationPort = 8888 - MessageLength = length(Message)+8 - RawService = "/inet/raw/0/localhost/0" - printf("%c%c%c%c%c%c%c%c%s", - SourcePort/256, SourcePort%256, - DestinationPort/256, DestinationPort%256, - MessageLength/256, MessageLength%256, - 0, 0, Message) |& RawService - fflush(RawService) - close(RawService) -@} -@end group -@end example - -Since this program tries -to emulate the behavior of UDP, it checks if -the RAW sender is understood by the UDP receiver but not if the RAW receiver -can understand the UDP sender. In a real network, the -RAW receiver is hardly -of any use because it gets every IP packet that -comes across the network. There are usually so many packets that -@command{gawk} would be too slow for processing them. -Only on a network with little -traffic can the IP-level receiver program be tested. Programs for analyzing -IP traffic on modem or ISDN channels should be possible. - -Port numbers do not have a meaning when using @file{/inet/raw}. Their fields -have to be @samp{0}. Only TCP and UDP use ports. Receiving data from -@file{/inet/raw} is difficult, not only because of processing speed but also -because data is usually binary and not restricted to ASCII. This -implies that line separation with @code{RS} does not work as usual. - -@node TCP Connecting, Troubleshooting, Gawk Special Files, Using Networking -@section Establishing a TCP Connection - -Let's observe a network connection at work. Type in the following program -and watch the output. Within a second, it connects via TCP (@file{/inet/tcp}) -to the machine it is running on (@samp{localhost}), and asks the service -@samp{daytime} on the machine what time it is: - -@cindex @code{|&} I/O operator -@cindex @code{getline} built-in function -@example -BEGIN @{ - "/inet/tcp/0/localhost/daytime" |& getline - print $0 - close("/inet/tcp/0/localhost/daytime") -@} -@end example - -Even experienced @command{awk} users will find the second line strange in two -respects: - -@itemize @bullet -@item -A special file is used as a shell command that pipes its output -into @code{getline}. One would rather expect to see the special file -being read like any other file (@samp{getline < -"/inet/tcp/0/localhost/daytime")}. - -@item -The operator @samp{|&} has not been part of any @command{awk} -implementation (until now). -It is actually the only extension of the @command{awk} -language needed (apart from the special files) to introduce network access. -@end itemize - -The @samp{|&} operator was introduced in @command{gawk} 3.1 in order to -overcome the crucial restriction that access to files and pipes in -@command{awk} is always unidirectional. 
It was formerly impossible to use -both access modes on the same file or pipe. Instead of changing the whole -concept of file access, the @samp{|&} operator -behaves exactly like the usual pipe operator except for two additions: - -@itemize @bullet -@item -Normal shell commands connected to their @command{gawk} program with a @samp{|&} -pipe can be accessed bidirectionally. The @samp{|&} turns out to be a quite -general, useful, and natural extension of @command{awk}. - -@item -Pipes that consist of a special @value{FN} for network connections are not -executed as shell commands. Instead, they can be read and written to, just -like a full-duplex network connection. -@end itemize - -In the earlier example, the @samp{|&} operator tells @code{getline} -to read a line from the special file @file{/inet/tcp/0/localhost/daytime}. -We could also have printed a line into the special file. But instead we just -read a line with the time, printed it, and closed the connection. -(While we could just let @command{gawk} close the connection by finishing -the program, in this @value{DOCUMENT} -we are pedantic, and always explicitly close the connections.) - -@node Troubleshooting, Interacting, TCP Connecting, Using Networking -@section Troubleshooting Connection Problems -It may well be that for some reason the above program does not run on your -machine. When looking at possible reasons for this, you will learn much -about typical problems that arise in network programming. First of all, -your implementation of @command{gawk} may not support network access -because it is -a pre-3.1 version or you do not have a network interface in your machine. -Perhaps your machine uses some other protocol -like DECnet or Novell's IPX. For the rest of this @value{CHAPTER}, -we will assume -you work on a Unix machine that supports TCP/IP. If the above program does -not run on such a machine, it may help to replace the name -@samp{localhost} with the name of your machine or its IP address. If it -does, you could replace @samp{localhost} with the name of another machine -in your vicinity. This way, the program connects to another machine. -Now you should see the date and time being printed by the program. -Otherwise your machine may not support the @samp{daytime} service. -Try changing the service to @samp{chargen} or @samp{ftp}. This way, the program -connects to other services that should give you some response. If you are -curious, you should have a look at your file @file{/etc/services}. 
It could -look like this: - -@ignore -@multitable {1234567890123} {1234567890123} {123456789012345678901234567890123456789012} -@item Service @strong{name} @tab Service @strong{number} -@item echo @tab 7/tcp @tab echo sends back each line it receivces -@item echo @tab 7/udp @tab echo is good for testing purposes -@item discard @tab 9/tcp @tab discard behaves like @file{/dev/null} -@item discard @tab 9/udp @tab discard just throws away each line -@item daytime @tab 13/tcp @tab daytime sends date & time once per connection -@item daytime @tab 13/udp -@item chargen @tab 19/tcp @tab chargen infinitely produces character sets -@item chargen @tab 19/udp @tab chargen is good for testing purposes -@item ftp @tab 21/tcp @tab ftp is the usual file transfer protocol -@item telnet @tab 23/tcp @tab telnet is the usual login facility -@item smtp @tab 25/tcp @tab smtp is the Simple Mail Transfer Protocol -@item finger @tab 79/tcp @tab finger tells you who is logged in -@item www @tab 80/tcp @tab www is the HyperText Transfer Protocol -@item pop2 @tab 109/tcp @tab pop2 is an older version of pop3 -@item pop2 @tab 109/udp -@item pop3 @tab 110/tcp @tab pop3 is the Post Office Protocol -@item pop3 @tab 110/udp @tab pop3 is used for receiving email -@item nntp @tab 119/tcp @tab nntp is the USENET News Transfer Protocol -@item irc @tab 194/tcp @tab irc is the Internet Relay Chat -@item irc @tab 194/udp -@end multitable -@end ignore - -@smallexample -# /etc/services: -# -# Network services, Internet style -# -# Name Number/Protcol Alternate name # Comments - -echo 7/tcp -echo 7/udp -discard 9/tcp sink null -discard 9/udp sink null -daytime 13/tcp -daytime 13/udp -chargen 19/tcp ttytst source -chargen 19/udp ttytst source -ftp 21/tcp -telnet 23/tcp -smtp 25/tcp mail -finger 79/tcp -www 80/tcp http # WorldWideWeb HTTP -www 80/udp # HyperText Transfer Protocol -pop-2 109/tcp postoffice # POP version 2 -pop-2 109/udp -pop-3 110/tcp # POP version 3 -pop-3 110/udp -nntp 119/tcp readnews untp # USENET News -irc 194/tcp # Internet Relay Chat -irc 194/udp -@dots{} -@end smallexample - -@cindex Linux -@cindex GNU/Linux -@cindex Microsoft Windows -Here, you find a list of services that traditional Unix machines usually -support. If your GNU/Linux machine does not do so, it may be that these -services are switched off in some startup script. Systems running some -flavor of Microsoft Windows usually do @emph{not} support such services. -Nevertheless, it @emph{is} possible to do networking with @command{gawk} on -Microsoft -Windows.@footnote{Microsoft prefered to ignore the TCP/IP -family of protocols until 1995. Then came the rise of the Netscape browser -as a landmark ``killer application.'' Microsoft added TCP/IP support and -their own browser to Microsoft Windows 95 at the last minute. They even back-ported -their TCP/IP implementation to Microsoft Windows for Workgroups 3.11, but it was -a rather rudimentary and half-hearted implementation. Nevertheless, -the equivalent of @file{/etc/services} resides under -@file{c:\windows\services} on Microsoft Windows.} -The first column of the file gives the name of the service, -the second a unique number, and the protocol that one can use to connect to -this service. -The rest of the line is treated as a comment. -You see that some services (@samp{echo}) support TCP as -well as UDP. 
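If you prefer to look up such an entry programmatically, a few lines of
@command{gawk} are enough. The following is only a sketch (the
@file{findport.awk} name and the @samp{-v service=@dots{}} invocation are
assumptions made for the illustration), but it shows how the name, port,
and protocol columns relate:

@example
# Sketch: print the port and protocol of a given service
# Assumed usage: gawk -v service=daytime -f findport.awk /etc/services
@{
    sub(/#.*/, "")                   # strip trailing comments
    if ($1 == service) @{
        split($2, parts, "/")        # e.g., "13/tcp" -> 13 and tcp
        printf("%s: port %s, protocol %s\n", $1, parts[1], parts[2])
    @}
@}
@end example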
- -@node Interacting, Setting Up, Troubleshooting, Using Networking -@section Interacting with a Network Service - -The next program makes use of the possibility to really interact with a -network service by printing something into the special file. It asks the -so-called @command{finger} service if a user of the machine is logged in. When -testing this program, try to change @samp{localhost} to -some other machine name in your local network: - -@c system if test ! -d eg ; then mkdir eg ; fi -@c system if test ! -d eg/network ; then mkdir eg/network ; fi -@example -@c file eg/network/fingerclient.awk -BEGIN @{ - NetService = "/inet/tcp/0/localhost/finger" - print "@var{name}" |& NetService - while ((NetService |& getline) > 0) - print $0 - close(NetService) -@} -@c endfile -@end example - -After telling the service on the machine which user to look for, -the program repeatedly reads lines that come as a reply. When no more -lines are coming (because the service has closed the connection), the -program also closes the connection. Try replacing @code{"@var{name}"} with your -login name (or the name of someone else logged in). For a list -of all users currently logged in, replace @var{name} with an empty string -@code{""}. - -@cindex Linux -@cindex GNU/Linux -The final @code{close} command could be safely deleted from -the above script, because the operating system closes any open connection -by default when a script reaches the end of execution. In order to avoid -portability problems, it is best to always close connections explicitly. -With the Linux kernel, -for example, proper closing results in flushing of buffers. Letting -the close happen by default may result in discarding buffers. - -@ignore -@c Chuck comments that this seems out of place. He's right. I dunno -@c where to put it though. -@cindex @command{finger} utility -@cindex RFC 1288 -In the early days of the Internet (up until about 1992), you could use -such a program to check if some user in another country was logged in on -a specific machine. -RFC 1288@footnote{@uref{http://www.cis.ohio-state.edu/htbin/rfc/rfc1288.html}} -provides the exact definition of the @command{finger} protocol. -Every contemporary Unix system also has a command named @command{finger}, -which functions as a client for the protocol of the same name. -Still today, some people maintain simple information systems -with this ancient protocol. For example, by typing -@samp{finger quake@@seismo.unr.edu} -you get the latest @dfn{Earthquake Bulletin} for the state of Nevada. - -@cindex Earthquake Bulletin -@smallexample -$ finger quake@@seismo.unr.edu - -[@dots{}] - -DATE-(UTC)-TIME LAT LON DEP MAG COMMENTS -yy/mm/dd hh:mm:ss deg. deg. km - -98/12/14 21:09:22 37.47N 116.30W 0.0 2.3Md 76.4 km S of WARM SPRINGS, NEVA -98/12/14 22:05:09 39.69N 120.41W 11.9 2.1Md 53.8 km WNW of RENO, NEVADA -98/12/15 14:14:19 38.04N 118.60W 2.0 2.3Md 51.0 km S of HAWTHORNE, NEVADA -98/12/17 01:49:02 36.06N 117.58W 13.9 3.0Md 74.9 km SE of LONE PINE, CALIFOR -98/12/17 05:39:26 39.95N 120.87W 6.2 2.6Md 101.6 km WNW of RENO, NEVADA -98/12/22 06:07:42 38.68N 119.82W 5.2 2.3Md 50.7 km S of CARSON CITY, NEVAD -@end smallexample - -@noindent -This output from @command{finger} contains the time, location, depth, -magnitude, and a short comment about -the earthquakes registered in that region during the last 10 days. -In many places today the use of such services is restricted -because most networks have firewalls and proxy servers between them -and the Internet. 
Most firewalls are programmed to not let -@command{finger} requests go beyond the local network. - -@cindex Coke machine -Another (ab)use of the @command{finger} protocol are several Coke machines -that are connected to the Internet. There is a short list of such -Coke machines.@footnote{@uref{http://ca.yahoo.com/Computers_and_Internet/Internet/Devices_Connected_to_the_Internet/Soda_Machines/}} -You can access them either from the command-line or with a simple -@command{gawk} script. They usually tell you about the different -flavors of Coke and beer available there. If you have an account there, -you can even order some drink this way. -@end ignore - -When looking at @file{/etc/services} you may have noticed that the -@samp{daytime} service is also available with @samp{udp}. In the earlier -example, change @samp{tcp} to @samp{udp}, -and change @samp{finger} to @samp{daytime}. -After starting the modified program, you see the expected day and time message. -The program then hangs, because it waits for more lines coming from the -service. However, they never come. This behavior is a consequence of the -differences between TCP and UDP. When using UDP, neither party is -automatically informed about the other closing the connection. -Continuing to experiment this way reveals many other subtle -differences between TCP and UDP. To avoid such trouble, one should always -remember the advice Douglas E.@: Comer and David Stevens give in -Volume III of their series @cite{Internetworking With TCP} -(page 14): - -@cindex TCP -@cindex UDP -@quotation -When designing client-server applications, beginners are strongly -advised to use TCP because it provides reliable, connection-oriented -communication. Programs only use UDP if the application protocol handles -reliability, the application requires hardware broadcast or multicast, -or the application cannot tolerate virtual circuit overhead. -@end quotation - -@node Setting Up, Email, Interacting, Using Networking -@section Setting Up a Service -The preceding programs behaved as clients that connect to a server somewhere -on the Internet and request a particular service. Now we set up such a -service to mimic the behavior of the @samp{daytime} service. -Such a server does not know in advance who is going to connect to it over -the network. Therefore we cannot insert a name for the host to connect to -in our special @value{FN}. - -Start the following program in one window. Notice that the service does -not have the name @samp{daytime}, but the number @samp{8888}. -From looking at @file{/etc/services}, you know that names like @samp{daytime} -are just mnemonics for predetermined 16-bit integers. -Only the system administrator (@code{root}) could enter -our new service into @file{/etc/services} with an appropriate name. -Also notice that the service name has to be entered into a different field -of the special @value{FN} because we are setting up a server, not a client: - -@cindex @command{finger} utility -@cindex server -@example -BEGIN @{ - print strftime() |& "/inet/tcp/8888/0/0" - close("/inet/tcp/8888/0/0") -@} -@end example - -Now open another window on the same machine. -Copy the client program given as the first example -(@pxref{TCP Connecting, ,Establishing a TCP Connection}) -to a new file and edit it, changing the name @samp{daytime} to -@samp{8888}. Then start the modified client. You should get a reply -like this: - -@example -Sat Sep 27 19:08:16 CEST 1997 -@end example - -@noindent -Both programs explicitly close the connection. 
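For reference, the modified client is nothing more than the earlier
@samp{daytime} example with the service name replaced by @samp{8888}
(repeated here only as a sketch for convenience):

@example
# Client for our private service on port 8888
BEGIN @{
    "/inet/tcp/0/localhost/8888" |& getline
    print $0
    close("/inet/tcp/0/localhost/8888")
@}
@end example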
- -@cindex Microsoft Windows -@cindex reserved ports -Now we will intentionally make a mistake to see what happens when the name -@samp{8888} (the so-called port) is already used by another service. -Start the server -program in both windows. The first one works, but the second one -complains that it could not open the connection. Each port on a single -machine can only be used by one server program at a time. Now terminate the -server program and change the name @samp{8888} to @samp{echo}. After restarting it, -the server program does not run any more and you know why: there already is -an @samp{echo} service running on your machine. But even if this isn't true, -you would not get -your own @samp{echo} server running on a Unix machine, -because the ports with numbers smaller -than 1024 (@samp{echo} is at port 7) are reserved for @code{root}. -On machines running some flavor of Microsoft Windows, there is no restriction -that reserves ports 1 to 1024 for a privileged user; hence you can start -an @samp{echo} server there. - -Turning this short server program into something really useful is simple. -Imagine a server that first reads a @value{FN} from the client through the -network connection, then does something with the file and -sends a result back to the client. The server-side processing -could be: - -@example -BEGIN @{ - NetService = "/inet/tcp/8888/0/0" - NetService |& getline - CatPipe = ("cat " $1) # sets $0 and the fields - while ((CatPipe | getline) > 0) - print $0 |& NetService - close(NetService) -@} -@end example - -@noindent -and we would -have a remote copying facility. Such a server reads the name of a file -from any client that connects to it and transmits the contents of the -named file across the net. The server-side processing could also be -the execution of a command that is transmitted across the network. From this -example, you can see how simple it is to open up a security hole on your -machine. If you allow clients to connect to your machine and -execute arbitrary commands, anyone would be free to do @samp{rm -rf *}. - -@node Email, Web page, Setting Up, Using Networking -@section Reading Email -@cindex POP -@cindex SMTP -@cindex RFC 1939 -@cindex RFC 821 -The distribution of email is usually done by dedicated email servers that -communicate with your machine using special protocols. To receive email, we -will use the Post Office Protocol (POP). Sending can be done with the much -older Simple Mail Transfer Protocol (SMTP). -@ignore -@footnote{RFC 1939 defines POP. -RFC 821 defines SMTP. See -@uref{http://rfc.fh-koeln.de/doc/rfc/html/rfc.html, RFCs in HTML}.} -@end ignore - -When you type in the following program, replace the @var{emailhost} by the -name of your local email server. Ask your administrator if the server has a -POP service, and then use its name or number in the program below. -Now the program is ready to connect to your email server, but it will not -succeed in retrieving your mail because it does not yet know your login -name or password. 
Replace them in the program and it -shows you the first email the server has in store: - -@example -BEGIN @{ - POPService = "/inet/tcp/0/@var{emailhost}/pop3" - RS = ORS = "\r\n" - print "user @var{name}" |& POPService - POPService |& getline - print "pass @var{password}" |& POPService - POPService |& getline - print "retr 1" |& POPService - POPService |& getline - if ($1 != "+OK") exit - print "quit" |& POPService - RS = "\r\n\\.\r\n" - POPService |& getline - print $0 - close(POPService) -@} -@end example - -@cindex RFC 1939 -The record separators @code{RS} and @code{ORS} are redefined because the -protocol (POP) requires CR-LF to separate lines. After identifying -yourself to the email service, the command @samp{retr 1} instructs the -service to send the first of all your email messages in line. If the service -replies with something other than @samp{+OK}, the program exits; maybe there -is no email. Otherwise, the program first announces that it intends to finish -reading email, and then redefines @code{RS} in order to read the entire -email as multiline input in one record. From the POP RFC, we know that the body -of the email always ends with a single line containing a single dot. -The program looks for this using @samp{RS = "\r\n\\.\r\n"}. -When it finds this sequence in the mail message, it quits. -You can invoke this program as often as you like; it does not delete the -message it reads, but instead leaves it on the server. - -@node Web page, Primitive Service, Email, Using Networking -@section Reading a Web Page -@cindex HTTP -@cindex RFC 2068 -@cindex RFC 2616 - -Retrieving a web page from a web server is as simple as -retrieving email from an email server. We only have to use a -similar, but not identical, protocol and a different port. The name of the -protocol is HyperText Transfer Protocol (HTTP) and the port number is usually -80. As in the preceding @value{SECTION}, ask your administrator about the -name of your local web server or proxy web server and its port number -for HTTP requests. - -@ignore -@c Chuck says this stuff isn't necessary -More detailed information about HTTP can be found at -the home of the web protocols,@footnote{@uref{http://www.w3.org/pub/WWW/Protocols}} -including the specification of HTTP in RFC 2068. The protocol specification -in RFC 2068 is concise and you can get it for free. If you need more -explanation and you are willing to pay for a book, you might be -interested in one of these books: - -@enumerate - -@item -When we started writing web clients and servers with @command{gawk}, -the only book available with details about HTTP was the one by Paul Hethmon -called -@cite{Illustrated Guide to HTTP}.@footnote{@uref{http://www.browsebooks.com/Hethmon/?882}} -Hethmon not only describes HTTP, -he also implements a simple web server in C++. - -@item -Since July 2000, O'Reilly offers the book by Clinton Wong called -@cite{HTTP Pocket Reference}.@footnote{@uref{http://www.oreilly.com/catalog/httppr}} -It only has 75 pages but its -focus definitely is HTTP. This pocket reference is not a replacement -for the RFC, but I wish I had had it back in 1997 when I started writing -scripts to handle HTTP. - -@item -Another small booklet about HTTP is the one by Toexcell Incorporated Staff, -ISBN 1-58348-270-9, called -@cite{Hypertext Transfer Protocol Http 1.0 Specifications} - -@end enumerate -@end ignore - -The following program employs a rather crude approach toward retrieving a -web page. 
It uses the prehistoric syntax of HTTP 0.9, which almost all -web servers still support. The most noticeable thing about it is that the -program directs the request to the local proxy server whose name you insert -in the special @value{FN} (which in turn calls @samp{www.yahoo.com}): - -@example -BEGIN @{ - RS = ORS = "\r\n" - HttpService = "/inet/tcp/0/@var{proxy}/80" - print "GET http://www.yahoo.com" |& HttpService - while ((HttpService |& getline) > 0) - print $0 - close(HttpService) -@} -@end example - -@cindex RFC 1945 -@cindex HTML -@cindex Yahoo! -Again, lines are separated by a redefined @code{RS} and @code{ORS}. -The @code{GET} request that we send to the server is the only kind of -HTTP request that existed when the web was created in the early 1990s. -HTTP calls this @code{GET} request a ``method,'' which tells the -service to transmit a web page (here the home page of the Yahoo! search -engine). Version 1.0 added the request methods @code{HEAD} and -@code{POST}. The current version of HTTP is 1.1,@footnote{Version 1.0 of -HTTP was defined in RFC 1945. HTTP 1.1 was initially specified in RFC -2068. In June 1999, RFC 2068 was made obsolete by RFC 2616. It is an update -without any substantial changes.} and knows the additional request -methods @code{OPTIONS}, @code{PUT}, @code{DELETE}, and @code{TRACE}. -You can fill in any valid web address, and the program prints the -HTML code of that page to your screen. - -Notice the similarity between the responses of the POP and HTTP -services. First, you get a header that is terminated by an empty line, and -then you get the body of the page in HTML. The lines of the headers also -have the same form as in POP. There is the name of a parameter, -then a colon, and finally the value of that parameter. - -@cindex CGI -@cindex @file{gif} image format -@cindex @file{png} image format -Images (@file{.png} or @file{.gif} files) can also be retrieved this way, -but then you -get binary data that should be redirected into a file. Another -application is calling a CGI (Common Gateway Interface) script on some -server. CGI scripts are used when the contents of a web page are not -constant, but generated instantly at the moment you send a request -for the page. For example, to get a detailed report about the current -quotes of Motorola stock shares, call a CGI script at Yahoo! with -the following: - -@example -get = "GET http://quote.yahoo.com/q?s=MOT&d=t" -print get |& HttpService -@end example - -You can also request weather reports this way. -@ignore -@cindex Boutell, Thomas -A good book to go on with is -the -@cite{HTML Source Book}.@footnote{@uref{http://www.utoronto.ca/webdocs/HTMLdocs/NewHTML/book.html}} -There are also some books on CGI programming -like @cite{CGI Programming in C & Perl}, -by Thomas Boutell@footnote{@uref{http://cseng.aw.com/bookdetail.qry?ISBN=0-201-42219-0&ptype=0}}, -and @cite{The CGI Book}.@footnote{@uref{http://www.cgibook.com}} -Another good source is @cite{The CGI Resource Index}}.@footnote{@uref{http://www.cgi-resources.com}} -@end ignore - -@node Primitive Service, Interacting Service, Web page, Using Networking -@section A Primitive Web Service -Now we know enough about HTTP to set up a primitive web service that just -says @code{"Hello, world"} when someone connects to it with a browser. -Compared -to the situation in the preceding @value{SECTION}, our program changes the role. It -tries to behave just like the server we have observed. 
Since we are setting -up a server here, we have to insert the port number in the @samp{localport} -field of the special @value{FN}. The other two fields (@var{hostname} and -@var{remoteport}) have to contain a @samp{0} because we do not know in -advance which host will connect to our service. - -In the early 1990s, all a server had to do was send an HTML document and -close the connection. Here, we adhere to the modern syntax of HTTP. -The steps are as follows: - -@enumerate 1 -@item -Send a status line telling the web browser that everything -is OK. - -@item -Send a line to tell the browser how many bytes follow in the -body of the message. This was not necessary earlier because both -parties knew that the document ended when the connection closed. Nowadays -it is possible to stay connected after the transmission of one web page. -This is to avoid the network traffic necessary for repeatedly establishing -TCP connections for requesting several images. Thus, there is the need to tell -the receiving party how many bytes will be sent. The header is terminated -as usual with an empty line. - -@item -Send the @code{"Hello, world"} body -in HTML. -The useless @code{while} loop swallows the request of the browser. -We could actually omit the loop, and on most machines the program would still -work. -First, start the following program: -@end enumerate - -@example -@c file eg/network/hello-serv.awk -BEGIN @{ - RS = ORS = "\r\n" - HttpService = "/inet/tcp/8080/0/0" - Hello = "" \ - "A Famous Greeting" \ - "

<BODY><H1>Hello, world</H1></BODY></HTML>
" - Len = length(Hello) + length(ORS) - print "HTTP/1.0 200 OK" |& HttpService - print "Content-Length: " Len ORS |& HttpService - print Hello |& HttpService - while ((HttpService |& getline) > 0) - continue; - close(HttpService) -@} -@c endfile -@end example - -Now, on the same machine, start your favorite browser and let it point to -@uref{http://localhost:8080} (the browser needs to know on which port -our server is listening for requests). If this does not work, the browser -probably tries to connect to a proxy server that does not know your machine. -If so, change the browser's configuration so that the browser does not try to -use a proxy to connect to your machine. - -@node Interacting Service, Simple Server, Primitive Service, Using Networking -@section A Web Service with Interaction -@cindex GUI -@ifinfo -This node shows how to set up a simple web server. -The subnode is a library file that we will use with all the examples in -@ref{Some Applications and Techniques}. -@end ifinfo - -@menu -* CGI Lib:: A simple CGI library. -@end menu - -Setting up a web service that allows user interaction is more difficult and -shows us the limits of network access in @command{gawk}. In this @value{SECTION}, -we develop a main program (a @code{BEGIN} pattern and its action) -that will become the core of event-driven execution controlled by a -graphical user interface (GUI). -Each HTTP event that the user triggers by some action within the browser -is received in this central procedure. Parameters and menu choices are -extracted from this request and an appropriate measure is taken according to -the user's choice. -For example: - -@cindex HTTP server, core logic -@example -BEGIN @{ - if (MyHost == "") @{ - "uname -n" | getline MyHost - close("uname -n") - @} - if (MyPort == 0) MyPort = 8080 - HttpService = "/inet/tcp/" MyPort "/0/0" - MyPrefix = "http://" MyHost ":" MyPort - SetUpServer() - while ("awk" != "complex") @{ - # header lines are terminated this way - RS = ORS = "\r\n" - Status = 200 # this means OK - Reason = "OK" - Header = TopHeader - Document = TopDoc - Footer = TopFooter - if (GETARG["Method"] == "GET") @{ - HandleGET() - @} else if (GETARG["Method"] == "HEAD") @{ - # not yet implemented - @} else if (GETARG["Method"] != "") @{ - print "bad method", GETARG["Method"] - @} - Prompt = Header Document Footer - print "HTTP/1.0", Status, Reason |& HttpService - print "Connection: Close" |& HttpService - print "Pragma: no-cache" |& HttpService - len = length(Prompt) + length(ORS) - print "Content-length:", len |& HttpService - print ORS Prompt |& HttpService - # ignore all the header lines - while ((HttpService |& getline) > 0) - ; - # stop talking to this client - close(HttpService) - # wait for new client request - HttpService |& getline - # do some logging - print systime(), strftime(), $0 - # read request parameters - CGI_setup($1, $2, $3) - @} -@} -@end example - -This web server presents menu choices in the form of HTML links. -Therefore, it has to tell the browser the name of the host it is -residing on. When starting the server, the user may supply the name -of the host from the command line with @samp{gawk -v MyHost="Rumpelstilzchen"}. -If the user does not do this, the server looks up the name of the host it is -running on for later use as a web address in HTML documents. The same -applies to the port number. These values are inserted later into the -HTML content of the web pages to refer to the home system. 
- -Each server that is built around this core has to initialize some -application-dependent variables (such as the default home page) in a procedure -@code{SetUpServer}, which is called immediately before entering the -infinite loop of the server. For now, we will write an instance that -initiates a trivial interaction. With this home page, the client user -can click on two possible choices, and receive the current date either -in human-readable format or in seconds since 1970: - -@example -function SetUpServer() @{ - TopHeader = "" - TopHeader = TopHeader \ - "My name is GAWK, GNU AWK" - TopDoc = "

\ - Do you prefer your date human or \ - POSIXed?

" ORS ORS - TopFooter = "" -@} -@end example - -On the first run through the main loop, the default line terminators are -set and the default home page is copied to the actual home page. Since this -is the first run, @code{GETARG["Method"]} is not initialized yet, hence the -case selection over the method does nothing. Now that the home page is -initialized, the server can start communicating to a client browser. - -@cindex RFC 2068 -@cindex CGI -It does so by printing the HTTP header into the network connection -(@samp{print @dots{} |& HttpService}). This command blocks execution of -the server script until a client connects. If this server -script is compared with the primitive one we wrote before, you will notice -two additional lines in the header. The first instructs the browser -to close the connection after each request. The second tells the -browser that it should never try to @emph{remember} earlier requests -that had identical web addresses (no caching). Otherwise, it could happen -that the browser retrieves the time of day in the previous example just once, -and later it takes the web page from the cache, always displaying the same -time of day although time advances each second. - -Having supplied the initial home page to the browser with a valid document -stored in the parameter @code{Prompt}, it closes the connection and waits -for the next request. When the request comes, a log line is printed that -allows us to see which request the server receives. The final step in the -loop is to call the function @code{CGI_setup}, which reads all the lines -of the request (coming from the browser), processes them, and stores the -transmitted parameters in the array @code{PARAM}. The complete -text of these application-independent functions can be found in -@ref{CGI Lib, ,A Simple CGI Library}. -For now, we use a simplified version of @code{CGI_setup}: - -@example -function CGI_setup( method, uri, version, i) @{ - delete GETARG; delete MENU; delete PARAM - GETARG["Method"] = $1 - GETARG["URI"] = $2 - GETARG["Version"] = $3 - i = index($2, "?") - # is there a "?" indicating a CGI request? -@group - if (i > 0) @{ - split(substr($2, 1, i-1), MENU, "[/:]") - split(substr($2, i+1), PARAM, "&") - for (i in PARAM) @{ - j = index(PARAM[i], "=") - GETARG[substr(PARAM[i], 1, j-1)] = \ - substr(PARAM[i], j+1) - @} - @} else @{ # there is no "?", no need for splitting PARAMs - split($2, MENU, "[/:]") - @} -@end group -@} -@end example - -At first, the function clears all variables used for -global storage of request parameters. The rest of the function serves -the purpose of filling the global parameters with the extracted new values. -To accomplish this, the name of the requested resource is split into -parts and stored for later evaluation. If the request contains a @samp{?}, -then the request has CGI variables seamlessly appended to the web address. -Everything in front of the @samp{?} is split up into menu items, and -everything behind the @samp{?} is a list of @samp{@var{variable}=@var{value}} pairs -(separated by @samp{&}) that also need splitting. This way, CGI variables are -isolated and stored. This procedure lacks recognition of special characters -that are transmitted in coded form@footnote{As defined in RFC 2068.}. Here, any -optional request header and body parts are ignored. We do not need -header parameters and the request body. However, when refining our approach or -working with the @code{POST} and @code{PUT} methods, reading the header -and body -becomes inevitable. 
Header parameters should then be stored in a global -array as well as the body. - -On each subsequent run through the main loop, one request from a browser is -received, evaluated, and answered according to the user's choice. This can be -done by letting the value of the HTTP method guide the main loop into -execution of the procedure @code{HandleGET}, which evaluates the user's -choice. In this case, we have only one hierarchical level of menus, -but in the general case, -menus are nested. -The menu choices at each level are -separated by @samp{/}, just as in @value{FN}s. Notice how simple it is to -construct menus of arbitrary depth: - -@example -function HandleGET() @{ - if ( MENU[2] == "human") @{ - Footer = strftime() TopFooter - @} else if (MENU[2] == "POSIX") @{ - Footer = systime() TopFooter - @} -@} -@end example - -@cindex CGI -The disadvantage of this approach is that our server is slow and can -handle only one request at a time. Its main advantage, however, is that -the server -consists of just one @command{gawk} program. No need for installing an -@command{httpd}, and no need for static separate HTML files, CGI scripts, or -@code{root} privileges. This is rapid prototyping. -This program can be started on the same host that runs your browser. -Then let your browser point to @uref{http://localhost:8080}. - -@cindex @file{xbm} image format -@cindex image format -@cindex GNUPlot utility -It is also possible to include images into the HTML pages. -Most browsers support the not very well-known -@file{.xbm} format, -which may contain only -monochrome pictures but is an ASCII format. Binary images are possible but -not so easy to handle. Another way of including images is to generate them -with a tool such as GNUPlot, -by calling the tool with the @code{system} function or through a pipe. - -@node CGI Lib, , Interacting Service, Interacting Service -@subsection A Simple CGI Library -@quotation -@i{HTTP is like being married: you have to be able to handle whatever -you're given, while being very careful what you send back.}@* -Phil Smith III,@* -@uref{http://www.netfunny.com/rhf/jokes/99/Mar/http.html} -@end quotation - -In @ref{Interacting Service, ,A Web Service with Interaction}, -we saw the function @code{CGI_setup} as part of the web server -``core logic'' framework. The code presented there handles almost -everything necessary for CGI requests. -One thing it doesn't do is handle encoded characters in the requests. -For example, an @samp{&} is encoded as a percent sign followed by -the hexadecimal value---@samp{%26}. These encoded values should be -decoded. -Following is a simple library to perform these tasks. -This code is used for all web server examples -used throughout the rest of this @value{DOCUMENT}. -If you want to use it for your own web server, store the source code -into a file named @file{inetlib.awk}. Then you can include -these functions into your code by placing the following statement -into your program: - -@example -@@include inetlib.awk -@end example - -@noindent -on the first line of your script. But beware, this mechanism is -only possible if you invoke your web server script with @command{igawk} -instead of the usual @command{awk} or @command{gawk}. 
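For example, if the web server script (including the @samp{@@include}
line) is stored in a file called @file{myserver.awk} (an illustrative
name), it would be started like this:

@example
igawk -f myserver.awk
@end example

@noindent
Starting it with plain @command{gawk} would fail, because the
@samp{@@include} line is handled by @command{igawk}, not by
@command{gawk} itself.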
-Here is the code: - -@example -@c file eg/network/coreserv.awk -# CGI Library and core of a web server -@c endfile -@ignore -@c file eg/network/coreserv.awk -# -# Juergen Kahrs, Juergen.Kahrs@@vr-web.de -# with Arnold Robbins, arnold@@gnu.org -# September 2000 - -@c endfile -@end ignore -@c file eg/network/coreserv.awk -# Global arrays -# GETARG --- arguments to CGI GET command -# MENU --- menu items (path names) -# PARAM --- parameters of form x=y - -# Optional variable MyHost contains host address -# Optional variable MyPort contains port number -# Needs TopHeader, TopDoc, TopFooter -# Sets MyPrefix, HttpService, Status, Reason - -BEGIN @{ - if (MyHost == "") @{ - "uname -n" | getline MyHost - close("uname -n") - @} - if (MyPort == 0) MyPort = 8080 - HttpService = "/inet/tcp/" MyPort "/0/0" - MyPrefix = "http://" MyHost ":" MyPort - SetUpServer() - while ("awk" != "complex") @{ - # header lines are terminated this way - RS = ORS = "\r\n" - Status = 200 # this means OK - Reason = "OK" - Header = TopHeader - Document = TopDoc - Footer = TopFooter - if (GETARG["Method"] == "GET") @{ - HandleGET() - @} else if (GETARG["Method"] == "HEAD") @{ - # not yet implemented - @} else if (GETARG["Method"] != "") @{ - print "bad method", GETARG["Method"] - @} - Prompt = Header Document Footer - print "HTTP/1.0", Status, Reason |& HttpService - print "Connection: Close" |& HttpService - print "Pragma: no-cache" |& HttpService - len = length(Prompt) + length(ORS) - print "Content-length:", len |& HttpService - print ORS Prompt |& HttpService - # ignore all the header lines - while ((HttpService |& getline) > 0) - continue - # stop talking to this client - close(HttpService) - # wait for new client request - HttpService |& getline - # do some logging - print systime(), strftime(), $0 - CGI_setup($1, $2, $3) - @} -@} - -function CGI_setup( method, uri, version, i) -@{ - delete GETARG - delete MENU - delete PARAM - GETARG["Method"] = method - GETARG["URI"] = uri - GETARG["Version"] = version - - i = index(uri, "?") - if (i > 0) @{ # is there a "?" indicating a CGI request? - split(substr(uri, 1, i-1), MENU, "[/:]") - split(substr(uri, i+1), PARAM, "&") - for (i in PARAM) @{ - PARAM[i] = _CGI_decode(PARAM[i]) - j = index(PARAM[i], "=") - GETARG[substr(PARAM[i], 1, j-1)] = \ - substr(PARAM[i], j+1) - @} - @} else @{ # there is no "?", no need for splitting PARAMs - split(uri, MENU, "[/:]") - @} - for (i in MENU) # decode characters in path - if (i > 4) # but not those in host name - MENU[i] = _CGI_decode(MENU[i]) -@} -@c endfile -@end example - -This isolates details in a single function, @code{CGI_setup}. -Decoding of encoded characters is pushed off to a helper function, -@code{_CGI_decode}. 
The use of the leading underscore (@samp{_}) in -the function name is intended to indicate that it is an ``internal'' -function, although there is nothing to enforce this: - -@example -@c file eg/network/coreserv.awk -function _CGI_decode(str, hexdigs, i, pre, code1, code2, - val, result) -@{ - hexdigs = "123456789abcdef" - - i = index(str, "%") - if (i == 0) # no work to do - return str - - do @{ - pre = substr(str, 1, i-1) # part before %xx - code1 = substr(str, i+1, 1) # first hex digit - code2 = substr(str, i+2, 1) # second hex digit - str = substr(str, i+3) # rest of string - - code1 = tolower(code1) - code2 = tolower(code2) - val = index(hexdigs, code1) * 16 \ - + index(hexdigs, code2) - - result = result pre sprintf("%c", val) - i = index(str, "%") - @} while (i != 0) - if (length(str) > 0) - result = result str - return result -@} -@c endfile -@end example - -This works by splitting the string apart around an encoded character. -The two digits are converted to lowercase and looked up in a string -of hex digits. Note that @code{0} is not in the string on purpose; -@code{index} returns zero when it's not found, automatically giving -the correct value! Once the hexadecimal value is converted from -characters in a string into a numerical value, @code{sprintf} -converts the value back into a real character. -The following is a simple test harness for the above functions: - -@example -@c file eg/network/testserv.awk -BEGIN @{ - CGI_setup("GET", - "http://www.gnu.org/cgi-bin/foo?p1=stuff&p2=stuff%26junk" \ - "&percent=a %25 sign", - "1.0") - for (i in MENU) - printf "MENU[\"%s\"] = %s\n", i, MENU[i] - for (i in PARAM) - printf "PARAM[\"%s\"] = %s\n", i, PARAM[i] - for (i in GETARG) - printf "GETARG[\"%s\"] = %s\n", i, GETARG[i] -@} -@c endfile -@end example - -And this is the result when we run it: - -@c artificial line wrap in last output line -@example -$ gawk -f testserv.awk -@print{} MENU["4"] = www.gnu.org -@print{} MENU["5"] = cgi-bin -@print{} MENU["6"] = foo -@print{} MENU["1"] = http -@print{} MENU["2"] = -@print{} MENU["3"] = -@print{} PARAM["1"] = p1=stuff -@print{} PARAM["2"] = p2=stuff&junk -@print{} PARAM["3"] = percent=a % sign -@print{} GETARG["p1"] = stuff -@print{} GETARG["percent"] = a % sign -@print{} GETARG["p2"] = stuff&junk -@print{} GETARG["Method"] = GET -@print{} GETARG["Version"] = 1.0 -@print{} GETARG["URI"] = http://www.gnu.org/cgi-bin/foo?p1=stuff& -p2=stuff%26junk&percent=a %25 sign -@end example - -@node Simple Server, Caveats, Interacting Service, Using Networking -@section A Simple Web Server -@cindex GUI -In the preceding @value{SECTION}, we built the core logic for event driven GUIs. -In this @value{SECTION}, we finally extend the core to a real application. -No one would actually write a commercial web server in @command{gawk}, but -it is instructive to see that it is feasible in principle. - -@iftex -@image{uf002331,4in} -@end iftex - -@cindex ELIZA program -@cindex Weizenbaum, Joseph -The application is ELIZA, the famous program by Joseph Weizenbaum that -mimics the behavior of a professional psychotherapist when talking to you. -Weizenbaum would certainly object to this description, but this is part of -the legend around ELIZA. -Take the site-independent core logic and append the following code: - -@example -@c file eg/network/eliza.awk -function SetUpServer() @{ - SetUpEliza() - TopHeader = \ - "An HTTP-based System with GAWK\ - \ - " - TopDoc = "\ -

Please choose one of the following actions:

\ -
" - TopFooter = "" -@} -@c endfile -@end example - -@code{SetUpServer} is similar to the previous example, -except for calling another function, @code{SetUpEliza}. -This approach can be used to implement other kinds of servers. -The only changes needed to do so are hidden in the functions -@code{SetUpServer} and @code{HandleGET}. Perhaps it might be necessary to -implement other HTTP methods. -The @command{igawk} program that comes with @command{gawk} -may be useful for this process. - -When extending this example to a complete application, the first -thing to do is to implement the function @code{SetUpServer} to -initialize the HTML pages and some variables. These initializations -determine the way your HTML pages look (colors, titles, menu -items, etc.). - -@cindex GUI -The function @code{HandleGET} is a nested case selection that decides -which page the user wants to see next. Each nesting level refers to a menu -level of the GUI. Each case implements a certain action of the menu. On the -deepest level of case selection, the handler essentially knows what the -user wants and stores the answer into the variable that holds the HTML -page contents: - -@smallexample -@c file eg/network/eliza.awk -function HandleGET() @{ - # A real HTTP server would treat some parts of the URI as a file name. - # We take parts of the URI as menu choices and go on accordingly. - if(MENU[2] == "AboutServer") @{ - Document = "This is not a CGI script.\ - This is an httpd, an HTML file, and a CGI script all \ - in one GAWK script. It needs no separate www-server, \ - no installation, and no root privileges.\ -

To run it, do this:

    \ -
  • start this script with \"gawk -f httpserver.awk\",
  • \ -
  • and on the same host let your www browser open location\ - \"http://localhost:8080\"
  • \ -
\

\ Details of HTTP come from:

    \ -
  • Hethmon: Illustrated Guide to HTTP

    \ -
  • RFC 2068

JK 14.9.1997

" - @} else if (MENU[2] == "AboutELIZA") @{ - Document = "This is an implementation of the famous ELIZA\ - program by Joseph Weizenbaum. It is written in GAWK and\ -/bin/sh: expad: command not found - @} else if (MENU[2] == "StartELIZA") @{ - gsub(/\+/, " ", GETARG["YouSay"]) - # Here we also have to substitute coded special characters - Document = "
" \ - "

" ElizaSays(GETARG["YouSay"]) "

\ -

\ -

" - @} -@} -@c endfile -@end smallexample - -Now we are down to the heart of ELIZA, so you can see how it works. -Initially the user does not say anything; then ELIZA resets its money -counter and asks the user to tell what comes to mind open heartedly. -The subsequent answers are converted to uppercase and stored for -later comparison. ELIZA presents the bill when being confronted with -a sentence that contains the phrase ``shut up.'' Otherwise, it looks for -keywords in the sentence, conjugates the rest of the sentence, remembers -the keyword for later use, and finally selects an answer from the set of -possible answers: - -@smallexample -@c file eg/network/eliza.awk -function ElizaSays(YouSay) @{ - if (YouSay == "") @{ - cost = 0 - answer = "HI, IM ELIZA, TELL ME YOUR PROBLEM" - @} else @{ - q = toupper(YouSay) - gsub("'", "", q) - if(q == qold) @{ - answer = "PLEASE DONT REPEAT YOURSELF !" - @} else @{ - if (index(q, "SHUT UP") > 0) @{ - answer = "WELL, PLEASE PAY YOUR BILL. ITS EXACTLY ... $"\ - int(100*rand()+30+cost/100) - @} else @{ - qold = q - w = "-" # no keyword recognized yet - for (i in k) @{ # search for keywords - if (index(q, i) > 0) @{ - w = i - break - @} - @} - if (w == "-") @{ # no keyword, take old subject - w = wold - subj = subjold - @} else @{ # find subject - subj = substr(q, index(q, w) + length(w)+1) - wold = w - subjold = subj # remember keyword and subject - @} - for (i in conj) - gsub(i, conj[i], q) # conjugation - # from all answers to this keyword, select one randomly - answer = r[indices[int(split(k[w], indices) * rand()) + 1]] - # insert subject into answer - gsub("_", subj, answer) - @} - @} - @} - cost += length(answer) # for later payment : 1 cent per character - return answer -@} -@c endfile -@end smallexample - -In the long but simple function @code{SetUpEliza}, you can see tables -for conjugation, keywords, and answers.@footnote{The version shown -here is abbreviated. The full version comes with the @command{gawk} -distribution.} The associative array @code{k} -contains indices into the array of answers @code{r}. To choose an -answer, ELIZA just picks an index randomly: - -@example -@c file eg/network/eliza.awk -function SetUpEliza() @{ - srand() - wold = "-" - subjold = " " - - # table for conjugation - conj[" ARE " ] = " AM " - conj["WERE " ] = "WAS " - conj[" YOU " ] = " I " - conj["YOUR " ] = "MY " - conj[" IVE " ] =\ - conj[" I HAVE " ] = " YOU HAVE " - conj[" YOUVE " ] =\ - conj[" YOU HAVE "] = " I HAVE " - conj[" IM " ] =\ - conj[" I AM " ] = " YOU ARE " - conj[" YOURE " ] =\ - conj[" YOU ARE " ] = " I AM " - - # table of all answers - r[1] = "DONT YOU BELIEVE THAT I CAN _" - r[2] = "PERHAPS YOU WOULD LIKE TO BE ABLE TO _ ?" -@c endfile - @dots{} -@end example -@ignore -@c file eg/network/eliza.awk - r[3] = "YOU WANT ME TO BE ABLE TO _ ?" - r[4] = "PERHAPS YOU DONT WANT TO _ " - r[5] = "DO YOU WANT TO BE ABLE TO _ ?" - r[6] = "WHAT MAKES YOU THINK I AM _ ?" - r[7] = "DOES IT PLEASE YOU TO BELIEVE I AM _ ?" - r[8] = "PERHAPS YOU WOULD LIKE TO BE _ ?" - r[9] = "DO YOU SOMETIMES WISH YOU WERE _ ?" - r[10] = "DONT YOU REALLY _ ?" - r[11] = "WHY DONT YOU _ ?" - r[12] = "DO YOU WISH TO BE ABLE TO _ ?" - r[13] = "DOES THAT TROUBLE YOU ?" - r[14] = "TELL ME MORE ABOUT SUCH FEELINGS" - r[15] = "DO YOU OFTEN FEEL _ ?" - r[16] = "DO YOU ENJOY FEELING _ ?" - r[17] = "DO YOU REALLY BELIEVE I DONT _ ?" - r[18] = "PERHAPS IN GOOD TIME I WILL _ " - r[19] = "DO YOU WANT ME TO _ ?" - r[20] = "DO YOU THINK YOU SHOULD BE ABLE TO _ ?" - r[21] = "WHY CANT YOU _ ?" 
- r[22] = "WHY ARE YOU INTERESTED IN WHETHER OR NOT I AM _ ?" - r[23] = "WOULD YOU PREFER IF I WERE NOT _ ?" - r[24] = "PERHAPS IN YOUR FANTASIES I AM _ " - r[25] = "HOW DO YOU KNOW YOU CANT _ ?" - r[26] = "HAVE YOU TRIED ?" - r[27] = "PERHAPS YOU CAN NOW _ " - r[28] = "DID YOU COME TO ME BECAUSE YOU ARE _ ?" - r[29] = "HOW LONG HAVE YOU BEEN _ ?" - r[30] = "DO YOU BELIEVE ITS NORMAL TO BE _ ?" - r[31] = "DO YOU ENJOY BEING _ ?" - r[32] = "WE WERE DISCUSSING YOU -- NOT ME" - r[33] = "Oh, I _" - r[34] = "YOU'RE NOT REALLY TALKING ABOUT ME, ARE YOU ?" - r[35] = "WHAT WOULD IT MEAN TO YOU, IF YOU GOT _ ?" - r[36] = "WHY DO YOU WANT _ ?" - r[37] = "SUPPOSE YOU SOON GOT _" - r[38] = "WHAT IF YOU NEVER GOT _ ?" - r[39] = "I SOMETIMES ALSO WANT _" - r[40] = "WHY DO YOU ASK ?" - r[41] = "DOES THAT QUESTION INTEREST YOU ?" - r[42] = "WHAT ANSWER WOULD PLEASE YOU THE MOST ?" - r[43] = "WHAT DO YOU THINK ?" - r[44] = "ARE SUCH QUESTIONS IN YOUR MIND OFTEN ?" - r[45] = "WHAT IS IT THAT YOU REALLY WANT TO KNOW ?" - r[46] = "HAVE YOU ASKED ANYONE ELSE ?" - r[47] = "HAVE YOU ASKED SUCH QUESTIONS BEFORE ?" - r[48] = "WHAT ELSE COMES TO MIND WHEN YOU ASK THAT ?" - r[49] = "NAMES DON'T INTEREST ME" - r[50] = "I DONT CARE ABOUT NAMES -- PLEASE GO ON" - r[51] = "IS THAT THE REAL REASON ?" - r[52] = "DONT ANY OTHER REASONS COME TO MIND ?" - r[53] = "DOES THAT REASON EXPLAIN ANYTHING ELSE ?" - r[54] = "WHAT OTHER REASONS MIGHT THERE BE ?" - r[55] = "PLEASE DON'T APOLOGIZE !" - r[56] = "APOLOGIES ARE NOT NECESSARY" - r[57] = "WHAT FEELINGS DO YOU HAVE WHEN YOU APOLOGIZE ?" - r[58] = "DON'T BE SO DEFENSIVE" - r[59] = "WHAT DOES THAT DREAM SUGGEST TO YOU ?" - r[60] = "DO YOU DREAM OFTEN ?" - r[61] = "WHAT PERSONS APPEAR IN YOUR DREAMS ?" - r[62] = "ARE YOU DISTURBED BY YOUR DREAMS ?" - r[63] = "HOW DO YOU DO ... PLEASE STATE YOUR PROBLEM" - r[64] = "YOU DON'T SEEM QUITE CERTAIN" - r[65] = "WHY THE UNCERTAIN TONE ?" - r[66] = "CAN'T YOU BE MORE POSITIVE ?" - r[67] = "YOU AREN'T SURE ?" - r[68] = "DON'T YOU KNOW ?" - r[69] = "WHY NO _ ?" - r[70] = "DON'T SAY NO, IT'S ALWAYS SO NEGATIVE" - r[71] = "WHY NOT ?" - r[72] = "ARE YOU SURE ?" - r[73] = "WHY NO ?" - r[74] = "WHY ARE YOU CONCERNED ABOUT MY _ ?" - r[75] = "WHAT ABOUT YOUR OWN _ ?" - r[76] = "CAN'T YOU THINK ABOUT A SPECIFIC EXAMPLE ?" - r[77] = "WHEN ?" - r[78] = "WHAT ARE YOU THINKING OF ?" - r[79] = "REALLY, ALWAYS ?" - r[80] = "DO YOU REALLY THINK SO ?" - r[81] = "BUT YOU ARE NOT SURE YOU _ " - r[82] = "DO YOU DOUBT YOU _ ?" - r[83] = "IN WHAT WAY ?" - r[84] = "WHAT RESEMBLANCE DO YOU SEE ?" - r[85] = "WHAT DOES THE SIMILARITY SUGGEST TO YOU ?" - r[86] = "WHAT OTHER CONNECTION DO YOU SEE ?" - r[87] = "COULD THERE REALLY BE SOME CONNECTIONS ?" - r[88] = "HOW ?" - r[89] = "YOU SEEM QUITE POSITIVE" - r[90] = "ARE YOU SURE ?" - r[91] = "I SEE" - r[92] = "I UNDERSTAND" - r[93] = "WHY DO YOU BRING UP THE TOPIC OF FRIENDS ?" - r[94] = "DO YOUR FRIENDS WORRY YOU ?" - r[95] = "DO YOUR FRIENDS PICK ON YOU ?" - r[96] = "ARE YOU SURE YOU HAVE ANY FRIENDS ?" - r[97] = "DO YOU IMPOSE ON YOUR FRIENDS ?" - r[98] = "PERHAPS YOUR LOVE FOR FRIENDS WORRIES YOU" - r[99] = "DO COMPUTERS WORRY YOU ?" - r[100] = "ARE YOU TALKING ABOUT ME IN PARTICULAR ?" - r[101] = "ARE YOU FRIGHTENED BY MACHINES ?" - r[102] = "WHY DO YOU MENTION COMPUTERS ?" - r[103] = "WHAT DO YOU THINK MACHINES HAVE TO DO WITH YOUR PROBLEMS ?" - r[104] = "DON'T YOU THINK COMPUTERS CAN HELP PEOPLE ?" - r[105] = "WHAT IS IT ABOUT MACHINES THAT WORRIES YOU ?" 
- r[106] = "SAY, DO YOU HAVE ANY PSYCHOLOGICAL PROBLEMS ?" - r[107] = "WHAT DOES THAT SUGGEST TO YOU ?" - r[108] = "I SEE" - r[109] = "IM NOT SURE I UNDERSTAND YOU FULLY" - r[110] = "COME COME ELUCIDATE YOUR THOUGHTS" - r[111] = "CAN YOU ELABORATE ON THAT ?" - r[112] = "THAT IS QUITE INTERESTING" - r[113] = "WHY DO YOU HAVE PROBLEMS WITH MONEY ?" - r[114] = "DO YOU THINK MONEY IS EVERYTHING ?" - r[115] = "ARE YOU SURE THAT MONEY IS THE PROBLEM ?" - r[116] = "I THINK WE WANT TO TALK ABOUT YOU, NOT ABOUT ME" - r[117] = "WHAT'S ABOUT ME ?" - r[118] = "WHY DO YOU ALWAYS BRING UP MY NAME ?" -@c endfile -@end ignore - -@example -@c file eg/network/eliza.awk - # table for looking up answers that - # fit to a certain keyword - k["CAN YOU"] = "1 2 3" - k["CAN I"] = "4 5" - k["YOU ARE"] =\ - k["YOURE"] = "6 7 8 9" -@c endfile - @dots{} -@end example -@ignore -@c file eg/network/eliza.awk - k["I DONT"] = "10 11 12 13" - k["I FEEL"] = "14 15 16" - k["WHY DONT YOU"] = "17 18 19" - k["WHY CANT I"] = "20 21" - k["ARE YOU"] = "22 23 24" - k["I CANT"] = "25 26 27" - k["I AM"] =\ - k["IM "] = "28 29 30 31" - k["YOU "] = "32 33 34" - k["I WANT"] = "35 36 37 38 39" - k["WHAT"] =\ - k["HOW"] =\ - k["WHO"] =\ - k["WHERE"] =\ - k["WHEN"] =\ - k["WHY"] = "40 41 42 43 44 45 46 47 48" - k["NAME"] = "49 50" - k["CAUSE"] = "51 52 53 54" - k["SORRY"] = "55 56 57 58" - k["DREAM"] = "59 60 61 62" - k["HELLO"] =\ - k["HI "] = "63" - k["MAYBE"] = "64 65 66 67 68" - k[" NO "] = "69 70 71 72 73" - k["YOUR"] = "74 75" - k["ALWAYS"] = "76 77 78 79" - k["THINK"] = "80 81 82" - k["LIKE"] = "83 84 85 86 87 88 89" - k["YES"] = "90 91 92" - k["FRIEND"] = "93 94 95 96 97 98" - k["COMPUTER"] = "99 100 101 102 103 104 105" - k["-"] = "106 107 108 109 110 111 112" - k["MONEY"] = "113 114 115" - k["ELIZA"] = "116 117 118" -@c endfile -@end ignore -@example -@c file eg/network/eliza.awk -@} -@c endfile -@end example - -@cindex Humphrys, Mark -@cindex ELIZA program -@cindex Yahoo! -Some interesting remarks and details (including the original source code -of ELIZA) are found on Mark Humphrys' home page. Yahoo! also has a -page with a collection of ELIZA-like programs. Many of them are written -in Java, some of them disclosing the Java source code, and a few even -explain how to modify the Java source code. - -@node Caveats, Challenges, Simple Server, Using Networking -@section Network Programming Caveats - -By now it should be clear -that debugging a networked application is more -complicated than debugging a single-process single-hosted application. -The behavior of a networked application sometimes looks non-causal because -it is not reproducible in a strong sense. Whether a network application -works or not sometimes depends on the following: - -@itemize @bullet -@item -How crowded the underlying network is. - -@item -If the party at the other end is running or not. - -@item -The state of the party at the other end. -@end itemize - -@cindex network -The most difficult problems for a beginner arise from the hidden states of the -underlying network. After closing a TCP connection, it's often necessary to wait -a short while before reopening the connection. Even more difficult is the -establishment of a connection that previously ended with a ``broken pipe.'' -Those connections have to ``time out'' for a minute or so -before they can reopen. -Check this with the command @samp{netstat -a}, which -provides a list of still ``active'' connections. 
- -@node Challenges, , Caveats, Using Networking -@section Where To Go From Here - -@cindex Loebner, Hugh -@cindex Contest -Now, you have learned enough to build your own application. You could, -for example, take part in the -Loebner Contest -to win $100,000. The Loebner Prize is the first formal instantiation of -a Turing Test. Hugh Loebner agreed with The Cambridge Center for Behavioral -Studies to underwrite a contest designed to implement the Turing Test. -Dr.@: Loebner pledged a Grand Prize of $100,000 for the first computer whose -responses were indistinguishable from a human's. Each year an annual prize -of $2000 and a bronze medal is awarded to the @emph{most} human computer. -The winner of the annual contest is the best entry relative to other entries -that year, irrespective of how good it is in an absolute sense. Here is -an example of a conversation with the winning program of 1997: - -@smallexample -(c)1997 Cambridge Center For Behavioral Studies all rights reserved [CONVERSE] -[Intelligent Research Ltd.] Start at: [1997/04/29 08:59:40] -PROGRAM[10:39:20]Hi, my name is Catherine, what's yours? -JUDGE02[10:39:36]My name is Nathan. -PROGRAM[10:39:37]Well, it's nice to meet you, Nathan. -PROGRAM[10:39:45]Did you see that story on CNN last night about the lesbian -PROGRAM[10:40:03]couple who came out at a White House party on Sunday? -JUDGE02[10:40:45]No. I'm sorry I missed that. How did the Prez take it? -PROGRAM[10:40:47]Ellen Degeneres was one of them - she was kissing her lover -PROGRAM[10:41:08]in front of Bill Clinton. Do you think he was very -PROGRAM[10:41:21]embarrassed? -JUDGE02[10:41:37]You tell me. I didn't see it. -PROGRAM[10:41:38]Are you still a Friend of Bill Clinton? -JUDGE02[10:42:22]Yes. I would like to talk about something else. Do you know anything about sculpture. -PROGRAM[10:42:24]Well, I suppose he's doing his best like most of us, but he -PROGRAM[10:42:35]does seem to attract an UNUSUAL amount of bad publicity, -PROGRAM[10:42:51]wouldnt you say--can it all be bad luck? -JUDGE02[10:43:20]I asked you if you knew anything about sculpture. -PROGRAM[10:43:21]OK, but what happens now--does it all drift on, with -PROGRAM[10:43:36]everyone waiting to see how much his friends have said about -PROGRAM[10:43:47]him under threat of lots and lots of jail-or will something -PROGRAM[10:44:01]dramatic actually happen? -JUDGE02[10:44:19]Do you know what sculpture is? -PROGRAM[10:44:20]Well, youre more a fan of his than I am I guess---though I -PROGRAM[10:44:34]cant SWEAR what Id have done in that locked hotel room, I -PROGRAM[10:44:51]must say. -JUDGE02[10:45:26]Have you ever been to the Salmagundi Club? -PROGRAM[10:45:29]Never. to go back to the original discussion Time to get -PROGRAM[10:45:43]off this one I think and onto something more down to earth! -JUDGE02[10:46:20]What is more down to earth than art? -PROGRAM[10:46:21]I don't know what -- to be honest. - -PROGRAM[10:46:30]I suppose it depends on where you're coming from, but as the -PROGRAM[10:46:46]song goes in My Fair Lady, "Why can't a woman be more like a -PROGRAM[10:47:03]man? " -JUDGE02[10:47:52]I liked that song but I prefer "Why can't children be more like we are" -JUDGE02[10:48:10]from another play the name of which I cannot remember. -PROGRAM[10:48:12]What, if anything, do you enjoy about being a man? -JUDGE02[10:49:01]I would enjoy all aspects of life no matter what my gender. -PROGRAM[10:49:02]Do you think of yourself as being attractive? 
-@end smallexample - -@cindex Clinton, Bill -This program insists on always speaking about the same story around Bill -Clinton. You see, even a program with a rather narrow mind can behave so -much like a human being that it can win this prize. It is quite common to -let these programs talk to each other via network connections. But during the -competition itself, the program and its computer have to be present at the -place the competition is held. We all would love to see a @command{gawk} -program win in such an event. Maybe it is up to you to accomplish this? - -Some other ideas for useful networked applications: -@itemize @bullet -@item -Read the file @file{doc/awkforai.txt} in the @command{gawk} distribution. -It was written by Ronald P.@: Loui (Associate Professor of -Computer Science, at Washington University in St. Louis, -@email{loui@@ai.wustl.edu}) and summarizes why -he teaches @command{gawk} to students of Artificial Intelligence. Here are -some passages from the text: - -@cindex AI -@cindex PROLOG -@cindex Loui, Ronald P. -@cindex agent -@quotation -The GAWK manual can -be consumed in a single lab session and the language can be mastered by -the next morning by the average student. GAWK's automatic -initialization, implicit coercion, I/O support and lack of pointers -forgive many of the mistakes that young programmers are likely to make. -Those who have seen C but not mastered it are happy to see that GAWK -retains some of the same sensibilities while adding what must be -regarded as spoonsful of syntactic sugar.@* -@dots{}@* -@cindex robot -There are further simple answers. Probably the best is the fact that -increasingly, undergraduate AI programming is involving the Web. Oren -Etzioni (University of Washington, Seattle) has for a while been arguing -that the ``softbot'' is replacing the mechanical engineers' robot as the -most glamorous AI testbed. If the artifact whose behavior needs to be -controlled in an intelligent way is the software agent, then a language -that is well-suited to controlling the software environment is the -appropriate language. That would imply a scripting language. If the -robot is KAREL, then the right language is ``turn left; turn right.'' If -the robot is Netscape, then the right language is something that can -generate @samp{netscape -remote 'openURL(http://cs.wustl.edu/~loui)'} with -elan.@* -@dots{}@* -AI programming requires high-level thinking. There have always been a few -gifted programmers who can write high-level programs in assembly language. -Most however need the ambient abstraction to have a higher floor.@* -@dots{}@* -Second, inference is merely the expansion of notation. No matter whether -the logic that underlies an AI program is fuzzy, probabilistic, deontic, -defeasible, or deductive, the logic merely defines how strings can be -transformed into other strings. A language that provides the best -support for string processing in the end provides the best support for -logic, for the exploration of various logics, and for most forms of -symbolic processing that AI might choose to call ``reasoning'' instead of -``logic.'' The implication is that PROLOG, which saves the AI programmer -from having to write a unifier, saves perhaps two dozen lines of GAWK -code at the expense of strongly biasing the logic and representational -expressiveness of any approach. -@end quotation - -Now that @command{gawk} itself can connect to the Internet, it should be obvious -that it is suitable for writing intelligent web agents. 
- -@item -@command{awk} is strong at pattern recognition and string processing. -So, it is well suited to the classic problem of language translation. -A first try could be a program that knows the 100 most frequent English -words and their counterparts in German or French. The service could be -implemented by regularly reading email with the program above, replacing -each word by its translation and sending the translation back via SMTP. -Users would send English email to their translation service and get -back a translated email message in return. As soon as this works, -more effort can be spent on a real translation program. - -@item -Another dialogue-oriented application (on the verge -of ridicule) is the email ``support service.'' Troubled customers write an -email to an automatic @command{gawk} service that reads the email. It looks -for keywords in the mail and assembles a reply email accordingly. By carefully -investigating the email header, and repeating these keywords through the -reply email, it is rather simple to give the customer a feeling that -someone cares. Ideally, such a service would search a database of previous -cases for solutions. If none exists, the database could, for example, consist -of all the newsgroups, mailing lists and FAQs on the Internet. -@end itemize - -@node Some Applications and Techniques, Links, Using Networking, Top -@comment node-name, next, previous, up - -@chapter Some Applications and Techniques -In this @value{CHAPTER}, we look at a number of self-contained -scripts, with an emphasis on concise networking. Along the way, we -work towards creating building blocks that encapsulate often needed -functions of the networking world, show new techniques that -broaden the scope of problems that can be solved with @command{gawk}, and -explore leading edge technology that may shape the future of networking. - -We often refer to the site-independent core of the server that -we built in -@ref{Simple Server, ,A Simple Web Server}. -When building new and non-trivial servers, we -always copy this building block and append new instances of the two -functions @code{SetUpServer} and @code{HandleGET}. - -This makes a lot of sense, since -this scheme of event-driven -execution provides @command{gawk} with an interface to the most widely -accepted standard for GUIs: the web browser. Now, @command{gawk} can even rival -Tcl/Tk. - -@cindex Tcl/Tk -@cindex JavaScript -Tcl and @command{gawk} have much in common. Both are simple scripting languages -that allow us to quickly solve problems with short programs. But Tcl has Tk -on top of it and @command{gawk} had nothing comparable up to now. While Tcl -needs a large and ever changing library (Tk, which was bound to the X Window -System until recently), @command{gawk} needs just the networking interface -and some kind of browser on the client's side. Besides better portability, -the most important advantage of this approach (embracing well-established -standards such HTTP and HTML) is that @emph{we do not need to change the -language}. We let others do the work of fighting over protocols and standards. -We can use HTML, JavaScript, VRML, or whatever else comes along to do our work. - -@menu -* PANIC:: An Emergency Web Server. -* GETURL:: Retrieving Web Pages. -* REMCONF:: Remote Configuration Of Embedded Systems. -* URLCHK:: Look For Changed Web Pages. -* WEBGRAB:: Extract Links From A Page. -* STATIST:: Graphing A Statistical Distribution. -* MAZE:: Walking Through A Maze In Virtual Reality. 
-* MOBAGWHO:: A Simple Mobile Agent. -* STOXPRED:: Stock Market Prediction As A Service. -* PROTBASE:: Searching Through A Protein Database. -@end menu - -@node PANIC, GETURL, Some Applications and Techniques, Some Applications and Techniques -@section PANIC: an Emergency Web Server -@cindex PANIC program -At first glance, the @code{"Hello, world"} example in -@ref{Primitive Service, ,A Primitive Web Service}, -seems useless. By adding just a few lines, we can turn it into something useful. - -The PANIC program tells everyone who connects that the local -site is not working. When a web server breaks down, it makes a difference -if customers get a strange ``network unreachable'' message, or a short message -telling them that the server has a problem. In such an emergency, -the hard disk and everything on it (including the regular web service) may -be unavailable. Rebooting the web server off a diskette makes sense in this -setting. - -To use the PANIC program as an emergency web server, all you need are the -@command{gawk} executable and the program below on a diskette. By default, -it connects to port 8080. A different value may be supplied on the -command line: - -@example -@c file eg/network/panic.awk -BEGIN @{ - RS = ORS = "\r\n" - if (MyPort == 0) MyPort = 8080 - HttpService = "/inet/tcp/" MyPort "/0/0" - Hello = "Out Of Service" \ - "

" \ - "This site is temporarily out of service." \ - "

" - Len = length(Hello) + length(ORS) - while ("awk" != "complex") @{ - print "HTTP/1.0 200 OK" |& HttpService - print "Content-Length: " Len ORS |& HttpService - print Hello |& HttpService - while ((HttpService |& getline) > 0) - continue; - close(HttpService) - @} -@} -@c endfile -@end example - -@node GETURL, REMCONF, PANIC, Some Applications and Techniques -@section GETURL: Retrieving Web Pages -@cindex GETURL program -@cindex robot -GETURL is a versatile building block for shell scripts that need to retrieve -files from the Internet. It takes a web address as a command-line parameter and -tries to retrieve the contents of this address. The contents are printed -to standard output, while the header is printed to @file{/dev/stderr}. -A surrounding shell script -could analyze the contents and extract the text or the links. An ASCII -browser could be written around GETURL. But more interestingly, web robots are -straightforward to write on top of GETURL. On the Internet, you can find -several programs of the same name that do the same job. They are usually -much more complex internally and at least 10 times longer. - -At first, GETURL checks if it was called with exactly one web address. -Then, it checks if the user chose to use a special proxy server whose name -is handed over in a variable. By default, it is assumed that the local -machine serves as proxy. GETURL uses the @code{GET} method by default -to access the web page. By handing over the name of a different method -(such as @code{HEAD}), it is possible to choose a different behavior. With -the @code{HEAD} method, the user does not receive the body of the page -content, but does receive the header: - -@example -@c file eg/network/geturl.awk -BEGIN @{ - if (ARGC != 2) @{ - print "GETURL - retrieve Web page via HTTP 1.0" - print "IN:\n the URL as a command-line parameter" - print "PARAM(S):\n -v Proxy=MyProxy" - print "OUT:\n the page content on stdout" - print " the page header on stderr" - print "JK 16.05.1997" - print "ADR 13.08.2000" - exit - @} - URL = ARGV[1]; ARGV[1] = "" - if (Proxy == "") Proxy = "127.0.0.1" - if (ProxyPort == 0) ProxyPort = 80 - if (Method == "") Method = "GET" - HttpService = "/inet/tcp/0/" Proxy "/" ProxyPort - ORS = RS = "\r\n\r\n" - print Method " " URL " HTTP/1.0" |& HttpService - HttpService |& getline Header - print Header > "/dev/stderr" - while ((HttpService |& getline) > 0) - printf "%s", $0 - close(HttpService) -@} -@c endfile -@end example - -This program can be changed as needed, but be careful with the last lines. -Make sure transmission of binary data is not corrupted by additional line -breaks. Even as it is now, the byte sequence @code{"\r\n\r\n"} would -disappear if it were contained in binary data. Don't get caught in a -trap when trying a quick fix on this one. - -@node REMCONF, URLCHK, GETURL, Some Applications and Techniques -@section REMCONF: Remote Configuration of Embedded Systems -@cindex REMCONF program -@cindex Linux -@cindex GNU/Linux -@cindex Yahoo! -Today, you often find powerful processors in embedded systems. Dedicated -network routers and controllers for all kinds of machinery are examples -of embedded systems. Processors like the Intel 80x86 or the AMD Elan are -able to run multitasking operating systems, such as XINU or GNU/Linux -in embedded PCs. These systems are small and usually do not have -a keyboard or a display. Therefore it is difficult to set up their -configuration. 
There are several widespread ways to set them up: - -@itemize @bullet -@item -DIP switches - -@item -Read Only Memories such as EPROMs - -@item -Serial lines or some kind of keyboard - -@item -Network connections via @command{telnet} or SNMP - -@item -HTTP connections with HTML GUIs -@end itemize - -In this @value{SECTION}, we look at a solution that uses HTTP connections -to control variables of an embedded system that are stored in a file. -Since embedded systems have tight limits on resources like memory, -it is difficult to employ advanced techniques such as SNMP and HTTP -servers. @command{gawk} fits in quite nicely with its single executable -which needs just a short script to start working. -The following program stores the variables in a file, and a concurrent -process in the embedded system may read the file. The program uses the -site-independent part of the simple web server that we developed in -@ref{Interacting Service, ,A Web Service with Interaction}. -As mentioned there, all we have to do is to write two new procedures -@code{SetUpServer} and @code{HandleGET}: - -@smallexample -@c file eg/network/remconf.awk -function SetUpServer() @{ - TopHeader = "Remote Configuration" - TopDoc = "\ -

Please choose one of the following actions:

\ - " - TopFooter = "" - if (ConfigFile == "") ConfigFile = "config.asc" -@} -@c endfile -@end smallexample - -The function @code{SetUpServer} initializes the top level HTML texts -as usual. It also initializes the name of the file that contains the -configuration parameters and their values. In case the user supplies -a name from the command line, that name is used. The file is expected to -contain one parameter per line, with the name of the parameter in -column one and the value in column two. - -The function @code{HandleGET} reflects the structure of the menu -tree as usual. The first menu choice tells the user what this is all -about. The second choice reads the configuration file line by line -and stores the parameters and their values. Notice that the record -separator for this file is @code{"\n"}, in contrast to the record separator -for HTTP. The third menu choice builds an HTML table to show -the contents of the configuration file just read. The fourth choice -does the real work of changing parameters, and the last one just saves -the configuration into a file: - -@smallexample -@c file eg/network/remconf.awk -function HandleGET() @{ - if(MENU[2] == "AboutServer") @{ - Document = "This is a GUI for remote configuration of an\ - embedded system. It is is implemented as one GAWK script." - @} else if (MENU[2] == "ReadConfig") @{ - RS = "\n" - while ((getline < ConfigFile) > 0) - config[$1] = $2; - close(ConfigFile) - RS = "\r\n" - Document = "Configuration has been read." - @} else if (MENU[2] == "CheckConfig") @{ - Document = "" - for (i in config) - Document = Document "" \ - "" - Document = Document "
" i "" config[i] "
" - @} else if (MENU[2] == "ChangeConfig") @{ - if ("Param" in GETARG) @{ # any parameter to set? - if (GETARG["Param"] in config) @{ # is parameter valid? - config[GETARG["Param"]] = GETARG["Value"] - Document = (GETARG["Param"] " = " GETARG["Value"] ".") - @} else @{ - Document = "Parameter " GETARG["Param"] " is invalid." - @} - @} else @{ - Document = "

Change one parameter

\ - \ - \ - \ - \ -
Parameter Value
" - @} - @} else if (MENU[2] == "SaveConfig") @{ - for (i in config) - printf("%s %s\n", i, config[i]) > ConfigFile - close(ConfigFile) - Document = "Configuration has been saved." - @} -@} -@c endfile -@end smallexample - -@cindex MiniSQL -We could also view the configuration file as a database. From this -point of view, the previous program acts like a primitive database server. -Real SQL database systems also make a service available by providing -a TCP port that clients can connect to. But the application level protocols -they use are usually proprietary and also change from time to time. -This is also true for the protocol that -MiniSQL uses. - -@node URLCHK, WEBGRAB, REMCONF, Some Applications and Techniques -@section URLCHK: Look for Changed Web Pages -@cindex URLCHK program -Most people who make heavy use of Internet resources have a large -bookmark file with pointers to interesting web sites. It is impossible -to regularly check by hand if any of these sites have changed. A program -is needed to automatically look at the headers of web pages and tell -which ones have changed. URLCHK does the comparison after using GETURL -with the @code{HEAD} method to retrieve the header. - -Like GETURL, this program first checks that it is called with exactly -one command-line parameter. URLCHK also takes the same command-line variables -@code{Proxy} and @code{ProxyPort} as GETURL, -because these variables are handed over to GETURL for each URL -that gets checked. The one and only parameter is the name of a file that -contains one line for each URL. In the first column, we find the URL, and -the second and third columns hold the length of the URL's body when checked -for the two last times. Now, we follow this plan: - -@enumerate -@item -Read the URLs from the file and remember their most recent lengths - -@item -Delete the contents of the file - -@item -For each URL, check its new length and write it into the file - -@item -If the most recent and the new length differ, tell the user -@end enumerate - -It may seem a bit peculiar to read the URLs from a file together -with their two most recent lengths, but this approach has several -advantages. You can call the program again and again with the same -file. After running the program, you can regenerate the changed URLs -by extracting those lines that differ in their second and third columns: - -@c inspired by URLCHK in iX 5/97 166. 
-@smallexample -@c file eg/network/urlchk.awk -BEGIN @{ - if (ARGC != 2) @{ - print "URLCHK - check if URLs have changed" - print "IN:\n the file with URLs as a command-line parameter" - print " file contains URL, old length, new length" - print "PARAMS:\n -v Proxy=MyProxy -v ProxyPort=8080" - print "OUT:\n same as file with URLs" - print "JK 02.03.1998" - exit - @} - URLfile = ARGV[1]; ARGV[1] = "" - if (Proxy != "") Proxy = " -v Proxy=" Proxy - if (ProxyPort != "") ProxyPort = " -v ProxyPort=" ProxyPort - while ((getline < URLfile) > 0) - Length[$1] = $3 + 0 - close(URLfile) # now, URLfile is read in and can be updated - GetHeader = "gawk " Proxy ProxyPort " -v Method=\"HEAD\" -f geturl.awk " - for (i in Length) @{ - GetThisHeader = GetHeader i " 2>&1" - while ((GetThisHeader | getline) > 0) - if (toupper($0) ~ /CONTENT-LENGTH/) NewLength = $2 + 0 - close(GetThisHeader) - print i, Length[i], NewLength > URLfile - if (Length[i] != NewLength) # report only changed URLs - print i, Length[i], NewLength - @} - close(URLfile) -@} -@c endfile -@end smallexample - -Another thing that may look strange is the way GETURL is called. -Before calling GETURL, we have to check if the proxy variables need -to be passed on. If so, we prepare strings that will become part -of the command line later. In @code{GetHeader}, we store these strings -together with the longest part of the command line. Later, in the loop -over the URLs, @code{GetHeader} is appended with the URL and a redirection -operator to form the command that reads the URL's header over the Internet. -GETURL always produces the headers over @file{/dev/stderr}. That is -the reason why we need the redirection operator to have the header -piped in. - -This program is not perfect because it assumes that changing URLs -results in changed lengths, which is not necessarily true. A more -advanced approach is to look at some other header line that -holds time information. But, as always when things get a bit more -complicated, this is left as an exercise to the reader. - -@node WEBGRAB, STATIST, URLCHK, Some Applications and Techniques -@section WEBGRAB: Extract Links from a Page -@cindex WEBGRAB program -@c Inspired by iX 1/98 157. -@cindex robot -Sometimes it is necessary to extract links from web pages. -Browsers do it, web robots do it, and sometimes even humans do it. -Since we have a tool like GETURL at hand, we can solve this problem with -some help from the Bourne shell: - -@example -@c file eg/network/webgrab.awk -BEGIN @{ RS = "http://[#%&\\+\\-\\./0-9\\:;\\?A-Z_a-z\\~]*" @} -RT != "" @{ - command = ("gawk -v Proxy=MyProxy -f geturl.awk " RT \ - " > doc" NR ".html") - print command -@} -@c endfile -@end example - -Notice that the regular expression for URLs is rather crude. A precise -regular expression is much more complex. But this one works -rather well. One problem is that it is unable to find internal links of -an HTML document. Another problem is that -@samp{ftp}, @samp{telnet}, @samp{news}, @samp{mailto}, and other kinds -of links are missing in the regular expression. -However, it is straightforward to add them, if doing so is necessary for other tasks. - -This program reads an HTML file and prints all the HTTP links that it finds. -It relies on @command{gawk}'s ability to use regular expressions as record -separators. With @code{RS} set to a regular expression that matches links, -the second action is executed each time a non-empty link is found. -We can find the matching link itself in @code{RT}. 
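To illustrate how the record separator drives this extraction, here is a
minimal standalone sketch. It is not part of WEBGRAB, and the extended
scheme list is an assumption that covers only links written in the
@samp{://} form; @samp{mailto} and @samp{news} links would still need a
separate pattern:

@example
# print every http, ftp, or telnet link found on standard input
BEGIN @{ RS = "(http|ftp|telnet)://[#%&\\+\\-\\./0-9\\:;\\?A-Z_a-z\\~]*" @}
RT != "" @{ print RT @}
@end example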
- -The action could use the @code{system} function to let another GETURL -retrieve the page, but here we use a different approach. -This simple program prints shell commands that can be piped into @command{sh} -for execution. This way it is possible to first extract -the links, wrap shell commands around them, and pipe all the shell commands -into a file. After editing the file, execution of the file retrieves -exactly those files that we really need. In case we do not want to edit, -we can retrieve all the pages like this: - -@smallexample -gawk -f geturl.awk http://www.suse.de | gawk -f webgrab.awk | sh -@end smallexample - -@cindex Microsoft Windows -After this, you will find the contents of all referenced documents in -files named @file{doc*.html} even if they do not contain HTML code. -The most annoying thing is that we always have to pass the proxy to -GETURL. If you do not like to see the headers of the web pages -appear on the screen, you can redirect them to @file{/dev/null}. -Watching the headers appear can be quite interesting, because -it reveals -interesting details such as which web server the companies use. -Now, it is clear how the clever marketing people -use web robots to determine the -market shares -of Microsoft and Netscape in the web server market. - -Port 80 of any web server is like a small hole in a repellent firewall. -After attaching a browser to port 80, we usually catch a glimpse -of the bright side of the server (its home page). With a tool like GETURL -at hand, we are able to discover some of the more concealed -or even ``indecent'' services (i.e., lacking conformity to standards of quality). -It can be exciting to see the fancy CGI scripts that lie -there, revealing the inner workings of the server, ready to be called: - -@itemize @bullet -@item -With a command such as: - -@example -gawk -f geturl.awk http://any.host.on.the.net/cgi-bin/ -@end example - -some servers give you a directory listing of the CGI files. -Knowing the names, you can try to call some of them and watch -for useful results. Sometimes there are executables in such directories -(such as Perl interpreters) that you may call remotely. If there are -subdirectories with configuration data of the web server, this can also -be quite interesting to read. - -@item -@cindex apache -The well-known Apache web server usually has its CGI files in the -directory @file{/cgi-bin}. There you can often find the scripts -@file{test-cgi} and @file{printenv}. Both tell you some things -about the current connection and the installation of the web server. -Just call: - -@smallexample -gawk -f geturl.awk http://any.host.on.the.net/cgi-bin/test-cgi -gawk -f geturl.awk http://any.host.on.the.net/cgi-bin/printenv -@end smallexample - -@item -Sometimes it is even possible to retrieve system files like the web -server's log file---possibly containing customer data---or even the file -@file{/etc/passwd}. -(We don't recommend this!) -@end itemize - -@strong{Caution:} -Although this may sound funny or simply irrelevant, we are talking about -severe security holes. Try to explore your own system this way and make -sure that none of the above reveals too much information about your system. 
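Returning to ordinary use of the pipeline: because GETURL writes the page
headers to @file{/dev/stderr}, discarding them is a matter of a single
redirection. The following line is only a usage sketch, with the same
starting URL as in the example above:

@smallexample
gawk -f geturl.awk http://www.suse.de 2> /dev/null | gawk -f webgrab.awk | sh
@end smallexample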
- -@node STATIST, MAZE, WEBGRAB, Some Applications and Techniques -@section STATIST: Graphing a Statistical Distribution -@cindex STATIST program - -@cindex GNUPlot utility -@cindex image format -@cindex @file{gif} image format -@cindex @file{png} image format -@cindex @file{ps} image format -@cindex Boutell, Thomas -@iftex -@image{statist,3in} -@end iftex -In the HTTP server examples we've shown thus far, we never present an image -to the browser and its user. Presenting images is one task. Generating -images that reflect some user input and presenting these dynamically -generated images is another. In this @value{SECTION}, we use GNUPlot -for generating @file{.png}, @file{.ps}, or @file{.gif} -files.@footnote{Due to licensing problems, the default -installation of GNUPlot disables the generation of @file{.gif} files. -If your installed version does not accept @samp{set term gif}, -just download and install the most recent version of GNUPlot and the -@uref{http://www.boutell.com/gd/, GD library} -by Thomas Boutell. -Otherwise you still have the chance to generate some -ASCII-art style images with GNUPlot by using @samp{set term dumb}. -(We tried it and it worked.)} - -The program we develop takes the statistical parameters of two samples -and computes the t-test statistics. As a result, we get the probabilities -that the means and the variances of both samples are the same. In order to -let the user check plausibility, the program presents an image of the -distributions. The statistical computation follows -@cite{Numerical Recipes in C: The Art of Scientific Computing} -by William H.@: Press, Saul A.@: Teukolsky, William T.@: Vetterling, and Brian P. Flannery. -Since @command{gawk} does not have a built-in function -for the computation of the beta function, we use the @code{ibeta} function -of GNUPlot. As a side effect, we learn how to use GNUPlot as a -sophisticated calculator. The comparison of means is done as in @code{tutest}, -paragraph 14.2, page 613, and the comparison of variances is done as in @code{ftest}, -page 611 in @cite{Numerical Recipes}. -@cindex Numerical Recipes - -As usual, we take the site-independent code for servers and append -our own functions @code{SetUpServer} and @code{HandleGET}: - -@smallexample -@c file eg/network/statist.awk -function SetUpServer() @{ - TopHeader = "Statistics with GAWK" - TopDoc = "\ -

Please choose one of the following actions:

\ - " - TopFooter = "" - GnuPlot = "gnuplot 2>&1" - m1=m2=0; v1=v2=1; n1=n2=10 -@} -@c endfile -@end smallexample - -Here, you see the menu structure that the user sees. Later, we -will see how the program structure of the @code{HandleGET} function -reflects the menu structure. What is missing here is the link for the -image we generate. In an event-driven environment, request, -generation, and delivery of images are separated. - -Notice the way we initialize the @code{GnuPlot} command string for -the pipe. By default, -GNUPlot outputs the generated image via standard output, as well as -the results of @code{print}(ed) calculations via standard error. -The redirection causes standard error to be mixed into standard -output, enabling us to read results of calculations with @code{getline}. -By initializing the statistical parameters with some meaningful -defaults, we make sure the user gets an image the first time -he uses the program. - -@cindex JavaScript -Following is the rather long function @code{HandleGET}, which -implements the contents of this service by reacting to the different -kinds of requests from the browser. Before you start playing with -this script, make sure that your browser supports JavaScript and that it also -has this option switched on. The script uses a short snippet of -JavaScript code for delayed opening of a window with an image. -A more detailed explanation follows: - -@smallexample -@c file eg/network/statist.awk -function HandleGET() @{ - if(MENU[2] == "AboutServer") @{ - Document = "This is a GUI for a statistical computation.\ - It compares means and variances of two distributions.\ - It is implemented as one GAWK script and uses GNUPLOT." - @} else if (MENU[2] == "EnterParameters") @{ - Document = "" - if ("m1" in GETARG) @{ # are there parameters to compare? - Document = Document "" - m1 = GETARG["m1"]; v1 = GETARG["v1"]; n1 = GETARG["n1"] - m2 = GETARG["m2"]; v2 = GETARG["v2"]; n2 = GETARG["n2"] - t = (m1-m2)/sqrt(v1/n1+v2/n2) - df = (v1/n1+v2/n2)*(v1/n1+v2/n2)/((v1/n1)*(v1/n1)/(n1-1) \ - + (v2/n2)*(v2/n2) /(n2-1)) - if (v1>v2) @{ - f = v1/v2 - df1 = n1 - 1 - df2 = n2 - 1 - @} else @{ - f = v2/v1 - df1 = n2 - 1 - df2 = n1 - 1 - @} - print "pt=ibeta(" df/2 ",0.5," df/(df+t*t) ")" |& GnuPlot - print "pF=2.0*ibeta(" df2/2 "," df1/2 "," \ - df2/(df2+df1*f) ")" |& GnuPlot - print "print pt, pF" |& GnuPlot - RS="\n"; GnuPlot |& getline; RS="\r\n" # $1 is pt, $2 is pF - print "invsqrt2pi=1.0/sqrt(2.0*pi)" |& GnuPlot - print "nd(x)=invsqrt2pi/sd*exp(-0.5*((x-mu)/sd)**2)" |& GnuPlot - print "set term png small color" |& GnuPlot - #print "set term postscript color" |& GnuPlot - #print "set term gif medium size 320,240" |& GnuPlot - print "set yrange[-0.3:]" |& GnuPlot - print "set label 'p(m1=m2) =" $1 "' at 0,-0.1 left" |& GnuPlot - print "set label 'p(v1=v2) =" $2 "' at 0,-0.2 left" |& GnuPlot - print "plot mu=" m1 ",sd=" sqrt(v1) ", nd(x) title 'sample 1',\ - mu=" m2 ",sd=" sqrt(v2) ", nd(x) title 'sample 2'" |& GnuPlot - print "quit" |& GnuPlot - GnuPlot |& getline Image - while ((GnuPlot |& getline) > 0) - Image = Image RS $0 - close(GnuPlot) - @} - Document = Document "\ -

Do these samples have the same Gaussian distribution?

\ -
\ - \ - - \ - - \ - - \ - \ - - \ - - \ - - \ - \ -
1. Mean 1. Variance 1. Count
2. Mean 2. Variance 2. Count

" - @} else if (MENU[2] ~ "Image") @{ - Reason = "OK" ORS "Content-type: image/png" - #Reason = "OK" ORS "Content-type: application/x-postscript" - #Reason = "OK" ORS "Content-type: image/gif" - Header = Footer = "" - Document = Image - @} -@} -@c endfile -@end smallexample - -@cindex PostScript -As usual, we give a short description of the service in the first -menu choice. The third menu choice shows us that generation and -presentation of an image are two separate actions. While the latter -takes place quite instantly in the third menu choice, the former -takes place in the much longer second choice. Image data passes from the -generating action to the presenting action via the variable @code{Image} -that contains a complete @file{.png} image, which is otherwise stored -in a file. If you prefer @file{.ps} or @file{.gif} images over the -default @file{.png} images, you may select these options by uncommenting -the appropriate lines. But remember to do so in two places: when -telling GNUPlot which kind of images to generate, and when transmitting the -image at the end of the program. - -Looking at the end of the program, -the way we pass the @samp{Content-type} to the browser is a bit unusual. -It is appended to the @samp{OK} of the first header line -to make sure the type information becomes part of the header. -The other variables that get transmitted across the network are -made empty, because in this case we do not have an HTML document to -transmit, but rather raw image data to contain in the body. - -Most of the work is done in the second menu choice. It starts with a -strange JavaScript code snippet. When first implementing this server, -we used a short @code{@w{""} here. But then -browsers got smarter and tried to improve on speed by requesting the -image and the HTML code at the same time. When doing this, the browser -tries to build up a connection for the image request while the request for -the HTML text is not yet completed. The browser tries to connect -to the @command{gawk} server on port 8080 while port 8080 is still in use for -transmission of the HTML text. The connection for the image cannot be -built up, so the image appears as ``broken'' in the browser window. -We solved this problem by telling the browser to open a separate window -for the image, but only after a delay of 1000 milliseconds. -By this time, the server should be ready for serving the next request. - -But there is one more subtlety in the JavaScript code. -Each time the JavaScript code opens a window for the image, the -name of the image is appended with a timestamp (@code{systime}). -Why this constant change of name for the image? Initially, we always named -the image @code{Image}, but then the Netscape browser noticed the name -had @emph{not} changed since the previous request and displayed the -previous image (caching behavior). The server core -is implemented so that browsers are told @emph{not} to cache anything. -Obviously HTTP requests do not always work as expected. One way to -circumvent the cache of such overly smart browsers is to change the -name of the image with each request. These three lines of JavaScript -caused us a lot of trouble. - -The rest can be broken -down into two phases. At first, we check if there are statistical -parameters. When the program is first started, there usually are no -parameters because it enters the page coming from the top menu. -Then, we only have to present the user a form that he can use to change -statistical parameters and submit them. 
Subsequently, the submission of -the form causes the execution of the first phase because @emph{now} -there @emph{are} parameters to handle. - -Now that we have parameters, we know there will be an image available. -Therefore we insert the JavaScript code here to initiate the opening -of the image in a separate window. Then, -we prepare some variables that will be passed to GNUPlot for calculation -of the probabilities. Prior to reading the results, we must temporarily -change @code{RS} because GNUPlot separates lines with newlines. -After instructing GNUPlot to generate a @file{.png} (or @file{.ps} or -@file{.gif}) image, we initiate the insertion of some text, -explaining the resulting probabilities. The final @samp{plot} command -actually generates the image data. This raw binary has to be read in carefully -without adding, changing, or deleting a single byte. Hence the unusual -initialization of @code{Image} and completion with a @code{while} loop. - -When using this server, it soon becomes clear that it is far from being -perfect. It mixes source code of six scripting languages or protocols: - -@itemize @bullet -@item GNU @command{awk} implements a server for the protocol: -@item HTTP which transmits: -@item HTML text which contains a short piece of: -@item JavaScript code opening a separate window. -@item A Bourne shell script is used for piping commands into: -@item GNUPlot to generate the image to be opened. -@end itemize - -After all this work, the GNUPlot image opens in the JavaScript window -where it can be viewed by the user. - -It is probably better not to mix up so many different languages. -The result is not very readable. Furthermore, the -statistical part of the server does not take care of invalid input. -Among others, using negative variances will cause invalid results. - -@node MAZE, MOBAGWHO, STATIST, Some Applications and Techniques -@section MAZE: Walking Through a Maze In Virtual Reality -@cindex MAZE -@cindex VRML -@c VRML in iX 11/96 134. -@quotation -@cindex Perlis, Alan -@i{In the long run, every program becomes rococo, and then rubble.}@* -Alan Perlis -@end quotation - -By now, we know how to present arbitrary @samp{Content-type}s to a browser. -In this @value{SECTION}, our server will present a 3D world to our browser. -The 3D world is described in a scene description language (VRML, -Virtual Reality Modeling Language) that allows us to travel through a -perspective view of a 2D maze with our browser. Browsers with a -VRML plugin enable exploration of this technology. We could do -one of those boring @samp{Hello world} examples here, that are usually -presented when introducing novices to -VRML. If you have never written -any VRML code, have a look at -the VRML FAQ. -Presenting a static VRML scene is a bit trivial; in order to expose -@command{gawk}'s new capabilities, we will present a dynamically generated -VRML scene. The function @code{SetUpServer} is very simple because it -only sets the default HTML page and initializes the random number -generator. As usual, the surrounding server lets you browse the maze. - -@smallexample -@c file eg/network/maze.awk -function SetUpServer() @{ - TopHeader = "Walk through a maze" - TopDoc = "\ -

Please choose one of the following actions:

\ - " - TopFooter = "" - srand() -@} -@c endfile -@end smallexample - -The function @code{HandleGET} is a bit longer because it first computes -the maze and afterwards generates the VRML code that is sent across -the network. As shown in the STATIST example -(@pxref{STATIST}), -we set the type of the -content to VRML and then store the VRML representation of the maze as the -page content. We assume that the maze is stored in a 2D array. Initially, -the maze consists of walls only. Then, we add an entry and an exit to the -maze and let the rest of the work be done by the function @code{MakeMaze}. -Now, only the wall fields are left in the maze. By iterating over the these -fields, we generate one line of VRML code for each wall field. - -@smallexample -@c file eg/network/maze.awk -function HandleGET() @{ - if (MENU[2] == "AboutServer") @{ - Document = "If your browser has a VRML 2 plugin,\ - this server shows you a simple VRML scene." - @} else if (MENU[2] == "VRMLtest") @{ - XSIZE = YSIZE = 11 # initially, everything is wall - for (y = 0; y < YSIZE; y++) - for (x = 0; x < XSIZE; x++) - Maze[x, y] = "#" - delete Maze[0, 1] # entry is not wall - delete Maze[XSIZE-1, YSIZE-2] # exit is not wall - MakeMaze(1, 1) - Document = "\ -#VRML V2.0 utf8\n\ -Group @{\n\ - children [\n\ - PointLight @{\n\ - ambientIntensity 0.2\n\ - color 0.7 0.7 0.7\n\ - location 0.0 8.0 10.0\n\ - @}\n\ - DEF B1 Background @{\n\ - skyColor [0 0 0, 1.0 1.0 1.0 ]\n\ - skyAngle 1.6\n\ - groundColor [1 1 1, 0.8 0.8 0.8, 0.2 0.2 0.2 ]\n\ - groundAngle [ 1.2 1.57 ]\n\ - @}\n\ - DEF Wall Shape @{\n\ - geometry Box @{size 1 1 1@}\n\ - appearance Appearance @{ material Material @{ diffuseColor 0 0 1 @} @}\n\ - @}\n\ - DEF Entry Viewpoint @{\n\ - position 0.5 1.0 5.0\n\ - orientation 0.0 0.0 -1.0 0.52\n\ - @}\n" - for (i in Maze) @{ - split(i, t, SUBSEP) - Document = Document " Transform @{ translation " - Document = Document t[1] " 0 -" t[2] " children USE Wall @}\n" - @} - Document = Document " ] # end of group for world\n@}" - Reason = "OK" ORS "Content-type: model/vrml" - Header = Footer = "" - @} -@} -@c endfile -@end smallexample - -Finally, we have a look at @code{MakeMaze}, the function that generates -the @code{Maze} array. When entered, this function assumes that the array -has been initialized so that each element represents a wall element and -the maze is initially full of wall elements. Only the entrance and the exit -of the maze should have been left free. The parameters of the function tell -us which element must be marked as not being a wall. After this, we take -a look at the four neighbouring elements and remember which we have already -treated. Of all the neighbouring elements, we take one at random and -walk in that direction. Therefore, the wall element in that direction has -to be removed and then, we call the function recursively for that element. -The maze is only completed if we iterate the above procedure for -@emph{all} neighbouring elements (in random order) and for our present -element by recursively calling the function for the present element. This -last iteration could have been done in a loop, -but it is done much simpler recursively. - -Notice that elements with coordinates that are both odd are assumed to be -on our way through the maze and the generating process cannot terminate -as long as there is such an element not being @code{delete}d. All other -elements are potentially part of the wall. 
- -@smallexample -@c file eg/network/maze.awk -function MakeMaze(x, y) @{ - delete Maze[x, y] # here we are, we have no wall here - p = 0 # count unvisited fields in all directions - if (x-2 SUBSEP y in Maze) d[p++] = "-x" - if (x SUBSEP y-2 in Maze) d[p++] = "-y" - if (x+2 SUBSEP y in Maze) d[p++] = "+x" - if (x SUBSEP y+2 in Maze) d[p++] = "+y" - if (p>0) @{ # if there are univisited fields, go there - p = int(p*rand()) # choose one unvisited field at random - if (d[p] == "-x") @{ delete Maze[x - 1, y]; MakeMaze(x - 2, y) - @} else if (d[p] == "-y") @{ delete Maze[x, y - 1]; MakeMaze(x, y - 2) - @} else if (d[p] == "+x") @{ delete Maze[x + 1, y]; MakeMaze(x + 2, y) - @} else if (d[p] == "+y") @{ delete Maze[x, y + 1]; MakeMaze(x, y + 2) - @} # we are back from recursion - MakeMaze(x, y); # try again while there are unvisited fields - @} -@} -@c endfile -@end smallexample - -@node MOBAGWHO, STOXPRED, MAZE, Some Applications and Techniques -@section MOBAGWHO: a Simple Mobile Agent -@cindex MOBAGWHO program -@cindex agent -@quotation -@cindex Hoare, C.A.R. -@i{There are two ways of constructing a software design: One way is to -make it so simple that there are obviously no deficiencies, and the -other way is to make it so complicated that there are no obvious -deficiencies.} @* -C. A. R. Hoare -@end quotation - -A @dfn{mobile agent} is a program that can be dispatched from a computer and -transported to a remote server for execution. This is called @dfn{migration}, -which means that a process on another system is started that is independent -from its originator. Ideally, it wanders through -a network while working for its creator or owner. In places like -the UMBC Agent Web, -people are quite confident that (mobile) agents are a software engineering -paradigm that enables us to significantly increase the efficiency -of our work. Mobile agents could become the mediators between users and -the networking world. For an unbiased view at this technology, -see the remarkable paper @cite{Mobile Agents: Are they a good -idea?}.@footnote{@uref{http://www.research.ibm.com/massive/mobag.ps}} - -@ignore -@c Chuck says to take all of this out. -@cindex Tcl/Tk -A good instance of this paradigm is -@cite{Agent Tcl},@footnote{@uref{http://agent.cs.dartmouth.edu/software/agent2.0/}} -an extension of the Tcl language. After introducing a typical -development environment, the aforementioned paper shows a nice little -example application that we will try to rebuild in @command{gawk}. The -@command{who} agent takes a list of servers and wanders from one server -to the next one, always looking to see who is logged in. -Having reached the last -one, it sends back a message with a list of all users it found on each -machine. - -But before implementing something that might or might not be a mobile -agent, let us clarify the concept and some important terms. The agent -paradigm in general is such a young scientific discipline that it has -not yet developed a widely-accepted terminology. Some authors try to -give precise definitions, but their scope is often not wide enough -to be generally accepted. 
Franklin and Graesser ask -@cite{Is it an Agent or just a Program: A Taxonomy for Autonomous -Agents}@footnote{@uref{http://www.msci.memphis.edu/~franklin/AgentProg.html}} -and give even better answers than Caglayan and Harrison in their -@cite{Agent Sourcebook}.@footnote{@uref{http://www.aminda.com/mazzu/sourcebook/}} - -@itemize @minus -@item -@i{An autonomous agent is a system situated within and a part of -an environment that senses that environment and acts on it, over time, in -pursuit of its own agenda and so as to effect what it senses in the future.} -(Quoted from Franklin and Graesser.) -@item -A mobile agent is able to transport itself from one machine to another. -@item -The term @dfn{migration} often denotes this process of moving. -But neither of the two sources above even mentions this term, while others -use it regularly. -@end itemize - -Before delving into the (rather demanding) details of -implementation, let us give just one more quotation as a final -motivation. Steven Farley published an excellent paper called -@cite{Mobile Agent System Architecture},@footnote{This often -cited text originally appeared as a conference paper here: -@uref{http://www.sigs.com/publications/docs/java/9705/farley.html} -Many bibliographies on the Internet point to this dead link. Meanwhile, -the paper appeared as a contribution to a book called More Java Gems here: -@uref{http://uk.cambridge.org/computerscience/object/catalogue/0521774772/default.htm}} -in which he asks ``Why use an agent architecture?'' - -@quotation -If client-server systems are the currently established norm and distributed -object systems such as CORBA are defining the future standards, why bother -with agents? Agent architectures have certain advantages over these other -types. Three of the most important advantages are: -@cindex CORBA - -@enumerate -@item -An agent performs much processing at the server where local bandwidth -is high, thus reducing the amount of network bandwidth consumed and increasing -overall performance. In contrast, a CORBA client object with the equivalent -functionality of a given agent must make repeated remote method calls to -the server object because CORBA objects cannot move across the network -at runtime. - -@item -An agent operates independently of the application from which the -agent was invoked. The agent operates asynchronously, meaning that the -client application does not need to wait for the results. This is especially -important for mobile users who are not always connected to the network. - -@item -The use of agents allows for the injection of new functionality into -a system at run time. An agent system essentially contains its own automatic -software distribution mechanism. Since CORBA has no built-in support for -mobile code, new functionality generally has to be installed manually. - -@end enumerate - -Of course a non-agent system can exhibit these same features with some -work. But the mobile code paradigm supports the transfer of executable -code to a remote location for asynchronous execution from the start. An -agent architecture should be considered for systems where the above features -are primary requirements. -@end quotation -@end ignore - -When trying to migrate a process from one system to another, -a server process is needed on the receiving side. Depending on the kind -of server process, several ways of implementation come to mind. 
-How the process is implemented depends upon the kind of server process: - -@itemize @bullet -@item -HTTP can be used as the protocol for delivery of the migrating -process. In this case, we use a common web -server as the receiving server process. A universal CGI script -mediates between migrating process and web server. -Each server willing to accept migrating agents makes this universal -service available. HTTP supplies the @code{POST} method to transfer -some data to a file on the web server. When a CGI script is called -remotely with the @code{POST} method instead of the usual @code{GET} method, -data is transmitted from the client process to the standard input -of the server's CGI script. So, to implement a mobile agent, -we must not only write the agent program to start on the client -side, but also the CGI script to receive the agent on the server side. - -@cindex CGI -@cindex apache -@item -The @code{PUT} method can also be used for migration. HTTP does not -require a CGI script for migration via @code{PUT}. However, with common web -servers there is no advantage to this solution, because web servers such as -Apache -require explicit activation of a special @code{PUT} script. - -@item -@cite{Agent Tcl} pursues a different course; it relies on a dedicated server -process with a dedicated protocol specialized for receiving mobile agents. -@end itemize - -Our agent example abuses a common web server as a migration tool. So, it needs a -universal CGI script on the receiving side (the web server). The receiving script is -activated with a @code{POST} request when placed into a location like -@file{/httpd/cgi-bin/PostAgent.sh}. Make sure that the server system uses a -version of @command{gawk} that supports network access (Version 3.1 or later; -verify with @samp{gawk --version}). - -@example -@c file eg/network/PostAgent.sh -#!/bin/sh -MobAg=/tmp/MobileAgent.$$ -# direct script to mobile agent file -cat > $MobAg -# execute agent concurrently -gawk -f $MobAg $MobAg > /dev/null & -# HTTP header, terminator and body -gawk 'BEGIN @{ print "\r\nAgent started" @}' -rm $MobAg # delete script file of agent -@c endfile -@end example - -By making its process id (@code{$$}) part of the unique @value{FN}, the -script avoids conflicts between concurrent instances of the script. -First, all lines -from standard input (the mobile agent's source code) are copied into -this unique file. Then, the agent is started as a concurrent process -and a short message reporting this fact is sent to the submitting client. -Finally, the script file of the mobile agent is removed because it is -no longer needed. Although it is a short script, there are several noteworthy -points: - -@table @asis -@item Security -@emph{There is none}. In fact, the CGI script should never -be made available on a server that is part of the Internet because everyone -would be allowed to execute arbitrary commands with it. This behavior is -acceptable only when performing rapid prototyping. - -@item Self-Reference -Each migrating instance of an agent is started -in a way that enables it to read its own source code from standard input -and use the code for subsequent -migrations. This is necessary because it needs to treat the agent's code -as data to transmit. @command{gawk} is not the ideal language for such -a job. Lisp and Tcl are more suitable because they do not make a distinction -between program code and data. - -@item Independence -After migration, the agent is not linked to its -former home in any way. 
By reporting @samp{Agent started}, it waves -``Goodbye'' to its origin. The originator may choose to terminate or not. -@end table - -@cindex Lisp -The originating agent itself is started just like any other command-line -script, and reports the results on standard output. By letting the name -of the original host migrate with the agent, the agent that migrates -to a host far away from its origin can report the result back home. -Having arrived at the end of the journey, the agent establishes -a connection and reports the results. This is the reason for -determining the name of the host with @samp{uname -n} and storing it -in @code{MyOrigin} for later use. We may also set variables with the -@option{-v} option from the command line. This interactivity is only -of importance in the context of starting a mobile agent; therefore this -@code{BEGIN} pattern and its action do not take part in migration: - -@smallexample -@c file eg/network/mobag.awk -BEGIN @{ - if (ARGC != 2) @{ - print "MOBAG - a simple mobile agent" - print "CALL:\n gawk -f mobag.awk mobag.awk" - print "IN:\n the name of this script as a command-line parameter" - print "PARAM:\n -v MyOrigin=myhost.com" - print "OUT:\n the result on stdout" - print "JK 29.03.1998 01.04.1998" - exit - @} - if (MyOrigin == "") @{ - "uname -n" | getline MyOrigin - close("uname -n") - @} -@} -@c endfile -@end smallexample - -Since @command{gawk} cannot manipulate and transmit parts of the program -directly, the source code is read and stored in strings. -Therefore, the program scans itself for -the beginning and the ending of functions. -Each line in between is appended to the code string until the end of -the function has been reached. A special case is this part of the program -itself. It is not a function. -Placing a similar framework around it causes it to be treated -like a function. Notice that this mechanism works for all the -functions of the source code, but it cannot guarantee that the order -of the functions is preserved during migration: - -@smallexample -@c file eg/network/mobag.awk -#ReadMySelf -/^function / @{ FUNC = $2 @} -/^END/ || /^#ReadMySelf/ @{ FUNC = $1 @} -FUNC != "" @{ MOBFUN[FUNC] = MOBFUN[FUNC] RS $0 @} -(FUNC != "") && (/^@}/ || /^#EndOfMySelf/) \ - @{ FUNC = "" @} -#EndOfMySelf -@c endfile -@end smallexample - -The web server code in -@ref{Interacting Service, ,A Web Service with Interaction}, -was first developed as a site-independent core. Likewise, the -@command{gawk}-based mobile agent -starts with an agent-independent core, to which can be appended -application-dependent functions. What follows is the only -application-independent function needed for the mobile agent: - -@smallexample -@c file eg/network/mobag.awk -function migrate(Destination, MobCode, Label) @{ - MOBVAR["Label"] = Label - MOBVAR["Destination"] = Destination - RS = ORS = "\r\n" - HttpService = "/inet/tcp/0/" Destination - for (i in MOBFUN) - MobCode = (MobCode "\n" MOBFUN[i]) - MobCode = MobCode "\n\nBEGIN @{" - for (i in MOBVAR) - MobCode = (MobCode "\n MOBVAR[\"" i "\"] = \"" MOBVAR[i] "\"") - MobCode = MobCode "\n@}\n" - print "POST /cgi-bin/PostAgent.sh HTTP/1.0" |& HttpService - print "Content-length:", length(MobCode) ORS |& HttpService - printf "%s", MobCode |& HttpService - while ((HttpService |& getline) > 0) - print $0 - close(HttpService) -@} -@c endfile -@end smallexample - -The @code{migrate} function prepares the -aforementioned strings containing the program code and transmits them to a -server. 
A consequence of this modular approach is that the @code{migrate} -function takes some parameters that aren't needed in this application, -but that will be in future ones. Its mandatory parameter @code{Destination} holds the -name (or IP address) of the server that the agent wants as a host for its -code. The optional parameter @code{MobCode} may contain some @command{gawk} -code that is inserted during migration in front of all other code. -The optional parameter @code{Label} may contain -a string that tells the agent what to do in program execution after -arrival at its new home site. One of the serious obstacles in implementing -a framework for mobile agents is that it does not suffice to migrate the -code. It is also necessary to migrate the state of execution of the agent. In -contrast to @cite{Agent Tcl}, this program does not try to migrate the complete set -of variables. The following conventions are used: - -@itemize @bullet -@item -Each variable in an agent program is local to the current host and does -@emph{not} migrate. - -@item -The array @code{MOBFUN} shown above is an exception. It is handled -by the function @code{migrate} and does migrate with the application. - -@item -The other exception is the array @code{MOBVAR}. Each variable that -takes part in migration has to be an element of this array. -@code{migrate} also takes care of this. -@end itemize - -Now it's clear what happens to the @code{Label} parameter of the -function @code{migrate}. It is copied into @code{MOBVAR["Label"]} and -travels alongside the other data. Since travelling takes place via HTTP, -records must be separated with @code{"\r\n"} in @code{RS} and -@code{ORS} as usual. The code assembly for migration takes place in -three steps: - -@itemize @bullet -@item -Iterate over @code{MOBFUN} to collect all functions verbatim. - -@item -Prepare a @code{BEGIN} pattern and put assignments to mobile -variables into the action part. - -@item -Transmission itself resembles GETURL: the header with the request -and the @code{Content-length} is followed by the body. In case there is -any reply over the network, it is read completely and echoed to -standard output to avoid irritating the server. -@end itemize - -The application-independent framework is now almost complete. What follows -is the @code{END} pattern that is executed when the mobile agent has -finished reading its own code. First, it checks whether it is already -running on a remote host or not. In case initialization has not yet taken -place, it starts @code{MyInit}. Otherwise (later, on a remote host), it -starts @code{MyJob}: - -@smallexample -@c file eg/network/mobag.awk -END @{ - if (ARGC != 2) exit # stop when called with wrong parameters - if (MyOrigin != "") # is this the originating host? - MyInit() # if so, initialize the application - else # we are on a host with migrated data - MyJob() # so we do our job -@} -@c endfile -@end smallexample - -All that's left to extend the framework into a complete application -is to write two application-specific functions: @code{MyInit} and -@code{MyJob}. Keep in mind that the former is executed once on the -originating host, while the latter is executed after each migration: - -@smallexample -@c file eg/network/mobag.awk -function MyInit() @{ - MOBVAR["MyOrigin"] = MyOrigin - MOBVAR["Machines"] = "localhost/80 max/80 moritz/80 castor/80" - split(MOBVAR["Machines"], Machines) # which host is the first? 
- migrate(Machines[1], "", "") # go to the first host - while (("/inet/tcp/8080/0/0" |& getline) > 0) # wait for result - print $0 # print result - close("/inet/tcp/8080/0/0") -@} -@c endfile -@end smallexample - -As mentioned earlier, this agent takes the name of its origin -(@code{MyOrigin}) with it. Then, it takes the name of its first -destination and goes there for further work. Notice that this name has -the port number of the web server appended to the name of the server, -because the function @code{migrate} needs it this way to create -the @code{HttpService} variable. Finally, it waits for the result to arrive. -The @code{MyJob} function runs on the remote host: - -@smallexample -@c file eg/network/mobag.awk -function MyJob() @{ - # forget this host - sub(MOBVAR["Destination"], "", MOBVAR["Machines"]) - MOBVAR["Result"]=MOBVAR["Result"] SUBSEP SUBSEP MOBVAR["Destination"] ":" - while (("who" | getline) > 0) # who is logged in? - MOBVAR["Result"] = MOBVAR["Result"] SUBSEP $0 - close("who") - if (index(MOBVAR["Machines"], "/") > 0) @{ # any more machines to visit? - split(MOBVAR["Machines"], Machines) # which host is next? - migrate(Machines[1], "", "") # go there - @} else @{ # no more machines - gsub(SUBSEP, "\n", MOBVAR["Result"]) # send result to origin - print MOBVAR["Result"] |& "/inet/tcp/0/" MOBVAR["MyOrigin"] "/8080" - close("/inet/tcp/0/" MOBVAR["MyOrigin"] "/8080") - @} -@} -@c endfile -@end smallexample - -After migrating, the first thing to do in @code{MyJob} is to delete -the name of the current host from the list of hosts to visit. Now, it -is time to start the real work by appending the host's name to the -result string, and reading line by line who is logged in on this host. -A very annoying circumstance is the fact that the elements of -@code{MOBVAR} cannot hold the newline character (@code{"\n"}). If they -did, migration of this string did not work because the string didn't -obey the syntax rule for a string in @command{gawk}. -@code{SUBSEP} is used as a temporary replacement. -If the list of hosts to visit holds -at least one more entry, the agent migrates to that place to go on -working there. Otherwise, we replace the @code{SUBSEP}s -with a newline character in the resulting string, and report it to -the originating host, whose name is stored in @code{MOBVAR["MyOrigin"]}. - -@node STOXPRED, PROTBASE, MOBAGWHO, Some Applications and Techniques -@section STOXPRED: Stock Market Prediction As A Service -@cindex STOXPRED program -@cindex Yahoo -@quotation -@i{Far out in the uncharted backwaters of the unfashionable end of -the Western Spiral arm of the Galaxy lies a small unregarded yellow sun.} - -@i{Orbiting this at a distance of roughly ninety-two million miles is an -utterly insignificant little blue-green planet whose ape-descendent life -forms are so amazingly primitive that they still think digital watches are -a pretty neat idea.} - -@i{This planet has --- or rather had --- a problem, which was this: -most of the people living on it were unhappy for pretty much of the time. -Many solutions were suggested for this problem, but most of these were -largely concerned with the movements of small green pieces of paper, -which is odd because it wasn't the small green pieces of paper that -were unhappy.} @* -Douglas Adams, @cite{The Hitch Hiker's Guide to the Galaxy} -@end quotation - -@cindex @command{cron} -Valuable services on the Internet are usually @emph{not} implemented -as mobile agents. There are much simpler ways of implementing services. 
-All Unix systems provide, for example, the @command{cron} service. -Unix system users can write a list of tasks to be done each day, each -week, twice a day, or just once. The list is entered into a file named -@file{crontab}. For example, to distribute a newsletter on a daily -basis this way, use @command{cron} for calling a script each day early -in the morning. - -@example -# run at 8 am on weekdays, distribute the newsletter -0 8 * * 1-5 $HOME/bin/daily.job >> $HOME/log/newsletter 2>&1 -@end example - -The script first looks for interesting information on the Internet, -assembles it in a nice form and sends the results via email to -the customers. - -The following is an example of a primitive -newsletter on stock market prediction. It is a report which first -tries to predict the change of each share in the Dow Jones Industrial -Index for the particular day. Then it mentions some especially -promising shares as well as some shares which look remarkably bad -on that day. The report ends with the usual disclaimer which tells -every child @emph{not} to try this at home and hurt anybody. -@cindex Dow Jones Industrial Index - -@smallexample -Good morning Uncle Scrooge, - -This is your daily stock market report for Monday, October 16, 2000. -Here are the predictions for today: - - AA neutral - GE up - JNJ down - MSFT neutral - @dots{} - UTX up - DD down - IBM up - MO down - WMT up - DIS up - INTC up - MRK down - XOM down - EK down - IP down - -The most promising shares for today are these: - - INTC http://biz.yahoo.com/n/i/intc.html - -The stock shares to avoid today are these: - - EK http://biz.yahoo.com/n/e/ek.html - IP http://biz.yahoo.com/n/i/ip.html - DD http://biz.yahoo.com/n/d/dd.html - @dots{} -@end smallexample - -@ignore -@c Chuck suggests removing this paragraph -If you are not into stock market prediction but want to earn money -with a more humane service, you might prefer to send out horoscopes -to your customers. Or, once every refrigerator in every household on this side -of the Chinese Wall is connected to the Internet, such a service could -inspect the contents of your customer's refrigerators each day and -advise them on nutrition. Big Brother is watching them. -@end ignore - -The script as a whole is rather long. In order to ease the pain of -studying other people's source code, we have broken the script -up into meaningful parts which are invoked one after the other. -The basic structure of the script is as follows: - -@example -@c file eg/network/stoxpred.awk -BEGIN @{ - Init() - ReadQuotes() - CleanUp() - Prediction() - Report() - SendMail() -@} -@c endfile -@end example - -The earlier parts store data into variables and arrays which are -subsequently used by later parts of the script. The @code{Init} function -first checks if the script is invoked correctly (without any parameters). -If not, it informs the user of the correct usage. What follows are preparations -for the retrieval of the historical quote data. The names of the 30 stock -shares are stored in an array @code{name} along with the current date -in @code{day}, @code{month}, and @code{year}. - -All users who are separated -from the Internet by a firewall and have to direct their Internet accesses -to a proxy must supply the name of the proxy to this script with the -@samp{-v Proxy=@var{name}} option. For most users, the default proxy and -port number should suffice. 
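Such an invocation might look like the following. This is only a usage
sketch; the proxy name and port number are placeholders:

@smallexample
gawk -v Proxy=proxy.example.com -v ProxyPort=8080 -f stoxpred.awk
@end smallexample

The @code{Init} function itself follows: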
- -@example -@c file eg/network/stoxpred.awk -function Init() @{ - if (ARGC != 1) @{ - print "STOXPRED - daily stock share prediction" - print "IN:\n no parameters, nothing on stdin" - print "PARAM:\n -v Proxy=MyProxy -v ProxyPort=80" - print "OUT:\n commented predictions as email" - print "JK 09.10.2000" - exit - @} - # Remember ticker symbols from Dow Jones Industrial Index - StockCount = split("AA GE JNJ MSFT AXP GM JPM PG BA HD KO \ - SBC C HON MCD T CAT HWP MMM UTX DD IBM MO WMT DIS INTC \ - MRK XOM EK IP", name); - # Remember the current date as the end of the time series - day = strftime("%d") - month = strftime("%m") - year = strftime("%Y") - if (Proxy == "") Proxy = "chart.yahoo.com" - if (ProxyPort == 0) ProxyPort = 80 - YahooData = "/inet/tcp/0/" Proxy "/" ProxyPort -@} -@c endfile -@end example - -@cindex CSV format -There are two really interesting parts in the script. One is the -function which reads the historical stock quotes from an Internet -server. The other is the one that does the actual prediction. In -the following function we see how the quotes are read from the -Yahoo server. The data which comes from the server is in -CSV format (comma-separated values): - -@example -@c file eg/network/stoxdata.txt -Date,Open,High,Low,Close,Volume -9-Oct-00,22.75,22.75,21.375,22.375,7888500 -6-Oct-00,23.8125,24.9375,21.5625,22,10701100 -5-Oct-00,24.4375,24.625,23.125,23.50,5810300 -@c endfile -@end example - -Lines contain values of the same time instant, whereas columns are -separated by commas and contain the kind of data that is described -in the header (first) line. At first, @command{gawk} is instructed to -separate columns by commas (@samp{FS = ","}). In the loop that follows, -a connection to the Yahoo server is first opened, then a download takes -place, and finally the connection is closed. All this happens once for -each ticker symbol. In the body of this loop, an Internet address is -built up as a string according to the rules of the Yahoo server. The -starting and ending date are chosen to be exactly the same, but one year -apart in the past. All the action is initiated within the @code{printf} -command which transmits the request for data to the Yahoo server. - -In the inner loop, the server's data is first read and then scanned -line by line. Only lines which have six columns and the name of a month -in the first column contain relevant data. This data is stored -in the two-dimensional array @code{quote}; one dimension -being time, the other being the ticker symbol. During retrieval of the -first stock's data, the calendar names of the time instances are stored -in the array @code{day} because we need them later. - -@smallexample -@c file eg/network/stoxpred.awk -function ReadQuotes() @{ - # Retrieve historical data for each ticker symbol - FS = "," - for (stock = 1; stock <= StockCount; stock++) @{ - URL = "http://chart.yahoo.com/table.csv?s=" name[stock] \ - "&a=" month "&b=" day "&c=" year-1 \ - "&d=" month "&e=" day "&f=" year \ - "g=d&q=q&y=0&z=" name[stock] "&x=.csv" - printf("GET " URL " HTTP/1.0\r\n\r\n") |& YahooData - while ((YahooData |& getline) > 0) @{ - if (NF == 6 && $1 ~ /Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec/) @{ - if (stock == 1) - days[++daycount] = $1; - quote[$1, stock] = $5 - @} - @} - close(YahooData) - @} - FS = " " -@} -@c endfile -@end smallexample - -Now that we @emph{have} the data, it can be checked once again to make sure -that no individual stock is missing or invalid, and that all the stock quotes are -aligned correctly. 
Furthermore, we renumber the time instances. The -most recent day gets day number 1 and all other days get consecutive -numbers. All quotes are rounded toward the nearest whole number in US Dollars. - -@smallexample -@c file eg/network/stoxpred.awk -function CleanUp() @{ - # clean up time series; eliminate incomplete data sets - for (d = 1; d <= daycount; d++) @{ - for (stock = 1; stock <= StockCount; stock++) - if (! ((days[d], stock) in quote)) - stock = StockCount + 10 - if (stock > StockCount + 1) - continue - datacount++ - for (stock = 1; stock <= StockCount; stock++) - data[datacount, stock] = int(0.5 + quote[days[d], stock]) - @} - delete quote - delete days -@} -@c endfile -@end smallexample - -Now we have arrived at the second really interesting part of the whole affair. -What we present here is a very primitive prediction algorithm: -@emph{If a stock fell yesterday, assume it will also fall today; if -it rose yesterday, assume it will rise today}. (Feel free to replace this -algorithm with a smarter one.) If a stock changed in the same direction -on two consecutive days, this is an indication which should be highlighted. -Two-day advances are stored in @code{hot} and two-day declines in -@code{avoid}. - -The rest of the function is a sanity check. It counts the number of -correct predictions in relation to the total number of predictions -one could have made in the year before. - -@smallexample -@c file eg/network/stoxpred.awk -function Prediction() @{ - # Predict each ticker symbol by prolonging yesterday's trend - for (stock = 1; stock <= StockCount; stock++) @{ - if (data[1, stock] > data[2, stock]) @{ - predict[stock] = "up" - @} else if (data[1, stock] < data[2, stock]) @{ - predict[stock] = "down" - @} else @{ - predict[stock] = "neutral" - @} - if ((data[1, stock] > data[2, stock]) && (data[2, stock] > data[3, stock])) - hot[stock] = 1 - if ((data[1, stock] < data[2, stock]) && (data[2, stock] < data[3, stock])) - avoid[stock] = 1 - @} - # Do a plausibility check: how many predictions proved correct? - for (s = 1; s <= StockCount; s++) @{ - for (d = 1; d <= datacount-2; d++) @{ - if (data[d+1, s] > data[d+2, s]) @{ - UpCount++ - @} else if (data[d+1, s] < data[d+2, s]) @{ - DownCount++ - @} else @{ - NeutralCount++ - @} - if (((data[d, s] > data[d+1, s]) && (data[d+1, s] > data[d+2, s])) || - ((data[d, s] < data[d+1, s]) && (data[d+1, s] < data[d+2, s])) || - ((data[d, s] == data[d+1, s]) && (data[d+1, s] == data[d+2, s]))) - CorrectCount++ - @} - @} -@} -@c endfile -@end smallexample - -At this point the hard work has been done: the array @code{predict} -contains the predictions for all the ticker symbols. It is up to the -function @code{Report} to find some nice words to introduce the -desired information. 
- -@smallexample -@c file eg/network/stoxpred.awk -function Report() @{ - # Generate report - report = "\nThis is your daily " - report = report "stock market report for "strftime("%A, %B %d, %Y")".\n" - report = report "Here are the predictions for today:\n\n" - for (stock = 1; stock <= StockCount; stock++) - report = report "\t" name[stock] "\t" predict[stock] "\n" - for (stock in hot) @{ - if (HotCount++ == 0) - report = report "\nThe most promising shares for today are these:\n\n" - report = report "\t" name[stock] "\t\thttp://biz.yahoo.com/n/" \ - tolower(substr(name[stock], 1, 1)) "/" tolower(name[stock]) ".html\n" - @} - for (stock in avoid) @{ - if (AvoidCount++ == 0) - report = report "\nThe stock shares to avoid today are these:\n\n" - report = report "\t" name[stock] "\t\thttp://biz.yahoo.com/n/" \ - tolower(substr(name[stock], 1, 1)) "/" tolower(name[stock]) ".html\n" - @} - report = report "\nThis sums up to " HotCount+0 " winners and " AvoidCount+0 - report = report " losers. When using this kind\nof prediction scheme for" - report = report " the 12 months which lie behind us,\nwe get " UpCount - report = report " 'ups' and " DownCount " 'downs' and " NeutralCount - report = report " 'neutrals'. Of all\nthese " UpCount+DownCount+NeutralCount - report = report " predictions " CorrectCount " proved correct next day.\n" - report = report "A success rate of "\ - int(100*CorrectCount/(UpCount+DownCount+NeutralCount)) "%.\n" - report = report "Random choice would have produced a 33% success rate.\n" - report = report "Disclaimer: Like every other prediction of the stock\n" - report = report "market, this report is, of course, complete nonsense.\n" - report = report "If you are stupid enough to believe these predictions\n" - report = report "you should visit a doctor who can treat your ailment." -@} -@c endfile -@end smallexample - -The function @code{SendMail} goes through the list of customers and opens -a pipe to the @code{mail} command for each of them. Each one receives an -email message with a proper subject heading and is addressed with his full name. - -@smallexample -@c file eg/network/stoxpred.awk -function SendMail() @{ - # send report to customers - customer["uncle.scrooge@@ducktown.gov"] = "Uncle Scrooge" - customer["more@@utopia.org" ] = "Sir Thomas More" - customer["spinoza@@denhaag.nl" ] = "Baruch de Spinoza" - customer["marx@@highgate.uk" ] = "Karl Marx" - customer["keynes@@the.long.run" ] = "John Maynard Keynes" - customer["bierce@@devil.hell.org" ] = "Ambrose Bierce" - customer["laplace@@paris.fr" ] = "Pierre Simon de Laplace" - for (c in customer) @{ - MailPipe = "mail -s 'Daily Stock Prediction Newsletter'" c - print "Good morning " customer[c] "," | MailPipe - print report "\n.\n" | MailPipe - close(MailPipe) - @} -@} -@c endfile -@end smallexample - -Be patient when running the script by hand. -Retrieving the data for all the ticker symbols and sending the emails -may take several minutes to complete, depending upon network traffic -and the speed of the available Internet link. -The quality of the prediction algorithm is likely to be disappointing. -Try to find a better one. -Should you find one with a success rate of more than 50%, please tell -us about it! It is only for the sake of curiosity, of course. @code{:-)} - -@ignore -@c chuck says to remove this -Let us give you one final indication as to what one can expect from -a prediction of stock data, which is sometimes said to contain much -randomness. 
One theory says that all relevant information to be taken -into account when estimating the price of a stock is contained in the -stock quotes. Every bit of useful information has influenced the -fair price. Therefore (the theory says) temporary changes (i.e., fluctuations -within a minute) have to be purely random. But what is the cause of -short-term changes in stock prices? - -Stock prices are fixed when supply and demand meet each other. -What people are willing to pay reflects human expectations. -Human expectations are not necessarily random. On the Internet, -you can find an elucidating paper about predictability and human -expectations: -@uref{http://it.ucsd.edu/IT/Newsletter/archives/meir/05meir.html, -@cite{Reflections on ``Universal Prediction of Individual Sequences''}} -The authors (Feder, Merhav, Gutman) introduce the reader to the subject -by telling a thrilling anecdote. -@cindex Shannon, Claude -@quotation -In the early 50's, at Bell Laboratories, David Hagelbarger built a -simple ``mind reading'' machine, whose purpose was to play the ``penny -matching'' game. In this game, a player chooses head or tail, while a -``mind reading'' machine tries to predict and match his choice. -Surprisingly, as Robert Lucky tells in his book ``Silicon Dreams'', -Hagelbarger's simple, 8-state machine, was able to match the ``pennies'' -of its human opponent 5,218 times over the course of 9,795 plays. -Random guessing would lead to such a high success rate with a probability -less than one out of 10 billion! Shannon, who was interested in prediction, -information, and thinking machines, closely followed Hagelbarger's -machine, and eventually built his own stripped-down version of the machine, -having the same states, but one that used a simpler strategy at each state. -As the legend goes, in a duel between the two machines, Shannon's machine -won by a slight margin! No one knows if this was due to a superior algorithm -or just a chance happening associated with the specific sequence at that game. -In any event, the success of both these machines against ``untrained'' human -opponents was explained by the fact that the human opponents cannot draw -completely random -bits. -@end quotation -@end ignore - -@node PROTBASE, , STOXPRED, Some Applications and Techniques -@section PROTBASE: Searching Through A Protein Database -@cindex PROTBASE -@cindex NCBI, National Center for Biotechnology Information -@cindex BLAST, Basic Local Alignment Search Tool -@cindex Hoare, C.A.R. -@quotation -@i{Hoare's Law of Large Problems: Inside every large problem is a small - problem struggling to get out.} -@end quotation - -Yahoo's database of stock market data is just one among the many large -databases on the Internet. Another one is located at NCBI -(National Center for Biotechnology -Information). Established in 1988 as a national resource for molecular -biology information, NCBI creates public databases, conducts research -in computational biology, develops software tools for analyzing genome -data, and disseminates biomedical information. In this section, we -look at one of NCBI's public services, which is called BLAST -(Basic Local Alignment Search Tool). - -You probably know that the information necessary for reproducing living -cells is encoded in the genetic material of the cells. The genetic material -is a very long chain of four base nucleotides. It is the order of -appearance (the sequence) of nucleotides which contains the information -about the substance to be produced. 
Scientists in biotechnology often -find a specific fragment, determine the nucleotide sequence, and need -to know where the sequence at hand comes from. This is where the large -databases enter the game. At NCBI, databases store the knowledge -about which sequences have ever been found and where they have been found. -When the scientist sends his sequence to the BLAST service, the server -looks for regions of genetic material in its database which -look the most similar to the delivered nucleotide sequence. After a -search time of some seconds or minutes the server sends an answer to -the scientist. In order to make access simple, NCBI chose to offer -their database service through popular Internet protocols. There are -four basic ways to use the so-called BLAST services: - -@itemize @bullet -@item -The easiest way to use BLAST is through the web. Users may simply point -their browsers at the NCBI home page -and link to the BLAST pages. -NCBI provides a stable URL that may be used to perform BLAST searches -without interactive use of a web browser. This is what we will do later -in this section. -A demonstration client -and a @file{README} file demonstrate how to access this URL. - -@item -Currently, -@command{blastcl3} is the standard network BLAST client. -You can download @command{blastcl3} from the -anonymous FTP location. - -@item -BLAST 2.0 can be run locally as a full executable and can be used to run -BLAST searches against private local databases, or downloaded copies of the -NCBI databases. BLAST 2.0 executables may be found on the NCBI -anonymous FTP server. - -@item -The NCBI BLAST Email server is the best option for people without convenient -access to the web. A similarity search can be performed by sending a properly -formatted mail message containing the nucleotide or protein query sequence to -@email{blast@@ncbi.nlm.nih.gov}. The query sequence is compared against the -specified database using the BLAST algorithm and the results are returned in -an email message. For more information on formulating email BLAST searches, -you can send a message consisting of the word ``HELP'' to the same address, -@email{blast@@ncbi.nlm.nih.gov}. -@end itemize - -Our starting point is the demonstration client mentioned in the first option. -The @file{README} file that comes along with the client explains the whole -process in a nutshell. In the rest of this section, we first show -what such requests look like. Then we show how to use @command{gawk} to -implement a client in about 10 lines of code. Finally, we show how to -interpret the result returned from the service. - -Sequences are expected to be represented in the standard -IUB/IUPAC amino acid and nucleic acid codes, -with these exceptions: lower-case letters are accepted and are mapped -into upper-case; a single hyphen or dash can be used to represent a gap -of indeterminate length; and in amino acid sequences, @samp{U} and @samp{*} -are acceptable letters (see below). Before submitting a request, any numerical -digits in the query sequence should either be removed or replaced by -appropriate letter codes (e.g., @samp{N} for unknown nucleic acid residue -or @samp{X} for unknown amino acid residue). 
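(An editorial aside, not part of the original BLAST demonstration client nor of the
@command{gawk} distribution: the cleanup just described is easy to sketch in
@command{gawk} itself. The fragment below is a minimal sketch for plain nucleotide
queries only; it upper-cases every sequence line and replaces numerical digits by
@samp{N}, while FASTA description lines starting with @samp{>} are passed through
unchanged. For amino acid queries one would substitute @samp{X} for @samp{N}.)

@smallexample
# Hypothetical preprocessing filter for a raw nucleotide query.
/^>/ @{ print; next @}         # keep FASTA description lines as they are
@{
  line = toupper($0)           # map lower-case letters to upper-case
  gsub(/[0-9]/, "N", line)     # replace digits by the ``any'' code N
  print line
@}
@end smallexample

It could be invoked as @samp{gawk -f cleanseq.awk rawsequence} (both file names are
made up) and the output pasted below the @samp{BEGIN} line of the request.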
-The nucleic acid codes supported are: - -@example -A --> adenosine M --> A C (amino) -C --> cytidine S --> G C (strong) -G --> guanine W --> A T (weak) -T --> thymidine B --> G T C -U --> uridine D --> G A T -R --> G A (purine) H --> A C T -Y --> T C (pyrimidine) V --> G C A -K --> G T (keto) N --> A G C T (any) - - gap of indeterminate length -@end example - -Now you know the alphabet of nucleotide sequences. The last two lines -of the following example query show you such a sequence, which is obviously -made up only of elements of the alphabet just described. Store this example -query into a file named @file{protbase.request}. You are now ready to send -it to the server with the demonstration client. - -@example -@c file eg/network/protbase.request -PROGRAM blastn -DATALIB month -EXPECT 0.75 -BEGIN ->GAWK310 the gawking gene GNU AWK -tgcttggctgaggagccataggacgagagcttcctggtgaagtgtgtttcttgaaatcat -caccaccatggacagcaaa -@c endfile -@end example - -@cindex FASTA/Pearson format -The actual search request begins with the mandatory parameter @samp{PROGRAM} -in the first column followed by the value @samp{blastn} (the name of the -program) for searching nucleic acids. The next line contains the mandatory -search parameter @samp{DATALIB} with the value @samp{month} for the newest -nucleic acid sequences. The third line contains an optional @samp{EXPECT} -parameter and the value desired for it. The fourth line contains the -mandatory @samp{BEGIN} directive, followed by the query sequence in -FASTA/Pearson format. -Each line of information must be less than 80 characters in length. - -The ``month'' database contains all new or revised sequences released in the -last 30 days and is useful for searching against new sequences. -There are five different blast programs, @command{blastn} being the one that -compares a nucleotide query sequence against a nucleotide sequence database. - -The last server directive that must appear in every request is the -@samp{BEGIN} directive. The query sequence should immediately follow the -@samp{BEGIN} directive and must appear in FASTA/Pearson format. -A sequence in -FASTA/Pearson format begins with a single-line description. -The description line, which is required, is distinguished from the lines of -sequence data that follow it by having a greater-than (@samp{>}) symbol -in the first column. For the purposes of the BLAST server, the text of -the description is arbitrary. - -If you prefer to use a client written in @command{gawk}, just store the following -10 lines of code into a file named @file{protbase.awk} and use this client -instead. Invoke it with @samp{gawk -f protbase.awk protbase.request}. -Then wait a minute and watch the result coming in. In order to replicate -the demonstration client's behaviour as closely as possible, this client -does not use a proxy server. We could also have extended the client program -in @ref{GETURL, ,Retrieving Web Pages}, to implement the client request from -@file{protbase.awk} as a special case. 
-
-@smallexample
-@c file eg/network/protbase.awk
-@{ request = request "\n" $0 @}
-
-END @{
- BLASTService = "/inet/tcp/0/www.ncbi.nlm.nih.gov/80"
- printf "POST /cgi-bin/BLAST/nph-blast_report HTTP/1.0\n" |& BLASTService
- printf "Content-Length: " length(request) "\n\n" |& BLASTService
- printf request |& BLASTService
- while ((BLASTService |& getline) > 0)
- print $0
- close(BLASTService)
-@}
-@c endfile
-@end smallexample
-
-The demonstration client from NCBI is 214 lines long (written in C) and
-it is not immediately obvious what it does. Our client is so short that
-it @emph{is} obvious what it does. First it loops over all lines of the
-query and stores the whole query into a variable. Then the script
-establishes an Internet connection to the NCBI server and transmits the
-query by framing it with a proper HTTP request. Finally it receives
-and prints the complete result coming from the server.
-
-Now, let us look at the result. It begins with an HTTP header, which you
-can ignore. Then there are some comments about the query having been
-filtered to avoid spuriously high scores. After this, there is a reference
-to the paper that describes the software being used for searching the data
-base. After a repetition of the original query's description we find the
-list of significant alignments:
-
-@smallexample
-@c file eg/network/protbase.result
-Sequences producing significant alignments: (bits) Value
-
-gb|AC021182.14|AC021182 Homo sapiens chromosome 7 clone RP11-733... 38 0.20
-gb|AC021056.12|AC021056 Homo sapiens chromosome 3 clone RP11-115... 38 0.20
-emb|AL160278.10|AL160278 Homo sapiens chromosome 9 clone RP11-57... 38 0.20
-emb|AL391139.11|AL391139 Homo sapiens chromosome X clone RP11-35... 38 0.20
-emb|AL365192.6|AL365192 Homo sapiens chromosome 6 clone RP3-421H... 38 0.20
-emb|AL138812.9|AL138812 Homo sapiens chromosome 11 clone RP1-276... 38 0.20
-gb|AC073881.3|AC073881 Homo sapiens chromosome 15 clone CTD-2169... 38 0.20
-@c endfile
-@end smallexample
-
-This means that the query sequence was found in seven human chromosomes.
-But the value 0.20 (20%) means that the probability of an accidental match
-is rather high (20%) in all cases and should be taken into account.
-You may wonder what the first column means. It is a key to the specific
-database in which this occurrence was found. The unique sequence identifiers
-reported in the search results can be used as sequence retrieval keys
-via the NCBI server. The syntax of sequence header lines used by the NCBI
-BLAST server depends on the database from which each sequence was obtained.
-The table below lists the identifiers for the databases from which the
-sequences were derived. 
- -@ifinfo -@example -Database Name Identifier Syntax -============================ ======================== -GenBank gb|accession|locus -EMBL Data Library emb|accession|locus -DDBJ, DNA Database of Japan dbj|accession|locus -NBRF PIR pir||entry -Protein Research Foundation prf||name -SWISS-PROT sp|accession|entry name -Brookhaven Protein Data Bank pdb|entry|chain -Kabat's Sequences of Immuno@dots{} gnl|kabat|identifier -Patents pat|country|number -GenInfo Backbone Id bbs|number -@end example -@end ifinfo - -@ifnotinfo -@multitable {Kabat's Sequences of Immuno@dots{}} {@code{@w{sp|accession|entry name}}} -@item GenBank @tab @code{gb|accession|locus} -@item EMBL Data Library @tab @code{emb|accession|locus} -@item DDBJ, DNA Database of Japan @tab @code{dbj|accession|locus} -@item NBRF PIR @tab @code{pir||entry} -@item Protein Research Foundation @tab @code{prf||name} -@item SWISS-PROT @tab @code{@w{sp|accession|entry name}} -@item Brookhaven Protein Data Bank @tab @code{pdb|entry|chain} -@item Kabat's Sequences of Immuno@dots{} @tab @code{gnl|kabat|identifier} -@item Patents @tab @code{pat|country|number} -@item GenInfo Backbone Id @tab @code{bbs|number} -@end multitable -@end ifnotinfo - - -For example, an identifier might be @samp{gb|AC021182.14|AC021182}, where the -@samp{gb} tag indicates that the identifier refers to a GenBank sequence, -@samp{AC021182.14} is its GenBank ACCESSION, and @samp{AC021182} is the GenBank LOCUS. -The identifier contains no spaces, so that a space indicates the end of the -identifier. - -Let us continue in the result listing. Each of the seven alignments mentioned -above is subsequently described in detail. We will have a closer look at -the first of them. - -@smallexample ->gb|AC021182.14|AC021182 Homo sapiens chromosome 7 clone RP11-733N23, WORKING DRAFT SEQUENCE, 4 - unordered pieces - Length = 176383 - - Score = 38.2 bits (19), Expect = 0.20 - Identities = 19/19 (100%) - Strand = Plus / Plus - -Query: 35 tggtgaagtgtgtttcttg 53 - ||||||||||||||||||| -Sbjct: 69786 tggtgaagtgtgtttcttg 69804 -@end smallexample - -This alignment was located on the human chromosome 7. The fragment on which -part of the query was found had a total length of 176383. Only 19 of the -nucleotides matched and the matching sequence ran from character 35 to 53 -in the query sequence and from 69786 to 69804 in the fragment on chromosome 7. -If you are still reading at this point, you are probably interested in finding -out more about Computational Biology and you might appreciate the following -hints. - -@cindex Computational Biology -@cindex Bioinformatics -@enumerate -@item -There is a book called @cite{Introduction to Computational Biology} -by Michael S. Waterman, which is worth reading if you are seriously -interested. You can find a good -book review -on the Internet. - -@item -While Waterman's book can explain to you the algorithms employed internally -in the database search engines, most practicioners prefer to approach -the subject differently. The applied side of Computational Biology is -called Bioinformatics, and emphasizes the tools available for day-to-day -work as well as how to actually @emph{use} them. One of the very few affordable -books on Bioinformatics is -@cite{Developing Bioinformatics Computer Skills}. - -@item -The sequences @emph{gawk} and @emph{gnuawk} are in widespread use in -the genetic material of virtually every earthly living being. 
Let us -take this as a clear indication that the divine creator has intended -@code{gawk} to prevail over other scripting languages such as @code{perl}, -@code{tcl}, or @code{python} which are not even proper sequences. (:-) -@end enumerate - -@node Links, GNU Free Documentation License, Some Applications and Techniques, Top -@chapter Related Links - -This section lists the URLs for various items discussed in this @value{CHAPTER}. -They are presented in the order in which they appear. - -@table @asis - -@item @cite{Internet Programming with Python} -@uref{http://www.fsbassociates.com/books/python.htm} - -@item @cite{Advanced Perl Programming} -@uref{http://www.oreilly.com/catalog/advperl} - -@item @cite{Web Client Programming with Perl} -@uref{http://www.oreilly.com/catalog/webclient} - -@item Richard Stevens's home page and book -@uref{http://www.kohala.com/~rstevens} - -@item The SPAK home page -@uref{http://www.userfriendly.net/linux/RPM/contrib/libc6/i386/spak-0.6b-1.i386.html} - -@item Volume III of @cite{Internetworking with TCP/IP}, by Comer and Stevens -@uref{http://www.cs.purdue.edu/homes/dec/tcpip3s.cont.html} - -@item XBM Graphics File Format -@uref{http://www.wotsit.org/download.asp?f=xbm} - -@item GNUPlot -@uref{http://www.cs.dartmouth.edu/gnuplot_info.html} - -@item Mark Humphrys' Eliza page -@uref{http://www.compapp.dcu.ie/~humphrys/eliza.html} - -@item Yahoo! Eliza Information -@uref{http://dir.yahoo.com/Recreation/Games/Computer_Games/Internet_Games/Web_Games/Artificial_Intelligence} - -@item Java versions of Eliza -@uref{http://www.tjhsst.edu/Psych/ch1/eliza.html} - -@item Java versions of Eliza with source code -@uref{http://home.adelphia.net/~lifeisgood/eliza/eliza.htm} - -@item Eliza Programs with Explanations -@uref{http://chayden.net/chayden/eliza/Eliza.shtml} - -@item Loebner Contest -@uref{http://acm.org/~loebner/loebner-prize.htmlx} - -@item Tck/Tk Information -@uref{http://www.scriptics.com/} - -@item Intel 80x86 Processors -@uref{http://developer.intel.com/design/platform/embedpc/what_is.htm} - -@item AMD Elan Processors -@uref{http://www.amd.com/products/epd/processors/4.32bitcont/32bitcont/index.html} - -@item XINU -@uref{http://willow.canberra.edu.au/~chrisc/xinu.html } - -@item GNU/Linux -@uref{http://uclinux.lineo.com/} - -@item Embedded PCs -@uref{http://dir.yahoo.com/Business_and_Economy/Business_to_Business/Computers/Hardware/Embedded_Control/} - -@item MiniSQL -@uref{http://www.hughes.com.au/library/} - -@item Market Share Surveys -@uref{http://www.netcraft.com/survey} - -@item @cite{Numerical Recipes in C: The Art of Scientific Computing} -@uref{http://www.nr.com} - -@item VRML -@uref{http://www.vrml.org} - -@item The VRML FAQ -@uref{http://www.vrml.org/technicalinfo/specifications/specifications.htm#FAQ} - -@item The UMBC Agent Web -@uref{http://www.cs.umbc.edu/agents } - -@item Apache Web Server -@uref{http://www.apache.org} - -@item National Center for Biotechnology Information (NCBI) -@uref{http://www.ncbi.nlm.nih.gov} - -@item Basic Local Alignment Search Tool (BLAST) -@uref{http://www.ncbi.nlm.nih.gov/BLAST/blast_overview.html} - -@item NCBI Home Page -@uref{http://www.ncbi.nlm.nih.gov} - -@item BLAST Pages -@uref{http://www.ncbi.nlm.nih.gov/BLAST} - -@item BLAST Demonstration Client -@uref{ftp://ncbi.nlm.nih.gov/blast/blasturl/} - -@item BLAST anonymous FTP location -@uref{ftp://ncbi.nlm.nih.gov/blast/network/netblast/} - -@item BLAST 2.0 Executables -@uref{ftp://ncbi.nlm.nih.gov/blast/executables/} - -@item IUB/IUPAC Amino Acid and Nucleic Acid 
Codes -@uref{http://www.uthscsa.edu/geninfo/blastmail.html#item6} - -@item FASTA/Pearson Format -@uref{http://www.ncbi.nlm.nih.gov/BLAST/fasta.html} - -@item Fasta/Pearson Sequence in Java -@uref{http://www.kazusa.or.jp/java/codon_table_java/} - -@item Book Review of @cite{Introduction to Computational Biology} -@uref{http://www.acm.org/crossroads/xrds5-1/introcb.html} - -@item @cite{Developing Bioinformatics Computer Skills} -@uref{http://www.oreilly.com/catalog/bioskills/} - -@end table - -@node GNU Free Documentation License, Index, Links, Top -@unnumbered GNU Free Documentation License -@center Version 1.1, March 2000 - -@display -Copyright (C) 2000 Free Software Foundation, Inc. -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. -@end display -@sp 1 -@enumerate 0 -@item -PREAMBLE - -The purpose of this License is to make a manual, textbook, or other -written document ``free'' in the sense of freedom: to assure everyone -the effective freedom to copy and redistribute it, with or without -modifying it, either commercially or noncommercially. Secondarily, -this License preserves for the author and publisher a way to get -credit for their work, while not being considered responsible for -modifications made by others. - -This License is a kind of ``copyleft'', which means that derivative -works of the document must themselves be free in the same sense. It -complements the GNU General Public License, which is a copyleft -license designed for free software. - -We have designed this License in order to use it for manuals for free -software, because free software needs free documentation: a free -program should come with manuals providing the same freedoms that the -software does. But this License is not limited to software manuals; -it can be used for any textual work, regardless of subject matter or -whether it is published as a printed book. We recommend this License -principally for works whose purpose is instruction or reference. - -@sp 1 -@item -APPLICABILITY AND DEFINITIONS - -This License applies to any manual or other work that contains a -notice placed by the copyright holder saying it can be distributed -under the terms of this License. The ``Document'', below, refers to any -such manual or work. Any member of the public is a licensee, and is -addressed as ``you''. - -A ``Modified Version'' of the Document means any work containing the -Document or a portion of it, either copied verbatim, or with -modifications and/or translated into another language. - -A ``Secondary Section'' is a named appendix or a front-matter section of -the Document that deals exclusively with the relationship of the -publishers or authors of the Document to the Document's overall subject -(or to related matters) and contains nothing that could fall directly -within that overall subject. (For example, if the Document is in part a -textbook of mathematics, a Secondary Section may not explain any -mathematics.) The relationship could be a matter of historical -connection with the subject or with related matters, or of legal, -commercial, philosophical, ethical or political position regarding -them. - -The ``Invariant Sections'' are certain Secondary Sections whose titles -are designated, as being those of Invariant Sections, in the notice -that says that the Document is released under this License. 
- -The ``Cover Texts'' are certain short passages of text that are listed, -as Front-Cover Texts or Back-Cover Texts, in the notice that says that -the Document is released under this License. - -A ``Transparent'' copy of the Document means a machine-readable copy, -represented in a format whose specification is available to the -general public, whose contents can be viewed and edited directly and -straightforwardly with generic text editors or (for images composed of -pixels) generic paint programs or (for drawings) some widely available -drawing editor, and that is suitable for input to text formatters or -for automatic translation to a variety of formats suitable for input -to text formatters. A copy made in an otherwise Transparent file -format whose markup has been designed to thwart or discourage -subsequent modification by readers is not Transparent. A copy that is -not ``Transparent'' is called ``Opaque''. - -Examples of suitable formats for Transparent copies include plain -ASCII without markup, Texinfo input format, LaTeX input format, SGML -or XML using a publicly available DTD, and standard-conforming simple -HTML designed for human modification. Opaque formats include -PostScript, PDF, proprietary formats that can be read and edited only -by proprietary word processors, SGML or XML for which the DTD and/or -processing tools are not generally available, and the -machine-generated HTML produced by some word processors for output -purposes only. - -The ``Title Page'' means, for a printed book, the title page itself, -plus such following pages as are needed to hold, legibly, the material -this License requires to appear in the title page. For works in -formats which do not have any title page as such, ``Title Page'' means -the text near the most prominent appearance of the work's title, -preceding the beginning of the body of the text. -@sp 1 -@item -VERBATIM COPYING - -You may copy and distribute the Document in any medium, either -commercially or noncommercially, provided that this License, the -copyright notices, and the license notice saying this License applies -to the Document are reproduced in all copies, and that you add no other -conditions whatsoever to those of this License. You may not use -technical measures to obstruct or control the reading or further -copying of the copies you make or distribute. However, you may accept -compensation in exchange for copies. If you distribute a large enough -number of copies you must also follow the conditions in section 3. - -You may also lend copies, under the same conditions stated above, and -you may publicly display copies. -@sp 1 -@item -COPYING IN QUANTITY - -If you publish printed copies of the Document numbering more than 100, -and the Document's license notice requires Cover Texts, you must enclose -the copies in covers that carry, clearly and legibly, all these Cover -Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on -the back cover. Both covers must also clearly and legibly identify -you as the publisher of these copies. The front cover must present -the full title with all words of the title equally prominent and -visible. You may add other material on the covers in addition. -Copying with changes limited to the covers, as long as they preserve -the title of the Document and satisfy these conditions, can be treated -as verbatim copying in other respects. 
- -If the required texts for either cover are too voluminous to fit -legibly, you should put the first ones listed (as many as fit -reasonably) on the actual cover, and continue the rest onto adjacent -pages. - -If you publish or distribute Opaque copies of the Document numbering -more than 100, you must either include a machine-readable Transparent -copy along with each Opaque copy, or state in or with each Opaque copy -a publicly-accessible computer-network location containing a complete -Transparent copy of the Document, free of added material, which the -general network-using public has access to download anonymously at no -charge using public-standard network protocols. If you use the latter -option, you must take reasonably prudent steps, when you begin -distribution of Opaque copies in quantity, to ensure that this -Transparent copy will remain thus accessible at the stated location -until at least one year after the last time you distribute an Opaque -copy (directly or through your agents or retailers) of that edition to -the public. - -It is requested, but not required, that you contact the authors of the -Document well before redistributing any large number of copies, to give -them a chance to provide you with an updated version of the Document. -@sp 1 -@item -MODIFICATIONS - -You may copy and distribute a Modified Version of the Document under -the conditions of sections 2 and 3 above, provided that you release -the Modified Version under precisely this License, with the Modified -Version filling the role of the Document, thus licensing distribution -and modification of the Modified Version to whoever possesses a copy -of it. In addition, you must do these things in the Modified Version: - -@enumerate A -@item -Use in the Title Page (and on the covers, if any) a title distinct -from that of the Document, and from those of previous versions -(which should, if there were any, be listed in the History section -of the Document). You may use the same title as a previous version -if the original publisher of that version gives permission. - -@item -List on the Title Page, as authors, one or more persons or entities -responsible for authorship of the modifications in the Modified -Version, together with at least five of the principal authors of the -Document (all of its principal authors, if it has less than five). - -@item -State on the Title page the name of the publisher of the -Modified Version, as the publisher. - -@item -Preserve all the copyright notices of the Document. - -@item -Add an appropriate copyright notice for your modifications -adjacent to the other copyright notices. - -@item -Include, immediately after the copyright notices, a license notice -giving the public permission to use the Modified Version under the -terms of this License, in the form shown in the Addendum below. - -@item -Preserve in that license notice the full lists of Invariant Sections -and required Cover Texts given in the Document's license notice. - -@item -Include an unaltered copy of this License. - -@item -Preserve the section entitled ``History'', and its title, and add to -it an item stating at least the title, year, new authors, and -publisher of the Modified Version as given on the Title Page. If -there is no section entitled ``History'' in the Document, create one -stating the title, year, authors, and publisher of the Document as -given on its Title Page, then add an item describing the Modified -Version as stated in the previous sentence. 
- -@item -Preserve the network location, if any, given in the Document for -public access to a Transparent copy of the Document, and likewise -the network locations given in the Document for previous versions -it was based on. These may be placed in the ``History'' section. -You may omit a network location for a work that was published at -least four years before the Document itself, or if the original -publisher of the version it refers to gives permission. - -@item -In any section entitled ``Acknowledgements'' or ``Dedications'', -preserve the section's title, and preserve in the section all the -substance and tone of each of the contributor acknowledgements -and/or dedications given therein. - -@item -Preserve all the Invariant Sections of the Document, -unaltered in their text and in their titles. Section numbers -or the equivalent are not considered part of the section titles. - -@item -Delete any section entitled ``Endorsements''. Such a section -may not be included in the Modified Version. - -@item -Do not retitle any existing section as ``Endorsements'' -or to conflict in title with any Invariant Section. -@end enumerate - -If the Modified Version includes new front-matter sections or -appendices that qualify as Secondary Sections and contain no material -copied from the Document, you may at your option designate some or all -of these sections as invariant. To do this, add their titles to the -list of Invariant Sections in the Modified Version's license notice. -These titles must be distinct from any other section titles. - -You may add a section entitled ``Endorsements'', provided it contains -nothing but endorsements of your Modified Version by various -parties--for example, statements of peer review or that the text has -been approved by an organization as the authoritative definition of a -standard. - -You may add a passage of up to five words as a Front-Cover Text, and a -passage of up to 25 words as a Back-Cover Text, to the end of the list -of Cover Texts in the Modified Version. Only one passage of -Front-Cover Text and one of Back-Cover Text may be added by (or -through arrangements made by) any one entity. If the Document already -includes a cover text for the same cover, previously added by you or -by arrangement made by the same entity you are acting on behalf of, -you may not add another; but you may replace the old one, on explicit -permission from the previous publisher that added the old one. - -The author(s) and publisher(s) of the Document do not by this License -give permission to use their names for publicity for or to assert or -imply endorsement of any Modified Version. -@sp 1 -@item -COMBINING DOCUMENTS - -You may combine the Document with other documents released under this -License, under the terms defined in section 4 above for modified -versions, provided that you include in the combination all of the -Invariant Sections of all of the original documents, unmodified, and -list them all as Invariant Sections of your combined work in its -license notice. - -The combined work need only contain one copy of this License, and -multiple identical Invariant Sections may be replaced with a single -copy. If there are multiple Invariant Sections with the same name but -different contents, make the title of each such section unique by -adding at the end of it, in parentheses, the name of the original -author or publisher of that section if known, or else a unique number. 
-Make the same adjustment to the section titles in the list of -Invariant Sections in the license notice of the combined work. - -In the combination, you must combine any sections entitled ``History'' -in the various original documents, forming one section entitled -``History''; likewise combine any sections entitled ``Acknowledgements'', -and any sections entitled ``Dedications''. You must delete all sections -entitled ``Endorsements.'' -@sp 1 -@item -COLLECTIONS OF DOCUMENTS - -You may make a collection consisting of the Document and other documents -released under this License, and replace the individual copies of this -License in the various documents with a single copy that is included in -the collection, provided that you follow the rules of this License for -verbatim copying of each of the documents in all other respects. - -You may extract a single document from such a collection, and distribute -it individually under this License, provided you insert a copy of this -License into the extracted document, and follow this License in all -other respects regarding verbatim copying of that document. -@sp 1 -@item -AGGREGATION WITH INDEPENDENT WORKS - -A compilation of the Document or its derivatives with other separate -and independent documents or works, in or on a volume of a storage or -distribution medium, does not as a whole count as a Modified Version -of the Document, provided no compilation copyright is claimed for the -compilation. Such a compilation is called an ``aggregate'', and this -License does not apply to the other self-contained works thus compiled -with the Document, on account of their being thus compiled, if they -are not themselves derivative works of the Document. - -If the Cover Text requirement of section 3 is applicable to these -copies of the Document, then if the Document is less than one quarter -of the entire aggregate, the Document's Cover Texts may be placed on -covers that surround only the Document within the aggregate. -Otherwise they must appear on covers around the whole aggregate. -@sp 1 -@item -TRANSLATION - -Translation is considered a kind of modification, so you may -distribute translations of the Document under the terms of section 4. -Replacing Invariant Sections with translations requires special -permission from their copyright holders, but you may include -translations of some or all Invariant Sections in addition to the -original versions of these Invariant Sections. You may include a -translation of this License provided that you also include the -original English version of this License. In case of a disagreement -between the translation and the original English version of this -License, the original English version will prevail. -@sp 1 -@item -TERMINATION - -You may not copy, modify, sublicense, or distribute the Document except -as expressly provided for under this License. Any other attempt to -copy, modify, sublicense or distribute the Document is void, and will -automatically terminate your rights under this License. However, -parties who have received copies, or rights, from you under this -License will not have their licenses terminated so long as such -parties remain in full compliance. -@sp 1 -@item -FUTURE REVISIONS OF THIS LICENSE - -The Free Software Foundation may publish new, revised versions -of the GNU Free Documentation License from time to time. Such new -versions will be similar in spirit to the present version, but may -differ in detail to address new problems or concerns. See -@uref{http://www.gnu.org/copyleft/}. 
- -Each version of the License is given a distinguishing version number. -If the Document specifies that a particular numbered version of this -License ``or any later version'' applies to it, you have the option of -following the terms and conditions either of that specified version or -of any later version that has been published (not as a draft) by the -Free Software Foundation. If the Document does not specify a version -number of this License, you may choose any version ever published (not -as a draft) by the Free Software Foundation. - -@end enumerate - -@c fakenode --- for prepinfo -@unnumberedsec ADDENDUM: How to use this License for your documents - -To use this License in a document you have written, include a copy of -the License in the document and put the following copyright and -license notices just after the title page: - -@smallexample -@group - - Copyright (C) @var{year} @var{your name}. - Permission is granted to copy, distribute and/or modify this document - under the terms of the GNU Free Documentation License, Version 1.1 - or any later version published by the Free Software Foundation; - with the Invariant Sections being @var{list their titles}, with the - Front-Cover Texts being @var{list}, and with the Back-Cover Texts being @var{list}. - A copy of the license is included in the section entitled ``GNU - Free Documentation License''. -@end group -@end smallexample -If you have no Invariant Sections, write ``with no Invariant Sections'' -instead of saying which ones are invariant. If you have no -Front-Cover Texts, write ``no Front-Cover Texts'' instead of -``Front-Cover Texts being @var{list}''; likewise for Back-Cover Texts. - -If your document contains nontrivial examples of program code, we -recommend releasing these examples in parallel under your choice of -free software license, such as the GNU General Public License, -to permit their use in free software. - -@node Index, , GNU Free Documentation License, Top -@comment node-name, next, previous, up - -@unnumbered Index -@printindex cp -@bye - -Conventions: -1. Functions, built-in or otherwise, do NOT have () after them. -2. Gawk built-in vars and functions are in @code. Also program vars and - functions. -3. HTTP method names are in @code. -4. Protocols such as echo, ftp, etc are in @samp. -5. URLs are in @url. -6. All RFC's in the index. Put a space between `RFC' and the number. diff --git a/contrib/awk/doc/igawk.1 b/contrib/awk/doc/igawk.1 deleted file mode 100644 index 08173ec..0000000 --- a/contrib/awk/doc/igawk.1 +++ /dev/null @@ -1,73 +0,0 @@ -.TH IGAWK 1 "Nov 3 1999" "Free Software Foundation" "Utility Commands" -.SH NAME -igawk \- gawk with include files -.SH SYNOPSIS -.B igawk -[ all -.I gawk -options ] -.B \-f -.I program-file -[ -.B \-\^\- -] file .\^.\^. -.br -.B igawk -[ all -.I gawk -options ] -[ -.B \-\^\- -] -.I program-text -file .\^.\^. -.SH DESCRIPTION -.I Igawk -is a simple shell script that adds the ability to have ``include files'' to -.IR gawk (1). -.PP -AWK programs for -.I igawk -are the same as for -.IR gawk , -except that, in addition, you may have lines like -.RS -.sp -.ft B -@include getopt.awk -.ft R -.sp -.RE -in your program to include the file -.B getopt.awk -from either the current directory or one of the other directories -in the search path. -.SH OPTIONS -See -.IR gawk (1) -for a full description of the AWK language and the options that -.I gawk -supports. 
-.SH EXAMPLES -.nf -.ft B -cat << EOF > test.awk -@include getopt.awk -.sp -BEGIN { - while (getopt(ARGC, ARGV, "am:q") != \-1) - \&.\^.\^. -} -EOF -.sp -igawk \-f test.awk -.ft R -.fi -.SH SEE ALSO -.IR gawk (1) -.PP -.IR "Effective AWK Programming" , -Edition 1.0, published by the Free Software Foundation, 1995. -.SH AUTHOR -Arnold Robbins -.RB ( arnold@skeeve.com ). diff --git a/contrib/awk/doc/macros b/contrib/awk/doc/macros deleted file mode 100644 index bdfc5c8..0000000 --- a/contrib/awk/doc/macros +++ /dev/null @@ -1,211 +0,0 @@ -.\" SSC Reference card macros -.\" -.\" Copyright (C) 1996, Specialized System Consultants Inc. (SSC) -.\" -.\" These macros are free software; you can redistribute them and/or modify -.\" them under the terms of the GNU General Public License as published by -.\" the Free Software Foundation; either version 2 of the License, or -.\" (at your option) any later version. -.\" -.\" These macros are distributed in the hope that it will be useful, -.\" but WITHOUT ANY WARRANTY; without even the implied warranty of -.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -.\" GNU General Public License for more details. -.\" -.\" You should have received a copy of the GNU General Public License -.\" along with this program; if not, write to the Free Software -.\" Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -.\" -.\" Generic SSC "card" macros -.\" based on lots of other macros -.\" Last update: 4-25-91 ph -.\" attempting to get margins in the boxes Aug 3 09:43:48 PDT 1994 -.ll 3i \" length of text line -.lt 3.2i \" length of title line -.de BT \" bottom of page trap -.sp |8.2i \" go to where we put footer -.ie \\n(CL=1 \{\ -. nr CL 2 -.tl ''\\*(CD\\n+(PN'' \" footer is just page number -. po 4i \" go to second column -.TP \" print header if any -\} -.el \{\ -. nr CL 1 -.tl ''\\*(CD\\n+(PN'' \" footer is just page number -. po .5i \" go to first column -. bp \" force a new page (which will force header) -. TP -\} -.. -.de TP \" top of page -.\" .sp |.2i -.sp |0 -.\" put page header stuff here -.\" for example: .tl ''WOW!'' -.\".sp -.. -.\" .wh 8.1i BT \" set bottom of column trap -.nf \" don't fill lines -.nh \" no hyphenation -.nr CL 1 \" start with column = 1 -.po .5i \" offset for first column -.vs 9 \" line spacing -.ps 8 \" point size -.de ST \" set tabs to normal places -.ta .2i .78i 1.2i 1.7i \" set tabs -.. -.ig - From: bryang@chinet.chi.il.us (Bryan Glennon) - Box macro. Do a .mk z where the box is to start, and a .eb - where it is to end. Optional argument is a title to be centered - within the top box line. - - Usage: - - .mk z - Text, etc to be boxed... - .eb "Optional title goes here" - - ~or~ - - .mk z - Text, etc to be boxed... - .eb - - - Some explanation: - The macro name is eb <.de eb>. First thing we do is to go up on line - <.sp -1> and turn off fill mode <.nf>. Now it gets interesting: the - .ie is the if/else construct. We check the number of arguments provided - to the macro <\\n(.$> and if it is greater than 0 (meaning we have a title), - we do the rest of the .ie line, as follows: - - \h'-.5n' - move left one-half of an n - \L'|\\nzu-1' - draw a vertical line <\L> to the - absolute position (|) given by \\nzu-1, - which is the position set with the .mk - command into register z <\\nz> in base - units minus 1. 
- \l'(\\n(.lu+1n-\w'\\$1'u/2u)\(ul' - Draw a horizontal line <\l> with length - equal to the current line length - <\\n(.l> in base units plus the - space required for an 'n' <1n>, minus - the width <\w> of the title string - <\\$1> in base units divided by 2 - >. Draw the line - using the underline character, <\(ul>. - \v'.3m'\|\\$1\|\v'-.3m' - Move down the page 3/10 of an m, - <\v'.3m'>, output a 1/6 of an m space - <\|>, output the title <\\$1>, another - 1/6 of an m space <\|>, and then move - up the page 3/10 of an m <\v'-.3m'>. - \l'...\(ul' - Draw the second part of the line, just - like the corresponding left half done - before. - \L'-|\\nzu+1' - Draw a verticle line <\L> going down - the absolute distance <-|> from where - the macro was given to where the start - point was marked <\\nz> in base units - plus one line <+1> - \l'|0u-.5n\(ul' - Draw a horizontal line to the absolute - position (|0) when the macro was - invoked, minus half an n <-.5n> using - the underline character <\(ul>. - - The .el beings the else part, which is identical to the above, except - the string dosen't get printed. This makes the printing of the top - line much easier: just draw a line <\l> with width equal to the - current line plus the witdh of an n <\\n(.l+1n> using the underline - character <.\(ul>. -.. -.de ES \" start "text in a box" -.mk z -.in +.5n -.ll -.5n -.sp 1.3 -.. -.de EB \" end "text in a box" -- optional box title as argument -.sp -.6 -.nf -.in -.5n -.ll +.5n -.ie \\n(.$\ -\L'|\\nzu'\ -\l'(\\n(.lu-\w'\\$1'u)/2u-.33m\(ul'\ -\v'.3m'\|\\$1\|\v'-.3m'\ -\l'(\\n(.lu-\w'\\$1'u)/2u\(ul'\ -\L'-|\\nzu'\ -\l'|0u\(ul' -.el \h'-.5n'\L'|\\nzu-1'\l'\\n(.lu+1n\(ul'\L'-|\\nzu+1'\l'|0u-.5n\(ul' -.in 0 -.. -.de SL \" draw single line (works in non-fill mode only) -.sp -.8 -.ti 0 -\l'\\n(.lu\(ul' -.. -.de Hl \" draw horizontal line -.br -.ti 0 -\l'\\n(.lu-\\n(.iu' -.br -.. -.de DL \" draw double line (works in non-fill mode only) -.sp -.8 -.ti 0 -\l'\\n(.lu\(ul' -.sp -.8 -.ti 0 -\l'\\n(.lu\(ul' -.. -.ST -.nr PN 0 1 \" sets starting page number and auto-increment -.\" must define page header (if any) before here -.TP -.ds 3) \|\v'3p'\s+5\z\(sq\s0\v'-3p'\h'1.25p'\v'-.5p'3\v'.5p'\h'2p' -.\" old one .ds 2) \h'-1.5p'\v'1p'\s+4\z\(ci\s0\v'-1p'\h'3.25p'2 -.ds 2) \|\v'-2.4p'\D'c.095id'\h'-5.15p'\v'2.4p'2\h'1.9p' -.ds dC \v'1p'\s+5\(bu\s0\v'-1p'\" for development commands -.ds tC \s+2\(dm\s0\" (for DWB) should be a triangle -.ds tP \s+2\(dm\s0\" (for other text processing) should be a triangle -.\" various trademark symbols -.ds Tm \v'-0.5m'\s8TM\s0\v'0.5m' -.ds Ts \v'-0.5m'\s4TM\s0\v'0.5m' -.ig ++ -.\" mount Serifa fonts -.fp 5 SR -.fp 6 SB -.fp 4 Si -.++ -.\" other assorted junk -.lg 0 -.\" Fl requires extended version of troff -.de Fl \" draw fat horizontal line -.br -.ti 0 -.ruw 1.5p -\l'\\n(.lu-\\n(.iu' -.br -.ruw -.. -.de Bx \" box for keys in text -\\$3\&\|\&\c -\s-3\(br\|\fH\v'.18n'\\$1\v'-.18n\fP\|\(br\l'|0\(rn'\l'|0\(ul'\&\s0\|\\$2 -.. -.de Fn \" function name - left justified, gray background -.\" bold with gray for function name -.ns -.br -\ -.ns -.br -\!! gsave ( ) stringwidth neg 0 rmoveto -\!! /Serifa-Bold findfont 8 scalefont setfont -\!! (\\$1) dup stringwidth pop 6 gsave dup 0 exch rlineto neg exch 0 rlineto -\!! 0 exch rlineto closepath .9 setgray fill grestore show -\!! grestore -.nf -.rs -.. 
-.rs diff --git a/contrib/awk/doc/no.colors b/contrib/awk/doc/no.colors deleted file mode 100644 index d5fb038..0000000 --- a/contrib/awk/doc/no.colors +++ /dev/null @@ -1,31 +0,0 @@ -.\" AWK Reference Card --- Arnold Robbins, arnold@gnu.org -.\" This file is for troff which does not know what to do -.\" with literal Poscript and cannot use the macros from 'colors'. -.\" -.\" Copyright (C) 1996 Free Software Foundation, Inc. -.\" -.\" Permission is granted to make and distribute verbatim copies of -.\" this reference card provided the copyright notice and this permission -.\" notice are preserved on all copies. -.\" -.\" Permission is granted to process this file through troff and print the -.\" results, provided the printed document carries copying permission -.\" notice identical to this one except for the removal of this paragraph -.\" (this paragraph not being relevant to the printed reference card). -.\" -.\" Permission is granted to copy and distribute modified versions of this -.\" reference card under the conditions for verbatim copying, provided that -.\" the entire resulting derived work is distributed under the terms of a -.\" permission notice identical to this one. -.\" -.\" Permission is granted to copy and distribute translations of this -.\" reference card into another language, under the above conditions for -.\" modified versions, except that this permission notice may be stated in -.\" a translation approved by the Foundation. -.\" -.ds CR -.ds CG -.ds CL -.ds CB -.ds CD -.ds CX diff --git a/contrib/awk/doc/setter.outline b/contrib/awk/doc/setter.outline deleted file mode 100644 index 67ade73..0000000 --- a/contrib/awk/doc/setter.outline +++ /dev/null @@ -1,77 +0,0 @@ -%!PS-Adobe-3.0 -% SSC Reference card typesetter outline / cut marks -% -% Copyright (C) 1996, Specialized System Consultants Inc. (SSC) -% -% This file is free software; you can redistribute it and/or modify -% it under the terms of the GNU General Public License as published by -% the Free Software Foundation; either version 2 of the License, or -% (at your option) any later version. -% -% This file is distributed in the hope that it will be useful, -% but WITHOUT ANY WARRANTY; without even the implied warranty of -% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -% GNU General Public License for more details. -% -% You should have received a copy of the GNU General Public License -% along with this program; if not, write to the Free Software -% Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -% -%! page cut marks and stuff for Pocket References - 10-26-88 - ph -%! modified to move the cut marks onto the page -%! 
center a string -/inch {72 mul} def -/cshow % stk: string - % center string in space (space us variable) - { - dup stringwidth pop % get length of string - space exch sub 2 div % compute initial space needed - 0 rmoveto % move over - show - } def - -/flashme -{ /space 612 def - 0 0 0 setrgbcolor % always print this stuff - - /Helvetica findfont 12 scalefont setfont - gsave -% for groff (I hope) - -6 -6 translate - 0.2 setlinewidth - - 0.25 inch 10.5 inch moveto - 0.5 inch 10.5 inch lineto - .75 inch 10.75 inch moveto - .75 inch 11 inch lineto - stroke - - 0.25 inch 2 inch moveto - 0.5 inch 2 inch lineto - .75 inch 1.75 inch moveto - .75 inch 1.50 inch lineto - stroke - 4.25 inch 11 inch moveto - 4.25 inch 10.75 inch lineto - stroke - - 4.25 inch 1.75 inch moveto - 4.25 inch 1.5 inch lineto - stroke - - 7.75 inch 1.5 inch moveto - 7.75 inch 1.75 inch lineto - 8 inch 2 inch moveto - 8.25 inch 2 inch lineto - stroke - - 7.75 inch 11 inch moveto - 7.75 inch 10.75 inch lineto - 8 inch 10.5 inch moveto - 8.25 inch 10.5 inch lineto - stroke - grestore - } def - -% actually do something - diff --git a/contrib/awk/doc/texinfo.tex b/contrib/awk/doc/texinfo.tex deleted file mode 100644 index 0b5b903..0000000 --- a/contrib/awk/doc/texinfo.tex +++ /dev/null @@ -1,6238 +0,0 @@ -% texinfo.tex -- TeX macros to handle Texinfo files. -% -% Load plain if necessary, i.e., if running under initex. -\expandafter\ifx\csname fmtname\endcsname\relax\input plain\fi -% -\def\texinfoversion{2001-03-28.08} -% -% Copyright (C) 1985, 86, 88, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, -% 2000, 01 Free Software Foundation, Inc. -% -% This texinfo.tex file is free software; you can redistribute it and/or -% modify it under the terms of the GNU General Public License as -% published by the Free Software Foundation; either version 2, or (at -% your option) any later version. -% -% This texinfo.tex file is distributed in the hope that it will be -% useful, but WITHOUT ANY WARRANTY; without even the implied warranty -% of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -% General Public License for more details. -% -% You should have received a copy of the GNU General Public License -% along with this texinfo.tex file; see the file COPYING. If not, write -% to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -% Boston, MA 02111-1307, USA. -% -% In other words, you are welcome to use, share and improve this program. -% You are forbidden to forbid anyone else to use, share and improve -% what you give them. Help stamp out software-hoarding! -% -% Please try the latest version of texinfo.tex before submitting bug -% reports; you can get the latest version from: -% ftp://ftp.gnu.org/gnu/texinfo.tex -% (and all GNU mirrors, see http://www.gnu.org/order/ftp.html) -% ftp://texinfo.org/tex/texinfo.tex -% ftp://us.ctan.org/macros/texinfo/texinfo.tex -% (and all CTAN mirrors, finger ctan@us.ctan.org for a list). -% /home/gd/gnu/doc/texinfo.tex on the GNU machines. -% The texinfo.tex in any given Texinfo distribution could well be out -% of date, so if that's what you're using, please check. -% Texinfo has a small home page at http://texinfo.org/. -% -% Send bug reports to bug-texinfo@gnu.org. Please include including a -% complete document in each bug report with which we can reproduce the -% problem. Patches are, of course, greatly appreciated. -% -% To process a Texinfo manual with TeX, it's most reliable to use the -% texi2dvi shell script that comes with the distribution. 
For a simple -% manual foo.texi, however, you can get away with this: -% tex foo.texi -% texindex foo.?? -% tex foo.texi -% tex foo.texi -% dvips foo.dvi -o # or whatever, to process the dvi file; this makes foo.ps. -% The extra runs of TeX get the cross-reference information correct. -% Sometimes one run after texindex suffices, and sometimes you need more -% than two; texi2dvi does it as many times as necessary. -% -% It is possible to adapt texinfo.tex for other languages. You can get -% the existing language-specific files from ftp://ftp.gnu.org/gnu/texinfo/. - -\message{Loading texinfo [version \texinfoversion]:} - -% If in a .fmt file, print the version number -% and turn on active characters that we couldn't do earlier because -% they might have appeared in the input file name. -\everyjob{\message{[Texinfo version \texinfoversion]}% - \catcode`+=\active \catcode`\_=\active} - -% Save some parts of plain tex whose names we will redefine. -\let\ptexb=\b -\let\ptexbullet=\bullet -\let\ptexc=\c -\let\ptexcomma=\, -\let\ptexdot=\. -\let\ptexdots=\dots -\let\ptexend=\end -\let\ptexequiv=\equiv -\let\ptexexclam=\! -\let\ptexi=\i -\let\ptexlbrace=\{ -\let\ptexrbrace=\} -\let\ptexstar=\* -\let\ptext=\t - -% We never want plain's outer \+ definition in Texinfo. -% For @tex, we can use \tabalign. -\let\+ = \relax - -\message{Basics,} -\chardef\other=12 - -% If this character appears in an error message or help string, it -% starts a new line in the output. -\newlinechar = `^^J - -% Set up fixed words for English if not already set. -\ifx\putwordAppendix\undefined \gdef\putwordAppendix{Appendix}\fi -\ifx\putwordChapter\undefined \gdef\putwordChapter{Chapter}\fi -\ifx\putwordfile\undefined \gdef\putwordfile{file}\fi -\ifx\putwordin\undefined \gdef\putwordin{in}\fi -\ifx\putwordIndexIsEmpty\undefined \gdef\putwordIndexIsEmpty{(Index is empty)}\fi -\ifx\putwordIndexNonexistent\undefined \gdef\putwordIndexNonexistent{(Index is nonexistent)}\fi -\ifx\putwordInfo\undefined \gdef\putwordInfo{Info}\fi -\ifx\putwordInstanceVariableof\undefined \gdef\putwordInstanceVariableof{Instance Variable of}\fi -\ifx\putwordMethodon\undefined \gdef\putwordMethodon{Method on}\fi -\ifx\putwordNoTitle\undefined \gdef\putwordNoTitle{No Title}\fi -\ifx\putwordof\undefined \gdef\putwordof{of}\fi -\ifx\putwordon\undefined \gdef\putwordon{on}\fi -\ifx\putwordpage\undefined \gdef\putwordpage{page}\fi -\ifx\putwordsection\undefined \gdef\putwordsection{section}\fi -\ifx\putwordSection\undefined \gdef\putwordSection{Section}\fi -\ifx\putwordsee\undefined \gdef\putwordsee{see}\fi -\ifx\putwordSee\undefined \gdef\putwordSee{See}\fi -\ifx\putwordShortTOC\undefined \gdef\putwordShortTOC{Short Contents}\fi -\ifx\putwordTOC\undefined \gdef\putwordTOC{Table of Contents}\fi -% -\ifx\putwordMJan\undefined \gdef\putwordMJan{January}\fi -\ifx\putwordMFeb\undefined \gdef\putwordMFeb{February}\fi -\ifx\putwordMMar\undefined \gdef\putwordMMar{March}\fi -\ifx\putwordMApr\undefined \gdef\putwordMApr{April}\fi -\ifx\putwordMMay\undefined \gdef\putwordMMay{May}\fi -\ifx\putwordMJun\undefined \gdef\putwordMJun{June}\fi -\ifx\putwordMJul\undefined \gdef\putwordMJul{July}\fi -\ifx\putwordMAug\undefined \gdef\putwordMAug{August}\fi -\ifx\putwordMSep\undefined \gdef\putwordMSep{September}\fi -\ifx\putwordMOct\undefined \gdef\putwordMOct{October}\fi -\ifx\putwordMNov\undefined \gdef\putwordMNov{November}\fi -\ifx\putwordMDec\undefined \gdef\putwordMDec{December}\fi -% -\ifx\putwordDefmac\undefined \gdef\putwordDefmac{Macro}\fi 
-\ifx\putwordDefspec\undefined \gdef\putwordDefspec{Special Form}\fi -\ifx\putwordDefvar\undefined \gdef\putwordDefvar{Variable}\fi -\ifx\putwordDefopt\undefined \gdef\putwordDefopt{User Option}\fi -\ifx\putwordDeftypevar\undefined\gdef\putwordDeftypevar{Variable}\fi -\ifx\putwordDeffunc\undefined \gdef\putwordDeffunc{Function}\fi -\ifx\putwordDeftypefun\undefined\gdef\putwordDeftypefun{Function}\fi - -% Ignore a token. -% -\def\gobble#1{} - -\hyphenation{ap-pen-dix} -\hyphenation{mini-buf-fer mini-buf-fers} -\hyphenation{eshell} -\hyphenation{white-space} - -% Margin to add to right of even pages, to left of odd pages. -\newdimen \bindingoffset -\newdimen \normaloffset -\newdimen\pagewidth \newdimen\pageheight - -% Sometimes it is convenient to have everything in the transcript file -% and nothing on the terminal. We don't just call \tracingall here, -% since that produces some useless output on the terminal. -% -\def\gloggingall{\begingroup \globaldefs = 1 \loggingall \endgroup}% -\ifx\eTeXversion\undefined -\def\loggingall{\tracingcommands2 \tracingstats2 - \tracingpages1 \tracingoutput1 \tracinglostchars1 - \tracingmacros2 \tracingparagraphs1 \tracingrestores1 - \showboxbreadth\maxdimen\showboxdepth\maxdimen -}% -\else -\def\loggingall{\tracingcommands3 \tracingstats2 - \tracingpages1 \tracingoutput1 \tracinglostchars1 - \tracingmacros2 \tracingparagraphs1 \tracingrestores1 - \tracingscantokens1 \tracingassigns1 \tracingifs1 - \tracinggroups1 \tracingnesting2 - \showboxbreadth\maxdimen\showboxdepth\maxdimen -}% -\fi - -% For @cropmarks command. -% Do @cropmarks to get crop marks. -% -\newif\ifcropmarks -\let\cropmarks = \cropmarkstrue -% -% Dimensions to add cropmarks at corners. -% Added by P. A. MacKay, 12 Nov. 1986 -% -\newdimen\outerhsize \newdimen\outervsize % set by the paper size routines -\newdimen\cornerlong \cornerlong=1pc -\newdimen\cornerthick \cornerthick=.3pt -\newdimen\topandbottommargin \topandbottommargin=.75in - -% Main output routine. -\chardef\PAGE = 255 -\output = {\onepageout{\pagecontents\PAGE}} - -\newbox\headlinebox -\newbox\footlinebox - -% \onepageout takes a vbox as an argument. Note that \pagecontents -% does insertions, but you have to call it yourself. -\def\onepageout#1{% - \ifcropmarks \hoffset=0pt \else \hoffset=\normaloffset \fi - % - \ifodd\pageno \advance\hoffset by \bindingoffset - \else \advance\hoffset by -\bindingoffset\fi - % - % Do this outside of the \shipout so @code etc. will be expanded in - % the headline as they should be, not taken literally (outputting ''code). - \setbox\headlinebox = \vbox{\let\hsize=\pagewidth \makeheadline}% - \setbox\footlinebox = \vbox{\let\hsize=\pagewidth \makefootline}% - % - {% - % Have to do this stuff outside the \shipout because we want it to - % take effect in \write's, yet the group defined by the \vbox ends - % before the \shipout runs. - % - \escapechar = `\\ % use backslash in output files. - \indexdummies % don't expand commands in the output. - \normalturnoffactive % \ in index entries must not stay \, e.g., if - % the page break happens to be in the middle of an example. - \shipout\vbox{% - % Do this early so pdf references go to the beginning of the page. 
- \ifpdfmakepagedest \pdfmkdest{\the\pageno} \fi - % - \ifcropmarks \vbox to \outervsize\bgroup - \hsize = \outerhsize - \vskip-\topandbottommargin - \vtop to0pt{% - \line{\ewtop\hfil\ewtop}% - \nointerlineskip - \line{% - \vbox{\moveleft\cornerthick\nstop}% - \hfill - \vbox{\moveright\cornerthick\nstop}% - }% - \vss}% - \vskip\topandbottommargin - \line\bgroup - \hfil % center the page within the outer (page) hsize. - \ifodd\pageno\hskip\bindingoffset\fi - \vbox\bgroup - \fi - % - \unvbox\headlinebox - \pagebody{#1}% - \ifdim\ht\footlinebox > 0pt - % Only leave this space if the footline is nonempty. - % (We lessened \vsize for it in \oddfootingxxx.) - % The \baselineskip=24pt in plain's \makefootline has no effect. - \vskip 2\baselineskip - \unvbox\footlinebox - \fi - % - \ifcropmarks - \egroup % end of \vbox\bgroup - \hfil\egroup % end of (centering) \line\bgroup - \vskip\topandbottommargin plus1fill minus1fill - \boxmaxdepth = \cornerthick - \vbox to0pt{\vss - \line{% - \vbox{\moveleft\cornerthick\nsbot}% - \hfill - \vbox{\moveright\cornerthick\nsbot}% - }% - \nointerlineskip - \line{\ewbot\hfil\ewbot}% - }% - \egroup % \vbox from first cropmarks clause - \fi - }% end of \shipout\vbox - }% end of group with \turnoffactive - \advancepageno - \ifnum\outputpenalty>-20000 \else\dosupereject\fi -} - -\newinsert\margin \dimen\margin=\maxdimen - -\def\pagebody#1{\vbox to\pageheight{\boxmaxdepth=\maxdepth #1}} -{\catcode`\@ =11 -\gdef\pagecontents#1{\ifvoid\topins\else\unvbox\topins\fi -% marginal hacks, juha@viisa.uucp (Juha Takala) -\ifvoid\margin\else % marginal info is present - \rlap{\kern\hsize\vbox to\z@{\kern1pt\box\margin \vss}}\fi -\dimen@=\dp#1 \unvbox#1 -\ifvoid\footins\else\vskip\skip\footins\footnoterule \unvbox\footins\fi -\ifr@ggedbottom \kern-\dimen@ \vfil \fi} -} - -% Here are the rules for the cropmarks. Note that they are -% offset so that the space between them is truly \outerhsize or \outervsize -% (P. A. MacKay, 12 November, 1986) -% -\def\ewtop{\vrule height\cornerthick depth0pt width\cornerlong} -\def\nstop{\vbox - {\hrule height\cornerthick depth\cornerlong width\cornerthick}} -\def\ewbot{\vrule height0pt depth\cornerthick width\cornerlong} -\def\nsbot{\vbox - {\hrule height\cornerlong depth\cornerthick width\cornerthick}} - -% Parse an argument, then pass it to #1. The argument is the rest of -% the input line (except we remove a trailing comment). #1 should be a -% macro which expects an ordinary undelimited TeX argument. -% -\def\parsearg#1{% - \let\next = #1% - \begingroup - \obeylines - \futurelet\temp\parseargx -} - -% If the next token is an obeyed space (from an @example environment or -% the like), remove it and recurse. Otherwise, we're done. -\def\parseargx{% - % \obeyedspace is defined far below, after the definition of \sepspaces. - \ifx\obeyedspace\temp - \expandafter\parseargdiscardspace - \else - \expandafter\parseargline - \fi -} - -% Remove a single space (as the delimiter token to the macro call). -{\obeyspaces % - \gdef\parseargdiscardspace {\futurelet\temp\parseargx}} - -{\obeylines % - \gdef\parseargline#1^^M{% - \endgroup % End of the group started in \parsearg. - % - % First remove any @c comment, then any @comment. - % Result of each macro is put in \toks0. - \argremovec #1\c\relax % - \expandafter\argremovecomment \the\toks0 \comment\relax % - % - % Call the caller's macro, saved as \next in \parsearg. 
- \expandafter\next\expandafter{\the\toks0}% - }% -} - -% Since all \c{,omment} does is throw away the argument, we can let TeX -% do that for us. The \relax here is matched by the \relax in the call -% in \parseargline; it could be more or less anything, its purpose is -% just to delimit the argument to the \c. -\def\argremovec#1\c#2\relax{\toks0 = {#1}} -\def\argremovecomment#1\comment#2\relax{\toks0 = {#1}} - -% \argremovec{,omment} might leave us with trailing spaces, though; e.g., -% @end itemize @c foo -% will have two active spaces as part of the argument with the -% `itemize'. Here we remove all active spaces from #1, and assign the -% result to \toks0. -% -% This loses if there are any *other* active characters besides spaces -% in the argument -- _ ^ +, for example -- since they get expanded. -% Fortunately, Texinfo does not define any such commands. (If it ever -% does, the catcode of the characters in questionwill have to be changed -% here.) But this means we cannot call \removeactivespaces as part of -% \argremovec{,omment}, since @c uses \parsearg, and thus the argument -% that \parsearg gets might well have any character at all in it. -% -\def\removeactivespaces#1{% - \begingroup - \ignoreactivespaces - \edef\temp{#1}% - \global\toks0 = \expandafter{\temp}% - \endgroup -} - -% Change the active space to expand to nothing. -% -\begingroup - \obeyspaces - \gdef\ignoreactivespaces{\obeyspaces\let =\empty} -\endgroup - - -\def\flushcr{\ifx\par\lisppar \def\next##1{}\else \let\next=\relax \fi \next} - -%% These are used to keep @begin/@end levels from running away -%% Call \inENV within environments (after a \begingroup) -\newif\ifENV \ENVfalse \def\inENV{\ifENV\relax\else\ENVtrue\fi} -\def\ENVcheck{% -\ifENV\errmessage{Still within an environment; press RETURN to continue} -\endgroup\fi} % This is not perfect, but it should reduce lossage - -% @begin foo is the same as @foo, for now. -\newhelp\EMsimple{Press RETURN to continue.} - -\outer\def\begin{\parsearg\beginxxx} - -\def\beginxxx #1{% -\expandafter\ifx\csname #1\endcsname\relax -{\errhelp=\EMsimple \errmessage{Undefined command @begin #1}}\else -\csname #1\endcsname\fi} - -% @end foo executes the definition of \Efoo. -% -\def\end{\parsearg\endxxx} -\def\endxxx #1{% - \removeactivespaces{#1}% - \edef\endthing{\the\toks0}% - % - \expandafter\ifx\csname E\endthing\endcsname\relax - \expandafter\ifx\csname \endthing\endcsname\relax - % There's no \foo, i.e., no ``environment'' foo. - \errhelp = \EMsimple - \errmessage{Undefined command `@end \endthing'}% - \else - \unmatchedenderror\endthing - \fi - \else - % Everything's ok; the right environment has been started. - \csname E\endthing\endcsname - \fi -} - -% There is an environment #1, but it hasn't been started. Give an error. -% -\def\unmatchedenderror#1{% - \errhelp = \EMsimple - \errmessage{This `@end #1' doesn't have a matching `@#1'}% -} - -% Define the control sequence \E#1 to give an unmatched @end error. -% -\def\defineunmatchedend#1{% - \expandafter\def\csname E#1\endcsname{\unmatchedenderror{#1}}% -} - - -% Single-spacing is done by various environments (specifically, in -% \nonfillstart and \quotations). -\newskip\singlespaceskip \singlespaceskip = 12.5pt -\def\singlespace{% - % Why was this kern here? It messes up equalizing space above and below - % environments. 
--karl, 6may93 - %{\advance \baselineskip by -\singlespaceskip - %\kern \baselineskip}% - \setleading \singlespaceskip -} - -%% Simple single-character @ commands - -% @@ prints an @ -% Kludge this until the fonts are right (grr). -\def\@{{\tt\char64}} - -% This is turned off because it was never documented -% and you can use @w{...} around a quote to suppress ligatures. -%% Define @` and @' to be the same as ` and ' -%% but suppressing ligatures. -%\def\`{{`}} -%\def\'{{'}} - -% Used to generate quoted braces. -\def\mylbrace {{\tt\char123}} -\def\myrbrace {{\tt\char125}} -\let\{=\mylbrace -\let\}=\myrbrace -\begingroup - % Definitions to produce actual \{ & \} command in an index. - \catcode`\{ = 12 \catcode`\} = 12 - \catcode`\[ = 1 \catcode`\] = 2 - \catcode`\@ = 0 \catcode`\\ = 12 - @gdef@lbracecmd[\{]% - @gdef@rbracecmd[\}]% -@endgroup - -% Accents: @, @dotaccent @ringaccent @ubaraccent @udotaccent -% Others are defined by plain TeX: @` @' @" @^ @~ @= @v @H. -\let\, = \c -\let\dotaccent = \. -\def\ringaccent#1{{\accent23 #1}} -\let\tieaccent = \t -\let\ubaraccent = \b -\let\udotaccent = \d - -% Other special characters: @questiondown @exclamdown -% Plain TeX defines: @AA @AE @O @OE @L (and lowercase versions) @ss. -\def\questiondown{?`} -\def\exclamdown{!`} - -% Dotless i and dotless j, used for accents. -\def\imacro{i} -\def\jmacro{j} -\def\dotless#1{% - \def\temp{#1}% - \ifx\temp\imacro \ptexi - \else\ifx\temp\jmacro \j - \else \errmessage{@dotless can be used only with i or j}% - \fi\fi -} - -% Be sure we're in horizontal mode when doing a tie, since we make space -% equivalent to this in @example-like environments. Otherwise, a space -% at the beginning of a line will start with \penalty -- and -% since \penalty is valid in vertical mode, we'd end up putting the -% penalty on the vertical list instead of in the new paragraph. -{\catcode`@ = 11 - % Avoid using \@M directly, because that causes trouble - % if the definition is written into an index file. - \global\let\tiepenalty = \@M - \gdef\tie{\leavevmode\penalty\tiepenalty\ } -} - -% @: forces normal size whitespace following. -\def\:{\spacefactor=1000 } - -% @* forces a line break. -\def\*{\hfil\break\hbox{}\ignorespaces} - -% @. is an end-of-sentence period. -\def\.{.\spacefactor=3000 } - -% @! is an end-of-sentence bang. -\def\!{!\spacefactor=3000 } - -% @? is an end-of-sentence query. -\def\?{?\spacefactor=3000 } - -% @w prevents a word break. Without the \leavevmode, @w at the -% beginning of a paragraph, when TeX is still in vertical mode, would -% produce a whole line of output instead of starting the paragraph. -\def\w#1{\leavevmode\hbox{#1}} - -% @group ... @end group forces ... to be all on one page, by enclosing -% it in a TeX vbox. We use \vtop instead of \vbox to construct the box -% to keep its height that of a normal line. According to the rules for -% \topskip (p.114 of the TeXbook), the glue inserted is -% max (\topskip - \ht (first item), 0). If that height is large, -% therefore, no glue is inserted, and the space between the headline and -% the text is small, which looks bad. -% -\def\group{\begingroup - \ifnum\catcode13=\active \else - \errhelp = \groupinvalidhelp - \errmessage{@group invalid in context where filling is enabled}% - \fi - % - % The \vtop we start below produces a box with normal height and large - % depth; thus, TeX puts \baselineskip glue before it, and (when the - % next line of text is done) \lineskip glue after it. (See p.82 of - % the TeXbook.) 
Thus, space below is not quite equal to space - % above. But it's pretty close. - \def\Egroup{% - \egroup % End the \vtop. - \endgroup % End the \group. - }% - % - \vtop\bgroup - % We have to put a strut on the last line in case the @group is in - % the midst of an example, rather than completely enclosing it. - % Otherwise, the interline space between the last line of the group - % and the first line afterwards is too small. But we can't put the - % strut in \Egroup, since there it would be on a line by itself. - % Hence this just inserts a strut at the beginning of each line. - \everypar = {\strut}% - % - % Since we have a strut on every line, we don't need any of TeX's - % normal interline spacing. - \offinterlineskip - % - % OK, but now we have to do something about blank - % lines in the input in @example-like environments, which normally - % just turn into \lisppar, which will insert no space now that we've - % turned off the interline space. Simplest is to make them be an - % empty paragraph. - \ifx\par\lisppar - \edef\par{\leavevmode \par}% - % - % Reset ^^M's definition to new definition of \par. - \obeylines - \fi - % - % Do @comment since we are called inside an environment such as - % @example, where each end-of-line in the input causes an - % end-of-line in the output. We don't want the end-of-line after - % the `@group' to put extra space in the output. Since @group - % should appear on a line by itself (according to the Texinfo - % manual), we don't worry about eating any user text. - \comment -} -% -% TeX puts in an \escapechar (i.e., `@') at the beginning of the help -% message, so this ends up printing `@group can only ...'. -% -\newhelp\groupinvalidhelp{% -group can only be used in environments such as @example,^^J% -where each line of input produces a line of output.} - -% @need space-in-mils -% forces a page break if there is not space-in-mils remaining. - -\newdimen\mil \mil=0.001in - -\def\need{\parsearg\needx} - -% Old definition--didn't work. -%\def\needx #1{\par % -%% This method tries to make TeX break the page naturally -%% if the depth of the box does not fit. -%{\baselineskip=0pt% -%\vtop to #1\mil{\vfil}\kern -#1\mil\nobreak -%\prevdepth=-1000pt -%}} - -\def\needx#1{% - % Ensure vertical mode, so we don't make a big box in the middle of a - % paragraph. - \par - % - % If the @need value is less than one line space, it's useless. - \dimen0 = #1\mil - \dimen2 = \ht\strutbox - \advance\dimen2 by \dp\strutbox - \ifdim\dimen0 > \dimen2 - % - % Do a \strut just to make the height of this box be normal, so the - % normal leading is inserted relative to the preceding line. - % And a page break here is fine. - \vtop to #1\mil{\strut\vfil}% - % - % TeX does not even consider page breaks if a penalty added to the - % main vertical list is 10000 or more. But in order to see if the - % empty box we just added fits on the page, we must make it consider - % page breaks. On the other hand, we don't want to actually break the - % page after the empty box. So we use a penalty of 9999. - % - % There is an extremely small chance that TeX will actually break the - % page at this \penalty, if there are no other feasible breakpoints in - % sight. (If the user is using lots of big @group commands, which - % almost-but-not-quite fill up a page, TeX will have a hard time doing - % good page breaking, for example.) However, I could not construct an - % example where a page broke at this \penalty; if it happens in a real - % document, then we can reconsider our strategy. 
- \penalty9999 - % - % Back up by the size of the box, whether we did a page break or not. - \kern -#1\mil - % - % Do not allow a page break right after this kern. - \nobreak - \fi -} - -% @br forces paragraph break - -\let\br = \par - -% @dots{} output an ellipsis using the current font. -% We do .5em per period so that it has the same spacing in a typewriter -% font as three actual period characters. -% -\def\dots{% - \leavevmode - \hbox to 1.5em{% - \hskip 0pt plus 0.25fil minus 0.25fil - .\hss.\hss.% - \hskip 0pt plus 0.5fil minus 0.5fil - }% -} - -% @enddots{} is an end-of-sentence ellipsis. -% -\def\enddots{% - \leavevmode - \hbox to 2em{% - \hskip 0pt plus 0.25fil minus 0.25fil - .\hss.\hss.\hss.% - \hskip 0pt plus 0.5fil minus 0.5fil - }% - \spacefactor=3000 -} - - -% @page forces the start of a new page -% -\def\page{\par\vfill\supereject} - -% @exdent text.... -% outputs text on separate line in roman font, starting at standard page margin - -% This records the amount of indent in the innermost environment. -% That's how much \exdent should take out. -\newskip\exdentamount - -% This defn is used inside fill environments such as @defun. -\def\exdent{\parsearg\exdentyyy} -\def\exdentyyy #1{{\hfil\break\hbox{\kern -\exdentamount{\rm#1}}\hfil\break}} - -% This defn is used inside nofill environments such as @example. -\def\nofillexdent{\parsearg\nofillexdentyyy} -\def\nofillexdentyyy #1{{\advance \leftskip by -\exdentamount -\leftline{\hskip\leftskip{\rm#1}}}} - -% @inmargin{WHICH}{TEXT} puts TEXT in the WHICH margin next to the current -% paragraph. For more general purposes, use the \margin insertion -% class. WHICH is `l' or `r'. -% -\newskip\inmarginspacing \inmarginspacing=1cm -\def\strutdepth{\dp\strutbox} -% -\def\doinmargin#1#2{\strut\vadjust{% - \nobreak - \kern-\strutdepth - \vtop to \strutdepth{% - \baselineskip=\strutdepth - \vss - % if you have multiple lines of stuff to put here, you'll need to - % make the vbox yourself of the appropriate size. - \ifx#1l% - \llap{\ignorespaces #2\hskip\inmarginspacing}% - \else - \rlap{\hskip\hsize \hskip\inmarginspacing \ignorespaces #2}% - \fi - \null - }% -}} -\def\inleftmargin{\doinmargin l} -\def\inrightmargin{\doinmargin r} -% -% @inmargin{TEXT [, RIGHT-TEXT]} -% (if RIGHT-TEXT is given, use TEXT for left page, RIGHT-TEXT for right; -% else use TEXT for both). -% -\def\inmargin#1{\parseinmargin #1,,\finish} -\def\parseinmargin#1,#2,#3\finish{% not perfect, but better than nothing. - \setbox0 = \hbox{\ignorespaces #2}% - \ifdim\wd0 > 0pt - \def\lefttext{#1}% have both texts - \def\righttext{#2}% - \else - \def\lefttext{#1}% have only one text - \def\righttext{#1}% - \fi - % - \ifodd\pageno - \def\temp{\inrightmargin\righttext}% odd page -> outside is right margin - \else - \def\temp{\inleftmargin\lefttext}% - \fi - \temp -} - -% @include file insert text of that file as input. -% Allow normal characters that we make active in the argument (a file name). -\def\include{\begingroup - \catcode`\\=12 - \catcode`~=12 - \catcode`^=12 - \catcode`_=12 - \catcode`|=12 - \catcode`<=12 - \catcode`>=12 - \catcode`+=12 - \parsearg\includezzz} -% Restore active chars for included file. -\def\includezzz#1{\endgroup\begingroup - % Read the included file in a group so nested @include's work. 
- \def\thisfile{#1}% - \input\thisfile -\endgroup} - -\def\thisfile{} - -% @center line outputs that line, centered - -\def\center{\parsearg\centerzzz} -\def\centerzzz #1{{\advance\hsize by -\leftskip -\advance\hsize by -\rightskip -\centerline{#1}}} - -% @sp n outputs n lines of vertical space - -\def\sp{\parsearg\spxxx} -\def\spxxx #1{\vskip #1\baselineskip} - -% @comment ...line which is ignored... -% @c is the same as @comment -% @ignore ... @end ignore is another way to write a comment - -\def\comment{\begingroup \catcode`\^^M=\other% -\catcode`\@=\other \catcode`\{=\other \catcode`\}=\other% -\commentxxx} -{\catcode`\^^M=\other \gdef\commentxxx#1^^M{\endgroup}} - -\let\c=\comment - -% @paragraphindent NCHARS -% We'll use ems for NCHARS, close enough. -% We cannot implement @paragraphindent asis, though. -% -\def\asisword{asis} % no translation, these are keywords -\def\noneword{none} -% -\def\paragraphindent{\parsearg\doparagraphindent} -\def\doparagraphindent#1{% - \def\temp{#1}% - \ifx\temp\asisword - \else - \ifx\temp\noneword - \defaultparindent = 0pt - \else - \defaultparindent = #1em - \fi - \fi - \parindent = \defaultparindent -} - -% @exampleindent NCHARS -% We'll use ems for NCHARS like @paragraphindent. -% It seems @exampleindent asis isn't necessary, but -% I preserve it to make it similar to @paragraphindent. -\def\exampleindent{\parsearg\doexampleindent} -\def\doexampleindent#1{% - \def\temp{#1}% - \ifx\temp\asisword - \else - \ifx\temp\noneword - \lispnarrowing = 0pt - \else - \lispnarrowing = #1em - \fi - \fi -} - -% @asis just yields its argument. Used with @table, for example. -% -\def\asis#1{#1} - -% @math means output in math mode. -% We don't use $'s directly in the definition of \math because control -% sequences like \math are expanded when the toc file is written. Then, -% we read the toc file back, the $'s will be normal characters (as they -% should be, according to the definition of Texinfo). So we must use a -% control sequence to switch into and out of math mode. -% -% This isn't quite enough for @math to work properly in indices, but it -% seems unlikely it will ever be needed there. -% -\let\implicitmath = $ -\def\math#1{\implicitmath #1\implicitmath} - -% @bullet and @minus need the same treatment as @math, just above. -\def\bullet{\implicitmath\ptexbullet\implicitmath} -\def\minus{\implicitmath-\implicitmath} - -% @refill is a no-op. -\let\refill=\relax - -% If working on a large document in chapters, it is convenient to -% be able to disable indexing, cross-referencing, and contents, for test runs. -% This is done with @novalidate (before @setfilename). -% -\newif\iflinks \linkstrue % by default we want the aux files. -\let\novalidate = \linksfalse - -% @setfilename is done at the beginning of every texinfo file. -% So open here the files we need to have open while reading the input. -% This makes it possible to make a .fmt file for texinfo. -\def\setfilename{% - \iflinks - \readauxfile - \fi % \openindices needs to do some work in any case. - \openindices - \fixbackslash % Turn off hack to swallow `\input texinfo'. - \global\let\setfilename=\comment % Ignore extra @setfilename cmds. - % - % If texinfo.cnf is present on the system, read it. - % Useful for site-wide @afourpaper, etc. - % Just to be on the safe side, close the input stream before the \input. - \openin 1 texinfo.cnf - \ifeof1 \let\temp=\relax \else \def\temp{\input texinfo.cnf }\fi - \closein1 - \temp - % - \comment % Ignore the actual filename. -} - -% Called from \setfilename. 
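
[Editorial illustration, not part of the removed file.] The commands implemented above (@setfilename, @paragraphindent, @exampleindent) are the ones a manual gives in its preamble. A minimal usage sketch, with a hypothetical file name and title:

    \input texinfo
    @setfilename sample.info
    @settitle A Sample Manual
    @paragraphindent 2
    @exampleindent 4

@setfilename triggers the aux-file and index setup shown here, and the two indent commands set \defaultparindent and \lispnarrowing in ems, as the definitions above implement.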
-% -\def\openindices{% - \newindex{cp}% - \newcodeindex{fn}% - \newcodeindex{vr}% - \newcodeindex{tp}% - \newcodeindex{ky}% - \newcodeindex{pg}% -} - -% @bye. -\outer\def\bye{\pagealignmacro\tracingstats=1\ptexend} - - -\message{pdf,} -% adobe `portable' document format -\newcount\tempnum -\newcount\lnkcount -\newtoks\filename -\newcount\filenamelength -\newcount\pgn -\newtoks\toksA -\newtoks\toksB -\newtoks\toksC -\newtoks\toksD -\newbox\boxA -\newcount\countA -\newif\ifpdf -\newif\ifpdfmakepagedest - -\ifx\pdfoutput\undefined - \pdffalse - \let\pdfmkdest = \gobble - \let\pdfurl = \gobble - \let\endlink = \relax - \let\linkcolor = \relax - \let\pdfmakeoutlines = \relax -\else - \pdftrue - \pdfoutput = 1 - \input pdfcolor - \def\dopdfimage#1#2#3{% - \def\imagewidth{#2}% - \def\imageheight{#3}% - \ifnum\pdftexversion < 14 - \pdfimage - \else - \pdfximage - \fi - \ifx\empty\imagewidth\else width \imagewidth \fi - \ifx\empty\imageheight\else height \imageheight \fi - \ifnum\pdftexversion<13 - #1.pdf% - \else - {#1.pdf}% - \fi - \ifnum\pdftexversion < 14 \else - \pdfrefximage \pdflastximage - \fi} - \def\pdfmkdest#1{\pdfdest name{#1} xyz} - \def\pdfmkpgn#1{#1@} - \let\linkcolor = \Blue % was Cyan, but that seems light? - \def\endlink{\Black\pdfendlink} - % Adding outlines to PDF; macros for calculating structure of outlines - % come from Petr Olsak - \def\expnumber#1{\expandafter\ifx\csname#1\endcsname\relax 0% - \else \csname#1\endcsname \fi} - \def\advancenumber#1{\tempnum=\expnumber{#1}\relax - \advance\tempnum by1 - \expandafter\xdef\csname#1\endcsname{\the\tempnum}} - \def\pdfmakeoutlines{{% - \openin 1 \jobname.toc - \ifeof 1\else\bgroup - \closein 1 - \indexnofonts - \def\tt{} - \let\_ = \normalunderscore - % Thanh's hack / proper braces in bookmarks - \edef\mylbrace{\iftrue \string{\else}\fi}\let\{=\mylbrace - \edef\myrbrace{\iffalse{\else\string}\fi}\let\}=\myrbrace - % - \def\chapentry ##1##2##3{} - \def\unnumbchapentry ##1##2{} - \def\secentry ##1##2##3##4{\advancenumber{chap##2}} - \def\unnumbsecentry ##1##2{} - \def\subsecentry ##1##2##3##4##5{\advancenumber{sec##2.##3}} - \def\unnumbsubsecentry ##1##2{} - \def\subsubsecentry ##1##2##3##4##5##6{\advancenumber{subsec##2.##3.##4}} - \def\unnumbsubsubsecentry ##1##2{} - \input \jobname.toc - \def\chapentry ##1##2##3{% - \pdfoutline goto name{\pdfmkpgn{##3}}count-\expnumber{chap##2}{##1}} - \def\unnumbchapentry ##1##2{% - \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} - \def\secentry ##1##2##3##4{% - \pdfoutline goto name{\pdfmkpgn{##4}}count-\expnumber{sec##2.##3}{##1}} - \def\unnumbsecentry ##1##2{% - \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} - \def\subsecentry ##1##2##3##4##5{% - \pdfoutline goto name{\pdfmkpgn{##5}}count-\expnumber{subsec##2.##3.##4}{##1}} - \def\unnumbsubsecentry ##1##2{% - \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} - \def\subsubsecentry ##1##2##3##4##5##6{% - \pdfoutline goto name{\pdfmkpgn{##6}}{##1}} - \def\unnumbsubsubsecentry ##1##2{% - \pdfoutline goto name{\pdfmkpgn{##2}}{##1}} - \input \jobname.toc - \egroup\fi - }} - \def\makelinks #1,{% - \def\params{#1}\def\E{END}% - \ifx\params\E - \let\nextmakelinks=\relax - \else - \let\nextmakelinks=\makelinks - \ifnum\lnkcount>0,\fi - \picknum{#1}% - \startlink attr{/Border [0 0 0]} - goto name{\pdfmkpgn{\the\pgn}}% - \linkcolor #1% - \advance\lnkcount by 1% - \endlink - \fi - \nextmakelinks - } - \def\picknum#1{\expandafter\pn#1} - \def\pn#1{% - \def\p{#1}% - \ifx\p\lbrace - \let\nextpn=\ppn - \else - \let\nextpn=\ppnn - \def\first{#1} - \fi - \nextpn - } - 
\def\ppn#1{\pgn=#1\gobble} - \def\ppnn{\pgn=\first} - \def\pdfmklnk#1{\lnkcount=0\makelinks #1,END,} - \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks} - \def\skipspaces#1{\def\PP{#1}\def\D{|}% - \ifx\PP\D\let\nextsp\relax - \else\let\nextsp\skipspaces - \ifx\p\space\else\addtokens{\filename}{\PP}% - \advance\filenamelength by 1 - \fi - \fi - \nextsp} - \def\getfilename#1{\filenamelength=0\expandafter\skipspaces#1|\relax} - \ifnum\pdftexversion < 14 - \let \startlink \pdfannotlink - \else - \let \startlink \pdfstartlink - \fi - \def\pdfurl#1{% - \begingroup - \normalturnoffactive\def\@{@}% - \leavevmode\Red - \startlink attr{/Border [0 0 0]}% - user{/Subtype /Link /A << /S /URI /URI (#1) >>}% - % #1 - \endgroup} - \def\pdfgettoks#1.{\setbox\boxA=\hbox{\toksA={#1.}\toksB={}\maketoks}} - \def\addtokens#1#2{\edef\addtoks{\noexpand#1={\the#1#2}}\addtoks} - \def\adn#1{\addtokens{\toksC}{#1}\global\countA=1\let\next=\maketoks} - \def\poptoks#1#2|ENDTOKS|{\let\first=#1\toksD={#1}\toksA={#2}} - \def\maketoks{% - \expandafter\poptoks\the\toksA|ENDTOKS| - \ifx\first0\adn0 - \else\ifx\first1\adn1 \else\ifx\first2\adn2 \else\ifx\first3\adn3 - \else\ifx\first4\adn4 \else\ifx\first5\adn5 \else\ifx\first6\adn6 - \else\ifx\first7\adn7 \else\ifx\first8\adn8 \else\ifx\first9\adn9 - \else - \ifnum0=\countA\else\makelink\fi - \ifx\first.\let\next=\done\else - \let\next=\maketoks - \addtokens{\toksB}{\the\toksD} - \ifx\first,\addtokens{\toksB}{\space}\fi - \fi - \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi - \next} - \def\makelink{\addtokens{\toksB}% - {\noexpand\pdflink{\the\toksC}}\toksC={}\global\countA=0} - \def\pdflink#1{% - \startlink attr{/Border [0 0 0]} goto name{\mkpgn{#1}} - \linkcolor #1\endlink} - \def\mkpgn#1{#1@} - \def\done{\edef\st{\global\noexpand\toksA={\the\toksB}}\st} -\fi % \ifx\pdfoutput - - -\message{fonts,} -% Font-change commands. - -% Texinfo sort of supports the sans serif font style, which plain TeX does not. -% So we set up a \sf analogous to plain's \rm, etc. -\newfam\sffam -\def\sf{\fam=\sffam \tensf} -\let\li = \sf % Sometimes we call it \li, not \sf. - -% We don't need math for this one. -\def\ttsl{\tenttsl} - -% Use Computer Modern fonts at \magstephalf (11pt). -\newcount\mainmagstep -\mainmagstep=\magstephalf - -% Set the font macro #1 to the font named #2, adding on the -% specified font prefix (normally `cm'). -% #3 is the font's design size, #4 is a scale factor -\def\setfont#1#2#3#4{\font#1=\fontprefix#2#3 scaled #4} - -% Use cm as the default font prefix. -% To specify the font prefix, you must define \fontprefix -% before you read in texinfo.tex. -\ifx\fontprefix\undefined -\def\fontprefix{cm} -\fi -% Support font families that don't use the same naming scheme as CM. -\def\rmshape{r} -\def\rmbshape{bx} %where the normal face is bold -\def\bfshape{b} -\def\bxshape{bx} -\def\ttshape{tt} -\def\ttbshape{tt} -\def\ttslshape{sltt} -\def\itshape{ti} -\def\itbshape{bxti} -\def\slshape{sl} -\def\slbshape{bxsl} -\def\sfshape{ss} -\def\sfbshape{ss} -\def\scshape{csc} -\def\scbshape{csc} - -\ifx\bigger\relax -\let\mainmagstep=\magstep1 -\setfont\textrm\rmshape{12}{1000} -\setfont\texttt\ttshape{12}{1000} -\else -\setfont\textrm\rmshape{10}{\mainmagstep} -\setfont\texttt\ttshape{10}{\mainmagstep} -\fi -% Instead of cmb10, you many want to use cmbx10. -% cmbx10 is a prettier font on its own, but cmb10 -% looks better when embedded in a line with cmr10. 
-\setfont\textbf\bfshape{10}{\mainmagstep} -\setfont\textit\itshape{10}{\mainmagstep} -\setfont\textsl\slshape{10}{\mainmagstep} -\setfont\textsf\sfshape{10}{\mainmagstep} -\setfont\textsc\scshape{10}{\mainmagstep} -\setfont\textttsl\ttslshape{10}{\mainmagstep} -\font\texti=cmmi10 scaled \mainmagstep -\font\textsy=cmsy10 scaled \mainmagstep - -% A few fonts for @defun, etc. -\setfont\defbf\bxshape{10}{\magstep1} %was 1314 -\setfont\deftt\ttshape{10}{\magstep1} -\def\df{\let\tentt=\deftt \let\tenbf = \defbf \bf} - -% Fonts for indices, footnotes, small examples (9pt). -\setfont\smallrm\rmshape{9}{1000} -\setfont\smalltt\ttshape{9}{1000} -\setfont\smallbf\bfshape{10}{900} -\setfont\smallit\itshape{9}{1000} -\setfont\smallsl\slshape{9}{1000} -\setfont\smallsf\sfshape{9}{1000} -\setfont\smallsc\scshape{10}{900} -\setfont\smallttsl\ttslshape{10}{900} -\font\smalli=cmmi9 -\font\smallsy=cmsy9 - -% Fonts for title page: -\setfont\titlerm\rmbshape{12}{\magstep3} -\setfont\titleit\itbshape{10}{\magstep4} -\setfont\titlesl\slbshape{10}{\magstep4} -\setfont\titlett\ttbshape{12}{\magstep3} -\setfont\titlettsl\ttslshape{10}{\magstep4} -\setfont\titlesf\sfbshape{17}{\magstep1} -\let\titlebf=\titlerm -\setfont\titlesc\scbshape{10}{\magstep4} -\font\titlei=cmmi12 scaled \magstep3 -\font\titlesy=cmsy10 scaled \magstep4 -\def\authorrm{\secrm} - -% Chapter (and unnumbered) fonts (17.28pt). -\setfont\chaprm\rmbshape{12}{\magstep2} -\setfont\chapit\itbshape{10}{\magstep3} -\setfont\chapsl\slbshape{10}{\magstep3} -\setfont\chaptt\ttbshape{12}{\magstep2} -\setfont\chapttsl\ttslshape{10}{\magstep3} -\setfont\chapsf\sfbshape{17}{1000} -\let\chapbf=\chaprm -\setfont\chapsc\scbshape{10}{\magstep3} -\font\chapi=cmmi12 scaled \magstep2 -\font\chapsy=cmsy10 scaled \magstep3 - -% Section fonts (14.4pt). -\setfont\secrm\rmbshape{12}{\magstep1} -\setfont\secit\itbshape{10}{\magstep2} -\setfont\secsl\slbshape{10}{\magstep2} -\setfont\sectt\ttbshape{12}{\magstep1} -\setfont\secttsl\ttslshape{10}{\magstep2} -\setfont\secsf\sfbshape{12}{\magstep1} -\let\secbf\secrm -\setfont\secsc\scbshape{10}{\magstep2} -\font\seci=cmmi12 scaled \magstep1 -\font\secsy=cmsy10 scaled \magstep2 - -% \setfont\ssecrm\bxshape{10}{\magstep1} % This size an font looked bad. -% \setfont\ssecit\itshape{10}{\magstep1} % The letters were too crowded. -% \setfont\ssecsl\slshape{10}{\magstep1} -% \setfont\ssectt\ttshape{10}{\magstep1} -% \setfont\ssecsf\sfshape{10}{\magstep1} - -%\setfont\ssecrm\bfshape{10}{1315} % Note the use of cmb rather than cmbx. -%\setfont\ssecit\itshape{10}{1315} % Also, the size is a little larger than -%\setfont\ssecsl\slshape{10}{1315} % being scaled magstep1. -%\setfont\ssectt\ttshape{10}{1315} -%\setfont\ssecsf\sfshape{10}{1315} - -%\let\ssecbf=\ssecrm - -% Subsection fonts (13.15pt). -\setfont\ssecrm\rmbshape{12}{\magstephalf} -\setfont\ssecit\itbshape{10}{1315} -\setfont\ssecsl\slbshape{10}{1315} -\setfont\ssectt\ttbshape{12}{\magstephalf} -\setfont\ssecttsl\ttslshape{10}{1315} -\setfont\ssecsf\sfbshape{12}{\magstephalf} -\let\ssecbf\ssecrm -\setfont\ssecsc\scbshape{10}{\magstep1} -\font\sseci=cmmi12 scaled \magstephalf -\font\ssecsy=cmsy10 scaled 1315 -% The smallcaps and symbol fonts should actually be scaled \magstep1.5, -% but that is not a standard magnification. - -% In order for the font changes to affect most math symbols and letters, -% we have to define the \textfont of the standard families. 
Since -% texinfo doesn't allow for producing subscripts and superscripts, we -% don't bother to reset \scriptfont and \scriptscriptfont (which would -% also require loading a lot more fonts). -% -\def\resetmathfonts{% - \textfont0 = \tenrm \textfont1 = \teni \textfont2 = \tensy - \textfont\itfam = \tenit \textfont\slfam = \tensl \textfont\bffam = \tenbf - \textfont\ttfam = \tentt \textfont\sffam = \tensf -} - - -% The font-changing commands redefine the meanings of \tenSTYLE, instead -% of just \STYLE. We do this so that font changes will continue to work -% in math mode, where it is the current \fam that is relevant in most -% cases, not the current font. Plain TeX does \def\bf{\fam=\bffam -% \tenbf}, for example. By redefining \tenbf, we obviate the need to -% redefine \bf itself. -\def\textfonts{% - \let\tenrm=\textrm \let\tenit=\textit \let\tensl=\textsl - \let\tenbf=\textbf \let\tentt=\texttt \let\smallcaps=\textsc - \let\tensf=\textsf \let\teni=\texti \let\tensy=\textsy \let\tenttsl=\textttsl - \resetmathfonts} -\def\titlefonts{% - \let\tenrm=\titlerm \let\tenit=\titleit \let\tensl=\titlesl - \let\tenbf=\titlebf \let\tentt=\titlett \let\smallcaps=\titlesc - \let\tensf=\titlesf \let\teni=\titlei \let\tensy=\titlesy - \let\tenttsl=\titlettsl - \resetmathfonts \setleading{25pt}} -\def\titlefont#1{{\titlefonts\rm #1}} -\def\chapfonts{% - \let\tenrm=\chaprm \let\tenit=\chapit \let\tensl=\chapsl - \let\tenbf=\chapbf \let\tentt=\chaptt \let\smallcaps=\chapsc - \let\tensf=\chapsf \let\teni=\chapi \let\tensy=\chapsy \let\tenttsl=\chapttsl - \resetmathfonts \setleading{19pt}} -\def\secfonts{% - \let\tenrm=\secrm \let\tenit=\secit \let\tensl=\secsl - \let\tenbf=\secbf \let\tentt=\sectt \let\smallcaps=\secsc - \let\tensf=\secsf \let\teni=\seci \let\tensy=\secsy \let\tenttsl=\secttsl - \resetmathfonts \setleading{16pt}} -\def\subsecfonts{% - \let\tenrm=\ssecrm \let\tenit=\ssecit \let\tensl=\ssecsl - \let\tenbf=\ssecbf \let\tentt=\ssectt \let\smallcaps=\ssecsc - \let\tensf=\ssecsf \let\teni=\sseci \let\tensy=\ssecsy \let\tenttsl=\ssecttsl - \resetmathfonts \setleading{15pt}} -\let\subsubsecfonts = \subsecfonts % Maybe make sssec fonts scaled magstephalf? -\def\smallfonts{% - \let\tenrm=\smallrm \let\tenit=\smallit \let\tensl=\smallsl - \let\tenbf=\smallbf \let\tentt=\smalltt \let\smallcaps=\smallsc - \let\tensf=\smallsf \let\teni=\smalli \let\tensy=\smallsy - \let\tenttsl=\smallttsl - \resetmathfonts \setleading{11pt}} - -% Set up the default fonts, so we can use them for creating boxes. -% -\textfonts - -% Define these so they can be easily changed for other fonts. -\def\angleleft{$\langle$} -\def\angleright{$\rangle$} - -% Count depth in font-changes, for error checks -\newcount\fontdepth \fontdepth=0 - -% Fonts for short table of contents. -\setfont\shortcontrm\rmshape{12}{1000} -\setfont\shortcontbf\bxshape{12}{1000} -\setfont\shortcontsl\slshape{12}{1000} - -%% Add scribe-like font environments, plus @l for inline lisp (usually sans -%% serif) and @ii for TeX italic - -% \smartitalic{ARG} outputs arg in italics, followed by an italic correction -% unless the following character is such as not to need one. 
-\def\smartitalicx{\ifx\next,\else\ifx\next-\else\ifx\next.\else\/\fi\fi\fi} -\def\smartslanted#1{{\sl #1}\futurelet\next\smartitalicx} -\def\smartitalic#1{{\it #1}\futurelet\next\smartitalicx} - -\let\i=\smartitalic -\let\var=\smartslanted -\let\dfn=\smartslanted -\let\emph=\smartitalic -\let\cite=\smartslanted - -\def\b#1{{\bf #1}} -\let\strong=\b - -% We can't just use \exhyphenpenalty, because that only has effect at -% the end of a paragraph. Restore normal hyphenation at the end of the -% group within which \nohyphenation is presumably called. -% -\def\nohyphenation{\hyphenchar\font = -1 \aftergroup\restorehyphenation} -\def\restorehyphenation{\hyphenchar\font = `- } - -\def\t#1{% - {\tt \rawbackslash \frenchspacing #1}% - \null -} -\let\ttfont=\t -\def\samp#1{`\tclose{#1}'\null} -\setfont\keyrm\rmshape{8}{1000} -\font\keysy=cmsy9 -\def\key#1{{\keyrm\textfont2=\keysy \leavevmode\hbox{% - \raise0.4pt\hbox{\angleleft}\kern-.08em\vtop{% - \vbox{\hrule\kern-0.4pt - \hbox{\raise0.4pt\hbox{\vphantom{\angleleft}}#1}}% - \kern-0.4pt\hrule}% - \kern-.06em\raise0.4pt\hbox{\angleright}}}} -% The old definition, with no lozenge: -%\def\key #1{{\ttsl \nohyphenation \uppercase{#1}}\null} -\def\ctrl #1{{\tt \rawbackslash \hat}#1} - -% @file, @option are the same as @samp. -\let\file=\samp -\let\option=\samp - -% @code is a modification of @t, -% which makes spaces the same size as normal in the surrounding text. -\def\tclose#1{% - {% - % Change normal interword space to be same as for the current font. - \spaceskip = \fontdimen2\font - % - % Switch to typewriter. - \tt - % - % But `\ ' produces the large typewriter interword space. - \def\ {{\spaceskip = 0pt{} }}% - % - % Turn off hyphenation. - \nohyphenation - % - \rawbackslash - \frenchspacing - #1% - }% - \null -} - -% We *must* turn on hyphenation at `-' and `_' in \code. -% Otherwise, it is too hard to avoid overfull hboxes -% in the Emacs manual, the Library manual, etc. - -% Unfortunately, TeX uses one parameter (\hyphenchar) to control -% both hyphenation at - and hyphenation within words. -% We must therefore turn them both off (\tclose does that) -% and arrange explicitly to hyphenate at a dash. -% -- rms. -{ - \catcode`\-=\active - \catcode`\_=\active - % - \global\def\code{\begingroup - \catcode`\-=\active \let-\codedash - \catcode`\_=\active \let_\codeunder - \codex - } - % - % If we end up with any active - characters when handling the index, - % just treat them as a normal -. - \global\def\indexbreaks{\catcode`\-=\active \let-\realdash} -} - -\def\realdash{-} -\def\codedash{-\discretionary{}{}{}} -\def\codeunder{\ifusingtt{\normalunderscore\discretionary{}{}{}}{\_}} -\def\codex #1{\tclose{#1}\endgroup} - -%\let\exp=\tclose %Was temporary - -% @kbd is like @code, except that if the argument is just one @key command, -% then @kbd has no effect. - -% @kbdinputstyle -- arg is `distinct' (@kbd uses slanted tty font always), -% `example' (@kbd uses ttsl only inside of @example and friends), -% or `code' (@kbd uses normal tty font always). -\def\kbdinputstyle{\parsearg\kbdinputstylexxx} -\def\kbdinputstylexxx#1{% - \def\arg{#1}% - \ifx\arg\worddistinct - \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\ttsl}% - \else\ifx\arg\wordexample - \gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\tt}% - \else\ifx\arg\wordcode - \gdef\kbdexamplefont{\tt}\gdef\kbdfont{\tt}% - \fi\fi\fi -} -\def\worddistinct{distinct} -\def\wordexample{example} -\def\wordcode{code} - -% Default is kbdinputdistinct. 
(Too much of a hassle to call the macro, -% the catcodes are wrong for parsearg to work.) -\gdef\kbdexamplefont{\ttsl}\gdef\kbdfont{\ttsl} - -\def\xkey{\key} -\def\kbdfoo#1#2#3\par{\def\one{#1}\def\three{#3}\def\threex{??}% -\ifx\one\xkey\ifx\threex\three \key{#2}% -\else{\tclose{\kbdfont\look}}\fi -\else{\tclose{\kbdfont\look}}\fi} - -% For @url, @env, @command quotes seem unnecessary, so use \code. -\let\url=\code -\let\env=\code -\let\command=\code - -% @uref (abbreviation for `urlref') takes an optional (comma-separated) -% second argument specifying the text to display and an optional third -% arg as text to display instead of (rather than in addition to) the url -% itself. First (mandatory) arg is the url. Perhaps eventually put in -% a hypertex \special here. -% -\def\uref#1{\douref #1,,,\finish} -\def\douref#1,#2,#3,#4\finish{\begingroup - \unsepspaces - \pdfurl{#1}% - \setbox0 = \hbox{\ignorespaces #3}% - \ifdim\wd0 > 0pt - \unhbox0 % third arg given, show only that - \else - \setbox0 = \hbox{\ignorespaces #2}% - \ifdim\wd0 > 0pt - \ifpdf - \unhbox0 % PDF: 2nd arg given, show only it - \else - \unhbox0\ (\code{#1})% DVI: 2nd arg given, show both it and url - \fi - \else - \code{#1}% only url given, so show it - \fi - \fi - \endlink -\endgroup} - -% rms does not like angle brackets --karl, 17may97. -% So now @email is just like @uref, unless we are pdf. -% -%\def\email#1{\angleleft{\tt #1}\angleright} -\ifpdf - \def\email#1{\doemail#1,,\finish} - \def\doemail#1,#2,#3\finish{\begingroup - \unsepspaces - \pdfurl{mailto:#1}% - \setbox0 = \hbox{\ignorespaces #2}% - \ifdim\wd0>0pt\unhbox0\else\code{#1}\fi - \endlink - \endgroup} -\else - \let\email=\uref -\fi - -% Check if we are currently using a typewriter font. Since all the -% Computer Modern typewriter fonts have zero interword stretch (and -% shrink), and it is reasonable to expect all typewriter fonts to have -% this property, we can check that font parameter. -% -\def\ifmonospace{\ifdim\fontdimen3\font=0pt } - -% Typeset a dimension, e.g., `in' or `pt'. The only reason for the -% argument is to make the input look right: @dmn{pt} instead of @dmn{}pt. -% -\def\dmn#1{\thinspace #1} - -\def\kbd#1{\def\look{#1}\expandafter\kbdfoo\look??\par} - -% @l was never documented to mean ``switch to the Lisp font'', -% and it is not used as such in any manual I can find. We need it for -% Polish suppressed-l. --karl, 22sep96. -%\def\l#1{{\li #1}\null} - -% Explicit font changes: @r, @sc, undocumented @ii. -\def\r#1{{\rm #1}} % roman font -\def\sc#1{{\smallcaps#1}} % smallcaps font -\def\ii#1{{\it #1}} % italic font - -% @acronym downcases the argument and prints in smallcaps. -\def\acronym#1{{\smallcaps \lowercase{#1}}} - -% @pounds{} is a sterling sign. -\def\pounds{{\it\$}} - - -\message{page headings,} - -\newskip\titlepagetopglue \titlepagetopglue = 1.5in -\newskip\titlepagebottomglue \titlepagebottomglue = 2pc - -% First the title page. Must do @settitle before @titlepage. -\newif\ifseenauthor -\newif\iffinishedtitlepage - -% Do an implicit @contents or @shortcontents after @end titlepage if the -% user says @setcontentsaftertitlepage or @setshortcontentsaftertitlepage. 
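
[Editorial illustration, not part of the removed file.] In user terms, either switch is a one-line declaration near the top of the manual; a minimal sketch (the title and author are placeholders):

    @setcontentsaftertitlepage
    @titlepage
    @title A Sample Manual
    @author A. Hacker
    @end titlepage

With the declaration present, the full table of contents is emitted automatically right after @end titlepage, as the comment above describes.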
-% -\newif\ifsetcontentsaftertitlepage - \let\setcontentsaftertitlepage = \setcontentsaftertitlepagetrue -\newif\ifsetshortcontentsaftertitlepage - \let\setshortcontentsaftertitlepage = \setshortcontentsaftertitlepagetrue - -\def\shorttitlepage{\parsearg\shorttitlepagezzz} -\def\shorttitlepagezzz #1{\begingroup\hbox{}\vskip 1.5in \chaprm \centerline{#1}% - \endgroup\page\hbox{}\page} - -\def\titlepage{\begingroup \parindent=0pt \textfonts - \let\subtitlerm=\tenrm - \def\subtitlefont{\subtitlerm \normalbaselineskip = 13pt \normalbaselines}% - % - \def\authorfont{\authorrm \normalbaselineskip = 16pt \normalbaselines}% - % - % Leave some space at the very top of the page. - \vglue\titlepagetopglue - % - % Now you can print the title using @title. - \def\title{\parsearg\titlezzz}% - \def\titlezzz##1{\leftline{\titlefonts\rm ##1} - % print a rule at the page bottom also. - \finishedtitlepagefalse - \vskip4pt \hrule height 4pt width \hsize \vskip4pt}% - % No rule at page bottom unless we print one at the top with @title. - \finishedtitlepagetrue - % - % Now you can put text using @subtitle. - \def\subtitle{\parsearg\subtitlezzz}% - \def\subtitlezzz##1{{\subtitlefont \rightline{##1}}}% - % - % @author should come last, but may come many times. - \def\author{\parsearg\authorzzz}% - \def\authorzzz##1{\ifseenauthor\else\vskip 0pt plus 1filll\seenauthortrue\fi - {\authorfont \leftline{##1}}}% - % - % Most title ``pages'' are actually two pages long, with space - % at the top of the second. We don't want the ragged left on the second. - \let\oldpage = \page - \def\page{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - \oldpage - \let\page = \oldpage - \hbox{}}% -% \def\page{\oldpage \hbox{}} -} - -\def\Etitlepage{% - \iffinishedtitlepage\else - \finishtitlepage - \fi - % It is important to do the page break before ending the group, - % because the headline and footline are only empty inside the group. - % If we use the new definition of \page, we always get a blank page - % after the title page, which we certainly don't want. - \oldpage - \endgroup - % - % If they want short, they certainly want long too. - \ifsetshortcontentsaftertitlepage - \shortcontents - \contents - \global\let\shortcontents = \relax - \global\let\contents = \relax - \fi - % - \ifsetcontentsaftertitlepage - \contents - \global\let\contents = \relax - \global\let\shortcontents = \relax - \fi - % - \ifpdf \pdfmakepagedesttrue \fi - % - \HEADINGSon -} - -\def\finishtitlepage{% - \vskip4pt \hrule height 2pt width \hsize - \vskip\titlepagebottomglue - \finishedtitlepagetrue -} - -%%% Set up page headings and footings. - -\let\thispage=\folio - -\newtoks\evenheadline % headline on even pages -\newtoks\oddheadline % headline on odd pages -\newtoks\evenfootline % footline on even pages -\newtoks\oddfootline % footline on odd pages - -% Now make Tex use those variables -\headline={{\textfonts\rm \ifodd\pageno \the\oddheadline - \else \the\evenheadline \fi}} -\footline={{\textfonts\rm \ifodd\pageno \the\oddfootline - \else \the\evenfootline \fi}\HEADINGShook} -\let\HEADINGShook=\relax - -% Commands to set those variables. 
-% For example, this is what @headings on does -% @evenheading @thistitle|@thispage|@thischapter -% @oddheading @thischapter|@thispage|@thistitle -% @evenfooting @thisfile|| -% @oddfooting ||@thisfile - -\def\evenheading{\parsearg\evenheadingxxx} -\def\oddheading{\parsearg\oddheadingxxx} -\def\everyheading{\parsearg\everyheadingxxx} - -\def\evenfooting{\parsearg\evenfootingxxx} -\def\oddfooting{\parsearg\oddfootingxxx} -\def\everyfooting{\parsearg\everyfootingxxx} - -{\catcode`\@=0 % - -\gdef\evenheadingxxx #1{\evenheadingyyy #1@|@|@|@|\finish} -\gdef\evenheadingyyy #1@|#2@|#3@|#4\finish{% -\global\evenheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddheadingxxx #1{\oddheadingyyy #1@|@|@|@|\finish} -\gdef\oddheadingyyy #1@|#2@|#3@|#4\finish{% -\global\oddheadline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\everyheadingxxx#1{\oddheadingxxx{#1}\evenheadingxxx{#1}}% - -\gdef\evenfootingxxx #1{\evenfootingyyy #1@|@|@|@|\finish} -\gdef\evenfootingyyy #1@|#2@|#3@|#4\finish{% -\global\evenfootline={\rlap{\centerline{#2}}\line{#1\hfil#3}}} - -\gdef\oddfootingxxx #1{\oddfootingyyy #1@|@|@|@|\finish} -\gdef\oddfootingyyy #1@|#2@|#3@|#4\finish{% - \global\oddfootline = {\rlap{\centerline{#2}}\line{#1\hfil#3}}% - % - % Leave some space for the footline. Hopefully ok to assume - % @evenfooting will not be used by itself. - \global\advance\pageheight by -\baselineskip - \global\advance\vsize by -\baselineskip -} - -\gdef\everyfootingxxx#1{\oddfootingxxx{#1}\evenfootingxxx{#1}} -% -}% unbind the catcode of @. - -% @headings double turns headings on for double-sided printing. -% @headings single turns headings on for single-sided printing. -% @headings off turns them off. -% @headings on same as @headings double, retained for compatibility. -% @headings after turns on double-sided headings after this page. -% @headings doubleafter turns on double-sided headings after this page. -% @headings singleafter turns on single-sided headings after this page. -% By default, they are off at the start of a document, -% and turned `on' after @end titlepage. - -\def\headings #1 {\csname HEADINGS#1\endcsname} - -\def\HEADINGSoff{ -\global\evenheadline={\hfil} \global\evenfootline={\hfil} -\global\oddheadline={\hfil} \global\oddfootline={\hfil}} -\HEADINGSoff -% When we turn headings on, set the page number to 1. -% For double-sided printing, put current file name in lower left corner, -% chapter name on inside top of right hand pages, document -% title on inside top of left hand pages, and page numbers on outside top -% edge of all pages. -\def\HEADINGSdouble{ -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -\global\let\contentsalignmacro = \chapoddpage -} -\let\contentsalignmacro = \chappager - -% For single-sided printing, chapter title goes across top left of page, -% page number on top right. 
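
[Editorial illustration, not part of the removed file.] At the document level these layouts are selected with a single command, normally right after @end titlepage, for instance:

    @headings double

@headings single, @headings off, and the after/doubleafter/singleafter forms listed above choose the other layouts; @headings on is retained as a synonym for double.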
-\def\HEADINGSsingle{ -\global\pageno=1 -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -\global\let\contentsalignmacro = \chappager -} -\def\HEADINGSon{\HEADINGSdouble} - -\def\HEADINGSafter{\let\HEADINGShook=\HEADINGSdoublex} -\let\HEADINGSdoubleafter=\HEADINGSafter -\def\HEADINGSdoublex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\folio\hfil\thistitle}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -\global\let\contentsalignmacro = \chapoddpage -} - -\def\HEADINGSsingleafter{\let\HEADINGShook=\HEADINGSsinglex} -\def\HEADINGSsinglex{% -\global\evenfootline={\hfil} -\global\oddfootline={\hfil} -\global\evenheadline={\line{\thischapter\hfil\folio}} -\global\oddheadline={\line{\thischapter\hfil\folio}} -\global\let\contentsalignmacro = \chappager -} - -% Subroutines used in generating headings -% This produces Day Month Year style of output. -% Only define if not already defined, in case a txi-??.tex file has set -% up a different format (e.g., txi-cs.tex does this). -\ifx\today\undefined -\def\today{% - \number\day\space - \ifcase\month - \or\putwordMJan\or\putwordMFeb\or\putwordMMar\or\putwordMApr - \or\putwordMMay\or\putwordMJun\or\putwordMJul\or\putwordMAug - \or\putwordMSep\or\putwordMOct\or\putwordMNov\or\putwordMDec - \fi - \space\number\year} -\fi - -% @settitle line... specifies the title of the document, for headings. -% It generates no output of its own. -\def\thistitle{\putwordNoTitle} -\def\settitle{\parsearg\settitlezzz} -\def\settitlezzz #1{\gdef\thistitle{#1}} - - -\message{tables,} -% Tables -- @table, @ftable, @vtable, @item(x), @kitem(x), @xitem(x). - -% default indentation of table text -\newdimen\tableindent \tableindent=.8in -% default indentation of @itemize and @enumerate text -\newdimen\itemindent \itemindent=.3in -% margin between end of table item and start of table text. -\newdimen\itemmargin \itemmargin=.1in - -% used internally for \itemindent minus \itemmargin -\newdimen\itemmax - -% Note @table, @vtable, and @vtable define @item, @itemx, etc., with -% these defs. -% They also define \itemindex -% to index the item name in whatever manner is desired (perhaps none). - -\newif\ifitemxneedsnegativevskip - -\def\itemxpar{\par\ifitemxneedsnegativevskip\nobreak\vskip-\parskip\nobreak\fi} - -\def\internalBitem{\smallbreak \parsearg\itemzzz} -\def\internalBitemx{\itemxpar \parsearg\itemzzz} - -\def\internalBxitem "#1"{\def\xitemsubtopix{#1} \smallbreak \parsearg\xitemzzz} -\def\internalBxitemx "#1"{\def\xitemsubtopix{#1} \itemxpar \parsearg\xitemzzz} - -\def\internalBkitem{\smallbreak \parsearg\kitemzzz} -\def\internalBkitemx{\itemxpar \parsearg\kitemzzz} - -\def\kitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \lastfunction}}% - \itemzzz {#1}} - -\def\xitemzzz #1{\dosubind {kw}{\code{#1}}{for {\bf \xitemsubtopic}}% - \itemzzz {#1}} - -\def\itemzzz #1{\begingroup % - \advance\hsize by -\rightskip - \advance\hsize by -\tableindent - \setbox0=\hbox{\itemfont{#1}}% - \itemindex{#1}% - \nobreak % This prevents a break before @itemx. - % - % If the item text does not fit in the space we have, put it on a line - % by itself, and do not allow a page break either before or after that - % line. We do not start a paragraph here because then if the next - % command is, e.g., @kindex, the whatsit would get put into the - % horizontal list on a line by itself, resulting in extra blank space. 
- \ifdim \wd0>\itemmax - % - % Make this a paragraph so we get the \parskip glue and wrapping, - % but leave it ragged-right. - \begingroup - \advance\leftskip by-\tableindent - \advance\hsize by\tableindent - \advance\rightskip by0pt plus1fil - \leavevmode\unhbox0\par - \endgroup - % - % We're going to be starting a paragraph, but we don't want the - % \parskip glue -- logically it's part of the @item we just started. - \nobreak \vskip-\parskip - % - % Stop a page break at the \parskip glue coming up. Unfortunately - % we can't prevent a possible page break at the following - % \baselineskip glue. - \nobreak - \endgroup - \itemxneedsnegativevskipfalse - \else - % The item text fits into the space. Start a paragraph, so that the - % following text (if any) will end up on the same line. - \noindent - % Do this with kerns and \unhbox so that if there is a footnote in - % the item text, it can migrate to the main vertical list and - % eventually be printed. - \nobreak\kern-\tableindent - \dimen0 = \itemmax \advance\dimen0 by \itemmargin \advance\dimen0 by -\wd0 - \unhbox0 - \nobreak\kern\dimen0 - \endgroup - \itemxneedsnegativevskiptrue - \fi -} - -\def\item{\errmessage{@item while not in a table}} -\def\itemx{\errmessage{@itemx while not in a table}} -\def\kitem{\errmessage{@kitem while not in a table}} -\def\kitemx{\errmessage{@kitemx while not in a table}} -\def\xitem{\errmessage{@xitem while not in a table}} -\def\xitemx{\errmessage{@xitemx while not in a table}} - -% Contains a kludge to get @end[description] to work. -\def\description{\tablez{\dontindex}{1}{}{}{}{}} - -% @table, @ftable, @vtable. -\def\table{\begingroup\inENV\obeylines\obeyspaces\tablex} -{\obeylines\obeyspaces% -\gdef\tablex #1^^M{% -\tabley\dontindex#1 \endtabley}} - -\def\ftable{\begingroup\inENV\obeylines\obeyspaces\ftablex} -{\obeylines\obeyspaces% -\gdef\ftablex #1^^M{% -\tabley\fnitemindex#1 \endtabley -\def\Eftable{\endgraf\afterenvbreak\endgroup}% -\let\Etable=\relax}} - -\def\vtable{\begingroup\inENV\obeylines\obeyspaces\vtablex} -{\obeylines\obeyspaces% -\gdef\vtablex #1^^M{% -\tabley\vritemindex#1 \endtabley -\def\Evtable{\endgraf\afterenvbreak\endgroup}% -\let\Etable=\relax}} - -\def\dontindex #1{} -\def\fnitemindex #1{\doind {fn}{\code{#1}}}% -\def\vritemindex #1{\doind {vr}{\code{#1}}}% - -{\obeyspaces % -\gdef\tabley#1#2 #3 #4 #5 #6 #7\endtabley{\endgroup% -\tablez{#1}{#2}{#3}{#4}{#5}{#6}}} - -\def\tablez #1#2#3#4#5#6{% -\aboveenvbreak % -\begingroup % -\def\Edescription{\Etable}% Necessary kludge. 
-\let\itemindex=#1% -\ifnum 0#3>0 \advance \leftskip by #3\mil \fi % -\ifnum 0#4>0 \tableindent=#4\mil \fi % -\ifnum 0#5>0 \advance \rightskip by #5\mil \fi % -\def\itemfont{#2}% -\itemmax=\tableindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \tableindent % -\exdentamount=\tableindent -\parindent = 0pt -\parskip = \smallskipamount -\ifdim \parskip=0pt \parskip=2pt \fi% -\def\Etable{\endgraf\afterenvbreak\endgroup}% -\let\item = \internalBitem % -\let\itemx = \internalBitemx % -\let\kitem = \internalBkitem % -\let\kitemx = \internalBkitemx % -\let\xitem = \internalBxitem % -\let\xitemx = \internalBxitemx % -} - -% This is the counter used by @enumerate, which is really @itemize - -\newcount \itemno - -\def\itemize{\parsearg\itemizezzz} - -\def\itemizezzz #1{% - \begingroup % ended by the @end itemize - \itemizey {#1}{\Eitemize} -} - -\def\itemizey #1#2{% -\aboveenvbreak % -\itemmax=\itemindent % -\advance \itemmax by -\itemmargin % -\advance \leftskip by \itemindent % -\exdentamount=\itemindent -\parindent = 0pt % -\parskip = \smallskipamount % -\ifdim \parskip=0pt \parskip=2pt \fi% -\def#2{\endgraf\afterenvbreak\endgroup}% -\def\itemcontents{#1}% -\let\item=\itemizeitem} - -% Set sfcode to normal for the chars that usually have another value. -% These are `.?!:;,' -\def\frenchspacing{\sfcode46=1000 \sfcode63=1000 \sfcode33=1000 - \sfcode58=1000 \sfcode59=1000 \sfcode44=1000 } - -% \splitoff TOKENS\endmark defines \first to be the first token in -% TOKENS, and \rest to be the remainder. -% -\def\splitoff#1#2\endmark{\def\first{#1}\def\rest{#2}}% - -% Allow an optional argument of an uppercase letter, lowercase letter, -% or number, to specify the first label in the enumerated list. No -% argument is the same as `1'. -% -\def\enumerate{\parsearg\enumeratezzz} -\def\enumeratezzz #1{\enumeratey #1 \endenumeratey} -\def\enumeratey #1 #2\endenumeratey{% - \begingroup % ended by the @end enumerate - % - % If we were given no argument, pretend we were given `1'. - \def\thearg{#1}% - \ifx\thearg\empty \def\thearg{1}\fi - % - % Detect if the argument is a single token. If so, it might be a - % letter. Otherwise, the only valid thing it can be is a number. - % (We will always have one token, because of the test we just made. - % This is a good thing, since \splitoff doesn't work given nothing at - % all -- the first parameter is undelimited.) - \expandafter\splitoff\thearg\endmark - \ifx\rest\empty - % Only one token in the argument. It could still be anything. - % A ``lowercase letter'' is one whose \lccode is nonzero. - % An ``uppercase letter'' is one whose \lccode is both nonzero, and - % not equal to itself. - % Otherwise, we assume it's a number. - % - % We need the \relax at the end of the \ifnum lines to stop TeX from - % continuing to look for a . - % - \ifnum\lccode\expandafter`\thearg=0\relax - \numericenumerate % a number (we hope) - \else - % It's a letter. - \ifnum\lccode\expandafter`\thearg=\expandafter`\thearg\relax - \lowercaseenumerate % lowercase letter - \else - \uppercaseenumerate % uppercase letter - \fi - \fi - \else - % Multiple tokens in the argument. We hope it's a number. - \numericenumerate - \fi -} - -% An @enumerate whose labels are integers. The starting integer is -% given in \thearg. -% -\def\numericenumerate{% - \itemno = \thearg - \startenumeration{\the\itemno}% -} - -% The starting (lowercase) letter is in \thearg. 
-\def\lowercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. - \ifnum\itemno=0 - \errmessage{No more lowercase letters in @enumerate; get a bigger - alphabet}% - \fi - \char\lccode\itemno - }% -} - -% The starting (uppercase) letter is in \thearg. -\def\uppercaseenumerate{% - \itemno = \expandafter`\thearg - \startenumeration{% - % Be sure we're not beyond the end of the alphabet. - \ifnum\itemno=0 - \errmessage{No more uppercase letters in @enumerate; get a bigger - alphabet} - \fi - \char\uccode\itemno - }% -} - -% Call itemizey, adding a period to the first argument and supplying the -% common last two arguments. Also subtract one from the initial value in -% \itemno, since @item increments \itemno. -% -\def\startenumeration#1{% - \advance\itemno by -1 - \itemizey{#1.}\Eenumerate\flushcr -} - -% @alphaenumerate and @capsenumerate are abbreviations for giving an arg -% to @enumerate. -% -\def\alphaenumerate{\enumerate{a}} -\def\capsenumerate{\enumerate{A}} -\def\Ealphaenumerate{\Eenumerate} -\def\Ecapsenumerate{\Eenumerate} - -% Definition of @item while inside @itemize. - -\def\itemizeitem{% -\advance\itemno by 1 -{\let\par=\endgraf \smallbreak}% -\ifhmode \errmessage{In hmode at itemizeitem}\fi -{\parskip=0in \hskip 0pt -\hbox to 0pt{\hss \itemcontents\hskip \itemmargin}% -\vadjust{\penalty 1200}}% -\flushcr} - -% @multitable macros -% Amy Hendrickson, 8/18/94, 3/6/96 -% -% @multitable ... @end multitable will make as many columns as desired. -% Contents of each column will wrap at width given in preamble. Width -% can be specified either with sample text given in a template line, -% or in percent of \hsize, the current width of text on page. - -% Table can continue over pages but will only break between lines. - -% To make preamble: -% -% Either define widths of columns in terms of percent of \hsize: -% @multitable @columnfractions .25 .3 .45 -% @item ... -% -% Numbers following @columnfractions are the percent of the total -% current hsize to be used for each column. You may use as many -% columns as desired. - - -% Or use a template: -% @multitable {Column 1 template} {Column 2 template} {Column 3 template} -% @item ... -% using the widest term desired in each column. -% -% For those who want to use more than one line's worth of words in -% the preamble, break the line within one argument and it -% will parse correctly, i.e., -% -% @multitable {Column 1 template} {Column 2 template} {Column 3 -% template} -% Not: -% @multitable {Column 1 template} {Column 2 template} -% {Column 3 template} - -% Each new table line starts with @item, each subsequent new column -% starts with @tab. Empty columns may be produced by supplying @tab's -% with nothing between them for as many times as empty columns are needed, -% ie, @tab@tab@tab will produce two empty columns. - -% @item, @tab, @multitable or @end multitable do not need to be on their -% own lines, but it will not hurt if they are. - -% Sample multitable: - -% @multitable {Column 1 template} {Column 2 template} {Column 3 template} -% @item first col stuff @tab second col stuff @tab third col -% @item -% first col stuff -% @tab -% second col stuff -% @tab -% third col -% @item first col stuff @tab second col stuff -% @tab Many paragraphs of text may be used in any column. -% -% They will wrap at the width determined by the template. -% @item@tab@tab This will be in third column. -% @end multitable - -% Default dimensions may be reset by user. 
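
[Editorial illustration, not part of the removed file.] Since the @multitable dimensions are ordinary TeX registers, a document could override them from raw TeX; a rough sketch, with arbitrary values:

    @tex
    \global\multitableparskip = 2pt
    \global\multitablecolspace = 10pt
    @end tex

The \global prefixes are there so the assignments survive any grouping done by @tex; the registers themselves are the ones declared and described just below.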
-% @multitableparskip is vertical space between paragraphs in table. -% @multitableparindent is paragraph indent in table. -% @multitablecolmargin is horizontal space to be left between columns. -% @multitablelinespace is space to leave between table items, baseline -% to baseline. -% 0pt means it depends on current normal line spacing. -% -\newskip\multitableparskip -\newskip\multitableparindent -\newdimen\multitablecolspace -\newskip\multitablelinespace -\multitableparskip=0pt -\multitableparindent=6pt -\multitablecolspace=12pt -\multitablelinespace=0pt - -% Macros used to set up halign preamble: -% -\let\endsetuptable\relax -\def\xendsetuptable{\endsetuptable} -\let\columnfractions\relax -\def\xcolumnfractions{\columnfractions} -\newif\ifsetpercent - -% #1 is the part of the @columnfraction before the decimal point, which -% is presumably either 0 or the empty string (but we don't check, we -% just throw it away). #2 is the decimal part, which we use as the -% percent of \hsize for this column. -\def\pickupwholefraction#1.#2 {% - \global\advance\colcount by 1 - \expandafter\xdef\csname col\the\colcount\endcsname{.#2\hsize}% - \setuptable -} - -\newcount\colcount -\def\setuptable#1{% - \def\firstarg{#1}% - \ifx\firstarg\xendsetuptable - \let\go = \relax - \else - \ifx\firstarg\xcolumnfractions - \global\setpercenttrue - \else - \ifsetpercent - \let\go\pickupwholefraction - \else - \global\advance\colcount by 1 - \setbox0=\hbox{#1\unskip }% Add a normal word space as a separator; - % typically that is always in the input, anyway. - \expandafter\xdef\csname col\the\colcount\endcsname{\the\wd0}% - \fi - \fi - \ifx\go\pickupwholefraction - % Put the argument back for the \pickupwholefraction call, so - % we'll always have a period there to be parsed. - \def\go{\pickupwholefraction#1}% - \else - \let\go = \setuptable - \fi% - \fi - \go -} - -% This used to have \hskip1sp. But then the space in a template line is -% not enough. That is bad. So let's go back to just & until we -% encounter the problem it was intended to solve again. -% --karl, nathan@acm.org, 20apr99. -\def\tab{&} - -% @multitable ... @end multitable definitions: -% -\def\multitable{\parsearg\dotable} -\def\dotable#1{\bgroup - \vskip\parskip - \let\item\crcr - \tolerance=9500 - \hbadness=9500 - \setmultitablespacing - \parskip=\multitableparskip - \parindent=\multitableparindent - \overfullrule=0pt - \global\colcount=0 - \def\Emultitable{\global\setpercentfalse\cr\egroup\egroup}% - % - % To parse everything between @multitable and @item: - \setuptable#1 \endsetuptable - % - % \everycr will reset column counter, \colcount, at the end of - % each line. Every column entry will cause \colcount to advance by one. - % The table preamble - % looks at the current \colcount to find the correct column width. - \everycr{\noalign{% - % - % \filbreak%% keeps underfull box messages off when table breaks over pages. - % Maybe so, but it also creates really weird page breaks when the table - % breaks over pages. Wouldn't \vfil be better? Wait until the problem - % manifests itself, so it can be fixed for real --karl. - \global\colcount=0\relax}}% - % - % This preamble sets up a generic column definition, which will - % be used as many times as user calls for columns. - % \vtop will set a single line and will also let text wrap and - % continue for many paragraphs if desired. 
- \halign\bgroup&\global\advance\colcount by 1\relax - \multistrut\vtop{\hsize=\expandafter\csname col\the\colcount\endcsname - % - % In order to keep entries from bumping into each other - % we will add a \leftskip of \multitablecolspace to all columns after - % the first one. - % - % If a template has been used, we will add \multitablecolspace - % to the width of each template entry. - % - % If the user has set preamble in terms of percent of \hsize we will - % use that dimension as the width of the column, and the \leftskip - % will keep entries from bumping into each other. Table will start at - % left margin and final column will justify at right margin. - % - % Make sure we don't inherit \rightskip from the outer environment. - \rightskip=0pt - \ifnum\colcount=1 - % The first column will be indented with the surrounding text. - \advance\hsize by\leftskip - \else - \ifsetpercent \else - % If user has not set preamble in terms of percent of \hsize - % we will advance \hsize by \multitablecolspace. - \advance\hsize by \multitablecolspace - \fi - % In either case we will make \leftskip=\multitablecolspace: - \leftskip=\multitablecolspace - \fi - % Ignoring space at the beginning and end avoids an occasional spurious - % blank line, when TeX decides to break the line at the space before the - % box from the multistrut, so the strut ends up on a line by itself. - % For example: - % @multitable @columnfractions .11 .89 - % @item @code{#} - % @tab Legal holiday which is valid in major parts of the whole country. - % Is automatically provided with highlighting sequences respectively marking - % characters. - \noindent\ignorespaces##\unskip\multistrut}\cr -} - -\def\setmultitablespacing{% test to see if user has set \multitablelinespace. -% If so, do nothing. If not, give it an appropriate dimension based on -% current baselineskip. -\ifdim\multitablelinespace=0pt -\setbox0=\vbox{X}\global\multitablelinespace=\the\baselineskip -\global\advance\multitablelinespace by-\ht0 -%% strut to put in table in case some entry doesn't have descenders, -%% to keep lines equally spaced -\let\multistrut = \strut -\else -%% FIXME: what is \box0 supposed to be? -\gdef\multistrut{\vrule height\multitablelinespace depth\dp0 -width0pt\relax} \fi -%% Test to see if parskip is larger than space between lines of -%% table. If not, do nothing. -%% If so, set to same dimension as multitablelinespace. -\ifdim\multitableparskip>\multitablelinespace -\global\multitableparskip=\multitablelinespace -\global\advance\multitableparskip-7pt %% to keep parskip somewhat smaller - %% than skip between lines in the table. -\fi% -\ifdim\multitableparskip=0pt -\global\multitableparskip=\multitablelinespace -\global\advance\multitableparskip-7pt %% to keep parskip somewhat smaller - %% than skip between lines in the table. -\fi} - - -\message{conditionals,} -% Prevent errors for section commands. -% Used in @ignore and in failing conditionals. 
-\def\ignoresections{% - \let\chapter=\relax - \let\unnumbered=\relax - \let\top=\relax - \let\unnumberedsec=\relax - \let\unnumberedsection=\relax - \let\unnumberedsubsec=\relax - \let\unnumberedsubsection=\relax - \let\unnumberedsubsubsec=\relax - \let\unnumberedsubsubsection=\relax - \let\section=\relax - \let\subsec=\relax - \let\subsubsec=\relax - \let\subsection=\relax - \let\subsubsection=\relax - \let\appendix=\relax - \let\appendixsec=\relax - \let\appendixsection=\relax - \let\appendixsubsec=\relax - \let\appendixsubsection=\relax - \let\appendixsubsubsec=\relax - \let\appendixsubsubsection=\relax - \let\contents=\relax - \let\smallbook=\relax - \let\titlepage=\relax -} - -% Used in nested conditionals, where we have to parse the Texinfo source -% and so want to turn off most commands, in case they are used -% incorrectly. -% -\def\ignoremorecommands{% - \let\defcodeindex = \relax - \let\defcv = \relax - \let\deffn = \relax - \let\deffnx = \relax - \let\defindex = \relax - \let\defivar = \relax - \let\defmac = \relax - \let\defmethod = \relax - \let\defop = \relax - \let\defopt = \relax - \let\defspec = \relax - \let\deftp = \relax - \let\deftypefn = \relax - \let\deftypefun = \relax - \let\deftypeivar = \relax - \let\deftypeop = \relax - \let\deftypevar = \relax - \let\deftypevr = \relax - \let\defun = \relax - \let\defvar = \relax - \let\defvr = \relax - \let\ref = \relax - \let\xref = \relax - \let\printindex = \relax - \let\pxref = \relax - \let\settitle = \relax - \let\setchapternewpage = \relax - \let\setchapterstyle = \relax - \let\everyheading = \relax - \let\evenheading = \relax - \let\oddheading = \relax - \let\everyfooting = \relax - \let\evenfooting = \relax - \let\oddfooting = \relax - \let\headings = \relax - \let\include = \relax - \let\lowersections = \relax - \let\down = \relax - \let\raisesections = \relax - \let\up = \relax - \let\set = \relax - \let\clear = \relax - \let\item = \relax -} - -% Ignore @ignore ... @end ignore. -% -\def\ignore{\doignore{ignore}} - -% Ignore @ifinfo, @ifhtml, @ifnottex, @html, @menu, and @direntry text. -% -\def\ifinfo{\doignore{ifinfo}} -\def\ifhtml{\doignore{ifhtml}} -\def\ifnottex{\doignore{ifnottex}} -\def\html{\doignore{html}} -\def\menu{\doignore{menu}} -\def\direntry{\doignore{direntry}} - -% @dircategory CATEGORY -- specify a category of the dir file -% which this file should belong to. Ignore this in TeX. -\let\dircategory = \comment - -% Ignore text until a line `@end #1'. -% -\def\doignore#1{\begingroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define a command to swallow text until we reach `@end #1'. - % This @ is a catcode 12 token (that is the normal catcode of @ in - % this texinfo.tex file). We change the catcode of @ below to match. - \long\def\doignoretext##1@end #1{\enddoignore}% - % - % Make sure that spaces turn into tokens that match what \doignoretext wants. - \catcode32 = 10 - % - % Ignore braces, too, so mismatched braces don't cause trouble. - \catcode`\{ = 9 - \catcode`\} = 9 - % - % We must not have @c interpreted as a control sequence. - \catcode`\@ = 12 - % - % Make the letter c a comment character so that the rest of the line - % will be ignored. This way, the document can have (for example) - % @c @end ifinfo - % and the @end ifinfo will be properly ignored. - % (We've just changed @ to catcode 12.) - \catcode`\c = 14 - % - % And now expand that command. - \doignoretext -} - -% What we do to finish off ignored text. 
-% -\def\enddoignore{\endgroup\ignorespaces}% - -\newif\ifwarnedobs\warnedobsfalse -\def\obstexwarn{% - \ifwarnedobs\relax\else - % We need to warn folks that they may have trouble with TeX 3.0. - % This uses \immediate\write16 rather than \message to get newlines. - \immediate\write16{} - \immediate\write16{WARNING: for users of Unix TeX 3.0!} - \immediate\write16{This manual trips a bug in TeX version 3.0 (tex hangs).} - \immediate\write16{If you are running another version of TeX, relax.} - \immediate\write16{If you are running Unix TeX 3.0, kill this TeX process.} - \immediate\write16{ Then upgrade your TeX installation if you can.} - \immediate\write16{ (See ftp://ftp.gnu.org/pub/gnu/TeX.README.)} - \immediate\write16{If you are stuck with version 3.0, run the} - \immediate\write16{ script ``tex3patch'' from the Texinfo distribution} - \immediate\write16{ to use a workaround.} - \immediate\write16{} - \global\warnedobstrue - \fi -} - -% **In TeX 3.0, setting text in \nullfont hangs tex. For a -% workaround (which requires the file ``dummy.tfm'' to be installed), -% uncomment the following line: -%%%%%\font\nullfont=dummy\let\obstexwarn=\relax - -% Ignore text, except that we keep track of conditional commands for -% purposes of nesting, up to an `@end #1' command. -% -\def\nestedignore#1{% - \obstexwarn - % We must actually expand the ignored text to look for the @end - % command, so that nested ignore constructs work. Thus, we put the - % text into a \vbox and then do nothing with the result. To minimize - % the change of memory overflow, we follow the approach outlined on - % page 401 of the TeXbook: make the current font be a dummy font. - % - \setbox0 = \vbox\bgroup - % Don't complain about control sequences we have declared \outer. - \ignoresections - % - % Define `@end #1' to end the box, which will in turn undefine the - % @end command again. - \expandafter\def\csname E#1\endcsname{\egroup\ignorespaces}% - % - % We are going to be parsing Texinfo commands. Most cause no - % trouble when they are used incorrectly, but some commands do - % complicated argument parsing or otherwise get confused, so we - % undefine them. - % - % We can't do anything about stray @-signs, unfortunately; - % they'll produce `undefined control sequence' errors. - \ignoremorecommands - % - % Set the current font to be \nullfont, a TeX primitive, and define - % all the font commands to also use \nullfont. We don't use - % dummy.tfm, as suggested in the TeXbook, because not all sites - % might have that installed. Therefore, math mode will still - % produce output, but that should be an extremely small amount of - % stuff compared to the main input. - % - \nullfont - \let\tenrm=\nullfont \let\tenit=\nullfont \let\tensl=\nullfont - \let\tenbf=\nullfont \let\tentt=\nullfont \let\smallcaps=\nullfont - \let\tensf=\nullfont - % Similarly for index fonts (mostly for their use in smallexample). - \let\smallrm=\nullfont \let\smallit=\nullfont \let\smallsl=\nullfont - \let\smallbf=\nullfont \let\smalltt=\nullfont \let\smallsc=\nullfont - \let\smallsf=\nullfont - % - % Don't complain when characters are missing from the fonts. - \tracinglostchars = 0 - % - % Don't bother to do space factor calculations. - \frenchspacing - % - % Don't report underfull hboxes. - \hbadness = 10000 - % - % Do minimal line-breaking. - \pretolerance = 10000 - % - % Do not execute instructions in @tex - \def\tex{\doignore{tex}}% - % Do not execute macro definitions. - % `c' is a comment character, so the word `macro' will get cut off. 
- \def\macro{\doignore{ma}}% -} - -% @set VAR sets the variable VAR to an empty value. -% @set VAR REST-OF-LINE sets VAR to the value REST-OF-LINE. -% -% Since we want to separate VAR from REST-OF-LINE (which might be -% empty), we can't just use \parsearg; we have to insert a space of our -% own to delimit the rest of the line, and then take it out again if we -% didn't need it. Make sure the catcode of space is correct to avoid -% losing inside @example, for instance. -% -\def\set{\begingroup\catcode` =10 - \catcode`\-=12 \catcode`\_=12 % Allow - and _ in VAR. - \parsearg\setxxx} -\def\setxxx#1{\setyyy#1 \endsetyyy} -\def\setyyy#1 #2\endsetyyy{% - \def\temp{#2}% - \ifx\temp\empty \global\expandafter\let\csname SET#1\endcsname = \empty - \else \setzzz{#1}#2\endsetzzz % Remove the trailing space \setxxx inserted. - \fi - \endgroup -} -% Can't use \xdef to pre-expand #2 and save some time, since \temp or -% \next or other control sequences that we've defined might get us into -% an infinite loop. Consider `@set foo @cite{bar}'. -\def\setzzz#1#2 \endsetzzz{\expandafter\gdef\csname SET#1\endcsname{#2}} - -% @clear VAR clears (i.e., unsets) the variable VAR. -% -\def\clear{\parsearg\clearxxx} -\def\clearxxx#1{\global\expandafter\let\csname SET#1\endcsname=\relax} - -% @value{foo} gets the text saved in variable foo. -{ - \catcode`\_ = \active - % - % We might end up with active _ or - characters in the argument if - % we're called from @code, as @code{@value{foo-bar_}}. So \let any - % such active characters to their normal equivalents. - \gdef\value{\begingroup - \catcode`\-=12 \catcode`\_=12 - \indexbreaks \let_\normalunderscore - \valuexxx} -} -\def\valuexxx#1{\expandablevalue{#1}\endgroup} - -% We have this subroutine so that we can handle at least some @value's -% properly in indexes (we \let\value to this in \indexdummies). Ones -% whose names contain - or _ still won't work, but we can't do anything -% about that. The command has to be fully expandable, since the result -% winds up in the index file. This means that if the variable's value -% contains other Texinfo commands, it's almost certain it will fail -% (although perhaps we could fix that with sufficient work to do a -% one-level expansion on the result, instead of complete). -% -\def\expandablevalue#1{% - \expandafter\ifx\csname SET#1\endcsname\relax - {[No value for ``#1'']}% - \else - \csname SET#1\endcsname - \fi -} - -% @ifset VAR ... @end ifset reads the `...' iff VAR has been defined -% with @set. -% -\def\ifset{\parsearg\ifsetxxx} -\def\ifsetxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifsetfail - \else - \expandafter\ifsetsucceed - \fi -} -\def\ifsetsucceed{\conditionalsucceed{ifset}} -\def\ifsetfail{\nestedignore{ifset}} -\defineunmatchedend{ifset} - -% @ifclear VAR ... @end ifclear reads the `...' iff VAR has never been -% defined with @set, or has been undefined with @clear. -% -\def\ifclear{\parsearg\ifclearxxx} -\def\ifclearxxx #1{% - \expandafter\ifx\csname SET#1\endcsname\relax - \expandafter\ifclearsucceed - \else - \expandafter\ifclearfail - \fi -} -\def\ifclearsucceed{\conditionalsucceed{ifclear}} -\def\ifclearfail{\nestedignore{ifclear}} -\defineunmatchedend{ifclear} - -% @iftex, @ifnothtml, @ifnotinfo always succeed; we read the text -% following, through the first @end iftex (etc.). Make `@end iftex' -% (etc.) valid only after an @iftex. 
-% -\def\iftex{\conditionalsucceed{iftex}} -\def\ifnothtml{\conditionalsucceed{ifnothtml}} -\def\ifnotinfo{\conditionalsucceed{ifnotinfo}} -\defineunmatchedend{iftex} -\defineunmatchedend{ifnothtml} -\defineunmatchedend{ifnotinfo} - -% We can't just want to start a group at @iftex (for example) and end it -% at @end iftex, since then @set commands inside the conditional have no -% effect (they'd get reverted at the end of the group). So we must -% define \Eiftex to redefine itself to be its previous value. (We can't -% just define it to fail again with an ``unmatched end'' error, since -% the @ifset might be nested.) -% -\def\conditionalsucceed#1{% - \edef\temp{% - % Remember the current value of \E#1. - \let\nece{prevE#1} = \nece{E#1}% - % - % At the `@end #1', redefine \E#1 to be its previous value. - \def\nece{E#1}{\let\nece{E#1} = \nece{prevE#1}}% - }% - \temp -} - -% We need to expand lots of \csname's, but we don't want to expand the -% control sequences after we've constructed them. -% -\def\nece#1{\expandafter\noexpand\csname#1\endcsname} - -% @defininfoenclose. -\let\definfoenclose=\comment - - -\message{indexing,} -% Index generation facilities - -% Define \newwrite to be identical to plain tex's \newwrite -% except not \outer, so it can be used within \newindex. -{\catcode`\@=11 -\gdef\newwrite{\alloc@7\write\chardef\sixt@@n}} - -% \newindex {foo} defines an index named foo. -% It automatically defines \fooindex such that -% \fooindex ...rest of line... puts an entry in the index foo. -% It also defines \fooindfile to be the number of the output channel for -% the file that accumulates this index. The file's extension is foo. -% The name of an index should be no more than 2 characters long -% for the sake of vms. -% -\def\newindex#1{% - \iflinks - \expandafter\newwrite \csname#1indfile\endcsname - \openout \csname#1indfile\endcsname \jobname.#1 % Open the file - \fi - \expandafter\xdef\csname#1index\endcsname{% % Define @#1index - \noexpand\doindex{#1}} -} - -% @defindex foo == \newindex{foo} -% -\def\defindex{\parsearg\newindex} - -% Define @defcodeindex, like @defindex except put all entries in @code. -% -\def\defcodeindex{\parsearg\newcodeindex} -% -\def\newcodeindex#1{% - \iflinks - \expandafter\newwrite \csname#1indfile\endcsname - \openout \csname#1indfile\endcsname \jobname.#1 - \fi - \expandafter\xdef\csname#1index\endcsname{% - \noexpand\docodeindex{#1}}% -} - - -% @synindex foo bar makes index foo feed into index bar. -% Do this instead of @defindex foo if you don't want it as a separate index. -% -% @syncodeindex foo bar similar, but put all entries made for index foo -% inside @code. -% -\def\synindex#1 #2 {\dosynindex\doindex{#1}{#2}} -\def\syncodeindex#1 #2 {\dosynindex\docodeindex{#1}{#2}} - -% #1 is \doindex or \docodeindex, #2 the index getting redefined (foo), -% #3 the target index (bar). -\def\dosynindex#1#2#3{% - % Only do \closeout if we haven't already done it, else we'll end up - % closing the target index. - \expandafter \ifx\csname donesynindex#2\endcsname \undefined - % The \closeout helps reduce unnecessary open files; the limit on the - % Acorn RISC OS is a mere 16 files. 
- \expandafter\closeout\csname#2indfile\endcsname - \expandafter\let\csname\donesynindex#2\endcsname = 1 - \fi - % redefine \fooindfile: - \expandafter\let\expandafter\temp\expandafter=\csname#3indfile\endcsname - \expandafter\let\csname#2indfile\endcsname=\temp - % redefine \fooindex: - \expandafter\xdef\csname#2index\endcsname{\noexpand#1{#3}}% -} - -% Define \doindex, the driver for all \fooindex macros. -% Argument #1 is generated by the calling \fooindex macro, -% and it is "foo", the name of the index. - -% \doindex just uses \parsearg; it calls \doind for the actual work. -% This is because \doind is more useful to call from other macros. - -% There is also \dosubind {index}{topic}{subtopic} -% which makes an entry in a two-level index such as the operation index. - -\def\doindex#1{\edef\indexname{#1}\parsearg\singleindexer} -\def\singleindexer #1{\doind{\indexname}{#1}} - -% like the previous two, but they put @code around the argument. -\def\docodeindex#1{\edef\indexname{#1}\parsearg\singlecodeindexer} -\def\singlecodeindexer #1{\doind{\indexname}{\code{#1}}} - -\def\indexdummies{% -\def\ { }% -% Take care of the plain tex accent commands. -\def\"{\realbackslash "}% -\def\`{\realbackslash `}% -\def\'{\realbackslash '}% -\def\^{\realbackslash ^}% -\def\~{\realbackslash ~}% -\def\={\realbackslash =}% -\def\b{\realbackslash b}% -\def\c{\realbackslash c}% -\def\d{\realbackslash d}% -\def\u{\realbackslash u}% -\def\v{\realbackslash v}% -\def\H{\realbackslash H}% -% Take care of the plain tex special European modified letters. -\def\oe{\realbackslash oe}% -\def\ae{\realbackslash ae}% -\def\aa{\realbackslash aa}% -\def\OE{\realbackslash OE}% -\def\AE{\realbackslash AE}% -\def\AA{\realbackslash AA}% -\def\o{\realbackslash o}% -\def\O{\realbackslash O}% -\def\l{\realbackslash l}% -\def\L{\realbackslash L}% -\def\ss{\realbackslash ss}% -% Take care of texinfo commands likely to appear in an index entry. -% (Must be a way to avoid doing expansion at all, and thus not have to -% laboriously list every single command here.) -\def\@{@}% will be @@ when we switch to @ as escape char. -% Need these in case \tex is in effect and \{ is a \delimiter again. -% But can't use \lbracecmd and \rbracecmd because texindex assumes -% braces and backslashes are used only as delimiters. 
-\let\{ = \mylbrace -\let\} = \myrbrace -\def\_{{\realbackslash _}}% -\def\w{\realbackslash w }% -\def\bf{\realbackslash bf }% -%\def\rm{\realbackslash rm }% -\def\sl{\realbackslash sl }% -\def\sf{\realbackslash sf}% -\def\tt{\realbackslash tt}% -\def\gtr{\realbackslash gtr}% -\def\less{\realbackslash less}% -\def\hat{\realbackslash hat}% -\def\TeX{\realbackslash TeX}% -\def\dots{\realbackslash dots }% -\def\result{\realbackslash result}% -\def\equiv{\realbackslash equiv}% -\def\expansion{\realbackslash expansion}% -\def\print{\realbackslash print}% -\def\error{\realbackslash error}% -\def\point{\realbackslash point}% -\def\copyright{\realbackslash copyright}% -\def\tclose##1{\realbackslash tclose {##1}}% -\def\code##1{\realbackslash code {##1}}% -\def\uref##1{\realbackslash uref {##1}}% -\def\url##1{\realbackslash url {##1}}% -\def\env##1{\realbackslash env {##1}}% -\def\command##1{\realbackslash command {##1}}% -\def\option##1{\realbackslash option {##1}}% -\def\dotless##1{\realbackslash dotless {##1}}% -\def\samp##1{\realbackslash samp {##1}}% -\def\,##1{\realbackslash ,{##1}}% -\def\t##1{\realbackslash t {##1}}% -\def\r##1{\realbackslash r {##1}}% -\def\i##1{\realbackslash i {##1}}% -\def\b##1{\realbackslash b {##1}}% -\def\sc##1{\realbackslash sc {##1}}% -\def\cite##1{\realbackslash cite {##1}}% -\def\key##1{\realbackslash key {##1}}% -\def\file##1{\realbackslash file {##1}}% -\def\var##1{\realbackslash var {##1}}% -\def\kbd##1{\realbackslash kbd {##1}}% -\def\dfn##1{\realbackslash dfn {##1}}% -\def\emph##1{\realbackslash emph {##1}}% -\def\acronym##1{\realbackslash acronym {##1}}% -% -% Handle some cases of @value -- where the variable name does not -% contain - or _, and the value does not contain any -% (non-fully-expandable) commands. -\let\value = \expandablevalue -% -\unsepspaces -% Turn off macro expansion -\turnoffmacros -} - -% If an index command is used in an @example environment, any spaces -% therein should become regular spaces in the raw index file, not the -% expansion of \tie (\\leavevmode \penalty \@M \ ). -{\obeyspaces - \gdef\unsepspaces{\obeyspaces\let =\space}} - -% \indexnofonts no-ops all font-change commands. -% This is used when outputting the strings to sort the index by. -\def\indexdummyfont#1{#1} -\def\indexdummytex{TeX} -\def\indexdummydots{...} - -\def\indexnofonts{% -% Just ignore accents. -\let\,=\indexdummyfont -\let\"=\indexdummyfont -\let\`=\indexdummyfont -\let\'=\indexdummyfont -\let\^=\indexdummyfont -\let\~=\indexdummyfont -\let\==\indexdummyfont -\let\b=\indexdummyfont -\let\c=\indexdummyfont -\let\d=\indexdummyfont -\let\u=\indexdummyfont -\let\v=\indexdummyfont -\let\H=\indexdummyfont -\let\dotless=\indexdummyfont -% Take care of the plain tex special European modified letters. -\def\oe{oe}% -\def\ae{ae}% -\def\aa{aa}% -\def\OE{OE}% -\def\AE{AE}% -\def\AA{AA}% -\def\o{o}% -\def\O{O}% -\def\l{l}% -\def\L{L}% -\def\ss{ss}% -\let\w=\indexdummyfont -\let\t=\indexdummyfont -\let\r=\indexdummyfont -\let\i=\indexdummyfont -\let\b=\indexdummyfont -\let\emph=\indexdummyfont -\let\strong=\indexdummyfont -\let\cite=\indexdummyfont -\let\sc=\indexdummyfont -%Don't no-op \tt, since it isn't a user-level command -% and is used in the definitions of the active chars like <, >, |... 
-%\let\tt=\indexdummyfont -\let\tclose=\indexdummyfont -\let\code=\indexdummyfont -\let\url=\indexdummyfont -\let\uref=\indexdummyfont -\let\env=\indexdummyfont -\let\acronym=\indexdummyfont -\let\command=\indexdummyfont -\let\option=\indexdummyfont -\let\file=\indexdummyfont -\let\samp=\indexdummyfont -\let\kbd=\indexdummyfont -\let\key=\indexdummyfont -\let\var=\indexdummyfont -\let\TeX=\indexdummytex -\let\dots=\indexdummydots -\def\@{@}% -} - -% To define \realbackslash, we must make \ not be an escape. -% We must first make another character (@) an escape -% so we do not become unable to do a definition. - -{\catcode`\@=0 \catcode`\\=\other - @gdef@realbackslash{\}} - -\let\indexbackslash=0 %overridden during \printindex. -\let\SETmarginindex=\relax % put index entries in margin (undocumented)? - -% For \ifx comparisons. -\def\emptymacro{\empty} - -% Most index entries go through here, but \dosubind is the general case. -% -\def\doind#1#2{\dosubind{#1}{#2}\empty} - -% Workhorse for all \fooindexes. -% #1 is name of index, #2 is stuff to put there, #3 is subentry -- -% \empty if called from \doind, as we usually are. The main exception -% is with defuns, which call us directly. -% -\def\dosubind#1#2#3{% - % Put the index entry in the margin if desired. - \ifx\SETmarginindex\relax\else - \insert\margin{\hbox{\vrule height8pt depth3pt width0pt #2}}% - \fi - {% - \count255=\lastpenalty - {% - \indexdummies % Must do this here, since \bf, etc expand at this stage - \escapechar=`\\ - {% - \let\folio = 0% We will expand all macros now EXCEPT \folio. - \def\rawbackslashxx{\indexbackslash}% \indexbackslash isn't defined now - % so it will be output as is; and it will print as backslash. - % - \def\thirdarg{#3}% - % - % If third arg is present, precede it with space in sort key. - \ifx\thirdarg\emptymacro - \let\subentry = \empty - \else - \def\subentry{ #3}% - \fi - % - % First process the index entry with all font commands turned - % off to get the string to sort by. - {\indexnofonts \xdef\indexsorttmp{#2\subentry}}% - % - % Now the real index entry with the fonts. - \toks0 = {#2}% - % - % If the third (subentry) arg is present, add it to the index - % line to write. - \ifx\thirdarg\emptymacro \else - \toks0 = \expandafter{\the\toks0{#3}}% - \fi - % - % Set up the complete index entry, with both the sort key and - % the original text, including any font commands. We write - % three arguments to \entry to the .?? file (four in the - % subentry case), texindex reduces to two when writing the .??s - % sorted result. - \edef\temp{% - \write\csname#1indfile\endcsname{% - \realbackslash entry{\indexsorttmp}{\folio}{\the\toks0}}% - }% - % - % If a skip is the last thing on the list now, preserve it - % by backing up by \lastskip, doing the \write, then inserting - % the skip again. Otherwise, the whatsit generated by the - % \write will make \lastskip zero. The result is that sequences - % like this: - % @end defun - % @tindex whatever - % @defun ... - % will have extra space inserted, because the \medbreak in the - % start of the @defun won't see the skip inserted by the @end of - % the previous defun. - % - % But don't do any of this if we're not in vertical mode. We - % don't want to do a \vskip and prematurely end a paragraph. - % - % Avoid page breaks due to these extra skips, too. 
- % - \iflinks - \ifvmode - \skip0 = \lastskip - \ifdim\lastskip = 0pt \else \nobreak\vskip-\lastskip \fi - \fi - % - \temp % do the write - % - % - \ifvmode \ifdim\skip0 = 0pt \else \nobreak\vskip\skip0 \fi \fi - \fi - }% - }% - \penalty\count255 - }% -} - -% The index entry written in the file actually looks like -% \entry {sortstring}{page}{topic} -% or -% \entry {sortstring}{page}{topic}{subtopic} -% The texindex program reads in these files and writes files -% containing these kinds of lines: -% \initial {c} -% before the first topic whose initial is c -% \entry {topic}{pagelist} -% for a topic that is used without subtopics -% \primary {topic} -% for the beginning of a topic that is used with subtopics -% \secondary {subtopic}{pagelist} -% for each subtopic. - -% Define the user-accessible indexing commands -% @findex, @vindex, @kindex, @cindex. - -\def\findex {\fnindex} -\def\kindex {\kyindex} -\def\cindex {\cpindex} -\def\vindex {\vrindex} -\def\tindex {\tpindex} -\def\pindex {\pgindex} - -\def\cindexsub {\begingroup\obeylines\cindexsub} -{\obeylines % -\gdef\cindexsub "#1" #2^^M{\endgroup % -\dosubind{cp}{#2}{#1}}} - -% Define the macros used in formatting output of the sorted index material. - -% @printindex causes a particular index (the ??s file) to get printed. -% It does not print any chapter heading (usually an @unnumbered). -% -\def\printindex{\parsearg\doprintindex} -\def\doprintindex#1{\begingroup - \dobreak \chapheadingskip{10000}% - % - \smallfonts \rm - \tolerance = 9500 - \indexbreaks - % - % See if the index file exists and is nonempty. - % Change catcode of @ here so that if the index file contains - % \initial {@} - % as its first line, TeX doesn't complain about mismatched braces - % (because it thinks @} is a control sequence). - \catcode`\@ = 11 - \openin 1 \jobname.#1s - \ifeof 1 - % \enddoublecolumns gets confused if there is no text in the index, - % and it loses the chapter title and the aux file entries for the - % index. The easiest way to prevent this problem is to make sure - % there is some text. - \putwordIndexNonexistent - \else - % - % If the index file exists but is empty, then \openin leaves \ifeof - % false. We have to make TeX try to read something from the file, so - % it can discover if there is anything in it. - \read 1 to \temp - \ifeof 1 - \putwordIndexIsEmpty - \else - % Index files are almost Texinfo source, but we use \ as the escape - % character. It would be better to use @, but that's too big a change - % to make right now. - \def\indexbackslash{\rawbackslashxx}% - \catcode`\\ = 0 - \escapechar = `\\ - \begindoublecolumns - \input \jobname.#1s - \enddoublecolumns - \fi - \fi - \closein 1 -\endgroup} - -% These macros are used by the sorted index file itself. -% Change them to control the appearance of the index. - -\def\initial#1{{% - % Some minor font changes for the special characters. - \let\tentt=\sectt \let\tt=\sectt \let\sf=\sectt - % - % Remove any glue we may have, we'll be inserting our own. - \removelastskip - % - % We like breaks before the index initials, so insert a bonus. - \penalty -300 - % - % Typeset the initial. Making this add up to a whole number of - % baselineskips increases the chance of the dots lining up from column - % to column. It still won't often be perfect, because of the stretch - % we need before each entry, but it's better. - % - % No shrink because it confuses \balancecolumns. 
- \vskip 1.67\baselineskip plus .5\baselineskip - \leftline{\secbf #1}% - \vskip .33\baselineskip plus .1\baselineskip - % - % Do our best not to break after the initial. - \nobreak -}} - -% This typesets a paragraph consisting of #1, dot leaders, and then #2 -% flush to the right margin. It is used for index and table of contents -% entries. The paragraph is indented by \leftskip. -% -\def\entry#1#2{\begingroup - % - % Start a new paragraph if necessary, so our assignments below can't - % affect previous text. - \par - % - % Do not fill out the last line with white space. - \parfillskip = 0in - % - % No extra space above this paragraph. - \parskip = 0in - % - % Do not prefer a separate line ending with a hyphen to fewer lines. - \finalhyphendemerits = 0 - % - % \hangindent is only relevant when the entry text and page number - % don't both fit on one line. In that case, bob suggests starting the - % dots pretty far over on the line. Unfortunately, a large - % indentation looks wrong when the entry text itself is broken across - % lines. So we use a small indentation and put up with long leaders. - % - % \hangafter is reset to 1 (which is the value we want) at the start - % of each paragraph, so we need not do anything with that. - \hangindent = 2em - % - % When the entry text needs to be broken, just fill out the first line - % with blank space. - \rightskip = 0pt plus1fil - % - % A bit of stretch before each entry for the benefit of balancing columns. - \vskip 0pt plus1pt - % - % Start a ``paragraph'' for the index entry so the line breaking - % parameters we've set above will have an effect. - \noindent - % - % Insert the text of the index entry. TeX will do line-breaking on it. - #1% - % The following is kludged to not output a line of dots in the index if - % there are no page numbers. The next person who breaks this will be - % cursed by a Unix daemon. - \def\tempa{{\rm }}% - \def\tempb{#2}% - \edef\tempc{\tempa}% - \edef\tempd{\tempb}% - \ifx\tempc\tempd\ \else% - % - % If we must, put the page number on a line of its own, and fill out - % this line with blank space. (The \hfil is overwhelmed with the - % fill leaders glue in \indexdotfill if the page number does fit.) - \hfil\penalty50 - \null\nobreak\indexdotfill % Have leaders before the page number. - % - % The `\ ' here is removed by the implicit \unskip that TeX does as - % part of (the primitive) \par. Without it, a spurious underfull - % \hbox ensues. - \ifpdf - \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. - \else - \ #2% The page number ends the paragraph. - \fi - \fi% - \par -\endgroup} - -% Like \dotfill except takes at least 1 em. -\def\indexdotfill{\cleaders - \hbox{$\mathsurround=0pt \mkern1.5mu ${\it .}$ \mkern1.5mu$}\hskip 1em plus 1fill} - -\def\primary #1{\line{#1\hfil}} - -\newskip\secondaryindent \secondaryindent=0.5cm -\def\secondary#1#2{{% - \parfillskip=0in - \parskip=0in - \hangindent=1in - \hangafter=1 - \noindent\hskip\secondaryindent\hbox{#1}\indexdotfill - \ifpdf - \pdfgettoks#2.\ \the\toksA % The page number ends the paragraph. - \else - #2 - \fi - \par -}} - -% Define two-column mode, which we use to typeset indexes. -% Adapted from the TeXbook, page 416, which is to say, -% the manmac.tex format used to print the TeXbook itself. -\catcode`\@=11 - -\newbox\partialpage -\newdimen\doublecolumnhsize - -\def\begindoublecolumns{\begingroup % ended by \enddoublecolumns - % Grab any single-column material above us. 
- \output = {% - % - % Here is a possibility not foreseen in manmac: if we accumulate a - % whole lot of material, we might end up calling this \output - % routine twice in a row (see the doublecol-lose test, which is - % essentially a couple of indexes with @setchapternewpage off). In - % that case we just ship out what is in \partialpage with the normal - % output routine. Generally, \partialpage will be empty when this - % runs and this will be a no-op. See the indexspread.tex test case. - \ifvoid\partialpage \else - \onepageout{\pagecontents\partialpage}% - \fi - % - \global\setbox\partialpage = \vbox{% - % Unvbox the main output page. - \unvbox\PAGE - \kern-\topskip \kern\baselineskip - }% - }% - \eject % run that output routine to set \partialpage - % - % Use the double-column output routine for subsequent pages. - \output = {\doublecolumnout}% - % - % Change the page size parameters. We could do this once outside this - % routine, in each of @smallbook, @afourpaper, and the default 8.5x11 - % format, but then we repeat the same computation. Repeating a couple - % of assignments once per index is clearly meaningless for the - % execution time, so we may as well do it in one place. - % - % First we halve the line length, less a little for the gutter between - % the columns. We compute the gutter based on the line length, so it - % changes automatically with the paper format. The magic constant - % below is chosen so that the gutter has the same value (well, +-<1pt) - % as it did when we hard-coded it. - % - % We put the result in a separate register, \doublecolumhsize, so we - % can restore it in \pagesofar, after \hsize itself has (potentially) - % been clobbered. - % - \doublecolumnhsize = \hsize - \advance\doublecolumnhsize by -.04154\hsize - \divide\doublecolumnhsize by 2 - \hsize = \doublecolumnhsize - % - % Double the \vsize as well. (We don't need a separate register here, - % since nobody clobbers \vsize.) - \vsize = 2\vsize -} - -% The double-column output routine for all double-column pages except -% the last. -% -\def\doublecolumnout{% - \splittopskip=\topskip \splitmaxdepth=\maxdepth - % Get the available space for the double columns -- the normal - % (undoubled) page height minus any material left over from the - % previous page. - \dimen@ = \vsize - \divide\dimen@ by 2 - \advance\dimen@ by -\ht\partialpage - % - % box0 will be the left-hand column, box2 the right. - \setbox0=\vsplit255 to\dimen@ \setbox2=\vsplit255 to\dimen@ - \onepageout\pagesofar - \unvbox255 - \penalty\outputpenalty -} -% -% Re-output the contents of the output page -- any previous material, -% followed by the two boxes we just split, in box0 and box2. -\def\pagesofar{% - \unvbox\partialpage - % - \hsize = \doublecolumnhsize - \wd0=\hsize \wd2=\hsize - \hbox to\pagewidth{\box0\hfil\box2}% -} -% -% All done with double columns. -\def\enddoublecolumns{% - \output = {% - % Split the last of the double-column material. Leave it on the - % current page, no automatic page break. - \balancecolumns - % - % If we end up splitting too much material for the current page, - % though, there will be another page break right after this \output - % invocation ends. Having called \balancecolumns once, we do not - % want to call it again. Therefore, reset \output to its normal - % definition right away. (We hope \balancecolumns will never be - % called on to balance too much material, but if it is, this makes - % the output somewhat more palatable.) 
- \global\output = {\onepageout{\pagecontents\PAGE}}% - }% - \eject - \endgroup % started in \begindoublecolumns - % - % \pagegoal was set to the doubled \vsize above, since we restarted - % the current page. We're now back to normal single-column - % typesetting, so reset \pagegoal to the normal \vsize (after the - % \endgroup where \vsize got restored). - \pagegoal = \vsize -} -% -% Called at the end of the double column material. -\def\balancecolumns{% - \setbox0 = \vbox{\unvbox255}% like \box255 but more efficient, see p.120. - \dimen@ = \ht0 - \advance\dimen@ by \topskip - \advance\dimen@ by-\baselineskip - \divide\dimen@ by 2 % target to split to - %debug\message{final 2-column material height=\the\ht0, target=\the\dimen@.}% - \splittopskip = \topskip - % Loop until we get a decent breakpoint. - {% - \vbadness = 10000 - \loop - \global\setbox3 = \copy0 - \global\setbox1 = \vsplit3 to \dimen@ - \ifdim\ht3>\dimen@ - \global\advance\dimen@ by 1pt - \repeat - }% - %debug\message{split to \the\dimen@, column heights: \the\ht1, \the\ht3.}% - \setbox0=\vbox to\dimen@{\unvbox1}% - \setbox2=\vbox to\dimen@{\unvbox3}% - % - \pagesofar -} -\catcode`\@ = \other - - -\message{sectioning,} -% Chapters, sections, etc. - -\newcount\chapno -\newcount\secno \secno=0 -\newcount\subsecno \subsecno=0 -\newcount\subsubsecno \subsubsecno=0 - -% This counter is funny since it counts through charcodes of letters A, B, ... -\newcount\appendixno \appendixno = `\@ -% \def\appendixletter{\char\the\appendixno} -% We do the following for the sake of pdftex, which needs the actual -% letter in the expansion, not just typeset. -\def\appendixletter{% - \ifnum\appendixno=`A A% - \else\ifnum\appendixno=`B B% - \else\ifnum\appendixno=`C C% - \else\ifnum\appendixno=`D D% - \else\ifnum\appendixno=`E E% - \else\ifnum\appendixno=`F F% - \else\ifnum\appendixno=`G G% - \else\ifnum\appendixno=`H H% - \else\ifnum\appendixno=`I I% - \else\ifnum\appendixno=`J J% - \else\ifnum\appendixno=`K K% - \else\ifnum\appendixno=`L L% - \else\ifnum\appendixno=`M M% - \else\ifnum\appendixno=`N N% - \else\ifnum\appendixno=`O O% - \else\ifnum\appendixno=`P P% - \else\ifnum\appendixno=`Q Q% - \else\ifnum\appendixno=`R R% - \else\ifnum\appendixno=`S S% - \else\ifnum\appendixno=`T T% - \else\ifnum\appendixno=`U U% - \else\ifnum\appendixno=`V V% - \else\ifnum\appendixno=`W W% - \else\ifnum\appendixno=`X X% - \else\ifnum\appendixno=`Y Y% - \else\ifnum\appendixno=`Z Z% - % The \the is necessary, despite appearances, because \appendixletter is - % expanded while writing the .toc file. \char\appendixno is not - % expandable, thus it is written literally, thus all appendixes come out - % with the same letter (or @) in the toc without it. - \else\char\the\appendixno - \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi - \fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi\fi} - -% Each @chapter defines this as the name of the chapter. -% page headings and footings can use it. @section does likewise. -\def\thischapter{} -\def\thissection{} - -\newcount\absseclevel % used to calculate proper heading level -\newcount\secbase\secbase=0 % @raise/lowersections modify this count - -% @raisesections: treat @section as chapter, @subsection as section, etc. -\def\raisesections{\global\advance\secbase by -1} -\let\up=\raisesections % original BFox name - -% @lowersections: treat @chapter as section, @section as subsection, etc. 
-\def\lowersections{\global\advance\secbase by 1} -\let\down=\lowersections % original BFox name - -% Choose a numbered-heading macro -% #1 is heading level if unmodified by @raisesections or @lowersections -% #2 is text for heading -\def\numhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \chapterzzz{#2} -\or - \seczzz{#2} -\or - \numberedsubseczzz{#2} -\or - \numberedsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \chapterzzz{#2} - \else - \numberedsubsubseczzz{#2} - \fi -\fi -} - -% like \numhead, but chooses appendix heading levels -\def\apphead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \appendixzzz{#2} -\or - \appendixsectionzzz{#2} -\or - \appendixsubseczzz{#2} -\or - \appendixsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \appendixzzz{#2} - \else - \appendixsubsubseczzz{#2} - \fi -\fi -} - -% like \numhead, but chooses numberless heading levels -\def\unnmhead#1#2{\absseclevel=\secbase\advance\absseclevel by #1 -\ifcase\absseclevel - \unnumberedzzz{#2} -\or - \unnumberedseczzz{#2} -\or - \unnumberedsubseczzz{#2} -\or - \unnumberedsubsubseczzz{#2} -\else - \ifnum \absseclevel<0 - \unnumberedzzz{#2} - \else - \unnumberedsubsubseczzz{#2} - \fi -\fi -} - -% @chapter, @appendix, @unnumbered. -\def\thischaptername{No Chapter Title} -\outer\def\chapter{\parsearg\chapteryyy} -\def\chapteryyy #1{\numhead0{#1}} % normally numhead0 calls chapterzzz -\def\chapterzzz #1{% -\secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \chapno by 1 \message{\putwordChapter\space \the\chapno}% -\chapmacro {#1}{\the\chapno}% -\gdef\thissection{#1}% -\gdef\thischaptername{#1}% -% We don't substitute the actual chapter name into \thischapter -% because we don't want its macros evaluated now. -\xdef\thischapter{\putwordChapter{} \the\chapno: \noexpand\thischaptername}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash chapentry{\the\toks0}% - {\the\chapno}}}% -\temp -\donoderef -\global\let\section = \numberedsec -\global\let\subsection = \numberedsubsec -\global\let\subsubsection = \numberedsubsubsec -} - -\outer\def\appendix{\parsearg\appendixyyy} -\def\appendixyyy #1{\apphead0{#1}} % normally apphead0 calls appendixzzz -\def\appendixzzz #1{% -\secno=0 \subsecno=0 \subsubsecno=0 -\global\advance \appendixno by 1 -\message{\putwordAppendix\space \appendixletter}% -\chapmacro {#1}{\putwordAppendix{} \appendixletter}% -\gdef\thissection{#1}% -\gdef\thischaptername{#1}% -\xdef\thischapter{\putwordAppendix{} \appendixletter: \noexpand\thischaptername}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash chapentry{\the\toks0}% - {\putwordAppendix{} \appendixletter}}}% -\temp -\appendixnoderef -\global\let\section = \appendixsec -\global\let\subsection = \appendixsubsec -\global\let\subsubsection = \appendixsubsubsec -} - -% @centerchap is like @unnumbered, but the heading is centered. -\outer\def\centerchap{\parsearg\centerchapyyy} -\def\centerchapyyy #1{{\let\unnumbchapmacro=\centerchapmacro \unnumberedyyy{#1}}} - -% @top is like @unnumbered. -\outer\def\top{\parsearg\unnumberedyyy} - -\outer\def\unnumbered{\parsearg\unnumberedyyy} -\def\unnumberedyyy #1{\unnmhead0{#1}} % normally unnmhead0 calls unnumberedzzz -\def\unnumberedzzz #1{% -\secno=0 \subsecno=0 \subsubsecno=0 -% -% This used to be simply \message{#1}, but TeX fully expands the -% argument to \message. Therefore, if #1 contained @-commands, TeX -% expanded them. 
For example, in `@unnumbered The @cite{Book}', TeX -% expanded @cite (which turns out to cause errors because \cite is meant -% to be executed, not expanded). -% -% Anyway, we don't want the fully-expanded definition of @cite to appear -% as a result of the \message, we just want `@cite' itself. We use -% \the<toks register> to achieve this: TeX expands \the<toks register> only once, -% simply yielding the contents of <toks register>. (We also do this for -% the toc entries.) -\toks0 = {#1}\message{(\the\toks0)}% -% -\unnumbchapmacro {#1}% -\gdef\thischapter{#1}\gdef\thissection{#1}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash unnumbchapentry{\the\toks0}}}% -\temp -\unnumbnoderef -\global\let\section = \unnumberedsec -\global\let\subsection = \unnumberedsubsec -\global\let\subsubsection = \unnumberedsubsubsec -} - -% Sections. -\outer\def\numberedsec{\parsearg\secyyy} -\def\secyyy #1{\numhead1{#1}} % normally calls seczzz -\def\seczzz #1{% -\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % -\gdef\thissection{#1}\secheading {#1}{\the\chapno}{\the\secno}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash secentry{\the\toks0}% - {\the\chapno}{\the\secno}}}% -\temp -\donoderef -\nobreak -} - -\outer\def\appendixsection{\parsearg\appendixsecyyy} -\outer\def\appendixsec{\parsearg\appendixsecyyy} -\def\appendixsecyyy #1{\apphead1{#1}} % normally calls appendixsectionzzz -\def\appendixsectionzzz #1{% -\subsecno=0 \subsubsecno=0 \global\advance \secno by 1 % -\gdef\thissection{#1}\secheading {#1}{\appendixletter}{\the\secno}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash secentry{\the\toks0}% - {\appendixletter}{\the\secno}}}% -\temp -\appendixnoderef -\nobreak -} - -\outer\def\unnumberedsec{\parsearg\unnumberedsecyyy} -\def\unnumberedsecyyy #1{\unnmhead1{#1}} % normally calls unnumberedseczzz -\def\unnumberedseczzz #1{% -\plainsecheading {#1}\gdef\thissection{#1}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash unnumbsecentry{\the\toks0}}}% -\temp -\unnumbnoderef -\nobreak -} - -% Subsections. -\outer\def\numberedsubsec{\parsearg\numberedsubsecyyy} -\def\numberedsubsecyyy #1{\numhead2{#1}} % normally calls numberedsubseczzz -\def\numberedsubseczzz #1{% -\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % -\subsecheading {#1}{\the\chapno}{\the\secno}{\the\subsecno}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash subsecentry{\the\toks0}% - {\the\chapno}{\the\secno}{\the\subsecno}}}% -\temp -\donoderef -\nobreak -} - -\outer\def\appendixsubsec{\parsearg\appendixsubsecyyy} -\def\appendixsubsecyyy #1{\apphead2{#1}} % normally calls appendixsubseczzz -\def\appendixsubseczzz #1{% -\gdef\thissection{#1}\subsubsecno=0 \global\advance \subsecno by 1 % -\subsecheading {#1}{\appendixletter}{\the\secno}{\the\subsecno}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash subsecentry{\the\toks0}% - {\appendixletter}{\the\secno}{\the\subsecno}}}% -\temp -\appendixnoderef -\nobreak -} - -\outer\def\unnumberedsubsec{\parsearg\unnumberedsubsecyyy} -\def\unnumberedsubsecyyy #1{\unnmhead2{#1}} %normally calls unnumberedsubseczzz -\def\unnumberedsubseczzz #1{% -\plainsubsecheading {#1}\gdef\thissection{#1}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash unnumbsubsecentry% - {\the\toks0}}}% -\temp -\unnumbnoderef -\nobreak -} - -% Subsubsections.
-\outer\def\numberedsubsubsec{\parsearg\numberedsubsubsecyyy} -\def\numberedsubsubsecyyy #1{\numhead3{#1}} % normally numberedsubsubseczzz -\def\numberedsubsubseczzz #1{% -\gdef\thissection{#1}\global\advance \subsubsecno by 1 % -\subsubsecheading {#1} - {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash subsubsecentry{\the\toks0}% - {\the\chapno}{\the\secno}{\the\subsecno}{\the\subsubsecno}}}% -\temp -\donoderef -\nobreak -} - -\outer\def\appendixsubsubsec{\parsearg\appendixsubsubsecyyy} -\def\appendixsubsubsecyyy #1{\apphead3{#1}} % normally appendixsubsubseczzz -\def\appendixsubsubseczzz #1{% -\gdef\thissection{#1}\global\advance \subsubsecno by 1 % -\subsubsecheading {#1} - {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash subsubsecentry{\the\toks0}% - {\appendixletter}{\the\secno}{\the\subsecno}{\the\subsubsecno}}}% -\temp -\appendixnoderef -\nobreak -} - -\outer\def\unnumberedsubsubsec{\parsearg\unnumberedsubsubsecyyy} -\def\unnumberedsubsubsecyyy #1{\unnmhead3{#1}} %normally unnumberedsubsubseczzz -\def\unnumberedsubsubseczzz #1{% -\plainsubsubsecheading {#1}\gdef\thissection{#1}% -\toks0 = {#1}% -\edef\temp{\noexpand\writetocentry{\realbackslash unnumbsubsubsecentry% - {\the\toks0}}}% -\temp -\unnumbnoderef -\nobreak -} - -% These are variants which are not "outer", so they can appear in @ifinfo. -% Actually, they should now be obsolete; ordinary section commands should work. -\def\infotop{\parsearg\unnumberedzzz} -\def\infounnumbered{\parsearg\unnumberedzzz} -\def\infounnumberedsec{\parsearg\unnumberedseczzz} -\def\infounnumberedsubsec{\parsearg\unnumberedsubseczzz} -\def\infounnumberedsubsubsec{\parsearg\unnumberedsubsubseczzz} - -\def\infoappendix{\parsearg\appendixzzz} -\def\infoappendixsec{\parsearg\appendixseczzz} -\def\infoappendixsubsec{\parsearg\appendixsubseczzz} -\def\infoappendixsubsubsec{\parsearg\appendixsubsubseczzz} - -\def\infochapter{\parsearg\chapterzzz} -\def\infosection{\parsearg\sectionzzz} -\def\infosubsection{\parsearg\subsectionzzz} -\def\infosubsubsection{\parsearg\subsubsectionzzz} - -% These macros control what the section commands do, according -% to what kind of chapter we are in (ordinary, appendix, or unnumbered). -% Define them by default for a numbered chapter. -\global\let\section = \numberedsec -\global\let\subsection = \numberedsubsec -\global\let\subsubsection = \numberedsubsubsec - -% Define @majorheading, @heading and @subheading - -% NOTE on use of \vbox for chapter headings, section headings, and such: -% 1) We use \vbox rather than the earlier \line to permit -% overlong headings to fold. -% 2) \hyphenpenalty is set to 10000 because hyphenation in a -% heading is obnoxious; this forbids it. -% 3) Likewise, headings look best if no \parindent is used, and -% if justification is not attempted. Hence \raggedright. - - -\def\majorheading{\parsearg\majorheadingzzz} -\def\majorheadingzzz #1{% -{\advance\chapheadingskip by 10pt \chapbreak }% -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 200} - -\def\chapheading{\parsearg\chapheadingzzz} -\def\chapheadingzzz #1{\chapbreak % -{\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\penalty 200} - -% @heading, @subheading, @subsubheading. 
-\def\heading{\parsearg\plainsecheading} -\def\subheading{\parsearg\plainsubsecheading} -\def\subsubheading{\parsearg\plainsubsubsecheading} - -% These macros generate a chapter, section, etc. heading only -% (including whitespace, linebreaking, etc. around it), -% given all the information in convenient, parsed form. - -%%% Args are the skip and penalty (usually negative) -\def\dobreak#1#2{\par\ifdim\lastskip<#1\removelastskip\penalty#2\vskip#1\fi} - -\def\setchapterstyle #1 {\csname CHAPF#1\endcsname} - -%%% Define plain chapter starts, and page on/off switching for it -% Parameter controlling skip before chapter headings (if needed) - -\newskip\chapheadingskip - -\def\chapbreak{\dobreak \chapheadingskip {-4000}} -\def\chappager{\par\vfill\supereject} -\def\chapoddpage{\chappager \ifodd\pageno \else \hbox to 0pt{} \chappager\fi} - -\def\setchapternewpage #1 {\csname CHAPPAG#1\endcsname} - -\def\CHAPPAGoff{% -\global\let\contentsalignmacro = \chappager -\global\let\pchapsepmacro=\chapbreak -\global\let\pagealignmacro=\chappager} - -\def\CHAPPAGon{% -\global\let\contentsalignmacro = \chappager -\global\let\pchapsepmacro=\chappager -\global\let\pagealignmacro=\chappager -\global\def\HEADINGSon{\HEADINGSsingle}} - -\def\CHAPPAGodd{ -\global\let\contentsalignmacro = \chapoddpage -\global\let\pchapsepmacro=\chapoddpage -\global\let\pagealignmacro=\chapoddpage -\global\def\HEADINGSon{\HEADINGSdouble}} - -\CHAPPAGon - -\def\CHAPFplain{ -\global\let\chapmacro=\chfplain -\global\let\unnumbchapmacro=\unnchfplain -\global\let\centerchapmacro=\centerchfplain} - -% Plain chapter opening. -% #1 is the text, #2 the chapter number or empty if unnumbered. -\def\chfplain#1#2{% - \pchapsepmacro - {% - \chapfonts \rm - \def\chapnum{#2}% - \setbox0 = \hbox{#2\ifx\chapnum\empty\else\enspace\fi}% - \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \raggedright - \hangindent = \wd0 \centerparametersmaybe - \unhbox0 #1\par}% - }% - \nobreak\bigskip % no page break after a chapter title - \nobreak -} - -% Plain opening for unnumbered. -\def\unnchfplain#1{\chfplain{#1}{}} - -% @centerchap -- centered and unnumbered. -\let\centerparametersmaybe = \relax -\def\centerchfplain#1{{% - \def\centerparametersmaybe{% - \advance\rightskip by 3\rightskip - \leftskip = \rightskip - \parfillskip = 0pt - }% - \chfplain{#1}{}% -}} - -\CHAPFplain % The default - -\def\unnchfopen #1{% -\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt\raggedright - \rm #1\hfill}}\bigskip \par\nobreak -} - -\def\chfopen #1#2{\chapoddpage {\chapfonts -\vbox to 3in{\vfil \hbox to\hsize{\hfil #2} \hbox to\hsize{\hfil #1} \vfil}}% -\par\penalty 5000 % -} - -\def\centerchfopen #1{% -\chapoddpage {\chapfonts \vbox{\hyphenpenalty=10000\tolerance=5000 - \parindent=0pt - \hfill {\rm #1}\hfill}}\bigskip \par\nobreak -} - -\def\CHAPFopen{ -\global\let\chapmacro=\chfopen -\global\let\unnumbchapmacro=\unnchfopen -\global\let\centerchapmacro=\centerchfopen} - - -% Section titles. -\newskip\secheadingskip -\def\secheadingbreak{\dobreak \secheadingskip {-1000}} -\def\secheading#1#2#3{\sectionheading{sec}{#2.#3}{#1}} -\def\plainsecheading#1{\sectionheading{sec}{}{#1}} - -% Subsection titles. -\newskip \subsecheadingskip -\def\subsecheadingbreak{\dobreak \subsecheadingskip {-500}} -\def\subsecheading#1#2#3#4{\sectionheading{subsec}{#2.#3.#4}{#1}} -\def\plainsubsecheading#1{\sectionheading{subsec}{}{#1}} - -% Subsubsection titles. 
-\let\subsubsecheadingskip = \subsecheadingskip -\let\subsubsecheadingbreak = \subsecheadingbreak -\def\subsubsecheading#1#2#3#4#5{\sectionheading{subsubsec}{#2.#3.#4.#5}{#1}} -\def\plainsubsubsecheading#1{\sectionheading{subsubsec}{}{#1}} - - -% Print any size section title. -% -% #1 is the section type (sec/subsec/subsubsec), #2 is the section -% number (maybe empty), #3 the text. -\def\sectionheading#1#2#3{% - {% - \expandafter\advance\csname #1headingskip\endcsname by \parskip - \csname #1headingbreak\endcsname - }% - {% - % Switch to the right set of fonts. - \csname #1fonts\endcsname \rm - % - % Only insert the separating space if we have a section number. - \def\secnum{#2}% - \setbox0 = \hbox{#2\ifx\secnum\empty\else\enspace\fi}% - % - \vbox{\hyphenpenalty=10000 \tolerance=5000 \parindent=0pt \raggedright - \hangindent = \wd0 % zero if no section number - \unhbox0 #3}% - }% - \ifdim\parskip<10pt \nobreak\kern10pt\nobreak\kern-\parskip\fi \nobreak -} - - -\message{toc,} -% Table of contents. -\newwrite\tocfile - -% Write an entry to the toc file, opening it if necessary. -% Called from @chapter, etc. We supply {\folio} at the end of the -% argument, which will end up as the last argument to the \...entry macro. -% -% We open the .toc file here instead of at @setfilename or any other -% given time so that @contents can be put in the document anywhere. -% -\newif\iftocfileopened -\def\writetocentry#1{% - \iftocfileopened\else - \immediate\openout\tocfile = \jobname.toc - \global\tocfileopenedtrue - \fi - \iflinks \write\tocfile{#1{\folio}}\fi -} - -\newskip\contentsrightmargin \contentsrightmargin=1in -\newcount\savepageno -\newcount\lastnegativepageno \lastnegativepageno = -1 - -% Finish up the main text and prepare to read what we've written -% to \tocfile. -% -\def\startcontents#1{% - % If @setchapternewpage on, and @headings double, the contents should - % start on an odd page, unlike chapters. Thus, we maintain - % \contentsalignmacro in parallel with \pagealignmacro. - % From: Torbjorn Granlund - \contentsalignmacro - \immediate\closeout\tocfile - % - % Don't need to put `Contents' or `Short Contents' in the headline. - % It is abundantly clear what they are. - \unnumbchapmacro{#1}\def\thischapter{}% - \savepageno = \pageno - \begingroup % Set up to handle contents files properly. - \catcode`\\=0 \catcode`\{=1 \catcode`\}=2 \catcode`\@=11 - % We can't do this, because then an actual ^ in a section - % title fails, e.g., @chapter ^ -- exponentiation. --karl, 9jul97. - %\catcode`\^=7 % to see ^^e4 as \"a etc. juha@piuha.ydi.vtt.fi - \raggedbottom % Worry more about breakpoints than the bottom. - \advance\hsize by -\contentsrightmargin % Don't use the full line length. - % - % Roman numerals for page numbers. - \ifnum \pageno>0 \pageno = \lastnegativepageno \fi -} - - -% Normal (long) toc. -\def\contents{% - \startcontents{\putwordTOC}% - \openin 1 \jobname.toc - \ifeof 1 \else - \closein 1 - \input \jobname.toc - \fi - \vfill \eject - \contentsalignmacro % in case @setchapternewpage odd is in effect - \pdfmakeoutlines - \endgroup - \lastnegativepageno = \pageno - \pageno = \savepageno -} - -% And just the chapters. -\def\summarycontents{% - \startcontents{\putwordShortTOC}% - % - \let\chapentry = \shortchapentry - \let\unnumbchapentry = \shortunnumberedentry - % We want a true roman here for the page numbers. - \secfonts - \let\rm=\shortcontrm \let\bf=\shortcontbf \let\sl=\shortcontsl - \rm - \hyphenpenalty = 10000 - \advance\baselineskip by 1pt % Open it up a little. 
- \def\secentry ##1##2##3##4{} - \def\unnumbsecentry ##1##2{} - \def\subsecentry ##1##2##3##4##5{} - \def\unnumbsubsecentry ##1##2{} - \def\subsubsecentry ##1##2##3##4##5##6{} - \def\unnumbsubsubsecentry ##1##2{} - \openin 1 \jobname.toc - \ifeof 1 \else - \closein 1 - \input \jobname.toc - \fi - \vfill \eject - \contentsalignmacro % in case @setchapternewpage odd is in effect - \endgroup - \lastnegativepageno = \pageno - \pageno = \savepageno -} -\let\shortcontents = \summarycontents - -\ifpdf - \pdfcatalog{/PageMode /UseOutlines}% -\fi - -% These macros generate individual entries in the table of contents. -% The first argument is the chapter or section name. -% The last argument is the page number. -% The arguments in between are the chapter number, section number, ... - -% Chapter-level things, for both the long and short contents. -\def\chapentry#1#2#3{\dochapentry{#2\labelspace#1}{#3}} - -% See comments in \dochapentry re vbox and related settings -\def\shortchapentry#1#2#3{% - \tocentry{\shortchaplabel{#2}\labelspace #1}{\doshortpageno\bgroup#3\egroup}% -} - -% Typeset the label for a chapter or appendix for the short contents. -% The arg is, e.g. `Appendix A' for an appendix, or `3' for a chapter. -% We could simplify the code here by writing out an \appendixentry -% command in the toc file for appendices, instead of using \chapentry -% for both, but it doesn't seem worth it. -% -\newdimen\shortappendixwidth -% -\def\shortchaplabel#1{% - % Compute width of word "Appendix", may change with language. - \setbox0 = \hbox{\shortcontrm \putwordAppendix}% - \shortappendixwidth = \wd0 - % - % We typeset #1 in a box of constant width, regardless of the text of - % #1, so the chapter titles will come out aligned. - \setbox0 = \hbox{#1}% - \dimen0 = \ifdim\wd0 > \shortappendixwidth \shortappendixwidth \else 0pt \fi - % - % This space should be plenty, since a single number is .5em, and the - % widest letter (M) is 1em, at least in the Computer Modern fonts. - % (This space doesn't include the extra space that gets added after - % the label; that gets put in by \shortchapentry above.) - \advance\dimen0 by 1.1em - \hbox to \dimen0{#1\hfil}% -} - -\def\unnumbchapentry#1#2{\dochapentry{#1}{#2}} -\def\shortunnumberedentry#1#2{\tocentry{#1}{\doshortpageno\bgroup#2\egroup}} - -% Sections. -\def\secentry#1#2#3#4{\dosecentry{#2.#3\labelspace#1}{#4}} -\def\unnumbsecentry#1#2{\dosecentry{#1}{#2}} - -% Subsections. -\def\subsecentry#1#2#3#4#5{\dosubsecentry{#2.#3.#4\labelspace#1}{#5}} -\def\unnumbsubsecentry#1#2{\dosubsecentry{#1}{#2}} - -% And subsubsections. -\def\subsubsecentry#1#2#3#4#5#6{% - \dosubsubsecentry{#2.#3.#4.#5\labelspace#1}{#6}} -\def\unnumbsubsubsecentry#1#2{\dosubsubsecentry{#1}{#2}} - -% This parameter controls the indentation of the various levels. -\newdimen\tocindent \tocindent = 3pc - -% Now for the actual typesetting. In all these, #1 is the text and #2 is the -% page number. -% -% If the toc has to be broken over pages, we want it to be at chapters -% if at all possible; hence the \penalty. 
-\def\dochapentry#1#2{% - \penalty-300 \vskip1\baselineskip plus.33\baselineskip minus.25\baselineskip - \begingroup - \chapentryfonts - \tocentry{#1}{\dopageno\bgroup#2\egroup}% - \endgroup - \nobreak\vskip .25\baselineskip plus.1\baselineskip -} - -\def\dosecentry#1#2{\begingroup - \secentryfonts \leftskip=\tocindent - \tocentry{#1}{\dopageno\bgroup#2\egroup}% -\endgroup} - -\def\dosubsecentry#1#2{\begingroup - \subsecentryfonts \leftskip=2\tocindent - \tocentry{#1}{\dopageno\bgroup#2\egroup}% -\endgroup} - -\def\dosubsubsecentry#1#2{\begingroup - \subsubsecentryfonts \leftskip=3\tocindent - \tocentry{#1}{\dopageno\bgroup#2\egroup}% -\endgroup} - -% Final typesetting of a toc entry; we use the same \entry macro as for -% the index entries, but we want to suppress hyphenation here. (We -% can't do that in the \entry macro, since index entries might consist -% of hyphenated-identifiers-that-do-not-fit-on-a-line-and-nothing-else.) -\def\tocentry#1#2{\begingroup - \vskip 0pt plus1pt % allow a little stretch for the sake of nice page breaks - % Do not use \turnoffactive in these arguments. Since the toc is - % typeset in cmr, so characters such as _ would come out wrong; we - % have to do the usual translation tricks. - \entry{#1}{#2}% -\endgroup} - -% Space between chapter (or whatever) number and the title. -\def\labelspace{\hskip1em \relax} - -\def\dopageno#1{{\rm #1}} -\def\doshortpageno#1{{\rm #1}} - -\def\chapentryfonts{\secfonts \rm} -\def\secentryfonts{\textfonts} -\let\subsecentryfonts = \textfonts -\let\subsubsecentryfonts = \textfonts - - -\message{environments,} -% @foo ... @end foo. - -% Since these characters are used in examples, it should be an even number of -% \tt widths. Each \tt character is 1en, so two makes it 1em. -% Furthermore, these definitions must come after we define our fonts. -\newbox\dblarrowbox \newbox\longdblarrowbox -\newbox\pushcharbox \newbox\bullbox -\newbox\equivbox \newbox\errorbox - -%{\tentt -%\global\setbox\dblarrowbox = \hbox to 1em{\hfil$\Rightarrow$\hfil} -%\global\setbox\longdblarrowbox = \hbox to 1em{\hfil$\mapsto$\hfil} -%\global\setbox\pushcharbox = \hbox to 1em{\hfil$\dashv$\hfil} -%\global\setbox\equivbox = \hbox to 1em{\hfil$\ptexequiv$\hfil} -% Adapted from the manmac format (p.420 of TeXbook) -%\global\setbox\bullbox = \hbox to 1em{\kern.15em\vrule height .75ex width .85ex -% depth .1ex\hfil} -%} - -% @point{}, @result{}, @expansion{}, @print{}, @equiv{}. -\def\point{$\star$} -\def\result{\leavevmode\raise.15ex\hbox to 1em{\hfil$\Rightarrow$\hfil}} -\def\expansion{\leavevmode\raise.1ex\hbox to 1em{\hfil$\mapsto$\hfil}} -\def\print{\leavevmode\lower.1ex\hbox to 1em{\hfil$\dashv$\hfil}} -\def\equiv{\leavevmode\lower.1ex\hbox to 1em{\hfil$\ptexequiv$\hfil}} - -% Adapted from the TeXbook's \boxit. -{\tentt \global\dimen0 = 3em}% Width of the box. -\dimen2 = .55pt % Thickness of rules -% The text. (`r' is open on the right, `e' somewhat less so on the left.) -\setbox0 = \hbox{\kern-.75pt \tensf error\kern-1.5pt} - -\global\setbox\errorbox=\hbox to \dimen0{\hfil - \hsize = \dimen0 \advance\hsize by -5.8pt % Space to left+right. - \advance\hsize by -2\dimen2 % Rules. - \vbox{ - \hrule height\dimen2 - \hbox{\vrule width\dimen2 \kern3pt % Space to left of text. - \vtop{\kern2.4pt \box0 \kern2.4pt}% Space above/below. - \kern3pt\vrule width\dimen2}% Space to right. - \hrule height\dimen2} - \hfil} - -% The @error{} command. -\def\error{\leavevmode\lower.7ex\copy\errorbox} - -% @tex ... @end tex escapes into raw Tex temporarily. 
-% One exception: @ is still an escape character, so that @end tex works. -% But \@ or @@ will get a plain tex @ character. - -\def\tex{\begingroup - \catcode `\\=0 \catcode `\{=1 \catcode `\}=2 - \catcode `\$=3 \catcode `\&=4 \catcode `\#=6 - \catcode `\^=7 \catcode `\_=8 \catcode `\~=13 \let~=\tie - \catcode `\%=14 - \catcode 43=12 % plus - \catcode`\"=12 - \catcode`\==12 - \catcode`\|=12 - \catcode`\<=12 - \catcode`\>=12 - \escapechar=`\\ - % - \let\b=\ptexb - \let\bullet=\ptexbullet - \let\c=\ptexc - \let\,=\ptexcomma - \let\.=\ptexdot - \let\dots=\ptexdots - \let\equiv=\ptexequiv - \let\!=\ptexexclam - \let\i=\ptexi - \let\{=\ptexlbrace - \let\+=\tabalign - \let\}=\ptexrbrace - \let\*=\ptexstar - \let\t=\ptext - % - \def\endldots{\mathinner{\ldots\ldots\ldots\ldots}}% - \def\enddots{\relax\ifmmode\endldots\else$\mathsurround=0pt \endldots\,$\fi}% - \def\@{@}% -\let\Etex=\endgroup} - -% Define @lisp ... @endlisp. -% @lisp does a \begingroup so it can rebind things, -% including the definition of @endlisp (which normally is erroneous). - -% Amount to narrow the margins by for @lisp. -\newskip\lispnarrowing \lispnarrowing=0.4in - -% This is the definition that ^^M gets inside @lisp, @example, and other -% such environments. \null is better than a space, since it doesn't -% have any width. -\def\lisppar{\null\endgraf} - -% Make each space character in the input produce a normal interword -% space in the output. Don't allow a line break at this space, as this -% is used only in environments like @example, where each line of input -% should produce a line of output anyway. -% -{\obeyspaces % -\gdef\sepspaces{\obeyspaces\let =\tie}} - -% Define \obeyedspace to be our active space, whatever it is. This is -% for use in \parsearg. -{\sepspaces% -\global\let\obeyedspace= } - -% This space is always present above and below environments. -\newskip\envskipamount \envskipamount = 0pt - -% Make spacing and below environment symmetrical. We use \parskip here -% to help in doing that, since in @example-like environments \parskip -% is reset to zero; thus the \afterenvbreak inserts no space -- but the -% start of the next paragraph will insert \parskip -% -\def\aboveenvbreak{{\advance\envskipamount by \parskip -\endgraf \ifdim\lastskip<\envskipamount -\removelastskip \penalty-50 \vskip\envskipamount \fi}} - -\let\afterenvbreak = \aboveenvbreak - -% \nonarrowing is a flag. If "set", @lisp etc don't narrow margins. -\let\nonarrowing=\relax - -% @cartouche ... @end cartouche: draw rectangle w/rounded corners around -% environment contents. -\font\circle=lcircle10 -\newdimen\circthick -\newdimen\cartouter\newdimen\cartinner -\newskip\normbskip\newskip\normpskip\newskip\normlskip -\circthick=\fontdimen8\circle -% -\def\ctl{{\circle\char'013\hskip -6pt}}% 6pt from pl file: 1/2charwidth -\def\ctr{{\hskip 6pt\circle\char'010}} -\def\cbl{{\circle\char'012\hskip -6pt}} -\def\cbr{{\hskip 6pt\circle\char'011}} -\def\carttop{\hbox to \cartouter{\hskip\lskip - \ctl\leaders\hrule height\circthick\hfil\ctr - \hskip\rskip}} -\def\cartbot{\hbox to \cartouter{\hskip\lskip - \cbl\leaders\hrule height\circthick\hfil\cbr - \hskip\rskip}} -% -\newskip\lskip\newskip\rskip - -\long\def\cartouche{% -\begingroup - \lskip=\leftskip \rskip=\rightskip - \leftskip=0pt\rightskip=0pt %we want these *outside*. 
- \cartinner=\hsize \advance\cartinner by-\lskip - \advance\cartinner by-\rskip - \cartouter=\hsize - \advance\cartouter by 18.4pt % allow for 3pt kerns on either -% side, and for 6pt waste from -% each corner char, and rule thickness - \normbskip=\baselineskip \normpskip=\parskip \normlskip=\lineskip - % Flag to tell @lisp, etc., not to narrow margin. - \let\nonarrowing=\comment - \vbox\bgroup - \baselineskip=0pt\parskip=0pt\lineskip=0pt - \carttop - \hbox\bgroup - \hskip\lskip - \vrule\kern3pt - \vbox\bgroup - \hsize=\cartinner - \kern3pt - \begingroup - \baselineskip=\normbskip - \lineskip=\normlskip - \parskip=\normpskip - \vskip -\parskip -\def\Ecartouche{% - \endgroup - \kern3pt - \egroup - \kern3pt\vrule - \hskip\rskip - \egroup - \cartbot - \egroup -\endgroup -}} - - -% This macro is called at the beginning of all the @example variants, -% inside a group. -\def\nonfillstart{% - \aboveenvbreak - \inENV % This group ends at the end of the body - \hfuzz = 12pt % Don't be fussy - \sepspaces % Make spaces be word-separators rather than space tokens. - \singlespace - \let\par = \lisppar % don't ignore blank lines - \obeylines % each line of input is a line of output - \parskip = 0pt - \parindent = 0pt - \emergencystretch = 0pt % don't try to avoid overfull boxes - % @cartouche defines \nonarrowing to inhibit narrowing - % at next level down. - \ifx\nonarrowing\relax - \advance \leftskip by \lispnarrowing - \exdentamount=\lispnarrowing - \let\exdent=\nofillexdent - \let\nonarrowing=\relax - \fi -} - -% Define the \E... control sequence only if we are inside the particular -% environment, so the error checking in \end will work. -% -% To end an @example-like environment, we first end the paragraph (via -% \afterenvbreak's vertical glue), and then the group. That way we keep -% the zero \parskip that the environments set -- \parskip glue will be -% inserted at the beginning of the next paragraph in the document, after -% the environment. -% -\def\nonfillfinish{\afterenvbreak\endgroup} - -% @lisp: indented, narrowed, typewriter font. -\def\lisp{\begingroup - \nonfillstart - \let\Elisp = \nonfillfinish - \tt - \let\kbdfont = \kbdexamplefont % Allow @kbd to do something special. - \gobble % eat return -} - -% @example: Same as @lisp. -\def\example{\begingroup \def\Eexample{\nonfillfinish\endgroup}\lisp} - -% @small... is usually equivalent to the non-small (@smallbook -% redefines). We must call \example (or whatever) last in the -% definition, since it reads the return following the @example (or -% whatever) command. -% -% This actually allows (for example) @end display inside an -% @smalldisplay. Too bad, but makeinfo will catch the error anyway. -% -\def\smalldisplay{\begingroup\def\Esmalldisplay{\nonfillfinish\endgroup}\display} -\def\smallexample{\begingroup\def\Esmallexample{\nonfillfinish\endgroup}\lisp} -\def\smallformat{\begingroup\def\Esmallformat{\nonfillfinish\endgroup}\format} -\def\smalllisp{\begingroup\def\Esmalllisp{\nonfillfinish\endgroup}\lisp} - -% Real @smallexample and @smalllisp (when @smallbook): use smaller fonts. -% Originally contributed by Pavel@xerox. -\def\smalllispx{\begingroup - \def\Esmalllisp{\nonfillfinish\endgroup}% - \def\Esmallexample{\nonfillfinish\endgroup}% - \smallfonts - \lisp -} - -% @display: same as @lisp except keep current font. -% -\def\display{\begingroup - \nonfillstart - \let\Edisplay = \nonfillfinish - \gobble -} - -% @smalldisplay (when @smallbook): @display plus smaller fonts. 
-% -\def\smalldisplayx{\begingroup - \def\Esmalldisplay{\nonfillfinish\endgroup}% - \smallfonts \rm - \display -} - -% @format: same as @display except don't narrow margins. -% -\def\format{\begingroup - \let\nonarrowing = t - \nonfillstart - \let\Eformat = \nonfillfinish - \gobble -} - -% @smallformat (when @smallbook): @format plus smaller fonts. -% -\def\smallformatx{\begingroup - \def\Esmallformat{\nonfillfinish\endgroup}% - \smallfonts \rm - \format -} - -% @flushleft (same as @format). -% -\def\flushleft{\begingroup \def\Eflushleft{\nonfillfinish\endgroup}\format} - -% @flushright. -% -\def\flushright{\begingroup - \let\nonarrowing = t - \nonfillstart - \let\Eflushright = \nonfillfinish - \advance\leftskip by 0pt plus 1fill - \gobble -} - - -% @quotation does normal linebreaking (hence we can't use \nonfillstart) -% and narrows the margins. -% -\def\quotation{% - \begingroup\inENV %This group ends at the end of the @quotation body - {\parskip=0pt \aboveenvbreak}% because \aboveenvbreak inserts \parskip - \singlespace - \parindent=0pt - % We have retained a nonzero parskip for the environment, since we're - % doing normal filling. So to avoid extra space below the environment... - \def\Equotation{\parskip = 0pt \nonfillfinish}% - % - % @cartouche defines \nonarrowing to inhibit narrowing at next level down. - \ifx\nonarrowing\relax - \advance\leftskip by \lispnarrowing - \advance\rightskip by \lispnarrowing - \exdentamount = \lispnarrowing - \let\nonarrowing = \relax - \fi -} - - -% LaTeX-like @verbatim...@end verbatim and @verb{...} -% If we want to allow any as delimiter, -% we need the curly braces so that makeinfo sees the @verb command, eg: -% `@verbx...x' would look like the '@verbx' command. --janneke@gnu.org -% -% [Knuth]: Donald Ervin Knuth, 1996. The TeXbook. -% -% [Knuth] p. 344; only we need to do '@' too -\def\dospecials{% - \do\ \do\\\do\@\do\{\do\}\do\$\do\&% - \do\#\do\^\do\^^K\do\_\do\^^A\do\%\do\~} -% -% [Knuth] p. 380 -\def\uncatcodespecials{% - \def\do##1{\catcode`##1=12}\dospecials} -% -% [Knuth] pp. 380,381,391 -% Disable Spanish ligatures ?` and !` of \tt font -\begingroup - \catcode`\`=\active\gdef`{\relax\lq} -\endgroup -% -% Setup for the @verb command. 
-% -% Eight spaces for a tab -\begingroup - \catcode`\^^I=\active - \gdef\tabeightspaces{\catcode`\^^I=\active\def^^I{\ \ \ \ \ \ \ \ }} -\endgroup -% -\def\setupverb{% - \tt % easiest (and conventionally used) font for verbatim - \def\par{\leavevmode\endgraf}% - \catcode`\`=\active - \tabeightspaces - % Respect line breaks, - % print special symbols as themselves, and - % make each space count - % must do in this order: - \obeylines \uncatcodespecials \sepspaces -} - -% Setup for the @verbatim environment -% -% Real tab expansion -\newdimen\tabw \setbox0=\hbox{\tt\space} \tabw=8\wd0 % tab amount -% -\def\starttabbox{\setbox0=\hbox\bgroup} -\begingroup - \catcode`\^^I=\active - \gdef\tabexpand{% - \catcode`\^^I=\active - \def^^I{\leavevmode\egroup - \dimen0=\wd0 % the width so far, or since the previous tab - \divide\dimen0 by\tabw - \multiply\dimen0 by\tabw % compute previous multiple of \tabw - \advance\dimen0 by\tabw % advance to next multiple of \tabw - \wd0=\dimen0 \box0 \starttabbox - }% - } -\endgroup -\def\setupverbatim{% - % Easiest (and conventionally used) font for verbatim - \tt - \def\par{\leavevmode\egroup\box0\endgraf}% - \catcode`\`=\active - \tabexpand - % Respect line breaks, - % print special symbols as themselves, and - % make each space count - % must do in this order: - \obeylines \uncatcodespecials \sepspaces - \everypar{\starttabbox}% -} - -% Do the @verb magic: verbatim text is quoted by unique -% delimiter characters. Before first delimiter expect a -% right brace, after last delimiter expect closing brace: -% -% \def\doverb'{'#1'}'{#1} -% -% [Knuth] p. 382; only eat outer {} -\begingroup - \catcode`[=1\catcode`]=2\catcode`\{=12\catcode`\}=12 - \gdef\doverb{#1[\def\next##1#1}[##1\endgroup]\next] -\endgroup -% -\def\verb{\begingroup\setupverb\doverb} -% -% -% Do the @verbatim magic: define the macro \doverbatim so that -% the (first) argument ends when '@end verbatim' is reached, ie: -% -% \def\doverbatim#1@end verbatim{#1} -% -% For Texinfo it's a lot easier than for LaTeX, -% because texinfo's \verbatim doesn't stop at '\end{verbatim}': -% we need not redefine '\', '{' and '}' -% -% Inspired by LaTeX's verbatim command set [latex.ltx] -%% Include LaTeX hack for completeness -- never know -%% \begingroup -%% \catcode`|=0 \catcode`[=1 -%% \catcode`]=2\catcode`\{=12\catcode`\}=12\catcode`\ =\active -%% \catcode`\\=12|gdef|doverbatim#1@end verbatim[ -%% #1|endgroup|def|Everbatim[]|end[verbatim]] -%% |endgroup -\begingroup - \catcode`\ =\active - \gdef\doverbatim#1@end verbatim{#1\end{verbatim}} -\endgroup -% -\def\verbatim{% - \def\Everbatim{\nonfillfinish\endgroup}% - \begingroup - \nonfillstart - \advance\leftskip by -\defbodyindent - \begingroup\setupverbatim\doverbatim -} - -% @verbatiminclude FILE - insert text of file in verbatim environment. -% -% Allow normal characters that we make active in the argument (a file name). -\def\verbatiminclude{% - \begingroup - \catcode`\\=12 - \catcode`~=12 - \catcode`^=12 - \catcode`_=12 - \catcode`|=12 - \catcode`<=12 - \catcode`>=12 - \catcode`+=12 - \parsearg\doverbatiminclude -} -\def\setupverbatiminclude{% - \begingroup - \nonfillstart - \advance\leftskip by -\defbodyindent - \begingroup\setupverbatim -} -% -\def\doverbatiminclude#1{% - % Restore active chars for included file. - \endgroup - \begingroup - \def\thisfile{#1}% - \expandafter\expandafter\setupverbatiminclude\input\thisfile - \endgroup\nonfillfinish\endgroup -} - - -\message{defuns,} -% @defun etc. 
- -% Allow user to change definition object font (\df) internally -\def\setdeffont #1 {\csname DEF#1\endcsname} - -\newskip\defbodyindent \defbodyindent=.4in -\newskip\defargsindent \defargsindent=50pt -\newskip\deftypemargin \deftypemargin=12pt -\newskip\deflastargmargin \deflastargmargin=18pt - -\newcount\parencount -% define \functionparens, which makes ( and ) and & do special things. -% \functionparens affects the group it is contained in. -\def\activeparens{% -\catcode`\(=\active \catcode`\)=\active \catcode`\&=\active -\catcode`\[=\active \catcode`\]=\active} - -% Make control sequences which act like normal parenthesis chars. -\let\lparen = ( \let\rparen = ) - -{\activeparens % Now, smart parens don't turn on until &foo (see \amprm) - -% Be sure that we always have a definition for `(', etc. For example, -% if the fn name has parens in it, \boldbrax will not be in effect yet, -% so TeX would otherwise complain about undefined control sequence. -\global\let(=\lparen \global\let)=\rparen -\global\let[=\lbrack \global\let]=\rbrack - -\gdef\functionparens{\boldbrax\let&=\amprm\parencount=0 } -\gdef\boldbrax{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb} -% This is used to turn on special parens -% but make & act ordinary (given that it's active). -\gdef\boldbraxnoamp{\let(=\opnr\let)=\clnr\let[=\lbrb\let]=\rbrb\let&=\ampnr} - -% Definitions of (, ) and & used in args for functions. -% This is the definition of ( outside of all parentheses. -\gdef\oprm#1 {{\rm\char`\(}#1 \bf \let(=\opnested - \global\advance\parencount by 1 -} -% -% This is the definition of ( when already inside a level of parens. -\gdef\opnested{\char`\(\global\advance\parencount by 1 } -% -\gdef\clrm{% Print a paren in roman if it is taking us back to depth of 0. - % also in that case restore the outer-level definition of (. - \ifnum \parencount=1 {\rm \char `\)}\sl \let(=\oprm \else \char `\) \fi - \global\advance \parencount by -1 } -% If we encounter &foo, then turn on ()-hacking afterwards -\gdef\amprm#1 {{\rm\}\let(=\oprm \let)=\clrm\ } -% -\gdef\normalparens{\boldbrax\let&=\ampnr} -} % End of definition inside \activeparens -%% These parens (in \boldbrax) actually are a little bolder than the -%% contained text. This is especially needed for [ and ] -\def\opnr{{\sf\char`\(}\global\advance\parencount by 1 } -\def\clnr{{\sf\char`\)}\global\advance\parencount by -1 } -\let\ampnr = \& -\def\lbrb{{\bf\char`\[}} -\def\rbrb{{\bf\char`\]}} - -% Active &'s sneak into the index arguments, so make sure it's defined. -{ - \catcode`& = 13 - \global\let& = \ampnr -} - -% First, defname, which formats the header line itself. -% #1 should be the function name. -% #2 should be the type of definition, such as "Function". - -\def\defname #1#2{% -% Get the values of \leftskip and \rightskip as they were -% outside the @def... -\dimen2=\leftskip -\advance\dimen2 by -\defbodyindent -\noindent -\setbox0=\hbox{\hskip \deflastargmargin{\rm #2}\hskip \deftypemargin}% -\dimen0=\hsize \advance \dimen0 by -\wd0 % compute size for first line -\dimen1=\hsize \advance \dimen1 by -\defargsindent %size for continuations -\parshape 2 0in \dimen0 \defargsindent \dimen1 -% Now output arg 2 ("Function" or some such) -% ending at \deftypemargin from the right margin, -% but stuck inside a box of width 0 so it does not interfere with linebreaking -{% Adjust \hsize to exclude the ambient margins, -% so that \rightline will obey them. 
-\advance \hsize by -\dimen2 -\rlap{\rightline{{\rm #2}\hskip -1.25pc }}}% -% Make all lines underfull and no complaints: -\tolerance=10000 \hbadness=10000 -\advance\leftskip by -\defbodyindent -\exdentamount=\defbodyindent -{\df #1}\enskip % Generate function name -} - -% Actually process the body of a definition -% #1 should be the terminating control sequence, such as \Edefun. -% #2 should be the "another name" control sequence, such as \defunx. -% #3 should be the control sequence that actually processes the header, -% such as \defunheader. - -\def\defparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\activeparens\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % 61 is `=' -\obeylines\activeparens\spacesplit#3} - -% #1 is the \E... control sequence to end the definition (which we define). -% #2 is the \...x control sequence for consecutive fns (which we define). -% #3 is the control sequence to call to resume processing. -% #4, delimited by the space, is the class name. -% -\def\defmethparsebody#1#2#3#4 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}}}% -\parindent=0in -\advance\leftskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#4}}} - -% Used for @deftypemethod and @deftypeivar. -% #1 is the \E... control sequence to end the definition (which we define). -% #2 is the \...x control sequence for consecutive fns (which we define). -% #3 is the control sequence to call to resume processing. -% #4, delimited by a space, is the class name. -% #5 is the method's return type. -% -\def\deftypemethparsebody#1#2#3#4 #5 {\begingroup\inENV - \medbreak - \def#1{\endgraf\endgroup\medbreak}% - \def#2##1 ##2 {\begingroup\obeylines\activeparens\spacesplit{#3{##1}{##2}}}% - \parindent=0in - \advance\leftskip by \defbodyindent - \exdentamount=\defbodyindent - \begingroup\obeylines\activeparens\spacesplit{#3{#4}{#5}}} - -% Used for @deftypeop. The change from \deftypemethparsebody is an -% extra argument at the beginning which is the `category', instead of it -% being the hardwired string `Method' or `Instance Variable'. We have -% to account for this both in the \...x definition and in parsing the -% input at hand. Thus also need a control sequence (passed as #5) for -% the \E... definition to assign the category name to. -% -\def\deftypeopparsebody#1#2#3#4#5 #6 {\begingroup\inENV - \medbreak - \def#1{\endgraf\endgroup\medbreak}% - \def#2##1 ##2 ##3 {% - \def#4{##1}% - \begingroup\obeylines\activeparens\spacesplit{#3{##2}{##3}}}% - \parindent=0in - \advance\leftskip by \defbodyindent - \exdentamount=\defbodyindent - \begingroup\obeylines\activeparens\spacesplit{#3{#5}{#6}}} - -\def\defopparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. 
-\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\activeparens\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\activeparens\spacesplit{#3{#5}}} - -% These parsing functions are similar to the preceding ones -% except that they do not make parens into active characters. -% These are used for "variables" since they have no arguments. - -\def\defvarparsebody #1#2#3{\begingroup\inENV% Environment for definitionbody -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2{\begingroup\obeylines\spacesplit#3}% -\parindent=0in -\advance\leftskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup % -\catcode 61=\active % -\obeylines\spacesplit#3} - -% This is used for \def{tp,vr}parsebody. It could probably be used for -% some of the others, too, with some judicious conditionals. -% -\def\parsebodycommon#1#2#3{% - \begingroup\inENV % - \medbreak % - % Define the end token that this defining construct specifies - % so that it will exit this group. - \def#1{\endgraf\endgroup\medbreak}% - \def#2##1 {\begingroup\obeylines\spacesplit{#3{##1}}}% - \parindent=0in - \advance\leftskip by \defbodyindent - \exdentamount=\defbodyindent - \begingroup\obeylines -} - -\def\defvrparsebody#1#2#3#4 {% - \parsebodycommon{#1}{#2}{#3}% - \spacesplit{#3{#4}}% -} - -% This loses on `@deftp {Data Type} {struct termios}' -- it thinks the -% type is just `struct', because we lose the braces in `{struct -% termios}' when \spacesplit reads its undelimited argument. Sigh. -% \let\deftpparsebody=\defvrparsebody -% -% So, to get around this, we put \empty in with the type name. That -% way, TeX won't find exactly `{...}' as an undelimited argument, and -% won't strip off the braces. -% -\def\deftpparsebody #1#2#3#4 {% - \parsebodycommon{#1}{#2}{#3}% - \spacesplit{\parsetpheaderline{#3{#4}}}\empty -} - -% Fine, but then we have to eventually remove the \empty *and* the -% braces (if any). That's what this does. -% -\def\removeemptybraces\empty#1\relax{#1} - -% After \spacesplit has done its work, this is called -- #1 is the final -% thing to call, #2 the type name (which starts with \empty), and #3 -% (which might be empty) the arguments. -% -\def\parsetpheaderline#1#2#3{% - #1{\removeemptybraces#2\relax}{#3}% -}% - -\def\defopvarparsebody #1#2#3#4#5 {\begingroup\inENV % -\medbreak % -% Define the end token that this defining construct specifies -% so that it will exit this group. -\def#1{\endgraf\endgroup\medbreak}% -\def#2##1 ##2 {\def#4{##1}% -\begingroup\obeylines\spacesplit{#3{##2}}}% -\parindent=0in -\advance\leftskip by \defbodyindent -\exdentamount=\defbodyindent -\begingroup\obeylines\spacesplit{#3{#5}}} - -% Split up #2 at the first space token. -% call #1 with two arguments: -% the first is all of #2 before the space token, -% the second is all of #2 after that space token. -% If #2 contains no space token, all of it is passed as the first arg -% and the second is passed as empty. - -{\obeylines -\gdef\spacesplit#1#2^^M{\endgroup\spacesplitfoo{#1}#2 \relax\spacesplitfoo}% -\long\gdef\spacesplitfoo#1#2 #3#4\spacesplitfoo{% -\ifx\relax #3% -#1{#2}{}\else #1{#2}{#3#4}\fi}} - -% So much for the things common to all kinds of definitions. - -% Define @defun. 
- -% First, define the processing that is wanted for arguments of \defun -% Use this to expand the args and terminate the paragraph they make up - -\def\defunargs#1{\functionparens \sl -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -% Set the font temporarily and use \font in case \setfont made \tensl a macro. -{\tensl\hyphenchar\font=0}% -#1% -{\tensl\hyphenchar\font=45}% -\ifnum\parencount=0 \else \errmessage{Unbalanced parentheses in @def}\fi% -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\nobreak\vskip -\parskip\nobreak -} - -\def\deftypefunargs #1{% -% Expand, preventing hyphenation at `-' chars. -% Note that groups don't affect changes in \hyphenchar. -% Use \boldbraxnoamp, not \functionparens, so that & is not special. -\boldbraxnoamp -\tclose{#1}% avoid \code because of side effects on active chars -\interlinepenalty=10000 -\advance\rightskip by 0pt plus 1fil -\endgraf\nobreak\vskip -\parskip\nobreak -} - -% Do complete processing of one @defun or @defunx line already parsed. - -% @deffn Command forward-char nchars - -\def\deffn{\defmethparsebody\Edeffn\deffnx\deffnheader} - -\def\deffnheader #1#2#3{\doind {fn}{\code{#2}}% -\begingroup\defname {#2}{#1}\defunargs{#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defun == @deffn Function - -\def\defun{\defparsebody\Edefun\defunx\defunheader} - -\def\defunheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{\putwordDeffunc}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefun int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefun{\defparsebody\Edeftypefun\deftypefunx\deftypefunheader} - -% #1 is the data type. #2 is the name and args. -\def\deftypefunheader #1#2{\deftypefunheaderx{#1}#2 \relax} -% #1 is the data type, #2 the name, #3 the args. -\def\deftypefunheaderx #1#2 #3\relax{% -\doind {fn}{\code{#2}}% Make entry in function index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{\putwordDeftypefun}% -\deftypefunargs {#3}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @deftypefn {Library Function} int foobar (int @var{foo}, float @var{bar}) - -\def\deftypefn{\defmethparsebody\Edeftypefn\deftypefnx\deftypefnheader} - -% \defheaderxcond#1\relax$$$ -% puts #1 in @code, followed by a space, but does nothing if #1 is null. -\def\defheaderxcond#1#2$$${\ifx#1\relax\else\code{#1#2} \fi} - -% #1 is the classification. #2 is the data type. #3 is the name and args. -\def\deftypefnheader #1#2#3{\deftypefnheaderx{#1}{#2}#3 \relax} -% #1 is the classification, #2 the data type, #3 the name, #4 the args. 
-\def\deftypefnheaderx #1#2#3 #4\relax{% -\doind {fn}{\code{#3}}% Make entry in function index -\begingroup -\normalparens % notably, turn off `&' magic, which prevents -% at least some C++ text from working -\defname {\defheaderxcond#2\relax$$$#3}{#1}% -\deftypefunargs {#4}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defmac == @deffn Macro - -\def\defmac{\defparsebody\Edefmac\defmacx\defmacheader} - -\def\defmacheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{\putwordDefmac}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defspec == @deffn Special Form - -\def\defspec{\defparsebody\Edefspec\defspecx\defspecheader} - -\def\defspecheader #1#2{\doind {fn}{\code{#1}}% Make entry in function index -\begingroup\defname {#1}{\putwordDefspec}% -\defunargs {#2}\endgroup % -\catcode 61=\other % Turn off change made in \defparsebody -} - -% @defop CATEGORY CLASS OPERATION ARG... -% -\def\defop #1 {\def\defoptype{#1}% -\defopparsebody\Edefop\defopx\defopheader\defoptype} -% -\def\defopheader#1#2#3{% -\dosubind {fn}{\code{#2}}{\putwordon\ #1}% Make entry in function index -\begingroup\defname {#2}{\defoptype\ \putwordon\ #1}% -\defunargs {#3}\endgroup % -} - -% @deftypeop CATEGORY CLASS TYPE OPERATION ARG... -% -\def\deftypeop #1 {\def\deftypeopcategory{#1}% - \deftypeopparsebody\Edeftypeop\deftypeopx\deftypeopheader - \deftypeopcategory} -% -% #1 is the class name, #2 the data type, #3 the operation name, #4 the args. -\def\deftypeopheader#1#2#3#4{% - \dosubind{fn}{\code{#3}}{\putwordon\ \code{#1}}% entry in function index - \begingroup - \defname{\defheaderxcond#2\relax$$$#3} - {\deftypeopcategory\ \putwordon\ \code{#1}}% - \deftypefunargs{#4}% - \endgroup -} - -% @deftypemethod CLASS TYPE METHOD ARG... -% -\def\deftypemethod{% - \deftypemethparsebody\Edeftypemethod\deftypemethodx\deftypemethodheader} -% -% #1 is the class name, #2 the data type, #3 the method name, #4 the args. -\def\deftypemethodheader#1#2#3#4{% - \dosubind{fn}{\code{#3}}{\putwordon\ \code{#1}}% entry in function index - \begingroup - \defname{\defheaderxcond#2\relax$$$#3}{\putwordMethodon\ \code{#1}}% - \deftypefunargs{#4}% - \endgroup -} - -% @deftypeivar CLASS TYPE VARNAME -% -\def\deftypeivar{% - \deftypemethparsebody\Edeftypeivar\deftypeivarx\deftypeivarheader} -% -% #1 is the class name, #2 the data type, #3 the variable name. -\def\deftypeivarheader#1#2#3{% - \dosubind{vr}{\code{#3}}{\putwordof\ \code{#1}}% entry in variable index - \begingroup - \defname{\defheaderxcond#2\relax$$$#3} - {\putwordInstanceVariableof\ \code{#1}}% - \defvarargs{#3}% - \endgroup -} - -% @defmethod == @defop Method -% -\def\defmethod{\defmethparsebody\Edefmethod\defmethodx\defmethodheader} -% -% #1 is the class name, #2 the method name, #3 the args. 
-\def\defmethodheader#1#2#3{% - \dosubind{fn}{\code{#2}}{\putwordon\ \code{#1}}% entry in function index - \begingroup - \defname{#2}{\putwordMethodon\ \code{#1}}% - \defunargs{#3}% - \endgroup -} - -% @defcv {Class Option} foo-class foo-flag - -\def\defcv #1 {\def\defcvtype{#1}% -\defopvarparsebody\Edefcv\defcvx\defcvarheader\defcvtype} - -\def\defcvarheader #1#2#3{% -\dosubind {vr}{\code{#2}}{\putwordof\ #1}% Make entry in var index -\begingroup\defname {#2}{\defcvtype\ \putwordof\ #1}% -\defvarargs {#3}\endgroup % -} - -% @defivar CLASS VARNAME == @defcv {Instance Variable} CLASS VARNAME -% -\def\defivar{\defvrparsebody\Edefivar\defivarx\defivarheader} -% -\def\defivarheader#1#2#3{% - \dosubind {vr}{\code{#2}}{\putwordof\ #1}% entry in var index - \begingroup - \defname{#2}{\putwordInstanceVariableof\ #1}% - \defvarargs{#3}% - \endgroup -} - -% @defvar -% First, define the processing that is wanted for arguments of @defvar. -% This is actually simple: just print them in roman. -% This must expand the args and terminate the paragraph they make up -\def\defvarargs #1{\normalparens #1% -\interlinepenalty=10000 -\endgraf\nobreak\vskip -\parskip\nobreak} - -% @defvr Counter foo-count - -\def\defvr{\defvrparsebody\Edefvr\defvrx\defvrheader} - -\def\defvrheader #1#2#3{\doind {vr}{\code{#2}}% -\begingroup\defname {#2}{#1}\defvarargs{#3}\endgroup} - -% @defvar == @defvr Variable - -\def\defvar{\defvarparsebody\Edefvar\defvarx\defvarheader} - -\def\defvarheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{\putwordDefvar}% -\defvarargs {#2}\endgroup % -} - -% @defopt == @defvr {User Option} - -\def\defopt{\defvarparsebody\Edefopt\defoptx\defoptheader} - -\def\defoptheader #1#2{\doind {vr}{\code{#1}}% Make entry in var index -\begingroup\defname {#1}{\putwordDefopt}% -\defvarargs {#2}\endgroup % -} - -% @deftypevar int foobar - -\def\deftypevar{\defvarparsebody\Edeftypevar\deftypevarx\deftypevarheader} - -% #1 is the data type. #2 is the name, perhaps followed by text that -% is actually part of the data type, which should not be put into the index. -\def\deftypevarheader #1#2{% -\dovarind#2 \relax% Make entry in variables index -\begingroup\defname {\defheaderxcond#1\relax$$$#2}{\putwordDeftypevar}% -\interlinepenalty=10000 -\endgraf\nobreak\vskip -\parskip\nobreak -\endgroup} -\def\dovarind#1 #2\relax{\doind{vr}{\code{#1}}} - -% @deftypevr {Global Flag} int enable - -\def\deftypevr{\defvrparsebody\Edeftypevr\deftypevrx\deftypevrheader} - -\def\deftypevrheader #1#2#3{\dovarind#3 \relax% -\begingroup\defname {\defheaderxcond#2\relax$$$#3}{#1} -\interlinepenalty=10000 -\endgraf\nobreak\vskip -\parskip\nobreak -\endgroup} - -% Now define @deftp -% Args are printed in bold, a slight difference from @defvar. - -\def\deftpargs #1{\bf \defvarargs{#1}} - -% @deftp Class window height width ... - -\def\deftp{\deftpparsebody\Edeftp\deftpx\deftpheader} - -\def\deftpheader #1#2#3{\doind {tp}{\code{#2}}% -\begingroup\defname {#2}{#1}\deftpargs{#3}\endgroup} - -% These definitions are used if you use @defunx (etc.) -% anywhere other than immediately after a @defun or @defunx. 
-% -\def\defcvx#1 {\errmessage{@defcvx in invalid context}} -\def\deffnx#1 {\errmessage{@deffnx in invalid context}} -\def\defivarx#1 {\errmessage{@defivarx in invalid context}} -\def\defmacx#1 {\errmessage{@defmacx in invalid context}} -\def\defmethodx#1 {\errmessage{@defmethodx in invalid context}} -\def\defoptx #1 {\errmessage{@defoptx in invalid context}} -\def\defopx#1 {\errmessage{@defopx in invalid context}} -\def\defspecx#1 {\errmessage{@defspecx in invalid context}} -\def\deftpx#1 {\errmessage{@deftpx in invalid context}} -\def\deftypefnx#1 {\errmessage{@deftypefnx in invalid context}} -\def\deftypefunx#1 {\errmessage{@deftypefunx in invalid context}} -\def\deftypeivarx#1 {\errmessage{@deftypeivarx in invalid context}} -\def\deftypemethodx#1 {\errmessage{@deftypemethodx in invalid context}} -\def\deftypeopx#1 {\errmessage{@deftypeopx in invalid context}} -\def\deftypevarx#1 {\errmessage{@deftypevarx in invalid context}} -\def\deftypevrx#1 {\errmessage{@deftypevrx in invalid context}} -\def\defunx#1 {\errmessage{@defunx in invalid context}} -\def\defvarx#1 {\errmessage{@defvarx in invalid context}} -\def\defvrx#1 {\errmessage{@defvrx in invalid context}} - - -\message{macros,} -% @macro. - -% To do this right we need a feature of e-TeX, \scantokens, -% which we arrange to emulate with a temporary file in ordinary TeX. -\ifx\eTeXversion\undefined - \newwrite\macscribble - \def\scanmacro#1{% - \begingroup \newlinechar`\^^M - % Undo catcode changes of \startcontents and \doprintindex - \catcode`\@=0 \catcode`\\=12 \escapechar=`\@ - % Append \endinput to make sure that TeX does not see the ending newline. - \toks0={#1\endinput}% - \immediate\openout\macscribble=\jobname.tmp - \immediate\write\macscribble{\the\toks0}% - \immediate\closeout\macscribble - \let\xeatspaces\eatspaces - \input \jobname.tmp - \endgroup -} -\else -\def\scanmacro#1{% -\begingroup \newlinechar`\^^M -% Undo catcode changes of \startcontents and \doprintindex -\catcode`\@=0 \catcode`\\=12 \escapechar=`\@ -\let\xeatspaces\eatspaces\scantokens{#1\endinput}\endgroup} -\fi - -\newcount\paramno % Count of parameters -\newtoks\macname % Macro name -\newif\ifrecursive % Is it recursive? -\def\macrolist{} % List of all defined macros in the form - % \do\macro1\do\macro2... - -% Utility routines. -% Thisdoes \let #1 = #2, except with \csnames. -\def\cslet#1#2{% -\expandafter\expandafter -\expandafter\let -\expandafter\expandafter -\csname#1\endcsname -\csname#2\endcsname} - -% Trim leading and trailing spaces off a string. -% Concepts from aro-bend problem 15 (see CTAN). -{\catcode`\@=11 -\gdef\eatspaces #1{\expandafter\trim@\expandafter{#1 }} -\gdef\trim@ #1{\trim@@ @#1 @ #1 @ @@} -\gdef\trim@@ #1@ #2@ #3@@{\trim@@@\empty #2 @} -\def\unbrace#1{#1} -\unbrace{\gdef\trim@@@ #1 } #2@{#1} -} - -% Trim a single trailing ^^M off a string. -{\catcode`\^^M=12\catcode`\Q=3% -\gdef\eatcr #1{\eatcra #1Q^^MQ}% -\gdef\eatcra#1^^MQ{\eatcrb#1Q}% -\gdef\eatcrb#1Q#2Q{#1}% -} - -% Macro bodies are absorbed as an argument in a context where -% all characters are catcode 10, 11 or 12, except \ which is active -% (as in normal texinfo). It is necessary to change the definition of \. - -% It's necessary to have hard CRs when the macro is executed. This is -% done by making ^^M (\endlinechar) catcode 12 when reading the macro -% body, and then making it the \newlinechar in \scanmacro. 
- -\def\macrobodyctxt{% - \catcode`\~=12 - \catcode`\^=12 - \catcode`\_=12 - \catcode`\|=12 - \catcode`\<=12 - \catcode`\>=12 - \catcode`\+=12 - \catcode`\{=12 - \catcode`\}=12 - \catcode`\@=12 - \catcode`\^^M=12 - \usembodybackslash} - -\def\macroargctxt{% - \catcode`\~=12 - \catcode`\^=12 - \catcode`\_=12 - \catcode`\|=12 - \catcode`\<=12 - \catcode`\>=12 - \catcode`\+=12 - \catcode`\@=12 - \catcode`\\=12} - -% \mbodybackslash is the definition of \ in @macro bodies. -% It maps \foo\ => \csname macarg.foo\endcsname => #N -% where N is the macro parameter number. -% We define \csname macarg.\endcsname to be \realbackslash, so -% \\ in macro replacement text gets you a backslash. - -{\catcode`@=0 @catcode`@\=@active - @gdef@usembodybackslash{@let\=@mbodybackslash} - @gdef@mbodybackslash#1\{@csname macarg.#1@endcsname} -} -\expandafter\def\csname macarg.\endcsname{\realbackslash} - -\def\macro{\recursivefalse\parsearg\macroxxx} -\def\rmacro{\recursivetrue\parsearg\macroxxx} - -\def\macroxxx#1{% - \getargs{#1}% now \macname is the macname and \argl the arglist - \ifx\argl\empty % no arguments - \paramno=0% - \else - \expandafter\parsemargdef \argl;% - \fi - \if1\csname ismacro.\the\macname\endcsname - \message{Warning: redefining \the\macname}% - \else - \expandafter\ifx\csname \the\macname\endcsname \relax - \else \errmessage{The name \the\macname\space is reserved}\fi - \global\cslet{macsave.\the\macname}{\the\macname}% - \global\expandafter\let\csname ismacro.\the\macname\endcsname=1% - % Add the macroname to \macrolist - \toks0 = \expandafter{\macrolist\do}% - \xdef\macrolist{\the\toks0 - \expandafter\noexpand\csname\the\macname\endcsname}% - \fi - \begingroup \macrobodyctxt - \ifrecursive \expandafter\parsermacbody - \else \expandafter\parsemacbody - \fi} - -\def\unmacro{\parsearg\unmacroxxx} -\def\unmacroxxx#1{% - \if1\csname ismacro.#1\endcsname - \global\cslet{#1}{macsave.#1}% - \global\expandafter\let \csname ismacro.#1\endcsname=0% - % Remove the macro name from \macrolist - \begingroup - \edef\tempa{\expandafter\noexpand\csname#1\endcsname}% - \def\do##1{% - \def\tempb{##1}% - \ifx\tempa\tempb - % remove this - \else - \toks0 = \expandafter{\newmacrolist\do}% - \edef\newmacrolist{\the\toks0\expandafter\noexpand\tempa}% - \fi}% - \def\newmacrolist{}% - % Execute macro list to define \newmacrolist - \macrolist - \global\let\macrolist\newmacrolist - \endgroup - \else - \errmessage{Macro #1 not defined}% - \fi -} - -% This makes use of the obscure feature that if the last token of a -% is #, then the preceding argument is delimited by -% an opening brace, and that opening brace is not consumed. -\def\getargs#1{\getargsxxx#1{}} -\def\getargsxxx#1#{\getmacname #1 \relax\getmacargs} -\def\getmacname #1 #2\relax{\macname={#1}} -\def\getmacargs#1{\def\argl{#1}} - -% Parse the optional {params} list. Set up \paramno and \paramlist -% so \defmacro knows what to do. Define \macarg.blah for each blah -% in the params list, to be ##N where N is the position in that list. -% That gets used by \mbodybackslash (above). - -% We need to get `macro parameter char #' into several definitions. -% The technique used is stolen from LaTeX: let \hash be something -% unexpandable, insert that wherever you need a #, and then redefine -% it to # just before using the token list produced. -% -% The same technique is used to protect \eatspaces till just before -% the macro is used. 
- -\def\parsemargdef#1;{\paramno=0\def\paramlist{}% - \let\hash\relax\let\xeatspaces\relax\parsemargdefxxx#1,;,} -\def\parsemargdefxxx#1,{% - \if#1;\let\next=\relax - \else \let\next=\parsemargdefxxx - \advance\paramno by 1% - \expandafter\edef\csname macarg.\eatspaces{#1}\endcsname - {\xeatspaces{\hash\the\paramno}}% - \edef\paramlist{\paramlist\hash\the\paramno,}% - \fi\next} - -% These two commands read recursive and nonrecursive macro bodies. -% (They're different since rec and nonrec macros end differently.) - -\long\def\parsemacbody#1@end macro% -{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}% -\long\def\parsermacbody#1@end rmacro% -{\xdef\temp{\eatcr{#1}}\endgroup\defmacro}% - -% This defines the macro itself. There are six cases: recursive and -% nonrecursive macros of zero, one, and many arguments. -% Much magic with \expandafter here. -% \xdef is used so that macro definitions will survive the file -% they're defined in; @include reads the file inside a group. -\def\defmacro{% - \let\hash=##% convert placeholders to macro parameter chars - \ifrecursive - \ifcase\paramno - % 0 - \expandafter\xdef\csname\the\macname\endcsname{% - \noexpand\scanmacro{\temp}}% - \or % 1 - \expandafter\xdef\csname\the\macname\endcsname{% - \bgroup\noexpand\macroargctxt - \noexpand\braceorline - \expandafter\noexpand\csname\the\macname xxx\endcsname}% - \expandafter\xdef\csname\the\macname xxx\endcsname##1{% - \egroup\noexpand\scanmacro{\temp}}% - \else % many - \expandafter\xdef\csname\the\macname\endcsname{% - \bgroup\noexpand\macroargctxt - \noexpand\csname\the\macname xx\endcsname}% - \expandafter\xdef\csname\the\macname xx\endcsname##1{% - \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}% - \expandafter\expandafter - \expandafter\xdef - \expandafter\expandafter - \csname\the\macname xxx\endcsname - \paramlist{\egroup\noexpand\scanmacro{\temp}}% - \fi - \else - \ifcase\paramno - % 0 - \expandafter\xdef\csname\the\macname\endcsname{% - \noexpand\norecurse{\the\macname}% - \noexpand\scanmacro{\temp}\egroup}% - \or % 1 - \expandafter\xdef\csname\the\macname\endcsname{% - \bgroup\noexpand\macroargctxt - \noexpand\braceorline - \expandafter\noexpand\csname\the\macname xxx\endcsname}% - \expandafter\xdef\csname\the\macname xxx\endcsname##1{% - \egroup - \noexpand\norecurse{\the\macname}% - \noexpand\scanmacro{\temp}\egroup}% - \else % many - \expandafter\xdef\csname\the\macname\endcsname{% - \bgroup\noexpand\macroargctxt - \expandafter\noexpand\csname\the\macname xx\endcsname}% - \expandafter\xdef\csname\the\macname xx\endcsname##1{% - \expandafter\noexpand\csname\the\macname xxx\endcsname ##1,}% - \expandafter\expandafter - \expandafter\xdef - \expandafter\expandafter - \csname\the\macname xxx\endcsname - \paramlist{% - \egroup - \noexpand\norecurse{\the\macname}% - \noexpand\scanmacro{\temp}\egroup}% - \fi - \fi} - -\def\norecurse#1{\bgroup\cslet{#1}{macsave.#1}} - -% \braceorline decides whether the next nonwhitespace character is a -% {. If so it reads up to the closing }, if not, it reads the whole -% line. Whatever was read is then fed to the next control sequence -% as an argument (by \parsebrace or \parsearg) -\def\braceorline#1{\let\next=#1\futurelet\nchar\braceorlinexxx} -\def\braceorlinexxx{% - \ifx\nchar\bgroup\else - \expandafter\parsearg - \fi \next} - -% We mant to disable all macros during \shipout so that they are not -% expanded by \write. -\def\turnoffmacros{\begingroup \def\do##1{\let\noexpand##1=\relax}% - \edef\next{\macrolist}\expandafter\endgroup\next} - - -% @alias. 
-% We need some trickery to remove the optional spaces around the equal -% sign. Just make them active and then expand them all to nothing. -\def\alias{\begingroup\obeyspaces\parsearg\aliasxxx} -\def\aliasxxx #1{\aliasyyy#1\relax} -\def\aliasyyy #1=#2\relax{\ignoreactivespaces -\edef\next{\global\let\expandafter\noexpand\csname#1\endcsname=% - \expandafter\noexpand\csname#2\endcsname}% -\expandafter\endgroup\next} - - -\message{cross references,} -% @xref etc. - -\newwrite\auxfile - -\newif\ifhavexrefs % True if xref values are known. -\newif\ifwarnedxrefs % True if we warned once that they aren't known. - -% @inforef is relatively simple. -\def\inforef #1{\inforefzzz #1,,,,**} -\def\inforefzzz #1,#2,#3,#4**{\putwordSee{} \putwordInfo{} \putwordfile{} \file{\ignorespaces #3{}}, - node \samp{\ignorespaces#1{}}} - -% @node's job is to define \lastnode. -\def\node{\ENVcheck\parsearg\nodezzz} -\def\nodezzz#1{\nodexxx [#1,]} -\def\nodexxx[#1,#2]{\gdef\lastnode{#1}} -\let\nwnode=\node -\let\lastnode=\relax - -% The sectioning commands (@chapter, etc.) call these. -\def\donoderef{% - \ifx\lastnode\relax\else - \expandafter\expandafter\expandafter\setref{\lastnode}% - {Ysectionnumberandtype}% - \global\let\lastnode=\relax - \fi -} -\def\unnumbnoderef{% - \ifx\lastnode\relax\else - \expandafter\expandafter\expandafter\setref{\lastnode}{Ynothing}% - \global\let\lastnode=\relax - \fi -} -\def\appendixnoderef{% - \ifx\lastnode\relax\else - \expandafter\expandafter\expandafter\setref{\lastnode}% - {Yappendixletterandtype}% - \global\let\lastnode=\relax - \fi -} - - -% @anchor{NAME} -- define xref target at arbitrary point. -% -\newcount\savesfregister -\gdef\savesf{\relax \ifhmode \savesfregister=\spacefactor \fi} -\gdef\restoresf{\relax \ifhmode \spacefactor=\savesfregister \fi} -\gdef\anchor#1{\savesf \setref{#1}{Ynothing}\restoresf \ignorespaces} - -% \setref{NAME}{SNT} defines a cross-reference point NAME, namely -% NAME-title, NAME-pg, and NAME-SNT. Called from \foonoderef. We have -% to set \indexdummies so commands such as @code in a section title -% aren't expanded. It would be nicer not to expand the titles in the -% first place, but there's so many layers that that is hard to do. -% -\def\setref#1#2{{% - \indexdummies - \pdfmkdest{#1}% - \dosetq{#1-title}{Ytitle}% - \dosetq{#1-pg}{Ypagenumber}% - \dosetq{#1-snt}{#2}% -}} - -% @xref, @pxref, and @ref generate cross-references. For \xrefX, #1 is -% the node name, #2 the name of the Info cross-reference, #3 the printed -% node name, #4 the name of the Info file, #5 the name of the printed -% manual. All but the node name can be omitted. -% -\def\pxref#1{\putwordsee{} \xrefX[#1,,,,,,,]} -\def\xref#1{\putwordSee{} \xrefX[#1,,,,,,,]} -\def\ref#1{\xrefX[#1,,,,,,,]} -\def\xrefX[#1,#2,#3,#4,#5,#6]{\begingroup - \unsepspaces - \def\printedmanual{\ignorespaces #5}% - \def\printednodename{\ignorespaces #3}% - \setbox1=\hbox{\printedmanual}% - \setbox0=\hbox{\printednodename}% - \ifdim \wd0 = 0pt - % No printed node name was explicitly given. - \expandafter\ifx\csname SETxref-automatic-section-title\endcsname\relax - % Use the node name inside the square brackets. - \def\printednodename{\ignorespaces #1}% - \else - % Use the actual chapter/section title appear inside - % the square brackets. Use the real section title if we have it. - \ifdim \wd1 > 0pt - % It is in another manual, so we don't have it. - \def\printednodename{\ignorespaces #1}% - \else - \ifhavexrefs - % We know the real title if we have the xref values. 
- \def\printednodename{\refx{#1-title}{}}% - \else - % Otherwise just copy the Info node name. - \def\printednodename{\ignorespaces #1}% - \fi% - \fi - \fi - \fi - % - % If we use \unhbox0 and \unhbox1 to print the node names, TeX does not - % insert empty discretionaries after hyphens, which means that it will - % not find a line break at a hyphen in a node names. Since some manuals - % are best written with fairly long node names, containing hyphens, this - % is a loss. Therefore, we give the text of the node name again, so it - % is as if TeX is seeing it for the first time. - \ifpdf - \leavevmode - \getfilename{#4}% - \ifnum\filenamelength>0 - \startlink attr{/Border [0 0 0]}% - goto file{\the\filename.pdf} name{#1@}% - \else - \startlink attr{/Border [0 0 0]}% - goto name{#1@}% - \fi - \linkcolor - \fi - % - \ifdim \wd1 > 0pt - \putwordsection{} ``\printednodename'' \putwordin{} \cite{\printedmanual}% - \else - % _ (for example) has to be the character _ for the purposes of the - % control sequence corresponding to the node, but it has to expand - % into the usual \leavevmode...\vrule stuff for purposes of - % printing. So we \turnoffactive for the \refx-snt, back on for the - % printing, back off for the \refx-pg. - {\normalturnoffactive - % Only output a following space if the -snt ref is nonempty; for - % @unnumbered and @anchor, it won't be. - \setbox2 = \hbox{\ignorespaces \refx{#1-snt}{}}% - \ifdim \wd2 > 0pt \refx{#1-snt}\space\fi - }% - % [mynode], - [\printednodename],\space - % page 3 - \turnoffactive \putwordpage\tie\refx{#1-pg}{}% - \fi - \endlink -\endgroup} - -% \dosetq is the interface for calls from other macros - -% Use \normalturnoffactive so that punctuation chars such as underscore -% and backslash work in node names. (\turnoffactive doesn't do \.) -\def\dosetq#1#2{% - {\let\folio=0% - \normalturnoffactive - \edef\next{\write\auxfile{\internalsetq{#1}{#2}}}% - \iflinks - \next - \fi - }% -} - -% \internalsetq {foo}{page} expands into -% CHARACTERS 'xrdef {foo}{...expansion of \Ypage...} -% When the aux file is read, ' is the escape character - -\def\internalsetq #1#2{'xrdef {#1}{\csname #2\endcsname}} - -% Things to be expanded by \internalsetq - -\def\Ypagenumber{\folio} - -\def\Ytitle{\thissection} - -\def\Ynothing{} - -\def\Ysectionnumberandtype{% -\ifnum\secno=0 \putwordChapter\xreftie\the\chapno % -\else \ifnum \subsecno=0 \putwordSection\xreftie\the\chapno.\the\secno % -\else \ifnum \subsubsecno=0 % -\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno % -\else % -\putwordSection\xreftie\the\chapno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\def\Yappendixletterandtype{% -\ifnum\secno=0 \putwordAppendix\xreftie'char\the\appendixno{}% -\else \ifnum \subsecno=0 \putwordSection\xreftie'char\the\appendixno.\the\secno % -\else \ifnum \subsubsecno=0 % -\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno % -\else % -\putwordSection\xreftie'char\the\appendixno.\the\secno.\the\subsecno.\the\subsubsecno % -\fi \fi \fi } - -\gdef\xreftie{'tie} - -% Use TeX 3.0's \inputlineno to get the line number, for better error -% messages, but if we're using an old version of TeX, don't do anything. -% -\ifx\inputlineno\thisisundefined - \let\linenumber = \empty % Non-3.0. -\else - \def\linenumber{\the\inputlineno:\space} -\fi - -% Define \refx{NAME}{SUFFIX} to reference a cross-reference string named NAME. -% If its value is nonempty, SUFFIX is output afterward. 
- -\def\refx#1#2{% - \expandafter\ifx\csname X#1\endcsname\relax - % If not defined, say something at least. - \angleleft un\-de\-fined\angleright - \iflinks - \ifhavexrefs - \message{\linenumber Undefined cross reference `#1'.}% - \else - \ifwarnedxrefs\else - \global\warnedxrefstrue - \message{Cross reference values unknown; you must run TeX again.}% - \fi - \fi - \fi - \else - % It's defined, so just use it. - \csname X#1\endcsname - \fi - #2% Output the suffix in any case. -} - -% This is the macro invoked by entries in the aux file. -% -\def\xrdef#1{\begingroup - % Reenable \ as an escape while reading the second argument. - \catcode`\\ = 0 - \afterassignment\endgroup - \expandafter\gdef\csname X#1\endcsname -} - -% Read the last existing aux file, if any. No error if none exists. -\def\readauxfile{\begingroup - \catcode`\^^@=\other - \catcode`\^^A=\other - \catcode`\^^B=\other - \catcode`\^^C=\other - \catcode`\^^D=\other - \catcode`\^^E=\other - \catcode`\^^F=\other - \catcode`\^^G=\other - \catcode`\^^H=\other - \catcode`\^^K=\other - \catcode`\^^L=\other - \catcode`\^^N=\other - \catcode`\^^P=\other - \catcode`\^^Q=\other - \catcode`\^^R=\other - \catcode`\^^S=\other - \catcode`\^^T=\other - \catcode`\^^U=\other - \catcode`\^^V=\other - \catcode`\^^W=\other - \catcode`\^^X=\other - \catcode`\^^Z=\other - \catcode`\^^[=\other - \catcode`\^^\=\other - \catcode`\^^]=\other - \catcode`\^^^=\other - \catcode`\^^_=\other - \catcode`\@=\other - \catcode`\^=\other - % It was suggested to define this as 7, which would allow ^^e4 etc. - % in xref tags, i.e., node names. But since ^^e4 notation isn't - % supported in the main text, it doesn't seem desirable. Furthermore, - % that is not enough: for node names that actually contain a ^ - % character, we would end up writing a line like this: 'xrdef {'hat - % b-title}{'hat b} and \xrdef does a \csname...\endcsname on the first - % argument, and \hat is not an expandable control sequence. It could - % all be worked out, but why? Either we support ^^ or we don't. - % - % The other change necessary for this was to define \auxhat: - % \def\auxhat{\def^{'hat }}% extra space so ok if followed by letter - % and then to call \auxhat in \setq. - % - \catcode`\~=\other - \catcode`\[=\other - \catcode`\]=\other - \catcode`\"=\other - \catcode`\_=\other - \catcode`\|=\other - \catcode`\<=\other - \catcode`\>=\other - \catcode`\$=\other - \catcode`\#=\other - \catcode`\&=\other - \catcode`+=\other % avoid \+ for paranoia even though we've turned it off - % Make the characters 128-255 be printing characters - {% - \count 1=128 - \def\loop{% - \catcode\count 1=\other - \advance\count 1 by 1 - \ifnum \count 1<256 \loop \fi - }% - }% - % The aux file uses ' as the escape (for now). - % Turn off \ as an escape so we do not lose on - % entries which were dumped with control sequences in their names. - % For example, 'xrdef {$\leq $-fun}{page ...} made by @defun ^^ - % Reference to such entries still does not work the way one would wish, - % but at least they do not bomb out when the aux file is read in. - \catcode`\{=1 - \catcode`\}=2 - \catcode`\%=\other - \catcode`\'=0 - \catcode`\\=\other - % - \openin 1 \jobname.aux - \ifeof 1 \else - \closein 1 - \input \jobname.aux - \global\havexrefstrue - \global\warnedobstrue - \fi - % Open the new aux file. TeX will close it automatically at exit. - \openout\auxfile=\jobname.aux -\endgroup} - - -% Footnotes. 
- -\newcount \footnoteno - -% The trailing space in the following definition for supereject is -% vital for proper filling; pages come out unaligned when you do a -% pagealignmacro call if that space before the closing brace is -% removed. (Generally, numeric constants should always be followed by a -% space to prevent strange expansion errors.) -\def\supereject{\par\penalty -20000\footnoteno =0 } - -% @footnotestyle is meaningful for info output only. -\let\footnotestyle=\comment - -\let\ptexfootnote=\footnote - -{\catcode `\@=11 -% -% Auto-number footnotes. Otherwise like plain. -\gdef\footnote{% - \global\advance\footnoteno by \@ne - \edef\thisfootno{$^{\the\footnoteno}$}% - % - % In case the footnote comes at the end of a sentence, preserve the - % extra spacing after we do the footnote number. - \let\@sf\empty - \ifhmode\edef\@sf{\spacefactor\the\spacefactor}\/\fi - % - % Remove inadvertent blank space before typesetting the footnote number. - \unskip - \thisfootno\@sf - \footnotezzz -}% - -% Don't bother with the trickery in plain.tex to not require the -% footnote text as a parameter. Our footnotes don't need to be so general. -% -% Oh yes, they do; otherwise, @ifset and anything else that uses -% \parseargline fail inside footnotes because the tokens are fixed when -% the footnote is read. --karl, 16nov96. -% -\long\gdef\footnotezzz{\insert\footins\bgroup - % We want to typeset this text as a normal paragraph, even if the - % footnote reference occurs in (for example) a display environment. - % So reset some parameters. - \interlinepenalty\interfootnotelinepenalty - \splittopskip\ht\strutbox % top baseline for broken footnotes - \splitmaxdepth\dp\strutbox - \floatingpenalty\@MM - \leftskip\z@skip - \rightskip\z@skip - \spaceskip\z@skip - \xspaceskip\z@skip - \parindent\defaultparindent - % - \smallfonts \rm - % - % Hang the footnote text off the number. - \hang - \textindent{\thisfootno}% - % - % Don't crash into the line above the footnote text. Since this - % expands into a box, it must come within the paragraph, lest it - % provide a place where TeX can split the footnote. - \footstrut - \futurelet\next\fo@t -} -\def\fo@t{\ifcat\bgroup\noexpand\next \let\next\f@@t - \else\let\next\f@t\fi \next} -\def\f@@t{\bgroup\aftergroup\@foot\let\next} -\def\f@t#1{#1\@foot} -\def\@foot{\strut\par\egroup} - -}%end \catcode `\@=11 - -% Set the baselineskip to #1, and the lineskip and strut size -% correspondingly. There is no deep meaning behind these magic numbers -% used as factors; they just match (closely enough) what Knuth defined. -% -\def\lineskipfactor{.08333} -\def\strutheightpercent{.70833} -\def\strutdepthpercent {.29167} -% -\def\setleading#1{% - \normalbaselineskip = #1\relax - \normallineskip = \lineskipfactor\normalbaselineskip - \normalbaselines - \setbox\strutbox =\hbox{% - \vrule width0pt height\strutheightpercent\baselineskip - depth \strutdepthpercent \baselineskip - }% -} - -% @| inserts a changebar to the left of the current line. It should -% surround any changed text. This approach does *not* work if the -% change spans more than two lines of output. To handle that, we would -% have adopt a much more difficult approach (putting marks into the main -% vertical list for the beginning and end of each change). -% -\def\|{% - % \vadjust can only be used in horizontal mode. - \leavevmode - % - % Append this vertical mode material after the current line in the output. 
- \vadjust{% - % We want to insert a rule with the height and depth of the current - % leading; that is exactly what \strutbox is supposed to record. - \vskip-\baselineskip - % - % \vadjust-items are inserted at the left edge of the type. So - % the \llap here moves out into the left-hand margin. - \llap{% - % - % For a thicker or thinner bar, change the `1pt'. - \vrule height\baselineskip width1pt - % - % This is the space between the bar and the text. - \hskip 12pt - }% - }% -} - -% For a final copy, take out the rectangles -% that mark overfull boxes (in case you have decided -% that the text looks ok even though it passes the margin). -% -\def\finalout{\overfullrule=0pt} - -% @image. We use the macros from epsf.tex to support this. -% If epsf.tex is not installed and @image is used, we complain. -% -% Check for and read epsf.tex up front. If we read it only at @image -% time, we might be inside a group, and then its definitions would get -% undone and the next image would fail. -\openin 1 = epsf.tex -\ifeof 1 \else - \closein 1 - % Do not bother showing banner with post-v2.7 epsf.tex (available in - % doc/epsf.tex until it shows up on ctan). - \def\epsfannounce{\toks0 = }% - \input epsf.tex -\fi -% -% We will only complain once about lack of epsf.tex. -\newif\ifwarnednoepsf -\newhelp\noepsfhelp{epsf.tex must be installed for images to - work. It is also included in the Texinfo distribution, or you can get - it from ftp://tug.org/tex/epsf.tex.} -% -\def\image#1{% - \ifx\epsfbox\undefined - \ifwarnednoepsf \else - \errhelp = \noepsfhelp - \errmessage{epsf.tex not found, images will be ignored}% - \global\warnednoepsftrue - \fi - \else - \imagexxx #1,,,\finish - \fi -} -% -% Arguments to @image: -% #1 is (mandatory) image filename; we tack on .eps extension. -% #2 is (optional) width, #3 is (optional) height. -% #4 is just the usual extra ignored arg for parsing this stuff. -\def\imagexxx#1,#2,#3,#4\finish{% - \ifpdf - \centerline{\dopdfimage{#1}{#2}{#3}}% - \else - % \epsfbox itself resets \epsf?size at each figure. - \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \epsfxsize=#2\relax \fi - \setbox0 = \hbox{\ignorespaces #3}\ifdim\wd0 > 0pt \epsfysize=#3\relax \fi - \begingroup - \catcode`\^^M = 5 % in case we're inside an example - \normalturnoffactive % allow _ et al. in names - % If the image is by itself, center it. - \ifvmode - \nobreak\bigskip - % Usually we'll have text after the image which will insert - % \parskip glue, so insert it here too to equalize the space - % above and below. - \nobreak\vskip\parskip - \nobreak - \centerline{\epsfbox{#1.eps}}% - \bigbreak - \else - % In the middle of a paragraph, no extra space. - \epsfbox{#1.eps}% - \fi - \endgroup - \fi -} - - -\message{localization,} -% and i18n. - -% @documentlanguage is usually given very early, just after -% @setfilename. If done too late, it may not override everything -% properly. Single argument is the language abbreviation. -% It would be nice if we could set up a hyphenation file here. -% -\def\documentlanguage{\parsearg\dodocumentlanguage} -\def\dodocumentlanguage#1{% - \tex % read txi-??.tex file in plain TeX. - % Read the file if it exists. - \openin 1 txi-#1.tex - \ifeof1 - \errhelp = \nolanghelp - \errmessage{Cannot read language file txi-#1.tex}% - \let\temp = \relax - \else - \def\temp{\input txi-#1.tex }% - \fi - \temp - \endgroup -} -\newhelp\nolanghelp{The given language definition file cannot be found or -is empty. Maybe you need to install it? 
In the current directory -should work if nowhere else does.} - - -% @documentencoding should change something in TeX eventually, most -% likely, but for now just recognize it. -\let\documentencoding = \comment - - -% Page size parameters. -% -\newdimen\defaultparindent \defaultparindent = 15pt - -\chapheadingskip = 15pt plus 4pt minus 2pt -\secheadingskip = 12pt plus 3pt minus 2pt -\subsecheadingskip = 9pt plus 2pt minus 2pt - -% Prevent underfull vbox error messages. -\vbadness = 10000 - -% Don't be so finicky about underfull hboxes, either. -\hbadness = 2000 - -% Following George Bush, just get rid of widows and orphans. -\widowpenalty=10000 -\clubpenalty=10000 - -% Use TeX 3.0's \emergencystretch to help line breaking, but if we're -% using an old version of TeX, don't do anything. We want the amount of -% stretch added to depend on the line length, hence the dependence on -% \hsize. We call this whenever the paper size is set. -% -\def\setemergencystretch{% - \ifx\emergencystretch\thisisundefined - % Allow us to assign to \emergencystretch anyway. - \def\emergencystretch{\dimen0}% - \else - \emergencystretch = .15\hsize - \fi -} - -% Parameters in order: 1) textheight; 2) textwidth; 3) voffset; -% 4) hoffset; 5) binding offset; 6) topskip. Then whoever calls us can -% set \parskip and call \setleading for \baselineskip. -% -\def\internalpagesizes#1#2#3#4#5#6{% - \voffset = #3\relax - \topskip = #6\relax - \splittopskip = \topskip - % - \vsize = #1\relax - \advance\vsize by \topskip - \outervsize = \vsize - \advance\outervsize by 2\topandbottommargin - \pageheight = \vsize - % - \hsize = #2\relax - \outerhsize = \hsize - \advance\outerhsize by 0.5in - \pagewidth = \hsize - % - \normaloffset = #4\relax - \bindingoffset = #5\relax - % - \parindent = \defaultparindent - \setemergencystretch -} - -% Use `small' versions. -% -\def\smallenvironments{% - \let\smalldisplay = \smalldisplayx - \let\smallexample = \smalllispx - \let\smallformat = \smallformatx - \let\smalllisp = \smalllispx -} - -% @letterpaper (the default). -\def\letterpaper{{\globaldefs = 1 - \parskip = 3pt plus 2pt minus 1pt - \setleading{13.2pt}% - % - % If page is nothing but text, make it come out even. - \internalpagesizes{46\baselineskip}{6in}{\voffset}{.25in}{\bindingoffset}{36pt}% -}} - -% Use @smallbook to reset parameters for 7x9.5 (or so) format. -\def\smallbook{{\globaldefs = 1 - \parskip = 2pt plus 1pt - \setleading{12pt}% - % - \internalpagesizes{7.5in}{5.in}{\voffset}{.25in}{\bindingoffset}{16pt}% - % - \lispnarrowing = 0.3in - \tolerance = 700 - \hfuzz = 1pt - \contentsrightmargin = 0pt - \deftypemargin = 0pt - \defbodyindent = .5cm - \smallenvironments -}} - -% Use @afourpaper to print on European A4 paper. -\def\afourpaper{{\globaldefs = 1 - \setleading{12pt}% - \parskip = 3pt plus 2pt minus 1pt - % - \internalpagesizes{53\baselineskip}{160mm}{\voffset}{4mm}{\bindingoffset}{44pt}% - % - \tolerance = 700 - \hfuzz = 1pt -}} - -% Use @afivepaper to print on European A5 paper. -% From romildo@urano.iceb.ufop.br, 2 July 2000. -% He also recommends making @example and @lisp be small. -\def\afivepaper{{\globaldefs = 1 - \setleading{12.5pt}% - \parskip = 2pt plus 1pt minus 0.1pt - % - \internalpagesizes{166mm}{120mm}{\voffset}{-8mm}{\bindingoffset}{8pt}% - % - \lispnarrowing = 0.2in - \tolerance = 800 - \hfuzz = 1.2pt - \contentsrightmargin = 0mm - \deftypemargin = 0pt - \defbodyindent = 2mm - \tableindent = 12mm - % - \smallenvironments -}} - -% A specific text layout, 24x15cm overall, intended for A4 paper. 
Top margin -% 29mm, hence bottom margin 28mm, nominal side margin 3cm. -\def\afourlatex{{\globaldefs = 1 - \setleading{13.6pt}% - % - \afourpaper - \internalpagesizes{237mm}{150mm}{3.6mm}{3.6mm}{3mm}{7mm}% - % - \globaldefs = 0 -}} - -% Use @afourwide to print on European A4 paper in wide format. -\def\afourwide{% - \afourpaper - \internalpagesizes{6.5in}{9.5in}{\hoffset}{\normaloffset}{\bindingoffset}{7mm}% - % - \globaldefs = 0 -} - -% @pagesizes TEXTHEIGHT[,TEXTWIDTH] -% Perhaps we should allow setting the margins, \topskip, \parskip, -% and/or leading, also. Or perhaps we should compute them somehow. -% -\def\pagesizes{\parsearg\pagesizesxxx} -\def\pagesizesxxx#1{\pagesizesyyy #1,,\finish} -\def\pagesizesyyy#1,#2,#3\finish{{% - \setbox0 = \hbox{\ignorespaces #2}\ifdim\wd0 > 0pt \hsize=#2\relax \fi - \globaldefs = 1 - % - \parskip = 3pt plus 2pt minus 1pt - \setleading{13.2pt}% - % - \internalpagesizes{#1}{\hsize}{\voffset}{\normaloffset}{\bindingoffset}{44pt}% -}} - -% Set default to letter. -% -\letterpaper - - -\message{and turning on texinfo input format.} - -% Define macros to output various characters with catcode for normal text. -\catcode`\"=\other -\catcode`\~=\other -\catcode`\^=\other -\catcode`\_=\other -\catcode`\|=\other -\catcode`\<=\other -\catcode`\>=\other -\catcode`\+=\other -\catcode`\$=\other -\def\normaldoublequote{"} -\def\normaltilde{~} -\def\normalcaret{^} -\def\normalunderscore{_} -\def\normalverticalbar{|} -\def\normalless{<} -\def\normalgreater{>} -\def\normalplus{+} -\def\normaldollar{$} - -% This macro is used to make a character print one way in ttfont -% where it can probably just be output, and another way in other fonts, -% where something hairier probably needs to be done. -% -% #1 is what to print if we are indeed using \tt; #2 is what to print -% otherwise. Since all the Computer Modern typewriter fonts have zero -% interword stretch (and shrink), and it is reasonable to expect all -% typewriter fonts to have this, we can check that font parameter. -% -\def\ifusingtt#1#2{\ifdim \fontdimen3\font=0pt #1\else #2\fi} - -% Same as above, but check for italic font. Actually this also catches -% non-italic slanted fonts since it is impossible to distinguish them from -% italic fonts. But since this is only used by $ and it uses \sl anyway -% this is not a problem. -\def\ifusingit#1#2{\ifdim \fontdimen1\font>0pt #1\else #2\fi} - -% Turn off all special characters except @ -% (and those which the user can use as if they were ordinary). -% Most of these we simply print from the \tt font, but for some, we can -% use math or other variants that look better in normal text. - -\catcode`\"=\active -\def\activedoublequote{{\tt\char34}} -\let"=\activedoublequote -\catcode`\~=\active -\def~{{\tt\char126}} -\chardef\hat=`\^ -\catcode`\^=\active -\def^{{\tt \hat}} - -\catcode`\_=\active -\def_{\ifusingtt\normalunderscore\_} -% Subroutine for the previous macro. -\def\_{\leavevmode \kern.06em \vbox{\hrule width.3em height.1ex}} - -\catcode`\|=\active -\def|{{\tt\char124}} -\chardef \less=`\< -\catcode`\<=\active -\def<{{\tt \less}} -\chardef \gtr=`\> -\catcode`\>=\active -\def>{{\tt \gtr}} -\catcode`\+=\active -\def+{{\tt \char 43}} -\catcode`\$=\active -\def${\ifusingit{{\sl\$}}\normaldollar} -%\catcode 27=\active -%\def^^[{$\diamondsuit$} - -% Set up an active definition for =, but don't enable it most of the time. 
-{\catcode`\==\active -\global\def={{\tt \char 61}}} - -\catcode`+=\active -\catcode`\_=\active - -% If a .fmt file is being used, characters that might appear in a file -% name cannot be active until we have parsed the command line. -% So turn them off again, and have \everyjob (or @setfilename) turn them on. -% \otherifyactive is called near the end of this file. -\def\otherifyactive{\catcode`+=\other \catcode`\_=\other} - -\catcode`\@=0 - -% \rawbackslashxx output one backslash character in current font -\global\chardef\rawbackslashxx=`\\ -%{\catcode`\\=\other -%@gdef@rawbackslashxx{\}} - -% \rawbackslash redefines \ as input to do \rawbackslashxx. -{\catcode`\\=\active -@gdef@rawbackslash{@let\=@rawbackslashxx }} - -% \normalbackslash outputs one backslash in fixed width font. -\def\normalbackslash{{\tt\rawbackslashxx}} - -% \catcode 17=0 % Define control-q -\catcode`\\=\active - -% Used sometimes to turn off (effectively) the active characters -% even after parsing them. -@def@turnoffactive{@let"=@normaldoublequote -@let\=@realbackslash -@let~=@normaltilde -@let^=@normalcaret -@let_=@normalunderscore -@let|=@normalverticalbar -@let<=@normalless -@let>=@normalgreater -@let+=@normalplus -@let$=@normaldollar} - -@def@normalturnoffactive{@let"=@normaldoublequote -@let\=@normalbackslash -@let~=@normaltilde -@let^=@normalcaret -@let_=@normalunderscore -@let|=@normalverticalbar -@let<=@normalless -@let>=@normalgreater -@let+=@normalplus -@let$=@normaldollar} - -% Make _ and + \other characters, temporarily. -% This is canceled by @fixbackslash. -@otherifyactive - -% If a .fmt file is being used, we don't want the `\input texinfo' to show up. -% That is what \eatinput is for; after that, the `\' should revert to printing -% a backslash. -% -@gdef@eatinput input texinfo{@fixbackslash} -@global@let\ = @eatinput - -% On the other hand, perhaps the file did not have a `\input texinfo'. Then -% the first `\{ in the file would cause an error. This macro tries to fix -% that, assuming it is called before the first `\' could plausibly occur. -% Also back turn on active characters that might appear in the input -% file name, in case not using a pre-dumped format. -% -@gdef@fixbackslash{% - @ifx\@eatinput @let\ = @normalbackslash @fi - @catcode`+=@active - @catcode`@_=@active -} - -% Say @foo, not \foo, in error messages. -@escapechar = `@@ - -% These look ok in all fonts, so just make them not special. -@catcode`@& = @other -@catcode`@# = @other -@catcode`@% = @other - -@c Set initial fonts. -@textfonts -@rm - - -@c Local variables: -@c eval: (add-hook 'write-file-hooks 'time-stamp) -@c page-delimiter: "^\\\\message" -@c time-stamp-start: "def\\\\texinfoversion{" -@c time-stamp-format: "%:y-%02m-%02d.%02H" -@c time-stamp-end: "}" -@c End: diff --git a/contrib/awk/eval.c b/contrib/awk/eval.c deleted file mode 100644 index 75c9aa8..0000000 --- a/contrib/awk/eval.c +++ /dev/null @@ -1,2154 +0,0 @@ -/* - * eval.c - gawk parse tree interpreter - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - -#include "awk.h" - -extern double pow P((double x, double y)); -extern double modf P((double x, double *yp)); -extern double fmod P((double x, double y)); - -static int eval_condition P((NODE *tree)); -static NODE *op_assign P((NODE *tree)); -static NODE *func_call P((NODE *name, NODE *arg_list)); -static NODE *match_op P((NODE *tree)); -static void push_args P((int count, NODE *arglist, NODE **oldstack, - char *func_name, char **varnames)); -static void pop_fcall_stack P((void)); -static void pop_fcall P((void)); -static int in_function P((void)); -char *nodetype2str P((NODETYPE type)); -char *flags2str P((int flagval)); -static int comp_func P((const void *p1, const void *p2)); - -#if __GNUC__ < 2 -NODE *_t; /* used as a temporary in macros */ -#endif -#ifdef MSDOS -double _msc51bug; /* to get around a bug in MSC 5.1 */ -#endif -NODE *ret_node; -int OFSlen; -int ORSlen; -int OFMTidx; -int CONVFMTidx; - -/* Profiling stuff */ -#ifdef PROFILING -#define INCREMENT(n) n++ -#else -#define INCREMENT(n) /* nothing */ -#endif - -/* Macros and variables to save and restore function and loop bindings */ -/* - * the val variable allows return/continue/break-out-of-context to be - * caught and diagnosed - */ -#define PUSH_BINDING(stack, x, val) (memcpy((char *)(stack), (char *)(x), sizeof(jmp_buf)), val++) -#define RESTORE_BINDING(stack, x, val) (memcpy((char *)(x), (char *)(stack), sizeof(jmp_buf)), val--) - -static jmp_buf loop_tag; /* always the current binding */ -static int loop_tag_valid = FALSE; /* nonzero when loop_tag valid */ -static int func_tag_valid = FALSE; -static jmp_buf func_tag; -extern int exiting, exit_val; - -/* This rather ugly macro is for VMS C */ -#ifdef C -#undef C -#endif -#define C(c) ((char)c) -/* - * This table is used by the regexp routines to do case independant - * matching. Basically, every ascii character maps to itself, except - * uppercase letters map to lower case ones. This table has 256 - * entries, for ISO 8859-1. Note also that if the system this - * is compiled on doesn't use 7-bit ascii, casetable[] should not be - * defined to the linker, so gawk should not load. - * - * Do NOT make this array static, it is used in several spots, not - * just in this file. - */ -#if 'a' == 97 /* it's ascii */ -char casetable[] = { - '\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007', - '\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017', - '\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027', - '\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037', - /* ' ' '!' '"' '#' '$' '%' '&' ''' */ - '\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047', - /* '(' ')' '*' '+' ',' '-' '.' '/' */ - '\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057', - /* '0' '1' '2' '3' '4' '5' '6' '7' */ - '\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067', - /* '8' '9' ':' ';' '<' '=' '>' '?' 
*/ - '\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077', - /* '@' 'A' 'B' 'C' 'D' 'E' 'F' 'G' */ - '\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - /* 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' */ - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - /* 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W' */ - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - /* 'X' 'Y' 'Z' '[' '\' ']' '^' '_' */ - '\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137', - /* '`' 'a' 'b' 'c' 'd' 'e' 'f' 'g' */ - '\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147', - /* 'h' 'i' 'j' 'k' 'l' 'm' 'n' 'o' */ - '\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157', - /* 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' */ - '\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167', - /* 'x' 'y' 'z' '{' '|' '}' '~' */ - '\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177', - - /* Latin 1: */ - C('\200'), C('\201'), C('\202'), C('\203'), C('\204'), C('\205'), C('\206'), C('\207'), - C('\210'), C('\211'), C('\212'), C('\213'), C('\214'), C('\215'), C('\216'), C('\217'), - C('\220'), C('\221'), C('\222'), C('\223'), C('\224'), C('\225'), C('\226'), C('\227'), - C('\230'), C('\231'), C('\232'), C('\233'), C('\234'), C('\235'), C('\236'), C('\237'), - C('\240'), C('\241'), C('\242'), C('\243'), C('\244'), C('\245'), C('\246'), C('\247'), - C('\250'), C('\251'), C('\252'), C('\253'), C('\254'), C('\255'), C('\256'), C('\257'), - C('\260'), C('\261'), C('\262'), C('\263'), C('\264'), C('\265'), C('\266'), C('\267'), - C('\270'), C('\271'), C('\272'), C('\273'), C('\274'), C('\275'), C('\276'), C('\277'), - C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'), - C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'), - C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\327'), - C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\337'), - C('\340'), C('\341'), C('\342'), C('\343'), C('\344'), C('\345'), C('\346'), C('\347'), - C('\350'), C('\351'), C('\352'), C('\353'), C('\354'), C('\355'), C('\356'), C('\357'), - C('\360'), C('\361'), C('\362'), C('\363'), C('\364'), C('\365'), C('\366'), C('\367'), - C('\370'), C('\371'), C('\372'), C('\373'), C('\374'), C('\375'), C('\376'), C('\377'), -}; -#else -#include "You lose. You will need a translation table for your character set." -#endif - -#undef C - -/* - * This table maps node types to strings for debugging. - * KEEP IN SYNC WITH awk.h!!!! 
- */ -static char *nodetypes[] = { - "Node_illegal", - "Node_times", - "Node_quotient", - "Node_mod", - "Node_plus", - "Node_minus", - "Node_cond_pair", - "Node_subscript", - "Node_concat", - "Node_exp", - "Node_preincrement", - "Node_predecrement", - "Node_postincrement", - "Node_postdecrement", - "Node_unary_minus", - "Node_field_spec", - "Node_assign", - "Node_assign_times", - "Node_assign_quotient", - "Node_assign_mod", - "Node_assign_plus", - "Node_assign_minus", - "Node_assign_exp", - "Node_and", - "Node_or", - "Node_equal", - "Node_notequal", - "Node_less", - "Node_greater", - "Node_leq", - "Node_geq", - "Node_match", - "Node_nomatch", - "Node_not", - "Node_rule_list", - "Node_rule_node", - "Node_statement_list", - "Node_if_branches", - "Node_expression_list", - "Node_param_list", - "Node_K_if", - "Node_K_while", - "Node_K_for", - "Node_K_arrayfor", - "Node_K_break", - "Node_K_continue", - "Node_K_print", - "Node_K_printf", - "Node_K_next", - "Node_K_exit", - "Node_K_do", - "Node_K_return", - "Node_K_delete", - "Node_K_delete_loop", - "Node_K_getline", - "Node_K_function", - "Node_K_nextfile", - "Node_redirect_output", - "Node_redirect_append", - "Node_redirect_pipe", - "Node_redirect_pipein", - "Node_redirect_input", - "Node_redirect_twoway", - "Node_var", - "Node_var_array", - "Node_val", - "Node_builtin", - "Node_line_range", - "Node_in_array", - "Node_func", - "Node_func_call", - "Node_cond_exp", - "Node_regex", - "Node_hashnode", - "Node_ahash", - "Node_array_ref", - "Node_BINMODE", - "Node_CONVFMT", - "Node_FIELDWIDTHS", - "Node_FNR", - "Node_FS", - "Node_IGNORECASE", - "Node_LINT", - "Node_NF", - "Node_NR", - "Node_OFMT", - "Node_OFS", - "Node_ORS", - "Node_RS", - "Node_TEXTDOMAIN", - "Node_final --- this should never appear", - NULL -}; - -/* nodetype2str --- convert a node type into a printable value */ - -char * -nodetype2str(NODETYPE type) -{ - static char buf[40]; - - if (type >= Node_illegal && type <= Node_final) - return nodetypes[(int) type]; - - sprintf(buf, _("unknown nodetype %d"), (int) type); - return buf; -} - -/* flags2str --- make a flags value readable */ - -char * -flags2str(int flagval) -{ - static struct flagtab values[] = { - { MALLOC, "MALLOC" }, - { TEMP, "TEMP" }, - { PERM, "PERM" }, - { STRING, "STRING" }, - { STR, "STR" }, - { NUM, "NUM" }, - { NUMBER, "NUMBER" }, - { MAYBE_NUM, "MAYBE_NUM" }, - { ARRAYMAXED, "ARRAYMAXED" }, - { SCALAR, "SCALAR" }, - { FUNC, "FUNC" }, - { FIELD, "FIELD" }, - { INTLSTR, "INTLSTR" }, - { UNINITIALIZED, "UNINITIALIZED" }, - { 0, NULL }, - }; - - return genflags2str(flagval, values); -} - -/* genflags2str --- general routine to convert a flag value to a string */ - -char * -genflags2str(int flagval, struct flagtab *tab) -{ - static char buffer[BUFSIZ]; - char *sp; - int i, space_left, space_needed; - - sp = buffer; - space_left = BUFSIZ; - for (i = 0; tab[i].name != NULL; i++) { - /* - * note the trick, we want 1 or 0 for whether we need - * the '|' character. - */ - space_needed = (strlen(tab[i].name) + (sp != buffer)); - if (space_left < space_needed) - fatal(_("buffer overflow in genflags2str")); - - if ((flagval & tab[i].val) != 0) { - if (sp != buffer) { - *sp++ = '|'; - space_left--; - } - strcpy(sp, tab[i].name); - /* note ordering! */ - space_left -= strlen(sp); - sp += strlen(sp); - } - } - - return buffer; -} - -/* - * interpret: - * Tree is a bunch of rules to run. 
Returns zero if it hit an exit() - * statement - */ -int -interpret(register NODE *volatile tree) -{ - jmp_buf volatile loop_tag_stack; /* shallow binding stack for loop_tag */ - static jmp_buf rule_tag; /* tag the rule currently being run, for NEXT - * and EXIT statements. It is static because - * there are no nested rules */ - register NODE *volatile t = NULL; /* temporary */ - NODE **volatile lhs; /* lhs == Left Hand Side for assigns, etc */ - NODE *volatile stable_tree; - int volatile traverse = TRUE; /* True => loop thru tree (Node_rule_list) */ - - /* avoid false source indications */ - source = NULL; - sourceline = 0; - - if (tree == NULL) - return 1; - sourceline = tree->source_line; - source = tree->source_file; - switch (tree->type) { - case Node_rule_node: - traverse = FALSE; /* False => one for-loop iteration only */ - /* FALL THROUGH */ - case Node_rule_list: - for (t = tree; t != NULL; t = t->rnode) { - if (traverse) - tree = t->lnode; - sourceline = tree->source_line; - source = tree->source_file; - INCREMENT(tree->exec_count); - switch (setjmp(rule_tag)) { - case 0: /* normal non-jump */ - /* test pattern, if any */ - if (tree->lnode == NULL || - eval_condition(tree->lnode)) { - /* using the lnode exec_count is kludgey */ - if (tree->lnode != NULL) - INCREMENT(tree->lnode->exec_count); - (void) interpret(tree->rnode); - } - break; - case TAG_CONTINUE: /* NEXT statement */ - return 1; - case TAG_BREAK: - return 0; - default: - cant_happen(); - } - if (! traverse) /* case Node_rule_node */ - break; /* don't loop */ - } - break; - - case Node_statement_list: - for (t = tree; t != NULL; t = t->rnode) - (void) interpret(t->lnode); - break; - - case Node_K_if: - INCREMENT(tree->exec_count); - if (eval_condition(tree->lnode)) { - INCREMENT(tree->rnode->exec_count); - (void) interpret(tree->rnode->lnode); - } else { - (void) interpret(tree->rnode->rnode); - } - break; - - case Node_K_while: - PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - - stable_tree = tree; - while (eval_condition(stable_tree->lnode)) { - INCREMENT(stable_tree->exec_count); - switch (setjmp(loop_tag)) { - case 0: /* normal non-jump */ - (void) interpret(stable_tree->rnode); - break; - case TAG_CONTINUE: /* continue statement */ - break; - case TAG_BREAK: /* break statement */ - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - return 1; - default: - cant_happen(); - } - } - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - break; - - case Node_K_do: - PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - stable_tree = tree; - do { - INCREMENT(stable_tree->exec_count); - switch (setjmp(loop_tag)) { - case 0: /* normal non-jump */ - (void) interpret(stable_tree->rnode); - break; - case TAG_CONTINUE: /* continue statement */ - break; - case TAG_BREAK: /* break statement */ - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - return 1; - default: - cant_happen(); - } - } while (eval_condition(stable_tree->lnode)); - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - break; - - case Node_K_for: - PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - (void) interpret(tree->forloop->init); - stable_tree = tree; - while (eval_condition(stable_tree->forloop->cond)) { - INCREMENT(stable_tree->exec_count); - switch (setjmp(loop_tag)) { - case 0: /* normal non-jump */ - (void) interpret(stable_tree->lnode); - /* fall through */ - case TAG_CONTINUE: /* continue statement */ - (void) interpret(stable_tree->forloop->incr); - break; - case TAG_BREAK: /* break 
statement */ - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - return 1; - default: - cant_happen(); - } - } - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - break; - - case Node_K_arrayfor: - { - Func_ptr after_assign = NULL; - NODE **list = 0; - NODE *volatile array; - volatile size_t i; - size_t j, num_elems; - volatile int retval = 0; - static int first = TRUE; - static int sort_indices = FALSE; - -#define hakvar forloop->init -#define arrvar forloop->incr - /* get the array */ - array = tree->arrvar; - if (array->type == Node_param_list) - array = stack_ptr[array->param_cnt]; - if (array->type == Node_array_ref) - array = array->orig_array; - if ((array->flags & SCALAR) != 0) - fatal(_("attempt to use scalar `%s' as array"), array->vname); - - /* sanity: do nothing if empty */ - if (array->type == Node_var || array->var_array == NULL - || array->table_size == 0) { - break; /* from switch */ - } - - /* allocate space for array */ - num_elems = array->table_size; - emalloc(list, NODE **, num_elems * sizeof(NODE *), "for_loop"); - - /* populate it */ - for (i = j = 0; i < array->array_size; i++) { - NODE *t = array->var_array[i]; - - if (t == NULL) - continue; - - for (; t != NULL; t = t->ahnext) { - list[j++] = dupnode(t->ahname); - } - } - - if (first) { - first = FALSE; - sort_indices = (getenv("WHINY_USERS") != 0); - } - - if (sort_indices) - qsort(list, num_elems, sizeof(NODE *), comp_func); /* shazzam! */ - - /* now we can run the loop */ - PUSH_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - - lhs = get_lhs(tree->hakvar, &after_assign, FALSE); - stable_tree = tree; - for (i = 0; i < num_elems; i++) { - INCREMENT(stable_tree->exec_count); - unref(*((NODE **) lhs)); - *lhs = dupnode(list[i]); - if (after_assign) - (*after_assign)(); - switch (setjmp(loop_tag)) { - case 0: - (void) interpret(stable_tree->lnode); - case TAG_CONTINUE: - break; - - case TAG_BREAK: - retval = 1; - goto done; - - default: - cant_happen(); - } - } - - done: - RESTORE_BINDING(loop_tag_stack, loop_tag, loop_tag_valid); - - if (do_lint && num_elems != array->table_size) - lintwarn(_("for loop: array `%s' changed size from %d to %d during loop execution"), - array->vname, num_elems, array->table_size); - - for (i = 0; i < num_elems; i++) - unref(list[i]); - - free(list); - - if (retval == 1) - return 1; - break; - } - - case Node_K_break: - INCREMENT(tree->exec_count); - if (! loop_tag_valid) { - /* - * Old AT&T nawk treats break outside of loops like - * next. New ones catch it at parse time. Allow it if - * do_traditional is on, and complain if lint. - */ - static int warned = FALSE; - - if (do_lint && ! warned) { - lintwarn(_("`break' outside a loop is not portable")); - warned = TRUE; - } - if (! do_traditional || do_posix) - fatal(_("`break' outside a loop is not allowed")); - if (in_function()) - pop_fcall_stack(); - longjmp(rule_tag, TAG_CONTINUE); - } else - longjmp(loop_tag, TAG_BREAK); - break; - - case Node_K_continue: - INCREMENT(tree->exec_count); - if (! loop_tag_valid) { - /* - * Old AT&T nawk treats continue outside of loops like - * next. New ones catch it at parse time. Allow it if - * do_traditional is on, and complain if lint. - */ - static int warned = FALSE; - - if (do_lint && ! warned) { - lintwarn(_("`continue' outside a loop is not portable")); - warned = TRUE; - } - if (! 
do_traditional || do_posix) - fatal(_("`continue' outside a loop is not allowed")); - if (in_function()) - pop_fcall_stack(); - longjmp(rule_tag, TAG_CONTINUE); - } else - longjmp(loop_tag, TAG_CONTINUE); - break; - - case Node_K_print: - INCREMENT(tree->exec_count); - do_print(tree); - break; - - case Node_K_printf: - INCREMENT(tree->exec_count); - do_printf(tree); - break; - - case Node_K_delete: - INCREMENT(tree->exec_count); - do_delete(tree->lnode, tree->rnode); - break; - - case Node_K_delete_loop: - do_delete_loop(tree->lnode, tree->rnode); - break; - - case Node_K_next: - INCREMENT(tree->exec_count); - if (in_begin_rule) - fatal(_("`next' cannot be called from a BEGIN rule")); - else if (in_end_rule) - fatal(_("`next' cannot be called from an END rule")); - - /* could add a lint check here */ - if (in_function()) - pop_fcall_stack(); - - longjmp(rule_tag, TAG_CONTINUE); - break; - - case Node_K_nextfile: - INCREMENT(tree->exec_count); - if (in_begin_rule) - fatal(_("`nextfile' cannot be called from a BEGIN rule")); - else if (in_end_rule) - fatal(_("`nextfile' cannot be called from an END rule")); - - /* could add a lint check here */ - if (in_function()) - pop_fcall_stack(); - - do_nextfile(); - break; - - case Node_K_exit: - INCREMENT(tree->exec_count); - /* - * In A,K,&W, p. 49, it says that an exit statement "... - * causes the program to behave as if the end of input had - * occurred; no more input is read, and the END actions, if - * any are executed." This implies that the rest of the rules - * are not done. So we immediately break out of the main loop. - */ - exiting = TRUE; - if (tree->lnode != NULL) { - t = tree_eval(tree->lnode); - exit_val = (int) force_number(t); - free_temp(t); - } - longjmp(rule_tag, TAG_BREAK); - break; - - case Node_K_return: - INCREMENT(tree->exec_count); - t = tree_eval(tree->lnode); - ret_node = dupnode(t); - free_temp(t); - longjmp(func_tag, TAG_RETURN); - break; - - default: - /* - * Appears to be an expression statement. Throw away the - * value. 
- */ - if (do_lint && tree->type == Node_var) - lintwarn(_("statement has no effect")); - INCREMENT(tree->exec_count); - t = tree_eval(tree); - free_temp(t); - break; - } - return 1; -} - -/* r_tree_eval --- evaluate a subtree */ - -NODE * -r_tree_eval(register NODE *tree, int iscond) -{ - register NODE *r, *t1, *t2; /* return value & temporary subtrees */ - register NODE **lhs; - register int di; - AWKNUM x, x1, x2; - long lx; -#ifdef _CRAY - long lx2; -#endif - -#ifdef GAWKDEBUG - if (tree == NULL) - return Nnull_string; - else if (tree->type == Node_val) { - if (tree->stref <= 0) - cant_happen(); - return tree; - } else if (tree->type == Node_var) { - if (tree->var_value->stref <= 0) - cant_happen(); - if ((tree->flags & UNINITIALIZED) != 0) - warning(_("reference to uninitialized variable `%s'"), - tree->vname); - return tree->var_value; - } -#endif - - if (tree->type == Node_param_list) { - if ((tree->flags & FUNC) != 0) - fatal(_("can't use function name `%s' as variable or array"), - tree->vname); - - tree = stack_ptr[tree->param_cnt]; - - if (tree == NULL) { - if (do_lint) - lintwarn(_("reference to uninitialized argument `%s'"), - tree->vname); - return Nnull_string; - } - - if (do_lint && (tree->flags & UNINITIALIZED) != 0) - lintwarn(_("reference to uninitialized argument `%s'"), - tree->vname); - } - if (tree->type == Node_array_ref) - tree = tree->orig_array; - - switch (tree->type) { - case Node_var: - if (do_lint && (tree->flags & UNINITIALIZED) != 0) - lintwarn(_("reference to uninitialized variable `%s'"), - tree->vname); - return tree->var_value; - - case Node_and: - return tmp_number((AWKNUM) (eval_condition(tree->lnode) - && eval_condition(tree->rnode))); - - case Node_or: - return tmp_number((AWKNUM) (eval_condition(tree->lnode) - || eval_condition(tree->rnode))); - - case Node_not: - return tmp_number((AWKNUM) ! 
eval_condition(tree->lnode)); - - /* Builtins */ - case Node_builtin: - return (*tree->proc)(tree->subnode); - - case Node_K_getline: - return (do_getline(tree)); - - case Node_in_array: - return tmp_number((AWKNUM) in_array(tree->lnode, tree->rnode)); - - case Node_func_call: - return func_call(tree->rnode, tree->lnode); - - /* unary operations */ - case Node_NR: - case Node_FNR: - case Node_NF: - case Node_FIELDWIDTHS: - case Node_FS: - case Node_RS: - case Node_field_spec: - case Node_subscript: - case Node_IGNORECASE: - case Node_OFS: - case Node_ORS: - case Node_OFMT: - case Node_CONVFMT: - case Node_BINMODE: - case Node_LINT: - case Node_TEXTDOMAIN: - lhs = get_lhs(tree, (Func_ptr *) NULL, TRUE); - return *lhs; - - case Node_var_array: - fatal(_("attempt to use array `%s' in a scalar context"), - tree->vname); - - case Node_unary_minus: - t1 = tree_eval(tree->subnode); - x = -force_number(t1); - free_temp(t1); - return tmp_number(x); - - case Node_cond_exp: - if (eval_condition(tree->lnode)) - return tree_eval(tree->rnode->lnode); - return tree_eval(tree->rnode->rnode); - - case Node_match: - case Node_nomatch: - case Node_regex: - return match_op(tree); - - case Node_func: - fatal(_("function `%s' called with space between name and `(',\n%s"), - tree->lnode->param, - _("or used in other expression context")); - - /* assignments */ - case Node_assign: - { - Func_ptr after_assign = NULL; - - if (do_lint && iscond) - lintwarn(_("assignment used in conditional context")); - r = tree_eval(tree->rnode); - lhs = get_lhs(tree->lnode, &after_assign, FALSE); - assign_val(lhs, r); - free_temp(r); - tree->lnode->flags |= SCALAR; - if (after_assign) - (*after_assign)(); - return *lhs; - } - - case Node_concat: - { - NODE **treelist; - NODE **strlist; - NODE *save_tree; - register NODE **treep; - register NODE **strp; - register size_t len; - register size_t supposed_len; - char *str; - register char *dest; - int alloc_count, str_count; - int i; - - /* - * This is an efficiency hack for multiple adjacent string - * concatenations, to avoid recursion and string copies. - * - * Node_concat trees grow downward to the left, so - * descend to lowest (first) node, accumulating nodes - * to evaluate to strings as we go. - */ - - /* - * But first, no arbitrary limits. Count the number of - * nodes and malloc the treelist and strlist arrays. - * There will be alloc_count + 1 items to concatenate. We - * also leave room for an extra pointer at the end to - * use as a sentinel. Thus, start alloc_count at 2. - */ - save_tree = tree; - for (alloc_count = 2; tree != NULL && tree->type == Node_concat; - tree = tree->lnode) - alloc_count++; - tree = save_tree; - emalloc(treelist, NODE **, sizeof(NODE *) * alloc_count, "tree_eval"); - emalloc(strlist, NODE **, sizeof(NODE *) * alloc_count, "tree_eval"); - - /* Now, here we go. */ - treep = treelist; - while (tree != NULL && tree->type == Node_concat) { - *treep++ = tree->rnode; - tree = tree->lnode; - } - *treep = tree; - /* - * Now, evaluate to strings in LIFO order, accumulating - * the string length, so we can do a single malloc at the - * end. - * - * Evaluate the expressions first, then get their - * lengthes, in case one of the expressions has a - * side effect that changes one of the others. - * See test/nasty.awk. - * - * dupnode the results a la do_print, to give us - * more predicable behavior; compare gawk 3.0.6 to - * nawk/mawk on test/nasty.awk. 
- */ - strp = strlist; - supposed_len = len = 0; - while (treep >= treelist) { - NODE *n; - - /* Here lies the wumpus's brother. R.I.P. */ - n = force_string(tree_eval(*treep--)); - *strp = dupnode(n); - free_temp(n); - supposed_len += (*strp)->stlen; - strp++; - } - *strp = NULL; - - str_count = strp - strlist; - strp = strlist; - for (i = 0; i < str_count; i++) { - len += (*strp)->stlen; - strp++; - } - if (do_lint && supposed_len != len) - lintwarn(_("concatenation: side effects in one expression have changed the length of another!")); - emalloc(str, char *, len+2, "tree_eval"); - str[len] = str[len+1] = '\0'; /* for good measure */ - dest = str; - strp = strlist; - while (*strp != NULL) { - memcpy(dest, (*strp)->stptr, (*strp)->stlen); - dest += (*strp)->stlen; - unref(*strp); - strp++; - } - r = make_str_node(str, len, ALREADY_MALLOCED); - r->flags |= TEMP; - - free(strlist); - free(treelist); - } - return r; - - /* other assignment types are easier because they are numeric */ - case Node_preincrement: - case Node_predecrement: - case Node_postincrement: - case Node_postdecrement: - case Node_assign_exp: - case Node_assign_times: - case Node_assign_quotient: - case Node_assign_mod: - case Node_assign_plus: - case Node_assign_minus: - return op_assign(tree); - default: - break; /* handled below */ - } - - /* evaluate subtrees in order to do binary operation, then keep going */ - t1 = tree_eval(tree->lnode); - t2 = tree_eval(tree->rnode); - - switch (tree->type) { - case Node_geq: - case Node_leq: - case Node_greater: - case Node_less: - case Node_notequal: - case Node_equal: - di = cmp_nodes(t1, t2); - free_temp(t1); - free_temp(t2); - switch (tree->type) { - case Node_equal: - return tmp_number((AWKNUM) (di == 0)); - case Node_notequal: - return tmp_number((AWKNUM) (di != 0)); - case Node_less: - return tmp_number((AWKNUM) (di < 0)); - case Node_greater: - return tmp_number((AWKNUM) (di > 0)); - case Node_leq: - return tmp_number((AWKNUM) (di <= 0)); - case Node_geq: - return tmp_number((AWKNUM) (di >= 0)); - default: - cant_happen(); - } - break; - default: - break; /* handled below */ - } - - x1 = force_number(t1); - free_temp(t1); - x2 = force_number(t2); - free_temp(t2); - switch (tree->type) { - case Node_exp: - if ((lx = x2) == x2 && lx >= 0) { /* integer exponent */ - if (lx == 0) - x = 1; - else if (lx == 1) - x = x1; - else { - /* doing it this way should be more precise */ - for (x = x1; --lx; ) - x *= x1; - } - } else - x = pow((double) x1, (double) x2); - return tmp_number(x); - - case Node_times: - return tmp_number(x1 * x2); - - case Node_quotient: - if (x2 == 0) - fatal(_("division by zero attempted")); -#ifdef _CRAY - /* special case for integer division, put in for Cray */ - lx2 = x2; - if (lx2 == 0) - return tmp_number(x1 / x2); - lx = (long) x1 / lx2; - if (lx * x2 == x1) - return tmp_number((AWKNUM) lx); - else -#endif - return tmp_number(x1 / x2); - - case Node_mod: - if (x2 == 0) - fatal(_("division by zero attempted in `%%'")); -#ifdef HAVE_FMOD - return tmp_number(fmod(x1, x2)); -#else /* ! HAVE_FMOD */ - (void) modf(x1 / x2, &x); - return tmp_number(x1 - x * x2); -#endif /* ! HAVE_FMOD */ - - case Node_plus: - return tmp_number(x1 + x2); - - case Node_minus: - return tmp_number(x1 - x2); - - case Node_var_array: - fatal(_("attempt to use array `%s' in a scalar context"), - tree->vname); - - default: - fatal(_("illegal type (%s) in tree_eval"), nodetype2str(tree->type)); - } - return 0; -} - -/* eval_condition --- is TREE true or false? 
Returns 0==false, non-zero==true */ - -static int -eval_condition(register NODE *tree) -{ - register NODE *t1; - register int ret; - - if (tree == NULL) /* Null trees are the easiest kinds */ - return TRUE; - if (tree->type == Node_line_range) { - /* - * Node_line_range is kind of like Node_match, EXCEPT: the - * lnode field (more properly, the condpair field) is a node - * of a Node_cond_pair; whether we evaluate the lnode of that - * node or the rnode depends on the triggered word. More - * precisely: if we are not yet triggered, we tree_eval the - * lnode; if that returns true, we set the triggered word. - * If we are triggered (not ELSE IF, note), we tree_eval the - * rnode, clear triggered if it succeeds, and perform our - * action (regardless of success or failure). We want to be - * able to begin and end on a single input record, so this - * isn't an ELSE IF, as noted above. - */ - if (! tree->triggered) { - if (! eval_condition(tree->condpair->lnode)) - return FALSE; - else - tree->triggered = TRUE; - } - /* Else we are triggered */ - if (eval_condition(tree->condpair->rnode)) - tree->triggered = FALSE; - return TRUE; - } - - /* - * Could just be J.random expression. in which case, null and 0 are - * false, anything else is true - */ - - t1 = m_tree_eval(tree, TRUE); - if (t1->flags & MAYBE_NUM) - (void) force_number(t1); - if (t1->flags & NUMBER) - ret = (t1->numbr != 0.0); - else - ret = (t1->stlen != 0); - free_temp(t1); - return ret; -} - -/* cmp_nodes --- compare two nodes, returning negative, 0, positive */ - -int -cmp_nodes(register NODE *t1, register NODE *t2) -{ - register int ret; - register size_t len1, len2; - register int l; - int ldiff; - - if (t1 == t2) - return 0; - if (t1->flags & MAYBE_NUM) - (void) force_number(t1); - if (t2->flags & MAYBE_NUM) - (void) force_number(t2); - if ((t1->flags & NUMBER) && (t2->flags & NUMBER)) { - if (t1->numbr == t2->numbr) - return 0; - /* don't subtract, in case one or both are infinite */ - else if (t1->numbr < t2->numbr) - return -1; - else - return 1; - } - (void) force_string(t1); - (void) force_string(t2); - len1 = t1->stlen; - len2 = t2->stlen; - ldiff = len1 - len2; - if (len1 == 0 || len2 == 0) - return ldiff; - l = (ldiff <= 0 ? len1 : len2); - if (IGNORECASE) { - register unsigned char *cp1 = (unsigned char *) t1->stptr; - register unsigned char *cp2 = (unsigned char *) t2->stptr; - - for (ret = 0; l-- > 0 && ret == 0; cp1++, cp2++) - ret = casetable[*cp1] - casetable[*cp2]; - } else - ret = memcmp(t1->stptr, t2->stptr, l); - return (ret == 0 ? ldiff : ret); -} - -/* op_assign --- do +=, -=, etc. */ - -static NODE * -op_assign(register NODE *tree) -{ - AWKNUM rval, lval; - NODE **lhs; - AWKNUM t1, t2; - long ltemp; - NODE *tmp; - Func_ptr after_assign = NULL; - - /* - * For ++ and --, get the lhs when doing the op and then - * return. For += etc, do the rhs first, since it can - * rearrange things, and *then* get the lhs. - */ - - switch(tree->type) { - case Node_preincrement: - case Node_predecrement: - lhs = get_lhs(tree->lnode, &after_assign, TRUE); - lval = force_number(*lhs); - unref(*lhs); - *lhs = make_number(lval + - (tree->type == Node_preincrement ? 1.0 : -1.0)); - tree->lnode->flags |= SCALAR; - if (after_assign) - (*after_assign)(); - return *lhs; - - case Node_postincrement: - case Node_postdecrement: - lhs = get_lhs(tree->lnode, &after_assign, TRUE); - lval = force_number(*lhs); - unref(*lhs); - *lhs = make_number(lval + - (tree->type == Node_postincrement ? 
1.0 : -1.0)); - tree->lnode->flags |= SCALAR; - if (after_assign) - (*after_assign)(); - return tmp_number(lval); - default: - break; /* handled below */ - } - - /* - * It's a += kind of thing. Do the rhs, then the lhs. - */ - - tmp = tree_eval(tree->rnode); - rval = force_number(tmp); - free_temp(tmp); - - lhs = get_lhs(tree->lnode, &after_assign, FALSE); - lval = force_number(*lhs); - - unref(*lhs); - switch(tree->type) { - case Node_assign_exp: - if ((ltemp = rval) == rval) { /* integer exponent */ - if (ltemp == 0) - *lhs = make_number((AWKNUM) 1); - else if (ltemp == 1) - *lhs = make_number(lval); - else { - /* doing it this way should be more precise */ - for (t1 = t2 = lval; --ltemp; ) - t1 *= t2; - *lhs = make_number(t1); - } - } else - *lhs = make_number((AWKNUM) pow((double) lval, (double) rval)); - break; - - case Node_assign_times: - *lhs = make_number(lval * rval); - break; - - case Node_assign_quotient: - if (rval == (AWKNUM) 0) - fatal(_("division by zero attempted in `/='")); -#ifdef _CRAY - /* special case for integer division, put in for Cray */ - ltemp = rval; - if (ltemp == 0) { - *lhs = make_number(lval / rval); - break; - } - ltemp = (long) lval / ltemp; - if (ltemp * lval == rval) - *lhs = make_number((AWKNUM) ltemp); - else -#endif /* _CRAY */ - *lhs = make_number(lval / rval); - break; - - case Node_assign_mod: - if (rval == (AWKNUM) 0) - fatal(_("division by zero attempted in `%%='")); -#ifdef HAVE_FMOD - *lhs = make_number(fmod(lval, rval)); -#else /* ! HAVE_FMOD */ - (void) modf(lval / rval, &t1); - t2 = lval - rval * t1; - *lhs = make_number(t2); -#endif /* ! HAVE_FMOD */ - break; - - case Node_assign_plus: - *lhs = make_number(lval + rval); - break; - - case Node_assign_minus: - *lhs = make_number(lval - rval); - break; - default: - cant_happen(); - } - tree->lnode->flags |= SCALAR; - if (after_assign) - (*after_assign)(); - return *lhs; -} - -static struct fcall { - char *fname; - unsigned long count; - NODE *arglist; - NODE **prevstack; - NODE **stack; -} *fcall_list = NULL; - -static long fcall_list_size = 0; -static long curfcall = -1; - -/* in_function --- return true/false if we need to unwind awk functions */ - -static int -in_function() -{ - return (curfcall >= 0); -} - -/* pop_fcall --- pop off a single function call */ - -static void -pop_fcall() -{ - NODE *n, **sp, *arg, *argp; - int count; - struct fcall *f; - - assert(curfcall >= 0); - f = & fcall_list[curfcall]; - stack_ptr = f->prevstack; - - /* - * here, we pop each parameter and check whether - * it was an array. If so, and if the arg. passed in was - * a simple variable, then the value should be copied back. - * This achieves "call-by-reference" for arrays. - */ - sp = f->stack; - count = f->count; - - for (argp = f->arglist; count > 0 && argp != NULL; argp = argp->rnode) { - arg = argp->lnode; - if (arg->type == Node_param_list) - arg = stack_ptr[arg->param_cnt]; - n = *sp++; - if (n->type == Node_var_array || n->type == Node_array_ref) { - NODETYPE old_type; /* for check, below */ - - old_type = arg->type; - - /* - * subtlety: if arg->type is Node_var but n->type - * is Node_var_array, then the array routines noticed - * that a variable name was really an array and - * changed the type. But when v->name was pushed - * on the stack, it came out of the varnames array, - * and was not malloc'ed, so we shouldn't free it. - * See the corresponding code in push_args(). - * Thanks to Juergen Kahrs for finding a test case - * that shows this. 
- */ - if (old_type == Node_var_array || old_type == Node_array_ref) - free(n->vname); - - if (arg->type == Node_var) { - /* type changed, copy array back for call by reference */ - /* should we free arg->var_value ? */ - arg->var_array = n->var_array; - arg->type = Node_var_array; - arg->array_size = n->array_size; - arg->table_size = n->table_size; - arg->flags = n->flags; - } - } - /* n->lnode overlays the array size, don't unref it if array */ - if (n->type != Node_var_array && n->type != Node_array_ref) - unref(n->lnode); - freenode(n); - count--; - } - while (count-- > 0) { - n = *sp++; - /* if n is a local array, all the elements should be freed */ - if (n->type == Node_var_array) - assoc_clear(n); - /* n->lnode overlays the array size, don't unref it if array */ - if (n->type != Node_var_array && n->type != Node_array_ref) - unref(n->lnode); - freenode(n); - } - if (f->stack) - free((char *) f->stack); - memset(f, '\0', sizeof(struct fcall)); - curfcall--; -} - -/* pop_fcall_stack --- pop off all function args, don't leak memory */ - -static void -pop_fcall_stack() -{ - while (curfcall >= 0) - pop_fcall(); -} - -/* push_args --- push function arguments onto the stack */ - -static void -push_args(int count, - NODE *arglist, - NODE **oldstack, - char *func_name, - char **varnames) -{ - struct fcall *f; - NODE *arg, *argp, *r, **sp, *n; - int i; - int num_args; - - num_args = count; /* save for later use */ - - if (fcall_list_size == 0) { /* first time */ - emalloc(fcall_list, struct fcall *, 10 * sizeof(struct fcall), - "push_args"); - fcall_list_size = 10; - } - - if (++curfcall >= fcall_list_size) { - fcall_list_size *= 2; - erealloc(fcall_list, struct fcall *, - fcall_list_size * sizeof(struct fcall), "push_args"); - } - f = & fcall_list[curfcall]; - memset(f, '\0', sizeof(struct fcall)); - - if (count > 0) - emalloc(f->stack, NODE **, count*sizeof(NODE *), "push_args"); - f->count = count; - f->fname = func_name; /* not used, for debugging, just in case */ - f->arglist = arglist; - f->prevstack = oldstack; - - sp = f->stack; - - /* for each calling arg. add NODE * on stack */ - for (argp = arglist, i = 0; count > 0 && argp != NULL; argp = argp->rnode) { - static char from[] = N_("%s (from %s)"); - arg = argp->lnode; - getnode(r); - r->type = Node_var; - r->flags = 0; - /* call by reference for arrays; see below also */ - if (arg->type == Node_param_list) { - /* we must also reassign f here; see below */ - f = & fcall_list[curfcall]; - arg = f->prevstack[arg->param_cnt]; - } - if (arg->type == Node_var_array) { - char *p; - size_t len; - - r->type = Node_array_ref; - r->flags &= ~SCALAR; - r->orig_array = arg; - len = strlen(varnames[i]) + strlen(arg->vname) - + strlen(gettext(from)) - 4 + 1; - emalloc(p, char *, len, "push_args"); - sprintf(p, _(from), varnames[i], arg->vname); - r->vname = p; - } else if (arg->type == Node_array_ref) { - char *p; - size_t len; - - *r = *arg; - len = strlen(varnames[i]) + strlen(arg->vname) - + strlen(gettext(from)) - 4 + 1; - emalloc(p, char *, len, "push_args"); - sprintf(p, _(from), varnames[i], arg->vname); - r->vname = p; - } else { - n = tree_eval(arg); - r->lnode = dupnode(n); - r->rnode = (NODE *) NULL; - if ((n->flags & SCALAR) != 0) - r->flags |= SCALAR; - r->vname = varnames[i]; - free_temp(n); - } - *sp++ = r; - i++; - count--; - } - if (argp != NULL) /* left over calling args. */ - warning( - _("function `%s' called with more arguments than declared"), - func_name); - - /* add remaining params. 
on stack with null value */ - while (count-- > 0) { - getnode(r); - r->type = Node_var; - r->lnode = Nnull_string; - r->flags &= ~SCALAR; - r->rnode = (NODE *) NULL; - r->vname = varnames[i++]; - r->flags = UNINITIALIZED; - r->param_cnt = num_args - count; - *sp++ = r; - } - - /* - * We have to reassign f. Why, you may ask? It is possible that - * other functions were called during the course of tree_eval()-ing - * the arguments to this function. As a result of that, fcall_list - * may have been realloc()'ed, with the result that f is now - * pointing into free()'d space. This was a nasty one to track down. - */ - f = & fcall_list[curfcall]; - - stack_ptr = f->stack; -} - -/* func_call --- call a function, call by reference for arrays */ - -NODE **stack_ptr; - -static NODE * -func_call(NODE *name, /* name is a Node_val giving function name */ - NODE *arg_list) /* Node_expression_list of calling args. */ -{ - register NODE *r; - NODE *f; - jmp_buf volatile func_tag_stack; - jmp_buf volatile loop_tag_stack; - int volatile save_loop_tag_valid = FALSE; - NODE *save_ret_node; - extern NODE *ret_node; - - /* retrieve function definition node */ - f = lookup(name->stptr); - if (f == NULL || f->type != Node_func) - fatal(_("function `%s' not defined"), name->stptr); -#ifdef FUNC_TRACE - fprintf(stderr, _("function %s called\n"), name->stptr); -#endif - push_args(f->lnode->param_cnt, arg_list, stack_ptr, name->stptr, - f->parmlist); - - /* - * Execute function body, saving context, as a return statement - * will longjmp back here. - * - * Have to save and restore the loop_tag stuff so that a return - * inside a loop in a function body doesn't scrog any loops going - * on in the main program. We save the necessary info in variables - * local to this function so that function nesting works OK. - * We also only bother to save the loop stuff if we're in a loop - * when the function is called. - */ - if (loop_tag_valid) { - int junk = 0; - - save_loop_tag_valid = (volatile int) loop_tag_valid; - PUSH_BINDING(loop_tag_stack, loop_tag, junk); - loop_tag_valid = FALSE; - } - PUSH_BINDING(func_tag_stack, func_tag, func_tag_valid); - save_ret_node = ret_node; - ret_node = Nnull_string; /* default return value */ - INCREMENT(f->exec_count); /* count function calls */ - if (setjmp(func_tag) == 0) - (void) interpret(f->rnode); - - r = ret_node; - ret_node = (NODE *) save_ret_node; - RESTORE_BINDING(func_tag_stack, func_tag, func_tag_valid); - pop_fcall(); - - /* Restore the loop_tag stuff if necessary. */ - if (save_loop_tag_valid) { - int junk = 0; - - loop_tag_valid = (int) save_loop_tag_valid; - RESTORE_BINDING(loop_tag_stack, loop_tag, junk); - } - - if ((r->flags & PERM) == 0) - r->flags |= TEMP; - return r; -} - -#ifdef PROFILING -/* dump_fcall_stack --- print a backtrace of the awk function calls */ - -void -dump_fcall_stack(FILE *fp) -{ - int i; - - if (curfcall < 0) - return; - - fprintf(fp, _("\n\t# Function Call Stack:\n\n")); - for (i = curfcall; i >= 0; i--) - fprintf(fp, "\t# %3d. %s\n", i+1, fcall_list[i].fname); - fprintf(fp, _("\t# -- main --\n")); -} -#endif /* PROFILING */ - -/* - * r_get_lhs: - * This returns a POINTER to a node pointer. get_lhs(ptr) is the current - * value of the var, or where to store the var's new value - * - * For the special variables, don't unref their current value if it's - * the same as the internal copy; perhaps the current one is used in - * a concatenation or some other expression somewhere higher up in the - * call chain. Ouch. 
- */ - -NODE ** -r_get_lhs(register NODE *ptr, Func_ptr *assign, int reference) -{ - register NODE **aptr = NULL; - register NODE *n; - - if (assign) - *assign = NULL; /* for safety */ - if (ptr->type == Node_param_list) { - if ((ptr->flags & FUNC) != 0) - fatal(_("can't use function name `%s' as variable or array"), ptr->vname); - ptr = stack_ptr[ptr->param_cnt]; - } - - switch (ptr->type) { - case Node_array_ref: - case Node_var_array: - fatal(_("attempt to use array `%s' in a scalar context"), - ptr->vname); - - case Node_var: - if (! reference) - ptr->flags &= ~UNINITIALIZED; - else if (do_lint && (ptr->flags & UNINITIALIZED) != 0) - lintwarn(_("reference to uninitialized variable `%s'"), - ptr->vname); - - aptr = &(ptr->var_value); -#ifdef GAWKDEBUG - if (ptr->var_value->stref <= 0) - cant_happen(); -#endif - break; - - case Node_FIELDWIDTHS: - aptr = &(FIELDWIDTHS_node->var_value); - if (assign != NULL) - *assign = set_FIELDWIDTHS; - break; - - case Node_RS: - aptr = &(RS_node->var_value); - if (assign != NULL) - *assign = set_RS; - break; - - case Node_FS: - aptr = &(FS_node->var_value); - if (assign != NULL) - *assign = set_FS; - break; - - case Node_FNR: - if (FNR_node->var_value->numbr != FNR) { - unref(FNR_node->var_value); - FNR_node->var_value = make_number((AWKNUM) FNR); - } - aptr = &(FNR_node->var_value); - if (assign != NULL) - *assign = set_FNR; - break; - - case Node_NR: - if (NR_node->var_value->numbr != NR) { - unref(NR_node->var_value); - NR_node->var_value = make_number((AWKNUM) NR); - } - aptr = &(NR_node->var_value); - if (assign != NULL) - *assign = set_NR; - break; - - case Node_NF: - if (NF == -1 || NF_node->var_value->numbr != NF) { - if (NF == -1) - (void) get_field(HUGE-1, assign); /* parse record */ - unref(NF_node->var_value); - NF_node->var_value = make_number((AWKNUM) NF); - } - aptr = &(NF_node->var_value); - if (assign != NULL) - *assign = set_NF; - break; - - case Node_IGNORECASE: - aptr = &(IGNORECASE_node->var_value); - if (assign != NULL) - *assign = set_IGNORECASE; - break; - - case Node_BINMODE: - aptr = &(BINMODE_node->var_value); - if (assign != NULL) - *assign = set_BINMODE; - break; - - case Node_LINT: - aptr = &(LINT_node->var_value); - if (assign != NULL) - *assign = set_LINT; - break; - - case Node_OFMT: - aptr = &(OFMT_node->var_value); - if (assign != NULL) - *assign = set_OFMT; - break; - - case Node_CONVFMT: - aptr = &(CONVFMT_node->var_value); - if (assign != NULL) - *assign = set_CONVFMT; - break; - - case Node_ORS: - aptr = &(ORS_node->var_value); - if (assign != NULL) - *assign = set_ORS; - break; - - case Node_OFS: - aptr = &(OFS_node->var_value); - if (assign != NULL) - *assign = set_OFS; - break; - - case Node_TEXTDOMAIN: - aptr = &(TEXTDOMAIN_node->var_value); - if (assign != NULL) - *assign = set_TEXTDOMAIN; - break; - - case Node_param_list: - { - NODE *n = stack_ptr[ptr->param_cnt]; - - /* - * This test should always be true, due to the code - * above, before the switch, that handles parameters. - */ - if (n->type != Node_var_array) - aptr = &n->var_value; - else - fatal(_("attempt to use array `%s' in a scalar context"), - n->vname); - - if (! 
reference) - n->flags &= ~UNINITIALIZED; - else if (do_lint && (n->flags & UNINITIALIZED) != 0) - lintwarn(_("reference to uninitialized argument `%s'"), - n->vname); - } - break; - - case Node_field_spec: - { - int field_num; - - n = tree_eval(ptr->lnode); - if (do_lint) { - if ((n->flags & NUMBER) == 0) { - lintwarn(_("attempt to field reference from non-numeric value")); - if (n->stlen == 0) - lintwarn(_("attempt to reference from null string")); - } - } - field_num = (int) force_number(n); - free_temp(n); - if (field_num < 0) - fatal(_("attempt to access field %d"), field_num); - if (field_num == 0 && field0_valid) { /* short circuit */ - aptr = &fields_arr[0]; - if (assign != NULL) - *assign = reset_record; - break; - } - aptr = get_field(field_num, assign); - break; - } - - case Node_subscript: - n = ptr->lnode; - if (n->type == Node_param_list) { - n = stack_ptr[n->param_cnt]; - if ((n->flags & SCALAR) != 0) - fatal(_("attempt to use scalar parameter `%s' as an array"), n->vname); - } - if (n->type == Node_array_ref) { - n = n->orig_array; - assert(n->type == Node_var_array || n->type == Node_var); - } - if (n->type == Node_func) { - fatal(_("attempt to use function `%s' as array"), - n->lnode->param); - } - aptr = assoc_lookup(n, concat_exp(ptr->rnode), reference); - break; - - case Node_func: - fatal(_("`%s' is a function, assignment is not allowed"), - ptr->lnode->param); - - case Node_builtin: -#if 1 - /* in gawk for a while */ - fatal(_("assignment is not allowed to result of builtin function")); -#else - /* - * This is how Christos at Deshaw did it. - * Does this buy us anything? - */ - if (ptr->proc == NULL) - fatal(_("assignment is not allowed to result of builtin function")); - ptr->callresult = (*ptr->proc)(ptr->subnode); - aptr = &ptr->callresult; - break; -#endif - - default: - fprintf(stderr, "type = %s\n", nodetype2str(ptr->type)); - fflush(stderr); - cant_happen(); - } - return aptr; -} - -/* match_op --- do ~ and !~ */ - -static NODE * -match_op(register NODE *tree) -{ - register NODE *t1; - register Regexp *rp; - int i; - int match = TRUE; - int kludge_need_start = FALSE; /* FIXME: --- see below */ - - if (tree->type == Node_nomatch) - match = FALSE; - if (tree->type == Node_regex) - t1 = *get_field(0, (Func_ptr *) 0); - else { - t1 = force_string(tree_eval(tree->lnode)); - tree = tree->rnode; - } - rp = re_update(tree); - /* - * FIXME: - * - * Any place where research() is called with a last parameter of - * FALSE, we need to use the avoid_dfa test. This is the only place - * at the moment. - * - * A new or improved dfa that distinguishes beginning/end of - * string from beginning/end of line will allow us to get rid of - * this temporary hack. - * - * The avoid_dfa() function is in re.c; it is not very smart. - */ - if (avoid_dfa(tree, t1->stptr, t1->stlen)) - kludge_need_start = TRUE; - i = research(rp, t1->stptr, 0, t1->stlen, kludge_need_start); - i = (i == -1) ^ (match == TRUE); - free_temp(t1); - return tmp_number((AWKNUM) i); -} - -/* set_IGNORECASE --- update IGNORECASE as appropriate */ - -void -set_IGNORECASE() -{ - static int warned = FALSE; - - if ((do_lint || do_traditional) && ! 
warned) { - warned = TRUE; - lintwarn(_("`IGNORECASE' is a gawk extension")); - } - if (do_traditional) - IGNORECASE = FALSE; - else if ((IGNORECASE_node->var_value->flags & (STRING|STR)) != 0) { - if ((IGNORECASE_node->var_value->flags & MAYBE_NUM) == 0) - IGNORECASE = (force_string(IGNORECASE_node->var_value)->stlen > 0); - else - IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0); - } else if ((IGNORECASE_node->var_value->flags & (NUM|NUMBER)) != 0) - IGNORECASE = (force_number(IGNORECASE_node->var_value) != 0.0); - else - IGNORECASE = FALSE; /* shouldn't happen */ - set_FS_if_not_FIELDWIDTHS(); -} - -/* set_BINMODE --- set translation mode (OS/2, DOS, others) */ - -void -set_BINMODE() -{ - static int warned = FALSE; - char *p, *cp, save; - NODE *v; - int digits = FALSE; - - if ((do_lint || do_traditional) && ! warned) { - warned = TRUE; - lintwarn(_("`BINMODE' is a gawk extension")); - } - if (do_traditional) - BINMODE = 0; - else if ((BINMODE_node->var_value->flags & STRING) != 0) { - v = BINMODE_node->var_value; - p = v->stptr; - save = p[v->stlen]; - p[v->stlen] = '\0'; - - for (cp = p; *cp != '\0'; cp++) { - if (ISDIGIT(*cp)) { - digits = TRUE; - break; - } - } - - if (! digits || (BINMODE_node->var_value->flags & MAYBE_NUM) == 0) { - BINMODE = 0; - if (strcmp(p, "r") == 0) - BINMODE = 1; - else if (strcmp(p, "w") == 0) - BINMODE = 2; - else if (strcmp(p, "rw") == 0 || strcmp(p, "wr") == 0) - BINMODE = 3; - - if (BINMODE == 0 && v->stlen != 0) { - /* arbitrary string, assume both */ - BINMODE = 3; - warning("BINMODE: arbitary string value treated as \"rw\""); - } - } else - BINMODE = (int) force_number(BINMODE_node->var_value); - - p[v->stlen] = save; - } else if ((BINMODE_node->var_value->flags & NUMBER) != 0) - BINMODE = (int) force_number(BINMODE_node->var_value); - else - BINMODE = 0; /* shouldn't happen */ -} - -/* set_OFS --- update OFS related variables when OFS assigned to */ - -void -set_OFS() -{ - OFS = force_string(OFS_node->var_value)->stptr; - OFSlen = OFS_node->var_value->stlen; - OFS[OFSlen] = '\0'; -} - -/* set_ORS --- update ORS related variables when ORS assigned to */ - -void -set_ORS() -{ - ORS = force_string(ORS_node->var_value)->stptr; - ORSlen = ORS_node->var_value->stlen; - ORS[ORSlen] = '\0'; -} - -/* fmt_ok --- is the conversion format a valid one? */ - -NODE **fmt_list = NULL; -static int fmt_ok P((NODE *n)); -static int fmt_index P((NODE *n)); - -static int -fmt_ok(NODE *n) -{ - NODE *tmp = force_string(n); - char *p = tmp->stptr; - - if (*p++ != '%') - return 0; - while (*p && strchr(" +-#", *p) != NULL) /* flags */ - p++; - while (*p && ISDIGIT(*p)) /* width - %*.*g is NOT allowed */ - p++; - if (*p == '\0' || (*p != '.' && ! ISDIGIT(*p))) - return 0; - if (*p == '.') - p++; - while (*p && ISDIGIT(*p)) /* precision */ - p++; - if (*p == '\0' || strchr("efgEG", *p) == NULL) - return 0; - if (*++p != '\0') - return 0; - return 1; -} - -/* fmt_index --- track values of OFMT and CONVFMT to keep semantics correct */ - -static int -fmt_index(NODE *n) -{ - register int ix = 0; - static int fmt_num = 4; - static int fmt_hiwater = 0; - - if (fmt_list == NULL) - emalloc(fmt_list, NODE **, fmt_num*sizeof(*fmt_list), "fmt_index"); - (void) force_string(n); - while (ix < fmt_hiwater) { - if (cmp_nodes(fmt_list[ix], n) == 0) - return ix; - ix++; - } - /* not found */ - n->stptr[n->stlen] = '\0'; - if (do_lint && ! fmt_ok(n)) - lintwarn(_("bad `%sFMT' specification `%s'"), - n == CONVFMT_node->var_value ? "CONV" - : n == OFMT_node->var_value ? 
"O" - : "", n->stptr); - - if (fmt_hiwater >= fmt_num) { - fmt_num *= 2; - emalloc(fmt_list, NODE **, fmt_num, "fmt_index"); - } - fmt_list[fmt_hiwater] = dupnode(n); - return fmt_hiwater++; -} - -/* set_OFMT --- track OFMT correctly */ - -void -set_OFMT() -{ - OFMTidx = fmt_index(OFMT_node->var_value); - OFMT = fmt_list[OFMTidx]->stptr; -} - -/* set_CONVFMT --- track CONVFMT correctly */ - -void -set_CONVFMT() -{ - CONVFMTidx = fmt_index(CONVFMT_node->var_value); - CONVFMT = fmt_list[CONVFMTidx]->stptr; -} - -/* set_LINT --- update LINT as appropriate */ - -void -set_LINT() -{ - int old_lint = do_lint; - - if ((LINT_node->var_value->flags & (STRING|STR)) != 0) { - if ((LINT_node->var_value->flags & MAYBE_NUM) == 0) { - char *lintval; - size_t lintlen; - - do_lint = (force_string(LINT_node->var_value)->stlen > 0); - lintval = LINT_node->var_value->stptr; - lintlen = LINT_node->var_value->stlen; - if (do_lint) { - if (lintlen == 5 && strncmp(lintval, "fatal", 5) == 0) - lintfunc = r_fatal; - else - lintfunc = warning; - } else - lintfunc = warning; - } else - do_lint = (force_number(LINT_node->var_value) != 0.0); - } else if ((LINT_node->var_value->flags & (NUM|NUMBER)) != 0) { - do_lint = (force_number(LINT_node->var_value) != 0.0); - lintfunc = warning; - } else - do_lint = FALSE; /* shouldn't happen */ - - if (! do_lint) - lintfunc = warning; - - /* explicitly use warning() here, in case lintfunc == r_fatal */ - if (old_lint != do_lint && old_lint) - warning(_("turning off `--lint' due to assignment to `LINT'")); -} - -/* set_TEXTDOMAIN --- update TEXTDOMAIN variable when TEXTDOMAIN assigned to */ - -void -set_TEXTDOMAIN() -{ - int len; - - TEXTDOMAIN = force_string(TEXTDOMAIN_node->var_value)->stptr; - len = TEXTDOMAIN_node->var_value->stlen; - TEXTDOMAIN[len] = '\0'; - /* - * Note: don't call textdomain(); this value is for - * the awk program, not for gawk itself. - */ -} - -/* - * assign_val --- do mechanics of assignment, for calling from multiple - * places. - */ - -NODE * -assign_val(NODE **lhs_p, NODE *rhs) -{ - NODE *save; - - if (rhs != *lhs_p) { - save = *lhs_p; - *lhs_p = dupnode(rhs); - unref(save); - } - return *lhs_p; -} - -/* update_ERRNO --- update the value of ERRNO */ - -void -update_ERRNO() -{ - char *cp; - - cp = strerror(errno); - cp = gettext(cp); - unref(ERRNO_node->var_value); - ERRNO_node->var_value = make_string(cp, strlen(cp)); -} - -/* comp_func --- array index comparison function for qsort */ - -static int -comp_func(const void *p1, const void *p2) -{ - size_t len1, len2; - char *str1, *str2; - NODE *t1, *t2; - - t1 = *((NODE **) p1); - t2 = *((NODE **) p2); - -/* - t1 = force_string(t1); - t2 = force_string(t2); -*/ - len1 = t1->stlen; - str1 = t1->stptr; - - len2 = t2->stlen; - str2 = t2->stptr; - - /* Array indexes are strings, compare as such, always! */ - if (len1 == len2 || len1 < len2) - return strncmp(str1, str2, len1); - else - return strncmp(str1, str2, len2); -} diff --git a/contrib/awk/eval_p.c b/contrib/awk/eval_p.c deleted file mode 100644 index b01bec1..0000000 --- a/contrib/awk/eval_p.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * eval_p.c - compile eval.c with profiling turned on. - */ - -/* - * Copyright (C) 2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. 
- * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#define PROFILING 1 -#include "eval.c" diff --git a/contrib/awk/ext.c b/contrib/awk/ext.c deleted file mode 100644 index 1ed7e2d..0000000 --- a/contrib/awk/ext.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * ext.c - Builtin function that links external gawk functions and related - * utilities. - * - * Christos Zoulas, Thu Jun 29 17:40:41 EDT 1995 - * Arnold Robbins, update for 3.1, Mon Nov 23 12:53:39 EST 1998 - */ - -/* - * Copyright (C) 1995 - 2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -#ifdef DYNAMIC - -#include - -#ifdef __GNUC__ -static unsigned long long dummy; /* fake out gcc for dynamic loading? 
*/ -#endif - -extern int errcount; - -/* do_ext --- load an extension */ - -NODE * -do_ext(NODE *tree) -{ - NODE *obj; - NODE *fun; - NODE *(*func) P((NODE *, void *)); - void *dl; - int flags = RTLD_LAZY; - -#ifdef __GNUC__ - AWKNUM junk; - - junk = (AWKNUM) dummy; -#endif - - if (do_lint) - lintwarn(_("`extension' is a gawk extension")); - - if (do_traditional || do_posix) { - errcount++; - error(_("`extension' is a gawk extension")); - } - - obj = tree_eval(tree->lnode); - force_string(obj); - -#ifdef RTLD_GLOBAL - flags |= RTLD_GLOBAL; -#endif - if ((dl = dlopen(obj->stptr, flags)) == NULL) - fatal(_("extension: cannot open `%s' (%s)\n"), obj->stptr, - dlerror()); - - fun = tree_eval(tree->rnode->lnode); - force_string(fun); - - func = (NODE *(*) P((NODE *, void *))) dlsym(dl, fun->stptr); - if (func == NULL) - fatal(_("extension: library `%s': cannot call function `%s' (%s)\n"), - obj->stptr, fun->stptr, dlerror()); - free_temp(obj); - free_temp(fun); - - return (*func)(tree, dl); -} - -/* make_builtin --- register name to be called as func with a builtin body */ - -void -make_builtin(char *name, NODE *(*func) P((NODE *)), int count) -{ - NODE *p, *b, *f; - char **vnames, *parm_names, *sp; - char buf[200]; - int space_needed, i; - - /* count parameters, create artificial list of param names */ - space_needed = 0; - for (i = 0; i < count; i++) { - sprintf(buf, "p%d", i); - space_needed += strlen(buf) + 1; - } - emalloc(parm_names, char *, space_needed, "make_builtin"); - emalloc(vnames, char **, count * sizeof(char *), "make_builtin"); - sp = parm_names; - for (i = 0; i < count; i++) { - sprintf(sp, "p%d",i); - vnames[i] = sp; - sp += strlen(sp) + 1; - } - - getnode(p); - p->type = Node_param_list; - p->rnode = NULL; - p->param = name; - p->param_cnt = count; -#if 0 - /* setting these blows away the param_cnt. dang unions! */ - p->source_line = __LINE__; - p->source_file = __FILE__; -#endif - - getnode(b); - b->type = Node_builtin; - b->proc = func; - b->subnode = p; - b->source_line = __LINE__; - b->source_file = __FILE__; - - f = node(p, Node_func, b); - f->parmlist = vnames; - install(name, f); -} - -/* get_argument --- Get the n'th argument of a dynamically linked function */ - -NODE * -get_argument(NODE *tree, int i) -{ - extern NODE **stack_ptr; - - if (i < 0 || i >= tree->param_cnt) - return NULL; - - tree = stack_ptr[i]; - if (tree->lnode == Nnull_string) - return NULL; - - if (tree->type == Node_array_ref) - tree = tree->orig_array; - - if (tree->type == Node_var_array) - return tree; - - return tree->lnode; -} - -/* set_value --- set the return value of a dynamically linked function */ - -void -set_value(NODE *tree) -{ - extern NODE *ret_node; - - if (tree) - ret_node = tree; - else - ret_node = Nnull_string; -} -#else - -/* do_ext --- dummy version if extensions not available */ - -NODE * -do_ext(NODE *tree) -{ - char *emsg = _("Operation Not Supported"); - - unref(ERRNO_node->var_value); - ERRNO_node->var_value = make_string(emsg, strlen(emsg)); - return tmp_number((AWKNUM) -1); -} -#endif diff --git a/contrib/awk/extension/dl.c b/contrib/awk/extension/dl.c deleted file mode 100644 index 0f1e1c5..0000000 --- a/contrib/awk/extension/dl.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * dl.c - Example of adding a new builtin function to gawk. - * - * Christos Zoulas, Thu Jun 29 17:40:41 EDT 1995 - * Arnold Robbins, update for 3.1, Wed Sep 13 09:38:56 2000 - */ - -/* - * Copyright (C) 1995 - 2001 the Free Software Foundation, Inc. 
- * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" -#include - -static void *sdl = NULL; - -static NODE * -zaxxon(tree) -NODE *tree; -{ - NODE *obj; - int i; - int comma = 0; - - /* - * Print the arguments - */ - printf("External linkage %s(", tree->param); - - for (i = 0; i < tree->param_cnt; i++) { - - obj = get_argument(tree, i); - - if (obj == NULL) - break; - - force_string(obj); - - printf(comma ? ", %s" : "%s", obj->stptr); - free_temp(obj); - comma = 1; - } - - printf(");\n"); - - /* - * Do something useful - */ - obj = get_argument(tree, 0); - - if (obj != NULL) { - force_string(obj); - if (strcmp(obj->stptr, "unload") == 0 && sdl) { - /* - * XXX: How to clean up the function? - * I would like the ability to remove a function... - */ - dlclose(sdl); - sdl = NULL; - } - free_temp(obj); - } - - /* Set the return value */ - set_value(tmp_number((AWKNUM) 3.14)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -} - -NODE * -dlload(tree, dl) -NODE *tree; -void *dl; -{ - sdl = dl; - make_builtin("zaxxon", zaxxon, 4); - return tmp_number((AWKNUM) 0); -} diff --git a/contrib/awk/extension/doit b/contrib/awk/extension/doit deleted file mode 100755 index 29dff7d..0000000 --- a/contrib/awk/extension/doit +++ /dev/null @@ -1 +0,0 @@ -../gawk -f foo.awk diff --git a/contrib/awk/extension/filefuncs.c b/contrib/awk/extension/filefuncs.c deleted file mode 100644 index 12badb5..0000000 --- a/contrib/awk/extension/filefuncs.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * filefuncs.c - Builtin functions that provide initial minimal iterface - * to the file system. - * - * Arnold Robbins, update for 3.1, Mon Nov 23 12:53:39 EST 1998 - */ - -/* - * Copyright (C) 2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -#include - -/* do_chdir --- provide dynamically loaded chdir() builtin for gawk */ - -static NODE * -do_chdir(tree) -NODE *tree; -{ - NODE *newdir; - int ret = -1; - - if (do_lint && tree->param_cnt > 1) - lintwarn("chdir: called with too many arguments"); - - newdir = get_argument(tree, 0); - if (newdir != NULL) { - (void) force_string(newdir); - ret = chdir(newdir->stptr); - if (ret < 0) - update_ERRNO(); - - free_temp(newdir); - } else if (do_lint) - lintwarn("chdir: called with no arguments"); - - - /* Set the return value */ - set_value(tmp_number((AWKNUM) ret)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -} - -/* format_mode --- turn a stat mode field into something readable */ - -static char * -format_mode(fmode) -unsigned long fmode; -{ - static char outbuf[12]; - int i; - - strcpy(outbuf, "----------"); - /* first, get the file type */ - i = 0; - switch (fmode & S_IFMT) { -#ifdef S_IFSOCK - case S_IFSOCK: - outbuf[i] = 's'; - break; -#endif -#ifdef S_IFLNK - case S_IFLNK: - outbuf[i] = 'l'; - break; -#endif - case S_IFREG: - outbuf[i] = '-'; /* redundant */ - break; - case S_IFBLK: - outbuf[i] = 'b'; - break; - case S_IFDIR: - outbuf[i] = 'd'; - break; -#ifdef S_IFDOOR /* Solaris weirdness */ - case S_IFDOOR: - outbuf[i] = 'D'; - break; -#endif /* S_IFDOOR */ - case S_IFCHR: - outbuf[i] = 'c'; - break; -#ifdef S_IFIFO - case S_IFIFO: - outbuf[i] = 'p'; - break; -#endif - } - - i++; - if ((fmode & S_IRUSR) != 0) - outbuf[i] = 'r'; - i++; - if ((fmode & S_IWUSR) != 0) - outbuf[i] = 'w'; - i++; - if ((fmode & S_IXUSR) != 0) - outbuf[i] = 'x'; - i++; - - if ((fmode & S_IRGRP) != 0) - outbuf[i] = 'r'; - i++; - if ((fmode & S_IWGRP) != 0) - outbuf[i] = 'w'; - i++; - if ((fmode & S_IXGRP) != 0) - outbuf[i] = 'x'; - i++; - - if ((fmode & S_IROTH) != 0) - outbuf[i] = 'r'; - i++; - if ((fmode & S_IWOTH) != 0) - outbuf[i] = 'w'; - i++; - if ((fmode & S_IXOTH) != 0) - outbuf[i] = 'x'; - i++; - - outbuf[i] = '\0'; - - if ((fmode & S_ISUID) != 0) { - if (outbuf[3] == 'x') - outbuf[3] = 's'; - else - outbuf[3] = 'S'; - } - - /* setgid without execute == locking */ - if ((fmode & S_ISGID) != 0) { - if (outbuf[6] == 'x') - outbuf[6] = 's'; - else - outbuf[6] = 'l'; - } - - if ((fmode & S_ISVTX) != 0) { - if (outbuf[9] == 'x') - outbuf[9] = 't'; - else - outbuf[9] = 'T'; - } - - return outbuf; -} - -/* do_stat --- provide a stat() function for gawk */ - -static NODE * -do_stat(tree) -NODE *tree; -{ - NODE *file, *array; - struct stat sbuf; - int ret; - NODE **aptr; - char *pmode; /* printable mode */ - char *type = "unknown"; - - /* check arg count */ - if (tree->param_cnt != 2) - fatal( - "stat: called with incorrect number of arguments (%d), should be 2", - tree->param_cnt); - - /* directory is first arg, array to hold results is second */ - file = get_argument(tree, 0); - array = get_argument(tree, 1); - - /* empty out the array */ - assoc_clear(array); - - /* lstat the file, if error, set ERRNO and return */ - (void) force_string(file); - ret = lstat(file->stptr, & sbuf); - if (ret < 0) { - update_ERRNO(); - - set_value(tmp_number((AWKNUM) ret)); - - free_temp(file); - return tmp_number((AWKNUM) 0); - } - - /* fill in the array */ - aptr = assoc_lookup(array, tmp_string("name", 4), FALSE); - *aptr = dupnode(file); - - 
aptr = assoc_lookup(array, tmp_string("dev", 3), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_dev); - - aptr = assoc_lookup(array, tmp_string("ino", 3), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_ino); - - aptr = assoc_lookup(array, tmp_string("mode", 4), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_mode); - - aptr = assoc_lookup(array, tmp_string("nlink", 5), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_nlink); - - aptr = assoc_lookup(array, tmp_string("uid", 3), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_uid); - - aptr = assoc_lookup(array, tmp_string("gid", 3), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_gid); - - aptr = assoc_lookup(array, tmp_string("size", 4), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_size); - - aptr = assoc_lookup(array, tmp_string("blocks", 6), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_blocks); - - aptr = assoc_lookup(array, tmp_string("atime", 5), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_atime); - - aptr = assoc_lookup(array, tmp_string("mtime", 5), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_mtime); - - aptr = assoc_lookup(array, tmp_string("ctime", 5), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_ctime); - - /* for block and character devices, add rdev, major and minor numbers */ - if (S_ISBLK(sbuf.st_mode) || S_ISCHR(sbuf.st_mode)) { - aptr = assoc_lookup(array, tmp_string("rdev", 4), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_rdev); - - aptr = assoc_lookup(array, tmp_string("major", 5), FALSE); - *aptr = make_number((AWKNUM) major(sbuf.st_rdev)); - - aptr = assoc_lookup(array, tmp_string("minor", 5), FALSE); - *aptr = make_number((AWKNUM) minor(sbuf.st_rdev)); - } - -#ifdef HAVE_ST_BLKSIZE - aptr = assoc_lookup(array, tmp_string("blksize", 7), FALSE); - *aptr = make_number((AWKNUM) sbuf.st_blksize); -#endif /* HAVE_ST_BLKSIZE */ - - aptr = assoc_lookup(array, tmp_string("pmode", 5), FALSE); - pmode = format_mode(sbuf.st_mode); - *aptr = make_string(pmode, strlen(pmode)); - - /* for symbolic links, add a linkval field */ - if (S_ISLNK(sbuf.st_mode)) { - char buf[BUFSIZ*2]; - int linksize; - - linksize = readlink(file->stptr, buf, sizeof buf); - /* should make this smarter */ - if (linksize == sizeof(buf)) - fatal("size of symbolic link too big"); - buf[linksize] = '\0'; - - aptr = assoc_lookup(array, tmp_string("linkval", 7), FALSE); - *aptr = make_string(buf, linksize); - } - - /* add a type field */ - switch (sbuf.st_mode & S_IFMT) { -#ifdef S_IFSOCK - case S_IFSOCK: - type = "socket"; - break; -#endif -#ifdef S_IFLNK - case S_IFLNK: - type = "symlink"; - break; -#endif - case S_IFREG: - type = "file"; - break; - case S_IFBLK: - type = "blockdev"; - break; - case S_IFDIR: - type = "directory"; - break; -#ifdef S_IFDOOR - case S_IFDOOR: - type = "door"; - break; -#endif - case S_IFCHR: - type = "chardev"; - break; -#ifdef S_IFIFO - case S_IFIFO: - type = "fifo"; - break; -#endif - } - - aptr = assoc_lookup(array, tmp_string("type", 4), FALSE); - *aptr = make_string(type, strlen(type)); - - free_temp(file); - - /* Set the return value */ - set_value(tmp_number((AWKNUM) ret)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -} - -/* dlload --- load new builtins in this library */ - -NODE * -dlload(tree, dl) -NODE *tree; -void *dl; -{ - make_builtin("chdir", do_chdir, 1); - make_builtin("stat", do_stat, 2); - - return tmp_number((AWKNUM) 0); -} diff --git a/contrib/awk/extension/foo.awk b/contrib/awk/extension/foo.awk deleted file mode 100644 index 00a89e5..0000000 --- 
a/contrib/awk/extension/foo.awk +++ /dev/null @@ -1,9 +0,0 @@ -BEGIN { - extension("./dl.so","dlload") - zaxxon("hi there", "this is", "a test", "of argument passing") - zaxxon(1) - zaxxon(1,2) - z = zaxxon(1,2,3,4) - z = zaxxon(1,zaxxon(zaxxon("foo")),3,4) - print z -} diff --git a/contrib/awk/extension/fork.c b/contrib/awk/extension/fork.c deleted file mode 100644 index 038a168..0000000 --- a/contrib/awk/extension/fork.c +++ /dev/null @@ -1,106 +0,0 @@ -/* - * fork.c - Provide fork and waitpid functions for gawk. - */ - -/* - * Copyright (C) 2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" -#include - -/* do_fork --- provide dynamically loaded fork() builtin for gawk */ - -static NODE * -do_fork(tree) -NODE *tree; -{ - int ret = -1; - NODE **aptr; - - if (do_lint && tree->param_cnt > 0) - lintwarn("fork: called with too many arguments"); - - ret = fork(); - - if (ret < 0) - update_ERRNO(); - else if (ret == 0) { - /* update PROCINFO in the child */ - - aptr = assoc_lookup(PROCINFO_node, tmp_string("pid", 3), FALSE); - (*aptr)->numbr = (AWKNUM) getpid(); - - aptr = assoc_lookup(PROCINFO_node, tmp_string("ppid", 4), FALSE); - (*aptr)->numbr = (AWKNUM) getppid(); - } - - /* Set the return value */ - set_value(tmp_number((AWKNUM) ret)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -} - - -/* do_waitpid --- provide dynamically loaded waitpid() builtin for gawk */ - -static NODE * -do_waitpid(tree) -NODE *tree; -{ - NODE *pidnode; - int ret = -1; - double pidval; - pid_t pid; - int options = 0; - - if (do_lint && tree->param_cnt > 1) - lintwarn("waitpid: called with too many arguments"); - - pidnode = get_argument(tree, 0); - if (pidnode != NULL) { - pidval = force_number(pidnode); - pid = (int) pidval; - options = WNOHANG|WUNTRACED; - ret = waitpid(pid, NULL, options); - if (ret < 0) - update_ERRNO(); - } else if (do_lint) - lintwarn("wait: called with no arguments"); - - /* Set the return value */ - set_value(tmp_number((AWKNUM) ret)); - - /* Just to make the interpreter happy */ - return tmp_number((AWKNUM) 0); -} - -/* dlload --- load new builtins in this library */ - -NODE * -dlload(tree, dl) -NODE *tree; -void *dl; -{ - make_builtin("fork", do_fork, 0); - make_builtin("waitpid", do_waitpid, 1); - return tmp_number((AWKNUM) 0); -} diff --git a/contrib/awk/extension/steps b/contrib/awk/extension/steps deleted file mode 100755 index 61a9e6e..0000000 --- a/contrib/awk/extension/steps +++ /dev/null @@ -1,9 +0,0 @@ -# what to do under linux to make dl.so -# Tue Nov 24 15:04:14 EST 1998 - -gcc -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. dl.c -gcc -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. filefuncs.c -gcc -shared -Wall -DHAVE_CONFIG_H -c -O -g -I.. 
fork.c -ld -o dl.so -shared dl.o -ld -o filefuncs.so -shared filefuncs.o -ld -o fork.so -shared fork.o diff --git a/contrib/awk/extension/testff.awk b/contrib/awk/extension/testff.awk deleted file mode 100644 index 0a0a9b2..0000000 --- a/contrib/awk/extension/testff.awk +++ /dev/null @@ -1,30 +0,0 @@ -BEGIN { - extension("./filefuncs.so", "dlload") - -# printf "before: " -# fflush() -# system("pwd") -# -# chdir("..") -# -# printf "after: " -# fflush() -# system("pwd") - - chdir(".") - - data[1] = 1 - print "Info for testff.awk" - ret = stat("testff.awk", data) - print "ret =", ret - for (i in data) - printf "data[\"%s\"] = %s\n", i, data[i] - print "testff.awk modified:", strftime("%m %d %y %H:%M:%S", data["mtime"]) - - print "\nInfo for JUNK" - ret = stat("JUNK", data) - print "ret =", ret - for (i in data) - printf "data[\"%s\"] = %s\n", i, data[i] - print "JUNK modified:", strftime("%m %d %y %H:%M:%S", data["mtime"]) -} diff --git a/contrib/awk/extension/testfork.awk b/contrib/awk/extension/testfork.awk deleted file mode 100644 index ca00dca..0000000 --- a/contrib/awk/extension/testfork.awk +++ /dev/null @@ -1,20 +0,0 @@ -BEGIN { - extension("./fork.so", "dlload") - - printf "before fork, pid = %d, ppid = %d\n", PROCINFO["pid"], - PROCINFO["ppid"] - - fflush() - ret = fork() - if (ret < 0) - printf("ret = %d, ERRNO = %s\n", ret, ERRNO) - else if (ret == 0) - printf "child, pid = %d, ppid = %d\n", PROCINFO["pid"], - PROCINFO["ppid"] - else { - system("sleep 3") - printf "parent, ret = %d\n", ret - printf "parent, pid = %d, ppid = %d\n", PROCINFO["pid"], - PROCINFO["ppid"] - } -} diff --git a/contrib/awk/field.c b/contrib/awk/field.c deleted file mode 100644 index 1d0f22e..0000000 --- a/contrib/awk/field.c +++ /dev/null @@ -1,998 +0,0 @@ -/* - * field.c - routines for dealing with fields and record parsing - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - -#include "awk.h" - -typedef void (* Setfunc) P((long, char *, long, NODE *)); - -static long (*parse_field) P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static void rebuild_record P((void)); -static long re_parse_field P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static long def_parse_field P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static long posix_def_parse_field P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static long null_parse_field P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static long sc_parse_field P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static long fw_parse_field P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); -static void set_element P((long num, char * str, long len, NODE *arr)); -static void grow_fields_arr P((long num)); -static void set_field P((long num, char *str, long len, NODE *dummy)); -static void update_PROCINFO P((char *subscript, char *str)); - - -static char *parse_extent; /* marks where to restart parse of record */ -static long parse_high_water = 0; /* field number that we have parsed so far */ -static long nf_high_water = 0; /* size of fields_arr */ -static int resave_fs; -static NODE *save_FS; /* save current value of FS when line is read, - * to be used in deferred parsing - */ -static int *FIELDWIDTHS = NULL; - -NODE **fields_arr; /* array of pointers to the field nodes */ -int field0_valid; /* $(>0) has not been changed yet */ -int default_FS; /* TRUE when FS == " " */ -Regexp *FS_regexp = NULL; -static NODE *Null_field = NULL; - -/* using_FIELDWIDTHS --- static function, macro to avoid overhead */ -#define using_FIELDWIDTHS() (parse_field == fw_parse_field) - -/* init_fields --- set up the fields array to start with */ - -void -init_fields() -{ - NODE *n; - - emalloc(fields_arr, NODE **, sizeof(NODE *), "init_fields"); - getnode(n); - *n = *Nnull_string; - n->flags |= (SCALAR|FIELD); - n->flags &= ~PERM; - fields_arr[0] = n; - parse_extent = fields_arr[0]->stptr; - save_FS = dupnode(FS_node->var_value); - getnode(Null_field); - *Null_field = *Nnull_string; - Null_field->flags |= (SCALAR|FIELD); - Null_field->flags &= ~(NUM|NUMBER|MAYBE_NUM|PERM); - field0_valid = TRUE; -} - -/* grow_fields --- acquire new fields as needed */ - -static void -grow_fields_arr(long num) -{ - register int t; - register NODE *n; - - erealloc(fields_arr, NODE **, (num + 1) * sizeof(NODE *), "grow_fields_arr"); - for (t = nf_high_water + 1; t <= num; t++) { - getnode(n); - *n = *Null_field; - fields_arr[t] = n; - } - nf_high_water = num; -} - -/* set_field --- set the value of a particular field */ - -/*ARGSUSED*/ -static void -set_field(long num, - char *str, - long len, - NODE *dummy) /* not used -- just to make interface same as set_element */ -{ - register NODE *n; - - if (num > nf_high_water) - grow_fields_arr(num); - n = fields_arr[num]; - n->stptr = str; - n->stlen = len; - n->flags = (STR|STRING|MAYBE_NUM|SCALAR|FIELD); -} - -/* rebuild_record --- Someone assigned a value to $(something). - Fix up $0 to be right */ - -static void -rebuild_record() -{ - /* - * use explicit unsigned longs for lengths, in case - * a size_t isn't big enough. 
- */ - register unsigned long tlen; - register unsigned long ofslen; - register NODE *tmp; - NODE *ofs; - char *ops; - register char *cops; - long i; - - assert(NF != -1); - - tlen = 0; - ofs = force_string(OFS_node->var_value); - ofslen = ofs->stlen; - for (i = NF; i > 0; i--) { - tmp = fields_arr[i]; - tmp = force_string(tmp); - tlen += tmp->stlen; - } - tlen += (NF - 1) * ofslen; - if ((long) tlen < 0) - tlen = 0; - emalloc(ops, char *, tlen + 2, "rebuild_record"); - cops = ops; - ops[0] = '\0'; - for (i = 1; i <= NF; i++) { - tmp = fields_arr[i]; - /* copy field */ - if (tmp->stlen == 1) - *cops++ = tmp->stptr[0]; - else if (tmp->stlen != 0) { - memcpy(cops, tmp->stptr, tmp->stlen); - cops += tmp->stlen; - } - /* copy OFS */ - if (i != NF) { - if (ofslen == 1) - *cops++ = ofs->stptr[0]; - else if (ofslen != 0) { - memcpy(cops, ofs->stptr, ofslen); - cops += ofslen; - } - } - } - tmp = make_str_node(ops, tlen, ALREADY_MALLOCED); - - /* - * Since we are about to unref fields_arr[0], we want to find - * any fields that still point into it, and have them point - * into the new field zero. This has to be done intelligently, - * so that unrefing a field doesn't try to unref into the old $0. - */ - for (cops = ops, i = 1; i <= NF; i++) { - if (fields_arr[i]->stlen > 0) { - NODE *n; - getnode(n); - - if ((fields_arr[i]->flags & FIELD) == 0) { - *n = *Null_field; - n->stlen = fields_arr[i]->stlen; - if ((fields_arr[i]->flags & (NUM|NUMBER)) != 0) { - n->flags |= (fields_arr[i]->flags & (NUM|NUMBER)); - n->numbr = fields_arr[i]->numbr; - } - } else { - *n = *(fields_arr[i]); - n->flags &= ~(MALLOC|TEMP|PERM|STRING); - } - - n->stptr = cops; - unref(fields_arr[i]); - fields_arr[i] = n; - } - cops += fields_arr[i]->stlen + ofslen; - } - - unref(fields_arr[0]); - - fields_arr[0] = tmp; - field0_valid = TRUE; -} - -/* - * set_record: - * setup $0, but defer parsing rest of line until reference is made to $(>0) - * or to NF. At that point, parse only as much as necessary. - * - * Manage a private buffer for the contents of $0. Doing so keeps us safe - * if `getline var' decides to rearrange the contents of the IOBUF that - * $0 might have been pointing into. The cost is the copying of the buffer; - * but better correct than fast. - */ -void -set_record(char *buf, /* ignored if ! freeold */ - int cnt, /* ignored if ! freeold */ - int freeold) -{ - register int i; - NODE *n; - static char *databuf; - static unsigned long databuf_size; -#define INITIAL_SIZE 512 -#define MAX_SIZE ((unsigned long) ~0) /* maximally portable ... */ - - NF = -1; - for (i = 1; i <= parse_high_water; i++) { - unref(fields_arr[i]); - getnode(n); - *n = *Null_field; - fields_arr[i] = n; - } - - parse_high_water = 0; - /* - * $0 = $0 should resplit using the current value of FS, thus, - * this is executed orthogonally to the value of freeold. - */ - if (resave_fs) { - resave_fs = FALSE; - unref(save_FS); - save_FS = dupnode(FS_node->var_value); - } - if (freeold) { - /* buffer management: */ - if (databuf_size == 0) { /* first time */ - emalloc(databuf, char *, INITIAL_SIZE, "set_record"); - databuf_size = INITIAL_SIZE; - memset(databuf, '\0', INITIAL_SIZE); - - } - /* - * Make sure there's enough room. Since we sometimes need - * to place a sentinel at the end, we make sure - * databuf_size is > cnt after allocation. 
- */ - if (cnt >= databuf_size) { - while (cnt >= databuf_size && databuf_size <= MAX_SIZE) - databuf_size *= 2; - erealloc(databuf, char *, databuf_size, "set_record"); - memset(databuf, '\0', databuf_size); - } - /* copy the data */ - memcpy(databuf, buf, cnt); - - /* manage field 0: */ - unref(fields_arr[0]); - getnode(n); - n->stptr = databuf; - n->stlen = cnt; - n->stref = 1; - n->type = Node_val; - n->stfmt = -1; - n->flags = (STRING|STR|MAYBE_NUM|SCALAR|FIELD); - fields_arr[0] = n; - } - fields_arr[0]->flags |= MAYBE_NUM; - field0_valid = TRUE; - -#undef INITIAL_SIZE -#undef MAX_SIZE -} - -/* reset_record --- start over again with current $0 */ - -void -reset_record() -{ - (void) force_string(fields_arr[0]); - set_record(fields_arr[0]->stptr, fields_arr[0]->stlen, FALSE); -} - -/* set_NF --- handle what happens to $0 and fields when NF is changed */ - -void -set_NF() -{ - register int i; - NODE *n; - - assert(NF != -1); - - NF = (long) force_number(NF_node->var_value); - if (NF > nf_high_water) - grow_fields_arr(NF); - if (parse_high_water < NF) { - for (i = parse_high_water + 1; i <= NF; i++) { - unref(fields_arr[i]); - getnode(n); - *n = *Null_field; - fields_arr[i] = n; - } - } else if (parse_high_water > 0) { - for (i = NF + 1; i <= parse_high_water; i++) { - unref(fields_arr[i]); - getnode(n); - *n = *Null_field; - fields_arr[i] = n; - } - parse_high_water = NF; - } - field0_valid = FALSE; -} - -/* - * re_parse_field --- parse fields using a regexp. - * - * This is called both from get_field() and from do_split() - * via (*parse_field)(). This variation is for when FS is a regular - * expression -- either user-defined or because RS=="" and FS==" " - */ -static long -re_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs, - Regexp *rp, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n) -{ - register char *scan = *buf; - register long nf = parse_high_water; - register char *field; - register char *end = scan + len; - - if (up_to == HUGE) - nf = 0; - if (len == 0) - return nf; - - if (RS_is_null && default_FS) - while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n')) - scan++; - field = scan; - while (scan < end - && research(rp, scan, 0, (end - scan), TRUE) != -1 - && nf < up_to) { - if (REEND(rp, scan) == RESTART(rp, scan)) { /* null match */ - scan++; - if (scan == end) { - (*set)(++nf, field, (long)(scan - field), n); - up_to = nf; - break; - } - continue; - } - (*set)(++nf, field, - (long)(scan + RESTART(rp, scan) - field), n); - scan += REEND(rp, scan); - field = scan; - if (scan == end) /* FS at end of record */ - (*set)(++nf, field, 0L, n); - } - if (nf != up_to && scan < end) { - (*set)(++nf, scan, (long)(end - scan), n); - scan = end; - } - *buf = scan; - return (nf); -} - -/* - * def_parse_field --- default field parsing. - * - * This is called both from get_field() and from do_split() - * via (*parse_field)(). This variation is for when FS is a single space - * character. 
- */ - -static long -def_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs, - Regexp *rp, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n) -{ - register char *scan = *buf; - register long nf = parse_high_water; - register char *field; - register char *end = scan + len; - char sav; - - if (up_to == HUGE) - nf = 0; - if (len == 0) - return nf; - - /* - * Nasty special case. If FS set to "", return whole record - * as first field. This is not worth a separate function. - */ - if (fs->stlen == 0) { - (*set)(++nf, *buf, len, n); - *buf += len; - return nf; - } - - /* before doing anything save the char at *end */ - sav = *end; - /* because it will be destroyed now: */ - - *end = ' '; /* sentinel character */ - for (; nf < up_to; scan++) { - /* - * special case: fs is single space, strip leading whitespace - */ - while (scan < end && (*scan == ' ' || *scan == '\t' || *scan == '\n')) - scan++; - if (scan >= end) - break; - field = scan; - while (*scan != ' ' && *scan != '\t' && *scan != '\n') - scan++; - (*set)(++nf, field, (long)(scan - field), n); - if (scan == end) - break; - } - - /* everything done, restore original char at *end */ - *end = sav; - - *buf = scan; - return nf; -} - -/* - * posix_def_parse_field --- default field parsing. - * - * This is called both from get_field() and from do_split() - * via (*parse_field)(). This variation is for when FS is a single space - * character. The only difference between this and def_parse_field() - * is that this one does not allow newlines to separate fields. - */ - -static long -posix_def_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs, - Regexp *rp, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n) -{ - register char *scan = *buf; - register long nf = parse_high_water; - register char *field; - register char *end = scan + len; - char sav; - - if (up_to == HUGE) - nf = 0; - if (len == 0) - return nf; - - /* - * Nasty special case. If FS set to "", return whole record - * as first field. This is not worth a separate function. - */ - if (fs->stlen == 0) { - (*set)(++nf, *buf, len, n); - *buf += len; - return nf; - } - - /* before doing anything save the char at *end */ - sav = *end; - /* because it will be destroyed now: */ - - *end = ' '; /* sentinel character */ - for (; nf < up_to; scan++) { - /* - * special case: fs is single space, strip leading whitespace - */ - while (scan < end && (*scan == ' ' || *scan == '\t')) - scan++; - if (scan >= end) - break; - field = scan; - while (*scan != ' ' && *scan != '\t') - scan++; - (*set)(++nf, field, (long)(scan - field), n); - if (scan == end) - break; - } - - /* everything done, restore original char at *end */ - *end = sav; - - *buf = scan; - return nf; -} - -/* - * null_parse_field --- each character is a separate field - * - * This is called both from get_field() and from do_split() - * via (*parse_field)(). This variation is for when FS is the null string. 
- */ -static long -null_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs, - Regexp *rp, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n) -{ - register char *scan = *buf; - register long nf = parse_high_water; - register char *end = scan + len; - - if (up_to == HUGE) - nf = 0; - if (len == 0) - return nf; - - for (; nf < up_to && scan < end; scan++) - (*set)(++nf, scan, 1L, n); - - *buf = scan; - return nf; -} - -/* - * sc_parse_field --- single character field separator - * - * This is called both from get_field() and from do_split() - * via (*parse_field)(). This variation is for when FS is a single character - * other than space. - */ -static long -sc_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs, - Regexp *rp, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n) -{ - register char *scan = *buf; - register char fschar; - register long nf = parse_high_water; - register char *field; - register char *end = scan + len; - int onecase; - char sav; - - if (up_to == HUGE) - nf = 0; - if (len == 0) - return nf; - - if (RS_is_null && fs->stlen == 0) - fschar = '\n'; - else - fschar = fs->stptr[0]; - - onecase = (IGNORECASE && ISALPHA(fschar)); - if (onecase) - fschar = casetable[(unsigned char) fschar]; - - /* before doing anything save the char at *end */ - sav = *end; - /* because it will be destroyed now: */ - *end = fschar; /* sentinel character */ - - for (; nf < up_to;) { - field = scan; - if (onecase) { - while (casetable[(unsigned char) *scan] != fschar) - scan++; - } else { - while (*scan != fschar) - scan++; - } - (*set)(++nf, field, (long)(scan - field), n); - if (scan == end) - break; - scan++; - if (scan == end) { /* FS at end of record */ - (*set)(++nf, field, 0L, n); - break; - } - } - - /* everything done, restore original char at *end */ - *end = sav; - - *buf = scan; - return nf; -} - -/* - * fw_parse_field --- field parsing using FIELDWIDTHS spec - * - * This is called both from get_field() and from do_split() - * via (*parse_field)(). This variation is for fields are fixed widths. - */ -static long -fw_parse_field(long up_to, /* parse only up to this field number */ - char **buf, /* on input: string to parse; on output: point to start next */ - int len, - NODE *fs, - Regexp *rp, - Setfunc set, /* routine to set the value of the parsed field */ - NODE *n) -{ - register char *scan = *buf; - register long nf = parse_high_water; - register char *end = scan + len; - - if (up_to == HUGE) - nf = 0; - if (len == 0) - return nf; - for (; nf < up_to && (len = FIELDWIDTHS[nf+1]) != -1; ) { - if (len > end - scan) - len = end - scan; - (*set)(++nf, scan, (long) len, n); - scan += len; - } - if (len == -1) - *buf = end; - else - *buf = scan; - return nf; -} - -/* get_field --- return a particular $n */ - -/* assign is not NULL if this field is on the LHS of an assign */ - -NODE ** -get_field(register long requested, Func_ptr *assign) -{ - /* - * if requesting whole line but some other field has been altered, - * then the whole line must be rebuilt - */ - if (requested == 0) { - if (! 
field0_valid) { - /* first, parse remainder of input record */ - if (NF == -1) { - NF = (*parse_field)(HUGE-1, &parse_extent, - fields_arr[0]->stlen - - (parse_extent - fields_arr[0]->stptr), - save_FS, FS_regexp, set_field, - (NODE *) NULL); - parse_high_water = NF; - } - rebuild_record(); - } - if (assign != NULL) - *assign = reset_record; - return &fields_arr[0]; - } - - /* assert(requested > 0); */ - - if (assign != NULL) - field0_valid = FALSE; /* $0 needs reconstruction */ - - if (requested <= parse_high_water) /* already parsed this field */ - return &fields_arr[requested]; - - if (NF == -1) { /* have not yet parsed to end of record */ - /* - * parse up to requested fields, calling set_field() for each, - * saving in parse_extent the point where the parse left off - */ - if (parse_high_water == 0) /* starting at the beginning */ - parse_extent = fields_arr[0]->stptr; - parse_high_water = (*parse_field)(requested, &parse_extent, - fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr), - save_FS, FS_regexp, set_field, (NODE *) NULL); - - /* - * if we reached the end of the record, set NF to the number of - * fields so far. Note that requested might actually refer to - * a field that is beyond the end of the record, but we won't - * set NF to that value at this point, since this is only a - * reference to the field and NF only gets set if the field - * is assigned to -- this case is handled below - */ - if (parse_extent == fields_arr[0]->stptr + fields_arr[0]->stlen) - NF = parse_high_water; - if (requested == HUGE-1) /* HUGE-1 means set NF */ - requested = parse_high_water; - } - if (parse_high_water < requested) { /* requested beyond end of record */ - if (assign != NULL) { /* expand record */ - if (requested > nf_high_water) - grow_fields_arr(requested); - - NF = requested; - parse_high_water = requested; - } else - return &Null_field; - } - - return &fields_arr[requested]; -} - -/* set_element --- set an array element, used by do_split() */ - -static void -set_element(long num, char *s, long len, NODE *n) -{ - register NODE *it; - - it = make_string(s, len); - it->flags |= MAYBE_NUM; - *assoc_lookup(n, tmp_number((AWKNUM) (num)), FALSE) = it; -} - -/* do_split --- implement split(), semantics are same as for field splitting */ - -NODE * -do_split(NODE *tree) -{ - NODE *src, *arr, *sep, *tmp; - NODE *fs; - char *s; - long (*parseit) P((long, char **, int, NODE *, - Regexp *, Setfunc, NODE *)); - Regexp *rp = NULL; - - /* - * do dupnode(), to avoid problems like - * x = split(a[1], a, "blah") - * since we assoc_clear the array. gack. - * this also gives us complete call by value semantics. - */ - tmp = tree_eval(tree->lnode); - src = dupnode(tmp); - free_temp(tmp); - - arr = tree->rnode->lnode; - if (tree->rnode->rnode != NULL) - sep = tree->rnode->rnode->lnode; /* 3rd arg */ - else - sep = NULL; - - (void) force_string(src); - - if (arr->type == Node_param_list) - arr = stack_ptr[arr->param_cnt]; - if (arr->type == Node_array_ref) - arr = arr->orig_array; - if (arr->type != Node_var && arr->type != Node_var_array) - fatal(_("split: second argument is not an array")); - arr->type = Node_var_array; - assoc_clear(arr); - - if (src->stlen == 0) { - /* - * Skip the work if first arg is the null string. - * Check after clearing the array, to preserve - * correct semantics. - */ - tmp = tmp_number((AWKNUM) 0); - goto out; - } - - if ((sep->re_flags & FS_DFLT) != 0 && ! 
using_FIELDWIDTHS()) { - parseit = parse_field; - fs = force_string(FS_node->var_value); - rp = FS_regexp; - } else { - tmp = force_string(tree_eval(sep->re_exp)); - if (tmp->stlen == 0) { - static short warned = FALSE; - - parseit = null_parse_field; - - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("split: null string for third arg is a gawk extension")); - } - } else if (tmp->stlen == 1 && (sep->re_flags & CONST) == 0) { - if (tmp->stptr[0] == ' ') { - if (do_posix) - parseit = posix_def_parse_field; - else - parseit = def_parse_field; - } else - parseit = sc_parse_field; - } else { - parseit = re_parse_field; - rp = re_update(sep); - } - fs = tmp; - } - - s = src->stptr; - tmp = tmp_number((AWKNUM) (*parseit)(HUGE, &s, (int) src->stlen, - fs, rp, set_element, arr)); -out: - unref(src); - free_temp(sep); - return tmp; -} - -/* set_FIELDWIDTHS --- handle an assignment to FIELDWIDTHS */ - -void -set_FIELDWIDTHS() -{ - register char *scan; - char *end; - register int i; - static int fw_alloc = 1; - static int warned = FALSE; - extern double strtod(); - - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("`FIELDWIDTHS' is a gawk extension")); - } - if (do_traditional) /* quick and dirty, does the trick */ - return; - - /* - * If changing the way fields are split, obey least-suprise - * semantics, and force $0 to be split totally. - */ - if (fields_arr != NULL) - (void) get_field(HUGE - 1, 0); - - parse_field = fw_parse_field; - scan = force_string(FIELDWIDTHS_node->var_value)->stptr; - end = scan + 1; - if (FIELDWIDTHS == NULL) - emalloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); - FIELDWIDTHS[0] = 0; - for (i = 1; ; i++) { - if (i >= fw_alloc) { - fw_alloc *= 2; - erealloc(FIELDWIDTHS, int *, fw_alloc * sizeof(int), "set_FIELDWIDTHS"); - } - FIELDWIDTHS[i] = (int) strtod(scan, &end); - if (end == scan) - break; - if (FIELDWIDTHS[i] <= 0) - fatal(_("field %d in FIELDWIDTHS, must be > 0"), i); - scan = end; - } - FIELDWIDTHS[i] = -1; - - update_PROCINFO("FS", "FIELDWIDTHS"); -} - -void -set_FS_if_not_FIELDWIDTHS() -{ - if (parse_field != fw_parse_field) - set_FS(); -} - -/* set_FS --- handle things when FS is assigned to */ - -void -set_FS() -{ - char buf[10]; - NODE *fs; - static NODE *save_fs = NULL; - static NODE *save_rs = NULL; - - /* - * If changing the way fields are split, obey least-suprise - * semantics, and force $0 to be split totally. - */ - if (fields_arr != NULL) - (void) get_field(HUGE - 1, 0); - - if (! (save_fs && cmp_nodes(FS_node->var_value, save_fs) == 0 - && save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0)) { - unref(save_fs); - save_fs = dupnode(FS_node->var_value); - unref(save_rs); - save_rs = dupnode(RS_node->var_value); - resave_fs = TRUE; - if (FS_regexp) { - refree(FS_regexp); - FS_regexp = NULL; - } - } - buf[0] = '\0'; - default_FS = FALSE; - fs = force_string(FS_node->var_value); - if (! do_traditional && fs->stlen == 0) { - static short warned = FALSE; - - parse_field = null_parse_field; - - if (do_lint && ! 
warned) { - warned = TRUE; - lintwarn(_("null string for `FS' is a gawk extension")); - } - } else if (fs->stlen > 1) - parse_field = re_parse_field; - else if (RS_is_null) { - parse_field = sc_parse_field; - if (fs->stlen == 1) { - if (fs->stptr[0] == ' ') { - default_FS = TRUE; - strcpy(buf, "[ \t\n]+"); - } else if (fs->stptr[0] != '\n') - sprintf(buf, "[%c\n]", fs->stptr[0]); - } - } else { - if (do_posix) - parse_field = posix_def_parse_field; - else - parse_field = def_parse_field; - if (fs->stptr[0] == ' ' && fs->stlen == 1) - default_FS = TRUE; - else if (fs->stptr[0] != ' ' && fs->stlen == 1) { - if (! ISALPHA(fs->stptr[0]) || ! IGNORECASE) - parse_field = sc_parse_field; - else if (fs->stptr[0] == '\\') - /* yet another special case */ - strcpy(buf, "[\\\\]"); - else - sprintf(buf, "[%c]", fs->stptr[0]); - } - } - if (buf[0] != '\0') { - FS_regexp = make_regexp(buf, strlen(buf), IGNORECASE, TRUE); - parse_field = re_parse_field; - } else if (parse_field == re_parse_field) { - FS_regexp = make_regexp(fs->stptr, fs->stlen, IGNORECASE, TRUE); - } else - FS_regexp = NULL; - - update_PROCINFO("FS", "FS"); -} - -/* using_fieldwidths --- is FS or FIELDWIDTHS in use? */ - -int -using_fieldwidths() -{ - return using_FIELDWIDTHS(); -} - -/* update_PROCINFO --- update PROCINFO[sub] when FS or FIELDWIDTHS set */ - -static void -update_PROCINFO(char *subscript, char *str) -{ - NODE **aptr; - - if (PROCINFO_node == NULL) - return; - - aptr = assoc_lookup(PROCINFO_node, tmp_string(subscript, strlen(subscript)), FALSE); - assign_val(aptr, tmp_string(str, strlen(str))); -} diff --git a/contrib/awk/fixvers b/contrib/awk/fixvers deleted file mode 100755 index f7e419b..0000000 --- a/contrib/awk/fixvers +++ /dev/null @@ -1,58 +0,0 @@ -#! /bin/sh - -# fixvers --- make sure version.c and patchlev.h reflect -# the reality in configure.in -# -# Copyright (C) 2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - -line=`grep AM_INIT_AUTOMAKE configure.in` - -case $1 in --v) - VERS=`echo $line | sed 's/.*, *\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\))/\1.\2/'` - sed "s/X\.Y/$VERS/" < version.in > version.new - if cmp version.c version.new - then - : - else - mv version.new version.c - fi - touch version.c - ;; - --p) - LEV=`echo $line | sed 's/.*, *\([0-9][0-9]*\)\.\([0-9][0-9]*\)\.\([0-9][0-9]*\))/\3/'` - echo "#define PATCHLEVEL $LEV" > patchlev.new - if cmp patchlev.h patchlev.new - then - : - else - mv patchlev.new patchlev.h - fi - touch patchlev.h - ;; - -*) - echo "usage: $0 -v|-p" 1>&2 - exit 1 - ;; -esac - -exit 0 diff --git a/contrib/awk/gawkmisc.c b/contrib/awk/gawkmisc.c deleted file mode 100644 index 35e5bf9..0000000 --- a/contrib/awk/gawkmisc.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * gawkmisc.c --- miscellanious gawk routines that are OS specific. - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -#if defined(HAVE_FCNTL_H) -#include -#endif - -/* some old compilers don't grok #elif. sigh */ - -#if defined(MSDOS) || defined(OS2) || defined(WIN32) -#include "gawkmisc.pc" -#else /* not MSDOS, not OS2, not WIN32 */ -#if defined(VMS) -#include "vms/gawkmisc.vms" -#else /* not VMS */ -#if defined(atarist) -#include "unsupported/atari/gawkmisc.atr" -#else /* not atarist */ -#if defined(TANDEM) -#include "tmiscc" -#else /* not TANDEM */ -#include "posix/gawkmisc.c" -#endif /* not TANDEM */ -#endif /* not atarist */ -#endif /* not VMS */ -#endif /* not MSDOS, not OS2, not WIN32 */ - -/* xmalloc --- provide this so that other GNU library routines work */ - -#if __STDC__ -typedef void *pointer; -#else -typedef char *pointer; -#endif - -extern pointer xmalloc P((size_t bytes)); /* get rid of gcc warning */ - -pointer -xmalloc(size_t bytes) -{ - pointer p; - - emalloc(p, pointer, bytes, "xmalloc"); - - return p; -} diff --git a/contrib/awk/getopt.c b/contrib/awk/getopt.c deleted file mode 100644 index 0ecad38..0000000 --- a/contrib/awk/getopt.c +++ /dev/null @@ -1,1062 +0,0 @@ -/* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to drepper@gnu.org - before changing it! - - Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 2000 - Free Software Foundation, Inc. 
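The xmalloc() wrapper in gawkmisc.c above exists so that other GNU library routines linked into gawk can allocate memory through gawk's own error-checked allocator; emalloc() is gawk's allocation macro (defined in awk.h, not shown here) and raises a fatal error on failure, so xmalloc() never returns NULL. A standalone sketch of the equivalent behaviour, using hypothetical names:

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for gawk's emalloc()/xmalloc() pair:
 * allocate or die, so callers never need to check for NULL. */
static void *
xmalloc_sketch(size_t bytes)
{
	void *p = malloc(bytes);

	if (p == NULL) {
		fprintf(stderr, "xmalloc: cannot allocate %lu bytes of memory\n",
		    (unsigned long) bytes);
		exit(2);
	}
	return p;
}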
- - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* This tells Alpha OSF/1 not to define a getopt prototype in . - Ditto for AIX 3.2 and . */ -#ifndef _NO_PROTO -# define _NO_PROTO -#endif - -#ifdef HAVE_CONFIG_H -# include -#endif - -#if !defined __STDC__ || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -# ifndef const -# define const -# endif -#endif - -#include - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#define GETOPT_INTERFACE_VERSION 2 -#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 -# include -# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION -# define ELIDE_CODE -# endif -#endif - -#ifndef ELIDE_CODE - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. */ -#ifdef __GNU_LIBRARY__ -/* Don't include stdlib.h for non-GNU C libraries because some of them - contain conflicting prototypes for getopt. */ -# include -# include -#endif /* GNU C library. */ - -#ifdef VMS -# include -# if HAVE_STRING_H - 0 -# include -# endif -#endif - -#ifndef _ -/* This is for other GNU distributions with internationalized messages. */ -# if defined HAVE_LIBINTL_H || defined _LIBC -# include -# ifndef _ -# define _(msgid) gettext (msgid) -# endif -# else -# define _(msgid) (msgid) -# endif -#endif - -/* This version of `getopt' appears to the caller like standard Unix `getopt' - but it behaves differently for the user, since it allows the user - to intersperse the options with the other arguments. - - As `getopt' works, it permutes the elements of ARGV so that, - when it is done, all the options precede everything else. Thus - all application programs are extended to handle flexible argument order. - - Setting the environment variable POSIXLY_CORRECT disables permutation. - Then the behavior is completely standard. - - GNU application programs can use a third alternative mode in which - they can distinguish the relative order of options and other arguments. */ - -#include "getopt.h" - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -char *optarg; - -/* Index in ARGV of the next element to be scanned. 
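The long comment above describes how this getopt() permutes argv so that, by the time scanning finishes, all options precede the remaining operands, and how optarg and optind report results back to the caller. A minimal, hypothetical caller illustrating that contract (not part of the deleted sources):

#include <stdio.h>
#include <unistd.h>	/* declares getopt(), optarg, optind */

int
main(int argc, char **argv)
{
	int c;

	/* "ab:" accepts -a with no argument and -b with a required argument.
	 * A leading '+' in the option string (or POSIXLY_CORRECT in the
	 * environment) would stop at the first non-option instead of
	 * permuting argv. */
	while ((c = getopt(argc, argv, "ab:")) != -1) {
		switch (c) {
		case 'a':
			printf("option a\n");
			break;
		case 'b':
			printf("option b with value `%s'\n", optarg);
			break;
		default:	/* '?' for an unrecognized option */
			return 1;
		}
	}

	/* e.g. "prog file1 -a file2" is seen here as "prog -a file1 file2" */
	for (; optind < argc; optind++)
		printf("operand: %s\n", argv[optind]);

	return 0;
}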
- This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -/* 1003.2 says this must be 1 before any call. */ -int optind = 1; - -/* Formerly, initialization of getopt depended on optind==0, which - causes problems with re-calling getopt as programs generally don't - know that. */ - -int __getopt_initialized; - -/* The next char to be scanned in the option-element - in which the last option character we returned was found. - This allows us to pick up the scan where we left off. - - If this is zero, or a null string, it means resume the scan - by advancing to the next ARGV-element. */ - -static char *nextchar; - -/* Callers store zero here to inhibit the error message - for unrecognized options. */ - -int opterr = 1; - -/* Set to an option character which was unrecognized. - This must be initialized on some systems to avoid linking in the - system's own getopt implementation. */ - -int optopt = '?'; - -/* Describe how to deal with options that follow non-option ARGV-elements. - - If the caller did not specify anything, - the default is REQUIRE_ORDER if the environment variable - POSIXLY_CORRECT is defined, PERMUTE otherwise. - - REQUIRE_ORDER means don't recognize them as options; - stop option processing when the first non-option is seen. - This is what Unix does. - This mode of operation is selected by either setting the environment - variable POSIXLY_CORRECT, or using `+' as the first character - of the list of option characters. - - PERMUTE is the default. We permute the contents of ARGV as we scan, - so that eventually all the non-options are at the end. This allows options - to be given in any order, even with programs that were not written to - expect this. - - RETURN_IN_ORDER is an option available to programs that were written - to expect options and other ARGV-elements in any order and that care about - the ordering of the two. We describe each non-option ARGV-element - as if it were the argument of an option with character code 1. - Using `-' as the first character of the list of option characters - selects this mode of operation. - - The special argument `--' forces an end of option-scanning regardless - of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return -1 with `optind' != ARGC. */ - -static enum -{ - REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER -} ordering; - -/* Value of POSIXLY_CORRECT environment variable. */ -static char *posixly_correct; - -#ifdef __GNU_LIBRARY__ -/* We want to avoid inclusion of string.h with non-GNU libraries - because there are many ways it can cause trouble. - On some systems, it contains special magic macros that don't work - in GCC. */ -# include -# define my_index strchr -#else - -# if HAVE_STRING_H -# include -# else -# include -# endif - -/* Avoid depending on library functions or files - whose names are inconsistent. */ - -#ifndef getenv -extern char *getenv (); -#endif - -static char * -my_index (str, chr) - const char *str; - int chr; -{ - while (*str) - { - if (*str == chr) - return (char *) str; - str++; - } - return 0; -} - -/* If using GCC, we can safely declare strlen this way. - If not using GCC, it is ok not to declare it. 
*/ -#ifdef __GNUC__ -/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. - That was relevant to code that was here before. */ -# if (!defined __STDC__ || !__STDC__) && !defined strlen -/* gcc with -traditional declares the built-in strlen to return int, - and has done so at least since version 2.4.5. -- rms. */ -extern int strlen (const char *); -# endif /* not __STDC__ */ -#endif /* __GNUC__ */ - -#endif /* not __GNU_LIBRARY__ */ - -/* Handle permutation of arguments. */ - -/* Describe the part of ARGV that contains non-options that have - been skipped. `first_nonopt' is the index in ARGV of the first of them; - `last_nonopt' is the index after the last of them. */ - -static int first_nonopt; -static int last_nonopt; - -#ifdef _LIBC -/* Bash 2.0 gives us an environment variable containing flags - indicating ARGV elements that should not be considered arguments. */ - -/* Defined in getopt_init.c */ -extern char *__getopt_nonoption_flags; - -static int nonoption_flags_max_len; -static int nonoption_flags_len; - -static int original_argc; -static char *const *original_argv; - -/* Make sure the environment variable bash 2.0 puts in the environment - is valid for the getopt call we must make sure that the ARGV passed - to getopt is that one passed to the process. */ -static void -__attribute__ ((unused)) -store_args_and_env (int argc, char *const *argv) -{ - /* XXX This is no good solution. We should rather copy the args so - that we can compare them later. But we must not use malloc(3). */ - original_argc = argc; - original_argv = argv; -} -# ifdef text_set_element -text_set_element (__libc_subinit, store_args_and_env); -# endif /* text_set_element */ - -# define SWAP_FLAGS(ch1, ch2) \ - if (nonoption_flags_len > 0) \ - { \ - char __tmp = __getopt_nonoption_flags[ch1]; \ - __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ - __getopt_nonoption_flags[ch2] = __tmp; \ - } -#else /* !_LIBC */ -# define SWAP_FLAGS(ch1, ch2) -#endif /* _LIBC */ - -/* Exchange two adjacent subsequences of ARGV. - One subsequence is elements [first_nonopt,last_nonopt) - which contains all the non-options that have been skipped so far. - The other is elements [last_nonopt,optind), which contains all - the options processed since those non-options were skipped. - - `first_nonopt' and `last_nonopt' are relocated so that they describe - the new indices of the non-options in ARGV after they are moved. */ - -#if defined __STDC__ && __STDC__ -static void exchange (char **); -#endif - -static void -exchange (argv) - char **argv; -{ - int bottom = first_nonopt; - int middle = last_nonopt; - int top = optind; - char *tem; - - /* Exchange the shorter segment with the far end of the longer segment. - That puts the shorter segment into the right place. - It leaves the longer segment in the right place overall, - but it consists of two parts that need to be swapped next. */ - -#ifdef _LIBC - /* First make sure the handling of the `__getopt_nonoption_flags' - string can work normally. Our top argument must be in the range - of the string. */ - if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) - { - /* We must extend the array. The user plays games with us and - presents new arguments. 
*/ - char *new_str = malloc (top + 1); - if (new_str == NULL) - nonoption_flags_len = nonoption_flags_max_len = 0; - else - { - memset (__mempcpy (new_str, __getopt_nonoption_flags, - nonoption_flags_max_len), - '\0', top + 1 - nonoption_flags_max_len); - nonoption_flags_max_len = top + 1; - __getopt_nonoption_flags = new_str; - } - } -#endif - - while (top > middle && middle > bottom) - { - if (top - middle > middle - bottom) - { - /* Bottom segment is the short one. */ - int len = middle - bottom; - register int i; - - /* Swap it with the top part of the top segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[top - (middle - bottom) + i]; - argv[top - (middle - bottom) + i] = tem; - SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); - } - /* Exclude the moved bottom segment from further swapping. */ - top -= len; - } - else - { - /* Top segment is the short one. */ - int len = top - middle; - register int i; - - /* Swap it with the bottom part of the bottom segment. */ - for (i = 0; i < len; i++) - { - tem = argv[bottom + i]; - argv[bottom + i] = argv[middle + i]; - argv[middle + i] = tem; - SWAP_FLAGS (bottom + i, middle + i); - } - /* Exclude the moved top segment from further swapping. */ - bottom += len; - } - } - - /* Update records for the slots the non-options now occupy. */ - - first_nonopt += (optind - last_nonopt); - last_nonopt = optind; -} - -/* Initialize the internal data when the first call is made. */ - -#if defined __STDC__ && __STDC__ -static const char *_getopt_initialize (int, char *const *, const char *); -#endif -static const char * -_getopt_initialize (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - /* Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - first_nonopt = last_nonopt = optind; - - nextchar = NULL; - - posixly_correct = getenv ("POSIXLY_CORRECT"); - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - { - ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - ordering = REQUIRE_ORDER; - ++optstring; - } - else if (posixly_correct != NULL) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; - -#ifdef _LIBC - if (posixly_correct == NULL - && argc == original_argc && argv == original_argv) - { - if (nonoption_flags_max_len == 0) - { - if (__getopt_nonoption_flags == NULL - || __getopt_nonoption_flags[0] == '\0') - nonoption_flags_max_len = -1; - else - { - const char *orig_str = __getopt_nonoption_flags; - int len = nonoption_flags_max_len = strlen (orig_str); - if (nonoption_flags_max_len < argc) - nonoption_flags_max_len = argc; - __getopt_nonoption_flags = - (char *) malloc (nonoption_flags_max_len); - if (__getopt_nonoption_flags == NULL) - nonoption_flags_max_len = -1; - else - memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), - '\0', nonoption_flags_max_len - len); - } - } - nonoption_flags_len = nonoption_flags_max_len; - } - else - nonoption_flags_len = 0; -#endif - - return optstring; -} - -/* Scan elements of ARGV (whose length is ARGC) for option characters - given in OPTSTRING. - - If an element of ARGV starts with '-', and is not exactly "-" or "--", - then it is an option element. The characters of this element - (aside from the initial '-') are option characters. 
If `getopt' - is called repeatedly, it returns successively each of the option characters - from each of the option elements. - - If `getopt' finds another option character, it returns that character, - updating `optind' and `nextchar' so that the next call to `getopt' can - resume the scan with the following option character or ARGV-element. - - If there are no more option characters, `getopt' returns -1. - Then `optind' is the index in ARGV of the first ARGV-element - that is not an option. (The ARGV-elements have been permuted - so that those that are not options now come last.) - - OPTSTRING is a string containing the legitimate option characters. - If an option character is seen that is not listed in OPTSTRING, - return '?' after printing an error message. If you set `opterr' to - zero, the error message is suppressed but we still return '?'. - - If a char in OPTSTRING is followed by a colon, that means it wants an arg, - so the following text in the same ARGV-element, or the text of the following - ARGV-element, is returned in `optarg'. Two colons mean an option that - wants an optional arg; if there is text in the current ARGV-element, - it is returned in `optarg', otherwise `optarg' is set to zero. - - If OPTSTRING starts with `-' or `+', it requests different methods of - handling the non-option ARGV-elements. - See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. - - Long-named options begin with `--' instead of `-'. - Their names may be abbreviated as long as the abbreviation is unique - or is an exact match for some defined option. If they have an - argument, it follows the option name in the same ARGV-element, separated - from the option name by a `=', or else the in next ARGV-element. - When `getopt' finds a long-named option, it returns 0 if that option's - `flag' field is nonzero, the value of the option's `val' field - if the `flag' field is zero. - - The elements of ARGV aren't really const, because we permute them. - But we pretend they're const in the prototype to be compatible - with other systems. - - LONGOPTS is a vector of `struct option' terminated by an - element containing a name which is zero. - - LONGIND returns the index in LONGOPT of the long-named option found. - It is only valid when a long-named option has been found by the most - recent call. - - If LONG_ONLY is nonzero, '-' as well as '--' can introduce - long-named options. */ - -int -_getopt_internal (argc, argv, optstring, longopts, longind, long_only) - int argc; - char *const *argv; - const char *optstring; - const struct option *longopts; - int *longind; - int long_only; -{ - int print_errors = opterr; - if (optstring[0] == ':') - print_errors = 0; - - if (argc < 1) - return -1; - - optarg = NULL; - - if (optind == 0 || !__getopt_initialized) - { - if (optind == 0) - optind = 1; /* Don't scan ARGV[0], the program name. */ - optstring = _getopt_initialize (argc, argv, optstring); - __getopt_initialized = 1; - } - - /* Test whether ARGV[optind] points to a non-option argument. - Either it does not have option syntax, or there is an environment flag - from the shell indicating it is not an option. The later information - is only used when the used in the GNU libc. 
*/ -#ifdef _LIBC -# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ - || (optind < nonoption_flags_len \ - && __getopt_nonoption_flags[optind] == '1')) -#else -# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') -#endif - - if (nextchar == NULL || *nextchar == '\0') - { - /* Advance to the next ARGV-element. */ - - /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been - moved back by the user (who may also have changed the arguments). */ - if (last_nonopt > optind) - last_nonopt = optind; - if (first_nonopt > optind) - first_nonopt = optind; - - if (ordering == PERMUTE) - { - /* If we have just processed some options following some non-options, - exchange them so that the options come first. */ - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (last_nonopt != optind) - first_nonopt = optind; - - /* Skip any additional non-options - and extend the range of non-options previously skipped. */ - - while (optind < argc && NONOPTION_P) - optind++; - last_nonopt = optind; - } - - /* The special ARGV-element `--' means premature end of options. - Skip it like a null option, - then exchange with previous non-options as if it were an option, - then skip everything else like a non-option. */ - - if (optind != argc && !strcmp (argv[optind], "--")) - { - optind++; - - if (first_nonopt != last_nonopt && last_nonopt != optind) - exchange ((char **) argv); - else if (first_nonopt == last_nonopt) - first_nonopt = optind; - last_nonopt = argc; - - optind = argc; - } - - /* If we have done all the ARGV-elements, stop the scan - and back over any non-options that we skipped and permuted. */ - - if (optind == argc) - { - /* Set the next-arg-index to point at the non-options - that we previously skipped, so the caller will digest them. */ - if (first_nonopt != last_nonopt) - optind = first_nonopt; - return -1; - } - - /* If we have come to a non-option and did not permute it, - either stop the scan or describe it to the caller and pass it by. */ - - if (NONOPTION_P) - { - if (ordering == REQUIRE_ORDER) - return -1; - optarg = argv[optind++]; - return 1; - } - - /* We have found another option-ARGV-element. - Skip the initial punctuation. */ - - nextchar = (argv[optind] + 1 - + (longopts != NULL && argv[optind][1] == '-')); - } - - /* Decode the current option-ARGV-element. */ - - /* Check whether the ARGV-element is a long option. - - If long_only and the ARGV-element has the form "-f", where f is - a valid short option, don't consider it an abbreviated form of - a long option that starts with f. Otherwise there would be no - way to give the -f short option. - - On the other hand, if there's a long option "fubar" and - the ARGV-element is "-fu", do consider that an abbreviation of - the long option, just like "--fu", and not "-f" with arg "u". - - This distinction seems to be the most useful approach. */ - - if (longopts != NULL - && (argv[optind][1] == '-' - || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = -1; - int option_index; - - for (nameend = nextchar; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. 
*/ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if ((unsigned int) (nameend - nextchar) - == (unsigned int) strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else if (long_only - || pfound->has_arg != p->has_arg - || pfound->flag != p->flag - || pfound->val != p->val) - /* Second or later nonexact match found. */ - ambig = 1; - } - - if (ambig && !exact) - { - if (print_errors) - fprintf (stderr, _("%s: option `%s' is ambiguous\n"), - argv[0], argv[optind]); - nextchar += strlen (nextchar); - optind++; - optopt = 0; - return '?'; - } - - if (pfound != NULL) - { - option_index = indfound; - optind++; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (print_errors) - { - if (argv[optind - 1][1] == '-') - /* --option */ - fprintf (stderr, - _("%s: option `--%s' doesn't allow an argument\n"), - argv[0], pfound->name); - else - /* +option or -option */ - fprintf (stderr, - _("%s: option `%c%s' doesn't allow an argument\n"), - argv[0], argv[optind - 1][0], pfound->name); - } - - nextchar += strlen (nextchar); - - optopt = pfound->val; - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (print_errors) - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); - nextchar += strlen (nextchar); - optopt = pfound->val; - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - - /* Can't find it as a long option. If this is not getopt_long_only, - or the option starts with '--' or is not a valid short - option, then it's an error. - Otherwise interpret it as a short option. */ - if (!long_only || argv[optind][1] == '-' - || my_index (optstring, *nextchar) == NULL) - { - if (print_errors) - { - if (argv[optind][1] == '-') - /* --option */ - fprintf (stderr, _("%s: unrecognized option `--%s'\n"), - argv[0], nextchar); - else - /* +option or -option */ - fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), - argv[0], argv[optind][0], nextchar); - } - nextchar = (char *) ""; - optind++; - optopt = 0; - return '?'; - } - } - - /* Look at and handle the next short option-character. */ - - { - char c = *nextchar++; - char *temp = my_index (optstring, c); - - /* Increment `optind' when we start to process its last character. */ - if (*nextchar == '\0') - ++optind; - - if (temp == NULL || c == ':') - { - if (print_errors) - { - if (posixly_correct) - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, _("%s: illegal option -- %c\n"), - argv[0], c); - else - fprintf (stderr, _("%s: invalid option -- %c\n"), - argv[0], c); - } - optopt = c; - return '?'; - } - /* Convenience. Treat POSIX -W foo same as long option --foo */ - if (temp[0] == 'W' && temp[1] == ';') - { - char *nameend; - const struct option *p; - const struct option *pfound = NULL; - int exact = 0; - int ambig = 0; - int indfound = 0; - int option_index; - - /* This is an option that requires an argument. 
*/ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, _("%s: option requires an argument -- %c\n"), - argv[0], c); - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - return c; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. */ - optarg = argv[optind++]; - - /* optarg is now the argument, see if it's in the - table of longopts. */ - - for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) - /* Do nothing. */ ; - - /* Test all long options for either exact match - or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; p++, option_index++) - if (!strncmp (p->name, nextchar, nameend - nextchar)) - { - if ((unsigned int) (nameend - nextchar) == strlen (p->name)) - { - /* Exact match found. */ - pfound = p; - indfound = option_index; - exact = 1; - break; - } - else if (pfound == NULL) - { - /* First nonexact match found. */ - pfound = p; - indfound = option_index; - } - else - /* Second or later nonexact match found. */ - ambig = 1; - } - if (ambig && !exact) - { - if (print_errors) - fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), - argv[0], argv[optind]); - nextchar += strlen (nextchar); - optind++; - return '?'; - } - if (pfound != NULL) - { - option_index = indfound; - if (*nameend) - { - /* Don't test has_arg with >, because some C compilers don't - allow it to be used on enums. */ - if (pfound->has_arg) - optarg = nameend + 1; - else - { - if (print_errors) - fprintf (stderr, _("\ -%s: option `-W %s' doesn't allow an argument\n"), - argv[0], pfound->name); - - nextchar += strlen (nextchar); - return '?'; - } - } - else if (pfound->has_arg == 1) - { - if (optind < argc) - optarg = argv[optind++]; - else - { - if (print_errors) - fprintf (stderr, - _("%s: option `%s' requires an argument\n"), - argv[0], argv[optind - 1]); - nextchar += strlen (nextchar); - return optstring[0] == ':' ? ':' : '?'; - } - } - nextchar += strlen (nextchar); - if (longind != NULL) - *longind = option_index; - if (pfound->flag) - { - *(pfound->flag) = pfound->val; - return 0; - } - return pfound->val; - } - nextchar = NULL; - return 'W'; /* Let the application handle it. */ - } - if (temp[1] == ':') - { - if (temp[2] == ':') - { - /* This is an option that accepts an argument optionally. */ - if (*nextchar != '\0') - { - optarg = nextchar; - optind++; - } - else - optarg = NULL; - nextchar = NULL; - } - else - { - /* This is an option that requires an argument. */ - if (*nextchar != '\0') - { - optarg = nextchar; - /* If we end this ARGV-element by taking the rest as an arg, - we must advance to the next element now. */ - optind++; - } - else if (optind == argc) - { - if (print_errors) - { - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, - _("%s: option requires an argument -- %c\n"), - argv[0], c); - } - optopt = c; - if (optstring[0] == ':') - c = ':'; - else - c = '?'; - } - else - /* We already incremented `optind' once; - increment it again when taking next ARGV-elt as argument. 
*/ - optarg = argv[optind++]; - nextchar = NULL; - } - } - return c; - } -} - -int -getopt (argc, argv, optstring) - int argc; - char *const *argv; - const char *optstring; -{ - return _getopt_internal (argc, argv, optstring, - (const struct option *) 0, - (int *) 0, - 0); -} - -#endif /* Not ELIDE_CODE. */ - -#ifdef TEST - -/* Compile with -DTEST to make an executable for use in testing - the above definition of `getopt'. */ - -int -main (argc, argv) - int argc; - char **argv; -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? optind : 1; - - c = getopt (argc, argv, "abc:d:0123456789"); - if (c == -1) - break; - - switch (c) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/contrib/awk/getopt.h b/contrib/awk/getopt.h deleted file mode 100644 index 6439020..0000000 --- a/contrib/awk/getopt.h +++ /dev/null @@ -1,180 +0,0 @@ -/* Declarations for getopt. - Copyright (C) 1989,90,91,92,93,94,96,97,98,99 Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#ifndef _GETOPT_H - -#ifndef __need_getopt -# define _GETOPT_H 1 -#endif - -/* If __GNU_LIBRARY__ is not already defined, either we are being used - standalone, or this is the first header included in the source file. - If we are being used with glibc, we need to include , but - that does not exist if we are standalone. So: if __GNU_LIBRARY__ is - not defined, include , which will pull in for us - if it's from glibc. (Why ctype.h? It's guaranteed to exist and it - doesn't flood the namespace with stuff the way some other headers do.) */ -#if !defined __GNU_LIBRARY__ -# include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* For communication from `getopt' to the caller. - When `getopt' finds an option that takes an argument, - the argument value is returned here. - Also, when `ordering' is RETURN_IN_ORDER, - each non-option ARGV-element is returned here. */ - -extern char *optarg; - -/* Index in ARGV of the next element to be scanned. 
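The variables documented here (optarg, optind, plus the opterr/optopt pair declared a little further on) are the caller's whole interface for diagnosing errors: per the comments in getopt.c above, setting opterr to zero, or starting the option string with ':', suppresses getopt's own messages, a missing argument then comes back as ':' rather than '?', and optopt identifies the offending option character. A small hypothetical sketch of that error-handling convention:

#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	int c;

	/* The leading ':' suppresses getopt's diagnostics; optopt holds
	 * the option character at fault in both error cases. */
	while ((c = getopt(argc, argv, ":b:v")) != -1) {
		switch (c) {
		case 'b':
			printf("-b %s\n", optarg);
			break;
		case 'v':
			printf("-v\n");
			break;
		case ':':
			fprintf(stderr, "%s: -%c requires an argument\n",
			    argv[0], optopt);
			return 1;
		case '?':
			fprintf(stderr, "%s: unknown option -%c\n",
			    argv[0], optopt);
			return 1;
		}
	}
	return 0;
}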
- This is used for communication to and from the caller - and for communication between successive calls to `getopt'. - - On entry to `getopt', zero means this is the first call; initialize. - - When `getopt' returns -1, this is the index of the first of the - non-option elements that the caller should itself scan. - - Otherwise, `optind' communicates from one call to the next - how much of ARGV has been scanned so far. */ - -extern int optind; - -/* Callers store zero here to inhibit the error message `getopt' prints - for unrecognized options. */ - -extern int opterr; - -/* Set to an option character which was unrecognized. */ - -extern int optopt; - -#ifndef __need_getopt -/* Describe the long-named options requested by the application. - The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector - of `struct option' terminated by an element containing a name which is - zero. - - The field `has_arg' is: - no_argument (or 0) if the option does not take an argument, - required_argument (or 1) if the option requires an argument, - optional_argument (or 2) if the option takes an optional argument. - - If the field `flag' is not NULL, it points to a variable that is set - to the value given in the field `val' when the option is found, but - left unchanged if the option is not found. - - To have a long-named option do something other than set an `int' to - a compiled-in constant, such as set a value from `optarg', set the - option's `flag' field to zero and its `val' field to a nonzero - value (the equivalent single-letter option character, if there is - one). For long options that have a zero `flag' field, `getopt' - returns the contents of the `val' field. */ - -struct option -{ -# if defined __STDC__ && __STDC__ - const char *name; -# else - char *name; -# endif - /* has_arg can't be an enum because some compilers complain about - type mismatches in all the code that assumes it is an int. */ - int has_arg; - int *flag; - int val; -}; - -/* Names for the values of the `has_arg' field of `struct option'. */ - -# define no_argument 0 -# define required_argument 1 -# define optional_argument 2 -#endif /* need getopt */ - - -/* Get definitions and prototypes for functions to process the - arguments in ARGV (ARGC of them, minus the program name) for - options given in OPTS. - - Return the option character from OPTS just read. Return -1 when - there are no more options. For unrecognized options, or options - missing arguments, `optopt' is set to the option letter, and '?' is - returned. - - The OPTS string is a list of characters which are recognized option - letters, optionally followed by colons, specifying that that letter - takes an argument, to be placed in `optarg'. - - If a letter in OPTS is followed by two colons, its argument is - optional. This behavior is specific to the GNU `getopt'. - - The argument `--' causes premature termination of argument - scanning, explicitly telling `getopt' that there are no more - options. - - If OPTS begins with `--', then non-option arguments are treated as - arguments to the option '\0'. This behavior is specific to the GNU - `getopt'. */ - -#if defined __STDC__ && __STDC__ -# ifdef __GNU_LIBRARY__ -/* Many other libraries have conflicting prototypes for getopt, with - differences in the consts, in stdlib.h. To avoid compilation - errors, only prototype getopt for the GNU C library. 
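The struct option description above distinguishes long options whose `flag' field points at an int (getopt_long stores `val' there and returns 0) from those with a null `flag' (getopt_long returns `val' itself, conventionally the matching short option). A hypothetical sketch showing both kinds, assuming the prototypes declared below:

#include <stdio.h>
#include <getopt.h>

static int verbose_flag;	/* set through the `flag' field, no case needed */

int
main(int argc, char **argv)
{
	static const struct option longopts[] = {
		{ "verbose", no_argument,       &verbose_flag, 1 },
		{ "output",  required_argument, NULL,          'o' },
		{ NULL, 0, NULL, 0 }	/* terminating element: name is zero */
	};
	int c, longind;

	while ((c = getopt_long(argc, argv, "o:", longopts, &longind)) != -1) {
		switch (c) {
		case 0:		/* a flag-setting long option; nothing to do */
			break;
		case 'o':	/* --output=FILE, --output FILE, or -o FILE */
			printf("output file: %s\n", optarg);
			break;
		default:
			return 1;
		}
	}
	if (verbose_flag)
		printf("verbose mode\n");
	return 0;
}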
*/ -extern int getopt (int __argc, char *const *__argv, const char *__shortopts); -# else /* not __GNU_LIBRARY__ */ -extern int getopt (); -# endif /* __GNU_LIBRARY__ */ - -# ifndef __need_getopt -extern int getopt_long (int, char *const *, const char *, - const struct option *, int *); -extern int getopt_long_only (int , char *const *, - const char *, - const struct option *, int *); - -/* Internal only. Users should not call this directly. */ -extern int _getopt_internal (int __argc, char *const *__argv, - const char *__shortopts, - const struct option *__longopts, int *__longind, - int __long_only); -# endif -#else /* not __STDC__ */ -extern int getopt (); -# ifndef __need_getopt -extern int getopt_long (); -extern int getopt_long_only (); - -extern int _getopt_internal (); -# endif -#endif /* __STDC__ */ - -#ifdef __cplusplus -} -#endif - -/* Make sure we later can get all the definitions and declarations. */ -#undef __need_getopt - -#endif /* getopt.h */ diff --git a/contrib/awk/getopt1.c b/contrib/awk/getopt1.c deleted file mode 100644 index 3d264f2..0000000 --- a/contrib/awk/getopt1.c +++ /dev/null @@ -1,188 +0,0 @@ -/* getopt_long and getopt_long_only entry points for GNU getopt. - Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Library General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Library General Public License for more details. - - You should have received a copy of the GNU Library General Public - License along with the GNU C Library; see the file COPYING.LIB. If not, - write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include "getopt.h" - -#if !defined __STDC__ || !__STDC__ -/* This is a separate conditional since some stdc systems - reject `defined (const)'. */ -#ifndef const -#define const -#endif -#endif - -#include - -/* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling - and linking in this code is a waste when using the GNU C library - (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file. */ - -#define GETOPT_INTERFACE_VERSION 2 -#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 -#include -#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION -#define ELIDE_CODE -#endif -#endif - -#ifndef ELIDE_CODE - - -/* This needs to come after some library #include - to get __GNU_LIBRARY__ defined. 
*/ -#ifdef __GNU_LIBRARY__ -#include -#endif - -#ifndef NULL -#define NULL 0 -#endif - -int -getopt_long (argc, argv, options, long_options, opt_index) - int argc; - char *const *argv; - const char *options; - const struct option *long_options; - int *opt_index; -{ - return _getopt_internal (argc, argv, options, long_options, opt_index, 0); -} - -/* Like getopt_long, but '-' as well as '--' can indicate a long option. - If an option that starts with '-' (not '--') doesn't match a long option, - but does match a short option, it is parsed as a short option - instead. */ - -int -getopt_long_only (argc, argv, options, long_options, opt_index) - int argc; - char *const *argv; - const char *options; - const struct option *long_options; - int *opt_index; -{ - return _getopt_internal (argc, argv, options, long_options, opt_index, 1); -} - - -#endif /* Not ELIDE_CODE. */ - -#ifdef TEST - -#include - -int -main (argc, argv) - int argc; - char **argv; -{ - int c; - int digit_optind = 0; - - while (1) - { - int this_option_optind = optind ? optind : 1; - int option_index = 0; - static struct option long_options[] = - { - {"add", 1, 0, 0}, - {"append", 0, 0, 0}, - {"delete", 1, 0, 0}, - {"verbose", 0, 0, 0}, - {"create", 0, 0, 0}, - {"file", 1, 0, 0}, - {0, 0, 0, 0} - }; - - c = getopt_long (argc, argv, "abc:d:0123456789", - long_options, &option_index); - if (c == -1) - break; - - switch (c) - { - case 0: - printf ("option %s", long_options[option_index].name); - if (optarg) - printf (" with arg %s", optarg); - printf ("\n"); - break; - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - if (digit_optind != 0 && digit_optind != this_option_optind) - printf ("digits occur in two different argv-elements.\n"); - digit_optind = this_option_optind; - printf ("option %c\n", c); - break; - - case 'a': - printf ("option a\n"); - break; - - case 'b': - printf ("option b\n"); - break; - - case 'c': - printf ("option c with value `%s'\n", optarg); - break; - - case 'd': - printf ("option d with value `%s'\n", optarg); - break; - - case '?': - break; - - default: - printf ("?? getopt returned character code 0%o ??\n", c); - } - } - - if (optind < argc) - { - printf ("non-option ARGV-elements: "); - while (optind < argc) - printf ("%s ", argv[optind++]); - printf ("\n"); - } - - exit (0); -} - -#endif /* TEST */ diff --git a/contrib/awk/install-sh b/contrib/awk/install-sh deleted file mode 100755 index ab74c88..0000000 --- a/contrib/awk/install-sh +++ /dev/null @@ -1,238 +0,0 @@ -#!/bin/sh -# -# install - install a program, script, or datafile -# This comes from X11R5. -# -# Calling this script install-sh is preferred over install.sh, to prevent -# `make' implicit rules from creating a file called install from it -# when there is no Makefile. -# -# This script is compatible with the BSD install script, but was written -# from scratch. -# - - -# set DOITPROG to echo to test this script - -# Don't use :- since 4.3BSD and earlier shells don't like it. -doit="${DOITPROG-}" - - -# put in absolute paths if you don't have them in your path; or use env. vars. 
- -mvprog="${MVPROG-mv}" -cpprog="${CPPROG-cp}" -chmodprog="${CHMODPROG-chmod}" -chownprog="${CHOWNPROG-chown}" -chgrpprog="${CHGRPPROG-chgrp}" -stripprog="${STRIPPROG-strip}" -rmprog="${RMPROG-rm}" -mkdirprog="${MKDIRPROG-mkdir}" - -tranformbasename="" -transform_arg="" -instcmd="$mvprog" -chmodcmd="$chmodprog 0755" -chowncmd="" -chgrpcmd="" -stripcmd="" -rmcmd="$rmprog -f" -mvcmd="$mvprog" -src="" -dst="" -dir_arg="" - -while [ x"$1" != x ]; do - case $1 in - -c) instcmd="$cpprog" - shift - continue;; - - -d) dir_arg=true - shift - continue;; - - -m) chmodcmd="$chmodprog $2" - shift - shift - continue;; - - -o) chowncmd="$chownprog $2" - shift - shift - continue;; - - -g) chgrpcmd="$chgrpprog $2" - shift - shift - continue;; - - -s) stripcmd="$stripprog" - shift - continue;; - - -t=*) transformarg=`echo $1 | sed 's/-t=//'` - shift - continue;; - - -b=*) transformbasename=`echo $1 | sed 's/-b=//'` - shift - continue;; - - *) if [ x"$src" = x ] - then - src=$1 - else - # this colon is to work around a 386BSD /bin/sh bug - : - dst=$1 - fi - shift - continue;; - esac -done - -if [ x"$src" = x ] -then - echo "install: no input file specified" - exit 1 -else - true -fi - -if [ x"$dir_arg" != x ]; then - dst=$src - src="" - - if [ -d $dst ]; then - instcmd=: - else - instcmd=mkdir - fi -else - -# Waiting for this to be detected by the "$instcmd $src $dsttmp" command -# might cause directories to be created, which would be especially bad -# if $src (and thus $dsttmp) contains '*'. - - if [ -f $src -o -d $src ] - then - true - else - echo "install: $src does not exist" - exit 1 - fi - - if [ x"$dst" = x ] - then - echo "install: no destination specified" - exit 1 - else - true - fi - -# If destination is a directory, append the input filename; if your system -# does not like double slashes in filenames, you may need to add some logic - - if [ -d $dst ] - then - dst="$dst"/`basename $src` - else - true - fi -fi - -## this sed command emulates the dirname command -dstdir=`echo $dst | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'` - -# Make sure that the destination directory exists. -# this part is taken from Noah Friedman's mkinstalldirs script - -# Skip lots of stat calls in the usual case. -if [ ! -d "$dstdir" ]; then -defaultIFS=' -' -IFS="${IFS-${defaultIFS}}" - -oIFS="${IFS}" -# Some sh's can't handle IFS=/ for some reason. -IFS='%' -set - `echo ${dstdir} | sed -e 's@/@%@g' -e 's@^%@/@'` -IFS="${oIFS}" - -pathcomp='' - -while [ $# -ne 0 ] ; do - pathcomp="${pathcomp}${1}" - shift - - if [ ! -d "${pathcomp}" ] ; - then - $mkdirprog "${pathcomp}" - else - true - fi - - pathcomp="${pathcomp}/" -done -fi - -if [ x"$dir_arg" != x ] -then - $doit $instcmd $dst && - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dst; else true ; fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dst; else true ; fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dst; else true ; fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dst; else true ; fi -else - -# If we're going to rename the final executable, determine the name now. - - if [ x"$transformarg" = x ] - then - dstfile=`basename $dst` - else - dstfile=`basename $dst $transformbasename | - sed $transformarg`$transformbasename - fi - -# don't allow the sed command to completely eliminate the filename - - if [ x"$dstfile" = x ] - then - dstfile=`basename $dst` - else - true - fi - -# Make a temp file name in the proper directory. 
- - dsttmp=$dstdir/#inst.$$# - -# Move or copy the file name to the temp name - - $doit $instcmd $src $dsttmp && - - trap "rm -f ${dsttmp}" 0 && - -# and set any options; do chmod last to preserve setuid bits - -# If any of these fail, we abort the whole thing. If we want to -# ignore errors from any of these, just make sure not to ignore -# errors from the above "$doit $instcmd $src $dsttmp" command. - - if [ x"$chowncmd" != x ]; then $doit $chowncmd $dsttmp; else true;fi && - if [ x"$chgrpcmd" != x ]; then $doit $chgrpcmd $dsttmp; else true;fi && - if [ x"$stripcmd" != x ]; then $doit $stripcmd $dsttmp; else true;fi && - if [ x"$chmodcmd" != x ]; then $doit $chmodcmd $dsttmp; else true;fi && - -# Now rename the file to the real destination. - - $doit $rmcmd -f $dstdir/$dstfile && - $doit $mvcmd $dsttmp $dstdir/$dstfile - -fi && - - -exit 0 diff --git a/contrib/awk/io.c b/contrib/awk/io.c deleted file mode 100644 index 8d741e2..0000000 --- a/contrib/awk/io.c +++ /dev/null @@ -1,2400 +0,0 @@ -/* - * io.c --- routines for dealing with input and output and records - */ - -/* - * Copyright (C) 1976, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - -#include "awk.h" - -#ifdef HAVE_SYS_PARAM_H -#undef RE_DUP_MAX /* avoid spurious conflict w/regex.h */ -#include -#endif /* HAVE_SYS_PARAM_H */ - -#ifdef HAVE_SYS_WAIT_H -#include -#endif /* HAVE_SYS_WAIT_H */ - -#ifndef O_RDONLY -#include -#endif -#ifndef O_ACCMODE -#define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR) -#endif - -#ifdef HAVE_SOCKETS -#ifdef HAVE_SYS_SOCKET_H -#include -#else -#include -#endif /* HAVE_SYS_SOCKET_H */ -#ifdef HAVE_NETINET_IN_H -#include -#else -#include -#endif /* HAVE_NETINET_IN_H */ -#ifdef HAVE_NETDB_H -#include -#endif /* HAVE_NETDB_H */ -#endif /* HAVE_SOCKETS */ - -#if ! 
defined(S_ISREG) && defined(S_IFREG) -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#endif - -#ifndef ENFILE -#define ENFILE EMFILE -#endif - -extern int MRL; - -#ifdef HAVE_SOCKETS -enum inet_prot { INET_NONE, INET_TCP, INET_UDP, INET_RAW }; - -#ifndef SHUT_RD -#define SHUT_RD 0 -#endif - -#ifndef SHUT_WR -#define SHUT_WR 1 -#endif - -#ifndef SHUT_RDWR -#define SHUT_RDWR 2 -#endif - -#endif /* HAVE_SOCKETS */ - -#ifdef atarist -#include -#endif - -#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(TANDEM) -#define PIPES_SIMULATED -#endif - -typedef enum { CLOSE_ALL, CLOSE_TO, CLOSE_FROM } two_way_close_type; - -static IOBUF *nextfile P((int skipping)); -static int inrec P((IOBUF *iop)); -static int iop_close P((IOBUF *iop)); -struct redirect *redirect P((NODE *tree, int *errflg)); -static void close_one P((void)); -static int close_redir P((struct redirect *rp, int exitwarn, two_way_close_type how)); -#ifndef PIPES_SIMULATED -static int wait_any P((int interesting)); -#endif -static IOBUF *gawk_popen P((char *cmd, struct redirect *rp)); -static IOBUF *iop_open P((const char *file, const char *how, IOBUF *buf)); -static IOBUF *iop_alloc P((int fd, const char *name, IOBUF *buf)); -static int gawk_pclose P((struct redirect *rp)); -static int do_pathopen P((const char *file)); -static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); -static int str2mode P((const char *mode)); -static void spec_setup P((IOBUF *iop, int len, int allocate)); -static int specfdopen P((IOBUF *iop, const char *name, const char *mode)); -static int pidopen P((IOBUF *iop, const char *name, const char *mode)); -static int useropen P((IOBUF *iop, const char *name, const char *mode)); -static int two_way_open P((char *str, struct redirect *rp)); - -#if defined(HAVE_POPEN_H) -#include "popen.h" -#endif - -static struct redirect *red_head = NULL; -static NODE *RS; -static Regexp *RS_regexp; - -int RS_is_null; - -extern int output_is_tty; -extern NODE *ARGC_node; -extern NODE *ARGV_node; -extern NODE *ARGIND_node; -extern NODE *ERRNO_node; -extern NODE **fields_arr; - -static jmp_buf filebuf; /* for do_nextfile() */ - -#if defined(MSDOS) || defined(OS2) -static const char * -binmode(char *mode) -{ - switch (mode[0]) { - case 'r': - if ((BINMODE & 1) != 0) - mode = "rb"; - break; - case 'w': - case 'a': - if ((BINMODE & 2) != 0) - mode = (mode[0] == 'w' ? "wb" : "ab"); - break; - } - return mode; -} -#else -#define binmode(mode) (mode) -#endif - -#ifdef VMS -/* File pointers have an extra level of indirection, and there are cases where - `stdin' can be null. That can crash gawk if fileno() is used as-is. */ -static int vmsrtl_fileno P((FILE *)); -static int vmsrtl_fileno(fp) FILE *fp; { return fileno(fp); } -#undef fileno -#define fileno(FP) (((FP) && *(FP)) ? 
vmsrtl_fileno(FP) : -1) -#endif /* VMS */ - -/* do_nextfile --- implement gawk "nextfile" extension */ - -void -do_nextfile() -{ - (void) nextfile(TRUE); - longjmp(filebuf, 1); -} - -/* nextfile --- move to the next input data file */ - -static IOBUF * -nextfile(int skipping) -{ - static long i = 1; - static int files = 0; - NODE *arg; - static IOBUF *curfile = NULL; - static IOBUF mybuf; - const char *fname; - - if (skipping) { - if (curfile != NULL) - iop_close(curfile); - curfile = NULL; - return NULL; - } - if (curfile != NULL) { - if (curfile->cnt == EOF) { - (void) iop_close(curfile); - curfile = NULL; - } else - return curfile; - } - for (; i < (long) (ARGC_node->lnode->numbr); i++) { - arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i), FALSE); - if (arg->stlen == 0) - continue; - arg->stptr[arg->stlen] = '\0'; - if (! do_traditional) { - unref(ARGIND_node->var_value); - ARGIND_node->var_value = make_number((AWKNUM) i); - } - if (! arg_assign(arg->stptr)) { - files++; - fname = arg->stptr; - curfile = iop_open(fname, binmode("r"), &mybuf); - if (curfile == NULL) - goto give_up; - curfile->flag |= IOP_NOFREE_OBJ; - /* This is a kludge. */ - unref(FILENAME_node->var_value); - FILENAME_node->var_value = dupnode(arg); - FNR = 0; - i++; - break; - } - } - if (files == 0) { - files++; - /* no args. -- use stdin */ - /* FNR is init'ed to 0 */ - FILENAME_node->var_value = make_string("-", 1); - fname = "-"; - curfile = iop_open(fname, binmode("r"), &mybuf); - if (curfile == NULL) - goto give_up; - curfile->flag |= IOP_NOFREE_OBJ; - } - return curfile; - - give_up: - fatal(_("cannot open file `%s' for reading (%s)"), - fname, strerror(errno)); - /* NOTREACHED */ - return 0; -} - -/* set_FNR --- update internal FNR from awk variable */ - -void -set_FNR() -{ - FNR = (long) FNR_node->var_value->numbr; -} - -/* set_NR --- update internal NR from awk variable */ - -void -set_NR() -{ - NR = (long) NR_node->var_value->numbr; -} - -/* inrec --- This reads in a record from the input file */ - -static int -inrec(IOBUF *iop) -{ - char *begin; - register int cnt; - int retval = 0; - - if ((cnt = iop->cnt) != EOF) - cnt = get_a_record(&begin, iop, RS->stptr[0], RS_regexp, NULL); - if (cnt == EOF) { - cnt = 0; - retval = 1; - } else { - NR += 1; - FNR += 1; - set_record(begin, cnt, TRUE); - } - - return retval; -} - -/* iop_close --- close an open IOP */ - -static int -iop_close(IOBUF *iop) -{ - int ret; - - if (iop == NULL) - return 0; - errno = 0; - -#ifdef _CRAY - /* Work around bug in UNICOS popen */ - if (iop->fd < 3) - ret = 0; - else -#endif - /* save these for re-use; don't free the storage */ - if ((iop->flag & IOP_IS_INTERNAL) != 0) { - iop->off = iop->buf; - iop->end = iop->buf + strlen(iop->buf); - iop->cnt = 0; - iop->secsiz = 0; - return 0; - } - - /* Don't close standard files or else crufty code elsewhere will lose */ - if (iop->fd == fileno(stdin) - || iop->fd == fileno(stdout) - || iop->fd == fileno(stderr)) - ret = 0; - else - ret = close(iop->fd); - - if (ret == -1) - warning(_("close of fd %d (`%s') failed (%s)"), iop->fd, - iop->name, strerror(errno)); - if ((iop->flag & IOP_NO_FREE) == 0) { - /* - * Be careful -- $0 may still reference the buffer even though - * an explicit close is being done; in the future, maybe we - * can do this a bit better. 
- */ - if (iop->buf) { - if ((fields_arr[0]->stptr >= iop->buf) - && (fields_arr[0]->stptr < (iop->buf + iop->secsiz + iop->size))) { - NODE *t; - - t = make_string(fields_arr[0]->stptr, - fields_arr[0]->stlen); - unref(fields_arr[0]); - fields_arr[0] = t; - reset_record(); - } - free(iop->buf); - } - if ((iop->flag & IOP_NOFREE_OBJ) == 0) - free((char *) iop); - } - return ret == -1 ? 1 : 0; -} - -/* do_input --- the main input processing loop */ - -void -do_input() -{ - IOBUF *iop; - extern int exiting; - - (void) setjmp(filebuf); /* for `nextfile' */ - - while ((iop = nextfile(FALSE)) != NULL) { - if (inrec(iop) == 0) - while (interpret(expression_value) && inrec(iop) == 0) - continue; - if (exiting) - break; - } -} - -/* redflags2str --- turn redirection flags into a string, for debugging */ - -char * -redflags2str(int flags) -{ - static struct flagtab redtab[] = { - { RED_FILE, "RED_FILE" }, - { RED_PIPE, "RED_PIPE" }, - { RED_READ, "RED_READ" }, - { RED_WRITE, "RED_WRITE" }, - { RED_APPEND, "RED_APPEND" }, - { RED_NOBUF, "RED_NOBUF" }, - { RED_EOF, "RED_EOF" }, - { RED_TWOWAY, "RED_TWOWAY" }, - { RED_SOCKET, "RED_SOCKET" }, - { RED_TCP, "RED_TCP" }, - { 0, NULL } - }; - - return genflags2str(flags, redtab); -} - -/* redirect --- Redirection for printf and print commands */ - -struct redirect * -redirect(NODE *tree, int *errflg) -{ - register NODE *tmp; - register struct redirect *rp; - register char *str; - int tflag = 0; - int outflag = 0; - const char *direction = "to"; - const char *mode; - int fd; - const char *what = NULL; - - switch (tree->type) { - case Node_redirect_append: - tflag = RED_APPEND; - /* FALL THROUGH */ - case Node_redirect_output: - outflag = (RED_FILE|RED_WRITE); - tflag |= outflag; - if (tree->type == Node_redirect_output) - what = ">"; - else - what = ">>"; - break; - case Node_redirect_pipe: - tflag = (RED_PIPE|RED_WRITE); - what = "|"; - break; - case Node_redirect_pipein: - tflag = (RED_PIPE|RED_READ); - what = "|"; - break; - case Node_redirect_input: - tflag = (RED_FILE|RED_READ); - what = "<"; - break; - case Node_redirect_twoway: - tflag = (RED_READ|RED_WRITE|RED_TWOWAY); - what = "|&"; - break; - default: - fatal(_("invalid tree type %s in redirect()"), - nodetype2str(tree->type)); - break; - } - tmp = tree_eval(tree->subnode); - if (do_lint && (tmp->flags & STR) == 0) - lintwarn(_("expression in `%s' redirection only has numeric value"), - what); - tmp = force_string(tmp); - str = tmp->stptr; - - if (str == NULL || *str == '\0') - fatal(_("expression for `%s' redirection has null string value"), - what); - - if (do_lint - && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen))) - lintwarn(_("filename `%s' for `%s' redirection may be result of logical expression"), str, what); - -#ifdef HAVE_SOCKETS - if (STREQN(str, "/inet/", 6)) { - tflag |= RED_SOCKET; - if (STREQN(str + 6, "tcp/", 4)) - tflag |= RED_TCP; /* use shutdown when closing */ - } -#endif /* HAVE_SOCKETS */ - - for (rp = red_head; rp != NULL; rp = rp->next) { - if (strlen(rp->value) == tmp->stlen - && STREQN(rp->value, str, tmp->stlen) - && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag - || (outflag != 0 - && (rp->flag & (RED_FILE|RED_WRITE)) == outflag))) { - - int rpflag = (rp->flag & ~(RED_NOBUF|RED_EOF)); - int newflag = (tflag & ~(RED_NOBUF|RED_EOF)); - - if (do_lint && rpflag != newflag) - lintwarn( - _("unnecessary mixing of `>' and `>>' for file `%.*s'"), - tmp->stlen, rp->value); - - break; - } - } - - if (rp == NULL) { - emalloc(rp, struct redirect *, sizeof(struct 
redirect), - "redirect"); - emalloc(str, char *, tmp->stlen+1, "redirect"); - memcpy(str, tmp->stptr, tmp->stlen); - str[tmp->stlen] = '\0'; - rp->value = str; - rp->flag = tflag; - rp->fp = NULL; - rp->iop = NULL; - rp->pid = 0; /* unlikely that we're worried about init */ - rp->status = 0; - /* maintain list in most-recently-used first order */ - if (red_head != NULL) - red_head->prev = rp; - rp->prev = NULL; - rp->next = red_head; - red_head = rp; - } else - str = rp->value; /* get \0 terminated string */ - - while (rp->fp == NULL && rp->iop == NULL) { - if (rp->flag & RED_EOF) - /* - * encountered EOF on file or pipe -- must be cleared - * by explicit close() before reading more - */ - return rp; - mode = NULL; - errno = 0; - switch (tree->type) { - case Node_redirect_output: - mode = binmode("w"); - if ((rp->flag & RED_USED) != 0) - mode = (rp->mode[1] == 'b') ? "ab" : "a"; - break; - case Node_redirect_append: - mode = binmode("a"); - break; - case Node_redirect_pipe: - /* synchronize output before new pipe */ - (void) flush_io(); - - os_restore_mode(fileno(stdin)); - if ((rp->fp = popen(str, binmode("w"))) == NULL) - fatal(_("can't open pipe `%s' for output (%s)"), - str, strerror(errno)); - /* set close-on-exec */ - os_close_on_exec(fileno(rp->fp), str, "pipe", "to"); - rp->flag |= RED_NOBUF; - break; - case Node_redirect_pipein: - direction = "from"; - if (gawk_popen(str, rp) == NULL) - fatal(_("can't open pipe `%s' for input (%s)"), - str, strerror(errno)); - break; - case Node_redirect_input: - direction = "from"; - rp->iop = iop_open(str, binmode("r"), NULL); - break; - case Node_redirect_twoway: - direction = "to/from"; - if (!two_way_open(str, rp)) { -#ifdef HAVE_SOCKETS - /* multiple messages make life easier for translators */ - if (STREQN(str, "/inet/", 6)) - fatal(_("can't open two way socket `%s' for input/output (%s)"), - str, strerror(errno)); - else -#endif - fatal(_("can't open two way pipe `%s' for input/output (%s)"), - str, strerror(errno)); - } - break; - default: - cant_happen(); - } - if (mode != NULL) { - errno = 0; - fd = devopen(str, mode); - if (fd > INVALID_HANDLE) { - if (fd == fileno(stdin)) - rp->fp = stdin; - else if (fd == fileno(stdout)) - rp->fp = stdout; - else if (fd == fileno(stderr)) - rp->fp = stderr; - else { -#if defined(F_GETFL) && defined(O_APPEND) - int fd_flags; - - fd_flags = fcntl(fd, F_GETFL); - if (fd_flags != -1 && (fd_flags & O_APPEND) == O_APPEND) - rp->fp = fdopen(fd, binmode("a")); - else -#endif - rp->fp = fdopen(fd, (char *) mode); - rp->mode = (char *) mode; - /* don't leak file descriptors */ - if (rp->fp == NULL) - close(fd); - } - if (rp->fp != NULL && isatty(fd)) - rp->flag |= RED_NOBUF; - /* Move rp to the head of the list. */ - if (red_head != rp) { - if ((rp->prev->next = rp->next) != NULL) - rp->next->prev = rp->prev; - red_head->prev = rp; - rp->prev = NULL; - rp->next = red_head; - red_head = rp; - } - } - } - if (rp->fp == NULL && rp->iop == NULL) { - /* too many files open -- close one and try again */ - if (errno == EMFILE || errno == ENFILE) - close_one(); -#if defined __MINGW32__ || defined solaris - else if (errno == 0) /* HACK! */ - close_one(); -#endif -#ifdef VMS - /* Alpha/VMS V7.1's C RTL is returning this instead - of EMFILE (haven't tried other post-V6.2 systems) */ -#define SS$_EXQUOTA 0x001C - else if (errno == EIO && vaxc$errno == SS$_EXQUOTA) - close_one(); -#endif - else { - /* - * Some other reason for failure. - * - * On redirection of input from a file, - * just return an error, so e.g. 
getline - * can return -1. For output to file, - * complain. The shell will complain on - * a bad command to a pipe. - */ - if (errflg != NULL) - *errflg = errno; - if (tree->type == Node_redirect_output - || tree->type == Node_redirect_append) { - /* multiple messages make life easier for translators */ - if (*direction == 'f') - fatal(_("can't redirect from `%s' (%s)"), - str, strerror(errno)); - else - fatal(_("can't redirect to `%s' (%s)"), - str, strerror(errno)); - } else { - free_temp(tmp); - return NULL; - } - } - } - } - free_temp(tmp); - return rp; -} - -/* getredirect --- find the struct redirect for this file or pipe */ - -struct redirect * -getredirect(char *str, int len) -{ - struct redirect *rp; - - for (rp = red_head; rp != NULL; rp = rp->next) - if (strlen(rp->value) == len && STREQN(rp->value, str, len)) - return rp; - - return NULL; -} - -/* close_one --- temporarily close an open file to re-use the fd */ - -static void -close_one() -{ - register struct redirect *rp; - register struct redirect *rplast = NULL; - - static short warned = FALSE; - - if (do_lint && ! warned) { - warned = TRUE; - lintwarn(_("reached system limit for open files: starting to multiplex file descriptors")); - } - - /* go to end of list first, to pick up least recently used entry */ - for (rp = red_head; rp != NULL; rp = rp->next) - rplast = rp; - /* now work back up through the list */ - for (rp = rplast; rp != NULL; rp = rp->prev) - if (rp->fp != NULL && (rp->flag & RED_FILE) != 0) { - rp->flag |= RED_USED; - errno = 0; - if (/* do_lint && */ fclose(rp->fp) != 0) - warning(_("close of `%s' failed (%s)."), - rp->value, strerror(errno)); - rp->fp = NULL; - break; - } - if (rp == NULL) - /* surely this is the only reason ??? */ - fatal(_("too many pipes or input files open")); -} - -/* do_close --- completely close an open file or pipe */ - -NODE * -do_close(NODE *tree) -{ - NODE *tmp, *tmp2; - register struct redirect *rp; - two_way_close_type how = CLOSE_ALL; /* default */ - - tmp = force_string(tree_eval(tree->lnode)); /* 1st arg: redir to close */ - - if (tree->rnode != NULL) { - /* 2nd arg if present: "to" or "from" for two-way pipe */ - /* DO NOT use _() on the strings here! */ - tmp2 = force_string(tree->rnode->lnode); - if (strcasecmp(tmp2->stptr, "to") == 0) - how = CLOSE_TO; - else if (strcasecmp(tmp2->stptr, "from") == 0) - how = CLOSE_FROM; - else - fatal(_("close: second argument must be `to' or `from'")); - free_temp(tmp2); - } - - for (rp = red_head; rp != NULL; rp = rp->next) { - if (strlen(rp->value) == tmp->stlen - && STREQN(rp->value, tmp->stptr, tmp->stlen)) - break; - } - - if (rp == NULL) { /* no match, return -1 */ - char *cp; - - if (do_lint) - lintwarn(_("close: `%.*s' is not an open file, pipe or co-process"), - tmp->stlen, tmp->stptr); - - /* update ERRNO manually, using errno = ENOENT is a stretch. 
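The retry-on-EMFILE scheme implemented by redirect() and close_one() above lets an awk program hold more redirections than the process has file descriptors: when an open fails with EMFILE or ENFILE, the least recently used file redirection is quietly closed and the open is attempted again. A minimal stand-alone sketch of that pattern, with a single cached stream standing in for the tail of the redirection list (the names here are illustrative, not from gawk):

#include <errno.h>
#include <stdio.h>

static FILE *least_recent;	/* stand-in for the least recently used redirection */

static FILE *
open_with_retry(const char *path, const char *mode)
{
	FILE *fp = fopen(path, mode);

	if (fp == NULL && (errno == EMFILE || errno == ENFILE)
	    && least_recent != NULL) {
		fclose(least_recent);		/* give one descriptor back... */
		least_recent = NULL;
		fp = fopen(path, mode);		/* ...and try the open again */
	}
	return fp;
}

gawk itself keeps the full list in most-recently-used order and marks the victim RED_USED, so the file is later reopened in append mode and no already-written output is lost.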
*/ - cp = _("close of redirection that was never opened"); - unref(ERRNO_node->var_value); - ERRNO_node->var_value = make_string(cp, strlen(cp)); - - free_temp(tmp); - return tmp_number((AWKNUM) -1.0); - } - free_temp(tmp); - fflush(stdout); /* synchronize regular output */ - tmp = tmp_number((AWKNUM) close_redir(rp, FALSE, how)); - rp = NULL; - return tmp; -} - -/* close_redir --- close an open file or pipe */ - -static int -close_redir(register struct redirect *rp, int exitwarn, two_way_close_type how) -{ - int status = 0; - - if (rp == NULL) - return 0; - if (rp->fp == stdout || rp->fp == stderr) - return 0; - - if (do_lint && (rp->flag & RED_TWOWAY) == 0 && how != CLOSE_ALL) - lintwarn(_("close: redirection `%s' not opened with `|&', second argument ignored"), - rp->value); - - errno = 0; - if ((rp->flag & RED_TWOWAY) != 0) { /* two-way pipe */ - /* write end: */ - if ((how == CLOSE_ALL || how == CLOSE_TO) && rp->fp != NULL) { -#ifdef HAVE_SOCKETS - if ((rp->flag & RED_TCP) != 0) - (void) shutdown(fileno(rp->fp), SHUT_WR); -#endif /* HAVE_SOCKETS */ - status = fclose(rp->fp); - rp->fp = NULL; - } - - /* read end: */ - if (how == CLOSE_ALL || how == CLOSE_FROM) { - if ((rp->flag & RED_SOCKET) != 0 && rp->iop != NULL) { -#ifdef HAVE_SOCKETS - if ((rp->flag & RED_TCP) != 0) - (void) shutdown(rp->iop->fd, SHUT_RD); -#endif /* HAVE_SOCKETS */ - (void) iop_close(rp->iop); - } else - status = gawk_pclose(rp); - - rp->iop = NULL; - } - } else if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE)) { /* write to pipe */ - status = pclose(rp->fp); - if ((BINMODE & 1) != 0) - os_setbinmode(fileno(stdin), O_BINARY); - - rp->fp = NULL; - } else if (rp->fp != NULL) { /* write to file */ - status = fclose(rp->fp); - rp->fp = NULL; - } else if (rp->iop != NULL) { /* read from pipe/file */ - if ((rp->flag & RED_PIPE) != 0) /* read from pipe */ - status = gawk_pclose(rp); - /* gawk_pclose sets rp->iop to null */ - else { /* read from file */ - status = iop_close(rp->iop); - rp->iop = NULL; - } - } - - /* SVR4 awk checks and warns about status of close */ - if (status != 0) { - char *s = strerror(errno); - - /* - * Too many people have complained about this. - * As of 2.15.6, it is now under lint control. - */ - if (do_lint) { - if ((rp->flag & RED_PIPE) != 0) - lintwarn(_("failure status (%d) on pipe close of `%s' (%s)"), - status, rp->value, s); - else - lintwarn(_("failure status (%d) on file close of `%s' (%s)"), - status, rp->value, s); - } - - if (! do_traditional) { - /* set ERRNO too so that program can get at it */ - update_ERRNO(); - } - } - - if (exitwarn) { - /* - * Don't use lintwarn() here. If lint warnings are fatal, - * doing so prevents us from closing other open redirections. - * - * Using multiple full messages instead of string parameters - * for the types makes message translation easier. 
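For a `|&' co-process connected over a TCP socket, the close logic above uses shutdown() so that closing only the "to" end signals end-of-input to the peer while the "from" end stays readable. A minimal sketch of that half-close, assuming an already connected socket descriptor (the helper name is made up for illustration):

#include <sys/socket.h>

/* Close one direction of a connected socket without giving up the fd:
 * SHUT_WR tells the peer we are done writing (its reads see EOF),
 * SHUT_RD says we will not read anything further ourselves. */
static int
half_close(int sockfd, int done_writing)
{
	return shutdown(sockfd, done_writing ? SHUT_WR : SHUT_RD);
}

This is what lets a request/response style server see EOF, send its reply, and still have the awk program read that reply before the "from" end is closed.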
- */ - if ((rp->flag & RED_SOCKET) != 0) - warning(_("no explicit close of socket `%s' provided"), - rp->value); - else if ((rp->flag & RED_TWOWAY) != 0) - warning(_("no explicit close of co-process `%s' provided"), - rp->value); - else if ((rp->flag & RED_PIPE) != 0) - warning(_("no explicit close of pipe `%s' provided"), - rp->value); - else - warning(_("no explicit close of file `%s' provided"), - rp->value); - } - - /* remove it from the list if closing both or both ends have been closed */ - if (how == CLOSE_ALL || (rp->iop == NULL && rp->fp == NULL)) { - if (rp->next != NULL) - rp->next->prev = rp->prev; - if (rp->prev != NULL) - rp->prev->next = rp->next; - else - red_head = rp->next; - free(rp->value); - free((char *) rp); - } - - return status; -} - -/* flush_io --- flush all open output files */ - -int -flush_io() -{ - register struct redirect *rp; - int status = 0; - - errno = 0; - if (fflush(stdout)) { - warning(_("error writing standard output (%s)"), strerror(errno)); - status++; - } - if (fflush(stderr)) { - warning(_("error writing standard error (%s)"), strerror(errno)); - status++; - } - for (rp = red_head; rp != NULL; rp = rp->next) - /* flush both files and pipes, what the heck */ - if ((rp->flag & RED_WRITE) && rp->fp != NULL) { - if (fflush(rp->fp)) { - if (rp->flag & RED_PIPE) - warning(_("pipe flush of `%s' failed (%s)."), - rp->value, strerror(errno)); - else if (rp->flag & RED_TWOWAY) - warning(_("co-process flush of pipe to `%s' failed (%s)."), - rp->value, strerror(errno)); - else - warning(_("file flush of `%s' failed (%s)."), - rp->value, strerror(errno)); - status++; - } - } - if (status != 0) - status = -1; /* canonicalize it */ - return status; -} - -/* close_io --- close all open files, called when exiting */ - -int -close_io() -{ - register struct redirect *rp; - register struct redirect *next; - int status = 0; - - errno = 0; - for (rp = red_head; rp != NULL; rp = next) { - next = rp->next; - /* - * close_redir() will print a message if needed - * if do_lint, warn about lack of explicit close - */ - if (close_redir(rp, do_lint, CLOSE_ALL)) - status++; - rp = NULL; - } - /* - * Some of the non-Unix os's have problems doing an fclose - * on stdout and stderr. Since we don't really need to close - * them, we just flush them, and do that across the board. 
- */ - if (fflush(stdout)) { - warning(_("error writing standard output (%s)"), strerror(errno)); - status++; - } - if (fflush(stderr)) { - warning(_("error writing standard error (%s)"), strerror(errno)); - status++; - } - return status; -} - -/* str2mode --- convert a string mode to an integer mode */ - -static int -str2mode(const char *mode) -{ - int ret; - const char *second = & mode[1]; - - if (*second == 'b') - second++; - - switch(mode[0]) { - case 'r': - ret = O_RDONLY; - if (*second == '+' || *second == 'w') - ret = O_RDWR; - break; - - case 'w': - ret = O_WRONLY|O_CREAT|O_TRUNC; - if (*second == '+' || *second == 'r') - ret = O_RDWR|O_CREAT|O_TRUNC; - break; - - case 'a': - ret = O_WRONLY|O_APPEND|O_CREAT; - if (*second == '+') - ret = O_RDWR|O_APPEND|O_CREAT; - break; - - default: - ret = 0; /* lint */ - cant_happen(); - } - if (strchr(mode, 'b') != NULL) - ret |= O_BINARY; - return ret; -} - -#ifdef HAVE_SOCKETS -/* socketopen --- open a socket and set it into connected state */ - -int -socketopen(enum inet_prot type, int localport, int remoteport, char *remotehostname) -{ - struct hostent *hp = gethostbyname(remotehostname); - struct sockaddr_in local_addr, remote_addr; - int socket_fd; - int any_remote_host = strcmp(remotehostname, "0"); - - socket_fd = INVALID_HANDLE; - switch (type) { - case INET_TCP: - if (localport != 0 || remoteport != 0) { - int on = 1; -#ifdef SO_LINGER - struct linger linger; - - memset(& linger, '\0', sizeof(linger)); -#endif - socket_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, - (char *) & on, sizeof(on)); -#ifdef SO_LINGER - linger.l_onoff = 1; - linger.l_linger = 30; /* linger for 30/100 second */ - setsockopt(socket_fd, SOL_SOCKET, SO_LINGER, - (char *) & linger, sizeof(linger)); -#endif - } - break; - case INET_UDP: - if (localport != 0 || remoteport != 0) - socket_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - break; - case INET_RAW: -#ifdef SOCK_RAW - if (localport == 0 && remoteport == 0) - socket_fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW); -#endif - break; - case INET_NONE: - /* fall through */ - default: - cant_happen(); - break; - } - - if (socket_fd < 0 || socket_fd == INVALID_HANDLE - || (hp == NULL && any_remote_host != 0)) - return INVALID_HANDLE; - - local_addr.sin_family = remote_addr.sin_family = AF_INET; - local_addr.sin_addr.s_addr = htonl(INADDR_ANY); - remote_addr.sin_addr.s_addr = htonl(INADDR_ANY); - local_addr.sin_port = htons(localport); - remote_addr.sin_port = htons(remoteport); - if (bind(socket_fd, (struct sockaddr *) &local_addr, sizeof(local_addr)) == 0) { - if (any_remote_host != 0) { /* not ANY => create a client */ - if (type == INET_TCP || type == INET_UDP) { - memcpy(&remote_addr.sin_addr, hp->h_addr, - sizeof(remote_addr.sin_addr)); - if (connect(socket_fd, - (struct sockaddr *) &remote_addr, - sizeof(remote_addr)) != 0) { - close(socket_fd); - if (localport == 0) - socket_fd = INVALID_HANDLE; - else - socket_fd = socketopen(type, localport, 0, "0"); - } - } else { - /* /inet/raw client not ready yet */ - fatal(_("/inet/raw client not ready yet, sorry")); - if (geteuid() != 0) - fatal(_("only root may use `/inet/raw'.")); - } - } else { /* remote host is ANY => create a server */ - if (type == INET_TCP) { - int clientsocket_fd = INVALID_HANDLE; - int namelen = sizeof(remote_addr); - - if (listen(socket_fd, 1) >= 0 - && (clientsocket_fd = accept(socket_fd, - (struct sockaddr *) &remote_addr, - &namelen)) >= 0) { - close(socket_fd); - socket_fd = 
clientsocket_fd; - } else { - close(socket_fd); - socket_fd = INVALID_HANDLE; - } - } else if (type == INET_UDP) { - char buf[10]; - int readle; - -#ifdef MSG_PEEK - if (recvfrom(socket_fd, buf, 1, MSG_PEEK, - (struct sockaddr *) & remote_addr, - & readle) < 1 - || readle != sizeof(remote_addr) - || connect(socket_fd, - (struct sockaddr *)& remote_addr, - readle) != 0) { - close(socket_fd); - socket_fd = INVALID_HANDLE; - } -#endif - } else { - /* /inet/raw server not ready yet */ - fatal(_("/inet/raw server not ready yet, sorry")); - if (geteuid() != 0) - fatal(_("only root may use `/inet/raw'.")); - } - } - } else { - close(socket_fd); - socket_fd = INVALID_HANDLE; - } - - return socket_fd; -} -#endif /* HAVE_SOCKETS */ - -/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */ - -/* - * This separate version is still needed for output, since file and pipe - * output is done with stdio. iop_open() handles input with IOBUFs of - * more "special" files. Those files are not handled here since it makes - * no sense to use them for output. - */ - -/* - * Strictly speaking, "name" is not a "const char *" because we temporarily - * change the string. - */ - -int -devopen(const char *name, const char *mode) -{ - int openfd; - char *cp; - char *ptr; - int flag = 0; - extern double strtod(); - - flag = str2mode(mode); - - if (STREQ(name, "-")) - return fileno(stdin); - - openfd = INVALID_HANDLE; - - if (do_traditional) - goto strictopen; - - if ((openfd = os_devopen(name, flag)) != INVALID_HANDLE) { - os_close_on_exec(openfd, name, "file", ""); - return openfd; - } - - if (STREQN(name, "/dev/", 5)) { - cp = (char *) name + 5; - - if (STREQ(cp, "stdin") && (flag & O_ACCMODE) == O_RDONLY) - openfd = fileno(stdin); - else if (STREQ(cp, "stdout") && (flag & O_ACCMODE) == O_WRONLY) - openfd = fileno(stdout); - else if (STREQ(cp, "stderr") && (flag & O_ACCMODE) == O_WRONLY) - openfd = fileno(stderr); - else if (STREQN(cp, "fd/", 3)) { - cp += 3; - openfd = (int) strtod(cp, &ptr); - if (openfd <= INVALID_HANDLE || ptr == cp) - openfd = INVALID_HANDLE; - } - /* do not set close-on-exec for inherited fd's */ - if (openfd != INVALID_HANDLE) - return openfd; - } else if (STREQN(name, "/inet/", 6)) { -#ifdef HAVE_SOCKETS - /* /inet/protocol/localport/hostname/remoteport */ - enum inet_prot protocol = INET_NONE; - int localport, remoteport; - char *hostname; - char *hostnameslastcharp; - char *localpname; - char proto[4]; - struct servent *service; - - cp = (char *) name + 6; - /* which protocol? */ - if (STREQN(cp, "tcp/", 4)) - protocol = INET_TCP; - else if (STREQN(cp, "udp/", 4)) - protocol = INET_UDP; - else if (STREQN(cp, "raw/", 4)) - protocol = INET_RAW; - else - fatal(_("no (known) protocol supplied in special filename `%s'"), - name); - - proto[0] = cp[0]; - proto[1] = cp[1]; - proto[2] = cp[2]; - proto[3] = '\0'; - cp += 4; - - /* which localport? */ - localpname = cp; - while (*cp != '/' && *cp != '\0') - cp++; - /* - * Require a port, let them explicitly put 0 if - * they don't care. - */ - if (*cp != '/' || cp == localpname) - fatal(_("special file name `%s' is incomplete"), name); - /* We change the special file name temporarily because we - * need a 0-terminated string here for conversion with atoi(). - * By using atoi() the use of decimal numbers is enforced. 
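The comment just above, and the code that follows, rely on a small in-place parsing trick for the /inet/protocol/localport/hostname/remoteport filenames: the '/' after a component is temporarily overwritten with '\0' so atoi() and getservbyname() see a terminated string, and the byte is restored afterwards. A stand-alone sketch of the idea (hypothetical helper, not taken from the patch):

#include <stdlib.h>

/* Convert the component [start, end) in place; *end is the separator
 * that follows it.  The separator is restored before returning, so the
 * caller's string is left exactly as it was. */
static long
component_to_long(char *start, char *end)
{
	char saved = *end;
	long val;

	*end = '\0';		/* temporarily terminate the component */
	val = atol(start);
	*end = saved;		/* put the separator back */
	return val;
}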
- */ - *cp = '\0'; - - localport = atoi(localpname); - if (strcmp(localpname, "0") != 0 - && (localport <= 0 || localport > 65535)) { - service = getservbyname(localpname, proto); - if (service == NULL) - fatal(_("local port invalid in `%s'"), name); - else - localport = ntohs(service->s_port); - } - *cp = '/'; - - /* which hostname? */ - cp++; - hostname = cp; - while (*cp != '/' && *cp != '\0') - cp++; - if (*cp != '/' || cp == hostname) - fatal(_("must supply a remote hostname to `/inet'")); - *cp = '\0'; - hostnameslastcharp = cp; - - /* which remoteport? */ - cp++; - /* - * The remote port ends the special file name. - * This means there already is a 0 at the end of the string. - * Therefore no need to patch any string ending. - * - * Here too, require a port, let them explicitly put 0 if - * they don't care. - */ - if (*cp == '\0') - fatal(_("must supply a remote port to `/inet'")); - remoteport = atoi(cp); - if (strcmp(cp, "0") != 0 - && (remoteport <= 0 || remoteport > 65535)) { - service = getservbyname(cp, proto); - if (service == NULL) - fatal(_("remote port invalid in `%s'"), name); - else - remoteport = ntohs(service->s_port); - } - - /* Open Sesame! */ - openfd = socketopen(protocol, localport, remoteport, hostname); - *hostnameslastcharp = '/'; - -#else /* ! HAVE_SOCKETS */ - fatal(_("TCP/IP communications are not supported")); -#endif /* HAVE_SOCKETS */ - } - -strictopen: - if (openfd == INVALID_HANDLE) - openfd = open(name, flag, 0666); - if (openfd != INVALID_HANDLE) { - if (os_isdir(openfd)) - fatal(_("file `%s' is a directory"), name); - - os_close_on_exec(openfd, name, "file", ""); - } - return openfd; -} - - -/* spec_setup --- setup an IOBUF for a special internal file */ - -static void -spec_setup(IOBUF *iop, int len, int allocate) -{ - char *cp; - - if (allocate) { - emalloc(cp, char *, len+2, "spec_setup"); - iop->buf = cp; - } else { - len = strlen(iop->buf); - iop->buf[len++] = '\n'; /* get_a_record clobbered it */ - iop->buf[len] = '\0'; /* just in case */ - } - iop->off = iop->buf; - iop->cnt = 0; - iop->secsiz = 0; - iop->size = len; - iop->end = iop->buf + len; - iop->fd = -1; - iop->flag = IOP_IS_INTERNAL; -} - -/* specfdopen --- open an fd special file */ - -static int -specfdopen(IOBUF *iop, const char *name, const char *mode) -{ - int fd; - IOBUF *tp; - - fd = devopen(name, mode); - if (fd == INVALID_HANDLE) - return INVALID_HANDLE; - tp = iop_alloc(fd, name, NULL); - if (tp == NULL) { - /* don't leak fd's */ - close(fd); - return INVALID_HANDLE; - } - *iop = *tp; - iop->flag |= IOP_NO_FREE; - free(tp); - return 0; -} - -#ifdef GETPGRP_VOID -#define getpgrp_arg() /* nothing */ -#else -#define getpgrp_arg() getpid() -#endif - -/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */ - -static int -pidopen(IOBUF *iop, const char *name, const char *mode) -{ - char tbuf[BUFSIZ]; - int i; - const char *cp = name + 5; - - warning(_("use `PROCINFO[\"%s\"]' instead of `%s'"), cp, name); - - if (name[6] == 'g') - sprintf(tbuf, "%d\n", (int) getpgrp(getpgrp_arg())); - else if (name[6] == 'i') - sprintf(tbuf, "%d\n", (int) getpid()); - else - sprintf(tbuf, "%d\n", (int) getppid()); - i = strlen(tbuf); - spec_setup(iop, i, TRUE); - strcpy(iop->buf, tbuf); - return 0; -} - -/* useropen --- "open" /dev/user */ - -/* - * /dev/user creates a record as follows: - * $1 = getuid() - * $2 = geteuid() - * $3 = getgid() - * $4 = getegid() - * If multiple groups are supported, then $5 through $NF are the - * supplementary group set. 
- */ - -static int -useropen(IOBUF *iop, const char *name, const char *mode) -{ - char tbuf[BUFSIZ], *cp; - int i; -#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 - GETGROUPS_T groupset[NGROUPS_MAX]; - int ngroups; -#endif - - warning(_("use `PROCINFO[...]' instead of `/dev/user'")); - - sprintf(tbuf, "%d %d %d %d", (int) getuid(), (int) geteuid(), (int) getgid(), (int) getegid()); - - cp = tbuf + strlen(tbuf); -#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 - ngroups = getgroups(NGROUPS_MAX, groupset); - if (ngroups == -1) - fatal(_("could not find groups: %s"), strerror(errno)); - - for (i = 0; i < ngroups; i++) { - *cp++ = ' '; - sprintf(cp, "%d", (int) groupset[i]); - cp += strlen(cp); - } -#endif - *cp++ = '\n'; - *cp++ = '\0'; - - i = strlen(tbuf); - spec_setup(iop, i, TRUE); - strcpy(iop->buf, tbuf); - return 0; -} - -/* iop_open --- handle special and regular files for input */ - -static IOBUF * -iop_open(const char *name, const char *mode, IOBUF *iop) -{ - int openfd = INVALID_HANDLE; - int flag = 0; - static struct internal { - const char *name; - int compare; - int (*fp) P((IOBUF *, const char *, const char *)); - IOBUF iob; - } table[] = { - { "/dev/fd/", 8, specfdopen }, - { "/dev/stdin", 10, specfdopen }, - { "/dev/stdout", 11, specfdopen }, - { "/dev/stderr", 11, specfdopen }, - { "/inet/", 6, specfdopen }, - { "/dev/pid", 8, pidopen }, - { "/dev/ppid", 9, pidopen }, - { "/dev/pgrpid", 11, pidopen }, - { "/dev/user", 9, useropen }, - }; - int devcount = sizeof(table) / sizeof(table[0]); - - flag = str2mode(mode); - - if (STREQ(name, "-")) - openfd = fileno(stdin); - else if (do_traditional) - goto strictopen; - else if (STREQN(name, "/dev/", 5) || STREQN(name, "/inet/", 6)) { - int i; - - for (i = 0; i < devcount; i++) { - if (STREQN(name, table[i].name, table[i].compare)) { - iop = & table[i].iob; - - if (iop->buf != NULL) { - spec_setup(iop, 0, FALSE); - return iop; - } else if ((*table[i].fp)(iop, name, mode) == 0) - return iop; - else { - warning(_("could not open `%s', mode `%s'"), - name, mode); - return NULL; - } - } - } - /* not in table, fall through to regular code */ - } - -strictopen: - if (openfd == INVALID_HANDLE) - openfd = open(name, flag, 0666); - if (openfd != INVALID_HANDLE) { - if (os_isdir(openfd)) - fatal(_("file `%s' is a directory"), name); - - os_close_on_exec(openfd, name, "file", ""); - } - return iop_alloc(openfd, name, iop); -} - -/* two_way_open --- open a two way communications channel */ - -static int -two_way_open(char *str, struct redirect *rp) -{ -#ifdef HAVE_SOCKETS - /* case 1: socket */ - if (STREQN(str, "/inet/", 6)) { - int fd, newfd; - - fd = devopen(str, "rw"); - if (fd == INVALID_HANDLE) - return FALSE; - rp->fp = fdopen(fd, "w"); - if (rp->fp == NULL) { - close(fd); - return FALSE; - } - newfd = dup(fd); - if (newfd < 0) { - fclose(rp->fp); - return FALSE; - } - os_close_on_exec(newfd, str, "socket", "to/from"); - rp->iop = iop_alloc(newfd, str, NULL); - if (rp->iop == NULL) { - fclose(rp->fp); - return FALSE; - } - rp->flag |= RED_SOCKET; - return TRUE; - } -#endif /* HAVE_SOCKETS */ - -#ifdef HAVE_PORTALS - /* case 1.5: portal */ - if (STREQN(str, "/p/", 3)) { - int fd, newfd; - - fd = open(str, O_RDWR); - if (fd == INVALID_HANDLE) - return FALSE; - rp->fp = fdopen(fd, "w"); - if (rp->fp == NULL) { - close(fd); - return FALSE; - } - newfd = dup(fd); - if (newfd < 0) { - fclose(rp->fp); - return FALSE; - } - os_close_on_exec(newfd, str, "portal", "to/from"); - rp->iop = iop_alloc(newfd, str, NULL); - if (rp->iop == NULL) { - 
fclose(rp->fp); - return FALSE; - } - rp->flag |= RED_SOCKET; - return TRUE; - } -#endif /* HAVE_PORTALS */ - -#ifndef PIPES_SIMULATED /* real pipes */ - /* case 2: two way pipe to a child process */ - { - int ptoc[2], ctop[2]; - int pid; - int save_errno; - - if (pipe(ptoc) < 0) - return FALSE; /* errno set, diagnostic from caller */ - - if (pipe(ctop) < 0) { - save_errno = errno; - close(ptoc[0]); - close(ptoc[1]); - errno = save_errno; - return FALSE; - } - - if ((pid = fork()) < 0) { - save_errno = errno; - close(ptoc[0]); close(ptoc[1]); - close(ctop[0]); close(ctop[1]); - errno = save_errno; - return FALSE; - } - - if (pid == 0) { /* child */ - if (close(1) == -1) - fatal(_("close of stdout in child failed (%s)"), - strerror(errno)); - if (dup(ctop[1]) != 1) - fatal(_("moving pipe to stdout in child failed (dup: %s)"), strerror(errno)); - if (close(0) == -1) - fatal(_("close of stdin in child failed (%s)"), - strerror(errno)); - if (dup(ptoc[0]) != 0) - fatal(_("moving pipe to stdin in child failed (dup: %s)"), strerror(errno)); - if ( close(ptoc[0]) == -1 || close(ptoc[1]) == -1 - || close(ctop[0]) == -1 || close(ctop[1]) == -1) - fatal(_("close of pipe failed (%s)"), strerror(errno)); - /* stderr does NOT get dup'ed onto child's stdout */ - execl("/bin/sh", "sh", "-c", str, NULL); - _exit(127); - } - - /* parent */ - rp->pid = pid; - rp->iop = iop_alloc(ctop[0], str, NULL); - if (rp->iop == NULL) { - (void) close(ctop[0]); - (void) close(ctop[1]); - (void) close(ptoc[0]); - (void) close(ptoc[1]); - (void) kill(pid, SIGKILL); /* overkill? (pardon pun) */ - return FALSE; - } - rp->fp = fdopen(ptoc[1], "w"); - if (rp->fp == NULL) { - iop_close(rp->iop); - rp->iop = NULL; - (void) close(ctop[0]); - (void) close(ctop[1]); - (void) close(ptoc[0]); - (void) close(ptoc[1]); - (void) kill(pid, SIGKILL); /* overkill? 
(pardon pun) */ - return FALSE; - } - if (fcntl(ctop[0], F_SETFD, 1) < 0) { - warning(_("pipe from `%s': could not set close-on-exec (fcntl: %s)"), - str, strerror(errno));; - } - if (fcntl(ptoc[1], F_SETFD, 1) < 0) { - warning(_("pipe to `%s': could not set close-on-exec (fcntl: %s)"), - str, strerror(errno));; - } - (void) close(ptoc[0]); - (void) close(ctop[1]); - return TRUE; - } - -#else /*PIPES_SIMULATED*/ - - fatal(_("`|&' not supported")); - /*NOTREACHED*/ - return FALSE; - -#endif -} - -#ifndef PIPES_SIMULATED /* real pipes */ - -/* wait_any --- wait for a child process, close associated pipe */ - -static int -wait_any(int interesting) /* pid of interest, if any */ -{ - RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)(); - int pid; - int status = 0; - struct redirect *redp; - extern int errno; - - hstat = signal(SIGHUP, SIG_IGN); - istat = signal(SIGINT, SIG_IGN); - qstat = signal(SIGQUIT, SIG_IGN); - for (;;) { -#ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */ - pid = wait(&status); -#else - pid = wait((union wait *)&status); -#endif /* NeXT */ - if (interesting && pid == interesting) { - break; - } else if (pid != -1) { - for (redp = red_head; redp != NULL; redp = redp->next) - if (pid == redp->pid) { - redp->pid = -1; - redp->status = status; - break; - } - } - if (pid == -1 && errno == ECHILD) - break; - } - signal(SIGHUP, hstat); - signal(SIGINT, istat); - signal(SIGQUIT, qstat); - return(status); -} - -/* gawk_popen --- open an IOBUF on a child process */ - -static IOBUF * -gawk_popen(char *cmd, struct redirect *rp) -{ - int p[2]; - register int pid; - - /* - * used to wait for any children to synchronize input and output, - * but this could cause gawk to hang when it is started in a pipeline - * and thus has a child process feeding it input (shell dependant) - */ - /*(void) wait_any(0);*/ /* wait for outstanding processes */ - - if (pipe(p) < 0) - fatal(_("cannot open pipe `%s' (%s)"), cmd, strerror(errno)); - if ((pid = fork()) == 0) { - if (close(1) == -1) - fatal(_("close of stdout in child failed (%s)"), - strerror(errno)); - if (dup(p[1]) != 1) - fatal(_("moving pipe to stdout in child failed (dup: %s)"), strerror(errno)); - if (close(p[0]) == -1 || close(p[1]) == -1) - fatal(_("close of pipe failed (%s)"), strerror(errno)); - execl("/bin/sh", "sh", "-c", cmd, NULL); - _exit(127); - } - if (pid == -1) - fatal(_("cannot create child process for `%s' (fork: %s)"), cmd, strerror(errno)); - rp->pid = pid; - if (close(p[1]) == -1) - fatal(_("close of pipe failed (%s)"), strerror(errno)); - os_close_on_exec(p[0], cmd, "pipe", "from"); - rp->iop = iop_alloc(p[0], cmd, NULL); - if (rp->iop == NULL) - (void) close(p[0]); - return (rp->iop); -} - -/* gawk_pclose --- close an open child pipe */ - -static int -gawk_pclose(struct redirect *rp) -{ - if (rp->iop != NULL) - (void) iop_close(rp->iop); - rp->iop = NULL; - - /* process previously found, return stored status */ - if (rp->pid == -1) - return (rp->status >> 8) + ((rp->status &0xFF) ? 128 + (rp->status & 0xF) : 0); - rp->status = wait_any(rp->pid); - rp->pid = -1; - return (rp->status >> 8) + ((rp->status &0xFF) ? 
128 + (rp->status & 0xF) : 0); -} - -#else /* PIPES_SIMULATED */ - -/* - * use temporary file rather than pipe - * except if popen() provides real pipes too - */ - -#if defined(VMS) || defined(OS2) || defined (MSDOS) || defined(WIN32) || defined(TANDEM) - -/* gawk_popen --- open an IOBUF on a child process */ - -static IOBUF * -gawk_popen(char *cmd, struct redirect *rp) -{ - FILE *current; - - os_restore_mode(fileno(stdin)); - current = popen(cmd, binmode("r")); - if ((BINMODE & 1) != 0) - os_setbinmode(fileno(stdin), O_BINARY); - if (current == NULL) - return NULL; - os_close_on_exec(fileno(current), cmd, "pipe", "from"); - rp->iop = iop_alloc(fileno(current), cmd, NULL); - if (rp->iop == NULL) { - (void) pclose(current); - current = NULL; - } - rp->ifp = current; - return (rp->iop); -} - -/* gawk_pclose --- close an open child pipe */ - -static int -gawk_pclose(struct redirect *rp) -{ - int rval, aval, fd = rp->iop->fd; - - if (rp->iop != NULL) { - rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */ - rval = iop_close(rp->iop); - } - rp->iop = NULL; - aval = pclose(rp->ifp); - rp->ifp = NULL; - return (rval < 0 ? rval : aval); -} -#else /* not (VMS || OS2 || MSDOS || TANDEM) */ - -static struct pipeinfo { - char *command; - char *name; -} pipes[_NFILE]; - -/* gawk_popen --- open an IOBUF on a child process */ - -static IOBUF * -gawk_popen(char *cmd, struct redirect *rp) -{ - extern char *strdup P((const char *)); - int current; - char *name; - static char cmdbuf[256]; - - /* get a name to use */ - if ((name = tempnam(".", "pip")) == NULL) - return NULL; - sprintf(cmdbuf, "%s > %s", cmd, name); - system(cmdbuf); - if ((current = open(name, O_RDONLY)) == INVALID_HANDLE) - return NULL; - pipes[current].name = name; - pipes[current].command = strdup(cmd); - os_close_on_exec(current, cmd, "pipe", "from"); - rp->iop = iop_alloc(current, name, NULL); - if (rp->iop == NULL) - (void) close(current); - return (rp->iop); -} - -/* gawk_pclose --- close an open child pipe */ - -static int -gawk_pclose(struct redirect *rp) -{ - int cur = rp->iop->fd; - int rval = 0; - - if (rp->iop != NULL) - rval = iop_close(rp->iop); - rp->iop = NULL; - - /* check for an open file */ - if (pipes[cur].name == NULL) - return -1; - unlink(pipes[cur].name); - free(pipes[cur].name); - pipes[cur].name = NULL; - free(pipes[cur].command); - return rval; -} -#endif /* not (VMS || OS2 || MSDOS || TANDEM) */ - -#endif /* PIPES_SIMULATED */ - -/* do_getline --- read in a line, into var and with redirection, as needed */ - -NODE * -do_getline(NODE *tree) -{ - struct redirect *rp = NULL; - IOBUF *iop; - int cnt = EOF; - char *s = NULL; - int errcode; - - while (cnt == EOF) { - if (tree->rnode == NULL) { /* no redirection */ - iop = nextfile(FALSE); - if (iop == NULL) /* end of input */ - return tmp_number((AWKNUM) 0.0); - } else { - int redir_error = 0; - - rp = redirect(tree->rnode, &redir_error); - if (rp == NULL && redir_error) { /* failed redirect */ - if (! do_traditional) - update_ERRNO(); - - return tmp_number((AWKNUM) -1.0); - } - iop = rp->iop; - if (iop == NULL) /* end of input */ - return tmp_number((AWKNUM) 0.0); - } - errcode = 0; - cnt = get_a_record(&s, iop, RS->stptr[0], RS_regexp, &errcode); - if (errcode != 0) { - if (! do_traditional) - update_ERRNO(); - - return tmp_number((AWKNUM) -1.0); - } - if (cnt == EOF) { - if (rp != NULL) { - /* - * Don't do iop_close() here if we are - * reading from a pipe; otherwise - * gawk_pclose will not be called. 
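The pipe-close path referred to just above (gawk_pclose) folds the raw status from wait() into one exit value using shift-and-mask arithmetic. The standard <sys/wait.h> macros expose the same information; here is a small sketch of roughly equivalent decoding, yielding the child's exit code for a normal exit and a 128-plus-signal value otherwise (stand-alone example, not from the patch):

#include <sys/types.h>
#include <sys/wait.h>

/* Turn a raw wait() status into a shell-style exit value. */
static int
decode_status(int status)
{
	if (WIFEXITED(status))
		return WEXITSTATUS(status);	/* child exited normally */
	if (WIFSIGNALED(status))
		return 128 + WTERMSIG(status);	/* child died on a signal */
	return -1;				/* stopped or still running */
}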
- */ - if ((rp->flag & (RED_PIPE|RED_TWOWAY)) == 0) { - (void) iop_close(iop); - rp->iop = NULL; - } - rp->flag |= RED_EOF; /* sticky EOF */ - return tmp_number((AWKNUM) 0.0); - } else - continue; /* try another file */ - } - if (rp == NULL) { - NR++; - FNR++; - } - if (tree->lnode == NULL) /* no optional var. */ - set_record(s, cnt, TRUE); - else { /* assignment to variable */ - Func_ptr after_assign = NULL; - NODE **lhs; - - lhs = get_lhs(tree->lnode, &after_assign, FALSE); - unref(*lhs); - *lhs = make_string(s, cnt); - (*lhs)->flags |= MAYBE_NUM; - /* we may have to regenerate $0 here! */ - if (after_assign != NULL) - (*after_assign)(); - } - } - return tmp_number((AWKNUM) 1.0); -} - -/* pathopen --- pathopen with default file extension handling */ - -int -pathopen(const char *file) -{ - int fd = do_pathopen(file); - -#ifdef DEFAULT_FILETYPE - if (! do_traditional && fd <= INVALID_HANDLE) { - char *file_awk; - int save = errno; -#ifdef VMS - int vms_save = vaxc$errno; -#endif - - /* append ".awk" and try again */ - emalloc(file_awk, char *, strlen(file) + - sizeof(DEFAULT_FILETYPE) + 1, "pathopen"); - sprintf(file_awk, "%s%s", file, DEFAULT_FILETYPE); - fd = do_pathopen(file_awk); - free(file_awk); - if (fd <= INVALID_HANDLE) { - errno = save; -#ifdef VMS - vaxc$errno = vms_save; -#endif - } - } -#endif /*DEFAULT_FILETYPE*/ - - return fd; -} - -/* do_pathopen --- search $AWKPATH for source file */ - -static int -do_pathopen(const char *file) -{ - static const char *savepath = NULL; - static int first = TRUE; - const char *awkpath; - char *cp, trypath[BUFSIZ]; - int fd; - - if (STREQ(file, "-")) - return (0); - - if (do_traditional) - return (devopen(file, "r")); - - if (first) { - first = FALSE; - if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath) - savepath = awkpath; /* used for restarting */ - else - savepath = defpath; - } - awkpath = savepath; - - /* some kind of path name, no search */ - if (ispath(file)) - return (devopen(file, "r")); - - do { - trypath[0] = '\0'; - /* this should take into account limits on size of trypath */ - for (cp = trypath; *awkpath && *awkpath != envsep; ) - *cp++ = *awkpath++; - - if (cp != trypath) { /* nun-null element in path */ - /* add directory punctuation only if needed */ - if (! isdirpunct(*(cp-1))) - *cp++ = '/'; - /* append filename */ - strcpy(cp, file); - } else - strcpy(trypath, file); - if ((fd = devopen(trypath, "r")) > INVALID_HANDLE) - return (fd); - - /* no luck, keep going */ - if(*awkpath == envsep && awkpath[1] != '\0') - awkpath++; /* skip colon */ - } while (*awkpath != '\0'); - /* - * You might have one of the awk paths defined, WITHOUT the current - * working directory in it. Therefore try to open the file in the - * current directory. - */ - return (devopen(file, "r")); -} - -#ifdef TEST -int bufsize = 8192; - -void -fatal(char *s) -{ - printf("%s\n", s); - exit(1); -} -#endif - -/* iop_alloc --- allocate an IOBUF structure for an open fd */ - -static IOBUF * -iop_alloc(int fd, const char *name, IOBUF *iop) -{ - struct stat sbuf; - - if (fd == INVALID_HANDLE) - return NULL; - if (iop == NULL) - emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc"); - iop->flag = 0; - if (isatty(fd)) - iop->flag |= IOP_IS_TTY; - iop->size = optimal_bufsize(fd, & sbuf); - if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) - lintwarn(_("data file `%s' is empty"), name); - iop->secsiz = -2; - errno = 0; - iop->fd = fd; - iop->off = iop->buf = NULL; - iop->cnt = 0; - iop->name = name; - return iop; -} - -#define set_RT_to_null() \ - (void)(! 
do_traditional && (unref(RT_node->var_value), \ - RT_node->var_value = Nnull_string)) - -#define set_RT(str, len) \ - (void)(! do_traditional && (unref(RT_node->var_value), \ - RT_node->var_value = make_string(str, len))) - -/* - * get_a_record: - * Get the next record. Uses a "split buffer" where the latter part is - * the normal read buffer and the head part is an "overflow" area that is used - * when a record spans the end of the normal buffer, in which case the first - * part of the record is copied into the overflow area just before the - * normal buffer. Thus, the eventual full record can be returned as a - * contiguous area of memory with a minimum of copying. The overflow area - * is expanded as needed, so that records are unlimited in length. - * We also mark both the end of the buffer and the end of the read() with - * a sentinel character (the current record separator) so that the inside - * loop can run as a single test. - * - * Note that since we know or can compute the end of the read and the end - * of the buffer, the sentinel character does not get in the way of regexp - * based searching, since we simply search up to that character, but not - * including it. - */ - -static int -get_a_record(char **out, /* pointer to pointer to data */ - IOBUF *iop, /* input IOP */ - register int grRS, /* first char in RS->stptr */ - Regexp *RSre, /* regexp for RS */ - int *errcode) /* pointer to error variable */ -{ - register char *bp = iop->off; - char *bufend; - char *start = iop->off; /* beginning of record */ - int rs; - static Regexp *RS_null_re = NULL; - Regexp *rsre = NULL; - int continuing = FALSE, continued = FALSE; /* used for re matching */ - int onecase; - -#ifdef TANDEM - char *mend; -#endif - -#ifdef TANDEM -#define not_past_end() (bp < mend) -#else -#define not_past_end() (1) -#endif - - /* first time through */ - if (RS_null_re == NULL) { - RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE); - if (RS_null_re == NULL) - fatal(_("internal error: file `%s', line %d\n"), - __FILE__, __LINE__); - } - - if (iop->cnt == EOF) { /* previous read hit EOF */ - *out = NULL; - set_RT_to_null(); - return EOF; - } - -#ifdef TANDEM - if (MRL) - mend = start + MRL; - else - mend = (char *) LONG_MAX; -#endif - - if (RS_is_null) /* special case: RS == "" */ - rs = '\n'; - else - rs = (char) grRS; - - onecase = (IGNORECASE && ISALPHA(rs)); - if (onecase) - rs = casetable[(unsigned char) rs]; - - /* set up sentinel */ - if (iop->buf) { - bufend = iop->buf + iop->size + iop->secsiz; - *bufend = rs; /* add sentinel to buffer */ - } else - bufend = NULL; - - for (;;) { /* break on end of record, read error or EOF */ -/* buffer mgmt, chunk #1 */ - /* - * Following code is entered on the first call of this routine - * for a new iop, or when we scan to the end of the buffer. - * In the latter case, we copy the current partial record to - * the space preceding the normal read buffer. If necessary, - * we expand this space. This is done so that we can return - * the record as a contiguous area of memory. 
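The sentinel described in the comment above is what lets the scanning loop in get_a_record() test a single condition per character: the record separator is stored just past the valid data, so the loop always stops, and hitting the sentinel rather than real data is detected afterwards by comparing against the end of the buffer. A minimal, self-contained sketch of the idea (names are illustrative only):

#include <stddef.h>

/* buf[len] must be writable scratch space.  Returns the offset of the
 * first occurrence of sep, or len if sep is absent from buf[0..len-1]. */
static size_t
sentinel_scan(char *buf, size_t len, int sep)
{
	char *p = buf;

	buf[len] = (char) sep;		/* plant the sentinel */
	while (*p != (char) sep)	/* single-test inner loop */
		p++;
	return (size_t) (p - buf);	/* == len means only the sentinel matched */
}

get_a_record() applies the same trick with RS, planting it both at the very end of the buffer and just past the data returned by read().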
- */ - if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) { - char *oldbuf = NULL; - char *oldsplit = iop->buf + iop->secsiz; - long len; /* record length so far */ - - len = bp - start; - if (len > iop->secsiz) { - /* expand secondary buffer */ - if (iop->secsiz == -2) - iop->secsiz = 256; - while (len > iop->secsiz) - iop->secsiz *= 2; - oldbuf = iop->buf; - emalloc(iop->buf, char *, - iop->size+iop->secsiz+2, "get_a_record"); - bufend = iop->buf + iop->size + iop->secsiz; - *bufend = rs; - } - if (len > 0) { - char *newsplit = iop->buf + iop->secsiz; - - if (start < oldsplit) { - memcpy(newsplit - len, start, - oldsplit - start); - memcpy(newsplit - (bp - oldsplit), - oldsplit, bp - oldsplit); - } else - memcpy(newsplit - len, start, len); - } - bp = iop->end = iop->off = iop->buf + iop->secsiz; - start = bp - len; -#ifdef TANDEM - if (MRL) - mend = start + MRL; -#endif - if (oldbuf != NULL) { - free(oldbuf); - oldbuf = NULL; - } - } -/* buffer mgmt, chunk #2 */ - /* - * Following code is entered whenever we have no more data to - * scan. In most cases this will read into the beginning of - * the main buffer, but in some cases (terminal, pipe etc.) - * we may be doing smallish reads into more advanced positions. - */ - if (bp >= iop->end) { - if ((iop->flag & IOP_IS_INTERNAL) != 0) { - iop->cnt = EOF; - break; - } - iop->cnt = read(iop->fd, iop->end, bufend - iop->end); - if (iop->cnt == -1) { - if (! do_traditional && errcode != NULL) { - *errcode = errno; - iop->cnt = EOF; - break; - } else - fatal(_("error reading input file `%s': %s"), - iop->name, strerror(errno)); - } else if (iop->cnt == 0) { - /* - * hit EOF before matching RS, so end - * the record and set RT to "" - */ - iop->cnt = EOF; - /* see comments below about this test */ - if (! continuing) { - set_RT_to_null(); - break; - } - } - if (iop->cnt != EOF) { - iop->end += iop->cnt; - *iop->end = rs; /* reset the sentinel */ - } - } -/* buffers are now setup and filled with data */ -/* search for RS, #1, regexp based, or RS = "" */ - /* - * Attempt to simplify the code a bit. The case where - * RS = "" can also be described by a regexp, RS = "\n\n+". - * The buffer managment and searching code can thus now - * use a common case (the one for regexps) both when RS is - * a regexp, and when RS = "". This particularly benefits - * us for keeping track of how many newlines were matched - * in order to set RT. - */ - if (! do_traditional && RSre != NULL) /* regexp */ - rsre = RSre; - else if (RS_is_null) /* RS = "" */ - rsre = RS_null_re; - else - rsre = NULL; - - /* - * Look for regexp match of RS. Non-match conditions are: - * 1. No match at all - * 2. Match of a null string - * 3. Match ends at exact end of buffer - * Number 3 is subtle; we have to add more to the buffer - * in case the match would have extended further into the - * file, since regexp match by definition always matches the - * longest possible match. - * - * It is even more subtle than you might think. Suppose - * the re matches at exactly the end of file. We don't know - * that until we try to add more to the buffer. Thus, we - * set a flag to indicate, that if eof really does happen, - * don't break early. - * - * Still more subtlety. Suppose RS is a multi-character regexp, - * but it doesn't have the metacharacters that would let it - * match an arbitrary number of characters. So it's an exact - * string match. We need to check for this, in the case where - * there is an exact match at the end, and NOT read more - * data. 
Otherwise, this might bite us for an interactive - * networking program that uses CR-LF as the line terminator. - */ - continuing = FALSE; - if (rsre != NULL) { - again: - /* cases 1 and 2 are simple, just keep going */ - if (research(rsre, start, 0, iop->end - start, TRUE) == -1 - || RESTART(rsre, start) == REEND(rsre, start)) { - /* - * Leading newlines at the beginning of the file - * should be ignored. Whew! - */ - if (RS_is_null && *start == '\n') { - /* - * have to catch the case of a - * single newline at the front of - * the record, which the regex - * doesn't. gurr. - */ - while (*start == '\n' && start < iop->end) - start++; - goto again; - } - bp = iop->end; - continue; - } - /* case 3, regex match at exact end */ - if (start + REEND(rsre, start) >= iop->end) { - if (iop->cnt != EOF) { - /* - * Only do the test if not at EOF - */ - int isstring; - - isstring = reisstring(RS->stptr, - RS->stlen, rsre, start); - if (isstring == FALSE) { - bp = iop->end; - continuing = continued = TRUE; - continue; - } - } - } - /* got a match! */ - /* - * Leading newlines at the beginning of the file - * should be ignored. Whew! - */ - if (RS_is_null && *start == '\n') { - /* - * have to catch the case of a - * single newline at the front of - * the record, which the regex - * doesn't. gurr. - */ - while (*start == '\n' && start < iop->end) - start++; - goto again; - } - bp = start + RESTART(rsre, start); - set_RT(bp, REEND(rsre, start) - RESTART(rsre, start)); - *bp = '\0'; - iop->off = start + REEND(rsre, start); - break; - } -/* search for RS, #2, RS = */ - if (onecase) { - while (casetable[(unsigned char) *bp++] != rs && not_past_end()) - continue; - } else { - while (*bp++ != rs && not_past_end()) - continue; - } - set_RT(bp - 1, 1); - - if (bp <= iop->end) - break; - else - bp--; - - if ((iop->flag & IOP_IS_INTERNAL) != 0) - iop->cnt = bp - start; - } - if (iop->cnt == EOF - && (((iop->flag & IOP_IS_INTERNAL) != 0) - || (start == bp && ! continued))) { - *out = NULL; - set_RT_to_null(); - return EOF; - } - - if (do_traditional || rsre == NULL) { - iop->off = bp; - bp--; - if (onecase ? casetable[(unsigned char) *bp] != rs : *bp != rs) - bp++; - if (MRL == 0) - *bp = '\0'; - } else if (RS_is_null && iop->cnt == EOF) { - /* - * special case, delete trailing newlines, - * should never be more than one. - */ - while (bp[-1] == '\n') - bp--; - *bp = '\0'; - } - - *out = start; - return bp - start; -} - -#ifdef TEST -int -main(int argc, char *argv[]) -{ - IOBUF *iop; - char *out; - int cnt; - char rs[2]; - - rs[0] = '\0'; - if (argc > 1) - bufsize = atoi(argv[1]); - if (argc > 2) - rs[0] = *argv[2]; - iop = iop_alloc(0, "stdin", NULL); - while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) { - fwrite(out, 1, cnt, stdout); - fwrite(rs, 1, 1, stdout); - } - return 0; -} -#endif - -/* set_RS --- update things as appropriate when RS is set */ - -void -set_RS() -{ - static NODE *save_rs = NULL; - - if (save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0) - return; - unref(save_rs); - save_rs = dupnode(RS_node->var_value); - RS_is_null = FALSE; - RS = force_string(RS_node->var_value); - if (RS_regexp != NULL) { - refree(RS_regexp); - RS_regexp = NULL; - } - if (RS->stlen == 0) - RS_is_null = TRUE; - else if (RS->stlen > 1) { - static int warned = FALSE; - - RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE); - - if (do_lint && ! 
warned) { - lintwarn(_("multicharacter value of `RS' is a gawk extension")); - warned = TRUE; - } - } - - set_FS_if_not_FIELDWIDTHS(); -} diff --git a/contrib/awk/main.c b/contrib/awk/main.c deleted file mode 100644 index 1b9b415..0000000 --- a/contrib/awk/main.c +++ /dev/null @@ -1,1006 +0,0 @@ -/* - * main.c -- Expression tree constructors and main program for gawk. - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - -#include "awk.h" -#include "getopt.h" -#ifdef TANDEM -#include "ptchlvl.h" /* blech */ -#else -#include "patchlev.h" -#endif - -#ifndef O_BINARY -#include -#endif - -#ifdef HAVE_MCHECK_H -#include -#endif - -#define DEFAULT_PROFILE "awkprof.out" /* where to put profile */ -#define DEFAULT_VARFILE "awkvars.out" /* where to put vars */ - -static char *varfile = DEFAULT_VARFILE; - -static void usage P((int exitval, FILE *fp)); -static void copyleft P((void)); -static void cmdline_fs P((char *str)); -static void init_args P((int argc0, int argc, char *argv0, char **argv)); -static void init_vars P((void)); -static void pre_assign P((char *v)); -RETSIGTYPE catchsig P((int sig, int code)); -static void nostalgia P((void)); -static void version P((void)); -static void init_fds P((void)); - -/* These nodes store all the special variables AWK uses */ -NODE *ARGC_node, *ARGIND_node, *ARGV_node, *BINMODE_node, *CONVFMT_node; -NODE *ENVIRON_node, *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node; -NODE *FS_node, *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node; -NODE *ORS_node, *PROCINFO_node, *RLENGTH_node, *RSTART_node, *RS_node; -NODE *RT_node, *SUBSEP_node, *LINT_node, *TEXTDOMAIN_node; - -long NF; -long NR; -long FNR; -int BINMODE; -int IGNORECASE; -char *OFS; -char *ORS; -char *OFMT; -char *TEXTDOMAIN; -int MRL; /* See -mr option for use of this variable */ - -/* - * CONVFMT is a convenience pointer for the current number to string format. - * We must supply an initial value to avoid recursion problems of - * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT - * Fun, fun, fun, fun. - */ -char *CONVFMT = "%.6g"; - - -int errcount = 0; /* error counter, used by yyerror() */ - -NODE *Nnull_string; /* The global null string */ - -/* The name the program was invoked under, for error messages */ -const char *myname; - -/* A block of AWK code to be run before running the program */ -NODE *begin_block = NULL; - -/* A block of AWK code to be run after the last input file */ -NODE *end_block = NULL; - -int exiting = FALSE; /* Was an "exit" statement executed? 
*/ -int exit_val = 0; /* optional exit value */ - -#if defined(YYDEBUG) || defined(GAWKDEBUG) -extern int yydebug; -#endif - -struct src *srcfiles = NULL; /* source file name(s) */ -long numfiles = -1; /* how many source files */ - -int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */ -int do_posix = FALSE; /* turn off gnu and unix extensions */ -int do_lint = FALSE; /* provide warnings about questionable stuff */ -int do_lint_old = FALSE; /* warn about stuff not in V7 awk */ -int do_intl = FALSE; /* dump locale-izable strings to stdout */ -int do_non_decimal_data = FALSE; /* allow octal/hex C style DATA. Use with caution! */ -int do_nostalgia = FALSE; /* provide a blast from the past */ -int do_intervals = FALSE; /* allow {...,...} in regexps */ -int do_profiling = FALSE; /* profile and pretty print the program */ -int do_dump_vars = FALSE; /* dump all global variables at end */ -int do_tidy_mem = FALSE; /* release vars when done */ - -int in_begin_rule = FALSE; /* we're in a BEGIN rule */ -int in_end_rule = FALSE; /* we're in a END rule */ - -int output_is_tty = FALSE; /* control flushing of output */ - -extern char *version_string; /* current version, for printing */ - -/* The parse tree is stored here. */ -NODE *expression_value; - -#if _MSC_VER == 510 -void (*lintfunc) P((va_list va_alist, ...)) = warning; -#else -void (*lintfunc) P((char *mesg, ...)) = warning; -#endif - -static struct option optab[] = { - { "compat", no_argument, & do_traditional, 1 }, - { "traditional", no_argument, & do_traditional, 1 }, - { "lint", optional_argument, NULL, 'l' }, - { "lint-old", no_argument, & do_lint_old, 1 }, - { "posix", no_argument, & do_posix, 1 }, - { "nostalgia", no_argument, & do_nostalgia, 1 }, - { "gen-po", no_argument, & do_intl, 1 }, - { "non-decimal-data", no_argument, & do_non_decimal_data, 1 }, - { "profile", optional_argument, NULL, 'p' }, - { "copyleft", no_argument, NULL, 'C' }, - { "copyright", no_argument, NULL, 'C' }, - { "field-separator", required_argument, NULL, 'F' }, - { "file", required_argument, NULL, 'f' }, - { "re-interval", no_argument, & do_intervals, 1 }, - { "source", required_argument, NULL, 's' }, - { "dump-variables", optional_argument, NULL, 'd' }, - { "assign", required_argument, NULL, 'v' }, - { "version", no_argument, NULL, 'V' }, - { "usage", no_argument, NULL, 'u' }, - { "help", no_argument, NULL, 'u' }, -#ifdef GAWKDEBUG - { "parsedebug", no_argument, NULL, 'D' }, -#endif - { NULL, 0, NULL, '\0' } -}; - -/* main --- process args, parse program, run it, clean up */ - -int -main(int argc, char **argv) -{ - int c; - char *scan; - /* the + on the front tells GNU getopt not to rearrange argv */ - const char *optlist = "+F:f:v:W;m:"; - int stopped_early = FALSE; - int old_optind; - extern int optind; - extern int opterr; - extern char *optarg; - - /* do these checks early */ - if (getenv("TIDYMEM") != NULL) - do_tidy_mem = TRUE; - -#ifdef HAVE_MCHECK_H - if (do_tidy_mem) - mtrace(); -#endif /* HAVE_MCHECK_H */ - - - setlocale(LC_CTYPE, ""); - setlocale(LC_COLLATE, ""); - /* setlocale (LC_ALL, ""); */ - bindtextdomain(PACKAGE, LOCALEDIR); - textdomain(PACKAGE); - - (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig); - (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig); -#ifdef SIGBUS - (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig); -#endif - - myname = gawk_name(argv[0]); - argv[0] = (char *) myname; - os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ - - /* remove sccs gunk */ - if 
(strncmp(version_string, "@(#)", 4) == 0) - version_string += 4; - - if (argc < 2) - usage(1, stderr); - - /* initialize the null string */ - Nnull_string = make_string("", 0); - Nnull_string->numbr = 0.0; - Nnull_string->type = Node_val; - Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER); - - /* - * Tell the regex routines how they should work. - * Do this before initializing variables, since - * they could want to do a regexp compile. - */ - resetup(); - - /* Set up the special variables */ - /* - * Note that this must be done BEFORE arg parsing else -F - * breaks horribly. - */ - init_vars(); - - /* Set up the field variables */ - /* - * Do this before arg parsing so that `-v NF=blah' won't - * break anything. - */ - init_fields(); - - /* Robustness: check that 0, 1, 2, exist */ - init_fds(); - - /* worst case */ - emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main"); - memset(srcfiles, '\0', argc * sizeof(struct src)); - - /* we do error messages ourselves on invalid options */ - opterr = FALSE; - - /* option processing. ready, set, go! */ - for (optopt = 0, old_optind = 1; - (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF; - optopt = 0, old_optind = optind) { - if (do_posix) - opterr = TRUE; - - switch (c) { - case 'F': - cmdline_fs(optarg); - break; - - case 'f': - /* - * a la MKS awk, allow multiple -f options. - * this makes function libraries real easy. - * most of the magic is in the scanner. - * - * The following is to allow for whitespace at the end - * of a #! /bin/gawk line in an executable file - */ - scan = optarg; - while (ISSPACE(*scan)) - scan++; - - ++numfiles; - srcfiles[numfiles].stype = SOURCEFILE; - if (*scan == '\0') - srcfiles[numfiles].val = argv[optind++]; - else - srcfiles[numfiles].val = optarg; - break; - - case 'v': - pre_assign(optarg); - break; - - case 'm': - /* - * Research awk extension. - * -mf nnn set # fields, gawk ignores - * -mr nnn set record length, ditto - */ - if (do_lint) - lintwarn(_("`-m[fr]' option irrelevant in gawk")); - if (optarg[0] != 'r' && optarg[0] != 'f') - warning(_("-m option usage: `-m[fr] nnn'")); - /* - * Set fixed length records for Tandem, - * ignored on other platforms (see io.c:get_a_record). - */ - if (optarg[0] == 'r') { - if (ISDIGIT(optarg[1])) - MRL = atoi(optarg+1); - else { - MRL = atoi(argv[optind]); - optind++; - } - } else if (optarg[1] == '\0') - optind++; - break; - - case 'W': /* gawk specific options - now in getopt_long */ - fprintf(stderr, _("%s: option `-W %s' unrecognized, ignored\n"), - argv[0], optarg); - break; - - /* These can only come from long form options */ - case 'C': - copyleft(); - break; - - case 'd': - do_dump_vars = TRUE; - if (optarg != NULL && optarg[0] != '\0') - varfile = optarg; - break; - - case 'l': - do_lint = TRUE; - if (optarg != NULL && strcmp(optarg, "fatal") == 0) - lintfunc = r_fatal; - break; - - case 'p': - do_profiling = TRUE; - if (optarg != NULL) - set_prof_file(optarg); - else - set_prof_file(DEFAULT_PROFILE); - break; - - case 's': - if (optarg[0] == '\0') - warning(_("empty argument to `--source' ignored")); - else { - srcfiles[++numfiles].stype = CMDLINE; - srcfiles[numfiles].val = optarg; - } - break; - - case 'u': - usage(0, stdout); /* per coding stds */ - break; - - case 'V': - version(); - break; - -#ifdef GAWKDEBUG - case 'D': - yydebug = 2; - break; -#endif - - case 0: - /* - * getopt_long found an option that sets a variable - * instead of returning a letter. Do nothing, just - * cycle around for the next one. 
- */ - break; - - case '?': - default: - /* - * New behavior. If not posix, an unrecognized - * option stops argument processing so that it can - * go into ARGV for the awk program to see. This - * makes use of ``#! /bin/gawk -f'' easier. - * - * However, it's never simple. If optopt is set, - * an option that requires an argument didn't get the - * argument. We care because if opterr is 0, then - * getopt_long won't print the error message for us. - */ - if (! do_posix - && (optopt == '\0' || strchr(optlist, optopt) == NULL)) { - /* - * can't just do optind--. In case of an - * option with >= 2 letters, getopt_long - * won't have incremented optind. - */ - optind = old_optind; - stopped_early = TRUE; - goto out; - } else if (optopt != '\0') - /* Use 1003.2 required message format */ - fprintf(stderr, - _("%s: option requires an argument -- %c\n"), - myname, optopt); - /* else - let getopt print error message for us */ - break; - } - } -out: - - if (do_nostalgia) - nostalgia(); - - /* check for POSIXLY_CORRECT environment variable */ - if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) { - do_posix = TRUE; - if (do_lint) - lintwarn( - _("environment variable `POSIXLY_CORRECT' set: turning on `--posix'")); - } - - if (do_posix) { - if (do_traditional) /* both on command line */ - warning(_("`--posix' overrides `--traditional'")); - else - do_traditional = TRUE; - /* - * POSIX compliance also implies - * no GNU extensions either. - */ - } - - if (do_traditional && do_non_decimal_data) { - do_non_decimal_data = FALSE; - warning(_("`--posix'/`--traditional' overrides `--non-decimal-data'")); - } - - if (do_lint && os_is_setuid()) - warning(_("runing %s setuid root may be a security problem"), myname); - - /* - * Tell the regex routines how they should work. - * Do this again, after argument processing, since do_posix - * and do_traditional are now paid attention to by resetup(). - */ - if (do_traditional || do_posix || do_intervals) { - resetup(); - - /* now handle RS and FS. have to be careful with FS */ - set_RS(); - if (using_fieldwidths()) { - set_FS(); - set_FIELDWIDTHS(); - } else - set_FS(); - } - - /* - * Initialize profiling info, do after parsing args, - * in case this is pgawk. Don't bother if the command - * line already set profling up. - */ - if (! 
do_profiling) - init_profiling(& do_profiling, DEFAULT_PROFILE); - - if ((BINMODE & 1) != 0) - if (os_setbinmode(fileno(stdin), O_BINARY) == -1) - fatal(_("can't set mode on stdin (%s)"), strerror(errno)); - if ((BINMODE & 2) != 0) { - if (os_setbinmode(fileno(stdout), O_BINARY) == -1) - fatal(_("can't set mode on stdout (%s)"), strerror(errno)); - if (os_setbinmode(fileno(stderr), O_BINARY) == -1) - fatal(_("can't set mode on stderr (%s)"), strerror(errno)); - } - -#ifdef GAWKDEBUG - setbuf(stdout, (char *) NULL); /* make debugging easier */ -#endif - if (isatty(fileno(stdout))) - output_is_tty = TRUE; - /* No -f or --source options, use next arg */ - if (numfiles == -1) { - if (optind > argc - 1 || stopped_early) /* no args left or no program */ - usage(1, stderr); - srcfiles[++numfiles].stype = CMDLINE; - srcfiles[numfiles].val = argv[optind]; - optind++; - } - - init_args(optind, argc, (char *) myname, argv); - (void) tokexpand(); - - /* Read in the program */ - if (yyparse() != 0 || errcount != 0) - exit(1); - - if (do_intl) - exit(0); - - if (do_lint && begin_block == NULL && expression_value == NULL - && end_block == NULL) - lintwarn(_("no program text at all!")); - - if (do_lint) - shadow_funcs(); - - init_profiling_signals(); - - if (begin_block != NULL) { - in_begin_rule = TRUE; - (void) interpret(begin_block); - } - in_begin_rule = FALSE; - if (! exiting && (expression_value != NULL || end_block != NULL)) - do_input(); - if (end_block != NULL) { - in_end_rule = TRUE; - (void) interpret(end_block); - } - in_end_rule = FALSE; - if (close_io() != 0 && exit_val == 0) - exit_val = 1; - - if (do_profiling) { - dump_prog(begin_block, expression_value, end_block); - dump_funcs(); - } - - if (do_dump_vars) - dump_vars(varfile); - - if (do_tidy_mem) - release_all_vars(); - - exit(exit_val); /* more portable */ - return exit_val; /* to suppress warnings */ -} - -/* usage --- print usage information and exit */ - -static void -usage(int exitval, FILE *fp) -{ - /* Not factoring out common stuff makes it easier to translate. */ - - fprintf(fp, _("Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n"), - myname); - fprintf(fp, _("Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n"), - myname, quote, quote); - - /* GNU long options info. This is too many options. 
*/ - - fputs(_("POSIX options:\t\tGNU long options:\n"), fp); - fputs(_("\t-f progfile\t\t--file=progfile\n"), fp); - fputs(_("\t-F fs\t\t\t--field-separator=fs\n"), fp); - fputs(_("\t-v var=val\t\t--assign=var=val\n"), fp); - fputs(_("\t-m[fr] val\n"), fp); - fputs(_("\t-W compat\t\t--compat\n"), fp); - fputs(_("\t-W copyleft\t\t--copyleft\n"), fp); - fputs(_("\t-W copyright\t\t--copyright\n"), fp); - fputs(_("\t-W dump-variables[=file]\t--dump-variables[=file]\n"), fp); - fputs(_("\t-W gen-po\t\t--gen-po\n"), fp); - fputs(_("\t-W help\t\t\t--help\n"), fp); - fputs(_("\t-W lint[=fatal]\t\t--lint[=fatal]\n"), fp); - fputs(_("\t-W lint-old\t\t--lint-old\n"), fp); - fputs(_("\t-W non-decimal-data\t--non-decimal-data\n"), fp); -#ifdef NOSTALGIA - fputs(_("\t-W nostalgia\t\t--nostalgia\n"), fp); -#endif -#ifdef GAWKDEBUG - fputs(_("\t-W parsedebug\t\t--parsedebug\n"), fp); -#endif - fputs(_("\t-W profile[=file]\t--profile[=file]\n"), fp); - fputs(_("\t-W posix\t\t--posix\n"), fp); - fputs(_("\t-W re-interval\t\t--re-interval\n"), fp); - fputs(_("\t-W source=program-text\t--source=program-text\n"), fp); - fputs(_("\t-W traditional\t\t--traditional\n"), fp); - fputs(_("\t-W usage\t\t--usage\n"), fp); - fputs(_("\t-W version\t\t--version\n"), fp); - fputs(_("\nTo report bugs, see node `Bugs' in `gawk.info', which is\n"), fp); - fputs(_("section `Reporting Problems and Bugs' in the printed version.\n"), fp); - exit(exitval); -} - -/* copyleft --- print out the short GNU copyright information */ - -static void -copyleft() -{ - static char blurb_part1[] = - N_("Copyright (C) 1989, 1991-2001 Free Software Foundation.\n\ -\n\ -This program is free software; you can redistribute it and/or modify\n\ -it under the terms of the GNU General Public License as published by\n\ -the Free Software Foundation; either version 2 of the License, or\n\ -(at your option) any later version.\n\ -\n"); - static char blurb_part2[] = - N_("This program is distributed in the hope that it will be useful,\n\ -but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ -GNU General Public License for more details.\n\ -\n"); - static char blurb_part3[] = - N_("You should have received a copy of the GNU General Public License\n\ -along with this program; if not, write to the Free Software\n\ -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"); - - /* multiple blurbs are needed for some brain dead compilers. */ - fputs(_(blurb_part1), stdout); - fputs(_(blurb_part2), stdout); - fputs(_(blurb_part3), stdout); - fflush(stdout); - exit(0); -} - -/* cmdline_fs --- set FS from the command line */ - -static void -cmdline_fs(char *str) -{ - register NODE **tmp; - - tmp = get_lhs(FS_node, (Func_ptr *) 0, FALSE); - unref(*tmp); - /* - * Only if in full compatibility mode check for the stupid special - * case so -F\t works as documented in awk book even though the shell - * hands us -Ft. Bleah! - * - * Thankfully, Posix didn't propogate this "feature". - */ - if (str[0] == 't' && str[1] == '\0') { - if (do_lint) - lintwarn(_("-Ft does not set FS to tab in POSIX awk")); - if (do_traditional && ! 
do_posix) - str[0] = '\t'; - } - *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */ - set_FS(); -} - -/* init_args --- set up ARGV from stuff on the command line */ - -static void -init_args(int argc0, int argc, char *argv0, char **argv) -{ - int i, j; - NODE **aptr; - - ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL)); - aptr = assoc_lookup(ARGV_node, tmp_number(0.0), FALSE); - *aptr = make_string(argv0, strlen(argv0)); - (*aptr)->flags |= MAYBE_NUM; - for (i = argc0, j = 1; i < argc; i++) { - aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j), FALSE); - *aptr = make_string(argv[i], strlen(argv[i])); - (*aptr)->flags |= MAYBE_NUM; - (*aptr)->flags &= ~UNINITIALIZED; - j++; - } - ARGC_node = install("ARGC", - node(make_number((AWKNUM) j), Node_var, (NODE *) NULL)); - ARGC_node->flags &= ~UNINITIALIZED; -} - -/* - * Set all the special variables to their initial values. - * Note that some of the variables that have set_FOO routines should - * *N*O*T* have those routines called upon initialization, and thus - * they have NULL entries in that field. This is notably true of FS - * and IGNORECASE. - */ -struct varinit { - NODE **spec; - const char *name; - NODETYPE type; - const char *strval; - AWKNUM numval; - Func_ptr assign; -}; -static struct varinit varinit[] = { -{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT }, -{&NF_node, "NF", Node_NF, NULL, -1, set_NF }, -{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL }, -{&NR_node, "NR", Node_NR, NULL, 0, set_NR }, -{&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR }, -{&FS_node, "FS", Node_FS, " ", 0, NULL }, -{&RS_node, "RS", Node_RS, "\n", 0, set_RS }, -{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL }, -{&FILENAME_node, "FILENAME", Node_var, "", 0, NULL }, -{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS }, -{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS }, -{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT }, -{&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL }, -{&RSTART_node, "RSTART", Node_var, NULL, 0, NULL }, -{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL }, -{&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL }, -{&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL }, -{&RT_node, "RT", Node_var, "", 0, NULL }, -{&BINMODE_node, "BINMODE", Node_BINMODE, NULL, 0, NULL }, -{&LINT_node, "LINT", Node_LINT, NULL, 0, NULL }, -{&TEXTDOMAIN_node, "TEXTDOMAIN", Node_TEXTDOMAIN, "messages", 0, set_TEXTDOMAIN }, -{0, NULL, Node_illegal, NULL, 0, NULL }, -}; - -/* init_vars --- actually initialize everything in the symbol table */ - -static void -init_vars() -{ - register struct varinit *vp; - - for (vp = varinit; vp->name; vp++) { - *(vp->spec) = install((char *) vp->name, - node(vp->strval == NULL ? make_number(vp->numval) - : make_string((char *) vp->strval, - strlen(vp->strval)), - vp->type, (NODE *) NULL)); - (*(vp->spec))->flags |= SCALAR; - (*(vp->spec))->flags &= ~UNINITIALIZED; - if (vp->assign) - (*(vp->assign))(); - } -} - -/* load_environ --- populate the ENVIRON array */ - -void -load_environ() -{ -#if ! defined(TANDEM) -#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! 
(defined(VMS) && defined(__DECC)) - extern char **environ; -#endif - register char *var, *val; - NODE **aptr; - register int i; - - ENVIRON_node = install("ENVIRON", - node(Nnull_string, Node_var, (NODE *) NULL)); - for (i = 0; environ[i] != NULL; i++) { - static char nullstr[] = ""; - - var = environ[i]; - val = strchr(var, '='); - if (val != NULL) - *val++ = '\0'; - else - val = nullstr; - aptr = assoc_lookup(ENVIRON_node,tmp_string(var, strlen(var)), - FALSE); - *aptr = make_string(val, strlen(val)); - (*aptr)->flags |= (MAYBE_NUM|SCALAR); - - /* restore '=' so that system() gets a valid environment */ - if (val != nullstr) - *--val = '='; - } - /* - * Put AWKPATH into ENVIRON if it's not there. - * This allows querying it from within awk programs. - */ - if (getenv("AWKPATH") == NULL) { - aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7), FALSE); - *aptr = make_string(defpath, strlen(defpath)); - (*aptr)->flags |= SCALAR; - } -#endif /* TANDEM */ -} - -/* load_procinfo --- populate the PROCINFO array */ - -void -load_procinfo() -{ - int i; - NODE **aptr; - char name[100]; - AWKNUM value; -#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 - GETGROUPS_T groupset[NGROUPS_MAX]; - int ngroups; -#endif - - PROCINFO_node = install("PROCINFO", - node(Nnull_string, Node_var, (NODE *) NULL)); - -#ifdef GETPGRP_VOID -#define getpgrp_arg() /* nothing */ -#else -#define getpgrp_arg() getpid() -#endif - - value = getpgrp(getpgrp_arg()); - aptr = assoc_lookup(PROCINFO_node, tmp_string("pgrpid", 6), FALSE); - *aptr = make_number(value); - - /* - * could put a lot of this into a table, but then there's - * portability problems declaring all the functions. so just - * do it the slow and stupid way. sigh. - */ - - value = getpid(); - aptr = assoc_lookup(PROCINFO_node, tmp_string("pid", 3), FALSE); - *aptr = make_number(value); - - value = getppid(); - aptr = assoc_lookup(PROCINFO_node, tmp_string("ppid", 4), FALSE); - *aptr = make_number(value); - - value = getuid(); - aptr = assoc_lookup(PROCINFO_node, tmp_string("uid", 3), FALSE); - *aptr = make_number(value); - - value = geteuid(); - aptr = assoc_lookup(PROCINFO_node, tmp_string("euid", 4), FALSE); - *aptr = make_number(value); - - value = getgid(); - aptr = assoc_lookup(PROCINFO_node, tmp_string("gid", 3), FALSE); - *aptr = make_number(value); - - value = getegid(); - aptr = assoc_lookup(PROCINFO_node, tmp_string("egid", 4), FALSE); - *aptr = make_number(value); - - aptr = assoc_lookup(PROCINFO_node, tmp_string("FS", 2), FALSE); - *aptr = make_string("FS", 2); - -#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 - ngroups = getgroups(NGROUPS_MAX, groupset); - if (ngroups == -1) - fatal(_("could not find groups: %s"), strerror(errno)); - - for (i = 0; i < ngroups; i++) { - sprintf(name, "group%d", i + 1); - value = groupset[i]; - aptr = assoc_lookup(PROCINFO_node, tmp_string(name, strlen(name)), FALSE); - *aptr = make_number(value); - } -#endif -} - -/* arg_assign --- process a command-line assignment */ - -char * -arg_assign(char *arg) -{ - char *cp, *cp2; - int badvar; - Func_ptr after_assign = NULL; - NODE *var; - NODE *it; - NODE **lhs; - - cp = strchr(arg, '='); - if (cp != NULL) { - *cp++ = '\0'; - /* first check that the variable name has valid syntax */ - badvar = FALSE; - if (! ISALPHA(arg[0]) && arg[0] != '_') - badvar = TRUE; - else - for (cp2 = arg+1; *cp2; cp2++) - if (! 
ISALNUM(*cp2) && *cp2 != '_') { - badvar = TRUE; - break; - } - - if (badvar) { - if (do_lint) - lintwarn(_("invalid syntax in name `%s' for variable assignment"), arg); - *--cp = '='; /* restore original text of ARGV */ - return NULL; - } - - /* - * Recent versions of nawk expand escapes inside assignments. - * This makes sense, so we do it too. - */ - it = make_str_node(cp, strlen(cp), SCAN); - it->flags |= (MAYBE_NUM|SCALAR); - var = variable(arg, FALSE, Node_var); - lhs = get_lhs(var, &after_assign, FALSE); - unref(*lhs); - *lhs = it; - if (after_assign != NULL) - (*after_assign)(); - *--cp = '='; /* restore original text of ARGV */ - } - return cp; -} - -/* pre_assign --- handle -v, print a message and die if a problem */ - -static void -pre_assign(char *v) -{ - char *cp; - /* - * There is a problem when doing profiling. For -v x=y, - * the variable x gets installed into the symbol table pointing - * at the value in argv. This is what gets dumped. The string - * ends up containing the full x=y, leading to stuff in the profile - * of the form: - * - * if (x=y) ... - * - * Needless to say, this is gross, ugly and wrong. To fix, we - * malloc a private copy of the storage that we can tweak to - * our heart's content. - * - * This can't depend upon do_profiling; that variable isn't set up yet. - * Sigh. - */ - - emalloc(cp, char *, strlen(v) + 1, "pre_assign"); - strcpy(cp, v); - - if (arg_assign(cp) == NULL) { - fprintf(stderr, - "%s: `%s' argument to `-v' not in `var=value' form\n", - myname, v); - usage(1, stderr); - } - - cp = strchr(cp, '='); - assert(cp); - *cp = '\0'; -} - -/* catchsig --- catch signals */ - -RETSIGTYPE -catchsig(int sig, int code) -{ -#ifdef lint - code = 0; sig = code; code = sig; -#endif - if (sig == SIGFPE) { - fatal(_("floating point exception")); - } else if (sig == SIGSEGV -#ifdef SIGBUS - || sig == SIGBUS -#endif - ) { - set_loc(__FILE__, __LINE__); - msg(_("fatal error: internal error")); - /* fatal won't abort() if not compiled for debugging */ - abort(); - } else - cant_happen(); - /* NOTREACHED */ -} - -/* nostalgia --- print the famous error message and die */ - -static void -nostalgia() -{ - /* - * N.B.: This string is not gettextized, on purpose. - * So there. - */ - fprintf(stderr, "awk: bailing out near line 1\n"); - fflush(stderr); - abort(); -} - -/* version --- print version message */ - -static void -version() -{ - printf("%s.%d\n", version_string, PATCHLEVEL); - /* - * Per GNU coding standards, print copyright info, - * then exit successfully, do nothing else. - */ - copyleft(); - exit(0); -} - -/* init_fds --- check for 0, 1, 2, open on /dev/null if possible */ - -static void -init_fds() -{ - struct stat sbuf; - int fd; - int newfd; - - /* maybe no stderr, don't bother with error mesg */ - for (fd = 0; fd <= 2; fd++) { - if (fstat(fd, &sbuf) < 0) { -#if MAKE_A_HEROIC_EFFORT - if (do_lint) - lintwarn(_("no pre-opened fd %d"), fd); -#endif - newfd = devopen("/dev/null", "r+"); -#ifdef MAKE_A_HEROIC_EFFORT - if (do_lint && newfd < 0) - lintwarn(_("could not pre-open /dev/null for fd %d"), fd); -#endif - } - } -} diff --git a/contrib/awk/mkinstalldirs b/contrib/awk/mkinstalldirs deleted file mode 100755 index 0801ec2..0000000 --- a/contrib/awk/mkinstalldirs +++ /dev/null @@ -1,32 +0,0 @@ -#! 
/bin/sh -# mkinstalldirs --- make directory hierarchy -# Author: Noah Friedman -# Created: 1993-05-16 -# Last modified: 1994-03-25 -# Public domain - -errstatus=0 - -for file in ${1+"$@"} ; do - set fnord `echo ":$file" | sed -ne 's/^:\//#/;s/^://;s/\// /g;s/^#/\//;p'` - shift - - pathcomp= - for d in ${1+"$@"} ; do - pathcomp="$pathcomp$d" - case "$pathcomp" in - -* ) pathcomp=./$pathcomp ;; - esac - - if test ! -d "$pathcomp"; then - echo "mkdir $pathcomp" 1>&2 - mkdir "$pathcomp" || errstatus=$? - fi - - pathcomp="$pathcomp/" - done -done - -exit $errstatus - -# mkinstalldirs ends here diff --git a/contrib/awk/msg.c b/contrib/awk/msg.c deleted file mode 100644 index 8b113c8..0000000 --- a/contrib/awk/msg.c +++ /dev/null @@ -1,184 +0,0 @@ -/* - * msg.c - routines for error messages - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -int sourceline = 0; -char *source = NULL; - -static char *srcfile = NULL; -static int srcline; - -/* prototype needed for ansi / gcc */ -void err P((const char *s, const char *emsg, va_list argp)); - -/* err --- print an error message with source line and file and record */ - -/* VARARGS2 */ -void -err(const char *s, const char *emsg, va_list argp) -{ - char *file; - - (void) fflush(stdout); - (void) fprintf(stderr, "%s: ", myname); -#ifdef GAWKDEBUG - if (srcfile != NULL) { - fprintf(stderr, "%s:%d:", srcfile, srcline); - srcfile = NULL; - } -#endif /* GAWKDEBUG */ - if (sourceline != 0) { - if (source != NULL) - (void) fprintf(stderr, "%s:", source); - else - (void) fprintf(stderr, _("cmd. line:")); - - (void) fprintf(stderr, "%d: ", sourceline); - } - if (FNR > 0) { - file = FILENAME_node->var_value->stptr; - (void) putc('(', stderr); - if (file) - (void) fprintf(stderr, "FILENAME=%s ", file); - (void) fprintf(stderr, "FNR=%ld) ", FNR); - } - (void) fprintf(stderr, s); - vfprintf(stderr, emsg, argp); - (void) fprintf(stderr, "\n"); - (void) fflush(stderr); -} - -/* msg --- take a varargs error message and print it */ - -/* - * Function identifier purposely indented to avoid mangling - * by ansi2knr. Sigh. - */ - -void -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - msg(char *mesg, ...) -#else -/*VARARGS0*/ - msg(va_alist) - va_dcl -#endif -{ - va_list args; -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, mesg); -#else - char *mesg; - - va_start(args); - mesg = va_arg(args, char *); -#endif - err("", mesg, args); - va_end(args); -} - -/* warning --- print a warning message */ - -void -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - warning(char *mesg, ...) 
-#else -/*VARARGS0*/ - warning(va_alist) - va_dcl -#endif -{ - va_list args; -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, mesg); -#else - char *mesg; - - va_start(args); - mesg = va_arg(args, char *); -#endif - err(_("warning: "), mesg, args); - va_end(args); -} - -void -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - error(char *mesg, ...) -#else -/*VARARGS0*/ - error(va_alist) - va_dcl -#endif -{ - va_list args; -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, mesg); -#else - char *mesg; - - va_start(args); - mesg = va_arg(args, char *); -#endif - err(_("error: "), mesg, args); - va_end(args); -} - -/* set_loc --- set location where a fatal error happened */ - -void -set_loc(char *file, int line) -{ - srcfile = file; - srcline = line; -} - -/* fatal --- print an error message and die */ - -void -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - r_fatal(char *mesg, ...) -#else -/*VARARGS0*/ - r_fatal(va_alist) - va_dcl -#endif -{ - va_list args; -#if defined(HAVE_STDARG_H) && defined(__STDC__) && __STDC__ - va_start(args, mesg); -#else - char *mesg; - - va_start(args); - mesg = va_arg(args, char *); -#endif - err(_("fatal: "), mesg, args); - va_end(args); -#ifdef GAWKDEBUG - abort(); -#endif - exit(2); -} diff --git a/contrib/awk/node.c b/contrib/awk/node.c deleted file mode 100644 index 5196ff7..0000000 --- a/contrib/awk/node.c +++ /dev/null @@ -1,571 +0,0 @@ -/* - * node.c -- routines for node management - */ - -/* - * Copyright (C) 1986, 1988, 1989, 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - * - * $FreeBSD$ - */ - -#include "awk.h" - -/* r_force_number --- force a value to be numeric */ - -AWKNUM -r_force_number(register NODE *n) -{ - register char *cp; - register char *cpend; - char save; - char *ptr; - unsigned int newflags; - extern double strtod(); - -#ifdef GAWKDEBUG - if (n == NULL) - cant_happen(); - if (n->type != Node_val) - cant_happen(); - if(n->flags == 0) - cant_happen(); - if (n->flags & NUM) - return n->numbr; -#endif - - /* all the conditionals are an attempt to avoid the expensive strtod */ - - n->numbr = 0.0; - n->flags |= NUM; - n->flags &= ~UNINITIALIZED; - - if (n->stlen == 0) { - if (0 && do_lint) - lintwarn(_("can't convert string to float")); - return 0.0; - } - - cp = n->stptr; - if (ISALPHA(*cp)) { - if (0 && do_lint) - lintwarn(_("can't convert string to float")); - return 0.0; - } - - cpend = cp + n->stlen; - while (cp < cpend && ISSPACE(*cp)) - cp++; - if (cp == cpend || ISALPHA(*cp)) { - if (0 && do_lint) - lintwarn(_("can't convert string to float")); - return 0.0; - } - - if (n->flags & MAYBE_NUM) { - newflags = NUMBER; - n->flags &= ~MAYBE_NUM; - } else - newflags = 0; - if (cpend - cp == 1) { - if (ISDIGIT(*cp)) { - n->numbr = (AWKNUM)(*cp - '0'); - n->flags |= newflags; - } else if (0 && do_lint) - lintwarn(_("can't convert string to float")); - return n->numbr; - } - - if (do_non_decimal_data) { - errno = 0; - if (! do_traditional && isnondecimal(cp)) { - n->numbr = nondec2awknum(cp, cpend - cp); - goto finish; - } - } - - errno = 0; - save = *cpend; - *cpend = '\0'; - n->numbr = (AWKNUM) strtod((const char *) cp, &ptr); - - /* POSIX says trailing space is OK for NUMBER */ - while (ISSPACE(*ptr)) - ptr++; - *cpend = save; -finish: - /* the >= should be ==, but for SunOS 3.5 strtod() */ - if (errno == 0 && ptr >= cpend) { - n->flags |= newflags; - } else { - if (0 && do_lint && ptr < cpend) - lintwarn(_("can't convert string to float")); - errno = 0; - } - - return n->numbr; -} - -/* - * the following lookup table is used as an optimization in force_string - * (more complicated) variations on this theme didn't seem to pay off, but - * systematic testing might be in order at some point - */ -static const char *values[] = { - "0", - "1", - "2", - "3", - "4", - "5", - "6", - "7", - "8", - "9", -}; -#define NVAL (sizeof(values)/sizeof(values[0])) - -/* format_val --- format a numeric value based on format */ - -NODE * -format_val(char *format, int index, register NODE *s) -{ - char buf[BUFSIZ]; - register char *sp = buf; - double val; - char *orig, *trans, save; - - if (! do_traditional && (s->flags & INTLSTR) != 0) { - save = s->stptr[s->stlen]; - s->stptr[s->stlen] = '\0'; - - orig = s->stptr; - trans = dgettext(TEXTDOMAIN, orig); - - s->stptr[s->stlen] = save; - return tmp_string(trans, strlen(trans)); - } - - /* not an integral value, or out of range */ - if ((val = double_to_int(s->numbr)) != s->numbr - || val < LONG_MIN || val > LONG_MAX) { - /* - * Once upon a time, if GFMT_WORKAROUND wasn't defined, - * we just blindly did this: - * sprintf(sp, format, s->numbr); - * s->stlen = strlen(sp); - * s->stfmt = (char) index; - * but that's no good if, e.g., OFMT is %s. So we punt, - * and just always format the value ourselves. 
- */ - - NODE *dummy, *r; - unsigned short oflags; - extern NODE **fmt_list; /* declared in eval.c */ - - /* create dummy node for a sole use of format_tree */ - getnode(dummy); - dummy->type = Node_expression_list; - dummy->lnode = s; - dummy->rnode = NULL; - oflags = s->flags; - s->flags |= PERM; /* prevent from freeing by format_tree() */ - r = format_tree(format, fmt_list[index]->stlen, dummy, 2); - s->flags = oflags; - s->stfmt = (char) index; - s->stlen = r->stlen; - s->stptr = r->stptr; - freenode(r); /* Do not free_temp(r)! We want */ - freenode(dummy); /* to keep s->stptr == r->stpr. */ - - goto no_malloc; - } else { - /* integral value */ - /* force conversion to long only once */ - register long num = (long) val; - if (num < NVAL && num >= 0) { - sp = (char *) values[num]; - s->stlen = 1; - } else { - (void) sprintf(sp, "%ld", num); - s->stlen = strlen(sp); - } - s->stfmt = -1; - } - emalloc(s->stptr, char *, s->stlen + 2, "format_val"); - memcpy(s->stptr, sp, s->stlen+1); -no_malloc: - s->stref = 1; - s->flags |= STR; - s->flags &= ~UNINITIALIZED; - return s; -} - -/* r_force_string --- force a value to be a string */ - -NODE * -r_force_string(register NODE *s) -{ - NODE *ret; -#ifdef GAWKDEBUG - if (s == NULL) - cant_happen(); - if (s->type != Node_val) - cant_happen(); - if (s->stref <= 0) - cant_happen(); - if ((s->flags & STR) != 0 - && (s->stfmt == -1 || s->stfmt == CONVFMTidx)) - return s; -#endif - - ret = format_val(CONVFMT, CONVFMTidx, s); - return ret; -} - -/* - * dupnode: - * Duplicate a node. (For strings, "duplicate" means crank up the - * reference count.) - */ - -NODE * -dupnode(NODE *n) -{ - register NODE *r; - - if ((n->flags & TEMP) != 0) { - n->flags &= ~TEMP; - n->flags |= MALLOC; - return n; - } - if ((n->flags & PERM) != 0) - return n; - if ((n->flags & (MALLOC|STR)) == (MALLOC|STR)) { - if (n->stref < LONG_MAX) - n->stref++; - return n; - } - getnode(r); - *r = *n; - r->flags &= ~(PERM|TEMP|FIELD); - r->flags |= MALLOC; - if (n->type == Node_val && (n->flags & STR) != 0) { - r->stref = 1; - emalloc(r->stptr, char *, r->stlen + 2, "dupnode"); - memcpy(r->stptr, n->stptr, r->stlen); - r->stptr[r->stlen] = '\0'; - } - return r; -} - -/* copy_node --- force a brand new copy of a node to be allocated */ - -NODE * -copynode(NODE *old) -{ - NODE *new; - int saveflags; - - assert(old != NULL); - saveflags = old->flags; - old->flags &= ~(MALLOC|PERM); - new = dupnode(old); - old->flags = saveflags; - return new; -} - -/* mk_number --- allocate a node with defined number */ - -NODE * -mk_number(AWKNUM x, unsigned int flags) -{ - register NODE *r; - - getnode(r); - r->type = Node_val; - r->numbr = x; - r->flags = flags | SCALAR; -#ifdef GAWKDEBUG - r->stref = 1; - r->stptr = NULL; - r->stlen = 0; -#endif - return r; -} - -/* make_str_node --- make a string node */ - -NODE * -make_str_node(char *s, size_t len, int flags) -{ - register NODE *r; - - getnode(r); - r->type = Node_val; - r->flags = (STRING|STR|MALLOC|SCALAR); - if (flags & ALREADY_MALLOCED) - r->stptr = s; - else { - emalloc(r->stptr, char *, len + 2, s); - memcpy(r->stptr, s, len); - } - r->stptr[len] = '\0'; - - if ((flags & SCAN) != 0) { /* scan for escape sequences */ - char *pf; - register char *ptm; - register int c; - register char *end; - - end = &(r->stptr[len]); - for (pf = ptm = r->stptr; pf < end;) { - c = *pf++; - if (c == '\\') { - c = parse_escape(&pf); - if (c < 0) { - if (do_lint) - lintwarn(_("backslash at end of string")); - c = '\\'; - } - *ptm++ = c; - } else - *ptm++ = c; - } - len 
= ptm - r->stptr; - erealloc(r->stptr, char *, len + 1, "make_str_node"); - r->stptr[len] = '\0'; - r->flags |= PERM; - } - r->stlen = len; - r->stref = 1; - r->stfmt = -1; - - return r; -} - -/* tmp_string --- allocate a temporary string */ - -NODE * -tmp_string(char *s, size_t len) -{ - register NODE *r; - - r = make_string(s, len); - r->flags |= TEMP; - return r; -} - -/* more_nodes --- allocate more nodes */ - -#define NODECHUNK 100 - -NODE *nextfree = NULL; - -NODE * -more_nodes() -{ - register NODE *np; - - /* get more nodes and initialize list */ - emalloc(nextfree, NODE *, NODECHUNK * sizeof(NODE), "more_nodes"); - for (np = nextfree; np <= &nextfree[NODECHUNK - 1]; np++) { - np->flags = 0; - np->flags |= UNINITIALIZED; -#ifndef NO_PROFILING - np->exec_count = 0; -#endif - np->nextp = np + 1; - } - --np; - np->nextp = NULL; - np = nextfree; - nextfree = nextfree->nextp; - return np; -} - -#ifdef MEMDEBUG -#undef freenode -/* freenode --- release a node back to the pool */ - -void -freenode(NODE *it) -{ - it->flags &= ~SCALAR; - it->flags |= UNINITIALIZED; -#ifdef MPROF - it->stref = 0; - free((char *) it); -#else /* not MPROF */ -#ifndef NO_PROFILING - it->exec_count = 0; -#endif - /* add it to head of freelist */ - it->nextp = nextfree; - nextfree = it; -#endif /* not MPROF */ -} -#endif /* GAWKDEBUG */ - -/* unref --- remove reference to a particular node */ - -void -unref(register NODE *tmp) -{ - if (tmp == NULL) - return; - if ((tmp->flags & PERM) != 0) - return; - tmp->flags &= ~TEMP; - if ((tmp->flags & MALLOC) != 0) { - if ((tmp->flags & STR) != 0) { - if (tmp->stref > 1) { - if (tmp->stref != LONG_MAX) - tmp->stref--; - return; - } - free(tmp->stptr); - } - freenode(tmp); - return; - } - if ((tmp->flags & FIELD) != 0) { - freenode(tmp); - return; - } -} - -/* - * parse_escape: - * - * Parse a C escape sequence. STRING_PTR points to a variable containing a - * pointer to the string to parse. That pointer is updated past the - * characters we use. The value of the escape sequence is returned. - * - * A negative value means the sequence \ newline was seen, which is supposed to - * be equivalent to nothing at all. - * - * If \ is followed by a null character, we return a negative value and leave - * the string pointer pointing at the null character. - * - * If \ is followed by 000, we return 0 and leave the string pointer after the - * zeros. A value of 0 does not mean end of string. - * - * Posix doesn't allow \x. - */ - -int -parse_escape(char **string_ptr) -{ - register int c = *(*string_ptr)++; - register int i; - register int count; - - switch (c) { - case 'a': - return BELL; - case 'b': - return '\b'; - case 'f': - return '\f'; - case 'n': - return '\n'; - case 'r': - return '\r'; - case 't': - return '\t'; - case 'v': - return '\v'; - case '\n': - return -2; - case 0: - (*string_ptr)--; - return -1; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - i = c - '0'; - count = 0; - while (++count < 3) { - if ((c = *(*string_ptr)++) >= '0' && c <= '7') { - i *= 8; - i += c - '0'; - } else { - (*string_ptr)--; - break; - } - } - return i; - case 'x': - if (do_lint) { - static int didwarn = FALSE; - - if (! didwarn) { - didwarn = TRUE; - lintwarn(_("POSIX does not allow `\\x' escapes")); - } - } - if (do_posix) - return ('x'); - if (! 
ISXDIGIT((*string_ptr)[0])) { - warning(_("no hex digits in `\\x' escape sequence")); - return ('x'); - } - i = 0; - for (;;) { - /* do outside test to avoid multiple side effects */ - c = *(*string_ptr)++; - if (ISXDIGIT(c)) { - i *= 16; - if (ISDIGIT(c)) - i += c - '0'; - else if (ISUPPER(c)) - i += c - 'A' + 10; - else - i += c - 'a' + 10; - } else { - (*string_ptr)--; - break; - } - } - return i; - case '\\': - case '"': - return c; - default: - { - static short warned[256]; - unsigned char uc = (unsigned char) c; - - /* N.B.: use unsigned char here to avoid Latin-1 problems */ - - if (! warned[uc]) { - warned[uc] = TRUE; - - warning(_("escape sequence `\\%c' treated as plain `%c'"), uc, uc); - } - } - return c; - } -} diff --git a/contrib/awk/patchlev.h b/contrib/awk/patchlev.h deleted file mode 100644 index 2867bba..0000000 --- a/contrib/awk/patchlev.h +++ /dev/null @@ -1 +0,0 @@ -#define PATCHLEVEL 0 diff --git a/contrib/awk/patchlevel.h b/contrib/awk/patchlevel.h deleted file mode 100644 index e44bc09..0000000 --- a/contrib/awk/patchlevel.h +++ /dev/null @@ -1 +0,0 @@ -#define PATCHLEVEL 6 diff --git a/contrib/awk/po/POTFILES.in b/contrib/awk/po/POTFILES.in deleted file mode 100644 index d5f623e..0000000 --- a/contrib/awk/po/POTFILES.in +++ /dev/null @@ -1,23 +0,0 @@ -# List of source files containing translatable strings. -# Copyright (C) 1999 Free Software Foundation, Inc. - -array.c -builtin.c -ext.c -getopt.c -main.c -profile.c -regex.c -awkgram.c -dfa.c -field.c -getopt1.c -msg.c -random.c -version.c -eval.c -gawkmisc.c -io.c -node.c -re.c -posix/gawkmisc.c diff --git a/contrib/awk/po/cat-id-tbl.c b/contrib/awk/po/cat-id-tbl.c deleted file mode 100644 index ba3581c..0000000 --- a/contrib/awk/po/cat-id-tbl.c +++ /dev/null @@ -1,411 +0,0 @@ -/* Automatically generated by po2tbl.sed from gawk.pot. 
*/ - -#if HAVE_CONFIG_H -# include -#endif - -#include "libgettext.h" - -const struct _msg_ent _msg_tbl[] = { - {"", 1}, - {"attempt to use scalar `%s' as array", 2}, - {"reference to uninitialized element `%s[\"%s\"]'", 3}, - {"subscript of array `%s' is null string", 4}, - {"delete: illegal use of variable `%s' as array", 5}, - {"delete: index `%s' not in array `%s'", 6}, - {"%s: empty (null)\n", 7}, - {"%s: empty (zero)\n", 8}, - {"%s: table_size = %d, array_size = %d\n", 9}, - {"%s: is paramater\n", 10}, - {"%s: array_ref to %s\n", 11}, - {"asort: first argument is not an array", 12}, - {"asort: second argument is not an array", 13}, - {"%s to \"%s\" failed (%s)", 14}, - {"standard output", 15}, - {"reason unknown", 16}, - {"exp: received non-numeric argument", 17}, - {"exp: argument %g is out of range", 18}, - {"fflush: cannot flush: pipe `%s' opened for reading, not writing", 19}, - {"fflush: cannot flush: file `%s' opened for reading, not writing", 20}, - {"fflush: `%s' is not an open file, pipe or co-process", 21}, - {"index: received non-string first argument", 22}, - {"index: received non-string second argument", 23}, - {"int: received non-numeric argument", 24}, - {"length: received non-string argument", 25}, - {"log: received non-numeric argument", 26}, - {"log: received negative argument %g", 27}, - {"`$' is not permitted in awk formats", 28}, - {"arg count with `$' must be > 0", 29}, - {"arg count %d greater than total number of supplied arguments", 30}, - {"`$' not permitted after period in format", 31}, - {"no `$' supplied for positional field width or precision", 32}, - {"`l' is meaningless in awk formats; ignored", 33}, - {"`l' is not permitted in POSIX awk formats", 34}, - {"`L' is meaningless in awk formats; ignored", 35}, - {"`L' is not permitted in POSIX awk formats", 36}, - {"`h' is meaningless in awk formats; ignored", 37}, - {"`h' is not permitted in POSIX awk formats", 38}, - {"not enough arguments to satisfy format string", 39}, - {"^ ran out for this one", 40}, - {"[s]printf: format specifier does not have control letter", 41}, - {"too many arguments supplied for format string", 42}, - {"printf: no arguments", 43}, - {"sqrt: received non-numeric argument", 44}, - {"sqrt: called with negative argument %g", 45}, - {"substr: start index %g is invalid, using 1", 46}, - {"substr: non-integer start index %g will be truncated", 47}, - {"substr: length %g is <= 0", 48}, - {"substr: non-integer length %g will be truncated", 49}, - {"substr: source string is zero length", 50}, - {"\ -substr: length %d at start index %d exceeds length of first argument (%d)", 51}, - {"substr: start index %d is past end of string", 52}, - {"strftime: recieved non-string first argument", 53}, - {"strftime: received empty format string", 54}, - {"strftime: recieved non-numeric second argument", 55}, - {"mktime: received non-string argument", 56}, - {"system: recieved non-string argument", 57}, - {"tolower: recieved non-string argument", 58}, - {"toupper: recieved non-string argument", 59}, - {"atan2: received non-numeric first argument", 60}, - {"atan2: received non-numeric second argument", 61}, - {"sin: received non-numeric argument", 62}, - {"cos: received non-numeric argument", 63}, - {"srand: received non-numeric argument", 64}, - {"match: third argument is not an array", 65}, - {"gensub: 3rd argument of 0 treated as 1", 66}, - {"lshift: received non-numeric first argument", 67}, - {"lshift(%lf, %lf): negative values will give strange results", 68}, - {"lshift(%lf, %lf): fractional 
values will be truncated", 69}, - {"lshift(%lf, %lf): too large shift value will give strange results", 70}, - {"rshift: received non-numeric first argument", 71}, - {"rshift(%lf, %lf): negative values will give strange results", 72}, - {"rshift(%lf, %lf): fractional values will be truncated", 73}, - {"rshift(%lf, %lf): too large shift value will give strange results", 74}, - {"and: received non-numeric first argument", 75}, - {"and(%lf, %lf): negative values will give strange results", 76}, - {"and(%lf, %lf): fractional values will be truncated", 77}, - {"or: received non-numeric first argument", 78}, - {"or(%lf, %lf): negative values will give strange results", 79}, - {"or(%lf, %lf): fractional values will be truncated", 80}, - {"xor: received non-numeric first argument", 81}, - {"xor(%lf, %lf): negative values will give strange results", 82}, - {"xor(%lf, %lf): fractional values will be truncated", 83}, - {"compl: received non-numeric argument", 84}, - {"compl(%lf): negative value will give strange results", 85}, - {"compl(%lf): fractional value will be truncated", 86}, - {"dcgettext: `%s' is not a valid locale category", 87}, - {"`extension' is a gawk extension", 88}, - {"extension: cannot open `%s' (%s)\n", 89}, - {"extension: library `%s': cannot call function `%s' (%s)\n", 90}, - {"Operation Not Supported", 91}, - {"%s: option `%s' is ambiguous\n", 92}, - {"%s: option `--%s' doesn't allow an argument\n", 93}, - {"%s: option `%c%s' doesn't allow an argument\n", 94}, - {"%s: option `%s' requires an argument\n", 95}, - {"%s: unrecognized option `--%s'\n", 96}, - {"%s: unrecognized option `%c%s'\n", 97}, - {"%s: illegal option -- %c\n", 98}, - {"%s: invalid option -- %c\n", 99}, - {"%s: option requires an argument -- %c\n", 100}, - {"%s: option `-W %s' is ambiguous\n", 101}, - {"%s: option `-W %s' doesn't allow an argument\n", 102}, - {"`-m[fr]' option irrelevant in gawk", 103}, - {"-m option usage: `-m[fr] nnn'", 104}, - {"%s: option `-W %s' unrecognized, ignored\n", 105}, - {"empty argument to `--source' ignored", 106}, - {"environment variable `POSIXLY_CORRECT' set: turning on `--posix'", 107}, - {"`--posix' overrides `--traditional'", 108}, - {"`--posix'/`--traditional' overrides `--non-decimal-data'", 109}, - {"runing %s setuid root may be a security problem", 110}, - {"can't set mode on stdin (%s)", 111}, - {"can't set mode on stdout (%s)", 112}, - {"can't set mode on stderr (%s)", 113}, - {"no program text at all!", 114}, - {"Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n", 115}, - {"Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n", 116}, - {"POSIX options:\t\tGNU long options:\n", 117}, - {"\t-f progfile\t\t--file=progfile\n", 118}, - {"\t-F fs\t\t\t--field-separator=fs\n", 119}, - {"\t-v var=val\t\t--assign=var=val\n", 120}, - {"\t-m[fr] val\n", 121}, - {"\t-W compat\t\t--compat\n", 122}, - {"\t-W copyleft\t\t--copyleft\n", 123}, - {"\t-W copyright\t\t--copyright\n", 124}, - {"\t-W dump-variables[=file]\t--dump-variables[=file]\n", 125}, - {"\t-W gen-po\t\t--gen-po\n", 126}, - {"\t-W help\t\t\t--help\n", 127}, - {"\t-W lint[=fatal]\t\t--lint[=fatal]\n", 128}, - {"\t-W lint-old\t\t--lint-old\n", 129}, - {"\t-W non-decimal-data\t--non-decimal-data\n", 130}, - {"\t-W nostalgia\t\t--nostalgia\n", 131}, - {"\t-W parsedebug\t\t--parsedebug\n", 132}, - {"\t-W profile[=file]\t--profile[=file]\n", 133}, - {"\t-W posix\t\t--posix\n", 134}, - {"\t-W re-interval\t\t--re-interval\n", 135}, - {"\t-W source=program-text\t--source=program-text\n", 136}, - 
{"\t-W traditional\t\t--traditional\n", 137}, - {"\t-W usage\t\t--usage\n", 138}, - {"\t-W version\t\t--version\n", 139}, - {"\ -\n\ -To report bugs, see node `Bugs' in `gawk.info', which is\n", 140}, - {"section `Reporting Problems and Bugs' in the printed version.\n", 141}, - {"\ -Copyright (C) 1989, 1991-2001 Free Software Foundation.\n\ -\n\ -This program is free software; you can redistribute it and/or modify\n\ -it under the terms of the GNU General Public License as published by\n\ -the Free Software Foundation; either version 2 of the License, or\n\ -(at your option) any later version.\n\ -\n", 142}, - {"\ -This program is distributed in the hope that it will be useful,\n\ -but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ -GNU General Public License for more details.\n\ -\n", 143}, - {"\ -You should have received a copy of the GNU General Public License\n\ -along with this program; if not, write to the Free Software\n\ -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n", 144}, - {"-Ft does not set FS to tab in POSIX awk", 145}, - {"could not find groups: %s", 146}, - {"invalid syntax in name `%s' for variable assignment", 147}, - {"floating point exception", 148}, - {"fatal error: internal error", 149}, - {"no pre-opened fd %d", 150}, - {"could not pre-open /dev/null for fd %d", 151}, - {"could not open `%s' for writing: %s", 152}, - {"sending profile to standard error", 153}, - {"internal error: Node_var with null vname", 154}, - {"internal error: Node_var_array with null vname", 155}, - {"\ -function `%s' called with space between name and `(',\n\ -%s", 156}, - {"or used in other expression context", 157}, - {"attempt to use array `%s' in a scalar context", 158}, - {"illegal type (%s) in tree_eval", 159}, - {"attempt to use function `%s' as array", 160}, - {"`%s' is a function, assignment is not allowed", 161}, - {"assignment is not allowed to result of builtin function", 162}, - {"\t# gawk profile, created %s\n", 163}, - {"\ -\t# BEGIN block(s)\n\ -\n", 164}, - {"\ -\t# Rule(s)\n\ -\n", 165}, - {"\ -\t# END block(s)\n\ -\n", 166}, - {"\ -\n\ -\t# Functions, listed alphabetically\n", 167}, - {"unexpected type %s in prec_level", 168}, - {"Success", 169}, - {"No match", 170}, - {"Invalid regular expression", 171}, - {"Invalid collation character", 172}, - {"Invalid character class name", 173}, - {"Trailing backslash", 174}, - {"Invalid back reference", 175}, - {"Unmatched [ or [^", 176}, - {"Unmatched ( or \\(", 177}, - {"Unmatched \\{", 178}, - {"Invalid content of \\{\\}", 179}, - {"Invalid range end", 180}, - {"Memory exhausted", 181}, - {"Invalid preceding regular expression", 182}, - {"Premature end of regular expression", 183}, - {"Regular expression too big", 184}, - {"Unmatched ) or \\)", 185}, - {"No previous regular expression", 186}, - {"BEGIN blocks must have an action part", 187}, - {"END blocks must have an action part", 188}, - {"`%s' is a built-in function, it cannot be redefined", 189}, - {"statement may have no effect", 190}, - {"plain `print' in BEGIN or END rule should probably be `print \"\"'", 191}, - {"`next' used in BEGIN or END action", 192}, - {"`nextfile' is a gawk extension", 193}, - {"`nextfile' used in BEGIN or END action", 194}, - {"`return' used outside function context", 195}, - {"`delete array' is a gawk extension", 196}, - {"multistage two-way pipelines don't work", 197}, - {"regular expression on right of assignment", 198}, - 
{"non-redirected `getline' undefined inside END action", 199}, - {"regular expression on left of `~' or `!~' operator", 200}, - {"regexp constant `/%s/' looks like a C comment, but is not", 201}, - {"regular expression on right of comparison", 202}, - {"non-redirected `getline' undefined inside BEGIN or END action", 203}, - {"call of `length' without parentheses is not portable", 204}, - {"call of `length' without parentheses is deprecated by POSIX", 205}, - {"invalid subscript expression", 206}, - {"fptr %x not in tokentab\n", 207}, - {"unexpected newline", 208}, - {"empty program text on command line", 209}, - {"can't open source file `%s' for reading (%s)", 210}, - {"can't read sourcefile `%s' (%s)", 211}, - {"source file `%s' is empty", 212}, - {"source file does not end in newline", 213}, - {"unterminated regexp ends with `\\' at end of file", 214}, - {"unterminated regexp", 215}, - {"unterminated regexp at end of file", 216}, - {"use of `\\ #...' line continuation is not portable", 217}, - {"backslash not last character on line", 218}, - {"POSIX does not allow operator `**='", 219}, - {"old awk does not support operator `**='", 220}, - {"POSIX does not allow operator `**'", 221}, - {"old awk does not support operator `**'", 222}, - {"operator `^=' is not supported in old awk", 223}, - {"operator `^' is not supported in old awk", 224}, - {"unterminated string", 225}, - {"invalid char '%c' in expression", 226}, - {"`%s' is a gawk extension", 227}, - {"`%s' is a Bell Labs extension", 228}, - {"POSIX does not allow `%s'", 229}, - {"`%s' is not supported in old awk", 230}, - {"`goto' considered harmful!\n", 231}, - {"%d is invalid as number of arguments for %s", 232}, - {"match: third argument is a gawk extension", 233}, - {"%s: string literal as last arg of substitute has no effect", 234}, - {"sub third parameter is not a changeable object", 235}, - {"gsub third parameter is not a changeable object", 236}, - {"close: second argument is a gawk extension", 237}, - {"use of dcgettext(_\"...\") is incorrect: remove leading underscore", 238}, - {"function `%s': parameter #%d, `%s', duplicates parameter #%d", 239}, - {"function `%s': parameter `%s' shadows global variable", 240}, - {"could not open `%s' for writing (%s)", 241}, - {"%s: close failed (%s)", 242}, - {"shadow_funcs() called twice!", 243}, - {"function `%s': can't use function name as parameter name", 244}, - {"function name `%s' previously defined", 245}, - {"function `%s' called but never defined", 246}, - {"function `%s' defined but never called", 247}, - {"regexp constant for parameter #%d yields boolean value", 248}, - {"Unfinished \\ escape", 249}, - {"unfinished repeat count", 250}, - {"malformed repeat count", 251}, - {"Unbalanced [", 252}, - {"Unbalanced (", 253}, - {"No regexp syntax bits specified", 254}, - {"Unbalanced )", 255}, - {"out of memory", 256}, - {"split: second argument is not an array", 257}, - {"split: null string for third arg is a gawk extension", 258}, - {"`FIELDWIDTHS' is a gawk extension", 259}, - {"field %d in FIELDWIDTHS, must be > 0", 260}, - {"null string for `FS' is a gawk extension", 261}, - {"cmd. 
line:", 262}, - {"warning: ", 263}, - {"error: ", 264}, - {"fatal: ", 265}, - {"unknown nodetype %d", 266}, - {"buffer overflow in genflags2str", 267}, - {"for loop: array `%s' changed size from %d to %d during loop execution", 268}, - {"`break' outside a loop is not portable", 269}, - {"`break' outside a loop is not allowed", 270}, - {"`continue' outside a loop is not portable", 271}, - {"`continue' outside a loop is not allowed", 272}, - {"`next' cannot be called from a BEGIN rule", 273}, - {"`next' cannot be called from an END rule", 274}, - {"`nextfile' cannot be called from a BEGIN rule", 275}, - {"`nextfile' cannot be called from an END rule", 276}, - {"statement has no effect", 277}, - {"reference to uninitialized variable `%s'", 278}, - {"can't use function name `%s' as variable or array", 279}, - {"reference to uninitialized argument `%s'", 280}, - {"assignment used in conditional context", 281}, - {"\ -concatenation: side effects in one expression have changed the length of \ -another!", 282}, - {"division by zero attempted", 283}, - {"division by zero attempted in `%%'", 284}, - {"division by zero attempted in `/='", 285}, - {"division by zero attempted in `%%='", 286}, - {"%s (from %s)", 287}, - {"function `%s' called with more arguments than declared", 288}, - {"function `%s' not defined", 289}, - {"function %s called\n", 290}, - {"\ -\n\ -\t# Function Call Stack:\n\ -\n", 291}, - {"\t# -- main --\n", 292}, - {"attempt to field reference from non-numeric value", 293}, - {"attempt to reference from null string", 294}, - {"attempt to access field %d", 295}, - {"attempt to use scalar parameter `%s' as an array", 296}, - {"`IGNORECASE' is a gawk extension", 297}, - {"`BINMODE' is a gawk extension", 298}, - {"bad `%sFMT' specification `%s'", 299}, - {"turning off `--lint' due to assignment to `LINT'", 300}, - {"cannot open file `%s' for reading (%s)", 301}, - {"close of fd %d (`%s') failed (%s)", 302}, - {"invalid tree type %s in redirect()", 303}, - {"expression in `%s' redirection only has numeric value", 304}, - {"expression for `%s' redirection has null string value", 305}, - {"filename `%s' for `%s' redirection may be result of logical expression", 306}, - {"unnecessary mixing of `>' and `>>' for file `%.*s'", 307}, - {"can't open pipe `%s' for output (%s)", 308}, - {"can't open pipe `%s' for input (%s)", 309}, - {"can't open two way socket `%s' for input/output (%s)", 310}, - {"can't open two way pipe `%s' for input/output (%s)", 311}, - {"can't redirect from `%s' (%s)", 312}, - {"can't redirect to `%s' (%s)", 313}, - {"\ -reached system limit for open files: starting to multiplex file descriptors", 314}, - {"close of `%s' failed (%s).", 315}, - {"too many pipes or input files open", 316}, - {"close: second argument must be `to' or `from'", 317}, - {"close: `%.*s' is not an open file, pipe or co-process", 318}, - {"close of redirection that was never opened", 319}, - {"close: redirection `%s' not opened with `|&', second argument ignored", 320}, - {"failure status (%d) on pipe close of `%s' (%s)", 321}, - {"failure status (%d) on file close of `%s' (%s)", 322}, - {"no explicit close of socket `%s' provided", 323}, - {"no explicit close of co-process `%s' provided", 324}, - {"no explicit close of pipe `%s' provided", 325}, - {"no explicit close of file `%s' provided", 326}, - {"error writing standard output (%s)", 327}, - {"error writing standard error (%s)", 328}, - {"pipe flush of `%s' failed (%s).", 329}, - {"co-process flush of pipe to `%s' failed (%s).", 330}, - {"file 
flush of `%s' failed (%s).", 331}, - {"/inet/raw client not ready yet, sorry", 332}, - {"only root may use `/inet/raw'.", 333}, - {"/inet/raw server not ready yet, sorry", 334}, - {"no (known) protocol supplied in special filename `%s'", 335}, - {"special file name `%s' is incomplete", 336}, - {"local port invalid in `%s'", 337}, - {"must supply a remote hostname to `/inet'", 338}, - {"must supply a remote port to `/inet'", 339}, - {"remote port invalid in `%s'", 340}, - {"TCP/IP communications are not supported", 341}, - {"file `%s' is a directory", 342}, - {"use `PROCINFO[\"%s\"]' instead of `%s'", 343}, - {"use `PROCINFO[...]' instead of `/dev/user'", 344}, - {"could not open `%s', mode `%s'", 345}, - {"close of stdout in child failed (%s)", 346}, - {"moving pipe to stdout in child failed (dup: %s)", 347}, - {"close of stdin in child failed (%s)", 348}, - {"moving pipe to stdin in child failed (dup: %s)", 349}, - {"close of pipe failed (%s)", 350}, - {"pipe from `%s': could not set close-on-exec (fcntl: %s)", 351}, - {"pipe to `%s': could not set close-on-exec (fcntl: %s)", 352}, - {"`|&' not supported", 353}, - {"cannot open pipe `%s' (%s)", 354}, - {"cannot create child process for `%s' (fork: %s)", 355}, - {"data file `%s' is empty", 356}, - {"internal error: file `%s', line %d\n", 357}, - {"error reading input file `%s': %s", 358}, - {"multicharacter value of `RS' is a gawk extension", 359}, - {"can't convert string to float", 360}, - {"backslash at end of string", 361}, - {"POSIX does not allow `\\x' escapes", 362}, - {"no hex digits in `\\x' escape sequence", 363}, - {"escape sequence `\\%c' treated as plain `%c'", 364}, - {"%s %s `%s': could not set close-on-exec: %s", 365}, -}; - -int _msg_tbl_length = 365; diff --git a/contrib/awk/po/gawk.pot b/contrib/awk/po/gawk.pot deleted file mode 100644 index 57a2625..0000000 --- a/contrib/awk/po/gawk.pot +++ /dev/null @@ -1,1696 +0,0 @@ -# SOME DESCRIPTIVE TITLE. -# Copyright (C) YEAR Free Software Foundation, Inc. -# FIRST AUTHOR , YEAR. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PACKAGE VERSION\n" -"POT-Creation-Date: 2001-05-15 19:48+0300\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=CHARSET\n" -"Content-Transfer-Encoding: ENCODING\n" - -#: array.c:243 array.c:278 eval.c:483 -#, c-format -msgid "attempt to use scalar `%s' as array" -msgstr "" - -#: array.c:302 -#, c-format -msgid "reference to uninitialized element `%s[\"%s\"]'" -msgstr "" - -#: array.c:308 -#, c-format -msgid "subscript of array `%s' is null string" -msgstr "" - -#: array.c:371 array.c:454 -#, c-format -msgid "delete: illegal use of variable `%s' as array" -msgstr "" - -#: array.c:404 -#, c-format -msgid "delete: index `%s' not in array `%s'" -msgstr "" - -#: array.c:569 -#, c-format -msgid "%s: empty (null)\n" -msgstr "" - -#: array.c:574 -#, c-format -msgid "%s: empty (zero)\n" -msgstr "" - -#: array.c:578 -#, c-format -msgid "%s: table_size = %d, array_size = %d\n" -msgstr "" - -#: array.c:610 -#, c-format -msgid "%s: is paramater\n" -msgstr "" - -#: array.c:615 -#, c-format -msgid "%s: array_ref to %s\n" -msgstr "" - -#: array.c:842 -msgid "asort: first argument is not an array" -msgstr "" - -#: array.c:851 -msgid "asort: second argument is not an array" -msgstr "" - -#: builtin.c:107 -#, c-format -msgid "%s to \"%s\" failed (%s)" -msgstr "" - -#: builtin.c:108 -msgid "standard output" -msgstr "" - -#: builtin.c:109 -msgid "reason unknown" -msgstr "" - -#: builtin.c:122 -msgid "exp: received non-numeric argument" -msgstr "" - -#: builtin.c:128 -#, c-format -msgid "exp: argument %g is out of range" -msgstr "" - -#: builtin.c:186 -#, c-format -msgid "fflush: cannot flush: pipe `%s' opened for reading, not writing" -msgstr "" - -#: builtin.c:189 -#, c-format -msgid "fflush: cannot flush: file `%s' opened for reading, not writing" -msgstr "" - -#: builtin.c:201 -#, c-format -msgid "fflush: `%s' is not an open file, pipe or co-process" -msgstr "" - -#: builtin.c:222 -msgid "index: received non-string first argument" -msgstr "" - -#: builtin.c:224 -msgid "index: received non-string second argument" -msgstr "" - -#: builtin.c:287 -msgid "int: received non-numeric argument" -msgstr "" - -#: builtin.c:304 -msgid "length: received non-string argument" -msgstr "" - -#: builtin.c:320 -msgid "log: received non-numeric argument" -msgstr "" - -#: builtin.c:323 -#, c-format -msgid "log: received negative argument %g" -msgstr "" - -#: builtin.c:583 -msgid "`$' is not permitted in awk formats" -msgstr "" - -#: builtin.c:589 -msgid "arg count with `$' must be > 0" -msgstr "" - -#: builtin.c:591 -#, c-format -msgid "arg count %d greater than total number of supplied arguments" -msgstr "" - -#: builtin.c:593 -msgid "`$' not permitted after period in format" -msgstr "" - -#: builtin.c:606 -msgid "no `$' supplied for positional field width or precision" -msgstr "" - -#: builtin.c:664 -msgid "`l' is meaningless in awk formats; ignored" -msgstr "" - -#: builtin.c:668 -msgid "`l' is not permitted in POSIX awk formats" -msgstr "" - -#: builtin.c:679 -msgid "`L' is meaningless in awk formats; ignored" -msgstr "" - -#: builtin.c:683 -msgid "`L' is not permitted in POSIX awk formats" -msgstr "" - -#: builtin.c:694 -msgid "`h' is meaningless in awk formats; ignored" -msgstr "" - -#: builtin.c:698 -msgid "`h' is not permitted in POSIX awk formats" -msgstr "" - -#: builtin.c:947 -msgid "not enough arguments to satisfy format string" -msgstr "" - 
-#: builtin.c:949 -msgid "^ ran out for this one" -msgstr "" - -#: builtin.c:954 -msgid "[s]printf: format specifier does not have control letter" -msgstr "" - -#: builtin.c:957 -msgid "too many arguments supplied for format string" -msgstr "" - -#: builtin.c:1000 builtin.c:1003 -msgid "printf: no arguments" -msgstr "" - -#: builtin.c:1036 -msgid "sqrt: received non-numeric argument" -msgstr "" - -#: builtin.c:1040 -#, c-format -msgid "sqrt: called with negative argument %g" -msgstr "" - -#: builtin.c:1062 -#, c-format -msgid "substr: start index %g is invalid, using 1" -msgstr "" - -#: builtin.c:1067 -#, c-format -msgid "substr: non-integer start index %g will be truncated" -msgstr "" - -#: builtin.c:1081 -#, c-format -msgid "substr: length %g is <= 0" -msgstr "" - -#: builtin.c:1087 -#, c-format -msgid "substr: non-integer length %g will be truncated" -msgstr "" - -#: builtin.c:1094 -msgid "substr: source string is zero length" -msgstr "" - -#: builtin.c:1101 -#, c-format -msgid "" -"substr: length %d at start index %d exceeds length of first argument (%d)" -msgstr "" - -#: builtin.c:1107 -#, c-format -msgid "substr: start index %d is past end of string" -msgstr "" - -#: builtin.c:1143 -msgid "strftime: recieved non-string first argument" -msgstr "" - -#: builtin.c:1149 -msgid "strftime: received empty format string" -msgstr "" - -#: builtin.c:1158 -msgid "strftime: recieved non-numeric second argument" -msgstr "" - -#: builtin.c:1221 -msgid "mktime: received non-string argument" -msgstr "" - -#: builtin.c:1266 -msgid "system: recieved non-string argument" -msgstr "" - -#: builtin.c:1386 -msgid "tolower: recieved non-string argument" -msgstr "" - -#: builtin.c:1407 -msgid "toupper: recieved non-string argument" -msgstr "" - -#: builtin.c:1430 -msgid "atan2: received non-numeric first argument" -msgstr "" - -#: builtin.c:1432 -msgid "atan2: received non-numeric second argument" -msgstr "" - -#: builtin.c:1451 -msgid "sin: received non-numeric argument" -msgstr "" - -#: builtin.c:1467 -msgid "cos: received non-numeric argument" -msgstr "" - -#: builtin.c:1511 -msgid "srand: received non-numeric argument" -msgstr "" - -#: builtin.c:1545 -msgid "match: third argument is not an array" -msgstr "" - -#: builtin.c:1942 -msgid "gensub: 3rd argument of 0 treated as 1" -msgstr "" - -#: builtin.c:2053 builtin.c:2055 -msgid "lshift: received non-numeric first argument" -msgstr "" - -#: builtin.c:2057 -#, c-format -msgid "lshift(%lf, %lf): negative values will give strange results" -msgstr "" - -#: builtin.c:2059 -#, c-format -msgid "lshift(%lf, %lf): fractional values will be truncated" -msgstr "" - -#: builtin.c:2061 -#, c-format -msgid "lshift(%lf, %lf): too large shift value will give strange results" -msgstr "" - -#: builtin.c:2090 builtin.c:2092 -msgid "rshift: received non-numeric first argument" -msgstr "" - -#: builtin.c:2094 -#, c-format -msgid "rshift(%lf, %lf): negative values will give strange results" -msgstr "" - -#: builtin.c:2096 -#, c-format -msgid "rshift(%lf, %lf): fractional values will be truncated" -msgstr "" - -#: builtin.c:2098 -#, c-format -msgid "rshift(%lf, %lf): too large shift value will give strange results" -msgstr "" - -#: builtin.c:2127 builtin.c:2129 -msgid "and: received non-numeric first argument" -msgstr "" - -#: builtin.c:2131 -#, c-format -msgid "and(%lf, %lf): negative values will give strange results" -msgstr "" - -#: builtin.c:2133 -#, c-format -msgid "and(%lf, %lf): fractional values will be truncated" -msgstr "" - -#: builtin.c:2162 builtin.c:2164 -msgid 
"or: received non-numeric first argument" -msgstr "" - -#: builtin.c:2166 -#, c-format -msgid "or(%lf, %lf): negative values will give strange results" -msgstr "" - -#: builtin.c:2168 -#, c-format -msgid "or(%lf, %lf): fractional values will be truncated" -msgstr "" - -#: builtin.c:2197 builtin.c:2199 -msgid "xor: received non-numeric first argument" -msgstr "" - -#: builtin.c:2201 -#, c-format -msgid "xor(%lf, %lf): negative values will give strange results" -msgstr "" - -#: builtin.c:2203 -#, c-format -msgid "xor(%lf, %lf): fractional values will be truncated" -msgstr "" - -#: builtin.c:2231 -msgid "compl: received non-numeric argument" -msgstr "" - -#: builtin.c:2233 -#, c-format -msgid "compl(%lf): negative value will give strange results" -msgstr "" - -#: builtin.c:2235 -#, c-format -msgid "compl(%lf): fractional value will be truncated" -msgstr "" - -#. not there -#: builtin.c:2429 -#, c-format -msgid "dcgettext: `%s' is not a valid locale category" -msgstr "" - -#: ext.c:60 ext.c:64 -msgid "`extension' is a gawk extension" -msgstr "" - -#: ext.c:74 -#, c-format -msgid "extension: cannot open `%s' (%s)\n" -msgstr "" - -#: ext.c:82 -#, c-format -msgid "extension: library `%s': cannot call function `%s' (%s)\n" -msgstr "" - -#: ext.c:180 -msgid "Operation Not Supported" -msgstr "" - -#: getopt.c:688 -#, c-format -msgid "%s: option `%s' is ambiguous\n" -msgstr "" - -#: getopt.c:713 -#, c-format -msgid "%s: option `--%s' doesn't allow an argument\n" -msgstr "" - -#: getopt.c:718 -#, c-format -msgid "%s: option `%c%s' doesn't allow an argument\n" -msgstr "" - -#: getopt.c:736 getopt.c:909 -#, c-format -msgid "%s: option `%s' requires an argument\n" -msgstr "" - -#. --option -#: getopt.c:765 -#, c-format -msgid "%s: unrecognized option `--%s'\n" -msgstr "" - -#. +option or -option -#: getopt.c:769 -#, c-format -msgid "%s: unrecognized option `%c%s'\n" -msgstr "" - -#. 1003.2 specifies the format of this message. -#: getopt.c:795 -#, c-format -msgid "%s: illegal option -- %c\n" -msgstr "" - -#: getopt.c:798 -#, c-format -msgid "%s: invalid option -- %c\n" -msgstr "" - -#. 1003.2 specifies the format of this message. -#: getopt.c:828 getopt.c:958 main.c:397 -#, c-format -msgid "%s: option requires an argument -- %c\n" -msgstr "" - -#: getopt.c:875 -#, c-format -msgid "%s: option `-W %s' is ambiguous\n" -msgstr "" - -#: getopt.c:893 -#, c-format -msgid "%s: option `-W %s' doesn't allow an argument\n" -msgstr "" - -#: main.c:292 -msgid "`-m[fr]' option irrelevant in gawk" -msgstr "" - -#: main.c:294 -msgid "-m option usage: `-m[fr] nnn'" -msgstr "" - -#. gawk specific options - now in getopt_long -#: main.c:311 -#, c-format -msgid "%s: option `-W %s' unrecognized, ignored\n" -msgstr "" - -#: main.c:342 -msgid "empty argument to `--source' ignored" -msgstr "" - -#: main.c:414 -msgid "environment variable `POSIXLY_CORRECT' set: turning on `--posix'" -msgstr "" - -#. both on command line -#: main.c:419 -msgid "`--posix' overrides `--traditional'" -msgstr "" - -#: main.c:430 -msgid "`--posix'/`--traditional' overrides `--non-decimal-data'" -msgstr "" - -#: main.c:434 -#, c-format -msgid "runing %s setuid root may be a security problem" -msgstr "" - -#: main.c:463 -#, c-format -msgid "can't set mode on stdin (%s)" -msgstr "" - -#: main.c:466 -#, c-format -msgid "can't set mode on stdout (%s)" -msgstr "" - -#: main.c:468 -#, c-format -msgid "can't set mode on stderr (%s)" -msgstr "" - -#: main.c:497 -msgid "no program text at all!" -msgstr "" - -#. 
Not factoring out common stuff makes it easier to translate. -#: main.c:541 -#, c-format -msgid "Usage: %s [POSIX or GNU style options] -f progfile [--] file ...\n" -msgstr "" - -#: main.c:543 -#, c-format -msgid "Usage: %s [POSIX or GNU style options] [--] %cprogram%c file ...\n" -msgstr "" - -#. GNU long options info. This is too many options. -#: main.c:548 -msgid "POSIX options:\t\tGNU long options:\n" -msgstr "" - -#: main.c:549 -msgid "\t-f progfile\t\t--file=progfile\n" -msgstr "" - -#: main.c:550 -msgid "\t-F fs\t\t\t--field-separator=fs\n" -msgstr "" - -#: main.c:551 -msgid "\t-v var=val\t\t--assign=var=val\n" -msgstr "" - -#: main.c:552 -msgid "\t-m[fr] val\n" -msgstr "" - -#: main.c:553 -msgid "\t-W compat\t\t--compat\n" -msgstr "" - -#: main.c:554 -msgid "\t-W copyleft\t\t--copyleft\n" -msgstr "" - -#: main.c:555 -msgid "\t-W copyright\t\t--copyright\n" -msgstr "" - -#: main.c:556 -msgid "\t-W dump-variables[=file]\t--dump-variables[=file]\n" -msgstr "" - -#: main.c:557 -msgid "\t-W gen-po\t\t--gen-po\n" -msgstr "" - -#: main.c:558 -msgid "\t-W help\t\t\t--help\n" -msgstr "" - -#: main.c:559 -msgid "\t-W lint[=fatal]\t\t--lint[=fatal]\n" -msgstr "" - -#: main.c:560 -msgid "\t-W lint-old\t\t--lint-old\n" -msgstr "" - -#: main.c:561 -msgid "\t-W non-decimal-data\t--non-decimal-data\n" -msgstr "" - -#: main.c:563 -msgid "\t-W nostalgia\t\t--nostalgia\n" -msgstr "" - -#: main.c:566 -msgid "\t-W parsedebug\t\t--parsedebug\n" -msgstr "" - -#: main.c:568 -msgid "\t-W profile[=file]\t--profile[=file]\n" -msgstr "" - -#: main.c:569 -msgid "\t-W posix\t\t--posix\n" -msgstr "" - -#: main.c:570 -msgid "\t-W re-interval\t\t--re-interval\n" -msgstr "" - -#: main.c:571 -msgid "\t-W source=program-text\t--source=program-text\n" -msgstr "" - -#: main.c:572 -msgid "\t-W traditional\t\t--traditional\n" -msgstr "" - -#: main.c:573 -msgid "\t-W usage\t\t--usage\n" -msgstr "" - -#: main.c:574 -msgid "\t-W version\t\t--version\n" -msgstr "" - -#: main.c:575 -msgid "" -"\n" -"To report bugs, see node `Bugs' in `gawk.info', which is\n" -msgstr "" - -#: main.c:576 -msgid "section `Reporting Problems and Bugs' in the printed version.\n" -msgstr "" - -#: main.c:586 -msgid "" -"Copyright (C) 1989, 1991-2001 Free Software Foundation.\n" -"\n" -"This program is free software; you can redistribute it and/or modify\n" -"it under the terms of the GNU General Public License as published by\n" -"the Free Software Foundation; either version 2 of the License, or\n" -"(at your option) any later version.\n" -"\n" -msgstr "" - -#: main.c:594 -msgid "" -"This program is distributed in the hope that it will be useful,\n" -"but WITHOUT ANY WARRANTY; without even the implied warranty of\n" -"MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n" -"GNU General Public License for more details.\n" -"\n" -msgstr "" - -#: main.c:600 -msgid "" -"You should have received a copy of the GNU General Public License\n" -"along with this program; if not, write to the Free Software\n" -"Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n" -msgstr "" - -#: main.c:630 -msgid "-Ft does not set FS to tab in POSIX awk" -msgstr "" - -#: io.c:1371 main.c:830 -#, c-format -msgid "could not find groups: %s" -msgstr "" - -#: main.c:869 -#, c-format -msgid "invalid syntax in name `%s' for variable assignment" -msgstr "" - -#: main.c:938 -msgid "floating point exception" -msgstr "" - -#: main.c:945 -msgid "fatal error: internal error" -msgstr "" - -#: main.c:995 -#, c-format -msgid "no pre-opened fd %d" -msgstr "" - -#: main.c:1000 -#, c-format -msgid "could not pre-open /dev/null for fd %d" -msgstr "" - -#: profile.c:94 -#, c-format -msgid "could not open `%s' for writing: %s" -msgstr "" - -#: awkgram.y:2281 profile.c:96 -msgid "sending profile to standard error" -msgstr "" - -#: profile.c:402 -msgid "internal error: Node_var with null vname" -msgstr "" - -#: profile.c:508 -msgid "internal error: Node_var_array with null vname" -msgstr "" - -#: eval.c:816 profile.c:531 -#, c-format -msgid "" -"function `%s' called with space between name and `(',\n" -"%s" -msgstr "" - -#: eval.c:818 profile.c:533 -msgid "or used in other expression context" -msgstr "" - -#: eval.c:796 eval.c:1042 eval.c:1610 eval.c:1736 profile.c:610 profile.c:738 -#, c-format -msgid "attempt to use array `%s' in a scalar context" -msgstr "" - -#: eval.c:1046 profile.c:614 -#, c-format -msgid "illegal type (%s) in tree_eval" -msgstr "" - -#: eval.c:1785 profile.c:815 -#, c-format -msgid "attempt to use function `%s' as array" -msgstr "" - -#: eval.c:1792 profile.c:829 -#, c-format -msgid "`%s' is a function, assignment is not allowed" -msgstr "" - -#. in gawk for a while -#: eval.c:1798 eval.c:1805 profile.c:833 -msgid "assignment is not allowed to result of builtin function" -msgstr "" - -#. \n on purpose, with \n in ctime() output -#: profile.c:1083 -#, c-format -msgid "\t# gawk profile, created %s\n" -msgstr "" - -#: profile.c:1086 -msgid "" -"\t# BEGIN block(s)\n" -"\n" -msgstr "" - -#: profile.c:1096 -msgid "" -"\t# Rule(s)\n" -"\n" -msgstr "" - -#: profile.c:1102 -msgid "" -"\t# END block(s)\n" -"\n" -msgstr "" - -#: profile.c:1122 -msgid "" -"\n" -"\t# Functions, listed alphabetically\n" -msgstr "" - -#: profile.c:1325 -#, c-format -msgid "unexpected type %s in prec_level" -msgstr "" - -#: regex.c:1017 -msgid "Success" -msgstr "" - -#. REG_NOERROR -#: regex.c:1018 -msgid "No match" -msgstr "" - -#. REG_NOMATCH -#: regex.c:1019 -msgid "Invalid regular expression" -msgstr "" - -#. REG_BADPAT -#: regex.c:1020 -msgid "Invalid collation character" -msgstr "" - -#. REG_ECOLLATE -#: regex.c:1021 -msgid "Invalid character class name" -msgstr "" - -#. REG_ECTYPE -#: regex.c:1022 -msgid "Trailing backslash" -msgstr "" - -#. REG_EESCAPE -#: regex.c:1023 -msgid "Invalid back reference" -msgstr "" - -#. REG_ESUBREG -#: regex.c:1024 -msgid "Unmatched [ or [^" -msgstr "" - -#. REG_EBRACK -#: regex.c:1025 -msgid "Unmatched ( or \\(" -msgstr "" - -#. REG_EPAREN -#: regex.c:1026 -msgid "Unmatched \\{" -msgstr "" - -#. REG_EBRACE -#: regex.c:1027 -msgid "Invalid content of \\{\\}" -msgstr "" - -#. REG_BADBR -#: regex.c:1028 -msgid "Invalid range end" -msgstr "" - -#. 
REG_ERANGE -#: dfa.c:167 dfa.c:178 dfa.c:189 regex.c:1029 -msgid "Memory exhausted" -msgstr "" - -#. REG_ESPACE -#: regex.c:1030 -msgid "Invalid preceding regular expression" -msgstr "" - -#. REG_BADRPT -#: regex.c:1031 -msgid "Premature end of regular expression" -msgstr "" - -#. REG_EEND -#: regex.c:1032 -msgid "Regular expression too big" -msgstr "" - -#. REG_ESIZE -#: regex.c:1033 -msgid "Unmatched ) or \\)" -msgstr "" - -#: regex.c:5531 -msgid "No previous regular expression" -msgstr "" - -#: awkgram.y:230 -msgid "BEGIN blocks must have an action part" -msgstr "" - -#: awkgram.y:236 -msgid "END blocks must have an action part" -msgstr "" - -#: awkgram.y:272 -#, c-format -msgid "`%s' is a built-in function, it cannot be redefined" -msgstr "" - -#: awkgram.y:360 -msgid "statement may have no effect" -msgstr "" - -#: awkgram.y:457 -msgid "plain `print' in BEGIN or END rule should probably be `print \"\"'" -msgstr "" - -#: awkgram.y:469 -msgid "`next' used in BEGIN or END action" -msgstr "" - -#: awkgram.y:476 awkgram.y:483 -msgid "`nextfile' is a gawk extension" -msgstr "" - -#: awkgram.y:488 -msgid "`nextfile' used in BEGIN or END action" -msgstr "" - -#: awkgram.y:497 -msgid "`return' used outside function context" -msgstr "" - -#: awkgram.y:506 awkgram.y:513 -msgid "`delete array' is a gawk extension" -msgstr "" - -#: awkgram.y:571 -msgid "multistage two-way pipelines don't work" -msgstr "" - -#: awkgram.y:662 -msgid "regular expression on right of assignment" -msgstr "" - -#: awkgram.y:680 -msgid "non-redirected `getline' undefined inside END action" -msgstr "" - -#: awkgram.y:690 -msgid "regular expression on left of `~' or `!~' operator" -msgstr "" - -#: awkgram.y:700 -#, c-format -msgid "regexp constant `/%s/' looks like a C comment, but is not" -msgstr "" - -#: awkgram.y:716 -msgid "regular expression on right of comparison" -msgstr "" - -#: awkgram.y:743 -msgid "non-redirected `getline' undefined inside BEGIN or END action" -msgstr "" - -#: awkgram.y:798 -msgid "call of `length' without parentheses is not portable" -msgstr "" - -#: awkgram.y:801 -msgid "call of `length' without parentheses is deprecated by POSIX" -msgstr "" - -#: awkgram.y:851 -msgid "invalid subscript expression" -msgstr "" - -#: awkgram.y:985 -#, c-format -msgid "fptr %x not in tokentab\n" -msgstr "" - -#: awkgram.y:1019 -msgid "unexpected newline" -msgstr "" - -#: awkgram.y:1098 -msgid "empty program text on command line" -msgstr "" - -#: awkgram.y:1155 -#, c-format -msgid "can't open source file `%s' for reading (%s)" -msgstr "" - -#: awkgram.y:1190 -#, c-format -msgid "can't read sourcefile `%s' (%s)" -msgstr "" - -#: awkgram.y:1198 -#, c-format -msgid "source file `%s' is empty" -msgstr "" - -#: awkgram.y:1312 awkgram.y:1417 awkgram.y:1435 awkgram.y:1778 awkgram.y:1840 -msgid "source file does not end in newline" -msgstr "" - -#: awkgram.y:1373 -msgid "unterminated regexp ends with `\\' at end of file" -msgstr "" - -#: awkgram.y:1394 -msgid "unterminated regexp" -msgstr "" - -#: awkgram.y:1397 -msgid "unterminated regexp at end of file" -msgstr "" - -#: awkgram.y:1461 -msgid "use of `\\ #...' 
line continuation is not portable" -msgstr "" - -#: awkgram.y:1473 -msgid "backslash not last character on line" -msgstr "" - -#: awkgram.y:1514 -msgid "POSIX does not allow operator `**='" -msgstr "" - -#: awkgram.y:1516 -msgid "old awk does not support operator `**='" -msgstr "" - -#: awkgram.y:1525 -msgid "POSIX does not allow operator `**'" -msgstr "" - -#: awkgram.y:1527 -msgid "old awk does not support operator `**'" -msgstr "" - -#: awkgram.y:1560 -msgid "operator `^=' is not supported in old awk" -msgstr "" - -#: awkgram.y:1568 -msgid "operator `^' is not supported in old awk" -msgstr "" - -#: awkgram.y:1650 awkgram.y:1664 -msgid "unterminated string" -msgstr "" - -#: awkgram.y:1815 -#, c-format -msgid "invalid char '%c' in expression" -msgstr "" - -#: awkgram.y:1861 -#, c-format -msgid "`%s' is a gawk extension" -msgstr "" - -#: awkgram.y:1864 -#, c-format -msgid "`%s' is a Bell Labs extension" -msgstr "" - -#: awkgram.y:1867 -#, c-format -msgid "POSIX does not allow `%s'" -msgstr "" - -#: awkgram.y:1871 -#, c-format -msgid "`%s' is not supported in old awk" -msgstr "" - -#: awkgram.y:1899 -msgid "`goto' considered harmful!\n" -msgstr "" - -#: awkgram.y:1963 -#, c-format -msgid "%d is invalid as number of arguments for %s" -msgstr "" - -#: awkgram.y:1982 awkgram.y:1985 -msgid "match: third argument is a gawk extension" -msgstr "" - -#: awkgram.y:2001 -#, c-format -msgid "%s: string literal as last arg of substitute has no effect" -msgstr "" - -#: awkgram.y:2005 -msgid "sub third parameter is not a changeable object" -msgstr "" - -#: awkgram.y:2007 -msgid "gsub third parameter is not a changeable object" -msgstr "" - -#: awkgram.y:2033 awkgram.y:2036 -msgid "close: second argument is a gawk extension" -msgstr "" - -#: awkgram.y:2046 -msgid "use of dcgettext(_\"...\") is incorrect: remove leading underscore" -msgstr "" - -#: awkgram.y:2139 -#, c-format -msgid "function `%s': parameter #%d, `%s', duplicates parameter #%d" -msgstr "" - -#: awkgram.y:2171 -#, c-format -msgid "function `%s': parameter `%s' shadows global variable" -msgstr "" - -#: awkgram.y:2280 -#, c-format -msgid "could not open `%s' for writing (%s)" -msgstr "" - -#: awkgram.y:2311 -#, c-format -msgid "%s: close failed (%s)" -msgstr "" - -#: awkgram.y:2421 -msgid "shadow_funcs() called twice!" -msgstr "" - -#: awkgram.y:2496 -#, c-format -msgid "function `%s': can't use function name as parameter name" -msgstr "" - -#: awkgram.y:2506 -#, c-format -msgid "function name `%s' previously defined" -msgstr "" - -#: awkgram.y:2654 awkgram.y:2660 -#, c-format -msgid "function `%s' called but never defined" -msgstr "" - -#: awkgram.y:2663 -#, c-format -msgid "function `%s' defined but never called" -msgstr "" - -#: awkgram.y:2690 -#, c-format -msgid "regexp constant for parameter #%d yields boolean value" -msgstr "" - -#: dfa.c:451 -msgid "Unfinished \\ escape" -msgstr "" - -#. Cases: -#. {M} - exact count -#. {M,} - minimum count, maximum is infinity -#. 
{M,N} - M through N -#: dfa.c:584 dfa.c:590 dfa.c:600 dfa.c:608 dfa.c:623 -msgid "unfinished repeat count" -msgstr "" - -#: dfa.c:597 dfa.c:614 dfa.c:622 dfa.c:626 -msgid "malformed repeat count" -msgstr "" - -#: dfa.c:691 dfa.c:694 dfa.c:721 dfa.c:725 dfa.c:726 dfa.c:729 dfa.c:742 -#: dfa.c:743 -msgid "Unbalanced [" -msgstr "" - -#: dfa.c:889 -msgid "Unbalanced (" -msgstr "" - -#: dfa.c:1003 -msgid "No regexp syntax bits specified" -msgstr "" - -#: dfa.c:1011 -msgid "Unbalanced )" -msgstr "" - -#: dfa.c:2002 -msgid "out of memory" -msgstr "" - -#: field.c:787 -msgid "split: second argument is not an array" -msgstr "" - -#: field.c:814 -msgid "split: null string for third arg is a gawk extension" -msgstr "" - -#: field.c:854 -msgid "`FIELDWIDTHS' is a gawk extension" -msgstr "" - -#: field.c:881 -#, c-format -msgid "field %d in FIELDWIDTHS, must be > 0" -msgstr "" - -#: field.c:935 -msgid "null string for `FS' is a gawk extension" -msgstr "" - -#: msg.c:57 -msgid "cmd. line:" -msgstr "" - -#: msg.c:123 -msgid "warning: " -msgstr "" - -#: msg.c:145 -msgid "error: " -msgstr "" - -#: msg.c:178 -msgid "fatal: " -msgstr "" - -#: eval.c:258 -#, c-format -msgid "unknown nodetype %d" -msgstr "" - -#: eval.c:306 -msgid "buffer overflow in genflags2str" -msgstr "" - -#: eval.c:545 -#, c-format -msgid "for loop: array `%s' changed size from %d to %d during loop execution" -msgstr "" - -#: eval.c:569 -msgid "`break' outside a loop is not portable" -msgstr "" - -#: eval.c:573 -msgid "`break' outside a loop is not allowed" -msgstr "" - -#: eval.c:592 -msgid "`continue' outside a loop is not portable" -msgstr "" - -#: eval.c:596 -msgid "`continue' outside a loop is not allowed" -msgstr "" - -#: eval.c:626 -msgid "`next' cannot be called from a BEGIN rule" -msgstr "" - -#: eval.c:628 -msgid "`next' cannot be called from an END rule" -msgstr "" - -#: eval.c:640 -msgid "`nextfile' cannot be called from a BEGIN rule" -msgstr "" - -#: eval.c:642 -msgid "`nextfile' cannot be called from an END rule" -msgstr "" - -#: eval.c:683 -msgid "statement has no effect" -msgstr "" - -#: eval.c:717 eval.c:747 eval.c:1617 -#, c-format -msgid "reference to uninitialized variable `%s'" -msgstr "" - -#: eval.c:725 eval.c:1603 -#, c-format -msgid "can't use function name `%s' as variable or array" -msgstr "" - -#: eval.c:732 eval.c:738 eval.c:1742 -#, c-format -msgid "reference to uninitialized argument `%s'" -msgstr "" - -#: eval.c:826 -msgid "assignment used in conditional context" -msgstr "" - -#: eval.c:917 -msgid "" -"concatenation: side effects in one expression have changed the length of " -"another!" 
-msgstr "" - -#: eval.c:1012 -msgid "division by zero attempted" -msgstr "" - -#: eval.c:1027 -msgid "division by zero attempted in `%%'" -msgstr "" - -#: eval.c:1228 -msgid "division by zero attempted in `/='" -msgstr "" - -#: eval.c:1246 -msgid "division by zero attempted in `%%='" -msgstr "" - -#: eval.c:1419 -#, c-format -msgid "%s (from %s)" -msgstr "" - -#: eval.c:1467 -#, c-format -msgid "function `%s' called with more arguments than declared" -msgstr "" - -#: eval.c:1514 -#, c-format -msgid "function `%s' not defined" -msgstr "" - -#: eval.c:1516 -#, c-format -msgid "function %s called\n" -msgstr "" - -#: eval.c:1575 -msgid "" -"\n" -"\t# Function Call Stack:\n" -"\n" -msgstr "" - -#: eval.c:1578 -msgid "\t# -- main --\n" -msgstr "" - -#: eval.c:1754 -msgid "attempt to field reference from non-numeric value" -msgstr "" - -#: eval.c:1756 -msgid "attempt to reference from null string" -msgstr "" - -#: eval.c:1762 -#, c-format -msgid "attempt to access field %d" -msgstr "" - -#: eval.c:1778 -#, c-format -msgid "attempt to use scalar parameter `%s' as an array" -msgstr "" - -#: eval.c:1869 -msgid "`IGNORECASE' is a gawk extension" -msgstr "" - -#: eval.c:1897 -msgid "`BINMODE' is a gawk extension" -msgstr "" - -#: eval.c:2009 -#, c-format -msgid "bad `%sFMT' specification `%s'" -msgstr "" - -#: eval.c:2075 -msgid "turning off `--lint' due to assignment to `LINT'" -msgstr "" - -#: io.c:240 -#, c-format -msgid "cannot open file `%s' for reading (%s)" -msgstr "" - -#: io.c:320 -#, c-format -msgid "close of fd %d (`%s') failed (%s)" -msgstr "" - -#: io.c:432 -#, c-format -msgid "invalid tree type %s in redirect()" -msgstr "" - -#: io.c:438 -#, c-format -msgid "expression in `%s' redirection only has numeric value" -msgstr "" - -#: io.c:444 -#, c-format -msgid "expression for `%s' redirection has null string value" -msgstr "" - -#: io.c:449 -#, c-format -msgid "filename `%s' for `%s' redirection may be result of logical expression" -msgstr "" - -#: io.c:471 -#, c-format -msgid "unnecessary mixing of `>' and `>>' for file `%.*s'" -msgstr "" - -#: io.c:523 -#, c-format -msgid "can't open pipe `%s' for output (%s)" -msgstr "" - -#: io.c:532 -#, c-format -msgid "can't open pipe `%s' for input (%s)" -msgstr "" - -#: io.c:545 -#, c-format -msgid "can't open two way socket `%s' for input/output (%s)" -msgstr "" - -#: io.c:549 -#, c-format -msgid "can't open two way pipe `%s' for input/output (%s)" -msgstr "" - -#: io.c:625 -#, c-format -msgid "can't redirect from `%s' (%s)" -msgstr "" - -#: io.c:628 -#, c-format -msgid "can't redirect to `%s' (%s)" -msgstr "" - -#: io.c:667 -msgid "" -"reached system limit for open files: starting to multiplex file descriptors" -msgstr "" - -#. do_lint && -#: io.c:679 -#, c-format -msgid "close of `%s' failed (%s)." -msgstr "" - -#. surely this is the only reason ??? -#: io.c:686 -msgid "too many pipes or input files open" -msgstr "" - -#: io.c:709 -msgid "close: second argument must be `to' or `from'" -msgstr "" - -#: io.c:723 -#, c-format -msgid "close: `%.*s' is not an open file, pipe or co-process" -msgstr "" - -#. update ERRNO manually, using errno = ENOENT is a stretch. 
-#: io.c:727 -msgid "close of redirection that was never opened" -msgstr "" - -#: io.c:754 -#, c-format -msgid "close: redirection `%s' not opened with `|&', second argument ignored" -msgstr "" - -#: io.c:811 -#, c-format -msgid "failure status (%d) on pipe close of `%s' (%s)" -msgstr "" - -#: io.c:814 -#, c-format -msgid "failure status (%d) on file close of `%s' (%s)" -msgstr "" - -#: io.c:833 -#, c-format -msgid "no explicit close of socket `%s' provided" -msgstr "" - -#: io.c:836 -#, c-format -msgid "no explicit close of co-process `%s' provided" -msgstr "" - -#: io.c:839 -#, c-format -msgid "no explicit close of pipe `%s' provided" -msgstr "" - -#: io.c:842 -#, c-format -msgid "no explicit close of file `%s' provided" -msgstr "" - -#: io.c:871 io.c:925 -#, c-format -msgid "error writing standard output (%s)" -msgstr "" - -#: io.c:875 io.c:929 -#, c-format -msgid "error writing standard error (%s)" -msgstr "" - -#: io.c:883 -#, c-format -msgid "pipe flush of `%s' failed (%s)." -msgstr "" - -#: io.c:886 -#, c-format -msgid "co-process flush of pipe to `%s' failed (%s)." -msgstr "" - -#: io.c:889 -#, c-format -msgid "file flush of `%s' failed (%s)." -msgstr "" - -#. /inet/raw client not ready yet -#: io.c:1048 -msgid "/inet/raw client not ready yet, sorry" -msgstr "" - -#: io.c:1050 io.c:1087 -msgid "only root may use `/inet/raw'." -msgstr "" - -#. /inet/raw server not ready yet -#: io.c:1085 -msgid "/inet/raw server not ready yet, sorry" -msgstr "" - -#: io.c:1175 -#, c-format -msgid "no (known) protocol supplied in special filename `%s'" -msgstr "" - -#: io.c:1193 -#, c-format -msgid "special file name `%s' is incomplete" -msgstr "" - -#: io.c:1205 -#, c-format -msgid "local port invalid in `%s'" -msgstr "" - -#: io.c:1217 -msgid "must supply a remote hostname to `/inet'" -msgstr "" - -#: io.c:1232 -msgid "must supply a remote port to `/inet'" -msgstr "" - -#: io.c:1238 -#, c-format -msgid "remote port invalid in `%s'" -msgstr "" - -#: io.c:1248 -msgid "TCP/IP communications are not supported" -msgstr "" - -#: io.c:1257 io.c:1446 -#, c-format -msgid "file `%s' is a directory" -msgstr "" - -#: io.c:1327 -#, c-format -msgid "use `PROCINFO[\"%s\"]' instead of `%s'" -msgstr "" - -#: io.c:1363 -msgid "use `PROCINFO[...]' instead of `/dev/user'" -msgstr "" - -#: io.c:1432 -#, c-format -msgid "could not open `%s', mode `%s'" -msgstr "" - -#: io.c:1544 io.c:1665 -#, c-format -msgid "close of stdout in child failed (%s)" -msgstr "" - -#: io.c:1547 io.c:1668 -#, c-format -msgid "moving pipe to stdout in child failed (dup: %s)" -msgstr "" - -#: io.c:1549 -#, c-format -msgid "close of stdin in child failed (%s)" -msgstr "" - -#: io.c:1552 -#, c-format -msgid "moving pipe to stdin in child failed (dup: %s)" -msgstr "" - -#: io.c:1555 io.c:1670 io.c:1678 -#, c-format -msgid "close of pipe failed (%s)" -msgstr "" - -#: io.c:1584 -#, c-format -msgid "pipe from `%s': could not set close-on-exec (fcntl: %s)" -msgstr "" - -#: io.c:1588 -#, c-format -msgid "pipe to `%s': could not set close-on-exec (fcntl: %s)" -msgstr "" - -#: io.c:1598 -msgid "`|&' not supported" -msgstr "" - -#: io.c:1662 -#, c-format -msgid "cannot open pipe `%s' (%s)" -msgstr "" - -#: io.c:1675 -#, c-format -msgid "cannot create child process for `%s' (fork: %s)" -msgstr "" - -#: io.c:2004 -#, c-format -msgid "data file `%s' is empty" -msgstr "" - -#: io.c:2071 -#, c-format -msgid "internal error: file `%s', line %d\n" -msgstr "" - -#: io.c:2173 -#, c-format -msgid "error reading input file `%s': %s" -msgstr "" - -#: io.c:2392 -msgid 
"multicharacter value of `RS' is a gawk extension" -msgstr "" - -#: node.c:59 node.c:66 node.c:75 node.c:89 node.c:116 -msgid "can't convert string to float" -msgstr "" - -#: node.c:342 -msgid "backslash at end of string" -msgstr "" - -#: node.c:524 -msgid "POSIX does not allow `\\x' escapes" -msgstr "" - -#: node.c:530 -msgid "no hex digits in `\\x' escape sequence" -msgstr "" - -#: node.c:564 -#, c-format -msgid "escape sequence `\\%c' treated as plain `%c'" -msgstr "" - -#: posix/gawkmisc.c:122 -#, c-format -msgid "%s %s `%s': could not set close-on-exec: %s" -msgstr "" diff --git a/contrib/awk/po/he.gmo b/contrib/awk/po/he.gmo deleted file mode 100644 index 1905a85..0000000 Binary files a/contrib/awk/po/he.gmo and /dev/null differ diff --git a/contrib/awk/po/stamp-cat-id b/contrib/awk/po/stamp-cat-id deleted file mode 100644 index 9788f70..0000000 --- a/contrib/awk/po/stamp-cat-id +++ /dev/null @@ -1 +0,0 @@ -timestamp diff --git a/contrib/awk/posix/ChangeLog b/contrib/awk/posix/ChangeLog deleted file mode 100644 index 359a635..0000000 --- a/contrib/awk/posix/ChangeLog +++ /dev/null @@ -1,57 +0,0 @@ -Sun Jun 3 13:04:44 2001 Arnold D. Robbins - - * Release 3.1.0: Release tar file made. And there was - rejoicing. - -Sun Jan 28 15:50:02 2001 Eli Zaretskii - - * gawkmisc.c (os_restore_mode): New function. - -Sun Dec 3 16:53:37 2000 Arnold D. Robbins - - * gawkmisc.c (os_setbinmode): new function. - -Tue Nov 14 16:13:08 2000 Arnold D. Robbins - - * gawkmisc.c: Remove all includes. Done by ../gawkmisc.c. - -Tue Nov 7 14:09:14 2000 Arnold D. Robbins - - * gawkmisc.c (os_is_setuid): new function. - -Wed Jul 30 19:53:52 1997 Arnold D. Robbins - - * Close-on-exec changes: - gawkmisc.c: (os_close_on_exec, os_isdir): new functions. - -Mon Aug 7 15:23:00 2000 Arnold D. Robbins - - * Release 3.0.6: Release tar file made. - -Sun Jun 25 15:08:19 2000 Arnold D. Robbins - - * Release 3.0.5: Release tar file made. - -Wed Jun 30 16:14:36 1999 Arnold D. Robbins - - * Release 3.0.4: Release tar file made. This time for sure. - -Thu May 15 12:49:08 1997 Arnold D. Robbins - - * Release 3.0.3: Release tar file made. - -Fri Apr 18 07:55:47 1997 Arnold D. Robbins - - * BETA Release 3.0.34: Release tar file made. - -Wed Dec 25 11:25:22 1996 Arnold D. Robbins - - * Release 3.0.2: Release tar file made. - -Tue Dec 10 23:09:26 1996 Arnold D. Robbins - - * Release 3.0.1: Release tar file made. - -Wed Jan 10 22:58:55 1996 Arnold D. Robbins - - * ChangeLog created. diff --git a/contrib/awk/posix/gawkmisc.c b/contrib/awk/posix/gawkmisc.c deleted file mode 100644 index 4f94683..0000000 --- a/contrib/awk/posix/gawkmisc.c +++ /dev/null @@ -1,171 +0,0 @@ -/* gawkmisc.c --- miscellanious gawk routines that are OS specific. - - Copyright (C) 1986, 1988, 1989, 1991 - 98, 2001 the Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ - -char quote = '\''; -char *defpath = DEFPATH; -char envsep = ':'; - -#ifndef INVALID_HANDLE -/* FIXME: is this value for INVALID_HANDLE correct? */ -#define INVALID_HANDLE -1 -#endif - -/* gawk_name --- pull out the "gawk" part from how the OS called us */ - -char * -gawk_name(filespec) -const char *filespec; -{ - char *p; - - /* "path/name" -> "name" */ - p = strrchr(filespec, '/'); - return (p == NULL ? (char *) filespec : p + 1); -} - -/* os_arg_fixup --- fixup the command line */ - -void -os_arg_fixup(argcp, argvp) -int *argcp; -char ***argvp; -{ - /* no-op */ - return; -} - -/* os_devopen --- open special per-OS devices */ - -int -os_devopen(name, flag) -const char *name; -int flag; -{ - /* no-op */ - return INVALID_HANDLE; -} - -/* optimal_bufsize --- determine optimal buffer size */ - -int -optimal_bufsize(fd, stb) -int fd; -struct stat *stb; -{ - /* force all members to zero in case OS doesn't use all of them. */ - memset(stb, '\0', sizeof(struct stat)); - - /* - * System V.n, n < 4, doesn't have the file system block size in the - * stat structure. So we have to make some sort of reasonable - * guess. We use stdio's BUFSIZ, since that is what it was - * meant for in the first place. - */ -#ifdef HAVE_ST_BLKSIZE -#define DEFBLKSIZE (stb->st_blksize ? stb->st_blksize : BUFSIZ) -#else -#define DEFBLKSIZE BUFSIZ -#endif - - if (isatty(fd)) - return BUFSIZ; - if (fstat(fd, stb) == -1) - fatal("can't stat fd %d (%s)", fd, strerror(errno)); - if (lseek(fd, (off_t)0, 0) == -1) /* not a regular file */ - return DEFBLKSIZE; - if (stb->st_size > 0 && stb->st_size < DEFBLKSIZE) /* small file */ - return stb->st_size; - return DEFBLKSIZE; -} - -/* ispath --- return true if path has directory components */ - -int -ispath(file) -const char *file; -{ - return (strchr(file, '/') != NULL); -} - -/* isdirpunct --- return true if char is a directory separator */ - -int -isdirpunct(c) -int c; -{ - return (c == '/'); -} - -/* os_close_on_exec --- set close on exec flag, print warning if fails */ - -void -os_close_on_exec(fd, name, what, dir) -int fd; -const char *name, *what, *dir; -{ - if (fcntl(fd, F_SETFD, 1) < 0) - warning(_("%s %s `%s': could not set close-on-exec: %s"), - what, dir, name, strerror(errno)); -} - -/* os_isdir --- is this an fd on a directory? */ - -#if ! defined(S_ISDIR) && defined(S_IFDIR) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) -#endif - -int -os_isdir(fd) -int fd; -{ - struct stat sbuf; - - return (fstat(fd, &sbuf) == 0 && S_ISDIR(sbuf.st_mode)); -} - -/* os_is_setuid --- true if running setuid root */ - -int -os_is_setuid() -{ - long uid, euid; - - uid = getuid(); - euid = geteuid(); - - return (euid == 0 && euid != uid); -} - -/* os_setbinmode --- set binary mode on file */ - -int -os_setbinmode (fd, mode) -int fd, mode; -{ - return 0; -} - -/* os_restore_mode --- restore the original mode of the console device */ - -void -os_restore_mode (fd) -int fd; -{ - /* no-op */ - return; -} diff --git a/contrib/awk/profile.c b/contrib/awk/profile.c deleted file mode 100644 index 1dced87..0000000 --- a/contrib/awk/profile.c +++ /dev/null @@ -1,1381 +0,0 @@ -/* - * profile.c - gawk parse tree pretty-printer with counts - */ - -/* - * Copyright (C) 1999-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. 
- * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -/* where to place redirections for getline, print, printf */ -enum redir_placement { - BEFORE = 0, - AFTER = 1 -}; - -#undef tree_eval -static void tree_eval P((NODE *tree)); -static void parenthesize P((NODETYPE parent_type, NODE *tree)); -static void eval_condition P((NODE *tree)); -static void pp_op_assign P((NODE *tree)); -static void pp_func_call P((NODE *name, NODE *arg_list)); -static void pp_match_op P((NODE *tree)); -static void pp_lhs P((NODE *ptr)); -static void pp_print_stmt P((const char *command, NODE *tree)); -static void pp_delete P((NODE *tree)); -static void pp_in_array P((NODE *array, NODE *subscript)); -static void pp_getline P((NODE *tree)); -static void pp_builtin P((NODE *tree)); -static void pp_list P((NODE *tree)); -static void pp_string P((char *str, size_t len, int delim)); -static int is_scalar P((NODETYPE type)); -static int prec_level P((NODETYPE type)); -#ifdef PROFILING -static RETSIGTYPE dump_and_exit P((int signum)); -static RETSIGTYPE just_dump P((int signum)); -#endif - -/* pretty printing related functions and variables */ - -static char **fparms; /* function parameter names */ -static FILE *prof_fp; /* where to send the profile */ - -static long indent_level = 0; - -static int in_BEGIN_or_END = FALSE; - -static int in_expr = FALSE; - -#define SPACEOVER 0 - -/* init_profiling --- do needed initializations, see also main.c */ - -void -init_profiling(int *flag, const char *def_file) -{ - /* run time init avoids glibc innovations */ - prof_fp = stderr; - -#ifdef PROFILING - if (*flag == FALSE) { - *flag = TRUE; - set_prof_file(def_file); - } -#endif -} - -/* set_prof_file --- set the output file for profiling */ - -void -set_prof_file(const char *file) -{ - assert(file != NULL); - - prof_fp = fopen(file, "w"); - if (prof_fp == NULL) { - warning(_("could not open `%s' for writing: %s"), - file, strerror(errno)); - warning(_("sending profile to standard error")); - prof_fp = stderr; - } -} - -void -init_profiling_signals() -{ -#ifdef PROFILING -#ifdef SIGHUP - signal(SIGHUP, dump_and_exit); -#endif -#ifdef SIGUSR1 - signal(SIGUSR1, just_dump); -#endif -#endif -} - -/* indent --- print out enough tabs */ - -static void -indent(long count) -{ - int i; - - if (count == 0) - putc('\t', prof_fp); - else - fprintf(prof_fp, "%6ld ", count); - - assert(indent_level >= 0); - for (i = 0; i < indent_level; i++) - putc('\t', prof_fp); -} - -/* indent_in --- increase the level, with error checking */ - -static void -indent_in() -{ - assert(indent_level >= 0); - indent_level++; -} - -/* indent_out --- decrease the level, with error checking */ - -static void -indent_out() -{ - indent_level--; - assert(indent_level >= 0); -} - -/* - * pprint: - * Tree is a bunch of rules to run. 
Returns zero if it hit an exit() - * statement - */ -static void -pprint(register NODE *volatile tree) -{ - register NODE *volatile t = NULL; /* temporary */ - int volatile traverse = TRUE; /* True => loop thru tree (Node_rule_list) */ - - /* avoid false source indications */ - source = NULL; - sourceline = 0; - - if (tree == NULL) - return; - sourceline = tree->source_line; - source = tree->source_file; - switch (tree->type) { - case Node_rule_node: - traverse = FALSE; /* False => one for-loop iteration only */ - /* FALL THROUGH */ - case Node_rule_list: - for (t = tree; t != NULL; t = t->rnode) { - if (traverse) - tree = t->lnode; - sourceline = tree->source_line; - source = tree->source_file; - - if (! in_BEGIN_or_END) - indent(tree->exec_count); - - if (tree->lnode) { - eval_condition(tree->lnode); - if (tree->rnode) - fprintf(prof_fp, "\t"); - } - - if (tree->rnode) { - if (! in_BEGIN_or_END) { - fprintf(prof_fp, "{"); - if (tree->lnode != NULL - && tree->lnode->exec_count) - fprintf(prof_fp, " # %ld", - tree->lnode->exec_count); - fprintf(prof_fp, "\n"); - } - indent_in(); - pprint(tree->rnode); - indent_out(); - if (! in_BEGIN_or_END) { - indent(SPACEOVER); - fprintf(prof_fp, "}\n"); - } - } - - if (! traverse) /* case Node_rule_node */ - break; /* don't loop */ - - if (t->rnode && ! in_BEGIN_or_END) - fprintf(prof_fp, "\n"); - } - break; - - case Node_statement_list: - for (t = tree; t != NULL; t = t->rnode) { - pprint(t->lnode); - } - break; - - case Node_K_if: - indent(tree->exec_count); - fprintf(prof_fp, "if ("); - in_expr++; - eval_condition(tree->lnode); - in_expr--; - fprintf(prof_fp, ") {"); -#ifdef PROFILING - if (tree->rnode->exec_count) - fprintf(prof_fp, " # %ld", tree->rnode->exec_count); -#endif - fprintf(prof_fp, "\n"); - indent_in(); - pprint(tree->rnode->lnode); - indent_out(); - if (tree->rnode->rnode != NULL) { - if (tree->exec_count - tree->rnode->exec_count > 0) - indent(tree->exec_count - tree->rnode->exec_count); - else - indent(0); - fprintf(prof_fp, "} else {\n"); - indent_in(); - pprint(tree->rnode->rnode); - indent_out(); - } - indent(SPACEOVER); - fprintf(prof_fp, "}\n"); - break; - - case Node_K_while: - indent(tree->exec_count); - fprintf(prof_fp, "while ("); - in_expr++; - eval_condition(tree->lnode); - in_expr--; - fprintf(prof_fp, ") {\n"); - indent_in(); - pprint(tree->rnode); - indent_out(); - indent(SPACEOVER); - fprintf(prof_fp, "}\n"); - break; - - case Node_K_do: - indent(tree->exec_count); - fprintf(prof_fp, "do {\n"); - indent_in(); - pprint(tree->rnode); - indent_out(); - indent(SPACEOVER); - fprintf(prof_fp, "} while ("); - in_expr++; - eval_condition(tree->lnode); - in_expr--; - fprintf(prof_fp, ")\n"); - break; - - case Node_K_for: - indent(tree->exec_count); - fprintf(prof_fp, "for ("); - in_expr++; - pprint(tree->forloop->init); - fprintf(prof_fp, "; "); - eval_condition(tree->forloop->cond); - fprintf(prof_fp, "; "); - pprint(tree->forloop->incr); - fprintf(prof_fp, ") {\n"); - in_expr--; - indent_in(); - pprint(tree->lnode); - indent_out(); - indent(SPACEOVER); - fprintf(prof_fp, "}\n"); - break; - - case Node_K_arrayfor: -#define hakvar forloop->init -#define arrvar forloop->incr - indent(tree->exec_count); - fprintf(prof_fp, "for ("); - in_expr++; - pp_lhs(tree->hakvar); - in_expr--; - fprintf(prof_fp, " in "); - t = tree->arrvar; - if (t->type == Node_param_list) - fprintf(prof_fp, "%s", fparms[t->param_cnt]); - else - fprintf(prof_fp, "%s", t->vname); - fprintf(prof_fp, ") {\n"); - indent_in(); - pprint(tree->lnode); - 
indent_out(); - indent(SPACEOVER); - fprintf(prof_fp, "}\n"); - break; - - case Node_K_break: - indent(tree->exec_count); - fprintf(prof_fp, "break\n"); - break; - - case Node_K_continue: - indent(tree->exec_count); - fprintf(prof_fp, "continue\n"); - break; - - case Node_K_print: - pp_print_stmt("print", tree); - break; - - case Node_K_printf: - pp_print_stmt("printf", tree); - break; - - case Node_K_delete: - pp_delete(tree); - break; - - case Node_K_next: - indent(tree->exec_count); - fprintf(prof_fp, "next\n"); - break; - - case Node_K_nextfile: - indent(tree->exec_count); - fprintf(prof_fp, "nextfile\n"); - break; - - case Node_K_exit: - indent(tree->exec_count); - fprintf(prof_fp, "exit"); - if (tree->lnode != NULL) { - fprintf(prof_fp, " "); - tree_eval(tree->lnode); - } - fprintf(prof_fp, "\n"); - break; - - case Node_K_return: - indent(tree->exec_count); - fprintf(prof_fp, "return"); - if (tree->lnode != NULL) { - fprintf(prof_fp, " "); - tree_eval(tree->lnode); - } - fprintf(prof_fp, "\n"); - break; - - default: - /* - * Appears to be an expression statement. - * Throw away the value. - */ - if (in_expr) - tree_eval(tree); - else { - indent(tree->exec_count); - tree_eval(tree); - fprintf(prof_fp, "\n"); - } - break; - } -} - -/* tree_eval --- evaluate a subtree */ - -static void -tree_eval(register NODE *tree) -{ - if (tree == NULL) - return; - - switch (tree->type) { - case Node_param_list: - fprintf(prof_fp, "%s", fparms[tree->param_cnt]); - return; - - case Node_var: - if (tree->vname != NULL) - fprintf(prof_fp, "%s", tree->vname); - else - fatal(_("internal error: Node_var with null vname")); - return; - - case Node_val: - if ((tree->flags & (NUM|NUMBER)) != 0) - fprintf(prof_fp, "%g", tree->numbr); - else { - if ((tree->flags & INTLSTR) != 0) - fprintf(prof_fp, "_"); - pp_string(tree->stptr, tree->stlen, '"'); - } - return; - - case Node_and: - eval_condition(tree->lnode); - fprintf(prof_fp, " && "); - eval_condition(tree->rnode); - return; - - case Node_or: - eval_condition(tree->lnode); - fprintf(prof_fp, " || "); - eval_condition(tree->rnode); - return; - - case Node_not: - parenthesize(tree->type, tree->lnode); - return; - - /* Builtins */ - case Node_builtin: - pp_builtin(tree); - return; - - case Node_in_array: - in_expr++; - pp_in_array(tree->lnode, tree->rnode); - in_expr--; - return; - - case Node_func_call: - pp_func_call(tree->rnode, tree->lnode); - return; - - case Node_K_getline: - pp_getline(tree); - return; - - /* unary operations */ - case Node_NR: - fprintf(prof_fp, "NR"); - return; - - case Node_FNR: - fprintf(prof_fp, "FNR"); - return; - - case Node_NF: - fprintf(prof_fp, "NF"); - return; - - case Node_FIELDWIDTHS: - fprintf(prof_fp, "FIELDWIDTHS"); - return; - - case Node_FS: - fprintf(prof_fp, "FS"); - return; - - case Node_RS: - fprintf(prof_fp, "RS"); - return; - - case Node_IGNORECASE: - fprintf(prof_fp, "IGNORECASE"); - return; - - case Node_OFS: - fprintf(prof_fp, "OFS"); - return; - - case Node_ORS: - fprintf(prof_fp, "ORS"); - return; - - case Node_OFMT: - fprintf(prof_fp, "OFMT"); - return; - - case Node_CONVFMT: - fprintf(prof_fp, "CONVFMT"); - return; - - case Node_BINMODE: - fprintf(prof_fp, "BINMODE"); - return; - - case Node_field_spec: - case Node_subscript: - pp_lhs(tree); - return; - - case Node_var_array: - if (tree->vname != NULL) - fprintf(prof_fp, "%s", tree->vname); - else - fatal(_("internal error: Node_var_array with null vname")); - return; - - case Node_unary_minus: - fprintf(prof_fp, " -"); - tree_eval(tree->subnode); - return; 
- - case Node_cond_exp: - eval_condition(tree->lnode); - fprintf(prof_fp, " ? "); - tree_eval(tree->rnode->lnode); - fprintf(prof_fp, " : "); - tree_eval(tree->rnode->rnode); - return; - - case Node_match: - case Node_nomatch: - case Node_regex: - pp_match_op(tree); - return; - - case Node_func: - fatal(_("function `%s' called with space between name and `(',\n%s"), - tree->lnode->param, - _("or used in other expression context")); - - /* assignments */ - case Node_assign: - tree_eval(tree->lnode); - fprintf(prof_fp, " = "); - tree_eval(tree->rnode); - return; - - case Node_concat: - fprintf(prof_fp, "("); - tree_eval(tree->lnode); - fprintf(prof_fp, " "); - tree_eval(tree->rnode); - fprintf(prof_fp, ")"); - return; - - /* other assignment types are easier because they are numeric */ - case Node_preincrement: - case Node_predecrement: - case Node_postincrement: - case Node_postdecrement: - case Node_assign_exp: - case Node_assign_times: - case Node_assign_quotient: - case Node_assign_mod: - case Node_assign_plus: - case Node_assign_minus: - pp_op_assign(tree); - return; - - default: - break; /* handled below */ - } - - /* handle binary ops */ - in_expr++; - parenthesize(tree->type, tree->lnode); - - switch (tree->type) { - case Node_geq: - fprintf(prof_fp, " >= "); - break; - case Node_leq: - fprintf(prof_fp, " <= "); - break; - case Node_greater: - fprintf(prof_fp, " > "); - break; - case Node_less: - fprintf(prof_fp, " < "); - break; - case Node_notequal: - fprintf(prof_fp, " != "); - break; - case Node_equal: - fprintf(prof_fp, " == "); - break; - case Node_exp: - fprintf(prof_fp, " ^ "); - break; - case Node_times: - fprintf(prof_fp, " * "); - break; - case Node_quotient: - fprintf(prof_fp, " / "); - break; - case Node_mod: - fprintf(prof_fp, " %% "); - break; - case Node_plus: - fprintf(prof_fp, " + "); - break; - case Node_minus: - fprintf(prof_fp, " - "); - break; - case Node_var_array: - fatal(_("attempt to use array `%s' in a scalar context"), - tree->vname); - return; - default: - fatal(_("illegal type (%s) in tree_eval"), nodetype2str(tree->type)); - } - parenthesize(tree->type, tree->rnode); - in_expr--; - - return; -} - -/* eval_condition --- is TREE true or false */ - -static void -eval_condition(register NODE *tree) -{ - if (tree == NULL) /* Null trees are the easiest kinds */ - return; - - if (tree->type == Node_line_range) { - /* /.../, /.../ */ - eval_condition(tree->condpair->lnode); - fprintf(prof_fp,", "); - eval_condition(tree->condpair->rnode); - return; - } - - /* - * Could just be J.random expression. in which case, null and 0 are - * false, anything else is true - */ - - tree_eval(tree); - return; -} - -/* pp_op_assign --- do +=, -=, etc. 
*/ - -static void -pp_op_assign(register NODE *tree) -{ - char *op = NULL; - enum Order { - NA = 0, - PRE = 1, - POST = 2 - } order = NA; - - switch(tree->type) { - case Node_preincrement: - op = "++"; - order = PRE; - break; - - case Node_predecrement: - op = "--"; - order = PRE; - break; - - case Node_postincrement: - op = "++"; - order = POST; - break; - - case Node_postdecrement: - op = "--"; - order = POST; - break; - - default: - break; /* handled below */ - } - - if (order == PRE) { - fprintf(prof_fp, "%s", op); - pp_lhs(tree->lnode); - return; - } else if (order == POST) { - pp_lhs(tree->lnode); - fprintf(prof_fp, "%s", op); - return; - } - - /* a binary op */ - pp_lhs(tree->lnode); - - switch(tree->type) { - case Node_assign_exp: - fprintf(prof_fp, " ^= "); - break; - - case Node_assign_times: - fprintf(prof_fp, " *= "); - break; - - case Node_assign_quotient: - fprintf(prof_fp, " /= "); - break; - - case Node_assign_mod: - fprintf(prof_fp, " %%= "); - break; - - case Node_assign_plus: - fprintf(prof_fp, " += "); - break; - - case Node_assign_minus: - fprintf(prof_fp, " -= "); - break; - - default: - cant_happen(); - } - - tree_eval(tree->rnode); -} - -/* pp_lhs --- print the lhs */ - -static void -pp_lhs(register NODE *ptr) -{ - register NODE *n; - - switch (ptr->type) { - case Node_var_array: - fatal(_("attempt to use array `%s' in a scalar context"), - ptr->vname); - - case Node_var: - fprintf(prof_fp, "%s", ptr->vname); - break; - - case Node_FIELDWIDTHS: - fprintf(prof_fp, "FIELDWIDTHS"); - break; - - case Node_RS: - fprintf(prof_fp, "RS"); - break; - - case Node_FS: - fprintf(prof_fp, "FS"); - break; - - case Node_FNR: - fprintf(prof_fp, "FNR"); - break; - - case Node_NR: - fprintf(prof_fp, "NR"); - break; - - case Node_NF: - fprintf(prof_fp, "NF"); - break; - - case Node_IGNORECASE: - fprintf(prof_fp, "IGNORECASE"); - break; - - case Node_BINMODE: - fprintf(prof_fp, "BINMODE"); - break; - - case Node_LINT: - fprintf(prof_fp, "LINT"); - break; - - case Node_OFMT: - fprintf(prof_fp, "OFMT"); - break; - - case Node_CONVFMT: - fprintf(prof_fp, "CONVFMT"); - break; - - case Node_ORS: - fprintf(prof_fp, "ORS"); - break; - - case Node_OFS: - fprintf(prof_fp, "OFS"); - break; - - case Node_param_list: - fprintf(prof_fp, "%s", fparms[ptr->param_cnt]); - break; - - case Node_field_spec: - fprintf(prof_fp, "$"); - if (is_scalar(ptr->lnode->type)) - tree_eval(ptr->lnode); - else { - fprintf(prof_fp, "("); - tree_eval(ptr->lnode); - fprintf(prof_fp, ")"); - } - break; - - case Node_subscript: - n = ptr->lnode; - if (n->type == Node_func) { - fatal(_("attempt to use function `%s' as array"), - n->lnode->param); - } else if (n->type == Node_param_list) { - fprintf(prof_fp, "%s[", fparms[n->param_cnt]); - } else - fprintf(prof_fp, "%s[", n->vname); - if (ptr->rnode->type == Node_expression_list) - pp_list(ptr->rnode); - else - tree_eval(ptr->rnode); - fprintf(prof_fp, "]"); - break; - - case Node_func: - fatal(_("`%s' is a function, assignment is not allowed"), - ptr->lnode->param); - - case Node_builtin: - fatal(_("assignment is not allowed to result of builtin function")); - - default: - cant_happen(); - } -} - -/* match_op --- do ~ and !~ */ - -static void -pp_match_op(register NODE *tree) -{ - register NODE *re; - char *op; - char *restr; - size_t relen; - NODE *text = NULL; - - if (tree->type == Node_regex) - re = tree->re_exp; - else { - re = tree->rnode->re_exp; - text = tree->lnode; - } - - if ((re->re_flags & CONST) != 0) { - restr = re->stptr; - relen = re->stlen; - } else { - 
restr = re->stptr; - relen = re->stlen; - } - - if (tree->type == Node_regex) { - pp_string(restr, relen, '/'); - return; - } - - if (tree->type == Node_nomatch) - op = "!~"; - else if (tree->type == Node_match) - op = "~"; - else - op = ""; - - tree_eval(text); - fprintf(prof_fp, " %s ", op); - fprintf(prof_fp, "/%.*s/", (int) relen, restr); -} - -/* pp_redir --- print a redirection */ - -static void -pp_redir(register NODE *tree, enum redir_placement dir) -{ - char *op = "[BOGUS]"; /* should never be seen */ - - if (tree == NULL) - return; - - switch (tree->type) { - case Node_redirect_output: - op = ">"; - break; - case Node_redirect_append: - op = ">>"; - break; - case Node_redirect_pipe: - op = "|"; - break; - case Node_redirect_pipein: - op = "|"; - break; - case Node_redirect_input: - op = "<"; - break; - case Node_redirect_twoway: - op = "|&"; - break; - default: - cant_happen(); - } - - if (dir == BEFORE) { - if (! is_scalar(tree->subnode->type)) { - fprintf(prof_fp, "("); - tree_eval(tree->subnode); - fprintf(prof_fp, ")"); - } else - tree_eval(tree->subnode); - fprintf(prof_fp, " %s ", op); - } else { - fprintf(prof_fp, " %s ", op); - if (! is_scalar(tree->subnode->type)) { - fprintf(prof_fp, "("); - tree_eval(tree->subnode); - fprintf(prof_fp, ")"); - } else - tree_eval(tree->subnode); - } -} - -/* pp_list --- dump a list of arguments, without parens */ - -static void -pp_list(register NODE *tree) -{ - for (; tree != NULL; tree = tree->rnode) { - if (tree->type != Node_expression_list) { - fprintf(stderr, "pp_list: got %s\n", - nodetype2str(tree->type)); - fflush(stderr); - } - assert(tree->type == Node_expression_list); - tree_eval(tree->lnode); - if (tree->rnode != NULL) - fprintf(prof_fp, ", "); - } -} - -/* pp_print_stmt --- print a "print" or "printf" statement */ - -static void -pp_print_stmt(const char *command, register NODE *tree) -{ - NODE *redir = tree->rnode; - - indent(tree->exec_count); - fprintf(prof_fp, "%s", command); - if (redir != NULL) { /* parenthesize if have a redirection */ - fprintf(prof_fp, "("); - pp_list(tree->lnode); - fprintf(prof_fp, ")"); - pp_redir(redir, AFTER); - } else { - fprintf(prof_fp, " "); - pp_list(tree->lnode); - } - fprintf(prof_fp, "\n"); -} - -/* pp_delete --- print a "delete" statement */ - -static void -pp_delete(register NODE *tree) -{ - NODE *array, *subscript; - - array = tree->lnode; - subscript = tree->rnode; - indent(array->exec_count); - if (array->type == Node_param_list) - fprintf(prof_fp, "delete %s", fparms[array->param_cnt]); - else - fprintf(prof_fp, "delete %s", array->vname); - if (subscript != NULL) { - fprintf(prof_fp, "["); - pp_list(subscript); - fprintf(prof_fp, "]"); - } - fprintf(prof_fp, "\n"); -} - -/* pp_in_array --- pretty print "foo in array" test */ - -static void -pp_in_array(NODE *array, NODE *subscript) -{ - if (subscript->type == Node_expression_list) { - fprintf(prof_fp, "("); - pp_list(subscript); - fprintf(prof_fp, ")"); - } else - pprint(subscript); - - if (array->type == Node_param_list) - fprintf(prof_fp, " in %s", fparms[array->param_cnt]); - else - fprintf(prof_fp, " in %s", array->vname); -} - -/* pp_getline --- print a getline statement */ - -static void -pp_getline(register NODE *tree) -{ - NODE *redir = tree->rnode; - int before, after; - - /* - * command | getline - * or - * command |& getline - * or - * getline < file - */ - if (redir != NULL) { - before = (redir->type == Node_redirect_pipein - || redir->type == Node_redirect_twoway); - after = ! 
before; - } else - before = after = FALSE; - - if (before) - pp_redir(redir, BEFORE); - - fprintf(prof_fp, "getline"); - if (tree->lnode != NULL) { /* optional var */ - fprintf(prof_fp, " "); - pp_lhs(tree->lnode); - } - - if (after) - pp_redir(redir, AFTER); -} - -/* pp_builtin --- print a builtin function */ - -static void -pp_builtin(register NODE *tree) -{ - fprintf(prof_fp, "%s(", getfname(tree->proc)); - pp_list(tree->subnode); - fprintf(prof_fp, ")"); -} - -/* pp_func_call --- print a function call */ - -static void -pp_func_call(NODE *name, NODE *arglist) -{ - fprintf(prof_fp, "%s(", name->stptr); - pp_list(arglist); - fprintf(prof_fp, ")"); -} - -/* dump_prog --- dump the program */ - -/* - * XXX: I am not sure it is right to have the strings in the dump - * be translated, but I'll leave it alone for now. - */ - -void -dump_prog(NODE *begin, NODE *prog, NODE *end) -{ - time_t now; - - (void) time(& now); - /* \n on purpose, with \n in ctime() output */ - fprintf(prof_fp, _("\t# gawk profile, created %s\n"), ctime(& now)); - - if (begin != NULL) { - fprintf(prof_fp, _("\t# BEGIN block(s)\n\n")); - fprintf(prof_fp, "\tBEGIN {\n"); - in_BEGIN_or_END = TRUE; - pprint(begin); - in_BEGIN_or_END = FALSE; - fprintf(prof_fp, "\t}\n"); - if (prog != NULL || end != NULL) - fprintf(prof_fp, "\n"); - } - if (prog != NULL) { - fprintf(prof_fp, _("\t# Rule(s)\n\n")); - pprint(prog); - if (end != NULL) - fprintf(prof_fp, "\n"); - } - if (end != NULL) { - fprintf(prof_fp, _("\t# END block(s)\n\n")); - fprintf(prof_fp, "\tEND {\n"); - in_BEGIN_or_END = TRUE; - pprint(end); - in_BEGIN_or_END = FALSE; - fprintf(prof_fp, "\t}\n"); - } -} - -/* pp_func --- pretty print a function */ - -void -pp_func(char *name, size_t namelen, NODE *f) -{ - int j; - char **pnames; - static int first = TRUE; - - if (first) { - first = FALSE; - fprintf(prof_fp, _("\n\t# Functions, listed alphabetically\n")); - } - - fprintf(prof_fp, "\n"); - indent(f->exec_count); - fprintf(prof_fp, "function %.*s(", (int) namelen, name); - pnames = f->parmlist; - fparms = pnames; - for (j = 0; j < f->lnode->param_cnt; j++) { - fprintf(prof_fp, "%s", pnames[j]); - if (j < f->lnode->param_cnt - 1) - fprintf(prof_fp, ", "); - } - fprintf(prof_fp, ")\n\t{\n"); - indent_in(); - pprint(f->rnode); /* body */ - indent_out(); - fprintf(prof_fp, "\t}\n"); -} - -/* pp_string --- pretty print a string or regex constant */ - -static void -pp_string(char *str, size_t len, int delim) -{ - pp_string_fp(prof_fp, str, len, delim, FALSE); -} - -/* pp_string_fp --- printy print a string to the fp */ - -/* - * This routine concentrates string pretty printing in one place, - * so that it can be called from multiple places within gawk. 
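For orientation, the format strings in dump_prog and pp_func above fix the overall shape of the emitted profile. For a hypothetical program with one rule and one function, the output would look roughly like this (execution counts, ctime timestamp, and exact tab indentation omitted; names are illustrative):

    # gawk profile, created <ctime output>

    # BEGIN block(s)

    BEGIN {
        ...
    }

    # Rule(s)

    ...

    # END block(s)

    END {
        ...
    }

    # Functions, listed alphabetically

    function funcname(a, b)
    {
        ...
    }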
- */ - -void -pp_string_fp(FILE *fp, char *in_str, size_t len, int delim, int breaklines) -{ - static char escapes[] = "\b\f\n\r\t\v\\"; - static char printables[] = "bfnrtv\\"; - char *cp; - int i; - int count; -#define BREAKPOINT 70 /* arbitrary */ - unsigned char *str = (unsigned char *) in_str; - - fprintf(fp, "%c", delim); - for (count = 0; len > 0; len--, str++) { - if (++count >= BREAKPOINT && breaklines) { - fprintf(fp, "%c\n%c", delim, delim); - count = 0; - } - if (*str == delim) { - fprintf(fp, "\\%c", delim); - count++; - } else if (*str == BELL) { - fprintf(fp, "\\a"); - count++; - } else if ((cp = strchr(escapes, *str)) != NULL) { - i = cp - escapes; - putc('\\', fp); - count++; - putc(printables[i], fp); - if (breaklines && *str == '\n' && delim == '"') { - fprintf(fp, "\"\n\""); - count = 0; - } - /* NB: Deliberate use of lower-case versions. */ - } else if (isascii(*str) && isprint(*str)) { - putc(*str, fp); - } else { - char buf[10]; - - sprintf(buf, "\\%03o", *str & 0xff); - count += strlen(buf) - 1; - fprintf(fp, "%s", buf); - } - } - fprintf(fp, "%c", delim); -} - -/* is_scalar --- true or false if we'll get a scalar value */ - -static int -is_scalar(NODETYPE type) -{ - switch (type) { - case Node_var: - case Node_var_array: - case Node_val: - case Node_BINMODE: - case Node_CONVFMT: - case Node_FIELDWIDTHS: - case Node_FNR: - case Node_FS: - case Node_IGNORECASE: - case Node_LINT: - case Node_NF: - case Node_NR: - case Node_OFMT: - case Node_OFS: - case Node_ORS: - case Node_RS: - case Node_subscript: - return TRUE; - default: - return FALSE; - } -} - -/* prec_level --- return the precedence of an operator, for paren tests */ - -static int -prec_level(NODETYPE type) -{ - switch (type) { - case Node_var: - case Node_var_array: - case Node_param_list: - case Node_subscript: - case Node_func_call: - case Node_val: - case Node_builtin: - case Node_BINMODE: - case Node_CONVFMT: - case Node_FIELDWIDTHS: - case Node_FNR: - case Node_FS: - case Node_IGNORECASE: - case Node_LINT: - case Node_NF: - case Node_NR: - case Node_OFMT: - case Node_OFS: - case Node_ORS: - case Node_RS: - return 15; - - case Node_field_spec: - return 14; - - case Node_exp: - return 13; - - case Node_preincrement: - case Node_predecrement: - case Node_postincrement: - case Node_postdecrement: - return 12; - - case Node_unary_minus: - case Node_not: - return 11; - - case Node_times: - case Node_quotient: - case Node_mod: - return 10; - - case Node_plus: - case Node_minus: - return 9; - - case Node_concat: - return 8; - - case Node_equal: - case Node_notequal: - case Node_greater: - case Node_leq: - case Node_geq: - case Node_match: - case Node_nomatch: - return 7; - - case Node_K_getline: - return 6; - - case Node_less: - return 5; - - case Node_in_array: - return 5; - - case Node_and: - return 4; - - case Node_or: - return 3; - - case Node_cond_exp: - return 2; - - case Node_assign: - case Node_assign_times: - case Node_assign_quotient: - case Node_assign_mod: - case Node_assign_plus: - case Node_assign_minus: - case Node_assign_exp: - return 1; - - default: - fatal(_("unexpected type %s in prec_level"), nodetype2str(type)); - return 0; /* keep the compiler happy */ - } -} - -/* parenthesize --- print a subtree in parentheses if need be */ - -static void -parenthesize(NODETYPE parent_type, NODE *tree) -{ - NODETYPE child_type; - - if (tree == NULL) - return; - - child_type = tree->type; - - in_expr++; - /* first the special cases, then the general ones */ - if (parent_type == Node_not && child_type == 
Node_in_array) { - fprintf(prof_fp, "! ("); - pp_in_array(tree->lnode, tree->rnode); - fprintf(prof_fp, ")"); - /* other special cases here, as needed */ - } else if (prec_level(child_type) < prec_level(parent_type)) { - fprintf(prof_fp, "("); - tree_eval(tree); - fprintf(prof_fp, ")"); - } else - tree_eval(tree); - in_expr--; -} - -#ifdef PROFILING -/* just_dump --- dump the profile and function stack and keep going */ - -static RETSIGTYPE -just_dump(int signum) -{ - extern NODE *begin_block, *expression_value, *end_block; - - dump_prog(begin_block, expression_value, end_block); - dump_funcs(); - dump_fcall_stack(prof_fp); - fflush(prof_fp); - signal(signum, just_dump); /* for OLD Unix systems ... */ -} - -/* dump_and_exit --- dump the profile, the function stack, and exit */ - -static RETSIGTYPE -dump_and_exit(int signum) -{ - just_dump(signum); - exit(1); -} -#endif diff --git a/contrib/awk/profile_p.c b/contrib/awk/profile_p.c deleted file mode 100644 index bfd58d3..0000000 --- a/contrib/awk/profile_p.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * profile_p.c - compile profile.c with profiling turned on. - */ - -/* - * Copyright (C) 2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#define PROFILING 1 -#include "profile.c" diff --git a/contrib/awk/protos.h b/contrib/awk/protos.h deleted file mode 100644 index 02f1f3f..0000000 --- a/contrib/awk/protos.h +++ /dev/null @@ -1,128 +0,0 @@ -/* - * protos.h -- function prototypes for when the headers don't have them. - */ - -/* - * Copyright (C) 1991 - 2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#ifdef __STDC__ -#define aptr_t void * /* arbitrary pointer type */ -#else -#define aptr_t char * -#endif -extern aptr_t malloc P((MALLOC_ARG_T)); -extern aptr_t realloc P((aptr_t, MALLOC_ARG_T)); -extern aptr_t calloc P((MALLOC_ARG_T, MALLOC_ARG_T)); - -extern void free P((aptr_t)); -extern char *getenv P((const char *)); - -#if ! defined(HAVE_STRING_H) && ! 
defined(HAVE_STRINGS_H) -extern char *strcpy P((char *, const char *)); -extern char *strcat P((char *, const char *)); -extern char *strncpy P((char *, const char *, size_t)); -extern int strcmp P((const char *, const char *)); -extern int strncmp P((const char *, const char *, size_t)); -extern char *strchr P((const char *, int)); -extern char *strrchr P((const char *, int)); -extern char *strstr P((const char *s1, const char *s2)); -extern size_t strlen P((const char *)); -extern long strtol P((const char *, char **, int)); - -extern aptr_t memset P((aptr_t, int, size_t)); -extern aptr_t memcpy P((aptr_t, const aptr_t, size_t)); -extern aptr_t memmove P((aptr_t, const aptr_t, size_t)); -extern aptr_t memchr P((const aptr_t, int, size_t)); -extern int memcmp P((const aptr_t, const aptr_t, size_t)); -#endif /* ! defined(HAVE_STRING_H) && ! defined(HAVE_STRINGS_H) */ - -#ifndef VMS -extern char *strerror P((int)); -#else -extern char *strerror P((int,...)); -#endif - -#if ! defined(_MSC_VER) && ! defined(__GNU_LIBRARY__) -extern size_t strftime P((char *, size_t, const char *, const struct tm *)); -#endif -#ifdef __STDC__ -extern time_t time P((time_t *)); -#else -extern long time(); -#endif - -extern FILE *fdopen P((int, const char *)); -extern int fprintf P((FILE *, const char *, ...)); -#if ! defined(MSDOS) && ! defined(__GNU_LIBRARY__) -#ifdef __STDC__ -extern size_t fwrite P((const aptr_t, size_t, size_t, FILE *)); -#else -extern int fwrite(); -#endif -extern int fputs P((const char *, FILE *)); -extern int unlink P((const char *)); -#endif -extern int fflush P((FILE *)); -extern int fclose P((FILE *)); -extern FILE *popen P((const char *, const char *)); -extern int pclose P((FILE *)); -extern void abort P(()); -extern int isatty P((int)); -extern void exit P((int)); -extern int system P((const char *)); -extern int sscanf P((const char *, const char *, ...)); -#ifndef toupper -extern int toupper P((int)); -#endif -#ifndef tolower -extern int tolower P((int)); -#endif - -extern double pow P((double x, double y)); -extern double atof P((const char *)); -extern double strtod P((const char *, char **)); -extern int fstat P((int, struct stat *)); -extern int stat P((const char *, struct stat *)); -extern off_t lseek P((int, off_t, int)); -extern int fseek P((FILE *, long, int)); -extern int close P((int)); -extern int creat P((const char *, mode_t)); -extern int open P((const char *, int, ...)); -extern int pipe P((int *)); -extern int dup P((int)); -extern int dup2 P((int,int)); -extern int fork P(()); -extern int execl P((const char *, const char *, ...)); -#ifndef __STDC__ -extern int read P((int, void *, unsigned int)); -#endif -#ifndef HAVE_SYS_WAIT_H -extern int wait P((int *)); -#endif -extern void _exit P((int)); - -#ifndef __STDC__ -extern long time P((long *)); -#endif - -extern SPRINTF_RET sprintf P((char *, const char *, ...)); - -#undef aptr_t diff --git a/contrib/awk/re.c b/contrib/awk/re.c deleted file mode 100644 index 2ee9e6d..0000000 --- a/contrib/awk/re.c +++ /dev/null @@ -1,322 +0,0 @@ -/* - * re.c - compile regular expressions. - */ - -/* - * Copyright (C) 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -static reg_syntax_t syn; - -/* make_regexp --- generate compiled regular expressions */ - -Regexp * -make_regexp(char *s, size_t len, int ignorecase, int dfa) -{ - Regexp *rp; - const char *rerr; - char *src = s; - char *temp; - char *end = s + len; - register char *dest; - register int c, c2; - - /* Handle escaped characters first. */ - - /* - * Build a copy of the string (in dest) with the - * escaped characters translated, and generate the regex - * from that. - */ - emalloc(dest, char *, len + 2, "make_regexp"); - temp = dest; - - while (src < end) { - if (*src == '\\') { - c = *++src; - switch (c) { - case 'a': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - case 'x': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - c2 = parse_escape(&src); - if (c2 < 0) - cant_happen(); - /* - * Unix awk treats octal (and hex?) chars - * literally in re's, so escape regexp - * metacharacters. - */ - if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x') - && strchr("()|*+?.^$\\[]", c2) != NULL) - *dest++ = '\\'; - *dest++ = (char) c2; - break; - case '8': - case '9': /* a\9b not valid */ - *dest++ = c; - src++; - break; - case 'y': /* normally \b */ - /* gnu regex op */ - if (! do_traditional) { - *dest++ = '\\'; - *dest++ = 'b'; - src++; - break; - } - /* else, fall through */ - default: - *dest++ = '\\'; - *dest++ = (char) c; - src++; - break; - } /* switch */ - } else - *dest++ = *src++; /* not '\\' */ - } /* for */ - - *dest = '\0' ; /* Only necessary if we print dest ? */ - emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); - memset((char *) rp, 0, sizeof(*rp)); - rp->pat.allocated = 0; /* regex will allocate the buffer */ - emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); - - if (ignorecase) - rp->pat.translate = casetable; - else - rp->pat.translate = NULL; - len = dest - temp; - if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL) - fatal("%s: /%s/", gettext(rerr), temp); - - /* gack. this must be done *after* re_compile_pattern */ - rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */ - if (dfa && ! ignorecase) { - dfacomp(temp, len, &(rp->dfareg), TRUE); - rp->dfa = TRUE; - } else - rp->dfa = FALSE; - - free(temp); - return rp; -} - -/* research --- do a regexp search. use dfa if possible */ - -int -research(Regexp *rp, register char *str, int start, - register size_t len, int need_start) -{ - char *ret = str; - int try_backref; - - /* - * Always do dfa search if can; if it fails, then even if - * need_start is true, we won't bother with the regex search. - */ - if (rp->dfa) { - char save; - int count = 0; - - /* - * dfa likes to stick a '\n' right after the matched - * text. So we just save and restore the character. 
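The comment above is the one subtle point in research(): dfaexec() wants to deposit a '\n' just past the matched text, so the caller saves that byte and puts it back afterwards. A minimal standalone sketch of the same idiom, with a stand-in matcher (names are illustrative, not gawk's):

    #include <stddef.h>
    #include <string.h>

    /* Stand-in for dfaexec(): may scribble on buf[len], like the real matcher. */
    static int toy_match(char *buf, size_t len)
    {
        buf[len] = '\n';                  /* the write the caller must survive */
        return memchr(buf, 'x', len) != NULL;
    }

    /* Search str[start .. start+len-1] without letting the matcher corrupt
     * the byte after the window -- the same save/restore research() performs. */
    int search_window(char *str, size_t start, size_t len)
    {
        char save = str[start + len];     /* remember the byte past the window */
        int hit = toy_match(str + start, len);
        str[start + len] = save;          /* restore it afterwards */
        return hit;
    }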
- */ - save = str[start+len]; - ret = dfaexec(&(rp->dfareg), str+start, str+start+len, TRUE, - &count, &try_backref); - str[start+len] = save; - } - if (ret) { - if (need_start || rp->dfa == FALSE || try_backref) { - int res = re_search(&(rp->pat), str, start+len, - start, len, &(rp->regs)); - return res; - } else - return 1; - } else - return -1; -} - -/* refree --- free up the dynamic memory used by a compiled regexp */ - -void -refree(Regexp *rp) -{ - free(rp->pat.buffer); - free(rp->pat.fastmap); - if (rp->regs.start) - free(rp->regs.start); - if (rp->regs.end) - free(rp->regs.end); - if (rp->dfa) - dfafree(&(rp->dfareg)); - free(rp); -} - -/* dfaerror --- print an error message for the dfa routines */ - -void -dfaerror(const char *s) -{ - fatal("%s", s); -} - -/* re_update --- recompile a dynamic regexp */ - -Regexp * -re_update(NODE *t) -{ - NODE *t1; - - if ((t->re_flags & CASE) == IGNORECASE) { - if ((t->re_flags & CONST) != 0) - return t->re_reg; - t1 = force_string(tree_eval(t->re_exp)); - if (t->re_text != NULL) { - if (cmp_nodes(t->re_text, t1) == 0) { - free_temp(t1); - return t->re_reg; - } - unref(t->re_text); - } - t->re_text = dupnode(t1); - free_temp(t1); - } - if (t->re_reg != NULL) - refree(t->re_reg); - if (t->re_cnt > 0) - t->re_cnt++; - if (t->re_cnt > 10) - t->re_cnt = 0; - if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) { - t1 = force_string(tree_eval(t->re_exp)); - unref(t->re_text); - t->re_text = dupnode(t1); - free_temp(t1); - } - t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, - IGNORECASE, t->re_cnt); - t->re_flags &= ~CASE; - t->re_flags |= IGNORECASE; - return t->re_reg; -} - -/* resetup --- choose what kind of regexps we match */ - -void -resetup() -{ - if (do_posix) - syn = RE_SYNTAX_POSIX_AWK; /* strict POSIX re's */ - else if (do_traditional) - syn = RE_SYNTAX_AWK; /* traditional Unix awk re's */ - else - syn = RE_SYNTAX_GNU_AWK; /* POSIX re's + GNU ops */ - - /* - * Interval expressions are off by default, since it's likely to - * break too many old programs to have them on. - */ - if (do_intervals) - syn |= RE_INTERVALS; - - (void) re_set_syntax(syn); - dfasyntax(syn, FALSE, '\n'); -} - -/* avoid_dfa --- FIXME: temporary kludge function until we have a new dfa.c */ - -int -avoid_dfa(NODE *re, char *str, size_t len) -{ - char *restr; - int relen; - int anchor, i; - char *end; - - if ((re->re_flags & CONST) != 0) { - restr = re->re_exp->stptr; - relen = re->re_exp->stlen; - } else { - restr = re->re_text->stptr; - relen = re->re_text->stlen; - } - - for (anchor = FALSE, i = 0; i < relen; i++) { - if (restr[i] == '^' || restr[i] == '$') { - anchor = TRUE; - break; - } - } - if (! 
anchor) - return FALSE; - - for (end = str + len; str < end; str++) - if (*str == '\n') - return TRUE; - - return FALSE; -} - -/* reisstring --- return TRUE if the RE match is a simple string match */ - -int -reisstring(char *text, size_t len, Regexp *re, char *buf) -{ - static char metas[] = ".*+(){}[]|?^$\\"; - int i; - int has_meta = FALSE; - int res; - char *matched; - - /* simple checking for has meta characters in re */ - for (i = 0; i < len; i++) { - if (strchr(metas, text[i]) != NULL) { - has_meta = TRUE; - break; - } - } - - /* make accessable to gdb */ - matched = &buf[RESTART(re, buf)]; - - if (has_meta) - return FALSE; /* give up early, can't be string match */ - - res = STREQN(text, matched, len); - - return res; -} diff --git a/contrib/awk/regex.c b/contrib/awk/regex.c deleted file mode 100644 index 1da69e2..0000000 --- a/contrib/awk/regex.c +++ /dev/null @@ -1,5854 +0,0 @@ -/* Extended regular expression matching and search library, - version 0.12. - (Implements POSIX draft P1003.2/D11.2, except for some of the - internationalization features.) - Copyright (C) 1993, 94, 95, 96, 97, 98, 99, 2000 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software Foundation, - Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* AIX requires this to be the first thing in the file. */ -#if defined _AIX && !defined REGEX_MALLOC - #pragma alloca -#endif - -#undef _GNU_SOURCE -#define _GNU_SOURCE - -#ifdef HAVE_CONFIG_H -# include -#endif - -#ifndef PARAMS -# if defined __GNUC__ || (defined __STDC__ && __STDC__) -# define PARAMS(args) args -# else -# define PARAMS(args) () -# endif /* GCC. */ -#endif /* Not PARAMS. */ - -#if defined STDC_HEADERS && !defined emacs -# include -#else -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -# include -#endif - -#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) - -/* For platform which support the ISO C amendement 1 functionality we - support user defined character classes. */ -#if defined _LIBC || WIDE_CHAR_SUPPORT -/* Solaris 2.5 has a bug: must be included before . */ -# include -# include -#endif - -#ifdef _LIBC -/* We have to keep the namespace clean. 
*/ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(errcode, preg, errbuf, errbuf_size) \ - __regerror(errcode, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ - __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ - __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ - __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ - __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ - __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ - __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) - -#define btowc __btowc -#endif - -/* This is for other GNU distributions with internationalized messages. */ -#if HAVE_LIBINTL_H || defined _LIBC -# include -#else -# define gettext(msgid) (msgid) -#endif - -#ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String -#endif - -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -#ifdef emacs - -# include "lisp.h" -# include "buffer.h" -# include "syntax.h" - -#else /* not emacs */ - -/* If we are not linking with Emacs proper, - we can't use the relocating allocator - even if config.h says that we can. */ -# undef REL_ALLOC - -# if defined STDC_HEADERS || defined _LIBC -# include -# else -char *malloc (); -char *realloc (); -# endif - -/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. - If nothing else has been done, use the method below. */ -# ifdef INHIBIT_STRING_HEADER -# if !(defined HAVE_BZERO && defined HAVE_BCOPY) -# if !defined bzero && !defined bcopy -# undef INHIBIT_STRING_HEADER -# endif -# endif -# endif - -/* This is the normal way of making sure we have a bcopy and a bzero. - This is used in most programs--a few other programs avoid this - by defining INHIBIT_STRING_HEADER. */ -# ifndef INHIBIT_STRING_HEADER -# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC -# include -# ifndef bzero -# ifndef _LIBC -# define bzero(s, n) (memset (s, '\0', n), (s)) -# else -# define bzero(s, n) __bzero (s, n) -# endif -# endif -# else -# include -# ifndef memcmp -# define memcmp(s1, s2, n) bcmp (s1, s2, n) -# endif -# ifndef memcpy -# define memcpy(d, s, n) (bcopy (s, d, n), (d)) -# endif -# endif -# endif - -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -# ifndef Sword -# define Sword 1 -# endif - -# ifdef SWITCH_ENUM_BUG -# define SWITCH_ENUM_CAST(x) ((int)(x)) -# else -# define SWITCH_ENUM_CAST(x) (x) -# endif - -#endif /* not emacs */ - -/* Get the interface, including the syntax bits. */ -#include - -/* isalpha etc. are used for the character classes. */ -#include - -/* Jim Meyering writes: - - "... Some ctype macros are valid only for character codes that - isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when - using /bin/cc or gcc but without giving an ansi option). 
So, all - ctype uses should be through macros like ISPRINT... If - STDC_HEADERS is defined, then autoconf has verified that the ctype - macros don't need to be guarded with references to isascii. ... - Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." - Solaris defines some of these symbols so we must undefine them first. */ - -#undef ISASCII -#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) -# define ISASCII(c) 1 -#else -# define ISASCII(c) isascii(c) -#endif - -#ifdef isblank -# define ISBLANK(c) (ISASCII (c) && isblank (c)) -#else -# define ISBLANK(c) ((c) == ' ' || (c) == '\t') -#endif -#ifdef isgraph -# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) -#else -# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) -#endif - -#undef ISPRINT -#define ISPRINT(c) (ISASCII (c) && isprint (c)) -#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) -#define ISALNUM(c) (ISASCII (c) && isalnum (c)) -#define ISALPHA(c) (ISASCII (c) && isalpha (c)) -#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) -#define ISLOWER(c) (ISASCII (c) && islower (c)) -#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) -#define ISSPACE(c) (ISASCII (c) && isspace (c)) -#define ISUPPER(c) (ISASCII (c) && isupper (c)) -#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) - -#ifdef _tolower -# define TOLOWER(c) _tolower(c) -#else -# define TOLOWER(c) tolower(c) -#endif - -#ifndef NULL -# define NULL (void *)0 -#endif - -/* We remove any previous definition of `SIGN_EXTEND_CHAR', - since ours (we hope) works properly with all combinations of - machines, compilers, `char' and `unsigned char' argument types. - (Per Bothner suggested the basic approach.) */ -#undef SIGN_EXTEND_CHAR -#if __STDC__ -# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) -#else /* not __STDC__ */ -/* As in Harbison and Steele. */ -# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) -#endif - -#ifndef emacs -/* How many characters in the character set. */ -#define CHAR_SET_SIZE 256 - -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -static char re_syntax_table[CHAR_SET_SIZE]; - -static void -init_syntax_once () -{ - register int c; - static int done = 0; - - if (done) - return; - bzero (re_syntax_table, sizeof re_syntax_table); - - for (c = 0; c < CHAR_SET_SIZE; c++) - if (ISALNUM (c)) - re_syntax_table[c] = Sword; - - re_syntax_table['_'] = Sword; - - done = 1; -} - -#endif /* not SYNTAX_TABLE */ - -#define SYNTAX(c) re_syntax_table[(unsigned char) (c)] - -#endif /* emacs */ - -/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we - use `alloca' instead of `malloc'. This is because using malloc in - re_search* or re_match* could cause memory leaks when C-g is used in - Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - - Because we sometimes use alloca, some routines have to be macros, - not functions -- `alloca'-allocated space disappears at the end of the - function it is called in. */ - -#ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE malloc -# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE free - -#else /* not REGEX_MALLOC */ - -/* Emacs already defines alloca, sometimes. */ -# ifndef alloca - -/* Make alloca work the best possible way. 
*/ -# ifdef __GNUC__ -# define alloca __builtin_alloca -# else /* not __GNUC__ */ -# if HAVE_ALLOCA_H -# include -# endif /* HAVE_ALLOCA_H */ -# endif /* not __GNUC__ */ - -# endif /* not alloca */ - -# define REGEX_ALLOCATE alloca - -/* Assumes a `char *destination' variable. */ -# define REGEX_REALLOCATE(source, osize, nsize) \ - (destination = (char *) alloca (nsize), \ - memcpy (destination, source, osize)) - -/* No need to do anything to free, after alloca. */ -# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ - -#endif /* not REGEX_MALLOC */ - -/* Define how to allocate the failure stack. */ - -#if defined REL_ALLOC && defined REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK(size) \ - r_alloc (&failure_stack_ptr, (size)) -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - r_re_alloc (&failure_stack_ptr, (nsize)) -# define REGEX_FREE_STACK(ptr) \ - r_alloc_free (&failure_stack_ptr) - -#else /* not using relocating allocator */ - -# ifdef REGEX_MALLOC - -# define REGEX_ALLOCATE_STACK malloc -# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) -# define REGEX_FREE_STACK free - -# else /* not REGEX_MALLOC */ - -# define REGEX_ALLOCATE_STACK alloca - -# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ - REGEX_REALLOCATE (source, osize, nsize) -/* No need to explicitly free anything. */ -# define REGEX_FREE_STACK(arg) - -# endif /* not REGEX_MALLOC */ -#endif /* not using relocating allocator */ - - -/* True if `size1' is non-NULL and PTR is pointing anywhere inside - `string1' or just past its end. This works if PTR is NULL, which is - a good thing. */ -#define FIRST_STRING_P(ptr) \ - (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) - -/* (Re)Allocate N items of type T using malloc, or fail. */ -#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) -#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) -#define RETALLOC_IF(addr, n, t) \ - if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) -#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) - -#define BYTEWIDTH 8 /* In bits. */ - -#define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) - -#undef MAX -#undef MIN -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - -typedef char boolean; -#define false 0 -#define true 1 - -static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, - const char *string1, int size1, - const char *string2, int size2, - int pos, - struct re_registers *regs, - int stop)); - -/* These are the command codes that appear in compiled regular - expressions. Some opcodes are followed by argument bytes. A - command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. */ - -typedef enum -{ - no_op = 0, - - /* Succeed right away--no more backtracking. */ - succeed, - - /* Followed by one byte giving n, then by n literal bytes. */ - exactn, - - /* Matches any (more or less) character. */ - anychar, - - /* Matches any one char belonging to specified set. First - following byte is number of bitmap bytes. Then come bytes - for a bitmap saying which chars are in. Bits in each byte - are ordered low-bit-first. A character is in the set if its - bit is 1. A character too large to have a bit in the map is - automatically not in the set. */ - charset, - - /* Same parameters as charset, but match any character that is - not one of those specified. 
*/ - charset_not, - - /* Start remembering the text that is matched, for storing in a - register. Followed by one byte with the register number, in - the range 0 to one less than the pattern buffer's re_nsub - field. Then followed by one byte with the number of groups - inner to this one. (This last has to be part of the - start_memory only because we need it in the on_failure_jump - of re_match_2.) */ - start_memory, - - /* Stop remembering the text that is matched and store it in a - memory register. Followed by one byte with the register - number, in the range 0 to one less than `re_nsub' in the - pattern buffer, and one byte with the number of inner groups, - just like `start_memory'. (We need the number of inner - groups here because we don't have any easy way of finding the - corresponding start_memory when we're at a stop_memory.) */ - stop_memory, - - /* Match a duplicate of something remembered. Followed by one - byte containing the register number. */ - duplicate, - - /* Fail unless at beginning of line. */ - begline, - - /* Fail unless at end of line. */ - endline, - - /* Succeeds if at beginning of buffer (if emacs) or at beginning - of string to be matched (if not). */ - begbuf, - - /* Analogously, for end of buffer/string. */ - endbuf, - - /* Followed by two byte relative address to which to jump. */ - jump, - - /* Same as jump, but marks the end of an alternative. */ - jump_past_alt, - - /* Followed by two-byte relative address of place to resume at - in case of failure. */ - on_failure_jump, - - /* Like on_failure_jump, but pushes a placeholder instead of the - current string position when executed. */ - on_failure_keep_string_jump, - - /* Throw away latest failure point and then jump to following - two-byte relative address. */ - pop_failure_jump, - - /* Change to pop_failure_jump if know won't have to backtrack to - match; otherwise change to jump. This is used to jump - back to the beginning of a repeat. If what follows this jump - clearly won't match what the repeat does, such that we can be - sure that there is no use backtracking out of repetitions - already matched, then we change it to a pop_failure_jump. - Followed by two-byte address. */ - maybe_pop_jump, - - /* Jump to following two-byte address, and push a dummy failure - point. This failure point will be thrown away if an attempt - is made to use it for a failure. A `+' construct makes this - before the first repeat. Also used as an intermediary kind - of jump when compiling an alternative. */ - dummy_failure_jump, - - /* Push a dummy failure point and continue. Used at the end of - alternatives. */ - push_dummy_failure, - - /* Followed by two-byte relative address and two-byte number n. - After matching N times, jump to the address upon failure. */ - succeed_n, - - /* Followed by two-byte relative address, and two-byte number n. - Jump to the address N times, then fail. */ - jump_n, - - /* Set the following two-byte relative address to the - subsequent two-byte number. The address *includes* the two - bytes of number. */ - set_number_at, - - wordchar, /* Matches any word-constituent character. */ - notwordchar, /* Matches any char that is not a word-constituent. */ - - wordbeg, /* Succeeds if at word beginning. */ - wordend, /* Succeeds if at word end. */ - - wordbound, /* Succeeds if at a word boundary. */ - notwordbound /* Succeeds if not at a word boundary. */ - -#ifdef emacs - ,before_dot, /* Succeeds if before point. */ - at_dot, /* Succeeds if at point. */ - after_dot, /* Succeeds if after point. 
*/ - - /* Matches any character whose syntax is specified. Followed by - a byte which contains a syntax code, e.g., Sword. */ - syntaxspec, - - /* Matches any character whose syntax is not that specified. */ - notsyntaxspec -#endif /* emacs */ -} re_opcode_t; - -/* Common operations on the compiled pattern. */ - -/* Store NUMBER in two contiguous bytes starting at DESTINATION. */ - -#define STORE_NUMBER(destination, number) \ - do { \ - (destination)[0] = (number) & 0377; \ - (destination)[1] = (number) >> 8; \ - } while (0) - -/* Same as STORE_NUMBER, except increment DESTINATION to - the byte after where the number is stored. Therefore, DESTINATION - must be an lvalue. */ - -#define STORE_NUMBER_AND_INCR(destination, number) \ - do { \ - STORE_NUMBER (destination, number); \ - (destination) += 2; \ - } while (0) - -/* Put into DESTINATION a number stored in two contiguous bytes starting - at SOURCE. */ - -#define EXTRACT_NUMBER(destination, source) \ - do { \ - (destination) = *(source) & 0377; \ - (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ - } while (0) - -#ifdef DEBUG -static void extract_number _RE_ARGS ((int *dest, unsigned char *source)); -static void -extract_number (dest, source) - int *dest; - unsigned char *source; -{ - int temp = SIGN_EXTEND_CHAR (*(source + 1)); - *dest = *source & 0377; - *dest += temp << 8; -} - -# ifndef EXTRACT_MACROS /* To debug the macros. */ -# undef EXTRACT_NUMBER -# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -# endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. - SOURCE must be an lvalue. */ - -#define EXTRACT_NUMBER_AND_INCR(destination, source) \ - do { \ - EXTRACT_NUMBER (destination, source); \ - (source) += 2; \ - } while (0) - -#ifdef DEBUG -static void extract_number_and_incr _RE_ARGS ((int *destination, - unsigned char **source)); -static void -extract_number_and_incr (destination, source) - int *destination; - unsigned char **source; -{ - extract_number (destination, *source); - *source += 2; -} - -# ifndef EXTRACT_MACROS -# undef EXTRACT_NUMBER_AND_INCR -# define EXTRACT_NUMBER_AND_INCR(dest, src) \ - extract_number_and_incr (&dest, &src) -# endif /* not EXTRACT_MACROS */ - -#endif /* DEBUG */ - -/* If DEBUG is defined, Regex prints many voluminous messages about what - it is doing (if the variable `debug' is nonzero). If linked with the - main program in `iregex.c', you can enter patterns and strings - interactively. And if linked with the main program in `main.c' and - the other test files, you can run the already-written tests. */ - -#ifdef DEBUG - -/* We use standard I/O for debugging. */ -# include - -/* It is useful to test things that ``must'' be true when debugging. */ -# include - -static int debug; - -# define DEBUG_STATEMENT(e) e -# define DEBUG_PRINT1(x) if (debug) printf (x) -# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ - if (debug) print_partial_compiled_pattern (s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ - if (debug) print_double_string (w, s1, sz1, s2, sz2) - - -/* Print the fastmap in human-readable form. 
*/ - -void -print_fastmap (fastmap) - char *fastmap; -{ - unsigned was_a_range = 0; - unsigned i = 0; - - while (i < (1 << BYTEWIDTH)) - { - if (fastmap[i++]) - { - was_a_range = 0; - putchar (i - 1); - while (i < (1 << BYTEWIDTH) && fastmap[i]) - { - was_a_range = 1; - i++; - } - if (was_a_range) - { - printf ("-"); - putchar (i - 1); - } - } - } - putchar ('\n'); -} - - -/* Print a compiled pattern string in human-readable form, starting at - the START pointer into it and ending just before the pointer END. */ - -void -print_partial_compiled_pattern (start, end) - unsigned char *start; - unsigned char *end; -{ - int mcnt, mcnt2; - unsigned char *p1; - unsigned char *p = start; - unsigned char *pend = end; - - if (start == NULL) - { - printf ("(null)\n"); - return; - } - - /* Loop over pattern commands. */ - while (p < pend) - { - printf ("%d:\t", p - start); - - switch ((re_opcode_t) *p++) - { - case no_op: - printf ("/no_op"); - break; - - case exactn: - mcnt = *p++; - printf ("/exactn/%d", mcnt); - do - { - putchar ('/'); - putchar (*p++); - } - while (--mcnt); - break; - - case start_memory: - mcnt = *p++; - printf ("/start_memory/%d/%d", mcnt, *p++); - break; - - case stop_memory: - mcnt = *p++; - printf ("/stop_memory/%d/%d", mcnt, *p++); - break; - - case duplicate: - printf ("/duplicate/%d", *p++); - break; - - case anychar: - printf ("/anychar"); - break; - - case charset: - case charset_not: - { - register int c, last = -100; - register int in_range = 0; - - printf ("/charset [%s", - (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); - - assert (p + *p < pend); - - for (c = 0; c < 256; c++) - if (c / 8 < *p - && (p[1 + (c/8)] & (1 << (c % 8)))) - { - /* Are we starting a range? */ - if (last + 1 == c && ! in_range) - { - putchar ('-'); - in_range = 1; - } - /* Have we broken a range? */ - else if (last + 1 != c && in_range) - { - putchar (last); - in_range = 0; - } - - if (! 
in_range) - putchar (c); - - last = c; - } - - if (in_range) - putchar (last); - - putchar (']'); - - p += 1 + *p; - } - break; - - case begline: - printf ("/begline"); - break; - - case endline: - printf ("/endline"); - break; - - case on_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_jump to %d", p + mcnt - start); - break; - - case on_failure_keep_string_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); - break; - - case dummy_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/dummy_failure_jump to %d", p + mcnt - start); - break; - - case push_dummy_failure: - printf ("/push_dummy_failure"); - break; - - case maybe_pop_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/maybe_pop_jump to %d", p + mcnt - start); - break; - - case pop_failure_jump: - extract_number_and_incr (&mcnt, &p); - printf ("/pop_failure_jump to %d", p + mcnt - start); - break; - - case jump_past_alt: - extract_number_and_incr (&mcnt, &p); - printf ("/jump_past_alt to %d", p + mcnt - start); - break; - - case jump: - extract_number_and_incr (&mcnt, &p); - printf ("/jump to %d", p + mcnt - start); - break; - - case succeed_n: - extract_number_and_incr (&mcnt, &p); - p1 = p + mcnt; - extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n to %d, %d times", p1 - start, mcnt2); - break; - - case jump_n: - extract_number_and_incr (&mcnt, &p); - p1 = p + mcnt; - extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n to %d, %d times", p1 - start, mcnt2); - break; - - case set_number_at: - extract_number_and_incr (&mcnt, &p); - p1 = p + mcnt; - extract_number_and_incr (&mcnt2, &p); - printf ("/set_number_at location %d to %d", p1 - start, mcnt2); - break; - - case wordbound: - printf ("/wordbound"); - break; - - case notwordbound: - printf ("/notwordbound"); - break; - - case wordbeg: - printf ("/wordbeg"); - break; - - case wordend: - printf ("/wordend"); - -# ifdef emacs - case before_dot: - printf ("/before_dot"); - break; - - case at_dot: - printf ("/at_dot"); - break; - - case after_dot: - printf ("/after_dot"); - break; - - case syntaxspec: - printf ("/syntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; - - case notsyntaxspec: - printf ("/notsyntaxspec"); - mcnt = *p++; - printf ("/%d", mcnt); - break; -# endif /* emacs */ - - case wordchar: - printf ("/wordchar"); - break; - - case notwordchar: - printf ("/notwordchar"); - break; - - case begbuf: - printf ("/begbuf"); - break; - - case endbuf: - printf ("/endbuf"); - break; - - default: - printf ("?%d", *(p-1)); - } - - putchar ('\n'); - } - - printf ("%d:\tend of pattern.\n", p - start); -} - - -void -print_compiled_pattern (bufp) - struct re_pattern_buffer *bufp; -{ - unsigned char *buffer = bufp->buffer; - - print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%ld bytes used/%ld bytes allocated.\n", - bufp->used, bufp->allocated); - - if (bufp->fastmap_accurate && bufp->fastmap) - { - printf ("fastmap: "); - print_fastmap (bufp->fastmap); - } - - printf ("re_nsub: %d\t", bufp->re_nsub); - printf ("regs_alloc: %d\t", bufp->regs_allocated); - printf ("can_be_null: %d\t", bufp->can_be_null); - printf ("newline_anchor: %d\n", bufp->newline_anchor); - printf ("no_sub: %d\t", bufp->no_sub); - printf ("not_bol: %d\t", bufp->not_bol); - printf ("not_eol: %d\t", bufp->not_eol); - printf ("syntax: %lx\n", bufp->syntax); - /* Perhaps we should print the translate table? 
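The charset rows decoded above are plain bitmaps: the first byte gives the bitmap length in bytes, and character c is in the set when bit (c % 8) of byte (c / 8) is one, low-order bit first. A self-contained sketch of that membership test (hypothetical helper, not part of the original source):

    /* Return nonzero if character c is in a charset bitmap laid out as the
     * compiled pattern stores it: p[0] = number of bitmap bytes, p[1..] = bits,
     * low-order bit first within each byte.  Characters too large to have a
     * bit in the map are automatically not in the set. */
    static int charset_member(const unsigned char *p, unsigned int c)
    {
        if (c / 8 >= p[0])
            return 0;
        return (p[1 + c / 8] & (1u << (c % 8))) != 0;
    }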
*/ -} - - -void -print_double_string (where, string1, size1, string2, size2) - const char *where; - const char *string1; - const char *string2; - int size1; - int size2; -{ - int this_char; - - if (where == NULL) - printf ("(null)"); - else - { - if (FIRST_STRING_P (where)) - { - for (this_char = where - string1; this_char < size1; this_char++) - putchar (string1[this_char]); - - where = string2; - } - - for (this_char = where - string2; this_char < size2; this_char++) - putchar (string2[this_char]); - } -} - -void -printchar (c) - int c; -{ - putc (c, stderr); -} - -#else /* not DEBUG */ - -# undef assert -# define assert(e) - -# define DEBUG_STATEMENT(e) -# define DEBUG_PRINT1(x) -# define DEBUG_PRINT2(x1, x2) -# define DEBUG_PRINT3(x1, x2, x3) -# define DEBUG_PRINT4(x1, x2, x3, x4) -# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) - -#endif /* not DEBUG */ - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -/* This has no initializer because initialized variables in Emacs - become read-only after dumping. */ -reg_syntax_t re_syntax_options = 0; /* Gawk: initialize to force this one, - not the one in the C library. */ - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; -#ifdef DEBUG - if (syntax & RE_DEBUG) - debug = 1; - else if (debug) /* was on but now is not */ - debug = 0; -#endif /* DEBUG */ - return ret; -} -#ifdef _LIBC -weak_alias (__re_set_syntax, re_set_syntax) -#endif - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. - POSIX doesn't require that we do anything for REG_NOERROR, - but why not be nice? */ - -/* Gawk: 12 Dec 2000 --- revert to array of char * for use with K&R Compilers. */ - -static const char *re_error_msgid[] = - { - gettext_noop ("Success"), /* REG_NOERROR */ - gettext_noop ("No match"), /* REG_NOMATCH */ - gettext_noop ("Invalid regular expression"), /* REG_BADPAT */ - gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */ - gettext_noop ("Invalid character class name"), /* REG_ECTYPE */ - gettext_noop ("Trailing backslash"), /* REG_EESCAPE */ - gettext_noop ("Invalid back reference"), /* REG_ESUBREG */ - gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */ - gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */ - gettext_noop ("Unmatched \\{"), /* REG_EBRACE */ - gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */ - gettext_noop ("Invalid range end"), /* REG_ERANGE */ - gettext_noop ("Memory exhausted"), /* REG_ESPACE */ - gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */ - gettext_noop ("Premature end of regular expression"), /* REG_EEND */ - gettext_noop ("Regular expression too big"), /* REG_ESIZE */ - gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ - }; - -/* Avoiding alloca during matching, to placate r_alloc. 
*/ - -/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the - searching and matching functions should not call alloca. On some - systems, alloca is implemented in terms of malloc, and if we're - using the relocating allocator routines, then malloc could cause a - relocation, which might (if the strings being searched are in the - ralloc heap) shift the data out from underneath the regexp - routines. - - Here's another reason to avoid allocation: Emacs - processes input from X in a signal handler; processing X input may - call malloc; if input arrives while a matching routine is calling - malloc, then we're scrod. But Emacs can't just block input while - calling matching routines; then we don't notice interrupts when - they come in. So, Emacs blocks input around all regexp calls - except the matching calls, which it leaves unprotected, in the - faith that they will not malloc. */ - -/* Normally, this is fine. */ -#define MATCH_MAY_ALLOCATE - -/* When using GNU C, we are not REALLY using the C alloca, no matter - what config.h may say. So don't take precautions for it. */ -#ifdef __GNUC__ -# undef C_ALLOCA -#endif - -/* The match routines may not allocate if (1) they would do it with malloc - and (2) it's not safe for them to use malloc. - Note that if REL_ALLOC is defined, matching would not use malloc for the - failure stack, but we would still use it for the register vectors; - so REL_ALLOC should not affect this. */ -#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs -# undef MATCH_MAY_ALLOCATE -#endif - - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE_STACK. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -# define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_ITEMS items each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ - -#ifdef INT_IS_16BIT - -# if defined MATCH_MAY_ALLOCATE -/* 4400 was enough to cause a crash on Alpha OSF/1, - whose default stack limit is 2mb. */ -long int re_max_failures = 4000; -# else -long int re_max_failures = 2000; -# endif - -union fail_stack_elt -{ - unsigned char *pointer; - long int integer; -}; - -typedef union fail_stack_elt fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned long int size; - unsigned long int avail; /* Offset of next open position. */ -} fail_stack_type; - -#else /* not INT_IS_16BIT */ - -# if defined MATCH_MAY_ALLOCATE -/* 4400 was enough to cause a crash on Alpha OSF/1, - whose default stack limit is 2mb. */ -int re_max_failures = 20000; -# else -int re_max_failures = 2000; -# endif - -union fail_stack_elt -{ - unsigned char *pointer; - int integer; -}; - -typedef union fail_stack_elt fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#endif /* INT_IS_16BIT */ - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) - - -/* Define macros to initialize and free the failure stack. - Do `return -2' if the alloc fails. 
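The fail_stack_type above is just a growable array of fail_stack_elt_t with an avail cursor, and DOUBLE_FAIL_STACK below grows it geometrically up to roughly re_max_failures entries. A small malloc-based sketch of the same push-with-doubling pattern (simplified: the real code may also allocate with alloca and caps the growth):

    #include <stdlib.h>

    struct toy_stack {
        void **stack;
        unsigned size;    /* allocated slots */
        unsigned avail;   /* offset of next open position */
    };

    /* Push one pointer, doubling the array when full; returns 0 on allocation failure. */
    static int toy_push(struct toy_stack *s, void *item)
    {
        if (s->avail == s->size) {
            unsigned nsize = s->size ? s->size * 2 : 8;
            void **nstack = realloc(s->stack, nsize * sizeof *nstack);
            if (nstack == NULL)
                return 0;
            s->stack = nstack;
            s->size = nsize;
        }
        s->stack[s->avail++] = item;
        return 1;
    }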
*/ - -#ifdef MATCH_MAY_ALLOCATE -# define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) - -# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) -#else -# define INIT_FAIL_STACK() \ - do { \ - fail_stack.avail = 0; \ - } while (0) - -# define RESET_FAIL_STACK() -#endif - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE_STACK requires `destination' be declared. */ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE_STACK ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push pointer POINTER on FAIL_STACK. - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ - ? 0 \ - : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ - 1)) - -/* Push a pointer value onto the failure stack. - Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_POINTER(item) \ - fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) - -/* This pushes an integer-valued item onto the failure stack. - Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_INT(item) \ - fail_stack.stack[fail_stack.avail++].integer = (item) - -/* Push a fail_stack_elt_t value onto the failure stack. - Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ELT(item) \ - fail_stack.stack[fail_stack.avail++] = (item) - -/* These three POP... operations complement the three PUSH... operations. - All assume that `fail_stack' is nonempty. */ -#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer -#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer -#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] - -/* Used to omit pushing failure point id's when we're not debugging. */ -#ifdef DEBUG -# define DEBUG_PUSH PUSH_FAILURE_INT -# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () -#else -# define DEBUG_PUSH(item) -# define DEBUG_POP(item_addr) -#endif - - -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' - be declared. - - Does `return FAILURE_CODE' if runs out of memory. */ - -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. 
*/ \ - /* Can't be int, since there is not a shred of a guarantee that int \ - is wide enough to hold a value of something to which pointer can \ - be assigned */ \ - active_reg_t this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. */ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. */ \ - DEBUG_PRINT1 ("\n"); \ - \ - if (1) \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ - PUSH_FAILURE_POINTER (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ - PUSH_FAILURE_POINTER (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: %p\n ", \ - reg_info[this_reg].word.pointer); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ELT (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ - PUSH_FAILURE_INT (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ - PUSH_FAILURE_INT (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_POINTER (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_POINTER (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) - -/* This is the number of items that are pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 - -/* Individual items aside from the registers. */ -#ifdef DEBUG -# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ -#else -# define NUM_NONREG_ITEMS 4 -#endif - -/* We push at most this many items on the stack. */ -/* We used to use (num_regs - 1), which is the number of registers - this regexp will save; but that was changed to 5 - to avoid stack overflow for a regexp with lots of parens. */ -#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) - -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - (((0 \ - ? 0 : highest_active_reg - lowest_active_reg + 1) \ - * NUM_REG_ITEMS) \ - + NUM_NONREG_ITEMS) - -/* How many items can still be added to the stack without overflowing it. 
*/ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) - - -/* Pops what PUSH_FAIL_STACK pushes. - - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ - -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (unsigned failure_id;) \ - active_reg_t this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. */ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_POINTER (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string %p: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. */ \ - high_reg = (active_reg_t) POP_FAILURE_INT (); \ - DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ - \ - low_reg = (active_reg_t) POP_FAILURE_INT (); \ - DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ - \ - if (1) \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ELT (); \ - DEBUG_PRINT2 (" info: %p\n", \ - reg_info[this_reg].word.pointer); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ - DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ - } \ - else \ - { \ - for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ - { \ - reg_info[this_reg].word.integer = 0; \ - regend[this_reg] = 0; \ - regstart[this_reg] = 0; \ - } \ - highest_active_reg = high_reg; \ - } \ - \ - set_regs_matched_done = 0; \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ - - - -/* Structure for per-register (a.k.a. per-group) information. - Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. */ - - -/* Declarations and macros for re_match_2. 
*/ - -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - if (!set_regs_matched_done) \ - { \ - active_reg_t r; \ - set_regs_matched_done = 1; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - } \ - while (0) - -/* Registers are set to a sentinel when they haven't yet matched. */ -static char reg_unset_dummy; -#define REG_UNSET_VALUE (®_unset_dummy) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - -/* Subroutine declarations and macros for regex_compile. */ - -static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, - reg_syntax_t syntax, - struct re_pattern_buffer *bufp)); -static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)); -static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, - int arg1, int arg2)); -static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, - int arg, unsigned char *end)); -static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, - int arg1, int arg2, unsigned char *end)); -static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, - reg_syntax_t syntax)); -static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, - reg_syntax_t syntax)); -static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, - const char *pend, - char *translate, - reg_syntax_t syntax, - unsigned char *b)); - -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#ifndef PATFETCH -# define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = (unsigned char) translate[c]; \ - } while (0) -#endif - -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) - -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- - - -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. But - when we use a character as a subscript we must make it unsigned. */ -#ifndef TRANSLATE -# define TRANSLATE(d) \ - (translate ? (char) translate[(unsigned char) (d)] : (d)) -#endif - - -/* Macros for outputting the compiled pattern into `buffer'. */ - -/* If the buffer isn't allocated when it comes in, use this. 
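register_info_type packs four small flags into a union with a word-sized member, so each register's bookkeeping can travel as a single failure-stack slot. Below is a compilable sketch of that packing trick with hypothetical names; whether the bit fields really fit in one word is the "strictly speaking nonportable assumption" conceded in the comment preceding the union.

#include <stdio.h>

/* Illustrative: pack a few flags into a union no larger than a pointer-
   sized word, so the whole thing can be pushed as one stack slot. */
union reg_flags {
    void *word;                       /* lets the union travel as one item */
    struct {
        unsigned match_null : 2;      /* 3 plays the MATCH_NULL_UNSET role */
        unsigned is_active : 1;
        unsigned matched_something : 1;
        unsigned ever_matched : 1;
    } bits;
};

int main(void)
{
    union reg_flags r = { 0 };

    r.bits.is_active = 1;
    r.bits.match_null = 3;
    printf("active=%u match_null=%u size=%zu\n",
           r.bits.is_active, r.bits.match_null, sizeof r);
    return 0;
}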
*/ -#define INIT_BUF_SIZE 32 - -/* Make sure we have at least N more bytes of space in buffer. */ -#define GET_BUFFER_SPACE(n) \ - while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ - EXTEND_BUFFER () - -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) - - -/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) - - -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ - do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ - } while (0) - - -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (int) ((to) - (loc) - 3)) - -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) - -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) - -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) - - -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. So if 2^16 bytes turns out to - be too small, many things would have to change. */ -/* Any other compiler which, like MSC, has allocation limit below 2^16 - bytes will have to use approach similar to what was done below for - MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up - reallocating to 0 bytes. Such thing is not going to work too well. - You have been warned!! */ -#if defined _MSC_VER && !defined WIN32 -/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. - The REALLOC define eliminates a flurry of conversion warnings, - but is not required. */ -# define MAX_BUF_SIZE 65500L -# define REALLOC(p,s) realloc ((p), (size_t) (s)) -#else -# define MAX_BUF_SIZE (1L << 16) -# define REALLOC(p,s) realloc ((p), (s)) -#endif - -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the - correct places in the new one. If extending the buffer results in it - being larger than MAX_BUF_SIZE, then flag memory exhausted. 
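STORE_JUMP and its relatives write a jump as one opcode byte followed by a two-byte displacement, which is why every offset is biased by 3 and why GET_BUFFER_SPACE reserves 3 bytes per jump; it is also why compiled patterns are capped at MAX_BUF_SIZE. A hedged sketch of storing and reading such a two-byte signed number follows; the file's own STORE_NUMBER and EXTRACT_NUMBER macros, defined elsewhere, are the authoritative versions.

/* Illustrative two-byte, low-byte-first signed offset, in the spirit of
   the STORE_NUMBER / EXTRACT_NUMBER macros used by STORE_JUMP. */
static void store_number(unsigned char *dest, int number)
{
    dest[0] = (unsigned char) (number & 0xff);
    dest[1] = (unsigned char) ((number >> 8) & 0xff);
}

static int extract_number(const unsigned char *src)
{
    int n = src[0] | (src[1] << 8);

    if (n & 0x8000)                   /* sign-extend the 16-bit value */
        n -= 0x10000;
    return n;
}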
*/ -#if __BOUNDED_POINTERS__ -# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated) -# define MOVE_BUFFER_POINTER(P) \ - (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr) -# define ELSE_EXTEND_BUFFER_HIGH_BOUND \ - else \ - { \ - SET_HIGH_BOUND (b); \ - SET_HIGH_BOUND (begalt); \ - if (fixup_alt_jump) \ - SET_HIGH_BOUND (fixup_alt_jump); \ - if (laststart) \ - SET_HIGH_BOUND (laststart); \ - if (pending_exact) \ - SET_HIGH_BOUND (pending_exact); \ - } -#else -# define MOVE_BUFFER_POINTER(P) (P) += incr -# define ELSE_EXTEND_BUFFER_HIGH_BOUND -#endif -#define EXTEND_BUFFER() \ - do { \ - unsigned char *old_buffer = bufp->buffer; \ - if (bufp->allocated == MAX_BUF_SIZE) \ - return REG_ESIZE; \ - bufp->allocated <<= 1; \ - if (bufp->allocated > MAX_BUF_SIZE) \ - bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ - if (bufp->buffer == NULL) \ - return REG_ESPACE; \ - /* If the buffer moved, move all the pointers into it. */ \ - if (old_buffer != bufp->buffer) \ - { \ - int incr = bufp->buffer - old_buffer; \ - MOVE_BUFFER_POINTER (b); \ - MOVE_BUFFER_POINTER (begalt); \ - if (fixup_alt_jump) \ - MOVE_BUFFER_POINTER (fixup_alt_jump); \ - if (laststart) \ - MOVE_BUFFER_POINTER (laststart); \ - if (pending_exact) \ - MOVE_BUFFER_POINTER (pending_exact); \ - } \ - ELSE_EXTEND_BUFFER_HIGH_BOUND \ - } while (0) - - -/* Since we have one byte reserved for the register number argument to - {start,stop}_memory, the maximum number of groups we can report - things about is what fits in that byte. */ -#define MAX_REGNUM 255 - -/* But patterns can have more than `MAX_REGNUM' registers. We just - ignore the excess. */ -typedef unsigned regnum_t; - - -/* Macros for the compile stack. */ - -/* Since offsets can go either forwards or backwards, this type needs to - be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ -/* int may be not enough when sizeof(int) == 2. */ -typedef long pattern_offset_t; - -typedef struct -{ - pattern_offset_t begalt_offset; - pattern_offset_t fixup_alt_jump; - pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; - regnum_t regnum; -} compile_stack_elt_t; - - -typedef struct -{ - compile_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} compile_stack_type; - - -#define INIT_COMPILE_STACK_SIZE 32 - -#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) -#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) - -/* The next available element. */ -#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) - - -/* Set the bit for character C in a list. */ -#define SET_LIST_BIT(c) \ - (b[((unsigned char) (c)) / BYTEWIDTH] \ - |= 1 << (((unsigned char) c) % BYTEWIDTH)) - - -/* Get the next unsigned number in the uncompiled pattern. */ -#define GET_UNSIGNED_NUMBER(num) \ - { if (p != pend) \ - { \ - PATFETCH (c); \ - while (ISDIGIT (c)) \ - { \ - if (num < 0) \ - num = 0; \ - num = num * 10 + c - '0'; \ - if (p == pend) \ - break; \ - PATFETCH (c); \ - } \ - } \ - } - -#if defined _LIBC || WIDE_CHAR_SUPPORT -/* The GNU C library provides support for user-defined character classes - and the functions from ISO C amendement 1. */ -# ifdef CHARCLASS_NAME_MAX -# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX -# else -/* This shouldn't happen but some implementation might still have this - problem. Use a reasonable default value. 
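EXTEND_BUFFER doubles bufp->buffer and, when realloc moves the block, shifts b, begalt, fixup_alt_jump, laststart, and pending_exact by the same displacement. Here is a minimal sketch of keeping one interior cursor valid across such a grow; the names are hypothetical, and the cursor's offset is remembered before the realloc so the sketch never touches a stale pointer.

#include <stdlib.h>

/* Illustrative: grow a buffer and keep an interior pointer valid across a
   possible realloc move, the way EXTEND_BUFFER re-bases b, begalt, etc. */
static int grow_buffer(unsigned char **buf, size_t *allocated,
                       unsigned char **cursor)
{
    size_t offset = (size_t) (*cursor - *buf);   /* position before the grow */
    unsigned char *p = realloc(*buf, *allocated * 2);

    if (p == NULL)
        return -1;
    *allocated *= 2;
    *buf = p;
    *cursor = p + offset;           /* re-base into the (possibly moved) block */
    return 0;
}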
*/ -# define CHAR_CLASS_MAX_LENGTH 256 -# endif - -# ifdef _LIBC -# define IS_CHAR_CLASS(string) __wctype (string) -# else -# define IS_CHAR_CLASS(string) wctype (string) -# endif -#else -# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ - -# define IS_CHAR_CLASS(string) \ - (STREQ (string, "alpha") || STREQ (string, "upper") \ - || STREQ (string, "lower") || STREQ (string, "digit") \ - || STREQ (string, "alnum") || STREQ (string, "xdigit") \ - || STREQ (string, "space") || STREQ (string, "print") \ - || STREQ (string, "punct") || STREQ (string, "graph") \ - || STREQ (string, "cntrl") || STREQ (string, "blank")) -#endif - -#ifndef MATCH_MAY_ALLOCATE - -/* If we cannot allocate large objects within re_match_2_internal, - we make the fail stack and register vectors global. - The fail stack, we grow to the maximum size when a regexp - is compiled. - The register vectors, we adjust in size each time we - compile a regexp, according to the number of registers it needs. */ - -static fail_stack_type fail_stack; - -/* Size with which the following vectors are currently allocated. - That is so we can make them bigger as needed, - but never make them smaller. */ -static int regs_allocated_size; - -static const char ** regstart, ** regend; -static const char ** old_regstart, ** old_regend; -static const char **best_regstart, **best_regend; -static register_info_type *reg_info; -static const char **reg_dummy; -static register_info_type *reg_info_dummy; - -/* Make the register vectors big enough for NUM_REGS registers, - but don't make them smaller. */ - -static -regex_grow_registers (num_regs) - int num_regs; -{ - if (num_regs > regs_allocated_size) - { - RETALLOC_IF (regstart, num_regs, const char *); - RETALLOC_IF (regend, num_regs, const char *); - RETALLOC_IF (old_regstart, num_regs, const char *); - RETALLOC_IF (old_regend, num_regs, const char *); - RETALLOC_IF (best_regstart, num_regs, const char *); - RETALLOC_IF (best_regend, num_regs, const char *); - RETALLOC_IF (reg_info, num_regs, register_info_type); - RETALLOC_IF (reg_dummy, num_regs, const char *); - RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); - - regs_allocated_size = num_regs; - } -} - -#endif /* not MATCH_MAY_ALLOCATE */ - -static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type - compile_stack, - regnum_t regnum)); - -/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. - Returns one of error codes defined in `regex.h', or zero for success. - - Assumes the `allocated' (and perhaps `buffer') and `translate' - fields are set in BUFP on entry. - - If it succeeds, results are put in BUFP (if it returns an error, the - contents of BUFP are undefined): - `buffer' is the compiled pattern; - `syntax' is set to SYNTAX; - `used' is set to the length of the compiled pattern; - `fastmap_accurate' is zero; - `re_nsub' is the number of subexpressions in PATTERN; - `not_bol' and `not_eol' are zero; - - The `fastmap' and `newline_anchor' fields are neither - examined nor set. */ - -/* Return, freeing storage we allocated. */ -#define FREE_STACK_RETURN(value) \ - return (free (compile_stack.stack), value) - -static reg_errcode_t -regex_compile (pattern, size, syntax, bufp) - const char *pattern; - size_t size; - reg_syntax_t syntax; - struct re_pattern_buffer *bufp; -{ - /* We fetch characters from PATTERN here. Even though PATTERN is - `char *' (i.e., signed), we declare these variables as unsigned, so - they can be reliably used as array indices. 
*/ - register unsigned char c, c1; - - /* A random temporary spot in PATTERN. */ - const char *p1; - - /* Points to the end of the buffer, where we should append. */ - register unsigned char *b; - - /* Keeps track of unclosed groups. */ - compile_stack_type compile_stack; - - /* Points to the current (ending) position in the pattern. */ - const char *p = pattern; - const char *pend = pattern + size; - - /* How to translate the characters in the pattern. */ - RE_TRANSLATE_TYPE translate = bufp->translate; - - /* Address of the count-byte of the most recently inserted `exactn' - command. This makes it possible to tell if a new exact-match - character can be added to that command or if the character requires - a new `exactn' command. */ - unsigned char *pending_exact = 0; - - /* Address of start of the most recently finished expression. - This tells, e.g., postfix * where to find the start of its - operand. Reset at the beginning of groups and alternatives. */ - unsigned char *laststart = 0; - - /* Address of beginning of regexp, or inside of last group. */ - unsigned char *begalt; - - /* Place in the uncompiled pattern (i.e., the {) to - which to go back if the interval is invalid. */ - const char *beg_interval; - - /* Address of the place where a forward jump should go to the end of - the containing expression. Each alternative of an `or' -- except the - last -- ends with a forward jump of this sort. */ - unsigned char *fixup_alt_jump = 0; - - /* Counts open-groups as they are encountered. Remembered for the - matching close-group on the compile stack, so the same register - number is put in the stop_memory as the start_memory. */ - regnum_t regnum = 0; - -#ifdef DEBUG - DEBUG_PRINT1 ("\nCompiling pattern: "); - if (debug) - { - unsigned debug_count; - - for (debug_count = 0; debug_count < size; debug_count++) - putchar (pattern[debug_count]); - putchar ('\n'); - } -#endif /* DEBUG */ - - /* Initialize the compile stack. */ - compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); - if (compile_stack.stack == NULL) - return REG_ESPACE; - - compile_stack.size = INIT_COMPILE_STACK_SIZE; - compile_stack.avail = 0; - - /* Initialize the pattern buffer. */ - bufp->syntax = syntax; - bufp->fastmap_accurate = 0; - bufp->not_bol = bufp->not_eol = 0; - - /* Set `used' to zero, so that if we return an error, the pattern - printer (for debugging) will think there's no pattern. We reset it - at the end. */ - bufp->used = 0; - - /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; - -#if !defined emacs && !defined SYNTAX_TABLE - /* Initialize the syntax table. */ - init_syntax_once (); -#endif - - if (bufp->allocated == 0) - { - if (bufp->buffer) - { /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. */ - RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); - } - else - { /* Caller did not allocate a buffer. Do it for them. */ - bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); - } - if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); - - bufp->allocated = INIT_BUF_SIZE; - } - - begalt = b = bufp->buffer; - - /* Loop through the uncompiled pattern until we're at the end. */ - while (p != pend) - { - PATFETCH (c); - - switch (c) - { - case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. 
*/ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. */ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH (begline); - else - goto normal_char; - } - break; - - - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH (endline); - else - goto normal_char; - } - break; - - - case '+': - case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - handle_plus: - case '*': - /* If there is no previous pattern... */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } - - { - /* Are we optimizing this jump? */ - boolean keep_string_p = false; - - /* 1 means zero (many) matches is allowed. */ - char zero_times_ok = 0, many_times_ok = 0; - - /* If there is a sequence of repetition chars, collapse it - down to just one (the right one). We can't combine - interval operators with these because of, e.g., `a{2}*', - which should only match an even number of `a's. */ - - for (;;) - { - zero_times_ok |= c != '+'; - many_times_ok |= c != '?'; - - if (p == pend) - break; - - PATFETCH (c); - - if (c == '*' - || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) - ; - - else if (syntax & RE_BK_PLUS_QM && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c1); - if (!(c1 == '+' || c1 == '?')) - { - PATUNFETCH; - PATUNFETCH; - break; - } - - c = c1; - } - else - { - PATUNFETCH; - break; - } - - /* If we get here, we found another repeat character. */ - } - - /* Star, etc. applied to an empty pattern is equivalent - to an empty pattern. */ - if (!laststart) - break; - - /* Now we know whether or not zero matches is allowed - and also whether or not two or more matches is allowed. */ - if (many_times_ok) - { /* More than one repetition is allowed, so put in at the - end a backward relative jump from `b' to before the next - jump we're going to put in below (which jumps from - laststart to after this jump). - - But if we are at the `*' in the exact sequence `.*\n', - insert an unconditional jump backwards to the ., - instead of the beginning of the loop. This way we only - push a failure point once, instead of every time - through the loop. */ - assert (p - 1 > pattern); - - /* Allocate the space for the jump. */ - GET_BUFFER_SPACE (3); - - /* We know we are not at the first character of the pattern, - because laststart was nonzero. And we've already - incremented `p', by the way, to be the character after - the `*'. Do we have to do something analogous here - for null bytes, because of RE_DOT_NOT_NULL? */ - if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') - && zero_times_ok - && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') - && !(syntax & RE_DOT_NEWLINE)) - { /* We have .*\n. */ - STORE_JUMP (jump, b, laststart); - keep_string_p = true; - } - else - /* Anything else. */ - STORE_JUMP (maybe_pop_jump, b, laststart - 3); - - /* We've added more stuff to the buffer. */ - b += 3; - } - - /* On failure, jump from laststart to b + 3, which will be the - end of the buffer after this jump is inserted. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (keep_string_p ? 
on_failure_keep_string_jump - : on_failure_jump, - laststart, b + 3); - pending_exact = 0; - b += 3; - - if (!zero_times_ok) - { - /* At least one repetition is required, so insert a - `dummy_failure_jump' before the initial - `on_failure_jump' instruction of the loop. This - effects a skip over that instruction the first time - we hit that loop. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); - b += 3; - } - } - break; - - - case '.': - laststart = b; - BUF_PUSH (anychar); - break; - - - case '[': - { - boolean had_char_class = false; - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - /* Ensure that we have enough space to push a charset: the - opcode, the length count, and the bitset; 34 bytes in all. */ - GET_BUFFER_SPACE (34); - - laststart = b; - - /* We test `*p == '^' twice, instead of using an if - statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); - if (*p == '^') - p++; - - /* Remember the first position in the bracket expression. */ - p1 = p; - - /* Push the number of bytes in the bitmap. */ - BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); - - /* Clear the whole map. */ - bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); - - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - - /* Read in characters and ranges, setting map bits. */ - for (;;) - { - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - PATFETCH (c); - - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c1); - SET_LIST_BIT (c1); - continue; - } - - /* Could be the end of the bracket expression. If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p != p1 + 1) - break; - - /* Look ahead to see if it's a range when the last thing - was a character class. */ - if (had_char_class && c == '-' && *p != ']') - FREE_STACK_RETURN (REG_ERANGE); - - /* Look ahead to see if it's a range when the last thing - was a character: if this is a hyphen not at the - beginning or the end of a list, then it's the range - operator. */ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') - && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') - && *p != ']') - { - reg_errcode_t ret - = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); - } - - else if (p[0] == '-' && p[1] != ']') - { /* This handles ranges made up of characters only. */ - reg_errcode_t ret; - - /* Move past the `-'. */ - PATFETCH (c1); - - ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); - } - - /* See if we're at the beginning of a possible character - class. */ - - else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') - { /* Leave room for the null. */ - char str[CHAR_CLASS_MAX_LENGTH + 1]; - - PATFETCH (c); - c1 = 0; - - /* If pattern is `[[:'. */ - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (;;) - { - PATFETCH (c); - if ((c == ':' && *p == ']') || p == pend) - break; - if (c1 < CHAR_CLASS_MAX_LENGTH) - str[c1++] = c; - else - /* This is in any case an invalid class name. 
*/ - str[0] = '\0'; - } - str[c1] = '\0'; - - /* If isn't a word bracketed by `[:' and `:]': - undo the ending character, the letters, and leave - the leading `:' and `[' (but set bits for them). */ - if (c == ':' && *p == ']') - { -#if defined _LIBC || WIDE_CHAR_SUPPORT - boolean is_lower = STREQ (str, "lower"); - boolean is_upper = STREQ (str, "upper"); - wctype_t wt; - int ch; - - wt = IS_CHAR_CLASS (str); - if (wt == 0) - FREE_STACK_RETURN (REG_ECTYPE); - - /* Throw away the ] at the end of the character - class. */ - PATFETCH (c); - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) - { -# ifdef _LIBC - if (__iswctype (__btowc (ch), wt)) - SET_LIST_BIT (ch); -# else - if (iswctype (btowc (ch), wt)) - SET_LIST_BIT (ch); -# endif - - if (translate && (is_upper || is_lower) - && (ISUPPER (ch) || ISLOWER (ch))) - SET_LIST_BIT (ch); - } - - had_char_class = true; -#else - int ch; - boolean is_alnum = STREQ (str, "alnum"); - boolean is_alpha = STREQ (str, "alpha"); - boolean is_blank = STREQ (str, "blank"); - boolean is_cntrl = STREQ (str, "cntrl"); - boolean is_digit = STREQ (str, "digit"); - boolean is_graph = STREQ (str, "graph"); - boolean is_lower = STREQ (str, "lower"); - boolean is_print = STREQ (str, "print"); - boolean is_punct = STREQ (str, "punct"); - boolean is_space = STREQ (str, "space"); - boolean is_upper = STREQ (str, "upper"); - boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) - FREE_STACK_RETURN (REG_ECTYPE); - - /* Throw away the ] at the end of the character - class. */ - PATFETCH (c); - - if (p == pend) FREE_STACK_RETURN (REG_EBRACK); - - for (ch = 0; ch < 1 << BYTEWIDTH; ch++) - { - /* This was split into 3 if's to - avoid an arbitrary limit in some compiler. */ - if ( (is_alnum && ISALNUM (ch)) - || (is_alpha && ISALPHA (ch)) - || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch))) - SET_LIST_BIT (ch); - if ( (is_digit && ISDIGIT (ch)) - || (is_graph && ISGRAPH (ch)) - || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch))) - SET_LIST_BIT (ch); - if ( (is_punct && ISPUNCT (ch)) - || (is_space && ISSPACE (ch)) - || (is_upper && ISUPPER (ch)) - || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); - if ( translate && (is_upper || is_lower) - && (ISUPPER (ch) || ISLOWER (ch))) - SET_LIST_BIT (ch); - } - had_char_class = true; -#endif /* libc || wctype.h */ - } - else - { - c1++; - while (c1--) - PATUNFETCH; - SET_LIST_BIT ('['); - SET_LIST_BIT (':'); - had_char_class = false; - } - } - else - { - had_char_class = false; - SET_LIST_BIT (c); - } - } - - /* Discard any (non)matching list bytes that are all 0 at the - end of the map. Decrease the map-length byte too. */ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; - b += b[-1]; - } - break; - - - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - - case '\\': - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - /* Do not translate the character after the \, so that we can - distinguish, e.g., \B from \b, even if we normally would - translate, e.g., B to b. 
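The bracket-expression code above emits a charset opcode, a length byte, and a (1 << BYTEWIDTH) / BYTEWIDTH byte bitmap, with SET_LIST_BIT turning on the bit for each member byte. A small sketch of setting and testing bits in such a map is shown below; the helper names are illustrative, not the library's.

#define BYTEWIDTH 8                 /* bits per byte, as assumed throughout */

/* Illustrative helpers for a 32-byte charset bitmap. */
static void set_list_bit(unsigned char map[(1 << BYTEWIDTH) / BYTEWIDTH],
                         unsigned char c)
{
    map[c / BYTEWIDTH] |= (unsigned char) (1 << (c % BYTEWIDTH));
}

static int list_bit_p(const unsigned char map[(1 << BYTEWIDTH) / BYTEWIDTH],
                      unsigned char c)
{
    return (map[c / BYTEWIDTH] >> (c % BYTEWIDTH)) & 1;
}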
*/ - PATFETCH_RAW (c); - - switch (c) - { - case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: - bufp->re_nsub++; - regnum++; - - if (COMPILE_STACK_FULL) - { - RETALLOC (compile_stack.stack, compile_stack.size << 1, - compile_stack_elt_t); - if (compile_stack.stack == NULL) return REG_ESPACE; - - compile_stack.size <<= 1; - } - - /* These are the values to restore when we hit end of this - group. They are all relative offsets, so that if the - whole pattern moves because of realloc, they will still - be valid. */ - COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump - = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; - COMPILE_STACK_TOP.regnum = regnum; - - /* We will eventually replace the 0 with the number of - groups inner to this one. But do not push a - start_memory for groups beyond the last one we can - represent in the compiled pattern. */ - if (regnum <= MAX_REGNUM) - { - COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; - BUF_PUSH_3 (start_memory, regnum, 0); - } - - compile_stack.avail++; - - fixup_alt_jump = 0; - laststart = 0; - begalt = b; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - break; - - - case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - FREE_STACK_RETURN (REG_ERPAREN); - } - - handle_close: - if (fixup_alt_jump) - { /* Push a dummy failure point at the end of the - alternative for a possible future - `pop_failure_jump' to pop. See comments at - `push_dummy_failure' in `re_match_2'. */ - BUF_PUSH (push_dummy_failure); - - /* We allocated space for this jump when we assigned - to `fixup_alt_jump', in the `handle_alt' case below. */ - STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); - } - - /* See similar code for backslashed left paren above. */ - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - FREE_STACK_RETURN (REG_ERPAREN); - } - - /* Since we just checked for an empty stack above, this - ``can't happen''. */ - assert (compile_stack.avail != 0); - { - /* We don't just want to restore into `regnum', because - later groups should continue to be numbered higher, - as in `(ab)c(de)' -- the second group is #2. */ - regnum_t this_group_regnum; - - compile_stack.avail--; - begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; - fixup_alt_jump - = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 - : 0; - laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; - this_group_regnum = COMPILE_STACK_TOP.regnum; - /* If we've reached MAX_REGNUM groups, then this open - won't actually generate any code, so we'll have to - clear pending_exact explicitly. */ - pending_exact = 0; - - /* We're at the end of the group, so now we know how many - groups were inside this one. */ - if (this_group_regnum <= MAX_REGNUM) - { - unsigned char *inner_group_loc - = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - - *inner_group_loc = regnum - this_group_regnum; - BUF_PUSH_3 (stop_memory, this_group_regnum, - regnum - this_group_regnum); - } - } - break; - - - case '|': /* `\|'. 
*/ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - - /* Insert before the previous alternative a jump which - jumps to this alternative if the former fails. */ - GET_BUFFER_SPACE (3); - INSERT_JUMP (on_failure_jump, begalt, b + 6); - pending_exact = 0; - b += 3; - - /* The alternative before this one has a jump after it - which gets executed if it gets matched. Adjust that - jump so it will jump to this alternative's analogous - jump (put in below, which in turn will jump to the next - (if any) alternative's such jump, etc.). The last such - jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c - - If we are at `b', then fixup_alt_jump right now points to a - three-byte space after `a'. We'll put in the jump, set - fixup_alt_jump to right after `b', and leave behind three - bytes which we'll fill in when we get to after `c'. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - /* Mark and leave space for a jump after this alternative, - to be filled in later either by next alternative or - when know we're at the end of a series of alternatives. */ - fixup_alt_jump = b; - GET_BUFFER_SPACE (3); - b += 3; - - laststart = 0; - begalt = b; - break; - - - case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. */ - || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - || (p - 2 == pattern && p == pend)) - goto normal_backslash; - - handle_interval: - { - /* If got here, then the syntax allows intervals. */ - - /* At least (most) this many matches must be made. */ - int lower_bound = -1, upper_bound = -1; - - beg_interval = p - 1; - - if (p == pend) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_EBRACE); - } - - GET_UNSIGNED_NUMBER (lower_bound); - - if (c == ',') - { - GET_UNSIGNED_NUMBER (upper_bound); - if (upper_bound < 0) upper_bound = RE_DUP_MAX; - } - else - /* Interval such as `{1}' => match exactly once. */ - upper_bound = lower_bound; - - if (lower_bound < 0 || upper_bound > RE_DUP_MAX - || lower_bound > upper_bound) - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_BADBR); - } - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); - - PATFETCH (c); - } - - if (c != '}') - { - if (syntax & RE_NO_BK_BRACES) - goto unfetch_interval; - else - FREE_STACK_RETURN (REG_BADBR); - } - - /* We just parsed a valid interval. */ - - /* If it's invalid to have no preceding re. */ - if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } - - /* If the upper bound is zero, don't want to succeed at - all; jump from `laststart' to `b + 3', which will be - the end of the buffer after we insert the jump. */ - if (upper_bound == 0) - { - GET_BUFFER_SPACE (3); - INSERT_JUMP (jump, laststart, b + 3); - b += 3; - } - - /* Otherwise, we have a nontrivial interval. When - we're all done, the pattern will look like: - set_number_at - set_number_at - succeed_n - - jump_n - (The upper bound and `jump_n' are omitted if - `upper_bound' is 1, though.) */ - else - { /* If the upper bound is > 1, we need to insert - more at the end of the loop. 
*/ - unsigned nbytes = 10 + (upper_bound > 1) * 10; - - GET_BUFFER_SPACE (nbytes); - - /* Initialize lower bound of the `succeed_n', even - though it will be set during matching by its - attendant `set_number_at' (inserted next), - because `re_compile_fastmap' needs to know. - Jump to the `jump_n' we might insert below. */ - INSERT_JUMP2 (succeed_n, laststart, - b + 5 + (upper_bound > 1) * 5, - lower_bound); - b += 5; - - /* Code to initialize the lower bound. Insert - before the `succeed_n'. The `5' is the last two - bytes of this `set_number_at', plus 3 bytes of - the following `succeed_n'. */ - insert_op2 (set_number_at, laststart, 5, lower_bound, b); - b += 5; - - if (upper_bound > 1) - { /* More than one repetition is allowed, so - append a backward jump to the `succeed_n' - that starts this interval. - - When we've reached this during matching, - we'll have matched the interval once, so - jump back only `upper_bound - 1' times. */ - STORE_JUMP2 (jump_n, b, laststart + 5, - upper_bound - 1); - b += 5; - - /* The location we want to set is the second - parameter of the `jump_n'; that is `b-2' as - an absolute address. `laststart' will be - the `set_number_at' we're about to insert; - `laststart+3' the number to set, the source - for the relative address. But we are - inserting into the middle of the pattern -- - so everything is getting moved up by 5. - Conclusion: (b - 2) - (laststart + 3) + 5, - i.e., b - laststart. - - We insert this at the beginning of the loop - so that if we fail during matching, we'll - reinitialize the bounds. */ - insert_op2 (set_number_at, laststart, b - laststart, - upper_bound - 1, b); - b += 5; - } - } - pending_exact = 0; - beg_interval = NULL; - } - break; - - unfetch_interval: - /* If an invalid interval, match the characters as literals. */ - assert (beg_interval); - p = beg_interval; - beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (p > pattern && p[-1] == '\\') - goto normal_backslash; - } - goto normal_char; - -#ifdef emacs - /* There is no way to specify the before_dot and after_dot - operators. rms says this is ok. 
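The interval machinery above compiles {m,n} into set_number_at, succeed_n, and jump_n opcodes. Seen from a caller's side, the same construct is reachable through the POSIX wrappers declared in regex.h; the usage sketch below is hedged (REG_EXTENDED syntax so the braces act as operators, minimal error handling).

#include <regex.h>
#include <stdio.h>

int main(void)
{
    regex_t re;
    regmatch_t m;

    /* With REG_EXTENDED, {2,4} is the interval operator, not literal text. */
    if (regcomp(&re, "ab{2,4}c", REG_EXTENDED) != 0) {
        fprintf(stderr, "regcomp failed\n");
        return 1;
    }
    if (regexec(&re, "xabbbcx", 1, &m, 0) == 0)
        printf("matched at offsets %d..%d\n", (int) m.rm_so, (int) m.rm_eo);
    regfree(&re);
    return 0;
}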
--karl */ - case '=': - BUF_PUSH (at_dot); - break; - - case 's': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); - break; - - case 'S': - laststart = b; - PATFETCH (c); - BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); - break; -#endif /* emacs */ - - - case 'w': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - laststart = b; - BUF_PUSH (wordchar); - break; - - - case 'W': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - laststart = b; - BUF_PUSH (notwordchar); - break; - - - case '<': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - BUF_PUSH (wordbeg); - break; - - case '>': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - BUF_PUSH (wordend); - break; - - case 'b': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - BUF_PUSH (wordbound); - break; - - case 'B': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - BUF_PUSH (notwordbound); - break; - - case '`': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - BUF_PUSH (begbuf); - break; - - case '\'': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; - BUF_PUSH (endbuf); - break; - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (syntax & RE_NO_BK_REFS) - goto normal_char; - - c1 = c - '0'; - - if (c1 > regnum) - FREE_STACK_RETURN (REG_ESUBREG); - - /* Can't back reference to a subexpression if inside of it. */ - if (group_in_compile_stack (compile_stack, (regnum_t) c1)) - goto normal_char; - - laststart = b; - BUF_PUSH_2 (duplicate, c1); - break; - - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - - default: - normal_backslash: - /* You might think it would be useful for \ to mean - not to translate; but if we don't translate it - it will never match anything. */ - c = TRANSLATE (c); - goto normal_char; - } - break; - - - default: - /* Expects the character in `c'. */ - normal_char: - /* If no exactn currently being built. */ - if (!pending_exact - - /* If last exactn not at current position. */ - || pending_exact + *pending_exact + 1 != b - - /* We have only one byte following the exactn for the count. */ - || *pending_exact == (1 << BYTEWIDTH) - 1 - - /* If followed by a repetition operator. */ - || *p == '*' || *p == '^' - || ((syntax & RE_BK_PLUS_QM) - ? *p == '\\' && (p[1] == '+' || p[1] == '?') - : (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? *p == '{' - : (p[0] == '\\' && p[1] == '{')))) - { - /* Start building a new exactn. */ - - laststart = b; - - BUF_PUSH_2 (exactn, 0); - pending_exact = b - 1; - } - - BUF_PUSH (c); - (*pending_exact)++; - break; - } /* switch (c) */ - } /* while p != pend */ - - - /* Through the pattern now. */ - - if (fixup_alt_jump) - STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - - if (!COMPILE_STACK_EMPTY) - FREE_STACK_RETURN (REG_EPAREN); - - /* If we don't want backtracking, force success - the first time we reach the end of the compiled pattern. */ - if (syntax & RE_NO_POSIX_BACKTRACKING) - BUF_PUSH (succeed); - - free (compile_stack.stack); - - /* We have succeeded; set the length of the buffer. */ - bufp->used = b - bufp->buffer; - -#ifdef DEBUG - if (debug) - { - DEBUG_PRINT1 ("\nCompiled pattern: \n"); - print_compiled_pattern (bufp); - } -#endif /* DEBUG */ - -#ifndef MATCH_MAY_ALLOCATE - /* Initialize the failure stack to the largest possible stack. This - isn't necessary unless we're trying to avoid calling alloca in - the search and match routines. 
*/ - { - int num_regs = bufp->re_nsub + 1; - - /* Since DOUBLE_FAIL_STACK refuses to double only if the current size - is strictly greater than re_max_failures, the largest possible stack - is 2 * re_max_failures failure points. */ - if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) - { - fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); - -# ifdef emacs - if (! fail_stack.stack) - fail_stack.stack - = (fail_stack_elt_t *) xmalloc (fail_stack.size - * sizeof (fail_stack_elt_t)); - else - fail_stack.stack - = (fail_stack_elt_t *) xrealloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); -# else /* not emacs */ - if (! fail_stack.stack) - fail_stack.stack - = (fail_stack_elt_t *) malloc (fail_stack.size - * sizeof (fail_stack_elt_t)); - else - fail_stack.stack - = (fail_stack_elt_t *) realloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); -# endif /* not emacs */ - } - - regex_grow_registers (num_regs); - } -#endif /* not MATCH_MAY_ALLOCATE */ - - return REG_NOERROR; -} /* regex_compile */ - -/* Subroutines for `regex_compile'. */ - -/* Store OP at LOC followed by two-byte integer parameter ARG. */ - -static void -store_op1 (op, loc, arg) - re_opcode_t op; - unsigned char *loc; - int arg; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg); -} - - -/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -store_op2 (op, loc, arg1, arg2) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; -{ - *loc = (unsigned char) op; - STORE_NUMBER (loc + 1, arg1); - STORE_NUMBER (loc + 3, arg2); -} - - -/* Copy the bytes from LOC to END to open up three bytes of space at LOC - for OP followed by two-byte integer parameter ARG. */ - -static void -insert_op1 (op, loc, arg, end) - re_opcode_t op; - unsigned char *loc; - int arg; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 3; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op1 (op, loc, arg); -} - - -/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ - -static void -insert_op2 (op, loc, arg1, arg2, end) - re_opcode_t op; - unsigned char *loc; - int arg1, arg2; - unsigned char *end; -{ - register unsigned char *pfrom = end; - register unsigned char *pto = end + 5; - - while (pfrom != loc) - *--pto = *--pfrom; - - store_op2 (op, loc, arg1, arg2); -} - - -/* P points to just after a ^ in PATTERN. Return true if that ^ comes - after an alternative or a begin-subexpression. We assume there is at - least one character before the ^. */ - -static boolean -at_begline_loc_p (pattern, p, syntax) - const char *pattern, *p; - reg_syntax_t syntax; -{ - const char *prev = p - 2; - boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - - return - /* After a subexpression? */ - (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) - /* After an alternative? */ - || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); -} - - -/* The dual of at_begline_loc_p. This one is for $. We assume there is - at least one character after the $, i.e., `P < PEND'. */ - -static boolean -at_endline_loc_p (p, pend, syntax) - const char *p, *pend; - reg_syntax_t syntax; -{ - const char *next = p; - boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : 0; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? 
*/ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); -} - - -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and - false if it's not. */ - -static boolean -group_in_compile_stack (compile_stack, regnum) - compile_stack_type compile_stack; - regnum_t regnum; -{ - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - RE_TRANSLATE_TYPE translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - unsigned int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *. */ - range_start = ((const unsigned char *) p)[-2]; - range_end = ((const unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. */ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in - BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible - characters can start a string that matches the pattern. This fastmap - is used by re_search to skip quickly over impossible starting points. - - The caller must supply the address of a (1 << BYTEWIDTH)-byte data - area as BUFP->fastmap. - - We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in - the pattern buffer. - - Returns 0 if we succeed, -2 if an internal error. */ - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - int j, k; -#ifdef MATCH_MAY_ALLOCATE - fail_stack_type fail_stack; -#endif -#ifndef REGEX_MALLOC - char *destination; -#endif - - register char *fastmap = bufp->fastmap; - unsigned char *pattern = bufp->buffer; - unsigned char *p = pattern; - register unsigned char *pend = pattern + bufp->used; - -#ifdef REL_ALLOC - /* This holds the pointer to the failure stack, when - it is allocated relocatably. 
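compile_range widens this_char beyond unsigned char precisely because its bit-setting loop is inclusive: with range_end equal to 0xff, an 8-bit counter could never exceed 0xff and the loop would never terminate. A tiny self-contained illustration of the widened counter:

#include <stdio.h>

int main(void)
{
    /* An unsigned char counter cannot exceed 0xff, so an inclusive loop up
       to 0xff would wrap to 0 and spin forever.  A wider counter, as
       compile_range uses for this_char, terminates at 0x100. */
    unsigned int this_char;
    unsigned int range_start = 0xfe, range_end = 0xff;

    for (this_char = range_start; this_char <= range_end; this_char++)
        printf("would SET_LIST_BIT(0x%02x)\n", this_char);
    return 0;
}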
*/ - fail_stack_elt_t *failure_stack_ptr; -#endif - - /* Assume that each path through the pattern can be null until - proven otherwise. We set this false at the bottom of switch - statement, to which we get only if a particular path doesn't - match the empty string. */ - boolean path_can_be_null = true; - - /* We aren't doing a `succeed_n' to begin with. */ - boolean succeed_n_p = false; - - assert (fastmap != NULL && p != NULL); - - INIT_FAIL_STACK (); - bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ - bufp->fastmap_accurate = 1; /* It will be when we're done. */ - bufp->can_be_null = 0; - - while (1) - { - if (p == pend || *p == succeed) - { - /* We have reached the (effective) end of pattern. */ - if (!FAIL_STACK_EMPTY ()) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail].pointer; - - continue; - } - else - break; - } - - /* We should never be about to go beyond the end of the pattern. */ - assert (p < pend); - - switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) - { - - /* I guess the idea here is to simply not bother with a fastmap - if a backreference is used, since it's too hard to figure out - the fastmap for the corresponding group. Setting - `can_be_null' stops `re_search_2' from using the fastmap, so - that is all we do. */ - case duplicate: - bufp->can_be_null = 1; - goto done; - - - /* Following are the cases which match a character. These end - with `break'. */ - - case exactn: - fastmap[p[1]] = 1; - break; - - - case charset: - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) - fastmap[j] = 1; - break; - - - case charset_not: - /* Chars beyond end of map must be allowed. */ - for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) - if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) - fastmap[j] = 1; - break; - - - case wordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == Sword) - fastmap[j] = 1; - break; - - - case notwordchar: - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != Sword) - fastmap[j] = 1; - break; - - - case anychar: - { - int fastmap_newline = fastmap['\n']; - - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; - - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = fastmap_newline; - - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. */ - else if (bufp->can_be_null) - goto done; - - /* Otherwise, have to check alternative paths. */ - break; - } - -#ifdef emacs - case syntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) == (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - case notsyntaxspec: - k = *p++; - for (j = 0; j < (1 << BYTEWIDTH); j++) - if (SYNTAX (j) != (enum syntaxcode) k) - fastmap[j] = 1; - break; - - - /* All cases after this match the empty string. These end with - `continue'. 
*/ - - - case before_dot: - case at_dot: - case after_dot: - continue; -#endif /* emacs */ - - - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbound: - case notwordbound: - case wordbeg: - case wordend: - case push_dummy_failure: - continue; - - - case jump_n: - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case jump_past_alt: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - if (j > 0) - continue; - - /* Jump backward implies we just went through the body of a - loop and matched nothing. Opcode jumped to should be - `on_failure_jump' or `succeed_n'. Just treat it like an - ordinary jump. For a * loop, it has pushed its failure - point already; if so, discard that as redundant. */ - if ((re_opcode_t) *p != on_failure_jump - && (re_opcode_t) *p != succeed_n) - continue; - - p++; - EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - - /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1].pointer == p) - fail_stack.avail--; - - continue; - - - case on_failure_jump: - case on_failure_keep_string_jump: - handle_on_failure_jump: - EXTRACT_NUMBER_AND_INCR (j, p); - - /* For some patterns, e.g., `(a?)?', `p+j' here points to the - end of the pattern. We don't want to push such a point, - since when we restore it above, entering the switch will - increment `p' past the end of the pattern. We don't need - to push such a point since we obviously won't find any more - fastmap entries beyond `pend'. Such a pattern can match - the null string, though. */ - if (p + j < pend) - { - if (!PUSH_PATTERN_OP (p + j, fail_stack)) - { - RESET_FAIL_STACK (); - return -2; - } - } - else - bufp->can_be_null = 1; - - if (succeed_n_p) - { - EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ - succeed_n_p = false; - } - - continue; - - - case succeed_n: - /* Get to the number of times to succeed. */ - p += 2; - - /* Increment p past the n for when k != 0. */ - EXTRACT_NUMBER_AND_INCR (k, p); - if (k == 0) - { - p -= 4; - succeed_n_p = true; /* Spaghetti code alert. */ - goto handle_on_failure_jump; - } - continue; - - - case set_number_at: - p += 4; - continue; - - - case start_memory: - case stop_memory: - p += 2; - continue; - - - default: - abort (); /* We have listed all the cases. */ - } /* switch *p++ */ - - /* Getting here means we have found the possible starting - characters for one path of the pattern -- and that the empty - string does not match. We need not follow this path further. - Instead, look at the next alternative (remembered on the - stack), or quit if no more. The test at the top of the loop - does these things. */ - path_can_be_null = false; - p = pend; - } /* while p */ - - /* Set `can_be_null' for the last path (also the first path, if the - pattern is empty). */ - bufp->can_be_null |= path_can_be_null; - - done: - RESET_FAIL_STACK (); - return 0; -} /* re_compile_fastmap */ -#ifdef _LIBC -weak_alias (__re_compile_fastmap, re_compile_fastmap) -#endif - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. 
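re_compile_fastmap fills a (1 << BYTEWIDTH)-entry table with the bytes that can begin a match, and re_search_2 consults it to skip hopeless starting positions unless can_be_null is set. A hedged sketch of that skip loop in isolation follows; the helper is hypothetical, and the real search also applies the translate table and copes with the two-string split.

/* Illustrative: advance over bytes that cannot start a match.
   fastmap[c] is nonzero iff byte value c can begin a match. */
static const char *skip_with_fastmap(const char *s, const char *end,
                                     const char fastmap[256])
{
    while (s < end && !fastmap[(unsigned char) *s])
        s++;
    return s;                  /* first plausible starting position, or end */
}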
- - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} -#ifdef _LIBC -weak_alias (__re_set_registers, re_set_registers) -#endif - -/* Searching routines. */ - -/* Like re_search_2, below, but only one string is specified, and - doesn't let you say where to stop matching. */ - -int -re_search (bufp, string, size, startpos, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, startpos, range; - struct re_registers *regs; -{ - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, - regs, size); -} -#ifdef _LIBC -weak_alias (__re_search, re_search) -#endif - - -/* Using the compiled pattern in BUFP->buffer, first tries to match the - virtual concatenation of STRING1 and STRING2, starting first at index - STARTPOS, then at STARTPOS + 1, and so on. - - STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - - RANGE is how far to scan while trying to match. RANGE = 0 means try - only at STARTPOS; in general, the last start tried is STARTPOS + - RANGE. - - In REGS, return the indices of the virtual concatenation of STRING1 - and STRING2 that matched the entire BUFP->buffer and its contained - subexpressions. - - Do not consider matching one past the index STOP in the virtual - concatenation of STRING1 and STRING2. - - We return either the position in the strings at which the match was - found, -1 if no match, or -2 if error (such as failure - stack overflow). */ - -int -re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int startpos; - int range; - struct re_registers *regs; - int stop; -{ - int val; - register char *fastmap = bufp->fastmap; - register RE_TRANSLATE_TYPE translate = bufp->translate; - int total_size = size1 + size2; - int endpos = startpos + range; - - /* Check for out-of-range STARTPOS. */ - if (startpos < 0 || startpos > total_size) - return -1; - - /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. - Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ - if (endpos < 0) - range = 0 - startpos; - else if (endpos > total_size) - range = total_size - startpos; - - /* If the search isn't to be a backwards one, don't waste time in a - search for a pattern that must be anchored. */ - if (bufp->used > 0 && range > 0 - && ((re_opcode_t) bufp->buffer[0] == begbuf - /* `begline' is like `begbuf' if it cannot match at newlines. */ - || ((re_opcode_t) bufp->buffer[0] == begline - && !bufp->newline_anchor))) - { - if (startpos > 0) - return -1; - else - range = 1; - } - -#ifdef emacs - /* In a forward search for something that starts with \=. - don't keep searching past point. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) - { - range = PT - startpos; - if (range <= 0) - return -1; - } -#endif /* emacs */ - - /* Update the fastmap now if not correct already. 
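The search interface above takes a compiled re_pattern_buffer plus a start position and a range, and returns the match position, -1 for no match, or -2 for an internal error. Below is a hedged end-to-end sketch using the GNU entry points declared in regex.h; exact declarations differ slightly across regex.h revisions, and error handling is abbreviated.

#include <stdio.h>
#include <string.h>
#include <regex.h>            /* GNU regex: re_compile_pattern, re_search */

int main(void)
{
    struct re_pattern_buffer buf;
    const char *pat = "[0-9]+\\.[0-9]+";
    const char *text = "gawk 3.1.0 released";
    const char *err;
    int pos;

    memset(&buf, 0, sizeof buf);                /* let the library allocate */
    re_syntax_options = RE_SYNTAX_POSIX_EGREP;  /* syntax seen by re_compile_pattern */

    err = re_compile_pattern(pat, strlen(pat), &buf);
    if (err != NULL) {
        fprintf(stderr, "compile failed: %s\n", err);
        return 1;
    }

    /* Scan forward from offset 0 across the whole string. */
    pos = re_search(&buf, text, (int) strlen(text), 0,
                    (int) strlen(text), NULL);
    printf("first match starts at %d\n", pos);  /* -1 would mean no match */

    regfree(&buf);          /* in GNU regex.h, regex_t is this same struct */
    return 0;
}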
*/ - if (fastmap && !bufp->fastmap_accurate) - if (re_compile_fastmap (bufp) == -2) - return -2; - - /* Loop through the string, looking for a place to start matching. */ - for (;;) - { - /* If a fastmap is supplied, skip quickly over characters that - cannot be the start of a match. If the pattern can match the - null string, however, we don't need to skip characters; we want - the first null string. */ - if (fastmap && startpos < total_size && !bufp->can_be_null) - { - if (range > 0) /* Searching forwards. */ - { - register const char *d; - register int lim = 0; - int irange = range; - - if (startpos < size1 && startpos + range >= size1) - lim = range - (size1 - startpos); - - d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - - /* Written out as an if-else to avoid testing `translate' - inside the loop. */ - if (translate) - while (range > lim - && !fastmap[(unsigned char) - translate[(unsigned char) *d++]]) - range--; - else - while (range > lim && !fastmap[(unsigned char) *d++]) - range--; - - startpos += irange - range; - } - else /* Searching backwards. */ - { - register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] - : string1[startpos]); - - if (!fastmap[(unsigned char) TRANSLATE (c)]) - goto advance; - } - } - - /* If can't match the null string, and that's all we have left, fail. */ - if (range >= 0 && startpos == total_size && fastmap - && !bufp->can_be_null) - return -1; - - val = re_match_2_internal (bufp, string1, size1, string2, size2, - startpos, regs, stop); -#ifndef REGEX_MALLOC -# ifdef C_ALLOCA - alloca (0); -# endif -#endif - - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ -#ifdef _LIBC -weak_alias (__re_search_2, re_search_2) -#endif - -/* This converts PTR, a pointer into one of the search strings `string1' - and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) \ - ? ((regoff_t) ((ptr) - string1)) \ - : ((regoff_t) ((ptr) - string2 + size1))) - -/* Macros for dealing with the split strings in re_match_2. */ - -#define MATCHING_IN_FIRST_STRING (dend == end_match_1) - -/* Call before fetching a character with *d. This switches over to - string2 if necessary. */ -#define PREFETCH() \ - while (d == dend) \ - { \ - /* End of string2 => fail. */ \ - if (dend == end_match_2) \ - goto fail; \ - /* End of string1 => advance to string2. */ \ - d = string2; \ - dend = end_match_2; \ - } - - -/* Test if at very beginning or at very end of the virtual concatenation - of `string1' and `string2'. If only one string, it's `string2'. */ -#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) - - -/* Test if D points to a character which is word-constituent. We have - two special cases to check for: if past the end of string1, look at - the first character in string2; and if before the beginning of - string2, look at the last character in string1. */ -#define WORDCHAR_P(d) \ - (SYNTAX ((d) == end1 ? *string2 \ - : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ - == Sword) - -/* Disabled due to a compiler bug -- see comment at case wordbound */ -#if 0 -/* Test if the character before D and the one at D differ with respect - to being word-constituent. 
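
As a usage sketch of the searching entry points above: compile a pattern with re_compile_pattern, then scan a string with re_search. This assumes a GNU <regex.h> (with the re_syntax_options global and a regex_t that is struct re_pattern_buffer); the pattern and text are illustrative only.

#include <stdio.h>
#include <string.h>
#include <regex.h>

static void
search_demo (void)
{
  struct re_pattern_buffer pat;
  const char *err;
  const char *text = "seafood";
  int len = (int) strlen (text);
  int pos;

  memset (&pat, 0, sizeof pat);        /* buffer, allocated, translate all 0 */
  re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;

  err = re_compile_pattern ("fo+", 3, &pat);
  if (err != NULL)
    {
      fprintf (stderr, "compile failed: %s\n", err);
      return;
    }

  /* Try to match starting at offset 0, 1, ... through the end of TEXT.  */
  pos = re_search (&pat, text, len, 0, len, NULL);
  if (pos >= 0)
    printf ("match starts at offset %d\n", pos);   /* 3, the "foo" */

  regfree (&pat);
}
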
*/ -#define AT_WORD_BOUNDARY(d) \ - (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ - || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) -#endif - -/* Free everything we malloc. */ -#ifdef MATCH_MAY_ALLOCATE -# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL -# define FREE_VARIABLES() \ - do { \ - REGEX_FREE_STACK (fail_stack.stack); \ - FREE_VAR (regstart); \ - FREE_VAR (regend); \ - FREE_VAR (old_regstart); \ - FREE_VAR (old_regend); \ - FREE_VAR (best_regstart); \ - FREE_VAR (best_regend); \ - FREE_VAR (reg_info); \ - FREE_VAR (reg_dummy); \ - FREE_VAR (reg_info_dummy); \ - } while (0) -#else -# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ -#endif /* not MATCH_MAY_ALLOCATE */ - -/* These values must meet several constraints. They must not be valid - register values; since we have a limit of 255 registers (because - we use only one byte in the pattern for the register number), we can - use numbers larger than 255. They must differ by 1, because of - NUM_FAILURE_ITEMS above. And the value for the lowest register must - be larger than the value for the highest register, so we do not try - to actually save any registers when none are active. */ -#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) -#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) - -/* Matching routines. */ - -#ifndef emacs /* Emacs never uses this. */ -/* re_match is like re_match_2 except it takes only a single string. */ - -int -re_match (bufp, string, size, pos, regs) - struct re_pattern_buffer *bufp; - const char *string; - int size, pos; - struct re_registers *regs; -{ - int result = re_match_2_internal (bufp, NULL, 0, string, size, - pos, regs, size); -# ifndef REGEX_MALLOC -# ifdef C_ALLOCA - alloca (0); -# endif -# endif - return result; -} -# ifdef _LIBC -weak_alias (__re_match, re_match) -# endif -#endif /* not emacs */ - -static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p, - unsigned char *end, - register_info_type *reg_info)); -static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p, - unsigned char *end, - register_info_type *reg_info)); -static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p, - unsigned char *end, - register_info_type *reg_info)); -static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2, - int len, char *translate)); - -/* re_match_2 matches the compiled pattern in BUFP against the - the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 - and SIZE2, respectively). We start matching at POS, and stop - matching at STOP. - - If REGS is non-null and the `no_sub' field of BUFP is nonzero, we - store offsets for the substring each group matched in REGS. See the - documentation for exactly how many groups we fill. - - We return -1 if no match, -2 if an internal error (such as the - failure stack overflowing). Otherwise, we return the length of the - matched substring. */ - -int -re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - int result = re_match_2_internal (bufp, string1, size1, string2, size2, - pos, regs, stop); -#ifndef REGEX_MALLOC -# ifdef C_ALLOCA - alloca (0); -# endif -#endif - return result; -} -#ifdef _LIBC -weak_alias (__re_match_2, re_match_2) -#endif - -/* This is a separate function so that we can force an alloca cleanup - afterwards. 
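
The two-string interface is the distinctive part of re_match_2: the pattern is matched against the virtual concatenation of two buffers that need not be contiguous in memory. A minimal sketch, assuming a GNU <regex.h> and an already-compiled pattern buffer:

#include <regex.h>

static int
split_buffer_demo (struct re_pattern_buffer *pat)
{
  const char *half1 = "foo";
  const char *half2 = "bar";
  int size1 = 3, size2 = 3;

  /* Anchored match at position 0 of the virtual string "foobar",
     stopping at its very end; returns the length matched, -1 if the
     pattern does not match there, or -2 on internal error.  */
  return re_match_2 (pat, half1, size1, half2, size2,
                     0, NULL, size1 + size2);
}
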
*/ -static int -re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int size1, size2; - int pos; - struct re_registers *regs; - int stop; -{ - /* General temporaries. */ - int mcnt; - unsigned char *p1; - - /* Just past the end of the corresponding string. */ - const char *end1, *end2; - - /* Pointers into string1 and string2, just past the last characters in - each to consider matching. */ - const char *end_match_1, *end_match_2; - - /* Where we are in the data, and the end of the current string. */ - const char *d, *dend; - - /* Where we are in the pattern, and the end of the pattern. */ - unsigned char *p = bufp->buffer; - register unsigned char *pend = p + bufp->used; - - /* Mark the opcode just after a start_memory, so we can test for an - empty subpattern when we get to the stop_memory. */ - unsigned char *just_past_start_mem = 0; - - /* We use this to map every character in the string. */ - RE_TRANSLATE_TYPE translate = bufp->translate; - - /* Failure point stack. Each place that can handle a failure further - down the line pushes a failure point on this stack. It consists of - restart, regend, and reg_info for all registers corresponding to - the subexpressions we're currently inside, plus the number of such - registers, and, finally, two char *'s. The first char * is where - to resume scanning the pattern; the second one is where to resume - scanning the strings. If the latter is zero, the failure point is - a ``dummy''; if a failure happens and the failure point is a dummy, - it gets discarded and the next next one is tried. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ - fail_stack_type fail_stack; -#endif -#ifdef DEBUG - static unsigned failure_id; - unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; -#endif - -#ifdef REL_ALLOC - /* This holds the pointer to the failure stack, when - it is allocated relocatably. */ - fail_stack_elt_t *failure_stack_ptr; -#endif - - /* We fill all the registers internally, independent of what we - return, for use in backreferences. The number here includes - an element for register zero. */ - size_t num_regs = bufp->re_nsub + 1; - - /* The currently active registers. */ - active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; - active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; - - /* Information on the contents of registers. These are pointers into - the input strings; they record just what was matched (on this - attempt) by a subexpression part of the pattern, that is, the - regnum-th regstart pointer points to where in the pattern we began - matching and the regnum-th regend points to right after where we - stopped matching the regnum-th subexpression. (The zeroth register - keeps track of what the whole pattern matches.) */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **regstart, **regend; -#endif - - /* If a group that's operated upon by a repetition operator fails to - match anything, then the register for its start will need to be - restored because it will have been set to wherever in the string we - are when we last see its open-group operator. Similarly for a - register's end. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **old_regstart, **old_regend; -#endif - - /* The is_active field of reg_info helps us keep track of which (possibly - nested) subexpressions we are currently in. 
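
The failure stack described above is the heart of the backtracking loop. What follows is a deliberately simplified sketch of the idea only -- the real PUSH_FAILURE_POINT also saves the active registers -- reduced to the two resume pointers the comment mentions:

#include <stddef.h>

struct toy_failure_point
{
  const unsigned char *pattern_resume;   /* where to resume in the pattern */
  const char *string_resume;             /* where to resume in the input   */
};

struct toy_fail_stack
{
  struct toy_failure_point items[64];    /* fixed size, for the sketch only */
  size_t avail;
};

static int
toy_push (struct toy_fail_stack *st, const unsigned char *p, const char *d)
{
  if (st->avail == 64)
    return 0;                    /* overflow: the real matcher returns -2 */
  st->items[st->avail].pattern_resume = p;
  st->items[st->avail].string_resume = d;
  st->avail++;
  return 1;
}

static int
toy_pop (struct toy_fail_stack *st, const unsigned char **p, const char **d)
{
  if (st->avail == 0)
    return 0;                    /* no restart point left: overall failure */
  st->avail--;
  *p = st->items[st->avail].pattern_resume;
  *d = st->items[st->avail].string_resume;
  return 1;
}
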
The matched_something - field of reg_info[reg_num] helps us tell whether or not we have - matched any of the pattern so far this time through the reg_num-th - subexpression. These two fields get reset each time through any - loop their register is in. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ - register_info_type *reg_info; -#endif - - /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. - This happens as we backtrack through the failure points, which in - turn happens only if we have not yet matched the entire string. */ - unsigned best_regs_set = false; -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **best_regstart, **best_regend; -#endif - - /* Logically, this is `best_regend[0]'. But we don't want to have to - allocate space for that if we're not allocating space for anything - else (see below). Also, we never need info about register 0 for - any of the other register vectors, and it seems rather a kludge to - treat `best_regend' differently than the rest. So we keep track of - the end of the best match so far in a separate variable. We - initialize this to NULL so that when we backtrack the first time - and need to test it, it's not garbage. */ - const char *match_end = NULL; - - /* This helps SET_REGS_MATCHED avoid doing redundant work. */ - int set_regs_matched_done = 0; - - /* Used when we pop values we don't care about. */ -#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ - const char **reg_dummy; - register_info_type *reg_info_dummy; -#endif - -#ifdef DEBUG - /* Counts the total number of registers pushed. */ - unsigned num_regs_pushed = 0; -#endif - - DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - - INIT_FAIL_STACK (); - -#ifdef MATCH_MAY_ALLOCATE - /* Do not bother to initialize all the register variables if there are - no groups in the pattern, as it takes a fair amount of time. If - there are groups, we include space for register 0 (the whole - pattern), even though we never use it, since it simplifies the - array indexing. We should fix this. */ - if (bufp->re_nsub) - { - regstart = REGEX_TALLOC (num_regs, const char *); - regend = REGEX_TALLOC (num_regs, const char *); - old_regstart = REGEX_TALLOC (num_regs, const char *); - old_regend = REGEX_TALLOC (num_regs, const char *); - best_regstart = REGEX_TALLOC (num_regs, const char *); - best_regend = REGEX_TALLOC (num_regs, const char *); - reg_info = REGEX_TALLOC (num_regs, register_info_type); - reg_dummy = REGEX_TALLOC (num_regs, const char *); - reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) - { - FREE_VARIABLES (); - return -2; - } - } - else - { - /* We must initialize all our variables to NULL, so that - `FREE_VARIABLES' doesn't try to free them. */ - regstart = regend = old_regstart = old_regend = best_regstart - = best_regend = reg_dummy = NULL; - reg_info = reg_info_dummy = (register_info_type *) NULL; - } -#endif /* MATCH_MAY_ALLOCATE */ - - /* The starting position is bogus. */ - if (pos < 0 || pos > size1 + size2) - { - FREE_VARIABLES (); - return -1; - } - - /* Initialize subexpression text positions to -1 to mark ones that no - start_memory/stop_memory has been seen for. Also initialize the - register information struct. 
*/ - for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) - { - regstart[mcnt] = regend[mcnt] - = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - - REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; - IS_ACTIVE (reg_info[mcnt]) = 0; - MATCHED_SOMETHING (reg_info[mcnt]) = 0; - EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; - } - - /* We move `string1' into `string2' if the latter's empty -- but not if - `string1' is null. */ - if (size2 == 0 && string1 != NULL) - { - string2 = string1; - size2 = size1; - string1 = 0; - size1 = 0; - } - end1 = string1 + size1; - end2 = string2 + size2; - - /* Compute where to stop matching, within the two strings. */ - if (stop <= size1) - { - end_match_1 = string1 + stop; - end_match_2 = string2; - } - else - { - end_match_1 = end1; - end_match_2 = string2 + stop - size1; - } - - /* `p' scans through the pattern as `d' scans through the data. - `dend' is the end of the input string that `d' points within. `d' - is advanced into the following input string whenever necessary, but - this happens before fetching; therefore, at the beginning of the - loop, `d' can be pointing at the end of a string, but it cannot - equal `string2'. */ - if (size1 > 0 && pos <= size1) - { - d = string1 + pos; - dend = end_match_1; - } - else - { - d = string2 + pos - size1; - dend = end_match_2; - } - - DEBUG_PRINT1 ("The compiled pattern is:\n"); - DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); - DEBUG_PRINT1 ("The string to match is: `"); - DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); - DEBUG_PRINT1 ("'\n"); - - /* This loops over pattern commands. It exits by returning from the - function if the match is complete, or it drops through if the match - fails at this starting point in the input data. */ - for (;;) - { -#ifdef _LIBC - DEBUG_PRINT2 ("\n%p: ", p); -#else - DEBUG_PRINT2 ("\n0x%x: ", p); -#endif - - if (p == pend) - { /* End of pattern means we might have succeeded. */ - DEBUG_PRINT1 ("end of pattern ... "); - - /* If we haven't matched the entire string, and we want the - longest match, try backtracking. */ - if (d != end_match_2) - { - /* 1 if this match ends in the same string (string1 or string2) - as the best previous match. */ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); - /* 1 if this match is the best seen so far. */ - boolean best_match_p; - - /* AIX compiler got confused when this was combined - with the previous declaration. */ - if (same_str_p) - best_match_p = d > match_end; - else - best_match_p = !MATCHING_IN_FIRST_STRING; - - DEBUG_PRINT1 ("backtracking.\n"); - - if (!FAIL_STACK_EMPTY ()) - { /* More failure points to try. */ - - /* If exceeds best match so far, save it. */ - if (!best_regs_set || best_match_p) - { - best_regs_set = true; - match_end = d; - - DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) - { - best_regstart[mcnt] = regstart[mcnt]; - best_regend[mcnt] = regend[mcnt]; - } - } - goto fail; - } - - /* If no failure points, don't restore garbage. And if - last match is real best match, don't restore second - best one. */ - else if (best_regs_set && !best_match_p) - { - restore_best_regs: - /* Restore best match. It may happen that `dend == - end_match_1' while the restored d is in string2. - For example, the pattern `x.*y.*z' against the - strings `x-' and `y-z-', if the two strings are - not consecutive in memory. 
*/ - DEBUG_PRINT1 ("Restoring best registers.\n"); - - d = match_end; - dend = ((d >= string1 && d <= end1) - ? end_match_1 : end_match_2); - - for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) - { - regstart[mcnt] = best_regstart[mcnt]; - regend[mcnt] = best_regend[mcnt]; - } - } - } /* d != end_match_2 */ - - succeed_label: - DEBUG_PRINT1 ("Accepting match.\n"); - - /* If caller wants register contents data back, do it. */ - if (regs && !bufp->no_sub) - { - /* Have the register data arrays been allocated? */ - if (bufp->regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. We need one - extra element beyond `num_regs' for the `-1' marker - GNU code uses. */ - regs->num_regs = MAX (RE_NREGS, num_regs + 1); - regs->start = TALLOC (regs->num_regs, regoff_t); - regs->end = TALLOC (regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - { - FREE_VARIABLES (); - return -2; - } - bufp->regs_allocated = REGS_REALLOCATE; - } - else if (bufp->regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. */ - if (regs->num_regs < num_regs + 1) - { - regs->num_regs = num_regs + 1; - RETALLOC (regs->start, regs->num_regs, regoff_t); - RETALLOC (regs->end, regs->num_regs, regoff_t); - if (regs->start == NULL || regs->end == NULL) - { - FREE_VARIABLES (); - return -2; - } - } - } - else - { - /* These braces fend off a "empty body in an else-statement" - warning under GCC when assert expands to nothing. */ - assert (bufp->regs_allocated == REGS_FIXED); - } - - /* Convert the pointer data in `regstart' and `regend' to - indices. Register zero has to be set differently, - since we haven't kept track of any info for it. */ - if (regs->num_regs > 0) - { - regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING - ? ((regoff_t) (d - string1)) - : ((regoff_t) (d - string2 + size1))); - } - - /* Go through the first `min (num_regs, regs->num_regs)' - registers, since that is all we initialized. */ - for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); - mcnt++) - { - if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) - regs->start[mcnt] = regs->end[mcnt] = -1; - else - { - regs->start[mcnt] - = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] - = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); - } - } - - /* If the regs structure we return has more elements than - were in the pattern, set the extra elements to -1. If - we (re)allocated the registers, this is the case, - because we always allocate enough to have at least one - -1 at the end. */ - for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) - regs->start[mcnt] = regs->end[mcnt] = -1; - } /* regs && !bufp->no_sub */ - - DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", - nfailure_points_pushed, nfailure_points_popped, - nfailure_points_pushed - nfailure_points_popped); - DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 - : string2 - size1); - - DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); - - FREE_VARIABLES (); - return mcnt; - } - - /* Otherwise match next pattern command. */ - switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) - { - /* Ignore these. Used to ignore the n of succeed_n's which - currently have n == 0. 
*/ - case no_op: - DEBUG_PRINT1 ("EXECUTING no_op.\n"); - break; - - case succeed: - DEBUG_PRINT1 ("EXECUTING succeed.\n"); - goto succeed_label; - - /* Match the next n pattern characters exactly. The following - byte in the pattern defines n, and the n bytes after that - are the characters to match. */ - case exactn: - mcnt = *p++; - DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); - - /* This is written out as an if-else so we don't waste time - testing `translate' inside the loop. */ - if (translate) - { - do - { - PREFETCH (); - if ((unsigned char) translate[(unsigned char) *d++] - != (unsigned char) *p++) - goto fail; - } - while (--mcnt); - } - else - { - do - { - PREFETCH (); - if (*d++ != (char) *p++) goto fail; - } - while (--mcnt); - } - SET_REGS_MATCHED (); - break; - - - /* Match any character except possibly a newline or a null. */ - case anychar: - DEBUG_PRINT1 ("EXECUTING anychar.\n"); - - PREFETCH (); - - if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') - || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) - goto fail; - - SET_REGS_MATCHED (); - DEBUG_PRINT2 (" Matched `%d'.\n", *d); - d++; - break; - - - case charset: - case charset_not: - { - register unsigned char c; - boolean not = (re_opcode_t) *(p - 1) == charset_not; - - DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); - - PREFETCH (); - c = TRANSLATE (*d); /* The character to match. */ - - /* Cast to `unsigned' instead of `unsigned char' in case the - bit list is a full 32 bytes long. */ - if (c < (unsigned) (*p * BYTEWIDTH) - && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - p += 1 + *p; - - if (!not) goto fail; - - SET_REGS_MATCHED (); - d++; - break; - } - - - /* The beginning of a group is represented by start_memory. - The arguments are the register number in the next byte, and the - number of groups inner to this one in the next. The text - matched within the group is recorded (in the internal - registers data structure) under the register number. */ - case start_memory: - DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); - - /* Find out if this group can match the empty string. */ - p1 = p; /* To send to group_match_null_string_p. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) - = group_match_null_string_p (&p1, pend, reg_info); - - /* Save the position in the string where we were the last time - we were at this open-group operator in case the group is - operated upon by a repetition operator, e.g., with `(a*)*b' - against `ab'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regstart[*p]) ? d : regstart[*p] - : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", - POINTER_TO_OFFSET (old_regstart[*p])); - - regstart[*p] = d; - DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); - - IS_ACTIVE (reg_info[*p]) = 1; - MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Clear this whenever we change the register activity status. */ - set_regs_matched_done = 0; - - /* This is the new highest active register. */ - highest_active_reg = *p; - - /* If nothing was active before, this is the new lowest active - register. */ - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *p; - - /* Move past the register number and inner group count. 
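
The charset and charset_not cases above test membership in a bitmap that follows the opcode: a length byte (counted in bytes) and then one bit per character code. A standalone sketch of that same test, assuming BYTEWIDTH is 8:

static int
charset_contains (const unsigned char *set, unsigned char c)
{
  unsigned bitmap_bits = set[0] * 8;   /* set[0] is the bitmap length in bytes */

  return c < bitmap_bits
         && (set[1 + c / 8] & (1 << (c % 8))) != 0;
}
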
*/ - p += 2; - just_past_start_mem = p; - - break; - - - /* The stop_memory opcode represents the end of a group. Its - arguments are the same as start_memory's: the register - number, and the number of inner groups. */ - case stop_memory: - DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - - /* We need to save the string position the last time we were at - this close-group operator in case the group is operated - upon by a repetition operator, e.g., with `((a*)*(b*)*)*' - against `aba'; then we want to ignore where we are now in - the string in case this attempt to match fails. */ - old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) - ? REG_UNSET (regend[*p]) ? d : regend[*p] - : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", - POINTER_TO_OFFSET (old_regend[*p])); - - regend[*p] = d; - DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); - - /* This register isn't active anymore. */ - IS_ACTIVE (reg_info[*p]) = 0; - - /* Clear this whenever we change the register activity status. */ - set_regs_matched_done = 0; - - /* If this was the only register active, nothing is active - anymore. */ - if (lowest_active_reg == highest_active_reg) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - { /* We must scan for the new highest active register, since - it isn't necessarily one less than now: consider - (a(b)c(d(e)f)g). When group 3 ends, after the f), the - new highest active register is 1. */ - unsigned char r = *p - 1; - while (r > 0 && !IS_ACTIVE (reg_info[r])) - r--; - - /* If we end up at register zero, that means that we saved - the registers as the result of an `on_failure_jump', not - a `start_memory', and we jumped to past the innermost - `stop_memory'. For example, in ((.)*) we save - registers 1 and 2 as a result of the *, but when we pop - back to the second ), we are at the stop_memory 1. - Thus, nothing is active. */ - if (r == 0) - { - lowest_active_reg = NO_LOWEST_ACTIVE_REG; - highest_active_reg = NO_HIGHEST_ACTIVE_REG; - } - else - highest_active_reg = r; - } - - /* If just failed to match something this time around with a - group that's operated on by a repetition operator, try to - force exit from the ``loop'', and restore the register - information for this group that we had before trying this - last match. */ - if ((!MATCHED_SOMETHING (reg_info[*p]) - || just_past_start_mem == p - 1) - && (p + 2) < pend) - { - boolean is_a_jump_n = false; - - p1 = p + 2; - mcnt = 0; - switch ((re_opcode_t) *p1++) - { - case jump_n: - is_a_jump_n = true; - case pop_failure_jump: - case maybe_pop_jump: - case jump: - case dummy_failure_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (is_a_jump_n) - p1 += 2; - break; - - default: - /* do nothing */ ; - } - p1 += mcnt; - - /* If the next operation is a jump backwards in the pattern - to an on_failure_jump right before the start_memory - corresponding to this stop_memory, exit from the loop - by forcing a failure after pushing on the stack the - on_failure_jump's jump in the pattern, and d. */ - if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump - && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) - { - /* If this group ever matched anything, then restore - what its registers were before trying this last - failed match, e.g., with `(a*)*b' against `ab' for - regstart[1], and, e.g., with `((a*)*(b*)*)*' - against `aba' for regend[3]. 
- - Also restore the registers for inner groups for, - e.g., `((a*)(b*))*' against `aba' (register 3 would - otherwise get trashed). */ - - if (EVER_MATCHED_SOMETHING (reg_info[*p])) - { - unsigned r; - - EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - - /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); - r++) - { - regstart[r] = old_regstart[r]; - - /* xx why this test? */ - if (old_regend[r] >= regstart[r]) - regend[r] = old_regend[r]; - } - } - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - PUSH_FAILURE_POINT (p1 + mcnt, d, -2); - - goto fail; - } - } - - /* Move past the register number and the inner group count. */ - p += 2; - break; - - - /* \ has been turned into a `duplicate' command which is - followed by the numeric value of as the register number. */ - case duplicate: - { - register const char *d2, *dend2; - int regno = *p++; /* Get which register to match against. */ - DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); - - /* Can't back reference a group which we've never matched. */ - if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) - goto fail; - - /* Where in input to try to start matching. */ - d2 = regstart[regno]; - - /* Where to stop matching; if both the place to start and - the place to stop matching are in the same string, then - set to the place to stop, otherwise, for now have to use - the end of the first string. */ - - dend2 = ((FIRST_STRING_P (regstart[regno]) - == FIRST_STRING_P (regend[regno])) - ? regend[regno] : end_match_1); - for (;;) - { - /* If necessary, advance to next segment in register - contents. */ - while (d2 == dend2) - { - if (dend2 == end_match_2) break; - if (dend2 == regend[regno]) break; - - /* End of string1 => advance to string2. */ - d2 = string2; - dend2 = regend[regno]; - } - /* At end of register contents => success */ - if (d2 == dend2) break; - - /* If necessary, advance to next segment in data. */ - PREFETCH (); - - /* How many characters left in this segment to match. */ - mcnt = dend - d; - - /* Want how many consecutive characters we can match in - one shot, so, if necessary, adjust the count. */ - if (mcnt > dend2 - d2) - mcnt = dend2 - d2; - - /* Compare that many; failure if mismatch, else move - past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : memcmp (d, d2, mcnt)) - goto fail; - d += mcnt, d2 += mcnt; - - /* Do this because we've match some characters. */ - SET_REGS_MATCHED (); - } - } - break; - - - /* begline matches the empty string at the beginning of the string - (unless `not_bol' is set in `bufp'), and, if - `newline_anchor' is set, after newlines. */ - case begline: - DEBUG_PRINT1 ("EXECUTING begline.\n"); - - if (AT_STRINGS_BEG (d)) - { - if (!bufp->not_bol) break; - } - else if (d[-1] == '\n' && bufp->newline_anchor) - { - break; - } - /* In all other cases, we fail. */ - goto fail; - - - /* endline is the dual of begline. */ - case endline: - DEBUG_PRINT1 ("EXECUTING endline.\n"); - - if (AT_STRINGS_END (d)) - { - if (!bufp->not_eol) break; - } - - /* We have to ``prefetch'' the next character. */ - else if ((d == end1 ? *string2 : *d) == '\n' - && bufp->newline_anchor) - { - break; - } - goto fail; - - - /* Match at the very beginning of the data. */ - case begbuf: - DEBUG_PRINT1 ("EXECUTING begbuf.\n"); - if (AT_STRINGS_BEG (d)) - break; - goto fail; - - - /* Match at the very end of the data. 
*/ - case endbuf: - DEBUG_PRINT1 ("EXECUTING endbuf.\n"); - if (AT_STRINGS_END (d)) - break; - goto fail; - - - /* on_failure_keep_string_jump is used to optimize `.*\n'. It - pushes NULL as the value for the string on the stack. Then - `pop_failure_point' will keep the current value for the - string, instead of restoring it. To see why, consider - matching `foo\nbar' against `.*\n'. The .* matches the foo; - then the . fails against the \n. But the next thing we want - to do is match the \n against the \n; if we restored the - string value, we would be back at the foo. - - Because this is used only in specific cases, we don't need to - check all the things that `on_failure_jump' does, to make - sure the right things get saved on the stack. Hence we don't - share its code. The only reason to push anything on the - stack at all is that otherwise we would have to change - `anychar's code to do something besides goto fail in this - case; that seems worse than this. */ - case on_failure_keep_string_jump: - DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); -#ifdef _LIBC - DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); -#else - DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); -#endif - - PUSH_FAILURE_POINT (p + mcnt, NULL, -2); - break; - - - /* Uses of on_failure_jump: - - Each alternative starts with an on_failure_jump that points - to the beginning of the next alternative. Each alternative - except the last ends with a jump that in effect jumps past - the rest of the alternatives. (They really jump to the - ending jump of the following alternative, because tensioning - these jumps is a hassle.) - - Repeats start with an on_failure_jump that points past both - the repetition text and either the following jump or - pop_failure_jump back to this on_failure_jump. */ - case on_failure_jump: - on_failure: - DEBUG_PRINT1 ("EXECUTING on_failure_jump"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); -#ifdef _LIBC - DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); -#else - DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); -#endif - - /* If this on_failure_jump comes right before a group (i.e., - the original * applied to a group), save the information - for that group and all inner ones, so that if we fail back - to this point, the group's information will be correct. - For example, in \(a*\)*\1, we need the preceding group, - and in \(zz\(a*\)b*\)\2, we need the inner group. */ - - /* We can't use `p' to check ahead because we push - a failure point to `p + mcnt' after we do this. */ - p1 = p; - - /* We need to skip no_op's before we look for the - start_memory in case this on_failure_jump is happening as - the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 - against aba. */ - while (p1 < pend && (re_opcode_t) *p1 == no_op) - p1++; - - if (p1 < pend && (re_opcode_t) *p1 == start_memory) - { - /* We have a new highest active register now. This will - get reset at the start_memory we are about to get to, - but we will have saved all the registers relevant to - this repetition op, as described above. */ - highest_active_reg = *(p1 + 1) + *(p1 + 2); - if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) - lowest_active_reg = *(p1 + 1); - } - - DEBUG_PRINT1 (":\n"); - PUSH_FAILURE_POINT (p + mcnt, d, -2); - break; - - - /* A smart repeat ends with `maybe_pop_jump'. - We change it to either `pop_failure_jump' or `jump'. 
*/ - case maybe_pop_jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); - DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); - { - register unsigned char *p2 = p; - - /* Compare the beginning of the repeat with what in the - pattern follows its end. If we can establish that there - is nothing that they would both match, i.e., that we - would have to backtrack because of (as in, e.g., `a*a') - then we can change to pop_failure_jump, because we'll - never have to backtrack. - - This is not true in the case of alternatives: in - `(a|ab)*' we do need to backtrack to the `ab' alternative - (e.g., if the string was `ab'). But instead of trying to - detect that here, the alternative has put on a dummy - failure point which is what we will end up popping. */ - - /* Skip over open/close-group commands. - If what follows this loop is a ...+ construct, - look at what begins its body, since we will have to - match at least one of that. */ - while (1) - { - if (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; - else if (p2 + 6 < pend - && (re_opcode_t) *p2 == dummy_failure_jump) - p2 += 6; - else - break; - } - - p1 = p + mcnt; - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. */ - - /* If we're at the end of the pattern, we can change. */ - if (p2 == pend) - { - /* Consider what happens when matching ":\(.*\)" - against ":/". I don't really understand this code - yet. */ - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 - (" End of pattern: change to `pop_failure_jump'.\n"); - } - - else if ((re_opcode_t) *p2 == exactn - || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) - { - register unsigned char c - = *p2 == (unsigned char) endline ? '\n' : p2[2]; - - if ((re_opcode_t) p1[3] == exactn && p1[5] != c) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", - c, p1[5]); - } - - else if ((re_opcode_t) p1[3] == charset - || (re_opcode_t) p1[3] == charset_not) - { - int not = (re_opcode_t) p1[3] == charset_not; - - if (c < (unsigned) (p1[4] * BYTEWIDTH) - && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) - not = !not; - - /* `not' is equal to 1 if c would match, which means - that we can't change to pop_failure_jump. */ - if (!not) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - else if ((re_opcode_t) *p2 == charset) - { - /* We win if the first character of the loop is not part - of the charset. */ - if ((re_opcode_t) p1[3] == exactn - && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5] - && (p2[2 + p1[5] / BYTEWIDTH] - & (1 << (p1[5] % BYTEWIDTH))))) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - - else if ((re_opcode_t) p1[3] == charset_not) - { - int idx; - /* We win if the charset_not inside the loop - lists every character listed in the charset after. */ - for (idx = 0; idx < (int) p2[1]; idx++) - if (! (p2[2 + idx] == 0 - || (idx < (int) p1[4] - && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) - break; - - if (idx == p2[1]) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - else if ((re_opcode_t) p1[3] == charset) - { - int idx; - /* We win if the charset inside the loop - has no overlap with the one after the loop. 
*/ - for (idx = 0; - idx < (int) p2[1] && idx < (int) p1[4]; - idx++) - if ((p2[2 + idx] & p1[5 + idx]) != 0) - break; - - if (idx == p2[1] || idx == p1[4]) - { - p[-3] = (unsigned char) pop_failure_jump; - DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); - } - } - } - } - p -= 2; /* Point at relative address again. */ - if ((re_opcode_t) p[-1] != pop_failure_jump) - { - p[-1] = (unsigned char) jump; - DEBUG_PRINT1 (" Match => jump.\n"); - goto unconditional_jump; - } - /* Note fall through. */ - - - /* The end of a simple repeat has a pop_failure_jump back to - its matching on_failure_jump, where the latter will push a - failure point. The pop_failure_jump takes off failure - points put on by this pop_failure_jump's matching - on_failure_jump; we got through the pattern to here from the - matching on_failure_jump, so didn't fail. */ - case pop_failure_jump: - { - /* We need to pass separate storage for the lowest and - highest registers, even though we don't care about the - actual values. Otherwise, we will restore only one - register from the stack, since lowest will == highest in - `pop_failure_point'. */ - active_reg_t dummy_low_reg, dummy_high_reg; - unsigned char *pdummy; - const char *sdummy; - - DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); - POP_FAILURE_POINT (sdummy, pdummy, - dummy_low_reg, dummy_high_reg, - reg_dummy, reg_dummy, reg_info_dummy); - } - /* Note fall through. */ - - unconditional_jump: -#ifdef _LIBC - DEBUG_PRINT2 ("\n%p: ", p); -#else - DEBUG_PRINT2 ("\n0x%x: ", p); -#endif - /* Note fall through. */ - - /* Unconditionally jump (without popping any failure points). */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ - DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); - p += mcnt; /* Do the jump. */ -#ifdef _LIBC - DEBUG_PRINT2 ("(to %p).\n", p); -#else - DEBUG_PRINT2 ("(to 0x%x).\n", p); -#endif - break; - - - /* We need this opcode so we can detect where alternatives end - in `group_match_null_string_p' et al. */ - case jump_past_alt: - DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); - goto unconditional_jump; - - - /* Normally, the on_failure_jump pushes a failure point, which - then gets popped at pop_failure_jump. We will end up at - pop_failure_jump, also, and with a pattern of, say, `a+', we - are skipping over the on_failure_jump, so we have to push - something meaningless for pop_failure_jump to pop. */ - case dummy_failure_jump: - DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); - /* It doesn't matter what we push for the string here. What - the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (NULL, NULL, -2); - goto unconditional_jump; - - - /* At the end of an alternative, we need to push a dummy failure - point in case we are followed by a `pop_failure_jump', because - we don't want the failure point for the alternative to be - popped. For example, matching `(a|ab)*' against `aab' - requires that we match the `ab' alternative. */ - case push_dummy_failure: - DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); - /* See comments just above at `dummy_failure_jump' about the - two zeroes. */ - PUSH_FAILURE_POINT (NULL, NULL, -2); - break; - - /* Have to succeed matching what follows at least n times. - After that, handle like `on_failure_jump'. */ - case succeed_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); - - assert (mcnt >= 0); - /* Originally, this is how many times we HAVE to succeed. 
*/ - if (mcnt > 0) - { - mcnt--; - p += 2; - STORE_NUMBER_AND_INCR (p, mcnt); -#ifdef _LIBC - DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); -#else - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt); -#endif - } - else if (mcnt == 0) - { -#ifdef _LIBC - DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); -#else - DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); -#endif - p[2] = (unsigned char) no_op; - p[3] = (unsigned char) no_op; - goto on_failure; - } - break; - - case jump_n: - EXTRACT_NUMBER (mcnt, p + 2); - DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); - - /* Originally, this is how many times we CAN jump. */ - if (mcnt) - { - mcnt--; - STORE_NUMBER (p + 2, mcnt); -#ifdef _LIBC - DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); -#else - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt); -#endif - goto unconditional_jump; - } - /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; - break; - - case set_number_at: - { - DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); - - EXTRACT_NUMBER_AND_INCR (mcnt, p); - p1 = p + mcnt; - EXTRACT_NUMBER_AND_INCR (mcnt, p); -#ifdef _LIBC - DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); -#else - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); -#endif - STORE_NUMBER (p1, mcnt); - break; - } - -#if 0 - /* The DEC Alpha C compiler 3.x generates incorrect code for the - test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of - AT_WORD_BOUNDARY, so this code is disabled. Expanding the - macro and introducing temporary variables works around the bug. */ - - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - break; - goto fail; - - case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_WORD_BOUNDARY (d)) - goto fail; - break; -#else - case wordbound: - { - boolean prevchar, thischar; - - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) - break; - - prevchar = WORDCHAR_P (d - 1); - thischar = WORDCHAR_P (d); - if (prevchar != thischar) - break; - goto fail; - } - - case notwordbound: - { - boolean prevchar, thischar; - - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); - if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) - goto fail; - - prevchar = WORDCHAR_P (d - 1); - thischar = WORDCHAR_P (d); - if (prevchar != thischar) - goto fail; - break; - } -#endif - - case wordbeg: - DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); - if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) - break; - goto fail; - - case wordend: - DEBUG_PRINT1 ("EXECUTING wordend.\n"); - if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) - && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) - break; - goto fail; - -#ifdef emacs - case before_dot: - DEBUG_PRINT1 ("EXECUTING before_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) >= point) - goto fail; - break; - - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) != point) - goto fail; - break; - - case after_dot: - DEBUG_PRINT1 ("EXECUTING after_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) <= point) - goto fail; - break; - - case syntaxspec: - DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchsyntax; - - case wordchar: - DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); - mcnt = (int) Sword; - matchsyntax: - PREFETCH (); - /* Can't use *d++ here; SYNTAX may be an unsafe macro. 
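
succeed_n, jump_n and set_number_at above all rewrite two-byte counters inside the compiled pattern through STORE_NUMBER/EXTRACT_NUMBER, which are defined earlier in this file. A sketch of one plausible encoding (a signed 16-bit value kept low byte first) -- an assumption for illustration, not a copy of the real macros:

static void
sketch_store_number (unsigned char *dest, int number)
{
  dest[0] = (unsigned char) (number & 0xff);
  dest[1] = (unsigned char) ((number >> 8) & 0xff);
}

static int
sketch_extract_number (const unsigned char *src)
{
  int n = src[0] | (src[1] << 8);

  if (n & 0x8000)
    n -= 0x10000;                /* sign-extend from 16 bits */
  return n;
}
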
*/ - d++; - if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - - case notsyntaxspec: - DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); - mcnt = *p++; - goto matchnotsyntax; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); - mcnt = (int) Sword; - matchnotsyntax: - PREFETCH (); - /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ - d++; - if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) - goto fail; - SET_REGS_MATCHED (); - break; - -#else /* not emacs */ - case wordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); - PREFETCH (); - if (!WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; - - case notwordchar: - DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); - PREFETCH (); - if (WORDCHAR_P (d)) - goto fail; - SET_REGS_MATCHED (); - d++; - break; -#endif /* not emacs */ - - default: - abort (); - } - continue; /* Successfully executed one pattern command; keep going. */ - - - /* We goto here if a matching operation fails. */ - fail: - if (!FAIL_STACK_EMPTY ()) - { /* A restart point is known. Restore to that state. */ - DEBUG_PRINT1 ("\nFAIL:\n"); - POP_FAILURE_POINT (d, p, - lowest_active_reg, highest_active_reg, - regstart, regend, reg_info); - - /* If this failure point is a dummy, try the next one. */ - if (!p) - goto fail; - - /* If we failed to the end of the pattern, don't examine *p. */ - assert (p <= pend); - if (p < pend) - { - boolean is_a_jump_n = false; - - /* If failed to a backwards jump that's part of a repetition - loop, need to pop this failure point and use the next one. */ - switch ((re_opcode_t) *p) - { - case jump_n: - is_a_jump_n = true; - case maybe_pop_jump: - case pop_failure_jump: - case jump: - p1 = p + 1; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - - if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) - || (!is_a_jump_n - && (re_opcode_t) *p1 == on_failure_jump)) - goto fail; - break; - default: - /* do nothing */ ; - } - } - - if (d >= string1 && d <= end1) - dend = end_match_1; - } - else - break; /* Matching at this starting point really fails. */ - } /* for (;;) */ - - if (best_regs_set) - goto restore_best_regs; - - FREE_VARIABLES (); - - return -1; /* Failure to match. */ -} /* re_match_2 */ - -/* Subroutine definitions for re_match_2. */ - - -/* We are passed P pointing to a register number after a start_memory. - - Return true if the pattern up to the corresponding stop_memory can - match the empty string, and false otherwise. - - If we find the matching stop_memory, sets P to point to one past its number. - Otherwise, sets P to an undefined byte less than or equal to END. - - We don't handle duplicates properly (yet). */ - -static boolean -group_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - /* Point to after the args to the start_memory. */ - unsigned char *p1 = *p + 2; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and return true or - false, as appropriate, when we get to one that can't, or to the - matching stop_memory. */ - - switch ((re_opcode_t) *p1) - { - /* Could be either a loop or a series of alternatives. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - /* If the next operation is not a jump backwards in the - pattern. */ - - if (mcnt >= 0) - { - /* Go through the on_failure_jumps of the alternatives, - seeing if any of the alternatives cannot match nothing. 
- The last alternative starts with only a jump, - whereas the rest start with on_failure_jump and end - with a jump, e.g., here is the pattern for `a|b|c': - - /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 - /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c - - So, we have to first go through the first (n-1) - alternatives and then deal with the last one separately. */ - - - /* Deal with the first (n-1) alternatives, which start - with an on_failure_jump (see above) that jumps to right - past a jump_past_alt. */ - - while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) - { - /* `mcnt' holds how many bytes long the alternative - is, including the ending `jump_past_alt' and - its number. */ - - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, - reg_info)) - return false; - - /* Move to right after this alternative, including the - jump_past_alt. */ - p1 += mcnt; - - /* Break if it's the beginning of an n-th alternative - that doesn't begin with an on_failure_jump. */ - if ((re_opcode_t) *p1 != on_failure_jump) - break; - - /* Still have to check that it's not an n-th - alternative that starts with an on_failure_jump. */ - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) - { - /* Get to the beginning of the n-th alternative. */ - p1 -= 3; - break; - } - } - - /* Deal with the last alternative: go back and get number - of the `jump_past_alt' just before it. `mcnt' contains - the length of the alternative. */ - EXTRACT_NUMBER (mcnt, p1 - 2); - - if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) - return false; - - p1 += mcnt; /* Get past the n-th alternative. */ - } /* if mcnt > 0 */ - break; - - - case stop_memory: - assert (p1[1] == **p); - *p = p1 + 2; - return true; - - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return false; -} /* group_match_null_string_p */ - - -/* Similar to group_match_null_string_p, but doesn't deal with alternatives: - It expects P to be the first byte of a single alternative and END one - byte past the last. The alternative can contain groups. */ - -static boolean -alt_match_null_string_p (p, end, reg_info) - unsigned char *p, *end; - register_info_type *reg_info; -{ - int mcnt; - unsigned char *p1 = p; - - while (p1 < end) - { - /* Skip over opcodes that can match nothing, and break when we get - to one that can't. */ - - switch ((re_opcode_t) *p1) - { - /* It's a loop. */ - case on_failure_jump: - p1++; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - break; - - default: - if (!common_op_match_null_string_p (&p1, end, reg_info)) - return false; - } - } /* while p1 < end */ - - return true; -} /* alt_match_null_string_p */ - - -/* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - - Sets P to one after the op and its arguments, if any. 
*/ - -static boolean -common_op_match_null_string_p (p, end, reg_info) - unsigned char **p, *end; - register_info_type *reg_info; -{ - int mcnt; - boolean ret; - int reg_no; - unsigned char *p1 = *p; - - switch ((re_opcode_t) *p1++) - { - case no_op: - case begline: - case endline: - case begbuf: - case endbuf: - case wordbeg: - case wordend: - case wordbound: - case notwordbound: -#ifdef emacs - case before_dot: - case at_dot: - case after_dot: -#endif - break; - - case start_memory: - reg_no = *p1; - assert (reg_no > 0 && reg_no <= MAX_REGNUM); - ret = group_match_null_string_p (&p1, end, reg_info); - - /* Have to set this here in case we're checking a group which - contains a group and a back reference to it. */ - - if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; - - if (!ret) - return false; - break; - - /* If this is an optimized succeed_n for zero times, make the jump. */ - case jump: - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - if (mcnt >= 0) - p1 += mcnt; - else - return false; - break; - - case succeed_n: - /* Get to the number of times to succeed. */ - p1 += 2; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - - if (mcnt == 0) - { - p1 -= 4; - EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; - } - else - return false; - break; - - case duplicate: - if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) - return false; - break; - - case set_number_at: - p1 += 4; - - default: - /* All other opcodes mean we cannot match the empty string. */ - return false; - } - - *p = p1; - return true; -} /* common_op_match_null_string_p */ - - -/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN - bytes; nonzero otherwise. */ - -static int -bcmp_translate (s1, s2, len, translate) - const char *s1, *s2; - register int len; - RE_TRANSLATE_TYPE translate; -{ - register const unsigned char *p1 = (const unsigned char *) s1; - register const unsigned char *p2 = (const unsigned char *) s2; - while (len) - { - if (translate[*p1++] != translate[*p2++]) return 1; - len--; - } - return 0; -} - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length SIZE) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. - - We call regex_compile to do the actual compilation. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - size_t length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* GNU code is written to assume at least RE_NREGS registers will be set - (and at least one extra will be -1). */ - bufp->regs_allocated = REGS_UNALLOCATED; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub. */ - bufp->no_sub = 0; - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = regex_compile (pattern, length, re_syntax_options, bufp); - - if (!ret) - return NULL; - return gettext (re_error_msgid[(int) ret]); -} -#ifdef _LIBC -weak_alias (__re_compile_pattern, re_compile_pattern) -#endif - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC - -/* BSD has one and only one pattern buffer. 
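
bcmp_translate above compares two byte ranges after mapping every byte through a translate table; regcomp below builds such a table for REG_ICASE. A small self-contained sketch of the same idea, using a table that folds upper case to lower case:

#include <ctype.h>

static int
equal_under_case_fold (const char *s1, const char *s2, int len)
{
  unsigned char fold[256];
  int i;

  for (i = 0; i < 256; i++)
    fold[i] = (unsigned char) tolower (i);

  while (len-- > 0)
    if (fold[(unsigned char) *s1++] != fold[(unsigned char) *s2++])
      return 0;                  /* differ under the translation */

  return 1;                      /* identical under the translation */
}
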
*/ -static struct re_pattern_buffer re_comp_buf; - -char * -#ifdef _LIBC -/* Make these definitions weak in libc, so POSIX programs can redefine - these names if they don't use our functions, and still use - regcomp/regexec below without link errors. */ -weak_function -#endif -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - - if (!s) - { - if (!re_comp_buf.buffer) - return gettext ("No previous regular expression"); - return 0; - } - - if (!re_comp_buf.buffer) - { - re_comp_buf.buffer = (unsigned char *) malloc (200); - if (re_comp_buf.buffer == NULL) - return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); - re_comp_buf.allocated = 200; - - re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); - if (re_comp_buf.fastmap == NULL) - return (char *) gettext (re_error_msgid[(int) REG_ESPACE]); - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - if (!ret) - return NULL; - - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (re_error_msgid[(int) ret]); -} - - -int -#ifdef _LIBC -weak_function -#endif -re_exec (s) - const char *s; -{ - const int len = strlen (s); - return - 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); -} - -#endif /* _REGEX_RE_COMP */ - -/* POSIX.2 functions. Don't define these for Emacs. */ - -#ifndef emacs - -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' to an allocated space for the fastmap; - `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -#ifdef __APPLE__ -__private_extern__ -#endif -int -regcomp (preg, pattern, cflags) - regex_t *preg; - const char *pattern; - int cflags; -{ - reg_errcode_t ret; - reg_syntax_t syntax - = (cflags & REG_EXTENDED) ? - RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; - - /* regex_compile will allocate the space for the compiled pattern. */ - preg->buffer = 0; - preg->allocated = 0; - preg->used = 0; - - /* Try to allocate space for the fastmap. 
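
A usage sketch of the 4.2 BSD style interface above: one global pattern, re_comp to compile it, re_exec to test a string against it. The prototypes are written out by hand here because they are only declared when _REGEX_RE_COMP or _LIBC is defined; pattern and text are illustrative.

#include <stdio.h>

extern char *re_comp (const char *);
extern int re_exec (const char *);

static void
bsd_style_demo (void)
{
  char *err = re_comp ("^ab*c");       /* NULL means the pattern compiled */

  if (err != NULL)
    {
      fprintf (stderr, "re_comp: %s\n", err);
      return;
    }

  if (re_exec ("abbbc"))               /* nonzero when the string matches */
    printf ("matched\n");
}
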
*/ - preg->fastmap = (char *) malloc (1 << BYTEWIDTH); - - if (cflags & REG_ICASE) - { - unsigned i; - - preg->translate - = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE - * sizeof (*(RE_TRANSLATE_TYPE)0)); - if (preg->translate == NULL) - return (int) REG_ESPACE; - - /* Map uppercase characters to corresponding lowercase ones. */ - for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; - } - else - preg->translate = NULL; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - - preg->no_sub = !!(cflags & REG_NOSUB); - - /* POSIX says a null character in the pattern terminates it, so we - can use strlen here in compiling the pattern. */ - ret = regex_compile (pattern, strlen (pattern), syntax, preg); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) ret = REG_EPAREN; - - if (ret == REG_NOERROR && preg->fastmap) - { - /* Compute the fastmap now, since regexec cannot modify the pattern - buffer. */ - if (re_compile_fastmap (preg) == -2) - { - /* Some error occured while computing the fastmap, just forget - about it. */ - free (preg->fastmap); - preg->fastmap = NULL; - } - } - - return (int) ret; -} -#ifdef _LIBC -weak_alias (__regcomp, regcomp) -#endif - - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -#ifdef __APPLE__ -__private_extern__ -#endif -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - int ret; - struct re_registers regs; - regex_t private_preg; - int len = strlen (string); - boolean want_reg_info = !preg->no_sub && nmatch > 0; - - private_preg = *preg; - - private_preg.not_bol = !!(eflags & REG_NOTBOL); - private_preg.not_eol = !!(eflags & REG_NOTEOL); - - /* The user has told us exactly how many registers to return - information about, via `nmatch'. We have to pass that on to the - matching routines. */ - private_preg.regs_allocated = REGS_FIXED; - - if (want_reg_info) - { - regs.num_regs = nmatch; - regs.start = TALLOC (nmatch * 2, regoff_t); - if (regs.start == NULL) - return (int) REG_NOMATCH; - regs.end = regs.start + nmatch; - } - - /* Perform the searching operation. */ - ret = re_search (&private_preg, string, len, - /* start: */ 0, /* range: */ len, - want_reg_info ? ®s : (struct re_registers *) 0); - - /* Copy the register information to the POSIX structure. */ - if (want_reg_info) - { - if (ret >= 0) - { - unsigned r; - - for (r = 0; r < nmatch; r++) - { - pmatch[r].rm_so = regs.start[r]; - pmatch[r].rm_eo = regs.end[r]; - } - } - - /* If we needed the temporary register info, free the space now. 
*/ - free (regs.start); - } - - /* We want zero return to mean success, unlike `re_search'. */ - return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; -} -#ifdef _LIBC -weak_alias (__regexec, regexec) -#endif - - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; -{ - const char *msg; - size_t msg_size; - - if (errcode < 0 - || errcode >= (int) (sizeof (re_error_msgid) - / sizeof (re_error_msgid[0]))) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = gettext (re_error_msgid[errcode]); - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (errbuf_size != 0) - { - if (msg_size > errbuf_size) - { -#if defined HAVE_MEMPCPY || defined _LIBC - *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; -#else - memcpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; -#endif - } - else - memcpy (errbuf, msg, msg_size); - } - - return msg_size; -} -#ifdef _LIBC -weak_alias (__regerror, regerror) -#endif - - -/* Free dynamically allocated space used by PREG. */ - -#ifdef __APPLE__ -__private_extern__ -#endif -void -regfree (preg) - regex_t *preg; -{ - if (preg->buffer != NULL) - free (preg->buffer); - preg->buffer = NULL; - - preg->allocated = 0; - preg->used = 0; - - if (preg->fastmap != NULL) - free (preg->fastmap); - preg->fastmap = NULL; - preg->fastmap_accurate = 0; - - if (preg->translate != NULL) - free (preg->translate); - preg->translate = NULL; -} -#ifdef _LIBC -weak_alias (__regfree, regfree) -#endif - -#endif /* not emacs */ diff --git a/contrib/awk/replace.c b/contrib/awk/replace.c deleted file mode 100644 index 81e1745..0000000 --- a/contrib/awk/replace.c +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Do all necessary includes here, so that we don't have to worry about - * overlapping includes in the files in missing.d. 
- */ -#include "config.h" -#include "awk.h" - - -#ifdef atarist -/* - * this will work with gcc compiler - for other compilers you may - * have to replace path separators in this file into backslashes - */ -#include "unsupported/atari/stack.c" -#include "unsupported/atari/tmpnam.c" -#endif /* atarist */ - -#ifndef HAVE_SYSTEM -#ifdef atarist -#include "unsupported/atari/system.c" -#else -#include "missing_d/system.c" -#endif -#endif /* HAVE_SYSTEM */ - -#ifndef HAVE_MEMCMP -#include "missing_d/memcmp.c" -#endif /* HAVE_MEMCMP */ - -#ifndef HAVE_MEMCPY -#include "missing_d/memcpy.c" -#endif /* HAVE_MEMCPY */ - -#ifndef HAVE_MEMSET -#include "missing_d/memset.c" -#endif /* HAVE_MEMSET */ - -#ifndef HAVE_STRNCASECMP -#include "missing_d/strncasecmp.c" -#endif /* HAVE_STRCASE */ - -#ifndef HAVE_STRERROR -#include "missing_d/strerror.c" -#endif /* HAVE_STRERROR */ - -#ifndef HAVE_STRFTIME -#include "missing_d/strftime.c" -#endif /* HAVE_STRFTIME */ - -#ifndef HAVE_STRCHR -#include "missing_d/strchr.c" -#endif /* HAVE_STRCHR */ - -#if !defined(HAVE_STRTOD) || defined(STRTOD_NOT_C89) -#include "missing_d/strtod.c" -#endif /* HAVE_STRTOD */ - -#ifndef HAVE_TZSET -#include "missing_d/tzset.c" -#endif /* HAVE_TZSET */ - -#ifndef HAVE_MKTIME -#include "missing_d/mktime.c" -#endif /* HAVE_MKTIME */ - -#if defined TANDEM -#include "strdupc" -#include "getidc" -#include "strnchkc" -#endif /* TANDEM */ diff --git a/contrib/awk/stamp-h.in b/contrib/awk/stamp-h.in deleted file mode 100644 index 9788f70..0000000 --- a/contrib/awk/stamp-h.in +++ /dev/null @@ -1 +0,0 @@ -timestamp diff --git a/contrib/awk/test/Makefile.am b/contrib/awk/test/Makefile.am deleted file mode 100644 index e1dbdfd..0000000 --- a/contrib/awk/test/Makefile.am +++ /dev/null @@ -1,1036 +0,0 @@ -# -# test/Makefile.am --- automake input file for gawk -# -# Copyright (C) 1988-2001 the Free Software Foundation, Inc. -# -# This file is part of GAWK, the GNU implementation of the -# AWK Programming Language. -# -# GAWK is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# GAWK is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA -# - -## process this file with automake to produce Makefile.in - -EXTRA_DIST = \ - reg \ - lib \ - README \ - addcomma.awk \ - addcomma.in \ - addcomma.ok \ - anchgsub.awk \ - anchgsub.in \ - anchgsub.ok \ - argarray.awk \ - argarray.in \ - argarray.ok \ - argtest.awk \ - argtest.ok \ - arrayparm.awk \ - arrayparm.ok \ - arrayref.awk \ - arrayref.ok \ - arynasty.awk \ - arynasty.ok \ - arynocls.awk \ - arynocls.in \ - arynocls.ok \ - arysubnm.awk \ - arysubnm.ok \ - asgext.awk \ - asgext.in \ - asgext.ok \ - awkpath.ok \ - back89.in \ - back89.ok \ - backgsub.awk \ - backgsub.in \ - backgsub.ok \ - badargs.ok \ - childin.ok \ - clobber.awk \ - clobber.ok \ - clos1way.awk \ - clos1way.ok \ - clsflnam.awk \ - clsflnam.in \ - clsflnam.ok \ - compare.awk \ - compare.in \ - compare.ok \ - convfmt.awk \ - convfmt.ok \ - datanonl.awk \ - datanonl.in \ - datanonl.ok \ - defref.awk \ - defref.ok \ - delarprm.awk \ - delarprm.ok \ - dynlj.awk \ - dynlj.ok \ - eofsplit.awk \ - eofsplit.ok \ - fflush.ok \ - fflush.sh \ - fieldwdth.ok \ - fldchg.awk \ - fldchg.in \ - fldchg.ok \ - fldchgnf.awk \ - fldchgnf.in \ - fldchgnf.ok \ - fnamedat.awk \ - fnamedat.in \ - fnamedat.ok \ - fnarray.awk \ - fnarray.ok \ - fnarydel.awk \ - fnarydel.ok \ - fnaryscl.awk \ - fnaryscl.ok \ - fnasgnm.awk \ - fnasgnm.in \ - fnasgnm.ok \ - fnparydl.awk \ - fnparydl.ok \ - fsbs.in \ - fsbs.ok \ - fsfwfs.awk \ - fsfwfs.in \ - fsfwfs.ok \ - fsrs.awk \ - fsrs.in \ - fsrs.ok \ - fstabplus.awk \ - fstabplus.ok \ - funsemnl.awk \ - funsemnl.ok \ - funsmnam.awk \ - funsmnam.ok \ - funstack.awk \ - funstack.in \ - funstack.ok \ - gensub.awk \ - gensub.in \ - gensub.ok \ - getline.awk \ - getline.ok \ - getlnbuf.awk \ - getlnbuf.in \ - getlnbuf.ok \ - getlnhd.awk \ - getlnhd.ok \ - getnr2tb.awk \ - getnr2tb.in \ - getnr2tb.ok \ - getnr2tm.awk \ - getnr2tm.in \ - getnr2tm.ok \ - gnuops2.awk \ - gnuops2.ok \ - gnureops.awk \ - gnureops.ok \ - gsubasgn.awk \ - gsubasgn.ok \ - gsubtest.awk \ - gsubtest.ok \ - gtlnbufv.awk \ - hsprint.awk \ - hsprint.ok \ - igncdym.awk \ - igncdym.in \ - igncdym.ok \ - igncfs.awk \ - igncfs.in \ - igncfs.ok \ - ignrcase.ok \ - inftest.awk \ - inftest.ok \ - intest.awk \ - intest.ok \ - intprec.awk \ - intprec.ok \ - leaddig.awk \ - leaddig.ok \ - leadnl.awk \ - leadnl.in \ - leadnl.ok \ - lint.awk \ - lint.ok \ - litoct.awk \ - litoct.ok \ - longwrds.awk \ - longwrds.ok \ - manpage \ - manyfiles.awk \ - math.awk \ - math.ok \ - messages.awk \ - mmap8k.in \ - nasty.awk \ - nasty.ok \ - nasty2.awk \ - nasty2.ok \ - negexp.ok \ - nfldstr.ok \ - nfset.awk \ - nfset.in \ - nfset.ok \ - nlfldsep.awk \ - nlfldsep.in \ - nlfldsep.ok \ - nlinstr.awk \ - nlinstr.in \ - nlinstr.ok \ - nlstrina.awk \ - nlstrina.ok \ - noeffect.awk \ - noeffect.ok \ - nofmtch.awk \ - nofmtch.ok \ - nondec.awk \ - nondec.ok \ - nonl.awk \ - nonl.ok \ - noparms.awk \ - noparms.ok \ - nors.in \ - nors.ok \ - numindex.awk \ - numindex.in \ - numindex.ok \ - numsubstr.awk \ - numsubstr.in \ - numsubstr.ok \ - octsub.awk \ - octsub.ok \ - ofmt.awk \ - ofmt.in \ - ofmt.ok \ - ofmtbig.awk \ - ofmtbig.in \ - ofmtbig.ok \ - ofmts.awk \ - ofmts.in \ - ofmts.ok \ - opasnidx.awk \ - opasnidx.ok \ - opasnslf.awk \ - opasnslf.ok \ - out1.ok \ - out2.ok \ - out3.ok \ - paramdup.awk \ - paramdup.ok \ - paramtyp.awk \ - paramtyp.ok \ - 
parseme.awk \ - parseme.ok \ - pcntplus.awk \ - pcntplus.ok \ - pid.awk \ - pid.ok \ - pid.sh \ - pipeio1.awk \ - pipeio1.ok \ - pipeio2.awk \ - pipeio2.in \ - pipeio2.ok \ - posix.awk \ - posix.ok \ - poundbang.awk \ - poundbang.ok \ - prdupval.awk \ - prdupval.in \ - prdupval.ok \ - printf1.awk \ - printf1.ok \ - printfloat.awk \ - prmarscl.awk \ - prmarscl.ok \ - prmreuse.awk \ - prmreuse.ok \ - procinfs.awk \ - procinfs.ok \ - prt1eval.awk \ - prt1eval.ok \ - prtoeval.awk \ - prtoeval.ok \ - psx96sub.awk \ - psx96sub.ok \ - rand.awk \ - rand.ok \ - rebt8b1.awk \ - rebt8b1.ok \ - rebt8b2.awk \ - rebt8b2.ok \ - redfilnm.awk \ - redfilnm.in \ - redfilnm.ok \ - regeq.awk \ - regeq.in \ - regeq.ok \ - regtest.sh \ - regx8bit.awk \ - regx8bit.ok \ - reindops.awk \ - reindops.in \ - reindops.ok \ - reint.awk \ - reint.in \ - reint.ok \ - reparse.awk \ - reparse.in \ - reparse.ok \ - resplit.ok \ - rs.in \ - rs.ok \ - rsnul1nl.awk \ - rsnul1nl.in \ - rsnul1nl.ok \ - rswhite.awk \ - rswhite.in \ - rswhite.ok \ - sclforin.awk \ - sclforin.ok \ - sclifin.awk \ - sclifin.ok \ - shadow.awk \ - shadow.ok \ - sort1.awk \ - sort1.ok \ - splitargv.awk \ - splitargv.in \ - splitargv.ok \ - splitdef.awk \ - splitdef.ok \ - splitvar.awk \ - splitvar.in \ - splitvar.ok \ - splitwht.awk \ - splitwht.ok \ - sprintfc.awk \ - sprintfc.in \ - sprintfc.ok \ - strtod.awk \ - strtod.in \ - strtod.ok \ - strftime.awk \ - strftlng.awk \ - strftlng.ok \ - subslash.awk \ - subslash.ok \ - substr.awk \ - substr.ok \ - swaplns.awk \ - swaplns.in \ - swaplns.ok \ - tradanch.awk \ - tradanch.in \ - tradanch.ok \ - tweakfld.awk \ - tweakfld.in \ - tweakfld.ok \ - zeroflag.awk \ - zeroflag.ok - -CMP = cmp -AWK = ../gawk - -# message stuff is to make it a little easier to follow -check: msg \ - basic-msg-start basic basic-msg-end \ - unix-msg-start unix-tests unix-msg-end \ - extend-msg-start gawk-extensions extend-msg-end - -# try to keep these sorted -basic: addcomma anchgsub argarray arrayparm arrayref arynasty arynocls \ - arysubnm asgext awkpath back89 backgsub childin clobber clsflnam \ - compare convfmt datanonl defref delarprm dynlj eofsplit fldchg \ - fldchgnf fnamedat fnarray fnarydel fnaryscl fnasgnm fnparydl \ - fsbs fsrs fstabplus funsemnl funsmnam funstack getline getlnbuf getnr2tb \ - getnr2tm gsubasgn gsubtest hsprint intest intprec leaddig leadnl litoct \ - longwrds math messages mmap8k nasty nasty2 negexp nfldstr nfset \ - nlfldsep nlinstr nlstrina noeffect nofmtch nonl noparms nors \ - numindex numsubstr octsub ofmt ofmtbig ofmts opasnidx opasnslf \ - paramdup paramtyp parseme pcntplus prdupval printf1 prmarscl \ - prmreuse prt1eval prtoeval psx96sub rand rebt8b1 rebt8b2 redfilnm \ - regeq reindops reparse resplit rs rsnul1nl rswhite sclforin \ - sclifin splitargv splitdef splitvar splitwht sprintfc strtod \ - subslash substr swaplns tradanch tweakfld zeroflag - -unix-tests: fflush getlnhd pid pipeio1 pipeio2 poundbang strftlng - -gawk-extensions: argtest badargs clos1way fieldwdth fsfwfs gensub \ - gnuops2 gnureops igncdym igncfs ignrcase lint manyfiles nondec \ - posix procinfs regx8bit reint shadow sort1 strftime - -extra: regtest inftest inet - -inet: inetmesg inetechu inetecht inetdayu inetdayt - -msg:: - @echo 'Any output from "cmp" is bad news, although some differences' - @echo 'in floating point values are probably benign -- in particular,' - @echo 'some systems may omit a leading zero and the floating point' - @echo 'precision may lead to slightly different output in a few cases.' 
- -basic-msg-start: - @echo "======== Starting basic tests ========" - -basic-msg-end: - @echo "======== Done with basic tests ========" - -unix-msg-start: - @echo "======== Starting Unix tests ========" - -unix-msg-end: - @echo "======== Done with Unix tests ========" - -extend-msg-start: - @echo "======== Starting gawk extension tests ========" - -extend-msg-end: - @echo "======== Done with gawk extension tests ========" - - -# This test is a PITA because increasingly, /tmp is getting -# mounted noexec. So, we'll test it. Sigh. -poundbang:: - @cp $(AWK) /tmp/gawk - @if /tmp/gawk 'BEGIN { print "OK" }' | grep OK > /dev/null ; \ - then \ - $(srcdir)/poundbang.awk $(srcdir)/poundbang.awk >_`basename $@` ; \ - $(CMP) $(srcdir)/poundbang.ok _`basename $@` && rm -f _`basename $@` && echo poundbang is ok ; \ - else \ - echo "*** /tmp is apparently mounted noexec, skipping poundbang test." ; \ - fi - @rm -f /tmp/gawk - -swaplns:: - @$(AWK) -f $(srcdir)/swaplns.awk $(srcdir)/swaplns.in >_$@ - -$(CMP) $(srcdir)/swaplns.ok _$@ && rm -f _$@ - -messages:: - @$(AWK) -f $(srcdir)/messages.awk >out2 2>out3 - -$(CMP) $(srcdir)/out1.ok out1 && $(CMP) $(srcdir)/out2.ok out2 && $(CMP) $(srcdir)/out3.ok out3 && rm -f out1 out2 out3 - -argarray:: - @case $(srcdir) in \ - .) : ;; \ - *) cp $(srcdir)/argarray.in . ;; \ - esac - @TEST=test echo just a test | $(AWK) -f $(srcdir)/argarray.awk ./argarray.in - >_$@ - -$(CMP) $(srcdir)/argarray.ok _$@ && rm -f _$@ - -fstabplus:: - @echo '1 2' | $(AWK) -f $(srcdir)/fstabplus.awk >_$@ - -$(CMP) $(srcdir)/fstabplus.ok _$@ && rm -f _$@ - -fsrs:: - @$(AWK) -f $(srcdir)/fsrs.awk $(srcdir)/fsrs.in >_$@ - -$(CMP) $(srcdir)/fsrs.ok _$@ && rm -f _$@ - -igncfs:: - @$(AWK) -f $(srcdir)/igncfs.awk $(srcdir)/igncfs.in >_$@ - -$(CMP) $(srcdir)/igncfs.ok _$@ && rm -f _$@ - -longwrds:: - @$(AWK) -f $(srcdir)/longwrds.awk $(srcdir)/manpage | (LC_ALL=C sort) >_$@ - -$(CMP) $(srcdir)/longwrds.ok _$@ && rm -f _$@ - -fieldwdth:: - @echo '123456789' | $(AWK) -v FIELDWIDTHS="2 3 4" '{ print $$2}' >_$@ - -$(CMP) $(srcdir)/fieldwdth.ok _$@ && rm -f _$@ - -ignrcase:: - @echo xYz | $(AWK) -v IGNORECASE=1 '{ sub(/y/, ""); print}' >_$@ - -$(CMP) $(srcdir)/ignrcase.ok _$@ && rm -f _$@ - -regtest:: - @echo 'Some of the output from regtest is very system specific, do not' - @echo 'be distressed if your output differs from that distributed.' - @echo 'Manual inspection is called for.' - AWK=`pwd`/$(AWK) $(srcdir)/regtest.awk - -posix:: - @echo '1:2,3 4' | $(AWK) -f $(srcdir)/posix.awk >_$@ - -$(CMP) $(srcdir)/posix.ok _$@ && rm -f _$@ - -manyfiles:: - @rm -rf junk - @mkdir junk - @$(AWK) 'BEGIN { for (i = 1; i <= 300; i++) print i, i}' >_$@ - @$(AWK) -f $(srcdir)/manyfiles.awk _$@ _$@ - @echo "This number better be 1 ->" | tr -d '\012' - @wc -l junk/* | $(AWK) '$$1 != 2' | wc -l - @rm -rf junk _$@ - -compare:: - @$(AWK) -f $(srcdir)/compare.awk 0 1 $(srcdir)/compare.in >_$@ - -$(CMP) $(srcdir)/compare.ok _$@ && rm -f _$@ - -arrayref:: - @$(AWK) -f $(srcdir)/arrayref.awk >_$@ - -$(CMP) $(srcdir)/arrayref.ok _$@ && rm -f _$@ - -rs:: - @$(AWK) -v RS="" '{ print $$1, $$2}' $(srcdir)/rs.in >_$@ - -$(CMP) $(srcdir)/rs.ok _$@ && rm -f _$@ - -fsbs:: - @$(AWK) -v FS='\' '{ print $$1, $$2 }' $(srcdir)/fsbs.in >_$@ - -$(CMP) $(srcdir)/fsbs.ok _$@ && rm -f _$@ - -inftest:: - @echo This test is very machine specific... 
- @$(AWK) -f $(srcdir)/inftest.awk >_$@ - -$(CMP) $(srcdir)/inftest.ok _$@ && rm -f _$@ - -getline:: - @$(AWK) -f $(srcdir)/getline.awk $(srcdir)/getline.awk $(srcdir)/getline.awk >_$@ - -$(CMP) $(srcdir)/getline.ok _$@ && rm -f _$@ - -rand:: - @$(AWK) -f $(srcdir)/rand.awk >_$@ - -$(CMP) $(srcdir)/rand.ok _$@ && rm -f _$@ - -negexp:: - @$(AWK) 'BEGIN { a = -2; print 10^a }' >_$@ - -$(CMP) $(srcdir)/negexp.ok _$@ && rm -f _$@ - -asgext:: - @$(AWK) -f $(srcdir)/asgext.awk $(srcdir)/asgext.in >_$@ - -$(CMP) $(srcdir)/asgext.ok _$@ && rm -f _$@ - -anchgsub:: - @$(AWK) -f $(srcdir)/anchgsub.awk $(srcdir)/anchgsub.in >_$@ - -$(CMP) $(srcdir)/anchgsub.ok _$@ && rm -f _$@ - -splitargv:: - @$(AWK) -f $(srcdir)/splitargv.awk $(srcdir)/splitargv.in >_$@ - -$(CMP) $(srcdir)/splitargv.ok _$@ && rm -f _$@ - -awkpath:: - @AWKPATH="$(srcdir):$(srcdir)/lib" $(AWK) -f awkpath.awk >_$@ - -$(CMP) $(srcdir)/awkpath.ok _$@ && rm -f _$@ - -nfset:: - @$(AWK) -f $(srcdir)/nfset.awk $(srcdir)/nfset.in >_$@ - -$(CMP) $(srcdir)/nfset.ok _$@ && rm -f _$@ - -reparse:: - @$(AWK) -f $(srcdir)/reparse.awk $(srcdir)/reparse.in >_$@ - -$(CMP) $(srcdir)/reparse.ok _$@ && rm -f _$@ - -argtest:: - @$(AWK) -f $(srcdir)/argtest.awk -x -y abc >_$@ - -$(CMP) $(srcdir)/argtest.ok _$@ && rm -f _$@ - -badargs:: - @-$(AWK) -f 2>&1 | grep -v patchlevel >_$@ - -$(CMP) $(srcdir)/badargs.ok _$@ && rm -f _$@ - -convfmt:: - @$(AWK) -f $(srcdir)/convfmt.awk >_$@ - -$(CMP) $(srcdir)/convfmt.ok _$@ && rm -f _$@ - -arrayparm:: - @-AWKPATH=$(srcdir) $(AWK) -f arrayparm.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/arrayparm.ok _$@ && rm -f _$@ - -paramdup:: - @-AWKPATH=$(srcdir) $(AWK) -f paramdup.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/paramdup.ok _$@ && rm -f _$@ - -nonl:: - @-AWKPATH=$(srcdir) $(AWK) --lint -f nonl.awk /dev/null >_$@ 2>&1 - -$(CMP) $(srcdir)/nonl.ok _$@ && rm -f _$@ - -defref:: - @-AWKPATH=$(srcdir) $(AWK) --lint -f defref.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/defref.ok _$@ && rm -f _$@ - -nofmtch:: - @-AWKPATH=$(srcdir) $(AWK) --lint -f nofmtch.awk >_$@ 2>&1 - -$(CMP) $(srcdir)/nofmtch.ok _$@ && rm -f _$@ - -strftime:: - : this test could fail on slow machines or on a second boundary, - : so if it does, double check the actual results - @LC_ALL=C; export LC_ALL; LANG=C; export LANG; \ - date | $(AWK) -v OUTPUT=_$@ -f $(srcdir)/strftime.awk - -$(CMP) strftime.ok _$@ && rm -f _$@ strftime.ok || exit 0 - -litoct:: - @echo ab | $(AWK) --traditional -f $(srcdir)/litoct.awk >_$@ - -$(CMP) $(srcdir)/litoct.ok _$@ && rm -f _$@ - -gensub:: - @$(AWK) -f $(srcdir)/gensub.awk $(srcdir)/gensub.in >_$@ - -$(CMP) $(srcdir)/gensub.ok _$@ && rm -f _$@ - -resplit:: - @echo a:b:c d:e:f | $(AWK) '{ FS = ":"; $$0 = $$0; print $$2 }' > _$@ - -$(CMP) $(srcdir)/resplit.ok _$@ && rm -f _$@ - -rswhite:: - @$(AWK) -f $(srcdir)/rswhite.awk $(srcdir)/rswhite.in > _$@ - -$(CMP) $(srcdir)/rswhite.ok _$@ && rm -f _$@ - -prmarscl:: - @-AWKPATH=$(srcdir) $(AWK) -f prmarscl.awk > _$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/prmarscl.ok _$@ && rm -f _$@ - -sclforin:: - @-AWKPATH=$(srcdir) $(AWK) -f sclforin.awk > _$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/sclforin.ok _$@ && rm -f _$@ - -sclifin:: - @-AWKPATH=$(srcdir) $(AWK) -f sclifin.awk > _$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/sclifin.ok _$@ && rm -f _$@ - -intprec:: - @-$(AWK) -f $(srcdir)/intprec.awk > _$@ 2>&1 - -$(CMP) $(srcdir)/intprec.ok _$@ && rm -f _$@ - -childin:: - @echo hi | $(AWK) 'BEGIN { "cat" | getline; print; close("cat") }' > _$@ - -$(CMP) $(srcdir)/childin.ok _$@ && rm -f _$@ - -noeffect:: 
- @-AWKPATH=$(srcdir) $(AWK) --lint -f noeffect.awk > _$@ 2>&1 - -$(CMP) $(srcdir)/noeffect.ok _$@ && rm -f _$@ - -numsubstr:: - @-AWKPATH=$(srcdir) $(AWK) -f numsubstr.awk $(srcdir)/numsubstr.in >_$@ - -$(CMP) $(srcdir)/numsubstr.ok _$@ && rm -f _$@ - -gnureops:: - @$(AWK) -f $(srcdir)/gnureops.awk >_$@ - -$(CMP) $(srcdir)/gnureops.ok _$@ && rm -f _$@ - -pcntplus:: - @$(AWK) -f $(srcdir)/pcntplus.awk >_$@ - -$(CMP) $(srcdir)/pcntplus.ok _$@ && rm -f _$@ - -prmreuse:: - @$(AWK) -f $(srcdir)/prmreuse.awk >_$@ - -$(CMP) $(srcdir)/prmreuse.ok _$@ && rm -f _$@ - -math:: - @$(AWK) -f $(srcdir)/math.awk >_$@ - -$(CMP) $(srcdir)/math.ok _$@ && rm -f _$@ - -fflush:: - @$(srcdir)/fflush.sh >_$@ - -$(CMP) $(srcdir)/fflush.ok _$@ && rm -f _$@ - -fldchg:: - @$(AWK) -f $(srcdir)/fldchg.awk $(srcdir)/fldchg.in >_$@ - -$(CMP) $(srcdir)/fldchg.ok _$@ && rm -f _$@ - -fldchgnf:: - @$(AWK) -f $(srcdir)/fldchgnf.awk $(srcdir)/fldchgnf.in >_$@ - -$(CMP) $(srcdir)/fldchgnf.ok _$@ && rm -f _$@ - -reindops:: - @$(AWK) -f $(srcdir)/reindops.awk $(srcdir)/reindops.in >_$@ - -$(CMP) $(srcdir)/reindops.ok _$@ && rm -f _$@ - -sprintfc:: - @$(AWK) -f $(srcdir)/sprintfc.awk $(srcdir)/sprintfc.in >_$@ - -$(CMP) $(srcdir)/sprintfc.ok _$@ && rm -f _$@ - -getlnhd:: - @$(AWK) -f $(srcdir)/getlnhd.awk >_$@ - -$(CMP) $(srcdir)/getlnhd.ok _$@ && rm -f _$@ - -backgsub:: - @$(AWK) -f $(srcdir)/backgsub.awk $(srcdir)/backgsub.in >_$@ - -$(CMP) $(srcdir)/backgsub.ok _$@ && rm -f _$@ - -tweakfld:: - @$(AWK) -f $(srcdir)/tweakfld.awk $(srcdir)/tweakfld.in >_$@ - @rm -f errors.cleanup - -$(CMP) $(srcdir)/tweakfld.ok _$@ && rm -f _$@ - -clsflnam:: - @$(AWK) -f $(srcdir)/clsflnam.awk $(srcdir)/clsflnam.in >_$@ 2>&1 - -$(CMP) $(srcdir)/clsflnam.ok _$@ && rm -f _$@ - -mmap8k:: - @$(AWK) '{ print }' $(srcdir)/mmap8k.in >_$@ - -$(CMP) $(srcdir)/mmap8k.in _$@ && rm -f _$@ - -fnarray:: - @-AWKPATH=$(srcdir) $(AWK) -f fnarray.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/fnarray.ok _$@ && rm -f _$@ - -dynlj:: - @$(AWK) -f $(srcdir)/dynlj.awk >_$@ - -$(CMP) $(srcdir)/dynlj.ok _$@ && rm -f _$@ - -substr:: - @$(AWK) -f $(srcdir)/substr.awk >_$@ - -$(CMP) $(srcdir)/substr.ok _$@ && rm -f _$@ - -eofsplit:: - @$(AWK) -f $(srcdir)/eofsplit.awk >_$@ - -$(CMP) $(srcdir)/eofsplit.ok _$@ && rm -f _$@ - -prt1eval:: - @$(AWK) -f $(srcdir)/prt1eval.awk >_$@ - -$(CMP) $(srcdir)/prt1eval.ok _$@ && rm -f _$@ - -gsubasgn:: - @-AWKPATH=$(srcdir) $(AWK) -f gsubasgn.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/gsubasgn.ok _$@ && rm -f _$@ - -prtoeval:: - @$(AWK) -f $(srcdir)/prtoeval.awk >_$@ - -$(CMP) $(srcdir)/prtoeval.ok _$@ && rm -f _$@ - -gsubtest:: - @$(AWK) -f $(srcdir)/gsubtest.awk >_$@ - -$(CMP) $(srcdir)/gsubtest.ok _$@ && rm -f _$@ - -splitwht:: - @$(AWK) -f $(srcdir)/splitwht.awk >_$@ - -$(CMP) $(srcdir)/splitwht.ok _$@ && rm -f _$@ - -back89:: - @$(AWK) '/a\8b/' $(srcdir)/back89.in >_$@ - -$(CMP) $(srcdir)/back89.ok _$@ && rm -f _$@ - -tradanch:: - @$(AWK) --traditional -f $(srcdir)/tradanch.awk $(srcdir)/tradanch.in >_$@ - -$(CMP) $(srcdir)/tradanch.ok _$@ && rm -f _$@ - -nlfldsep:: - @$(AWK) -f $(srcdir)/nlfldsep.awk $(srcdir)/nlfldsep.in > _$@ - -$(CMP) $(srcdir)/nlfldsep.ok _$@ && rm -f _$@ - -splitvar:: - @$(AWK) -f $(srcdir)/splitvar.awk $(srcdir)/splitvar.in >_$@ - -$(CMP) $(srcdir)/splitvar.ok _$@ && rm -f _$@ - -intest:: - @$(AWK) -f $(srcdir)/intest.awk >_$@ - -$(CMP) $(srcdir)/intest.ok _$@ && rm -f _$@ - -# AIX /bin/sh exec's the last command in a list, therefore issue a ":" -# command so that pid.sh is fork'ed as a child before being 
exec'ed. -pid:: - @AWKPATH=$(srcdir) AWK=$(AWK) $(SHELL) $(srcdir)/pid.sh $$$$ > _`basename $@` ; : - -$(CMP) $(srcdir)/pid.ok _`basename $@` && rm -f _`basename $@` _`basename $@`.in - -strftlng:: - @TZ=UTC; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ - @if $(CMP) -s $(srcdir)/strftlng.ok _$@ ; then : ; else \ - TZ=UTC0; export TZ; $(AWK) -f $(srcdir)/strftlng.awk >_$@ ; \ - fi - -$(CMP) $(srcdir)/strftlng.ok _$@ && rm -f _$@ - -nfldstr:: - @echo | $(AWK) '$$1 == 0 { print "bug" }' > _$@ - -$(CMP) $(srcdir)/nfldstr.ok _$@ && rm -f _$@ - -nors:: - @echo A B C D E | tr -d '\12' | $(AWK) '{ print $$NF }' - $(srcdir)/nors.in > _$@ - -$(CMP) $(srcdir)/nors.ok _$@ && rm -f _$@ - -fnarydel:: - @$(AWK) -f $(srcdir)/fnarydel.awk >_$@ - -$(CMP) $(srcdir)/fnarydel.ok _$@ && rm -f _$@ - -reint:: - @$(AWK) --re-interval -f $(srcdir)/reint.awk $(srcdir)/reint.in >_$@ - -$(CMP) $(srcdir)/reint.ok _$@ && rm -f _$@ - -noparms:: - @-AWKPATH=$(srcdir) $(AWK) -f noparms.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/noparms.ok _$@ && rm -f _$@ - -pipeio1:: - @$(AWK) -f $(srcdir)/pipeio1.awk >_$@ - @rm -f test1 test2 - -$(CMP) $(srcdir)/pipeio1.ok _$@ && rm -f _$@ - -pipeio2:: - @$(AWK) -v SRCDIR=$(srcdir) -f $(srcdir)/pipeio2.awk >_$@ - -$(CMP) $(srcdir)/pipeio2.ok _$@ && rm -f _$@ - -funstack:: - @$(AWK) -f $(srcdir)/funstack.awk $(srcdir)/funstack.in >_$@ - -$(CMP) $(srcdir)/funstack.ok _$@ && rm -f _$@ - -clobber:: - @$(AWK) -f $(srcdir)/clobber.awk >_$@ - -$(CMP) $(srcdir)/clobber.ok seq && $(CMP) $(srcdir)/clobber.ok _$@ && rm -f _$@ - @rm -f seq - -delarprm:: - @$(AWK) -f $(srcdir)/delarprm.awk >_$@ - -$(CMP) $(srcdir)/delarprm.ok _$@ && rm -f _$@ - -prdupval:: - @$(AWK) -f $(srcdir)/prdupval.awk $(srcdir)/prdupval.in >_$@ - -$(CMP) $(srcdir)/prdupval.ok _$@ && rm -f _$@ - -nondec:: - @if grep BITOP ../config.h | grep define > /dev/null; \ - then \ - $(AWK) -f $(srcdir)/nondec.awk >_$@; \ - else \ - cp $(srcdir)/nondec.ok _$@; \ - fi - -$(CMP) $(srcdir)/nondec.ok _$@ && rm -f _$@ - -nasty:: - @$(AWK) -f $(srcdir)/nasty.awk >_$@ - -$(CMP) $(srcdir)/nasty.ok _$@ && rm -f _$@ - -nasty2:: - @$(AWK) -f $(srcdir)/nasty2.awk >_$@ - -$(CMP) $(srcdir)/nasty2.ok _$@ && rm -f _$@ - -zeroflag:: - @$(AWK) -f $(srcdir)/zeroflag.awk >_$@ - -$(CMP) $(srcdir)/zeroflag.ok _$@ && rm -f _$@ - -getnr2tm:: - @$(AWK) -f $(srcdir)/getnr2tm.awk $(srcdir)/getnr2tm.in >_$@ - -$(CMP) $(srcdir)/getnr2tm.ok _$@ && rm -f _$@ - -getnr2tb:: - @$(AWK) -f $(srcdir)/getnr2tb.awk $(srcdir)/getnr2tb.in >_$@ - -$(CMP) $(srcdir)/getnr2tb.ok _$@ && rm -f _$@ - -printf1:: - @$(AWK) -f $(srcdir)/printf1.awk >_$@ - -$(CMP) $(srcdir)/printf1.ok _$@ && rm -f _$@ - -funsmnam:: - @-AWKPATH=$(srcdir) $(AWK) -f funsmnam.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/funsmnam.ok _$@ && rm -f _$@ - -fnamedat:: - @-AWKPATH=$(srcdir) $(AWK) -f fnamedat.awk < $(srcdir)/fnamedat.in >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/fnamedat.ok _$@ && rm -f _$@ - -numindex:: - @-AWKPATH=$(srcdir) $(AWK) -f numindex.awk < $(srcdir)/numindex.in >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/numindex.ok _$@ && rm -f _$@ - -subslash:: - @-AWKPATH=$(srcdir) $(AWK) -f subslash.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/subslash.ok _$@ && rm -f _$@ - -opasnslf:: - @-AWKPATH=$(srcdir) $(AWK) -f opasnslf.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/opasnslf.ok _$@ && rm -f _$@ - -opasnidx:: - @-AWKPATH=$(srcdir) $(AWK) -f opasnidx.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/opasnidx.ok _$@ && rm -f _$@ - -arynocls:: - @-AWKPATH=$(srcdir) $(AWK) -v INPUT=$(srcdir)/arynocls.in -f 
arynocls.awk >_$@ - -$(CMP) $(srcdir)/arynocls.ok _$@ && rm -f _$@ - -igncdym:: - @-AWKPATH=$(srcdir) $(AWK) -f igncdym.awk $(srcdir)/igncdym.in >_$@ - -$(CMP) $(srcdir)/igncdym.ok _$@ && rm -f _$@ - -getlnbuf:: - @-AWKPATH=$(srcdir) $(AWK) -f getlnbuf.awk $(srcdir)/getlnbuf.in > _$@ - @-AWKPATH=$(srcdir) $(AWK) -f gtlnbufv.awk $(srcdir)/getlnbuf.in > _2$@ - -$(CMP) $(srcdir)/getlnbuf.ok _$@ && $(CMP) $(srcdir)/getlnbuf.ok _2$@ && rm -f _$@ _2$@ - -arysubnm:: - @-AWKPATH=$(srcdir) $(AWK) -f arysubnm.awk >_$@ - -$(CMP) $(srcdir)/arysubnm.ok _$@ && rm -f _$@ - -fnparydl:: - @-AWKPATH=$(srcdir) $(AWK) -f fnparydl.awk >_$@ - -$(CMP) $(srcdir)/fnparydl.ok _$@ && rm -f _$@ - -nlstrina:: - @-AWKPATH=$(srcdir) $(AWK) -f nlstrina.awk >_$@ - -$(CMP) $(srcdir)/nlstrina.ok _$@ && rm -f _$@ - -octsub:: - @-AWKPATH=$(srcdir) $(AWK) -f octsub.awk >_$@ - -$(CMP) $(srcdir)/octsub.ok _$@ && rm -f _$@ - -nlinstr:: - @$(AWK) -f $(srcdir)/nlinstr.awk $(srcdir)/nlinstr.in >_$@ - -$(CMP) $(srcdir)/nlinstr.ok _$@ && rm -f _$@ - -ofmt:: - @$(AWK) -f $(srcdir)/ofmt.awk $(srcdir)/ofmt.in >_$@ - -$(CMP) $(srcdir)/ofmt.ok _$@ && rm -f _$@ - -hsprint:: - @$(AWK) -f $(srcdir)/hsprint.awk >_$@ - -$(CMP) $(srcdir)/hsprint.ok _$@ && rm -f _$@ - -fsfwfs:: - @$(AWK) -f $(srcdir)/fsfwfs.awk $(srcdir)/fsfwfs.in >_$@ - -$(CMP) $(srcdir)/fsfwfs.ok _$@ && rm -f _$@ - -ofmts:: - @$(AWK) -f $(srcdir)/ofmts.awk $(srcdir)/ofmts.in >_$@ - -$(CMP) $(srcdir)/ofmts.ok _$@ && rm -f _$@ - -parseme:: - @-AWKPATH=$(srcdir) $(AWK) -f parseme.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/parseme.ok _$@ && rm -f _$@ - -splitdef:: - @$(AWK) -f $(srcdir)/splitdef.awk >_$@ - -$(CMP) $(srcdir)/splitdef.ok _$@ && rm -f _$@ - -fnaryscl:: - @-AWKPATH=$(srcdir) $(AWK) -f fnaryscl.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/fnaryscl.ok _$@ && rm -f _$@ - -fnasgnm:: - @-AWKPATH=$(srcdir) $(AWK) -f fnasgnm.awk < $(srcdir)/fnasgnm.in >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/fnasgnm.ok _$@ && rm -f _$@ - -lint:: - @-AWKPATH=$(srcdir) $(AWK) -f lint.awk > _$@ 2>&1 - -$(CMP) $(srcdir)/lint.ok _$@ && rm -f _$@ - -procinfs:: - @-$(AWK) -f $(srcdir)/procinfs.awk > _$@ - -$(CMP) $(srcdir)/procinfs.ok _$@ && rm -f _$@ - -sort1:: - @-$(AWK) -f $(srcdir)/sort1.awk > _$@ - -$(CMP) $(srcdir)/sort1.ok _$@ && rm -f _$@ - -ofmtbig:: - @$(AWK) -f $(srcdir)/ofmtbig.awk $(srcdir)/ofmtbig.in >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/ofmtbig.ok _$@ && rm -f _$@ - -inetmesg:: - @echo These tests only work if your system supports the services - @echo "'discard'" at port 9 and "'daytimed" at port 13. Check your - @echo file /etc/services and do "'netstat -a'". 
- -inetechu:: - @echo This test is for establishing UDP connections - @$(AWK) 'BEGIN {print "" |& "/inet/udp/0/127.0.0.1/9"}' - -inetecht:: - @echo This test is for establishing TCP connections - @$(AWK) 'BEGIN {print "" |& "/inet/tcp/0/127.0.0.1/9"}' - -inetdayu:: - @echo This test is for bidirectional UDP transmission - @$(AWK) 'BEGIN { print "" |& "/inet/udp/0/127.0.0.1/13"; \ - "/inet/udp/0/127.0.0.1/13" |& getline; print $0}' - -inetdayt:: - @echo This test is for bidirectional TCP transmission - @$(AWK) 'BEGIN { print "" |& "/inet/tcp/0/127.0.0.1/13"; \ - "/inet/tcp/0/127.0.0.1/13" |& getline; print $0}' - -paramtyp:: - @$(AWK) -f $(srcdir)/paramtyp.awk >_$@ - -$(CMP) $(srcdir)/paramtyp.ok _$@ && rm -f _$@ - -rsnul1nl:: - @$(AWK) -f $(srcdir)/rsnul1nl.awk $(srcdir)/rsnul1nl.in >_$@ - -$(CMP) $(srcdir)/rsnul1nl.ok _$@ && rm -f _$@ - -datanonl:: - @$(AWK) -f $(srcdir)/datanonl.awk $(srcdir)/datanonl.in >_$@ - -$(CMP) $(srcdir)/datanonl.ok _$@ && rm -f _$@ - -regeq:: - @$(AWK) -f $(srcdir)/regeq.awk $(srcdir)/regeq.in >_$@ - -$(CMP) $(srcdir)/regeq.ok _$@ && rm -f _$@ - -redfilnm:: - @$(AWK) -f $(srcdir)/redfilnm.awk srcdir=$(srcdir) $(srcdir)/redfilnm.in >_$@ - -$(CMP) $(srcdir)/redfilnm.ok _$@ && rm -f _$@ - -strtod:: - @$(AWK) -f $(srcdir)/strtod.awk $(srcdir)/strtod.in >_$@ - -$(CMP) $(srcdir)/strtod.ok _$@ && rm -f _$@ - -leaddig:: - @$(AWK) -v x=2E -f $(srcdir)/leaddig.awk >_$@ - -$(CMP) $(srcdir)/leaddig.ok _$@ && rm -f _$@ - -clos1way:: - @$(AWK) -f $(srcdir)/clos1way.awk >_$@ - -$(CMP) $(srcdir)/clos1way.ok _$@ && rm -f _$@ - -arynasty:: - @$(AWK) -f $(srcdir)/arynasty.awk >_$@ - -$(CMP) $(srcdir)/arynasty.ok _$@ && rm -f _$@ - -shadow:: - @-AWKPATH=$(srcdir) $(AWK) --lint -f shadow.awk >_$@ 2>&1 || exit 0 - -$(CMP) $(srcdir)/shadow.ok _$@ && rm -f _$@ - -regx8bit:: - @$(AWK) -f $(srcdir)/regx8bit.awk >_$@ - -$(CMP) $(srcdir)/regx8bit.ok _$@ && rm -f _$@ - -psx96sub:: - @$(AWK) -f $(srcdir)/psx96sub.awk >_$@ - -$(CMP) $(srcdir)/psx96sub.ok _$@ && rm -f _$@ - -addcomma:: - @$(AWK) -f $(srcdir)/addcomma.awk $(srcdir)/addcomma.in >_$@ - -$(CMP) $(srcdir)/addcomma.ok _$@ && rm -f _$@ - -gnuops2:: - @$(AWK) -f $(srcdir)/gnuops2.awk >_$@ - -$(CMP) $(srcdir)/gnuops2.ok _$@ && rm -f _$@ - -rebt8b1:: - @$(AWK) -f $(srcdir)/rebt8b1.awk >_$@ - -$(CMP) $(srcdir)/rebt8b1.ok _$@ && rm -f _$@ - -rebt8b2:: - @$(AWK) -f $(srcdir)/rebt8b2.awk >_$@ - -$(CMP) $(srcdir)/rebt8b2.ok _$@ && rm -f _$@ - -leadnl:: - @$(AWK) -f $(srcdir)/leadnl.awk $(srcdir)/leadnl.in >_$@ - -$(CMP) $(srcdir)/leadnl.ok _$@ && rm -f _$@ - -funsemnl:: - @$(AWK) -f $(srcdir)/funsemnl.awk >_$@ - -$(CMP) $(srcdir)/funsemnl.ok _$@ && rm -f _$@ - -clean: - rm -fr _* core junk out1 out2 out3 strftime.ok test1 test2 seq *~ - -# This target for my convenience to look at all the results -diffout: - for i in _* ; \ - do \ - echo ============== $$i ============= ; \ - diff -c $${i#_}.ok $$i ; \ - done | more diff --git a/contrib/awk/test/addcomma.awk b/contrib/awk/test/addcomma.awk deleted file mode 100644 index 8f52f36..0000000 --- a/contrib/awk/test/addcomma.awk +++ /dev/null @@ -1,15 +0,0 @@ -# addcomma - put commas in numbers -# input: a number per line -# output: the input number followed by -# the number with commas and two decimal places - -{ printf("%-12s %20s\n", $0, addcomma($0)) } - -function addcomma(x, num) { - if (x < 0) - return "-" addcomma(-x) - num = sprintf("%.2f", x) # num is dddddd.dd - while (num ~ /[0-9][0-9][0-9][0-9]/) - sub(/[0-9][0-9][0-9][,.]/, ",&", num) - return num -} diff --git 
a/contrib/awk/test/addcomma.in b/contrib/awk/test/addcomma.in deleted file mode 100644 index be70ac9..0000000 --- a/contrib/awk/test/addcomma.in +++ /dev/null @@ -1,7 +0,0 @@ -0 --1 --12.34 -12345 --1234567.89 --123. --123456 diff --git a/contrib/awk/test/addcomma.ok b/contrib/awk/test/addcomma.ok deleted file mode 100644 index 57c5886..0000000 --- a/contrib/awk/test/addcomma.ok +++ /dev/null @@ -1,7 +0,0 @@ -0 0.00 --1 -1.00 --12.34 -12.34 -12345 12,345.00 --1234567.89 -1,234,567.89 --123. -123.00 --123456 -123,456.00 diff --git a/contrib/awk/test/arynasty.awk b/contrib/awk/test/arynasty.awk deleted file mode 100644 index ec17093..0000000 --- a/contrib/awk/test/arynasty.awk +++ /dev/null @@ -1,16 +0,0 @@ -BEGIN { - a = 12.153 -#print "-- stroring test[a]" > "/dev/stderr" ; fflush("/dev/stderr") - test[a] = "hi" -#print "-- setting CONVFMT" > "/dev/stderr" ; fflush("/dev/stderr") - CONVFMT = "%.0f" -#print "-- setting a" > "/dev/stderr" ; fflush("/dev/stderr") - a = 5 -#stopme() -#print "-- starting loop" > "/dev/stderr" ; fflush("/dev/stderr") - for (i in test) { -#print("-- i =", i) > "/dev/stderr" ; fflush("/dev/stderr"); -#printf("-- i = <%s>\n", i) > "/dev/stderr" ; fflush("/dev/stderr"); - printf ("test[%s] = %s\n", i, test[i]) - } -} diff --git a/contrib/awk/test/arynasty.ok b/contrib/awk/test/arynasty.ok deleted file mode 100644 index 125ed80..0000000 --- a/contrib/awk/test/arynasty.ok +++ /dev/null @@ -1 +0,0 @@ -test[12.153] = hi diff --git a/contrib/awk/test/arynocls.awk b/contrib/awk/test/arynocls.awk deleted file mode 100644 index 724c9ac..0000000 --- a/contrib/awk/test/arynocls.awk +++ /dev/null @@ -1,95 +0,0 @@ -#To: bug-gnu-utils@gnu.org -#From: Kristján Jónasson -#Subject: Gawk bug -#Cc: arnold@gnu.org -# -#Hi! -# -#The following seems to be a bug in gawk. I have tried as I could to -#minimize the bug-causing program, so of course it does not seem to do -#anything useful in its present form. The error message received is: -# -#gawk: test.awk:15: fatal error: internal error -#Aborted -# -#Note that there is an attached file that the program reads, called "a". I -#played with the program a fair bit and my feeling is that the error is -#related with the delete statement, and not the reading of the file and the -#close statement. At one point I was able to remove the file reading and -#still obtain the error. If, for example, I remove the close statement and -#make two copies of the file instead, (reading one copy in sub1 and the -#other in sub2), the error still occurs. -# -#The operating system is Red Hat Linux, version 6.0, the gawk is version -#3.0.4, and the gawk was obtained from an rpm file gawk-3.0.4-1.i386.rpm. -# -#The program is: -# - -# Wed Mar 8 13:41:34 IST 2000 -# ADR: modified to use INPUT, so can set it from command line. -# When run, no output is produced, but it shouldn't core -# dump, either. -# -# The program bug is to not close the file in sub2. 
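# (Illustrative note, not part of the original report.)  The close()
# calls matter because a plain `getline < file' leaves the file open:
# once a loop has read it to end-of-file, a later `getline < file'
# elsewhere returns 0 immediately unless close(file) was called in
# between.  A minimal sketch of the safe pattern that sub1() below
# also uses:
#
#     function countlines(file,    line, n) {
#         while ((getline line < file) == 1)
#             n++
#         close(file)    # without this, the next reader of `file' sees EOF
#         return n
#     }
#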
- -function sub1(x) { -# while (getline < "a" == 1) i++ - while (getline < INPUT == 1) i++ -# close("a") - close(INPUT) -} - -function sub2(x) { - i=0 - delete y -# while (getline < "a" == 1) z[++i] = $1 - while (getline < INPUT == 1) z[++i] = $1 - for(i in z) y[i] = x[i] + z[i] -} - -function sub3(x, y, z) { - sub2(x) - for(i=1; i<=4; i++) z[i] = y[i] -} - -BEGIN { - sub1(x) - sub2(x) - sub3(x, y, z) -} -# -#And the data file is: -# -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# 32.440 3.830 3.383700000000000 10.08 298 865 -# -# diff --git a/contrib/awk/test/arynocls.in b/contrib/awk/test/arynocls.in deleted file mode 100644 index 8f4712c..0000000 --- a/contrib/awk/test/arynocls.in +++ /dev/null @@ -1,30 +0,0 @@ - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 - 32.440 3.830 3.383700000000000 10.08 298 865 diff --git a/contrib/awk/test/arynocls.ok b/contrib/awk/test/arynocls.ok 
deleted file mode 100644 index e69de29..0000000 diff --git a/contrib/awk/test/arysubnm.awk b/contrib/awk/test/arysubnm.awk deleted file mode 100644 index 961b54a..0000000 --- a/contrib/awk/test/arysubnm.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN { n = 11 ; foo[n] = n; print (2 <= n) } diff --git a/contrib/awk/test/arysubnm.ok b/contrib/awk/test/arysubnm.ok deleted file mode 100644 index d00491f..0000000 --- a/contrib/awk/test/arysubnm.ok +++ /dev/null @@ -1 +0,0 @@ -1 diff --git a/contrib/awk/test/clos1way.awk b/contrib/awk/test/clos1way.awk deleted file mode 100644 index 5bc4068..0000000 --- a/contrib/awk/test/clos1way.awk +++ /dev/null @@ -1,21 +0,0 @@ -BEGIN { - command = "LC_ALL=C sort" - - n = split("abcdefghijklmnopqrstuvwxyz", a, "") - for (i = n; i > 0; i--) { -# print "printing", a[i] > "/dev/stderr" - print a[i] |& command - } - - close(command, "to") - -# print "starting read loop" > "/dev/stderr" - do { - if (line) - print "got", line -# stopme(); - } while ((command |& getline line) > 0) - -# print "doing final close" > "/dev/stderr" - close(command) -} diff --git a/contrib/awk/test/clos1way.ok b/contrib/awk/test/clos1way.ok deleted file mode 100644 index 09d732a..0000000 --- a/contrib/awk/test/clos1way.ok +++ /dev/null @@ -1,26 +0,0 @@ -got a -got b -got c -got d -got e -got f -got g -got h -got i -got j -got k -got l -got m -got n -got o -got p -got q -got r -got s -got t -got u -got v -got w -got x -got y -got z diff --git a/contrib/awk/test/datanonl.awk b/contrib/awk/test/datanonl.awk deleted file mode 100644 index 29e668e..0000000 --- a/contrib/awk/test/datanonl.awk +++ /dev/null @@ -1,3 +0,0 @@ -# example program from alex@bofh.torun.pl -BEGIN { IGNORECASE=1 } -/\w+@([[:alnum:]]+\.)+[[:alnum:]]+[[:blank:]]+/ {print $0} diff --git a/contrib/awk/test/datanonl.in b/contrib/awk/test/datanonl.in deleted file mode 100644 index 5340d7b..0000000 --- a/contrib/awk/test/datanonl.in +++ /dev/null @@ -1 +0,0 @@ -bleble@foo1.bh.pl deny \ No newline at end of file diff --git a/contrib/awk/test/datanonl.ok b/contrib/awk/test/datanonl.ok deleted file mode 100644 index 7b0fcaf..0000000 --- a/contrib/awk/test/datanonl.ok +++ /dev/null @@ -1 +0,0 @@ -bleble@foo1.bh.pl deny diff --git a/contrib/awk/test/fnamedat.awk b/contrib/awk/test/fnamedat.awk deleted file mode 100644 index 33a0704..0000000 --- a/contrib/awk/test/fnamedat.awk +++ /dev/null @@ -1 +0,0 @@ -function foo() { print foo } {foo()} diff --git a/contrib/awk/test/fnamedat.in b/contrib/awk/test/fnamedat.in deleted file mode 100644 index 257cc56..0000000 --- a/contrib/awk/test/fnamedat.in +++ /dev/null @@ -1 +0,0 @@ -foo diff --git a/contrib/awk/test/fnamedat.ok b/contrib/awk/test/fnamedat.ok deleted file mode 100644 index 0dd0ae5..0000000 --- a/contrib/awk/test/fnamedat.ok +++ /dev/null @@ -1 +0,0 @@ -gawk: fnamedat.awk:1: (FILENAME=- FNR=1) fatal: can't use function name `foo' as variable or array diff --git a/contrib/awk/test/fnaryscl.awk b/contrib/awk/test/fnaryscl.awk deleted file mode 100644 index b88778e..0000000 --- a/contrib/awk/test/fnaryscl.awk +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN { - foo[1] = 4 - f1(foo) -} - -function f1(a) { f2(a) } - -function f2(b) { f3(b) } - -function f3(c) { c = 6 } diff --git a/contrib/awk/test/fnaryscl.ok b/contrib/awk/test/fnaryscl.ok deleted file mode 100644 index d39dfdc..0000000 --- a/contrib/awk/test/fnaryscl.ok +++ /dev/null @@ -1 +0,0 @@ -gawk: fnaryscl.awk:10: fatal: attempt to use array `c (from b (from a (from foo)))' in a scalar context diff --git a/contrib/awk/test/fnasgnm.awk 
b/contrib/awk/test/fnasgnm.awk deleted file mode 100644 index 056cdf7..0000000 --- a/contrib/awk/test/fnasgnm.awk +++ /dev/null @@ -1,14 +0,0 @@ -# AFP_Bug1.awk - illustrate a problem with `gawk' (GNU Awk 3.0.3 on OS/2) -# Arthur Pool .. pool@commerce.uq.edu.au -# $Id: AFP_Bug1.awk,v 1.1 1998-03-17 12:22:44+10 pool Exp pool $ - -# Assignment to a variable with the same name as a function from within -# that function causes an ABEND. -# -# Yes, I do realise that it's not a smart thing to do, but an error -# message would be a kinder response than a core dump (and would make -# debugging a whole lot easier). - -{ShowMe()} - -function ShowMe() {ShowMe = 1} diff --git a/contrib/awk/test/fnasgnm.in b/contrib/awk/test/fnasgnm.in deleted file mode 100644 index a941931..0000000 --- a/contrib/awk/test/fnasgnm.in +++ /dev/null @@ -1 +0,0 @@ -junk diff --git a/contrib/awk/test/fnasgnm.ok b/contrib/awk/test/fnasgnm.ok deleted file mode 100644 index 844893c..0000000 --- a/contrib/awk/test/fnasgnm.ok +++ /dev/null @@ -1 +0,0 @@ -gawk: fnasgnm.awk:14: (FILENAME=- FNR=1) fatal: can't use function name `ShowMe' as variable or array diff --git a/contrib/awk/test/fnparydl.awk b/contrib/awk/test/fnparydl.awk deleted file mode 100644 index ef3a822..0000000 --- a/contrib/awk/test/fnparydl.awk +++ /dev/null @@ -1,31 +0,0 @@ -# fnparydl.awk --- check that deleting works with arrays -# that are parameters. -# -# Tue Jul 11 14:20:58 EDT 2000 - -function delit(a, k) -{ - print "BEFORE LOOP" - for (k in a) { - print "DELETING KEY", k - delete a[k] - } - print "AFTER LOOP" -} - -BEGIN { - for (i = 1 ; i <= 7; i++) { - q[i] = sprintf("element %d", i) - x[i] = i - y[i] = q[i] - } -# adump(q) - delit(q) -# for (i in q) -# delete q[i] - j = 0; - for (i in q) - j++ - print j, "elements still in q[]" -# adump(q) -} diff --git a/contrib/awk/test/fnparydl.ok b/contrib/awk/test/fnparydl.ok deleted file mode 100644 index 26a5c39..0000000 --- a/contrib/awk/test/fnparydl.ok +++ /dev/null @@ -1,10 +0,0 @@ -BEFORE LOOP -DELETING KEY 4 -DELETING KEY 5 -DELETING KEY 6 -DELETING KEY 7 -DELETING KEY 1 -DELETING KEY 2 -DELETING KEY 3 -AFTER LOOP -0 elements still in q[] diff --git a/contrib/awk/test/fsfwfs.awk b/contrib/awk/test/fsfwfs.awk deleted file mode 100644 index beed10a..0000000 --- a/contrib/awk/test/fsfwfs.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN{FIELDWIDTHS="6 6 6 5";OFS=",";FS=FS}{print $1,$2,$3,$4} diff --git a/contrib/awk/test/fsfwfs.in b/contrib/awk/test/fsfwfs.in deleted file mode 100644 index fc10928..0000000 --- a/contrib/awk/test/fsfwfs.in +++ /dev/null @@ -1,16 +0,0 @@ -00000113000 00000000000 -00000275000 00000000000 -00000321334 00000000000 -00000048709 00000010000 -00000117000 00000100000 -00000152000 00000138000 -00000000000 00000150000 -00000189425 00000000000 -00000146128 00000000000 -00000146128 00000000000 -00000146128 00000000000 -00000000000 00000050000 -00000000000 00000050000 -00000000000 00000000000 -00000158014 00000000000 -00000113656 00000000000 diff --git a/contrib/awk/test/fsfwfs.ok b/contrib/awk/test/fsfwfs.ok deleted file mode 100644 index 36bea48..0000000 --- a/contrib/awk/test/fsfwfs.ok +++ /dev/null @@ -1,16 +0,0 @@ -00000113000,00000000000,, -00000275000,00000000000,, -00000321334,00000000000,, -00000048709,00000010000,, -00000117000,00000100000,, -00000152000,00000138000,, -00000000000,00000150000,, -00000189425,00000000000,, -00000146128,00000000000,, -00000146128,00000000000,, -00000146128,00000000000,, -00000000000,00000050000,, -00000000000,00000050000,, -00000000000,00000000000,, 
-00000158014,00000000000,, -00000113656,00000000000,, diff --git a/contrib/awk/test/funsemnl.awk b/contrib/awk/test/funsemnl.awk deleted file mode 100644 index 6b39dca..0000000 --- a/contrib/awk/test/funsemnl.awk +++ /dev/null @@ -1,3 +0,0 @@ -# make sure that ; + \n at end after function works -function foo() { print "foo" } ; -BEGIN { foo() } diff --git a/contrib/awk/test/funsemnl.ok b/contrib/awk/test/funsemnl.ok deleted file mode 100644 index 257cc56..0000000 --- a/contrib/awk/test/funsemnl.ok +++ /dev/null @@ -1 +0,0 @@ -foo diff --git a/contrib/awk/test/funsmnam.awk b/contrib/awk/test/funsmnam.awk deleted file mode 100644 index 1e8ca50..0000000 --- a/contrib/awk/test/funsmnam.awk +++ /dev/null @@ -1,6 +0,0 @@ -function foo( \ - foo) -{ - print foo -} -{ foo() } diff --git a/contrib/awk/test/funsmnam.ok b/contrib/awk/test/funsmnam.ok deleted file mode 100644 index bc68a2f..0000000 --- a/contrib/awk/test/funsmnam.ok +++ /dev/null @@ -1 +0,0 @@ -gawk: funsmnam.awk:6: fatal: function `foo': can't use function name as parameter name diff --git a/contrib/awk/test/getlnbuf.awk b/contrib/awk/test/getlnbuf.awk deleted file mode 100644 index 8a4483e..0000000 --- a/contrib/awk/test/getlnbuf.awk +++ /dev/null @@ -1,18 +0,0 @@ -#Date: Tue, 21 Dec 1999 16:11:07 +0100 -#From: Daniel Schnell -#To: bug-gnu-utils@gnu.org -#CC: arnold@gnu.org -#Subject: BUG in gawk (version 3.0.4 linux, windows): Text mangeling in between - -# search for "@K@CODE" segment - -$0 ~ /@K@CODE/ { - # get next record - getline temp - printf ("@K@CODE\n") - printf ("%s\n",temp) - } - -$0 !~ /@K@CODE/ { - printf ("%s\n", $0) - } diff --git a/contrib/awk/test/getlnbuf.in b/contrib/awk/test/getlnbuf.in deleted file mode 100644 index 062b377..0000000 --- a/contrib/awk/test/getlnbuf.in +++ /dev/null @@ -1,1708 +0,0 @@ -EXTRA_INFO.TYP3.EC := EC; -EXTRA_INFO.TYP3.TEXT:= 'CONNECT_SERVICE TO OAM FAILED'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C003', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_SWERR, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -917596041 -@K@NAME -T_ERR4_1 -@K@INSCRIPT -ERROR_HANDLING: -DB_OVERFLOW -MP/NSEI -@K@CODE -/***@@@ ERROR ***/ -/*@@ERRORTEXT -*@ DB-OVERFLOW -*@ -*@ -*@@DESCRIPTION -*@ THE INSTANCE-CREATION WAS NOT POSSIBLE -*@ BECAUSE THE DATABASE WOULD OVERFLOW -*@ -*@@EXTRA INFO -*@ (EXTRA_INFO_4_STRUCT) -*@ NSEI -*@ NSVCI -*@ TEXT -*@ -*/ - -EXTRA_INFO.TYP4.NSEI := EVD_PTR->.KEYS.INT_ARR(0); -EXTRA_INFO.TYP4.NSVCI:= EVD_PTR->.KEYS.INT_ARR(1); -EXTRA_INFO.TYP4.TEXT := 'NSVC-HAND.: MP/NSEI-OVERFLOW'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C004', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_ESC_MAX_ANY, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -920903219 -@K@NAME -T_ERR4_2 -@K@INSCRIPT -ERROR_HANDLING: -DB_OVERFLOW -MP/NSVCI -@K@CODE -/***@@@ ERROR ***/ -/*@@ERRORTEXT -*@ DB-OVERFLOW -*@ -*@ -*@@DESCRIPTION -*@ THE INSTANCE-CREATION WAS NOT POSSIBLE -*@ BECAUSE THE DATABASE WOULD OVERFLOW -*@ -*@@EXTRA INFO -*@ (EXTRA_INFO_4_STRUCT) -*@ NSEI -*@ NSVCI -*@ TEXT -*@ -*/ - -EXTRA_INFO.TYP4.NSEI := EVD_PTR->.KEYS.INT_ARR(0); -EXTRA_INFO.TYP4.NSVCI:= EVD_PTR->.KEYS.INT_ARR(1); -EXTRA_INFO.TYP4.TEXT := 'NSVC-HAND.: MP/NSVCI-OVERFLOW'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C004', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_ESC_MAX_ANY, /*@@CLASS*/ 
-ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -920903222 -@K@NAME -T_ERR4_3 -@K@INSCRIPT -ERROR_HANDLING: -DB_OVERFLOW -NSEI/NSVCI -@K@CODE -/***@@@ ERROR ***/ -/*@@ERRORTEXT -*@ DB-OVERFLOW -*@ -*@ -*@@DESCRIPTION -*@ THE INSTANCE-CREATION WAS NOT POSSIBLE -*@ BECAUSE THE DATABASE WOULD OVERFLOW -*@ -*@@EXTRA INFO -*@ (EXTRA_INFO_4_STRUCT) -*@ NSEI -*@ NSVCI -*@ TEXT -*@ -*/ - -EXTRA_INFO.TYP4.NSEI := EVD_PTR->.KEYS.INT_ARR(0); -EXTRA_INFO.TYP4.NSVCI:= EVD_PTR->.KEYS.INT_ARR(1); -EXTRA_INFO.TYP4.TEXT := 'NSVC-HAND.: NSEI/NSVC-OVERFLOW'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C004', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_ESC_MAX_ANY, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -920903226 -@K@NAME -TR_RESET -@K@INSCRIPT -RESTART_ -TNS_RESET_ -TIMER -@K@CODE -/* TIMER EVENT DESCRIPTOR STILL THERE */ - -/* INITIALIZATION OF THE TIMER-EVENT-DESCRIPTOR STILL VALID */ -NSVCI_CON_PTR->.TIM_EVD_PTR->.TIMER:= TNS_RESET_MAP; - -/* START TIMER */ -G9PX508_START_TIMER_P -( -NSVCI_CON_PTR->.TIM_EVD_PTR -); - -@K@FREEZE -924684867 -@K@NAME -TX_AUDIT -@K@INSCRIPT -FOR -AUDIT - -@K@NAME -M_BLKOACKM -@K@INSCRIPT -NS_ -BLOCK_ACK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; - -IF -/* 'OTHER' ALIVE NSVC TO THIS NSEI EXISTING? */ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR /= NULL -THEN -/* USE THIS 'OTHER' FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -/* NSEI TO BE USED FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; -/* NSVCI TO BE USED FOR TRANSPORT */ -ELSE -/* USE AFFECTED NSVC AGAIN FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -FI; - - -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D3); - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR2_NS_BLOCK_ACK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D3.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -938805885 -@K@NAME -T_RCTRUE -@K@INSCRIPT -RC -= -TRUE -@K@CODE -RC:= TRUE; - -@K@FREEZE -922176328 -@K@NAME -M_AC_SBVCN -@K@INSCRIPT -G9IBME0_ -ACT_ -SIGN_BVC_C -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB512_GET_MSG_LESS_EV_DESCR_P -( -SID_GBNSVC, -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -TX_EVD_PTR->.EVENT_CMD:= G9IBME0_ACT_SIGN_BVC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBBVC_HANDLE; 
-TX_EVD_PTR->.KEYS.INT_ARR(0):= EVD_PTR->.KEYS.INT_ARR(0); -TX_EVD_PTR->.KEYS.INT_ARR(1):= SIGN_BVCI; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -938788211 -@K@NAME -T_RC_EOD -@K@INSCRIPT -RC -= -END OF DATA - -@K@CODE -RC:= G9IBSM4_RC_END_OF_DATA; - -@K@FREEZE -921083785 -@K@NAME -T_RC_EMP -@K@INSCRIPT -RC -= -EMPTY - -@K@CODE -RC:= G9IBSM4_RC_EMPTY; - -@K@FREEZE -921083757 -@K@NAME -T_RC_ERR -@K@INSCRIPT -RC -= -ERROR - -@K@CODE -RC:= G9IBSM4_RC_ERROR; - -@K@FREEZE -921083731 -@K@NAME -S_UNUSED -@K@INSCRIPT -G9IBSM0_ -UNUSED -@K@CODE - - -@K@FREEZE -919416670 -@K@NAME -TA_UNBLOCK -@K@INSCRIPT -START_ -TNS_UNBLOCK_ -TIMER -@K@CODE -/* GET TIMER-EVENT DESCRIPTOR */ -G9PB513_GET_TIMER_EV_DESCR_P -( -SID_GBNSVC, -NSVCI_CON_PTR->.TIM_EVD_PTR -); - -/* INITIALIZATION OF THE TIMER-EVENT-DESCRIPTOR */ -NSVCI_CON_PTR->.TIM_EVD_PTR->.EVENT_CMD:= G9IBSE4_TO_TNS_C; -NSVCI_CON_PTR->.TIM_EVD_PTR->.EVENT_DESTINATION:= GBNSVC_HANDLE; -NSVCI_CON_PTR->.TIM_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -NSVCI_CON_PTR->.TIM_EVD_PTR->.KEYS.INT_ARR(1):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; -NSVCI_CON_PTR->.TIM_EVD_PTR->.TIMER:= TNS_UNBLOCK_MAP; - -/* START TIMER */ -G9PX508_START_TIMER_P -( -NSVCI_CON_PTR->.TIM_EVD_PTR -); - -@K@FREEZE -924686210 -@K@NAME -M_BLK_ACKM -@K@INSCRIPT -NS_ -BLOCK_ACK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D3); - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR2_NS_BLOCK_ACK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D3.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -926348442 -@K@NAME -TA_NXTALIV -@K@INSCRIPT -DEFINE -NEW 'NEXT_ -ALIVE' -@K@CODE -IF -/* ALIVE NSVC TO THE NSEI EXISTING? 
*/ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR /= NULL - -THEN -/* TAKE NEXT ELEMENT IN THE LINKED LIST AS THE NEXT ALIVE NSVC */ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR:= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.NEXT_ALV_NSVCI_CON_PTR; - -FI; - -@K@FREEZE -938801086 -@K@NAME -M_DE_CBVCN -@K@INSCRIPT -G9IBME2_ -DEACT_ -CELL_BVC_C - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB512_GET_MSG_LESS_EV_DESCR_P -( -SID_GBNSVC, -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -TX_EVD_PTR->.EVENT_CMD:= G9IBME2_DEACT_CELL_BVC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBBVC_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -TX_EVD_PTR->.KEYS.INT_ARR(1):= EVD_PTR->.ADD_DATA(3); - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -933318270 -@K@NAME -TA_NXTRESP -@K@INSCRIPT -DEFINE -NEW 'NEXT_ -RESPONSIBLE' -@K@CODE -NSEI_CON_PTR->.NEXT_RESP_NSVCI_CON_PTR:= - NSEI_CON_PTR->.NEXT_RESP_NSVCI_CON_PTR->.NEXT_LSP_NSVCI_CON_PTR; - - -@K@FREEZE -938005006 -@K@NAME -TA_NXTSUBS -@K@INSCRIPT -DEFINE -NEW 'NEXT_ -SUBSTITUTE' -@K@CODE -NSEI_CON_PTR->.NEXT_SUBS_NSVCI_CON_PTR:= - NSEI_CON_PTR->.NEXT_SUBS_NSVCI_CON_PTR->.NEXT_UBL_NSVCI_CON_PTR; - -@K@NAME -M_BLK_O__M -@K@INSCRIPT -NS_ -BLOCK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; - -IF -/* 'OTHER' ALIVE NSVC TO THIS NSEI EXISTING? */ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR /= NULL -THEN -/* USE THIS 'OTHER' FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -/* NSEI TO BE USED FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; -/* NSVCI TO BE USED FOR TRANSPORT */ -ELSE -/* USE AFFECTED NSVC AGAIN FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -FI; - - -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D2); - - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR1_NS_BLOCK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D2.CAUSE_TLV.CAUSE_VAL:= - G9IBBA2_NS_TRANSIT_NETWORK_FAILURE; /* CAUSE FOR BLOCK */ -NS_PDU_PTR->.D2.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@TEXT -GSM 8.16 CHAP. 7.2: -THE NS-BLOCK-PDU MAY BE SENT IN ANY ALIVE -(BLOCKED OR UNBLOCKED) NS-VC... 
-@K@FREEZE -938803215 -@K@NAME -M_DE_SBVCN -@K@INSCRIPT -G9IBME1_ -DEACT_ -SIGN_BVC_C -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB512_GET_MSG_LESS_EV_DESCR_P -( -SID_GBNSVC, -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -TX_EVD_PTR->.EVENT_CMD:= G9IBME1_DEACT_SIGN_BVC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBBVC_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= EVD_PTR->.KEYS.INT_ARR(0); -TX_EVD_PTR->.KEYS.INT_ARR(1):= SIGN_BVCI; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -938788201 -@K@NAME -M_OAME401M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSE */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSE; -OAM_MSG_PTR->.HANDLED_OBJECT.NSE_ID:= EVD_PTR->.KEYS.INT_ARR(0); /* -USED NSEI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* AFFECTED NSEI (FROM PDU) */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* AFFECTED NSVCI (FROM PDU) */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935766108 -@K@NAME -M_OAME402M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.KEYS.INT_ARR(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* NSEI FROM PDU */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* NSVCI FROM PDU */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935766407 -@K@NAME -M_OAME411M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_ACK_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, 
-(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSE */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSE; -OAM_MSG_PTR->.HANDLED_OBJECT.NSE_ID:= EVD_PTR->.KEYS.INT_ARR(0); /* -USED NSEI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_ACK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* NSEI FROM PDU */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* NSVCI FROM PDU */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935767332 -@K@NAME -M_OAME412M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_ACK_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.KEYS.INT_ARR(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_ACK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* NSEI FROM PDU */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* NSVCI FROM PDU */ - - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935767189 -@K@NAME -C_CON -@K@INSCRIPT -RC_DB -@K@CODE -RC_DB - -@K@FREEZE -922176673 -@K@NAME -M_BLK____M -@K@INSCRIPT -NS_ -BLOCK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 
G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D2); - - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR1_NS_BLOCK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D2.CAUSE_TLV.CAUSE_VAL:= - G9IBBA2_NS_OAM_INTERVENTION; /* CAUSE FOR BLOCK */ -NS_PDU_PTR->.D2.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@TEXT -GSM 8.16 CHAP. 7.2: -THE NS-BLOCK-PDU MAY BE SENT IN ANY ALIVE -(BLOCKED OR UNBLOCKED) NS-VC... -@K@FREEZE -926348613 -@K@NAME -S_BLOCKED -@K@INSCRIPT -G9IBSM0_ -BLOCKED -@K@CODE - - -@K@FREEZE -922176496 -@K@NAME -D_CON -@K@INSCRIPT -CONTEXT -GOT -@K@CODE -RC_DB = G9IBSR0_RC_OK - -@K@FREEZE -921772339 -@K@NAME -M_OAME901M -@K@INSCRIPT -ERROR_MESSAGE: -OPERATIONAL_STATE_CHANGE -UBL->BLK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = OPERATIONAL_STATE_CHANGE */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_OPERATIONAL_STATE_CHANGE; -/* ADDITIONAL_OPERATIONAL_STATE_INFO */ -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_OLD:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_OLD:= - G9OC102_ENABLED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_NEW:= - G9OC102_ENABLED; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -925970975 -@K@NAME -M_OAME902M -@K@INSCRIPT -ERROR_MESSAGE: -OPERATIONAL_STATE_CHANGE -UBL->BLK - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = OPERATIONAL_STATE_CHANGE */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_OPERATIONAL_STATE_CHANGE; -/* ADDITIONAL_OPERATIONAL_STATE_INFO */ -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_OLD:= - G9OC101_UNBLOCKED; 
-OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_OLD:= - G9OC102_ENABLED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_NEW:= - G9OC102_DISABLED; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -925970987 -@K@NAME -M_OAME10SM -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@NAME -M_OAME911M -@K@INSCRIPT -ERROR_MESSAGE: -OPERATIONAL_STATE_CHANGE -BLK->UBL -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = OPERATIONAL_STATE_CHANGE */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_OPERATIONAL_STATE_CHANGE; -/* ADDITIONAL_OPERATIONAL_STATE_INFO */ -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_NEW:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_OLD:= - G9OC102_ENABLED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_NEW:= - G9OC102_ENABLED; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -925970996 -@K@NAME -M_OAME20SM -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_END_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( 
-SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_END_NS_ALIVE_TEST */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_END_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@NAME -M_OAME10_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443813 -@K@NAME -D_SEM -@K@INSCRIPT -CALL_SEM -= -TRUE -@K@CODE -CALL_SEM = TRUE - -@K@FREEZE -922176624 -@K@NAME -D_N_0 -@K@INSCRIPT -N = 0 - -@K@CODE -NSVCI_CON_PTR->.N = 0 - -@K@FREEZE -921511000 -@K@NAME -M_OAME12_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; 
-TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - FALSE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_ALIVE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443829 -@K@NAME -M_OAME21_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_END_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_END_NS_ALIVE_TEST */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_END_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - FALSE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443856 -@K@NAME -M_OAME13_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ 
-OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - FALSE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_ALIVE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443838 -@K@NAME -M_OAME22_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_END_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_END_NS_ALIVE_TEST */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_END_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - FALSE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_ALIVE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443864 -@K@NAME -M_OAME30_M -@K@INSCRIPT -ERROR_MESSAGE: -NO_ANSWER_FORM_BSS -RESET_PROCEDURE - - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = NO_ANSWER_FROM_BSS */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_NO_ANSWER_FROM_BSS; -/* INITIATED_PROCEDURE */ -OAM_MSG_PTR->.INITIATED_PROCEDURE:= - G9OC123_RESET_PROCEDURE; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); 
- -@K@FREEZE -922175973 -@K@NAME -M_OAME31_M -@K@INSCRIPT -ERROR_MESSAGE: -NO_ANSWER_FROM_BSS -BLOCK_PROCEDURE - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = NO_ANSWER_FROM_BSS */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_NO_ANSWER_FROM_BSS; -/* INITIATED_PROCEDURE */ -OAM_MSG_PTR->.INITIATED_PROCEDURE:= - G9OC123_BLOCK_PROCEDURE; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922175976 -@K@NAME -M_OAME32_M -@K@INSCRIPT -ERROR_MESSAGE: -NO_ANSWER_FROM_BSS -UNBLOCK_PROCEDURE - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = NO_ANSWER_FROM_BSS */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_NO_ANSWER_FROM_BSS; -/* INITIATED_PROCEDURE */ -OAM_MSG_PTR->.INITIATED_PROCEDURE:= - G9OC123_UNBLOCK_PROCEDURE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922175980 -@K@NAME -M_OAME42_M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -BLOCK_PDU -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.ADD_DATA(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - 
G9OC124_BLOCK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.KEYS.INT_ARR(0); /* AFFECTED NSEI (FROM PDU) */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.KEYS.INT_ARR(1); /* AFFECTED NSVCI (FROM PDU) */ - - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -934296141 -@K@NAME -M_OAME50_M -@K@INSCRIPT -ERROR_MESSAGE: -STATUS_PDU_CONTAINS_ERROR_INFO -RECEIVED -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR-CAUSE = STATUS_PDU_CONTAINS_ERROR_INFO */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_STATUS_PDU_CONTAINS_ERROR_INFO; -/* ADDITIONAL_STATUS_PDU_INFO */ -INT_CAUSE_PTR.INT_PTR:= ADDR(EVD_PTR->.ADD_DATA(2)); -OAM_MSG_PTR->.ADDITIONAL_STATUS_PDU_INFO:= - INT_CAUSE_PTR.CAUSE_PTR->; /* CAUSE */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@TEXT -NICHT OK - -@K@FREEZE -934298924 -@K@NAME -M_OAME43_M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -BLOCK_ACK_PDU -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.ADD_DATA(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_BLOCK_ACK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.KEYS.INT_ARR(0); /* AFFECTED NSEI (FROM PDU) */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.KEYS.INT_ARR(1); /* AFFECTED NSVCI (FROM PDU) */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -934297710 diff --git a/contrib/awk/test/getlnbuf.ok b/contrib/awk/test/getlnbuf.ok deleted file mode 100644 index 062b377..0000000 --- a/contrib/awk/test/getlnbuf.ok +++ /dev/null @@ -1,1708 +0,0 @@ -EXTRA_INFO.TYP3.EC := EC; -EXTRA_INFO.TYP3.TEXT:= 'CONNECT_SERVICE TO OAM FAILED'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C003', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_SWERR, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ 
-G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -917596041 -@K@NAME -T_ERR4_1 -@K@INSCRIPT -ERROR_HANDLING: -DB_OVERFLOW -MP/NSEI -@K@CODE -/***@@@ ERROR ***/ -/*@@ERRORTEXT -*@ DB-OVERFLOW -*@ -*@ -*@@DESCRIPTION -*@ THE INSTANCE-CREATION WAS NOT POSSIBLE -*@ BECAUSE THE DATABASE WOULD OVERFLOW -*@ -*@@EXTRA INFO -*@ (EXTRA_INFO_4_STRUCT) -*@ NSEI -*@ NSVCI -*@ TEXT -*@ -*/ - -EXTRA_INFO.TYP4.NSEI := EVD_PTR->.KEYS.INT_ARR(0); -EXTRA_INFO.TYP4.NSVCI:= EVD_PTR->.KEYS.INT_ARR(1); -EXTRA_INFO.TYP4.TEXT := 'NSVC-HAND.: MP/NSEI-OVERFLOW'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C004', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_ESC_MAX_ANY, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -920903219 -@K@NAME -T_ERR4_2 -@K@INSCRIPT -ERROR_HANDLING: -DB_OVERFLOW -MP/NSVCI -@K@CODE -/***@@@ ERROR ***/ -/*@@ERRORTEXT -*@ DB-OVERFLOW -*@ -*@ -*@@DESCRIPTION -*@ THE INSTANCE-CREATION WAS NOT POSSIBLE -*@ BECAUSE THE DATABASE WOULD OVERFLOW -*@ -*@@EXTRA INFO -*@ (EXTRA_INFO_4_STRUCT) -*@ NSEI -*@ NSVCI -*@ TEXT -*@ -*/ - -EXTRA_INFO.TYP4.NSEI := EVD_PTR->.KEYS.INT_ARR(0); -EXTRA_INFO.TYP4.NSVCI:= EVD_PTR->.KEYS.INT_ARR(1); -EXTRA_INFO.TYP4.TEXT := 'NSVC-HAND.: MP/NSVCI-OVERFLOW'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C004', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_ESC_MAX_ANY, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -920903222 -@K@NAME -T_ERR4_3 -@K@INSCRIPT -ERROR_HANDLING: -DB_OVERFLOW -NSEI/NSVCI -@K@CODE -/***@@@ ERROR ***/ -/*@@ERRORTEXT -*@ DB-OVERFLOW -*@ -*@ -*@@DESCRIPTION -*@ THE INSTANCE-CREATION WAS NOT POSSIBLE -*@ BECAUSE THE DATABASE WOULD OVERFLOW -*@ -*@@EXTRA INFO -*@ (EXTRA_INFO_4_STRUCT) -*@ NSEI -*@ NSVCI -*@ TEXT -*@ -*/ - -EXTRA_INFO.TYP4.NSEI := EVD_PTR->.KEYS.INT_ARR(0); -EXTRA_INFO.TYP4.NSVCI:= EVD_PTR->.KEYS.INT_ARR(1); -EXTRA_INFO.TYP4.TEXT := 'NSVC-HAND.: NSEI/NSVC-OVERFLOW'; - -G9PXYA1S!G9TE500_EHP_P( -'G9IBSA1C004', /*@@ID*/ -G9PXYA1S!G9TE102_ERR_CLASS_ESC_MAX_ANY, /*@@CLASS*/ -ADDR(EXTRA_INFO.ERROR_HANDLER), /* EXTRA-INFO ADDR */ -G9PXYA1S!G9TE100_GB_LM, /* USER-ID */ -NULL /* OPTIONAL-SWET-INFO ADDR */ -); -/***@@@ END OF ERROR ***/ - -@K@FREEZE -920903226 -@K@NAME -TR_RESET -@K@INSCRIPT -RESTART_ -TNS_RESET_ -TIMER -@K@CODE -/* TIMER EVENT DESCRIPTOR STILL THERE */ - -/* INITIALIZATION OF THE TIMER-EVENT-DESCRIPTOR STILL VALID */ -NSVCI_CON_PTR->.TIM_EVD_PTR->.TIMER:= TNS_RESET_MAP; - -/* START TIMER */ -G9PX508_START_TIMER_P -( -NSVCI_CON_PTR->.TIM_EVD_PTR -); - -@K@FREEZE -924684867 -@K@NAME -TX_AUDIT -@K@INSCRIPT -FOR -AUDIT - -@K@NAME -M_BLKOACKM -@K@INSCRIPT -NS_ -BLOCK_ACK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; - -IF -/* 'OTHER' ALIVE NSVC TO THIS NSEI EXISTING? 
*/ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR /= NULL -THEN -/* USE THIS 'OTHER' FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -/* NSEI TO BE USED FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; -/* NSVCI TO BE USED FOR TRANSPORT */ -ELSE -/* USE AFFECTED NSVC AGAIN FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -FI; - - -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D3); - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR2_NS_BLOCK_ACK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D3.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -938805885 -@K@NAME -T_RCTRUE -@K@INSCRIPT -RC -= -TRUE -@K@CODE -RC:= TRUE; - -@K@FREEZE -922176328 -@K@NAME -M_AC_SBVCN -@K@INSCRIPT -G9IBME0_ -ACT_ -SIGN_BVC_C -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB512_GET_MSG_LESS_EV_DESCR_P -( -SID_GBNSVC, -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -TX_EVD_PTR->.EVENT_CMD:= G9IBME0_ACT_SIGN_BVC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBBVC_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= EVD_PTR->.KEYS.INT_ARR(0); -TX_EVD_PTR->.KEYS.INT_ARR(1):= SIGN_BVCI; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -938788211 -@K@NAME -T_RC_EOD -@K@INSCRIPT -RC -= -END OF DATA - -@K@CODE -RC:= G9IBSM4_RC_END_OF_DATA; - -@K@FREEZE -921083785 -@K@NAME -T_RC_EMP -@K@INSCRIPT -RC -= -EMPTY - -@K@CODE -RC:= G9IBSM4_RC_EMPTY; - -@K@FREEZE -921083757 -@K@NAME -T_RC_ERR -@K@INSCRIPT -RC -= -ERROR - -@K@CODE -RC:= G9IBSM4_RC_ERROR; - -@K@FREEZE -921083731 -@K@NAME -S_UNUSED -@K@INSCRIPT -G9IBSM0_ -UNUSED -@K@CODE - - -@K@FREEZE -919416670 -@K@NAME -TA_UNBLOCK -@K@INSCRIPT -START_ -TNS_UNBLOCK_ -TIMER -@K@CODE -/* GET TIMER-EVENT DESCRIPTOR */ -G9PB513_GET_TIMER_EV_DESCR_P -( -SID_GBNSVC, -NSVCI_CON_PTR->.TIM_EVD_PTR -); - -/* INITIALIZATION OF THE TIMER-EVENT-DESCRIPTOR */ -NSVCI_CON_PTR->.TIM_EVD_PTR->.EVENT_CMD:= G9IBSE4_TO_TNS_C; -NSVCI_CON_PTR->.TIM_EVD_PTR->.EVENT_DESTINATION:= GBNSVC_HANDLE; -NSVCI_CON_PTR->.TIM_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -NSVCI_CON_PTR->.TIM_EVD_PTR->.KEYS.INT_ARR(1):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; -NSVCI_CON_PTR->.TIM_EVD_PTR->.TIMER:= TNS_UNBLOCK_MAP; - -/* START TIMER */ -G9PX508_START_TIMER_P -( -NSVCI_CON_PTR->.TIM_EVD_PTR -); - -@K@FREEZE -924686210 -@K@NAME -M_BLK_ACKM -@K@INSCRIPT -NS_ -BLOCK_ACK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED 
FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D3); - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR2_NS_BLOCK_ACK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D3.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -926348442 -@K@NAME -TA_NXTALIV -@K@INSCRIPT -DEFINE -NEW 'NEXT_ -ALIVE' -@K@CODE -IF -/* ALIVE NSVC TO THE NSEI EXISTING? */ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR /= NULL - -THEN -/* TAKE NEXT ELEMENT IN THE LINKED LIST AS THE NEXT ALIVE NSVC */ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR:= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.NEXT_ALV_NSVCI_CON_PTR; - -FI; - -@K@FREEZE -938801086 -@K@NAME -M_DE_CBVCN -@K@INSCRIPT -G9IBME2_ -DEACT_ -CELL_BVC_C - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB512_GET_MSG_LESS_EV_DESCR_P -( -SID_GBNSVC, -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -TX_EVD_PTR->.EVENT_CMD:= G9IBME2_DEACT_CELL_BVC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBBVC_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -TX_EVD_PTR->.KEYS.INT_ARR(1):= EVD_PTR->.ADD_DATA(3); - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -933318270 -@K@NAME -TA_NXTRESP -@K@INSCRIPT -DEFINE -NEW 'NEXT_ -RESPONSIBLE' -@K@CODE -NSEI_CON_PTR->.NEXT_RESP_NSVCI_CON_PTR:= - NSEI_CON_PTR->.NEXT_RESP_NSVCI_CON_PTR->.NEXT_LSP_NSVCI_CON_PTR; - - -@K@FREEZE -938005006 -@K@NAME -TA_NXTSUBS -@K@INSCRIPT -DEFINE -NEW 'NEXT_ -SUBSTITUTE' -@K@CODE -NSEI_CON_PTR->.NEXT_SUBS_NSVCI_CON_PTR:= - NSEI_CON_PTR->.NEXT_SUBS_NSVCI_CON_PTR->.NEXT_UBL_NSVCI_CON_PTR; - -@K@NAME -M_BLK_O__M -@K@INSCRIPT -NS_ -BLOCK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; - -IF -/* 'OTHER' ALIVE NSVC TO THIS NSEI EXISTING? 
*/ -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR /= NULL -THEN -/* USE THIS 'OTHER' FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; -/* NSEI TO BE USED FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - -NSVCI_CON_PTR->.OWN_NSEI_CON_PTR->.NEXT_ALIV_NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; -/* NSVCI TO BE USED FOR TRANSPORT */ -ELSE -/* USE AFFECTED NSVC AGAIN FOR TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(0):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -FI; - - -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D2); - - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR1_NS_BLOCK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D2.CAUSE_TLV.CAUSE_VAL:= - G9IBBA2_NS_TRANSIT_NETWORK_FAILURE; /* CAUSE FOR BLOCK */ -NS_PDU_PTR->.D2.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@TEXT -GSM 8.16 CHAP. 7.2: -THE NS-BLOCK-PDU MAY BE SENT IN ANY ALIVE -(BLOCKED OR UNBLOCKED) NS-VC... -@K@FREEZE -938803215 -@K@NAME -M_DE_SBVCN -@K@INSCRIPT -G9IBME1_ -DEACT_ -SIGN_BVC_C -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB512_GET_MSG_LESS_EV_DESCR_P -( -SID_GBNSVC, -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -TX_EVD_PTR->.EVENT_CMD:= G9IBME1_DEACT_SIGN_BVC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBBVC_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= EVD_PTR->.KEYS.INT_ARR(0); -TX_EVD_PTR->.KEYS.INT_ARR(1):= SIGN_BVCI; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -938788201 -@K@NAME -M_OAME401M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSE */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSE; -OAM_MSG_PTR->.HANDLED_OBJECT.NSE_ID:= EVD_PTR->.KEYS.INT_ARR(0); /* -USED NSEI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* AFFECTED NSEI (FROM PDU) */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* AFFECTED NSVCI (FROM PDU) */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935766108 -@K@NAME -M_OAME402M -@K@INSCRIPT 
-ERROR_MESSAGE: -ERRONOUS_PDU -RESET_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.KEYS.INT_ARR(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* NSEI FROM PDU */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* NSVCI FROM PDU */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935766407 -@K@NAME -M_OAME411M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_ACK_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSE */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSE; -OAM_MSG_PTR->.HANDLED_OBJECT.NSE_ID:= EVD_PTR->.KEYS.INT_ARR(0); /* -USED NSEI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_ACK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* NSEI FROM PDU */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* NSVCI FROM PDU */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935767332 -@K@NAME -M_OAME412M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -RESET_ACK_PDU - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT 
= AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.KEYS.INT_ARR(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_RESET_ACK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.ADD_DATA(0); /* NSEI FROM PDU */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.ADD_DATA(1); /* NSVCI FROM PDU */ - - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -935767189 -@K@NAME -C_CON -@K@INSCRIPT -RC_DB -@K@CODE -RC_DB - -@K@FREEZE -922176673 -@K@NAME -M_BLK____M -@K@INSCRIPT -NS_ -BLOCK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9IBDF4_NS_LM_M) + G9IBD44_NS_PDU_DATA_OFFSET_C), -TX_EVD_PTR -); - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9IBD40_NS_LM_PDU_C; -TX_EVD_PTR->.EVENT_DESTINATION:= GBDL_HANDLE; -TX_EVD_PTR->.KEYS.INT_ARR(0):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSEI; /* NSEI TO BE USED FOR -TRANSPORT */ -TX_EVD_PTR->.KEYS.INT_ARR(1):= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVCI TO BE USED FOR -TRANSPORT */ -/* POINTER TO PDU IN POOL-ELEMENT */ -NS_PDU_PTR:= NS_PDU_REF_M (INT(TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT) - + G9IBD44_NS_PDU_DATA_OFFSET_C); -/* OFFSET OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_OFFSET:= G9IBD44_NS_PDU_DATA_OFFSET_C; -/* LENGTH OF THE PDU IN POOL-ELEMENT */ -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(NS_PDU_PTR->.G9IBDF4_PDU_TYPE) + -SIZE(NS_PDU_PTR->.D2); - - -/* NOW THE POOL-ELEMENT */ -NS_PDU_PTR->.G9IBDF4_PDU_TYPE:= G9IBDR1_NS_BLOCK_C; /* PDU-TYPE */ - -NS_PDU_PTR->.D2.CAUSE_TLV.CAUSE_VAL:= - G9IBBA2_NS_OAM_INTERVENTION; /* CAUSE FOR BLOCK */ -NS_PDU_PTR->.D2.NSVCI_TLV.NSVCI_VAL := - NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; /* NSVC TO BE BLOCKED */ - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@TEXT -GSM 8.16 CHAP. 7.2: -THE NS-BLOCK-PDU MAY BE SENT IN ANY ALIVE -(BLOCKED OR UNBLOCKED) NS-VC... 
-@K@FREEZE -926348613 -@K@NAME -S_BLOCKED -@K@INSCRIPT -G9IBSM0_ -BLOCKED -@K@CODE - - -@K@FREEZE -922176496 -@K@NAME -D_CON -@K@INSCRIPT -CONTEXT -GOT -@K@CODE -RC_DB = G9IBSR0_RC_OK - -@K@FREEZE -921772339 -@K@NAME -M_OAME901M -@K@INSCRIPT -ERROR_MESSAGE: -OPERATIONAL_STATE_CHANGE -UBL->BLK -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = OPERATIONAL_STATE_CHANGE */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_OPERATIONAL_STATE_CHANGE; -/* ADDITIONAL_OPERATIONAL_STATE_INFO */ -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_OLD:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_OLD:= - G9OC102_ENABLED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_NEW:= - G9OC102_ENABLED; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -925970975 -@K@NAME -M_OAME902M -@K@INSCRIPT -ERROR_MESSAGE: -OPERATIONAL_STATE_CHANGE -UBL->BLK - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = OPERATIONAL_STATE_CHANGE */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_OPERATIONAL_STATE_CHANGE; -/* ADDITIONAL_OPERATIONAL_STATE_INFO */ -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_OLD:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_OLD:= - G9OC102_ENABLED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_NEW:= - G9OC102_DISABLED; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -925970987 -@K@NAME -M_OAME10SM -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; 
-TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@NAME -M_OAME911M -@K@INSCRIPT -ERROR_MESSAGE: -OPERATIONAL_STATE_CHANGE -BLK->UBL -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = OPERATIONAL_STATE_CHANGE */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_OPERATIONAL_STATE_CHANGE; -/* ADDITIONAL_OPERATIONAL_STATE_INFO */ -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.OPER_STATE_NEW:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_OLD:= - G9OC102_ENABLED; -OAM_MSG_PTR->.ADDITIONAL_OPERATIONAL_STATE_INFO.ADMIN_STATE_NEW:= - G9OC102_ENABLED; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -925970996 -@K@NAME -M_OAME20SM -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_END_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= 
G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_END_NS_ALIVE_TEST */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_END_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@NAME -M_OAME10_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443813 -@K@NAME -D_SEM -@K@INSCRIPT -CALL_SEM -= -TRUE -@K@CODE -CALL_SEM = TRUE - -@K@FREEZE -922176624 -@K@NAME -D_N_0 -@K@INSCRIPT -N = 0 - -@K@CODE -NSVCI_CON_PTR->.N = 0 - -@K@FREEZE -921511000 -@K@NAME -M_OAME12_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* 
ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - FALSE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_ALIVE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443829 -@K@NAME -M_OAME21_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_END_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_END_NS_ALIVE_TEST */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_END_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - TRUE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - FALSE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443856 -@K@NAME -M_OAME13_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_BEGIN_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_BEGIN_NS_ALIVE_TEST*/ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_BEGIN_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - FALSE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_UNBLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_ALIVE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_DEAD; 
-OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443838 -@K@NAME -M_OAME22_M -@K@INSCRIPT -ERROR_MESSAGE: -ALARM_END_NS_ALIVE_TEST -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = ALARM_END_NS_ALIVE_TEST */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ALARM_END_NS_ALIVE_TEST; -/* ADDITIONAL_ALARM_INFO */ -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.ADMINISTRATIVE_STATE_CHANGED:= - FALSE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_OLD:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.OPER_STATE_NEW:= - G9OC101_BLOCKED; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_OLD:= - G9OC103_DEAD; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.AVAIL_STATE_NEW:= - G9OC103_ALIVE; -OAM_MSG_PTR->.ADDITIONAL_ALARM_INFO.CONFIGURATION_OF_NSVC:= - TRUE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922443864 -@K@NAME -M_OAME30_M -@K@INSCRIPT -ERROR_MESSAGE: -NO_ANSWER_FORM_BSS -RESET_PROCEDURE - - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = NO_ANSWER_FROM_BSS */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_NO_ANSWER_FROM_BSS; -/* INITIATED_PROCEDURE */ -OAM_MSG_PTR->.INITIATED_PROCEDURE:= - G9OC123_RESET_PROCEDURE; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922175973 -@K@NAME -M_OAME31_M -@K@INSCRIPT -ERROR_MESSAGE: -NO_ANSWER_FROM_BSS -BLOCK_PROCEDURE - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE 
POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = NO_ANSWER_FROM_BSS */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_NO_ANSWER_FROM_BSS; -/* INITIATED_PROCEDURE */ -OAM_MSG_PTR->.INITIATED_PROCEDURE:= - G9OC123_BLOCK_PROCEDURE; - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922175976 -@K@NAME -M_OAME32_M -@K@INSCRIPT -ERROR_MESSAGE: -NO_ANSWER_FROM_BSS -UNBLOCK_PROCEDURE - -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR_CAUSE = NO_ANSWER_FROM_BSS */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_NO_ANSWER_FROM_BSS; -/* INITIATED_PROCEDURE */ -OAM_MSG_PTR->.INITIATED_PROCEDURE:= - G9OC123_UNBLOCK_PROCEDURE; - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -922175980 -@K@NAME -M_OAME42_M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -BLOCK_PDU -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.ADD_DATA(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_BLOCK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.KEYS.INT_ARR(0); /* AFFECTED NSEI (FROM PDU) */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.KEYS.INT_ARR(1); /* AFFECTED NSVCI (FROM PDU) */ - - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -934296141 -@K@NAME -M_OAME50_M -@K@INSCRIPT -ERROR_MESSAGE: -STATUS_PDU_CONTAINS_ERROR_INFO -RECEIVED -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ 
-TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= -NSVCI_CON_PTR->.DBMS.NSVC_INSTANCE.NSVCI; - -/* ERROR-CAUSE = STATUS_PDU_CONTAINS_ERROR_INFO */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_STATUS_PDU_CONTAINS_ERROR_INFO; -/* ADDITIONAL_STATUS_PDU_INFO */ -INT_CAUSE_PTR.INT_PTR:= ADDR(EVD_PTR->.ADD_DATA(2)); -OAM_MSG_PTR->.ADDITIONAL_STATUS_PDU_INFO:= - INT_CAUSE_PTR.CAUSE_PTR->; /* CAUSE */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@TEXT -NICHT OK - -@K@FREEZE -934298924 -@K@NAME -M_OAME43_M -@K@INSCRIPT -ERROR_MESSAGE: -ERRONOUS_PDU -BLOCK_ACK_PDU -@K@CODE -/* GETTING THE EVENT DESCRIPTOR */ -G9PB511_GET_MSG_BOUND_EV_DESCR_P -( -SID_GBNSVC, -(SIZE(G9OC109_REPORTED_EVENT_STR_M)), -TX_EVD_PTR -); - - -/* COMPOSING THE EVENT */ -/* FIRST THE DESCRIPTOR */ -TX_EVD_PTR->.EVENT_CMD:= G9PX040_SEND_MBC_C; -TX_EVD_PTR->.EVENT_DESTINATION:= RXTX_HANDLE; -TX_EVD_PTR->.KEYS.UBI_INDEX:= OAM_UBI_INDEX; -TX_EVD_PTR->.BOUND.DATA_OFFSET:= 0; -TX_EVD_PTR->.BOUND.DATA_LENGTH:= SIZE(G9OC109_REPORTED_EVENT_STR_M); - -/* NOW THE POOL-ELEMENT */ -/* INITIALIZATION OF THE POINTER WITH THE POOL-ELEMENT-START */ -OAM_MSG_PTR:= OAM_MSG_PTR_M (TX_EVD_PTR->.BOUND.PTR_TO_POOL_ELEMENT); - -/* COMPOSING THE MESSAGE */ -/* HANDLED OBJECT = AFFECTED INSTANCE, TYPE NSVC */ -OAM_MSG_PTR->.HANDLED_OBJECT.OBJECT_TYPE:= G9OC104_NSVC; -OAM_MSG_PTR->.HANDLED_OBJECT.NSVC_ID:= EVD_PTR->.ADD_DATA(1); /* -USED NSVCI (FROM ECI) */ - -/* ERROR-CAUSE = ERRONEOUS_PDU */ -OAM_MSG_PTR->.ERROR_CAUSE:= G9OC108_ERRONEOUS_PDU; -/* ADDITIONAL_PDU_INFO */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_PDU_TYPE:= - G9OC124_BLOCK_ACK_PDU; -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSEI:= - EVD_PTR->.KEYS.INT_ARR(0); /* AFFECTED NSEI (FROM PDU) */ -OAM_MSG_PTR->.ADDITIONAL_PDU_INFO.G9OC120_REPORTED_NSVC:= - EVD_PTR->.KEYS.INT_ARR(1); /* AFFECTED NSVCI (FROM PDU) */ - - - -/* SENDING */ -G9PX503_POST_EVENT_P(TX_EVD_PTR); - -@K@FREEZE -934297710 diff --git a/contrib/awk/test/getnr2tb.awk b/contrib/awk/test/getnr2tb.awk deleted file mode 100644 index 204acf4..0000000 --- a/contrib/awk/test/getnr2tb.awk +++ /dev/null @@ -1,111 +0,0 @@ -#From vp@dmat.uevora.pt Thu Jun 18 09:10 EDT 1998 -#Received: from mescaline.gnu.org (we-refuse-to-spy-on-our-users@mescaline.gnu.org [158.121.106.21]) by cssun.mathcs.emory.edu (8.7.5/8.6.9-940818.01cssun) with ESMTP id JAA23649 for ; Thu, 18 Jun 1998 09:10:54 -0400 (EDT) -#Received: from khromeleque.dmat.uevora.pt by mescaline.gnu.org (8.8.5/8.6.12GNU) with ESMTP id JAA21732 for ; Thu, 18 Jun 1998 09:11:19 -0400 -#Received: from khromeleque.dmat.uevora.pt (vp@localhost [127.0.0.1]) -# by khromeleque.dmat.uevora.pt (8.8.8/8.8.8/Debian/GNU) with ESMTP id OAA11817 -# for ; Thu, 18 Jun 1998 14:13:57 +0100 -#Message-Id: <199806181313.OAA11817@khromeleque.dmat.uevora.pt> -#To: arnold@gnu.org -#Subject: concatenation bug in gawk 3.0.3 -#Date: Thu, 18 Jun 1998 14:13:57 +0200 -#From: Vasco Pedro -#Content-Type: text -#Content-Length: 2285 -#Status: RO -# -#Hi, -# -#The gawk 
program '{print NR " " 10/NR}' will print: -# -#1 10 -#5 5 -#3 3.33333 -#2 2.5 -#2 2 -#1 1.66667 -# -#instead of the correct: -# -#1 10 -#2 5 -#3 3.33333 -#4 2.5 -#5 2 -#6 1.66667 -# -#You'll notice, on the incorrect output, that the first column is -#the first digit of the second. -# -#I think the problem comes from the way builtin variables are handled. -#Since the items to be concatenated are processed in reverse order and -#the return value of tree_eval(``NR'') is a pointer to the value part -#of `NR_node', the `unref()' of `NR_node' due to its second occurrence -#will leave a dangling pointer in `strlist'. The reason that it doesn't -#reuse the freed space with objects of the same type. (Using Electric -#Fence with EF_PROTECT_FREE set confirms that freed space is being -#accessed.) -# -#The enclosed patch (hack would be a better word to describe it) is -#all I could come up with. With it installed, things seem to work ok, -#but I doubt this is the correct way to do it. (If I treated the -#case for `Node_field_spec' as the I did others, `make check' would -#fail in several places.) -# -#Regards, -#vasco -# -#*** eval.c~ Tue May 6 21:39:55 1997 -#--- eval.c Thu Jun 18 13:39:25 1998 -#*************** -#*** 685,697 **** -# return func_call(tree->rnode, tree->lnode); -# -# /* unary operations */ -# case Node_NR: -# case Node_FNR: -# case Node_NF: -# case Node_FIELDWIDTHS: -# case Node_FS: -# case Node_RS: -#- case Node_field_spec: -# case Node_subscript: -# case Node_IGNORECASE: -# case Node_OFS: -#--- 685,700 ---- -# return func_call(tree->rnode, tree->lnode); -# -# /* unary operations */ -#+ case Node_field_spec: -#+ lhs = get_lhs(tree, (Func_ptr *) NULL); -#+ return *lhs; -#+ -# case Node_NR: -# case Node_FNR: -# case Node_NF: -# case Node_FIELDWIDTHS: -# case Node_FS: -# case Node_RS: -# case Node_subscript: -# case Node_IGNORECASE: -# case Node_OFS: -#*************** -#*** 699,705 **** -# case Node_OFMT: -# case Node_CONVFMT: -# lhs = get_lhs(tree, (Func_ptr *) NULL); -#! return *lhs; -# -# case Node_var_array: -# fatal("attempt to use array `%s' in a scalar context", -#--- 702,710 ---- -# case Node_OFMT: -# case Node_CONVFMT: -# lhs = get_lhs(tree, (Func_ptr *) NULL); -#! r = dupnode(*lhs); -#! r->flags |= TEMP; -#! 
return r; -# -# case Node_var_array: -# fatal("attempt to use array `%s' in a scalar context", -# -{ print NR " " 10/NR } diff --git a/contrib/awk/test/getnr2tb.in b/contrib/awk/test/getnr2tb.in deleted file mode 100644 index f985857..0000000 --- a/contrib/awk/test/getnr2tb.in +++ /dev/null @@ -1,6 +0,0 @@ -line 1 -line 2 -line 3 -line 4 -line 5 -line 6 diff --git a/contrib/awk/test/getnr2tb.ok b/contrib/awk/test/getnr2tb.ok deleted file mode 100644 index 7b40e8d..0000000 --- a/contrib/awk/test/getnr2tb.ok +++ /dev/null @@ -1,6 +0,0 @@ -1 10 -2 5 -3 3.33333 -4 2.5 -5 2 -6 1.66667 diff --git a/contrib/awk/test/getnr2tm.awk b/contrib/awk/test/getnr2tm.awk deleted file mode 100644 index dfe377a..0000000 --- a/contrib/awk/test/getnr2tm.awk +++ /dev/null @@ -1,75 +0,0 @@ -#From dhw@gamgee.acad.emich.edu Sat Oct 31 22:54:07 1998 -#Return-Path: -#Received: from cssun.mathcs.emory.edu (cssun.mathcs.emory.edu [170.140.150.1]) -# by amx.netvision.net.il (8.9.0.Beta5/8.8.6) with ESMTP id HAA08891 -# for ; Sat, 31 Oct 1998 07:14:07 +0200 (IST) -#Received: from mescaline.gnu.org (we-refuse-to-spy-on-our-users@mescaline.gnu.org [158.121.106.21]) by cssun.mathcs.emory.edu (8.7.5/8.6.9-940818.01cssun) with ESMTP id AAA14947 for ; Sat, 31 Oct 1998 00:14:32 -0500 (EST) -#Received: from gamgee.acad.emich.edu (gamgee.acad.emich.edu [164.76.102.76]) -# by mescaline.gnu.org (8.9.1a/8.9.1) with SMTP id AAA20645 -# for ; Sat, 31 Oct 1998 00:17:54 -0500 -#Received: by gamgee.acad.emich.edu (Smail3.1.29.1 #57) -# id m0zZUKY-000IDSC; Sat, 31 Oct 98 00:16 CST -#Message-Id: -#Date: Sat, 31 Oct 98 00:16 CST -#From: dhw@gamgee.acad.emich.edu (David H. West) -#To: bug-gnu-utils@gnu.org -#Subject: gawk 3.0.3 bug report -#Cc: arnold@gnu.org -#X-UIDL: 7474b825cff989adf38f13883d84fdd7 -#Status: RO -# -#gawk version: 3.03 -#System used: Linux, kernel 2.0.28, libc 5.4.33, AMD K5PR133 (i586 clone) -#Remark: There seems to be at least one bug shown by the demo below. -# There may also be a Dark Corner involving the value of NR in an -# END block, a topic on which the info file is silent. In gawk -# 3.0.3, NR often seems to have the least-surprise value in an -# END block, but sometimes it doesn't - see example below. -#Problem descr: the log below shows a case where: -# a) (this may be a red herring) the output of the gawk script -# is different depending on whether its input file is named on -# the command line or catted to stdin, without any use of the -# legitimate means which could produce this effect. -# b) NR is clearly getting clobbered; I have tried to simplify -# the 19-line script "awkerr1" below, but seemingly unrelated -# changes, like shortening constant strings which appear only in -# print statements, or removing unexecuted or irrelevant code, -# cause the clobbering to go away. Some previous (larger) -# versions of this code would clobber NR also when reading from -# stdin, but I thought you'd prefer a shorter example :-). -#Reproduce-By: using the gawk script "awkerr1", the contents of -# which appear in the transcript below as the output of the -# command "cat awkerr1". Comments following # were added -# to the transcript later as explanation. 
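Both reports above concern NR: getnr2tb shows a concatenation that uses NR twice going wrong, and getnr2tm shows NR being clobbered by seemingly unrelated code. A minimal sketch of the behaviour the getnr2tb test expects, together with a defensive rewrite, follows (plain awk; not part of the deleted test files):

# expected output, per getnr2tb.ok: NR and 10/NR stay in step
#   1 10
#   2 5
#   3 3.33333  ...
# defensive rewrite for the affected gawk versions: copy NR into an
# ordinary variable before using it twice in one expression
{ n = NR + 0; print n " " 10/n }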
-#---------------------------------------------- Script started on Fri -#Oct 30 20:04:16 1998 chipmunk:/ram0# ls -l a1 awkerr1 -rw-r--r-- 1 -#root root 2 Oct 30 18:42 a1 -rwxr-xr-x 1 root root -#389 Oct 30 19:54 awkerr1 chipmunk:/ram0# cat a1 #a1 contains -#one printable char and a newline a chipmunk:/ram0# od -c xc a1 -#0000000 0a61 -# a \n -#0000002 chipmunk:/ram0# cat a1 | awkerr1 #no surprises here -#1 lines in 1 sec: 1 lines/sec; nlines=1 chipmunk:/ram0# awkerr1 a1 È -#lines in 1 sec: 1 lines/sec; nlines=1 #?! first char is an uppercase -#E-grave chipmunk:/ram0# awkerr1 a1 | od -N1 -xc 0000000 00c8 -# 310 \0 -#0000001 chipmunk:/ram0# cat awkerr1 #the apparent ^M's are not -#actually in the file -#!/usr/bin/awk -f -function process(w) { - if(w in ws) { - printf " : found\n"; lc[p " " w]++; rc[w " " n]++; } - } -BEGIN {IGNORECASE=1; - } -/^/ {if(NR % 10 ==0)print "processing line " NR; - process($1); nlines++; - } -END {p=w; w=n; n=""; - if(w)process(w); t=1; print NR " lines in " t " sec: " NR+0 " lines/sec; nlines=" nlines; - } -#chipmunk:/ram0# exit Script done on Fri Oct 30 20:07:31 1998 -#--------------------------------------------- -# -#-David West dhw@gamgee.acad.emich.edu -# diff --git a/contrib/awk/test/getnr2tm.in b/contrib/awk/test/getnr2tm.in deleted file mode 100644 index 7898192..0000000 --- a/contrib/awk/test/getnr2tm.in +++ /dev/null @@ -1 +0,0 @@ -a diff --git a/contrib/awk/test/getnr2tm.ok b/contrib/awk/test/getnr2tm.ok deleted file mode 100644 index d63fca0..0000000 --- a/contrib/awk/test/getnr2tm.ok +++ /dev/null @@ -1 +0,0 @@ -1 lines in 1 sec: 1 lines/sec; nlines=1 diff --git a/contrib/awk/test/gnuops2.awk b/contrib/awk/test/gnuops2.awk deleted file mode 100644 index 1b0d4d7..0000000 --- a/contrib/awk/test/gnuops2.awk +++ /dev/null @@ -1,72 +0,0 @@ -# From Servatius.Brandt@fujitsu-siemens.com Fri Dec 1 13:44:48 2000 -# Received: from mail.actcom.co.il -# by localhost with POP3 (fetchmail-5.1.0) -# for arnold@localhost (single-drop); Fri, 01 Dec 2000 13:44:48 +0200 (IST) -# Received: by actcom.co.il (mbox arobbins) -# (with Cubic Circle's cucipop (v1.31 1998/05/13) Fri Dec 1 13:44:10 2000) -# X-From_: Servatius.Brandt@fujitsu-siemens.com Fri Dec 1 13:11:23 2000 -# Received: from lmail.actcom.co.il by actcom.co.il with ESMTP -# (8.9.1a/actcom-0.2) id NAA11033 for ; -# Fri, 1 Dec 2000 13:11:21 +0200 (EET) -# (rfc931-sender: lmail.actcom.co.il [192.114.47.13]) -# Received: from billohost.com (10-209.196.35.dellhost.com [209.196.35.10] (may be forged)) -# by lmail.actcom.co.il (8.9.3/8.9.1) with ESMTP id NAA30286 -# for ; Fri, 1 Dec 2000 13:12:25 +0200 -# Received: from fencepost.gnu.org (we-refuse-to-spy-on-our-users@fencepost.gnu.org [199.232.76.164]) -# by billohost.com (8.9.3/8.9.3) with ESMTP id GAA26074 -# for ; Fri, 1 Dec 2000 06:09:08 -0500 -# Received: from energy.pdb.sbs.de ([192.109.2.19]) -# by fencepost.gnu.org with esmtp (Exim 3.16 #1 (Debian)) -# id 141o5z-0000RJ-00; Fri, 01 Dec 2000 06:11:16 -0500 -# Received: from trulli.pdb.fsc.net ([172.25.96.20]) -# by energy.pdb.sbs.de (8.9.3/8.9.3) with ESMTP id MAA32687; -# Fri, 1 Dec 2000 12:11:13 +0100 -# Received: from pdbrd02e.pdb.fsc.net (pdbrd02e.pdb.fsc.net [172.25.96.15]) -# by trulli.pdb.fsc.net (8.9.3/8.9.3) with ESMTP id MAA27384; -# Fri, 1 Dec 2000 12:11:13 +0100 -# Received: from Fujitsu-Siemens.com (pgtd1181.mch.fsc.net [172.25.126.152]) by pdbrd02e.pdb.fsc.net with SMTP (Microsoft Exchange Internet Mail Service Version 5.5.2650.21) -# id XC2QLXS2; Fri, 1 Dec 2000 12:11:13 +0100 -# Message-ID: 
<3A2786CF.1000903@Fujitsu-Siemens.com> -# Date: Fri, 01 Dec 2000 12:09:03 +0100 -# From: Servatius Brandt -# Organization: Fujitsu Siemens Computers -# User-Agent: Mozilla/5.0 (Windows; U; Win95; en-US; m18) Gecko/20001108 Netscape6/6.0 -# X-Accept-Language: de, en -# MIME-Version: 1.0 -# To: bug-gnu-utils@gnu.org -# CC: arnold@gnu.org -# Subject: Bug Report: \y, \B, \<, \> do not work with _ -# Content-Type: text/plain; charset=us-ascii; format=flowed -# Content-Transfer-Encoding: 7bit -# Status: R -# -# Hello, -# -# The \y, \B, \<, \> patterns do not regard _ as -# word-constituent (unlike \w and \W, which do). -# -# Operating system: ReliantUNIX-Y 5.44 C2001 RM600 R10000 -# Version of gawk: 3.0.6 -# C-Compiler: Fujitsu Siemens Computers CDS++ V2.0C0004 -# -# Test program: -# -#!/usr/local/bin/gawk -f - -BEGIN { - print match("X _abc Y", /\<_abc/) # bug - print match("X _abc Y", /\y_abc/) # bug - print match("X abc_ Y", /abc_\>/) # bug - print match("X abc_ Y", /abc_\y/) # bug - print match("X abc_def Y", /abc_\Bdef/) # bug - - print match("X a_c Y", /a\wc/) # ok! - print match("X a.c Y", /a\Wc/) # ok! - exit -} -# -# -# Regards, -# Servatius Brandt -# -# diff --git a/contrib/awk/test/gnuops2.ok b/contrib/awk/test/gnuops2.ok deleted file mode 100644 index aa0ecae..0000000 --- a/contrib/awk/test/gnuops2.ok +++ /dev/null @@ -1,7 +0,0 @@ -3 -3 -3 -3 -3 -3 -3 diff --git a/contrib/awk/test/gtlnbufv.awk b/contrib/awk/test/gtlnbufv.awk deleted file mode 100644 index ce7d243..0000000 --- a/contrib/awk/test/gtlnbufv.awk +++ /dev/null @@ -1,2 +0,0 @@ -/@K@CODE/ { print ; getline temp ; print temp ;next } -{print} diff --git a/contrib/awk/test/hsprint.awk b/contrib/awk/test/hsprint.awk deleted file mode 100644 index d17cede..0000000 --- a/contrib/awk/test/hsprint.awk +++ /dev/null @@ -1,74 +0,0 @@ -# Test which attempts to repeat examples of formatted output -# from "C a reference manual" by Harbison and Steele. -# -# In the second series of outputs formats of a type "%5%" are skipped -# since my old copy of H&S explicitely requires padding ("...%05% will -# print 0000%..."), whereas Standard says "...the complete conversion -# specification shall be %%". 
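The flag and width combinations that hsprint.awk iterates over below can be previewed with a much smaller sketch (plain awk; not part of the deleted files); the expected fields are taken from the first rows of hsprint.ok:

BEGIN {
    # expected: |   45|00045|  055| 0x2d|  12.68|
    printf "|%5d|%05d|%#5o|%#5x|%7.2f|\n", 45, 45, 45, 45, 12.678
}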
-# -# Michal Jaegermann - michal@phys.ualberta.ca - - -BEGIN { - zero = "0"; - alt = "#"; - spc = " "; - plus = "+"; - just = "-"; - value[0] = 45; - value[1] = 45; - value[2] = 45; - value[3] = 12.678; - value[4] = 12.678; - value[5] = 12.678; - value[6] = "zap"; - value[7] = "*"; - value[8] = -3.4567; - value[9] = -3.4567; - value[10]= -3.4567; - value[11]= -3.4567; - oper[0] = "5d"; - oper[1] = "5o"; - oper[2] = "5x"; - oper[3] = "7.2f"; - oper[4] = "10.2e"; - oper[5] = "10.4g"; - oper[6] = "5s"; - oper[7] = "5c"; - oper[8] = "7.1G"; - oper[9] = "7.2f"; - oper[10] = "10.2e"; - oper[11] = "10.4g"; - - - for (r = 0; r < 12; r += 6) { - for (j = 2; j > 0; --j) { - for (p = 2; p > 0; --p) { - for (s = 2; s > 0; --s) { - for (a = 2; a > 0; --a) { - for (z = 2; z > 0; --z) { - fmt = "%" substr(just,j,1) substr(plus,p,1) \ - substr(spc,s,1) substr(alt,a,1) substr(zero,z,1); - fstr = sprintf(\ - "%6s|%s%s|%s%s|%s%s|%s%s|%s%s|%s%s|\n", - fmt, - fmt, oper[r], - fmt, oper[r+1], - fmt, oper[r+2], - fmt, oper[r+3], - fmt, oper[r+4], - fmt, oper[r+5]); - printf(fstr, value[r], value[r+1], - value[r+2], value[r+3], - value[r+4], value[r+5]); - } - } - } - } - } - print ""; - } -} - - - diff --git a/contrib/awk/test/hsprint.ok b/contrib/awk/test/hsprint.ok deleted file mode 100644 index 55fd16f..0000000 --- a/contrib/awk/test/hsprint.ok +++ /dev/null @@ -1,66 +0,0 @@ - %| 45| 55| 2d| 12.68| 1.27e+01| 12.68| - %0|00045|00055|0002d|0012.68|001.27e+01|0000012.68| - %#| 45| 055| 0x2d| 12.68| 1.27e+01| 12.68| - %#0|00045|00055|0x02d|0012.68|001.27e+01|0000012.68| - % | 45| 55| 2d| 12.68| 1.27e+01| 12.68| - % 0| 0045|00055|0002d| 012.68| 01.27e+01| 000012.68| - % #| 45| 055| 0x2d| 12.68| 1.27e+01| 12.68| - % #0| 0045|00055|0x02d| 012.68| 01.27e+01| 000012.68| - %+| +45| 55| 2d| +12.68| +1.27e+01| +12.68| - %+0|+0045|00055|0002d|+012.68|+01.27e+01|+000012.68| - %+#| +45| 055| 0x2d| +12.68| +1.27e+01| +12.68| - %+#0|+0045|00055|0x02d|+012.68|+01.27e+01|+000012.68| - %+ | +45| 55| 2d| +12.68| +1.27e+01| +12.68| - %+ 0|+0045|00055|0002d|+012.68|+01.27e+01|+000012.68| - %+ #| +45| 055| 0x2d| +12.68| +1.27e+01| +12.68| - %+ #0|+0045|00055|0x02d|+012.68|+01.27e+01|+000012.68| - %-|45 |55 |2d |12.68 |1.27e+01 |12.68 | - %-0|45 |55 |2d |12.68 |1.27e+01 |12.68 | - %-#|45 |055 |0x2d |12.68 |1.27e+01 |12.68 | - %-#0|45 |055 |0x2d |12.68 |1.27e+01 |12.68 | - %- | 45 |55 |2d | 12.68 | 1.27e+01 | 12.68 | - %- 0| 45 |55 |2d | 12.68 | 1.27e+01 | 12.68 | - %- #| 45 |055 |0x2d | 12.68 | 1.27e+01 | 12.68 | - %- #0| 45 |055 |0x2d | 12.68 | 1.27e+01 | 12.68 | - %-+|+45 |55 |2d |+12.68 |+1.27e+01 |+12.68 | - %-+0|+45 |55 |2d |+12.68 |+1.27e+01 |+12.68 | - %-+#|+45 |055 |0x2d |+12.68 |+1.27e+01 |+12.68 | - %-+#0|+45 |055 |0x2d |+12.68 |+1.27e+01 |+12.68 | - %-+ |+45 |55 |2d |+12.68 |+1.27e+01 |+12.68 | - %-+ 0|+45 |55 |2d |+12.68 |+1.27e+01 |+12.68 | - %-+ #|+45 |055 |0x2d |+12.68 |+1.27e+01 |+12.68 | -%-+ #0|+45 |055 |0x2d |+12.68 |+1.27e+01 |+12.68 | - - %| zap| *| -3| -3.46| -3.46e+00| -3.457| - %0|00zap|0000*|-000003|-003.46|-03.46e+00|-00003.457| - %#| zap| *| -3.| -3.46| -3.46e+00| -3.457| - %#0|00zap|0000*|-00003.|-003.46|-03.46e+00|-00003.457| - % | zap| *| -3| -3.46| -3.46e+00| -3.457| - % 0|00zap|0000*|-000003|-003.46|-03.46e+00|-00003.457| - % #| zap| *| -3.| -3.46| -3.46e+00| -3.457| - % #0|00zap|0000*|-00003.|-003.46|-03.46e+00|-00003.457| - %+| zap| *| -3| -3.46| -3.46e+00| -3.457| - %+0|00zap|0000*|-000003|-003.46|-03.46e+00|-00003.457| - %+#| zap| *| -3.| -3.46| -3.46e+00| -3.457| - 
%+#0|00zap|0000*|-00003.|-003.46|-03.46e+00|-00003.457| - %+ | zap| *| -3| -3.46| -3.46e+00| -3.457| - %+ 0|00zap|0000*|-000003|-003.46|-03.46e+00|-00003.457| - %+ #| zap| *| -3.| -3.46| -3.46e+00| -3.457| - %+ #0|00zap|0000*|-00003.|-003.46|-03.46e+00|-00003.457| - %-|zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %-0|zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %-#|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - %-#0|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - %- |zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %- 0|zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %- #|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - %- #0|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - %-+|zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %-+0|zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %-+#|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - %-+#0|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - %-+ |zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %-+ 0|zap |* |-3 |-3.46 |-3.46e+00 |-3.457 | - %-+ #|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | -%-+ #0|zap |* |-3. |-3.46 |-3.46e+00 |-3.457 | - diff --git a/contrib/awk/test/igncdym.awk b/contrib/awk/test/igncdym.awk deleted file mode 100644 index 3119c3e..0000000 --- a/contrib/awk/test/igncdym.awk +++ /dev/null @@ -1,56 +0,0 @@ -#From Jeffrey.B.Woodward@Hitchcock.ORG Mon Feb 21 09:33:32 2000 -#Message-id: <12901034@mailbox2.Hitchcock.ORG> -#Date: 20 Feb 2000 18:14:11 EST -#From: Jeffrey.B.Woodward@Hitchcock.ORG (Jeffrey B. Woodward) -#Subject: gawk 3.0.4 bug -#To: bug-gnu-utils@gnu.org -#Cc: arnold@gnu.org -# -#O/S: Digital UNIX 4.0D -# -#C Compiler: DEC C -# -#gawk version: 3.0.4 -# -#Sample Program: -#gawk ' - BEGIN { - pattern[1] = "bar" ; ignore[1] = 1 - pattern[2] = "foo" ; ignore[2] = 0 - } - - { - for (i = 1 ; i <= 2 ; i++) { - IGNORECASE = ignore[i] - print match($0, pattern[i]) " " pattern[i] ":" $0 - } - } -#' << -EOF- -#This is foo -#This is bar -#-EOF- -# -#Program Output: -#0 bar:This is foo -#0 foo:This is foo -#9 bar:This is bar -#9 foo:This is bar -# -# -#**Expected** Output: -#0 bar:This is foo -#9 foo:This is foo -#9 bar:This is bar -#0 foo:This is bar -# -# -#This problem appears to be directly related to IGNORECASE. If -#IGNORECASE remains constant, the program behaves as expected; -#however, switching IGNORECASE seems to causes problems - it is -#almost as though the pattern stored in the variable is treated -#as a constant and the regexp() is not recompiled(?) - just a -#guess... -# -# -#Thanks, -#-Jeff Woodward diff --git a/contrib/awk/test/igncdym.in b/contrib/awk/test/igncdym.in deleted file mode 100644 index 43e361a..0000000 --- a/contrib/awk/test/igncdym.in +++ /dev/null @@ -1,2 +0,0 @@ -This is foo -This is bar diff --git a/contrib/awk/test/igncdym.ok b/contrib/awk/test/igncdym.ok deleted file mode 100644 index e715a6d..0000000 --- a/contrib/awk/test/igncdym.ok +++ /dev/null @@ -1,4 +0,0 @@ -0 bar:This is foo -9 foo:This is foo -9 bar:This is bar -0 foo:This is bar diff --git a/contrib/awk/test/leaddig.awk b/contrib/awk/test/leaddig.awk deleted file mode 100644 index 6c001ba..0000000 --- a/contrib/awk/test/leaddig.awk +++ /dev/null @@ -1,7 +0,0 @@ -# check that values with leading digits get converted the -# right way, based on a note in comp.lang.awk. 
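The igncdym report above turns on whether IGNORECASE is honoured each time a dynamic regexp held in a variable is matched. A minimal sketch of the expected behaviour (gawk-specific, since IGNORECASE is a gawk extension; not part of the deleted files):

BEGIN { pat = "FOO" }
{
    IGNORECASE = 0; print match($0, pat), "case-sensitive"
    IGNORECASE = 1; print match($0, pat), "case-insensitive"
}
# fed the line "This is foo" this should print 0, then 9,
# mirroring the alternating results recorded in igncdym.ok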
-# -# run with gawk -v x=2E -f leaddig.awk -BEGIN { - print "x =", x, (x == 2), (x == 2E0), (x == 2E), (x == 2D) -} diff --git a/contrib/awk/test/leaddig.ok b/contrib/awk/test/leaddig.ok deleted file mode 100644 index 7a9d866..0000000 --- a/contrib/awk/test/leaddig.ok +++ /dev/null @@ -1 +0,0 @@ -x = 2E 0 0 0 0 diff --git a/contrib/awk/test/leadnl.awk b/contrib/awk/test/leadnl.awk deleted file mode 100644 index 0b7d0a9..0000000 --- a/contrib/awk/test/leadnl.awk +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN { - RS = ""; FS = "\n" -} - -{ - print "Name is: ", $1 - print "Address is: ", $2 - print "City and State are: ", $3 - print "" -} diff --git a/contrib/awk/test/leadnl.in b/contrib/awk/test/leadnl.in deleted file mode 100644 index 67c0239..0000000 --- a/contrib/awk/test/leadnl.in +++ /dev/null @@ -1,9 +0,0 @@ - -Jane Doe -123 Main Street -Anywhere, SE 12345-6789 - -John Smith -456 Tree-lined Avenue -Smallville, MW 98765-4321 - diff --git a/contrib/awk/test/leadnl.ok b/contrib/awk/test/leadnl.ok deleted file mode 100644 index 19cb299..0000000 --- a/contrib/awk/test/leadnl.ok +++ /dev/null @@ -1,8 +0,0 @@ -Name is: Jane Doe -Address is: 123 Main Street -City and State are: Anywhere, SE 12345-6789 - -Name is: John Smith -Address is: 456 Tree-lined Avenue -City and State are: Smallville, MW 98765-4321 - diff --git a/contrib/awk/test/lint.awk b/contrib/awk/test/lint.awk deleted file mode 100644 index ea7b8e3..0000000 --- a/contrib/awk/test/lint.awk +++ /dev/null @@ -1,14 +0,0 @@ -# lint.awk --- test lint variable - -BEGIN { - a[1] = 1 - LINT = 1 - delete a[2] - LINT = "" - delete a[3] - LINT = "true" - delete a[4] - LINT = 0 - delete a[5] - print "done" -} diff --git a/contrib/awk/test/lint.ok b/contrib/awk/test/lint.ok deleted file mode 100644 index ec24d83..0000000 --- a/contrib/awk/test/lint.ok +++ /dev/null @@ -1,5 +0,0 @@ -gawk: lint.awk:7: warning: delete: index `2' not in array `a' -gawk: lint.awk:7: warning: turning off `--lint' due to assignment to `LINT' -gawk: lint.awk:11: warning: delete: index `4' not in array `a' -gawk: lint.awk:11: warning: turning off `--lint' due to assignment to `LINT' -done diff --git a/contrib/awk/test/nasty.awk b/contrib/awk/test/nasty.awk deleted file mode 100644 index b9c20c8..0000000 --- a/contrib/awk/test/nasty.awk +++ /dev/null @@ -1,92 +0,0 @@ -#From hankedr@manatee.dms.auburn.edu Tue Oct 13 22:15:59 1998 -#Return-Path: -#Received: from cssun.mathcs.emory.edu (cssun.mathcs.emory.edu [170.140.150.1]) -# by dmx.netvision.net.il (8.9.0.Beta5/8.8.6) with ESMTP id PAA03924 -# for ; Tue, 13 Oct 1998 15:32:13 +0200 (IST) -#Received: from mescaline.gnu.org (we-refuse-to-spy-on-our-users@mescaline.gnu.org [158.121.106.21]) by cssun.mathcs.emory.edu (8.7.5/8.6.9-940818.01cssun) with ESMTP id KAA11644 for ; Tue, 13 Oct 1998 10:22:32 -0400 (EDT) -#Received: from manatee.dms.auburn.edu (manatee.dms.auburn.edu [131.204.53.104]) -# by mescaline.gnu.org (8.9.1a/8.9.1) with ESMTP id KAA03250 -# for ; Tue, 13 Oct 1998 10:25:32 -0400 -#Received: (from hankedr@localhost) -# by manatee.dms.auburn.edu (8.9.1a/8.9.1) id JAA13348; -# Tue, 13 Oct 1998 09:22:29 -0500 (CDT) -#Date: Tue, 13 Oct 1998 09:22:29 -0500 (CDT) -#Message-Id: <199810131422.JAA13348@manatee.dms.auburn.edu> -#From: Darrel Hankerson -#To: arnold@gnu.org -#In-reply-to: <199810131313.QAA31784@alpha.netvision.net.il> (message from -# Aharon Robbins on Tue, 13 Oct 1998 16:10:36 +0200) -#Subject: Re: full text of bug report? 
-#Mime-Version: 1.0 -#Content-Type: text/plain; charset=US-ASCII -#X-UIDL: bf3fce492dad4ab030c561e7b2f27d0a -#Status: RO -# -# Do you have the full text of the a = a "\n" f() bug report? -# I can't find it.... I'm not sure there really is a bug. -# -#Yes, see below. -# -#His example has unnecessary fragments (in particular, the use of -#gensub is irrelevant). As I wrote to you earlier, the interesting -#question for me is: -# -# Is the concatenation result undefined? If the result is defined or -# implementation-dependent, then gawk has a bug. -# -# -#=== Original report ===================================================== -#From: Attila Torcsvari -#To: "'bug-gnu-utils@prep.ai.mit.edu'" -#Subject: gawk 3.0.3 bug -#Date: Thu, 17 Sep 1998 18:12:13 +0200 -#MIME-Version: 1.0 -#Content-Transfer-Encoding: 7bit -#Resent-From: bug-gnu-utils@gnu.org -#X-Mailing-List: archive/latest/3396 -#X-Loop: bug-gnu-utils@gnu.org -#Precedence: list -#Resent-Sender: bug-gnu-utils-request@gnu.org -#Content-Transfer-Encoding: 7bit -#Content-Type: text/plain; charset="us-ascii" -#Content-Length: 618 -# -#Bug-gnuers, -#please pass it to the responsible. -# -#The following generates something interesting: -# -BEGIN{ -a="aaaaa" -a=a a #10 -a=a a #20 -a=a a #40 -a=a a #80 -a=a a #160 -a=a a # i.e. a is long enough - -a=a"\n"f() # this causes the trouble -print a # guess the result -} - -function f() -{ -#print "a before: ", a -#a=gensub("a","123,","g",a) # 'a' will be just a bit longer (4 times, but still should fit: 4*160=640) -gsub(/a/, "123", a) -#print "a after: ", a -return "X" -} -# -#Possible reason: -#during f the a is modified, -#it can be even freed, because gensub modifies its size -#the printout contains trash. -# -#Used version: VC compiled WinNT 32 bit Intel. -# -#Regards, -# -#Attila Torcsvari -#Arcanum Development -# diff --git a/contrib/awk/test/nasty.ok b/contrib/awk/test/nasty.ok deleted file mode 100644 index 0ee1a73b..0000000 --- a/contrib/awk/test/nasty.ok +++ /dev/null @@ -1,2 +0,0 @@ -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -X diff --git a/contrib/awk/test/nasty2.awk b/contrib/awk/test/nasty2.awk deleted file mode 100644 index cb0bd6d..0000000 --- a/contrib/awk/test/nasty2.awk +++ /dev/null @@ -1,20 +0,0 @@ -# Based on nasty.awk, test same thing for printf -# -BEGIN { -a="aaaaa" -a=a a #10 -a=a a #20 -a=a a #40 -a=a a #80 -a=a a #160 -a=a a # i.e. 
a is long enough - -printf("a = %s, f() = %s\n", a, f()) -print a -} - -function f() -{ -gsub(/a/, "123", a) -return "X" -} diff --git a/contrib/awk/test/nasty2.ok b/contrib/awk/test/nasty2.ok deleted file mode 100644 index 9b62bf0..0000000 --- a/contrib/awk/test/nasty2.ok +++ /dev/null @@ -1,2 +0,0 @@ -a = aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, f() = X -123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123123 diff --git a/contrib/awk/test/nlinstr.awk b/contrib/awk/test/nlinstr.awk deleted file mode 100644 index f403715..0000000 --- a/contrib/awk/test/nlinstr.awk +++ /dev/null @@ -1,8 +0,0 @@ -BEGIN { RS = "" } - -{ - if (/^@/) - print "not ok" - else - print "ok" -} diff --git a/contrib/awk/test/nlinstr.in b/contrib/awk/test/nlinstr.in deleted file mode 100644 index 65aaaf9..0000000 --- a/contrib/awk/test/nlinstr.in +++ /dev/null @@ -1,2 +0,0 @@ -line 1 -@line 2 diff --git a/contrib/awk/test/nlinstr.ok b/contrib/awk/test/nlinstr.ok deleted file mode 100644 index 9766475..0000000 --- a/contrib/awk/test/nlinstr.ok +++ /dev/null @@ -1 +0,0 @@ -ok diff --git a/contrib/awk/test/nlstrina.awk b/contrib/awk/test/nlstrina.awk deleted file mode 100644 index 41dbd5f..0000000 --- a/contrib/awk/test/nlstrina.awk +++ /dev/null @@ -1,77 +0,0 @@ -# From E.Ab@chem.rug.nl Wed Aug 2 13:16:53 2000 -# Received: from mail.actcom.co.il -# by localhost with POP3 (fetchmail-5.1.2) -# for arnold@localhost (single-drop); Wed, 02 Aug 2000 13:16:53 -0400 (EDT) -# Received: from lmail.actcom.co.il by actcom.co.il with ESMTP -# (8.9.1a/actcom-0.2) id MAA21699 for ; -# Wed, 2 Aug 2000 12:20:38 +0300 (EET DST) -# (rfc931-sender: lmail.actcom.co.il [192.114.47.13]) -# Received: from freefriends.org (freefriends.org [63.85.55.109]) -# by lmail.actcom.co.il (8.9.3/8.9.1) with ESMTP id LAA22723 -# for ; Wed, 2 Aug 2000 11:23:22 +0300 -# Received: from mescaline.gnu.org (mescaline.gnu.org [158.121.106.21]) -# by freefriends.org (8.9.3/8.9.3) with ESMTP id FAA23582 -# for ; Wed, 2 Aug 2000 05:18:59 -0400 -# Received: from dep.chem.rug.nl (dep.chem.rug.nl [129.125.7.81]) -# by mescaline.gnu.org (8.9.1a/8.9.1) with ESMTP id FAA30670; -# Wed, 2 Aug 2000 05:20:24 -0400 -# Received: from rugmd34.chem.rug.nl (rugmd34.chem.rug.nl [129.125.42.34]) -# by dep.chem.rug.nl (8.9.3/8.9.3/Debian 8.9.3-21) with ESMTP id LAA17089; -# Wed, 2 Aug 2000 11:20:23 +0200 -# Received: 
from chem.rug.nl (localhost [127.0.0.1]) by rugmd34.chem.rug.nl (980427.SGI.8.8.8/980728.SGI.AUTOCF) via ESMTP id LAA25392; Wed, 2 Aug 2000 11:20:22 +0200 (MDT) -# Sender: E.Ab@chem.rug.nl -# Message-ID: <3987E7D5.2BDC5FD3@chem.rug.nl> -# Date: Wed, 02 Aug 2000 11:20:21 +0200 -# From: Eiso AB -# X-Mailer: Mozilla 4.72C-SGI [en] (X11; I; IRIX 6.5 IP32) -# X-Accept-Language: en -# MIME-Version: 1.0 -# To: bug-gnu-utils@gnu.org, arnold@gnu.org -# Subject: bug? [GNU Awk 3.0.5] -# -# Content-Type: text/plain; charset=us-ascii -# Content-Transfer-Encoding: 7bit -# X-UIDL: \f8"!(8G!!ZL$#!h>X!! -# Status: R -# -# hi Arnold, -# -# -# Please try the script beneath... -# I'm not sure if this is a bug or not, but I would expect -# the empty string as an array index just to be treated -# like any other string -# -# so if ("" in ta) would be true, and for ( i in ta ) should loop only once. -# -BEGIN { - v="" - ta[v]++ - if ( v in ta) print "a",v,++ta[v],ta[v] - print "b",v,++ta[v],ta[v] - for( i in ta) print "c",++c,i,ta[i] -} -# -# goodluck, Eiso -# -# -- -# _________ -# _______________________________/ Eiso AB \_________________________ -# -# o -# -# o Dept. of Biochemistry -# University of Groningen -# The Netherlands -# o -# . . -# o ^ mailto:eiso@chem.rug.nl -# | - _ mailto:eiso@dds.nl -# \__|__/ http://md.chem.rug.nl/~eiso -# | tel 4326 -# | -# / \ -# / \ -# | | -# ________ ._| |_. ________________________________________________ -# diff --git a/contrib/awk/test/nlstrina.ok b/contrib/awk/test/nlstrina.ok deleted file mode 100644 index 690f1a2..0000000 --- a/contrib/awk/test/nlstrina.ok +++ /dev/null @@ -1,3 +0,0 @@ -a 2 2 -b 3 3 -c 1 3 diff --git a/contrib/awk/test/numindex.awk b/contrib/awk/test/numindex.awk deleted file mode 100644 index 1762e45..0000000 --- a/contrib/awk/test/numindex.awk +++ /dev/null @@ -1,55 +0,0 @@ -#To: bug-gnu-utils@gnu.org -#cc: arnold@gnu.org -#Subject: Possible bug in GNU Awk 3.0.4 -#Date: Wed, 24 Nov 1999 21:47:24 +0000 -#From: Daniel Elphick -#Message-Id: -# -#This is a multipart MIME message. -# -#--==_Exmh_-11192982200 -#Content-Type: text/plain; charset=us-ascii -# -# -#When I use the attached awk script unique on the attached data file, it -#reports that all 4 lines of the data are the same. Using mawk it correctly -#reports that there are no repeats. -# -#I don't know if there are limits on the size of associative array keys for the -#purposes of reliable indexing but if there is then it is not (obviously) -#documented. 
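The nlstrina report above asks that the empty string behave like any other array subscript. A minimal sketch of that expectation (plain awk; not part of the deleted files), consistent with the counts recorded in nlstrina.ok:

BEGIN {
    v = ""
    ta[v]++                        # creates ta[""] with value 1
    if (v in ta) print "member, value", ta[v]
    n = 0
    for (i in ta) n++
    print "distinct subscripts:", n   # expected: 1
}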
-# -# -#--==_Exmh_-11192982200 -#Content-Type: text/plain ; name="data"; charset=us-ascii -#Content-Description: data -#Content-Disposition: attachment; filename="data" -# -#322322111111112232231111 -#322322111111112213223111 -#322322111111112211132231 -#322322111111112211113223 -# -#--==_Exmh_-11192982200 -#Content-Type: text/plain ; name="unique"; charset=us-ascii -#Content-Description: unique -#Content-Disposition: attachment; filename="unique" -# -{ - if($0 in a) - { - printf("line %d has been seen before at line %d\n", NR, a[$0]) - repeat_count += 1 - } - else - { - a[$0] = NR - } - count += 1 -} -END { -# printf("%d %f%%\n", repeat_count, (float)repeat_count / count * 100) - printf("%d %f%%\n", repeat_count, repeat_count / count * 100) -} -# -#--==_Exmh_-11192982200-- diff --git a/contrib/awk/test/numindex.in b/contrib/awk/test/numindex.in deleted file mode 100644 index 3852058..0000000 --- a/contrib/awk/test/numindex.in +++ /dev/null @@ -1,4 +0,0 @@ -322322111111112232231111 -322322111111112213223111 -322322111111112211132231 -322322111111112211113223 diff --git a/contrib/awk/test/numindex.ok b/contrib/awk/test/numindex.ok deleted file mode 100644 index 8e086f6..0000000 --- a/contrib/awk/test/numindex.ok +++ /dev/null @@ -1 +0,0 @@ -0 0.000000% diff --git a/contrib/awk/test/octsub.awk b/contrib/awk/test/octsub.awk deleted file mode 100644 index 65e9689..0000000 --- a/contrib/awk/test/octsub.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN{ ++x[03]; print "/" x[0] "/" x[3] "/"} diff --git a/contrib/awk/test/octsub.ok b/contrib/awk/test/octsub.ok deleted file mode 100644 index 95cbdc7..0000000 --- a/contrib/awk/test/octsub.ok +++ /dev/null @@ -1 +0,0 @@ -//1/ diff --git a/contrib/awk/test/ofmt.awk b/contrib/awk/test/ofmt.awk deleted file mode 100644 index a7b63d6..0000000 --- a/contrib/awk/test/ofmt.awk +++ /dev/null @@ -1,53 +0,0 @@ -# From dragon!knorke.saar.de!florian Wed Jul 16 10:47:27 1997 -# Return-Path: -# Message-ID: <19970716164451.63610@knorke.saar.de> -# Date: Wed, 16 Jul 1997 16:44:51 +0200 -# From: Florian La Roche -# To: bug-gnu-utils@prep.ai.mit.edu -# CC: arnold@gnu.ai.mit.edu -# Subject: bug in gawk 3.0.3 -# MIME-Version: 1.0 -# Content-Type: text/plain; charset=us-ascii -# X-Mailer: Mutt 0.76 -# Status: R -# Content-Length: 1725 -# X-Lines: 177 -# X-Display-Position: 0 -# -# I have a problem with gawk 3.0.3 on linux with libc 5.4.33. -# The memory is corrupted, if I use OFMT = "%.12g". -# With OFMT = "%.6g" evrything works fine, but I don't have enough -# digits for the computation. -# -# Thanks a lot, -# Florian La Roche -# -# Here is the sample awk-Script together with sample data: -# -BEGIN { - OFMT = "%.12g" - big = 99999999999 - lowest = big - small = 0 - highest = small - dir = "" - } -$0 ~ /^[0-9]+$/ { - # some old awks do not think $0 is numeric, so use $1 - if ($1 < lowest) - lowest = $1 - if ($1 > highest) - highest = $1 - next -} -$0 ~ /\/\.:$/ { - if (dir != "") { - if (highest != small) - print dir, highest, lowest - else - print dir, "-", "-" - } - dir = substr($0, 1, length($0)-3) # trim off /.: - lowest = big - highest = small -} diff --git a/contrib/awk/test/ofmt.in b/contrib/awk/test/ofmt.in deleted file mode 100644 index 6fffdfe..0000000 --- a/contrib/awk/test/ofmt.in +++ /dev/null @@ -1,136 +0,0 @@ -alt/binaries/warez/crypto/.: -.. -... - -alt/fan/douglas-adams/.: -.. -... 
-7478 -7479 -7480 -7481 -7482 -7483 -7484 -7485 -7486 -7490 -7488 -7489 -7491 -7407 -7408 -7409 -7410 -7411 -7412 -7413 -7414 -7415 -7416 -7417 -7418 -7419 -7420 -7421 -7422 -7423 -7424 -7425 -7426 -7427 -7428 -7429 -7430 -7431 -7432 -7433 -7434 -7435 -7436 -7437 -7438 -7439 -7440 -7441 -7442 -7443 -7444 -7445 -7446 -7447 -7455 -7449 -7450 -7451 -7452 -7453 -7454 -7456 -7457 -7458 -7459 -7460 -7461 -7462 -7463 -7464 -7465 -7466 -7467 -7468 -7469 -7470 -7471 -7472 -7473 -7475 -7477 - -alt/os/linux/.: -.. -... - - -alt/security/.: -.. -... -pgp -ripem -keydist -index -9617 -9618 -9619 -9620 -9625 -9621 -9626 -9622 -9623 -9624 -9627 -9628 -9629 -9630 -9631 -9632 -9633 -9634 -9636 -9637 -9638 -9639 -9640 -9641 - -alt/security/index/.: -.. -... - -alt/security/keydist/.: -.. -... -253 - -/.: diff --git a/contrib/awk/test/ofmt.ok b/contrib/awk/test/ofmt.ok deleted file mode 100644 index 389c1ef..0000000 --- a/contrib/awk/test/ofmt.ok +++ /dev/null @@ -1,6 +0,0 @@ -alt/binaries/warez/crypto - - -alt/fan/douglas-adams 7491 7407 -alt/os/linux - - -alt/security 9641 9617 -alt/security/index - - -alt/security/keydist 253 253 diff --git a/contrib/awk/test/ofmtbig.awk b/contrib/awk/test/ofmtbig.awk deleted file mode 100644 index f1b2384..0000000 --- a/contrib/awk/test/ofmtbig.awk +++ /dev/null @@ -1,125 +0,0 @@ -# -# [USEMAP] -# -# Problem Report gnu/7821 -# -# awk in free(): warning: chunk is already free. -# -# Confidential -# no -# -# Severity -# serious -# -# Priority -# medium -# -# Responsible -# freebsd-bugs@freebsd.org -# -# State -# suspended -# -# Class -# sw-bug -# -# Submitter-Id -# current-users -# -# Arrival-Date -# Thu Sep 3 10:30:00 PDT 1998 -# -# Last-Modified -# Thu Sep 17 02:04:26 PDT 1998 -# -# Originator -# Alexander Litvin archer@lucky.net -# -# Organization -# -# -#Lucky Net ltd. -# -# Release -# FreeBSD 3.0-CURRENT i386 -# -# Environment -# -# -#FreeBSD grape.carrier.kiev.ua 3.0-CURRENT FreeBSD 3.0-CURRENT #121: Thu Sep 3 -#1 -#1:21:44 EEST 1998 archer@grape.carrier.kiev.ua:/usr/src/sys/compile/GRAPE -#i -#386 -# -# Description -# -# -#The problem first appeared when GNU awk in 3.0-CURRENT was apgraded to -#3.0.3. I run C-News, which uses awk extensively. After awk apgrade C-News -#expire stopped to work. It appeared that some GNU awk 3.0.3 programms when -#given absolutely legitimate input fail, giving out a number of messages: -# -#awk in free(): warning: chunk is already free. -# -# How-To-Repeat -# -# -#Run the following awk program (it is cut out of C-News expire scripts). -#I was not able to cut it down more -- omitting some portions of the -#code (e.g. OFMT line), make error go away in this case, though it -#certainly does not fix awk. -# -#----------------cut-here---------------- -#!/usr/bin/awk -f -BEGIN { - OFMT = "%.12g" - big = 99999999999 - lowest = big - small = 0 - highest = small -} - -$0 ~ /^[0-9]+$/ { - if ($1 < lowest) - lowest = $1 - if ($1 > highest) - highest = $1 - next -} - -$0 ~ /^[a-z]+/ { - print dir, highest, lowest - dir = $0 - lowest = big - highest = small -} -#----------------cut-here---------------- -# -#To get the error, just give this script the following input: -#----------------cut-here---------------- -#a -#1 -#b -#----------------cut-here---------------- -# -# Fix -# -# -#I was not able to track the error in awk sources. As a workaround, -#I just reverted to GNU awk 2.15.5. 
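Both ofmt.awk and ofmtbig.awk hinge on OFMT, which controls how non-integer numbers are converted when written with print. A minimal sketch of that effect (plain gawk; not part of the deleted files):

BEGIN {
    x = 2 / 3
    OFMT = "%.6g";  print x       # 0.666667
    OFMT = "%.12g"; print x       # 0.666666666667
    printf "%.2f\n", x            # printf has its own format and ignores OFMT: 0.67
}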
-# -# Audit-Trail -# -# -#State-Changed-From-To: open-suspended -#State-Changed-By: phk -#State-Changed-When: Thu Sep 17 02:04:08 PDT 1998 -#State-Changed-Why: -#reported to GNU maintainer. -# -# Submit Followup -# _________________________________________________________________ -# -# -# www@freebsd.org diff --git a/contrib/awk/test/ofmtbig.in b/contrib/awk/test/ofmtbig.in deleted file mode 100644 index f1e80ce..0000000 --- a/contrib/awk/test/ofmtbig.in +++ /dev/null @@ -1,3 +0,0 @@ -a -1 -b diff --git a/contrib/awk/test/ofmtbig.ok b/contrib/awk/test/ofmtbig.ok deleted file mode 100644 index 0fe9251..0000000 --- a/contrib/awk/test/ofmtbig.ok +++ /dev/null @@ -1,2 +0,0 @@ - 0 99999999999 -a 1 1 diff --git a/contrib/awk/test/ofmts.awk b/contrib/awk/test/ofmts.awk deleted file mode 100644 index 6ee3705..0000000 --- a/contrib/awk/test/ofmts.awk +++ /dev/null @@ -1,2 +0,0 @@ -BEGIN { OFMT= "%s" } -{ $1 + $2; print $1, $2 } diff --git a/contrib/awk/test/ofmts.in b/contrib/awk/test/ofmts.in deleted file mode 100644 index 50c37ec..0000000 --- a/contrib/awk/test/ofmts.in +++ /dev/null @@ -1 +0,0 @@ -1.2 2.2 diff --git a/contrib/awk/test/ofmts.ok b/contrib/awk/test/ofmts.ok deleted file mode 100644 index 50c37ec..0000000 --- a/contrib/awk/test/ofmts.ok +++ /dev/null @@ -1 +0,0 @@ -1.2 2.2 diff --git a/contrib/awk/test/opasnidx.awk b/contrib/awk/test/opasnidx.awk deleted file mode 100644 index e398860..0000000 --- a/contrib/awk/test/opasnidx.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN { b = 1; a[b] = 2; a[b++] += 1; print b,a[1] } diff --git a/contrib/awk/test/opasnidx.ok b/contrib/awk/test/opasnidx.ok deleted file mode 100644 index 654d526..0000000 --- a/contrib/awk/test/opasnidx.ok +++ /dev/null @@ -1 +0,0 @@ -2 3 diff --git a/contrib/awk/test/opasnslf.awk b/contrib/awk/test/opasnslf.awk deleted file mode 100644 index 46cd2b8..0000000 --- a/contrib/awk/test/opasnslf.awk +++ /dev/null @@ -1,6 +0,0 @@ -BEGIN { - print b += b += 1 - b = 6 - print b += b++ - print b -} diff --git a/contrib/awk/test/opasnslf.ok b/contrib/awk/test/opasnslf.ok deleted file mode 100644 index 2fa9fd5..0000000 --- a/contrib/awk/test/opasnslf.ok +++ /dev/null @@ -1,3 +0,0 @@ -2 -13 -13 diff --git a/contrib/awk/test/paramtyp.awk b/contrib/awk/test/paramtyp.awk deleted file mode 100644 index 58848bb..0000000 --- a/contrib/awk/test/paramtyp.awk +++ /dev/null @@ -1,20 +0,0 @@ -# Sun Apr 25 13:28:58 IDT 1999 -# from Juegen Khars. This program should not core dump. 
- function ReadPGM(f, d) { -print "ReadPGM" - d[1] = 1 - } - - function WritePGM(f, d) { -print "WritePGM" - d[1] = 0 - } - - BEGIN { -print "before ReadPGM" - ReadPGM("", d) -print "after ReadPGM" -print "before WritePGM" - WritePGM("", d) -print "after WritePGM" - } diff --git a/contrib/awk/test/paramtyp.ok b/contrib/awk/test/paramtyp.ok deleted file mode 100644 index 793f857a..0000000 --- a/contrib/awk/test/paramtyp.ok +++ /dev/null @@ -1,6 +0,0 @@ -before ReadPGM -ReadPGM -after ReadPGM -before WritePGM -WritePGM -after WritePGM diff --git a/contrib/awk/test/parseme.awk b/contrib/awk/test/parseme.awk deleted file mode 100644 index 4d6ba94..0000000 --- a/contrib/awk/test/parseme.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN { toupper(substr*line,1,12)) } diff --git a/contrib/awk/test/parseme.ok b/contrib/awk/test/parseme.ok deleted file mode 100644 index f08fee6..0000000 --- a/contrib/awk/test/parseme.ok +++ /dev/null @@ -1,3 +0,0 @@ -gawk: parseme.awk:1: BEGIN { toupper(substr*line,1,12)) } -gawk: parseme.awk:1: ^ parse error -gawk: parseme.awk:1: fatal: 0 is invalid as number of arguments for toupper diff --git a/contrib/awk/test/poundbang.awk b/contrib/awk/test/poundbang.awk deleted file mode 100755 index d60652e..0000000 --- a/contrib/awk/test/poundbang.awk +++ /dev/null @@ -1,3 +0,0 @@ -#! /tmp/gawk -f - { ccount += length($0) } -END { printf "average line length is %2.4f\n", ccount/NR} diff --git a/contrib/awk/test/printf1.awk b/contrib/awk/test/printf1.awk deleted file mode 100644 index 1cd7b99..0000000 --- a/contrib/awk/test/printf1.awk +++ /dev/null @@ -1,19 +0,0 @@ -# Tue May 25 16:36:16 IDT 1999 -# -# Test cases based on email from Andreas Schwab, schwab@gnu.org - -BEGIN { - fmt[1] = "%8.5d"; data[1] = 100 - fmt[2] = "%#o"; data[2] = 0 - fmt[3] = "%#.1o"; data[3] = 0 - fmt[4] = "%#.0o"; data[4] = 0 - fmt[5] = "%#x"; data[5] = 0 - fmt[6] = "%.0d"; data[6] = 0 - fmt[7] = "%5.0d"; data[7] = 0 - - for (i = 1; i <= 7; i++) { - format = "%s, %d --- |" fmt[i] "|\n" - printf(format, fmt[i], data[i], data[i]) - } - -} diff --git a/contrib/awk/test/printf1.ok b/contrib/awk/test/printf1.ok deleted file mode 100644 index 32b3a7d..0000000 --- a/contrib/awk/test/printf1.ok +++ /dev/null @@ -1,7 +0,0 @@ -%8.5d, 100 --- | 00100| -%#o, 0 --- |0| -%#.1o, 0 --- |0| -%#.0o, 0 --- |0| -%#x, 0 --- |0| -%.0d, 0 --- || -%5.0d, 0 --- | | diff --git a/contrib/awk/test/printfloat.awk b/contrib/awk/test/printfloat.awk deleted file mode 100644 index 1cb4066..0000000 --- a/contrib/awk/test/printfloat.awk +++ /dev/null @@ -1,62 +0,0 @@ -# Test program for checking sprintf operation with various floating -# point formats -# -# Watch out - full output of this program will have 3000 * tot lines, -# which will take a chunk of space if you will write it to your disk. 
-# --mj - -BEGIN { - just = "-" - plus = "+ " - alt = "#" - zero = "0" - spec = "feEgG" - fw[1] = "" - fw[2] = "1" - fw[3] = "5" - fw[4] = "10" - fw[5] = "15" - prec[1] = ".-1" - prec[2] = "" - prec[3] = ".2" - prec[4] = ".5" - prec[5] = ".10" - - num = 123.6 - factor = 1.0e-12 - tot = 8 - data[1] = 0 - data[2] = 1 - for (i = 3; i <= tot; i++) { - data[i] = num * factor - factor *= 1000 - } - - for (j = 1; j <= 2; j++) { - for (p = 1; p <= 3; p++) { - for (a = 1; a <= 2; a++) { - for (z = 1; z <= 2; z++) { - for (s = 1; s <= 5; s++) { - for (w = 1; w <= 5; w++) { - for (r = 1; r <= 5; r++) { - frmt = "|%" substr(just, j, 1) - frmt = frmt substr(plus, p, 1) - frmt = frmt substr(alt, a, 1) - frmt = frmt substr(zero, z, 1) - frmt = frmt fw[w] prec[r] - frmt = frmt substr(spec, s, 1) "|" - for (i = 1; i <= tot; i++) { - result = sprintf(frmt, data[i]) -# "normalize" if you must -# sub(/\|\./, "|0.", result) - printf("%-16s %-25s\t%g\n", frmt, - result,data[i]) - } - } - } - } - } - } - } - } -} diff --git a/contrib/awk/test/procinfs.awk b/contrib/awk/test/procinfs.awk deleted file mode 100644 index 53cfa97..0000000 --- a/contrib/awk/test/procinfs.awk +++ /dev/null @@ -1,7 +0,0 @@ -BEGIN { - printf "Initially, PROCINFO[\"FS\"] = %s\n", PROCINFO["FS"] - FIELDWIDTHS = "3 4 5 6" - printf "After assign to FIELDWIDTHS, PROCINFO[\"FS\"] = %s\n", PROCINFO["FS"] - FS = FS - printf "After assign to FS, PROCINFO[\"FS\"] = %s\n", PROCINFO["FS"] -} diff --git a/contrib/awk/test/procinfs.ok b/contrib/awk/test/procinfs.ok deleted file mode 100644 index 23aaeff..0000000 --- a/contrib/awk/test/procinfs.ok +++ /dev/null @@ -1,3 +0,0 @@ -Initially, PROCINFO["FS"] = FS -After assign to FIELDWIDTHS, PROCINFO["FS"] = FIELDWIDTHS -After assign to FS, PROCINFO["FS"] = FS diff --git a/contrib/awk/test/psx96sub.awk b/contrib/awk/test/psx96sub.awk deleted file mode 100644 index 3c7c9b7..0000000 --- a/contrib/awk/test/psx96sub.awk +++ /dev/null @@ -1,7 +0,0 @@ -BEGIN { - text = "here is some text" - repl = "" - printf "orig = \"%s\", repl = \"%s\"\n", text, repl - sub(/some/, repl, text) - printf "result is \"%s\"\n", text -} diff --git a/contrib/awk/test/psx96sub.ok b/contrib/awk/test/psx96sub.ok deleted file mode 100644 index d6b89ee..0000000 --- a/contrib/awk/test/psx96sub.ok +++ /dev/null @@ -1,2 +0,0 @@ -orig = "here is some text", repl = "" -result is "here is text" diff --git a/contrib/awk/test/rebt8b1.awk b/contrib/awk/test/rebt8b1.awk deleted file mode 100644 index 8fa43fb..0000000 --- a/contrib/awk/test/rebt8b1.awk +++ /dev/null @@ -1,138 +0,0 @@ -# From hankedr@dms.auburn.edu Sun Jan 28 12:25:43 2001 -# Received: from mail.actcom.co.il [192.114.47.13] -# by localhost with POP3 (fetchmail-5.5.0) -# for arnold@localhost (single-drop); Sun, 28 Jan 2001 12:25:43 +0200 (IST) -# Received: by actcom.co.il (mbox arobbins) -# (with Cubic Circle's cucipop (v1.31 1998/05/13) Sun Jan 28 12:27:08 2001) -# X-From_: hankedr@dms.auburn.edu Sat Jan 27 15:15:57 2001 -# Received: from lmail.actcom.co.il by actcom.co.il with ESMTP -# (8.9.1a/actcom-0.2) id PAA23801 for ; -# Sat, 27 Jan 2001 15:15:55 +0200 (EET) -# (rfc931-sender: lmail.actcom.co.il [192.114.47.13]) -# Received: from billohost.com (www.billohost.com [209.196.35.10]) -# by lmail.actcom.co.il (8.9.3/8.9.1) with ESMTP id PAA15998 -# for ; Sat, 27 Jan 2001 15:16:27 +0200 -# Received: from yak.dms.auburn.edu (yak.dms.auburn.edu [131.204.53.2]) -# by billohost.com (8.9.3/8.9.3) with ESMTP id IAA00467 -# for ; Sat, 27 Jan 2001 08:15:52 -0500 -# Received: (from 
hankedr@localhost) -# by yak.dms.auburn.edu (8.9.3/8.9.3/Debian/GNU) id HAA24441; -# Sat, 27 Jan 2001 07:15:44 -0600 -# Date: Sat, 27 Jan 2001 07:15:44 -0600 -# Message-Id: <200101271315.HAA24441@yak.dms.auburn.edu> -# From: Darrel Hankerson -# To: arnold@skeeve.com -# Subject: [stolfi@ic.unicamp.br: Bug in [...]* matching with acute-u] -# Mime-Version: 1.0 (generated by tm-edit 7.106) -# Content-Type: message/rfc822 -# Status: R -# -# From: Jorge Stolfi -# To: bug-gnu-utils@gnu.org -# Subject: Bug in [...]* matching with acute-u -# MIME-Version: 1.0 -# Reply-To: stolfi@ic.unicamp.br -# X-MIME-Autoconverted: from 8bit to quoted-printable by grande.dcc.unicamp.br id GAA10716 -# Sender: bug-gnu-utils-admin@gnu.org -# Errors-To: bug-gnu-utils-admin@gnu.org -# X-BeenThere: bug-gnu-utils@gnu.org -# X-Mailman-Version: 2.0 -# Precedence: bulk -# List-Help: -# List-Post: -# List-Subscribe: , -# -# List-Id: Bug reports for the GNU utilities -# List-Unsubscribe: , -# -# List-Archive: -# Date: Sat, 27 Jan 2001 06:46:11 -0200 (EDT) -# Content-Transfer-Encoding: 8bit -# X-MIME-Autoconverted: from quoted-printable to 8bit by manatee.dms.auburn.edu id CAA14936 -# Content-Type: text/plain; charset=iso-8859-1 -# -# ; -# Sun, 28 Jan 2001 18:46:00 +0200 (EET) -# (rfc931-sender: lmail.actcom.co.il [192.114.47.13]) -#Received: from billohost.com (www.billohost.com [209.196.35.10]) -# by lmail.actcom.co.il (8.9.3/8.9.1) with ESMTP id SAA18523 -# for ; Sun, 28 Jan 2001 18:46:35 +0200 -#Received: from grande.dcc.unicamp.br (grande.dcc.unicamp.br [143.106.7.8]) -# by billohost.com (8.9.3/8.9.3) with ESMTP id LAA20063 -# for ; Sun, 28 Jan 2001 11:45:54 -0500 -#Received: from amazonas.dcc.unicamp.br (amazonas.dcc.unicamp.br [143.106.7.11]) -# by grande.dcc.unicamp.br (8.9.3/8.9.3) with ESMTP id OAA29726; -# Sun, 28 Jan 2001 14:45:47 -0200 (EDT) -#Received: from coruja.dcc.unicamp.br (coruja.dcc.unicamp.br [143.106.24.80]) -# by amazonas.dcc.unicamp.br (8.8.5/8.8.5) with ESMTP id OAA06542; -# Sun, 28 Jan 2001 14:45:45 -0200 (EDT) -#Received: (from stolfi@localhost) -# by coruja.dcc.unicamp.br (8.11.0/8.11.0) id f0SGjib16703; -# Sun, 28 Jan 2001 14:45:44 -0200 (EDT) -#Date: Sun, 28 Jan 2001 14:45:44 -0200 (EDT) -#Message-Id: <200101281645.f0SGjib16703@coruja.dcc.unicamp.br> -#From: Jorge Stolfi -#To: Michal Jaegermann -#Cc: Aharon Robbins , oliva@ic.unicamp.br, -# celio@ic.unicamp.br, ducatte@ic.unicamp.br, machado@ic.unicamp.br -#Subject: Re: a regex.c problem -#MIME-Version: 1.0 -#Content-Transfer-Encoding: 8bit -#Content-Type: text/plain; charset=iso-8859-1 -#In-Reply-To: <20010128090314.A5820@ellpspace.math.ualberta.ca> -#References: <200101281207.f0SC7Un08435@skeeve.com> -# <20010128090314.A5820@ellpspace.math.ualberta.ca> -#Reply-To: stolfi@ic.unicamp.br -#Status: RO -# -# -# > [Michal] Are there any other examples of "certain characters" -# > which would throw this regex engine off? -# -#I now tested [anX]*n for X ranging trough all characters from \000 and -#\377, and got that unexpected result only for the following ones: -# -# \370 | =F8 | ø | Small o, slash -# \371 | =F9 | ù | Small u, grave accent -# \372 | =FA | ú | Small u, acute accent -# \373 | =FB | û | Small u, circumflex accent -# \374 | =FC | ü | Small u, dieresis or umlaut mark -# \375 | =FD | ý | Small y, acute accent -# \376 | =FE | þ | Small thorn, Icelandic -# \377 | =FF | ÿ | Small y, dieresis or umlaut mark -# -#I have also tried those offending REs from inside emacs (20.7.1), with -#query-replace-regexp, and it seems to be working fine. 
So presumably -#the bug lies in gawk itself, or in the RE parsing code, rather than in -#the matching engine? -# -#Could it be an underdimensioned table somewhere? -# -#Thanks for the help, and all the best -# -#--stolfi -# -# ---------------------------------------------------------------------- - #! /usr/bin/gawk -f - - BEGIN { - for (c = 0; c < 256; c++) - { do_test(c); } - } - - function do_test(char, pat,s,t) - { - if (char == 92) { printf "(error for \\%03o)\n", char; return; } - pat = sprintf("[an\\%03o]*n", char); - s = "bananas and ananases in canaan"; - t = s; gsub(pat, "AN", t); printf "%-8s %s\n", pat, t; -# ADR: Added: - if (s ~ pat) printf "\tmatch\n" ; else printf "\tno-match\n" - } - -# ---------------------------------------------------------------------- diff --git a/contrib/awk/test/rebt8b2.ok b/contrib/awk/test/rebt8b2.ok deleted file mode 100644 index 661109c..0000000 --- a/contrib/awk/test/rebt8b2.ok +++ /dev/null @@ -1,511 +0,0 @@ -[an\000]*n bANas ANd ANases iAN cAN - match -[an\001]*n bANas ANd ANases iAN cAN - match -[an\002]*n bANas ANd ANases iAN cAN - match -[an\003]*n bANas ANd ANases iAN cAN - match -[an\004]*n bANas ANd ANases iAN cAN - match -[an\005]*n bANas ANd ANases iAN cAN - match -[an\006]*n bANas ANd ANases iAN cAN - match -[an\007]*n bANas ANd ANases iAN cAN - match -[an\010]*n bANas ANd ANases iAN cAN - match -[an\011]*n bANas ANd ANases iAN cAN - match -[an\012]*n bANas ANd ANases iAN cAN - match -[an\013]*n bANas ANd ANases iAN cAN - match -[an\014]*n bANas ANd ANases iAN cAN - match -[an\015]*n bANas ANd ANases iAN cAN - match -[an\016]*n bANas ANd ANases iAN cAN - match -[an\017]*n bANas ANd ANases iAN cAN - match -[an\020]*n bANas ANd ANases iAN cAN - match -[an\021]*n bANas ANd ANases iAN cAN - match -[an\022]*n bANas ANd ANases iAN cAN - match -[an\023]*n bANas ANd ANases iAN cAN - match -[an\024]*n bANas ANd ANases iAN cAN - match -[an\025]*n bANas ANd ANases iAN cAN - match -[an\026]*n bANas ANd ANases iAN cAN - match -[an\027]*n bANas ANd ANases iAN cAN - match -[an\030]*n bANas ANd ANases iAN cAN - match -[an\031]*n bANas ANd ANases iAN cAN - match -[an\032]*n bANas ANd ANases iAN cAN - match -[an\033]*n bANas ANd ANases iAN cAN - match -[an\034]*n bANas ANd ANases iAN cAN - match -[an\035]*n bANas ANd ANases iAN cAN - match -[an\036]*n bANas ANd ANases iAN cAN - match -[an\037]*n bANas ANd ANases iAN cAN - match -[an\040]*n bANasANdANases iAN cAN - match -[an\041]*n bANas ANd ANases iAN cAN - match -[an\042]*n bANas ANd ANases iAN cAN - match -[an\043]*n bANas ANd ANases iAN cAN - match -[an\044]*n bANas ANd ANases iAN cAN - match -[an\045]*n bANas ANd ANases iAN cAN - match -[an\046]*n bANas ANd ANases iAN cAN - match -[an\047]*n bANas ANd ANases iAN cAN - match -[an\050]*n bANas ANd ANases iAN cAN - match -[an\051]*n bANas ANd ANases iAN cAN - match -[an\052]*n bANas ANd ANases iAN cAN - match -[an\053]*n bANas ANd ANases iAN cAN - match -[an\054]*n bANas ANd ANases iAN cAN - match -[an\055]*n bANas ANd ANases iAN cAN - match -[an\056]*n bANas ANd ANases iAN cAN - match -[an\057]*n bANas ANd ANases iAN cAN - match -[an\060]*n bANas ANd ANases iAN cAN - match -[an\061]*n bANas ANd ANases iAN cAN - match -[an\062]*n bANas ANd ANases iAN cAN - match -[an\063]*n bANas ANd ANases iAN cAN - match -[an\064]*n bANas ANd ANases iAN cAN - match -[an\065]*n bANas ANd ANases iAN cAN - match -[an\066]*n bANas ANd ANases iAN cAN - match -[an\067]*n bANas ANd ANases iAN cAN - match -[an\070]*n bANas ANd ANases iAN cAN - match -[an\071]*n bANas ANd 
ANases iAN cAN - match -[an\072]*n bANas ANd ANases iAN cAN - match -[an\073]*n bANas ANd ANases iAN cAN - match -[an\074]*n bANas ANd ANases iAN cAN - match -[an\075]*n bANas ANd ANases iAN cAN - match -[an\076]*n bANas ANd ANases iAN cAN - match -[an\077]*n bANas ANd ANases iAN cAN - match -[an\100]*n bANas ANd ANases iAN cAN - match -[an\101]*n bANas ANd ANases iAN cAN - match -[an\102]*n bANas ANd ANases iAN cAN - match -[an\103]*n bANas ANd ANases iAN cAN - match -[an\104]*n bANas ANd ANases iAN cAN - match -[an\105]*n bANas ANd ANases iAN cAN - match -[an\106]*n bANas ANd ANases iAN cAN - match -[an\107]*n bANas ANd ANases iAN cAN - match -[an\110]*n bANas ANd ANases iAN cAN - match -[an\111]*n bANas ANd ANases iAN cAN - match -[an\112]*n bANas ANd ANases iAN cAN - match -[an\113]*n bANas ANd ANases iAN cAN - match -[an\114]*n bANas ANd ANases iAN cAN - match -[an\115]*n bANas ANd ANases iAN cAN - match -[an\116]*n bANas ANd ANases iAN cAN - match -[an\117]*n bANas ANd ANases iAN cAN - match -[an\120]*n bANas ANd ANases iAN cAN - match -[an\121]*n bANas ANd ANases iAN cAN - match -[an\122]*n bANas ANd ANases iAN cAN - match -[an\123]*n bANas ANd ANases iAN cAN - match -[an\124]*n bANas ANd ANases iAN cAN - match -[an\125]*n bANas ANd ANases iAN cAN - match -[an\126]*n bANas ANd ANases iAN cAN - match -[an\127]*n bANas ANd ANases iAN cAN - match -[an\130]*n bANas ANd ANases iAN cAN - match -[an\131]*n bANas ANd ANases iAN cAN - match -[an\132]*n bANas ANd ANases iAN cAN - match -[an\133]*n bANas ANd ANases iAN cAN - match -(error for \134) -[an\135]*n bANANas ANd ANANases in cANaAN - match -[an\136]*n bANas ANd ANases iAN cAN - match -[an\137]*n bANas ANd ANases iAN cAN - match -[an\140]*n bANas ANd ANases iAN cAN - match -[an\141]*n bANas ANd ANases iAN cAN - match -[an\142]*n ANas ANd ANases iAN cAN - match -[an\143]*n bANas ANd ANases iAN AN - match -[an\144]*n bANas ANd ANases iAN cAN - match -[an\145]*n bANas ANd ANases iAN cAN - match -[an\146]*n bANas ANd ANases iAN cAN - match -[an\147]*n bANas ANd ANases iAN cAN - match -[an\150]*n bANas ANd ANases iAN cAN - match -[an\151]*n bANas ANd ANases AN cAN - match -[an\152]*n bANas ANd ANases iAN cAN - match -[an\153]*n bANas ANd ANases iAN cAN - match -[an\154]*n bANas ANd ANases iAN cAN - match -[an\155]*n bANas ANd ANases iAN cAN - match -[an\156]*n bANas ANd ANases iAN cAN - match -[an\157]*n bANas ANd ANases iAN cAN - match -[an\160]*n bANas ANd ANases iAN cAN - match -[an\161]*n bANas ANd ANases iAN cAN - match -[an\162]*n bANas ANd ANases iAN cAN - match -[an\163]*n bANas ANd ANases iAN cAN - match -[an\164]*n bANas ANd ANases iAN cAN - match -[an\165]*n bANas ANd ANases iAN cAN - match -[an\166]*n bANas ANd ANases iAN cAN - match -[an\167]*n bANas ANd ANases iAN cAN - match -[an\170]*n bANas ANd ANases iAN cAN - match -[an\171]*n bANas ANd ANases iAN cAN - match -[an\172]*n bANas ANd ANases iAN cAN - match -[an\173]*n bANas ANd ANases iAN cAN - match -[an\174]*n bANas ANd ANases iAN cAN - match -[an\175]*n bANas ANd ANases iAN cAN - match -[an\176]*n bANas ANd ANases iAN cAN - match -[an\177]*n bANas ANd ANases iAN cAN - match -[an\200]*n bANas ANd ANases iAN cAN - match -[an\201]*n bANas ANd ANases iAN cAN - match -[an\202]*n bANas ANd ANases iAN cAN - match -[an\203]*n bANas ANd ANases iAN cAN - match -[an\204]*n bANas ANd ANases iAN cAN - match -[an\205]*n bANas ANd ANases iAN cAN - match -[an\206]*n bANas ANd ANases iAN cAN - match -[an\207]*n bANas ANd ANases iAN cAN - match -[an\210]*n bANas ANd ANases iAN cAN - match 
-[an\211]*n bANas ANd ANases iAN cAN - match -[an\212]*n bANas ANd ANases iAN cAN - match -[an\213]*n bANas ANd ANases iAN cAN - match -[an\214]*n bANas ANd ANases iAN cAN - match -[an\215]*n bANas ANd ANases iAN cAN - match -[an\216]*n bANas ANd ANases iAN cAN - match -[an\217]*n bANas ANd ANases iAN cAN - match -[an\220]*n bANas ANd ANases iAN cAN - match -[an\221]*n bANas ANd ANases iAN cAN - match -[an\222]*n bANas ANd ANases iAN cAN - match -[an\223]*n bANas ANd ANases iAN cAN - match -[an\224]*n bANas ANd ANases iAN cAN - match -[an\225]*n bANas ANd ANases iAN cAN - match -[an\226]*n bANas ANd ANases iAN cAN - match -[an\227]*n bANas ANd ANases iAN cAN - match -[an\230]*n bANas ANd ANases iAN cAN - match -[an\231]*n bANas ANd ANases iAN cAN - match -[an\232]*n bANas ANd ANases iAN cAN - match -[an\233]*n bANas ANd ANases iAN cAN - match -[an\234]*n bANas ANd ANases iAN cAN - match -[an\235]*n bANas ANd ANases iAN cAN - match -[an\236]*n bANas ANd ANases iAN cAN - match -[an\237]*n bANas ANd ANases iAN cAN - match -[an\240]*n bANas ANd ANases iAN cAN - match -[an\241]*n bANas ANd ANases iAN cAN - match -[an\242]*n bANas ANd ANases iAN cAN - match -[an\243]*n bANas ANd ANases iAN cAN - match -[an\244]*n bANas ANd ANases iAN cAN - match -[an\245]*n bANas ANd ANases iAN cAN - match -[an\246]*n bANas ANd ANases iAN cAN - match -[an\247]*n bANas ANd ANases iAN cAN - match -[an\250]*n bANas ANd ANases iAN cAN - match -[an\251]*n bANas ANd ANases iAN cAN - match -[an\252]*n bANas ANd ANases iAN cAN - match -[an\253]*n bANas ANd ANases iAN cAN - match -[an\254]*n bANas ANd ANases iAN cAN - match -[an\255]*n bANas ANd ANases iAN cAN - match -[an\256]*n bANas ANd ANases iAN cAN - match -[an\257]*n bANas ANd ANases iAN cAN - match -[an\260]*n bANas ANd ANases iAN cAN - match -[an\261]*n bANas ANd ANases iAN cAN - match -[an\262]*n bANas ANd ANases iAN cAN - match -[an\263]*n bANas ANd ANases iAN cAN - match -[an\264]*n bANas ANd ANases iAN cAN - match -[an\265]*n bANas ANd ANases iAN cAN - match -[an\266]*n bANas ANd ANases iAN cAN - match -[an\267]*n bANas ANd ANases iAN cAN - match -[an\270]*n bANas ANd ANases iAN cAN - match -[an\271]*n bANas ANd ANases iAN cAN - match -[an\272]*n bANas ANd ANases iAN cAN - match -[an\273]*n bANas ANd ANases iAN cAN - match -[an\274]*n bANas ANd ANases iAN cAN - match -[an\275]*n bANas ANd ANases iAN cAN - match -[an\276]*n bANas ANd ANases iAN cAN - match -[an\277]*n bANas ANd ANases iAN cAN - match -[an\300]*n bANas ANd ANases iAN cAN - match -[an\301]*n bANas ANd ANases iAN cAN - match -[an\302]*n bANas ANd ANases iAN cAN - match -[an\303]*n bANas ANd ANases iAN cAN - match -[an\304]*n bANas ANd ANases iAN cAN - match -[an\305]*n bANas ANd ANases iAN cAN - match -[an\306]*n bANas ANd ANases iAN cAN - match -[an\307]*n bANas ANd ANases iAN cAN - match -[an\310]*n bANas ANd ANases iAN cAN - match -[an\311]*n bANas ANd ANases iAN cAN - match -[an\312]*n bANas ANd ANases iAN cAN - match -[an\313]*n bANas ANd ANases iAN cAN - match -[an\314]*n bANas ANd ANases iAN cAN - match -[an\315]*n bANas ANd ANases iAN cAN - match -[an\316]*n bANas ANd ANases iAN cAN - match -[an\317]*n bANas ANd ANases iAN cAN - match -[an\320]*n bANas ANd ANases iAN cAN - match -[an\321]*n bANas ANd ANases iAN cAN - match -[an\322]*n bANas ANd ANases iAN cAN - match -[an\323]*n bANas ANd ANases iAN cAN - match -[an\324]*n bANas ANd ANases iAN cAN - match -[an\325]*n bANas ANd ANases iAN cAN - match -[an\326]*n bANas ANd ANases iAN cAN - match -[an\327]*n bANas ANd ANases iAN cAN - match 
-[an\330]*n bANas ANd ANases iAN cAN - match -[an\331]*n bANas ANd ANases iAN cAN - match -[an\332]*n bANas ANd ANases iAN cAN - match -[an\333]*n bANas ANd ANases iAN cAN - match -[an\334]*n bANas ANd ANases iAN cAN - match -[an\335]*n bANas ANd ANases iAN cAN - match -[an\336]*n bANas ANd ANases iAN cAN - match -[an\337]*n bANas ANd ANases iAN cAN - match -[an\340]*n bANas ANd ANases iAN cAN - match -[an\341]*n bANas ANd ANases iAN cAN - match -[an\342]*n bANas ANd ANases iAN cAN - match -[an\343]*n bANas ANd ANases iAN cAN - match -[an\344]*n bANas ANd ANases iAN cAN - match -[an\345]*n bANas ANd ANases iAN cAN - match -[an\346]*n bANas ANd ANases iAN cAN - match -[an\347]*n bANas ANd ANases iAN cAN - match -[an\350]*n bANas ANd ANases iAN cAN - match -[an\351]*n bANas ANd ANases iAN cAN - match -[an\352]*n bANas ANd ANases iAN cAN - match -[an\353]*n bANas ANd ANases iAN cAN - match -[an\354]*n bANas ANd ANases iAN cAN - match -[an\355]*n bANas ANd ANases iAN cAN - match -[an\356]*n bANas ANd ANases iAN cAN - match -[an\357]*n bANas ANd ANases iAN cAN - match -[an\360]*n bANas ANd ANases iAN cAN - match -[an\361]*n bANas ANd ANases iAN cAN - match -[an\362]*n bANas ANd ANases iAN cAN - match -[an\363]*n bANas ANd ANases iAN cAN - match -[an\364]*n bANas ANd ANases iAN cAN - match -[an\365]*n bANas ANd ANases iAN cAN - match -[an\366]*n bANas ANd ANases iAN cAN - match -[an\367]*n bANas ANd ANases iAN cAN - match -[an\370]*n bANas ANd ANases iAN cAN - match -[an\371]*n bANas ANd ANases iAN cAN - match -[an\372]*n bANas ANd ANases iAN cAN - match -[an\373]*n bANas ANd ANases iAN cAN - match -[an\374]*n bANas ANd ANases iAN cAN - match -[an\375]*n bANas ANd ANases iAN cAN - match -[an\376]*n bANas ANd ANases iAN cAN - match -[an\377]*n bANas ANd ANases iAN cAN - match diff --git a/contrib/awk/test/redfilnm.awk b/contrib/awk/test/redfilnm.awk deleted file mode 100644 index 83cb6aa..0000000 --- a/contrib/awk/test/redfilnm.awk +++ /dev/null @@ -1,98 +0,0 @@ -#Date: Tue, 18 May 1999 12:48:07 -0500 (CDT) -#From: Darrel Hankerson -#To: arnold@gnu.org -#Subject: [christopher.procter@bt.com: RE: Getline bug in Gawk 3.0.3] -# -#Here's a reply that came directly to me. --darrel -# -# -#From: christopher.procter@bt.com -#To: hankedr@dms.auburn.edu -#Subject: RE: Getline bug in Gawk 3.0.3 -#Date: Tue, 18 May 1999 18:42:28 +0100 -# -#Sorry that was me getting carried away and cut and pasting the wrong thing -#into my email -# -#The real problem seems to be that : -#BEGIN { -#for (i=1;i<10;i++){ -# while((getline < "hello.txt")>0){ -# print $0 -# } -# close("hello.txt") -# } -#} -#works (printing the contents of hello.txt 9 times), where as:- -# -#END{ -#for (i=1;i<10;i++){ -# while((getline < "hello.txt")>0){ -# print $0 -# } -# close("hello.txt") -# } -#} -# -#doesn't, (it prints out hello.txt once followed by the iteration numbers -#from 1 to 9). -#The only difference is that one is in the BEGIN block and one in the END -#block. -# -#Sorry about the first post, I'm not a bad awk programmer, just a tired one -#:) -# -#chris -# -#> -----Original Message----- -#> From: Darrel Hankerson [SMTP:hankedr@dms.auburn.edu] -#> Sent: 18 May 1999 18:28 -#> To: christopher.procter@bt.com -#> Subject: Re: Getline bug in Gawk 3.0.3 -#> -#> Could you clarify? Your first script uses an apparently undefined -#> variable f. 
-#> -#> -#> christopher.procter@bt.com writes: -#> -#> BEGIN { -#> for (i=1;i<10;i++){ -#> while((getline < "hello.txt")>0){ -#> print $0 -#> } -#> close(f) -#> } -#> } -#> -#> refuses to close the file and so prints the contents of hello.txt just -#> once. -#> However:- -#> -#> BEGIN { -#> f="hello.txt" -#> for (i=1;i<10;i++){ -#> while((getline < f)>0){ -#> print $0 -#> } -#> close(f) -#> } -#> } -#> -#> works as advertised (printing the contents of hello.txt 9 times) -#> It seems like a bug in the close statement. -#> -#> -- -#> --Darrel Hankerson hankedr@mail.auburn.edu -# - -# srcdir is assigned on command line --- ADR -END { - f = srcdir "/redfilnm.in" - for (i = 1; i < 10; i++){ - while((getline < f) > 0){ - print $0 - } - close(f) - } -} diff --git a/contrib/awk/test/redfilnm.in b/contrib/awk/test/redfilnm.in deleted file mode 100644 index 4b5fa63..0000000 --- a/contrib/awk/test/redfilnm.in +++ /dev/null @@ -1 +0,0 @@ -hello, world diff --git a/contrib/awk/test/redfilnm.ok b/contrib/awk/test/redfilnm.ok deleted file mode 100644 index f9e095a..0000000 --- a/contrib/awk/test/redfilnm.ok +++ /dev/null @@ -1,9 +0,0 @@ -hello, world -hello, world -hello, world -hello, world -hello, world -hello, world -hello, world -hello, world -hello, world diff --git a/contrib/awk/test/reg/Obsolete/exp.awk b/contrib/awk/test/reg/Obsolete/exp.awk deleted file mode 100644 index 4e707f8..0000000 --- a/contrib/awk/test/reg/Obsolete/exp.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN { print exp(0), exp(1000000), exp(0.5) } diff --git a/contrib/awk/test/reg/Obsolete/exp.good b/contrib/awk/test/reg/Obsolete/exp.good deleted file mode 100644 index 07b8853..0000000 --- a/contrib/awk/test/reg/Obsolete/exp.good +++ /dev/null @@ -1,2 +0,0 @@ -1 gawk: reg/exp.awk:1: warning: exp argument 1e+06 is out of range -Inf 1.64872 diff --git a/contrib/awk/test/reg/Obsolete/exp.in b/contrib/awk/test/reg/Obsolete/exp.in deleted file mode 100644 index e69de29..0000000 diff --git a/contrib/awk/test/reg/Obsolete/log.awk b/contrib/awk/test/reg/Obsolete/log.awk deleted file mode 100644 index bcae90b..0000000 --- a/contrib/awk/test/reg/Obsolete/log.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN { print log(0), log(-1), log(100) } diff --git a/contrib/awk/test/reg/Obsolete/log.good b/contrib/awk/test/reg/Obsolete/log.good deleted file mode 100644 index 857ab77..0000000 --- a/contrib/awk/test/reg/Obsolete/log.good +++ /dev/null @@ -1,4 +0,0 @@ -log: SING error --Inf gawk: reg/log.awk:1: warning: log called with negative argument -1 -log: DOMAIN error -NaN 4.60517 diff --git a/contrib/awk/test/reg/Obsolete/log.in b/contrib/awk/test/reg/Obsolete/log.in deleted file mode 100644 index e69de29..0000000 diff --git a/contrib/awk/test/regeq.awk b/contrib/awk/test/regeq.awk deleted file mode 100644 index 0208eb2..0000000 --- a/contrib/awk/test/regeq.awk +++ /dev/null @@ -1,29 +0,0 @@ -#Date: Sat, 8 May 1999 17:42:20 +0200 -#From: Iva Cabric -#To: bug-gnu-utils@gnu.org -#Cc: arnold@gnu.org -#Subject: Problem in gawk with match -# -#Hello, -# -#gawk reports fatal error in match when first character in regexp is "=" : -# -#$ gawk '{ where = match($0, /=a/); print where}' -#gawk: cmd. line:1: { where = match($0, /=a/); print where} -#gawk: cmd. line:1: ^ parse error -#gawk: cmd. line:1: fatal: match() cannot have 0 arguments -# -#Using "\=" instead "=" works without problems : -# -#$ gawk '{ where = match($0, /\=a/); print where}' -#sdgfa -#0 -#asdfds=a -#7 -# -#Other versions of awk have no problems with "/=/" (except oawk on SunOS). 
-# -#-- -# @ -# -{ where = match($0, /=a/); print where} diff --git a/contrib/awk/test/regeq.in b/contrib/awk/test/regeq.in deleted file mode 100644 index 2428df3..0000000 --- a/contrib/awk/test/regeq.in +++ /dev/null @@ -1,2 +0,0 @@ -sdgfa -asdfds=a diff --git a/contrib/awk/test/regeq.ok b/contrib/awk/test/regeq.ok deleted file mode 100644 index 4596f88..0000000 --- a/contrib/awk/test/regeq.ok +++ /dev/null @@ -1,2 +0,0 @@ -0 -7 diff --git a/contrib/awk/test/regtest.sh b/contrib/awk/test/regtest.sh deleted file mode 100755 index 72b0dbf..0000000 --- a/contrib/awk/test/regtest.sh +++ /dev/null @@ -1,18 +0,0 @@ -#! /bin/sh - -case "$AWK" in -"") AWK=../gawk ;; -esac -#AWK=${AWK:-../gawk} - -for i in reg/*.awk -do - it=`basename $i .awk` - $AWK -f $i reg/$it.out 2>&1 - if cmp -s reg/$it.out reg/$it.good - then - rm -f reg/$it.out - else - echo "regtest: $it fails" - fi -done diff --git a/contrib/awk/test/regx8bit.awk b/contrib/awk/test/regx8bit.awk deleted file mode 100644 index 5ecd7eb..0000000 --- a/contrib/awk/test/regx8bit.awk +++ /dev/null @@ -1,26 +0,0 @@ -# The full test will only work in a Swedish localte -# Try things that should work across the board -# BEGIN { -# s = "så är det" -# print match(s,/\yså\y/), s ~ /\yså\y/, "å" ~ /\w/ -# } -BEGIN { - printf "\"å\" = %c\n", "å" - printf "\"ä\" = %c\n", "ä" - s = "så är det" - printf "s = \"%s\"\n", s - printf "match(s,/\\yså/) = %d\n", match(s, /\yså/) -# printf "match(s,/så\\y/) = %d\n", match(s, /så\y/) -# printf "match(s,/\\yså\\y/) = %d\n", match(s, /\yså\y/) - printf "s ~ /å/ = %d\n", s ~ /å/ - printf "s ~ /så/ = %d\n", s ~ /så/ - printf "s ~ /\\yså/ = %d\n", s ~ /\yså/ -# printf "s ~ /så\\y/ = %d\n", s ~ /så\y/ -# printf "s ~ /\\yså\\y/ = %d\n", s ~ /\yså\y/ -# printf "\"å\" ~ /\\w/ = %d\n", "å" ~ /\w/ -# printf "\"ä\" ~ /\\w/ = %d\n", "ä" ~ /\w/ -# printf "\"å\" ~ /\\yä\\y/ = %d\n", "å" ~ /\yå\y/ -# printf "\"ä\" ~ /\\yä\\y/ = %d\n", "ä" ~ /\yä\y/ -# printf "\"å\" ~ /[[:alpha:]]/ = %d\n", "å" ~ /[[:alpha:]]/ -# printf "\"ä\" ~ /[[:alpha:]]/ = %d\n", "ä" ~ /[[:alpha:]]/ -} diff --git a/contrib/awk/test/regx8bit.ok b/contrib/awk/test/regx8bit.ok deleted file mode 100644 index 76e1c0b..0000000 --- a/contrib/awk/test/regx8bit.ok +++ /dev/null @@ -1,7 +0,0 @@ -"å" = å -"ä" = ä -s = "så är det" -match(s,/\yså/) = 1 -s ~ /å/ = 1 -s ~ /så/ = 1 -s ~ /\yså/ = 1 diff --git a/contrib/awk/test/rsnul1nl.awk b/contrib/awk/test/rsnul1nl.awk deleted file mode 100644 index d8da7a6..0000000 --- a/contrib/awk/test/rsnul1nl.awk +++ /dev/null @@ -1,2 +0,0 @@ -BEGIN { RS = "" } -{ print } diff --git a/contrib/awk/test/rsnul1nl.in b/contrib/awk/test/rsnul1nl.in deleted file mode 100644 index 76de96f..0000000 --- a/contrib/awk/test/rsnul1nl.in +++ /dev/null @@ -1,3 +0,0 @@ - -This is... -the first record. diff --git a/contrib/awk/test/rsnul1nl.ok b/contrib/awk/test/rsnul1nl.ok deleted file mode 100644 index 7ce0957..0000000 --- a/contrib/awk/test/rsnul1nl.ok +++ /dev/null @@ -1,2 +0,0 @@ -This is... -the first record. 
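The regeq.awk report removed above documents that gawk 3.0.x mis-parses a regexp whose first character is "=" (the scanner reads the "/=" operator), while escaping the "=" avoids the error. A minimal sketch of that workaround, shown only for context and assuming the same two-line input as the deleted regeq.in fixture:

    # leading backslash keeps /=a/ from being scanned as the /= operator
    { where = match($0, /\=a/); print where }

Fed "sdgfa" and "asdfds=a", this prints 0 and 7, matching the deleted regeq.ok.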
diff --git a/contrib/awk/test/shadow.awk b/contrib/awk/test/shadow.awk deleted file mode 100644 index 9a58720..0000000 --- a/contrib/awk/test/shadow.awk +++ /dev/null @@ -1,21 +0,0 @@ -function foo() -{ - print "foo" -} - -function bar(A, Z, q) -{ - print "bar" -} - -function baz(C, D) -{ - print "baz" -} - -BEGIN { - A = C = D = Z = y = 1 - foo() - bar() - baz() -} diff --git a/contrib/awk/test/shadow.ok b/contrib/awk/test/shadow.ok deleted file mode 100644 index f543a88..0000000 --- a/contrib/awk/test/shadow.ok +++ /dev/null @@ -1,7 +0,0 @@ -gawk: shadow.awk:22: warning: function `bar': parameter `A' shadows global variable -gawk: shadow.awk:22: warning: function `bar': parameter `Z' shadows global variable -gawk: shadow.awk:22: warning: function `baz': parameter `C' shadows global variable -gawk: shadow.awk:22: warning: function `baz': parameter `D' shadows global variable -foo -bar -baz diff --git a/contrib/awk/test/sort1.awk b/contrib/awk/test/sort1.awk deleted file mode 100644 index 3800c40..0000000 --- a/contrib/awk/test/sort1.awk +++ /dev/null @@ -1,31 +0,0 @@ -BEGIN{ - a[1] = "barz"; - a[2] = "blattt"; - a[3] = "Zebra"; - a[4] = 1234; - - testit1(a) - - delete a - - a[1] = "barz"; - a[2] = "blattt"; - a[3] = "Zebra"; - a[4] = 1234; - - n = asort(a, b); - - print "N = ", n; - - for(i=1; i <= n; i++) - print i, a[i], b[i]; -} - -function testit1(a, count, j) -{ - print "start testit" - count = asort(a) - for (j = 1; j <= count; j++) - print j, a[j] - print "end testit" -} diff --git a/contrib/awk/test/sort1.ok b/contrib/awk/test/sort1.ok deleted file mode 100644 index 4838dab..0000000 --- a/contrib/awk/test/sort1.ok +++ /dev/null @@ -1,11 +0,0 @@ -start testit -1 1234 -2 Zebra -3 barz -4 blattt -end testit -N = 4 -1 barz 1234 -2 blattt Zebra -3 Zebra barz -4 1234 blattt diff --git a/contrib/awk/test/splitdef.awk b/contrib/awk/test/splitdef.awk deleted file mode 100644 index 694db80..0000000 --- a/contrib/awk/test/splitdef.awk +++ /dev/null @@ -1,7 +0,0 @@ -BEGIN { - data = "abc:easy:as:one:two:three" - FS = ":" - FIELDWIDTHS = "3 1 4 1 2 1 3 1 3 1 5" - n = split(data, a) - printf "n = %d, a[3] = %s\n", n, a[3] -} diff --git a/contrib/awk/test/splitdef.ok b/contrib/awk/test/splitdef.ok deleted file mode 100644 index 5f13505..0000000 --- a/contrib/awk/test/splitdef.ok +++ /dev/null @@ -1 +0,0 @@ -n = 6, a[3] = as diff --git a/contrib/awk/test/strftime.awk b/contrib/awk/test/strftime.awk deleted file mode 100644 index 8c1f401..0000000 --- a/contrib/awk/test/strftime.awk +++ /dev/null @@ -1,15 +0,0 @@ -# strftime.awk ; test the strftime code -# -# input is the output of `date', see Makefile.in -# -# The mucking about with $0 and $N is to avoid problems -# on cygwin, where the timezone field is empty and there -# are two consecutive blanks. 
- -{ - $3 = sprintf("%02d", $3 + 0) - print > "strftime.ok" - $0 = strftime() - $NF = $NF - print > OUTPUT -} diff --git a/contrib/awk/test/strtod.awk b/contrib/awk/test/strtod.awk deleted file mode 100644 index 27df8a4..0000000 --- a/contrib/awk/test/strtod.awk +++ /dev/null @@ -1 +0,0 @@ -{ x = "0x" $1 ; print x, x + 0 } diff --git a/contrib/awk/test/strtod.in b/contrib/awk/test/strtod.in deleted file mode 100644 index 51b4008..0000000 --- a/contrib/awk/test/strtod.in +++ /dev/null @@ -1 +0,0 @@ -345 diff --git a/contrib/awk/test/strtod.ok b/contrib/awk/test/strtod.ok deleted file mode 100644 index bedd0f2..0000000 --- a/contrib/awk/test/strtod.ok +++ /dev/null @@ -1 +0,0 @@ -0x345 0 diff --git a/contrib/awk/test/subslash.awk b/contrib/awk/test/subslash.awk deleted file mode 100644 index 87ab029..0000000 --- a/contrib/awk/test/subslash.awk +++ /dev/null @@ -1,6 +0,0 @@ -BEGIN { - i = 2 - a[i] = 5 - a[i] /= 2 - printf "a[%s] = %f\n", i, a[i] -} diff --git a/contrib/awk/test/subslash.ok b/contrib/awk/test/subslash.ok deleted file mode 100644 index 4f3beff..0000000 --- a/contrib/awk/test/subslash.ok +++ /dev/null @@ -1 +0,0 @@ -a[2] = 2.500000 diff --git a/contrib/awk/test/zeroflag.awk b/contrib/awk/test/zeroflag.awk deleted file mode 100644 index 526ed0e..0000000 --- a/contrib/awk/test/zeroflag.awk +++ /dev/null @@ -1 +0,0 @@ -BEGIN { printf("%2.1d---%02.1d\n", 2, 2) } diff --git a/contrib/awk/test/zeroflag.ok b/contrib/awk/test/zeroflag.ok deleted file mode 100644 index 937c0ed..0000000 --- a/contrib/awk/test/zeroflag.ok +++ /dev/null @@ -1 +0,0 @@ - 2--- 2 diff --git a/contrib/awk/version.c b/contrib/awk/version.c deleted file mode 100644 index 57a9abd..0000000 --- a/contrib/awk/version.c +++ /dev/null @@ -1,53 +0,0 @@ -char *version_string = "@(#)GNU Awk 3.1"; - -/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead - of the Right Hand Side */ - -/* 1.03 Fixed split() to treat strings of space and tab as FS if - the split char is ' '. - - Added -v option to print version number - - Fixed bug that caused rounding when printing large numbers */ - -/* 2.00beta Incorporated the functionality of the "new" awk as described - the book (reference not handy). Extensively tested, but no - doubt still buggy. Badly needs tuning and cleanup, in - particular in memory management which is currently almost - non-existent. */ - -/* 2.01 JF: Modified to compile under GCC, and fixed a few - bugs while I was at it. I hope I didn't add any more. - I modified parse.y to reduce the number of reduce/reduce - conflicts. There are still a few left. */ - -/* 2.02 Fixed JF's bugs; improved memory management, still needs - lots of work. */ - -/* 2.10 Major grammar rework and lots of bug fixes from David. - Major changes for performance enhancements from David. - A number of minor bug fixes and new features from Arnold. - Changes for MSDOS from Conrad Kwok and Scott Garfinkle. - The gawk.texinfo and info files included! */ - -/* 2.11 Bug fix release to 2.10. Lots of changes for portability, - speed, and configurability. */ - -/* 2.12 Lots of changes for portability, speed, and configurability. - Several bugs fixed. POSIX compliance. Removal of last set - of hard-wired limits. Atari and VMS ports added. */ - -/* 2.13 Public release of 2.12 */ - -/* 2.14 Mostly bug fixes. */ - -/* 2.15 Bug fixes plus intermixing of command-line source and files, - GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. - `delete array'. OS/2 port added. 
*/ - -/* 3.0 RS as regexp, RT variable, FS = "", fflush builtin, posix - regexps, IGNORECASE applies to all comparison, autoconf, source - code cleanup. See the NEWS file. */ - -/* 3.1 PROCINFO array, LINT variable, mktime builtin, BINMODE variable, - |&, tcp/ip, i18n stuff. Automake. See NEWS. */ diff --git a/contrib/awk/version.in b/contrib/awk/version.in deleted file mode 100644 index e17195b..0000000 --- a/contrib/awk/version.in +++ /dev/null @@ -1,53 +0,0 @@ -char *version_string = "@(#)GNU Awk X.Y"; - -/* 1.02 fixed /= += *= etc to return the new Left Hand Side instead - of the Right Hand Side */ - -/* 1.03 Fixed split() to treat strings of space and tab as FS if - the split char is ' '. - - Added -v option to print version number - - Fixed bug that caused rounding when printing large numbers */ - -/* 2.00beta Incorporated the functionality of the "new" awk as described - the book (reference not handy). Extensively tested, but no - doubt still buggy. Badly needs tuning and cleanup, in - particular in memory management which is currently almost - non-existent. */ - -/* 2.01 JF: Modified to compile under GCC, and fixed a few - bugs while I was at it. I hope I didn't add any more. - I modified parse.y to reduce the number of reduce/reduce - conflicts. There are still a few left. */ - -/* 2.02 Fixed JF's bugs; improved memory management, still needs - lots of work. */ - -/* 2.10 Major grammar rework and lots of bug fixes from David. - Major changes for performance enhancements from David. - A number of minor bug fixes and new features from Arnold. - Changes for MSDOS from Conrad Kwok and Scott Garfinkle. - The gawk.texinfo and info files included! */ - -/* 2.11 Bug fix release to 2.10. Lots of changes for portability, - speed, and configurability. */ - -/* 2.12 Lots of changes for portability, speed, and configurability. - Several bugs fixed. POSIX compliance. Removal of last set - of hard-wired limits. Atari and VMS ports added. */ - -/* 2.13 Public release of 2.12 */ - -/* 2.14 Mostly bug fixes. */ - -/* 2.15 Bug fixes plus intermixing of command-line source and files, - GNU long options, ARGIND, ERRNO and Plan 9 style /dev/ files. - `delete array'. OS/2 port added. */ - -/* 3.0 RS as regexp, RT variable, FS = "", fflush builtin, posix - regexps, IGNORECASE applies to all comparison, autoconf, source - code cleanup. See the NEWS file. */ - -/* 3.1 PROCINFO array, LINT variable, mktime builtin, BINMODE variable, - |&, tcp/ip, i18n stuff. Automake. See NEWS. */ -- cgit v1.1
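Among the removed fixtures, zeroflag.awk and its expected output exercise a printf corner case: when an integer conversion carries an explicit precision, the "0" flag is ignored and padding falls back to spaces. A standalone sketch (not part of the deleted suite) reproducing the expected " 2--- 2" output:

    BEGIN {
        # precision .1 asks for at least one digit; because a precision is
        # present, the 0 flag on the second conversion is dropped and both
        # fields are space-padded to a minimum width of 2
        printf("%2.1d---%02.1d\n", 2, 2)    # prints " 2--- 2"
    }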