Updated GNU utilities

author: jkh <jkh@FreeBSD.org> 1993-06-18 04:22:21 +0000
committer: jkh <jkh@FreeBSD.org> 1993-06-18 04:22:21 +0000
commit: 1109bdc96fbdae2166ae15bbc363921d1e002ee4 (patch)
tree: 0c9aba9caf0bf15d2ca4ba338fbc8c130fbb9797 /gnu/usr.bin/awk
download: FreeBSD-src-1109bdc96fbdae2166ae15bbc363921d1e002ee4.zip
FreeBSD-src-1109bdc96fbdae2166ae15bbc363921d1e002ee4.tar.gz
3 files changed, 14384 insertions, 0 deletions
diff --git a/gnu/usr.bin/awk/gawk.texi b/gnu/usr.bin/awk/gawk.texi
new file mode 100644
index 0000000..b280262
--- /dev/null
+++ b/gnu/usr.bin/awk/gawk.texi
@@ -0,0 +1,11270 @@
+\input texinfo   @c -*-texinfo-*-
+@c %**start of header (This is for running Texinfo on a region.)
+@setfilename gawk.info
+@settitle The GAWK Manual
+@c @smallbook
+@c %**end of header (This is for running Texinfo on a region.)
+
+@ifinfo
+@synindex fn cp
+@synindex vr cp
+@end ifinfo
+@iftex
+@syncodeindex fn cp
+@syncodeindex vr cp
+@end iftex
+
+@c If "finalout" is commented out, the printed output will show
+@c black boxes that mark lines that are too long.  Thus, it is
+@c unwise to comment it out when running a master in case there are
+@c overfulls which are deemed okay.
+
+@iftex
+@finalout
+@end iftex
+
+@c ===> NOTE! <==
+@c Determine the edition number in *four* places by hand:
+@c   1. First ifinfo section  2. title page  3. copyright page 4. top node
+@c To find the locations, search for !!set
+
+@ifinfo
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+This is Edition 0.15 of @cite{The GAWK Manual}, @*
+for the 2.15 version of the GNU implementation @*
+of AWK.
+
+Copyright (C) 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end ifinfo
+
+@setchapternewpage odd
+
+@c !!set edition, date, version
+@titlepage
+@title The GAWK Manual
+@subtitle Edition 0.15
+@subtitle April 1993
+@author Diane Barlow Close
+@author Arnold D. Robbins
+@author Paul H. Rubin
+@author Richard Stallman
+
+@c Include the Distribution inside the titlepage environment so
+@c that headings are turned off.  Headings on and off do not work.
+
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1989, 1991, 1992, 1993 Free Software Foundation, Inc.
+@sp 2
+        
+@c !!set edition, date, version
+This is Edition 0.15 of @cite{The GAWK Manual}, @*
+for the 2.15 version of the GNU implementation @*
+of AWK.
+
+@sp 2
+Published by the Free Software Foundation @*
+675 Massachusetts Avenue @*
+Cambridge, MA 02139 USA @*
+Printed copies are available for $20 each.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end titlepage
+
+@ifinfo
+@node Top, Preface, (dir), (dir)
+@comment  node-name,  next,  previous,  up
+@top General Introduction
+@c Preface or Licensing nodes should come right after the Top
+@c node, in `unnumbered' sections, then the chapter, `What is gawk'.
+
+This file documents @code{awk}, a program that you can use to select
+particular records in a file and perform operations upon them.
+
+@c !!set edition, date, version
+This is Edition 0.15 of @cite{The GAWK Manual}, @*
+for the 2.15 version of the GNU implementation @*
+of AWK.
+
+@end ifinfo
+
+@menu
+* Preface::                     What you can do with @code{awk}; brief history
+                                and acknowledgements.
+* Copying::                     Your right to copy and distribute @code{gawk}.
+* This Manual::                 Using this manual. 
+                                Includes sample input files that you can use.
+* Getting Started::             A basic introduction to using @code{awk}.
+                                How to run an @code{awk} program.  
+                                Command line syntax.
+* Reading Files::               How to read files and manipulate fields.
+* Printing::                    How to print using @code{awk}.  Describes the
+                                @code{print} and @code{printf} statements.  
+                                Also describes redirection of output.
+* One-liners::                  Short, sample @code{awk} programs.
+* Patterns::                    The various types of patterns 
+                                explained in detail.
+* Actions::                     The various types of actions are
+                                introduced here.  Describes
+                                expressions and the various operators in
+                                detail.  Also describes comparison expressions.
+* Expressions::                 Expressions are the basic building
+                                blocks of statements.
+* Statements::                  The various control statements are 
+                                described in detail.
+* Arrays::                      The description and use of arrays.  
+                                Also includes array-oriented control 
+                                statements.
+* Built-in::                    The built-in functions are summarized here.
+* User-defined::                User-defined functions are described in detail.
+* Built-in Variables::          Summarizes the built-in variables.
+* Command Line::                How to run @code{gawk}.
+* Language History::            The evolution of the @code{awk} language.
+* Installation::                Installing @code{gawk} under 
+                                various operating systems.
+* Gawk Summary::                @code{gawk} Options and Language Summary.
+* Sample Program::              A sample @code{awk} program with a 
+                                complete explanation.
+* Bugs::                        Reporting Problems and Bugs.
+* Notes::                       Something about the 
+                                implementation of @code{gawk}.
+* Glossary::                    An explanation of some unfamiliar terms.
+* Index::                       
+@end menu
+
+@node Preface, Copying, Top, Top
+@comment  node-name,  next,  previous,  up
+@unnumbered Preface
+
+@iftex
+@cindex what is @code{awk}
+@end iftex
+If you are like many computer users, you would frequently like to make
+changes in various text files wherever certain patterns appear, or
+extract data from parts of certain lines while discarding the rest.  To
+write a program to do this in a language such as C or Pascal is a
+time-consuming inconvenience that may take many lines of code.  The job
+may be easier with @code{awk}.
+
+The @code{awk} utility interprets a special-purpose programming language
+that makes it possible to handle simple data-reformatting jobs easily
+with just a few lines of code.
+
+The GNU implementation of @code{awk} is called @code{gawk}; it is fully
+upward compatible with the System V Release 4 version of
+@code{awk}.  @code{gawk} is also upward compatible with the @sc{posix}
+(draft) specification of the @code{awk} language.  This means that all
+properly written @code{awk} programs should work with @code{gawk}.
+Thus, we usually don't distinguish between @code{gawk} and other @code{awk}
+implementations in this manual.@refill
+
+@cindex uses of @code{awk}
+This manual teaches you what @code{awk} does and how you can use
+@code{awk} effectively.  You should already be familiar with basic
+system commands such as @code{ls}.  Using @code{awk} you can: @refill
+
+@itemize @bullet
+@item
+manage small, personal databases
+
+@item
+generate reports
+
+@item
+validate data
+@item
+produce indexes, and perform other document preparation tasks
+
+@item
+even experiment with algorithms that can be adapted later to other computer
+languages
+@end itemize
+
+@iftex
+This manual has the difficult task of being both tutorial and reference.
+If you are a novice, feel free to skip over details that seem too complex.
+You should also ignore the many cross references; they are for the
+expert user, and for the on-line Info version of the manual.
+@end iftex
+
+@menu
+* History::                     The history of @code{gawk} and
+                                @code{awk}.  Acknowledgements.
+@end menu
+
+@node History,  , Preface, Preface
+@comment  node-name,  next,  previous,  up
+@unnumberedsec History of @code{awk} and @code{gawk}
+
+@cindex acronym
+@cindex history of @code{awk}
+The name @code{awk} comes from the initials of its designers: Alfred V.
+Aho, Peter J. Weinberger, and Brian W. Kernighan.  The original version of
+@code{awk} was written in 1977.  In 1985 a new version made the programming
+language more powerful, introducing user-defined functions, multiple input
+streams, and computed regular expressions.
+This new version became generally available with System V Release 3.1.
+The version in System V Release 4 added some new features and also cleaned
+up the behavior in some of the ``dark corners'' of the language.
+The specification for @code{awk} in the @sc{posix} Command Language
+and Utilities standard further clarified the language based on feedback
+from both the @code{gawk} designers and the original @code{awk}
+designers.@refill
+
+The GNU implementation, @code{gawk}, was written in 1986 by Paul Rubin
+and Jay Fenlason, with advice from Richard Stallman.  John Woods
+contributed parts of the code as well.  In 1988 and 1989, David Trueman, with
+help from Arnold Robbins, thoroughly reworked @code{gawk} for compatibility
+with the newer @code{awk}.  Current development (1992) focuses on bug fixes,
+performance improvements, and standards compliance.
+
+We need to thank many people for their assistance in producing this
+manual.  Jay Fenlason contributed many ideas and sample programs.  Richard
+Mlynarik and Robert J. Chassell gave helpful comments on early drafts of this
+manual.  The paper @cite{A Supplemental Document for @code{awk}} by John W.
+Pierce of the Chemistry Department at UC San Diego pinpointed several
+issues relevant both to @code{awk} implementation and to this manual that
+would otherwise have escaped us.  David Trueman, Pat Rankin, and Michal
+Jaegermann also contributed sections of the manual.@refill
+
+The following people provided many helpful comments on this edition of
+the manual: Rick Adams, Michael Brennan, Rich Burridge, Diane Close,
+Christopher (``Topher'') Eliot, Michael Lijewski, Pat Rankin, Miriam Robbins,
+and Michal Jaegermann.  Robert J. Chassell provided much valuable advice on
+the use of Texinfo.
+
+Finally, we would like to thank Brian Kernighan of Bell Labs for invaluable
+assistance during the testing and debugging of @code{gawk}, and for
+help in clarifying numerous points about the language.@refill
+
+@node Copying, This Manual, Preface, Top
+@unnumbered GNU GENERAL PUBLIC LICENSE
+@center Version 2, June 1991
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+675 Mass Ave, Cambridge, MA 02139, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@c fakenode --- for prepinfo
+@unnumberedsec Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@c fakenode --- for prepinfo
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term ``modification''.)  Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License.  (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code.  (This alternative is
+allowed only for noncommercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@c fakenode --- for prepinfo
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@c fakenode --- for prepinfo
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@c fakenode --- for prepinfo
+@unnumberedsec How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and a brief idea of what it does.}
+Copyright (C) 19@var{yy}  @var{name of author}
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details 
+type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License.  Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary.  Here is a sample; alter the names:
+
+@smallexample
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+`Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end smallexample
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+@node This Manual, Getting Started, Copying, Top
+@chapter Using this Manual
+@cindex manual, using this
+@cindex using this manual
+@cindex language, @code{awk}
+@cindex program, @code{awk}
+@cindex @code{awk} language
+@cindex @code{awk} program
+
+The term @code{awk} refers to a particular program, and to the language you
+use to tell this program what to do.  When we need to be careful, we call
+the program ``the @code{awk} utility'' and the language ``the @code{awk}
+language.''  The term @code{gawk} refers to a version of @code{awk} developed
+as part of the GNU project.  The purpose of this manual is to explain
+both the
+@code{awk} language and how to run the @code{awk} utility.@refill
+
+While concentrating on the features of @code{gawk}, the manual will also
+attempt to describe important differences between @code{gawk} and other
+@code{awk} implementations.  In particular, any features that are not
+in the @sc{posix} standard for @code{awk} will be noted.  @refill
+
+The term @dfn{@code{awk} program} refers to a program written by you in
+the @code{awk} programming language.@refill
+
+@xref{Getting Started, ,Getting Started with @code{awk}}, for the bare
+essentials you need to know to start using @code{awk}.  
+
+Some useful ``one-liners'' are included to give you a feel for the
+@code{awk} language (@pxref{One-liners, ,Useful ``One-liners''}).
+
+@ignore
+@strong{I deleted four paragraphs here because they would confuse the
+beginner more than help him.  They mention terms such as ``field,''
+``pattern,'' ``action,'' ``built-in function'' which the beginner
+doesn't know.}
+
+@strong{If you can find a way to introduce several of these concepts here,
+enough to give the reader a map of what is to follow, that might
+be useful.  I'm not sure that can be done without taking up more
+space than ought to be used here.  There may be no way to win.}
+
+@strong{ADR: I'd like to tackle this in phase 2 of my editing.}
+@end ignore
+
+A sample @code{awk} program has been provided for you
+(@pxref{Sample Program}).@refill
+
+If you find terms that you aren't familiar with, try looking them
+up in the glossary (@pxref{Glossary}).@refill
+
+The entire @code{awk} language is summarized for quick reference in
+@ref{Gawk Summary, ,@code{gawk} Summary}.  Look there if you just need
+to refresh your memory about a particular feature.@refill
+
+Most of the time complete @code{awk} programs are used as examples, but in
+some of the more advanced sections, only the part of the @code{awk} program
+that illustrates the concept being described is shown.@refill
+
+@menu
+* Sample Data Files::           Sample data files for use in the @code{awk} 
+                                programs illustrated in this manual.
+@end menu
+
+@node Sample Data Files,  , This Manual, This Manual
+@section Data Files for the Examples
+
+@cindex input file, sample
+@cindex sample input file
+@cindex @file{BBS-list} file
+Many of the examples in this manual take their input from two sample
+data files.  The first, called @file{BBS-list}, represents a list of
+computer bulletin board systems together with information about those systems.
+The second data file, called @file{inventory-shipped}, contains
+information about shipments on a monthly basis.  Each line of these
+files is one @dfn{record}.
+
+In the file @file{BBS-list}, each record contains the name of a computer
+bulletin board, its phone number, the board's baud rate, and a code for
+the number of hours it is operational.  An @samp{A} in the last column
+means the board operates 24 hours a day.  A @samp{B} in the last
+column means the board operates only evening and weekend hours.  A
+@samp{C} means the board operates only on weekends.
+
+@example
+aardvark     555-5553     1200/300          B
+alpo-net     555-3412     2400/1200/300     A
+barfly       555-7685     1200/300          A
+bites        555-1675     2400/1200/300     A
+camelot      555-0542     300               C
+core         555-2912     1200/300          C
+fooey        555-1234     2400/1200/300     B
+foot         555-6699     1200/300          B
+macfoo       555-6480     1200/300          A
+sdace        555-3430     2400/1200/300     A
+sabafoo      555-2127     1200/300          C
+@end example
+
+@cindex @file{inventory-shipped} file
+The second data file, called @file{inventory-shipped}, represents
+information about shipments during the year.  
+Each record contains the month of the year, the number
+of green crates shipped, the number of red boxes shipped, the number of
+orange bags shipped, and the number of blue packages shipped,
+respectively.  There are 16 entries, covering the 12 months of one year
+and 4 months of the next year.@refill
+
+@example
+Jan  13  25  15 115
+Feb  15  32  24 226
+Mar  15  24  34 228
+Apr  31  52  63 420
+May  16  34  29 208
+Jun  31  42  75 492
+Jul  24  34  67 436
+Aug  15  34  47 316
+Sep  13  55  37 277
+Oct  29  54  68 525
+Nov  20  87  82 577
+Dec  17  35  61 401
+
+Jan  21  36  64 620
+Feb  26  58  80 652
+Mar  24  75  70 495
+Apr  21  70  74 514
+@end example
+
+@ifinfo
+If you are reading this in GNU Emacs using Info, you can copy the regions
+of text showing these sample files into your own test files.  This way you
+can try out the examples shown in the remainder of this document.  You do
+this by using the command @kbd{M-x write-region} to copy text from the Info
+file into a file for use with @code{awk}
+(@pxref{Misc File Ops, , , emacs, GNU Emacs Manual},
+for more information).  Using this information, create your own
+@file{BBS-list} and @file{inventory-shipped} files, and practice what you
+learn in this manual.
+@end ifinfo
+
+@node Getting Started, Reading Files, This Manual, Top
+@chapter Getting Started with @code{awk}
+@cindex script, definition of
+@cindex rule, definition of
+@cindex program, definition of
+@cindex basic function of @code{gawk}
+
+The basic function of @code{awk} is to search files for lines (or other
+units of text) that contain certain patterns.  When a line matches one
+of the patterns, @code{awk} performs specified actions on that line.
+@code{awk} keeps processing input lines in this way until the end of the
+input file is reached.@refill
+
+When you run @code{awk}, you specify an @code{awk} @dfn{program} which
+tells @code{awk} what to do.  The program consists of a series of
+@dfn{rules}.  (It may also contain @dfn{function definitions}, but that
+is an advanced feature, so we will ignore it for now.
+@xref{User-defined, ,User-defined Functions}.)  Each rule specifies one
+pattern to search for, and one action to perform when that pattern is found.
+
+Syntactically, a rule consists of a pattern followed by an action.  The
+action is enclosed in curly braces to separate it from the pattern.
+Rules are usually separated by newlines.  Therefore, an @code{awk}
+program looks like this:
+
+@example
+@var{pattern} @{ @var{action} @}
+@var{pattern} @{ @var{action} @}
+@dots{}
+@end example
+
+@menu
+* Very Simple::                 A very simple example.
+* Two Rules::                   A less simple one-line example with two rules.
+* More Complex::                A more complex example.
+* Running gawk::                How to run @code{gawk} programs; 
+                                includes command line syntax.
+* Comments::                    Adding documentation to @code{gawk} programs.
+* Statements/Lines::            Subdividing or combining statements into lines.
+* When::                        When to use @code{gawk} and 
+                                when to use other things.
+@end menu
+
+@node Very Simple, Two Rules, Getting Started, Getting Started
+@section A Very Simple Example
+
+@cindex @samp{print $0}
+The following command runs a simple @code{awk} program that searches the
+input file @file{BBS-list} for the string of characters: @samp{foo}.  (A
+string of characters is usually called a @dfn{string}.
+The term @dfn{string} is perhaps based on similar usage in English, such
+as ``a string of pearls,'' or ``a string of cars in a train.'')
+
+@example
+awk '/foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+When lines containing @samp{foo} are found, they are printed, because
+@w{@samp{print $0}} means print the current line.  (Just @samp{print} by
+itself means the same thing, so we could have written that
+instead.)
+
+You will notice that slashes, @samp{/}, surround the string @samp{foo}
+in the actual @code{awk} program.  The slashes indicate that @samp{foo}
+is a pattern to search for.  This type of pattern is called a
+@dfn{regular expression}, and is covered in more detail later
+(@pxref{Regexp, ,Regular Expressions as Patterns}).  There are
+single-quotes around the @code{awk} program so that the shell won't
+interpret any of it as special shell characters.@refill
+
+Here is what this program prints:
+
+@example
+@group
+fooey        555-1234     2400/1200/300     B
+foot         555-6699     1200/300          B
+macfoo       555-6480     1200/300          A
+sabafoo      555-2127     1200/300          C
+@end group
+@end example
+
+@cindex action, default
+@cindex pattern, default
+@cindex default action
+@cindex default pattern
+In an @code{awk} rule, either the pattern or the action can be omitted,
+but not both.  If the pattern is omitted, then the action is performed
+for @emph{every} input line.  If the action is omitted, the default
+action is to print all lines that match the pattern.
+
+Thus, we could leave out the action (the @code{print} statement and the curly
+braces) in the above example, and the result would be the same: all
+lines matching the pattern @samp{foo} would be printed.  By comparison,
+omitting the @code{print} statement but retaining the curly braces makes an
+empty action that does nothing; then no lines would be printed.
+
+@node Two Rules, More Complex, Very Simple, Getting Started
+@section An Example with Two Rules
+@cindex how @code{awk} works
+
+The @code{awk} utility reads the input files one line at a
+time.  For each line, @code{awk} tries the patterns of each of the rules.
+If several patterns match then several actions are run, in the order in
+which they appear in the @code{awk} program.  If no patterns match, then
+no actions are run.
+
+After processing all the rules (perhaps none) that match the line,
+@code{awk} reads the next line (however,
+@pxref{Next Statement, ,The @code{next} Statement}).  This continues
+until the end of the file is reached.@refill
+
+For example, the @code{awk} program:
+
+@example
+/12/  @{ print $0 @}
+/21/  @{ print $0 @}
+@end example
+
+@noindent
+contains two rules.  The first rule has the string @samp{12} as the
+pattern and @samp{print $0} as the action.  The second rule has the
+string @samp{21} as the pattern and also has @samp{print $0} as the
+action.  Each rule's action is enclosed in its own pair of braces.
+
+This @code{awk} program prints every line that contains the string
+@samp{12} @emph{or} the string @samp{21}.  If a line contains both
+strings, it is printed twice, once by each rule.
+
+If we run this program on our two sample data files, @file{BBS-list} and
+@file{inventory-shipped}, as shown here:
+
+@example
+awk '/12/ @{ print $0 @}
+     /21/ @{ print $0 @}' BBS-list inventory-shipped
+@end example
+
+@noindent
+we get the following output:
+
+@example
+aardvark     555-5553     1200/300          B
+alpo-net     555-3412     2400/1200/300     A
+barfly       555-7685     1200/300          A
+bites        555-1675     2400/1200/300     A
+core         555-2912     1200/300          C
+fooey        555-1234     2400/1200/300     B
+foot         555-6699     1200/300          B
+macfoo       555-6480     1200/300          A
+sdace        555-3430     2400/1200/300     A
+sabafoo      555-2127     1200/300          C
+sabafoo      555-2127     1200/300          C
+Jan  21  36  64 620
+Apr  21  70  74 514
+@end example
+
+@noindent
+Note how the line in @file{BBS-list} beginning with @samp{sabafoo}
+was printed twice, once for each rule.
+
+@node More Complex, Running gawk, Two Rules, Getting Started
+@comment  node-name,  next,  previous,  up
+@section A More Complex Example
+
+Here is an example to give you an idea of what typical @code{awk}
+programs do.  This example shows how @code{awk} can be used to
+summarize, select, and rearrange the output of another utility.  It uses
+features that haven't been covered yet, so don't worry if you don't
+understand all the details.
+
+@example
+ls -l | awk '$5 == "Nov" @{ sum += $4 @}
+             END @{ print sum @}'
+@end example
+
+This command prints the total number of bytes in all the files in the
+current directory that were last modified in November (of any year).
+(In the C shell you would need to type a semicolon and then a backslash
+at the end of the first line; in a @sc{posix}-compliant shell, such as the
+Bourne shell or the Bourne-Again shell, you can type the example as shown.)
+
+The @w{@samp{ls -l}} part of this example is a command that gives you a 
+listing of the files in a directory, including file size and date.
+Its output looks like this:@refill
+
+@example
+-rw-r--r--  1 close        1933 Nov  7 13:05 Makefile
+-rw-r--r--  1 close       10809 Nov  7 13:03 gawk.h
+-rw-r--r--  1 close         983 Apr 13 12:14 gawk.tab.h
+-rw-r--r--  1 close       31869 Jun 15 12:20 gawk.y
+-rw-r--r--  1 close       22414 Nov  7 13:03 gawk1.c
+-rw-r--r--  1 close       37455 Nov  7 13:03 gawk2.c
+-rw-r--r--  1 close       27511 Dec  9 13:07 gawk3.c
+-rw-r--r--  1 close        7989 Nov  7 13:03 gawk4.c
+@end example
+
+@noindent
+The first field contains read-write permissions, the second field contains
+the number of links to the file, and the third field identifies the owner of
+the file.  The fourth field contains the size of the file in bytes.  The
+fifth, sixth, and seventh fields contain the month, day, and time,
+respectively, that the file was last modified.  Finally, the eighth field
+contains the name of the file.
+
+The @code{$5 == "Nov"} in our @code{awk} program is an expression that
+tests whether the fifth field of the output from @w{@samp{ls -l}}
+matches the string @samp{Nov}.  Each time a line has the string
+@samp{Nov} in its fifth field, the action @samp{@{ sum += $4 @}} is
+performed.  This adds the fourth field (the file size) to the variable
+@code{sum}.  As a result, when @code{awk} has finished reading all the
+input lines, @code{sum} is the sum of the sizes of files whose
+lines matched the pattern.  (This works because @code{awk} variables
+are automatically initialized to zero.)@refill
+
+After the last line of output from @code{ls} has been processed, the
+@code{END} rule is executed, and the value of @code{sum} is
+printed.  In this example, the value of @code{sum} would be 80600.@refill
+
+These more advanced @code{awk} techniques are covered in later sections
+(@pxref{Actions, ,Overview of Actions}).  Before you can move on to more
+advanced @code{awk} programming, you have to know how @code{awk} interprets
+your input and displays your output.  By manipulating fields and using
+@code{print} statements, you can produce some very useful and
+spectacular-looking reports.@refill
+
+@node Running gawk, Comments, More Complex, Getting Started
+@section How to Run @code{awk} Programs
+
+@ignore
+Date: Mon, 26 Aug 91 09:48:10 +0200
+From: gatech!vsoc07.cern.ch!matheys (Jean-Pol Matheys (CERN - ECP Division))
+To: uunet.UU.NET!skeeve!arnold
+Subject: RE: status check
+
+The introduction of Chapter 2 (i.e. before 2.1) should include
+the whole of section 2.4  -  it's better to tell people how to run awk programs
+before giving any examples
+
+ADR --- he's right.  but for now, don't do this because the rest of the
+chapter would need some rewriting.
+@end ignore
+
+@cindex command line formats
+@cindex running @code{awk} programs
+There are several ways to run an @code{awk} program.  If the program is
+short, it is easiest to include it in the command that runs @code{awk},
+like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of patterns and actions, as
+described earlier.
+
+When the program is long, it is usually more convenient to put it in a file
+and run it with a command like this:
+
+@example
+awk -f @var{program-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@menu
+* One-shot::                    Running a short throw-away @code{awk} program.
+* Read Terminal::               Using no input files (input from 
+                                terminal instead).
+* Long::                        Putting permanent @code{awk} programs in files.
+* Executable Scripts::          Making self-contained @code{awk} programs.
+@end menu
+
+@node One-shot, Read Terminal, Running gawk, Running gawk
+@subsection One-shot Throw-away @code{awk} Programs
+
+Once you are familiar with @code{awk}, you will often type simple
+programs at the moment you want to use them.  Then you can write the
+program as the first argument of the @code{awk} command, like this:
+
+@example
+awk '@var{program}' @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+@noindent
+where @var{program} consists of a series of @var{patterns} and
+@var{actions}, as described earlier.
+
+@cindex single quotes, why needed
+This command format instructs the shell to start @code{awk} and use the
+@var{program} to process records in the input file(s).  There are single
+quotes around @var{program} so that the shell doesn't interpret any
+@code{awk} characters as special shell characters.  They also cause the
+shell to treat all of @var{program} as a single argument for
+@code{awk} and allow @var{program} to be more than one line long.@refill
+
+This format is also useful for running short or medium-sized @code{awk}
+programs from shell scripts, because it avoids the need for a separate
+file for the @code{awk} program.  A self-contained shell script is more
+reliable since there are no other files to misplace.
+
+@node Read Terminal, Long, One-shot, Running gawk
+@subsection Running @code{awk} without Input Files
+
+@cindex standard input
+@cindex input, standard
+You can also run @code{awk} without any input files.  If you type the
+command line:@refill
+
+@example
+awk '@var{program}'
+@end example
+
+@noindent
+then @code{awk} applies the @var{program} to the @dfn{standard input},
+which usually means whatever you type on the terminal.  This continues
+until you indicate end-of-file by typing @kbd{Control-d}.
+
+For example, if you execute this command:
+
+@example
+awk '/th/'
+@end example
+
+@noindent
+whatever you type next is taken as data for that @code{awk}
+program.  If you go on to type the following data:
+
+@example
+Kathy
+Ben
+Tom
+Beth
+Seth
+Karen
+Thomas
+@kbd{Control-d}
+@end example
+
+@noindent
+then @code{awk} prints this output:
+
+@example
+Kathy
+Beth
+Seth
+@end example
+
+@noindent
+@cindex case sensitivity
+@cindex pattern, case sensitive
+as matching the pattern @samp{th}.  Notice that it did not recognize
+@samp{Thomas} as matching the pattern.  The @code{awk} language is
+@dfn{case sensitive}, and matches patterns exactly.  (However, you can
+override this with the variable @code{IGNORECASE}.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.)
+
+@node Long, Executable Scripts, Read Terminal, Running gawk
+@subsection Running Long Programs
+
+@cindex running long programs
+@cindex @samp{-f} option
+@cindex program file
+@cindex file, @code{awk} program
+Sometimes your @code{awk} programs can be very long.  In this case it is
+more convenient to put the program into a separate file.  To tell
+@code{awk} to use that file for its program, you type:@refill
+
+@example
+awk -f @var{source-file} @var{input-file1} @var{input-file2} @dots{}
+@end example
+
+The @samp{-f} instructs the @code{awk} utility to get the @code{awk} program
+from the file @var{source-file}.  Any file name can be used for
+@var{source-file}.  For example, you could put the program:@refill
+
+@example
+/th/
+@end example
+
+@noindent
+into the file @file{th-prog}.  Then this command:
+
+@example
+awk -f th-prog
+@end example
+
+@noindent
+does the same thing as this one:
+
+@example
+awk '/th/'
+@end example
+
+@noindent
+which was explained earlier (@pxref{Read Terminal, ,Running @code{awk} without Input Files}).
+Note that you don't usually need single quotes around the file name that you
+specify with @samp{-f}, because most file names don't contain any of the shell's
+special characters.  Notice that in @file{th-prog}, the @code{awk}
+program did not have single quotes around it.  The quotes are only needed
+for programs that are provided on the @code{awk} command line.
+
+If you want to identify your @code{awk} program files clearly as such,
+you can add the extension @file{.awk} to the file name.  This doesn't
+affect the execution of the @code{awk} program, but it does make
+``housekeeping'' easier.
+
+@node Executable Scripts,  , Long, Running gawk
+@c node-name, next, previous, up
+@subsection Executable @code{awk} Programs
+@cindex executable scripts
+@cindex scripts, executable
+@cindex self contained programs
+@cindex program, self contained
+@cindex @samp{#!}
+
+Once you have learned @code{awk}, you may want to write self-contained
+@code{awk} scripts, using the @samp{#!} script mechanism.  You can do
+this on many Unix systems@footnote{The @samp{#!} mechanism works on
+Unix systems derived from Berkeley Unix, System V Release 4, and some System
+V Release 3 systems.} (and someday on GNU).@refill
+
+For example, you could create a text file named @file{hello}, containing
+the following (where @samp{BEGIN} is a feature we have not yet
+discussed):
+
+@example
+#! /bin/awk -f
+
+# a sample awk program
+BEGIN    @{ print "hello, world" @}
+@end example
+
+@noindent
+After making this file executable (with the @code{chmod} command), you
+can simply type:
+
+@example
+hello
+@end example
+
+@noindent
+at the shell, and the system will arrange to run @code{awk}@footnote{The
+line beginning with @samp{#!} lists the full pathname of an interpreter
+to be run, and an optional initial command line argument to pass to that
+interpreter.  The operating system then runs the interpreter with the given
+argument and the full argument list of the executed program.  The first argument
+in the list is the full pathname of the @code{awk} program.  The rest of the
+argument list will either be options to @code{awk}, or data files,
+or both.} as if you had typed:@refill
+
+@example
+awk -f hello
+@end example
+
+@noindent
+Self-contained @code{awk} scripts are useful when you want to write a
+program which users can invoke without knowing that the program is
+written in @code{awk}.
+
+@cindex shell scripts
+@cindex scripts, shell
+If your system does not support the @samp{#!} mechanism, you can get a
+similar effect using a regular shell script.  It would look something
+like this:
+
+@example
+: The colon makes sure this script is executed by the Bourne shell.
+awk '@var{program}' "$@@"
+@end example
+
+Using this technique, it is @emph{vital} to enclose the @var{program} in
+single quotes to protect it from interpretation by the shell.  If you
+omit the quotes, only a shell wizard can predict the results.
+
+The @samp{"$@@"} causes the shell to forward all the command line
+arguments to the @code{awk} program, without interpretation.  The first
+line, which starts with a colon, is used so that this shell script will
+work even if invoked by a user who uses the C shell.
+@c Someday: (See @cite{The Bourne Again Shell}, by ??.)
+
+@node Comments, Statements/Lines, Running gawk, Getting Started
+@section Comments in @code{awk} Programs
+@cindex @samp{#}
+@cindex comments
+@cindex use of comments
+@cindex documenting @code{awk} programs
+@cindex programs, documenting
+
+A @dfn{comment} is some text that is included in a program for the sake
+of human readers, and that is not really part of the program.  Comments
+can explain what the program does, and how it works.  Nearly all
+programming languages have provisions for comments, because programs are
+typically hard to understand without the extra help that comments provide.
+
+In the @code{awk} language, a comment starts with the sharp sign
+character, @samp{#}, and continues to the end of the line.  The
+@code{awk} language ignores the rest of a line following a sharp sign.
+For example, we could have put the following into @file{th-prog}:@refill
+
+@smallexample
+# This program finds records containing the pattern @samp{th}.  This is how
+# you continue comments on additional lines.
+/th/
+@end smallexample
+
+You can put comment lines into keyboard-composed throw-away @code{awk}
+programs also, but this usually isn't very useful; the purpose of a
+comment is to help you or another person understand the program at
+a later time.@refill
+
+@node Statements/Lines, When, Comments, Getting Started
+@section @code{awk} Statements versus Lines
+
+Most often, each line in an @code{awk} program is a separate statement or
+separate rule, like this:
+
+@example
+awk '/12/  @{ print $0 @}
+     /21/  @{ print $0 @}' BBS-list inventory-shipped
+@end example
+
+But sometimes statements can be more than one line, and lines can
+contain several statements.  You can split a statement into multiple
+lines by inserting a newline after any of the following:@refill
+
+@example
+,    @{    ?    :    ||    &&    do    else
+@end example
+
+@noindent
+A newline at any other point is considered the end of the statement.
+(Splitting lines after @samp{?} and @samp{:} is a minor @code{gawk}
+extension.  The @samp{?} and @samp{:} referred to here are the operators
+of the three-operand conditional expression described in
+@ref{Conditional Exp, ,Conditional Expressions}.)@refill
+
+@cindex backslash continuation
+@cindex continuation of lines
+If you would like to split a single statement into two lines at a point
+where a newline would terminate it, you can @dfn{continue} it by ending the
+first line with a backslash character, @samp{\}.  This is allowed
+absolutely anywhere in the statement, even in the middle of a string or
+regular expression.  For example:
+
+@example
+awk '/This program is too long, so continue it\
+ on the next line/ @{ print $1 @}'
+@end example
+
+@noindent
+We have generally not used backslash continuation in the sample programs in
+this manual.  Since in @code{gawk} there is no limit on the length of a line,
+it is never strictly necessary; it just makes programs prettier.  We have
+preferred to make them even more pretty by keeping the statements short.
+Backslash continuation is most useful when your @code{awk} program is in a
+separate source file, instead of typed in on the command line.  You should
+also note that many @code{awk} implementations are more picky about where
+you may use backslash continuation.  For maximal portability of your @code{awk}
+programs, it is best not to split your lines in the middle of a regular
+expression or a string.@refill
+
+@strong{Warning: backslash continuation does not work as described above
+with the C shell.}  Continuation with backslash works for @code{awk}
+programs in files, and also for one-shot programs @emph{provided} you
+are using a @sc{posix}-compliant shell, such as the Bourne shell or the
+Bourne-again shell.  But the C shell used on Berkeley Unix behaves
+differently!  There, you must use two backslashes in a row, followed by
+a newline.@refill
+
+@cindex multiple statements on one line
+When @code{awk} statements within one rule are short, you might want to put
+more than one of them on a line.  You do this by separating the statements
+with a semicolon, @samp{;}.
+This also applies to the rules themselves.
+Thus, the two-rule program shown at the start of this section could have
+been written:@refill
+
+@example
+/12/ @{ print $0 @} ; /21/ @{ print $0 @}
+@end example
+
+@noindent
+@strong{Note:} the requirement that rules on the same line must be
+separated with a semicolon is a recent change in the @code{awk}
+language; it was done for consistency with the treatment of statements
+within an action.
+
+@node When,  , Statements/Lines, Getting Started
+@section When to Use @code{awk}
+
+@cindex when to use @code{awk}
+@cindex applications of @code{awk}
+You might wonder how @code{awk} might be useful for you.  Using additional
+utility programs, more advanced patterns, field separators, arithmetic
+statements, and other selection criteria, you can produce much more
+complex output.  The @code{awk} language is very useful for producing
+reports from large amounts of raw data, such as summarizing information
+from the output of other utility programs like @code{ls}.  
+(@xref{More Complex, ,A More Complex Example}.)
+
+Programs written with @code{awk} are usually much smaller than they would
+be in other languages.  This makes @code{awk} programs easy to compose and
+use.  Often @code{awk} programs can be quickly composed at your terminal,
+used once, and thrown away.  Since @code{awk} programs are interpreted, you
+can avoid the usually lengthy edit-compile-test-debug cycle of software
+development.
+
+Complex programs have been written in @code{awk}, including a complete
+retargetable assembler for 8-bit microprocessors (@pxref{Glossary}, for
+more information) and a microcode assembler for a special purpose Prolog
+computer.  However, @code{awk}'s capabilities are strained by tasks of
+such complexity.
+
+If you find yourself writing @code{awk} scripts of more than, say, a few
+hundred lines, you might consider using a different programming
+language.  Emacs Lisp is a good choice if you need sophisticated string
+or pattern matching capabilities.  The shell is also good at string and
+pattern matching; in addition, it allows powerful use of the system
+utilities.  More conventional languages, such as C, C++, and Lisp, offer
+better facilities for system programming and for managing the complexity
+of large programs.  Programs in these languages may require more lines
+of source code than the equivalent @code{awk} programs, but they are
+easier to maintain and usually run more efficiently.@refill
+
+@node Reading Files, Printing, Getting Started, Top
+@chapter Reading Input Files
+
+@cindex reading files
+@cindex input
+@cindex standard input
+@vindex FILENAME
+In the typical @code{awk} program, all input is read either from the
+standard input (by default the keyboard, but often a pipe from another
+command) or from files whose names you specify on the @code{awk} command
+line.  If you specify input files, @code{awk} reads them in order, reading
+all the data from one before going on to the next.  The name of the current
+input file can be found in the built-in variable @code{FILENAME}
+(@pxref{Built-in Variables}).@refill
+
+The input is read in units called records, and processed by the
+rules one record at a time.  By default, each record is one line.  Each
+record is split automatically into fields, to make it more
+convenient for a rule to work on its parts.
+
+On rare occasions you will need to use the @code{getline} command,
+which can do explicit input from any number of files
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
+
+@menu
+* Records::                     Controlling how data is split into records.
+* Fields::                      An introduction to fields.
+* Non-Constant Fields::         Non-constant Field Numbers.
+* Changing Fields::             Changing the Contents of a Field.
+* Field Separators::            The field separator and how to change it.
+* Constant Size::               Reading constant width data.
+* Multiple Line::               Reading multi-line records.
+* Getline::                     Reading files under explicit program control
+                                using the @code{getline} function.
+* Close Input::                 Closing an input file (so you can read from
+                                the beginning once more).
+@end menu
+
+@node Records, Fields, Reading Files, Reading Files
+@section How Input is Split into Records
+
+@cindex record separator
+The @code{awk} language divides its input into records and fields.
+Records are separated by a character called the @dfn{record separator}.
+By default, the record separator is the newline character, defining
+a record to be a single line of text.@refill
+
+@iftex
+@cindex changing the record separator
+@end iftex
+@vindex RS
+Sometimes you may want to use a different character to separate your
+records.  You can use a different character by changing the built-in
+variable @code{RS}.  The value of @code{RS} is a string that says how
+to separate records; the default value is @code{"\n"}, the string containing
+just a newline character.  This is why records are, by default, single lines.
+
+@code{RS} can have any string as its value, but only the first character
+of the string is used as the record separator.  The other characters are
+ignored.  @code{RS} is exceptional in this regard; @code{awk} uses the
+full value of all its other built-in variables.@refill
+
+@ignore
+Someday this should be true!
+
+The value of @code{RS} is not limited to a one-character string.  It can
+be any regular expression (@pxref{Regexp, ,Regular Expressions as Patterns}).
+In general, each record
+ends at the next string that matches the regular expression; the next
+record starts at the end of the matching string.  This general rule is
+actually at work in the usual case, where @code{RS} contains just a
+newline: a record ends at the beginning of the next matching string (the
+next newline in the input) and the following record starts just after
+the end of this string (at the first character of the following line).
+The newline, since it matches @code{RS}, is not part of either record.@refill
+@end ignore
+
+You can change the value of @code{RS} in the @code{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+The new record-separator character should be enclosed in quotation marks to make
+a string constant.  Often the right time to do this is at the beginning
+of execution, before any input has been processed, so that the very
+first record will be read with the proper separator.  To do this, use
+the special @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).  For
+example:@refill
+
+@example
+awk 'BEGIN @{ RS = "/" @} ; @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+changes the value of @code{RS} to @code{"/"}, before reading any input.
+This is a string whose first character is a slash; as a result, records
+are separated by slashes.  Then the input file is read, and the second
+rule in the @code{awk} program (the action with no pattern) prints each
+record.  Since each @code{print} statement adds a newline at the end of
+its output, the effect of this @code{awk} program is to copy the input
+with each slash changed to a newline.
+
+Another way to change the record separator is on the command line,
+using the variable-assignment feature
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@example
+awk '@{ print $0 @}' RS="/" BBS-list
+@end example
+
+@noindent
+This sets @code{RS} to @samp{/} before processing @file{BBS-list}.
+
+Reaching the end of an input file terminates the current input record,
+even if the last character in the file is not the character in @code{RS}.
+
+@ignore
+@c merge the preceding paragraph and this stuff into one paragraph
+@c and put it in an `expert info' section.
+This produces correct behavior in the vast majority of cases, although
+the following (extreme) pipeline prints a surprising @samp{1}.  (There
+is one field, consisting of a newline.)
+
+@example
+echo | awk 'BEGIN @{ RS = "a" @} ; @{ print NF @}'
+@end example
+
+@end ignore
+
+The empty string, @code{""} (a string of no characters), has a special meaning
+as the value of @code{RS}: it means that records are separated only
+by blank lines.  @xref{Multiple Line, ,Multiple-Line Records}, for more details.
+
+@cindex number of records, @code{NR} or @code{FNR}
+@vindex NR
+@vindex FNR
+The @code{awk} utility keeps track of the number of records that have
+been read so far from the current input file.  This value is stored in a
+built-in variable called @code{FNR}.  It is reset to zero when a new
+file is started.  Another built-in variable, @code{NR}, is the total
+number of input records read so far from all files.  It starts at zero
+but is never automatically reset to zero.
+
+If you change the value of @code{RS} in the middle of an @code{awk} run,
+the new value is used to delimit subsequent records, but the record
+currently being processed (and records already processed) are not
+affected.
+
+@node Fields, Non-Constant Fields, Records, Reading Files
+@section Examining Fields
+
+@cindex examining fields
+@cindex fields
+@cindex accessing fields
+When @code{awk} reads an input record, the record is
+automatically separated or @dfn{parsed} by the interpreter into chunks
+called @dfn{fields}.  By default, fields are separated by whitespace,
+like words in a line.
+Whitespace in @code{awk} means any string of one or more spaces and/or
+tabs; other characters such as newline, formfeed, and so on, that are
+considered whitespace by other languages are @emph{not} considered
+whitespace by @code{awk}.@refill
+
+The purpose of fields is to make it more convenient for you to refer to
+these pieces of the record.  You don't have to use them---you can
+operate on the whole record if you wish---but fields are what make
+simple @code{awk} programs so powerful.
+
+@cindex @code{$} (field operator)
+@cindex operators, @code{$}
+To refer to a field in an @code{awk} program, you use a dollar-sign,
+@samp{$}, followed by the number of the field you want.  Thus, @code{$1}
+refers to the first field, @code{$2} to the second, and so on.  For
+example, suppose the following is a line of input:@refill
+
+@example
+This seems like a pretty nice example.
+@end example
+
+@noindent
+Here the first field, or @code{$1}, is @samp{This}; the second field, or
+@code{$2}, is @samp{seems}; and so on.  Note that the last field,
+@code{$7}, is @samp{example.}.  Because there is no space between the
+@samp{e} and the @samp{.}, the period is considered part of the seventh
+field.@refill
+
+No matter how many fields there are, the last field in a record can be
+represented by @code{$NF}.  So, in the example above, @code{$NF} would
+be the same as @code{$7}, which is @samp{example.}.  Why this works is
+explained below (@pxref{Non-Constant Fields, ,Non-constant Field Numbers}).
+If you try to refer to a field beyond the last one, such as @code{$8}
+when the record has only 7 fields, you get the empty string.@refill
+
+@vindex NF
+@cindex number of fields, @code{NF}
+Plain @code{NF}, with no @samp{$}, is a built-in variable whose value
+is the number of fields in the current record.
+
+@code{$0}, which looks like an attempt to refer to the zeroth field, is
+a special case: it represents the whole input record.  This is what you
+would use if you weren't interested in fields.
+
+Here are some more examples:
+
+@example
+awk '$1 ~ /foo/ @{ print $0 @}' BBS-list
+@end example
+
+@noindent
+This example prints each record in the file @file{BBS-list} whose first
+field contains the string @samp{foo}.  The operator @samp{~} is called a
+@dfn{matching operator} (@pxref{Comparison Ops, ,Comparison Expressions});
+it tests whether a string (here, the field @code{$1}) matches a given regular
+expression.@refill
+
+By contrast, the following example:
+
+@example
+awk '/foo/ @{ print $1, $NF @}' BBS-list
+@end example
+
+@noindent
+looks for @samp{foo} in @emph{the entire record} and prints the first
+field and the last field for each input record containing a
+match.@refill
+
+@node Non-Constant Fields, Changing Fields, Fields, Reading Files
+@section Non-constant Field Numbers
+
+The number of a field does not need to be a constant.  Any expression in
+the @code{awk} language can be used after a @samp{$} to refer to a
+field.  The value of the expression specifies the field number.  If the
+value is a string, rather than a number, it is converted to a number.
+Consider this example:@refill
+
+@example
+awk '@{ print $NR @}'
+@end example
+
+@noindent
+Recall that @code{NR} is the number of records read so far: 1 in the
+first record, 2 in the second, etc.  So this example prints the first
+field of the first record, the second field of the second record, and so
+on.  For the twentieth record, field number 20 is printed; most likely,
+the record has fewer than 20 fields, so this prints a blank line.
+
+Here is another example of using expressions as field numbers:
+
+@example
+awk '@{ print $(2*2) @}' BBS-list
+@end example
+
+The @code{awk} language must evaluate the expression @code{(2*2)} and use
+its value as the number of the field to print.  The @samp{*} sign
+represents multiplication, so the expression @code{2*2} evaluates to 4.
+The parentheses are used so that the multiplication is done before the
+@samp{$} operation; they are necessary whenever there is a binary
+operator in the field-number expression.  This example, then, prints the
+hours of operation (the fourth field) for every line of the file
+@file{BBS-list}.@refill
+
+If the field number you compute is zero, you get the entire record.
+Thus, @code{$(2-2)} has the same value as @code{$0}.  Negative field
+numbers are not allowed.
+
+The number of fields in the current record is stored in the built-in
+variable @code{NF} (@pxref{Built-in Variables}).  The expression
+@code{$NF} is not a special feature: it is the direct consequence of
+evaluating @code{NF} and using its value as a field number.
+
+@node Changing Fields, Field Separators, Non-Constant Fields, Reading Files
+@section Changing the Contents of a Field
+
+@cindex field, changing contents of
+@cindex changing contents of a field
+@cindex assignment to fields
+You can change the contents of a field as seen by @code{awk} within an
+@code{awk} program; this changes what @code{awk} perceives as the
+current input record.  (The actual input is untouched: @code{awk} never
+modifies the input file.)
+
+Consider this example:
+
+@smallexample
+awk '@{ $3 = $2 - 10; print $2, $3 @}' inventory-shipped
+@end smallexample
+
+@noindent
+The @samp{-} sign represents subtraction, so this program reassigns
+field three, @code{$3}, to be the value of field two minus ten,
+@code{$2 - 10}.  (@xref{Arithmetic Ops, ,Arithmetic Operators}.)
+Then field two, and the new value for field three, are printed.  
+
+In order for this to work, the text in field @code{$2} must make sense
+as a number; the string of characters must be converted to a number in
+order for the computer to do arithmetic on it.  The number resulting
+from the subtraction is converted back to a string of characters which
+then becomes field three.
+@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
+
+When you change the value of a field (as perceived by @code{awk}), the
+text of the input record is recalculated to contain the new field where
+the old one was.  Therefore, @code{$0} changes to reflect the altered
+field.  Thus,
+
+@smallexample
+awk '@{ $2 = $2 - 10; print $0 @}' inventory-shipped
+@end smallexample
+
+@noindent
+prints a copy of the input file, with 10 subtracted from the second
+field of each line.
+
+You can also assign contents to fields that are out of range.  For
+example:
+
+@smallexample
+awk '@{ $6 = ($5 + $4 + $3 + $2) ; print $6 @}' inventory-shipped
+@end smallexample
+
+@noindent
+We've just created @code{$6}, whose value is the sum of fields
+@code{$2}, @code{$3}, @code{$4}, and @code{$5}.  The @samp{+} sign
+represents addition.  For the file @file{inventory-shipped}, @code{$6}
+represents the total number of parcels shipped for a particular month.
+
+Creating a new field changes the internal @code{awk} copy of the current
+input record---the value of @code{$0}.  Thus, if you do @samp{print $0}
+after adding a field, the record printed includes the new field, with
+the appropriate number of field separators between it and the previously
+existing fields.
+
+This recomputation affects and is affected by @code{NF} (the number of
+fields; @pxref{Fields, ,Examining Fields}), and by a feature not yet
+discussed, the @dfn{output field separator}, @code{OFS}, which is used
+to separate the fields (@pxref{Output Separators}).
+For example, the value of @code{NF} is set to the number of the highest
+field you create.@refill
+
+Note, however, that merely @emph{referencing} an out-of-range field
+does @emph{not} change the value of either @code{$0} or @code{NF}.
+Referencing an out-of-range field merely produces a null string.  For
+example:@refill
+
+@smallexample
+if ($(NF+1) != "")
+    print "can't happen"
+else
+    print "everything is normal"
+@end smallexample
+
+@noindent
+should print @samp{everything is normal}, because @code{NF+1} is certain
+to be out of range.  (@xref{If Statement, ,The @code{if} Statement},
+for more information about @code{awk}'s @code{if-else} statements.)@refill
+
+It is important to note that assigning to an existing field will change
+the value of @code{$0}, but will not change the value of @code{NF},
+even when you assign the null string to a field.  For example:
+
+@smallexample
+echo a b c d | awk '@{ OFS = ":"; $2 = "" ; print ; print NF @}'
+@end smallexample
+
+@noindent
+prints
+
+@smallexample
+a::c:d
+4
+@end smallexample
+
+@noindent
+The field is still there; it just has an empty value.  You can tell
+because there are two colons in a row.
+
+@node Field Separators, Constant Size, Changing Fields, Reading Files
+@section Specifying how Fields are Separated
+@vindex FS
+@cindex fields, separating
+@cindex field separator, @code{FS}
+@cindex @samp{-F} option
+
+(This section is rather long; it describes one of the most fundamental
+operations in @code{awk}.  If you are a novice with @code{awk}, we
+recommend that you re-read this section after you have studied the
+section on regular expressions, @ref{Regexp, ,Regular Expressions as Patterns}.)
+
+The way @code{awk} splits an input record into fields is controlled by
+the @dfn{field separator}, which is a single character or a regular
+expression.  @code{awk} scans the input record for matches for the
+separator; the fields themselves are the text between the matches.  For
+example, if the field separator is @samp{oo}, then the following line:
+
+@smallexample
+moo goo gai pan
+@end smallexample
+
+@noindent
+would be split into three fields: @samp{m}, @samp{@ g} and
+@samp{@ gai@ pan}.
+
+The field separator is represented by the built-in variable @code{FS}.
+Shell programmers take note!  @code{awk} does not use the name @code{IFS}
+which is used by the shell.@refill
+
+You can change the value of @code{FS} in the @code{awk} program with the
+assignment operator, @samp{=} (@pxref{Assignment Ops, ,Assignment Expressions}).
+Often the right time to do this is at the beginning of execution,
+before any input has been processed, so that the very first record
+will be read with the proper separator.  To do this, use the special
+@code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
+For example, here we set the value of @code{FS} to the string
+@code{","}:@refill
+
+@smallexample
+awk 'BEGIN @{ FS = "," @} ; @{ print $2 @}'
+@end smallexample
+
+@noindent
+Given the input line,
+
+@smallexample
+John Q. Smith, 29 Oak St., Walamazoo, MI 42139
+@end smallexample
+
+@noindent
+this @code{awk} program extracts the string @samp{@ 29 Oak St.}.
+
+@cindex field separator, choice of
+@cindex regular expressions as field separators
+Sometimes your input data will contain separator characters that don't
+separate fields the way you thought they would.  For instance, the
+person's name in the example we've been using might have a title or
+suffix attached, such as @samp{John Q. Smith, LXIX}.  From input
+containing such a name:
+
+@smallexample
+John Q. Smith, LXIX, 29 Oak St., Walamazoo, MI 42139
+@end smallexample
+
+@noindent
+the previous sample program would extract @samp{@ LXIX}, instead of
+@samp{@ 29 Oak St.}.  If you were expecting the program to print the
+address, you would be surprised.  So choose your data layout and
+separator characters carefully to prevent such problems.
+
+As you know, by default, fields are separated by whitespace sequences
+(spaces and tabs), not by single spaces: two spaces in a row do not
+delimit an empty field.  The default value of the field separator is a
+string @w{@code{" "}} containing a single space.  If this value were
+interpreted in the usual way, each space character would separate
+fields, so two spaces in a row would make an empty field between them.
+The reason this does not happen is that a single space as the value of
+@code{FS} is a special case: it is taken to specify the default manner
+of delimiting fields.
+
+If @code{FS} is any other single character, such as @code{","}, then
+each occurrence of that character separates two fields.  Two consecutive
+occurrences delimit an empty field.  If the character occurs at the
+beginning or the end of the line, that too delimits an empty field.  The
+space character is the only single character which does not follow these
+rules.
+
+More generally, the value of @code{FS} may be a string containing any
+regular expression.  Then each match in the record for the regular
+expression separates fields.  For example, the assignment:@refill
+
+@smallexample
+FS = ", \t"
+@end smallexample
+
+@noindent
+makes every area of an input line that consists of a comma followed by a
+space and a tab, into a field separator.  (@samp{\t} stands for a
+tab.)@refill
+
+For a less trivial example of a regular expression, suppose you want
+single spaces to separate fields the way single commas were used above.
+You can set @code{FS} to @w{@code{"[@ ]"}}.  This regular expression
+matches a single space and nothing else.
+
+@c the following index entry is an overfull hbox.  --mew 30jan1992
+@cindex field separator: on command line
+@cindex command line, setting @code{FS} on
+@code{FS} can be set on the command line.  You use the @samp{-F} argument to
+do so.  For example:
+
+@smallexample
+awk -F, '@var{program}' @var{input-files}
+@end smallexample
+
+@noindent
+sets @code{FS} to be the @samp{,} character.  Notice that the argument uses
+a capital @samp{F}.  Contrast this with @samp{-f}, which specifies a file
+containing an @code{awk} program.  Case is significant in command options:
+the @samp{-F} and @samp{-f} options have nothing to do with each other.
+You can use both options at the same time to set the @code{FS} variable
+@emph{and} get an @code{awk} program from a file.@refill
+
+@c begin expert info
+The value used for the argument to @samp{-F} is processed in exactly the
+same way as assignments to the built-in variable @code{FS}.  This means that
+if the field separator contains special characters, they must be escaped
+appropriately.  For example, to use a @samp{\} as the field separator, you
+would have to type:
+
+@smallexample
+# same as FS = "\\" 
+awk -F\\\\ '@dots{}' files @dots{}
+@end smallexample
+
+@noindent
+Since @samp{\} is used for quoting in the shell, @code{awk} will see
+@samp{-F\\}.  Then @code{awk} processes the @samp{\\} for escape
+characters (@pxref{Constants, ,Constant Expressions}), finally yielding
+a single @samp{\} to be used for the field separator.
+@c end expert info
+
+As a special case, in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), if the
+argument to @samp{-F} is @samp{t}, then @code{FS} is set to the tab
+character.  (This is because if you type @samp{-F\t}, without the quotes,
+at the shell, the @samp{\} gets deleted, so @code{awk} figures that you
+really want your fields to be separated with tabs, and not @samp{t}s.
+Use @samp{-v FS="t"} on the command line if you really do want to separate
+your fields with @samp{t}s.)@refill
+
+For example, let's use an @code{awk} program file called @file{baud.awk}
+that contains the pattern @code{/300/}, and the action @samp{print $1}.
+Here is the program:
+
+@smallexample
+/300/   @{ print $1 @}
+@end smallexample
+
+Let's also set @code{FS} to be the @samp{-} character, and run the
+program on the file @file{BBS-list}.  The following command prints a
+list of the names of the bulletin boards that operate at 300 baud and
+the first three digits of their phone numbers:@refill
+
+@smallexample
+awk -F- -f baud.awk BBS-list
+@end smallexample
+
+@noindent
+It produces this output:
+
+@smallexample
+aardvark     555
+alpo
+barfly       555
+bites        555
+camelot      555
+core         555
+fooey        555
+foot         555
+macfoo       555
+sdace        555
+sabafoo      555
+@end smallexample
+
+@noindent
+Note the second line of output.  If you check the original file, you will
+see that the second line looked like this:
+
+@smallexample
+alpo-net     555-3412     2400/1200/300     A
+@end smallexample
+
+The @samp{-} as part of the system's name was used as the field
+separator, instead of the @samp{-} in the phone number that was
+originally intended.  This demonstrates why you have to be careful in
+choosing your field and record separators.
+
+The following program searches the system password file, and prints
+the entries for users who have no password:
+
+@smallexample
+awk -F: '$2 == ""' /etc/passwd
+@end smallexample
+
+@noindent
+Here we use the @samp{-F} option on the command line to set the field
+separator.  Note that fields in @file{/etc/passwd} are separated by
+colons.  The second field represents a user's encrypted password, but if
+the field is empty, that user has no password.
+
+@c begin expert info
+According to the @sc{posix} standard, @code{awk} is supposed to behave
+as if each record is split into fields at the time that it is read.
+In particular, this means that you can change the value of @code{FS}
+after a record is read, but before any of the fields are referenced.
+The value of the fields (i.e., how they were split) should reflect the
+old value of @code{FS}, not the new one.
+
+However, many implementations of @code{awk} do not do this.  Instead,
+they defer splitting the fields until a field reference actually happens,
+using the @emph{current} value of @code{FS}!  This behavior can be difficult
+to diagnose. The following example illustrates the results of the two methods.
+(The @code{sed} command prints just the first line of @file{/etc/passwd}.)
+
+@smallexample
+sed 1q /etc/passwd | awk '@{ FS = ":" ; print $1 @}'
+@end smallexample
+
+@noindent
+will usually print
+
+@smallexample
+root
+@end smallexample
+
+@noindent
+on an incorrect implementation of @code{awk}, while @code{gawk}
+will print something like
+
+@smallexample
+root:nSijPlPhZZwgE:0:0:Root:/:
+@end smallexample
+@c end expert info
+
+@c begin expert info
+There is an important difference between the two cases of @samp{FS = @w{" "}}
+(a single blank) and @samp{FS = @w{"[ \t]+"}} (which is a regular expression
+matching one or more blanks or tabs).  For both values of @code{FS}, fields
+are separated by runs of blanks and/or tabs.  However, when the value of
+@code{FS} is @code{" "}, @code{awk} will strip leading and trailing whitespace
+from the record, and then decide where the fields are.  
+
+For example, the following pipeline prints @samp{b}:
+
+@smallexample
+echo ' a b c d ' | awk '@{ print $2 @}'
+@end smallexample
+
+@noindent
+However, the following prints @samp{a}:
+
+@smallexample
+echo ' a b c d ' | awk 'BEGIN @{ FS = "[ \t]+" @} ; @{ print $2 @}'
+@end smallexample
+
+@noindent
+In this case, the first field is null.
+
+The stripping of leading and trailing whitespace also comes into
+play whenever @code{$0} is recomputed.  For instance, this pipeline
+
+@smallexample
+echo '   a b c d' | awk '@{ print; $2 = $2; print @}'
+@end smallexample
+
+@noindent
+produces this output:
+
+@smallexample
+   a b c d
+a b c d
+@end smallexample
+
+@noindent
+The first @code{print} statement prints the record as it was read,
+with leading whitespace intact.  The assignment to @code{$2} rebuilds
+@code{$0} by concatenating @code{$1} through @code{$NF} together,
+separated by the value of @code{OFS}.  Since the leading whitespace
+was ignored when finding @code{$1}, it is not part of the new @code{$0}.
+Finally, the last @code{print} statement prints the new @code{$0}.
+@c end expert info
+
+The following table summarizes how fields are split, based on the
+value of @code{FS}.
+
+@table @code
+@item FS == " "
+Fields are separated by runs of whitespace.  Leading and trailing
+whitespace are ignored.  This is the default.
+
+@item FS == @var{any other single character}
+Fields are separated by each occurrence of the character.  Multiple
+successive occurrences delimit empty fields, as do leading and
+trailing occurrences.
+
+@item FS == @var{regexp}
+Fields are separated by occurrences of characters that match @var{regexp}.
+Leading and trailing matches of @var{regexp} delimit empty fields.
+@end table
+
+@node Constant Size, Multiple Line, Field Separators, Reading Files
+@section Reading Fixed-width Data
+
+(This section discusses an advanced, experimental feature.  If you are
+a novice @code{awk} user, you may wish to skip it on the first reading.)
+
+@code{gawk} 2.13 introduced a new facility for dealing with fixed-width fields
+with no distinctive field separator.  Data of this nature arises typically
+in one of at least two ways:  the input for old FORTRAN programs where
+numbers are run together, and the output of programs that did not anticipate
+the use of their output as input for other programs.
+
+An example of the latter is a table where all the columns are lined up by
+the use of a variable number of spaces and @emph{empty fields are just
+spaces}.  Clearly, @code{awk}'s normal field splitting based on @code{FS}
+will not work well in this case.  (Although a portable @code{awk} program
+can use a series of @code{substr} calls on @code{$0}, this is awkward and
+inefficient for a large number of fields.)@refill
+
+The splitting of an input record into fixed-width fields is specified by
+assigning a string containing space-separated numbers to the built-in
+variable @code{FIELDWIDTHS}.  Each number specifies the width of the field
+@emph{including} columns between fields.  If you want to ignore the columns
+between fields, you can specify the width as a separate field that is
+subsequently ignored.
+
+The following data is the output of the @code{w} utility.  It is useful
+to illustrate the use of @code{FIELDWIDTHS}.
+
+@smallexample
+ 10:06pm  up 21 days, 14:04,  23 users
+User     tty       login@  idle   JCPU   PCPU  what
+hzuo     ttyV0     8:58pm            9      5  vi p24.tex 
+hzang    ttyV3     6:37pm    50                -csh 
+eklye    ttyV5     9:53pm            7      1  em thes.tex 
+dportein ttyV6     8:17pm  1:47                -csh 
+gierd    ttyD3    10:00pm     1                elm 
+dave     ttyD4     9:47pm            4      4  w 
+brent    ttyp0    26Jun91  4:46  26:46   4:41  bash 
+dave     ttyq4    26Jun9115days     46     46  wnewmail 
+@end smallexample
+
+The following program takes the above input, converts the idle time to
+number of seconds and prints out the first two fields and the calculated
+idle time.  (This program uses a number of @code{awk} features that
+haven't been introduced yet.)@refill
+
+@smallexample
+BEGIN  @{ FIELDWIDTHS = "9 6 10 6 7 7 35" @}
+NR > 2 @{
+    idle = $4
+    sub(/^  */, "", idle)   # strip leading spaces
+    if (idle == "") idle = 0
+    if (idle ~ /:/) @{ split(idle, t, ":"); idle = t[1] * 60 + t[2] @}
+    if (idle ~ /days/) @{ idle *= 24 * 60 * 60 @}
+ 
+    print $1, $2, idle
+@}
+@end smallexample
+
+Here is the result of running the program on the data:
+
+@smallexample
+hzuo      ttyV0  0
+hzang     ttyV3  50
+eklye     ttyV5  0
+dportein  ttyV6  107
+gierd     ttyD3  1
+dave      ttyD4  0
+brent     ttyp0  286
+dave      ttyq4  1296000
+@end smallexample
+
+Another (possibly more practical) example of fixed-width input data
+would be the input from a deck of balloting cards.  In some parts of
+the United States, voters make their choices by punching holes in computer
+cards.  These cards are then processed to count the votes for any particular
+candidate or on any particular issue.  Since a voter may choose not to
+vote on some issue, any column on the card may be empty.  An @code{awk}
+program for processing such data could use the @code{FIELDWIDTHS} feature
+to simplify reading the data.@refill
+
+@c of course, getting gawk to run on a system with card readers is
+@c another story!
+
+This feature is still experimental, and will likely evolve over time.
+
+@node Multiple Line, Getline, Constant Size, Reading Files
+@section Multiple-Line Records
+
+@cindex multiple line records
+@cindex input, multiple line records
+@cindex reading files, multiple line records
+@cindex records, multiple line
+In some data bases, a single line cannot conveniently hold all the
+information in one entry.  In such cases, you can use multi-line
+records.
+
+The first step in doing this is to choose your data format: when records
+are not defined as single lines, how do you want to define them?
+What should separate records?
+
+One technique is to use an unusual character or string to separate
+records.  For example, you could use the formfeed character (written
+@code{\f} in @code{awk}, as in C) to separate them, making each record
+a page of the file.  To do this, just set the variable @code{RS} to
+@code{"\f"} (a string containing the formfeed character).  Any
+other character could equally well be used, as long as it won't be part
+of the data in a record.@refill
+
+@ignore
+Another technique is to have blank lines separate records.  The string
+@code{"^\n+"} is a regular expression that matches any sequence of
+newlines starting at the beginning of a line---in other words, it
+matches a sequence of blank lines.  If you set @code{RS} to this string,
+a record always ends at the first blank line encountered.  In
+addition, a regular expression always matches the longest possible
+sequence when there is a choice.  So the next record doesn't start until
+the first nonblank line that follows---no matter how many blank lines
+appear in a row, they are considered one record-separator.
+@end ignore
+
+Another technique is to have blank lines separate records.  By a special
+dispensation, a null string as the value of @code{RS} indicates that
+records are separated by one or more blank lines.  If you set @code{RS}
+to the null string, a record always ends at the first blank line
+encountered.  And the next record doesn't start until the first nonblank
+line that follows---no matter how many blank lines appear in a row, they
+are considered one record-separator. (End of file is also considered
+a record separator.)@refill
+@c !!! This use of `end of file' is confusing.  Needs to be clarified.
+
+The second step is to separate the fields in the record.  One way to do
+this is to put each field on a separate line: to do this, just set the
+variable @code{FS} to the string @code{"\n"}.  (This simple regular
+expression matches a single newline.)
+
+Another way to separate fields is to divide each of the lines into fields
+in the normal manner.  This happens by default as a result of a special
+feature: when @code{RS} is set to the null string, the newline character
+@emph{always} acts as a field separator.  This is in addition to whatever
+field separations result from @code{FS}.
+
+The original motivation for this special exception was probably so that
+you get useful behavior in the default case (i.e., @w{@code{FS == " "}}).
+This feature can be a problem if you really don't want the
+newline character to separate fields, since there is no way to
+prevent it.  However, you can work around this by using the @code{split}
+function to break up the record manually
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+@ignore
+Here are two ways to use records separated by blank lines and break each
+line into fields normally:
+
+@example
+awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} @{ print $1 @}' BBS-list
+
+@exdent @r{or}
+
+awk 'BEGIN @{ RS = "^\n+"; FS = "[ \t\n]+" @} @{ print $1 @}' BBS-list
+@end example
+@end ignore
+
+@ignore
+Here is how to use records separated by blank lines and break each
+line into fields normally:
+
+@example
+awk 'BEGIN @{ RS = ""; FS = "[ \t\n]+" @} ; @{ print $1 @}' BBS-list
+@end example
+@end ignore
+
+@node Getline, Close Input, Multiple Line, Reading Files
+@section Explicit Input with @code{getline}
+
+@findex getline
+@cindex input, explicit
+@cindex explicit input
+@cindex input, @code{getline} command
+@cindex reading files, @code{getline} command
+So far we have been getting our input files from @code{awk}'s main
+input stream---either the standard input (usually your terminal) or the
+files specified on the command line.  The @code{awk} language has a
+special built-in command called @code{getline} that
+can be used to read input under your explicit control.@refill
+
+This command is quite complex and should @emph{not} be used by
+beginners.  It is covered here because this is the chapter on input.
+The examples that follow the explanation of the @code{getline} command
+include material that has not been covered yet.  Therefore, come back
+and study the @code{getline} command @emph{after} you have reviewed the
+rest of this manual and have a good knowledge of how @code{awk} works.
+
+@vindex ERRNO
+@cindex differences: @code{gawk} and @code{awk}
+@code{getline} returns 1 if it finds a record, and 0 if the end of the
+file is encountered.  If there is some error in getting a record, such
+as a file that cannot be opened, then @code{getline} returns @minus{}1.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+In the following examples, @var{command} stands for a string value that
+represents a shell command.
+
+@table @code
+@item getline
+The @code{getline} command can be used without arguments to read input
+from the current input file.  All it does in this case is read the next
+input record and split it up into fields.  This is useful if you've
+finished processing the current record, but you want to do some special
+processing @emph{right now} on the next record.  Here's an
+example:@refill
+
+@example
+awk '@{
+     if (t = index($0, "/*")) @{
+          if (t > 1)
+               tmp = substr($0, 1, t - 1)
+          else
+               tmp = ""
+          u = index(substr($0, t + 2), "*/")
+          while (u == 0) @{
+               getline
+               t = -1
+               u = index($0, "*/")
+          @}
+          if (u <= length($0) - 2)
+               $0 = tmp substr($0, t + u + 3)
+          else
+               $0 = tmp
+     @}
+     print $0
+@}'
+@end example
+
+This @code{awk} program deletes all C-style comments, @samp{/* @dots{}
+*/}, from the input.  By replacing the @samp{print $0} with other
+statements, you could perform more complicated processing on the
+decommented input, like searching for matches of a regular
+expression.  (This program has a subtle problem---can you spot it?)
+
+@c the program to remove comments doesn't work if one
+@c comment ends and another begins on the same line.  (Your
+@c idea for restart would be useful here).  --- brennan@boeing.com
+
+This form of the @code{getline} command sets @code{NF} (the number of
+fields; @pxref{Fields, ,Examining Fields}), @code{NR} (the number of
+records read so far; @pxref{Records, ,How Input is Split into Records}),
+@code{FNR} (the number of records read from this input file), and the
+value of @code{$0}.
+
+@strong{Note:} the new value of @code{$0} is used in testing
+the patterns of any subsequent rules.  The original value
+of @code{$0} that triggered the rule which executed @code{getline}
+is lost.  By contrast, the @code{next} statement reads a new record
+but immediately begins processing it normally, starting with the first
+rule in the program.  @xref{Next Statement, ,The @code{next} Statement}.
+
+@item getline @var{var}
+This form of @code{getline} reads a record into the variable @var{var}.
+This is useful when you want your program to read the next record from
+the current input file, but you don't want to subject the record to the
+normal input processing.
+
+For example, suppose the next line is a comment, or a special string,
+and you want to read it, but you must make certain that it won't trigger
+any rules.  This version of @code{getline} allows you to read that line
+and store it in a variable so that the main
+read-a-line-and-check-each-rule loop of @code{awk} never sees it.
+
+The following example swaps every two lines of input.  For example, given:
+
+@example
+wan
+tew
+free
+phore
+@end example
+
+@noindent
+it outputs:
+
+@example
+tew
+wan
+phore
+free
+@end example
+
+@noindent
+Here's the program:
+
+@example
+@group
+awk '@{
+     if ((getline tmp) > 0) @{
+          print tmp
+          print $0
+     @} else
+          print $0
+@}'
+@end group
+@end example
+
+The @code{getline} function used in this way sets only the variables
+@code{NR} and @code{FNR} (and of course, @var{var}).  The record is not
+split into fields, so the values of the fields (including @code{$0}) and
+the value of @code{NF} do not change.@refill
+
+@item getline < @var{file}
+@cindex input redirection
+@cindex redirection of input
+This form of the @code{getline} function takes its input from the file
+@var{file}.  Here @var{file} is a string-valued expression that
+specifies the file name.  @samp{< @var{file}} is called a @dfn{redirection}
+since it directs input to come from a different place.
+
+This form is useful if you want to read your input from a particular
+file, instead of from the main input stream.  For example, the following
+program reads its input record from the file @file{foo.input} when it
+encounters a first field with a value equal to 10 in the current input
+file.@refill
+
+@example
+awk '@{
+    if ($1 == 10) @{
+         getline < "foo.input"
+         print
+    @} else
+         print
+@}'
+@end example
+
+Since the main input stream is not used, the values of @code{NR} and
+@code{FNR} are not changed.  But the record read is split into fields in
+the normal manner, so the values of @code{$0} and other fields are
+changed.  So is the value of @code{NF}.
+
+This does not cause the record to be tested against all the patterns
+in the @code{awk} program, in the way that would happen if the record
+were read normally by the main processing loop of @code{awk}.  However,
+the new record is tested against any subsequent rules, just as when
+@code{getline} is used without a redirection.
+
+@item getline @var{var} < @var{file}
+This form of the @code{getline} function takes its input from the file
+@var{file} and puts it in the variable @var{var}.  As above, @var{file}
+is a string-valued expression that specifies the file from which to read.
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields.  The only variable
+changed is @var{var}.
+
+For example, the following program copies all the input files to the
+output, except for records that say @w{@samp{@@include @var{filename}}}.
+Such a record is replaced by the contents of the file
+@var{filename}.@refill
+
+@example
+awk '@{
+     if (NF == 2 && $1 == "@@include") @{
+          while ((getline line < $2) > 0)
+               print line
+          close($2)
+     @} else
+          print
+@}'
+@end example
+
+Note here how the name of the extra input file is not built into
+the program; it is taken from the data, from the second field on
+the @samp{@@include} line.@refill
+
+The @code{close} function is called to ensure that if two identical
+@samp{@@include} lines appear in the input, the entire specified file is
+included twice.  @xref{Close Input, ,Closing Input Files and Pipes}.@refill
+
+One deficiency of this program is that it does not process nested
+@samp{@@include} statements the way a true macro preprocessor would.
+
+@item @var{command} | getline
+You can @dfn{pipe} the output of a command into @code{getline}.  A pipe is
+simply a way to link the output of one program to the input of another.  In
+this case, the string @var{command} is run as a shell command and its output
+is piped into @code{awk} to be used as input.  This form of @code{getline}
+reads one record from the pipe.
+
+For example, the following program copies input to output, except for lines
+that begin with @samp{@@execute}, which are replaced by the output produced by
+running the rest of the line as a shell command:
+
+@example
+awk '@{
+     if ($1 == "@@execute") @{
+          tmp = substr($0, 10)
+          while ((tmp | getline) > 0)
+               print
+          close(tmp)
+     @} else
+          print
+@}'
+@end example
+
+@noindent
+The @code{close} function is called to ensure that if two identical
+@samp{@@execute} lines appear in the input, the command is run for
+each one.  @xref{Close Input, ,Closing Input Files and Pipes}.
+
+Given the input:
+
+@example
+foo
+bar
+baz
+@@execute who
+bletch
+@end example
+
+@noindent
+the program might produce:
+
+@example
+foo
+bar
+baz
+hack     ttyv0   Jul 13 14:22
+hack     ttyp0   Jul 13 14:23     (gnu:0)
+hack     ttyp1   Jul 13 14:23     (gnu:0)
+hack     ttyp2   Jul 13 14:23     (gnu:0)
+hack     ttyp3   Jul 13 14:23     (gnu:0)
+bletch
+@end example
+
+@noindent
+Notice that this program ran the command @code{who} and printed the result.
+(If you try this program yourself, you will get different results, showing
+you who is logged in on your system.)
+
+This variation of @code{getline} splits the record into fields, sets the
+value of @code{NF} and recomputes the value of @code{$0}.  The values of
+@code{NR} and @code{FNR} are not changed.
+
+@item @var{command} | getline @var{var}
+The output of the command @var{command} is sent through a pipe to
+@code{getline} and into the variable @var{var}.  For example, the
+following program reads the current date and time into the variable
+@code{current_time}, using the @code{date} utility, and then
+prints it.@refill
+
+@example
+awk 'BEGIN @{
+     "date" | getline current_time
+     close("date")
+     print "Report printed on " current_time
+@}'
+@end example
+
+In this version of @code{getline}, none of the built-in variables are
+changed, and the record is not split into fields.
+@end table
+
+@node Close Input,  , Getline, Reading Files
+@section Closing Input Files and Pipes
+@cindex closing input files and pipes
+@findex close
+
+If the same file name or the same shell command is used with
+@code{getline} more than once during the execution of an @code{awk}
+program, the file is opened (or the command is executed) only the first time.
+At that time, the first record of input is read from that file or command.
+The next time the same file or command is used in @code{getline}, another
+record is read from it, and so on.
+
+This implies that if you want to start reading the same file again from
+the beginning, or if you want to rerun a shell command (rather than
+reading more output from the command), you must take special steps.
+What you must do is use the @code{close} function, as follows:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression.  Its
+value must exactly equal the string that was used to open the file or
+start the command---for example, if you open a pipe with this:
+
+@example
+"sort -r names" | getline foo
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r names")
+@end example
+
+Once this function call is executed, the next @code{getline} from that
+file or command will reopen the file or rerun the command.
+
+@iftex
+@vindex ERRNO
+@cindex differences: @code{gawk} and @code{awk}
+@end iftex
+@code{close} returns a value of zero if the close succeeded.
+Otherwise, it returns a non-zero value; in that case, @code{gawk}
+sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+@node Printing, One-liners, Reading Files, Top
+@chapter Printing Output
+
+@cindex printing
+@cindex output
+One of the most common things that actions do is to output or @dfn{print}
+some or all of the input.  For simple output, use the @code{print}
+statement.  For fancier formatting use the @code{printf} statement.
+Both are described in this chapter.
+
+@menu
+* Print::                       The @code{print} statement.
+* Print Examples::              Simple examples of @code{print} statements.
+* Output Separators::           The output separators and how to change them.
+* OFMT::                        Controlling Numeric Output With @code{print}.
+* Printf::                      The @code{printf} statement.
+* Redirection::                 How to redirect output to multiple
+                                files and pipes.
+* Special Files::               File name interpretation in @code{gawk}.
+                                @code{gawk} allows access to 
+                                inherited file descriptors.
+@end menu
+
+@node Print, Print Examples, Printing, Printing
+@section The @code{print} Statement
+@cindex @code{print} statement
+
+The @code{print} statement does output with simple, standardized
+formatting.  You specify only the strings or numbers to be printed, in a
+list separated by commas.  They are output, separated by single spaces,
+followed by a newline.  The statement looks like this:
+
+@example
+print @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of items may optionally be enclosed in parentheses.  The
+parentheses are necessary if any of the item expressions uses a
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+The relational operators are @samp{==},
+@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
+@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
+
+The items printed can be constant strings or numbers, fields of the
+current record (such as @code{$1}), variables, or any @code{awk}
+expressions.  The @code{print} statement is completely general for
+computing @emph{what} values to print.  With two exceptions,
+you cannot specify @emph{how} to print them---how many
+columns, whether to use exponential notation or not, and so on.
+(For the exceptions, @pxref{Output Separators}, and
+@ref{OFMT, ,Controlling Numeric Output with @code{print}}.)
+For that kind of control, you need the @code{printf} statement
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
+
+The simple statement @samp{print} with no items is equivalent to
+@samp{print $0}: it prints the entire current record.  To print a blank
+line, use @samp{print ""}, where @code{""} is the null, or empty,
+string.
+
+To print a fixed piece of text, use a string constant such as
+@w{@code{"Hello there"}} as one item.  If you forget to use the
+double-quote characters, your text will be taken as an @code{awk}
+expression, and you will probably get an error.  Keep in mind that a
+space is printed between any two items.
+
+Most often, each @code{print} statement makes one line of output.  But it
+isn't limited to one line.  If an item value is a string that contains a
+newline, the newline is output along with the rest of the string.  A
+single @code{print} can make any number of lines this way.
+
+@node Print Examples, Output Separators, Print, Printing
+@section Examples of @code{print} Statements
+
+Here is an example of printing a string that contains embedded newlines:
+
+@example
+awk 'BEGIN @{ print "line one\nline two\nline three" @}'
+@end example
+
+@noindent
+produces output like this:
+
+@example
+line one
+line two
+line three
+@end example
+
+Here is an example that prints the first two fields of each input record,
+with a space between them:
+
+@example
+awk '@{ print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+Its output looks like this:
+
+@example
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end example
+
+A common mistake in using the @code{print} statement is to omit the comma
+between two items.  This often has the effect of making the items run
+together in the output, with no space.  The reason for this is that
+juxtaposing two string expressions in @code{awk} means to concatenate
+them.  For example, without the comma:
+
+@example
+awk '@{ print $1 $2 @}' inventory-shipped
+@end example
+
+@noindent
+prints:
+
+@example
+@group
+Jan13
+Feb15
+Mar15
+@dots{}
+@end group
+@end example
+
+Neither example's output makes much sense to someone unfamiliar with the
+file @file{inventory-shipped}.  A heading line at the beginning would make
+it clearer.  Let's add some headings to our table of months (@code{$1}) and
+green crates shipped (@code{$2}).  We do this using the @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}) to force the headings to be printed only once:
+
+@example
+awk 'BEGIN @{  print "Month Crates"
+              print "----- ------" @}
+           @{  print $1, $2 @}' inventory-shipped
+@end example
+
+@noindent
+Did you already guess what happens?  This program prints the following:
+
+@example
+@group
+Month Crates
+----- ------
+Jan 13
+Feb 15
+Mar 15
+@dots{}
+@end group
+@end example
+
+@noindent
+The headings and the table data don't line up!  We can fix this by printing
+some spaces between the two fields:
+
+@example
+awk 'BEGIN @{ print "Month Crates"
+             print "----- ------" @}
+           @{ print $1, "     ", $2 @}' inventory-shipped
+@end example
+
+You can imagine that this way of lining up columns can get pretty
+complicated when you have many columns to fix.  Counting spaces for two
+or three columns can be simple, but more than this and you can get
+``lost'' quite easily.  This is why the @code{printf} statement was
+created (@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing});
+one of its specialties is lining up columns of data.@refill
+
+@node Output Separators, OFMT, Print Examples, Printing
+@section Output Separators
+
+@cindex output field separator, @code{OFS}
+@vindex OFS
+@vindex ORS
+@cindex output record separator, @code{ORS}
+As mentioned previously, a @code{print} statement contains a list
+of items, separated by commas.  In the output, the items are normally
+separated by single spaces.  But they do not have to be spaces; a
+single space is only the default.  You can specify any string of
+characters to use as the @dfn{output field separator} by setting the
+built-in variable @code{OFS}.  The initial value of this variable
+is the string @w{@code{" "}}, that is, just a single space.@refill
+
+The output from an entire @code{print} statement is called an
+@dfn{output record}.  Each @code{print} statement outputs one output
+record and then outputs a string called the @dfn{output record separator}.
+The built-in variable @code{ORS} specifies this string.  The initial
+value of the variable is the string @code{"\n"} containing a newline
+character; thus, normally each @code{print} statement makes a separate line.
+
+You can change how output fields and records are separated by assigning
+new values to the variables @code{OFS} and/or @code{ORS}.  The usual
+place to do this is in the @code{BEGIN} rule
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}), so
+that it happens before any input is processed.  You may also do this
+with assignments on the command line, before the names of your input
+files.@refill
+
+The following example prints the first and second fields of each input
+record separated by a semicolon, with a blank line added after each
+line:@refill
+
+@example
+@group
+awk 'BEGIN @{ OFS = ";"; ORS = "\n\n" @}
+           @{ print $1, $2 @}'  BBS-list
+@end group
+@end example
+
+If the value of @code{ORS} does not contain a newline, all your output
+will be run together on a single line, unless you output newlines some
+other way.
+
+@node OFMT, Printf, Output Separators, Printing
+@section Controlling Numeric Output with @code{print}
+@vindex OFMT
+When you use the @code{print} statement to print numeric values,
+@code{awk} internally converts the number to a string of characters,
+and prints that string.  @code{awk} uses the @code{sprintf} function
+to do this conversion.  For now, it suffices to say that the @code{sprintf}
+function accepts a @dfn{format specification} that tells it how to format
+numbers (or strings), and that there are a number of different ways that
+numbers can be formatted.  The different format specifications are discussed
+more fully in
+@ref{Printf, ,Using @code{printf} Statements for Fancier Printing}.@refill
+
+The built-in variable @code{OFMT} contains the default format specification
+that @code{print} uses with @code{sprintf} when it wants to convert a
+number to a string for printing.  By supplying different format specifications
+as the value of @code{OFMT}, you can change how @code{print} will print
+your numbers.  As a brief example:
+
+@example
+@group
+awk 'BEGIN @{ OFMT = "%d"  # print numbers as integers
+             print 17.23 @}'
+@end group
+@end example
+
+@noindent
+will print @samp{17}.
+
+@node Printf, Redirection, OFMT, Printing
+@section Using @code{printf} Statements for Fancier Printing
+@cindex formatted output
+@cindex output, formatted
+
+If you want more precise control over the output format than
+@code{print} gives you, use @code{printf}.  With @code{printf} you can
+specify the width to use for each item, and you can specify various
+stylistic choices for numbers (such as what radix to use, whether to
+print an exponent, whether to print a sign, and how many digits to print
+after the decimal point).  You do this by specifying a string, called
+the @dfn{format string}, which controls how and where to print the other
+arguments.
+
+@menu
+* Basic Printf::                Syntax of the @code{printf} statement.
+* Control Letters::             Format-control letters.
+* Format Modifiers::            Format-specification modifiers.
+* Printf Examples::             Several examples.
+@end menu
+
+@node Basic Printf, Control Letters, Printf, Printf
+@subsection Introduction to the @code{printf} Statement
+
+@cindex @code{printf} statement, syntax of
+The @code{printf} statement looks like this:@refill
+
+@example
+printf @var{format}, @var{item1}, @var{item2}, @dots{}
+@end example
+
+@noindent
+The entire list of arguments may optionally be enclosed in parentheses.  The
+parentheses are necessary if any of the item expressions uses a
+relational operator; otherwise it could be confused with a redirection
+(@pxref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}).
+The relational operators are @samp{==},
+@samp{!=}, @samp{<}, @samp{>}, @samp{>=}, @samp{<=}, @samp{~} and
+@samp{!~} (@pxref{Comparison Ops, ,Comparison Expressions}).@refill
+
+@cindex format string
+The difference between @code{printf} and @code{print} is the argument
+@var{format}.  This is an expression whose value is taken as a string; it
+specifies how to output each of the other arguments.  It is called
+the @dfn{format string}.
+
+The format string is the same as in the @sc{ansi} C library function
+@code{printf}.  Most of @var{format} is text to be output verbatim.
+Scattered among this text are @dfn{format specifiers}, one per item.
+Each format specifier says to output the next item at that place in the
+format.@refill
+
+The @code{printf} statement does not automatically append a newline to its
+output.  It outputs only what the format specifies.  So if you want
+a newline, you must include one in the format.  The output separator
+variables @code{OFS} and @code{ORS} have no effect on @code{printf}
+statements.@refill
+
+@node Control Letters, Format Modifiers, Basic Printf, Printf
+@subsection Format-Control Letters
+@cindex @code{printf}, format-control characters
+@cindex format specifier
+
+A format specifier starts with the character @samp{%} and ends with a
+@dfn{format-control letter}; it tells the @code{printf} statement how
+to output one item.  (If you actually want to output a @samp{%}, write
+@samp{%%}.)  The format-control letter specifies what kind of value to
+print.  The rest of the format specifier is made up of optional
+@dfn{modifiers} which are parameters such as the field width to use.@refill
+
+Here is a list of the format-control letters:
+
+@table @samp
+@item c
+This prints a number as an ASCII character.  Thus, @samp{printf "%c",
+65} outputs the letter @samp{A}.  The output for a string value is
+the first character of the string.
+
+@item d
+This prints a decimal integer.
+
+@item i
+This also prints a decimal integer.
+
+@item e
+This prints a number in scientific (exponential) notation.
+For example,
+
+@example
+printf "%4.3e", 1950
+@end example
+
+@noindent
+prints @samp{1.950e+03}, with a total of four significant figures of
+which three follow the decimal point.  The @samp{4.3} are @dfn{modifiers},
+discussed below.
+
+@item f
+This prints a number in floating point notation.
+
+@item g
+This prints a number in either scientific notation or floating point
+notation, whichever uses fewer characters.
+@ignore
+From: gatech!ames!elroy!cit-vax!EQL.Caltech.Edu!rankin (Pat Rankin)
+
+In the description of printf formats (p.43), the information for %g
+is incorrect (mainly, it's too much of an oversimplification).  It's
+wrong in the AWK book too, and in the gawk man page.  I suggested to
+David Trueman before 2.13 was released that the latter be revised, so
+that it matched gawk's behavior (rather than trying to change gawk to
+match the docs ;-).  The documented description is nice and simple, but
+it doesn't match the actual underlying behavior of %g in the various C
+run-time libraries that gawk relies on.  The precision value for g format
+is different than for f and e formats, so it's inaccurate to say 'g' is
+the shorter of 'e' or 'f'.  For 'g', precision represents the number of
+significant digits rather than the number of decimal places, and it has
+special rules about how to format numbers with range between 10E-1 and
+10E-4.  All in all, it's pretty messy, and I had to add that clumsy
+GFMT_WORKAROUND code because the VMS run-time library doesn't conform to
+the ANSI-C specifications.
+@end ignore
+
+@item o
+This prints an unsigned octal integer.
+
+@item s
+This prints a string.
+
+@item x
+This prints an unsigned hexadecimal integer.
+
+@item X
+This prints an unsigned hexadecimal integer.  However, for the values 10
+through 15, it uses the letters @samp{A} through @samp{F} instead of
+@samp{a} through @samp{f}.
+
+@item %
+This isn't really a format-control letter, but it does have a meaning
+when used after a @samp{%}: the sequence @samp{%%} outputs one
+@samp{%}.  It does not consume an argument.
+@end table
+
+@node Format Modifiers, Printf Examples, Control Letters, Printf
+@subsection Modifiers for @code{printf} Formats
+
+@cindex @code{printf}, modifiers
+@cindex modifiers (in format specifiers)
+A format specification can also include @dfn{modifiers} that can control
+how much of the item's value is printed and how much space it gets.  The
+modifiers come between the @samp{%} and the format-control letter.  Here
+are the possible modifiers, in the order in which they may appear:
+
+@table @samp
+@item -
+The minus sign, used before the width modifier, says to left-justify
+the argument within its specified width.  Normally the argument
+is printed right-justified in the specified width.  Thus,
+
+@example
+printf "%-4s", "foo"
+@end example
+
+@noindent
+prints @samp{foo }.
+
+@item @var{width}
+This is a number representing the desired width of a field.  Inserting any
+number between the @samp{%} sign and the format control character forces the
+field to be expanded to this width.  The default way to do this is to
+pad with spaces on the left.  For example,
+
+@example
+printf "%4s", "foo"
+@end example
+
+@noindent
+prints @samp{ foo}.
+
+The value of @var{width} is a minimum width, not a maximum.  If the item
+value requires more than @var{width} characters, it can be as wide as
+necessary.  Thus,
+
+@example
+printf "%4s", "foobar"
+@end example
+
+@noindent
+prints @samp{foobar}.
+
+Preceding the @var{width} with a minus sign causes the output to be
+padded with spaces on the right, instead of on the left.
+
+@item .@var{prec}
+This is a number that specifies the precision to use when printing.
+For the @samp{e} and @samp{f} formats, this specifies the number of
+digits you want printed to the right of the decimal point.  For the
+@samp{g} format, it specifies the maximum number of significant digits.
+For a string, it specifies the maximum number of
+characters from the string that should be printed.
+@end table
+
+The C library @code{printf}'s dynamic @var{width} and @var{prec}
+capability (for example, @code{"%*.*s"}) is supported.  Instead of
+supplying explicit @var{width} and/or @var{prec} values in the format
+string, you pass them in the argument list.  For example:@refill
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "<%*.*s>\n", w, p, s
+@end example
+
+@noindent
+is exactly equivalent to
+
+@example
+s = "abcdefg"
+printf "<%5.3s>\n", s
+@end example
+
+@noindent
+Both programs output @samp{@w{<@bullet{}@bullet{}abc>}}.  (We have
+used the bullet symbol ``@bullet{}'' to represent a space, to clearly
+show you that there are two spaces in the output.)@refill
+
+Earlier versions of @code{awk} did not support this capability.  You may
+simulate it by using concatenation to build up the format string,
+like so:@refill
+
+@example
+w = 5
+p = 3
+s = "abcdefg"
+printf "<%" w "." p "s>\n", s
+@end example
+
+@noindent
+This is not particularly easy to read, however.
+
+@node Printf Examples,  , Format Modifiers, Printf
+@subsection Examples of Using @code{printf}
+
+Here is how to use @code{printf} to make an aligned table:
+
+@example
+awk '@{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end example
+
+@noindent
+prints the names of bulletin boards (@code{$1}) of the file
+@file{BBS-list} as a string of 10 characters, left justified.  It also
+prints the phone numbers (@code{$2}) afterward on the line.  This
+produces an aligned two-column table of names and phone numbers:@refill
+
+@example
+@group
+aardvark   555-5553
+alpo-net   555-3412
+barfly     555-7685
+bites      555-1675
+camelot    555-0542
+core       555-2912
+fooey      555-1234
+foot       555-6699
+macfoo     555-6480
+sdace      555-3430
+sabafoo    555-2127
+@end group
+@end example
+
+Did you notice that we did not specify that the phone numbers be printed
+as numbers?  They had to be printed as strings because the numbers are
+separated by a dash.  If we had tried to print the phone numbers as
+numbers, each one would have been converted to a number using only its
+leading digits, and all that would have shown up is @samp{555}.  This
+would have led to some pretty confusing results.
+
+We did not specify a width for the phone numbers because they are the
+last things on their lines.  We don't need to put spaces after them.
+
+We could make our table look even nicer by adding headings to the tops
+of the columns.  To do this, use the @code{BEGIN} pattern
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns})
+to force the header to be printed only once, at the beginning of
+the @code{awk} program:@refill
+
+@example
+@group
+awk 'BEGIN @{ print "Name      Number"
+             print "----      ------" @}
+     @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+Did you notice that we mixed @code{print} and @code{printf} statements in
+the above example?  We could have used just @code{printf} statements to get
+the same results:
+
+@example
+@group
+awk 'BEGIN @{ printf "%-10s %s\n", "Name", "Number"
+             printf "%-10s %s\n", "----", "------" @}
+     @{ printf "%-10s %s\n", $1, $2 @}' BBS-list
+@end group
+@end example
+
+@noindent
+By outputting each column heading with the same format specification
+used for the elements of the column, we have made sure that the headings
+are aligned just like the columns.
+
+The fact that the same format specification is used three times can be
+emphasized by storing it in a variable, like this:
+
+@example
+awk 'BEGIN @{ format = "%-10s %s\n"
+             printf format, "Name", "Number"
+             printf format, "----", "------" @}
+     @{ printf format, $1, $2 @}' BBS-list
+@end example
+
+See if you can use the @code{printf} statement to line up the headings and
+table data for our @file{inventory-shipped} example covered earlier in the
+section on the @code{print} statement
+(@pxref{Print, ,The @code{print} Statement}).@refill
+
+@node Redirection, Special Files, Printf, Printing
+@section Redirecting Output of @code{print} and @code{printf}
+
+@cindex output redirection
+@cindex redirection of output
+So far we have been dealing only with output that prints to the standard
+output, usually your terminal.  Both @code{print} and @code{printf} can
+also send their output to other places.
+This is called @dfn{redirection}.@refill
+
+A redirection appears after the @code{print} or @code{printf} statement.
+Redirections in @code{awk} are written just like redirections in shell
+commands, except that they are written inside the @code{awk} program.
+
+@menu
+* File/Pipe Redirection::       Redirecting Output to Files and Pipes.
+* Close Output::                How to close output files and pipes.
+@end menu
+
+@node File/Pipe Redirection, Close Output, Redirection, Redirection
+@subsection Redirecting Output to Files and Pipes
+
+Here are the three forms of output redirection.  They are all shown for
+the @code{print} statement, but they work identically for @code{printf}
+also.@refill
+
+@table @code
+@item print @var{items} > @var{output-file}
+This type of redirection prints the items onto the output file
+@var{output-file}.  The file name @var{output-file} can be any
+expression.  Its value is changed to a string and then used as a
+file name (@pxref{Expressions, ,Expressions as Action Statements}).@refill
+
+When this type of redirection is used, the @var{output-file} is erased
+before the first output is written to it.  Subsequent writes do not
+erase @var{output-file}, but append to it.  If @var{output-file} does
+not exist, then it is created.@refill
+
+For example, here is how one @code{awk} program can write a list of
+BBS names to a file @file{name-list} and a list of phone numbers to a
+file @file{phone-list}.  Each output file contains one name or number
+per line.
+
+@smallexample
+awk '@{ print $2 > "phone-list"
+       print $1 > "name-list" @}' BBS-list
+@end smallexample
+
+@item print @var{items} >> @var{output-file}
+This type of redirection prints the items onto the output file
+@var{output-file}.  The difference between this and the
+single-@samp{>} redirection is that the old contents (if any) of
+@var{output-file} are not erased.  Instead, the @code{awk} output is
+appended to the file.
+
+@cindex pipes for output
+@cindex output, piping
+@item print @var{items} | @var{command}
+It is also possible to send output through a @dfn{pipe} instead of into a
+file.   This type of redirection opens a pipe to @var{command} and writes
+the values of @var{items} through this pipe, to another process created
+to execute @var{command}.@refill
+
+The redirection argument @var{command} is actually an @code{awk}
+expression.  Its value is converted to a string, whose contents give the
+shell command to be run.
+
+For example, this produces two files, one unsorted list of BBS names
+and one list sorted in reverse alphabetical order:
+
+@smallexample
+awk '@{ print $1 > "names.unsorted"
+       print $1 | "sort -r > names.sorted" @}' BBS-list
+@end smallexample
+
+Here the unsorted list is written with an ordinary redirection while
+the sorted list is written by piping through the @code{sort} utility.
+
+Here is an example that uses redirection to mail a message to a mailing
+list @samp{bug-system}.  This might be useful when trouble is encountered
+in an @code{awk} script run periodically for system maintenance.
+
+@smallexample
+report = "mail bug-system"
+print "Awk script failed:", $0 | report
+print "at record number", FNR, "of", FILENAME  | report
+close(report)
+@end smallexample
+
+We call the @code{close} function here because it's a good idea to close
+the pipe as soon as all the intended output has been sent to it.
+@xref{Close Output, ,Closing Output Files and Pipes}, for more information
+on this.  This example also illustrates the use of a variable to represent
+a @var{file} or @var{command}: it is not necessary to always
+use a string constant.  Using a variable is generally a good idea,
+since @code{awk} requires you to spell the string value identically
+every time.
+@end table
+
+Redirecting output using @samp{>}, @samp{>>}, or @samp{|} asks the system
+to open a file or pipe only if the particular @var{file} or @var{command}
+you've specified has not already been written to by your program, or if
+it has been closed since it was last written to.@refill
+
+@node Close Output,  , File/Pipe Redirection, Redirection
+@subsection Closing Output Files and Pipes
+@cindex closing output files and pipes
+@findex close
+
+When a file or pipe is opened, the file name or command associated with
+it is remembered by @code{awk} and subsequent writes to the same file or
+command are appended to the previous writes.  The file or pipe stays
+open until @code{awk} exits.  This is usually convenient.
+
+Sometimes there is a reason to close an output file or pipe earlier
+than that.  To do this, use the @code{close} function, as follows:
+
+@example
+close(@var{filename})
+@end example
+
+@noindent
+or
+
+@example
+close(@var{command})
+@end example
+
+The argument @var{filename} or @var{command} can be any expression.
+Its value must exactly equal the string used to open the file or pipe
+to begin with---for example, if you open a pipe with this:
+
+@example
+print $1 | "sort -r > names.sorted"
+@end example
+
+@noindent
+then you must close it with this:
+
+@example
+close("sort -r > names.sorted")
+@end example
+
+Here are some reasons why you might need to close an output file:
+
+@itemize @bullet
+@item
+To write a file and read it back later on in the same @code{awk}
+program.  Close the file when you are finished writing it; then
+you can start reading it with @code{getline}
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
+
+@item
+To write numerous files, successively, in the same @code{awk}
+program.  If you don't close the files, eventually you may exceed a
+system limit on the number of open files in one process.  So close
+each one when you are finished writing it.
+
+@item
+To make a command finish.  When you redirect output through a pipe,
+the command reading the pipe normally continues to try to read input
+as long as the pipe is open.  Often this means the command cannot
+really do its work until the pipe is closed.  For example, if you
+redirect output to the @code{mail} program, the message is not
+actually sent until the pipe is closed.
+
+@item
+To run the same program a second time, with the same arguments.
+This is not the same thing as giving more input to the first run!
+
+For example, suppose you pipe output to the @code{mail} program.  If you
+output several lines redirected to this pipe without closing it, they make
+a single message of several lines.  By contrast, if you close the pipe
+after each line of output, then each line makes a separate message.
+@end itemize
+
+@iftex
+@vindex ERRNO
+@cindex differences: @code{gawk} and @code{awk}
+@end iftex
+@code{close} returns a value of zero if the close succeeded.
+Otherwise, the value will be non-zero.
+In this case, @code{gawk} sets the variable @code{ERRNO} to a string
+describing the error that occurred.
+
+@node Special Files,  , Redirection, Printing
+@section Standard I/O Streams
+@cindex standard input
+@cindex standard output
+@cindex standard error output
+@cindex file descriptors
+
+Running programs conventionally have three input and output streams
+already available to them for reading and writing.  These are known as
+the @dfn{standard input}, @dfn{standard output}, and @dfn{standard error
+output}.  These streams are, by default, terminal input and output, but
+they are often redirected with the shell, via the @samp{<}, @samp{<<},
+@samp{>}, @samp{>>}, @samp{>&} and @samp{|} operators.  Standard error
+is used only for writing error messages; the reason we have two separate
+streams, standard output and standard error, is so that they can be
+redirected separately.
+
+@iftex
+@cindex differences: @code{gawk} and @code{awk}
+@end iftex
+In other implementations of @code{awk}, the only way to write an error
+message to standard error in an @code{awk} program is as follows:
+
+@smallexample
+print "Serious error detected!" | "cat 1>&2"
+@end smallexample
+
+@noindent
+This works by opening a pipeline to a shell command which can access the
+standard error stream which it inherits from the @code{awk} process.
+This is far from elegant, and is also inefficient, since it requires a
+separate process.  So people writing @code{awk} programs have often
+neglected to do this.  Instead, they have sent the error messages to the
+terminal, like this:
+
+@smallexample
+@group
+NF != 4 @{
+   printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/tty"
+@}
+@end group
+@end smallexample
+
+@noindent
+This has the same effect most of the time, but not always: although the
+standard error stream is usually the terminal, it can be redirected, and
+when that happens, writing to the terminal is not correct.  In fact, if
+@code{awk} is run from a background job, it may not have a terminal at all.
+Then opening @file{/dev/tty} will fail.
+
+@code{gawk} provides special file names for accessing the three standard
+streams.  When you redirect input or output in @code{gawk}, if the file name
+matches one of these special names, then @code{gawk} directly uses the
+stream it stands for.
+
+@cindex @file{/dev/stdin}
+@cindex @file{/dev/stdout}
+@cindex @file{/dev/stderr}
+@cindex @file{/dev/fd/}
+@table @file
+@item /dev/stdin
+The standard input (file descriptor 0).
+
+@item /dev/stdout
+The standard output (file descriptor 1).
+
+@item /dev/stderr
+The standard error output (file descriptor 2).
+
+@item /dev/fd/@var{N}
+The file associated with file descriptor @var{N}.  Such a file must have
+been opened by the program initiating the @code{awk} execution (typically
+the shell).  Unless you take special pains, only descriptors 0, 1 and 2
+are available.
+@end table
+
+The file names @file{/dev/stdin}, @file{/dev/stdout}, and @file{/dev/stderr}
+are aliases for @file{/dev/fd/0}, @file{/dev/fd/1}, and @file{/dev/fd/2},
+respectively, but they are more self-explanatory.
+
+The proper way to write an error message in a @code{gawk} program
+is to use @file{/dev/stderr}, like this:
+
+@smallexample
+NF != 4 @{
+  printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/stderr"
+@}
+@end smallexample
+
+@code{gawk} also provides special file names that give access to information
+about the running @code{gawk} process.  Each of these ``files'' provides
+a single record of information.  To read them more than once, you must
+first close them with the @code{close} function
+(@pxref{Close Input, ,Closing Input Files and Pipes}).
+The filenames are:
+
+@cindex @file{/dev/pid}
+@cindex @file{/dev/pgrpid}
+@cindex @file{/dev/ppid}
+@cindex @file{/dev/user}
+@table @file
+@item /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+
+@item  /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+
+@item  /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with blanks.  The fields represent the
+following information:
+
+@table @code
+@item $1
+The value of the @code{getuid} system call.
+
+@item $2
+The value of the @code{geteuid} system call.
+
+@item $3
+The value of the @code{getgid} system call.
+
+@item $4
+The value of the @code{getegid} system call.
+@end table
+
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)@refill
+@end table
+
+These special file names may be used on the command line as data
+files, as well as for I/O redirections within an @code{awk} program.
+They may not be used as source files with the @samp{-f} option.
+
+Recognition of these special file names is disabled if @code{gawk} is in
+compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
+
+@quotation
+@strong{Caution}:  Unless your system actually has a @file{/dev/fd} directory
+(or any of the other above listed special files),
+the interpretation of these file names is done by @code{gawk} itself.
+For example, using @samp{/dev/fd/4} for output will actually write on
+file descriptor 4, and not on a new file descriptor that was @code{dup}'ed
+from file descriptor 4.  Most of the time this does not matter; however, it
+is important to @emph{not} close any of the files related to file descriptors
+0, 1, and 2.  If you do close one of these files, unpredictable behavior
+will result.
+@end quotation
+
+@node One-liners, Patterns, Printing, Top
+@chapter Useful ``One-liners''
+
+@cindex one-liners
+Useful @code{awk} programs are often short, just a line or two.  Here is a
+collection of useful, short programs to get you started.  Some of these
+programs contain constructs that haven't been covered yet.  The description
+of the program will give you a good idea of what is going on, but please
+read the rest of the manual to become an @code{awk} expert!
+
+@c Per suggestions from Michal Jaegermann
+@ifinfo
+Since you are reading this in Info, each line of the example code is
+enclosed in quotes, to represent text that you would type literally.
+The examples themselves represent shell commands that use single quotes
+to keep the shell from interpreting the contents of the program.
+When reading the examples, focus on the text between the open and close
+quotes.
+@end ifinfo
+
+@table @code
+@item awk '@{ if (NF > max) max = NF @}
+@itemx @ @ @ @ @ END @{ print max @}'
+This program prints the maximum number of fields on any input line.
+
+@item awk 'length($0) > 80'
+This program prints every line longer than 80 characters.  The sole
+rule has a relational expression as its pattern, and has no action (so the
+default action, printing the record, is used).
+
+@item awk 'NF > 0'
+This program prints every line that has at least one field.  This is an
+easy way to delete blank lines from a file (or rather, to create a new
+file similar to the old file but from which the blank lines have been
+deleted).
+
+@item awk '@{ if (NF > 0) print @}'
+This program also prints every line that has at least one field.  Here we
+allow the rule to match every line, then decide in the action whether
+to print.
+
+@item awk@ 'BEGIN@ @{@ for (i = 1; i <= 7; i++)
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ print int(101 * rand()) @}'
+This program prints 7 random numbers from 0 to 100, inclusive.
+
+@item ls -l @var{files} | awk '@{ x += $4 @} ; END @{ print "total bytes: " x @}'
+This program prints the total number of bytes used by @var{files}.
+
+@item expand@ @var{file}@ |@ awk@ '@{ if (x < length()) x = length() @}
+@itemx @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ END @{ print "maximum line length is " x @}'
+This program prints the maximum line length of @var{file}.  The input
+is piped through the @code{expand} program to change tabs into spaces,
+so the widths compared are actually the right-margin columns.
+
+@item awk 'BEGIN @{ FS = ":" @}
+@itemx @ @ @ @ @ @{ print $1 | "sort" @}' /etc/passwd
+This program prints a sorted list of the login names of all users.
+
+@item awk '@{ nlines++ @}
+@itemx @ @ @ @ @ END@ @{ print nlines @}'
+This program counts lines in a file.
+
+@item awk 'END @{ print NR @}'
+This program also counts lines in a file, but lets @code{awk} do the work.
+
+@item awk '@{ print NR, $0 @}'
+This program adds line numbers to all its input files,
+similar to @samp{cat -n}.
+@end table
+
+@node Patterns, Actions, One-liners, Top
+@chapter Patterns
+@cindex pattern, definition of
+
+Patterns in @code{awk} control the execution of rules: a rule is
+executed when its pattern matches the current input record.  This
+chapter tells all about how to write patterns.
+
+@menu
+* Kinds of Patterns::           A list of all kinds of patterns.
+                                The following subsections describe 
+                                them in detail.
+* Regexp::                      Regular expressions such as @samp{/foo/}.
+* Comparison Patterns::         Comparison expressions such as @code{$1 > 10}.
+* Boolean Patterns::            Combining comparison expressions.
+* Expression Patterns::         Any expression can be used as a pattern.
+* Ranges::                      Pairs of patterns specify record ranges.
+* BEGIN/END::                   Specifying initialization and cleanup rules.
+* Empty::                       The empty pattern, which matches every record.
+@end menu
+
+@node Kinds of Patterns, Regexp, Patterns, Patterns
+@section Kinds of Patterns
+@cindex patterns, types of
+
+Here is a summary of the types of patterns supported in @code{awk}.
+@c At the next rewrite, check to see that this order matches the
+@c order in the text.  It might not matter to a reader, but it's good
+@c style.  Also, it might be nice to mention all the topics of sections
+@c that follow in this list; that way people can scan and know when to
+@c expect a specific topic.  Specifically please also make an entry
+@c for Boolean operators as patterns in the right place.  --mew
+
+@table @code
+@item /@var{regular expression}/
+A regular expression as a pattern.  It matches when the text of the
+input record fits the regular expression.
+(@xref{Regexp, ,Regular Expressions as Patterns}.)@refill
+
+@item @var{expression}
+A single expression.  It matches when its value
+is nonzero (if a number) or nonnull (if a string).
+(@xref{Expression Patterns, ,Expressions as Patterns}.)@refill
+
+@item @var{pat1}, @var{pat2}
+A pair of patterns separated by a comma, specifying a range of records.
+(@xref{Ranges, ,Specifying Record Ranges with Patterns}.)
+
+@item BEGIN
+@itemx END
+Special patterns to supply start-up or clean-up information to
+@code{awk}.  (@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.)
+
+@item @var{null}
+The empty pattern matches every input record.
+(@xref{Empty, ,The Empty Pattern}.)@refill
+@end table
+
+
+@node Regexp, Comparison Patterns, Kinds of Patterns, Patterns
+@section Regular Expressions as Patterns
+@cindex pattern, regular expressions
+@cindex regexp
+@cindex regular expressions as patterns
+
+A @dfn{regular expression}, or @dfn{regexp}, is a way of describing a
+class of strings.  A regular expression enclosed in slashes (@samp{/})
+is an @code{awk} pattern that matches every input record whose text
+belongs to that class.
+
+The simplest regular expression is a sequence of letters, numbers, or
+both.  Such a regexp matches any string that contains that sequence.
+Thus, the regexp @samp{foo} matches any string containing @samp{foo}.
+Therefore, the pattern @code{/foo/} matches any input record containing
+@samp{foo}.  Other kinds of regexps let you specify more complicated
+classes of strings.
+
+@menu
+* Regexp Usage::                How to Use Regular Expressions
+* Regexp Operators::            Regular Expression Operators
+* Case-sensitivity::            How to do case-insensitive matching.
+@end menu
+
+@node Regexp Usage, Regexp Operators, Regexp, Regexp
+@subsection How to Use Regular Expressions
+
+A regular expression can be used as a pattern by enclosing it in
+slashes.  Then the regular expression is matched against the
+entire text of each record.  (Normally, it only needs
+to match some part of the text in order to succeed.)  For example, this
+prints the second field of each record that contains @samp{foo} anywhere:
+
+@example
+awk '/foo/ @{ print $2 @}' BBS-list
+@end example
+
+@cindex regular expression matching operators
+@cindex string-matching operators
+@cindex operators, string-matching
+@cindex operators, regexp matching
+@cindex regexp search operators
+Regular expressions can also be used in comparison expressions.  Then
+you can specify the string to match against; it need not be the entire
+current input record.  These comparison expressions can be used as
+patterns or in @code{if}, @code{while}, @code{for}, and @code{do} statements.
+
+@table @code
+@item @var{exp} ~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a character string)
+is matched by @var{regexp}.  The following example matches, or selects,
+all input records with the upper-case letter @samp{J} somewhere in the
+first field:@refill
+
+@example
+awk '$1 ~ /J/' inventory-shipped
+@end example
+
+So does this:
+
+@example
+awk '@{ if ($1 ~ /J/) print @}' inventory-shipped
+@end example
+
+@item @var{exp} !~ /@var{regexp}/
+This is true if the expression @var{exp} (taken as a character string)
+is @emph{not} matched by @var{regexp}.  The following example matches,
+or selects, all input records whose first field @emph{does not} contain
+the upper-case letter @samp{J}:@refill
+
+@example
+awk '$1 !~ /J/' inventory-shipped
+@end example
+@end table
+
+@cindex computed regular expressions
+@cindex regular expressions, computed
+@cindex dynamic regular expressions
+The right hand side of a @samp{~} or @samp{!~} operator need not be a
+constant regexp (i.e., a string of characters between slashes).  It may
+be any expression.  The expression is evaluated, and converted if
+necessary to a string; the contents of the string are used as the
+regexp.  A regexp that is computed in this way is called a @dfn{dynamic
+regexp}.  For example:
+
+@example
+identifier_regexp = "[A-Za-z_][A-Za-z_0-9]*"
+$0 ~ identifier_regexp
+@end example
+
+@noindent
+sets @code{identifier_regexp} to a regexp that describes @code{awk}
+variable names, and tests if the input record matches this regexp.
+
+@node Regexp Operators, Case-sensitivity, Regexp Usage, Regexp
+@subsection Regular Expression Operators
+@cindex metacharacters
+@cindex regular expression metacharacters
+
+You can combine regular expressions with the following characters,
+called @dfn{regular expression operators}, or @dfn{metacharacters}, to
+increase the power and versatility of regular expressions.
+
+Here is a table of metacharacters.  All characters not listed in the
+table stand for themselves.
+
+@table @code
+@item ^
+This matches the beginning of the string or the beginning of a line
+within the string.  For example:
+
+@example
+^@@chapter
+@end example
+
+@noindent
+matches the @samp{@@chapter} at the beginning of a string, and can be used
+to identify chapter beginnings in Texinfo source files.
+
+@item $
+This is similar to @samp{^}, but it matches only at the end of a string
+or the end of a line within the string.  For example:
+
+@example
+p$
+@end example
+
+@noindent
+matches a record that ends with a @samp{p}.
+
+@item .
+This matches any single character except a newline.  For example:
+
+@example
+.P
+@end example
+
+@noindent
+matches any single character followed by a @samp{P} in a string.  Using
+concatenation we can make regular expressions like @samp{U.A}, which
+matches any three-character sequence that begins with @samp{U} and ends
+with @samp{A}.
+
+@item [@dots{}]
+This is called a @dfn{character set}.  It matches any one of the
+characters that are enclosed in the square brackets.  For example:
+
+@example
+[MVX]
+@end example
+
+@noindent
+matches any one of the characters @samp{M}, @samp{V}, or @samp{X} in a
+string.@refill
+
+Ranges of characters are indicated by using a hyphen between the beginning
+and ending characters, and enclosing the whole thing in brackets.  For
+example:@refill
+
+@example
+[0-9]
+@end example
+
+@noindent
+matches any digit.
+
+To include the character @samp{\}, @samp{]}, @samp{-} or @samp{^} in a
+character set, put a @samp{\} in front of it.  For example:
+
+@example
+[d\]]
+@end example
+
+@noindent
+matches either @samp{d}, or @samp{]}.@refill
+
+This treatment of @samp{\} is compatible with other @code{awk}
+implementations, and is also mandated by the @sc{posix} Command Language
+and Utilities standard.  The regular expressions in @code{awk} are a superset
+of the @sc{posix} specification for Extended Regular Expressions (EREs).
+@sc{posix} EREs are based on the regular expressions accepted by the
+traditional @code{egrep} utility.
+
+In @code{egrep} syntax, backslash is not syntactically special within
+square brackets.  This means that special tricks have to be used to
+represent the characters @samp{]}, @samp{-} and @samp{^} as members of a
+character set.
+
+In @code{egrep} syntax, to match @samp{-}, write it as @samp{---},
+which is a range containing only @w{@samp{-}.}  You may also give @samp{-}
+as the first or last character in the set.  To match @samp{^}, put it
+anywhere except as the first character of a set.  To match a @samp{]},
+make it the first character in the set.  For example:@refill
+
+@example
+[]d^]
+@end example
+
+@noindent
+matches either @samp{]}, @samp{d} or @samp{^}.@refill
+
+@item [^ @dots{}]
+This is a @dfn{complemented character set}.  The first character after
+the @samp{[} @emph{must} be a @samp{^}.  It matches any character
+@emph{except} those in the square brackets (or newline).  For example:
+
+@example
+[^0-9]
+@end example
+
+@noindent
+matches any character that is not a digit.
+
+@item |
+This is the @dfn{alternation operator} and it is used to specify
+alternatives.  For example:
+
+@example
+^P|[0-9]
+@end example
+
+@noindent
+matches any string that matches either @samp{^P} or @samp{[0-9]}.  This
+means it matches any string that contains a digit or starts with @samp{P}.
+
+The alternation applies to the largest possible regexps on either side.
+@item (@dots{})
+Parentheses are used for grouping in regular expressions as in
+arithmetic.  They can be used to concatenate regular expressions
+containing the alternation operator, @samp{|}.
+
+@item *
+This symbol means that the preceding regular expression is to be
+repeated as many times as possible to find a match.  For example:
+
+@example
+ph*
+@end example
+
+@noindent
+applies the @samp{*} symbol to the preceding @samp{h} and looks for matches
+to one @samp{p} followed by any number of @samp{h}s.  This will also match
+just @samp{p} if no @samp{h}s are present.
+
+The @samp{*} repeats the @emph{smallest} possible preceding expression.
+(Use parentheses if you wish to repeat a larger expression.)  It finds
+as many repetitions as possible.  For example:
+
+@example
+awk '/\(c[ad][ad]*r x\)/ @{ print @}' sample
+@end example
+
+@noindent
+prints every record in the input containing a string of the form
+@samp{(car x)}, @samp{(cdr x)}, @samp{(cadr x)}, and so on.@refill
+
+@item +
+This symbol is similar to @samp{*}, but the preceding expression must be
+matched at least once.  This means that:
+
+@example
+wh+y
+@end example
+
+@noindent
+would match @samp{why} and @samp{whhy} but not @samp{wy}, whereas
+@samp{wh*y} would match all three of these strings.  This is a simpler
+way of writing the last @samp{*} example:
+
+@example
+awk '/\(c[ad]+r x\)/ @{ print @}' sample
+@end example
+
+@item ?
+This symbol is similar to @samp{*}, but the preceding expression can be
+matched once or not at all.  For example:
+
+@example
+fe?d
+@end example
+
+@noindent
+will match @samp{fed} and @samp{fd}, but nothing else.@refill
+
+@item \
+This is used to suppress the special meaning of a character when
+matching.  For example:
+
+@example
+\$
+@end example
+
+@noindent
+matches the character @samp{$}.
+
+The escape sequences used for string constants
+(@pxref{Constants, ,Constant Expressions}) are
+valid in regular expressions as well; they are also introduced by a
+@samp{\}.@refill
+@end table
+
+In regular expressions, the @samp{*}, @samp{+}, and @samp{?} operators have
+the highest precedence, followed by concatenation, and finally by @samp{|}.
+As in arithmetic, parentheses can change how operators are grouped.@refill
+
+@node Case-sensitivity,  , Regexp Operators, Regexp
+@subsection Case-sensitivity in Matching
+
+Case is normally significant in regular expressions, both when matching
+ordinary characters (i.e., not metacharacters), and inside character
+sets.  Thus a @samp{w} in a regular expression matches only a lower case
+@samp{w} and not an upper case @samp{W}.
+
+The simplest way to do a case-independent match is to use a character
+set: @samp{[Ww]}.  However, this can be cumbersome if you need to use it
+often; and it can make the regular expressions harder for humans to
+read.  There are two other alternatives that you might prefer.
+
+One way to do a case-insensitive match at a particular point in the
+program is to convert the data to a single case, using the
+@code{tolower} or @code{toupper} built-in string functions (which we
+haven't discussed yet;
+@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+For example:@refill
+
+@example
+tolower($1) ~ /foo/  @{ @dots{} @}
+@end example
+
+@noindent
+converts the first field to lower case before matching against it.
+
+Another method is to set the variable @code{IGNORECASE} to a nonzero
+value (@pxref{Built-in Variables}).  When @code{IGNORECASE} is not zero,
+@emph{all} regexp operations ignore case.  Changing the value of
+@code{IGNORECASE} dynamically controls the case sensitivity of your
+program as it runs.  Case is significant by default because
+@code{IGNORECASE} (like most variables) is initialized to zero.
+
+@example
+x = "aB"
+if (x ~ /ab/) @dots{}   # this test will fail
+
+IGNORECASE = 1
+if (x ~ /ab/) @dots{}   # now it will succeed
+@end example
+
+In general, you cannot use @code{IGNORECASE} to make certain rules
+case-insensitive and other rules case-sensitive, because there is no way
+to set @code{IGNORECASE} just for the pattern of a particular rule.  To
+do this, you must use character sets or @code{tolower}.  However, one
+thing you can do only with @code{IGNORECASE} is turn case-sensitivity on
+or off dynamically for all the rules at once.@refill
+
+@code{IGNORECASE} can be set on the command line, or in a @code{BEGIN}
+rule.  Setting @code{IGNORECASE} from the command line is a way to make
+a program case-insensitive without having to edit it.
+
+The value of @code{IGNORECASE} has no effect if @code{gawk} is in
+compatibility mode (@pxref{Command Line, ,Invoking @code{awk}}).
+Case is always significant in compatibility mode.@refill
+
+@node Comparison Patterns, Boolean Patterns, Regexp, Patterns
+@section Comparison Expressions as Patterns
+@cindex comparison expressions as patterns
+@cindex pattern, comparison expressions
+@cindex relational operators
+@cindex operators, relational
+
+@dfn{Comparison patterns} test relationships such as equality between
+two strings or numbers.  They are a special case of expression patterns
+(@pxref{Expression Patterns, ,Expressions as Patterns}).  They are written
+with @dfn{relational operators}, which are a superset of those in C.
+Here is a table of them:@refill
+
+@table @code
+@item @var{x} < @var{y}
+True if @var{x} is less than @var{y}.
+
+@item @var{x} <= @var{y}
+True if @var{x} is less than or equal to @var{y}.
+
+@item @var{x} > @var{y}
+True if @var{x} is greater than @var{y}.
+
+@item @var{x} >= @var{y}
+True if @var{x} is greater than or equal to @var{y}.
+
+@item @var{x} == @var{y}
+True if @var{x} is equal to @var{y}.
+
+@item @var{x} != @var{y}
+True if @var{x} is not equal to @var{y}.
+
+@item @var{x} ~ @var{y}
+True if @var{x} matches the regular expression described by @var{y}.
+
+@item @var{x} !~ @var{y}
+True if @var{x} does not match the regular expression described by @var{y}.
+@end table
+
+The operands of a relational operator are compared as numbers if they
+are both numbers.  Otherwise they are converted to, and compared as,
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers},
+for the detailed rules).  Strings are compared by comparing the first
+character of each, then the second character of each,
+and so on, until there is a difference.  If the two strings are equal until
+the shorter one runs out, the shorter one is considered to be less than the
+longer one.  Thus, @code{"10"} is less than @code{"9"}, and @code{"abc"}
+is less than @code{"abcd"}.@refill
+
+The left operand of the @samp{~} and @samp{!~} operators is a string.
+The right operand is either a constant regular expression enclosed in
+slashes (@code{/@var{regexp}/}), or any expression, whose string value
+is used as a dynamic regular expression
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
+
+The following example prints the second field of each input record
+whose first field is precisely @samp{foo}.
+
+@example
+awk '$1 == "foo" @{ print $2 @}' BBS-list
+@end example
+
+@noindent
+Contrast this with the following regular expression match, which would
+accept any record with a first field that contains @samp{foo}:
+
+@example
+awk '$1 ~ "foo" @{ print $2 @}' BBS-list
+@end example
+
+@noindent
+or, equivalently, this one:
+
+@example
+awk '$1 ~ /foo/ @{ print $2 @}' BBS-list
+@end example
+
+@node Boolean Patterns, Expression Patterns, Comparison Patterns, Patterns
+@section Boolean Operators and Patterns
+@cindex patterns, boolean
+@cindex boolean patterns
+
+A @dfn{boolean pattern} is an expression which combines other patterns
+using the @dfn{boolean operators} ``or'' (@samp{||}), ``and''
+(@samp{&&}), and ``not'' (@samp{!}).  Whether the boolean pattern
+matches an input record depends on whether its subpatterns match.
+
+For example, the following command prints all records in the input file
+@file{BBS-list} that contain both @samp{2400} and @samp{foo}.@refill
+
+@example
+awk '/2400/ && /foo/' BBS-list
+@end example
+
+The following command prints all records in the input file
+@file{BBS-list} that contain @emph{either} @samp{2400} or @samp{foo}, or
+both.@refill
+
+@example
+awk '/2400/ || /foo/' BBS-list
+@end example
+
+The following command prints all records in the input file
+@file{BBS-list} that do @emph{not} contain the string @samp{foo}.
+
+@example
+awk '! /foo/' BBS-list
+@end example
+
+Note that boolean patterns are a special case of expression patterns
+(@pxref{Expression Patterns, ,Expressions as Patterns}); they are
+expressions that use the boolean operators.
+@xref{Boolean Ops, ,Boolean Expressions}, for complete information
+on the boolean operators.@refill
+
+The subpatterns of a boolean pattern can be constant regular
+expressions, comparisons, or any other @code{awk} expressions.  Range
+patterns are not expressions, so they cannot appear inside boolean
+patterns.  Likewise, the special patterns @code{BEGIN} and @code{END},
+which never match any input record, are not expressions and cannot
+appear inside boolean patterns.
+
+@node Expression Patterns, Ranges, Boolean Patterns, Patterns
+@section Expressions as Patterns
+
+Any @code{awk} expression is also valid as an @code{awk} pattern.
+Then the pattern ``matches'' if the expression's value is nonzero (if a
+number) or nonnull (if a string).
+
+The expression is reevaluated each time the rule is tested against a new
+input record.  If the expression uses fields such as @code{$1}, the
+value depends directly on the new input record's text; otherwise, it
+depends only on what has happened so far in the execution of the
+@code{awk} program, but that may still be useful.
+
+Comparison patterns are actually a special case of this.  For
+example, the expression @code{$5 == "foo"} has the value 1 when the
+value of @code{$5} equals @code{"foo"}, and 0 otherwise; therefore, this
+expression as a pattern matches when the two values are equal.
+
+Boolean patterns are also special cases of expression patterns.
+
+A constant regexp as a pattern is also a special case of an expression
+pattern.  @code{/foo/} as an expression has the value 1 if @samp{foo}
+appears in the current input record; thus, as a pattern, @code{/foo/}
+matches any record containing @samp{foo}.
+
+Other implementations of @code{awk} that are not yet @sc{posix} compliant
+are less general than @code{gawk}: they allow comparison expressions, and
+boolean combinations thereof (optionally with parentheses), but not
+necessarily other kinds of expressions.
+
+@node Ranges, BEGIN/END, Expression Patterns, Patterns
+@section Specifying Record Ranges with Patterns
+
+@cindex range pattern
+@cindex patterns, range
+A @dfn{range pattern} is made of two patterns separated by a comma, of
+the form @code{@var{begpat}, @var{endpat}}.  It matches ranges of
+consecutive input records.  The first pattern @var{begpat} controls
+where the range begins, and the second one @var{endpat} controls where
+it ends.  For example,@refill
+
+@example
+awk '$1 == "on", $1 == "off"'
+@end example
+
+@noindent
+prints every record between @samp{on}/@samp{off} pairs, inclusive.
+
+A range pattern starts out by matching @var{begpat}
+against every input record; when a record matches @var{begpat}, the
+range pattern becomes @dfn{turned on}.  The range pattern matches this
+record.  As long as it stays turned on, it automatically matches every
+input record read.  It also matches @var{endpat} against
+every input record; when that succeeds, the range pattern is turned
+off again for the following record.  Now it goes back to checking
+@var{begpat} against each record.
+
+The record that turns on the range pattern and the one that turns it
+off both match the range pattern.  If you don't want to operate on
+these records, you can write @code{if} statements in the rule's action
+to distinguish them.
+
+It is possible for a range pattern to be turned both on and off by the
+same record, if both conditions are satisfied by that record.  Then the
+action is executed for just that record.
+
+@node BEGIN/END, Empty, Ranges, Patterns
+@section @code{BEGIN} and @code{END} Special Patterns
+
+@cindex @code{BEGIN} special pattern
+@cindex patterns, @code{BEGIN}
+@cindex @code{END} special pattern
+@cindex patterns, @code{END}
+@code{BEGIN} and @code{END} are special patterns.  They are not used to
+match input records.  Rather, they are used for supplying start-up or
+clean-up information to your @code{awk} script.  A @code{BEGIN} rule is
+executed, once, before the first input record has been read.  An @code{END}
+rule is executed, once, after all the input has been read.  For
+example:@refill
+
+@example
+awk 'BEGIN @{ print "Analysis of \"foo\"" @}
+     /foo/ @{ ++foobar @}
+     END   @{ print "\"foo\" appears " foobar " times." @}' BBS-list
+@end example
+
+This program finds the number of records in the input file @file{BBS-list}
+that contain the string @samp{foo}.  The @code{BEGIN} rule prints a title
+for the report.  There is no need to use the @code{BEGIN} rule to
+initialize the counter @code{foobar} to zero, as @code{awk} does this
+for us automatically (@pxref{Variables}).
+
+The second rule increments the variable @code{foobar} every time a
+record containing the pattern @samp{foo} is read.  The @code{END} rule
+prints the value of @code{foobar} at the end of the run.@refill
+
+The special patterns @code{BEGIN} and @code{END} cannot be used in ranges
+or with boolean operators (indeed, they cannot be used with any operators).
+
+An @code{awk} program may have multiple @code{BEGIN} and/or @code{END}
+rules.  They are executed in the order they appear, all the @code{BEGIN}
+rules at start-up and all the @code{END} rules at termination.
+
+Multiple @code{BEGIN} and @code{END} sections are useful for writing
+library functions, since each library can have its own @code{BEGIN} or
+@code{END} rule to do its own initialization and/or cleanup.  Note that
+the order in which library functions are named on the command line
+controls the order in which their @code{BEGIN} and @code{END} rules are
+executed.  Therefore you have to be careful to write such rules in
+library files so that the order in which they are executed doesn't matter.
+@xref{Command Line, ,Invoking @code{awk}}, for more information on
+using library functions.
+
+If an @code{awk} program only has a @code{BEGIN} rule, and no other
+rules, then the program exits after the @code{BEGIN} rule has been run.
+(Older versions of @code{awk} used to keep reading and ignoring input
+until end of file was seen.)  However, if an @code{END} rule exists as
+well, then the input will be read, even if there are no other rules in
+the program.  This is necessary in case the @code{END} rule checks the
+@code{NR} variable.
+
+@code{BEGIN} and @code{END} rules must have actions; there is no default
+action for these rules since there is no current record when they run.
+
+@node Empty,  , BEGIN/END, Patterns
+@comment  node-name,  next,  previous,  up
+@section The Empty Pattern
+
+@cindex empty pattern
+@cindex pattern, empty
+An empty pattern is considered to match @emph{every} input record.  For
+example, the program:@refill
+
+@example
+awk '@{ print $1 @}' BBS-list
+@end example
+
+@noindent
+prints the first field of every record.
+
+@node Actions, Expressions, Patterns, Top
+@chapter Overview of Actions
+@cindex action, definition of
+@cindex curly braces
+@cindex action, curly braces
+@cindex action, separating statements
+
+An @code{awk} program or script consists of a series of
+rules and function definitions, interspersed.  (Functions are
+described later.  @xref{User-defined, ,User-defined Functions}.)
+
+A rule contains a pattern and an action, either of which may be
+omitted.  The purpose of the @dfn{action} is to tell @code{awk} what to do
+once a match for the pattern is found.  Thus, the entire program
+looks somewhat like this:
+
+@example
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@r{[}@var{pattern}@r{]} @r{[}@{ @var{action} @}@r{]}
+@dots{}
+function @var{name} (@var{args}) @{ @dots{} @}
+@dots{}
+@end example
+
+An action consists of one or more @code{awk} @dfn{statements}, enclosed
+in curly braces (@samp{@{} and @samp{@}}).  Each statement specifies one
+thing to be done.  The statements are separated by newlines or
+semicolons.
+
+The curly braces around an action must be used even if the action
+contains only one statement, or even if it contains no statements at
+all.  However, if you omit the action entirely, omit the curly braces as
+well.  (An omitted action is equivalent to @samp{@{ print $0 @}}.)
+
+Here are the kinds of statements supported in @code{awk}:
+
+@itemize @bullet
+@item
+Expressions, which can call functions or assign values to variables
+(@pxref{Expressions, ,Expressions as Action Statements}).  Executing
+this kind of statement simply computes the value of the expression and
+then ignores it.  This is useful when the expression has side effects
+(@pxref{Assignment Ops, ,Assignment Expressions}).@refill
+
+@item
+Control statements, which specify the control flow of @code{awk}
+programs.  The @code{awk} language gives you C-like constructs
+(@code{if}, @code{for}, @code{while}, and so on) as well as a few
+special ones (@pxref{Statements, ,Control Statements in Actions}).@refill
+
+@item
+Compound statements, which consist of one or more statements enclosed in
+curly braces.  A compound statement is used in order to put several
+statements together in the body of an @code{if}, @code{while}, @code{do}
+or @code{for} statement.
+
+@item
+Input control, using the @code{getline} command
+(@pxref{Getline, ,Explicit Input with @code{getline}}), and the @code{next}
+statement (@pxref{Next Statement, ,The @code{next} Statement}).
+
+@item
+Output statements, @code{print} and @code{printf}.
+@xref{Printing, ,Printing Output}.@refill
+
+@item
+Deletion statements, for deleting array elements.
+@xref{Delete, ,The @code{delete} Statement}.@refill
+@end itemize
+
+@iftex
+The next two chapters cover in detail expressions and control
+statements, respectively.  We go on to treat arrays and built-in
+functions, both of which are used in expressions.  Then we proceed
+to discuss how to define your own functions.
+@end iftex
+
+@node Expressions, Statements, Actions, Top
+@chapter Expressions as Action Statements
+@cindex expression
+
+Expressions are the basic building blocks of @code{awk} actions.  An
+expression evaluates to a value, which you can print, test, store in a
+variable or pass to a function.  But beyond that, an expression can
+assign a new value to a variable or a field, with an assignment operator.
+
+An expression can serve as a statement on its own.  Most other kinds of
+statements contain one or more expressions which specify data to be
+operated on.  As in other languages, expressions in @code{awk} include
+variables, array references, constants, and function calls, as well as
+combinations of these with various operators.
+
+@menu
+* Constants::                   String, numeric, and regexp constants.
+* Variables::                   Variables give names to values for later use.
+* Arithmetic Ops::              Arithmetic operations (@samp{+}, @samp{-}, etc.)
+* Concatenation::               Concatenating strings.
+* Comparison Ops::              Comparison of numbers and strings 
+                                with @samp{<}, etc.
+* Boolean Ops::                 Combining comparison expressions 
+                                using boolean operators
+                                @samp{||} (``or''), @samp{&&} (``and'') and @samp{!} (``not'').
+
+* Assignment Ops::              Changing the value of a variable or a field.
+* Increment Ops::               Incrementing the numeric value of a variable.
+
+* Conversion::                  The conversion of strings to numbers 
+                                and vice versa.
+* Values::                      The whole truth about numbers and strings.
+* Conditional Exp::             Conditional expressions select 
+                                between two subexpressions under control 
+                                of a third subexpression.
+* Function Calls::              A function call is an expression.
+* Precedence::                  How various operators nest.
+@end menu
+
+@node Constants, Variables, Expressions, Expressions
+@section Constant Expressions
+@cindex constants, types of
+@cindex string constants
+
+The simplest type of expression is the @dfn{constant}, which always has
+the same value.  There are three types of constants: numeric constants,
+string constants, and regular expression constants.
+
+@cindex numeric constant
+@cindex numeric value
+A @dfn{numeric constant} stands for a number.  This number can be an
+integer, a decimal fraction, or a number in scientific (exponential)
+notation.  Note that all numeric values are represented within
+@code{awk} in double-precision floating point.  Here are some examples
+of numeric constants, which all have the same value:
+
+@example
+105
+1.05e+2
+1050e-1
+@end example
+
+A string constant consists of a sequence of characters enclosed in
+double-quote marks.  For example:
+
+@example
+"parrot"
+@end example
+
+@noindent
+@iftex
+@cindex differences between @code{gawk} and @code{awk}
+@end iftex
+represents the string whose contents are @samp{parrot}.  Strings in
+@code{gawk} can be of any length and they can contain all the possible
+8-bit ASCII characters including ASCII NUL.  Other @code{awk}
+implementations may have difficulty with some character codes.@refill
+
+@cindex escape sequence notation
+Some characters cannot be included literally in a string constant.  You
+represent them instead with @dfn{escape sequences}, which are character
+sequences beginning with a backslash (@samp{\}).
+
+One use of an escape sequence is to include a double-quote character in
+a string constant.  Since a plain double-quote would end the string, you
+must use @samp{\"} to represent a single double-quote character as a
+part of the string.  
+The 
+backslash character itself is another character that cannot be
+included normally; you write @samp{\\} to put one backslash in the
+string.  Thus, the string whose contents are the two characters
+@samp{"\} must be written @code{"\"\\"}.
+
+Another use of backslash is to represent unprintable characters
+such as newline.  While there is nothing to stop you from writing most
+of these characters directly in a string constant, they may look ugly.
+
+Here is a table of all the escape sequences used in @code{awk}:
+
+@table @code
+@item \\
+Represents a literal backslash, @samp{\}.
+
+@item \a
+Represents the ``alert'' character, control-g, ASCII code 7.
+
+@item \b
+Represents a backspace, control-h, ASCII code 8.
+
+@item \f
+Represents a formfeed, control-l, ASCII code 12.
+
+@item \n
+Represents a newline, control-j, ASCII code 10.
+
+@item \r
+Represents a carriage return, control-m, ASCII code 13.
+
+@item \t
+Represents a horizontal tab, control-i, ASCII code 9.
+
+@item \v
+Represents a vertical tab, control-k, ASCII code 11.
+
+@item \@var{nnn}
+Represents the octal value @var{nnn}, where @var{nnn} are one to three
+digits between 0 and 7.  For example, the code for the ASCII ESC
+(escape) character is @samp{\033}.@refill
+
+@item \x@var{hh}@dots{}
+Represents the hexadecimal value @var{hh}, where @var{hh} are hexadecimal
+digits (@samp{0} through @samp{9} and either @samp{A} through @samp{F} or
+@samp{a} through @samp{f}).  Like the same construct in @sc{ansi} C, the escape
+sequence continues until the first non-hexadecimal digit is seen.  However,
+using more than two hexadecimal digits produces undefined results.  (The
+@samp{\x} escape sequence is not allowed in @sc{posix} @code{awk}.)@refill
+@end table
+
+A @dfn{constant regexp} is a regular expression description enclosed in
+slashes, such as @code{/^beginning and end$/}.  Most regexps used in
+@code{awk} programs are constant, but the @samp{~} and @samp{!~}
+operators can also match computed or ``dynamic'' regexps
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).@refill
+
+Constant regexps may be used like simple expressions.  When a
+constant regexp is not on the right hand side of the @samp{~} or
+@samp{!~} operators, it has the same meaning as if it appeared
+in a pattern, i.e. @samp{($0 ~ /foo/)}
+(@pxref{Expression Patterns, ,Expressions as Patterns}).
+This means that the two code segments,@refill
+
+@example
+if ($0 ~ /barfly/ || $0 ~ /camelot/)
+    print "found"
+@end example
+
+@noindent
+and
+
+@example
+if (/barfly/ || /camelot/)
+    print "found"
+@end example
+
+@noindent
+are exactly equivalent.  One rather bizarre consequence of this rule is
+that the following boolean expression is legal, but does not do what the user
+intended:@refill
+
+@example
+if (/foo/ ~ $1) print "found foo"
+@end example
+
+This code is ``obviously'' testing @code{$1} for a match against the regexp
+@code{/foo/}.  But in fact, the expression @code{(/foo/ ~ $1)} actually means
+@code{(($0 ~ /foo/) ~ $1)}.  In other words, first match the input record
+against the regexp @code{/foo/}.  The result will be either a 1 or a 0,
+depending upon the success or failure of the match.  Then match that result
+against the first field in the record.@refill
+
+Since it is unlikely that you would ever really wish to make this kind of
+test, @code{gawk} will issue a warning when it sees this construct in
+a program.@refill
+
+Another consequence of this rule is that the assignment statement
+
+@example
+matches = /foo/
+@end example
+
+@noindent
+will assign either 0 or 1 to the variable @code{matches}, depending
+upon the contents of the current input record.
+
+Constant regular expressions are also used as the first argument for
+the @code{sub} and @code{gsub} functions
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+This feature of the language was never well documented until the
+@sc{posix} specification.
+
+You may be wondering, when is
+
+@example
+$1 ~ /foo/ @{ @dots{} @}
+@end example
+
+@noindent
+preferable to
+
+@example
+$1 ~ "foo" @{ @dots{} @}
+@end example
+
+Since the right-hand sides of both @samp{~} operators are constants,
+it is more efficient to use the @samp{/foo/} form: @code{awk} can note
+that you have supplied a regexp and store it internally in a form that
+makes pattern matching more efficient.  In the second form, @code{awk}
+must first convert the string into this internal form, and then perform
+the pattern matching.  The first form is also better style; it shows 
+clearly that you intend a regexp match.
+
+@node Variables, Arithmetic Ops, Constants, Expressions
+@section Variables
+@cindex variables, user-defined
+@cindex user-defined variables
+@c there should be more than one subsection, ideally.  Not a big deal.
+@c But usually there are supposed to be at least two.  One way to get
+@c around this is to write the info in the subsection as the info in the
+@c section itself and not have any subsections..  --mew
+
+Variables let you give names to values and refer to them later.  You have
+already seen variables in many of the examples.  The name of a variable
+must be a sequence of letters, digits and underscores, but it may not begin
+with a digit.  Case is significant in variable names; @code{a} and @code{A}
+are distinct variables.
+
+A variable name is a valid expression by itself; it represents the
+variable's current value.  Variables are given new values with
+@dfn{assignment operators} and @dfn{increment operators}.
+@xref{Assignment Ops, ,Assignment Expressions}.
+
+A few variables have special built-in meanings, such as @code{FS}, the
+field separator, and @code{NF}, the number of fields in the current
+input record.  @xref{Built-in Variables}, for a list of them.  These
+built-in variables can be used and assigned just like all other
+variables, but their values are also used or changed automatically by
+@code{awk}.  Each built-in variable's name is made entirely of upper case
+letters.
+
+Variables in @code{awk} can be assigned either numeric or string
+values.  By default, variables are initialized to the null string, which
+is effectively zero if converted to a number.  There is no need to
+``initialize'' each variable explicitly in @code{awk}, the way you would in C or most other traditional languages.
+
+@menu
+* Assignment Options::          Setting variables on the command line
+                                and a summary of command line syntax.
+                                This is an advanced method of input.
+@end menu
+
+@node Assignment Options,  , Variables, Variables
+@subsection Assigning Variables on the Command Line
+
+You can set any @code{awk} variable by including a @dfn{variable assignment}
+among the arguments on the command line when you invoke @code{awk}
+(@pxref{Command Line, ,Invoking @code{awk}}).  Such an assignment has
+this form:@refill
+
+@example
+@var{variable}=@var{text}
+@end example
+
+@noindent
+With it, you can set a variable either at the beginning of the
+@code{awk} run or in between input files.
+
+If you precede the assignment with the @samp{-v} option, like this:
+
+@example
+-v @var{variable}=@var{text}
+@end example
+
+@noindent
+then the variable is set at the very beginning, before even the
+@code{BEGIN} rules are run.  The @samp{-v} option and its assignment
+must precede all the file name arguments, as well as the program text.
+
+Otherwise, the variable assignment is performed at a time determined by
+its position among the input file arguments: after the processing of the
+preceding input file argument.  For example:
+
+@example
+awk '@{ print $n @}' n=4 inventory-shipped n=2 BBS-list
+@end example
+
+@noindent
+prints the value of field number @code{n} for all input records.  Before
+the first file is read, the command line sets the variable @code{n}
+equal to 4.  This causes the fourth field to be printed in lines from
+the file @file{inventory-shipped}.  After the first file has finished,
+but before the second file is started, @code{n} is set to 2, so that the
+second field is printed in lines from @file{BBS-list}.
+
+Command line arguments are made available for explicit examination by
+the @code{awk} program in an array named @code{ARGV}
+(@pxref{Built-in Variables}).@refill
+
+@code{awk} processes the values of command line assignments for escape
+sequences (@pxref{Constants, ,Constant Expressions}).
+
+@node Arithmetic Ops, Concatenation, Variables, Expressions
+@section Arithmetic Operators
+@cindex arithmetic operators
+@cindex operators, arithmetic
+@cindex addition
+@cindex subtraction
+@cindex multiplication
+@cindex division
+@cindex remainder
+@cindex quotient
+@cindex exponentiation
+
+The @code{awk} language uses the common arithmetic operators when
+evaluating expressions.  All of these arithmetic operators follow normal
+precedence rules, and work as you would expect them to.  This example
+divides field three by field four, adds field two, stores the result
+into field one, and prints the resulting altered input record:
+
+@example
+awk '@{ $1 = $2 + $3 / $4; print @}' inventory-shipped
+@end example
+
+The arithmetic operators in @code{awk} are:
+
+@table @code
+@item @var{x} + @var{y}
+Addition.
+
+@item @var{x} - @var{y}
+Subtraction.
+
+@item - @var{x}
+Negation.
+
+@item + @var{x}
+Unary plus.  No real effect on the expression.
+
+@item @var{x} * @var{y}
+Multiplication.
+
+@item @var{x} / @var{y}
+Division.  Since all numbers in @code{awk} are double-precision
+floating point, the result is not rounded to an integer: @code{3 / 4}
+has the value 0.75.
+
+@item @var{x} % @var{y}
+@iftex
+@cindex differences between @code{gawk} and @code{awk}
+@end iftex
+Remainder.  The quotient is rounded toward zero to an integer,
+multiplied by @var{y} and this result is subtracted from @var{x}.
+This operation is sometimes known as ``trunc-mod.''  The following
+relation always holds:
+
+@example
+b * int(a / b) + (a % b) == a
+@end example
+
+One possibly undesirable effect of this definition of remainder is that
+@code{@var{x} % @var{y}} is negative if @var{x} is negative.  Thus,
+
+@example
+-17 % 8 = -1
+@end example
+
+In other @code{awk} implementations, the signedness of the remainder
+may be machine dependent.
+
+@item @var{x} ^ @var{y}
+@itemx @var{x} ** @var{y}
+Exponentiation: @var{x} raised to the @var{y} power.  @code{2 ^ 3} has
+the value 8.  The character sequence @samp{**} is equivalent to
+@samp{^}.  (The @sc{posix} standard only specifies the use of @samp{^}
+for exponentiation.)
+@end table
+
+@node Concatenation, Comparison Ops, Arithmetic Ops, Expressions
+@section String Concatenation
+
+@cindex string operators
+@cindex operators, string
+@cindex concatenation
+There is only one string operation: concatenation.  It does not have a
+specific operator to represent it.  Instead, concatenation is performed by
+writing expressions next to one another, with no operator.  For example:
+
+@example
+awk '@{ print "Field number one: " $1 @}' BBS-list
+@end example
+
+@noindent
+produces, for the first record in @file{BBS-list}:
+
+@example
+Field number one: aardvark
+@end example
+
+Without the space in the string constant after the @samp{:}, the line
+would run together.  For example:
+
+@example
+awk '@{ print "Field number one:" $1 @}' BBS-list
+@end example
+
+@noindent
+produces, for the first record in @file{BBS-list}:
+
+@example
+Field number one:aardvark
+@end example
+
+Since string concatenation does not have an explicit operator, it is
+often necessary to ensure that it happens where you want it to by
+enclosing the items to be concatenated in parentheses.  For example, the
+following code fragment does not concatenate @code{file} and @code{name}
+as you might expect:
+
+@example
+file = "file"
+name = "name"
+print "something meaningful" > file name
+@end example
+
+@noindent
+It is necessary to use the following:
+
+@example
+print "something meaningful" > (file name)
+@end example
+
+We recommend you use parentheses around concatenation in all but the
+most common contexts (such as in the right-hand operand of @samp{=}).
+
+@ignore
+@code{gawk} actually now allows a concatenation on the right hand
+side of a @code{>} redirection, but other @code{awk}s don't.  So for
+now we won't mention that fact.
+@end ignore
+
+@node Comparison Ops, Boolean Ops, Concatenation, Expressions
+@section Comparison Expressions
+@cindex comparison expressions
+@cindex expressions, comparison
+@cindex relational operators
+@cindex operators, relational
+@cindex regexp operators
+
+@dfn{Comparison expressions} compare strings or numbers for
+relationships such as equality.  They are written using @dfn{relational
+operators}, which are a superset of those in C.  Here is a table of
+them:
+
+@table @code
+@item @var{x} < @var{y}
+True if @var{x} is less than @var{y}.
+
+@item @var{x} <= @var{y}
+True if @var{x} is less than or equal to @var{y}.
+
+@item @var{x} > @var{y}
+True if @var{x} is greater than @var{y}.
+
+@item @var{x} >= @var{y}
+True if @var{x} is greater than or equal to @var{y}.
+
+@item @var{x} == @var{y}
+True if @var{x} is equal to @var{y}.
+
+@item @var{x} != @var{y}
+True if @var{x} is not equal to @var{y}.
+
+@item @var{x} ~ @var{y}
+True if the string @var{x} matches the regexp denoted by @var{y}.
+
+@item @var{x} !~ @var{y}
+True if the string @var{x} does not match the regexp denoted by @var{y}.
+
+@item @var{subscript} in @var{array}
+True if array @var{array} has an element with the subscript @var{subscript}.
+@end table
+
+Comparison expressions have the value 1 if true and 0 if false.
+
+The rules @code{gawk} uses for performing comparisons are based on those
+in draft 11.2 of the @sc{posix} standard.  The @sc{posix} standard introduced
+the concept of a @dfn{numeric string}, which is simply a string that looks
+like a number, for example, @code{@w{" +2"}}.
+
+@vindex CONVFMT
+When performing a relational operation, @code{gawk} considers the type of an
+operand to be the type it received on its last @emph{assignment}, rather
+than the type of its last @emph{use}
+(@pxref{Values, ,Numeric and String Values}).
+This type is @emph{unknown} when the operand is from an ``external'' source:
+field variables, command line arguments, array elements resulting from a
+@code{split} operation, and the value of an @code{ENVIRON} element.
+In this case only, if the operand is a numeric string, then it is
+considered to be of both string type and numeric type.  If at least one
+operand of a comparison is of string type only, then a string
+comparison is performed.  Any numeric operand will be converted to a
+string using the value of @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+If one operand of a comparison is numeric, and the other operand is
+either numeric or both numeric and string, then @code{gawk} does a
+numeric comparison.  If both operands have both types, then the
+comparison is numeric.  Strings are compared
+by comparing the first character of each, then the second character of each,
+and so on.  Thus @code{"10"} is less than @code{"9"}.  If there are two
+strings where one is a prefix of the other, the shorter string is less than
+the longer one.  Thus @code{"abc"} is less than @code{"abcd"}.@refill
+
+Here are some sample expressions, how @code{gawk} compares them, and what
+the result of the comparison is.
+
+@table @code
+@item 1.5 <= 2.0
+numeric comparison (true)
+
+@item "abc" >= "xyz"
+string comparison (false)
+
+@item 1.5 != " +2"
+string comparison (true)
+
+@item "1e2" < "3"
+string comparison (true)
+
+@item a = 2; b = "2"
+@itemx a == b
+string comparison (true)
+@end table
+
+By contrast, the following command
+
+@example
+echo 1e2 3 | awk '@{ print ($1 < $2) ? "true" : "false" @}'
+@end example
+
+@noindent
+prints @samp{false} since both @code{$1} and @code{$2} are numeric
+strings and thus have both string and numeric types, thus dictating
+a numeric comparison.
+
+The purpose of the comparison rules and the use of numeric strings is
+to attempt to produce the behavior that is ``least surprising,'' while
+still ``doing the right thing.''
+
+String comparisons and regular expression comparisons are very different.
+For example,
+
+@example
+$1 == "foo"
+@end example
+
+@noindent
+has the value of 1, or is true, if the first field of the current input
+record is precisely @samp{foo}.  By contrast, 
+
+@example
+$1 ~ /foo/
+@end example
+
+@noindent
+has the value 1 if the first field contains @samp{foo}, such as @samp{foobar}.
+
+The right hand operand of the @samp{~} and @samp{!~} operators may be
+either a constant regexp (@code{/@dots{}/}), or it may be an ordinary
+expression, in which case the value of the expression as a string is a
+dynamic regexp (@pxref{Regexp Usage, ,How to Use Regular Expressions}).
+
+@cindex regexp as expression
+In very recent implementations of @code{awk}, a constant regular
+expression in slashes by itself is also an expression.  The regexp
+@code{/@var{regexp}/} is an abbreviation for this comparison expression:
+
+@example
+$0 ~ /@var{regexp}/
+@end example
+
+In some contexts it may be necessary to write parentheses around the
+regexp to avoid confusing the @code{gawk} parser.  For example,
+@code{(/x/ - /y/) > threshold} is not allowed, but @code{((/x/) - (/y/))
+> threshold} parses properly.
+
+One special place where @code{/foo/} is @emph{not} an abbreviation for
+@code{$0 ~ /foo/} is when it is the right-hand operand of @samp{~} or
+@samp{!~}! @xref{Constants, ,Constant Expressions}, where this is
+discussed in more detail.
+
+@node Boolean Ops, Assignment Ops, Comparison Ops, Expressions
+@section Boolean Expressions
+@cindex expressions, boolean
+@cindex boolean expressions
+@cindex operators, boolean
+@cindex boolean operators
+@cindex logical operations
+@cindex and operator
+@cindex or operator
+@cindex not operator
+
+A @dfn{boolean expression} is a combination of comparison expressions or
+matching expressions, using the boolean operators ``or''
+(@samp{||}), ``and'' (@samp{&&}), and ``not'' (@samp{!}), along with
+parentheses to control nesting.  The truth of the boolean expression is
+computed by combining the truth values of the component expressions.
+
+Boolean expressions can be used wherever comparison and matching
+expressions can be used.  They can be used in @code{if}, @code{while},
+@code{do} and @code{for} statements.  They have numeric values (1 if true,
+0 if false), which come into play if the result of the boolean expression
+is stored in a variable, or used in arithmetic.@refill
+
+In addition, every boolean expression is also a valid boolean pattern, so
+you can use it as a pattern to control the execution of rules.
+
+Here are descriptions of the three boolean operators, with an example of
+each.  It may be instructive to compare these examples with the
+analogous examples of boolean patterns
+(@pxref{Boolean Patterns, ,Boolean Operators and Patterns}), which
+use the same boolean operators in patterns instead of expressions.@refill
+
+@table @code
+@item @var{boolean1} && @var{boolean2}
+True if both @var{boolean1} and @var{boolean2} are true.  For example,
+the following statement prints the current input record if it contains
+both @samp{2400} and @samp{foo}.@refill
+
+@smallexample
+if ($0 ~ /2400/ && $0 ~ /foo/) print
+@end smallexample
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is true.  This can make a difference when @var{boolean2} contains
+expressions that have side effects: in the case of @code{$0 ~ /foo/ &&
+($2 == bar++)}, the variable @code{bar} is not incremented if there is
+no @samp{foo} in the record.
+
+@item @var{boolean1} || @var{boolean2}
+True if at least one of @var{boolean1} or @var{boolean2} is true.
+For example, the following command prints all records in the input
+file @file{BBS-list} that contain @emph{either} @samp{2400} or
+@samp{foo}, or both.@refill
+
+@smallexample
+awk '@{ if ($0 ~ /2400/ || $0 ~ /foo/) print @}' BBS-list
+@end smallexample
+
+The subexpression @var{boolean2} is evaluated only if @var{boolean1}
+is false.  This can make a difference when @var{boolean2} contains
+expressions that have side effects.
+
+@item !@var{boolean}
+True if @var{boolean} is false.  For example, the following program prints
+all records in the input file @file{BBS-list} that do @emph{not} contain the
+string @samp{foo}.
+
+@smallexample
+awk '@{ if (! ($0 ~ /foo/)) print @}' BBS-list
+@end smallexample
+@end table
+
+@node Assignment Ops, Increment Ops, Boolean Ops, Expressions
+@section Assignment Expressions
+@cindex assignment operators
+@cindex operators, assignment
+@cindex expressions, assignment
+
+An @dfn{assignment} is an expression that stores a new value into a
+variable.  For example, let's assign the value 1 to the variable
+@code{z}:@refill
+
+@example
+z = 1
+@end example
+
+After this expression is executed, the variable @code{z} has the value 1.
+Whatever old value @code{z} had before the assignment is forgotten.
+
+Assignments can store string values also.  For example, this would store
+the value @code{"this food is good"} in the variable @code{message}:
+
+@example
+thing = "food"
+predicate = "good"
+message = "this " thing " is " predicate
+@end example
+
+@noindent
+(This also illustrates concatenation of strings.)
+
+The @samp{=} sign is called an @dfn{assignment operator}.  It is the
+simplest assignment operator because the value of the right-hand
+operand is stored unchanged.
+
+@cindex side effect
+Most operators (addition, concatenation, and so on) have no effect
+except to compute a value.  If you ignore the value, you might as well
+not use the operator.  An assignment operator is different; it does
+produce a value, but even if you ignore the value, the assignment still
+makes itself felt through the alteration of the variable.  We call this
+a @dfn{side effect}.
+
+@cindex lvalue
+The left-hand operand of an assignment need not be a variable
+(@pxref{Variables}); it can also be a field
+(@pxref{Changing Fields, ,Changing the Contents of a Field}) or
+an array element (@pxref{Arrays, ,Arrays in @code{awk}}).
+These are all called @dfn{lvalues},
+which means they can appear on the left-hand side of an assignment operator.
+The right-hand operand may be any expression; it produces the new value
+which the assignment stores in the specified variable, field or array
+element.@refill
+
+It is important to note that variables do @emph{not} have permanent types.
+The type of a variable is simply the type of whatever value it happens
+to hold at the moment.  In the following program fragment, the variable
+@code{foo} has a numeric value at first, and a string value later on:
+
+@example
+foo = 1
+print foo
+foo = "bar"
+print foo
+@end example
+
+@noindent
+When the second assignment gives @code{foo} a string value, the fact that
+it previously had a numeric value is forgotten.
+
+An assignment is an expression, so it has a value: the same value that
+is assigned.  Thus, @code{z = 1} as an expression has the value 1.
+One consequence of this is that you can write multiple assignments together:
+
+@example
+x = y = z = 0
+@end example
+
+@noindent
+stores the value 0 in all three variables.  It does this because the
+value of @code{z = 0}, which is 0, is stored into @code{y}, and then
+the value of @code{y = z = 0}, which is 0, is stored into @code{x}.
+
+You can use an assignment anywhere an expression is called for.  For
+example, it is valid to write @code{x != (y = 1)} to set @code{y} to 1
+and then test whether @code{x} is not equal to 1.  But this style tends to make
+programs hard to read; except in a one-shot program, you should
+rewrite it to get rid of such nesting of assignments.  This is never very
+hard.
+
+Aside from @samp{=}, there are several other assignment operators that
+do arithmetic with the old value of the variable.  For example, the
+operator @samp{+=} computes a new value by adding the right-hand value
+to the old value of the variable.  Thus, the following assignment adds
+5 to the value of @code{foo}:
+
+@example
+foo += 5
+@end example
+
+@noindent
+This is precisely equivalent to the following:
+
+@example
+foo = foo + 5
+@end example
+
+@noindent
+Use whichever one makes the meaning of your program clearer.
+
+Here is a table of the arithmetic assignment operators.  In each
+case, the right-hand operand is an expression whose value is converted
+to a number.
+
+@table @code
+@item @var{lvalue} += @var{increment}
+Adds @var{increment} to the value of @var{lvalue} to make the new value
+of @var{lvalue}.
+
+@item @var{lvalue} -= @var{decrement}
+Subtracts @var{decrement} from the value of @var{lvalue}.
+
+@item @var{lvalue} *= @var{coefficient}
+Multiplies the value of @var{lvalue} by @var{coefficient}.
+
+@item @var{lvalue} /= @var{divisor}
+Divides the value of @var{lvalue} by @var{divisor}.
+
+@item @var{lvalue} %= @var{modulus}
+Sets @var{lvalue} to its remainder by @var{modulus}.
+
+@item @var{lvalue} ^= @var{power}
+@itemx @var{lvalue} **= @var{power}
+Raises @var{lvalue} to the power @var{power}.
+(Only the @code{^=} operator is specified by @sc{posix}.)
+@end table
+
+@ignore
+From: gatech!ames!elroy!cit-vax!EQL.Caltech.Edu!rankin (Pat Rankin)
+     In the discussion of assignment operators, it states that
+``foo += 5'' "is precisely equivalent to" ``foo = foo + 5'' (p.77).  That
+may be true for simple variables, but it's not true for expressions with
+side effects, like array references.  For proof, try
+  BEGIN {
+	  foo[rand()] += 5;               for (x in foo) print x, foo[x]
+	  bar[rand()] = bar[rand()] + 5;  for (x in bar) print x, bar[x]
+  }
+I suspect that the original statement is simply untrue--that '+=' is more
+efficient in all cases.
+
+ADR --- Try to add something about this here for the next go 'round.
+@end ignore
+
+@node Increment Ops, Conversion, Assignment Ops, Expressions
+@section Increment Operators
+
+@cindex increment operators
+@cindex operators, increment
+@dfn{Increment operators} increase or decrease the value of a variable
+by 1.  You could do the same thing with an assignment operator, so
+the increment operators add no power to the @code{awk} language; but they
+are convenient abbreviations for something very common.
+
+The operator to add 1 is written @samp{++}.  It can be used to increment
+a variable either before or after taking its value.
+
+To pre-increment a variable @var{v}, write @code{++@var{v}}.  This adds
+1 to the value of @var{v} and that new value is also the value of this
+expression.  The assignment expression @code{@var{v} += 1} is completely
+equivalent.
+
+Writing the @samp{++} after the variable specifies post-increment.  This
+increments the variable value just the same; the difference is that the
+value of the increment expression itself is the variable's @emph{old}
+value.  Thus, if @code{foo} has the value 4, then the expression @code{foo++}
+has the value 4, but it changes the value of @code{foo} to 5.
+
+The post-increment @code{foo++} is nearly equivalent to writing @code{(foo
++= 1) - 1}.  It is not perfectly equivalent because all numbers in
+@code{awk} are floating point: in floating point, @code{foo + 1 - 1} does
+not necessarily equal @code{foo}.  But the difference is minute as
+long as you stick to numbers that are fairly small (less than a trillion).
+
+Any lvalue can be incremented.  Fields and array elements are incremented
+just like variables.  (Use @samp{$(i++)} when you wish to do a field reference
+and a variable increment at the same time.  The parentheses are necessary
+because of the precedence of the field reference operator, @samp{$}.)
+@c expert information in the last parenthetical remark
+
+The decrement operator @samp{--} works just like @samp{++} except that
+it subtracts 1 instead of adding.  Like @samp{++}, it can be used before
+the lvalue to pre-decrement or after it to post-decrement.
+
+Here is a summary of increment and decrement expressions.
+
+@table @code
+@item ++@var{lvalue}
+This expression increments @var{lvalue} and the new value becomes the
+value of this expression.
+
+@item @var{lvalue}++
+This expression causes the contents of @var{lvalue} to be incremented.
+The value of the expression is the @emph{old} value of @var{lvalue}.
+
+@item --@var{lvalue}
+Like @code{++@var{lvalue}}, but instead of adding, it subtracts.  It
+decrements @var{lvalue} and delivers the value that results.
+
+@item @var{lvalue}--
+Like @code{@var{lvalue}++}, but instead of adding, it subtracts.  It
+decrements @var{lvalue}.  The value of the expression is the @emph{old}
+value of @var{lvalue}.
+@end table
+
+@node Conversion, Values, Increment Ops, Expressions
+@section Conversion of Strings and Numbers
+
+@cindex conversion of strings and numbers
+Strings are converted to numbers, and numbers to strings, if the context
+of the @code{awk} program demands it.  For example, if the value of
+either @code{foo} or @code{bar} in the expression @code{foo + bar}
+happens to be a string, it is converted to a number before the addition
+is performed.  If numeric values appear in string concatenation, they
+are converted to strings.  Consider this:@refill
+
+@example
+two = 2; three = 3
+print (two three) + 4
+@end example
+
+@noindent
+This eventually prints the (numeric) value 27.  The numeric values of
+the variables @code{two} and @code{three} are converted to strings and
+concatenated together, and the resulting string is converted back to the
+number 23, to which 4 is then added.
+
+If, for some reason, you need to force a number to be converted to a
+string, concatenate the null string with that number.  To force a string
+to be converted to a number, add zero to that string.
+
+A string is converted to a number by interpreting a numeric prefix
+of the string as numerals:
+@code{"2.5"} converts to 2.5, @code{"1e3"} converts to 1000, and @code{"25fix"}
+has a numeric value of 25.
+Strings that can't be interpreted as valid numbers are converted to
+zero.
+
+@vindex CONVFMT
+The exact manner in which numbers are converted into strings is controlled
+by the @code{awk} built-in variable @code{CONVFMT} (@pxref{Built-in Variables}).
+Numbers are converted using a special version of the @code{sprintf} function
+(@pxref{Built-in, ,Built-in Functions}) with @code{CONVFMT} as the format
+specifier.@refill
+
+@code{CONVFMT}'s default value is @code{"%.6g"}, which prints a value with
+at most six significant digits.  For some applications you will want to
+change it to specify more precision.  Double precision on most modern
+machines gives you 16 or 17 decimal digits of precision.
+
+Strange results can happen if you set @code{CONVFMT} to a string that doesn't
+tell @code{sprintf} how to format floating point numbers in a useful way.
+For example, if you forget the @samp{%} in the format, all numbers will be
+converted to the same constant string.@refill
+
+As a special case, if a number is an integer, then the result of converting
+it to a string is @emph{always} an integer, no matter what the value of
+@code{CONVFMT} may be.  Given the following code fragment:
+
+@example
+CONVFMT = "%2.2f"
+a = 12
+b = a ""
+@end example
+
+@noindent
+@code{b} has the value @code{"12"}, not @code{"12.00"}.
+
+@ignore
+For the 2.14 version, describe the ``stickyness'' of conversions.  Right now
+the manual assumes everywhere that variables are either numbers or strings;
+in fact both kinds of values may be valid.  If both happen to be valid, a
+conversion isn't necessary and isn't done.  Revising the manual to be
+consistent with this, though, is too big a job to tackle at the moment.
+
+7/92: This has sort of been done, only the section isn't completely right!
+      What to do?
+7/92: Pretty much fixed, at least for the short term, thanks to text
+      from David.
+@end ignore
+
+@vindex OFMT
+Prior to the @sc{posix} standard, @code{awk} specified that the value
+of @code{OFMT} was used for converting numbers to strings.  @code{OFMT}
+specifies the output format to use when printing numbers with @code{print}.
+@code{CONVFMT} was introduced in order to separate the semantics of
+conversions from the semantics of printing.  Both @code{CONVFMT} and
+@code{OFMT} have the same default value: @code{"%.6g"}.  In the vast majority
+of cases, old @code{awk} programs will not change their behavior.
+However, this use of @code{OFMT} is something to keep in mind if you must
+port your program to other implementations of @code{awk}; we recommend
+that instead of changing your programs, you just port @code{gawk} itself!@refill
+
+@node Values, Conditional Exp, Conversion, Expressions
+@section Numeric and String Values
+@cindex conversion of strings and numbers
+
+Through most of this manual, we present @code{awk} values (such as constants,
+fields, or variables) as @emph{either} numbers @emph{or} strings.  This is
+a convenient way to think about them, since typically they are used in only
+one way, or the other.
+
+In truth though, @code{awk} values can be @emph{both} string and
+numeric, at the same time.  Internally, @code{awk} represents values
+with a string, a (floating point) number, and an indication that one,
+the other, or both representations of the value are valid.
+
+Keeping track of both kinds of values is important for execution
+efficiency:  a variable can acquire a string value the first time it
+is used as a string, and then that string value can be used until the
+variable is assigned a new value.  Thus, if a variable with only a numeric
+value is used in several concatenations in a row, it only has to be given
+a string representation once.  The numeric value remains valid, so that
+no conversion back to a number is necessary if the variable is later used
+in an arithmetic expression.
+
+Tracking both kinds of values is also important for precise numerical
+calculations.  Consider the following:
+
+@smallexample
+a = 123.321
+CONVFMT = "%3.1f"
+b = a " is a number"
+c = a + 1.654
+@end smallexample
+
+@noindent
+The variable @code{a} receives a string value in the concatenation and
+assignment to @code{b}.  The string value of @code{a} is @code{"123.3"}.
+If the numeric value was lost when it was converted to a string, then the
+numeric use of @code{a} in the last statement would lose information.
+@code{c} would be assigned the value 124.954 instead of 124.975.
+Such errors accumulate rapidly, and very adversely affect numeric
+computations.@refill
+
+Once a numeric value acquires a corresponding string value, it stays valid
+until a new assignment is made.  If @code{CONVFMT}
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) changes in the
+meantime, the old string value will still be used.  For example:@refill
+
+@smallexample
+BEGIN @{
+    CONVFMT = "%2.2f"
+    a = 123.456
+    b = a ""                # force `a' to have string value too
+    printf "a = %s\n", a
+    CONVFMT = "%.6g"
+    printf "a = %s\n", a
+    a += 0                  # make `a' numeric only again
+    printf "a = %s\n", a    # use `a' as string
+@}
+@end smallexample
+
+@noindent
+This program prints @samp{a = 123.46} twice, and then prints
+@samp{a = 123.456}.
+
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the rules that
+specify how string values are made from numeric values.
+
+@node Conditional Exp, Function Calls, Values, Expressions
+@section Conditional Expressions
+@cindex conditional expression
+@cindex expression, conditional
+
+A @dfn{conditional expression} is a special kind of expression with
+three operands.  It allows you to use one expression's value to select
+one of two other expressions.
+
+The conditional expression looks the same as in the C language:
+
+@example
+@var{selector} ? @var{if-true-exp} : @var{if-false-exp}
+@end example
+
+@noindent
+There are three subexpressions.  The first, @var{selector}, is always
+computed first.  If it is ``true'' (not zero and not null) then
+@var{if-true-exp} is computed next and its value becomes the value of
+the whole expression.  Otherwise, @var{if-false-exp} is computed next
+and its value becomes the value of the whole expression.@refill
+
+For example, this expression produces the absolute value of @code{x}:
+
+@example
+x > 0 ? x : -x
+@end example
+
+Each time the conditional expression is computed, exactly one of
+@var{if-true-exp} and @var{if-false-exp} is computed; the other is ignored.
+This is important when the expressions contain side effects.  For example,
+this conditional expression examines element @code{i} of either array
+@code{a} or array @code{b}, and increments @code{i}.
+
+@example
+x == y ? a[i++] : b[i++]
+@end example
+
+@noindent
+This is guaranteed to increment @code{i} exactly once, because each time
+one or the other of the two increment expressions is executed,
+and the other is not.
+
+@node Function Calls, Precedence, Conditional Exp, Expressions
+@section Function Calls
+@cindex function call
+@cindex calling a function
+
+A @dfn{function} is a name for a particular calculation.  Because it has
+a name, you can ask for it by name at any point in the program.  For
+example, the function @code{sqrt} computes the square root of a number.
+
+A fixed set of functions are @dfn{built-in}, which means they are
+available in every @code{awk} program.  The @code{sqrt} function is one
+of these.  @xref{Built-in, ,Built-in Functions}, for a list of built-in
+functions and their descriptions.  In addition, you can define your own
+functions in the program for use elsewhere in the same program.
+@xref{User-defined, ,User-defined Functions}, for how to do this.@refill
+
+@cindex arguments in function call
+The way to use a function is with a @dfn{function call} expression,
+which consists of the function name followed by a list of
+@dfn{arguments} in parentheses.  The arguments are expressions which
+give the raw materials for the calculation that the function will do.
+When there is more than one argument, they are separated by commas.  If
+there are no arguments, write just @samp{()} after the function name.
+Here are some examples:
+
+@example
+sqrt(x^2 + y^2)      # @r{One argument}
+atan2(y, x)          # @r{Two arguments}
+rand()               # @r{No arguments}
+@end example
+
+@strong{Do not put any space between the function name and the
+open-parenthesis!}  A user-defined function name looks just like the name of
+a variable, and space would make the expression look like concatenation
+of a variable with an expression inside parentheses.  Space before the
+parenthesis is harmless with built-in functions, but it is best not to get
+into the habit of using space; that way you avoid mistakes with user-defined
+functions.
+
+Each function expects a particular number of arguments.  For example, the
+@code{sqrt} function must be called with a single argument, the number
+to take the square root of:
+
+@example
+sqrt(@var{argument})
+@end example
+
+Some of the built-in functions allow you to omit the final argument.
+If you do so, they use a reasonable default.
+@xref{Built-in, ,Built-in Functions}, for full details.  If arguments
+are omitted in calls to user-defined functions, then those arguments are
+treated as local variables, initialized to the null string
+(@pxref{User-defined, ,User-defined Functions}).@refill
+
+Like every other expression, the function call has a value, which is
+computed by the function based on the arguments you give it.  In this
+example, the value of @code{sqrt(@var{argument})} is the square root of the
+argument.  A function can also have side effects, such as assigning the
+values of certain variables or doing I/O.
+
+Here is a command to read numbers, one number per line, and print the
+square root of each one:
+
+@example
+awk '@{ print "The square root of", $1, "is", sqrt($1) @}'
+@end example
+
+@node Precedence,  , Function Calls, Expressions
+@section Operator Precedence (How Operators Nest)
+@cindex precedence
+@cindex operator precedence
+
+@dfn{Operator precedence} determines how operators are grouped, when
+different operators appear close by in one expression.  For example,
+@samp{*} has higher precedence than @samp{+}; thus, @code{a + b * c}
+means to multiply @code{b} and @code{c}, and then add @code{a} to the
+product (i.e., @code{a + (b * c)}).
+
+You can overrule the precedence of the operators by using parentheses.
+You can think of the precedence rules as saying where the
+parentheses are assumed if you do not write parentheses yourself.  In
+fact, it is wise to always use parentheses whenever you have an unusual
+combination of operators, because other people who read the program may
+not remember what the precedence is in this case.  You might forget,
+too; then you could make a mistake.  Explicit parentheses will help prevent
+any such mistake.
+
+When operators of equal precedence are used together, the leftmost
+operator groups first, except for the assignment, conditional and
+exponentiation operators, which group in the opposite order.
+Thus, @code{a - b + c} groups as @code{(a - b) + c};
+@code{a = b = c} groups as @code{a = (b = c)}.@refill
+
+The precedence of prefix unary operators does not matter as long as only
+unary operators are involved, because there is only one way to parse
+them---innermost first.  Thus, @code{$++i} means @code{$(++i)} and
+@code{++$x} means @code{++($x)}.  However, when another operator follows
+the operand, then the precedence of the unary operators can matter.
+Thus, @code{$x^2} means @code{($x)^2}, but @code{-x^2} means
+@code{-(x^2)}, because @samp{-} has lower precedence than @samp{^}
+while @samp{$} has higher precedence.
+
+Here is a table of the operators of @code{awk}, in order of increasing
+precedence:
+
+@table @asis
+@item assignment
+@samp{=}, @samp{+=}, @samp{-=}, @samp{*=}, @samp{/=}, @samp{%=},
+@samp{^=}, @samp{**=}.  These operators group right-to-left.
+(The @samp{**=} operator is not specified by @sc{posix}.)
+
+@item conditional
+@samp{?:}.  This operator groups right-to-left.
+
+@item logical ``or''
+@samp{||}.
+
+@item logical ``and''
+@samp{&&}.
+
+@item array membership
+@samp{in}.
+
+@item matching
+@samp{~}, @samp{!~}.
+
+@item relational, and redirection
+The relational operators and the redirections have the same precedence
+level.  Characters such as @samp{>} serve both as relationals and as
+redirections; the context distinguishes between the two meanings.
+
+The relational operators are @samp{<}, @samp{<=}, @samp{==}, @samp{!=},
+@samp{>=} and @samp{>}.
+
+The I/O redirection operators are @samp{<}, @samp{>}, @samp{>>} and
+@samp{|}.
+
+Note that I/O redirection operators in @code{print} and @code{printf}
+statements belong to the statement level, not to expressions.  The
+redirection does not produce an expression which could be the operand of
+another operator.  As a result, it does not make sense to use a
+redirection operator near another operator of lower precedence, without
+parentheses.  Such combinations, for example @samp{print foo > a ? b :
+c}, result in syntax errors.
+
+@item concatenation
+No special token is used to indicate concatenation.
+The operands are simply written side by side.
+
+@item add, subtract
+@samp{+}, @samp{-}.
+
+@item multiply, divide, mod
+@samp{*}, @samp{/}, @samp{%}.
+
+@item unary plus, minus, ``not''
+@samp{+}, @samp{-}, @samp{!}.
+
+@item exponentiation
+@samp{^}, @samp{**}.  These operators group right-to-left.
+(The @samp{**} operator is not specified by @sc{posix}.)
+
+@item increment, decrement
+@samp{++}, @samp{--}.
+
+@item field
+@samp{$}.
+@end table
+
+@node Statements, Arrays, Expressions, Top
+@chapter Control Statements in Actions
+@cindex control statement
+
+@dfn{Control statements} such as @code{if}, @code{while}, and so on
+control the flow of execution in @code{awk} programs.  Most of the
+control statements in @code{awk} are patterned on similar statements in
+C.
+
+All the control statements start with special keywords such as @code{if}
+and @code{while}, to distinguish them from simple expressions.
+
+Many control statements contain other statements; for example, the
+@code{if} statement contains another statement which may or may not be
+executed.  The contained statement is called the @dfn{body}.  If you
+want to include more than one statement in the body, group them into a
+single compound statement with curly braces, separating them with
+newlines or semicolons.
+
+@menu
+* If Statement::                Conditionally execute 
+                                some @code{awk} statements.
+* While Statement::             Loop until some condition is satisfied.
+* Do Statement::                Do specified action while looping until some
+                                condition is satisfied.
+* For Statement::               Another looping statement, that provides
+                                initialization and increment clauses.
+* Break Statement::             Immediately exit the innermost enclosing loop.
+* Continue Statement::          Skip to the end of the innermost 
+                                enclosing loop.
+* Next Statement::              Stop processing the current input record.
+* Next File Statement::         Stop processing the current file.
+* Exit Statement::              Stop execution of @code{awk}.
+@end menu
+
+@node If Statement, While Statement, Statements, Statements
+@section The @code{if} Statement
+
+@cindex @code{if} statement
+The @code{if}-@code{else} statement is @code{awk}'s decision-making
+statement.  It looks like this:@refill
+
+@example
+if (@var{condition}) @var{then-body} @r{[}else @var{else-body}@r{]}
+@end example
+
+@noindent
+@var{condition} is an expression that controls what the rest of the
+statement will do.  If @var{condition} is true, @var{then-body} is
+executed; otherwise, @var{else-body} is executed (assuming that the
+@code{else} clause is present).  The @code{else} part of the statement is
+optional.  The condition is considered false if its value is zero or
+the null string, and true otherwise.@refill
+
+Here is an example:
+
+@example
+if (x % 2 == 0)
+    print "x is even"
+else
+    print "x is odd"
+@end example
+
+In this example, if the expression @code{x % 2 == 0} is true (that is,
+the value of @code{x} is divisible by 2), then the first @code{print}
+statement is executed; otherwise, the second @code{print} statement is
+performed.@refill
+
+If the @code{else} appears on the same line as @var{then-body}, and
+@var{then-body} is not a compound statement (i.e., not surrounded by
+curly braces), then a semicolon must separate @var{then-body} from
+@code{else}.  To illustrate this, let's rewrite the previous example:
+
+@example
+awk '@{ if (x % 2 == 0) print "x is even"; else
+        print "x is odd" @}'
+@end example
+
+@noindent
+If you forget the @samp{;}, @code{awk} won't be able to parse the
+statement, and you will get a syntax error.
+
+We would not actually write this example this way, because a human
+reader might fail to see the @code{else} if it were not the first thing
+on its line.
+
+@node While Statement, Do Statement, If Statement, Statements
+@section The @code{while} Statement
+@cindex @code{while} statement
+@cindex loop
+@cindex body of a loop
+
+In programming, a @dfn{loop} means a part of a program that is (or at least can
+be) executed two or more times in succession.
+
+The @code{while} statement is the simplest looping statement in
+@code{awk}.  It repeatedly executes a statement as long as a condition is
+true.  It looks like this:
+
+@example
+while (@var{condition})
+  @var{body}
+@end example
+
+@noindent
+Here @var{body} is a statement that we call the @dfn{body} of the loop,
+and @var{condition} is an expression that controls how long the loop
+keeps running.
+
+The first thing the @code{while} statement does is test @var{condition}.
+If @var{condition} is true, it executes the statement @var{body}.
+(@var{condition} is true when the value 
+is not zero and not a null string.)  After @var{body} has been executed,
+@var{condition} is tested again, and if it is still true, @var{body} is
+executed again.  This process repeats until @var{condition} is no longer
+true.  If @var{condition} is initially false, the body of the loop is
+never executed.@refill
+
+This example prints the first three fields of each record, one per line.
+
+@example
+awk '@{ i = 1
+       while (i <= 3) @{
+           print $i
+           i++
+       @}
+@}'
+@end example
+
+@noindent
+Here the body of the loop is a compound statement enclosed in braces,
+containing two statements.
+
+The loop works like this: first, the value of @code{i} is set to 1.
+Then, the @code{while} tests whether @code{i} is less than or equal to
+three.  This is the case when @code{i} equals one, so the @code{i}-th
+field is printed.  Then the @code{i++} increments the value of @code{i}
+and the loop repeats.  The loop terminates when @code{i} reaches 4.
+
+As you can see, a newline is not required between the condition and the
+body; but using one makes the program clearer unless the body is a
+compound statement or is very simple.  The newline after the open-brace
+that begins the compound statement is not required either, but the
+program would be hard to read without it.
+
+@node Do Statement, For Statement, While Statement, Statements
+@section The @code{do}-@code{while} Statement
+
+The @code{do} loop is a variation of the @code{while} looping statement.
+The @code{do} loop executes the @var{body} once, then repeats @var{body}
+as long as @var{condition} is true.  It looks like this:
+
+@example
+do
+  @var{body}
+while (@var{condition})
+@end example
+
+Even if @var{condition} is false at the start, @var{body} is executed at
+least once (and only once, unless executing @var{body} makes
+@var{condition} true).  Contrast this with the corresponding
+@code{while} statement:
+
+@example
+while (@var{condition})
+  @var{body}
+@end example
+
+@noindent
+This statement does not execute @var{body} even once if @var{condition}
+is false to begin with.
+
+Here is an example of a @code{do} statement:
+
+@example
+awk '@{ i = 1
+       do @{
+          print $0
+          i++
+       @} while (i <= 10)
+@}'
+@end example
+
+@noindent
+This program prints each input record ten times.  It isn't a very realistic example,
+since in this case an ordinary @code{while} would do just as well.  But
+this reflects actual experience; there is only occasionally a real use
+for a @code{do} statement.@refill
+
+@node For Statement, Break Statement, Do Statement, Statements
+@section The @code{for} Statement
+@cindex @code{for} statement
+
+The @code{for} statement makes it more convenient to count iterations of a
+loop.  The general form of the @code{for} statement looks like this:@refill
+
+@example
+for (@var{initialization}; @var{condition}; @var{increment})
+  @var{body}
+@end example
+
+@noindent
+This statement starts by executing @var{initialization}.  Then, as long
+as @var{condition} is true, it repeatedly executes @var{body} and then
+@var{increment}.  Typically @var{initialization} sets a variable to
+either zero or one, @var{increment} adds 1 to it, and @var{condition}
+compares it against the desired number of iterations.
+
+Here is an example of a @code{for} statement:
+
+@example
+@group
+awk '@{ for (i = 1; i <= 3; i++)
+          print $i
+@}'
+@end group
+@end example
+
+@noindent
+This prints the first three fields of each input record, one field per
+line.
+
+In the @code{for} statement, @var{body} stands for any statement, but
+@var{initialization}, @var{condition} and @var{increment} are just
+expressions.  You cannot set more than one variable in the
+@var{initialization} part unless you use a multiple assignment statement
+such as @code{x = y = 0}, which is possible only if all the initial values
+are equal.  (But you can initialize additional variables by writing
+their assignments as separate statements preceding the @code{for} loop.)
+
+The same is true of the @var{increment} part; to increment additional
+variables, you must write separate statements at the end of the loop.
+The C compound expression, using C's comma operator, would be useful in
+this context, but it is not supported in @code{awk}.
+
+Most often, @var{increment} is an increment expression, as in the
+example above.  But this is not required; it can be any expression
+whatever.  For example, this statement prints all the powers of 2
+between 1 and 100:
+
+@example
+for (i = 1; i <= 100; i *= 2)
+  print i
+@end example
+
+Any of the three expressions in the parentheses following the @code{for} may
+be omitted if there is nothing to be done there.  Thus, @w{@samp{for (;x
+> 0;)}} is equivalent to @w{@samp{while (x > 0)}}.  If the
+@var{condition} is omitted, it is treated as true, effectively
+yielding an @dfn{infinite loop} (i.e., a loop that will never
+terminate).@refill
+
+In most cases, a @code{for} loop is an abbreviation for a @code{while}
+loop, as shown here:
+
+@example
+@var{initialization}
+while (@var{condition}) @{
+  @var{body}
+  @var{increment}
+@}
+@end example
+
+@noindent
+The only exception is when the @code{continue} statement
+(@pxref{Continue Statement, ,The @code{continue} Statement}) is used
+inside the loop; changing a @code{for} statement to a @code{while}
+statement in this way can change the effect of the @code{continue}
+statement inside the loop.@refill
+
+There is an alternate version of the @code{for} loop, for iterating over
+all the indices of an array:
+
+@example
+for (i in array)
+    @var{do something with} array[i]
+@end example
+
+@noindent
+@xref{Arrays, ,Arrays in @code{awk}}, for more information on this
+version of the @code{for} loop.
+
+The @code{awk} language has a @code{for} statement in addition to a
+@code{while} statement because often a @code{for} loop is both less work to
+type and more natural to think of.  Counting the number of iterations is
+very common in loops.  It can be easier to think of this counting as part
+of looping rather than as something to do inside the loop.
+
+The next section has more complicated examples of @code{for} loops.
+
+@node Break Statement, Continue Statement, For Statement, Statements
+@section The @code{break} Statement
+@cindex @code{break} statement
+@cindex loops, exiting
+
+The @code{break} statement jumps out of the innermost @code{for},
+@code{while}, or @code{do}-@code{while} loop that encloses it.  The
+following example finds the smallest divisor of any integer, and also
+identifies prime numbers:@refill
+
+@smallexample
+awk '# find smallest divisor of num
+     @{ num = $1
+       for (div = 2; div*div <= num; div++)
+         if (num % div == 0)
+           break
+       if (num % div == 0)
+         printf "Smallest divisor of %d is %d\n", num, div
+       else
+         printf "%d is prime\n", num  @}'
+@end smallexample
+
+When the remainder is zero in the first @code{if} statement, @code{awk}
+immediately @dfn{breaks out} of the containing @code{for} loop.  This means
+that @code{awk} proceeds immediately to the statement following the loop
+and continues processing.  (This is very different from the @code{exit}
+statement which stops the entire @code{awk} program.  
+@xref{Exit Statement, ,The @code{exit} Statement}.)@refill
+
+Here is another program equivalent to the previous one.  It illustrates how
+the @var{condition} of a @code{for} or @code{while} could just as well be
+replaced with a @code{break} inside an @code{if}:
+
+@smallexample
+@group
+awk '# find smallest divisor of num
+     @{ num = $1
+       for (div = 2; ; div++) @{
+         if (num % div == 0) @{
+           printf "Smallest divisor of %d is %d\n", num, div
+           break
+         @}
+         if (div*div > num) @{
+           printf "%d is prime\n", num
+           break
+         @}
+       @}
+@}'
+@end group
+@end smallexample
+
+@node Continue Statement, Next Statement, Break Statement, Statements
+@section The @code{continue} Statement
+
+@cindex @code{continue} statement
+The @code{continue} statement, like @code{break}, is used only inside
+@code{for}, @code{while}, and @code{do}-@code{while} loops.  It skips
+over the rest of the loop body, causing the next cycle around the loop
+to begin immediately.  Contrast this with @code{break}, which jumps out
+of the loop altogether.  Here is an example:@refill
+
+@example
+# print names that don't contain the string "ignore"
+
+# first, save the text of each line
+@{ names[NR] = $0 @}
+
+# print what we're interested in
+END @{
+   for (x in names) @{
+       if (names[x] ~ /ignore/)
+           continue
+       print names[x]
+   @}
+@}
+@end example
+
+If one of the input records contains the string @samp{ignore}, this
+example skips the print statement for that record, and continues back to
+the first statement in the loop.
+
+This is not a practical example of @code{continue}, since it would be
+just as easy to write the loop like this:
+
+@example
+for (x in names)
+  if (names[x] !~ /ignore/)
+    print names[x]
+@end example
+
+@ignore
+from brennan@boeing.com:
+
+page 90, section 9.6.  The example is too artificial as
+the one line program
+
+	!/ignore/
+
+does the same thing.
+@end ignore
+@c ADR --- he's right, but don't worry about this for now
+
+The @code{continue} statement in a @code{for} loop directs @code{awk} to
+skip the rest of the body of the loop, and resume execution with the
+increment-expression of the @code{for} statement.  The following program
+illustrates this fact:@refill
+
+@example
+awk 'BEGIN @{
+     for (x = 0; x <= 20; x++) @{
+         if (x == 5)
+             continue
+         printf ("%d ", x)
+     @}
+     print ""
+@}'
+@end example
+
+@noindent
+This program prints all the numbers from 0 to 20, except for 5, for
+which the @code{printf} is skipped.  Since the increment @code{x++}
+is not skipped, @code{x} does not remain stuck at 5.  Contrast the
+@code{for} loop above with the @code{while} loop:
+
+@example
+awk 'BEGIN @{
+     x = 0
+     while (x <= 20) @{
+         if (x == 5)
+             continue
+         printf ("%d ", x)
+         x++
+     @}
+     print ""
+@}'
+@end example
+
+@noindent
+This program loops forever once @code{x} gets to 5.
+
+As described above, the @code{continue} statement has no meaning when
+used outside the body of a loop.  However, although it was never documented,
+historical implementations of @code{awk} have treated the @code{continue}
+statement outside of a loop as if it were a @code{next} statement
+(@pxref{Next Statement, ,The @code{next} Statement}).  
+By default, @code{gawk} silently supports this usage.  However, if
+@samp{-W posix} has been specified on the command line
+(@pxref{Command Line, ,Invoking @code{awk}}),
+it will be treated as an error, since the @sc{posix} standard specifies
+that @code{continue} should only be used inside the body of a loop.@refill
+
+@node Next Statement, Next File Statement, Continue Statement, Statements
+@section The @code{next} Statement
+@cindex @code{next} statement
+
+The @code{next} statement forces @code{awk} to immediately stop processing
+the current record and go on to the next record.  This means that no
+further rules are executed for the current record.  The rest of the
+current rule's action is not executed either.
+
+Contrast this with the effect of the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}).  That too causes
+@code{awk} to read the next record immediately, but it does not alter the
+flow of control in any way.  So the rest of the current action executes
+with a new input record.
+
+At the highest level, @code{awk} program execution is a loop that reads
+an input record and then tests each rule's pattern against it.  If you
+think of this loop as a @code{for} statement whose body contains the
+rules, then the @code{next} statement is analogous to a @code{continue}
+statement: it skips to the end of the body of this implicit loop, and
+executes the increment (which reads another record).
+
+For example, if your @code{awk} program works only on records with four
+fields, and you don't want it to fail when given bad input, you might
+use this rule near the beginning of the program:
+
+@smallexample
+NF != 4 @{
+  printf("line %d skipped: doesn't have 4 fields\n", FNR) > "/dev/stderr"
+  next
+@}
+@end smallexample
+
+@noindent
+so that the following rules will not see the bad record.  The error
+message is redirected to the standard error output stream, as error
+messages should be.  @xref{Special Files, ,Standard I/O Streams}.
+
+According to the @sc{posix} standard, the behavior is undefined if
+the @code{next} statement is used in a @code{BEGIN} or @code{END} rule.
+@code{gawk} will treat it as a syntax error.
+
+If the @code{next} statement causes the end of the input to be reached,
+then the code in the @code{END} rules, if any, will be executed.
+@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
+
+@node Next File Statement, Exit Statement, Next Statement, Statements
+@section The @code{next file} Statement
+
+@cindex @code{next file} statement
+The @code{next file} statement is similar to the @code{next} statement.
+However, instead of abandoning processing of the current record, the
+@code{next file} statement instructs @code{awk} to stop processing the
+current data file.
+
+Upon execution of the @code{next file} statement, @code{FILENAME} is
+updated to the name of the next data file listed on the command line,
+@code{FNR} is reset to 1, and processing starts over with the first
+rule in the program.  @xref{Built-in Variables}.
+
+If the @code{next file} statement causes the end of the input to be reached,
+then the code in the @code{END} rules, if any, will be executed.
+@xref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}.
+
+The @code{next file} statement is a @code{gawk} extension; it is not
+(currently) available in any other @code{awk} implementation.  You can
+simulate its behavior by creating a library file named @file{nextfile.awk},
+with the following contents.  (This sample program uses user-defined
+functions, a feature that has not been presented yet.
+@xref{User-defined, ,User-defined Functions},
+for more information.)@refill
+
+@smallexample
+# nextfile --- function to skip remaining records in current file
+
+# this should be read in before the "main" awk program
+
+function nextfile() @{ _abandon_ = FILENAME; next @}
+
+_abandon_ == FILENAME && FNR > 1   @{ next @}
+_abandon_ == FILENAME && FNR == 1  @{ _abandon_ = "" @}
+@end smallexample
+
+The @code{nextfile} function simply sets a ``private'' variable@footnote{Since
+all variables in @code{awk} are global, this program uses the common
+practice of prefixing the variable name with an underscore.  In fact, it
+also suffixes the variable name with an underscore, as extra insurance
+against using a variable name that might be used in some other library
+file.} to the name of the current data file, and then retrieves the next
+record.  Since this file is read before the main @code{awk} program,
+the rules that follow the function definition will be executed before the
+rules in the main program.  The first rule continues to skip records as long as
+the name of the input file has not changed, and this is not the first
+record in the file.  This rule is sufficient most of the time.  But what if
+the @emph{same} data file is named twice in a row on the command line?
+This rule would not process the data file the second time.  The second rule
+catches this case: If the data file name is what was being skipped, but
+@code{FNR} is 1, then this is the second time the file is being processed,
+and it should not be skipped.
+
+The @code{next file} statement is useful if you have many data
+files to process, and due to the nature of the data, you expect that you
+will not want to process every record in each file.  Without it, in order
+to move on to the next data file, you would have to continue scanning the
+unwanted records (as described above).  The @code{next file} statement
+accomplishes this much more efficiently.
+
+@ignore
+Would it make sense down the road to nuke `next file' in favor of
+semantics that would make this work?
+
+        function nextfile() { ARGIND++ ; next }
+@end ignore
+
+@node Exit Statement,  , Next File Statement, Statements
+@section The @code{exit} Statement
+
+@cindex @code{exit} statement
+The @code{exit} statement causes @code{awk} to immediately stop
+executing the current rule and to stop processing input; any remaining input
+is ignored.@refill
+
+If an @code{exit} statement is executed from a @code{BEGIN} rule, the
+program stops processing everything immediately.  No input records are
+read.  However, if an @code{END} rule is present, it is executed
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).
+
+If @code{exit} is used as part of an @code{END} rule, it causes
+the program to stop immediately.
+
+An @code{exit} statement that is part of an ordinary rule (that is, not part
+of a @code{BEGIN} or @code{END} rule) stops the execution of any further
+automatic rules, but the @code{END} rule is executed if there is one.
+If you do not want the @code{END} rule to do its job in this case, you
+can set a variable to nonzero before the @code{exit} statement, and check
+that variable in the @code{END} rule.
+
+If an argument is supplied to @code{exit}, its value is used as the exit
+status code for the @code{awk} process.  If no argument is supplied,
+@code{exit} returns status zero (success).@refill
+
+For example, let's say you've discovered an error condition you really
+don't know how to handle.  Conventionally, programs report this by
+exiting with a nonzero status.  Your @code{awk} program can do this
+using an @code{exit} statement with a nonzero argument.  Here's an
+example of this:@refill
+
+@example
+@group
+BEGIN @{
+       if (("date" | getline date_now) <= 0) @{
+         print "Can't get system date" > "/dev/stderr"
+         exit 4
+       @}
+@}
+@end group
+@end example
+
+@node Arrays, Built-in, Statements, Top
+@chapter Arrays in @code{awk}
+
+An @dfn{array} is a table of values, called @dfn{elements}.  The
+elements of an array are distinguished by their indices.  @dfn{Indices}
+may be either numbers or strings.  Each array has a name, which looks
+like a variable name, but must not be in use as a variable name in the
+same @code{awk} program.
+
+@menu
+* Array Intro::                 Introduction to Arrays
+* Reference to Elements::       How to examine one element of an array.
+* Assigning Elements::          How to change an element of an array.
+* Array Example::               Basic Example of an Array
+* Scanning an Array::           A variation of the @code{for} statement.  
+                                It loops through the indices of 
+                                an array's existing elements.
+* Delete::                      The @code{delete} statement removes 
+                                an element from an array.
+* Numeric Array Subscripts::    How to use numbers as subscripts in @code{awk}.
+* Multi-dimensional::           Emulating multi-dimensional arrays in @code{awk}.
+* Multi-scanning::              Scanning multi-dimensional arrays.
+@end menu
+
+@node Array Intro, Reference to Elements, Arrays, Arrays
+@section Introduction to Arrays
+
+@cindex arrays
+The @code{awk} language has one-dimensional @dfn{arrays} for storing groups
+of related strings or numbers.
+
+Every @code{awk} array must have a name.  Array names have the same
+syntax as variable names; any valid variable name would also be a valid
+array name.  But you cannot use one name in both ways (as an array and
+as a variable) in one @code{awk} program.
+
+Arrays in @code{awk} superficially resemble arrays in other programming
+languages; but there are fundamental differences.  In @code{awk}, you
+don't need to specify the size of an array before you start to use it.
+Additionally, any number or string in @code{awk} may be used as an
+array index.
+
+In most other languages, you have to @dfn{declare} an array and specify
+how many elements or components it contains.  In such languages, the
+declaration causes a contiguous block of memory to be allocated for that
+many elements.  An index in the array must be a nonnegative integer; for
+example, the index 0 specifies the first element in the array, which is
+actually stored at the beginning of the block of memory.  Index 1
+specifies the second element, which is stored in memory right after the
+first element, and so on.  It is impossible to add more elements to the
+array, because it has room for only as many elements as you declared.
+
+A contiguous array of four elements might look like this,
+conceptually, if the element values are @code{8}, @code{"foo"},
+@code{""} and @code{30}:@refill
+
+@example
++---------+---------+--------+---------+
+|    8    |  "foo"  |   ""   |    30   |    @r{value}
++---------+---------+--------+---------+
+     0         1         2         3        @r{index}
+@end example
+
+@noindent
+Only the values are stored; the indices are implicit from the order of
+the values.  @code{8} is the value at index 0, because @code{8} appears in the
+position with 0 elements before it.
+
+@cindex arrays, definition of
+@cindex associative arrays
+Arrays in @code{awk} are different: they are @dfn{associative}.  This means
+that each array is a collection of pairs: an index, and its corresponding
+array element value:
+
+@example
+@r{Element} 4     @r{Value} 30
+@r{Element} 2     @r{Value} "foo"
+@r{Element} 1     @r{Value} 8
+@r{Element} 3     @r{Value} ""
+@end example
+
+@noindent
+We have shown the pairs in jumbled order because their order is irrelevant.
+
+One advantage of an associative array is that new pairs can be added
+at any time.  For example, suppose we add to the above array a tenth element
+whose value is @w{@code{"number ten"}}.  The result is this:
+
+@example
+@r{Element} 10    @r{Value} "number ten"
+@r{Element} 4     @r{Value} 30
+@r{Element} 2     @r{Value} "foo"
+@r{Element} 1     @r{Value} 8
+@r{Element} 3     @r{Value} ""
+@end example
+
+@noindent
+Now the array is @dfn{sparse} (i.e., some indices are missing): it has
+elements 1--4 and 10, but doesn't have elements 5, 6, 7, 8, or 9.@refill
+
+Another consequence of associative arrays is that the indices don't
+have to be positive integers.  Any number, or even a string, can be
+an index.  For example, here is an array which translates words from
+English into French:
+
+@example
+@r{Element} "dog" @r{Value} "chien"
+@r{Element} "cat" @r{Value} "chat"
+@r{Element} "one" @r{Value} "un"
+@r{Element} 1     @r{Value} "un"
+@end example
+
+@noindent
+Here we decided to translate the number 1 in both spelled-out and
+numeric form---thus illustrating that a single array can have both
+numbers and strings as indices.
+
+When @code{awk} creates an array for you, e.g., with the @code{split}
+built-in function,
+that array's indices are consecutive integers starting at 1.
+(@xref{String Functions, ,Built-in Functions for String Manipulation}.)
+
+@node Reference to Elements, Assigning Elements, Array Intro, Arrays
+@section Referring to an Array Element
+@cindex array reference
+@cindex element of array
+@cindex reference to array
+
+The principal way of using an array is to refer to one of its elements.
+An array reference is an expression which looks like this:
+
+@example
+@var{array}[@var{index}]
+@end example
+
+@noindent
+Here, @var{array} is the name of an array.  The expression @var{index} is
+the index of the element of the array that you want.
+
+The value of the array reference is the current value of that array
+element.  For example, @code{foo[4.3]} is an expression for the element
+of array @code{foo} at index 4.3.
+
+If you refer to an array element that has no recorded value, the value
+of the reference is @code{""}, the null string.  This includes elements
+to which you have not assigned any value, and elements that have been
+deleted (@pxref{Delete, ,The @code{delete} Statement}).  Such a reference
+automatically creates that array element, with the null string as its value.
+(In some cases, this is unfortunate, because it might waste memory inside
+@code{awk}.)
+
+@cindex arrays, presence of elements
+You can find out if an element exists in an array at a certain index with
+the expression:
+
+@example
+@var{index} in @var{array}
+@end example
+
+@noindent
+This expression tests whether or not the particular index exists,
+without the side effect of creating that element if it is not present.
+The expression has the value 1 (true) if @code{@var{array}[@var{index}]}
+exists, and 0 (false) if it does not exist.@refill
+
+For example, to test whether the array @code{frequencies} contains the
+index @code{"2"}, you could write this statement:@refill
+
+@smallexample
+if ("2" in frequencies) print "Subscript \"2\" is present."
+@end smallexample
+
+Note that this is @emph{not} a test of whether or not the array
+@code{frequencies} contains an element whose @emph{value} is @code{"2"}.
+(There is no way to do that except to scan all the elements.)  Also, this
+@emph{does not} create @code{frequencies["2"]}, while the following
+(incorrect) alternative would do so:@refill
+
+@smallexample
+if (frequencies["2"] != "") print "Subscript \"2\" is present."
+@end smallexample
+
+@node Assigning Elements, Array Example, Reference to Elements, Arrays
+@section Assigning Array Elements
+@cindex array assignment
+@cindex element assignment
+
+Array elements are lvalues: they can be assigned values just like
+@code{awk} variables:
+
+@example
+@var{array}[@var{subscript}] = @var{value}
+@end example
+
+@noindent
+Here @var{array} is the name of your array.  The expression
+@var{subscript} is the index of the element of the array that you want
+to assign a value.  The expression @var{value} is the value you are
+assigning to that element of the array.@refill
+
+@node Array Example, Scanning an Array, Assigning Elements, Arrays
+@section Basic Example of an Array
+
+The following program takes a list of lines, each beginning with a line
+number, and prints them out in order of line number.  The line numbers are
+not in order, however, when they are first read:  they are scrambled.  This
+program sorts the lines by making an array using the line numbers as
+subscripts.  It then prints out the lines in sorted order of their numbers.
+It is a very simple program, and gets confused if it encounters repeated
+numbers, gaps, or lines that don't begin with a number.@refill
+
+@example
+@{
+  if ($1 > max)
+    max = $1
+  arr[$1] = $0
+@}
+
+END @{
+  for (x = 1; x <= max; x++)
+    print arr[x]
+@}
+@end example
+
+The first rule keeps track of the largest line number seen so far;
+it also stores each line into the array @code{arr}, at an index that
+is the line's number.
+
+The second rule runs after all the input has been read, to print out
+all the lines.
+
+When this program is run with the following input:
+
+@example
+5  I am the Five man
+2  Who are you?  The new number two!
+4  . . . And four on the floor
+1  Who is number one?
+3  I three you.
+@end example
+
+@noindent
+its output is this:
+
+@example
+1  Who is number one?
+2  Who are you?  The new number two!
+3  I three you.
+4  . . . And four on the floor
+5  I am the Five man
+@end example
+
+If a line number is repeated, the last line with a given number overrides
+the others.
+
+Gaps in the line numbers can be handled with an easy improvement to the
+program's @code{END} rule:
+
+@example
+END @{
+  for (x = 1; x <= max; x++)
+    if (x in arr)
+      print arr[x]
+@}
+@end example
+
+@node Scanning an Array, Delete, Array Example, Arrays
+@section Scanning all Elements of an Array
+@cindex @code{for (x in @dots{})}
+@cindex arrays, special @code{for} statement
+@cindex scanning an array
+
+In programs that use arrays, often you need a loop that executes
+once for each element of an array.  In other languages, where arrays are
+contiguous and indices are limited to nonnegative integers, this is
+easy: the largest index is one less than the length of the array, and you can
+find all the valid indices by counting from zero up to that value.  This
+technique won't do the job in @code{awk}, since any number or string
+may be an array index.  So @code{awk} has a special kind of @code{for}
+statement for scanning an array:
+
+@example
+for (@var{var} in @var{array})
+  @var{body}
+@end example
+
+@noindent
+This loop executes @var{body} once for each different value that your
+program has previously used as an index in @var{array}, with the
+variable @var{var} set to that index.@refill
+
+Here is a program that uses this form of the @code{for} statement.  The
+first rule scans the input records and notes which words appear (at
+least once) in the input, by storing a 1 into the array @code{used} with
+the word as index.  The second rule scans the elements of @code{used} to
+find all the distinct words that appear in the input.  It prints each
+word that is more than 10 characters long, and also prints the number of
+such words.  @xref{Built-in, ,Built-in Functions}, for more information
+on the built-in function @code{length}.
+
+@smallexample
+# Record a 1 for each word that is used at least once.
+@{
+  for (i = 1; i <= NF; i++)
+    used[$i] = 1
+@}
+
+# Find number of distinct words more than 10 characters long.
+END @{
+  for (x in used)
+    if (length(x) > 10) @{
+      ++num_long_words
+      print x
+  @}
+  print num_long_words, "words longer than 10 characters"
+@}
+@end smallexample
+
+@noindent
+@xref{Sample Program}, for a more detailed example of this type.
+
+The order in which elements of the array are accessed by this statement
+is determined by the internal arrangement of the array elements within
+@code{awk} and cannot be controlled or changed.  This can lead to
+problems if new elements are added to @var{array} by statements in
+@var{body}; you cannot predict whether or not the @code{for} loop will
+reach them.  Similarly, changing @var{var} inside the loop can produce
+strange results.  It is best to avoid such things.@refill
+
+@node Delete, Numeric Array Subscripts, Scanning an Array, Arrays
+@section The @code{delete} Statement
+@cindex @code{delete} statement
+@cindex deleting elements of arrays
+@cindex removing elements of arrays
+@cindex arrays, deleting an element
+
+You can remove an individual element of an array using the @code{delete}
+statement:
+
+@example
+delete @var{array}[@var{index}]
+@end example
+
+Once an element has been deleted, you can no longer obtain any value
+the element once had; it is as if you had never referred to it and had
+never given it any value.
+
+Here is an example of deleting elements in an array:
+
+@example
+for (i in frequencies)
+  delete frequencies[i]
+@end example
+
+@noindent
+This example removes all the elements from the array @code{frequencies}.
+
+If you delete an element, a subsequent @code{for} statement to scan the array
+will not report that element, and the @code{in} operator to check for
+the presence of that element will return 0:
+
+@example
+delete foo[4]
+if (4 in foo)
+  print "This will never be printed"
+@end example
+
+It is not an error to delete an element which does not exist.
+
+@node Numeric Array Subscripts, Multi-dimensional, Delete, Arrays
+@section Using Numbers to Subscript Arrays
+
+An important aspect of arrays to remember is that array subscripts
+are @emph{always} strings.  If you use a numeric value as a subscript,
+it will be converted to a string value before it is used for subscripting
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@cindex conversions, during subscripting
+@cindex numbers, used as subscripts
+@vindex CONVFMT
+This means that the value of @code{CONVFMT} can potentially
+affect how your program accesses elements of an array.  For example:
+
+@example
+a = b = 12.153
+data[a] = 1
+CONVFMT = "%2.2f"
+if (b in data)
+    printf "%s is in data", b
+else
+    printf "%s is not in data", b
+@end example
+
+@noindent
+should print @samp{12.15 is not in data}.  The first statement gives
+both @code{a} and @code{b} the same numeric value.  Assigning to
+@code{data[a]} first gives @code{a} the string value @code{"12.153"}
+(using the default conversion value of @code{CONVFMT}, @code{"%.6g"}),
+and then assigns 1 to @code{data["12.153"]}.  The program then changes
+the value of @code{CONVFMT}.  The test @samp{(b in data)} forces @code{b}
+to be converted to a string, this time @code{"12.15"}, since the value of
+@code{CONVFMT} only allows two digits after the decimal point.  This test fails,
+since @code{"12.15"} is a different string from @code{"12.153"}.@refill
+
+According to the rules for conversions
+(@pxref{Conversion, ,Conversion of Strings and Numbers}), integer
+values are always converted to strings as integers, no matter what the
+value of @code{CONVFMT} may happen to be.  So the usual case of@refill
+
+@example
+for (i = 1; i <= maxsub; i++)
+    @i{do something with} array[i]
+@end example
+
+@noindent
+will work, no matter what the value of @code{CONVFMT}.
+
+Like many things in @code{awk}, the majority of the time things work
+as you would expect them to work.  But it is useful to have a precise
+knowledge of the actual rules, since sometimes they can have a subtle
+effect on your programs.
+
+@node Multi-dimensional, Multi-scanning, Numeric Array Subscripts, Arrays
+@section Multi-dimensional Arrays
+
+@c the following index entry is an overfull hbox.  --mew 30jan1992
+@cindex subscripts in arrays
+@cindex arrays, multi-dimensional subscripts
+@cindex multi-dimensional subscripts
+A multi-dimensional array is an array in which an element is identified
+by a sequence of indices, not a single index.  For example, a
+two-dimensional array requires two indices.  The usual way (in most
+languages, including @code{awk}) to refer to an element of a
+two-dimensional array named @code{grid} is with
+@code{grid[@var{x},@var{y}]}.
+
+@vindex SUBSEP
+Multi-dimensional arrays are supported in @code{awk} through
+concatenation of indices into one string.  What happens is that
+@code{awk} converts the indices into strings
+(@pxref{Conversion, ,Conversion of Strings and Numbers}) and
+concatenates them together, with a separator between them.  This creates
+a single string that describes the values of the separate indices.  The
+combined string is used as a single index into an ordinary,
+one-dimensional array.  The separator used is the value of the built-in
+variable @code{SUBSEP}.@refill
+
+For example, suppose we evaluate the expression @code{foo[5,12]="value"}
+when the value of @code{SUBSEP} is @code{"@@"}.  The numbers 5 and 12 are
+converted to strings and
+concatenated with an @samp{@@} between them, yielding @code{"5@@12"}; thus,
+the array element @code{foo["5@@12"]} is set to @code{"value"}.@refill
+
+Once the element's value is stored, @code{awk} has no record of whether
+it was stored with a single index or a sequence of indices.  The two
+expressions @code{foo[5,12]} and @w{@code{foo[5 SUBSEP 12]}} always have
+the same value.
+
+The default value of @code{SUBSEP} is the string @code{"\034"},
+which contains a nonprinting character that is unlikely to appear in an
+@code{awk} program or in the input data.
+
+The usefulness of choosing an unlikely character comes from the fact
+that index values that contain a string matching @code{SUBSEP} lead to
+combined strings that are ambiguous.  Suppose that @code{SUBSEP} were
+@code{"@@"}; then @w{@code{foo["a@@b", "c"]}} and @w{@code{foo["a",
+"b@@c"]}} would be indistinguishable because both would actually be
+stored as @code{foo["a@@b@@c"]}.  Because @code{SUBSEP} is
+@code{"\034"}, such confusion can arise only when an index
+contains the character with ASCII code 034, which is a rare
+event.@refill
+
+You can test whether a particular index-sequence exists in a
+``multi-dimensional'' array with the same operator @code{in} used for single
+dimensional arrays.  Instead of a single index as the left-hand operand,
+write the whole sequence of indices, separated by commas, in
+parentheses:@refill
+
+@example
+(@var{subscript1}, @var{subscript2}, @dots{}) in @var{array}
+@end example
+
+The following example treats its input as a two-dimensional array of
+fields; it rotates this array 90 degrees clockwise and prints the
+result.  It assumes that all lines have the same number of
+elements.
+
+@example
+awk '@{
+     if (max_nf < NF)
+          max_nf = NF
+     max_nr = NR
+     for (x = 1; x <= NF; x++)
+          vector[x, NR] = $x
+@}
+
+END @{
+     for (x = 1; x <= max_nf; x++) @{
+          for (y = max_nr; y >= 1; --y)
+               printf("%s ", vector[x, y])
+          printf("\n")
+     @}
+@}'
+@end example
+
+@noindent
+When given the input:
+
+@example
+@group
+1 2 3 4 5 6
+2 3 4 5 6 1
+3 4 5 6 1 2
+4 5 6 1 2 3
+@end group
+@end example
+
+@noindent
+it produces:
+
+@example
+@group
+4 3 2 1
+5 4 3 2
+6 5 4 3
+1 6 5 4
+2 1 6 5
+3 2 1 6
+@end group
+@end example
+
+@node Multi-scanning,  , Multi-dimensional, Arrays
+@section Scanning Multi-dimensional Arrays
+
+There is no special @code{for} statement for scanning a
+``multi-dimensional'' array; there cannot be one, because in truth there
+are no multi-dimensional arrays or elements; there is only a
+multi-dimensional @emph{way of accessing} an array.
+
+However, if your program has an array that is always accessed as
+multi-dimensional, you can get the effect of scanning it by combining
+the scanning @code{for} statement
+(@pxref{Scanning an Array, ,Scanning all Elements of an Array}) with the
+@code{split} built-in function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+It works like this:@refill
+
+@example
+for (combined in @var{array}) @{
+  split(combined, separate, SUBSEP)
+  @dots{}
+@}
+@end example
+
+@noindent
+This finds each concatenated, combined index in the array, and splits it
+into the individual indices by breaking it apart where the value of
+@code{SUBSEP} appears.  The split-out indices become the elements of
+the array @code{separate}.
+
+Thus, suppose you have previously stored a value in @code{@var{array}[1,
+"foo"]}; then an element with index @code{"1\034foo"} exists in
+@var{array}.  (Recall that the default value of @code{SUBSEP} contains
+the character with code 034.)  Sooner or later the @code{for} statement
+will find that index and do an iteration with @code{combined} set to
+@code{"1\034foo"}.  Then the @code{split} function is called as
+follows:
+
+@example
+split("1\034foo", separate, "\034")
+@end example
+
+@noindent
+The result of this is to set @code{separate[1]} to 1 and @code{separate[2]}
+to @code{"foo"}.  Presto, the original sequence of separate indices has
+been recovered.
+
+@node Built-in, User-defined, Arrays, Top
+@chapter Built-in Functions
+
+@cindex built-in functions
+@dfn{Built-in} functions are functions that are always available for
+your @code{awk} program to call.  This chapter defines all the built-in
+functions in @code{awk}; some of them are mentioned in other sections,
+but they are summarized here for your convenience.  (You can also define
+new functions yourself.  @xref{User-defined, ,User-defined Functions}.)
+
+@menu
+* Calling Built-in::            How to call built-in functions.
+* Numeric Functions::           Functions that work with numbers,
+                                including @code{int}, @code{sin} and @code{rand}.
+* String Functions::            Functions for string manipulation,
+                                such as @code{split}, @code{match}, and @code{sprintf}.
+* I/O Functions::               Functions for files and shell commands.
+* Time Functions::              Functions for dealing with time stamps.
+@end menu
+
+@node Calling Built-in, Numeric Functions, Built-in, Built-in
+@section Calling Built-in Functions
+
+To call a built-in function, write the name of the function followed
+by arguments in parentheses.  For example, @code{atan2(y + z, 1)}
+is a call to the function @code{atan2}, with two arguments.
+
+Whitespace is ignored between the built-in function name and the
+open-parenthesis, but we recommend that you avoid using whitespace
+there.  User-defined functions do not permit whitespace in this way, and
+you will find it easier to avoid mistakes by following a simple
+convention which always works: no whitespace after a function name.
+
+Each built-in function accepts a certain number of arguments.  In most
+cases, any extra arguments given to built-in functions are ignored.  The
+defaults for omitted arguments vary from function to function and are
+described under the individual functions.
+
+When a function is called, expressions that create the function's actual
+parameters are evaluated completely before the function call is performed.
+For example, in the code fragment:
+
+@example
+i = 4
+j = sqrt(i++)
+@end example
+
+@noindent
+the variable @code{i} is set to 5 before @code{sqrt} is called
+with a value of 4 for its actual parameter.
+
+@node Numeric Functions, String Functions, Calling Built-in, Built-in
+@section Numeric Built-in Functions
+@c I didn't make all the examples small because a couple of them were
+@c short already. --mew 29jan1992
+
+Here is a full list of built-in functions that work with numbers:
+
+@table @code
+@item int(@var{x})
+This gives you the integer part of @var{x}, truncated toward 0.  This
+produces the nearest integer to @var{x}, located between @var{x} and 0.
+
+For example, @code{int(3)} is 3, @code{int(3.9)} is 3, @code{int(-3.9)}
+is @minus{}3, and @code{int(-3)} is @minus{}3 as well.@refill
+
+@item sqrt(@var{x})
+This gives you the positive square root of @var{x}.  It reports an error
+if @var{x} is negative.  Thus, @code{sqrt(4)} is 2.@refill
+
+@item exp(@var{x})
+This gives you the exponential of @var{x}, or reports an error if
+@var{x} is out of range.  The range of values @var{x} can have depends
+on your machine's floating point representation.@refill
+
+@item log(@var{x})
+This gives you the natural logarithm of @var{x}, if @var{x} is positive;
+otherwise, it reports an error.@refill
+
+@item sin(@var{x})
+This gives you the sine of @var{x}, with @var{x} in radians.
+
+@item cos(@var{x})
+This gives you the cosine of @var{x}, with @var{x} in radians.
+
+@item atan2(@var{y}, @var{x})
+This gives you the arctangent of @code{@var{y} / @var{x}} in radians.
+
+@item rand()
+This gives you a random number.  The values of @code{rand} are
+uniformly-distributed between 0 and 1.  The value is never 0 and never
+1.
+
+Often you want random integers instead.  Here is a user-defined function
+you can use to obtain a random nonnegative integer less than @var{n}:
+
+@example
+function randint(n) @{
+     return int(n * rand())
+@}
+@end example
+
+@noindent
+The multiplication produces a random real number greater than 0 and less
+than @var{n}.  We then make it an integer (using @code{int}) between 0
+and @code{@var{n} @minus{} 1}.
+
+Here is an example where a similar function is used to produce
+random integers between 1 and @var{n}.  Note that this program will
+print a new random number for each input record.
+
+@smallexample
+awk '
+# Function to roll a simulated die.
+function roll(n) @{ return 1 + int(rand() * n) @}
+
+# Roll 3 six-sided dice and print total number of points.
+@{
+      printf("%d points\n", roll(6)+roll(6)+roll(6))
+@}'
+@end smallexample
+
+@strong{Note:} @code{rand} starts generating numbers from the same
+point, or @dfn{seed}, each time you run @code{awk}.  This means that
+a program will produce the same results each time you run it.
+The numbers are random within one @code{awk} run, but predictable
+from run to run.  This is convenient for debugging, but if you want
+a program to do different things each time it is used, you must change
+the seed to a value that will be different in each run.  To do this,
+use @code{srand}.
+
+@item srand(@var{x})
+The function @code{srand} sets the starting point, or @dfn{seed},
+for generating random numbers to the value @var{x}.
+
+Each seed value leads to a particular sequence of ``random'' numbers.
+Thus, if you set the seed to the same value a second time, you will get
+the same sequence of ``random'' numbers again.
+
+If you omit the argument @var{x}, as in @code{srand()}, then the current
+date and time of day are used for a seed.  This is the way to get random
+numbers that are truly unpredictable.
+
+The return value of @code{srand} is the previous seed.  This makes it
+easy to keep track of the seeds for use in consistently reproducing
+sequences of random numbers.
+@end table
+
+@node String Functions, I/O Functions, Numeric Functions, Built-in
+@section Built-in Functions for String Manipulation
+
+The functions in this section look at or change the text of one or more
+strings.
+
+@table @code
+@item index(@var{in}, @var{find})
+@findex index
+This searches the string @var{in} for the first occurrence of the string
+@var{find}, and returns the position in characters where that occurrence
+begins in the string @var{in}.  For example:@refill
+
+@smallexample
+awk 'BEGIN @{ print index("peanut", "an") @}'
+@end smallexample
+
+@noindent
+prints @samp{3}.  If @var{find} is not found, @code{index} returns 0.
+(Remember that string indices in @code{awk} start at 1.)
+
+@item length(@var{string})
+@findex length
+This gives you the number of characters in @var{string}.  If
+@var{string} is a number, the length of the digit string representing
+that number is returned.  For example, @code{length("abcde")} is 5.  By
+contrast, @code{length(15 * 35)} works out to 3.  How?  Well, 15 * 35 =
+525, and 525 is then converted to the string @samp{"525"}, which has
+three characters.
+
+If no argument is supplied, @code{length} returns the length of @code{$0}.
+
+In older versions of @code{awk}, you could call the @code{length} function
+without any parentheses.  Doing so is marked as ``deprecated'' in the
+@sc{posix} standard.  This means that while you can do this in your
+programs, it is a feature that can eventually be removed from a future
+version of the standard.  Therefore, for maximal portability of your
+@code{awk} programs you should always supply the parentheses.
+
+@item match(@var{string}, @var{regexp})
+@findex match
+The @code{match} function searches the string, @var{string}, for the
+longest, leftmost substring matched by the regular expression,
+@var{regexp}.  It returns the character position, or @dfn{index}, of
+where that substring begins (1, if it starts at the beginning of
+@var{string}).  If no match is found, it returns 0.
+
+@vindex RSTART
+@vindex RLENGTH
+The @code{match} function sets the built-in variable @code{RSTART} to
+the index.  It also sets the built-in variable @code{RLENGTH} to the
+length in characters of the matched substring.  If no match is found,
+@code{RSTART} is set to 0, and @code{RLENGTH} to @minus{}1.
+
+For example:
+
+@smallexample
+awk '@{
+       if ($1 == "FIND")
+         regex = $2
+       else @{
+         where = match($0, regex)
+         if (where)
+           print "Match of", regex, "found at", where, "in", $0
+       @}
+@}'
+@end smallexample
+
+@noindent
+This program looks for lines that match the regular expression stored in
+the variable @code{regex}.  This regular expression can be changed.  If the
+first word on a line is @samp{FIND}, @code{regex} is changed to be the
+second word on that line.  Therefore, given:
+
+@smallexample
+FIND fo*bar
+My program was a foobar
+But none of it would doobar
+FIND Melvin
+JF+KM
+This line is property of The Reality Engineering Co.
+This file created by Melvin.
+@end smallexample
+
+@noindent
+@code{awk} prints:
+
+@smallexample
+Match of fo*bar found at 18 in My program was a foobar
+Match of Melvin found at 22 in This file created by Melvin.
+@end smallexample
+
+@item split(@var{string}, @var{array}, @var{fieldsep})
+@findex split
+This divides @var{string} into pieces separated by @var{fieldsep},
+and stores the pieces in @var{array}.  The first piece is stored in
+@code{@var{array}[1]}, the second piece in @code{@var{array}[2]}, and so
+forth.  The string value of the third argument, @var{fieldsep}, is
+a regexp describing where to split @var{string} (much as @code{FS} can
+be a regexp describing where to split input records).  If
+the @var{fieldsep} is omitted, the value of @code{FS} is used.
+@code{split} returns the number of elements created.@refill
+
+The @code{split} function, then, splits strings into pieces in a
+manner similar to the way input lines are split into fields.  For example:
+
+@smallexample
+split("auto-da-fe", a, "-")
+@end smallexample
+
+@noindent
+splits the string @samp{auto-da-fe} into three fields using @samp{-} as the
+separator.  It sets the contents of the array @code{a} as follows:
+
+@smallexample
+a[1] = "auto"
+a[2] = "da"
+a[3] = "fe"
+@end smallexample
+
+@noindent
+The value returned by this call to @code{split} is 3.
+
+As with input field-splitting, when the value of @var{fieldsep} is
+@code{" "}, leading and trailing whitespace is ignored, and the elements
+are separated by runs of whitespace.
+
+@item sprintf(@var{format}, @var{expression1},@dots{})
+@findex sprintf
+This returns (without printing) the string that @code{printf} would
+have printed out with the same arguments
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).
+For example:@refill
+
+@smallexample
+sprintf("pi = %.2f (approx.)", 22/7)
+@end smallexample
+
+@noindent
+returns the string @w{@code{"pi = 3.14 (approx.)"}}.
+
+@item sub(@var{regexp}, @var{replacement}, @var{target})
+@findex sub
+The @code{sub} function alters the value of @var{target}.
+It searches this value, which should be a string, for the
+leftmost substring matched by the regular expression, @var{regexp},
+extending this match as far as possible.  Then the entire string is
+changed by replacing the matched text with @var{replacement}.
+The modified string becomes the new value of @var{target}.
+
+This function is peculiar because @var{target} is not simply
+used to compute a value, and not just any expression will do: it
+must be a variable, field or array reference, so that @code{sub} can
+store a modified value there.  If this argument is omitted, then the
+default is to use and alter @code{$0}.
+
+For example:@refill
+
+@smallexample
+str = "water, water, everywhere"
+sub(/at/, "ith", str)
+@end smallexample
+
+@noindent
+sets @code{str} to @w{@code{"wither, water, everywhere"}}, by replacing the
+leftmost, longest occurrence of @samp{at} with @samp{ith}.
+
+The @code{sub} function returns the number of substitutions made (either
+one or zero).
+
+If the special character @samp{&} appears in @var{replacement}, it
+stands for the precise substring that was matched by @var{regexp}.  (If
+the regexp can match more than one string, then this precise substring
+may vary.)  For example:@refill
+
+@smallexample
+awk '@{ sub(/candidate/, "& and his wife"); print @}'
+@end smallexample
+
+@noindent
+changes the first occurrence of @samp{candidate} to @samp{candidate
+and his wife} on each input line.
+
+Here is another example:
+
+@smallexample
+awk 'BEGIN @{
+        str = "daabaaa"
+        sub(/a+/, "c&c", str)
+        print str
+@}'
+@end smallexample
+
+@noindent
+prints @samp{dcaacbaaa}.  This shows how @samp{&} can represent a non-constant
+string, and also illustrates the ``leftmost, longest'' rule.
+
+The effect of this special character (@samp{&}) can be turned off by putting a
+backslash before it in the string.  As usual, to insert one backslash in
+the string, you must write two backslashes.  Therefore, write @samp{\\&}
+in a string constant to include a literal @samp{&} in the replacement.
+For example, here is how to replace the first @samp{|} on each line with
+an @samp{&}:@refill
+
+@smallexample
+awk '@{ sub(/\|/, "\\&"); print @}'
+@end smallexample
+
+@strong{Note:} as mentioned above, the third argument to @code{sub} must
+be an lvalue.  Some versions of @code{awk} allow the third argument to
+be an expression which is not an lvalue.  In such a case, @code{sub}
+would still search for the pattern and return 0 or 1, but the result of
+the substitution (if any) would be thrown away because there is no place
+to put it.  Such versions of @code{awk} accept expressions like
+this:@refill
+
+@smallexample
+sub(/USA/, "United States", "the USA and Canada")
+@end smallexample
+
+@noindent
+But that is considered erroneous in @code{gawk}.
+
+@item gsub(@var{regexp}, @var{replacement}, @var{target})
+@findex gsub
+This is similar to the @code{sub} function, except @code{gsub} replaces
+@emph{all} of the longest, leftmost, @emph{nonoverlapping} matching
+substrings it can find.  The @samp{g} in @code{gsub} stands for
+``global,'' which means replace everywhere.  For example:@refill
+
+@smallexample
+awk '@{ gsub(/Britain/, "United Kingdom"); print @}'
+@end smallexample
+
+@noindent
+replaces all occurrences of the string @samp{Britain} with @samp{United
+Kingdom} for all input records.@refill
+
+The @code{gsub} function returns the number of substitutions made.  If
+the variable to be searched and altered, @var{target}, is
+omitted, then the entire input record, @code{$0}, is used.@refill
+
+As in @code{sub}, the characters @samp{&} and @samp{\} are special, and
+the third argument must be an lvalue.
+
+@item substr(@var{string}, @var{start}, @var{length})
+@findex substr
+This returns a @var{length}-character-long substring of @var{string},
+starting at character number @var{start}.  The first character of a
+string is character number one.  For example,
+@code{substr("washington", 5, 3)} returns @code{"ing"}.@refill
+
+If @var{length} is not present, this function returns the whole suffix of
+@var{string} that begins at character number @var{start}.  For example,
+@code{substr("washington", 5)} returns @code{"ington"}.  This is also
+the case if @var{length} is greater than the number of characters remaining
+in the string, counting from character number @var{start}.
+
+@item tolower(@var{string})
+@findex tolower
+This returns a copy of @var{string}, with each upper-case character
+in the string replaced with its corresponding lower-case character.
+Nonalphabetic characters are left unchanged.  For example,
+@code{tolower("MiXeD cAsE 123")} returns @code{"mixed case 123"}.
+
+@item toupper(@var{string})
+@findex toupper
+This returns a copy of @var{string}, with each lower-case character
+in the string replaced with its corresponding upper-case character.
+Nonalphabetic characters are left unchanged.  For example,
+@code{toupper("MiXeD cAsE 123")} returns @code{"MIXED CASE 123"}.
+@end table
+
+@node I/O Functions, Time Functions, String Functions, Built-in
+@section Built-in Functions for Input/Output
+
+@table @code
+@item close(@var{filename})
+Close the file @var{filename}, for input or output.  The argument may
+alternatively be a shell command that was used for redirecting to or
+from a pipe; then the pipe is closed.
+
+@xref{Close Input, ,Closing Input Files and Pipes}, regarding closing
+input files and pipes.  @xref{Close Output, ,Closing Output Files and Pipes},
+regarding closing output files and pipes.@refill
+
+@item system(@var{command})
+@findex system
+@c the following index entry is an overfull hbox.  --mew 30jan1992
+@cindex interaction, @code{awk} and other programs
+The @code{system} function allows the user to execute operating system commands
+and then return to the @code{awk} program.  The @code{system} function
+executes the command given by the string @var{command}.  It returns, as
+its value, the status returned by the command that was executed.
+
+For example, if the following fragment of code is put in your @code{awk}
+program:
+
+@smallexample
+END @{
+     system("mail -s 'awk run done' operator < /dev/null")
+@}
+@end smallexample
+
+@noindent
+the system operator will be sent mail when the @code{awk} program
+finishes processing input and begins its end-of-input processing.
+
+Note that much the same result can be obtained by redirecting
+@code{print} or @code{printf} into a pipe.  However, if your @code{awk}
+program is interactive, @code{system} is useful for cranking up large
+self-contained programs, such as a shell or an editor.@refill
+
+Some operating systems cannot implement the @code{system} function.
+@code{system} causes a fatal error if it is not supported.
+@end table
+
+@c fakenode --- for prepinfo
+@subheading Controlling Output Buffering with @code{system}
+@cindex flushing buffers
+@cindex buffers, flushing
+@cindex buffering output
+@cindex output, buffering
+
+Many utility programs will @dfn{buffer} their output; they save information
+to be written to a disk file or terminal in memory, until there is enough
+to be written in one operation.  This is often more efficient than writing
+every little bit of information as soon as it is ready.  However, sometimes
+it is necessary to force a program to @dfn{flush} its buffers; that is,
+write the information to its destination, even if a buffer is not full.
+You can do this from your @code{awk} program by calling @code{system}
+with a null string as its argument:
+
+@example
+system("")   # flush output
+@end example
+
+@noindent
+@code{gawk} treats this use of the @code{system} function as a special
+case, and is smart enough not to run a shell (or other command
+interpreter) with the empty command.  Therefore, with @code{gawk}, this
+idiom is not only useful, it is efficient.  While this idiom should work
+with other @code{awk} implementations, it will not necessarily avoid
+starting an unnecessary shell.
+@ignore
+Need a better explanation, perhaps in a separate paragraph.  Explain that
+for
+
+awk 'BEGIN { print "hi"
+             system("echo hello")
+             print "howdy" }'
+
+that the output had better be
+
+             hi
+             hello
+             howdy
+
+and not
+
+             hello
+             hi
+             howdy
+
+which it would be if awk did not flush its buffers before calling system.
+@end ignore
+
+@node Time Functions,  , I/O Functions, Built-in
+@section Functions for Dealing with Time Stamps
+
+@cindex time stamps
+@cindex time of day
+A common use for @code{awk} programs is the processing of log files.
+Log files often contain time stamp information, indicating when a
+particular log record was written.  Many programs log their time stamp
+in the form returned by the @code{time} system call, which is the
+number of seconds since a particular epoch.  On @sc{posix} systems,
+it is the number of seconds since Midnight, January 1, 1970, @sc{utc}.
+
+In order to make it easier to process such log files, and to easily produce
+useful reports, @code{gawk} provides two functions for working with time
+stamps.  Both of these are @code{gawk} extensions; they are not specified
+in the @sc{posix} standard, nor are they in any other known version
+of @code{awk}.
+
+@table @code
+@item systime()
+@findex systime
+This function returns the current time as the number of seconds since
+the system epoch.  On @sc{posix} systems, this is the number of seconds
+since Midnight, January 1, 1970, @sc{utc}.  It may be a different number on
+other systems.
+
+@item strftime(@var{format}, @var{timestamp})
+@findex strftime
+This function returns a string.  It is similar to the function of the
+same name in the @sc{ansi} C standard library.  The time specified by
+@var{timestamp} is used to produce a string, based on the contents
+of the @var{format} string.
+@end table
+
+The @code{systime} function allows you to compare a time stamp from a
+log file with the current time of day.  In particular, it is easy to
+determine how long ago a particular record was logged.  It also allows
+you to produce log records using the ``seconds since the epoch'' format.
+
+The @code{strftime} function allows you to easily turn a time stamp
+into human-readable information.  It is similar in nature to the @code{sprintf}
+function, copying non-format specification characters verbatim to the
+returned string, and substituting date and time values for format
+specifications in the @var{format} string.  If no @var{timestamp} argument
+is supplied, @code{gawk} will use the current time of day as the
+time stamp.@refill
+
+@code{strftime} is guaranteed by the @sc{ansi} C standard to support
+the following date format specifications:
+
+@table @code
+@item %a
+The locale's abbreviated weekday name.
+
+@item %A
+The locale's full weekday name.
+
+@item %b
+The locale's abbreviated month name.
+
+@item %B
+The locale's full month name.
+
+@item %c
+The locale's ``appropriate'' date and time representation.
+
+@item %d
+The day of the month as a decimal number (01--31).
+
+@item %H
+The hour (24-hour clock) as a decimal number (00--23).
+
+@item %I
+The hour (12-hour clock) as a decimal number (01--12).
+
+@item %j
+The day of the year as a decimal number (001--366).
+
+@item %m
+The month as a decimal number (01--12).
+
+@item %M
+The minute as a decimal number (00--59).
+
+@item %p
+The locale's equivalent of the AM/PM designations associated
+with a 12-hour clock.
+
+@item %S
+The second as a decimal number (00--61).  (Occasionally there are
+minutes in a year with one or two leap seconds, which is why the
+seconds can go from 0 all the way to 61.)
+
+@item %U
+The week number of the year (the first Sunday as the first day of week 1)
+as a decimal number (00--53).
+
+@item %w
+The weekday as a decimal number (0--6).  Sunday is day 0.
+
+@item %W
+The week number of the year (the first Monday as the first day of week 1)
+as a decimal number (00--53).
+
+@item %x
+The locale's ``appropriate'' date representation.
+
+@item %X
+The locale's ``appropriate'' time representation.
+
+@item %y
+The year without century as a decimal number (00--99).
+
+@item %Y
+The year with century as a decimal number.
+
+@item %Z
+The time zone name or abbreviation, or no characters if
+no time zone is determinable.
+
+@item %%
+A literal @samp{%}.
+@end table
+
+@c The parenthetical remark here should really be a footnote, but
+@c it gave formatting problems at the FSF. So for now put it in
+@c parentheses.
+If a conversion specifier is not one of the above, the behavior is
+undefined.  (This is because the @sc{ansi} standard for C leaves the
+behavior of the C version of @code{strftime} undefined, and @code{gawk}
+will use the system's version of @code{strftime} if it's there.
+Typically, the conversion specifier will either not appear in the
+returned string, or it will appear literally.)
+
+Informally, a @dfn{locale} is the geographic place in which a program
+is meant to run.  For example, a common way to abbreviate the date
+September 4, 1991 in the United States would be ``9/4/91''.
+In many countries in Europe, however, it would be abbreviated ``4.9.91''.
+Thus, the @samp{%x} specification in a @code{"US"} locale might produce
+@samp{9/4/91}, while in a @code{"EUROPE"} locale, it might produce
+@samp{4.9.91}.  The @sc{ansi} C standard defines a default @code{"C"}
+locale, which is an environment that is typical of what most C programmers
+are used to.
+
+A public-domain C version of @code{strftime} is shipped with @code{gawk}
+for systems that are not yet fully @sc{ansi}-compliant.  If that version is
+used to compile @code{gawk} (@pxref{Installation, ,Installing @code{gawk}}),
+then the following additional format specifications are available:@refill
+
+@table @code
+@item %D
+Equivalent to specifying @samp{%m/%d/%y}.
+
+@item %e
+The day of the month, padded with a blank if it is only one digit.
+
+@item %h
+Equivalent to @samp{%b}, above.
+
+@item %n
+A newline character (ASCII LF).
+
+@item %r
+Equivalent to specifying @samp{%I:%M:%S %p}.
+
+@item %R
+Equivalent to specifying @samp{%H:%M}.
+
+@item %T
+Equivalent to specifying @samp{%H:%M:%S}.
+
+@item %t
+A TAB character.
+
+@item %k
+The hour (24-hour clock) as a decimal number (0--23).
+Single digit numbers are padded with a blank.
+
+@item %l
+The hour (12-hour clock) as a decimal number (1--12).
+Single digit numbers are padded with a blank.
+
+@item %C
+The century, as a number between 00 and 99.
+
+@item %u
+is replaced by the weekday as a decimal number
+[1 (Monday)--7].
+
+@item %V
+is replaced by the week number of the year (the first Monday as the first
+day of week 1) as a decimal number (01--53).
+The method for determining the week number is as specified by ISO 8601
+(to wit: if the week containing January 1 has four or more days in the
+new year, then it is week 1, otherwise it is week 53 of the previous year
+and the next week is week 1).@refill
+
+@item %Ec %EC %Ex %Ey %EY %Od %Oe %OH %OI
+@itemx %Om %OM %OS %Ou %OU %OV %Ow %OW %Oy
+These are ``alternate representations'' for the specifications
+that use only the second letter (@samp{%c}, @samp{%C}, and so on).
+They are recognized, but their normal representations are used.
+(These facilitate compliance with the @sc{posix} @code{date}
+utility.)@refill
+
+@item %v
+The date in VMS format (e.g., 20-JUN-1991).
+@end table
+
+Here are two examples that use @code{strftime}.  The first is an
+@code{awk} version of the C @code{ctime} function.  (This is a
+user-defined function, which we have not discussed yet.
+@xref{User-defined, ,User-defined Functions}, for more information.)
+
+@smallexample
+# ctime.awk
+#
+# awk version of C ctime(3) function
+
+function ctime(ts,    format)
+@{
+    format = "%a %b %e %H:%M:%S %Z %Y"
+    if (ts == 0)
+        ts = systime()         # use current time as default
+    return strftime(format, ts)
+@}
+@end smallexample
+
+This next example is an @code{awk} implementation of the @sc{posix}
+@code{date} utility.  Normally, the @code{date} utility prints the
+current date and time of day in a well-known format.  However, if you
+provide an argument to it that begins with a @samp{+}, @code{date}
+will copy non-format specifier characters to the standard output, and
+will interpret the current time according to the format specifiers in
+the string.  For example:
+
+@smallexample
+date '+Today is %A, %B %d, %Y.'
+@end smallexample
+
+@noindent
+might print
+
+@smallexample
+Today is Thursday, July 11, 1991.
+@end smallexample
+
+Here is the @code{awk} version of the @code{date} utility.
+
+@smallexample
+#! /usr/bin/gawk -f
+#
+# date --- implement the P1003.2 Draft 11 'date' command
+#
+# Bug: does not recognize the -u argument.
+
+BEGIN    \
+@{
+    format = "%a %b %e %H:%M:%S %Z %Y"
+    exitval = 0
+
+    if (ARGC > 2)
+        exitval = 1
+    else if (ARGC == 2) @{
+        format = ARGV[1]
+        if (format ~ /^\+/)
+            format = substr(format, 2)    # remove leading +
+    @}
+    print strftime(format)
+    exit exitval
+@}
+@end smallexample
+
+@node User-defined, Built-in Variables, Built-in, Top
+@chapter User-defined Functions
+
+@cindex user-defined functions
+@cindex functions, user-defined
+Complicated @code{awk} programs can often be simplified by defining
+your own functions.  User-defined functions can be called just like
+built-in ones (@pxref{Function Calls}), but it is up to you to define
+them---to tell @code{awk} what they should do.
+
+@menu
+* Definition Syntax::           How to write definitions and what they mean.
+* Function Example::            An example function definition and 
+                                what it does.
+* Function Caveats::            Things to watch out for.
+* Return Statement::            Specifying the value a function returns.
+@end menu
+
+@node Definition Syntax, Function Example, User-defined, User-defined
+@section Syntax of Function Definitions
+@cindex defining functions
+@cindex function definition
+
+Definitions of functions can appear anywhere between the rules of the
+@code{awk} program.  Thus, the general form of an @code{awk} program is
+extended to include sequences of rules @emph{and} user-defined function
+definitions.
+
+The definition of a function named @var{name} looks like this:
+
+@example
+function @var{name} (@var{parameter-list}) @{
+     @var{body-of-function}
+@}
+@end example
+
+@noindent
+@var{name} is the name of the function to be defined.  A valid function
+name is like a valid variable name: a sequence of letters, digits and
+underscores, not starting with a digit.  Functions share the same pool
+of names as variables and arrays.
+
+@var{parameter-list} is a list of the function's arguments and local
+variable names, separated by commas.  When the function is called,
+the argument names are used to hold the argument values given in
+the call.  The local variables are initialized to the null string.
+
+The @var{body-of-function} consists of @code{awk} statements.  It is the
+most important part of the definition, because it says what the function
+should actually @emph{do}.  The argument names exist to give the body a
+way to talk about the arguments; local variables, to give the body
+places to keep temporary values.
+
+Argument names are not distinguished syntactically from local variable
+names; instead, the number of arguments supplied when the function is
+called determines how many argument variables there are.  Thus, if three
+argument values are given, the first three names in @var{parameter-list}
+are arguments, and the rest are local variables.
+
+It follows that if the number of arguments is not the same in all calls
+to the function, some of the names in @var{parameter-list} may be
+arguments on some occasions and local variables on others.  Another
+way to think of this is that omitted arguments default to the
+null string.
+
+Usually when you write a function you know how many names you intend to
+use for arguments and how many you intend to use as locals.  By
+convention, you should write an extra space between the arguments and
+the locals, so other people can follow how your function is
+supposed to be used.
+
+During execution of the function body, the arguments and local variable
+values hide or @dfn{shadow} any variables of the same names used in the
+rest of the program.  The shadowed variables are not accessible in the
+function definition, because there is no way to name them while their
+names have been taken away for the local variables.  All other variables
+used in the @code{awk} program can be referenced or set normally in the
+function definition.
+
+The arguments and local variables last only as long as the function body
+is executing.  Once the body finishes, the shadowed variables come back.
+
+The function body can contain expressions which call functions.  They
+can even call this function, either directly or by way of another
+function.  When this happens, we say the function is @dfn{recursive}.
+
+There is no need in @code{awk} to put the definition of a function
+before all uses of the function.  This is because @code{awk} reads the
+entire program before starting to execute any of it.
+
+In many @code{awk} implementations, the keyword @code{function} may be
+abbreviated @code{func}.  However, @sc{posix} only specifies the use of
+the keyword @code{function}.  This actually has some practical implications.
+If @code{gawk} is in @sc{posix}-compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then the following
+statement will @emph{not} define a function:@refill
+
+@example
+func foo() @{ a = sqrt($1) ; print a @}
+@end example
+
+@noindent
+Instead it defines a rule that, for each record, concatenates the value
+of the variable @samp{func} with the return value of the function @samp{foo},
+and based on the truth value of the result, executes the corresponding action.
+This is probably not what was desired.  (@code{awk} accepts this input as
+syntactically valid, since functions may be used before they are defined
+in @code{awk} programs.)
+
+@node Function Example, Function Caveats, Definition Syntax, User-defined
+@section Function Definition Example
+
+Here is an example of a user-defined function, called @code{myprint}, that
+takes a number and prints it in a specific format.
+
+@example
+function myprint(num)
+@{
+     printf "%6.3g\n", num
+@}
+@end example
+
+@noindent
+To illustrate, here is an @code{awk} rule which uses our @code{myprint}
+function:
+
+@example
+$3 > 0     @{ myprint($3) @}
+@end example
+
+@noindent
+This program prints, in our special format, all the third fields that
+contain a positive number in our input.  Therefore, when given:
+
+@example
+ 1.2   3.4    5.6   7.8
+ 9.10 11.12 -13.14 15.16
+17.18 19.20  21.22 23.24
+@end example
+
+@noindent
+this program, using our function to format the results, prints:
+
+@example
+   5.6
+  21.2
+@end example
+
+Here is a rather contrived example of a recursive function.  It prints a
+string backwards:
+
+@example
+function rev (str, len) @{
+    if (len == 0) @{
+        printf "\n"
+        return
+    @}
+    printf "%c", substr(str, len, 1)
+    rev(str, len - 1)
+@}
+@end example
+
+@node Function Caveats, Return Statement, Function Example, User-defined
+@section Calling User-defined Functions
+
+@dfn{Calling a function} means causing the function to run and do its job.
+A function call is an expression, and its value is the value returned by
+the function.
+
+A function call consists of the function name followed by the arguments
+in parentheses.  What you write in the call for the arguments are
+@code{awk} expressions; each time the call is executed, these
+expressions are evaluated, and the values are the actual arguments.  For
+example, here is a call to @code{foo} with three arguments (the first
+being a string concatenation):
+
+@example
+foo(x y, "lose", 4 * z)
+@end example
+
+@quotation
+@strong{Caution:} whitespace characters (spaces and tabs) are not allowed
+between the function name and the open-parenthesis of the argument list.
+If you write whitespace by mistake, @code{awk} might think that you mean
+to concatenate a variable with an expression in parentheses.  However, it
+notices that you used a function name and not a variable name, and reports
+an error.
+@end quotation
+
+@cindex call by value
+When a function is called, it is given a @emph{copy} of the values of
+its arguments.  This is called @dfn{call by value}.  The caller may use
+a variable as the expression for the argument, but the called function
+does not know this: it only knows what value the argument had.  For
+example, if you write this code:
+
+@example
+foo = "bar"
+z = myfunc(foo)
+@end example
+
+@noindent
+then you should not think of the argument to @code{myfunc} as being
+``the variable @code{foo}.''  Instead, think of the argument as the
+string value, @code{"bar"}.
+
+If the function @code{myfunc} alters the values of its local variables,
+this has no effect on any other variables.  In particular, if @code{myfunc}
+does this:
+
+@example
+function myfunc (win) @{
+  print win
+  win = "zzz"
+  print win
+@}
+@end example
+
+@noindent
+to change its first argument variable @code{win}, this @emph{does not}
+change the value of @code{foo} in the caller.  The role of @code{foo} in
+calling @code{myfunc} ended when its value, @code{"bar"}, was computed.
+If @code{win} also exists outside of @code{myfunc}, the function body
+cannot alter this outer value, because it is shadowed during the
+execution of @code{myfunc} and cannot be seen or changed from there.
+
+@cindex call by reference
+However, when arrays are the parameters to functions, they are @emph{not}
+copied.  Instead, the array itself is made available for direct manipulation
+by the function.  This is usually called @dfn{call by reference}.
+Changes made to an array parameter inside the body of a function @emph{are}
+visible outside that function.  
+@ifinfo
+This can be @strong{very} dangerous if you do not watch what you are
+doing.  For example:@refill
+@end ifinfo
+@iftex
+@emph{This can be very dangerous if you do not watch what you are
+doing.}  For example:@refill
+@end iftex
+
+@example
+function changeit (array, ind, nvalue) @{
+     array[ind] = nvalue
+@}
+
+BEGIN @{
+           a[1] = 1 ; a[2] = 2 ; a[3] = 3
+           changeit(a, 2, "two")
+           printf "a[1] = %s, a[2] = %s, a[3] = %s\n", a[1], a[2], a[3]
+      @}
+@end example
+
+@noindent
+prints @samp{a[1] = 1, a[2] = two, a[3] = 3}, because calling
+@code{changeit} stores @code{"two"} in the second element of @code{a}.
+
+@node Return Statement,  , Function Caveats, User-defined
+@section The @code{return} Statement
+@cindex @code{return} statement
+
+The body of a user-defined function can contain a @code{return} statement.
+This statement returns control to the rest of the @code{awk} program.  It
+can also be used to return a value for use in the rest of the @code{awk}
+program.  It looks like this:@refill
+
+@example
+return @var{expression}
+@end example
+
+The @var{expression} part is optional.  If it is omitted, then the returned
+value is undefined and, therefore, unpredictable.
+
+A @code{return} statement with no value expression is assumed at the end of
+every function definition.  So if control reaches the end of the function
+body, then the function returns an unpredictable value.  @code{awk}
+will not warn you if you use the return value of such a function; you will
+simply get unpredictable or unexpected results.
+
+Here is an example of a user-defined function that returns a value
+for the largest number among the elements of an array:@refill
+
+@example
+@group
+function maxelt (vec,   i, ret) @{
+     for (i in vec) @{
+          if (ret == "" || vec[i] > ret)
+               ret = vec[i]
+     @}
+     return ret
+@}
+@end group
+@end example
+
+@noindent
+You call @code{maxelt} with one argument, which is an array name.  The local
+variables @code{i} and @code{ret} are not intended to be arguments;
+while there is nothing to stop you from passing two or three arguments
+to @code{maxelt}, the results would be strange.  The extra space before
+@code{i} in the function parameter list is to indicate that @code{i} and
+@code{ret} are not supposed to be arguments.  This is a convention which
+you should follow when you define functions.
+
+Here is a program that uses our @code{maxelt} function.  It loads an
+array, calls @code{maxelt}, and then reports the maximum number in that
+array:@refill
+
+@example
+@group
+awk '
+function maxelt (vec,   i, ret) @{
+     for (i in vec) @{
+          if (ret == "" || vec[i] > ret)
+               ret = vec[i]
+     @}
+     return ret
+@}
+@end group
+
+@group
+# Load all fields of each record into nums.
+@{
+          for(i = 1; i <= NF; i++)
+               nums[NR, i] = $i
+@}
+
+END @{
+     print maxelt(nums)
+@}'
+@end group
+@end example
+
+Given the following input:
+
+@example
+@group
+ 1 5 23 8 16
+44 3 5 2 8 26
+256 291 1396 2962 100
+-6 467 998 1101
+99385 11 0 225
+@end group
+@end example
+
+@noindent
+our program tells us (predictably) that:
+
+@example
+99385
+@end example
+
+@noindent
+is the largest number in our array.
+
+@node Built-in Variables, Command Line, User-defined, Top
+@chapter Built-in Variables
+@cindex built-in variables
+
+Most @code{awk} variables are available for you to use for your own
+purposes; they never change except when your program assigns values to
+them, and never affect anything except when your program examines them.
+
+A few variables have special built-in meanings.  Some of them @code{awk}
+examines automatically, so that they enable you to tell @code{awk} how
+to do certain things.  Others are set automatically by @code{awk}, so
+that they carry information from the internal workings of @code{awk} to
+your program.
+
+This chapter documents all the built-in variables of @code{gawk}.  Most
+of them are also documented in the chapters where their areas of
+activity are described.
+
+@menu
+* User-modified::               Built-in variables that you change 
+                                to control @code{awk}.
+* Auto-set::                    Built-in variables where @code{awk} 
+                                gives you information.
+@end menu
+
+@node User-modified, Auto-set, Built-in Variables, Built-in Variables
+@section Built-in Variables that Control @code{awk}
+@cindex built-in variables, user modifiable
+
+This is a list of the variables which you can change to control how
+@code{awk} does certain things.
+
+@table @code
+@iftex
+@vindex CONVFMT
+@end iftex
+@item CONVFMT
+This string is used by @code{awk} to control conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function.  Its default value is @code{"%.6g"}.
+@code{CONVFMT} was introduced by the @sc{posix} standard.@refill
+
+@iftex
+@vindex FIELDWIDTHS
+@end iftex
+@item FIELDWIDTHS
+This is a space-separated list of columns that tells @code{gawk}
+how to manage input with fixed, columnar boundaries.  It is an
+experimental feature that is still evolving.  Assigning to @code{FIELDWIDTHS}
+overrides the use of @code{FS} for field splitting.
+@xref{Constant Size, ,Reading Fixed-width Data}, for more information.@refill
+
+If @code{gawk} is in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then @code{FIELDWIDTHS}
+has no special meaning, and field splitting operations are done based
+exclusively on the value of @code{FS}.@refill
+
+@iftex
+@vindex FS
+@end iftex
+@item FS
+@code{FS} is the input field separator
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).
+The value is a single-character string or a multi-character regular
+expression that matches the separations between fields in an input
+record.@refill
+
+The default value is @w{@code{" "}}, a string consisting of a single
+space.  As a special exception, this value actually means that any
+sequence of spaces and tabs is a single separator.  It also causes
+spaces and tabs at the beginning or end of a line to be ignored.
+
+You can set the value of @code{FS} on the command line using the
+@samp{-F} option:
+
+@example
+awk -F, '@var{program}' @var{input-files}
+@end example
+
+If @code{gawk} is using @code{FIELDWIDTHS} for field-splitting,
+assigning a value to @code{FS} will cause @code{gawk} to return to
+the normal, regexp-based, field splitting.
+
+@item IGNORECASE
+@iftex
+@vindex IGNORECASE
+@end iftex
+If @code{IGNORECASE} is nonzero, then @emph{all} regular expression
+matching is done in a case-independent fashion.  In particular, regexp
+matching with @samp{~} and @samp{!~}, and the @code{gsub}, @code{index},
+@code{match}, @code{split} and @code{sub} functions all ignore case when
+doing their particular regexp operations.  @strong{Note:} since field
+splitting with the value of the @code{FS} variable is also a regular
+expression operation, that too is done with case ignored.
+@xref{Case-sensitivity, ,Case-sensitivity in Matching}.
+
+If @code{gawk} is in compatibility mode
+(@pxref{Command Line, ,Invoking @code{awk}}), then @code{IGNORECASE} has
+no special meaning, and regexp operations are always case-sensitive.@refill
+
+@item OFMT
+@iftex
+@vindex OFMT
+@end iftex
+This string is used by @code{awk} to control conversion of numbers to
+strings (@pxref{Conversion, ,Conversion of Strings and Numbers}) for
+printing with the @code{print} statement.
+It works by being passed, in effect, as the first argument to the
+@code{sprintf} function.  Its default value is @code{"%.6g"}.
+Earlier versions of @code{awk} also used @code{OFMT} to specify the
+format for converting numbers to strings in general expressions; this
+has been taken over by @code{CONVFMT}.@refill
+
+@item OFS
+@iftex
+@vindex OFS
+@end iftex
+This is the output field separator (@pxref{Output Separators}).  It is
+output between the fields output by a @code{print} statement.  Its
+default value is @w{@code{" "}}, a string consisting of a single space.
+
+@item ORS
+@iftex
+@vindex ORS
+@end iftex
+This is the output record separator.  It is output at the end of every
+@code{print} statement.  Its default value is a string containing a
+single newline character, which could be written as @code{"\n"}.
+(@xref{Output Separators}.)@refill
+
+@item RS
+@iftex
+@vindex RS
+@end iftex
+This is @code{awk}'s input record separator.  Its default value is a string
+containing a single newline character, which means that an input record
+consists of a single line of text.
+(@xref{Records, ,How Input is Split into Records}.)@refill
+
+@item SUBSEP
+@iftex
+@vindex SUBSEP
+@end iftex
+@code{SUBSEP} is the subscript separator.  It has the default value of
+@code{"\034"}, and is used to separate the parts of the name of a
+multi-dimensional array.  Thus, if you access @code{foo[12,3]}, it
+really accesses @code{foo["12\0343"]}
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
+@end table
+
+@node Auto-set,  , User-modified, Built-in Variables
+@section Built-in Variables that Convey Information
+
+This is a list of the variables that are set automatically by @code{awk}
+on certain occasions so as to provide information to your program.
+
+@table @code
+@item ARGC
+@itemx ARGV
+@iftex
+@vindex ARGC
+@vindex ARGV
+@end iftex
+The command-line arguments available to @code{awk} programs are stored in
+an array called @code{ARGV}.  @code{ARGC} is the number of command-line
+arguments present.  @xref{Command Line, ,Invoking @code{awk}}.
+@code{ARGV} is indexed from zero to @w{@code{ARGC - 1}}.  For example:@refill
+
+@example
+awk 'BEGIN @{
+       for (i = 0; i < ARGC; i++) 
+           print ARGV[i] 
+     @}' inventory-shipped BBS-list
+@end example
+
+@noindent
+In this example, @code{ARGV[0]} contains @code{"awk"}, @code{ARGV[1]}
+contains @code{"inventory-shipped"}, and @code{ARGV[2]} contains
+@code{"BBS-list"}.  The value of @code{ARGC} is 3, one more than the
+index of the last element in @code{ARGV} since the elements are numbered
+from zero.@refill
+
+The names @code{ARGC} and @code{ARGV}, as well as the convention of indexing
+the array from 0 to @w{@code{ARGC - 1}}, are derived from the C language's
+method of accessing command line arguments.@refill
+
+Notice that the @code{awk} program is not entered in @code{ARGV}.  The
+other special command line options, with their arguments, are also not
+entered.  But variable assignments on the command line @emph{are}
+treated as arguments, and do show up in the @code{ARGV} array.
+
+Your program can alter @code{ARGC} and the elements of @code{ARGV}.
+Each time @code{awk} reaches the end of an input file, it uses the next
+element of @code{ARGV} as the name of the next input file.  By storing a
+different string there, your program can change which files are read.
+You can use @code{"-"} to represent the standard input.  By storing
+additional elements and incrementing @code{ARGC} you can cause
+additional files to be read.
+
+If you decrease the value of @code{ARGC}, that eliminates input files
+from the end of the list.  By recording the old value of @code{ARGC}
+elsewhere, your program can treat the eliminated arguments as
+something other than file names.
+
+To eliminate a file from the middle of the list, store the null string
+(@code{""}) into @code{ARGV} in place of the file's name.  As a
+special feature, @code{awk} ignores file names that have been
+replaced with the null string.
+
+@ignore
+see getopt.awk in the examples...
+@end ignore
+
+@item ARGIND
+@vindex ARGIND
+The index in @code{ARGV} of the current file being processed.
+Every time @code{gawk} opens a new data file for processing, it sets
+@code{ARGIND} to the index in @code{ARGV} of the file name.  Thus, the
+condition @samp{FILENAME == ARGV[ARGIND]} is always true.
+
+This variable is useful in file processing; it allows you to tell how far
+along you are in the list of data files, and to distinguish between
+multiple successive instances of the same filename on the command line.
+
+While you can change the value of @code{ARGIND} within your @code{awk}
+program, @code{gawk} will automatically set it to a new value when the
+next file is opened.
+
+This variable is a @code{gawk} extension; in other @code{awk} implementations
+it is not special.
+
+@item ENVIRON
+@vindex ENVIRON
+This is an array that contains the values of the environment.  The array
+indices are the environment variable names; the values are the values of
+the particular environment variables.  For example,
+@code{ENVIRON["HOME"]} might be @file{/u/close}.  Changing this array
+does not affect the environment passed on to any programs that
+@code{awk} may spawn via redirection or the @code{system} function.
+(In a future version of @code{gawk}, it may do so.)
+
+Some operating systems may not have environment variables.
+On such systems, the array @code{ENVIRON} is empty.
+
+@item ERRNO
+@iftex
+@vindex ERRNO
+@end iftex
+If a system error occurs while doing a redirection for @code{getline},
+during a read for @code{getline}, or during a @code{close} operation,
+then @code{ERRNO} will contain a string describing the error.
+
+This variable is a @code{gawk} extension; in other @code{awk} implementations
+it is not special.
+
+@item FILENAME
+@iftex
+@vindex FILENAME
+@end iftex
+This is the name of the file that @code{awk} is currently reading.
+If @code{awk} is reading from the standard input (in other words,
+there are no files listed on the command line),
+@code{FILENAME} is set to @code{"-"}.
+@code{FILENAME} is changed each time a new file is read
+(@pxref{Reading Files, ,Reading Input Files}).@refill
+
+@item FNR
+@iftex
+@vindex FNR
+@end iftex
+@code{FNR} is the current record number in the current file.  @code{FNR} is
+incremented each time a new record is read
+(@pxref{Getline, ,Explicit Input with @code{getline}}).  It is reinitialized
+to 0 each time a new input file is started.@refill
+
+@item NF
+@iftex
+@vindex NF
+@end iftex
+@code{NF} is the number of fields in the current input record.
+@code{NF} is set each time a new record is read, when a new field is
+created, or when @code{$0} changes (@pxref{Fields, ,Examining Fields}).@refill
+
+@item NR
+@iftex
+@vindex NR
+@end iftex
+This is the number of input records @code{awk} has processed since
+the beginning of the program's execution
+(@pxref{Records, ,How Input is Split into Records}).
+@code{NR} is set each time a new record is read.@refill
+
+@item RLENGTH
+@iftex
+@vindex RLENGTH
+@end iftex
+@code{RLENGTH} is the length of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RLENGTH} is set by invoking the @code{match} function.  Its value
+is the length of the matched string, or @minus{}1 if no match was found.@refill
+
+@item RSTART
+@iftex
+@vindex RSTART
+@end iftex
+@code{RSTART} is the start-index in characters of the substring matched by the
+@code{match} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+@code{RSTART} is set by invoking the @code{match} function.  Its value
+is the position in the string where the matched substring starts, or 0
+if no match was found.@refill
+@end table
+
+@node Command Line, Language History, Built-in Variables, Top
+@c node-name, next, previous, up
+@chapter Invoking @code{awk}
+@cindex command line
+@cindex invocation of @code{gawk}
+@cindex arguments, command line
+@cindex options, command line
+@cindex long options
+@cindex options, long
+
+There are two ways to run @code{awk}: with an explicit program, or with
+one or more program files.  Here are templates for both of them; items
+enclosed in @samp{@r{[}@dots{}@r{]}} in these templates are optional.
+
+Besides traditional one-letter @sc{posix}-style options, @code{gawk} also
+supports GNU long named options.
+
+@example
+awk @r{[@var{POSIX or GNU style options}]} -f progfile @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+@menu
+* Options::                     Command line options and their meanings.
+* Other Arguments::             Input file names and variable assignments.
+* AWKPATH Variable::            Searching directories for @code{awk} programs.
+* Obsolete::                    Obsolete Options and/or features.
+* Undocumented::                Undocumented Options and Features.
+@end menu
+
+@node Options, Other Arguments, Command Line, Command Line
+@section Command Line Options
+
+Options begin with a minus sign, and consist of a single character.
+GNU style long named options consist of two minus signs and
+a keyword that can be abbreviated if the abbreviation allows the option
+to be uniquely identified.  If the option takes an argument, then the
+keyword is immediately followed by an equals sign (@samp{=}) and the
+argument's value.  For brevity, the discussion below only refers to the
+traditional short options; however the long and short options are
+interchangeable in all contexts.
+
+Each long named option for @code{gawk} has a corresponding
+@sc{posix}-style option.  The options and their meanings are as follows:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator=@var{fs}
+@iftex
+@cindex @code{-F} option
+@end iftex
+@cindex @code{--field-separator} option
+Sets the @code{FS} variable to @var{fs}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
+
+@item -f @var{source-file}
+@itemx --file=@var{source-file}
+@iftex
+@cindex @code{-f} option
+@end iftex
+@cindex @code{--file} option
+Indicates that the @code{awk} program is to be found in @var{source-file}
+instead of in the first non-option argument.
+
+@item -v @var{var}=@var{val}
+@itemx --assign=@var{var}=@var{val}
+@cindex @samp{-v} option
+@cindex @code{--assign} option
+Sets the variable @var{var} to the value @var{val} @emph{before}
+execution of the program begins.  Such variable values are available
+inside the @code{BEGIN} rule (see below for a fuller explanation).
+
+The @samp{-v} option can only set one variable, but you can use
+it more than once, setting another variable each time, like this:
+@samp{@w{-v foo=1} @w{-v bar=2}}.
+
+@item -W @var{gawk-opt}
+@cindex @samp{-W} option
+Following the @sc{posix} standard, options that are implementation
+specific are supplied as arguments to the @samp{-W} option.  With @code{gawk},
+these arguments may be separated by commas, or quoted and separated by
+whitespace.  Case is ignored when processing these options.  These options
+also have corresponding GNU style long named options.  The following
+@code{gawk}-specific options are available:
+
+@table @code
+@item -W compat
+@itemx --compat
+@cindex @code{--compat} option
+Specifies @dfn{compatibility mode}, in which the GNU extensions in
+@code{gawk} are disabled, so that @code{gawk} behaves just like Unix
+@code{awk}.
+@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
+which summarizes the extensions.  Also see
+@ref{Compatibility Mode, ,Downward Compatibility and Debugging}.@refill
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+@cindex @code{--copyleft} option
+@cindex @code{--copyright} option
+Print the short version of the General Public License.
+This option may disappear in a future version of @code{gawk}.  
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+@cindex @code{--help} option
+@cindex @code{--usage} option
+Print a ``usage'' message summarizing the short and long style options
+that @code{gawk} accepts, and then exit.
+
+@item -W lint
+@itemx --lint
+@cindex @code{--lint} option
+Provide warnings about constructs that are dubious or non-portable to
+other @code{awk} implementations.
+Some warnings are issued when @code{gawk} first reads your program.  Others
+are issued at run-time, as your program executes.
+
+@item -W posix
+@itemx --posix
+@cindex @code{--posix} option
+Operate in strict @sc{posix} mode.  This disables all @code{gawk}
+extensions (just like @code{-W compat}), and adds the following additional
+restrictions:
+
+@itemize @bullet{}
+@item
+@code{\x} escape sequences are not recognized
+(@pxref{Constants, ,Constant Expressions}).@refill
+
+@item
+The synonym @code{func} for the keyword @code{function} is not
+recognized (@pxref{Definition Syntax, ,Syntax of Function Definitions}).
+
+@item
+The operators @samp{**} and @samp{**=} cannot be used in
+place of @samp{^} and @samp{^=} (@pxref{Arithmetic Ops, ,Arithmetic Operators},
+and also @pxref{Assignment Ops, ,Assignment Expressions}).@refill
+
+@item
+Specifying @samp{-Ft} on the command line does not set the value
+of @code{FS} to be a single tab character
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
+@end itemize
+
+Although you can supply both @samp{-W compat} and @samp{-W posix} on the
+command line, @samp{-W posix} will take precedence.
+
+@item -W source=@var{program-text}
+@itemx --source=@var{program-text}
+@cindex @code{--source} option
+Program source code is taken from the @var{program-text}.  This option
+allows you to mix @code{awk} source code in files with program source
+code that you would enter on the command line. This is particularly useful
+when you have library functions that you wish to use from your command line
+programs (@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+@item -W version
+@itemx --version
+@cindex @code{--version} option
+Prints version information for this particular copy of @code{gawk}.
+This is so you can determine if your copy of @code{gawk} is up to date
+with respect to whatever the Free Software Foundation is currently
+distributing.  This option may disappear in a future version of @code{gawk}.
+@end table
+
+@item --
+Signals the end of the command line options.  The following arguments
+are not treated as options even if they begin with @samp{-}.  This
+interpretation of @samp{--} follows the @sc{posix} argument parsing
+conventions.
+
+This is useful if you have file names that start with @samp{-},
+or in shell scripts, if you have file names that will be specified
+by the user which could start with @samp{-}.
+@end table
+
+Any other options are flagged as invalid with a warning message, but
+are otherwise ignored.
+
+In compatibility mode, as a special case, if the value of @var{fs} supplied
+to the @samp{-F} option is @samp{t}, then @code{FS} is set to the tab
+character (@code{"\t"}).  This is only true for @samp{-W compat}, and not
+for @samp{-W posix}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}).@refill
+
+If the @samp{-f} option is @emph{not} used, then the first non-option
+command line argument is expected to be the program text.
+
+The @samp{-f} option may be used more than once on the command line.
+If it is, @code{awk} reads its program source from all of the named files, as
+if they had been concatenated together into one big file.  This is
+useful for creating libraries of @code{awk} functions.  Useful functions
+can be written once, and then retrieved from a standard place, instead
+of having to be included into each individual program.  You can still
+type in a program at the terminal and use library functions, by specifying
+@samp{-f /dev/tty}.  @code{awk} will read a file from the terminal
+to use as part of the @code{awk} program.  After typing your program,
+type @kbd{Control-d} (the end-of-file character) to terminate it.
+(You may also use @samp{-f -} to read program source from the standard
+input, but then you will not be able to also use the standard input as a
+source of data.)
+
+Because it is clumsy using the standard @code{awk} mechanisms to mix source
+file and command line @code{awk} programs, @code{gawk} provides the
+@samp{--source} option.  This does not require you to pre-empt the standard
+input for your source code, and allows you to easily mix command line
+and library source code
+(@pxref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}).
+
+If no @samp{-f} or @samp{--source} option is specified, then @code{gawk}
+will use the first non-option command line argument as the text of the
+program source code.
+
+@node Other Arguments, AWKPATH Variable, Options, Command Line
+@section Other Command Line Arguments
+
+Any additional arguments on the command line are normally treated as
+input files to be processed in the order specified.  However, an
+argument that has the form @code{@var{var}=@var{value}} means to assign
+the value @var{value} to the variable @var{var}---it does not specify a
+file at all.
+
+@vindex ARGV
+All these arguments are made available to your @code{awk} program in the
+@code{ARGV} array (@pxref{Built-in Variables}).  Command line options
+and the program text (if present) are omitted from the @code{ARGV}
+array.  All other arguments, including variable assignments, are
+included.
+
+The distinction between file name arguments and variable-assignment
+arguments is made when @code{awk} is about to open the next input file.
+At that point in execution, it checks the ``file name'' to see whether
+it is really a variable assignment; if so, @code{awk} sets the variable
+instead of reading a file.
+
+Therefore, the variables actually receive the specified values after all
+previously specified files have been read.  In particular, the values of
+variables assigned in this fashion are @emph{not} available inside a
+@code{BEGIN} rule
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}),
+since such rules are run before @code{awk} begins scanning the argument list.
+The values given on the command line are processed for escape sequences
+(@pxref{Constants, ,Constant Expressions}).@refill
+
+In some earlier implementations of @code{awk}, when a variable assignment
+occurred before any file names, the assignment would happen @emph{before}
+the @code{BEGIN} rule was executed.  Some applications came to depend
+upon this ``feature.''  When @code{awk} was changed to be more consistent,
+the @samp{-v} option was added to accommodate applications that depended
+upon this old behavior.
+
+The variable assignment feature is most useful for assigning to variables
+such as @code{RS}, @code{OFS}, and @code{ORS}, which control input and
+output formats, before scanning the data files.  It is also useful for
+controlling state if multiple passes are needed over a data file.  For
+example:@refill
+
+@cindex multiple passes over data
+@cindex passes, multiple
+@smallexample
+awk 'pass == 1  @{ @var{pass 1 stuff} @}
+     pass == 2  @{ @var{pass 2 stuff} @}' pass=1 datafile pass=2 datafile
+@end smallexample
+
+Given the variable assignment feature, the @samp{-F} option is not
+strictly necessary.  It remains for historical compatibility.
+
+@node AWKPATH Variable, Obsolete, Other Arguments, Command Line
+@section The @code{AWKPATH} Environment Variable
+@cindex @code{AWKPATH} environment variable
+@cindex search path
+@cindex directory search
+@cindex path, search
+@iftex
+@cindex differences between @code{gawk} and @code{awk}
+@end iftex
+
+The previous section described how @code{awk} program files can be named
+on the command line with the @samp{-f} option.  In some @code{awk}
+implementations, you must supply a precise path name for each program
+file, unless the file is in the current directory.
+
+But in @code{gawk}, if the file name supplied in the @samp{-f} option
+does not contain a @samp{/}, then @code{gawk} searches a list of
+directories (called the @dfn{search path}), one by one, looking for a
+file with the specified name.
+
+The search path is actually a string consisting of directory names
+separated by colons.  @code{gawk} gets its search path from the
+@code{AWKPATH} environment variable.  If that variable does not exist,
+@code{gawk} uses the default path, which is
+@samp{.:/usr/lib/awk:/usr/local/lib/awk}.  (Programs written by
+system administrators should use an @code{AWKPATH} variable that
+does not include the current directory, @samp{.}.)@refill
+
+The search path feature is particularly useful for building up libraries
+of useful @code{awk} functions.  The library files can be placed in a
+standard directory that is in the default path, and then specified on
+the command line with a short file name.  Otherwise, the full file name
+would have to be typed for each file.
+
+By combining the @samp{--source} and @samp{-f} options, your command line
+@code{awk} programs can use facilities in @code{awk} library files.
+
+Path searching is not done if @code{gawk} is in compatibility mode.
+This is true for both @samp{-W compat} and @samp{-W posix}.
+@xref{Options, ,Command Line Options}.
+
+@strong{Note:} if you want files in the current directory to be found,
+you must include the current directory in the path, either by writing
+@file{.} as an entry in the path, or by writing a null entry in the
+path.  (A null entry is indicated by starting or ending the path with a
+colon, or by placing two colons next to each other (@samp{::}).)  If the
+current directory is not included in the path, then files cannot be
+found in the current directory.  This path search mechanism is identical
+to the shell's.
+@c someday, @cite{The Bourne Again Shell}....
+
+@node Obsolete, Undocumented, AWKPATH Variable, Command Line
+@section Obsolete Options and/or Features
+
+@cindex deprecated options
+@cindex obsolete options
+@cindex deprecated features
+@cindex obsolete features
+This section describes features and/or command line options from the
+previous release of @code{gawk} that are either not available in the
+current version, or that are still supported but deprecated (meaning that
+they will @emph{not} be in the next release).
+
+@c update this section for each release!
+
+For version 2.15 of @code{gawk}, the following command line options
+from version 2.11.1 are no longer recognized.
+
+@table @samp
+@ignore
+@item -nostalgia
+Use @samp{-W nostalgia} instead.
+@end ignore
+
+@item -c
+Use @samp{-W compat} instead.
+
+@item -V
+Use @samp{-W version} instead.
+
+@item -C
+Use @samp{-W copyright} instead.
+
+@item -a
+@itemx -e
+These options produce an ``unrecognized option'' error message but have
+no effect on the execution of @code{gawk}.  The @sc{posix} standard now
+specifies traditional @code{awk} regular expressions for the @code{awk} utility.
+@end table
+
+The public-domain version of @code{strftime} that is distributed with
+@code{gawk} changed for the 2.14 release.  The @samp{%V} conversion specifier
+that used to generate the date in VMS format was changed to @samp{%v}.
+This is because the @sc{posix} standard for the @code{date} utility now
+specifies a @samp{%V} conversion specifier.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for details.
+
+@node Undocumented,  , Obsolete, Command Line
+@section Undocumented Options and Features
+
+This section intentionally left blank.
+
+@c Read The Source, Luke!
+
+@ignore
+@c If these came out in the Info file or TeX manual, then they wouldn't
+@c be undocumented, would they?
+
+@code{gawk} has one undocumented option:
+
+@table @samp
+@item -W nostalgia
+Print the message @code{"awk: bailing out near line 1"} and dump core.
+This option was inspired by the common behavior of very early versions of
+Unix @code{awk}, and by a t--shirt.
+@end table
+
+Early versions of @code{awk} used to not require any separator (either
+a newline or @samp{;}) between the rules in @code{awk} programs.  Thus,
+it was common to see one-line programs like:
+
+@example
+awk '@{ sum += $1 @} END @{ print sum @}'
+@end example
+
+@code{gawk} actually supports this, but it is purposely undocumented
+since it is considered bad style.  The correct way to write such a program
+is either
+
+@example
+awk '@{ sum += $1 @} ; END @{ print sum @}'
+@end example
+
+@noindent
+or
+
+@example
+awk '@{ sum += $1 @}
+     END @{ print sum @}' data
+@end example
+
+@noindent
+@xref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a fuller
+explanation.@refill
+
+As an accident of the implementation of the original Unix @code{awk}, if
+a built-in function used @code{$0} as its default argument, it was possible
+to call that function without the parentheses.  In particular, it was
+common practice to use the @code{length} function in this fashion.
+For example, the pipeline:
+
+@example
+echo abcdef | awk '@{ print length @}'
+@end example
+
+@noindent
+would print @samp{6}.
+
+For backwards compatibility with old programs, @code{gawk} supports
+this usage, but only for the @code{length} function.  New programs should
+@emph{not} call the @code{length} function this way.  In particular,
+this usage will not be portable to other @sc{posix} compliant versions
+of @code{awk}.  It is also poor style.
+
+@end ignore
+
+@node Language History, Installation, Command Line, Top
+@chapter The Evolution of the @code{awk} Language
+
+This manual describes the GNU implementation of @code{awk}, which is patterned
+after the @sc{posix} specification.  Many @code{awk} users are only familiar
+with the original @code{awk} implementation in Version 7 Unix, which is also
+the basis for the version in Berkeley Unix (through 4.3--Reno).  This chapter
+briefly describes the evolution of the @code{awk} language.
+
+@menu
+* V7/S5R3.1::                   The major changes between V7 and 
+                                System V Release 3.1.
+* S5R4::                        Minor changes between System V 
+                                Releases 3.1 and 4.
+* POSIX::                       New features from the @sc{posix} standard.
+* POSIX/GNU::                   The extensions in @code{gawk} 
+                                not in @sc{posix} @code{awk}.
+@end menu
+
+@node V7/S5R3.1, S5R4, Language History, Language History
+@section Major Changes between V7 and S5R3.1
+
+The @code{awk} language evolved considerably between the release of
+Version 7 Unix (1978) and the new version first made widely available in
+System V Release 3.1 (1987).  This section summarizes the changes, with
+cross-references to further details.
+
+@itemize @bullet
+@item
+The requirement for @samp{;} to separate rules on a line
+(@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}).
+
+@item
+User-defined functions, and the @code{return} statement
+(@pxref{User-defined, ,User-defined Functions}).
+
+@item
+The @code{delete} statement (@pxref{Delete, ,The @code{delete} Statement}).
+
+@item
+The @code{do}-@code{while} statement
+(@pxref{Do Statement, ,The @code{do}-@code{while} Statement}).@refill
+
+@item
+The built-in functions @code{atan2}, @code{cos}, @code{sin}, @code{rand} and
+@code{srand} (@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The built-in functions @code{gsub}, @code{sub}, and @code{match}
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).
+
+@item
+The built-in functions @code{close}, which closes an open file, and
+@code{system}, which allows the user to execute operating system
+commands (@pxref{I/O Functions, ,Built-in Functions for Input/Output}).@refill
+@c Does the above verbiage prevent an overfull hbox?  --mew, rjc 24jan1992
+
+@item
+The @code{ARGC}, @code{ARGV}, @code{FNR}, @code{RLENGTH}, @code{RSTART},
+and @code{SUBSEP} built-in variables (@pxref{Built-in Variables}).
+
+@item
+The conditional expression using the operators @samp{?} and @samp{:}
+(@pxref{Conditional Exp, ,Conditional Expressions}).@refill
+
+@item
+The exponentiation operator @samp{^}
+(@pxref{Arithmetic Ops, ,Arithmetic Operators}) and its assignment operator
+form @samp{^=} (@pxref{Assignment Ops, ,Assignment Expressions}).@refill
+
+@item
+C-compatible operator precedence, which breaks some old @code{awk}
+programs (@pxref{Precedence, ,Operator Precedence (How Operators Nest)}).
+
+@item
+Regexps as the value of @code{FS}
+(@pxref{Field Separators, ,Specifying how Fields are Separated}), and as the
+third argument to the @code{split} function
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+@item
+Dynamic regexps as operands of the @samp{~} and @samp{!~} operators
+(@pxref{Regexp Usage, ,How to Use Regular Expressions}).
+
+@item
+Escape sequences (@pxref{Constants, ,Constant Expressions}) in regexps.@refill
+
+@item
+The escape sequences @samp{\b}, @samp{\f}, and @samp{\r}
+(@pxref{Constants, ,Constant Expressions}).
+
+@item
+Redirection of input for the @code{getline} function
+(@pxref{Getline, ,Explicit Input with @code{getline}}).@refill
+
+@item
+Multiple @code{BEGIN} and @code{END} rules
+(@pxref{BEGIN/END, ,@code{BEGIN} and @code{END} Special Patterns}).@refill
+
+@item
+Simulated multi-dimensional arrays
+(@pxref{Multi-dimensional, ,Multi-dimensional Arrays}).@refill
+@end itemize
+
+@node S5R4, POSIX, V7/S5R3.1, Language History
+@section Changes between S5R3.1 and S5R4
+
+The System V Release 4 version of Unix @code{awk} added these features
+(some of which originated in @code{gawk}):
+
+@itemize @bullet
+@item
+The @code{ENVIRON} variable (@pxref{Built-in Variables}).
+
+@item
+Multiple @samp{-f} options on the command line
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The @samp{-v} option for assigning variables before program execution begins
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The @samp{--} option for terminating command line options.
+
+@item
+The @samp{\a}, @samp{\v}, and @samp{\x} escape sequences
+(@pxref{Constants, ,Constant Expressions}).@refill
+
+@item
+A defined return value for the @code{srand} built-in function
+(@pxref{Numeric Functions, ,Numeric Built-in Functions}).
+
+@item
+The @code{toupper} and @code{tolower} built-in string functions
+for case translation
+(@pxref{String Functions, ,Built-in Functions for String Manipulation}).@refill
+
+@item
+A cleaner specification for the @samp{%c} format-control letter in the
+@code{printf} function
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
+
+@item
+The ability to dynamically pass the field width and precision (@code{"%*.*d"})
+in the argument list of the @code{printf} function
+(@pxref{Printf, ,Using @code{printf} Statements for Fancier Printing}).@refill
+
+@item
+The use of constant regexps such as @code{/foo/} as expressions, where
+they are equivalent to use of the matching operator, as in @code{$0 ~
+/foo/} (@pxref{Constants, ,Constant Expressions}).
+@end itemize
+
+@node POSIX, POSIX/GNU, S5R4, Language History
+@section Changes between S5R4 and POSIX @code{awk}
+
+The @sc{posix} Command Language and Utilities standard for @code{awk}
+introduced the following changes into the language:
+
+@itemize @bullet{}
+@item
+The use of @samp{-W} for implementation-specific options.
+
+@item
+The use of @code{CONVFMT} for controlling the conversion of numbers
+to strings (@pxref{Conversion, ,Conversion of Strings and Numbers}).
+
+@item
+The concept of a numeric string, and tighter comparison rules to go
+with it (@pxref{Comparison Ops, ,Comparison Expressions}).
+
+@item
+More complete documentation of many of the previously undocumented
+features of the language.
+@end itemize
+
+@node POSIX/GNU,  , POSIX, Language History
+@section Extensions in @code{gawk} not in POSIX @code{awk}
+
+The GNU implementation, @code{gawk}, adds these features:
+
+@itemize @bullet
+@item
+The @code{AWKPATH} environment variable for specifying a path search for
+the @samp{-f} command line option
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The various @code{gawk}-specific features available via the @samp{-W}
+command line option (@pxref{Command Line, ,Invoking @code{awk}}).
+
+@item
+The @code{ARGIND} variable, which tracks the movement of @code{FILENAME}
+through @code{ARGV} (@pxref{Built-in Variables}).
+
+@item
+The @code{ERRNO} variable, which contains the system error message when
+@code{getline} returns @minus{}1, or when @code{close} fails
+(@pxref{Built-in Variables}).
+
+@item
+The @code{IGNORECASE} variable and its effects
+(@pxref{Case-sensitivity, ,Case-sensitivity in Matching}).@refill
+
+@item
+The @code{FIELDWIDTHS} variable and its effects
+(@pxref{Constant Size, ,Reading Fixed-width Data}).@refill
+
+@item
+The @code{next file} statement for skipping to the next data file
+(@pxref{Next File Statement, ,The @code{next file} Statement}).@refill
+
+@item
+The @code{systime} and @code{strftime} built-in functions for obtaining
+and printing time stamps
+(@pxref{Time Functions, ,Functions for Dealing with Time Stamps}).@refill
+
+@item
+The @file{/dev/stdin}, @file{/dev/stdout}, @file{/dev/stderr}, and
+@file{/dev/fd/@var{n}} file name interpretation
+(@pxref{Special Files, ,Standard I/O Streams}).@refill
+
+@item
+The @samp{-W compat} option to turn off these extensions
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@item
+The @samp{-W posix} option for full @sc{posix} compliance
+(@pxref{Command Line, ,Invoking @code{awk}}).@refill
+
+@end itemize
+
+@node Installation, Gawk Summary, Language History, Top
+@chapter Installing @code{gawk}
+
+This chapter provides instructions for installing @code{gawk} on the
+various platforms that are supported by the developers.  The primary
+developers support Unix (and one day, GNU), while the other ports were
+contributed.  The file @file{ACKNOWLEDGMENT} in the @code{gawk}
+distribution lists the electronic mail addresses of the people who did
+the respective ports.@refill
+
+@menu
+* Gawk Distribution::           What is in the @code{gawk} distribution.
+* Unix Installation::           Installing @code{gawk} under various versions
+                                of Unix.
+* VMS Installation::            Installing @code{gawk} on VMS.
+* MS-DOS Installation::         Installing @code{gawk} on MS-DOS.
+* Atari Installation::          Installing @code{gawk} on the Atari ST.
+@end menu
+
+@node Gawk Distribution, Unix Installation, Installation, Installation
+@section The @code{gawk} Distribution
+
+This section first describes how to get and extract the @code{gawk}
+distribution, and then discusses what is in the various files and
+subdirectories.
+
+@menu
+* Extracting::                  How to get and extract the distribution.
+* Distribution contents::       What is in the distribution.
+@end menu
+
+@node Extracting, Distribution contents, Gawk Distribution, Gawk Distribution
+@subsection Getting the @code{gawk} Distribution
+
+@cindex getting gawk
+@cindex anonymous ftp
+@cindex anonymous uucp
+@cindex ftp, anonymous
+@cindex uucp, anonymous
+@code{gawk} is distributed as a @code{tar} file compressed with the
+GNU Zip program, @code{gzip}.  You can
+get it via anonymous @code{ftp} to the Internet host @code{prep.ai.mit.edu}.
+Like all GNU software, it will be archived at other well known systems,
+from which it will be possible to use some sort of anonymous @code{uucp} to
+obtain the distribution as well.
+You can also order @code{gawk} on tape or CD-ROM directly from the
+Free Software Foundation.  (The address is on the copyright page.)
+Doing so directly contributes to the support of the foundation and to
+the production of more free software.
+
+Once you have the distribution (for example,
+@file{gawk-2.15.0.tar.z}), first use @code{gzip} to expand the
+file, and then use @code{tar} to extract it.  You can use the following
+pipeline to produce the @code{gawk} distribution:
+
+@example
+# Under System V, add 'o' to the tar flags
+gzip -d -c gawk-2.15.0.tar.z | tar -xvpf -
+@end example
+
+@noindent
+This will create a directory named @file{gawk-2.15} in the current
+directory.
+
+The distribution file name is of the form @file{gawk-2.15.@var{n}.tar.z}.
+The @var{n} represents a @dfn{patchlevel}, meaning that minor bugs have
+been fixed in the major release.  The current patchlevel is 0, but when
+retrieving distributions, you should get the version with the highest
+patchlevel.@refill
+
+If you are not on a Unix system, you will need to make other arrangements
+for getting and extracting the @code{gawk} distribution.  You should consult
+a local expert.
+
+@node Distribution contents,  , Extracting, Gawk Distribution
+@subsection Contents of the @code{gawk} Distribution
+
+@code{gawk} has a number of C source files, documentation files,
+subdirectories and files related to the configuration process
+(@pxref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}),
+and several subdirectories related to different, non-Unix,
+operating systems.@refill
+
+@table @asis
+@item various @samp{.c}, @samp{.y}, and @samp{.h} files 
+
+The C and YACC source files are the actual @code{gawk} source code.
+@end table
+
+@table @file
+@item README
+@itemx README.VMS
+@itemx README.dos
+@itemx README.rs6000
+@itemx README.ultrix
+Descriptive files: @file{README} for @code{gawk} under Unix, and the
+rest for the various hardware and software combinations.
+
+@item PORTS
+A list of systems to which @code{gawk} has been ported, and which
+have successfully run the test suite.
+
+@item ACKNOWLEDGMENT
+A list of the people who contributed major parts of the code or documentation.
+
+@item NEWS
+A list of changes to @code{gawk} since the last release or patch.
+
+@item COPYING
+The GNU General Public License.
+
+@item FUTURES
+A brief list of features and/or changes being contemplated for future
+releases, with some indication of the time frame for the feature, based
+on its difficulty.
+
+@item LIMITATIONS
+A list of those factors that limit @code{gawk}'s performance.
+Most of these depend on the hardware or operating system software, and
+are not limits in @code{gawk} itself.@refill
+
+@item PROBLEMS
+A file describing known problems with the current release.
+
+@item gawk.1
+The @code{troff} source for a manual page describing @code{gawk}.
+
+@item gawk.texinfo
+@ifinfo
+The @code{texinfo} source file for this Info file.
+It should be processed with @TeX{} to produce a printed manual, and
+with @code{makeinfo} to produce the Info file.@refill
+@end ifinfo
+@iftex
+The @code{texinfo} source file for this manual.
+It should be processed with @TeX{} to produce a printed manual, and
+with @code{makeinfo} to produce the Info file.@refill
+@end iftex
+
+@item Makefile.in
+@itemx config
+@itemx config.in
+@itemx configure
+@itemx missing
+@itemx mungeconf
+These files and subdirectories are used when configuring @code{gawk}
+for various Unix systems.  They are explained in detail in
+@ref{Unix Installation, ,Compiling and Installing @code{gawk} on Unix}.@refill
+
+@item atari
+Files needed for building @code{gawk} on an Atari ST.
+@xref{Atari Installation, ,Installing @code{gawk} on the Atari ST}, for details.
+
+@item pc
+Files needed for building @code{gawk} under MS-DOS.
+@xref{MS-DOS Installation, ,Installing @code{gawk} on MS-DOS}, for details.
+
+@item vms
+Files needed for building @code{gawk} under VMS.
+@xref{VMS Installation, ,Compiling Installing and Running @code{gawk} on VMS}, for details.
+
+@item test
+Many interesting @code{awk} programs, provided as a test suite for
+@code{gawk}.  You can use @samp{make test} from the top level @code{gawk}
+directory to run your version of @code{gawk} against the test suite.
+@c There are many programs here that are useful in their own right.
+If @code{gawk} successfully passes @samp{make test} then you can
+be confident of a successful port.@refill
+@end table
+
+@node Unix Installation, VMS Installation, Gawk Distribution, Installation
+@section Compiling and Installing @code{gawk} on Unix
+
+Often, you can compile and install @code{gawk} by typing only two
+commands.  However, if you do not use a supported system, you may need
+to configure @code{gawk} for your system yourself.
+
+@menu
+* Quick Installation::          Compiling @code{gawk} on a 
+                                supported Unix version.
+* Configuration Philosophy::    How it's all supposed to work.
+* New Configurations::          What to do if there is no supplied 
+                                configuration for your system.
+@end menu
+
+@node Quick Installation, Configuration Philosophy, Unix Installation, Unix Installation
+@subsection Compiling @code{gawk} for a Supported Unix Version
+
+@cindex installation, unix
+After you have extracted the @code{gawk} distribution, @code{cd}
+to @file{gawk-2.15}.  Look in the @file{config} subdirectory for a
+file that matches your hardware/software combination.  In general,
+only the software is relevant; for example, @code{sunos41} is used
+for SunOS 4.1, on both Sun 3 and Sun 4 hardware.@refill
+
+If you find such a file, run the command:
+
+@example
+# assume you have SunOS 4.1
+./configure sunos41
+@end example
+
+This produces a @file{Makefile} and @file{config.h} tailored to your
+system.  You may wish to edit the @file{Makefile} to use a different
+C compiler, such as @code{gcc}, the GNU C compiler, if you have it.
+You may also wish to change the @code{CFLAGS} variable, which controls
+the command line options that are passed to the C compiler (such as
+optimization levels, or compiling for debugging).@refill
+
+After you have configured @file{Makefile} and @file{config.h}, type:
+
+@example
+make
+@end example
+
+@noindent
+and shortly thereafter, you should have an executable version of @code{gawk}.
+That's all there is to it!
+
+@node Configuration Philosophy, New Configurations, Quick Installation, Unix Installation
+@subsection The Configuration Process
+
+(This section is of interest only if you know something about using the
+C language and the Unix operating system.)
+
+The source code for @code{gawk} generally attempts to adhere to industry
+standards wherever possible.  This means that @code{gawk} uses library
+routines that are specified by the @sc{ansi} C standard and by the @sc{posix}
+operating system interface standard.  When using an @sc{ansi} C compiler,
+function prototypes are provided to help improve the compile-time checking.
+
+Many older Unix systems do not support all of either the @sc{ansi} or the
+@sc{posix} standards.  The @file{missing} subdirectory in the @code{gawk}
+distribution contains replacement versions of those subroutines that are
+most likely to be missing.
+
+The @file{config.h} file that is created by the @code{configure} program
+contains definitions that describe features of the particular operating
+system where you are attempting to compile @code{gawk}.  For the most
+part, it lists which standard subroutines are @emph{not} available.
+For example, if your system lacks the @samp{getopt} routine, then
+@samp{GETOPT_MISSING} would be defined.
+
+@file{config.h} also defines constants that describe facts about your
+variant of Unix.  For example, there may not be an @samp{st_blksize}
+element in the @code{stat} structure.  In this case @samp{BLKSIZE_MISSING}
+would be defined.
+
+Based on the list in @file{config.h} of standard subroutines that are
+missing, @file{missing.c} will do a @samp{#include} of the appropriate
+file(s) from the @file{missing} subdirectory.@refill
+
+Conditionally compiled code in the other source files relies on the
+other definitions in the @file{config.h} file.
+
+Besides creating @file{config.h}, @code{configure} produces a @file{Makefile}
+from @file{Makefile.in}.  There are a number of lines in @file{Makefile.in}
+that are system or feature specific.  For example, there is a line that begins
+with @samp{##MAKE_ALLOCA_C##}.  This is normally a comment line, since
+it starts with @samp{#}.  If a configuration file has @samp{MAKE_ALLOCA_C}
+in it, then @code{configure} will delete the @samp{##MAKE_ALLOCA_C##}
+from the beginning of the line.  This will enable the rules in the
+@file{Makefile} that use a C version of @samp{alloca}.  There are several
+similar features that work in this fashion.@refill
+
+@node New Configurations,  , Configuration Philosophy, Unix Installation
+@subsection Configuring @code{gawk} for a New System
+
+(This section is of interest only if you know something about using the
+C language and the Unix operating system, and if you have to install
+@code{gawk} on a system that is not supported by the @code{gawk} distribution.
+If you are a C or Unix novice, get help from a local expert.)
+
+If you need to configure @code{gawk} for a Unix system that is not
+supported in the distribution, first see
+@ref{Configuration Philosophy, ,The Configuration Process}.
+Then, copy @file{config.in} to @file{config.h}, and copy
+@file{Makefile.in} to @file{Makefile}.@refill
+
+Next, edit both files.  Both files are liberally commented, and the
+necessary changes should be straightforward.
+
+While editing @file{config.h}, you need to determine what library
+routines you do or do not have by consulting your system documentation, or
+by perusing your actual libraries using the @code{ar} or @code{nm} utilities.
+In the worst case, simply do not define @emph{any} of the macros for missing
+subroutines.  When you compile @code{gawk}, the final link-editing step
+will fail.  The link editor will provide you with a list of unresolved external
+references---these are the missing subroutines.  Edit @file{config.h} again
+and recompile, and you should be set.@refill
+
+Editing the @file{Makefile} should also be straightforward.  Enable or
+disable the lines that begin with @samp{##MAKE_@var{whatever}##}, as
+appropriate.  Select the correct C compiler and @code{CFLAGS} for it.
+Then run @code{make}.
+
+Getting a correct configuration is likely to be an iterative process.
+Do not be discouraged if it takes you several tries.  If you have no
+luck whatsoever, please report your system type, and the steps you took.
+Once you do have a working configuration, please send it to the maintainers
+so that support for your system can be added to the official release.
+
+@xref{Bugs, ,Reporting Problems and Bugs}, for information on how to report
+problems in configuring @code{gawk}.  You may also use the same mechanisms
+for sending in new configurations.@refill
+
+@node VMS Installation, MS-DOS Installation, Unix Installation, Installation
+@section Compiling, Installing, and Running @code{gawk} on VMS
+
+@c based on material from
+@c Pat Rankin <rankin@eql.caltech.edu>
+
+@cindex installation, vms
+This section describes how to compile and install @code{gawk} under VMS.
+
+@menu
+* VMS Compilation::             How to compile @code{gawk} under VMS.
+* VMS Installation Details::    How to install @code{gawk} under VMS.
+* VMS Running::                 How to run @code{gawk} under VMS.
+* VMS POSIX::                   Alternate instructions for VMS POSIX.
+@end menu
+
+@node VMS Compilation, VMS Installation Details, VMS Installation, VMS Installation
+@subsection Compiling @code{gawk} under VMS
+
+To compile @code{gawk} under VMS, there is a @code{DCL} command procedure that
+will issue all the necessary @code{CC} and @code{LINK} commands, and there is
+also a @file{Makefile} for use with the @code{MMS} utility.  From the source
+directory, use either
+
+@smallexample
+$ @@[.VMS]VMSBUILD.COM
+@end smallexample
+
+@noindent
+or
+
+@smallexample
+$ MMS/DESCRIPTION=[.VMS]DESCRIP.MMS GAWK
+@end smallexample
+
+Depending upon which C compiler you are using, follow one of the sets
+of instructions in this table:
+
+@table @asis
+@item VAX C V3.x
+Use either @file{vmsbuild.com} or @file{descrip.mms} as is.  These use
+@code{CC/OPTIMIZE=NOLINE}, which is essential for Version 3.0.
+
+@item VAX C V2.x
+You must have Version 2.3 or 2.4; older ones won't work.  Edit either
+@file{vmsbuild.com} or @file{descrip.mms} according to the comments in them.
+For @file{vmsbuild.com}, this just entails removing two @samp{!} delimiters.
+Also edit @file{config.h} (which is a copy of file @file{[.config]vms-conf.h})
+and comment out or delete the two lines @samp{#define __STDC__ 0} and
+@samp{#define VAXC_BUILTINS} near the end.@refill
+
+@item GNU C
+Edit @file{vmsbuild.com} or @file{descrip.mms}; the changes are different
+from those for VAX C V2.x, but equally straightforward.  No changes to
+@file{config.h} should be needed.
+
+@item DEC C
+Edit @file{vmsbuild.com} or @file{descrip.mms} according to their comments.
+No changes to @file{config.h} should be needed.
+@end table
+
+@code{gawk} 2.15 has been tested under VAX/VMS 5.5-1 using VAX C V3.2,
+GNU C 1.40 and 2.3.  It should work without modifications for VMS V4.6 and up.
+
+@node VMS Installation Details, VMS Running, VMS Compilation, VMS Installation
+@subsection Installing @code{gawk} on VMS
+
+To install @code{gawk}, all you need is a ``foreign'' command, which is
+a @code{DCL} symbol whose value begins with a dollar sign.
+
+@smallexample
+$ GAWK :== $device:[directory]GAWK
+@end smallexample
+
+@noindent
+(Substitute the actual location of @code{gawk.exe} for
+@samp{device:[directory]}.) The symbol should be placed in the
+@file{login.com} of any user who wishes to run @code{gawk},
+so that it will be defined every time the user logs on.
+Alternatively, the symbol may be placed in the system-wide
+@file{sylogin.com} procedure, which will allow all users
+to run @code{gawk}.@refill
+
+Optionally, the help entry can be loaded into a VMS help library:
+
+@smallexample
+$ LIBRARY/HELP SYS$HELP:HELPLIB [.VMS]GAWK.HLP
+@end smallexample
+
+@noindent
+(You may want to substitute a site-specific help library rather than
+the standard VMS library @samp{HELPLIB}.)  After loading the help text,
+
+@c this is so tiny, but `should' be smallexample for consistency sake...
+@c I didn't because it was so short.  --mew 29jan1992
+@example
+$ HELP GAWK
+@end example
+
+@noindent
+will provide information about both the @code{gawk} implementation and the
+@code{awk} programming language.
+
+The logical name @samp{AWK_LIBRARY} can designate a default location
+for @code{awk} program files.  For the @samp{-f} option, if the specified
+filename has no device or directory path information in it, @code{gawk}
+will look in the current directory first, then in the directory specified
+by the translation of @samp{AWK_LIBRARY} if the file was not found.
+If after searching in both directories, the file still is not found,
+then @code{gawk} appends the suffix @samp{.awk} to the filename and the
+file search will be re-tried.  If @samp{AWK_LIBRARY} is not defined, that
+portion of the file search will fail benignly.@refill
+
+@node VMS Running, VMS POSIX, VMS Installation Details, VMS Installation
+@subsection Running @code{gawk} on VMS
+
+Command line parsing and quoting conventions are significantly different
+on VMS, so examples in this manual or from other sources often need minor
+changes.  They @emph{are} minor though, and all @code{awk} programs
+should run correctly.
+
+Here are a couple of trivial tests:
+
+@smallexample
+$ gawk -- "BEGIN @{print ""Hello, World!""@}"
+$ gawk -"W" version     ! could also be -"W version" or "-W version"
+@end smallexample
+
+@noindent
+Note that upper-case and mixed-case text must be quoted.
+
+The VMS port of @code{gawk} includes a @code{DCL}-style interface in addition
+to the original shell-style interface (see the help entry for details).
+One side-effect of dual command line parsing is that if there is only a
+single parameter (as in the quoted string program above), the command
+becomes ambiguous.  To work around this, the normally optional @samp{--}
+flag is required to force Unix style rather than @code{DCL} parsing.  If any
+other dash-type options (or multiple parameters such as data files to be
+processed) are present, there is no ambiguity and @samp{--} can be omitted.
+
+The default search path when looking for @code{awk} program files specified
+by the @samp{-f} option is @code{"SYS$DISK:[],AWK_LIBRARY:"}.  The logical
+name @samp{AWKPATH} can be used to override this default.  The format
+of @samp{AWKPATH} is a comma-separated list of directory specifications.
+When defining it, the value should be quoted so that it retains a single
+translation, and not a multi-translation @code{RMS} searchlist.
+
+@node VMS POSIX,  , VMS Running, VMS Installation
+@subsection Building and Using @code{gawk} under VMS POSIX
+
+Ignore the instructions above, although @file{vms/gawk.hlp} should still
+be made available in a help library.  Make sure that the two scripts,
+@file{configure} and @file{mungeconf}, are executable; use @samp{chmod +x}
+on them if necessary.  Then execute the following commands:
+
+@smallexample
+$ POSIX
+psx> configure vms-posix
+psx> make awktab.c gawk
+@end smallexample
+
+@noindent
+The first command will construct files @file{config.h} and @file{Makefile}
+out of templates.  The second command will compile and link @code{gawk}.
+Due to a @code{make} bug in VMS POSIX V1.0 and V1.1,
+the file @file{awktab.c} must be given as an explicit target or it will
+not be built and the final link step will fail.  Ignore the warning
+@samp{"Could not find lib m in lib list"}; it is harmless, caused by the
+explicit use of @samp{-lm} as a linker option which is not needed
+under VMS POSIX.  Under V1.1 (but not V1.0) a problem with the @code{yacc}
+skeleton @file{/etc/yyparse.c} will cause a compiler warning for
+@file{awktab.c}, followed by a linker warning about compilation warnings
+in the resulting object module.  These warnings can be ignored.@refill
+
+Once built, @code{gawk} will work like any other shell utility.  Unlike
+the normal VMS port of @code{gawk}, no special command line manipulation is
+needed in the VMS POSIX environment.
+
+@node MS-DOS Installation, Atari Installation, VMS Installation, Installation
+@section Installing @code{gawk} on MS-DOS
+
+@cindex installation, ms-dos
+The first step is to get all the files in the @code{gawk} distribution
+onto your PC.  Move all the files from the @file{pc} directory into
+the main directory where the other files are.  Edit the file
+@file{make.bat} so that it will be an acceptable MS-DOS batch file.
+This means making sure that all lines are terminated with the ASCII
+carriage return and line feed characters.
+
+@code{gawk} has only been compiled with version 5.1 of the Microsoft
+C compiler.  The file @file{make.bat} from the @file{pc} directory
+assumes that you have this compiler.
+
+Copy the file @file{setargv.obj} from the library directory where it
+resides to the @code{gawk} source code directory.
+
+Run @file{make.bat}.  This will compile @code{gawk} for you, and link it.
+That's all there is to it!
+
+@node Atari Installation,  , MS-DOS Installation, Installation
+@section Installing @code{gawk} on the Atari ST
+
+@c based on material from
+@c Michal Jaegermann <ntomczak@vm.ucs.ualberta.ca>
+
+@cindex installation, atari
+This section assumes that you are running TOS.  It applies to other Atari
+models (STe, TT) as well.
+
+In order to use @code{gawk}, you need to have a shell, either text or
+graphics, that does not map all the characters of a command line to
+upper case.  Maintaining case distinction in option flags is very
+important (@pxref{Command Line, ,Invoking @code{awk}}).  Popular shells
+like @code{gulam} or @code{gemini} will work, as will newer versions of
+@code{desktop}.  Support for I/O redirection is necessary to make it easy
+to import @code{awk} programs from other environments.  Pipes are nice to have,
+but not vital.
+
+If you have received an executable version of @code{gawk}, place it,
+as usual, anywhere in your @code{PATH} where your shell will find it.
+
+While executing, @code{gawk} creates a number of temporary files.
+@code{gawk} looks for either of the environment variables @code{TEMP}
+or @code{TMPDIR}, in that order.  If either one is found, its value
+is assumed to be a directory for temporary files.  This directory
+must exist, and if you can spare the memory, it is a good idea to
+put it on a @sc{ram} drive.  If neither @code{TEMP} nor @code{TMPDIR}
+is found, then @code{gawk} uses the current directory for its
+temporary files.
+
+The ST version of @code{gawk} searches for its program files as
+described in @ref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable}.
+On the ST, the default value for the @code{AWKPATH} variable is
+@code{@w{".,c:\lib\awk,c:\gnu\lib\awk"}}.
+The search path can be modified by explicitly setting @code{AWKPATH} to
+whatever you wish.  Note that colons cannot be used on the ST to separate
+elements in the @code{AWKPATH} variable, since they have another, reserved,
+meaning.  Instead, you must use a comma to separate elements in the path.
+If you are recompiling @code{gawk} on the ST, then you can choose a new
+default search path, by setting the value of @samp{DEFPATH} in the file
+@file{...\config\atari}.  You may choose a different separator character
+by setting the value of @samp{ENVSEP} in the same file.  The new values will
+be used when creating the header file @file{config.h}.@refill
+
+@ignore
+As a last resort, small
+adjustments can be made directly on the executable version of @code{gawk}
+using a binary editor.@refill
+@end ignore
+
+Although @code{awk} allows great flexibility in doing I/O redirections
+from within a program, this facility should be used with care on the ST.
+In some circumstances the OS routines for file handle pool processing
+lose track of certain events, causing the computer to crash, and requiring
+a reboot.  Often a warm reboot is sufficient.  Fortunately, this happens
+infrequently, and in rather esoteric situations.  In particular, avoid
+having one part of an @code{awk} program using @code{print}
+statements explicitly redirected to @code{"/dev/stdout"}, while other
+@code{print} statements use the default standard output, and a
+calling shell has redirected standard output to a file.@refill
+@c whew!
+
+When @code{gawk} is compiled with the ST version of @code{gcc} and its
+usual libraries, it will accept both @samp{/} and @samp{\} as path separators.
+While this is convenient, it should be remembered that this removes one,
+technically legal, character (@samp{/}) from your file names, and that
+it may create problems for external programs, called via the @code{system()}
+function, which may not support this convention.  Whenever it is possible
+that a file created by @code{gawk} will be used by some other program,
+use only backslashes.  Also remember that in @code{awk}, backslashes in
+strings have to be doubled in order to get literal backslashes.
+
+The initial port of @code{gawk} to the ST was done with @code{gcc}.
+If you wish to recompile @code{gawk} from scratch, you will need to use
+a compiler that accepts @sc{ansi} standard C (such as @code{gcc}, Turbo C,
+or Prospero C).  If @code{sizeof(int) != @w{sizeof(int *)}}, the correctness
+of the generated code depends heavily on the fact that all function calls
+have function prototypes in the current scope.  If your compiler does
+not accept function prototypes, you will probably have to add a
+number of casts to the code.@refill
+
+If you are using @code{gcc}, make sure that you have up-to-date libraries.
+Older versions have problems with some library functions (@code{atan2()},
+@code{strftime()}, the @samp{%g} conversion in @code{sprintf()}) which
+may affect the operation of @code{gawk}.
+
+In the @file{atari} subdirectory of the @code{gawk} distribution is
+a version of the @code{system()} function that has been tested with
+@code{gulam} and @code{msh}; it should work with other shells as well.
+With @code{gulam}, it passes the string to be executed without spawning
+an extra copy of a shell.  It is possible to replace this version of
+@code{system()} with a similar function from a library or from some other
+source if that version would be a better choice for the shell you prefer.
+
+The files needed to recompile @code{gawk} on the ST can be found in
+the @file{atari} directory.  The provided files and instructions below
+assume that you have the GNU C compiler (@code{gcc}), the @code{gulam} shell,
+and an ST version of @code{sed}.  The @file{Makefile} is set up to use
+@file{byacc} as a @file{yacc} replacement.  With a different set of tools some
+adjustments and/or editing will be needed.@refill
+
+@code{cd} to the @file{atari} directory.  Copy @file{Makefile.st} to
+@file{makefile} in the source (parent) directory.  Possibly adjust
+@file{../config/atari} to suit your system.  Execute the script @file{mkconf.g}
+which will create the header file @file{../config.h}.  Go back to the source
+directory.  If you are not using @code{gcc}, check the file @file{missing.c}.
+It may be necessary to change forward slashes in the references to files
+from the @file{atari} subdirectory into backslashes.  Type @code{make} and
+enjoy.@refill
+
+Compilation with @code{gcc} of some of the bigger modules, like
+@file{awk_tab.c}, may require a full four megabytes of memory.  On smaller
+machines you would need to cut down on optimizations, or you would have to
+switch to another, less memory hungry, compiler.@refill
+
+@node Gawk Summary, Sample Program, Installation, Top
+@appendix @code{gawk} Summary
+
+This appendix provides a brief summary of the @code{gawk} command line and the
+@code{awk} language.  It is designed to serve as a ``quick reference.''  It is
+therefore terse, but complete.
+
+@menu
+* Command Line Summary::        Recapitulation of the command line.
+* Language Summary::            A terse review of the language.
+* Variables/Fields::            Variables, fields, and arrays.
+* Rules Summary::               Patterns and Actions, and their 
+                                component parts.
+* Functions Summary::           Defining and calling functions.
+* Historical Features::         Some undocumented but supported ``features''.
+@end menu
+
+@node Command Line Summary, Language Summary, Gawk Summary, Gawk Summary
+@appendixsec Command Line Options Summary
+
+The command line consists of options to @code{gawk} itself, the
+@code{awk} program text (if not supplied via the @samp{-f} option), and
+values to be made available in the @code{ARGC} and @code{ARGV}
+predefined @code{awk} variables:
+
+@example
+awk @r{[@var{POSIX or GNU style options}]} -f source-file @r{[@code{--}]} @var{file} @dots{}
+awk @r{[@var{POSIX or GNU style options}]} @r{[@code{--}]} '@var{program}' @var{file} @dots{}
+@end example
+
+The options that @code{gawk} accepts are:
+
+@table @code
+@item -F @var{fs}
+@itemx --field-separator=@var{fs}
+Use @var{fs} for the input field separator (the value of the @code{FS}
+predefined variable).
+
+@item -f @var{program-file}
+@itemx --file=@var{program-file}
+Read the @code{awk} program source from the file @var{program-file}, instead
+of from the first command line argument.
+
+@item -v @var{var}=@var{val}
+@itemx --assign=@var{var}=@var{val}
+Assign the variable @var{var} the value @var{val} before program execution
+begins.
+
+@item -W compat
+@itemx --compat
+Specifies compatibility mode, in which @code{gawk} extensions are turned
+off.
+
+@item -W copyleft
+@itemx -W copyright
+@itemx --copyleft
+@itemx --copyright
+Print the short version of the General Public License on the error
+output.  This option may disappear in a future version of @code{gawk}.
+
+@item -W help
+@itemx -W usage
+@itemx --help
+@itemx --usage
+Print a relatively short summary of the available options on the error output.
+
+@item -W lint
+@itemx --lint
+Give warnings about dubious or non-portable @code{awk} constructs.
+
+@item -W posix
+@itemx --posix
+Specifies @sc{posix} compatibility mode, in which @code{gawk} extensions
+are turned off and additional restrictions apply.
+
+@item -W source=@var{program-text}
+@itemx --source=@var{program-text}
+Use @var{program-text} as @code{awk} program source code.  This option allows
+mixing command line source code with source code from files, and is
+particularly useful for mixing command line programs with library functions.
+
+@item -W version
+@itemx --version
+Print version information for this particular copy of @code{gawk} on the error
+output.  This option may disappear in a future version of @code{gawk}.
+
+@item --
+Signal the end of options.  This is useful to allow further arguments to the
+@code{awk} program itself to start with a @samp{-}.  This is mainly for
+consistency with the argument parsing conventions of @sc{posix}.
+@end table
+
+Any other options are flagged as invalid, but are otherwise ignored.
+@xref{Command Line, ,Invoking @code{awk}}, for more details.
+
+@node Language Summary, Variables/Fields, Command Line Summary, Gawk Summary
+@appendixsec Language Summary
+
+An @code{awk} program consists of a sequence of pattern-action statements
+and optional function definitions.
+
+@example
+@var{pattern}    @{ @var{action statements} @}
+
+function @var{name}(@var{parameter list})     @{ @var{action statements} @}
+@end example
+
+@code{gawk} first reads the program source from the
+@var{program-file}(s) if specified, or from the first non-option
+argument on the command line.  The @samp{-f} option may be used multiple
+times on the command line.  @code{gawk} reads the program text from all
+the @var{program-file} files, effectively concatenating them in the
+order they are specified.  This is useful for building libraries of
+@code{awk} functions, without having to include them in each new
+@code{awk} program that uses them.  To use a library function in a file
+from a program typed in on the command line, specify @samp{-f /dev/tty};
+then type your program, and end it with a @kbd{Control-d}.
+@xref{Command Line, ,Invoking @code{awk}}.@refill
+
+The environment variable @code{AWKPATH} specifies a search path to use
+when finding source files named with the @samp{-f} option.  The default
+path, which is
+@samp{.:/usr/lib/awk:/usr/local/lib/awk}, is used if @code{AWKPATH} is not set.
+If a file name given to the @samp{-f} option contains a @samp{/} character,
+no path search is performed.
+@xref{AWKPATH Variable, ,The @code{AWKPATH} Environment Variable},
+for a full description of the @code{AWKPATH} environment variable.@refill
+
+@code{gawk} compiles the program into an internal form, and then proceeds to
+read each file named in the @code{ARGV} array.  If there are no files named
+on the command line, @code{gawk} reads the standard input.
+
+If a ``file'' named on the command line has the form
+@samp{@var{var}=@var{val}}, it is treated as a variable assignment: the
+variable @var{var} is assigned the value @var{val}.
+If any of the files have a value that is the null string, that
+element in the list is skipped.@refill
+
+For each line in the input, @code{gawk} tests to see if it matches any
+@var{pattern} in the @code{awk} program.  For each pattern that the line
+matches, the associated @var{action} is executed.
+
+@node Variables/Fields, Rules Summary, Language Summary, Gawk Summary
+@appendixsec Variables and Fields
+
+@code{awk} variables are dynamic; they come into existence when they are
+first used.  Their values are either floating-point numbers or strings.
+@code{awk} also has one-dimensional arrays; multi-dimensional arrays
+may be simulated.  There are several predefined variables that
+@code{awk} sets as a program runs; these are summarized below.
+
+@menu
+* Fields Summary::              Input field splitting.
+* Built-in Summary::            @code{awk}'s built-in variables.
+* Arrays Summary::              Using arrays.
+* Data Type Summary::           Values in @code{awk} are numbers or strings.
+@end menu
+
+@node Fields Summary, Built-in Summary, Variables/Fields, Variables/Fields
+@appendixsubsec Fields
+
+As each input line is read, @code{gawk} splits the line into
+@var{fields}, using the value of the @code{FS} variable as the field
+separator.  If @code{FS} is a single character, fields are separated by
+that character.  Otherwise, @code{FS} is expected to be a full regular
+expression.  In the special case that @code{FS} is a single blank,
+fields are separated by runs of blanks and/or tabs.  Note that the value
+of @code{IGNORECASE} (@pxref{Case-sensitivity, ,Case-sensitivity in Matching})
+also affects how fields are split when @code{FS} is a regular expression.@refill
+
+Each field in the input line may be referenced by its position, @code{$1},
+@code{$2}, and so on.  @code{$0} is the whole line.  The value of a field may
+be assigned to as well.  Field numbers need not be constants:
+
+@example
+n = 5
+print $n
+@end example
+
+@noindent
+prints the fifth field in the input line.  The variable @code{NF} is set to
+the total number of fields in the input line.
+
+References to nonexistent fields (i.e., fields after @code{$NF}) return
+the null string.  However, assigning to a nonexistent field (e.g.,
+@code{$(NF+2) = 5}) increases the value of @code{NF}, creates any
+intervening fields with the null string as their value, and causes the
+value of @code{$0} to be recomputed, with the fields being separated by
+the value of @code{OFS}.@refill
+
+@xref{Reading Files, ,Reading Input Files}, for a full description of the
+way @code{awk} defines and uses fields.
+
+@node Built-in Summary, Arrays Summary, Fields Summary, Variables/Fields
+@appendixsubsec Built-in Variables
+
+@code{awk}'s built-in variables are:
+
+@table @code
+@item ARGC
+The number of command line arguments (not including options or the
+@code{awk} program itself).
+
+@item ARGIND
+The index in @code{ARGV} of the current file being processed.
+It is always true that @samp{FILENAME == ARGV[ARGIND]}.
+
+@item ARGV
+The array of command line arguments.  The array is indexed from 0 to
+@code{ARGC} @minus{} 1.  Dynamically changing the contents of @code{ARGV}
+can control the files used for data.@refill
+
+@item CONVFMT
+The conversion format to use when converting numbers to strings.
+
+@item FIELDWIDTHS
+A space-separated list of numbers describing the fixed-width input data.
+
+@item ENVIRON
+An array containing the values of the environment variables.  The array
+is indexed by variable name, each element being the value of that
+variable.  Thus, the environment variable @code{HOME} would be in
+@code{ENVIRON["HOME"]}.  Its value might be @file{/u/close}.
+
+Changing this array does not affect the environment seen by programs
+which @code{gawk} spawns via redirection or the @code{system} function.
+(This may change in a future version of @code{gawk}.)
+
+Some operating systems do not have environment variables.
+The array @code{ENVIRON} is empty when running on these systems.
+
+@item ERRNO
+The system error message when an error occurs using @code{getline}
+or @code{close}.
+
+@item FILENAME
+The name of the current input file.  If no files are specified on the command
+line, the value of @code{FILENAME} is @samp{-}.
+
+@item FNR
+The input record number in the current input file.
+
+@item FS
+The input field separator, a blank by default.
+
+@item IGNORECASE
+The case-sensitivity flag for regular expression operations.  If
+@code{IGNORECASE} has a nonzero value, then pattern matching in rules,
+field splitting with @code{FS}, regular expression matching with
+@samp{~} and @samp{!~}, and the @code{gsub}, @code{index}, @code{match},
+@code{split} and @code{sub} predefined functions all ignore case
+when doing regular expression operations.@refill
+
+@item NF
+The number of fields in the current input record.
+
+@item NR
+The total number of input records seen so far.
+
+@item OFMT
+The output format for numbers for the @code{print} statement,
+@code{"%.6g"} by default.
+
+@item OFS
+The output field separator, a blank by default.
+
+@item ORS
+The output record separator, by default a newline.
+
+@item RS
+The input record separator, by default a newline.  @code{RS} is exceptional
+in that only the first character of its string value is used for separating
+records.  If @code{RS} is set to the null string, then records are separated by
+blank lines.  When @code{RS} is set to the null string, then the newline
+character always acts as a field separator, in addition to whatever value
+@code{FS} may have.@refill
+
+@item RSTART
+The index of the first character matched by @code{match}; 0 if no match.
+
+@item RLENGTH
+The length of the string matched by @code{match}; @minus{}1 if no match.
+
+@item SUBSEP
+The string used to separate multiple subscripts in array elements, by
+default @code{"\034"}.
+@end table
+
+@xref{Built-in Variables}, for more information.
+
+@node Arrays Summary, Data Type Summary, Built-in Summary, Variables/Fields
+@appendixsubsec Arrays
+
+Arrays are subscripted with an expression between square brackets
+(@samp{[} and @samp{]}).  Array subscripts are @emph{always} strings;
+numbers are converted to strings as necessary, following the standard
+conversion rules
+(@pxref{Conversion, ,Conversion of Strings and Numbers}).@refill
+
+If you use multiple expressions separated by commas inside the square
+brackets, then the array subscript is a string consisting of the
+concatenation of the individual subscript values, converted to strings,
+separated by the subscript separator (the value of @code{SUBSEP}).
+
+The special operator @code{in} may be used in an @code{if} or
+@code{while} statement to see if an array has an index consisting of a
+particular value.
+
+@example
+if (val in array)
+        print array[val]
+@end example
+
+If the array has multiple subscripts, use @code{(i, j, @dots{}) in array}
+to test for existence of an element.
+
+The @code{in} construct may also be used in a @code{for} loop to iterate
+over all the elements of an array.
+@xref{Scanning an Array, ,Scanning all Elements of an Array}.@refill
+
+An element may be deleted from an array using the @code{delete} statement.
+
+@xref{Arrays, ,Arrays in @code{awk}}, for more detailed information.
+
+@node Data Type Summary,  , Arrays Summary, Variables/Fields
+@appendixsubsec Data Types
+
+The value of an @code{awk} expression is always either a number
+or a string.
+
+Certain contexts (such as arithmetic operators) require numeric
+values.  They convert strings to numbers by interpreting the text
+of the string as a numeral.  If the string does not look like a
+numeral, it converts to 0.
+
+Certain contexts (such as concatenation) require string values.
+They convert numbers to strings by effectively printing them
+with @code{sprintf}.
+@xref{Conversion, ,Conversion of Strings and Numbers}, for the details.@refill
+
+To force conversion of a string value to a number, simply add 0
+to it.  If the value you start with is already a number, this
+does not change it.
+
+To force conversion of a numeric value to a string, concatenate it with
+the null string.
+
+The @code{awk} language defines comparisons as being done numerically if
+both operands are numeric, or if one is numeric and the other is a numeric
+string.  Otherwise one or both operands are converted to strings and a
+string comparison is performed.
+
+Uninitialized variables have the string value @code{""} (the null, or
+empty, string).  In contexts where a number is required, this is
+equivalent to 0.
+
+@xref{Variables}, for more information on variable naming and initialization;
+@pxref{Conversion, ,Conversion of Strings and Numbers}, for more information
+on how variable values are interpreted.@refill
+
+@node Rules Summary, Functions Summary, Variables/Fields, Gawk Summary
+@appendixsec Patterns and Actions
+
+@menu
+* Pattern Summary::             Quick overview of patterns.
+* Regexp Summary::              Quick overview of regular expressions.
+* Actions Summary::             Quick overview of actions.
+@end menu
+
+An @code{awk} program is mostly composed of rules, each consisting of a
+pattern followed by an action.  The action is enclosed in @samp{@{} and
+@samp{@}}.  Either the pattern may be missing, or the action may be
+missing, but, of course, not both.  If the pattern is missing, the
+action is executed for every single line of input.  A missing action is
+equivalent to this action,
+
+@example
+@{ print @}
+@end example
+
+@noindent
+which prints the entire line.
+
+Comments begin with the @samp{#} character, and continue until the end of the
+line.  Blank lines may be used to separate statements.  Normally, a statement
+ends with a newline; however, this is not the case for lines ending in a
+@samp{,}, @samp{@{}, @samp{?}, @samp{:}, @samp{&&}, or @samp{||}.  Lines
+ending in @code{do} or @code{else} also have their statements automatically
+continued on the following line.  In other cases, a line can be continued by
+ending it with a @samp{\}, in which case the newline is ignored.@refill
+
+Multiple statements may be put on one line by separating them with a @samp{;}.
+This applies to both the statements within the action part of a rule (the
+usual case), and to the rule statements.
+
+@xref{Comments, ,Comments in @code{awk} Programs}, for information on
+@code{awk}'s commenting convention;
+@pxref{Statements/Lines, ,@code{awk} Statements versus Lines}, for a
+description of the line continuation mechanism in @code{awk}.@refill
+
+@node Pattern Summary, Regexp Summary, Rules Summary, Rules Summary
+@appendixsubsec Patterns
+
+@code{awk} patterns may be one of the following:
+
+@example
+/@var{regular expression}/
+@var{relational expression}
+@var{pattern} && @var{pattern}
+@var{pattern} || @var{pattern}
+@var{pattern} ? @var{pattern} : @var{pattern}
+(@var{pattern})
+! @var{pattern}
+@var{pattern1}, @var{pattern2}
+BEGIN
+END
+@end example
+
+@code{BEGIN} and @code{END} are two special kinds of patterns that are not
+tested against the input.  The action parts of all @code{BEGIN} rules are
+merged as if all the statements had been written in a single @code{BEGIN}
+rule.  They are executed before any of the input is read.  Similarly, all the
+@code{END} rules are merged, and executed when all the input is exhausted (or
+when an @code{exit} statement is executed).  @code{BEGIN} and @code{END}
+patterns cannot be combined with other patterns in pattern expressions.
+@code{BEGIN} and @code{END} rules cannot have missing action parts.@refill
+
+For @samp{/@var{regular expression}/} patterns, the associated statement is
+executed for each input line that matches the regular expression.  Regular
+expressions are extensions of those in @code{egrep}, and are summarized below.
+
+A @var{relational expression} may use any of the operators defined below in
+the section on actions.  These generally test whether certain fields match
+certain regular expressions.
+
+The @samp{&&}, @samp{||}, and @samp{!} operators are logical ``and,''
+logical ``or,'' and logical ``not,'' respectively, as in C.  They do
+short-circuit evaluation, also as in C, and are used for combining more
+primitive pattern expressions.  As in most languages, parentheses may be
+used to change the order of evaluation.
+
+The @samp{?:} operator is like the same operator in C.  If the first
+pattern matches, then the second pattern is matched against the input
+record; otherwise, the third is matched.  Only one of the second and
+third patterns is matched.
+
+The @samp{@var{pattern1}, @var{pattern2}} form of a pattern is called a
+range pattern.  It matches all input lines starting with a line that
+matches @var{pattern1}, and continuing until a line that matches
+@var{pattern2}, inclusive.  A range pattern cannot be used as an operand
+to any of the pattern operators.
+
+@xref{Patterns}, for a full description of the pattern part of @code{awk}
+rules.
+
+@node Regexp Summary, Actions Summary, Pattern Summary, Rules Summary
+@appendixsubsec Regular Expressions
+
+Regular expressions are the extended kind found in @code{egrep}.
+They are composed of characters as follows:
+
+@table @code
+@item @var{c}
+matches the character @var{c} (assuming @var{c} is a character with no
+special meaning in regexps).
+
+@item \@var{c}
+matches the literal character @var{c}.
+
+@item .
+matches any character except newline.
+
+@item ^
+matches the beginning of a line or a string.
+
+@item $
+matches the end of a line or a string.
+
+@item [@var{abc}@dots{}]
+matches any of the characters @var{abc}@dots{} (character class).
+
+@item [^@var{abc}@dots{}]
+matches any character except @var{abc}@dots{} and newline (negated
+character class).
+
+@item @var{r1}|@var{r2}
+matches either @var{r1} or @var{r2} (alternation).
+
+@item @var{r1r2}
+matches @var{r1}, and then @var{r2} (concatenation).
+
+@item @var{r}+
+matches one or more @var{r}'s.
+
+@item @var{r}*
+matches zero or more @var{r}'s. 
+
+@item @var{r}?
+matches zero or one @var{r}'s. 
+
+@item (@var{r})
+matches @var{r} (grouping).
+@end table
+
+@xref{Regexp, ,Regular Expressions as Patterns}, for a more detailed
+explanation of regular expressions.
+
+The escape sequences allowed in string constants are also valid in
+regular expressions (@pxref{Constants, ,Constant Expressions}).
+
+@node Actions Summary,  , Regexp Summary, Rules Summary
+@appendixsubsec Actions
+
+Action statements are enclosed in braces, @samp{@{} and @samp{@}}.
+Action statements consist of the usual assignment, conditional, and looping
+statements found in most languages.  The operators, control statements,
+and input/output statements available are patterned after those in C.
+
+@menu
+* Operator Summary::            @code{awk} operators.
+* Control Flow Summary::        The control statements.
+* I/O Summary::                 The I/O statements.
+* Printf Summary::              A summary of @code{printf}.
+* Special File Summary::        Special file names interpreted internally.
+* Numeric Functions Summary::   Built-in numeric functions.
+* String Functions Summary::    Built-in string functions.
+* Time Functions Summary::      Built-in time functions.
+* String Constants Summary::    Escape sequences in strings.
+@end menu
+
+@node Operator Summary, Control Flow Summary, Actions Summary, Actions Summary
+@appendixsubsubsec Operators
+
+The operators in @code{awk}, in order of increasing precedence, are:
+
+@table @code
+@item = += -= *= /= %= ^=
+Assignment.  Both absolute assignment (@code{@var{var}=@var{value}})
+and operator assignment (the other forms) are supported.
+
+@item ?:
+A conditional expression, as in C.  This has the form @code{@var{expr1} ?
+@var{expr2} : @var{expr3}}.  If @var{expr1} is true, the value of the
+expression is @var{expr2}; otherwise it is @var{expr3}.  Only one of
+@var{expr2} and @var{expr3} is evaluated.@refill
+
+@item ||
+Logical ``or''.
+
+@item &&
+Logical ``and''.
+
+@item ~ !~
+Regular expression match, negated match.
+
+@item < <= > >= != ==
+The usual relational operators.
+
+@item @var{blank}
+String concatenation.
+
+@item + -
+Addition and subtraction.
+
+@item * / %
+Multiplication, division, and modulus.
+
+@item + - !
+Unary plus, unary minus, and logical negation.
+
+@item ^
+Exponentiation (@samp{**} may also be used, and @samp{**=} for the assignment
+operator, but they are not specified in the @sc{posix} standard).
+
+@item ++ --
+Increment and decrement, both prefix and postfix.
+
+@item $
+Field reference.
+@end table
+
+@xref{Expressions, ,Expressions as Action Statements}, for a full
+description of all the operators listed above.
+@xref{Fields, ,Examining Fields}, for a description of the field
+reference operator.@refill
+
+@node Control Flow Summary, I/O Summary, Operator Summary, Actions Summary
+@appendixsubsubsec Control Statements
+
+The control statements are as follows:
+
+@example
+if (@var{condition}) @var{statement} @r{[} else @var{statement} @r{]}
+while (@var{condition}) @var{statement}
+do @var{statement} while (@var{condition})
+for (@var{expr1}; @var{expr2}; @var{expr3}) @var{statement}
+for (@var{var} in @var{array}) @var{statement}
+break
+continue
+delete @var{array}[@var{index}]
+exit @r{[} @var{expression} @r{]}
+@{ @var{statements} @}
+@end example
+
+@xref{Statements, ,Control Statements in Actions}, for a full description
+of all the control statements listed above.
+
+@node I/O Summary, Printf Summary, Control Flow Summary, Actions Summary
+@appendixsubsubsec I/O Statements
+
+The input/output statements are as follows:
+
+@table @code
+@item getline
+Set @code{$0} from next input record; set @code{NF}, @code{NR}, @code{FNR}.
+
+@item getline <@var{file}
+Set @code{$0} from next record of @var{file}; set @code{NF}.
+
+@item getline @var{var}
+Set @var{var} from next input record; set @code{NR}, @code{FNR}.
+
+@item getline @var{var} <@var{file}
+Set @var{var} from next record of @var{file}.
+
+@item next
+Stop processing the current input record.  The next input record is read and
+processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+
+@item next file
+Stop processing the current input file.  The next input record read comes
+from the next input file.  @code{FILENAME} is updated, @code{FNR} is set to 1, 
+and processing starts over with the first pattern in the @code{awk} program.
+If the end of the input data is reached, the @code{END} rule(s), if any,
+are executed.
+
+@item print
+Prints the current record.
+
+@item print @var{expr-list}
+Prints expressions.
+
+@item print @var{expr-list} > @var{file}
+Prints expressions on @var{file}.
+
+@item printf @var{fmt, expr-list}
+Format and print.
+
+@item printf @var{fmt, expr-list} > @var{file}
+Format and print on @var{file}.
+@end table
+
+Other input/output redirections are also allowed.  For @code{print} and
+@code{printf}, @samp{>> @var{file}} appends output to the @var{file},
+and @samp{| @var{command}} writes on a pipe.  In a similar fashion,
+@samp{@var{command} | getline} pipes input into @code{getline}.
+@code{getline} returns 0 on end of file, and @minus{}1 on an error.@refill
+
+@xref{Getline, ,Explicit Input with @code{getline}}, for a full description
+of the @code{getline} statement.
+@xref{Printing, ,Printing Output}, for a full description of @code{print} and
+@code{printf}.  Finally, @pxref{Next Statement, ,The @code{next} Statement},
+for a description of how the @code{next} statement works.@refill
+
+@node Printf Summary, Special File Summary, I/O Summary, Actions Summary
+@appendixsubsubsec @code{printf} Summary
+
+The @code{awk} @code{printf} statement and @code{sprintf} function
+accept the following conversion specification formats:
+
+@table @code
+@item %c
+An ASCII character.  If the argument used for @samp{%c} is numeric, it is
+treated as a character and printed.  Otherwise, the argument is assumed to
+be a string, and only the first character of that string is printed.
+
+@item %d
+@itemx %i
+A decimal number (the integer part).
+
+@item %e
+A floating point number of the form
+@samp{@r{[}-@r{]}d.ddddddE@r{[}+-@r{]}dd}.@refill
+
+@item %f
+A floating point number of the form
+@r{[}@code{-}@r{]}@code{ddd.dddddd}.
+
+@item %g
+Use @samp{%e} or @samp{%f} conversion, whichever produces a shorter string,
+with nonsignificant zeros suppressed.
+
+@item %o
+An unsigned octal number (again, an integer).
+
+@item %s
+A character string.
+
+@item %x
+An unsigned hexadecimal number (an integer).
+
+@item %X
+Like @samp{%x}, except use @samp{A} through @samp{F} instead of @samp{a}
+through @samp{f} for decimal 10 through 15.@refill
+
+@item %%
+A single @samp{%} character; no argument is converted.
+@end table
+
+There are optional, additional parameters that may lie between the @samp{%}
+and the control letter:
+
+@table @code
+@item -
+The expression should be left-justified within its field.
+
+@item @var{width}
+The field should be padded to this width.  If @var{width} has a leading zero,
+then the field is padded with zeros.  Otherwise it is padded with blanks.
+
+@item .@var{prec}
+A number indicating the maximum width of strings, or the number of digits
+to the right of the decimal point.
+@end table
+
+Either or both of the @var{width} and @var{prec} values may be specified
+as @samp{*}.  In that case, the particular value is taken from the argument
+list.
+
+@xref{Printf, ,Using @code{printf} Statements for Fancier Printing}, for
+examples and for a more detailed description.
+
+@node Special File Summary, Numeric Functions Summary, Printf Summary, Actions Summary
+@appendixsubsubsec Special File Names
+
+When doing I/O redirection from either @code{print} or @code{printf} into a
+file, or via @code{getline} from a file, @code{gawk} recognizes certain special
+file names internally.  These file names allow access to open file descriptors
+inherited from @code{gawk}'s parent process (usually the shell).  The
+file names are:
+
+@table @file
+@item /dev/stdin
+The standard input.
+
+@item /dev/stdout
+The standard output.
+
+@item /dev/stderr
+The standard error output.
+
+@item /dev/fd/@var{n}
+The file denoted by the open file descriptor @var{n}.
+@end table
+
+In addition, the following files provide process-related information
+about the running @code{gawk} program.
+
+@table @file
+@item /dev/pid
+Reading this file returns the process ID of the current process,
+in decimal, terminated with a newline.
+
+@item  /dev/ppid
+Reading this file returns the parent process ID of the current process,
+in decimal, terminated with a newline.
+
+@item  /dev/pgrpid
+Reading this file returns the process group ID of the current process,
+in decimal, terminated with a newline.
+
+@item /dev/user
+Reading this file returns a single record terminated with a newline.
+The fields are separated with blanks.  The fields represent the
+following information:
+
+@table @code
+@item $1
+The value of the @code{getuid} system call.
+
+@item $2
+The value of the @code{geteuid} system call.
+
+@item $3
+The value of the @code{getgid} system call.
+
+@item $4
+The value of the @code{getegid} system call.
+@end table
+
+If there are any additional fields, they are the group IDs returned by
+the @code{getgroups} system call.
+(Multiple groups may not be supported on all systems.)@refill
+@end table
+
+@noindent
+These file names may also be used on the command line to name data files.
+These file names are only recognized internally if you do not
+actually have files by these names on your system.
+
+@xref{Special Files, ,Standard I/O Streams}, for a longer description that
+provides the motivation for this feature.
+
+@node Numeric Functions Summary, String Functions Summary, Special File Summary, Actions Summary
+@appendixsubsubsec Numeric Functions
+
+@code{awk} has the following predefined arithmetic functions:
+
+@table @code
+@item atan2(@var{y}, @var{x})
+returns the arctangent of @var{y/x} in radians.
+
+@item cos(@var{expr})
+returns the cosine of @var{expr}, which is in radians.
+
+@item exp(@var{expr})
+the exponential function.
+
+@item int(@var{expr})
+truncates to integer.
+
+@item log(@var{expr})
+the natural logarithm function.
+
+@item rand()
+returns a random number between 0 and 1.
+
+@item sin(@var{expr})
+returns the sine of @var{expr}, which is in radians.
+
+@item sqrt(@var{expr})
+the square root function.
+
+@item srand(@var{expr})
+use @var{expr} as a new seed for the random number generator.  If no @var{expr}
+is provided, the time of day is used.  The return value is the previous
+seed for the random number generator.
+@end table
+
+@node String Functions Summary, Time Functions Summary, Numeric Functions Summary, Actions Summary
+@appendixsubsubsec String Functions
+
+@code{awk} has the following predefined string functions:
+
+@table @code
+@item gsub(@var{r}, @var{s}, @var{t})
+for each substring matching the regular expression @var{r} in the string
+@var{t}, substitute the string @var{s}, and return the number of substitutions.
+If @var{t} is not supplied, use @code{$0}.
+
+@item index(@var{s}, @var{t})
+returns the index of the string @var{t} in the string @var{s}, or 0 if
+@var{t} is not present.
+
+@item length(@var{s})
+returns the length of the string @var{s}.  The length of @code{$0}
+is returned if no argument is supplied.
+
+@item match(@var{s}, @var{r})
+returns the position in @var{s} where the regular expression @var{r}
+occurs, or 0 if @var{r} is not present, and sets the values of @code{RSTART}
+and @code{RLENGTH}.
+
+@item split(@var{s}, @var{a}, @var{r})
+splits the string @var{s} into the array @var{a} on the regular expression
+@var{r}, and returns the number of fields.  If @var{r} is omitted, @code{FS}
+is used instead.
+
+@item sprintf(@var{fmt}, @var{expr-list})
+prints @var{expr-list} according to @var{fmt}, and returns the resulting string.
+
+@item sub(@var{r}, @var{s}, @var{t})
+this is just like @code{gsub}, but only the first matching substring is
+replaced.
+
+@item substr(@var{s}, @var{i}, @var{n})
+returns the @var{n}-character substring of @var{s} starting at @var{i}.
+If @var{n} is omitted, the rest of @var{s} is used.
+
+@item tolower(@var{str})
+returns a copy of the string @var{str}, with all the upper-case characters in
+@var{str} translated to their corresponding lower-case counterparts.
+Nonalphabetic characters are left unchanged.
+
+@item toupper(@var{str})
+returns a copy of the string @var{str}, with all the lower-case characters in
+@var{str} translated to their corresponding upper-case counterparts.
+Nonalphabetic characters are left unchanged.
+
+@item system(@var{cmd-line})
+Execute the command @var{cmd-line}, and return the exit status.
+@end table
+
+@node Time Functions Summary, String Constants Summary, String Functions Summary, Actions Summary
+@appendixsubsubsec Built-in Time Functions
+
+The following two functions are available for getting the current
+time of day, and for formatting time stamps.
+
+@table @code
+@item systime()
+returns the current time of day as the number of seconds since a particular
+epoch (Midnight, January 1, 1970 @sc{utc}, on @sc{posix} systems).
+
+@item strftime(@var{format}, @var{timestamp})
+formats @var{timestamp} according to the specification in @var{format}.
+The current time of day is used if no @var{timestamp} is supplied.
+@xref{Time Functions, ,Functions for Dealing with Time Stamps}, for the
+details on the conversion specifiers that @code{strftime} accepts.@refill
+@end table
+
+@iftex
+@xref{Built-in, ,Built-in Functions}, for a description of all of
+@code{awk}'s built-in functions.
+@end iftex
+
+@node String Constants Summary,  , Time Functions Summary, Actions Summary
+@appendixsubsubsec String Constants
+
+String constants in @code{awk} are sequences of characters enclosed
+between double quotes (@code{"}).  Within strings, certain @dfn{escape sequences}
+are recognized, as in C.  These are:
+
+@table @code
+@item \\
+A literal backslash.
+
+@item \a
+The ``alert'' character; usually the ASCII BEL character.
+
+@item \b
+Backspace.
+
+@item \f
+Formfeed.
+
+@item \n
+Newline.
+
+@item \r
+Carriage return.
+
+@item \t
+Horizontal tab.
+
+@item \v
+Vertical tab.
+
+@item \x@var{hex digits}
+The character represented by the string of hexadecimal digits following
+the @samp{\x}.  As in @sc{ansi} C, all following hexadecimal digits are
+considered part of the escape sequence.  (This feature should tell us
+something about language design by committee.)  E.g., @code{"\x1B"} is a
+string containing the ASCII ESC (escape) character.  (The @samp{\x}
+escape sequence is not in @sc{posix} @code{awk}.)
+
+@item \@var{ddd}
+The character represented by the 1-, 2-, or 3-digit sequence of octal
+digits.  Thus, @code{"\033"} is also a string containing the ASCII ESC
+(escape) character.
+
+@item \@var{c}
+The literal character @var{c}.
+@end table
+
+The escape sequences may also be used inside constant regular expressions
+(e.g., the regexp @code{@w{/[@ \t\f\n\r\v]/}} matches whitespace
+characters).@refill
+
+@xref{Constants, ,Constant Expressions}.
+
+@node Functions Summary, Historical Features, Rules Summary, Gawk Summary
+@appendixsec Functions
+
+Functions in @code{awk} are defined as follows:
+
+@example
+function @var{name}(@var{parameter list}) @{ @var{statements} @}
+@end example
+
+Actual parameters supplied in the function call are used to instantiate
+the formal parameters declared in the function.  Arrays are passed by
+reference, other variables are passed by value.
+
+If there are fewer arguments passed than there are names in @var{parameter list},
+the extra names are given the null string as value.  Extra names have the
+effect of local variables.
+
+The open-parenthesis in a function call of a user-defined function must
+immediately follow the function name, without any intervening white space.
+This is to avoid a syntactic ambiguity with the concatenation operator.
+
+The word @code{func} may be used in place of @code{function} (but not in
+@sc{posix} @code{awk}).
+
+Use the @code{return} statement to return a value from a function.
+
+@xref{User-defined, ,User-defined Functions}, for a more complete description.
+
+@node Historical Features,  , Functions Summary, Gawk Summary
+@appendixsec Historical Features
+
+There are two features of historical @code{awk} implementations that
+@code{gawk} supports.  First, it is possible to call the @code{length}
+built-in function not only with no arguments, but even without parentheses!
+
+@example
+a = length
+@end example
+
+@noindent
+is the same as either of
+
+@example
+a = length()
+a = length($0)
+@end example
+
+@noindent
+This feature is marked as ``deprecated'' in the @sc{posix} standard, and
+@code{gawk} will issue a warning about its use if @samp{-W lint} is
+specified on the command line.
+
+The other feature is the use of the @code{continue} statement outside the
+body of a @code{while}, @code{for}, or @code{do} loop.  Traditional
+@code{awk} implementations have treated such usage as equivalent to the
+@code{next} statement.  @code{gawk} will support this usage if @samp{-W posix}
+has not been specified.
+
+@node Sample Program, Bugs, Gawk Summary, Top
+@appendix Sample Program
+
+The following example is a complete @code{awk} program, which prints
+the number of occurrences of each word in its input.  It illustrates the
+associative nature of @code{awk} arrays by using strings as subscripts.  It
+also demonstrates the @samp{for @var{x} in @var{array}} construction.
+Finally, it shows how @code{awk} can be used in conjunction with other
+utility programs to do a useful task of some complexity with a minimum of
+effort.  Some explanations follow the program listing.@refill
+
+@example
+awk '
+# Print list of word frequencies
+@{
+    for (i = 1; i <= NF; i++)
+        freq[$i]++
+@}
+
+END @{
+    for (word in freq)
+        printf "%s\t%d\n", word, freq[word]
+@}'
+@end example
+
+The first thing to notice about this program is that it has two rules.  The
+first rule, because it has an empty pattern, is executed on every line of
+the input.  It uses @code{awk}'s field-accessing mechanism
+(@pxref{Fields, ,Examining Fields}) to pick out the individual words from
+the line, and the built-in variable @code{NF} (@pxref{Built-in Variables})
+to know how many fields are available.@refill
+
+For each input word, an element of the array @code{freq} is incremented to
+reflect that the word has been seen an additional time.@refill
+
+The second rule, because it has the pattern @code{END}, is not executed
+until the input has been exhausted.  It prints out the contents of the
+@code{freq} table that has been built up inside the first action.@refill
+
+Note that this program has several problems that would prevent it from being
+useful by itself on real text files:@refill
+
+@itemize @bullet
+@item
+Words are detected using the @code{awk} convention that fields are
+separated by whitespace and that other characters in the input (except
+newlines) don't have any special meaning to @code{awk}.  This means that
+punctuation characters count as part of words.@refill
+
+@item
+The @code{awk} language considers upper and lower case characters to be
+distinct.  Therefore, @samp{foo} and @samp{Foo} are not treated by this
+program as the same word.  This is undesirable since in normal text, words
+are capitalized if they begin sentences, and a frequency analyzer should not
+be sensitive to that.@refill
+
+@item
+The output does not come out in any useful order.  You're more likely to be
+interested in which words occur most frequently, or having an alphabetized
+table of how frequently each word occurs.@refill
+@end itemize
+
+The way to solve these problems is to use some of the more advanced
+features of the @code{awk} language.  First, we use @code{tolower} to remove
+case distinctions.  Next, we use @code{gsub} to remove punctuation
+characters.  Finally, we use the system @code{sort} utility to process the
+output of the @code{awk} script.  First, here is the new version of
+the program:@refill
+
+@example
+awk '
+# Print list of word frequencies
+@{
+    $0 = tolower($0)    # remove case distinctions
+    gsub(/[^a-z0-9_ \t]/, "", $0)  # remove punctuation
+    for (i = 1; i <= NF; i++)
+        freq[$i]++
+@}
+
+END @{
+    for (word in freq)
+        printf "%s\t%d\n", word, freq[word]
+@}'
+@end example
+
+Assuming we have saved this program in a file named @file{frequency.awk},
+and that the data is in @file{file1}, the following pipeline
+
+@example
+awk -f frequency.awk file1 | sort +1 -nr
+@end example
+
+@noindent
+produces a table of the words appearing in @file{file1} in order of
+decreasing frequency.
+
+The @code{awk} program suitably massages the data and produces a word
+frequency table, which is not ordered.
+
+The @code{awk} script's output is then sorted by the @code{sort} command and
+printed on the terminal.  The options given to @code{sort} in this example
+specify to sort using the second field of each input line (skipping one field),
+that the sort keys should be treated as numeric quantities (otherwise
+@samp{15} would come before @samp{5}), and that the sorting should be done
+in descending (reverse) order.@refill
+
+We could have even done the @code{sort} from within the program, by
+changing the @code{END} action to:
+
+@example
+END @{
+    sort = "sort +1 -nr"
+    for (word in freq)
+        printf "%s\t%d\n", word, freq[word] | sort
+    close(sort)
+@}'
+@end example
+
+See the general operating system documentation for more information on how
+to use the @code{sort} command.@refill
+
+@ignore
+@strong{ADR: I have some more substantial programs courtesy of Rick Adams
+at UUNET.  I am planning on incorporating those either in addition to or
+instead of this program.}
+
+@strong{I would also like to incorporate the general @code{translate}
+function that I have written.}
+
+@strong{I have a ton of other sample programs to include too.}
+@end ignore
+
+@node Bugs, Notes, Sample Program, Top
+@appendix Reporting Problems and Bugs
+
+@c This chapter stolen shamelessly from the GNU m4 manual.
+@c This chapter has been unshamelessly altered to emulate changes made to
+@c make.texi from whence it was originally shamelessly stolen! :-} --mew
+
+If you have problems with @code{gawk} or think that you have found a bug,
+please report it to the developers; we cannot promise to do anything
+but we might well want to fix it.
+
+Before reporting a bug, make sure you have actually found a real bug.
+Carefully reread the documentation and see if it really says you can do
+what you're trying to do.  If it's not clear whether you should be able
+to do something or not, report that too; it's a bug in the documentation!
+
+Before reporting a bug or trying to fix it yourself, try to isolate it
+to the smallest possible @code{awk} program and input data file that
+reproduces the problem.  Then send us the program and data file,
+some idea of what kind of Unix system you're using, and the exact results
+@code{gawk} gave you.  Also say what you expected to occur; this will help
+us decide whether the problem was really in the documentation.
+
+Once you have a precise problem, send e-mail to (Internet)
+@samp{bug-gnu-utils@@prep.ai.mit.edu} or (UUCP)
+@samp{mit-eddie!prep.ai.mit.edu!bug-gnu-utils}.  Please include the
+version number of @code{gawk} you are using.  You can get this information
+with the command @samp{gawk -W version '@{@}' /dev/null}.
+You should send carbon copies of your mail to David Trueman at
+@samp{david@@cs.dal.ca}, and to Arnold Robbins, who can be reached at
+@samp{arnold@@skeeve.atl.ga.us}.  David is most likely to fix code
+problems, while Arnold is most likely to fix documentation problems.@refill
+
+Non-bug suggestions are always welcome as well.  If you have questions
+about things that are unclear in the documentation or are just obscure
+features, ask Arnold Robbins; he will try to help you out, although he
+may not have the time to fix the problem.  You can send him electronic mail at the Internet address
+above.
+
+If you find bugs in one of the non-Unix ports of @code{gawk}, please send
+an electronic mail message to the person who maintains that port.  They
+are listed below, and also in the @file{README} file in the @code{gawk}
+distribution.  Information in the @file{README} file should be considered
+authoritative if it conflicts with this manual.
+
+The people maintaining the non-Unix ports of @code{gawk} are:
+
+@table @asis
+@item MS-DOS
+The port to MS-DOS is maintained by Scott Deifik.
+His electronic mail address is @samp{scottd@@amgen.com}.
+
+@item VMS
+The port to VAX VMS is maintained by Pat Rankin.
+His electronic mail address is @samp{rankin@@eql.caltech.edu}.
+
+@item Atari ST
+The port to the Atari ST is maintained by Michal Jaegermann.
+His electronic mail address is @samp{ntomczak@@vm.ucs.ualberta.ca}.
+
+@end table
+
+If your bug is also reproducible under Unix, please send copies of your
+report to the general GNU bug list, as well as to Arnold Robbins and David
+Trueman, at the addresses listed above.
+
+@node Notes, Glossary, Bugs, Top
+@appendix Implementation Notes
+
+This appendix contains information mainly of interest to implementors and
+maintainers of @code{gawk}.  Everything in it applies specifically to
+@code{gawk}, and not to other implementations.
+
+@menu
+* Compatibility Mode::          How to disable certain @code{gawk} extensions.
+* Future Extensions::           New features we may implement soon.
+* Improvements::                Suggestions for improvements by volunteers.
+@end menu
+
+@node Compatibility Mode, Future Extensions, Notes, Notes
+@appendixsec Downward Compatibility and Debugging
+
+@xref{POSIX/GNU, ,Extensions in @code{gawk} not in POSIX @code{awk}},
+for a summary of the GNU extensions to the @code{awk} language and program.
+All of these features can be turned off by invoking @code{gawk} with the
+@samp{-W compat} option, or with the @samp{-W posix} option.@refill
+
+If @code{gawk} is compiled for debugging with @samp{-DDEBUG}, then there
+is one more option available on the command line:
+
+@table @samp
+@item -W parsedebug
+Print out the parse stack information as the program is being parsed.
+@end table
+
+This option is intended only for serious @code{gawk} developers,
+and not for the casual user.  It probably has not even been compiled into
+your version of @code{gawk}, since it slows down execution.
+
+@node Future Extensions, Improvements, Compatibility Mode, Notes
+@appendixsec Probable Future Extensions
+
+This section briefly lists extensions that indicate the directions we are
+currently considering for @code{gawk}.  The file @file{FUTURES} in the
+@code{gawk} distribution lists these extensions, as well as several others.
+
+@table @asis
+@item @code{RS} as a regexp
+The meaning of @code{RS} may be generalized along the lines of @code{FS}.
+
+@item Control of subprocess environment
+Changes made in @code{gawk} to the array @code{ENVIRON} may be
+propagated to subprocesses run by @code{gawk}.
+
+@item Databases
+It may be possible to map a GDBM/NDBM/SDBM file into an @code{awk} array.
+
+@item Single-character fields
+The null string, @code{""}, as a field separator, will cause field
+splitting and the @code{split} function to separate individual characters.
+Thus, @code{split("abcd", a, "")} would yield @code{a[1] == "a"},
+@code{a[2] == "b"}, and so on.
+
+@item More @code{lint} warnings
+There are more things that could be checked for portability.
+
+@item @code{RECLEN} variable for fixed length records
+Along with @code{FIELDWIDTHS}, this would speed up the processing of
+fixed-length records.
+
+@item @code{RT} variable to hold the record terminator
+It is occasionally useful to have access to the actual string of
+characters that matched the @code{RS} variable.  The @code{RT}
+variable would hold these characters.
+
+@item A @code{restart} keyword
+After modifying @code{$0}, @code{restart} would restart the pattern
+matching loop, without reading a new record from the input.
+
+@item A @samp{|&} redirection
+The @samp{|&} redirection, in place of @samp{|}, would open a two-way
+pipeline for communication with a sub-process (via @code{getline},
+@code{print}, and @code{printf}).
+
+@item @code{IGNORECASE} affecting all comparisons
+The effects of the @code{IGNORECASE} variable may be generalized to
+all string comparisons, and not just regular expression operations.
+
+@item A way to mix command line source code and library files
+There may be a new option that would make it possible to easily use library
+functions from a program entered on the command line.
+@c probably a @samp{-s} option...
+
+@item GNU-style long options
+We will add GNU-style long options
+to @code{gawk} for compatibility with other GNU programs.
+(For example, @samp{--field-separator=:} would be equivalent to
+@samp{-F:}.)@refill
+
+@c this is @emph{very} long term --- not worth including right now.
+@ignore
+@item The C Comma Operator
+We may add the C comma operator, which takes the form
+@code{@var{expr1},@var{expr2}}.  The first expression is evaluated, and the
+result is thrown away.  The value of the full expression is the value of
+@var{expr2}.@refill
+@end ignore
+@end table
+
+@node Improvements,  , Future Extensions, Notes
+@appendixsec Suggestions for Improvements
+
+Here are some projects that would-be @code{gawk} hackers might like to take
+on.  They vary in size from a few days to a few weeks of programming,
+depending on which one you choose and how fast a programmer you are.  Please
+send any improvements you write to the maintainers at the GNU
+project.@refill
+
+@enumerate
+@item
+Compilation of @code{awk} programs: @code{gawk} uses a Bison (YACC-like)
+parser to convert the script given it into a syntax tree; the syntax
+tree is then executed by a simple recursive evaluator.  This method incurs
+a lot of overhead, since the recursive evaluator performs many procedure
+calls to do even the simplest things.@refill
+
+It should be possible for @code{gawk} to convert the script's parse tree
+into a C program which the user would then compile, using the normal
+C compiler and a special @code{gawk} library to provide all the needed
+functions (regexps, fields, associative arrays, type coercion, and so
+on).@refill
+
+An easier possibility might be for an intermediate phase of @code{gawk} to
+convert the parse tree into a linear byte code form like the one used
+in GNU Emacs Lisp.  The recursive evaluator would then be replaced by
+a straight line byte code interpreter that would be intermediate in speed
+between running a compiled program and doing what @code{gawk} does
+now.@refill
+
+This may actually happen for the 3.0 version of @code{gawk}.
+
+@item
+An error message section has not been included in this version of the
+manual.  Perhaps some nice beta testers will document some of the messages
+for the future.
+
+@item
+The programs in the test suite could use documenting in this manual.
+
+@item
+The programs and data files in the manual should be available in
+separate files to facilitate experimentation.
+
+@item
+See the @file{FUTURES} file for more ideas.  Contact us if you would
+seriously like to tackle any of the items listed there.
+@end enumerate
+
+@node Glossary, Index, Notes, Top
+@appendix Glossary
+
+@table @asis
+@item Action
+A series of @code{awk} statements attached to a rule.  If the rule's
+pattern matches an input record, the @code{awk} language executes the
+rule's action.  Actions are always enclosed in curly braces.
+@xref{Actions, ,Overview of Actions}.@refill
+
+@item Amazing @code{awk} Assembler
+Henry Spencer at the University of Toronto wrote a retargetable assembler
+completely as @code{awk} scripts.  It is thousands of lines long, including
+machine descriptions for several 8-bit microcomputers.
+@c It is distributed with @code{gawk} (as part of the test suite) and
+It is a good example of a
+program that would have been better written in another language.@refill
+
+@item @sc{ansi}
+The American National Standards Institute.  This organization produces
+many standards, among them the standard for the C programming language.
+
+@item Assignment
+An @code{awk} expression that changes the value of some @code{awk}
+variable or data object.  An object that you can assign to is called an
+@dfn{lvalue}.  @xref{Assignment Ops, ,Assignment Expressions}.@refill
+
+@item @code{awk} Language
+The language in which @code{awk} programs are written.
+
+@item @code{awk} Program
+An @code{awk} program consists of a series of @dfn{patterns} and
+@dfn{actions}, collectively known as @dfn{rules}.  For each input record
+given to the program, the program's rules are all processed in turn.
+@code{awk} programs may also contain function definitions.@refill
+
+@item @code{awk} Script
+Another name for an @code{awk} program.
+
+@item Built-in Function
+The @code{awk} language provides built-in functions that perform various
+numerical, time stamp related, and string computations.  Examples are
+@code{sqrt} (for the square root of a number) and @code{substr} (for a
+substring of a string).  @xref{Built-in, ,Built-in Functions}.@refill
+
+@item Built-in Variable
+@code{ARGC}, @code{ARGIND}, @code{ARGV}, @code{CONVFMT}, @code{ENVIRON},
+@code{ERRNO}, @code{FIELDWIDTHS}, @code{FILENAME}, @code{FNR}, @code{FS},
+@code{IGNORECASE}, @code{NF}, @code{NR}, @code{OFMT}, @code{OFS}, @code{ORS},
+@code{RLENGTH}, @code{RSTART}, @code{RS}, and @code{SUBSEP},
+are the variables that have special
+meaning to @code{awk}.  Changing some of them affects @code{awk}'s running
+environment.  @xref{Built-in Variables}.@refill
+
+@item Braces
+See ``Curly Braces.''
+
+@item C
+The system programming language that most GNU software is written in.  The
+@code{awk} programming language has C-like syntax, and this manual
+points out similarities between @code{awk} and C when appropriate.@refill
+
+@item CHEM
+A preprocessor for @code{pic} that reads descriptions of molecules
+and produces @code{pic} input for drawing them.  It was written by
+Brian Kernighan, and is available from @code{netlib@@research.att.com}.@refill
+
+@item Compound Statement
+A series of @code{awk} statements, enclosed in curly braces.  Compound
+statements may be nested.
+@xref{Statements, ,Control Statements in Actions}.@refill
+
+@item Concatenation
+Concatenating two strings means sticking them together, one after another,
+giving a new string.  For example, the string @samp{foo} concatenated with
+the string @samp{bar} gives the string @samp{foobar}.
+@xref{Concatenation, ,String Concatenation}.@refill
+
+@item Conditional Expression
+An expression using the @samp{?:} ternary operator, such as
+@code{@var{expr1} ? @var{expr2} : @var{expr3}}.  The expression
+@var{expr1} is evaluated; if the result is true, the value of the whole
+expression is the value of @var{expr2}; otherwise the value is
+@var{expr3}.  In either case, only one of @var{expr2} and @var{expr3}
+is evaluated.  @xref{Conditional Exp, ,Conditional Expressions}.@refill
+
+@item Constant Regular Expression
+A constant regular expression is a regular expression written within
+slashes, such as @samp{/foo/}.  This regular expression is chosen
+when you write the @code{awk} program, and cannot be changed during
+its execution.  @xref{Regexp Usage, ,How to Use Regular Expressions}.
+
+@item Comparison Expression
+A relation that is either true or false, such as @code{(a < b)}.
+Comparison expressions are used in @code{if}, @code{while}, and @code{for}
+statements, and in patterns to select which input records to process.
+@xref{Comparison Ops, ,Comparison Expressions}.@refill
+
+@item Curly Braces
+The characters @samp{@{} and @samp{@}}.  Curly braces are used in
+@code{awk} for delimiting actions, compound statements, and function
+bodies.@refill
+
+@item Data Objects
+These are numbers and strings of characters.  Numbers are converted into
+strings and vice versa, as needed.
+@xref{Conversion, ,Conversion of Strings and Numbers}.@refill
+
+@item Dynamic Regular Expression
+A dynamic regular expression is a regular expression written as an
+ordinary expression.  It could be a string constant, such as
+@code{"foo"}, but it may also be an expression whose value may vary.
+@xref{Regexp Usage, ,How to Use Regular Expressions}.
+
+@item Escape Sequences
+A special sequence of characters used for describing nonprinting
+characters, such as @samp{\n} for newline, or @samp{\033} for the ASCII
+ESC (escape) character.  @xref{Constants, ,Constant Expressions}.
+
+@item Field
+When @code{awk} reads an input record, it splits the record into pieces
+separated by whitespace (or by a separator regexp which you can
+change by setting the built-in variable @code{FS}).  Such pieces are
+called fields.  If the pieces are of fixed length, you can use the built-in
+variable @code{FIELDWIDTHS} to describe their lengths.
+@xref{Records, ,How Input is Split into Records}.@refill
+
+@item Format
+Format strings are used to control the appearance of output in the
+@code{printf} statement.  Also, data conversions from numbers to strings
+are controlled by the format string contained in the built-in variable
+@code{CONVFMT}.  @xref{Control Letters, ,Format-Control Letters}.@refill
+
+@item Function
+A specialized group of statements often used to encapsulate general
+or program-specific tasks.  @code{awk} has a number of built-in
+functions, and also allows you to define your own.
+@xref{Built-in, ,Built-in Functions}.
+Also, see @ref{User-defined, ,User-defined Functions}.@refill
+
+@item @code{gawk}
+The GNU implementation of @code{awk}.
+
+@item GNU
+``GNU's not Unix''.  An on-going project of the Free Software Foundation
+to create a complete, freely distributable, @sc{posix}-compliant computing
+environment.
+
+@item Input Record
+A single chunk of data read in by @code{awk}.  Usually, an @code{awk} input
+record consists of one line of text.
+@xref{Records, ,How Input is Split into Records}.@refill
+
+@item Keyword
+In the @code{awk} language, a keyword is a word that has special
+meaning.  Keywords are reserved and may not be used as variable names.
+
+@code{awk}'s keywords are:
+@code{if},
+@code{else},
+@code{while},
+@code{do@dots{}while},
+@code{for},
+@code{for@dots{}in},
+@code{break},
+@code{continue},
+@code{delete},
+@code{next},
+@code{function},
+@code{func},
+and @code{exit}.@refill
+
+@item Lvalue
+An expression that can appear on the left side of an assignment
+operator.  In most languages, lvalues can be variables or array
+elements.  In @code{awk}, a field designator can also be used as an
+lvalue.@refill
+
+@item Number
+A numeric valued data object.  The @code{gawk} implementation uses double
+precision floating point to represent numbers.@refill
+
+@item Pattern
+Patterns tell @code{awk} which input records are interesting to which
+rules.
+
+A pattern is an arbitrary conditional expression against which input is
+tested.  If the condition is satisfied, the pattern is said to @dfn{match}
+the input record.  A typical pattern might compare the input record against
+a regular expression.  @xref{Patterns}.@refill
+
+@item @sc{posix}
+The name for a series of standards being developed by the @sc{ieee}
+that specify a Portable Operating System interface.  The ``IX'' denotes
+the Unix heritage of these standards.  The main standard of interest for
+@code{awk} users is P1003.2, the Command Language and Utilities standard.
+
+@item Range (of input lines)
+A sequence of consecutive lines from the input file.  A pattern
+can specify ranges of input lines for @code{awk} to process, or it can
+specify single lines.  @xref{Patterns}.@refill
+
+@item Recursion
+When a function calls itself, either directly or indirectly.
+If this isn't clear, refer to the entry for ``recursion.''
+
+@item Redirection
+Redirection means performing input from other than the standard input
+stream, or output to other than the standard output stream.
+
+You can redirect the output of the @code{print} and @code{printf} statements
+to a file or a system command, using the @samp{>}, @samp{>>}, and @samp{|}
+operators.  You can redirect input to the @code{getline} statement using
+the @samp{<} and @samp{|} operators.
+@xref{Redirection, ,Redirecting Output of @code{print} and @code{printf}}.@refill
+
+@item Regular Expression
+See ``regexp.''
+
+@item Regexp
+Short for @dfn{regular expression}.  A regexp is a pattern that denotes a
+set of strings, possibly an infinite set.  For example, the regexp
+@samp{R.*xp} matches any string starting with the letter @samp{R}
+and ending with the letters @samp{xp}.  In @code{awk}, regexps are
+used in patterns and in conditional expressions.  Regexps may contain
+escape sequences.  @xref{Regexp, ,Regular Expressions as Patterns}.@refill
+
+@item Rule
+A segment of an @code{awk} program that specifies how to process single
+input records.  A rule consists of a @dfn{pattern} and an @dfn{action}.
+@code{awk} reads an input record; then, for each rule, if the input record
+satisfies the rule's pattern, @code{awk} executes the rule's action.
+Otherwise, the rule does nothing for that input record.@refill
+
+@item Side Effect
+A side effect occurs when an expression has an effect aside from merely
+producing a value.  Assignment expressions, increment expressions and
+function calls have side effects.  @xref{Assignment Ops, ,Assignment Expressions}.
+
+@item Special File
+A file name interpreted internally by @code{gawk}, instead of being handed
+directly to the underlying operating system.  For example, @file{/dev/stdin}.
+@xref{Special Files, ,Standard I/O Streams}.
+
+@item Stream Editor
+A program that reads records from an input stream and processes them one
+or more at a time.  This is in contrast with batch programs, which may
+expect to read their input files in their entirety before starting to do
+anything, and with interactive programs, which require input from the
+user.@refill
+
+@item String
+A datum consisting of a sequence of characters, such as @samp{I am a
+string}.  Constant strings are written with double-quotes in the
+@code{awk} language, and may contain escape sequences.
+@xref{Constants, ,Constant Expressions}.
+
+@item Whitespace
+A sequence of blank or tab characters occurring inside an input record or a
+string.@refill
+@end table
+
+@node Index,  , Glossary, Top
+@unnumbered Index
+@printindex cp
+
+@summarycontents
+@contents
+@bye
+
+Unresolved Issues:
+------------------
+1. From: ntomczak@vm.ucs.ualberta.ca (Michal Jaegermann)
+   Examples of usage tend to suggest that /../ and ".." delimiters
+   can be used for regular expressions, even if definition is consistently
+   using /../.  I am not sure what the real rules are and in particular
+   what of the following is a bug and what is a feature:
+   # This program matches everything
+      '"\(" { print }'
+   # This one complains about mismatched parenthesis
+      '$0 ~ "\(" { print }'
+   # This one behaves in an expected manner
+      '/\(/ { print }'
+   You may also try to use "\(" as an argument to match() to see what
+   will happen.
+
+2. From ADR.
+
+   The posix (and original Unix!) notion of awk values as both number
+   and string values needs to be put into the manual.  This involves
+   major and minor rewrites of most of the manual, but should help in
+   clarifying many of the weirder points of the language.
+
+3. From ADR.
+
+   The manual should be reorganized.  Expressions should be introduced
+   early, building up to regexps as expressions, and from there to their
+   use as patterns and then in actions.  Built-in vars should come earlier
+   in the manual too.  The 'expert info' sections marked with comments
+   should get their own sections or subsections with nodes and titles.
+   The manual should be gone over thoroughly for indexing.
+
+4. From ADR.
+
+   Robert J. Chassell points out that awk programs should have some indication
+   of how to use them.  It would be useful to perhaps have a "programming
+   style" section of the manual that would include this and other tips.
+
+5. From ADR in response to moraes@uunet.ca
+   (This would make the beginnings of a good "puzzles" section...)
+
+   Date: Mon, 2 Dec 91 10:08:05 EST
+   From: gatech!cc!arnold (Arnold Robbins)
+   To: cs.dal.ca!david, uunet.ca!moraes
+   Subject: redirecting to /dev/stderr
+   Cc: skeeve!arnold, boeing.com!brennan, research.att.com!bwk
+   
+   In 2.13.3 the following program no longer dumps core:
+   
+   	BEGIN { print "hello" > /dev/stderr ; exit(1) }
+   
+   Instead, it creates a file named `0' with the word `hello' in it. AWK
+   semantics strikes again.  The meaning of the statement is
+   
+   		print "hello" > (($0 ~ /dev/) stderr)
+   
+   /dev/ tests $0 for the pattern `dev'.  This yields a 0.  The variable stderr,
+   having never been used, has a null string in it.  The concatenation yields
+   a string value of "0" which is used as the file name.  Sigh.
+   
+   I think with some more time I can come up with a decent fix, but it will
+   probably only print a diagnostic with -Wlint.
+   
+   Arnold
+
diff --git a/gnu/usr.bin/awk/regex.c b/gnu/usr.bin/awk/regex.c
new file mode 100644
index 0000000..f4dd4c2
--- /dev/null
+++ b/gnu/usr.bin/awk/regex.c
@@ -0,0 +1,2854 @@
+/* Extended regular expression matching and search library.
+   Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 1, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+/* To test, compile with -Dtest.  This Dtestable feature turns this into
+   a self-contained program which reads a pattern, describes how it
+   compiles, then reads a string and searches for it.
+   
+   On the other hand, if you compile with both -Dtest and -Dcanned you
+   can run some tests we've already thought of.  */
+
+
+#ifdef emacs
+
+/* The `emacs' switch turns on certain special matching commands
+  that make sense only in emacs. */
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+/* We write fatal error messages on standard error.  */
+#include <stdio.h>
+
+/* isalpha(3) etc. are used for the character classes.  */
+#include <ctype.h>
+
+#else	/* not emacs */
+
+#include "awk.h"
+
+#define	NO_ALLOCA	/* try it out for now */
+#ifndef NO_ALLOCA
+/* Make alloca work the best possible way.  */
+#ifdef __GNUC__
+#ifndef atarist
+#ifndef alloca
+#define alloca __builtin_alloca
+#endif
+#endif /* atarist */
+#else
+#if defined(sparc) && !defined(__GNUC__)
+#include <alloca.h>
+#else
+char *alloca ();
+#endif
+#endif /* __GNUC__ */
+
+#define FREE_AND_RETURN_VOID(stackb)	return
+#define FREE_AND_RETURN(stackb,val)	return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+        (stackx = (unsigned char **) alloca (2 * len			\
+                                            * sizeof (unsigned char *)),\
+	/* Only copy what is in use.  */				\
+        (unsigned char **) memcpy (stackx, stackb, len * sizeof (char *)))
+#else  /* NO_ALLOCA defined */
+#define FREE_AND_RETURN_VOID(stackb)   free(stackb);return
+#define FREE_AND_RETURN(stackb,val)    free(stackb);return(val)
+#define DOUBLE_STACK(stackx,stackb,len) \
+        (unsigned char **) realloc (stackb, 2 * len * sizeof (unsigned char *))
+#endif /* NO_ALLOCA */
+
+static void store_jump P((char *, int, char *));
+static void insert_jump P((int, char *, char *, char *));
+static void store_jump_n P((char *, int, char *, unsigned));
+static void insert_jump_n P((int, char *, char *, char *, unsigned));
+static void insert_op_2 P((int, char *, char *, int, int ));
+static int memcmp_translate P((unsigned char *, unsigned char *,
+			       int, unsigned char *));
+long re_set_syntax P((long));
+
+/* Define the syntax stuff, so we can do the \<, \>, etc.  */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+   commands in re_match_2.  */
+#ifndef Sword 
+#define Sword 1
+#endif
+
+#define SYNTAX(c) re_syntax_table[c]
+
+
+#ifdef SYNTAX_TABLE
+
+char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+static char re_syntax_table[256];
+static void init_syntax_once P((void));
+
+/* Fill in re_syntax_table on the first call; later calls return at once.  */
+static void
+init_syntax_once ()
+{
+   register int c;
+   static int done = 0;		/* Nonzero once the table has been built.  */
+
+   if (done)
+     return;
+
+   memset (re_syntax_table, 0, sizeof re_syntax_table);
+   /* Everything is now 0; mark letters and digits as word constituents.  */
+   for (c = 'a'; c <= 'z'; c++)
+     re_syntax_table[c] = Sword;
+
+   for (c = 'A'; c <= 'Z'; c++)
+     re_syntax_table[c] = Sword;
+
+   for (c = '0'; c <= '9'; c++)
+     re_syntax_table[c] = Sword;
+ 
+   /* Add specific syntax for ISO Latin-1: 0300-0377 are letters, except...  */
+   for (c = 0300; c <= 0377; c++)
+     re_syntax_table[c] = Sword;
+   re_syntax_table[0327] = 0;	/* ...the multiplication sign, and...  */
+   re_syntax_table[0367] = 0;	/* ...the division sign.  */
+
+   done = 1;
+}
+
+#endif /* SYNTAX_TABLE */
+#undef P
+#endif /* emacs */
+
+
+/* Sequents are missing isgraph.  */
+#ifndef isgraph
+#define isgraph(c) (isprint((c)) && !isspace((c)))
+#endif
+
+/* Get the interface, including the syntax bits.  */
+#include "regex.h"
+
+
+/* These are the command codes that appear in compiled regular
+   expressions, one per byte.  Some command codes are followed by
+   argument bytes.  A command code can specify any interpretation
+   whatsoever for its arguments.  Zero-bytes may appear in the compiled
+   regular expression.
+   
+   The value of `exactn' is needed in search.c (search_buffer) in emacs.
+   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+   `exactn' we use here must also be 1.  */
+
+enum regexpcode
+  {
+    unused=0, /* Presumably a placeholder so that no command has code 0.  */
+    exactn=1, /* Followed by one byte giving n, then by n literal bytes.  */
+    begline,  /* Fail unless at beginning of line.  */
+    endline,  /* Fail unless at end of line.  */
+    jump,     /* Followed by two bytes giving relative address to jump to.  */
+    on_failure_jump,	 /* Followed by two bytes giving relative address of 
+			    place to resume at in case of failure.  */
+    finalize_jump,	 /* Throw away latest failure point and then jump to 
+			    address.  */
+    maybe_finalize_jump, /* Like jump but finalize if safe to do so.
+			    This is used to jump back to the beginning
+			    of a repeat.  If the command that follows
+			    this jump is clearly incompatible with the
+			    one at the beginning of the repeat, such that
+			    we can be sure that there is no use backtracking
+			    out of repetitions already completed,
+			    then we finalize.  */
+    dummy_failure_jump,  /* Jump, and push a dummy failure point. This 
+			    failure point will be thrown away if an attempt 
+                            is made to use it for a failure. A + construct 
+                            makes this before the first repeat.  Also
+                            use it as an intermediary kind of jump when
+                            compiling an or construct.  */
+    succeed_n,	 /* Used like on_failure_jump except has to succeed n times;
+		    then gets turned into an on_failure_jump. The relative
+                    address following it is useless until then.  The
+                    address is followed by two bytes containing n.  */
+    jump_n,	 /* Similar to jump, but jump n times only; also the relative
+		    address following is in turn followed by yet two more bytes
+                    containing n.  */
+    set_number_at,	/* Set the following relative location to the
+			   subsequent number.  */
+    anychar,	 /* Matches any (more or less) one character.  */
+    charset,     /* Matches any one char belonging to specified set.
+		    First following byte is number of bitmap bytes.
+		    Then come bytes for a bitmap saying which chars are in.
+		    Bits in each byte are ordered low-bit-first.
+		    A character is in the set if its bit is 1.
+		    A character too large to have a bit in the map
+		    is automatically not in the set.  */
+    charset_not, /* Same parameters as charset, but match any character
+                    that is not one of those specified.  */
+    start_memory, /* Start remembering the text that is matched, for
+		    storing in a memory register.  Followed by one
+                    byte containing the register number.  Register numbers
+                    must be in the range 0 through RE_NREGS.  */
+    stop_memory, /* Stop remembering the text that is matched
+		    and store it in a memory register.  Followed by
+                    one byte containing the register number. Register
+                    numbers must be in the range 0 through RE_NREGS.  */
+    duplicate,   /* Match a duplicate of something remembered.
+		    Followed by one byte containing the index of the memory 
+                    register.  */
+    before_dot,	 /* Succeeds if before point.  */
+    at_dot,	 /* Succeeds if at point.  */
+    after_dot,	 /* Succeeds if after point.  */
+    begbuf,      /* Succeeds if at beginning of buffer.  */
+    endbuf,      /* Succeeds if at end of buffer.  */
+    wordchar,    /* Matches any word-constituent character.  */
+    notwordchar, /* Matches any char that is not a word-constituent.  */
+    wordbeg,	 /* Succeeds if at word beginning.  */
+    wordend,	 /* Succeeds if at word end.  */
+    wordbound,   /* Succeeds if at a word boundary.  */
+    notwordbound,/* Succeeds if not at a word boundary.  */
+    syntaxspec,  /* Matches any character whose syntax is specified.
+		    Followed by a byte which contains a syntax code,
+                    e.g., Sword.  */
+    notsyntaxspec /* Matches any character whose syntax differs from
+                     that specified.  */
+  };
+
+ 
+/* Number of failure points to allocate space for initially,
+   when matching.  If this number is exceeded, more space is allocated,
+   so it is not a hard limit.  */
+
+#ifndef NFAILURES
+#define NFAILURES 80
+#endif
+
+#ifdef CHAR_UNSIGNED
+#define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */
+#endif
+#ifndef SIGN_EXTEND_CHAR
+#define SIGN_EXTEND_CHAR(x) (x)
+#endif
+ 
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
+#define STORE_NUMBER(destination, number)				\
+  { (destination)[0] = (number) & 0377;					\
+    (destination)[1] = (number) >> 8; }
+  
+/* Same as STORE_NUMBER, except increment the destination pointer to
+   the byte after where the number is stored.  Watch out that values for
+   DESTINATION such as p + 1 won't work, whereas p will.  */
+#define STORE_NUMBER_AND_INCR(destination, number)			\
+  { STORE_NUMBER(destination, number);					\
+    (destination) += 2; }
+
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+   at SOURCE; the second (high-order) byte is sign-extended.  */
+#define EXTRACT_NUMBER(destination, source)				\
+  { (destination) = *(source) & 0377;					\
+    (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
+
+/* Same as EXTRACT_NUMBER, except also advance SOURCE past the two bytes
+   just read.  Note that SOURCE has to be an lvalue such as p, not,
+   e.g., p + 1. */
+#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
+  { EXTRACT_NUMBER (destination, source);				\
+    (source) += 2; }
+
+
+/* Specify the precise syntax of regexps for compilation.  This provides
+   compatibility with various utilities which historically have
+   different, incompatible syntaxes.
+
+   The argument SYNTAX is a bit-mask composed of the various bits defined
+   in regex.h.  The value returned is the syntax previously in effect.  */
+
+long
+re_set_syntax (syntax)
+  long syntax;
+{
+  long ret;	/* Previous value of obscure_syntax, handed back to caller.  */
+
+  ret = obscure_syntax;
+  obscure_syntax = syntax;
+  return ret;
+}
+
+/* Set by re_set_syntax to the current regexp syntax to recognize.  */
+long obscure_syntax = 0;
+
+
+
+/* Macros for re_compile_pattern, which is found below these definitions.  */
+
+#define CHAR_CLASS_MAX_LENGTH  6
+
+/* Fetch the next character in the uncompiled pattern, translating it if
+   necessary.  */
+#define PATFETCH(c)							\
+  {if (p == pend) goto end_of_pattern;					\
+  c = * (unsigned char *) p++;						\
+  if (translate) c = translate[c]; }
+
+/* Fetch the next character in the uncompiled pattern, with no
+   translation.  */
+#define PATFETCH_RAW(c)							\
+ {if (p == pend) goto end_of_pattern;					\
+  c = * (unsigned char *) p++; }
+
+#define PATUNFETCH p--
+
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE  28
+
+/* Make sure we have at least N more bytes of space in buffer.  */
+#define GET_BUFFER_SPACE(n)						\
+  {								        \
+    while (b - bufp->buffer + (n) >= bufp->allocated)			\
+      EXTEND_BUFFER;							\
+  }
+
+/* Make sure we have one more byte of buffer space and then add CH to it.  */
+#define BUFPUSH(ch)							\
+  {									\
+    GET_BUFFER_SPACE (1);						\
+    *b++ = (char) (ch);							\
+  }
+  
+/* Double the size of the buffer (up to a maximum of 1 << 16 bytes) via
+   reallocation and reset the pointers that pointed into the old
+   allocation to point to the correct places in the new allocation.  If
+   the buffer is already 1 << 16 bytes, go to `too_big' instead.  */
+#define EXTEND_BUFFER							\
+  { char *old_buffer = bufp->buffer;					\
+    if (bufp->allocated == (1L<<16)) goto too_big;			\
+    bufp->allocated *= 2;						\
+    if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16);		\
+    bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated);	\
+    if (bufp->buffer == 0)						\
+      goto memory_exhausted;						\
+    b = (b - old_buffer) + bufp->buffer;				\
+    if (fixup_jump)							\
+      fixup_jump = (fixup_jump - old_buffer) + bufp->buffer;		\
+    if (laststart)							\
+      laststart = (laststart - old_buffer) + bufp->buffer;		\
+    begalt = (begalt - old_buffer) + bufp->buffer;			\
+    if (pending_exact)							\
+      pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
+  }
+
+/* Set the bit for character C in a character set list.  */
+#define SET_LIST_BIT(c)  (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+
+/* Get the next unsigned number in the uncompiled pattern.
+
+   NUM must be an int lvalue; if no digit is seen it is left unchanged
+   (callers preset it to -1 to detect that).  On exit `c' holds the last
+   character fetched, i.e. the first non-digit unless the pattern ended.
+
+   Accumulation stops once NUM has exceeded RE_DUP_MAX: the remaining
+   digits are still consumed, but NUM keeps a value above RE_DUP_MAX
+   instead of overflowing (and possibly wrapping to a small, apparently
+   valid count).  Expands in the caller's scope and uses its `p', `pend'
+   and `c'; PATFETCH may jump to the caller's `end_of_pattern' label.  */
+#define GET_UNSIGNED_NUMBER(num) 					\
+  { if (p != pend) 							\
+      { 								\
+        PATFETCH (c); 							\
+	while (isdigit (c)) 						\
+	  { 								\
+	    if (num < 0) 						\
+	       num = 0; 						\
+	    if (num <= RE_DUP_MAX) 					\
+              num = num * 10 + c - '0'; 				\
+	    if (p == pend) 						\
+	       break; 							\
+	    PATFETCH (c); 						\
+	  } 								\
+        } 								\
+  }
+
+/* Subroutines for re_compile_pattern.  */
+/* static void store_jump (), insert_jump (), store_jump_n (),
+	    insert_jump_n (), insert_op_2 (); */
+
+
+/* re_compile_pattern takes a regular-expression string
+   and converts it into a buffer full of byte commands for matching.
+
+   PATTERN   is the address of the pattern string
+   SIZE      is the length of it.
+   BUFP	    is a  struct re_pattern_buffer *  which points to the info
+	     on where to store the byte commands.
+	     This structure contains a  char *  which points to the
+	     actual space, which should have been obtained with malloc.
+	     re_compile_pattern may use realloc to grow the buffer space.
+
+   The number of bytes of commands can be found out by looking in
+   the `struct re_pattern_buffer' that bufp pointed to, after
+   re_compile_pattern returns.
+
+   Returns 0 on success.  On failure returns a pointer to a constant
+   error-message string (see the labels at the end of the function);
+   bufp->used is not updated in that case.
+
+   PATTERN is only guaranteed to hold SIZE characters, so every
+   look-ahead below checks against PEND before reading.  */
+
+char *
+re_compile_pattern (pattern, size, bufp)
+     char *pattern;
+     size_t size;
+     struct re_pattern_buffer *bufp;
+{
+  register char *b = bufp->buffer;
+  register char *p = pattern;
+  char *pend = pattern + size;
+  register unsigned c, c1;
+  char *p0;
+  unsigned char *translate = (unsigned char *) bufp->translate;
+
+  /* Address of the count-byte of the most recently inserted `exactn'
+     command.  This makes it possible to tell whether a new exact-match
+     character can be added to that command or requires a new `exactn'
+     command.  */
+     
+  char *pending_exact = 0;
+
+  /* Address of the place where a forward-jump should go to the end of
+     the containing expression.  Each alternative of an `or', except the
+     last, ends with a forward-jump of this sort.  */
+
+  char *fixup_jump = 0;
+
+  /* Address of start of the most recently finished expression.
+     This tells postfix * where to find the start of its operand.  */
+
+  char *laststart = 0;
+
+  /* In processing a repeat, 1 means zero matches is allowed.  */
+
+  char zero_times_ok;
+
+  /* In processing a repeat, 1 means many matches is allowed.  */
+
+  char many_times_ok;
+
+  /* Address of beginning of regexp, or inside of last \(.  */
+
+  char *begalt = b;
+
+  /* In processing an interval, at least this many matches must be made.  */
+  int lower_bound;
+
+  /* In processing an interval, at most this many matches can be made.  */
+  int upper_bound;
+
+  /* Place in pattern (i.e., the {) to which to go back if the interval
+     is invalid.  */
+  char *beg_interval = 0;
+  
+  /* Stack of information saved by \( and restored by \).
+     Four stack elements are pushed by each \(:
+       First, the value of b.
+       Second, the value of fixup_jump.
+       Third, the value of regnum.
+       Fourth, the value of begalt.
+     Pointers are stored as offsets from bufp->buffer so that they
+     survive a reallocation of the buffer.  */
+
+  int stackb[40];
+  int *stackp = stackb;
+  int *stacke = stackb + 40;
+  int *stackt;
+
+  /* Counts \('s as they are encountered.  Remembered for the matching \),
+     where it becomes the register number to put in the stop_memory
+     command.  */
+
+  int regnum = 1;
+
+  bufp->fastmap_accurate = 0;
+
+#ifndef emacs
+#ifndef SYNTAX_TABLE
+  /* Initialize the syntax table.  */
+   init_syntax_once();
+#endif
+#endif
+
+  if (bufp->allocated == 0)
+    {
+      /* Allocate into a temporary and publish the result only on
+	 success, so that a failed allocation does not leave BUFP
+	 claiming INIT_BUF_SIZE bytes behind a null (or lost) buffer.  */
+      char *initial_buffer;
+
+      if (bufp->buffer)
+	/* EXTEND_BUFFER loses when bufp->allocated is 0.  */
+	initial_buffer = (char *) realloc (bufp->buffer, INIT_BUF_SIZE);
+      else
+	/* Caller did not allocate a buffer.  Do it for them.  */
+	initial_buffer = (char *) malloc (INIT_BUF_SIZE);
+      if (!initial_buffer) goto memory_exhausted;
+      bufp->buffer = initial_buffer;
+      bufp->allocated = INIT_BUF_SIZE;
+      begalt = b = bufp->buffer;
+    }
+
+  while (p != pend)
+    {
+      PATFETCH (c);
+
+      switch (c)
+	{
+	case '$':
+	  {
+	    char *p1 = p;
+	    /* When testing what follows the $,
+	       look past the \-constructs that don't consume anything.  */
+	    if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+	      while (p1 != pend)
+		{
+		  if (*p1 == '\\' && p1 + 1 != pend
+		      && (p1[1] == '<' || p1[1] == '>'
+			  || p1[1] == '`' || p1[1] == '\''
+#ifdef emacs
+			  || p1[1] == '='
+#endif
+			  || p1[1] == 'b' || p1[1] == 'B'))
+		    p1 += 2;
+		  else
+		    break;
+		}
+            if (obscure_syntax & RE_TIGHT_VBAR)
+	      {
+		if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p1 != pend)
+		  goto normal_char;
+		/* Make operand of last vbar end before this `$'.  */
+		if (fixup_jump)
+		  store_jump (fixup_jump, jump, b);
+		fixup_jump = 0;
+		BUFPUSH (endline);
+		break;
+	      }
+	    /* $ means succeed if at end of line, but only in special contexts.
+	      If validly in the middle of a pattern, it is a normal character. */
+
+            if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && p1 != pend)
+	      goto invalid_pattern;
+	    /* p1 != pend is known past the first test; p1[1] is only
+	       read when it lies inside the pattern.  */
+	    if (p1 == pend || *p1 == '\n'
+		|| (obscure_syntax & RE_CONTEXT_INDEP_OPS)
+		|| (obscure_syntax & RE_NO_BK_PARENS
+		    ? *p1 == ')'
+		    : *p1 == '\\' && p1 + 1 != pend && p1[1] == ')')
+		|| (obscure_syntax & RE_NO_BK_VBAR
+		    ? *p1 == '|'
+		    : *p1 == '\\' && p1 + 1 != pend && p1[1] == '|'))
+	      {
+		BUFPUSH (endline);
+		break;
+	      }
+	    goto normal_char;
+          }
+	case '^':
+	  /* ^ means succeed if at beg of line, but only if no preceding 
+             pattern.  */
+             
+          if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS) && laststart)
+            goto invalid_pattern;
+          if (laststart && p - 2 >= pattern && p[-2] != '\n'
+	       && !(obscure_syntax & RE_CONTEXT_INDEP_OPS))
+	    goto normal_char;
+	  if (obscure_syntax & RE_TIGHT_VBAR)
+	    {
+	      if (p != pattern + 1
+		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+		goto normal_char;
+	      BUFPUSH (begline);
+	      begalt = b;
+	    }
+	  else
+	    BUFPUSH (begline);
+	  break;
+
+	case '+':
+	case '?':
+	  if ((obscure_syntax & RE_BK_PLUS_QM)
+	      || (obscure_syntax & RE_LIMITED_OPS))
+	    goto normal_char;
+	handle_plus:
+	case '*':
+	  /* If there is no previous pattern, char not special. */
+	  if (!laststart)
+            {
+              if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+                goto invalid_pattern;
+              else if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
+		goto normal_char;
+            }
+	  /* If there is a sequence of repetition chars,
+	     collapse it down to just one.  */
+	  zero_times_ok = 0;
+	  many_times_ok = 0;
+	  while (1)
+	    {
+	      zero_times_ok |= c != '+';
+	      many_times_ok |= c != '?';
+	      if (p == pend)
+		break;
+	      PATFETCH (c);
+	      if (c == '*')
+		;
+	      else if (!(obscure_syntax & RE_BK_PLUS_QM)
+		       && (c == '+' || c == '?'))
+		;
+	      else if ((obscure_syntax & RE_BK_PLUS_QM)
+		       && c == '\\')
+		{
+		  /* int c1; */
+		  PATFETCH (c1);
+		  if (!(c1 == '+' || c1 == '?'))
+		    {
+		      PATUNFETCH;
+		      PATUNFETCH;
+		      break;
+		    }
+		  c = c1;
+		}
+	      else
+		{
+		  PATUNFETCH;
+		  break;
+		}
+	    }
+
+	  /* Star, etc. applied to an empty pattern is equivalent
+	     to an empty pattern.  */
+	  if (!laststart)  
+	    break;
+
+	  /* Now we know whether or not zero matches is allowed
+	     and also whether or not two or more matches is allowed.  */
+	  if (many_times_ok)
+	    {
+	      /* If more than one repetition is allowed, put in at the
+                 end a backward relative jump from b to before the next
+                 jump we're going to put in below (which jumps from
+                 laststart to after this jump).  */
+              GET_BUFFER_SPACE (3);
+	      store_jump (b, maybe_finalize_jump, laststart - 3);
+	      b += 3;  	/* Because store_jump put stuff here.  */
+	    }
+          /* On failure, jump from laststart to b + 3, which will be the
+             end of the buffer after this jump is inserted.  */
+          GET_BUFFER_SPACE (3);
+	  insert_jump (on_failure_jump, laststart, b + 3, b);
+	  pending_exact = 0;
+	  b += 3;
+	  if (!zero_times_ok)
+	    {
+	      /* At least one repetition is required, so insert a
+                 dummy-failure before the initial on-failure-jump
+                 instruction of the loop. This effects a skip over that
+                 instruction the first time we hit that loop.  */
+              GET_BUFFER_SPACE (6);
+              insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
+	      b += 3;
+	    }
+	  break;
+
+	case '.':
+	  laststart = b;
+	  BUFPUSH (anychar);
+	  break;
+
+        case '[':
+          if (p == pend)
+            goto invalid_pattern;
+	  /* Reserve room for the opcode, the length byte and a full
+	     bitmap before any of them is written.  */
+	  while (b - bufp->buffer
+		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
+	    EXTEND_BUFFER;
+
+	  laststart = b;
+	  if (*p == '^')
+	    {
+              BUFPUSH (charset_not); 
+              p++;
+            }
+	  else
+	    BUFPUSH (charset);
+	  p0 = p;
+
+	  BUFPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+	  /* Clear the whole map */
+	  memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
+          
+	  if ((obscure_syntax & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
+            SET_LIST_BIT ('\n');
+
+
+	  /* Read in characters and ranges, setting map bits.  */
+	  while (1)
+	    {
+	      /* Don't translate while fetching, in case it's a range bound.
+		 When we set the bit for the character, we translate it.  */
+	      PATFETCH_RAW (c);
+
+	      /* If set, \ escapes characters when inside [...].  */
+	      if ((obscure_syntax & RE_AWK_CLASS_HACK) && c == '\\')
+	        {
+	          PATFETCH(c1);
+                  SET_LIST_BIT (c1);
+	          continue;
+	        }
+              if (c == ']')
+                {
+                  if (p == p0 + 1)
+                    {
+		      /* If this is an empty bracket expression.  */
+                      if ((obscure_syntax & RE_NO_EMPTY_BRACKETS) 
+                          && p == pend)
+                        goto invalid_pattern;
+                    }
+                  else 
+		    /* Stop if this isn't merely a ] inside a bracket
+                       expression, but rather the end of a bracket
+                       expression.  */
+                    break;
+                }
+              /* Get a range.  Only look at p[0] and p[1] when they are
+                 inside the pattern; a `-' that is the very last
+                 character is still treated as starting a range, so the
+                 fetch below reports the premature end.  */
+              if (p != pend && p[0] == '-'
+                  && (p + 1 == pend || p[1] != ']'))
+		{
+                  PATFETCH (c1);
+		  /* Don't translate the range bounds while fetching them.  */
+		  PATFETCH_RAW (c1);
+                  
+		  if ((obscure_syntax & RE_NO_EMPTY_RANGES) && c > c1)
+                    goto invalid_pattern;
+                    
+		  if ((obscure_syntax & RE_NO_HYPHEN_RANGE_END) 
+                      && c1 == '-' && *p != ']')
+                    goto invalid_pattern;
+                    
+                  while (c <= c1)
+		    {
+		      /* Translate each char that's in the range.  */
+		      if (translate)
+			SET_LIST_BIT (translate[c]);
+		      else
+			SET_LIST_BIT (c);
+                      c++;
+		    }
+                }
+	      else if ((obscure_syntax & RE_CHAR_CLASSES)
+			&&  c == '[' && p != pend && p[0] == ':')
+                {
+		  /* Longest valid character class word has six characters.
+		     One extra element holds the terminating null, which
+		     is stored at index CHAR_CLASS_MAX_LENGTH when the
+		     name has the maximum length.  */
+                  char str[CHAR_CLASS_MAX_LENGTH + 1];
+		  PATFETCH (c);
+		  c1 = 0;
+		  /* If no ] at end.  */
+                  if (p == pend)
+                    goto invalid_pattern;
+		  while (1)
+		    {
+		      /* Don't translate the ``character class'' characters.  */
+                      PATFETCH_RAW (c);
+		      if (c == ':' || c == ']' || p == pend
+                          || c1 == CHAR_CLASS_MAX_LENGTH)
+		        break;
+		      str[c1++] = c;
+		    }
+		  str[c1] = '\0';
+		  if (p == pend 	
+		      || c == ']'	/* End of the bracket expression.  */
+                      || p[0] != ']'
+		      || p + 1 == pend
+                      || (strcmp (str, "alpha") != 0 
+                          && strcmp (str, "upper") != 0
+			  && strcmp (str, "lower") != 0 
+                          && strcmp (str, "digit") != 0
+			  && strcmp (str, "alnum") != 0 
+                          && strcmp (str, "xdigit") != 0
+			  && strcmp (str, "space") != 0 
+                          && strcmp (str, "print") != 0
+			  && strcmp (str, "punct") != 0 
+                          && strcmp (str, "graph") != 0
+			  && strcmp (str, "cntrl") != 0))
+		    {
+		       /* Undo the ending character, the letters, and leave 
+                          the leading : and [ (but set bits for them).  */
+                      c1++;
+		      while (c1--)    
+			PATUNFETCH;
+		      SET_LIST_BIT ('[');
+		      SET_LIST_BIT (':');
+	            }
+                  else
+                    {
+                      /* The ] at the end of the character class.  */
+                      PATFETCH (c);					
+                      if (c != ']')
+                        goto invalid_pattern;
+		      for (c = 0; c < (1 << BYTEWIDTH); c++)
+			{
+			  if ((strcmp (str, "alpha") == 0  && isalpha (c))
+			       || (strcmp (str, "upper") == 0  && isupper (c))
+			       || (strcmp (str, "lower") == 0  && islower (c))
+			       || (strcmp (str, "digit") == 0  && isdigit (c))
+			       || (strcmp (str, "alnum") == 0  && isalnum (c))
+			       || (strcmp (str, "xdigit") == 0  && isxdigit (c))
+			       || (strcmp (str, "space") == 0  && isspace (c))
+			       || (strcmp (str, "print") == 0  && isprint (c))
+			       || (strcmp (str, "punct") == 0  && ispunct (c))
+			       || (strcmp (str, "graph") == 0  && isgraph (c))
+			       || (strcmp (str, "cntrl") == 0  && iscntrl (c)))
+			    SET_LIST_BIT (c);
+			}
+		    }
+                }
+              else if (translate)
+		SET_LIST_BIT (translate[c]);
+	      else
+                SET_LIST_BIT (c);
+	    }
+
+          /* Discard any character set/class bitmap bytes that are all
+             0 at the end of the map. Decrement the map-length byte too.  */
+          while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 
+            b[-1]--; 
+          b += b[-1];
+          break;
+
+	case '(':
+	  if (! (obscure_syntax & RE_NO_BK_PARENS))
+	    goto normal_char;
+	  else
+	    goto handle_open;
+
+	case ')':
+	  if (! (obscure_syntax & RE_NO_BK_PARENS))
+	    goto normal_char;
+	  else
+	    goto handle_close;
+
+        case '\n':
+	  if (! (obscure_syntax & RE_NEWLINE_OR))
+	    goto normal_char;
+	  else
+	    goto handle_bar;
+
+	case '|':
+	  if ((obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+              && (! laststart  ||  p == pend))
+	    goto invalid_pattern;
+          else if (! (obscure_syntax & RE_NO_BK_VBAR))
+	    goto normal_char;
+	  else
+	    goto handle_bar;
+
+	case '{':
+           if (! ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+                  && (obscure_syntax & RE_INTERVALS)))
+             goto normal_char;
+           else
+             goto handle_interval;
+             
+        case '\\':
+	  if (p == pend) goto invalid_pattern;
+	  PATFETCH_RAW (c);
+	  switch (c)
+	    {
+	    case '(':
+	      if (obscure_syntax & RE_NO_BK_PARENS)
+		goto normal_backsl;
+	    handle_open:
+	      if (stackp == stacke) goto nesting_too_deep;
+
+              /* Laststart should point to the start_memory that we are about
+                 to push (unless the pattern has RE_NREGS or more ('s).  */
+              *stackp++ = b - bufp->buffer;    
+	      if (regnum < RE_NREGS)
+	        {
+		  BUFPUSH (start_memory);
+		  BUFPUSH (regnum);
+	        }
+	      /* The offset is stored plus one so that 0 can mean
+		 "no fixup_jump"; handle_close subtracts the one.  */
+	      *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
+	      *stackp++ = regnum++;
+	      *stackp++ = begalt - bufp->buffer;
+	      fixup_jump = 0;
+	      laststart = 0;
+	      begalt = b;
+	      break;
+
+	    case ')':
+	      if (obscure_syntax & RE_NO_BK_PARENS)
+		goto normal_backsl;
+	    handle_close:
+	      if (stackp == stackb) goto unmatched_close;
+	      begalt = *--stackp + bufp->buffer;
+	      if (fixup_jump)
+		store_jump (fixup_jump, jump, b);
+	      if (stackp[-1] < RE_NREGS)
+		{
+		  BUFPUSH (stop_memory);
+		  BUFPUSH (stackp[-1]);
+		}
+	      stackp -= 2;
+              fixup_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
+              laststart = *--stackp + bufp->buffer;
+	      break;
+
+	    case '|':
+              if ((obscure_syntax & RE_LIMITED_OPS)
+	          || (obscure_syntax & RE_NO_BK_VBAR))
+		goto normal_backsl;
+	    handle_bar:
+              if (obscure_syntax & RE_LIMITED_OPS)
+                goto normal_char;
+	      /* Insert before the previous alternative a jump which
+                 jumps to this alternative if the former fails.  */
+              GET_BUFFER_SPACE (6);
+              insert_jump (on_failure_jump, begalt, b + 6, b);
+	      pending_exact = 0;
+	      b += 3;
+	      /* The alternative before the previous alternative has a
+                 jump after it which gets executed if it gets matched.
+                 Adjust that jump so it will jump to the previous
+                 alternative's analogous jump (put in below, which in
+                 turn will jump to the next (if any) alternative's such
+                 jump, etc.).  The last such jump jumps to the correct
+                 final destination.  */
+              if (fixup_jump)
+		store_jump (fixup_jump, jump, b);
+                
+	      /* Leave space for a jump after previous alternative---to be 
+                 filled in later.  */
+              fixup_jump = b;
+              b += 3;
+
+              laststart = 0;
+	      begalt = b;
+	      break;
+
+            case '{': 
+              if (! (obscure_syntax & RE_INTERVALS)
+		  /* Let \{ be a literal.  */
+                  || ((obscure_syntax & RE_INTERVALS)
+                      && (obscure_syntax & RE_NO_BK_CURLY_BRACES))
+		  /* If it's the string "\{".  */
+		  || (p - 2 == pattern  &&  p == pend))
+                goto normal_backsl;
+            handle_interval:
+	      beg_interval = p - 1;		/* The {.  */
+              /* If there is no previous pattern, this isn't an interval.  */
+	      if (!laststart)
+	        {
+                  if (obscure_syntax & RE_CONTEXTUAL_INVALID_OPS)
+		    goto invalid_pattern;
+                  else
+                    goto normal_backsl;
+                }
+              /* It also isn't an interval if not preceded by an re
+                 matching a single character or subexpression, or if
+                 the current type of intervals can't handle back
+                 references and the previous thing is a back reference.  */
+              if (! (*laststart == anychar
+		     || *laststart == charset
+		     || *laststart == charset_not
+		     || *laststart == start_memory
+		     || (*laststart == exactn  &&  laststart[1] == 1)
+		     || (! (obscure_syntax & RE_NO_BK_REFS)
+                         && *laststart == duplicate)))
+                {
+                  if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+                    goto normal_char;
+                    
+		  /* Posix extended syntax is handled in previous
+                     statement; this is for Posix basic syntax.  */
+                  if (obscure_syntax & RE_INTERVALS)
+                    goto invalid_pattern;
+                    
+                  goto normal_backsl;
+		}
+              lower_bound = -1;			/* So can see if are set.  */
+	      upper_bound = -1;
+              GET_UNSIGNED_NUMBER (lower_bound);
+	      if (c == ',')
+		{
+		  GET_UNSIGNED_NUMBER (upper_bound);
+		  if (upper_bound < 0)
+		    upper_bound = RE_DUP_MAX;
+		}
+	      if (upper_bound < 0)
+		upper_bound = lower_bound;
+              if (! (obscure_syntax & RE_NO_BK_CURLY_BRACES)) 
+                {
+                  if (c != '\\')
+                    goto invalid_pattern;
+                  PATFETCH (c);
+                }
+	      if (c != '}' || lower_bound < 0 || upper_bound > RE_DUP_MAX
+		  || lower_bound > upper_bound 
+                  || ((obscure_syntax & RE_NO_BK_CURLY_BRACES) 
+		      && p != pend  && *p == '{')) 
+	        {
+		  if (obscure_syntax & RE_NO_BK_CURLY_BRACES)
+                    goto unfetch_interval;
+                  else
+                    goto invalid_pattern;
+		}
+
+	      /* If upper_bound is zero, don't want to succeed at all; 
+ 		 jump from laststart to b + 3, which will be the end of
+                 the buffer after this jump is inserted.  */
+                 
+               if (upper_bound == 0)
+                 {
+                   GET_BUFFER_SPACE (3);
+                   insert_jump (jump, laststart, b + 3, b);
+                   b += 3;
+                 }
+
+               /* Otherwise, after lower_bound number of succeeds, jump
+                  to after the jump_n which will be inserted at the end
+                  of the buffer, and insert that jump_n.  */
+               else 
+		 { /* Set to 5 if only one repetition is allowed and
+	              hence no jump_n is inserted at the current end of
+                      the buffer; then only space for the succeed_n is
+                      needed.  Otherwise, need space for both the
+                      succeed_n and the jump_n.  */
+                      
+                   unsigned slots_needed = upper_bound == 1 ? 5 : 10;
+                     
+                   GET_BUFFER_SPACE (slots_needed);
+                   /* Initialize the succeed_n to n, even though it will
+                      be set by its attendant set_number_at, because
+                      re_compile_fastmap will need to know it.  Jump to
+                      what the end of buffer will be after inserting
+                      this succeed_n and possibly appending a jump_n.  */
+                   insert_jump_n (succeed_n, laststart, b + slots_needed, 
+		                  b, lower_bound);
+                   b += 5; 	/* Just increment for the succeed_n here.  */
+
+		  /* More than one repetition is allowed, so put in at
+		     the end of the buffer a backward jump from b to the
+                     succeed_n we put in above.  By the time we've gotten
+                     to this jump when matching, we'll have matched once
+                     already, so jump back only upper_bound - 1 times.  */
+
+                   if (upper_bound > 1)
+                     {
+                       store_jump_n (b, jump_n, laststart, upper_bound - 1);
+                       b += 5;
+                       /* When hit this when matching, reset the
+                          preceding jump_n's n to upper_bound - 1.  */
+                       BUFPUSH (set_number_at);
+		       /* Two numbers follow the opcode; reserve room for
+			  both of them (four bytes, given that
+			  set_number_at occupies five in all), not just
+			  for the first.  */
+		       GET_BUFFER_SPACE (4);
+                       STORE_NUMBER_AND_INCR (b, -5);
+                       STORE_NUMBER_AND_INCR (b, upper_bound - 1);
+                     }
+		   /* When hit this when matching, set the succeed_n's n.  */
+                   GET_BUFFER_SPACE (5);
+		   insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
+                   b += 5;
+                 }
+              pending_exact = 0;
+	      beg_interval = 0;
+              break;
+
+
+            unfetch_interval:
+	      /* If an invalid interval, match the characters as literals.  */
+	       if (beg_interval)
+                 p = beg_interval;
+  	       else
+                 {
+                   fprintf (stderr, 
+		      "regex: no interval beginning to which to backtrack.\n");
+		   exit (1);
+                 }
+                 
+               beg_interval = 0;
+               PATFETCH (c);		/* normal_char expects char in `c'.  */
+	       goto normal_char;
+	       break;
+
+#ifdef emacs
+	    case '=':
+	      BUFPUSH (at_dot);
+	      break;
+
+	    case 's':	
+	      laststart = b;
+	      BUFPUSH (syntaxspec);
+	      PATFETCH (c);
+	      BUFPUSH (syntax_spec_code[c]);
+	      break;
+
+	    case 'S':
+	      laststart = b;
+	      BUFPUSH (notsyntaxspec);
+	      PATFETCH (c);
+	      BUFPUSH (syntax_spec_code[c]);
+	      break;
+#endif /* emacs */
+
+	    case 'w':
+	      laststart = b;
+	      BUFPUSH (wordchar);
+	      break;
+
+	    case 'W':
+	      laststart = b;
+	      BUFPUSH (notwordchar);
+	      break;
+
+	    case '<':
+	      BUFPUSH (wordbeg);
+	      break;
+
+	    case '>':
+	      BUFPUSH (wordend);
+	      break;
+
+	    case 'b':
+	      BUFPUSH (wordbound);
+	      break;
+
+	    case 'B':
+	      BUFPUSH (notwordbound);
+	      break;
+
+	    case '`':
+	      BUFPUSH (begbuf);
+	      break;
+
+	    case '\'':
+	      BUFPUSH (endbuf);
+	      break;
+
+	    case '1':
+	    case '2':
+	    case '3':
+	    case '4':
+	    case '5':
+	    case '6':
+	    case '7':
+	    case '8':
+	    case '9':
+	      if (obscure_syntax & RE_NO_BK_REFS)
+                goto normal_char;
+              c1 = c - '0';
+	      if (c1 >= regnum)
+		{
+  		  if (obscure_syntax & RE_NO_EMPTY_BK_REF)
+                    goto invalid_pattern;
+                  else
+                    goto normal_char;
+                }
+              /* Can't back reference to a subexpression if inside of it.  */
+              for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
+ 		if (*stackt == c1)
+		  goto normal_char;
+	      laststart = b;
+	      BUFPUSH (duplicate);
+	      BUFPUSH (c1);
+	      break;
+
+	    case '+':
+	    case '?':
+	      if (obscure_syntax & RE_BK_PLUS_QM)
+		goto handle_plus;
+	      else
+                goto normal_backsl;
+              break;
+
+            default:
+	    normal_backsl:
+	      /* You might think it would be useful for \ to mean
+		 not to translate; but if we don't translate it
+		 it will never match anything.  */
+	      if (translate) c = translate[c];
+	      goto normal_char;
+	    }
+	  break;
+
+	default:
+	normal_char:		/* Expects the character in `c'.  */
+	  /* Start a new `exactn' unless C can simply be appended to the
+	     pending one.  A new one is needed when the next pattern
+	     character is a postfix operator, because that operator must
+	     apply to C alone; the look-ahead only happens when those
+	     characters exist.  */
+	  if (!pending_exact || pending_exact + *pending_exact + 1 != b
+	      || *pending_exact == 0177
+	      || (p != pend
+		  && (*p == '*' || *p == '^'
+		      || ((obscure_syntax & RE_BK_PLUS_QM)
+			  ? (*p == '\\' && p + 1 != pend
+			     && (p[1] == '+' || p[1] == '?'))
+			  : (*p == '+' || *p == '?'))
+		      || ((obscure_syntax & RE_INTERVALS) 
+			  && ((obscure_syntax & RE_NO_BK_CURLY_BRACES)
+			      ? *p == '{'
+			      : (p[0] == '\\' && p + 1 != pend
+				 && p[1] == '{'))))))
+	    {
+	      laststart = b;
+	      BUFPUSH (exactn);
+	      pending_exact = b;
+	      BUFPUSH (0);
+	    }
+	  BUFPUSH (c);
+	  (*pending_exact)++;
+	}
+    }
+
+  if (fixup_jump)
+    store_jump (fixup_jump, jump, b);
+
+  if (stackp != stackb) goto unmatched_open;
+
+  bufp->used = b - bufp->buffer;
+  return 0;
+
+ invalid_pattern:
+  return "Invalid regular expression";
+
+ unmatched_open:
+  return "Unmatched \\(";
+
+ unmatched_close:
+  return "Unmatched \\)";
+
+ end_of_pattern:
+  return "Premature end of regular expression";
+
+ nesting_too_deep:
+  return "Nesting too deep";
+
+ too_big:
+  return "Regular expression too big";
+
+ memory_exhausted:
+  return "Memory exhausted";
+}
+
+
+/* Write a jump instruction of the form <OPCODE> <relative address>
+   at location FROM.  The relative address handed to STORE_NUMBER is
+   TO - (FROM + 3), i.e. the distance from the first byte after the
+   three-byte instruction to the destination TO.  */
+
+static void
+store_jump (from, opcode, to)
+     char *from, *to;
+     int opcode;
+{
+  char *after_jump = from + 3;	/* First byte past this instruction.  */
+
+  *from = (char) opcode;
+  STORE_NUMBER (from + 1, to - after_jump);
+}
+
+
+/* Make a three-byte gap at FROM and write there a jump to TO with
+   opcode OP.  Everything from FROM up to (but not including)
+   CURRENT_END is moved three bytes higher; the caller must already
+   have made sure the buffer has room for that.
+
+   If you call this function, you must zero out pending_exact.  */
+
+static void
+insert_jump (op, from, to, current_end)
+     int op;
+     char *from, *to, *current_end;
+{
+  register char *src;
+
+  /* Source and destination overlap, so copy from the top down.  */
+  for (src = current_end; src != from; )
+    {
+      --src;
+      src[3] = *src;
+    }
+  store_jump (from, op, to);
+}
+
+
+/* Write a jump instruction of the form <opcode> <relative address> <n>
+   at location FROM.
+
+   The relative address handed to STORE_NUMBER is TO - (FROM + 3); note
+   that it is measured from FROM + 3 even though N occupies the bytes
+   starting there.  N is a number the jump uses, say, to decide how many
+   times to jump.
+
+   If you call this function, you must zero out pending_exact.  */
+
+static void
+store_jump_n (from, opcode, to, n)
+     char *from, *to;
+     int opcode;
+     unsigned n;
+{
+  char *count_field = from + 3;	/* Where N is stored.  */
+
+  *from = (char) opcode;
+  STORE_NUMBER (from + 1, to - count_field);
+  STORE_NUMBER (count_field, n);
+}
+
+
+/* Like insert_jump, but for a jump that carries an extra number N
+   (used for minimum and maximum repeat counts).  Makes a five-byte gap
+   at FROM by moving everything from FROM up to (but not including)
+   CURRENT_END five bytes higher, then writes there a jump to TO with
+   opcode OP and count N.  The caller must already have made sure the
+   buffer has room.
+
+   If you call this function, you must zero out pending_exact.  */
+
+static void
+insert_jump_n (op, from, to, current_end, n)
+     int op;
+     char *from, *to, *current_end;
+     unsigned n;
+{
+  register char *src;
+
+  /* Source and destination overlap, so copy from the top down.  */
+  for (src = current_end; src != from; )
+    {
+      --src;
+      src[5] = *src;
+    }
+  store_jump_n (from, op, to, n);
+}
+
+
+/* Make a five-byte gap at THERE and write into it the operation OP
+   followed by the numbers NUM_1 and NUM_2.  Everything from THERE up to
+   (but not including) CURRENT_END is moved five bytes higher; the
+   caller must already have made sure the buffer has room.
+
+   If you call this function, you must zero out pending_exact.  */
+
+static void
+insert_op_2 (op, there, current_end, num_1, num_2)
+     int op;
+     char *there, *current_end;
+     int num_1, num_2;
+{
+  register char *src;
+
+  /* Source and destination overlap, so copy from the top down.  */
+  for (src = current_end; src != there; )
+    {
+      --src;
+      src[5] = *src;
+    }
+
+  *there = (char) op;
+  STORE_NUMBER (there + 1, num_1);
+  STORE_NUMBER (there + 3, num_2);
+}
+
+
+
+/* Given a pattern, compute a fastmap from it.  The fastmap records
+   which of the (1 << BYTEWIDTH) possible characters can start a string
+   that matches the pattern.  This fastmap is used by re_search to skip
+   quickly over totally implausible text.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data 
+   area as bufp->fastmap.
+   The other components of bufp describe the pattern to be used.  */
+
+void	/* Rebuild BUFP->fastmap from the compiled pattern in BUFP->buffer
+	   (BUFP->used bytes): clear the map, then mark every byte that can
+	   be the first character of a match.  Also sets
+	   BUFP->fastmap_accurate to 1 and BUFP->can_be_null to 0, 1 or 2.
+	   Returns nothing.  */
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  unsigned char *pattern = (unsigned char *) bufp->buffer;
+  int size = bufp->used;
+  register char *fastmap = bufp->fastmap;
+  register unsigned char *p = pattern;	/* Current position in the pattern.  */
+  register unsigned char *pend = pattern + size;	/* One past the last pattern byte.  */
+  register int j, k;
+  unsigned char *translate = (unsigned char *) bufp->translate;	/* Tested for null before each use.  */
+  unsigned is_a_succeed_n;	/* Nonzero while a succeed_n is handled as an on_failure_jump.  */
+  /* STACKB holds the pattern addresses of alternatives that still have
+     to be examined.  Entries are pushed with `*++stackp', so slot 0 is
+     never written and `stackp == stackb' means the stack is empty.
+     NOTE(review): pushes are not checked against NFAILURES, and the
+     malloc result in the NO_ALLOCA build is not checked -- confirm
+     that both are acceptable.  */
+#ifndef NO_ALLOCA
+  unsigned char *stackb[NFAILURES];
+  unsigned char **stackp = stackb;
+
+#else
+  unsigned char **stackb;
+  unsigned char **stackp;
+  stackb = (unsigned char **) malloc (NFAILURES * sizeof (unsigned char *));
+  stackp = stackb;
+
+#endif /* NO_ALLOCA */
+  memset (fastmap, 0, (1 << BYTEWIDTH));
+  bufp->fastmap_accurate = 1;
+  bufp->can_be_null = 0;
+  /* Each iteration examines one opcode.  Inside the switch, `continue'
+     keeps following the current path (nothing has been recorded for it
+     yet), while `break' means the first characters of this path are
+     now recorded, so the code after the switch moves on to the next
+     saved alternative.  */
+  while (p)
+    {
+      is_a_succeed_n = 0;
+      if (p == pend)	/* This path reached the end of the pattern.  */
+	{
+	  bufp->can_be_null = 1;
+	  break;	/* Leaves the while loop; saved alternatives are not examined.  */
+	}
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((enum regexpcode) *p++))
+#else
+      switch ((enum regexpcode) *p++)
+#endif
+	{
+	case exactn:	/* NOTE(review): p[0] is presumably the count byte and
+			   p[1] the first literal character -- confirm
+			   against the pattern compiler.  */
+	  if (translate)
+	    fastmap[translate[p[1]]] = 1;
+	  else
+	    fastmap[p[1]] = 1;
+	  break;
+
+        case begline:	/* These opcodes have no operand bytes to skip here.  */
+        case before_dot:
+	case at_dot:
+	case after_dot:
+	case begbuf:
+	case endbuf:
+	case wordbound:
+	case notwordbound:
+	case wordbeg:
+	case wordend:
+          continue;
+
+	case endline:	/* Record newline as a possible first character.  */
+	  if (translate)
+	    fastmap[translate['\n']] = 1;
+	  else
+	    fastmap['\n'] = 1;
+	  /* 2 leaves the fastmap usable by re_search_2 (which only bypasses
+	     it when can_be_null is 1) but still lets it try a match at the
+	     very end of the text (which it only skips when can_be_null
+	     is 0).  */
+	  if (bufp->can_be_null != 1)
+	    bufp->can_be_null = 2;
+	  break;
+
+	case jump_n:
+        case finalize_jump:
+	case maybe_finalize_jump:
+	case jump:
+	case dummy_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+	  p += j;	
+	  if (j > 0)	/* Forward jump: just continue at the target.  */
+	    continue;
+          /* Jump backward reached implies we just went through
+	     the body of a loop and matched nothing.
+	     Opcode jumped to should be an on_failure_jump.
+	     Just treat it like an ordinary jump.
+	     For a * loop, it has pushed its failure point already;
+	     If so, discard that as redundant.  */
+
+          if ((enum regexpcode) *p != on_failure_jump
+	      && (enum regexpcode) *p != succeed_n)
+	    continue;
+          p++;
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;		
+          if (stackp != stackb && *stackp == p)	/* Same target is already on top of the stack.  */
+            stackp--;
+          continue;
+	  
+        case on_failure_jump:
+	handle_on_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          *++stackp = p + j;	/* Save the jump target for later; keep following the fall-through path.  */
+	  if (is_a_succeed_n)
+            EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
+	  continue;
+
+	case succeed_n:
+	  is_a_succeed_n = 1;
+          /* Get to the number of times to succeed.  */
+          p += 2;		
+	  /* Increment p past the n for when k != 0.  */
+          EXTRACT_NUMBER_AND_INCR (k, p);
+          if (k == 0)
+	    {
+              p -= 4;	/* Back to the jump offset that follows the opcode.  */
+              goto handle_on_failure_jump;
+            }
+          continue;
+          
+	case set_number_at:
+          p += 4;	/* Skip the four operand bytes.  */
+          continue;
+
+        case start_memory:
+	case stop_memory:
+	  p++;	/* Skip the register number byte.  */
+	  continue;
+
+	case duplicate:	/* Marks every byte, newline included, and
+			   records that the pattern may match empty.  */
+	  bufp->can_be_null = 1;
+	  fastmap['\n'] = 1;	/* Falls through to anychar.  */
+	case anychar:
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (j != '\n')
+	      fastmap[j] = 1;
+	  if (bufp->can_be_null)
+	    {
+	      FREE_AND_RETURN_VOID(stackb);
+	    }
+	  /* Don't return; check the alternative paths
+	     so we can set can_be_null if appropriate.  */
+	  break;
+
+	case wordchar:	/* NOTE(review): TRANSLATE is not applied in the two
+			   word-syntax cases, unlike exactn and charset --
+			   confirm that this is intended.  */
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (SYNTAX (j) == Sword)
+	      fastmap[j] = 1;
+	  break;
+
+	case notwordchar:
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (SYNTAX (j) != Sword)
+	      fastmap[j] = 1;
+	  break;
+
+#ifdef emacs
+	case syntaxspec:
+	  k = *p++;	/* Operand byte: the syntax code to compare against.  */
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (SYNTAX (j) == (enum syntaxcode) k)
+	      fastmap[j] = 1;
+	  break;
+
+	case notsyntaxspec:
+	  k = *p++;
+	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	    if (SYNTAX (j) != (enum syntaxcode) k)
+	      fastmap[j] = 1;
+	  break;
+
+#else /* not emacs */
+	case syntaxspec:
+	case notsyntaxspec:
+	  break;	/* Nothing is marked for these outside emacs.  */
+#endif /* not emacs */
+
+	case charset:	/* *p is the bitmap length in bytes; the bitmap
+			   follows, bit (j % BYTEWIDTH) of byte
+			   (j / BYTEWIDTH) standing for character j.  */
+	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+	      {
+		if (translate)
+		  fastmap[translate[j]] = 1;
+		else
+		  fastmap[j] = 1;
+	      }
+	  break;
+
+	case charset_not:
+	  /* Chars beyond end of map must be allowed */
+	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+	    if (translate)
+	      fastmap[translate[j]] = 1;
+	    else
+	      fastmap[j] = 1;
+	  /* Within the map, allow exactly the characters whose bit is clear.  */
+	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+	      {
+		if (translate)
+		  fastmap[translate[j]] = 1;
+		else
+		  fastmap[j] = 1;
+	      }
+	  break;
+
+	case unused:	/* pacify gcc -Wall */
+	  break;
+	}
+
+      /* Get here means we have successfully found the possible starting
+         characters of one path of the pattern.  We need not follow this
+         path any farther.  Instead, look at the next alternative
+         remembered in the stack.  */
+   if (stackp != stackb)
+	p = *stackp--;
+      else
+	break;
+    }
+   FREE_AND_RETURN_VOID(stackb);	/* Macro defined elsewhere; presumably releases STACKB in the NO_ALLOCA build -- TODO confirm.  */
+}
+
+
+
+/* Search STRING (SIZE bytes) for the pattern compiled in PBUFP.  This is
+   re_search_2 with no first string: STRING is handed over as the second
+   string, and SIZE doubles as the stop index, so there is no way to say
+   where to stop matching short of the end of STRING.  STARTPOS, RANGE,
+   REGS and the return value are those of re_search_2.  */
+
+int
+re_search (pbufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *pbufp;
+     char *string;
+     int size;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+{
+  char *no_first_string = (char *) 0;
+  int match_limit = size;
+
+  return re_search_2 (pbufp, no_first_string, 0, string, size,
+		      startpos, range, regs, match_limit);
+}
+
+
+/* Using the compiled pattern in PBUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.  RANGE is the number of
+   places to try before giving up.  If RANGE is negative, it searches
+   backwards, i.e., the starting positions tried are STARTPOS, STARTPOS
+   - 1, etc.  STRING1 and STRING2 are of SIZE1 and SIZE2, respectively.
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire PBUFP->buffer and its contained
+   subexpressions.  Do not consider matching one past the index MSTOP in
+   the virtual concatenation of STRING1 and STRING2.
+
+   If PBUFP->fastmap is nonzero and PBUFP->fastmap_accurate is zero, the
+   fastmap is recompiled here before searching.  Each candidate position
+   is tested with re_match_2, which is also what fills in REGS.
+
+   The value returned is the position in the strings at which the match
+   was found, or -1 if no match was found, or -2 if error (such as
+   failure stack overflow).  */
+
+int
+re_search_2 (pbufp, string1, size1, string2, size2, startpos, range,
+	     regs, mstop)
+     struct re_pattern_buffer *pbufp;
+     char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     register int range;
+     struct re_registers *regs;
+     int mstop;
+{
+  register char *fastmap = pbufp->fastmap;
+  register unsigned char *translate = (unsigned char *) pbufp->translate;
+  int total_size = size1 + size2;
+  int endpos = startpos + range;	/* Last position the caller asked for; only used to clip RANGE below.  */
+  int val;	/* Result of the latest re_match_2 call.  */
+
+  /* Check for out-of-range starting position.  */
+  if (startpos < 0  ||  startpos > total_size)
+    return -1;
+    
+  /* Fix up range if it would eventually take startpos outside of the
+     virtual concatenation of string1 and string2.  */
+  if (endpos < -1)
+    range = -1 - startpos;
+  else if (endpos > total_size)
+    range = total_size - startpos;
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !pbufp->fastmap_accurate)
+    re_compile_fastmap (pbufp);
+  
+  /* If the search isn't to be a backwards one, don't waste time in a
+     long search for a pattern that says it is anchored.  */
+  if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
+      && range > 0)
+    {
+      if (startpos > 0)
+	return -1;
+      else
+	range = 1;
+    }
+
+  while (1)
+    { 
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot possibly be the start of a match.  Note, however, that
+         if the pattern can possibly match the null string, we must
+         test it at each starting point so that we take the first null
+         string we get.  */
+
+      if (fastmap && startpos < total_size && pbufp->can_be_null != 1)
+	{
+	  if (range > 0)	/* Searching forwards.  */
+	    {
+	      register int lim = 0;	/* Value of RANGE at which the scan below must stop.  */
+	      register unsigned char *p;
+	      int irange = range;	/* RANGE before skipping, to measure how far we moved.  */
+	      if (startpos < size1 && startpos + range >= size1)
+		lim = range - (size1 - startpos);
+	      /* With that LIM the scan stops at the end of string1 at the
+	         latest, so P never runs from string1 into string2.  For a
+	         start inside string2 the base is biased by -size1 so that
+	         the virtual index STARTPOS selects the right byte.  */
+	      p = ((unsigned char *)
+		   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
+	      /* Skip bytes whose fastmap entry is zero, using up RANGE.  */
+              while (range > lim && !fastmap[translate 
+                                             ? translate[*p++]
+                                             : *p++])
+		    range--;
+	      startpos += irange - range;
+	    }
+	  else				/* Searching backwards, or RANGE is 0.  */
+	    {
+	      register unsigned char c;
+	      /* Only the byte at STARTPOS itself is tested here.  */
+              if (string1 == 0 || startpos >= size1)
+		c = string2[startpos - size1];
+	      else 
+		c = string1[startpos];
+
+              c &= 0xff;
+	      if (translate ? !fastmap[translate[c]] : !fastmap[c])
+		goto advance;
+	    }
+	}
+      /* At the very end of the text only an empty match is possible;
+         give up if the fastmap says the pattern cannot match empty.  */
+      if (range >= 0 && startpos == total_size
+	  && fastmap && pbufp->can_be_null == 0)
+	return -1;
+
+      val = re_match_2 (pbufp, string1, size1, string2, size2, startpos,
+			regs, mstop);
+      if (val >= 0)	/* Matched: VAL is a length, but we report the start.  */
+	return startpos;
+      if (val == -2)	/* Matcher error; pass it on.  */
+	return -2;
+
+#ifndef NO_ALLOCA
+#ifdef C_ALLOCA
+      alloca (0);	/* NOTE(review): presumably lets an emulated alloca reclaim storage -- confirm.  */
+#endif /* C_ALLOCA */
+
+#endif /* NO_ALLOCA */
+    advance:
+      if (!range) 	/* No positions left to try.  */
+        break;
+      else if (range > 0) 
+        {
+          range--; 
+          startpos++;
+        }
+      else
+        {
+          range++; 
+          startpos--;
+        }
+    }
+  return -1;
+}
+
+
+
+#ifndef emacs   /* emacs never uses this.  */
+/* Match the pattern compiled in PBUFP against STRING (SIZE bytes),
+   starting at index POS.  This is re_match_2 with no first string:
+   STRING is handed over as the second string and SIZE doubles as the
+   stop index.  REGS and the return value are those of re_match_2.  */
+int
+re_match (pbufp, string, size, pos, regs)
+     struct re_pattern_buffer *pbufp;
+     char *string;
+     int size;
+     int pos;
+     struct re_registers *regs;
+{
+  char *no_first_string = (char *) 0;
+  int match_limit = size;
+
+  return re_match_2 (pbufp, no_first_string, 0, string, size, pos, regs,
+		     match_limit);
+}
+#endif /* not emacs */
+
+
+/* The following are used for re_match_2, defined below:  */
+
+/* Roughly the maximum number of failure points on the stack.  Would be
+   exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed.
+   PUSH_FAILURE_POINT consults it when the failure stack is full: if
+   the stack is already longer than
+   re_max_failures * MAX_NUM_FAILURE_ITEMS slots it is not grown again
+   and FREE_AND_RETURN (stackb, -2) is invoked instead.  */
+   
+int re_max_failures = 2000;
+
+/* Routine used by re_match_2.  */
+/* static int memcmp_translate (); *//* already declared */
+
+
+/* Structure and accessing macros used in re_match_2: one entry of this
+   type is kept per register in re_match_2's reg_info array.  */
+
+struct register_info
+{
+  unsigned is_active : 1;	/* Set by start_memory, cleared by stop_memory.  */
+  unsigned matched_something : 1;	/* Cleared by start_memory; SET_REGS_MATCHED
+					   sets it to the register's is_active
+					   value.  */
+};
+
+#define IS_ACTIVE(R)  ((R).is_active)	/* R is a struct register_info lvalue.  */
+#define MATCHED_SOMETHING(R)  ((R).matched_something)
+
+
+/* Macros used by re_match_2:  */
+
+
+/* Slots saved per register: its regstart, its regend, and the address
+   of its reg_info entry.  */
+
+#define NUM_REG_ITEMS  3
+
+/* Upper bound on the slots one failure point can occupy; used to size
+   the failure stack and to limit its growth.  Register zero is never
+   saved, so the real maximum is (RE_NREGS - 1) * NUM_REG_ITEMS + 3,
+   which is below this value.  */
+
+#define MAX_NUM_FAILURE_ITEMS   (RE_NREGS * NUM_REG_ITEMS + 2)
+
+
+/* Exact number of slots PUSH_FAILURE_POINT stores for the current value
+   of `last_used_reg': NUM_REG_ITEMS for each saved register, plus three
+   more for the saved-register count, PATTERN_PLACE and STRING_PLACE.
+   POP_FAILURE_POINT removes the same amount.  The count slot has to be
+   included: with `+ 2' the free-space test in PUSH_FAILURE_POINT could
+   pass with one slot too few, and the final store then landed one
+   element past the end of the stack.  */
+
+#define NUM_FAILURE_ITEMS  (last_used_reg * NUM_REG_ITEMS + 3)
+
+
+/* This pushes most of the information about the current state we will want
+   if we ever fail back to it.  It expands inside re_match_2 and uses
+   that function's regstart, regend, reg_info, stackb, stackp and stacke.
+   Pushed, in this order: regstart, regend and the address of reg_info
+   for each register from 1 up to last_used_reg (the highest register
+   whose regstart is not -1), then last_used_reg itself, then
+   PATTERN_PLACE and STRING_PLACE.  When fewer than NUM_FAILURE_ITEMS
+   slots are free the stack is doubled first, unless it is already
+   longer than re_max_failures * MAX_NUM_FAILURE_ITEMS, in which case
+   FREE_AND_RETURN (stackb, -2) is invoked.
+   NOTE(review): the stores below add up to
+   last_used_reg * NUM_REG_ITEMS + 3 slots; confirm that
+   NUM_FAILURE_ITEMS accounts for every one of them.  */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place)			\
+  {									\
+    long last_used_reg, this_reg;					\
+									\
+    /* Find out how many registers are active or have been matched.	\
+       (Aside from register zero, which is only set at the end.)  */	\
+    for (last_used_reg = RE_NREGS - 1; last_used_reg > 0; last_used_reg--)\
+      if (regstart[last_used_reg] != (unsigned char *)(-1L))		\
+        break;								\
+									\
+    if (stacke - stackp < NUM_FAILURE_ITEMS)				\
+      {									\
+	unsigned char **stackx;						\
+	unsigned int len = stacke - stackb;				\
+	if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS)		\
+	  {								\
+	    FREE_AND_RETURN(stackb,(-2));				\
+	  }								\
+									\
+        /* Roughly double the size of the stack.  */			\
+        stackx = DOUBLE_STACK(stackx,stackb,len);			\
+	/* Rearrange the pointers. */					\
+	stackp = stackx + (stackp - stackb);				\
+	stackb = stackx;						\
+	stacke = stackb + 2 * len;					\
+      }									\
+									\
+    /* Now push the info for each of those registers.  */		\
+    for (this_reg = 1; this_reg <= last_used_reg; this_reg++)		\
+      {									\
+        *stackp++ = regstart[this_reg];					\
+        *stackp++ = regend[this_reg];					\
+        *stackp++ = (unsigned char *) &reg_info[this_reg];		\
+      }									\
+									\
+    /* Push how many registers we saved.  */				\
+    *stackp++ = (unsigned char *) last_used_reg;			\
+									\
+    *stackp++ = pattern_place;                                          \
+    *stackp++ = string_place;                                           \
+  }
+  
+
+/* Discard the failure point on top of the stack, i.e. undo one
+   PUSH_FAILURE_POINT: the string and pattern places, the count of saved
+   registers, and NUM_REG_ITEMS slots for each saved register.  Nothing
+   is restored; the entries are simply dropped.  */
+
+#define POP_FAILURE_POINT()						\
+  {									\
+    int regs_saved;							\
+    /* Step back over the two places, onto the register count.  */	\
+    stackp -= 3;							\
+    regs_saved = (int) *stackp;						\
+    /* Drop the slots saved for those registers.  */			\
+    stackp -= regs_saved * NUM_REG_ITEMS;				\
+  }
+
+
+#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)	/* True while DEND is still the limit of the first string.  */
+
+/* Is true if there is a first string and if PTR is pointing anywhere
+   inside it or just past the end.  Always false when size1 is zero.
+   PTR is evaluated twice when size1 is nonzero.  */
+   
+#define IS_IN_FIRST_STRING(ptr) 					\
+	(size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* Call before fetching a character with *d.  This switches over to
+   string2 if necessary.  It uses d, dend, string2 and end_match_2 of
+   the expanding function and jumps to its `fail' label when no data is
+   left.  It is a loop rather than an `if' so that, after switching, an
+   empty remainder of string2 (d == dend again) also ends in `fail'.  */
+
+#define PREFETCH							\
+ while (d == dend)						    	\
+  {									\
+    /* end of string2 => fail.  */					\
+    if (dend == end_match_2) 						\
+      goto fail;							\
+    /* end of string1 => advance to string2.  */ 			\
+    d = string2;						        \
+    dend = end_match_2;							\
+  }
+
+
+/* Call this after something has been matched.  For every register it
+   copies the `active' state into the `matched' flag: registers whose
+   subexpression we are currently inside get the flag set, all other
+   registers get it cleared.  */
+#define SET_REGS_MATCHED 						\
+  { unsigned reg_idx = 0;						\
+    while (reg_idx < RE_NREGS)						\
+      {									\
+        MATCHED_SOMETHING(reg_info[reg_idx])				\
+          = IS_ACTIVE(reg_info[reg_idx]) ? 1 : 0;			\
+        reg_idx++;							\
+      }									\
+  }
+
+/* Test if at very beginning or at very end of the virtual concatenation
+   of string1 and string2.  If there is only one string, we've put it in
+   string2.  AT_STRINGS_BEG is also true, whatever d is, when size2 is
+   zero.  These macros use d, string1, string2, size1, size2 and end2 of
+   the expanding function.  */
+
+#define AT_STRINGS_BEG  (d == (size1 ? string1 : string2)  ||  !size2)
+#define AT_STRINGS_END  (d == end2)	
+/* True at either end of the text, or where exactly one of the two
+   characters around d has Sword syntax.  The end tests come first, so
+   d - 1 and d are only examined away from the ends.  */
+#define AT_WORD_BOUNDARY						\
+  (AT_STRINGS_BEG || AT_STRINGS_END || IS_A_LETTER (d - 1) != IS_A_LETTER (d))
+
+/* We have two special cases to check for: 
+     1) if we're past the end of string1, we have to look at the first
+        character in string2;
+     2) if we're before the beginning of string2, we have to look at the
+        last character in string1; we assume there is a string1, so use
+        this in conjunction with AT_STRINGS_BEG.
+   The argument is evaluated more than once, and the macro itself does
+   no bounds checking beyond those two cases.  */
+#define IS_A_LETTER(d)							\
+  (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
+   == Sword)
+
+
+/* Match the pattern described by PBUFP against the virtual
+   concatenation of STRING1 and STRING2, which are of SIZE1 and SIZE2,
+   respectively.  Start the match at index POS in the virtual
+   concatenation of STRING1 and STRING2.  In REGS, return the indices of
+   the virtual concatenation of STRING1 and STRING2 that matched the
+   entire PBUFP->buffer and its contained subexpressions.  Do not
+   consider matching one past the index MSTOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   If pbufp->fastmap is nonzero, then it had better be up to date.
+
+   The reason that the data to match are specified as two components
+   which are to be regarded as concatenated is so this function can be
+   used directly on the contents of an Emacs buffer.
+
+   -1 is returned if there is no match.  -2 is returned if there is an
+   error (such as match stack overflow).  Otherwise the value is the
+   length of the substring which was matched.  */
+
+int
+re_match_2 (pbufp, string1_arg, size1, string2_arg, size2, pos, regs, mstop)
+     struct re_pattern_buffer *pbufp;
+     char *string1_arg, *string2_arg;
+     int size1, size2;
+     int pos;
+     struct re_registers *regs;
+     int mstop;
+{
+  register unsigned char *p = (unsigned char *) pbufp->buffer;
+
+  /* Pointer to beyond end of buffer.  */
+  register unsigned char *pend = p + pbufp->used;
+
+  unsigned char *string1 = (unsigned char *) string1_arg;
+  unsigned char *string2 = (unsigned char *) string2_arg;
+  unsigned char *end1;		/* Just past end of first string.  */
+  unsigned char *end2;		/* Just past end of second string.  */
+
+  /* Pointers into string1 and string2, just past the last characters in
+     each to consider matching.  */
+  unsigned char *end_match_1, *end_match_2;
+
+  register unsigned char *d, *dend;
+  register int mcnt;			/* Multipurpose.  */
+  unsigned char *translate = (unsigned char *) pbufp->translate;
+  unsigned is_a_jump_n = 0;
+
+ /* Failure point stack.  Each place that can handle a failure further
+    down the line pushes a failure point on this stack.  It consists of
+    regstart, regend, and reg_info for all registers corresponding to the
+    subexpressions we're currently inside, plus the number of such
+    registers, and, finally, two char *'s.  The first char * is where to
+    resume scanning the pattern; the second one is where to resume
+    scanning the strings.  If the latter is zero, the failure point is a
+    ``dummy''; if a failure happens and the failure point is a dummy, it
+    gets discarded and the next one is tried.  */
+
+#ifndef NO_ALLOCA
+  unsigned char *initial_stack[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+#endif
+  unsigned char **stackb;
+  unsigned char **stackp;
+  unsigned char **stacke;
+
+
+  /* Information on the contents of registers. These are pointers into
+     the input strings; they record just what was matched (on this
+     attempt) by a subexpression part of the pattern, that is, the
+     regnum-th regstart pointer points to where in the input we began
+     matching and the regnum-th regend points to right after where we
+     stopped matching the regnum-th subexpression.  (The zeroth register
+     keeps track of what the whole pattern matches.)  */
+     
+  unsigned char *regstart[RE_NREGS];
+  unsigned char *regend[RE_NREGS];
+
+  /* The is_active field of reg_info helps us keep track of which (possibly
+     nested) subexpressions we are currently in. The matched_something
+     field of reg_info[reg_num] helps us tell whether or not we have
+     matched any of the pattern so far this time through the reg_num-th
+     subexpression.  These two fields get reset each time through any
+     loop their register is in.  */
+
+  struct register_info reg_info[RE_NREGS];
+
+
+  /* The following record the register info as found in the above
+     variables when we find a match better than any we've seen before. 
+     This happens as we backtrack through the failure points, which in
+     turn happens only if we have not yet matched the entire string.  */
+
+  unsigned best_regs_set = 0;
+  unsigned char *best_regstart[RE_NREGS];
+  unsigned char *best_regend[RE_NREGS];
+
+  /* Initialize the stack. */
+#ifdef NO_ALLOCA
+  stackb = (unsigned char **) malloc (MAX_NUM_FAILURE_ITEMS * NFAILURES * sizeof (char *));
+#else
+  stackb = initial_stack;
+#endif
+  stackp = stackb;
+  stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
+
+#ifdef DEBUG_REGEX
+  fprintf (stderr, "Entering re_match_2(%s%s)\n", string1_arg, string2_arg);
+#endif
+
+  /* Initialize subexpression text positions to -1 to mark ones that no
+     \( or ( and \) or ) has been seen for. Also set all registers to
+     inactive and mark them as not having matched anything or ever
+     failed.  */
+  for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+    {
+      regstart[mcnt] = regend[mcnt] = (unsigned char *) (-1L);
+      IS_ACTIVE (reg_info[mcnt]) = 0;
+      MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+    }
+  
+  if (regs)
+    for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+      regs->start[mcnt] = regs->end[mcnt] = -1;
+
+  /* Set up pointers to ends of strings.
+     Don't allow the second string to be empty unless both are empty.  */
+  if (size2 == 0)
+    {
+      string2 = string1;
+      size2 = size1;
+      string1 = 0;
+      size1 = 0;
+    }
+  end1 = string1 + size1;
+  end2 = string2 + size2;
+
+  /* Compute where to stop matching, within the two strings.  */
+  if (mstop <= size1)
+    {
+      end_match_1 = string1 + mstop;
+      end_match_2 = string2;
+    }
+  else
+    {
+      end_match_1 = end1;
+      end_match_2 = string2 + mstop - size1;
+    }
+
+  /* `p' scans through the pattern as `d' scans through the data. `dend'
+     is the end of the input string that `d' points within. `d' is
+     advanced into the following input string whenever necessary, but
+     this happens before fetching; therefore, at the beginning of the
+     loop, `d' can be pointing at the end of a string, but it cannot
+     equal string2.  */
+
+  if (size1 != 0 && pos <= size1)
+    d = string1 + pos, dend = end_match_1;
+  else
+    d = string2 + pos - size1, dend = end_match_2;
+
+
+  /* This loops over pattern commands.  It exits by returning from the
+     function if match is complete, or it drops through if match fails
+     at this starting point in the input data.  */
+
+  while (1)
+    {
+#ifdef DEBUG_REGEX
+      fprintf (stderr,
+	       "regex loop(%d):  matching 0x%02d\n",
+	       p - (unsigned char *) pbufp->buffer,
+	       *p);
+#endif
+      is_a_jump_n = 0;
+      /* End of pattern means we might have succeeded.  */
+      if (p == pend)
+	{
+	  /* If not end of string, try backtracking.  Otherwise done.  */
+          if (d != end_match_2)
+	    {
+              if (stackp != stackb)
+                {
+                  /* More failure points to try.  */
+
+                  unsigned in_same_string = 
+        	          	IS_IN_FIRST_STRING (best_regend[0]) 
+	        	        == MATCHING_IN_FIRST_STRING;
+
+                  /* If exceeds best match so far, save it.  */
+                  if (! best_regs_set
+                      || (in_same_string && d > best_regend[0])
+                      || (! in_same_string && ! MATCHING_IN_FIRST_STRING))
+                    {
+                      best_regs_set = 1;
+                      best_regend[0] = d;	/* Never use regstart[0].  */
+                      
+                      for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
+                        {
+                          best_regstart[mcnt] = regstart[mcnt];
+                          best_regend[mcnt] = regend[mcnt];
+                        }
+                    }
+                  goto fail;	       
+                }
+              /* If no failure points, don't restore garbage.  */
+              else if (best_regs_set)   
+                {
+	      restore_best_regs:
+                  /* Restore best match.  */
+                  d = best_regend[0];
+                  
+		  for (mcnt = 0; mcnt < RE_NREGS; mcnt++)
+		    {
+		      regstart[mcnt] = best_regstart[mcnt];
+		      regend[mcnt] = best_regend[mcnt];
+		    }
+                }
+            }
+
+	  /* If caller wants register contents data back, convert it 
+	     to indices.  */
+	  if (regs)
+	    {
+	      regs->start[0] = pos;
+	      if (MATCHING_IN_FIRST_STRING)
+		regs->end[0] = d - string1;
+	      else
+		regs->end[0] = d - string2 + size1;
+	      for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
+		{
+		  if (regend[mcnt] == (unsigned char *)(-1L))
+		    {
+		      regs->start[mcnt] = -1;
+		      regs->end[mcnt] = -1;
+		      continue;
+		    }
+		  if (IS_IN_FIRST_STRING (regstart[mcnt]))
+		    regs->start[mcnt] = regstart[mcnt] - string1;
+		  else
+		    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
+                    
+		  if (IS_IN_FIRST_STRING (regend[mcnt]))
+		    regs->end[mcnt] = regend[mcnt] - string1;
+		  else
+		    regs->end[mcnt] = regend[mcnt] - string2 + size1;
+		}
+	    }
+	  FREE_AND_RETURN(stackb,
+			  (d - pos - (MATCHING_IN_FIRST_STRING ?
+				      string1 :
+				      string2 - size1)));
+        }
+
+      /* Otherwise match next pattern command.  */
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((enum regexpcode) *p++))
+#else
+      switch ((enum regexpcode) *p++)
+#endif
+	{
+
+	/* \( [or `(', as appropriate] is represented by start_memory,
+           \) by stop_memory.  Both of those commands are followed by
+           a register number in the next byte.  The text matched
+           within the \( and \) is recorded under that number.  */
+	case start_memory:
+          regstart[*p] = d;
+          IS_ACTIVE (reg_info[*p]) = 1;
+          MATCHED_SOMETHING (reg_info[*p]) = 0;
+          p++;
+          break;
+
+	case stop_memory:
+          regend[*p] = d;
+          IS_ACTIVE (reg_info[*p]) = 0;
+
+          /* If just failed to match something this time around with a sub-
+	     expression that's in a loop, try to force exit from the loop.  */
+          if ((! MATCHED_SOMETHING (reg_info[*p])
+	       || (enum regexpcode) p[-3] == start_memory)
+	      && (p + 1) != pend)              
+            {
+	      register unsigned char *p2 = p + 1;
+              mcnt = 0;
+              switch (*p2++)
+                {
+                  case jump_n:
+		    is_a_jump_n = 1;
+                  case finalize_jump:
+		  case maybe_finalize_jump:
+		  case jump:
+		  case dummy_failure_jump:
+                    EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+		    if (is_a_jump_n)
+		      p2 += 2;
+                    break;
+                }
+	      p2 += mcnt;
+        
+              /* If the next operation is a jump backwards in the pattern
+	         to an on_failure_jump, exit from the loop by forcing a
+                 failure after pushing on the stack the on_failure_jump's 
+                 jump in the pattern, and d.  */
+	      if (mcnt < 0 && (enum regexpcode) *p2++ == on_failure_jump)
+		{
+                  EXTRACT_NUMBER_AND_INCR (mcnt, p2);
+                  PUSH_FAILURE_POINT (p2 + mcnt, d);
+                  goto fail;
+                }
+            }
+          p++;
+          break;
+
+	/* \<digit> has been turned into a `duplicate' command which is
+           followed by the numeric value of <digit> as the register number.  */
+        case duplicate:
+	  {
+	    int regno = *p++;   /* Get which register to match against */
+	    register unsigned char *d2, *dend2;
+
+	    /* Where in input to try to start matching.  */
+            d2 = regstart[regno];
+            
+            /* Where to stop matching; if both the place to start and
+               the place to stop matching are in the same string, then
+               set to the place to stop, otherwise, for now have to use
+               the end of the first string.  */
+
+            dend2 = ((IS_IN_FIRST_STRING (regstart[regno]) 
+		      == IS_IN_FIRST_STRING (regend[regno]))
+		     ? regend[regno] : end_match_1);
+	    while (1)
+	      {
+		/* If necessary, advance to next segment in register
+                   contents.  */
+		while (d2 == dend2)
+		  {
+		    if (dend2 == end_match_2) break;
+		    if (dend2 == regend[regno]) break;
+		    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
+		  }
+		/* At end of register contents => success */
+		if (d2 == dend2) break;
+
+		/* If necessary, advance to next segment in data.  */
+		PREFETCH;
+
+		/* How many characters left in this segment to match.  */
+		mcnt = dend - d;
+                
+		/* Want how many consecutive characters we can match in
+                   one shot, so, if necessary, adjust the count.  */
+                if (mcnt > dend2 - d2)
+		  mcnt = dend2 - d2;
+                  
+		/* Compare that many; failure if mismatch, else move
+                   past them.  */
+		if (translate 
+                    ? memcmp_translate (d, d2, mcnt, translate) 
+                    : memcmp ((char *)d, (char *)d2, mcnt))
+		  goto fail;
+		d += mcnt, d2 += mcnt;
+	      }
+	  }
+	  break;
+
+	case anychar:
+	  PREFETCH;	  /* Fetch a data character. */
+	  /* Match anything but a newline, maybe even a null.  */
+	  if ((translate ? translate[*d] : *d) == '\n'
+              || ((obscure_syntax & RE_DOT_NOT_NULL) 
+                  && (translate ? translate[*d] : *d) == '\000'))
+	    goto fail;
+	  SET_REGS_MATCHED;
+          d++;
+	  break;
+
+	case charset:
+	case charset_not:
+	  {
+	    int not = 0;	    /* Nonzero for charset_not.  */
+	    register int c;
+	    if (*(p - 1) == (unsigned char) charset_not)
+	      not = 1;
+
+	    PREFETCH;	    /* Fetch a data character. */
+
+	    if (translate)
+	      c = translate[*d];
+	    else
+	      c = *d;
+
+	    if (c < *p * BYTEWIDTH
+		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+	      not = !not;
+
+	    p += 1 + *p;
+
+	    if (!not) goto fail;
+	    SET_REGS_MATCHED;
+            d++;
+	    break;
+	  }
+
+	case begline:
+          if ((size1 != 0 && d == string1)
+              || (size1 == 0 && size2 != 0 && d == string2)
+              || (d && d[-1] == '\n')
+              || (size1 == 0 && size2 == 0))
+            break;
+          else
+            goto fail;
+            
+	case endline:
+	  if (d == end2
+	      || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
+	    break;
+	  goto fail;
+
+	/* `or' constructs are handled by starting each alternative with
+           an on_failure_jump that points to the start of the next
+           alternative.  Each alternative except the last ends with a
+           jump to the joining point.  (Actually, each jump except for
+           the last one really jumps to the following jump, because
+           tensioning the jumps is a hassle.)  */
+
+	/* The start of a stupid repeat has an on_failure_jump that points
+	   past the end of the repeat text. This makes a failure point so 
+           that on failure to match a repetition, matching restarts past
+           as many repetitions have been found with no way to fail and
+           look for another one.  */
+
+	/* A smart repeat is similar but loops back to the on_failure_jump
+	   so that each repetition makes another failure point.  */
+
+	case on_failure_jump:
+        on_failure:
+          EXTRACT_NUMBER_AND_INCR (mcnt, p);
+          PUSH_FAILURE_POINT (p + mcnt, d);
+          break;
+
+	/* The end of a smart repeat has a maybe_finalize_jump back.
+	   Change it either to a finalize_jump or an ordinary jump.  */
+	case maybe_finalize_jump:
+          EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	  {
+	    register unsigned char *p2 = p;
+	    /* Compare what follows with the beginning of the repeat.
+	       If we can establish that there is nothing that they would
+	       both match, we can change to finalize_jump.  */
+	    while (p2 + 1 != pend
+		   && (*p2 == (unsigned char) stop_memory
+		       || *p2 == (unsigned char) start_memory))
+	      p2 += 2;				/* Skip over reg number.  */
+	    if (p2 == pend)
+	      p[-3] = (unsigned char) finalize_jump;
+	    else if (*p2 == (unsigned char) exactn
+		     || *p2 == (unsigned char) endline)
+	      {
+		register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
+		register unsigned char *p1 = p + mcnt;
+		/* p1[0] ... p1[2] are an on_failure_jump.
+		   Examine what follows that.  */
+		if (p1[3] == (unsigned char) exactn && p1[5] != c)
+		  p[-3] = (unsigned char) finalize_jump;
+		else if (p1[3] == (unsigned char) charset
+			 || p1[3] == (unsigned char) charset_not)
+		  {
+		    int not = p1[3] == (unsigned char) charset_not;
+		    if (c < p1[4] * BYTEWIDTH
+			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+		      not = !not;
+		    /* `not' is 1 if c would match.  */
+		    /* That means it is not safe to finalize.  */
+		    if (!not)
+		      p[-3] = (unsigned char) finalize_jump;
+		  }
+	      }
+	  }
+	  p -= 2;		/* Point at relative address again.  */
+	  if (p[-1] != (unsigned char) finalize_jump)
+	    {
+	      p[-1] = (unsigned char) jump;	
+	      goto nofinalize;
+	    }
+        /* Note fall through.  */
+
+	/* The end of a stupid repeat has a finalize_jump back to the
+           start, where another failure point will be made which will
+           point to after all the repetitions found so far.  */
+
+        /* Take off failure points put on by matching on_failure_jump 
+           because didn't fail.  Also remove the register information
+           put on by the on_failure_jump.  */
+        case finalize_jump:
+          POP_FAILURE_POINT ();
+        /* Note fall through.  */
+        
+	/* Jump without taking off any failure points.  */
+        case jump:
+	nofinalize:
+	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	  p += mcnt;
+	  break;
+
+        case dummy_failure_jump:
+          /* Normally, the on_failure_jump pushes a failure point, which
+             then gets popped at finalize_jump.  We will end up at
+             finalize_jump, also, and with a pattern of, say, `a+', we
+             are skipping over the on_failure_jump, so we have to push
+             something meaningless for finalize_jump to pop.  */
+          PUSH_FAILURE_POINT (0, 0);
+          goto nofinalize;
+
+
+        /* Have to succeed matching what follows at least n times.  Then
+          just handle like an on_failure_jump.  */
+        case succeed_n: 
+          EXTRACT_NUMBER (mcnt, p + 2);
+          /* Originally, this is how many times we HAVE to succeed.  */
+          if (mcnt)
+            {
+               mcnt--;
+	       p += 2;
+               STORE_NUMBER_AND_INCR (p, mcnt);
+            }
+	  else if (mcnt == 0)
+            {
+	      p[2] = unused;
+              p[3] = unused;
+              goto on_failure;
+            }
+          else
+	    { 
+              fprintf (stderr, "regex: the succeed_n's n is not set.\n");
+              exit (1);
+	    }
+          break;
+        
+        case jump_n: 
+          EXTRACT_NUMBER (mcnt, p + 2);
+          /* Originally, this is how many times we CAN jump.  */
+          if (mcnt)
+            {
+               mcnt--;
+               STORE_NUMBER(p + 2, mcnt);
+	       goto nofinalize;	     /* Do the jump without taking off
+			                any failure points.  */
+            }
+          /* If don't have to jump any more, skip over the rest of command.  */
+	  else      
+	    p += 4;		     
+          break;
+        
+	case set_number_at:
+	  {
+  	    register unsigned char *p1;
+
+            EXTRACT_NUMBER_AND_INCR (mcnt, p);
+            p1 = p + mcnt;
+            EXTRACT_NUMBER_AND_INCR (mcnt, p);
+	    STORE_NUMBER (p1, mcnt);
+            break;
+          }
+
+        /* Ignore these.  Used to ignore the n of succeed_n's which
+           currently have n == 0.  */
+        case unused:
+          break;
+
+        case wordbound:
+	  if (AT_WORD_BOUNDARY)
+	    break;
+	  goto fail;
+
+	case notwordbound:
+	  if (AT_WORD_BOUNDARY)
+	    goto fail;
+	  break;
+
+	case wordbeg:
+	  if (IS_A_LETTER (d) && (!IS_A_LETTER (d - 1) || AT_STRINGS_BEG))
+	    break;
+	  goto fail;
+
+	case wordend:
+          /* Have to check if AT_STRINGS_BEG before looking at d - 1.  */
+	  if (!AT_STRINGS_BEG && IS_A_LETTER (d - 1) 
+              && (!IS_A_LETTER (d) || AT_STRINGS_END))
+	    break;
+	  goto fail;
+
+#ifdef emacs
+	case before_dot:
+	  if (PTR_CHAR_POS (d) >= point)
+	    goto fail;
+	  break;
+
+	case at_dot:
+	  if (PTR_CHAR_POS (d) != point)
+	    goto fail;
+	  break;
+
+	case after_dot:
+	  if (PTR_CHAR_POS (d) <= point)
+	    goto fail;
+	  break;
+
+	case wordchar:
+	  mcnt = (int) Sword;
+	  goto matchsyntax;
+
+	case syntaxspec:
+	  mcnt = *p++;
+	matchsyntax:
+	  PREFETCH;
+	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
+          SET_REGS_MATCHED;
+	  break;
+	  
+	case notwordchar:
+	  mcnt = (int) Sword;
+	  goto matchnotsyntax;
+
+	case notsyntaxspec:
+	  mcnt = *p++;
+	matchnotsyntax:
+	  PREFETCH;
+	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
+	  SET_REGS_MATCHED;
+          break;
+
+#else /* not emacs */
+
+	case wordchar:
+	  PREFETCH;
+          if (!IS_A_LETTER (d))
+            goto fail;
+	  SET_REGS_MATCHED;
+	  break;
+	  
+	case notwordchar:
+	  PREFETCH;
+	  if (IS_A_LETTER (d))
+            goto fail;
+          SET_REGS_MATCHED;
+	  break;
+
+	case before_dot:
+	case at_dot:
+	case after_dot:
+	case syntaxspec:
+	case notsyntaxspec:
+	  break;
+
+#endif /* not emacs */
+
+	case begbuf:
+          if (AT_STRINGS_BEG)
+            break;
+          goto fail;
+
+        case endbuf:
+	  if (AT_STRINGS_END)
+	    break;
+	  goto fail;
+
+	case exactn:
+	  /* Match the next few pattern characters exactly.
+	     mcnt is how many characters to match.  */
+	  mcnt = *p++;
+	  /* This is written out as an if-else so we don't waste time
+             testing `translate' inside the loop.  */
+          if (translate)
+	    {
+	      do
+		{
+		  PREFETCH;
+		  if (translate[*d++] != *p++) goto fail;
+		}
+	      while (--mcnt);
+	    }
+	  else
+	    {
+	      do
+		{
+		  PREFETCH;
+		  if (*d++ != *p++) goto fail;
+		}
+	      while (--mcnt);
+	    }
+	  SET_REGS_MATCHED;
+          break;
+	}
+      continue;  /* Successfully executed one pattern command; keep going.  */
+
+    /* Jump here if any matching operation fails. */
+    fail:
+      if (stackp != stackb)
+	/* A restart point is known.  Restart there and pop it. */
+	{
+          short last_used_reg, this_reg;
+          
+          /* If this failure point is from a dummy_failure_jump, just
+             skip it.  */
+	  if (!stackp[-2])
+            {
+              POP_FAILURE_POINT ();
+              goto fail;
+            }
+
+          d = *--stackp;
+	  p = *--stackp;
+          if (d >= string1 && d <= end1)
+	    dend = end_match_1;
+          /* Restore register info.  */
+          last_used_reg = (long) *--stackp;
+          
+          /* Make the ones that weren't saved -1 or 0 again.  */
+          for (this_reg = RE_NREGS - 1; this_reg > last_used_reg; this_reg--)
+            {
+              regend[this_reg] = (unsigned char *) (-1L);
+              regstart[this_reg] = (unsigned char *) (-1L);
+              IS_ACTIVE (reg_info[this_reg]) = 0;
+              MATCHED_SOMETHING (reg_info[this_reg]) = 0;
+            }
+          
+          /* And restore the rest from the stack.  */
+          for ( ; this_reg > 0; this_reg--)
+            {
+              reg_info[this_reg] = *(struct register_info *) *--stackp;
+              regend[this_reg] = *--stackp;
+              regstart[this_reg] = *--stackp;
+            }
+	}
+      else
+        break;   /* Matching at this starting point really fails.  */
+    }
+
+  if (best_regs_set)
+    goto restore_best_regs;
+
+  FREE_AND_RETURN(stackb,(-1)); 	/* Failure to match.  */
+}
+
+
+/* Compare the first LEN bytes of S1 and S2 after mapping every byte
+   through TRANSLATE.  Return 0 if all translated bytes are equal and
+   1 at the first pair that differs; unlike memcmp, the result carries
+   no ordering information.  */
+
+static int
+memcmp_translate (s1, s2, len, translate)
+     unsigned char *s1, *s2;
+     register int len;
+     unsigned char *translate;
+{
+  register unsigned char *left = s1;
+  register unsigned char *right = s2;
+
+  for (; len != 0; len--, left++, right++)
+    if (translate[*left] != translate[*right])
+      return 1;
+
+  return 0;
+}
+
+
+
+/* Entry points compatible with 4.2 BSD regex library.  */
+
+#if !defined(emacs) && !defined(GAWK)
+
+/* Pattern buffer shared by re_comp and re_exec: re_comp compiles into
+   it and re_exec searches with it.  */
+static struct re_pattern_buffer re_comp_buf;
+
+/* Compile S into re_comp_buf.  A null S only asks whether a pattern
+   buffer is already there: the result is then 0 if re_comp_buf has a
+   buffer and an error string if it does not.  Otherwise the result is
+   whatever re_compile_pattern returns, or "Memory exhausted" if the
+   pattern space or the fastmap cannot be allocated on first use.  */
+char *
+re_comp (s)
+     char *s;
+{
+  if (s == 0)
+    return re_comp_buf.buffer ? 0 : "No previous regular expression";
+
+  if (re_comp_buf.buffer == 0)
+    {
+      /* First use: set up the pattern space and the fastmap.  */
+      re_comp_buf.buffer = (char *) malloc (200);
+      if (re_comp_buf.buffer == 0)
+        return "Memory exhausted";
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (re_comp_buf.fastmap == 0)
+        return "Memory exhausted";
+    }
+
+  return re_compile_pattern (s, strlen (s), &re_comp_buf);
+}
+
+/* Search all of S for the pattern held in re_comp_buf.  Return 1 if
+   re_search reports a non-negative result and 0 otherwise.  */
+int
+re_exec (s)
+     char *s;
+{
+  int length = strlen (s);
+  int result = re_search (&re_comp_buf, s, length, 0, length,
+                          (struct re_registers *) 0);
+
+  return result >= 0;
+}
+#endif /* not emacs && not GAWK */
+
+
+
+#ifdef test
+
+#ifdef atarist
+long _stksize = 2L;  /* reserve memory for stack */
+#endif
+#include <stdio.h>
+
+/* Translate table used by the interactive test driver.  Indexed by a
+   character code (0 through 0377), gives the upper case equivalent of
+   the character.  Only the codes 0141 through 0172 (`a' - `z' in
+   ASCII) map to something other than themselves; they map to 0101
+   through 0132.  */
+
+char upcase[0400] = 
+  { 000, 001, 002, 003, 004, 005, 006, 007,
+    010, 011, 012, 013, 014, 015, 016, 017,
+    020, 021, 022, 023, 024, 025, 026, 027,
+    030, 031, 032, 033, 034, 035, 036, 037,
+    040, 041, 042, 043, 044, 045, 046, 047,
+    050, 051, 052, 053, 054, 055, 056, 057,
+    060, 061, 062, 063, 064, 065, 066, 067,
+    070, 071, 072, 073, 074, 075, 076, 077,
+    0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
+    0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
+    0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
+    0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
+    0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
+    0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
+    0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
+    0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
+    0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
+    0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
+    0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
+    0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
+    0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
+    0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
+    0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
+    0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
+    0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
+    0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
+    0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
+    0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
+    0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
+    0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
+    0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
+    0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
+  };
+
+#ifdef canned
+
+#include "tests.h"
+
+typedef enum { extended_test, basic_test } test_type;
+
+/* Use this to run the tests we've thought of.  Which suite runs is
+   selected by the value given to `t' below; change it and recompile to
+   run the basic tests instead.  Declared `int' and returns 0 so that
+   the exit status of the program is well defined.  */
+
+int
+main ()
+{
+  test_type t = extended_test;
+
+  if (t == basic_test)
+    {
+      printf ("Running basic tests:\n\n");
+      test_posix_basic ();
+    }
+  else if (t == extended_test)
+    {
+      printf ("Running extended tests:\n\n");
+      test_posix_extended ();
+    }
+
+  return 0;
+}
+
+#else /* not canned */
+
+/* Use this to run interactive tests.  */
+
+/* Read one line from standard input into BUF, which holds SIZE bytes,
+   and drop the trailing newline if there is one.  A line longer than
+   SIZE - 1 characters is cut there; the rest is left for the next
+   read.  Return 1 if a line was read, 0 at end of input or on error.  */
+static int
+read_test_line (buf, size)
+     char *buf;
+     int size;
+{
+  int len;
+
+  if (!fgets (buf, size, stdin))
+    return 0;
+
+  len = strlen (buf);
+  if (len > 0 && buf[len - 1] == '\n')
+    buf[len - 1] = '\0';
+  return 1;
+}
+
+/* Read lines in pairs from standard input until end of input.  The
+   first line of a pair is a pattern: if it is not empty it is compiled,
+   and the compiled buffer and its fastmap are printed; an empty line
+   keeps the pattern compiled before.  The second line is the string to
+   match, and the value re_match returns for it is printed.  If a
+   pattern fails to compile, the message from re_compile_pattern is
+   printed and the next line is read as a new pattern.
+
+   An optional first command argument is converted with atol and stored
+   in obscure_syntax.  Returns 0 at end of input, 1 if the pattern
+   buffer cannot be allocated.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  char pat[80];
+  struct re_pattern_buffer buf;
+  int i;
+  char *compile_error;
+  char fastmap[(1 << BYTEWIDTH)];
+
+  /* Allow a command argument to specify the style of syntax.  */
+  if (argc > 1)
+    obscure_syntax = atol (argv[1]);
+
+  buf.allocated = 40;
+  buf.buffer = (char *) malloc (buf.allocated);
+  if (!buf.buffer)
+    {
+      fprintf (stderr, "Memory exhausted\n");
+      return 1;
+    }
+  buf.fastmap = fastmap;
+  buf.translate = upcase;
+
+  while (read_test_line (pat, (int) sizeof pat))
+    {
+      if (*pat)
+        {
+          /* A nonzero return is an error message (re_comp hands the
+             same value back to its caller as an error string).  */
+          compile_error = re_compile_pattern (pat, strlen (pat), &buf);
+          if (compile_error)
+            {
+              printf ("Compile error: %s\n", compile_error);
+              continue;
+            }
+
+          for (i = 0; i < buf.used; i++)
+            printchar (buf.buffer[i]);
+
+          putchar ('\n');
+
+          /* `allocated' and `used' are longs, so they need %ld.  */
+          printf ("%ld allocated, %ld used.\n", buf.allocated, buf.used);
+
+          re_compile_fastmap (&buf);
+          printf ("Allowed by fastmap: ");
+          for (i = 0; i < (1 << BYTEWIDTH); i++)
+            if (fastmap[i]) printchar (i);
+          putchar ('\n');
+        }
+
+      /* Now read the string to match against.  */
+      if (!read_test_line (pat, (int) sizeof pat))
+        break;
+
+      i = re_match (&buf, pat, strlen (pat), 0, 0);
+      printf ("Match value %d.\n", i);
+    }
+
+  return 0;
+}
+
+#endif
+
+
+#ifdef NOTDEF
+/* Debugging aid: write a description of the pattern buffer BUFP to
+   standard output -- the compiled pattern bytes, the allocated and
+   used sizes, the characters whose fastmap and translate entries are
+   nonzero (for whichever of those tables is present), and the
+   fastmap_accurate and can_be_null flags.  */
+print_buf (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  int i;
+
+  printf ("buf is :\n----------------\n");
+  for (i = 0; i < bufp->used; i++)
+    printchar (bufp->buffer[i]);
+
+  /* `allocated' and `used' are longs, so they need %ld.  */
+  printf ("\n%ld allocated, %ld used.\n", bufp->allocated, bufp->used);
+
+  /* The fastmap pointer is zero when the buffer has no fastmap.  */
+  printf ("Allowed by fastmap: ");
+  if (bufp->fastmap)
+    for (i = 0; i < (1 << BYTEWIDTH); i++)
+      if (bufp->fastmap[i])
+        printchar (i);
+  printf ("\nAllowed by translate: ");
+  if (bufp->translate)
+    for (i = 0; i < (1 << BYTEWIDTH); i++)
+      if (bufp->translate[i])
+        printchar (i);
+  printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
+  printf ("can%s be null\n----------", bufp->can_be_null ? "" : " not");
+}
+#endif /* NOTDEF */
+
+/* Write C to standard output.  Characters from 040 up to, but not
+   including, 0177 are written as themselves; any other character is
+   written as a backslash followed by three octal digits.  */
+printchar (c)
+     char c;
+{
+  if (c >= 040 && c < 0177)
+    putchar (c);
+  else
+    {
+      /* Two high bits first, then the two groups of three bits.  */
+      putchar ('\\');
+      putchar ('0' + ((c >> 6) & 3));
+      putchar ('0' + ((c >> 3) & 7));
+      putchar ('0' + (c & 7));
+    }
+}
+
+/* Write STRING and a newline to standard output, then terminate the
+   program with exit status 1.  */
+error (string)
+     char *string;
+{
+  fputs (string, stdout);
+  putchar ('\n');
+  exit (1);
+}
+#endif /* test */
diff --git a/gnu/usr.bin/awk/regex.h b/gnu/usr.bin/awk/regex.h
new file mode 100644
index 0000000..fce11c3
--- /dev/null
+++ b/gnu/usr.bin/awk/regex.h
@@ -0,0 +1,260 @@
+/* Definitions for data structures callers pass the regex library.
+
+   Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 1, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+#ifndef __REGEXP_LIBRARY
+#define __REGEXP_LIBRARY
+
+/* Define number of parens for which we record the beginnings and ends.
+   This affects how much space the `struct re_registers' type takes up.  */
+#ifndef RE_NREGS
+#define RE_NREGS 10
+#endif
+
+#define BYTEWIDTH 8
+
+
+/* Maximum number of duplicates an interval can allow.  */
+#ifndef RE_DUP_MAX
+#define RE_DUP_MAX  ((1 << 15) - 1) 
+#endif
+
+
+/* This defines the various regexp syntaxes.  */
+extern long obscure_syntax;
+
+
+/* The following bits are used in the obscure_syntax variable to choose among
+   alternative regexp syntaxes.  */
+
+/* If this bit is set, plain parentheses serve as grouping, and backslash
+     parentheses are needed for literal searching.
+   If not set, backslash-parentheses are grouping, and plain parentheses
+     are for literal searching.  */
+#define RE_NO_BK_PARENS	1L
+
+/* If this bit is set, plain | serves as the `or'-operator, and \| is a 
+     literal.
+   If not set, \| serves as the `or'-operator, and | is a literal.  */
+#define RE_NO_BK_VBAR (1L << 1)
+
+/* If this bit is not set, plain + or ? serves as an operator, and \+, \? are 
+     literals.
+   If set, \+, \? are operators and plain +, ? are literals.  */
+#define RE_BK_PLUS_QM (1L << 2)
+
+/* If this bit is set, | binds tighter than ^ or $.
+   If not set, the contrary.  */
+#define RE_TIGHT_VBAR (1L << 3)
+
+/* If this bit is set, then treat newline as an OR operator.
+   If not set, treat it as a normal character.  */
+#define RE_NEWLINE_OR (1L << 4)
+
+/* If this bit is set, then special characters may act as normal
+   characters in some contexts. Specifically, this applies to:
+	^ -- only special at the beginning, or after ( or |;
+	$ -- only special at the end, or before ) or |;
+	*, +, ? -- only special when not after the beginning, (, or |.
+   If this bit is not set, special characters (such as *, ^, and $)
+   always have their special meaning regardless of the surrounding
+   context.  */
+#define RE_CONTEXT_INDEP_OPS (1L << 5)
+
+/* If this bit is not set, then \ before anything inside [ and ] is taken as 
+     a real \.
+   If set, then such a \ escapes the following character.  This is a
+     special case for awk.  */
+#define RE_AWK_CLASS_HACK (1L << 6)
+
+/* If this bit is set, then \{ and \} or { and } serve as interval operators.
+   If not set, then \{ and \} and { and } are treated as literals.  */
+#define RE_INTERVALS (1L << 7)
+
+/* If this bit is not set, then \{ and \} serve as interval operators and 
+     { and } are literals.
+   If set, then { and } serve as interval operators and \{ and \} are 
+     literals.  */
+#define RE_NO_BK_CURLY_BRACES (1L << 8)
+
+/* If this bit is set, then character classes are supported; they are:
+     [:alpha:],	[:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
+     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+   If not set, then character classes are not supported.  */
+#define RE_CHAR_CLASSES (1L << 9)
+
+/* If this bit is set, then the dot re doesn't match a null byte.
+   If not set, it does.  */
+#define RE_DOT_NOT_NULL (1L << 10)
+
+/* If this bit is set, then [^...] doesn't match a newline.
+   If not set, it does.  */
+#define RE_HAT_NOT_NEWLINE (1L << 11)
+
+/* If this bit is set, back references are not recognized.
+   If not set, they are.  */
+#define RE_NO_BK_REFS (1L << 12)
+
+/* If this bit is set, back references must refer to a preceding
+   subexpression.  If not set, a back reference to a nonexistent
+   subexpression is treated as literal characters.  */
+#define RE_NO_EMPTY_BK_REF (1L << 13)
+
+/* If this bit is set, bracket expressions can't be empty.
+   If it is not set, they can be empty.  */
+#define RE_NO_EMPTY_BRACKETS (1L << 14)
+
+/* If this bit is set, then *, +, ? and { cannot be first in an re or
+   immediately after a |, or a (.  Furthermore, a | cannot be first or
+   last in an re, or immediately follow another | or a (.  Also, a ^
+   cannot appear in a nonleading position and a $ cannot appear in a
+   nontrailing position (outside of bracket expressions, that is).  */
+#define RE_CONTEXTUAL_INVALID_OPS (1L << 15)
+
+/* If this bit is set, then +, ? and | aren't recognized as operators.
+   If it's not, they are.  */
+#define RE_LIMITED_OPS (1L << 16)
+
+/* If this bit is set, then an ending range point has to collate higher
+     or equal to the starting range point.
+   If it's not set, then when the ending range point collates lower
+     than the starting range point, the range is just considered empty.  */
+#define RE_NO_EMPTY_RANGES (1L << 17)
+
+/* If this bit is set, then a hyphen (-) can't be an ending range point.
+   If it isn't, then it can.  */
+#define RE_NO_HYPHEN_RANGE_END (1L << 18)
+
+
+/* Define combinations of bits for the standard possibilities.  */
+#define RE_SYNTAX_POSIX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+			| RE_CONTEXT_INDEP_OPS)
+#define RE_SYNTAX_AWK (RE_NO_BK_PARENS | RE_NO_BK_VBAR | RE_AWK_CLASS_HACK)
+#define RE_SYNTAX_EGREP (RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+			| RE_CONTEXT_INDEP_OPS | RE_NEWLINE_OR)
+#define RE_SYNTAX_GREP (RE_BK_PLUS_QM | RE_NEWLINE_OR)
+#define RE_SYNTAX_EMACS 0
+#define RE_SYNTAX_POSIX_BASIC (RE_INTERVALS | RE_BK_PLUS_QM 		\
+			| RE_CHAR_CLASSES | RE_DOT_NOT_NULL 		\
+                        | RE_HAT_NOT_NEWLINE | RE_NO_EMPTY_BK_REF 	\
+                        | RE_NO_EMPTY_BRACKETS | RE_LIMITED_OPS		\
+                        | RE_NO_EMPTY_RANGES | RE_NO_HYPHEN_RANGE_END)	
+                        
+#define RE_SYNTAX_POSIX_EXTENDED (RE_INTERVALS | RE_NO_BK_CURLY_BRACES	   \
+			| RE_NO_BK_VBAR | RE_NO_BK_PARENS 		   \
+                        | RE_HAT_NOT_NEWLINE | RE_CHAR_CLASSES 		   \
+                        | RE_NO_EMPTY_BRACKETS | RE_CONTEXTUAL_INVALID_OPS \
+                        | RE_NO_BK_REFS | RE_NO_EMPTY_RANGES 		   \
+                        | RE_NO_HYPHEN_RANGE_END)
+
+
+/* This data structure is used to represent a compiled pattern.
+   Callers pass its address to re_compile_pattern, re_compile_fastmap,
+   re_search, re_match and their `_2' variants.  */
+
+struct re_pattern_buffer
+  {
+    char *buffer;	/* Space holding the compiled pattern commands.  */
+    long allocated;	/* Size of space that `buffer' points to.  */
+    long used;		/* Length of the portion of `buffer' actually
+			   occupied.  */
+    char *fastmap;	/* Pointer to fastmap, if any, or zero if none.  */
+			/* re_search uses the fastmap, if there is one,
+			   to skip over totally implausible characters.  */
+    char *translate;	/* Translate table to apply to all characters
+			   before comparing, or zero for no translation.
+			   The translation is applied to a pattern when
+			   it is compiled and to data when it is
+			   matched.  */
+    char fastmap_accurate;
+			/* Set to zero when a new pattern is stored,
+			   set to one when the fastmap is updated from it.  */
+    char can_be_null;   /* Set to one by compiling fastmap
+			   if this pattern might match the null string.
+			   It does not necessarily match the null string
+			   in that case, but if this is zero, it cannot.
+			   A value of 2 means it can match the null
+			   string, but only at end of range or before a
+			   character listed in the fastmap.  */
+  };
+
+
+/* search.c (search_buffer) needs this one value.  It is defined both in
+   regex.c and here.  */
+#define RE_EXACTN_VALUE 1
+
+
+/* Structure to store register contents data in.
+
+   Pass the address of such a structure as an argument to re_match, etc.,
+   if you want this information back.
+
+   For i from 1 to RE_NREGS - 1, start[i] records the starting index in
+   the string of where the ith subexpression matched, and end[i] records
+   one after the ending index.  start[0] and end[0] are analogous, for
+   the entire pattern.  */
+
+struct re_registers
+  {
+    int start[RE_NREGS];	/* start[i]: index where match i begins.  */
+    int end[RE_NREGS];		/* end[i]: one past the index where
+				   match i ends.  */
+  };
+
+
+
+#ifdef __STDC__
+
+extern char *re_compile_pattern (char *, size_t, struct re_pattern_buffer *);
+/* Is this really advertised?  */
+extern void re_compile_fastmap (struct re_pattern_buffer *);
+extern int re_search (struct re_pattern_buffer *, char*, int, int, int,
+		      struct re_registers *);
+extern int re_search_2 (struct re_pattern_buffer *, char *, int,
+			char *, int, int, int,
+			struct re_registers *, int);
+extern int re_match (struct re_pattern_buffer *, char *, int, int,
+		     struct re_registers *);
+extern int re_match_2 (struct re_pattern_buffer *, char *, int,
+		       char *, int, int, struct re_registers *, int);
+extern long re_set_syntax (long syntax);
+
+#ifndef GAWK
+/* 4.2 bsd compatibility.  */
+extern char *re_comp (char *);
+extern int re_exec (char *);
+#endif
+
+#else /* !__STDC__ */
+
+extern char *re_compile_pattern ();
+/* Is this really advertised? */
+extern void re_compile_fastmap ();
+extern int re_search (), re_search_2 ();
+extern int re_match (), re_match_2 ();
+extern long re_set_syntax();
+
+#ifndef GAWK
+/* 4.2 bsd compatibility.  */
+extern char *re_comp ();
+extern int re_exec ();
+#endif
+
+#endif /* __STDC__ */
+
+
+#ifdef SYNTAX_TABLE
+extern char *re_syntax_table;
+#endif
+
+#endif /* !__REGEXP_LIBRARY */
author	jkh <jkh@FreeBSD.org>	1993-06-18 04:22:21 +0000
committer	jkh <jkh@FreeBSD.org>	1993-06-18 04:22:21 +0000
commit	1109bdc96fbdae2166ae15bbc363921d1e002ee4 (patch)
tree	0c9aba9caf0bf15d2ca4ba338fbc8c130fbb9797 /gnu/usr.bin/awk
download	FreeBSD-src-1109bdc96fbdae2166ae15bbc363921d1e002ee4.zip FreeBSD-src-1109bdc96fbdae2166ae15bbc363921d1e002ee4.tar.gz