summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/ptx
diff options
context:
space:
mode:
authoralm <alm@FreeBSD.org>1994-05-06 07:54:54 +0000
committeralm <alm@FreeBSD.org>1994-05-06 07:54:54 +0000
commitfb89d535a678f4e5589fdb43996219ebf099abe0 (patch)
tree12c352d0c53343c34d0e7c21c1a1b6f32a46d649 /gnu/usr.bin/ptx
parent51aed44005c21ebe3ddca3903fc14057ba15f03c (diff)
downloadFreeBSD-src-fb89d535a678f4e5589fdb43996219ebf099abe0.zip
FreeBSD-src-fb89d535a678f4e5589fdb43996219ebf099abe0.tar.gz
ptx: permuted index generator
Diffstat (limited to 'gnu/usr.bin/ptx')
-rw-r--r--gnu/usr.bin/ptx/.stamp-h.in0
-rw-r--r--gnu/usr.bin/ptx/COPYING339
-rw-r--r--gnu/usr.bin/ptx/ChangeLog546
-rw-r--r--gnu/usr.bin/ptx/Makefile7
-rw-r--r--gnu/usr.bin/ptx/NEWS53
-rw-r--r--gnu/usr.bin/ptx/README23
-rw-r--r--gnu/usr.bin/ptx/THANKS23
-rw-r--r--gnu/usr.bin/ptx/TODO94
-rw-r--r--gnu/usr.bin/ptx/alloca.c484
-rw-r--r--gnu/usr.bin/ptx/argmatch.c94
-rw-r--r--gnu/usr.bin/ptx/bumpalloc.h58
-rw-r--r--gnu/usr.bin/ptx/check-out65
-rw-r--r--gnu/usr.bin/ptx/config.h57
-rw-r--r--gnu/usr.bin/ptx/diacrit.c148
-rw-r--r--gnu/usr.bin/ptx/diacrit.h16
-rw-r--r--gnu/usr.bin/ptx/error.c117
-rw-r--r--gnu/usr.bin/ptx/examples/README21
-rw-r--r--gnu/usr.bin/ptx/examples/ajay/Makefile28
-rw-r--r--gnu/usr.bin/ptx/examples/ajay/README41
-rw-r--r--gnu/usr.bin/ptx/examples/ajay/footer.tex1
-rw-r--r--gnu/usr.bin/ptx/examples/ajay/header.tex21
-rw-r--r--gnu/usr.bin/ptx/examples/ajay/tip.forgptx10
-rw-r--r--gnu/usr.bin/ptx/examples/ajay/x.pl22
-rw-r--r--gnu/usr.bin/ptx/examples/ignore/README65
-rw-r--r--gnu/usr.bin/ptx/examples/ignore/bix109
-rw-r--r--gnu/usr.bin/ptx/examples/ignore/eign163
-rwxr-xr-xgnu/usr.bin/ptx/examples/include.pl79
-rw-r--r--gnu/usr.bin/ptx/examples/latex/Makefile15
-rw-r--r--gnu/usr.bin/ptx/examples/latex/README10
-rw-r--r--gnu/usr.bin/ptx/examples/latex/latex.tex11
-rw-r--r--gnu/usr.bin/ptx/examples/latex/table.tex65
-rw-r--r--gnu/usr.bin/ptx/examples/luke/README2
-rw-r--r--gnu/usr.bin/ptx/examples/luke/xxroff.sh108
-rw-r--r--gnu/usr.bin/ptx/getopt.c757
-rw-r--r--gnu/usr.bin/ptx/getopt.h129
-rw-r--r--gnu/usr.bin/ptx/getopt1.c187
-rwxr-xr-xgnu/usr.bin/ptx/mkinstalldirs35
-rw-r--r--gnu/usr.bin/ptx/ptx.c2237
-rw-r--r--gnu/usr.bin/ptx/ptx.info496
-rw-r--r--gnu/usr.bin/ptx/ptx.texinfo554
-rw-r--r--gnu/usr.bin/ptx/xmalloc.c88
41 files changed, 7378 insertions, 0 deletions
diff --git a/gnu/usr.bin/ptx/.stamp-h.in b/gnu/usr.bin/ptx/.stamp-h.in
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/gnu/usr.bin/ptx/.stamp-h.in
diff --git a/gnu/usr.bin/ptx/COPYING b/gnu/usr.bin/ptx/COPYING
new file mode 100644
index 0000000..a43ea21
--- /dev/null
+++ b/gnu/usr.bin/ptx/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/gnu/usr.bin/ptx/ChangeLog b/gnu/usr.bin/ptx/ChangeLog
new file mode 100644
index 0000000..fffb47f
--- /dev/null
+++ b/gnu/usr.bin/ptx/ChangeLog
@@ -0,0 +1,546 @@
+Fri Nov 5 23:10:07 1993 Francois Pinard (pinard@icule)
+
+ * Version 0.3
+
+ * check-out: New name for check_out.
+ * Makefile.in: Change check_out for check-out everywhere.
+ Reported by Jim Meyering <meyering@comco.com>.
+
+ * Makefile.in (realclean): Do not remove .stamp-h.in and
+ config.h.in. One should not need Autoconf installed.
+ Reported by Nelson Beebe <beebe@math.utah.edu>.
+
+ * ptx.c: Add missing definition of isxdigit.
+ Reported by Nelson Beebe <beebe@math.utah.edu>.
+
+ * ptx.c: Define S_ISREG if not defined, then use it.
+ Reported by Karl Berry <karl@cs.umb.edu>.
+
+Wed Nov 3 15:53:00 1993 Francois Pinard (pinard@icule)
+
+ * mkinstalldirs: New, from elsewhere.
+ * Makefile.in: Use it.
+
+Mon Nov 1 00:48:34 1993 Francois Pinard (pinard@lagrande.IRO.UMontreal.CA)
+
+ * Makefile.in (clean): Delete ptx, not the obsolete $(PROGS).
+
+Sun Oct 31 15:04:57 1993 Francois Pinard (pinard@raptor.IRO.UMontreal.CA)
+
+ * ptx.c (alloc_and_compile_regex): Zero out the whole allocated
+ pattern, not just a few fields.
+
+ * ptx.c (alloc_and_compile_regex): Clarify error message.
+
+Thu Oct 28 08:29:29 1993 Francois Pinard (pinard@compy.IRO.UMontreal.CA)
+
+ * ptx.c (print_copyright): Deleted. Rather use a "copyright"
+ variable, print to standard output instead of standard error.
+
+ * ptx.c: Use error instead of fprintf (stderr, ...).
+
+ * ptx.c: Rename fold_lower_to_upper to ignore_case.
+
+Wed Oct 27 18:41:52 1993 Francois Pinard (pinard@lagrande.IRO.UMontreal.CA)
+
+ * ptx.c: Add option -M for using another macro name than "xx".
+ Reported by Thorsten Ohl <ohl@physics.harvard.edu>.
+
+ * examples/ignore/: New files.
+ * eign: Linked to examples/ignore/eign.
+ * Makefile.in: Install and uninstall $(datadir)/eign.
+ * configure.in: Remove testing of a default ignore file.
+ Reported by Nelson Beebe <beebe@math.utah.edu>.
+
+ * ptx.c (main): Add --help and --version processing.
+ (print_version): Deleted.
+
+ * ptx.c: Use -traditional instead of --no-gnu-extensions,
+ --ignore-case instead of --fold-letter-case, --format=<format>
+ instead of --tex-output and --roff-output.
+ * argmatch.c: New file. Taken from fileutils/lib.
+ Reported by Karl Berry <karl@cs.umb.edu>.
+
+Tue Oct 26 08:39:14 1993 Francois Pinard (pinard@icule)
+
+ * ptx.c (usage): New name for usage_and_exit. Accept an exit
+ status parameter. If zero, print full help on stdout. If
+ non-zero, print a one-line helper on stderr.
+
+ * ptx.c: Remove sizeof_occurs and OCCURS_ALIGNMENT complexity.
+ The memory savings did not justify the portability headaches.
+
+ * ptx.c (copy_unescaped_string): New function.
+ (main): Use it with options -F, -S and -W.
+ Reported by Dave Cottingham <dc@haiti.gsfc.nasa.gov>.
+
+ * ptx.c (fix_output_parameters): Force edit of '\f', because some
+ systems do not consider it to be whitespace.
+ Reported by Stephane Berube <berube@iro.umontreal.ca>.
+
+ * ptx.c (fix_output_parameters): For roff output, do not disallow
+ characters with 8th bit set.
+ Reported by James Clark <jjc@jclark.com>.
+
+ * Makefile.in (dist): Include examples/ in distribution.
+
+Mon Oct 25 15:46:16 1993 Francois Pinard (pinard@icule)
+
+ * ptx.c: Change --display-width to --width, for consistency with
+ other GNU programs.
+
+ * examples/ajay/: New files.
+ Reported by Ajay Shah <ajayshah@cmie.ernet.in>.
+ Reported by Rakesh Chauhan <rk@cmie.ernet.in>.
+
+ * examples/luke/: New files.
+ Reported by Luke Kendall <luke@research.canon.oz.au>.
+
+ * examples/latex/: New files.
+
+ * ptx.c (find_occurs_in_text): Assign 0 to reference_length so
+ GNU C will not warn anymore against its uninitialized use.
+ Reported by Loic Dachary <L.Dachary@cs.ucl.ac.uk>.
+
+ * lib/: Move routines in main directory first, then destroy.
+ * Makefile.in: Merge lib/Makefile.in, clean up.
+ * configure.in: Do not create lib/Makefile.in.
+
+ * acconfig.h: New file.
+ * .stamp-h.in: Used for timestamping autoheader.
+ * Makefile.in: Use acconfig.h and .stamp-h.in. Force
+ autoheader whenever acconfig.h is modified.
+
+Wed Jun 9 15:01:28 1993 Francois Pinard (pinard@icule)
+
+ * Makefile.in (dist): Replace "echo `pwd`" by a mere "pwd".
+ Create a gzip file.
+
+Sat May 22 20:18:31 1993 Francois Pinard (pinard@icule)
+
+ * Makefile.in: Replace $(PROGS) by ptx.
+
+ * diacrit.h: Change `c' to `chr', better protect it.
+
+ * lib/COPYING.LIB: Deleted.
+ * lib/Makefile.in: Adjust accordingly.
+
+Sat Feb 6 15:03:13 1993 Francois Pinard (pinard@icule)
+
+ * Makefile.in, lib/Makefile.in: In dist goals, ensure 777 mode for
+ directories, so older tar's will restore file modes properly.
+
+Sun Jan 17 15:42:35 1993 Francois Pinard (pinard@icule)
+
+ * Makefile.in, lib/Makefile.in: Put $(CFLAGS) after $(CPPFLAGS),
+ so the installer can override automatically configured choices.
+ Reported by Karl Berry <karl@cs.umb.edu>.
+
+Tue Jan 12 09:21:22 1993 Francois Pinard (pinard at icule)
+
+ * configure.in: Check for setchrclass().
+ * diacrit.[hc]: New file, extracted from my own ctype.[hc].
+ * ctype.[hc]: Deleted.
+ * Makefile.in: Distribute diacrit.[hc], but not ctype.[hc].
+ * ptx.c: Include "diacrit.h" rather than "ctype.h".
+ Include <ctype.h> for ANSI C, or else, use our own definitions.
+ (initialize_regex): Use ctype.h macros for making the folding
+ table and for making the \w+ fastmap. Previously, was reusing the
+ regex syntax table or looking at character bit structure.
+ (main): Execute setchrclass (NULL) if available and ANSI C.
+
+ * Spelling fixes in various files.
+ Reported by Jim Meyering <meyering@cs.utexas.edu>.
+
+Thu Jan 7 20:19:25 1993 Francois Pinard (pinard at icule)
+
+ * Makefile.in: Using autoheader, derive config.h.in from
+ configure.in. Distribute config.h.in.
+ Use config.status for reconstructing config.h from config.h.in.
+ Have all $(OBJECTS) depend upon config.h.
+ Always use -I. calling the C compiler, for config.h to be found.
+ Remove config.h in distclean-local.
+ * lib/Makefile.in: Always use -I.. calling the C compiler, for
+ config.h to be found. Also use $(DEFS).
+ Have all $(OBJECTS) depend upon ../config.h.
+ * configure.in: Create config.h from config.h.in.
+ * ptx.c, ctype.c: Conditionally include config.h.
+
+Fri Jan 1 19:52:49 1993 Francois Pinard (pinard at icule)
+
+ * Makefile.in, lib/Makefile.in: Reinstate $(CPPFLAGS), use it.
+ Richard wants it there. Remove $(ALLFLAGS) and reequilibrate.
+
+Sun Dec 27 05:57:55 1992 Francois Pinard (pinard at icule)
+
+ * ptx.c (find_occurs_in_text): Introduce word_start and word_end
+ variables, and use them instead of the word_regs structure. This
+ takes care of the fact newer regex.h does not allocate the arrays
+ any more, and these were used even when regexps were not compiled.
+
+ * Makefile, lib/Makefile.in: Define CHAR_SET_SIZE for SYNTAX_TABLE
+ to work correctly.
+
+ * configure.in: Replace AC_USG by AC_HAVE_HEADERS(string.h).
+ Cleanup and reorganize a little.
+
+ * ptx.c: Renamed from gptx.c. Add -G (--no-gnu-extensions)
+ and clarify some long option names by making them more
+ explicit. Remove all PTX_COMPATIBILITY conditionals.
+ Introduce gnu_extensions variable initialized to 1. Let -G
+ give it the value 0, but still allow and process GNU specific
+ options and long option names. The Ignore file is now the same
+ whatever the value of gnu_extensions.
+ * ptx.texinfo: Renamed from gptx.texinfo, adjusted.
+ * Makefile.in, configure.in: Adjusted accordingly. Now
+ installs only one program under the name $(binprefix)ptx.
+
+ * gptx.c (perror_and_exit): Deleted. Use error() directly.
+
+ * gptx.c: Remove unneeded prototypes for system library routines.
+
+ * gptx.c (compare_words, compare_occurs): #define first and second
+ instead of using an intermediate variable.
+
+ * configure.in: Use AC_CONST.
+ * gptx.h: Do not define const.
+ * Define volatile dependent on __GNUC__, not __STDC__, and define
+ it to __volatile__.
+
+ * gptx.h, version.c: Deleted, integrated into gptx.c.
+ * Remove src/ and doc/ subdirectories, merging them in main.
+ * Move lib/bumpalloc.h, lib/ctype.[ch] in main directory.
+ * Integrate all ChangeLogs in main ChangeLog.
+ * Integrate all Makefiles in main Makefile and lib/Makefile,
+ rewriting them all along the way.
+
+Fri Nov 13 00:10:31 1992 Francois Pinard (pinard at icule)
+
+ * Makefile.in (dist): chmod a+r before making the tar file.
+
+Tue Oct 6 12:47:00 1992 Francois Pinard (pinard at icule)
+
+ * {,doc/,lib/,src/}Makefile.in: Use exec_prefix. Add `uninstall'.
+
+Wed Aug 19 16:02:09 1992 Francois Pinard (pinard at icule)
+
+ * ansi2knr.c: New file, from Ghostscript distribution.
+ * gptx.c: Get rid of many __STDC__ tests.
+ * version.c: Idem.
+
+Fri Aug 14 22:53:05 1992 Francois Pinard (pinard at icule)
+
+ * gptx.c: Use HAVE_MCHECK instead of MCHECK_MISSING.
+ * configure.in: Use AC_HAVE_FUNCS instead of AC_MISSING_FUNCS.
+
+ * configure.in: Autoconfigure for mcheck and strerror.
+ Reported by Bernd Nordhausen <bernd@iss.nus.sg>.
+
+Thu Jun 18 09:15:12 1992 Francois Pinard (pinard at icule)
+
+ * configure.in, all Makefile's: Adapt to Autoconf 0.118.
+
+Sun Feb 2 16:23:47 1992 Francois Pinard (pinard at icule)
+
+ * gptx.c (main): Returns int.
+
+Tue Dec 10 09:53:21 1991 Francois Pinard (pinard at icule)
+
+ * gptx.c (usage_and_exit): Print --OPTION instead of +OPTION.
+
+Wed Dec 4 10:31:06 1991 Francois Pinard (pinard at icule)
+
+ * gptx.c (compare_occurs, compare_words): Change parameters to
+ (void *) to comply with qsort ANSI declaration, and cast the true
+ type inside the function, each time a parameter is used.
+ Reported by Byron Rakitzis <byron@archone.tamu.edu>.
+
+Mon Dec 2 10:41:43 1991 Francois Pinard (pinard at icule)
+
+ * gptx.c: Removed comma at end of enum.
+
+ * version.c: Add a few missing `const's.
+
+ * gptx.c: Add prototypes for close, fstat, open, perror and read
+ if __STDC__.
+
+ * gptx.c: Remove useless alloca declaration.
+
+Sat Nov 9 20:03:37 1991 Francois Pinard (pinard at icule)
+
+ * configure.in, all/Makefile.in: Directory reorganization,
+ including separate src and doc, in addition to lib. Ensure all
+ Makefile's can be used independently.
+
+Thu Nov 7 11:20:38 1991 Francois Pinard (pinard at icule)
+
+ * gptx.texinfo: Renamed from gptx.texi. Now `TeX'able.
+ * Makefile.in: Ensure distributing texinfo.tex.
+ Reported by Karl Berry <karl@cs.umb.edu>.
+
+ * configure.in: Take care of POSIXish ISC.
+ Reported by Karl Berry <karl@cs.umb.edu>.
+
+Tue Nov 5 09:42:58 1991 Francois Pinard (pinard at icule)
+
+ * configure.in, Makefile.in: Do not absolutize $(srcdir), because
+ this could create problems with automounters.
+
+ * configure.in, Makefile.in: Remove IF_* devices, they were
+ solving a problem caused only by non timestamping shars, and
+ gptx is now distributed in tar format.
+
+Mon Oct 28 14:39:36 1991 Francois Pinard (pinard at icule)
+
+ * configure.in: New file.
+ * configure: Automatically generated from file configure.in
+ and David MacKenzie's autoconf.
+
+Sat Oct 19 20:06:28 1991 Francois Pinard (pinard at icule)
+
+ * configure: Use ANSI header files if present, even with non ANSI
+ compilers.
+ Reported by David MacKenzie <djm@eng.umd.edu>.
+
+Tue Oct 15 08:43:13 1991 Francois Pinard (pinard at icule)
+
+ * Makefile.in: Install gptx and ptx separately. On DEC Ultrix
+ 4.1, install cannot install more than one file at a time.
+ Reported by Simon Leinen <simon@liasun1.epfl.ch>.
+
+Fri Oct 11 15:19:42 1991 Francois Pinard (pinard at icule)
+
+ * Makefile.in: `realclean' did not work, because lib/Makefile was
+ disappearing at `distclean' time. I tried separate doc and src
+ directories, but this is not worth the heaviness. Split some
+ goals instead, using _doc, _lib and _src suffixes.
+
+Fri Oct 10 18:04:21 1991 Francois Pinard (pinard at icule)
+
+ * Version 0.2
+
+Wed Oct 9 16:13:42 1991 Francois Pinard (pinard at icule)
+
+ * configure, Makefile.in: New files.
+ * Makefile, GNUmakefile, Depends: Deleted.
+
+ * gptx.c: Change -A output from `FILE(NN):' to `FILE:NN:'.
+
+ * gptx.c, gptx.h, version.c: Reinstate __STDC__ tests.
+
+Tue Jun 25 11:35:32 1991 Francois Pinard (pinard at icule)
+
+ * gptx.c: Something is wrong in -r reference allocation, I suspect
+ casting does not do what I expect. I relax the constraints so as to
+ make it work for now. To be revisited.
+
+ * gptx.c: Call initialize_regex sooner, to ensure folded_chars is
+ properly initialized when -f and -i are simultaneously used.
+
+ * gptx.c: Remove -p option and rather compile two separate
+ programs, one by defining PTX_COMPATIBILITY, to conform to a GNU
+ standard asking not to depend on the program's installed name. This
+ also removes the -p option, so losing the debatable advantage of
+ dynamically reverting to ptx compatibility mode.
+
+ * gptx.h: Cleanup. Don't duplicate stdlib.h.
+
+Wed Dec 5 18:00:23 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (usage_and_exit): Change -C explanation.
+
+Sun Oct 28 16:11:36 1990 Francois Pinard (pinard at icule)
+
+ * gptx.h: Remove the PROTO macros and usage.
+ * gptx.c: Remove all the #ifdef __STDC__ noise.
+ * version.c: Remove all the #ifdef __STDC__ noise.
+
+Wed Jul 25 12:20:45 1990 Francois Pinard (pinard at icule)
+
+ * ctype.[ch]: Linked from my library.
+
+Wed Jul 11 10:53:13 1990 Francois Pinard (pinard at icule)
+
+ * bumpalloc.h: Linked from my library.
+
+Sun Aug 5 13:17:25 1990 Francois Pinard (pinard at icule)
+
+ * Version 0.1
+
+ * gptx.c: Implement IGNORE and PIGNORE defines.
+
+ * gptx.c: Implement special character protection for roff and TeX
+ output, through the edited_flag array.
+
+Fri Aug 3 12:47:35 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Implement new -R option for automatic referencing, with
+ the possibility of multiple input files in normal mode. Now,
+ option -r implies ptx compatibility mode default for -S; exclude
+ reference from context whenever easy to do, and allow coselection
+ of both -r and -R.
+
+Wed Aug 1 12:00:07 1990 Francois Pinard (pinard at icule)
+
+ * gptx.[hc]: Define and use OCCURS_ALIGNMENT, to avoid those
+ `Bus error's on Sparcs.
+
+Fri Jul 27 12:04:40 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (initialize_regex): Use only isalpha and "ctype.h" to
+ initialize Sword syntax, getting rid of any other explicit ISO
+ 8859-1 references. This will make the MS-DOS port easier,
+ character set wise.
+
+ * gptx.c (swallow_file_in_memory): Revised along the lines of
+ io.c from GNU diff 1.14, so it could handle stdin and fifos,
+ and work faster.
+
+ * gptx.c (perror_and_exit): New function, use it where convenient.
+
+Thu Jul 26 13:28:13 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (swallow_input_text): Remove white space compression even
+ if not in ptx compatibility mode. This being out of the way, use
+ swallow_file_in_memory instead of inputting characters one by one.
+
+Wed Jul 25 12:20:45 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (find_occurs_in_text): Include the sentence separator as
+ part of the right context, except for separator's suffix white
+ space. Formerly, it was excluded from all contexts.
+
+ * gptx.h: Check STDLIB_PROTO_ALREADY to conditionalize prototype
+ declarations for standard C library routines; check __GNUC__
+ before using `volatile' on function prototypes.
+
+ * gptx.c: (find_occurs_in_text): Maintain the maximum length of
+ all words read.
+ (define_all_fields): Optimize scanning longish left contexts by
+ sometimes doing a backward jump from the keyword instead of always
+ scanning forward from the left context boundary.
+
+Sun Jul 22 09:18:21 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (alloc_and_compile_regex): Realloc out all extra allocated
+ space.
+
+Mon Jul 16 09:07:25 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: In OCCURS structure, modify left, right and reference
+ pointers and make them displacements, to save some space. Define
+ DELTA typedef, use it, make all other necessary changes.
+
+ * gptx.c: Work on portability. Define const and volatile to
+ nothing if not __STDC__. On BSD, define str[r]chr to be [r]index.
+ Avoid writings specific to GNU C.
+
+Sun Jul 15 17:28:39 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Add a word_fastmap array and use it if -W has not been
+ specified, instead of using default regexps. Finish implementing
+ the Break files.
+
+Sat Jul 14 10:54:21 1990 Francois Pinard (pinard at icule)
+
+ * gptx.[ch], version.c: Use prototypes in all header
+ functions. Add some missing const declarations.
+
+Fri Jul 13 10:16:34 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Enforce ptx compatibility mode by disallowing normal
+ mode extensions. Disallow -p if extensions are used.
+
+ * gptx.c: Finish implementation of Ignore and Only files.
+
+Wed Jul 11 10:53:13 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Revise WORD typedef and use it in OCCURS typedef;
+ adjust all usages. Add BLOCK and WORD_ARRAY typedefs, revise in
+ various places to make better usage of these. Use BUMP_ALLOC.
+
+Tue Jul 10 09:02:26 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Add -L option, `latin1_charset' variable and support.
+
+ * gptx.c: Remove old generate_roff and generate_tex variables,
+ replace with output_format which is of enum type.
+
+Mon Jul 9 10:40:41 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (compare_words): Check word_regex.translate and do not
+ use the translation table if not computed. Also protect against
+ possible 8-bit problems.
+
+ * gptx.c (alloc_and_compile_regex): New function.
+
+Sun Jul 8 17:52:14 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Make a more systematic use of SKIP_* macros, to get rid
+ of explicit ' ' references when possible.
+
+ * gptx.c: Replace `head' field by `left' in the OCCURS structure,
+ delay the `before' computation from find_occurs_in_text to
+ define_all_fields, and make all necessary adjustments. Also
+ add a `right' field in the OCCURS structure, use it to get rid of
+ explicit '\n' references when possible.
+
+ * gptx.c (initialize_regex): New function. Compute the syntax
+ table for regex. Get rid of previous break_chars_init variable
+ and break_chars array, use word_regex and word_regex_string
+ instead.
+
+ * gptx.c: Use re_search to find words and re_match to skip over
+ them. Add -W option and support. Use re_search to find end of
+ lines or end of sentences, add -S option and support.
+
+Sat Jul 7 08:50:40 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Change PRINT_SPACES and PRINT_FIELD macros to
+ print_spaces and print_field routines, respectively.
+
+Fri Jul 6 09:44:39 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c (generate_output): Split into define_all_fields,
+ generate_all_output, output_one_roff_line, output_one_tex_line,
+ and output_one_tty_line.
+
+ * gptx.c: Move the inline code to reallocate the text buffer into
+ reallocate_text_buffer. Correct a small bug in this area.
+
+ * gptx.c: Modify -F to accept a STRING argument, modify output
+ routines to handle truncation marks having more than one
+ character.
+
+Thu Jul 5 11:08:59 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Add -F option and logic.
+
+ * gptx.c: Select ptx compatibility mode if program is
+ installed under the name `ptx'. Install both gptx and ptx.
+
+Thu Jun 7 17:21:25 1990 Francois Pinard (pinard at icule)
+
+ * gptx.c: Make each OCCURS a variable size thing, depending on
+ various options; mark occurs_found table size with an integer
+ counter instead of an end pointer.
+
+Sat Apr 14 20:01:09 1990 Francois Pinard (pinard at icule)
+
+ * Version 0.0
+
+ * gptx.c: Removed limitations on table sizes: it should now go
+ until an `Out of memory' error. Use xmalloc. Rename some
+ variables.
+
+ * version.c, gptx.c (usage_and_exit): Add -C option to print
+ Copyright.
+
+Mon Mar 12 17:59:42 1990 Francois Pinard (pinard at icule)
+
+ * ChangeLog initialisation. Previous experiments towards gptx
+ were done at the defunct site ora.odyssee.qc.ca, which was a
+ Sun-3/160 running SunOS 3.0. The files have been stocked for
+ a long time to kovic.iro archives, then imported to icule.
+
+ * gptx.c: GCC linted.
diff --git a/gnu/usr.bin/ptx/Makefile b/gnu/usr.bin/ptx/Makefile
new file mode 100644
index 0000000..b778f7a
--- /dev/null
+++ b/gnu/usr.bin/ptx/Makefile
@@ -0,0 +1,7 @@
+PROG= ptx
+SRCS= argmatch.c diacrit.c error.c getopt.c getopt1.c ptx.c regex.c xmalloc.c
+
+MAN1= NOMAN
+CFLAGS+= -DHAVE_CONFIG_H -DDEFAULT_IGNORE_FILE=\"/usr/share/dict/eign\"
+
+.include <bsd.prog.mk>
diff --git a/gnu/usr.bin/ptx/NEWS b/gnu/usr.bin/ptx/NEWS
new file mode 100644
index 0000000..6f97bf9
--- /dev/null
+++ b/gnu/usr.bin/ptx/NEWS
@@ -0,0 +1,53 @@
+GNU permuted indexer NEWS - User visible changes.
+Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
+Francois Pinard <pinard@iro.umontreal.ca>, 1992.
+
+Version 0.3 - 1993-10-??, by Franc,ois Pinard
+
+* GNU ptx installs as a single program, -G option dynamically reverts
+to the System V compatible behaviour, yet being liberal with options.
+
+* It should install more easily on more systems, source code is
+unprotoized on the fly for older C compilers.
+
+* A default ignore file is installed along with GNU ptx, ptx uses it.
+
+* Options -F, -S and -W interpret most \-escapes themselves.
+
+* Option -M can be used to change "xx" to another macro name.
+
+* CHRCLASS environment variable is obeyed for systems supporting it.
+
+* Long option names have been cleaned up a little.
+
+* Some examples are given in the examples/ directory structure.
+
+
+Version 0.2 - 1991-10-10, by Franc,ois Pinard
+
+* Reference format (with -A) has been modified slightly to better
+comply with GNU standards for line reporting.
+
+* Option -p removed, rather compile two separate programs, one with
+GNU extensions, the other being strict on System V compatibility.
+
+
+Version 0.1 - 1990-08-05, by Franc,ois Pinard
+
+* Add many options: -L for Latin1, -R for automatic referencing, -W
+for regular expressions describing words, -S for regular expressions
+describing end of lines or sentences. Let -F specify the truncation
+strings.
+
+* Implementing Ignore files and Only files.
+
+* Option -p dynamically enforces strict System V compatibility.
+
+* Correct a few bugs and portability problems, have faster input,
+faster processing, and use less memory.
+
+
+Version 0.0 - 1990-04-14, by Franc,ois Pinard
+
+* Initial release.
+
diff --git a/gnu/usr.bin/ptx/README b/gnu/usr.bin/ptx/README
new file mode 100644
index 0000000..240b7ee
--- /dev/null
+++ b/gnu/usr.bin/ptx/README
@@ -0,0 +1,23 @@
+This is a beta release of GNU ptx, a permuted index generator. GNU
+ptx can handle multiple input files at once, produce TeX compatible
+output, or a readable KWIC (keywords in their context) without the
+need of nroff. This version does not handle huge input files, that
+is, those which do not fit in memory all at once.
+
+The command syntax is not the same as UNIX ptx: all given files are
+input files, the results are produced on standard output by default.
+GNU ptx manual is provided in Texinfo format. Calling `ptx --help'
+prints an option summary. Please note that an overall renaming of all
+options is foreseeable: GNU ptx specifications are not frozen yet.
+
+See the file COPYING for copying conditions.
+
+See the file THANKS for a list of contributors.
+
+See the file NEWS for a list of major changes in the current release.
+
+See the file INSTALL for compilation and installation instructions.
+
+Mail suggestions and bug reports (including documentation errors) for
+these programs to bug-gnu-utils@prep.ai.mit.edu.
+
diff --git a/gnu/usr.bin/ptx/THANKS b/gnu/usr.bin/ptx/THANKS
new file mode 100644
index 0000000..e6a45cf
--- /dev/null
+++ b/gnu/usr.bin/ptx/THANKS
@@ -0,0 +1,23 @@
+GNU permuted indexer has originally been written by François Pinard.
+Other people contributed to the GNU permuted index by reporting
+problems, suggesting various improvements or submitting actual code.
+Here is a list of these people. Help me keep it complete and free
+of errors.
+
+Ajay Shah ajayshah@cmie.ernet.in
+Bernd Nordhausen bernd@iss.nus.sg
+Byron Rakitzis byron@archone.tamu.edu
+Dave Cottingham dc@haiti.gsfc.nasa.gov
+David J. MacKenzie djm@eng.umd.edu
+Francois Pinard pinard@iro.umontreal.ca
+Janne Himanka shem@syy.oulu.fi
+James Clark jjc@jclark.com
+Jim Meyering meyering@comco.com
+Karl Berry karl@cs.umb.edu
+Loic Dachary L.Dachary@cs.ucl.ac.uk
+Luke Kendall luke@research.canon.oz.au
+Nelson Beebe beebe@math.utah.edu
+Rakesh Chauhan rk@cmie.ernet.in
+Simon Leinen simon@liasun1.epfl.ch
+Stephane Berube berube@iro.umontreal.ca
+Thorsten Ohl ohl@physics.harvard.edu
diff --git a/gnu/usr.bin/ptx/TODO b/gnu/usr.bin/ptx/TODO
new file mode 100644
index 0000000..6714313
--- /dev/null
+++ b/gnu/usr.bin/ptx/TODO
@@ -0,0 +1,94 @@
+TODO file for GNU ptx - last revised 05 November 1993.
+Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+Francois Pinard <pinard@iro.umontreal.ca>, 1992.
+
+The following are more or less in decreasing order of priority.
+
+* Use rx instead of regex.
+
+* Correct the infinite loop using -S '$' or -S '^'.
+
+* Use mmap for swallowing files (maybe wrong when memory edited).
+
+* Understand and mimic `-t' option, if I can.
+
+* Sort keywords intelligently for Latin-1 code. See how to interface
+this character set with various output formats. Also, introduce
+options to inverse-sort and possibly to reverse-sort.
+
+* Improve speed for Ignore and Only tables. Consider hashing instead
+of sorting. Consider playing with obstacks to digest them.
+
+* Provide better handling of format effectors obtained from input, and
+also attempt white space compression on output which would still
+maximize full output width usage.
+
+* See how TeX mode could be made more useful, and if a texinfo mode
+would mean something to someone.
+
+* Provide multiple language support
+
+Most of the boosting work should go along the line of fast recognition
+of multiple and complex boundaries, which define various `languages'.
+Each such language has its own rules for words, sentences, paragraphs,
+and reporting requests. This is less difficult than I first thought:
+
+ . Recognize language modifiers with each option. At least -b, -i, -o,
+-W, -S, and also new language switcher options, will have such
+modifiers. Modifiers on language switchers will allow or disallow
+language transitions.
+
+ . Complete the transformation of underlying variables into arrays in
+the code.
+
+ . Implement a heap of positions in the input file. There is one entry
+in the heap for each compiled regexp; it is initialized by a re_search
+after each regexp compile. Regexps reschedule themselves in the heap
+when their position passes while scanning input. In this way, looking
+simultaneously for a lot of regexps should not be too inefficient,
+once the scanning starts. If this works ok, maybe consider accepting
+regexps in Only and Ignore tables.
+
+ . Merge with language processing boundary processing options, really
+integrating -S processing as a special case. Maybe, implement several
+levels of boundaries. See how to implement a stack of languages, for
+handling quotations. See if more sophisticated references could be
+handled as another special case of a language.
+
+* Tackle other aspects, in a more long term view
+
+ . Add options for statistics, frequency lists, referencing, and all
+other prescreening tools and subsidiary tasks of concordance
+production.
+
+ . Develop an interactive mode. Even better, construct a GNU emacs
+interface. I'm looking at Gene Myers <gene@cs.arizona.edu> suffix
+arrays as a possible implementation along those ideas.
+
+ . Implement hooks so word classification and tagging should be merged
+in. See how to effectively hook in lemmatisation or other
+morphological features. It is far from being clear by now how to
+interface this correctly, so some experimentation is mandatory.
+
+ . Profile and speed up the whole thing.
+
+ . Make it work on small address space machines. Consider three levels
+of hugeness for files, and three corresponding algorithms to make
+optimal use of memory. The first case is when all the input files and
+all the word references fit in memory: this is the case currently
+implemented. The second case is when the files cannot fit all together
+in memory, but the word references do. The third case is when even
+the word references cannot fit in memory.
+
+ . There also are subsidiary developments for in-core incremental sort
+routines as well as for external sort packages. The need for more
+flexible sort packages comes partly from the fact that linguists use
+kinds of keys which compare in unusual and more sophisticated ways.
+GNU `sort' and `ptx' could evolve together.
+
+
+Local Variables:
+mode: outline
+outline-regexp: " *[-+*.] \\| "
+eval: (hide-body)
+End:
diff --git a/gnu/usr.bin/ptx/alloca.c b/gnu/usr.bin/ptx/alloca.c
new file mode 100644
index 0000000..bd4932a
--- /dev/null
+++ b/gnu/usr.bin/ptx/alloca.c
@@ -0,0 +1,484 @@
+/* alloca.c -- allocate automatically reclaimed memory
+ (Mostly) portable public-domain implementation -- D A Gwyn
+
+ This implementation of the PWB library alloca function,
+ which is used to allocate space off the run-time stack so
+ that it is automatically reclaimed upon procedure exit,
+ was inspired by discussions with J. Q. Johnson of Cornell.
+ J.Otto Tennant <jot@cray.com> contributed the Cray support.
+
+ There are some preprocessor constants that can
+ be defined when compiling for your specific system, for
+ improved efficiency; however, the defaults should be okay.
+
+ The general concept of this implementation is to keep
+ track of all alloca-allocated blocks, and reclaim any
+ that are found to be deeper in the stack than the current
+ invocation. This heuristic does not reclaim storage as
+ soon as it becomes invalid, but it will do so eventually.
+
+ As a special case, alloca(0) reclaims storage without
+ allocating any. It is a good idea to use alloca(0) in
+ your main control loop, etc. to force garbage collection. */
+
+#ifdef HAVE_CONFIG_H
+#if defined (emacs) || defined (CONFIG_BROKETS)
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+/* If compiling with GCC 2, this file's not needed. */
+#if !defined (__GNUC__) || __GNUC__ < 2
+
+/* If someone has defined alloca as a macro,
+ there must be some other way alloca is supposed to work. */
+#ifndef alloca
+
+#ifdef emacs
+#ifdef static
+/* actually, only want this if static is defined as ""
+ -- this is for usg, in which emacs must undefine static
+ in order to make unexec workable
+ */
+#ifndef STACK_DIRECTION
+you
+lose
+-- must know STACK_DIRECTION at compile-time
+#endif /* STACK_DIRECTION undefined */
+#endif /* static */
+#endif /* emacs */
+
+/* If your stack is a linked list of frames, you have to
+ provide an "address metric" ADDRESS_FUNCTION macro. */
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+static long i00afunc (); /* Defined static below; linkage must agree. */
+#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg))
+#else
+#define ADDRESS_FUNCTION(arg) &(arg)
+#endif
+
+#if __STDC__
+typedef void *pointer;
+#else
+typedef char *pointer;
+#endif
+
+#define NULL 0
+
+/* Different portions of Emacs need to call different versions of
+ malloc. The Emacs executable needs alloca to call xmalloc, because
+ ordinary malloc isn't protected from input signals. On the other
+ hand, the utilities in lib-src need alloca to call malloc; some of
+ them are very simple, and don't have an xmalloc routine.
+
+ Non-Emacs programs expect this to call xmalloc.
+
+ Callers below should use malloc. */
+
+#ifndef emacs
+#define malloc xmalloc
+#endif
+extern pointer malloc ();
+
+/* Define STACK_DIRECTION if you know the direction of stack
+ growth for your system; otherwise it will be automatically
+ deduced at run-time.
+
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown */
+
+#ifndef STACK_DIRECTION
+#define STACK_DIRECTION 0 /* Direction unknown. */
+#endif
+
+#if STACK_DIRECTION != 0
+
+#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */
+
+#else /* STACK_DIRECTION == 0; need run-time code. */
+
+static int stack_dir; /* 1 or -1 once known. */
+#define STACK_DIR stack_dir
+
+static void
+find_stack_direction ()
+{ /* Set stack_dir by comparing a local's address across two nested calls. */
+ static char *addr = NULL; /* Address of first `dummy', once known. */
+ auto char dummy; /* To get stack address. */
+ /* ADDR is static, so its value is still visible in the recursive call. */
+ if (addr == NULL)
+ { /* Initial entry. */
+ addr = ADDRESS_FUNCTION (dummy);
+ /* The nested call's `dummy' is what gets compared against ADDR. */
+ find_stack_direction (); /* Recurse once. */
+ }
+ else
+ {
+ /* Second entry. */
+ if (ADDRESS_FUNCTION (dummy) > addr)
+ stack_dir = 1; /* Stack grew upward. */
+ else
+ stack_dir = -1; /* Stack grew downward. */
+ }
+}
+
+#endif /* STACK_DIRECTION == 0 */
+
+/* An "alloca header" is used to:
+ (a) chain together all alloca'ed blocks;
+ (b) keep track of stack depth.
+
+ It is very important that sizeof(header) agree with malloc
+ alignment chunk size. The following default should work okay. */
+
+#ifndef ALIGN_SIZE
+#define ALIGN_SIZE sizeof(double)
+#endif
+
+typedef union hdr
+{
+ char align[ALIGN_SIZE]; /* To force sizeof(header). */
+ struct
+ {
+ union hdr *next; /* For chaining headers. */
+ char *deep; /* For stack depth measure. */
+ } h;
+} header;
+
+static header *last_alloca_header = NULL; /* -> last alloca header. */
+
+/* Return a pointer to at least SIZE bytes of storage,
+ which will be automatically reclaimed upon exit from
+ the procedure that called alloca. Originally, this space
+ was supposed to be taken from the current stack frame of the
+ caller, but that method cannot be made to work for some
+ implementations of C, for example under Gould's UTX/32. */
+
+pointer
+alloca (size)
+ unsigned size; /* Bytes wanted; 0 only reclaims garbage. */
+{
+ auto char probe; /* Probes stack depth: */
+ register char *depth = ADDRESS_FUNCTION (probe);
+ /* DEPTH is stored in each new header and compared on later calls. */
+#if STACK_DIRECTION == 0
+ if (STACK_DIR == 0) /* Unknown growth direction. */
+ find_stack_direction ();
+#endif
+
+ /* Reclaim garbage, defined as all alloca'd storage that
+ was allocated from deeper in the stack than currently. */
+
+ {
+ register header *hp; /* Traverses linked list. */
+ /* Headers are pushed at the front, so scanning stops at the first keeper. */
+ for (hp = last_alloca_header; hp != NULL;)
+ if ((STACK_DIR > 0 && hp->h.deep > depth)
+ || (STACK_DIR < 0 && hp->h.deep < depth))
+ {
+ register header *np = hp->h.next;
+
+ free ((pointer) hp); /* Collect garbage. */
+
+ hp = np; /* -> next header. */
+ }
+ else
+ break; /* Rest are not deeper. */
+
+ last_alloca_header = hp; /* -> last valid storage. */
+ }
+
+ if (size == 0)
+ return NULL; /* No allocation required. */
+
+ /* Allocate combined header + user data storage. */
+
+ {
+ register pointer new = malloc (sizeof (header) + size);
+ /* Address of header. */
+ /* NEW is not checked for NULL here; malloc is xmalloc unless emacs. */
+ ((header *) new)->h.next = last_alloca_header;
+ ((header *) new)->h.deep = depth;
+
+ last_alloca_header = (header *) new;
+
+ /* User storage begins just after header. */
+
+ return (pointer) ((char *) new + sizeof (header));
+ }
+}
+
+#if defined (CRAY) && defined (CRAY_STACKSEG_END)
+
+#ifdef DEBUG_I00AFUNC
+#include <stdio.h>
+#endif
+
+#ifndef CRAY_STACK
+#define CRAY_STACK
+#ifndef CRAY2
+/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */
+struct stack_control_header
+ {
+ long shgrow:32; /* Number of times stack has grown. */
+ long shaseg:32; /* Size of increments to stack. */
+ long shhwm:32; /* High water mark of stack. */
+ long shsize:32; /* Current size of stack (all segments). */
+ };
+
+/* The stack segment linkage control information occurs at
+ the high-address end of a stack segment. (The stack
+ grows from low addresses to high addresses.) The initial
+ part of the stack segment linkage control information is
+ 0200 (octal) words. This provides for register storage
+ for the routine which overflows the stack. */
+
+struct stack_segment_linkage
+ {
+ long ss[0200]; /* 0200 overflow words. */
+ long sssize:32; /* Number of words in this segment. */
+ long ssbase:32; /* Offset to stack base. */
+ long:32;
+ long sspseg:32; /* Offset to linkage control of previous
+ segment of stack. */
+ long:32;
+ long sstcpt:32; /* Pointer to task common address block. */
+ long sscsnm; /* Private control structure number for
+ microtasking. */
+ long ssusr1; /* Reserved for user. */
+ long ssusr2; /* Reserved for user. */
+ long sstpid; /* Process ID for pid based multi-tasking. */
+ long ssgvup; /* Pointer to multitasking thread giveup. */
+ long sscray[7]; /* Reserved for Cray Research. */
+ long ssa0;
+ long ssa1;
+ long ssa2;
+ long ssa3;
+ long ssa4;
+ long ssa5;
+ long ssa6;
+ long ssa7;
+ long sss0;
+ long sss1;
+ long sss2;
+ long sss3;
+ long sss4;
+ long sss5;
+ long sss6;
+ long sss7;
+ };
+
+#else /* CRAY2 */
+/* The following structure defines the vector of words
+ returned by the STKSTAT library routine. */
+struct stk_stat
+ {
+ long now; /* Current total stack size. */
+ long maxc; /* Amount of contiguous space which would
+ be required to satisfy the maximum
+ stack demand to date. */
+ long high_water; /* Stack high-water mark. */
+ long overflows; /* Number of stack overflow ($STKOFEN) calls. */
+ long hits; /* Number of internal buffer hits. */
+ long extends; /* Number of block extensions. */
+ long stko_mallocs; /* Block allocations by $STKOFEN. */
+ long underflows; /* Number of stack underflow calls ($STKRETN). */
+ long stko_free; /* Number of deallocations by $STKRETN. */
+ long stkm_free; /* Number of deallocations by $STKMRET. */
+ long segments; /* Current number of stack segments. */
+ long maxs; /* Maximum number of stack segments so far. */
+ long pad_size; /* Stack pad size. */
+ long current_address; /* Current stack segment address. */
+ long current_size; /* Current stack segment size. This
+ number is actually corrupted by STKSTAT to
+ include the fifteen word trailer area. */
+ long initial_address; /* Address of initial segment. */
+ long initial_size; /* Size of initial segment. */
+ };
+
+/* The following structure describes the data structure which trails
+ any stack segment. I think that the description in 'asdef' is
+ out of date. I only describe the parts that I am sure about. */
+
+struct stk_trailer
+ {
+ long this_address; /* Address of this block. */
+ long this_size; /* Size of this block (does not include
+ this trailer). */
+ long unknown2;
+ long unknown3;
+ long link; /* Address of trailer block of previous
+ segment. */
+ long unknown5;
+ long unknown6;
+ long unknown7;
+ long unknown8;
+ long unknown9;
+ long unknown10;
+ long unknown11;
+ long unknown12;
+ long unknown13;
+ long unknown14;
+ };
+
+#endif /* CRAY2 */
+#endif /* not CRAY_STACK */
+
+#ifdef CRAY2
+/* Determine a "stack measure" for an arbitrary ADDRESS.
+ I doubt that "lint" will like this much. */
+
+static long
+i00afunc (long *address)
+{
+ struct stk_stat status;
+ struct stk_trailer *trailer;
+ long *block, size;
+ long result = 0;
+
+ /* We want to iterate through all of the segments. The first
+ step is to get the stack status structure. We could do this
+ more quickly and more directly, perhaps, by referencing the
+ $LM00 common block, but I know that this works. */
+
+ STKSTAT (&status);
+
+ /* Set up the iteration at the current segment's trailer; the 15
+ presumably skips the fifteen-word trailer area (TODO confirm). */
+ trailer = (struct stk_trailer *) (status.current_address
+ + status.current_size
+ - 15);
+
+ /* There must be at least one stack segment. Therefore it is
+ a fatal error if "trailer" is null. */
+
+ if (trailer == 0)
+ abort ();
+
+ /* Discard segments that do not contain our argument address. */
+
+ while (trailer != 0)
+ {
+ block = (long *) trailer->this_address;
+ size = trailer->this_size;
+ if (block == 0 || size == 0)
+ abort ();
+ trailer = (struct stk_trailer *) trailer->link; /* -> previous trailer. */
+ if ((block <= address) && (address < (block + size)))
+ break;
+ }
+
+ /* Set the result to the offset in this segment and add the sizes
+ of all predecessor segments. */
+
+ result = address - block;
+ /* TRAILER was advanced before the break, so it is the predecessor's. */
+ if (trailer == 0)
+ {
+ return result;
+ }
+
+ do
+ {
+ if (trailer->this_size <= 0)
+ abort ();
+ result += trailer->this_size;
+ trailer = (struct stk_trailer *) trailer->link;
+ }
+ while (trailer != 0);
+
+ /* We are done. Note that if you present a bogus address (one
+ not in any segment), you will get a different number back, formed
+ from subtracting the address of the first block. This is probably
+ not what you want. */
+
+ return (result);
+}
+
+#else /* not CRAY2 */
+/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP.
+ Determine the number of the cell within the stack,
+ given the address of the cell. The purpose of this
+ routine is to linearize, in some sense, stack addresses
+ for alloca. */
+
+static long
+i00afunc (long address)
+{
+ long stkl = 0;
+ /* Result: offset of ADDRESS in its segment plus sizes of earlier segments. */
+ long size, pseg, this_segment;
+ long result = 0;
+
+ struct stack_segment_linkage *ssptr;
+
+ /* Register B67 contains the address of the end of the
+ current stack segment. If you (as a subprogram) store
+ your registers on the stack and find that you are past
+ the contents of B67, you have overflowed the segment.
+
+ B67 also points to the stack segment linkage control
+ area, which is what we are really interested in. */
+
+ stkl = CRAY_STACKSEG_END ();
+ ssptr = (struct stack_segment_linkage *) stkl;
+
+ /* If one subtracts 'size' from the end of the segment,
+ one has the address of the first word of the segment.
+
+ If this is not the first segment, 'pseg' will be
+ nonzero. */
+
+ pseg = ssptr->sspseg;
+ size = ssptr->sssize;
+
+ this_segment = stkl - size;
+
+ /* It is possible that calling this routine itself caused
+ a stack overflow. Discard stack segments which do not
+ contain the target address. */
+
+ while (!(this_segment <= address && address <= stkl))
+ {
+#ifdef DEBUG_I00AFUNC
+ fprintf (stderr, "%011lo %011lo %011lo\n", this_segment, address, stkl);
+#endif
+ if (pseg == 0)
+ break; /* No earlier segment left to try. */
+ stkl = stkl - pseg;
+ ssptr = (struct stack_segment_linkage *) stkl;
+ size = ssptr->sssize;
+ pseg = ssptr->sspseg;
+ this_segment = stkl - size;
+ }
+
+ result = address - this_segment;
+
+ /* If you subtract pseg from the current end of the stack,
+ you get the address of the previous stack segment's end.
+ This seems a little convoluted to me, but I'll bet you save
+ a cycle somewhere. */
+
+ while (pseg != 0)
+ {
+#ifdef DEBUG_I00AFUNC
+ fprintf (stderr, "%011lo %011lo\n", pseg, size); /* Both are long. */
+#endif
+ stkl = stkl - pseg;
+ ssptr = (struct stack_segment_linkage *) stkl;
+ size = ssptr->sssize;
+ pseg = ssptr->sspseg;
+ result += size; /* Add the earlier segment's word count. */
+ }
+ return (result);
+}
+
+#endif /* not CRAY2 */
+#endif /* CRAY */
+
+#endif /* no alloca */
+#endif /* not GCC version 2 */
diff --git a/gnu/usr.bin/ptx/argmatch.c b/gnu/usr.bin/ptx/argmatch.c
new file mode 100644
index 0000000..17e088b
--- /dev/null
+++ b/gnu/usr.bin/ptx/argmatch.c
@@ -0,0 +1,94 @@
+/* argmatch.c -- find a match for a string in an array
+ Copyright (C) 1990 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by David MacKenzie <djm@ai.mit.edu> */
+
+#ifdef HAVE_CONFIG_H
+#if defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+#include <stdio.h>
+#ifdef STDC_HEADERS
+#include <string.h>
+#endif
+
+extern char *program_name;
+
+/* If ARG is an unambiguous match for an element of the
+ null-terminated array OPTLIST, return the index in OPTLIST
+ of the matched element, else -1 if it does not match any element
+ or -2 if it is ambiguous (is a prefix of more than one element). */
+
+int
+argmatch (arg, optlist)
+     char *arg;
+     char **optlist;
+{
+  int arglen = strlen (arg);    /* Number of characters in ARG.  */
+  int partial_count = 0;        /* Entries ARG abbreviates: 0, 1, or 2 for "several".  */
+  int first_partial = -1;       /* Index of the first abbreviated entry, if any.  */
+  int idx;                      /* Position being examined in OPTLIST.  */
+
+  /* One pass over OPTLIST: an exact match wins on the spot, whereas
+     entries that ARG merely abbreviates are only tallied.  */
+  for (idx = 0; optlist[idx] != 0; idx++)
+    {
+      if (strncmp (optlist[idx], arg, arglen) != 0)
+        continue;
+
+      /* The first ARGLEN characters agree, so the entry equals ARG
+         exactly when it ends right there.  */
+      if (optlist[idx][arglen] == '\0')
+        return idx;
+
+      if (partial_count == 0)
+        first_partial = idx;
+      if (partial_count < 2)
+        partial_count++;
+    }
+
+  /* No exact match: -2 when ARG abbreviates several entries, otherwise
+     the single abbreviated entry, or -1 when nothing matched at all.  */
+  return partial_count > 1 ? -2 : first_partial;
+}
+
+/* Error reporting for argmatch.
+ KIND is a description of the type of entity that was being matched.
+ VALUE is the invalid value that was given.
+ PROBLEM is the return value from argmatch. */
+
+void
+invalid_arg (kind, value, problem)
+     char *kind;
+     char *value;
+     int problem;
+{
+  /* Any PROBLEM other than -1 is taken to be -2, an ambiguous match.  */
+  char *verdict = (problem == -1) ? "invalid" : "ambiguous";
+
+  /* The report goes to stderr as: PROGRAM: VERDICT KIND `VALUE'  */
+  fprintf (stderr, "%s: ", program_name);
+  fputs (verdict, stderr);
+  fprintf (stderr, " %s `%s'\n", kind, value);
+}
diff --git a/gnu/usr.bin/ptx/bumpalloc.h b/gnu/usr.bin/ptx/bumpalloc.h
new file mode 100644
index 0000000..bbf901f
--- /dev/null
+++ b/gnu/usr.bin/ptx/bumpalloc.h
@@ -0,0 +1,58 @@
+/* BUMP_ALLOC macro - increase table allocation by one element.
+ Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
+ Francois Pinard <pinard@iro.umontreal.ca>, 1990.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/*-------------------------------------------------------------------------.
+| Bump the allocation of the array pointed to by TABLE whenever required. |
+| The table already has COUNT elements in it; this macro ensures that it |
+| has enough space to accommodate at least one more element. Space is |
+| allocated (2 ^ EXPONENT) elements at a time. Each element of the array |
+| is of type TYPE. |
+`-------------------------------------------------------------------------*/
+
+/* Routines `xmalloc' and `xrealloc' are called to do the actual memory
+ management. This implies that the program will abort with a `Virtual
+ Memory exhausted!' error if any problem arises.
+
+ To work correctly, at least EXPONENT and TYPE should always be the
+ same for all uses of this macro for any given TABLE. A secure way to
+ achieve this is to never use this macro directly, but use it to define
+ other macros, which would then be TABLE-specific.
+
+ The first time through, COUNT is usually zero. Note that COUNT is not
+ updated by this macro, but it should be updated elsewhere, later. This
+ is convenient, because it allows TABLE[COUNT] to refer to the new
+ element at the end. Once its construction is completed, COUNT++ will
+ record it in the table. Calling this macro several times in a row
+ without updating COUNT is a bad thing to do. */
+
+/* Make room for TABLE[COUNT], the elements of TABLE being of type TYPE.
+   Expands to an `if ... else' lacking its final statement, so every use
+   must be followed by a statement; normally this is just the semicolon
+   that ends the call.  */
+
+#define BUMP_ALLOC(Table, Count, Exponent, Type) \
+  BUMP_ALLOC_WITH_SIZE ((Table), (Count), (Exponent), Type, sizeof (Type))
+
+/* In cases `sizeof TYPE' would not always yield the correct value for
+   the size of each element entry, this macro accepts a supplementary
+   SIZE argument.  The EXPONENT, TYPE and SIZE parameters should still
+   have the same value for all macro calls related to a specific TABLE.
+
+   The table is (re)allocated only when COUNT is a multiple of
+   2 ^ EXPONENT: xmalloc is used when COUNT is zero, xrealloc otherwise.
+   The low-bit mask is written (1 << EXPONENT) - 1 rather than
+   ~(~0 << EXPONENT), because left-shifting a negative int is undefined.
+   The inner `if' is braced so that the trailing `else' unambiguously
+   pairs with the outer test.  */
+
+#define BUMP_ALLOC_WITH_SIZE(Table, Count, Exponent, Type, Size) \
+  if (((Count) & ((1 << (Exponent)) - 1)) == 0) \
+    { \
+      if ((Count) == 0) \
+        (Table) = (Type *) xmalloc ((1 << (Exponent)) * (Size)); \
+      else \
+        (Table) = (Type *) \
+          xrealloc ((Table), ((Count) + (1 << (Exponent))) * (Size)); \
+    } \
+  else
diff --git a/gnu/usr.bin/ptx/check-out b/gnu/usr.bin/ptx/check-out
new file mode 100644
index 0000000..4d13c48
--- /dev/null
+++ b/gnu/usr.bin/ptx/check-out
@@ -0,0 +1,65 @@
+:30: /ranslate to certain respons ibilities for you if you distr/
+:183: c/ These actions are proh ibited by law if you do not ac
+:278: AS BEEN ADVISED OF THE POSS IBILITY OF SUCH DAMAGES. /Y H
+:232: /his License may add an expl icit geographical distribution/
+:267: /COST OF ALL NECESSARY SERV ICING, REPAIR OR CORRECTION.
+:216: /ht claims or to contest val idity of any such claims; this/
+:45: e/ If the software is mod ified by someone else and pass
+:57: pying, distribution and mod ification follow. /for co
+:60: /PYING, DISTRIBUTION AND MOD IFICATION 0. This License a/
+:68: /either verbatim or with mod ifications and/or translated i/
+:70: limitation in the term "mod ification".) /ithout
+:72: /pying, distribution and mod ification are not covered by t/
+:92: /opy and distribute such mod ifications or work under the t/
+:95: /a) You must cause the mod ified files to carry prominent/
+:103: ommands in/ c) If the mod ified program normally reads c
+:114: quirements apply to the mod ified work as a whole. /se re
+:115: are not derived/ If ident ifiable sections of that work
+:156: of the work for making mod ifications to it. /ed form
+:243: Lice/ If the Program spec ifies a version number of this
+:46: /hat they have is not the or iginal, so that any problems i/
+:47: /will not reflect on the or iginal authors' reputations.
+:191: /eives a license from the or iginal licensor to copy, distr/
+:231: /yrighted interfaces, the or iginal copyright holder who pl/
+:265: /ED WARRANTIES OF MERCHANTAB ILITY AND FITNESS FOR A PARTIC/
+:274: /NG OUT OF THE USE OR INAB ILITY TO USE THE PROGRAM (INCL/
+:303: /warranty of MERCHANTAB ILITY or FITNESS FOR A PARTICU/
+:69: /ation is included without l imitation in the term "modific/
+:198: /for any other reason (not l imited to patent issues), cond/
+:232: /geographical distribution l imitation excluding those coun/
+:235: /License incorporates the l imitation as if written in the/
+:239: Such new versions will be s imilar in spirit to the presen/
+:264: /PLIED, INCLUDING, BUT NOT L IMITED TO, THE IMPLIED WARRANT/
+:274: /ROGRAM (INCLUDING BUT NOT L IMITED TO LOSS OF DATA OR DATA/
+:67: /hat is to say, a work conta ining the Program or a portion/
+:158: /ny associated interface def inition files, plus the script/
+:34: /fee, you must give the rec ipients all the rights that yo/
+:46: /passed on, we want its rec ipients to know that what they/
+:84: /nty; and give any other rec ipients of the Program a copy/
+:190: /ed on the Program), the rec ipient automatically receives/
+:193: /her restrictions on the rec ipients' exercise of the right/
+:239: /sions will be similar in sp irit to the present version, b/
+:254: o goals of prese/ Our dec ision will be guided by the tw
+:273: /OR CONSEQUENTIAL DAMAGES AR ISING OUT OF THE USE OR INAB/
+:315: /teractive mode: Gnomov ision version 69, Copyright (C/
+:316: /y name of author Gnomov ision comes with ABSOLUTELY NO/
+:330: /st in the program `Gnomov ision' (which makes passes at/
+:30: /late to certain responsibil ities for you if you distribut/
+:56: The precise terms and cond itions for copying, distributi/
+:60: /C LICENSE TERMS AND COND ITIONS FOR COPYING, DISTRIBUTI/
+:93: /also meet all of these cond itions: a) You must cause/
+:109: /rogram under these cond itions, and telling the user h/
+:129: ther work not bas/ In add ition, mere aggregation of ano
+:186: /and all its terms and cond itions for copying, distributi/
+:192: ect to these terms and cond itions. /am subj
+:199: /ted to patent issues), cond itions are imposed on you (whe/
+:200: /e) that contradict the cond itions of this License, they d/
+:201: ot excuse you from the cond itions of this License. /do n
+:244: /ollowing the terms and cond itions either of that version/
+:251: /ams whose distribution cond itions are different, write to/
+:262: /WHEN OTHERWISE STATED IN WR ITING THE COPYRIGHT HOLDERS AN/
+:270: /ABLE LAW OR AGREED TO IN WR ITING WILL ANY COPYRIGHT HOLDE/
+:280: ly/ END OF TERMS AND COND ITIONS Appendix: How to App
+:318: /e it under certain cond itions; type `show c' for deta/
+:52: /of a free program will ind ividually obtain patent licens/
+:72: stribution and mod/ Act ivities other than copying, di
diff --git a/gnu/usr.bin/ptx/config.h b/gnu/usr.bin/ptx/config.h
new file mode 100644
index 0000000..93e7ed1
--- /dev/null
+++ b/gnu/usr.bin/ptx/config.h
@@ -0,0 +1,57 @@
+/* config.h. Generated automatically by configure. */
+/* config.h.in. Generated automatically from configure.in by autoheader. */
+
+/* Define if using alloca.c. */
+/* #undef C_ALLOCA */
+
+/* Define if type char is unsigned and you are not using gcc. */
+/* #undef __CHAR_UNSIGNED__ */
+
+/* Define to empty if the keyword does not work. */
+/* #undef const */
+
+/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems.
+ This function is required for alloca.c support on those systems. */
+/* #undef CRAY_STACKSEG_END */
+
+/* Define if you have alloca.h and it should be used (not Ultrix). */
+/* #undef HAVE_ALLOCA_H */
+
+/* Define if you don't have vprintf but do have _doprnt. */
+/* #undef HAVE_DOPRNT */
+
+/* Define if you have the vprintf function. */
+#define HAVE_VPRINTF 1
+
+/* Define if you need to in order for stat and other things to work. */
+/* #undef _POSIX_SOURCE */
+
+/* If using the C implementation of alloca, define if you know the
+ direction of stack growth for your system; otherwise it will be
+ automatically deduced at run-time.
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
+ */
+/* #undef STACK_DIRECTION */
+
+/* Define if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* In regex, request the capability of modifying the letter syntax. */
+#define SYNTAX_TABLE 1
+
+/* In regex, use 8 bits per character. */
+#define CHAR_SET_SIZE 256
+
+/* Define if you have mcheck. */
+/* #undef HAVE_MCHECK */
+
+/* Define if you have setchrclass. */
+/* #undef HAVE_SETCHRCLASS */
+
+/* Define if you have strerror. */
+#define HAVE_STRERROR 1
+
+/* Define if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
diff --git a/gnu/usr.bin/ptx/diacrit.c b/gnu/usr.bin/ptx/diacrit.c
new file mode 100644
index 0000000..29e319b
--- /dev/null
+++ b/gnu/usr.bin/ptx/diacrit.c
@@ -0,0 +1,148 @@
+/* Diacritics processing for a few character codes.
+ Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+ Francois Pinard <pinard@iro.umontreal.ca>, 1988.
+
+ All this file is a temporary hack, waiting for locales in GNU.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "diacrit.h"
+
+/* ISO 8859-1 Latin-1 code is used as the underlying character set. If
+ MSDOS is defined, IBM-PC's character set code is used instead. */
+
+/*--------------------------------------------------------------------.
+| For each alphabetic character, returns what it would be without its |
+| possible diacritic symbol.  Characters for which no base letter is  |
+| recorded map to 0.  Entries 0x00-0x7F cover ASCII; entries          |
+| 0x80-0xFF follow ISO 8859-1, or the IBM-PC set if MSDOS is defined. |
+`--------------------------------------------------------------------*/
+
+const char diacrit_base[256] =
+{
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    'A',  'B',  'C',  'D',  'E',  'F',  'G',
+  'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
+  'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
+  'X',  'Y',  'Z',  0,    0,    0,    0,    0,
+  0,    'a',  'b',  'c',  'd',  'e',  'f',  'g',
+  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
+  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
+  'x',  'y',  'z',  0,    0,    0,    0,    0,
+
+#ifdef MSDOS
+
+  'C',  'u',  'e',  'a',  'a',  'a',  'a',  'c',
+  'e',  'e',  'e',  'i',  'i',  'i',  'A',  'A',
+  /* 0x91 and 0x92 are the ae/AE ligatures: their base is a/A, as in
+     the Latin-1 table below, the `e' being their diacritic (code 1).  */
+  'E',  'a',  'A',  'o',  'o',  'o',  'u',  'u',
+  'y',  'O',  'U',  0,    0,    0,    0,    0,
+  'a',  'i',  'o',  'u',  'n',  'N',  0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+
+#else /* not MSDOS */
+
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  'A',  'A',  'A',  'A',  'A',  'A',  'A',  'C',
+  'E',  'E',  'E',  'E',  'I',  'I',  'I',  'I',
+  0,    'N',  'O',  'O',  'O',  'O',  'O',  0,
+  'O',  'U',  'U',  'U',  'U',  'Y',  0,    0,
+  'a',  'a',  'a',  'a',  'a',  'a',  'a',  'c',
+  'e',  'e',  'e',  'e',  'i',  'i',  'i',  'i',
+  0,    'n',  'o',  'o',  'o',  'o',  'o',  0,
+  'o',  'u',  'u',  'u',  'u',  'y',  0,    'y',
+
+#endif /* not MSDOS */
+};
+
+/*-------------------------------------------------------------------------.
+| For each alphabetic character, returns a code of what its diacritic is,  |
+| according to the following codes: 1 (eE) over aA for latin diphthongs; 2 |
+| (') acute accent; 3 (`) grave accent; 4 (^) circumflex accent; 5 (")     |
+| umlaut or diaeresis; 6 (~) tilde; 7 (,) cedilla; 8 (o) covering degree   |
+| symbol; 9 (|) slashed character.  A character without diacritic yields   |
+| 0.  The ASCII characters ^ ` and ~ carry the code of their own accent.   |
+`-------------------------------------------------------------------------*/
+
+const char diacrit_diac[256] =
+{
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    4,    0,
+  3,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    6,    0,
+
+#ifdef MSDOS
+
+  7,    5,    2,    4,    5,    3,    8,    7,
+  4,    5,    3,    5,    4,    3,    5,    8,
+  2,    1,    1,    4,    5,    3,    4,    3,
+  5,    5,    5,    0,    0,    0,    0,    0,
+  2,    2,    2,    2,    6,    6,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+
+#else /* not MSDOS */
+
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  0,    0,    0,    0,    0,    0,    0,    0,
+  3,    2,    4,    6,    5,    8,    1,    7,
+  3,    2,    4,    5,    3,    2,    4,    5,
+  0,    6,    3,    2,    4,    6,    5,    0,
+  9,    3,    2,    4,    5,    2,    0,    0,
+  3,    2,    4,    6,    5,    8,    1,    7,
+  3,    2,    4,    5,    3,    2,    4,    5,
+  0,    6,    3,    2,    4,    6,    5,    0,
+  /* The last entry, 0xFF, is y with diaeresis: diacrit_base gives it
+     the base letter y, so it must carry code 5 here rather than 0.  */
+  9,    3,    2,    4,    5,    2,    0,    5,
+
+#endif /* not MSDOS */
+};
diff --git a/gnu/usr.bin/ptx/diacrit.h b/gnu/usr.bin/ptx/diacrit.h
new file mode 100644
index 0000000..c880a45
--- /dev/null
+++ b/gnu/usr.bin/ptx/diacrit.h
@@ -0,0 +1,16 @@
+/* Diacritics processing for a few character codes.
+ Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+ Francois Pinard <pinard@iro.umontreal.ca>, 1988.
+
+ All this file is a temporary hack, waiting for locales in GNU.
+*/
+
+/* Lookup tables defined in diacrit.c; index them with a character code
+   converted to unsigned char.  */
+extern const char diacrit_base[];      /* base letter of each character */
+extern const char diacrit_diac[];      /* diacritic code of each character */
+
+/* Yields the letter underlying C once its diacritic is removed.  The
+   caller guarantees that C is alphabetic.  */
+#define tobase(c) (diacrit_base[(unsigned char) (c)])
+
+/* Yields the code of the diacritic carried by C.  The caller guarantees
+   that C is alphabetic.  */
+#define todiac(c) (diacrit_diac[(unsigned char) (c)])
+
diff --git a/gnu/usr.bin/ptx/error.c b/gnu/usr.bin/ptx/error.c
new file mode 100644
index 0000000..41d66fb
--- /dev/null
+++ b/gnu/usr.bin/ptx/error.c
@@ -0,0 +1,117 @@
+/* error.c -- error handler for noninteractive utilities
+ Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* Written by David MacKenzie. */
+
+#ifdef HAVE_CONFIG_H
+#if defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+#include <stdio.h>
+
+#ifdef HAVE_VPRINTF
+
+#if __STDC__
+#include <stdarg.h>
+#define VA_START(args, lastarg) va_start(args, lastarg)
+#else /* !__STDC__ */
+#include <varargs.h>
+#define VA_START(args, lastarg) va_start(args)
+#endif /* !__STDC__ */
+
+#else /* !HAVE_VPRINTF */
+
+#ifdef HAVE_DOPRNT
+#define va_alist args
+#define va_dcl int args;
+#else /* !HAVE_DOPRNT */
+#define va_alist a1, a2, a3, a4, a5, a6, a7, a8
+#define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
+#endif /* !HAVE_DOPRNT */
+
+#endif /* !HAVE_VPRINTF */
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#include <string.h>
+#else /* !STDC_HEADERS */
+void exit ();
+#endif /* !STDC_HEADERS */
+
+extern char *program_name;
+
+#ifndef HAVE_STRERROR
+/* Substitute for strerror on systems lacking it: return the system
+   message for error number ERRNUM, or a fixed fallback string when
+   ERRNUM is outside the table.  */
+static char *
+private_strerror (errnum)
+     int errnum;
+{
+  extern char *sys_errlist[];
+  extern int sys_nerr;
+
+  /* sys_nerr counts the entries of sys_errlist, so the highest valid
+     index is sys_nerr - 1; accepting ERRNUM == sys_nerr would read one
+     element past the end of the table.  */
+  if (errnum > 0 && errnum < sys_nerr)
+    return sys_errlist[errnum];
+  return "Unknown system error";
+}
+#define strerror private_strerror
+#endif /* !HAVE_STRERROR */
+
+/* Print the program name and error message MESSAGE, which is a printf-style
+ format string with optional args.
+ If ERRNUM is nonzero, print its corresponding system error message.
+ Exit with status STATUS if it is nonzero.
+
+ Everything is written to stderr, on a single line of the form
+ PROGRAM_NAME: MESSAGE[: SYSTEM ERROR TEXT]
+ and stderr is flushed before returning or exiting. The optional
+ arguments are fetched in one of three ways, chosen at compile time:
+ through vfprintf, through _doprnt, or by passing eight pointer-sized
+ arguments straight on to fprintf. */
+/* VARARGS */
+void
+#if defined (HAVE_VPRINTF) && __STDC__ /* ANSI prototype with ellipsis. */
+error (int status, int errnum, char *message, ...)
+#else /* !HAVE_VPRINTF or !__STDC__ */
+error (status, errnum, message, va_alist) /* Old-style definition. */
+ int status;
+ int errnum;
+ char *message;
+ va_dcl
+#endif /* !HAVE_VPRINTF or !__STDC__ */
+{
+#ifdef HAVE_VPRINTF
+ va_list args;
+#endif /* HAVE_VPRINTF */
+
+ fprintf (stderr, "%s: ", program_name); /* Prefix naming the program. */
+#ifdef HAVE_VPRINTF
+ VA_START (args, message);
+ vfprintf (stderr, message, args);
+ va_end (args);
+#else /* !HAVE_VPRINTF */
+#ifdef HAVE_DOPRNT
+ _doprnt (message, &args, stderr); /* Here `args' is the int parameter
+ that va_dcl declares in this configuration. */
+#else /* !HAVE_DOPRNT */
+ fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8); /* At most
+ eight optional arguments get through in this configuration. */
+#endif /* !HAVE_DOPRNT */
+#endif /* !HAVE_VPRINTF */
+ if (errnum)
+ fprintf (stderr, ": %s", strerror (errnum));
+ putc ('\n', stderr);
+ fflush (stderr);
+ if (status)
+ exit (status); /* A zero STATUS lets the caller carry on. */
+}
diff --git a/gnu/usr.bin/ptx/examples/README b/gnu/usr.bin/ptx/examples/README
new file mode 100644
index 0000000..038034f
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/README
@@ -0,0 +1,21 @@
+Various examples of GNU ptx usages.
+Francois Pinard <pinard@iro.umontreal.ca>, 1993.
+
+This directory contains a few examples contributed by GNU ptx users.
+Feel free to look at them for tricks or ideas. When an example
+requires many files, a subdirectory is used to hold them together.
+I have not necessarily tested these examples recently, if at all.
+
+If you have examples you would like to share, please submit them to
+me. You may also submit corrections to the examples given in this
+directory, however, please write to the authors first, since they most
+probably will like to have their say about their own contribution.
+
+* include.pl: A Perl script studying system include files.
+
+* luke/: A shell script permuting indices for man pages. It contains
+two examples of an .xx definition for *roff, one simple, one complex.
+
+* latex/: A simple example of \xx definition for latex.
+
+* ajay/: A more complex application of latex with ptx.
diff --git a/gnu/usr.bin/ptx/examples/ajay/Makefile b/gnu/usr.bin/ptx/examples/ajay/Makefile
new file mode 100644
index 0000000..bff099c
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ajay/Makefile
@@ -0,0 +1,28 @@
+JUNKFILES = tip-index.ps tip-index.dvi tip-index.tex tip-index.log \
+ tip-index.aux
+
+tip-index.ps : tip-index.dvi # Default goal: the index as PostScript.
+ dvips tip-index.dvi
+
+tip-index.dvi : tip-index.tex # Typeset the assembled document.
+ latex tip-index.tex
+
+tip-index.tex : tip.texified header.tex footer.tex # Wrap the entries between header and footer.
+ cat header.tex tip.texified footer.tex > tip-index.tex
+
+tip.texified : tip.eign tip.forgptx Makefile # Run gptx on the input, then filter its output through x.pl.
+ gptx -f -r -i ./tip.eign -T < tip.forgptx | x.pl > tip.texified
+
+tip.eign : /usr/lib/eign exclude-words # Ignore list: the system eign file plus local exclusions.
+ cat /usr/lib/eign exclude-words > tip.eign
+
+screenlist : tip.texified # Count occurrences of the fourth {-separated field, sorted numerically.
+ cat tip.texified \
+ | gawk -F\{ '{count[$$4]++} \
+ END {for (s in count) printf("%d %20s\n", count[s], s)}' \
+ | tr -d '}' \
+ | sort -n > screenlist
+ @echo "Check (say) the last 100 lines of ./screenlist".
+
+clean : # Remove every generated file.
+ rm -f tip.eign tip.texified $(JUNKFILES) screenlist
diff --git a/gnu/usr.bin/ptx/examples/ajay/README b/gnu/usr.bin/ptx/examples/ajay/README
new file mode 100644
index 0000000..7b55ca2
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ajay/README
@@ -0,0 +1,41 @@
+To: pinard@iro.umontreal.ca
+Subject: Re: Gptx suggestions and help request
+Date: Tue, 28 Sep 93 11:30:04 +0500
+From: ajayshah@cmie.ernet.in
+
+[...] My plaintext input looks like: "pagenum multiword-phrase" where
+the multiword phrase is at most five words. So [...], I'm doing two
+columns in small type.
+
+I got one of the programmers here to write me a tex macro for my
+problem. When it goes into production I'll mail you a few files: a
+sample input, the gptx command, the output, and the tex macro. If you
+find these interesting you can ship them with future gptx releases.
+
+Thanks a lot for gptx. If you have a mailing list of loyal users,
+you can add us to it :-)
+
+
+To: pinard@iro.umontreal.ca
+Cc: rk@cmie.ernet.in
+Subject: All glue code I used with gptx
+Date: Tue, 05 Oct 93 15:23:44 +0500
+From: ajayshah@zigma.cmie.ernet.in
+
+That is a full set of files for an example of "production use". You
+are welcome to post them, or use them as a sample supplied with the
+gptx distribution, etc., with absolutely no restrictions on what
+anyone does with this. In case you do so, please acknowledge the
+contribution of Rakesh Chauhan, rk@cmie.ernet.in, who is the author of
+x.pl and header.tex. [...]
+
+As you can tell, I used it for a 100% realworld problem, and it
+worked. Thanks a million. If you'd like, I can send you a hardcopy
+of the full finished document (just send me your mailing address). If
+you would like to mention the name of this document when you use
+these files as a demo, it is
+
+ Trends in Industrial Production
+ September 1993
+ Centre for Monitoring Indian Economy, Bombay, India.
+
diff --git a/gnu/usr.bin/ptx/examples/ajay/footer.tex b/gnu/usr.bin/ptx/examples/ajay/footer.tex
new file mode 100644
index 0000000..6b47932
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ajay/footer.tex
@@ -0,0 +1 @@
+\end{document}
diff --git a/gnu/usr.bin/ptx/examples/ajay/header.tex b/gnu/usr.bin/ptx/examples/ajay/header.tex
new file mode 100644
index 0000000..04a9c64
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ajay/header.tex
@@ -0,0 +1,21 @@
+\documentstyle [twocolumn,a4]{article}
+
+\pagestyle{empty}
+
+\textwidth 6.8in
+\oddsidemargin -.8in
+\evensidemargin -.8in
+\textheight 10in
+\topmargin -1in
+% \columnseprule 1pt
+
+\begin{document}
+
+\def\xx #1#2#3#4#5#6{\hbox to \hsize{%
+\hbox to 1.4in{\hfill #2}\hskip .05in%
+\hbox to .8in{\it #3\hfil}\hskip .05in%
+\hbox to 1.4in{#4\hfil}\hskip .05in%
+\hbox{\hfil #6}\hfil}%
+}
+
+\scriptsize
diff --git a/gnu/usr.bin/ptx/examples/ajay/tip.forgptx b/gnu/usr.bin/ptx/examples/ajay/tip.forgptx
new file mode 100644
index 0000000..ecf6e0e
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ajay/tip.forgptx
@@ -0,0 +1,10 @@
+1 Zinc concentrate
+1 Coal
+1 Ball clay
+1 Non-coking coal
+1 Calcareous sand
+1 Natural Gas
+1 Chalk
+1 Bauxite
+1 Clay (others)
+1 Copper ore
diff --git a/gnu/usr.bin/ptx/examples/ajay/x.pl b/gnu/usr.bin/ptx/examples/ajay/x.pl
new file mode 100644
index 0000000..e0615ba
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ajay/x.pl
@@ -0,0 +1,22 @@
+#! /usr/local/bin/perl
+
+# Filter for the \xx lines read from the files named on the command
+# line, or from standard input: each brace-delimited field is cut down
+# to 17 characters, a backslash left dangling at the end of a cut field
+# is dropped, and the line is written out again in \xx {..}{..} form.
+
+while ($line = <>)
+{
+    chop $line;
+
+    # Reduce the line to its fields, separated by `|'.
+    $line =~ s/\\xx //;
+    $line =~ s/}{/|/g;
+    $line =~ s/{//g;
+    $line =~ s/}//g;
+
+    print "\\xx ";
+    foreach $field (split(/\|/, $line))
+    {
+        $text = substr($field, 0, 17);
+        $text =~ s/\\$//;
+        print "{", $text, "}";
+    }
+    print "\n";
+}
diff --git a/gnu/usr.bin/ptx/examples/ignore/README b/gnu/usr.bin/ptx/examples/ignore/README
new file mode 100644
index 0000000..33ee19e
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ignore/README
@@ -0,0 +1,65 @@
+From beebe@math.utah.edu Wed Oct 27 19:37:22 1993
+Date: Tue, 26 Oct 93 15:43:19 MDT
+From: "Nelson H. F. Beebe" <beebe@math.utah.edu>
+To: pinard@iro.umontreal.ca
+Subject: Re: Another short comment on gptx 0.2
+
+/usr/lib/eign: DECstation 5000, ULTRIX 4.3
+ HP 9000/735, HP-UX 9.0
+ IBM RS/6000, AIX 2.3
+ IBM 3090, AIX MP370 2.1
+ Stardent 1520, OS 2.2
+ Sun SPARCstation, SunOS 4.x
+
+No eign anywhere on: HP 375, BSD 4.3 (ptx.c is in /usr/src/usr.bin,
+ and the source code refers to /usr/lib/eign,
+ but I could not find it in the source tree)
+ NeXT, Mach 3.0 (though documented in man pages)
+ Sun SPARCstation, Solaris 2.x
+ SGI Indigo, IRIX 4.0.x
+
+The contents of the eign files that I found on the above machines were
+almost identical. With the exception of the Stardent and the IBM
+3090, there were only two such files, one with 150 words, and the
+other with 133, with only a few differences between them (some words
+in the 133-word file were not in the 150-word file). I found the
+133-word variant in groff-1.06/src/indxbib. I used archie to search
+for eign, and it found 7 sites, all with the groff versions.
+
+The Stardent and IBM 3090 eign files have the same contents as the
+150-word version, but have a multiline copyright comment at the
+beginning. None of the others contains a copyright.
+
+I recently had occasion to build a similar list of words for bibindex,
+which indexes a BibTeX .bib file, and for which omission of common
+words, like articles and prepositions, helps to reduce the size of the
+index. I didn't use eign to build that list, but instead, went
+through the word lists from 3.8MB of .bib files in the tuglib
+collection on ftp.math.utah.edu:pub/tex/bib, and collected words to be
+ignored. That list includes words from several languages. I'll leave
+it up to you to decide whether you wish to merge them or not; I
+suspect it may be a better design choice to keep a separate eign file
+for each language, although in my own application of ptx-ing
+bibliographies, the titles do occur in multiple languages, so a
+mixed-language eign is appropriate. Since there are standard ISO
+2-letter abbreviations for every country, perhaps one could have
+eign.xy for country xy (of course, only approximately is country ==
+language). The exact list of words in eign is not so critical; its
+only purpose is to reduce the size of the output by not indexing words
+that occur very frequently and have little content in themselves.
+
+I'm enclosing a shar bundle at the end of this message with the merger
+of the multiple eign versions (duplicates eliminated, and the list
+sorted into 179 unique words), followed by the bibindex list.
+
+
+
+========================================================================
+Nelson H. F. Beebe Tel: +1 801 581 5254
+Center for Scientific Computing FAX: +1 801 581 4148
+Department of Mathematics, 105 JWB Internet: beebe@math.utah.edu
+University of Utah
+Salt Lake City, UT 84112, USA
+========================================================================
+
+
diff --git a/gnu/usr.bin/ptx/examples/ignore/bix b/gnu/usr.bin/ptx/examples/ignore/bix
new file mode 100644
index 0000000..b9a8ba6
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ignore/bix
@@ -0,0 +1,109 @@
+ab
+aber
+als
+an
+and
+are
+as
+auf
+aus
+az
+bei
+bir
+but
+da
+das
+dat
+de
+dei
+dem
+den
+der
+des
+det
+di
+die
+dos
+een
+eene
+egy
+ei
+ein
+eine
+einen
+einer
+eines
+eit
+el
+en
+er
+es
+et
+ett
+eyn
+eyne
+for
+from
+fuer
+fur
+gl
+gli
+ha
+haben
+had
+hai
+has
+hat
+have
+he
+heis
+hen
+hena
+henas
+het
+hin
+hinar
+hinir
+hinn
+hith
+ho
+hoi
+il
+in
+ist
+ka
+ke
+la
+las
+le
+les
+lo
+los
+mia
+mit
+na
+nji
+not
+oder
+of
+on
+or
+os
+others
+sie
+sind
+so
+ta
+the
+to
+um
+uma
+un
+una
+und
+une
+uno
+unter
+von
+with
+yr
diff --git a/gnu/usr.bin/ptx/examples/ignore/eign b/gnu/usr.bin/ptx/examples/ignore/eign
new file mode 100644
index 0000000..0401245
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/ignore/eign
@@ -0,0 +1,163 @@
+a
+about
+after
+against
+all
+also
+an
+and
+another
+any
+are
+as
+at
+back
+be
+because
+been
+before
+being
+between
+both
+but
+by
+came
+can
+come
+could
+current
+day
+did
+do
+down
+each
+end
+even
+first
+for
+from
+get
+go
+good
+great
+had
+has
+have
+he
+her
+here
+him
+his
+how
+i
+if
+in
+into
+is
+it
+its
+just
+know
+last
+life
+like
+little
+long
+made
+make
+man
+many
+may
+me
+men
+might
+more
+most
+mr
+much
+must
+my
+name
+never
+new
+no
+not
+now
+of
+off
+old
+on
+one
+only
+or
+other
+our
+out
+over
+own
+part
+people
+point
+right
+said
+same
+say
+see
+she
+should
+since
+so
+some
+start
+state
+still
+such
+take
+than
+that
+the
+their
+them
+then
+there
+these
+they
+this
+those
+three
+through
+time
+to
+too
+true
+try
+two
+under
+up
+us
+use
+used
+value
+very
+was
+way
+we
+well
+were
+what
+when
+where
+which
+while
+who
+why
+will
+with
+without
+work
+world
+would
+year
+years
+you
+your
diff --git a/gnu/usr.bin/ptx/examples/include.pl b/gnu/usr.bin/ptx/examples/include.pl
new file mode 100755
index 0000000..cb3c0ff
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/include.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl -- # -*-Perl-*-
+eval "exec /usr/bin/perl -S $0 $*"
+    if $running_under_some_shell;
+
+# Construct a permuted index for all system include files.
+# Copyright (C) 1991 Free Software Foundation, Inc.
+# Francois Pinard <pinard@iro.umontreal.ca>, June 1991.
+
+# The script writes a list of words to ignore into a temporary file,
+# starts ptx reading from a pipe, and feeds it every regular file found
+# in /usr/include and /usr/include/sys, stripped of C comments and with
+# a leading "file(line): " reference on each line.
+
+# NOTE: about removing asm statements?
+# NOTE: about removing strings?
+# NOTE: about ignoring 0xHEXDIGITS, unchar/ushort/etc.
+
+# Construct a sorted list of system include files.
+
+opendir (DIR, "/usr/include");
+@includes = sort grep (-f "/usr/include/$_", readdir (DIR));
+closedir (DIR);
+opendir (DIR, "/usr/include/sys");
+foreach (sort grep (-f "/usr/include/sys/$_", readdir (DIR))) {
+    push (@includes, "sys/$_");
+}
+closedir (DIR);
+
+# Launch the permuted indexer, with a list of ignore words.
+
+$ignore = "/tmp/incptx.$$";
+open (IGNORE, "> $ignore") || die "Cannot create $ignore: $!\n";
+print IGNORE join ("\n", split (' ', <<IGNORE)), "\n";
+asm at at386 break bss case ch char continue copyright corporation
+default define defined do double dst else endif enum extern file flag
+float for goto i286 i386 ident if ifdef ifndef int interactive len
+lint long m32 mpat num pdp11 printf ptr register return sco5 short siz
+sizeof src static str struct sun switch sys systems type typedef u370
+u3b u3b15 u3b2 u3b5 undef union unsigned vax void while win
+IGNORE
+close IGNORE;
+
+open (OUTPUT, "| ptx -r -f -W '[a-zA-Z_][a-zA-Z_0-9]+' -F ... -i $ignore")
+    || die "ptx did not start\n";
+select (OUTPUT);
+
+# Reformat all files, removing C comments and adding a reference field.
+
+foreach $include (@includes)
+{
+    warn "Reading /usr/include/$include\n";
+    unless (open (INPUT, "/usr/include/$include"))
+    {
+        warn "Cannot read /usr/include/$include: $!\n";
+        next;
+    }
+    while (<INPUT>)
+    {
+
+        # Get rid of comments.  $comment is true when this line starts
+        # inside a C comment; $next_comment says whether the next one will.
+
+        $comment = $next_comment;
+        if ($comment)
+        {
+            # Drop everything up to the closing mark, if there is one.
+            $next_comment = !s,^.*\*/,,;
+        }
+        else
+        {
+            # Drop comments closed on this line, then an unclosed tail.
+            s,/\*.*\*/,,g;
+            $next_comment = s,/\*.*,,;
+        }
+        next if $comment && $next_comment;
+
+        # Remove extraneous white space.
+
+        s/[ \t]+/ /g;
+        s/ $//;
+        next if /^$/;
+
+        # Print the line with its reference.
+
+        print "$include($.): ", $_;
+    }
+
+    # Close explicitly: reopening INPUT without an intervening close does
+    # not reset `$.', which would make the line numbers printed above
+    # keep growing from one include file to the next.
+
+    close (INPUT);
+}
+
+warn "All read, now ptx' game!\n";
+close (OUTPUT) || die "ptx failed...\n";
+unlink $ignore;
diff --git a/gnu/usr.bin/ptx/examples/latex/Makefile b/gnu/usr.bin/ptx/examples/latex/Makefile
new file mode 100644
index 0000000..5f930b2
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/latex/Makefile
@@ -0,0 +1,15 @@
+# Example of using ptx with latex.
+# Copyright (C) 1993 Free Software Foundation, Inc.
+# Francois Pinard <pinard@iro.umontreal.ca>, 1993.
+
+# The ptx binary is taken from the parent directory.
+PTX = ../ptx
+# Options handed to ptx when building the table.
+# NOTE(review): presumably the trailing `i.i' is the argument of -W (the
+# keywords in the generated table.tex all have the shape i?i) -- confirm
+# against the ptx manual.
+PTX_OPTIONS = -AfTWi.i
+
+# Default target: build the DVI file, then view it with xdvi.
+try: latex.dvi
+ xdvi latex
+
+# Typeset latex.tex, which reads the generated table through \input table.
+latex.dvi: latex.tex table.tex
+ latex latex
+
+# Regenerate the table from ../COPYING; each ptx output line goes through
+# the sed substitution before being stored in table.tex.
+table.tex: Makefile ../COPYING
+ $(PTX) $(PTX_OPTIONS) ../COPYING | sed 's/ //' > table.tex
diff --git a/gnu/usr.bin/ptx/examples/latex/README b/gnu/usr.bin/ptx/examples/latex/README
new file mode 100644
index 0000000..fc5098a
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/latex/README
@@ -0,0 +1,10 @@
+Date: Sun, 26 Sep 93 19:07:10 EDT
+From: Francois Pinard <pinard@iro.umontreal.ca>
+To: ajayshah@cmie.ernet.in
+Subject: Re: Gptx suggestions and help request
+
+ In fact, if you could send me such a macro right now I would be
+ thrilled :-)
+
+Ok, I worked out this example for you. Even if a little rude, you can
+still start from it for your own need. [...]
diff --git a/gnu/usr.bin/ptx/examples/latex/latex.tex b/gnu/usr.bin/ptx/examples/latex/latex.tex
new file mode 100644
index 0000000..1f0a2f1
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/latex/latex.tex
@@ -0,0 +1,11 @@
+\documentstyle[11pt]{article}
+\begin{document}
+
+% \xx takes six arguments and sets them as one \hbox made of three
+% fixed-width boxes:
+%   2.5in box: #5 followed by #2, pushed right by the leading \hfil
+%   3.0in box: #3 in slanted type, a thin space, then #4 and #1
+%   1.5in box: #6 in \tiny type
+% The file read by \input table is expected to consist of \xx calls.
+\def\xx#1#2#3#4#5#6{\hbox{
+ \hbox to2.5in{\hfil#5#2}
+ \hbox to3.0in{{\sl #3}\,#4#1\hfil}
+ \hbox to1.5in{\tiny#6\hfil}
+}}
+\input table
+
+\end{document}
diff --git a/gnu/usr.bin/ptx/examples/latex/table.tex b/gnu/usr.bin/ptx/examples/latex/table.tex
new file mode 100644
index 0000000..b68ea38
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/latex/table.tex
@@ -0,0 +1,65 @@
+\xx {}{ate to certain respons}{ibi}{lities for you if you}{}{../COPYING:30}
+\xx {}{These actions are proh}{ibi}{ted by law if you do n}{}{../COPYING:183}
+\xx {}{EN ADVISED OF THE POSS}{IBI}{LITY OF SUCH DAMAGES.}{}{../COPYING:278}
+\xx {}{icense may add an expl}{ici}{t geographical distrib}{}{../COPYING:232}
+\xx {}{OF ALL NECESSARY SERV}{ICI}{NG, REPAIR OR CORRECTI}{}{../COPYING:267}
+\xx {}{aims or to contest val}{idi}{ty of any such claims;}{}{../COPYING:216}
+\xx {}{If the software is mod}{ifi}{ed by someone else and}{}{../COPYING:45}
+\xx {}{, distribution and mod}{ifi}{cation follow.}{pying}{../COPYING:57}
+\xx {}{, DISTRIBUTION AND MOD}{IFI}{CATION 0. This Lice}{}{../COPYING:60}
+\xx {}{r verbatim or with mod}{ifi}{cations and/or transla}{}{../COPYING:68}
+\xx {}{ation in the term "mod}{ifi}{cation".)}{t limit}{../COPYING:70}
+\xx {}{, distribution and mod}{ifi}{cation are not covered}{}{../COPYING:72}
+\xx {}{nd distribute such mod}{ifi}{cations or work under}{}{../COPYING:92}
+\xx {}{You must cause the mod}{ifi}{ed files to carry prom}{}{../COPYING:95}
+\xx {ads c}{c) If the mod}{ifi}{ed program normally re}{}{../COPYING:103}
+\xx {}{ments apply to the mod}{ifi}{ed work as a whole.}{}{../COPYING:114}
+\xx {work are n}{If ident}{ifi}{able sections of that}{}{../COPYING:115}
+\xx {}{he work for making mod}{ifi}{cations to it.}{of t}{../COPYING:156}
+\xx {}{If the Program spec}{ifi}{es a version number of}{}{../COPYING:243}
+\xx {}{hey have is not the or}{igi}{nal, so that any probl}{}{../COPYING:46}
+\xx {}{not reflect on the or}{igi}{nal authors' reputatio}{}{../COPYING:47}
+\xx {}{a license from the or}{igi}{nal licensor to copy,}{}{../COPYING:191}
+\xx {}{ted interfaces, the or}{igi}{nal copyright holder w}{}{../COPYING:231}
+\xx {}{RRANTIES OF MERCHANTAB}{ILI}{TY AND FITNESS FOR A P}{}{../COPYING:265}
+\xx {}{OUT OF THE USE OR INAB}{ILI}{TY TO USE THE PROGRAM}{}{../COPYING:274}
+\xx {}{anty of MERCHANTAB}{ILI}{TY or FITNESS FOR A PA}{}{../COPYING:303}
+\xx {}{is included without l}{imi}{tation in the term "mo}{}{../COPYING:69}
+\xx {}{ny other reason (not l}{imi}{ted to patent issues),}{}{../COPYING:198}
+\xx {}{aphical distribution l}{imi}{tation excluding those}{}{../COPYING:232}
+\xx {}{nse incorporates the l}{imi}{tation as if written i}{}{../COPYING:235}
+\xx {}{new versions will be s}{imi}{lar in spirit to the p}{}{../COPYING:239}
+\xx {}{, INCLUDING, BUT NOT L}{IMI}{TED TO, THE IMPLIED WA}{}{../COPYING:264}
+\xx {}{M (INCLUDING BUT NOT L}{IMI}{TED TO LOSS OF DATA OR}{}{../COPYING:274}
+\xx {}{s to say, a work conta}{ini}{ng the Program or a po}{}{../COPYING:67}
+\xx {}{sociated interface def}{ini}{tion files, plus the s}{}{../COPYING:158}
+\xx {}{you must give the rec}{ipi}{ents all the rights th}{}{../COPYING:34}
+\xx {}{ed on, we want its rec}{ipi}{ents to know that what}{}{../COPYING:46}
+\xx {}{and give any other rec}{ipi}{ents of the Program a}{}{../COPYING:84}
+\xx {}{the Program), the rec}{ipi}{ent automatically rece}{}{../COPYING:190}
+\xx {}{estrictions on the rec}{ipi}{ents' exercise of the}{}{../COPYING:193}
+\xx {}{will be similar in sp}{iri}{t to the present versi}{}{../COPYING:239}
+\xx {he two goal}{Our dec}{isi}{on will be guided by t}{}{../COPYING:254}
+\xx {}{NSEQUENTIAL DAMAGES AR}{ISI}{NG OUT OF THE USE OR I}{}{../COPYING:273}
+\xx {}{tive mode: Gnomov}{isi}{on version 69, Copyrig}{}{../COPYING:315}
+\xx {}{e of author Gnomov}{isi}{on comes with ABSOLUTE}{}{../COPYING:316}
+\xx {}{the program `Gnomov}{isi}{on' (which makes passe}{}{../COPYING:330}
+\xx {}{to certain responsibil}{iti}{es for you if you dist}{}{../COPYING:30}
+\xx {}{precise terms and cond}{iti}{ons for copying, distr}{}{../COPYING:56}
+\xx {}{ENSE TERMS AND COND}{ITI}{ONS FOR COPYING, DISTR}{}{../COPYING:60}
+\xx {}{meet all of these cond}{iti}{ons: a) You must}{}{../COPYING:93}
+\xx {}{m under these cond}{iti}{ons, and telling the u}{}{../COPYING:109}
+\xx {f another wo}{In add}{iti}{on, mere aggregation o}{}{../COPYING:129}
+\xx {}{all its terms and cond}{iti}{ons for copying, distr}{}{../COPYING:186}
+\xx {}{o these terms and cond}{iti}{ons.}{bject t}{../COPYING:192}
+\xx {}{o patent issues), cond}{iti}{ons are imposed on you}{}{../COPYING:199}
+\xx {}{at contradict the cond}{iti}{ons of this License, t}{}{../COPYING:200}
+\xx {}{cuse you from the cond}{iti}{ons of this License.}{}{../COPYING:201}
+\xx {}{ing the terms and cond}{iti}{ons either of that ver}{}{../COPYING:244}
+\xx {}{hose distribution cond}{iti}{ons are different, wri}{}{../COPYING:251}
+\xx {}{OTHERWISE STATED IN WR}{ITI}{NG THE COPYRIGHT HOLDE}{}{../COPYING:262}
+\xx {}{LAW OR AGREED TO IN WR}{ITI}{NG WILL ANY COPYRIGHT}{}{../COPYING:270}
+\xx {}{END OF TERMS AND COND}{ITI}{ONS Appendix: How t}{}{../COPYING:280}
+\xx {}{under certain cond}{iti}{ons; type `show c' for}{}{../COPYING:318}
+\xx {}{free program will ind}{ivi}{dually obtain patent l}{}{../COPYING:52}
+\xx {g, distribution}{Act}{ivi}{ties other than copyin}{}{../COPYING:72}
diff --git a/gnu/usr.bin/ptx/examples/luke/README b/gnu/usr.bin/ptx/examples/luke/README
new file mode 100644
index 0000000..6291861
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/luke/README
@@ -0,0 +1,2 @@
+From: Luke Kendall <luke@research.canon.oz.au>
+Date: Wed, 16 Oct 91 12:26:39 EST
diff --git a/gnu/usr.bin/ptx/examples/luke/xxroff.sh b/gnu/usr.bin/ptx/examples/luke/xxroff.sh
new file mode 100644
index 0000000..55ef908
--- /dev/null
+++ b/gnu/usr.bin/ptx/examples/luke/xxroff.sh
@@ -0,0 +1,108 @@
+#!/bin/sh
+#
+# Author: Luke Kendall
+#
+MYNAME=`basename $0`
+usage="usage: $MYNAME [man-directory]
+ (generates permuted index of -man files in directory)"
+md=/usr/man
+#
+if [ $# = 0 ]
+then
+ echo "$MYNAME: no man directory specified: assuming $md"
+elif [ $# != 1 ]
+then
+ echo "$usage"
+ exit 1
+elif [ -d $1 ]
+then
+ md="$1"
+else
+ echo "$usage"
+ exit 1
+fi
+echo "Permuted index of $md:"
+out=ptx.tr
+# ------ clumsy permuted index macros (replaced by stuff below) ------------
+# NOTE: this here-document is written to $out and then discarded, because
+# the second `cat ... > $out' further down truncates the file again.  These
+# macros therefore never reach the final file.
+cat <<'EOF' > $out
+.pn 1
+.de xx
+\\$1 \\$2 \\fB\\$3\\fR \\$4 \\s-1\\$5\\s0
+..
+.pl 10i
+.de NP
+.ev 1
+.ft 1
+.ps 10
+.sp 0.75c
+.tl '\s-2\\fIpermuted index\\fP\s0'\- \\n% \-'\s-2\\fIpermuted index\\fP\s0'
+.pn +1
+.bp
+.ev
+..
+.wh 9i NP
+.nf
+.na
+.ta 6.5i-1.1iR 6.5iR 6.51iR 6.52R
+.ll 6.0i
+.po 0i
+.sp 0.25i
+'\"
+EOF
+# ------ ------- ------- ------- ------- -------
+# ------ alternate permuted index macros (from net) ------------
+# This here-document is what $out actually starts with.  It defines the
+# `NP' macro (installed at 9i with `.wh') and the `xx' macro, both via
+# `.de'.  The delimiter is quoted ('EOF'), so the shell copies the text
+# without expanding anything in it.
+cat <<'EOF' > $out
+.pl 10i
+.de NP
+.ev 1
+.ft 1
+.ps 10
+.sp 0.75c
+.tl '\s-2\\fIpermuted index\\fP\s0'\- \\n% \-'\s-2\\fIpermuted index\\fP\s0'
+.pn +1
+.bp
+.ev
+..
+.wh 9i NP
+.po 0.5i
+.sp 0.25i
+.tr ~ \" tildes will translate to blanks
+'\".ll 80 \" line length of output
+.ll 6.0i \" line length of output
+.nf \" must be in no-fill mode
+.nr )r \n(.lu-10n \" set position of reference in line (10 less than length)
+.nr )k \n()ru/2u \" set position of keyword (approx. centered)
+.ds s2 ~~~ \" this is the center gap -- 3 spaces
+.de xx \"definition of xx macro
+.ds s1\" \" initialise to null string
+.if \w@\\$2@ .ds s1 ~\" \"set to single blank if there is second arg
+.ds s3\" \" initialise to null string
+.if \w@\\$4@ .ds s3 ~\" \"set to single blank if there is second arg
+.ds s4 ~\" \" set to single blank
+.ds s5 ~\" \" set to single blank
+.ds y \\*(s4\a\\*(s5\" \" blank, leader, blank
+.ta \\n()ru-\w@\\*(s5@u \" set tab just to left of ref
+\h@\\n()ku-\w@\\$1\\*(s1\\$2\\*(s2@u@\\$1\\*(s1\\$2\\*(s2\\$3\\*(s3\\$4\\*y\\$5
+..
+ ~
+EOF
+# ------ ------- ------- ------- ------- -------
+#
+# Feed one "(page) synopsis" line per man page file into ptx and append
+# the resulting index to the macros already stored in $out.
+#
+find "$md" -type f -name "*.[1-8nl]*" -print |
+while read f
+do
+    man=`basename "$f"`
+    man=`expr "$man" : "\(.*\)\.[^\.]*"`
+    # NOTE(review): this echo writes into the pipe read by ptx, not to the
+    # terminal -- confirm that is intended.
+    echo "$man:"
+    #
+    # Use 1st non-"." and non-"'" started line as input to ptx (this
+    # should be the synopsis after the `.SH NAME');
+    # strip any "\-" from it (a silly sort key for ptx to avoid);
+    # insert a leading man page name for the -r option to find.
+    #
+    # The bracket expression excludes "'" as well as ".", so that a
+    # leading troff comment line such as '\" t is not taken for the
+    # synopsis.  The sed script is double-quoted only so that it can hold
+    # that "'"; the doubled backslashes still reach sed as \\- .
+    #
+    sed -n "/^[^.']/s/\\\\-//g;/^[^.']/p;/^[^.']/q" "$f" | sed "s/^/($man) /"
+done | ptx -t -f -r >> $out
+#
+# Turn the troff'able permuted index file into PostScript
+#
+psroff -t -rL10i $out > ptx.ps
+echo "$out and ptx.ps produced from man directory $md."
diff --git a/gnu/usr.bin/ptx/getopt.c b/gnu/usr.bin/ptx/getopt.c
new file mode 100644
index 0000000..7a4673b
--- /dev/null
+++ b/gnu/usr.bin/ptx/getopt.c
@@ -0,0 +1,757 @@
+/* Getopt for GNU.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
+
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_CONFIG_H
+#if defined (emacs) || defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+#ifndef __STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+#include <stdlib.h>
+#endif /* GNU C library. */
+
+/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a
+ long-named option. Because this is not POSIX.2 compliant, it is
+ being phased out. */
+/* #define GETOPT_COMPAT */
+
+/* This version of `getopt' appears to the caller like standard Unix `getopt'
+ but it behaves differently for the user, since it allows the user
+ to intersperse the options with the other arguments.
+
+ As `getopt' works, it permutes the elements of ARGV so that,
+ when it is done, all the options precede everything else. Thus
+ all application programs are extended to handle flexible argument order.
+
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
+ Then the behavior is completely standard.
+
+ GNU application programs can use a third alternative mode in which
+ they can distinguish the relative order of options and other arguments. */
+
+#include "getopt.h"
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+char *optarg = 0;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+/* XXX 1003.2 says this must be 1 before any call. */
+int optind = 0;
+
+/* The next char to be scanned in the option-element
+ in which the last option character we returned was found.
+ This allows us to pick up the scan where we left off.
+
+ If this is zero, or a null string, it means resume the scan
+ by advancing to the next ARGV-element. */
+
+static char *nextchar;
+
+/* Callers store zero here to inhibit the error message
+ for unrecognized options. */
+
+int opterr = 1;
+
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
+/* Describe how to deal with options that follow non-option ARGV-elements.
+
+ If the caller did not specify anything,
+ the default is REQUIRE_ORDER if the environment variable
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
+
+ REQUIRE_ORDER means don't recognize them as options;
+ stop option processing when the first non-option is seen.
+ This is what Unix does.
+ This mode of operation is selected by either setting the environment
+ variable POSIXLY_CORRECT, or using `+' as the first character
+ of the list of option characters.
+
+ PERMUTE is the default. We permute the contents of ARGV as we scan,
+ so that eventually all the non-options are at the end. This allows options
+ to be given in any order, even with programs that were not written to
+ expect this.
+
+ RETURN_IN_ORDER is an option available to programs that were written
+ to expect options and other ARGV-elements in any order and that care about
+ the ordering of the two. We describe each non-option ARGV-element
+ as if it were the argument of an option with character code 1.
+ Using `-' as the first character of the list of option characters
+ selects this mode of operation.
+
+ The special argument `--' forces an end of option-scanning regardless
+ of the value of `ordering'. In the case of RETURN_IN_ORDER, only
+ `--' can cause `getopt' to return EOF with `optind' != ARGC. */
+
+static enum
+{
+ REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
+} ordering;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
+#include <string.h>
+#define my_index strchr
+#else
+
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
+
+char *getenv ();
+
+/* Return a pointer to the first character of STR equal to CHR, or 0 when
+   STR holds no such character.  The terminating null is never matched.  */
+static char *
+my_index (str, chr)
+     const char *str;
+     int chr;
+{
+  const char *cursor;
+
+  for (cursor = str; *cursor != '\0'; cursor++)
+    if (*cursor == chr)
+      return (char *) cursor;
+
+  return 0;
+}
+
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it.
+ (Supposedly there are some machines where it might get a warning,
+ but changing this conditional to __STDC__ is too risky.) */
+#ifdef __GNUC__
+#ifdef IN_GCC
+#include "gstddef.h"
+#else
+#include <stddef.h>
+#endif
+extern size_t strlen (const char *);
+#endif
+
+#endif /* GNU C library. */
+
+/* Handle permutation of arguments. */
+
+/* Describe the part of ARGV that contains non-options that have
+ been skipped. `first_nonopt' is the index in ARGV of the first of them;
+ `last_nonopt' is the index after the last of them. */
+
+static int first_nonopt;
+static int last_nonopt;
+
+/* Exchange two adjacent subsequences of ARGV.
+ One subsequence is elements [first_nonopt,last_nonopt)
+ which contains all the non-options that have been skipped so far.
+ The other is elements [last_nonopt,optind), which contains all
+ the options processed since those non-options were skipped.
+
+ `first_nonopt' and `last_nonopt' are relocated so that they describe
+ the new indices of the non-options in ARGV after they are moved. */
+
+static void
+exchange (argv)
+     char **argv;
+{
+  /* ARGV[LO..MID) is the run of skipped non-options and ARGV[MID..HI) the
+     run of options scanned after them.  Each pass moves the shorter run
+     to its final place and shrinks the range still to be rearranged.  */
+  int lo = first_nonopt;
+  int mid = last_nonopt;
+  int hi = optind;
+
+  while (hi > mid && mid > lo)
+    {
+      int nonopt_len = mid - lo;
+      int opt_len = hi - mid;
+      int lower_is_shorter = opt_len > nonopt_len;
+      /* Number of elements to swap, and where the partner slice starts:
+         the tail of the upper run when the lower run is the short one,
+         otherwise the start of the upper run.  */
+      int count = lower_is_shorter ? nonopt_len : opt_len;
+      int partner = lower_is_shorter ? hi - nonopt_len : mid;
+      register int k;
+
+      for (k = 0; k < count; k++)
+        {
+          char *held = argv[lo + k];
+          argv[lo + k] = argv[partner + k];
+          argv[partner + k] = held;
+        }
+
+      /* Exclude the slice that has just reached its place.  */
+      if (lower_is_shorter)
+        hi -= count;
+      else
+        lo += count;
+    }
+
+  /* Update records for the slots the non-options now occupy.  */
+
+  first_nonopt += (optind - last_nonopt);
+  last_nonopt = optind;
+}
+
+/* Scan elements of ARGV (whose length is ARGC) for option characters
+ given in OPTSTRING.
+
+ If an element of ARGV starts with '-', and is not exactly "-" or "--",
+ then it is an option element. The characters of this element
+ (aside from the initial '-') are option characters. If `getopt'
+ is called repeatedly, it returns successively each of the option characters
+ from each of the option elements.
+
+ If `getopt' finds another option character, it returns that character,
+ updating `optind' and `nextchar' so that the next call to `getopt' can
+ resume the scan with the following option character or ARGV-element.
+
+ If there are no more option characters, `getopt' returns `EOF'.
+ Then `optind' is the index in ARGV of the first ARGV-element
+ that is not an option. (The ARGV-elements have been permuted
+ so that those that are not options now come last.)
+
+ OPTSTRING is a string containing the legitimate option characters.
+ If an option character is seen that is not listed in OPTSTRING,
+ return '?' after printing an error message. If you set `opterr' to
+ zero, the error message is suppressed but we still return '?'.
+
+ If a char in OPTSTRING is followed by a colon, that means it wants an arg,
+ so the following text in the same ARGV-element, or the text of the following
+ ARGV-element, is returned in `optarg'. Two colons mean an option that
+ wants an optional arg; if there is text in the current ARGV-element,
+ it is returned in `optarg', otherwise `optarg' is set to zero.
+
+ If OPTSTRING starts with `-' or `+', it requests different methods of
+ handling the non-option ARGV-elements.
+ See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
+
+ Long-named options begin with `--' instead of `-'.
+ Their names may be abbreviated as long as the abbreviation is unique
+ or is an exact match for some defined option. If they have an
+ argument, it follows the option name in the same ARGV-element, separated
+ from the option name by a `=', or else in the next ARGV-element.
+ When `getopt' finds a long-named option, it returns 0 if that option's
+ `flag' field is nonzero, the value of the option's `val' field
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
+
+/* Worker shared by the public entry points; `getopt' calls it with
+ LONGOPTS and LONGIND null and LONG_ONLY zero.
+
+ Return value: the option character for a short option; for a long
+ option its `val', or 0 after storing `val' through a non-null `flag';
+ 1 when a non-option ARGV-element is handed back in `optarg';
+ '?' on an error (':' instead for a missing argument when OPTSTRING
+ starts with ':'); EOF when the scan is finished. */
+int
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+ const struct option *longopts;
+ int *longind;
+ int long_only;
+{
+ int option_index;
+
+ optarg = 0;
+
+ /* Initialize the internal data when the first call is made.
+ Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ if (optind == 0)
+ {
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (getenv ("POSIXLY_CORRECT") != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+ }
+
+ /* Nothing is left of the element scanned last time: pick the next
+ ARGV-element to work on. */
+ if (nextchar == NULL || *nextchar == '\0')
+ {
+ if (ordering == PERMUTE)
+ {
+ /* If we have just processed some options following some non-options,
+ exchange them so that the options come first. */
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (last_nonopt != optind)
+ first_nonopt = optind;
+
+ /* Now skip any additional non-options
+ and extend the range of non-options previously skipped. */
+
+ while (optind < argc
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ optind++;
+ last_nonopt = optind;
+ }
+
+ /* Special ARGV-element `--' means premature end of options.
+ Skip it like a null option,
+ then exchange with previous non-options as if it were an option,
+ then skip everything else like a non-option. */
+
+ if (optind != argc && !strcmp (argv[optind], "--"))
+ {
+ optind++;
+
+ if (first_nonopt != last_nonopt && last_nonopt != optind)
+ exchange ((char **) argv);
+ else if (first_nonopt == last_nonopt)
+ first_nonopt = optind;
+ last_nonopt = argc;
+
+ optind = argc;
+ }
+
+ /* If we have done all the ARGV-elements, stop the scan
+ and back over any non-options that we skipped and permuted. */
+
+ if (optind == argc)
+ {
+ /* Set the next-arg-index to point at the non-options
+ that we previously skipped, so the caller will digest them. */
+ if (first_nonopt != last_nonopt)
+ optind = first_nonopt;
+ return EOF;
+ }
+
+ /* If we have come to a non-option and did not permute it,
+ either stop the scan or describe it to the caller and pass it by. */
+
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0')
+#ifdef GETOPT_COMPAT
+ && (longopts == NULL
+ || argv[optind][0] != '+' || argv[optind][1] == '\0')
+#endif /* GETOPT_COMPAT */
+ )
+ {
+ if (ordering == REQUIRE_ORDER)
+ return EOF;
+ optarg = argv[optind++];
+ return 1;
+ }
+
+ /* We have found another option-ARGV-element.
+ Start decoding its characters. */
+
+ /* Step over the first character, and over a following `-' as well
+ when a long-option table was supplied. */
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-'));
+ }
+
+ /* Try the element as a long option when a table was supplied and the
+ element starts with `--', or with a single `-' if LONG_ONLY is set
+ (or with `+' under GETOPT_COMPAT). */
+ if (longopts != NULL
+ && ((argv[optind][0] == '-'
+ && (argv[optind][1] == '-' || long_only))
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ ))
+ {
+ const struct option *p;
+ char *s = nextchar;
+ int exact = 0;
+ int ambig = 0;
+ const struct option *pfound = NULL;
+ int indfound;
+
+ /* Move S to the end of the option name: an `=' or the end of the
+ element. */
+ while (*s && *s != '=')
+ s++;
+
+ /* Test all options for either exact match or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name;
+ p++, option_index++)
+ /* Only the typed length is compared, so abbreviations match too. */
+ if (!strncmp (p->name, nextchar, s - nextchar))
+ {
+ if (s - nextchar == strlen (p->name))
+ {
+ /* Exact match found. */
+ pfound = p;
+ indfound = option_index;
+ exact = 1;
+ break;
+ }
+ else if (pfound == NULL)
+ {
+ /* First nonexact match found. */
+ pfound = p;
+ indfound = option_index;
+ }
+ else
+ /* Second nonexact match found. */
+ ambig = 1;
+ }
+
+ if (ambig && !exact)
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ argv[0], argv[optind]);
+ /* Discard the rest of this element. */
+ nextchar += strlen (nextchar);
+ optind++;
+ return '?';
+ }
+
+ if (pfound != NULL)
+ {
+ option_index = indfound;
+ optind++;
+ /* *S is nonzero only when the scan above stopped at an `='. */
+ if (*s)
+ {
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = s + 1;
+ else
+ {
+ if (opterr)
+ {
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ "%s: option `--%s' doesn't allow an argument\n",
+ argv[0], pfound->name);
+ else
+ /* +option or -option */
+ fprintf (stderr,
+ "%s: option `%c%s' doesn't allow an argument\n",
+ argv[0], argv[optind - 1][0], pfound->name);
+ }
+ nextchar += strlen (nextchar);
+ return '?';
+ }
+ }
+ else if (pfound->has_arg == 1)
+ {
+ /* No `=' was given: take the following ARGV-element, if any. */
+ if (optind < argc)
+ optarg = argv[optind++];
+ else
+ {
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' requires an argument\n",
+ argv[0], argv[optind - 1]);
+ nextchar += strlen (nextchar);
+ /* A leading `:' in OPTSTRING selects ':' instead of '?'. */
+ return optstring[0] == ':' ? ':' : '?';
+ }
+ }
+ nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
+ if (pfound->flag)
+ {
+ *(pfound->flag) = pfound->val;
+ return 0;
+ }
+ return pfound->val;
+ }
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+#ifdef GETOPT_COMPAT
+ || argv[optind][0] == '+'
+#endif /* GETOPT_COMPAT */
+ || my_index (optstring, *nextchar) == NULL)
+ {
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ argv[0], nextchar);
+ else
+ /* +option or -option */
+ fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ argv[0], argv[optind][0], nextchar);
+ }
+ /* An empty string makes the next call start on a new element. */
+ nextchar = (char *) "";
+ optind++;
+ return '?';
+ }
+ }
+
+ /* Look at and handle the next option-character. */
+
+ {
+ char c = *nextchar++;
+ char *temp = my_index (optstring, c);
+
+ /* Increment `optind' when we start to process its last character. */
+ if (*nextchar == '\0')
+ ++optind;
+
+ /* `:' is only a marker inside OPTSTRING, never an option itself. */
+ if (temp == NULL || c == ':')
+ {
+ if (opterr)
+ {
+#if 0
+ if (c < 040 || c >= 0177)
+ fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
+ argv[0], c);
+ else
+ fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c);
+#else
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
+#endif
+ }
+ optopt = c;
+ return '?';
+ }
+ if (temp[1] == ':')
+ {
+ if (temp[2] == ':')
+ {
+ /* This is an option that accepts an argument optionally. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ optind++;
+ }
+ else
+ optarg = 0;
+ nextchar = NULL;
+ }
+ else
+ {
+ /* This is an option that requires an argument. */
+ if (*nextchar != '\0')
+ {
+ optarg = nextchar;
+ /* If we end this ARGV-element by taking the rest as an arg,
+ we must advance to the next element now. */
+ optind++;
+ }
+ else if (optind == argc)
+ {
+ if (opterr)
+ {
+#if 0
+ fprintf (stderr, "%s: option `-%c' requires an argument\n",
+ argv[0], c);
+#else
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, "%s: option requires an argument -- %c\n",
+ argv[0], c);
+#endif
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
+ }
+ else
+ /* We already incremented `optind' once;
+ increment it again when taking next ARGV-elt as argument. */
+ optarg = argv[optind++];
+ nextchar = NULL;
+ }
+ }
+ return c;
+ }
+}
+
+/* Traditional entry point: scan ARGV for the short options listed in
+   OPTSTRING.  All the work is delegated to `_getopt_internal', called
+   with no long-option table, no place to report a long-option index,
+   and LONG_ONLY switched off.  */
+int
+getopt (argc, argv, optstring)
+     int argc;
+     char *const *argv;
+     const char *optstring;
+{
+  const struct option *no_longopts = (const struct option *) 0;
+  int *no_longind = (int *) 0;
+
+  return _getopt_internal (argc, argv, optstring, no_longopts, no_longind, 0);
+}
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
+
+#ifdef TEST
+
+/* Compile with -DTEST to make an executable for use in testing
+ the above definition of `getopt'. */
+
+/* Test driver: report every option `getopt' returns for the command
+   line, then list the ARGV-elements left over as non-options.
+   Always returns 0.  */
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  /* ARGV index of the element that supplied the digit options seen so
+     far, or 0 while no digit option has been seen.  */
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* `optind' is still 0 before the first call; scanning starts at 1.  */
+      int this_option_optind = optind ? optind : 1;
+
+      c = getopt (argc, argv, "abc:d:0123456789");
+      if (c == EOF)
+        break;
+
+      switch (c)
+        {
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        /* The option string declares `d:', so report it like `c' rather
+           than letting it fall into the unexpected-code default.  */
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* Returning from `main' avoids calling `exit', which has no declaration
+     in scope when <stdlib.h> is not included above.  */
+  return 0;
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/ptx/getopt.h b/gnu/usr.bin/ptx/getopt.h
new file mode 100644
index 0000000..45541f5
--- /dev/null
+++ b/gnu/usr.bin/ptx/getopt.h
@@ -0,0 +1,129 @@
+/* Declarations for getopt.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `getopt' to the caller.
+ When `getopt' finds an option that takes an argument,
+ the argument value is returned here.
+ Also, when `ordering' is RETURN_IN_ORDER,
+ each non-option ARGV-element is returned here. */
+
+extern char *optarg;
+
+/* Index in ARGV of the next element to be scanned.
+ This is used for communication to and from the caller
+ and for communication between successive calls to `getopt'.
+
+ On entry to `getopt', zero means this is the first call; initialize.
+
+ When `getopt' returns EOF, this is the index of the first of the
+ non-option elements that the caller should itself scan.
+
+ Otherwise, `optind' communicates from one call to the next
+ how much of ARGV has been scanned so far. */
+
+extern int optind;
+
+/* Callers store zero here to inhibit the error message `getopt' prints
+ for unrecognized options. */
+
+extern int opterr;
+
+/* Set to an option character which was unrecognized, or to an option
+   character whose required argument was missing.  */
+
+extern int optopt;
+
+/* Describe the long-named options requested by the application.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
+
+ The field `has_arg' is:
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
+
+ If the field `flag' is not NULL, it points to a variable that is set
+ to the value given in the field `val' when the option is found, but
+ left unchanged if the option is not found.
+
+ To have a long-named option do something other than set an `int' to
+ a compiled-in constant, such as set a value from `optarg', set the
+ option's `flag' field to zero and its `val' field to a nonzero
+ value (the equivalent single-letter option character, if there is
+ one). For long options that have a zero `flag' field, `getopt'
+ returns the contents of the `val' field. */
+
+struct option
+{
+ /* Name of the long option; an element whose name is zero terminates
+ the LONG_OPTIONS vector. */
+#if __STDC__
+ const char *name;
+#else
+ char *name;
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
+ int has_arg;
+ /* If not NULL, points to a variable that is set to `val' when the
+ option is found. */
+ int *flag;
+ /* Value stored through `flag', or returned by `getopt' when `flag'
+ is zero. */
+ int val;
+};
+
+/* Names for the values of the `has_arg' field of `struct option'. */
+
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
+
+/* Declarations of the getopt entry points: full prototypes for ISO C
+ compilers, old-style declarations otherwise. */
+#if __STDC__
+#if defined(__GNU_LIBRARY__)
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* not __GNU_LIBRARY__ */
+/* getopt_long and getopt_long_only both forward to _getopt_internal,
+ passing 0 and 1 respectively as LONG_ONLY. */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* not __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GETOPT_H */
diff --git a/gnu/usr.bin/ptx/getopt1.c b/gnu/usr.bin/ptx/getopt1.c
new file mode 100644
index 0000000..f784b57
--- /dev/null
+++ b/gnu/usr.bin/ptx/getopt1.c
@@ -0,0 +1,187 @@
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_CONFIG_H
+#if defined (emacs) || defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+#include "getopt.h"
+
+#ifndef __STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
+#define const
+#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+#include <stdlib.h>
+#else
+char *getenv ();
+#endif
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* Long-option entry point.  Forwards every argument unchanged to
+   `_getopt_internal', with the LONG_ONLY argument set to zero.  */
+
+int
+getopt_long (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int result;
+
+  result = _getopt_internal (argc, argv, options,
+                             long_options, opt_index, 0);
+  return result;
+}
+
+/* Variant of getopt_long in which a single '-' may introduce a long
+   option, just as '--' does.  An argument starting with '-' (not '--')
+   that matches no long option but does match a short option is parsed
+   as that short option.  Forwards to `_getopt_internal' with the
+   LONG_ONLY argument set to one.  */
+
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
+     int argc;
+     char *const *argv;
+     const char *options;
+     const struct option *long_options;
+     int *opt_index;
+{
+  int result;
+
+  result = _getopt_internal (argc, argv, options,
+                             long_options, opt_index, 1);
+  return result;
+}
+
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
+
+#ifdef TEST
+
+#include <stdio.h>
+
+/* Test driver for `getopt_long'.  Parses the command line against the
+   short option string "abc:d:0123456789" and a small table of long
+   options, prints one line per option found, then prints whatever
+   non-option ARGV-elements remain.  Returns 0.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int c;
+  int digit_optind = 0;
+
+  while (1)
+    {
+      /* `optind' may still be zero before the first call (meaning "not
+         yet initialized"); use 1 in that case.  */
+      int this_option_optind = optind ? optind : 1;
+      int option_index = 0;
+      /* Every entry has a zero `flag' and a zero `val', so a matched
+         long option makes getopt_long return 0.  */
+      static struct option long_options[] =
+      {
+        {"add", 1, 0, 0},
+        {"append", 0, 0, 0},
+        {"delete", 1, 0, 0},
+        {"verbose", 0, 0, 0},
+        {"create", 0, 0, 0},
+        {"file", 1, 0, 0},
+        {0, 0, 0, 0}
+      };
+
+      c = getopt_long (argc, argv, "abc:d:0123456789",
+                       long_options, &option_index);
+      if (c == EOF)
+        break;
+
+      switch (c)
+        {
+        case 0:
+          /* A long option was matched; OPTION_INDEX selects its entry.  */
+          printf ("option %s", long_options[option_index].name);
+          if (optarg)
+            printf (" with arg %s", optarg);
+          printf ("\n");
+          break;
+
+        case '0':
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+          /* Remember which ARGV-element the digits came from, and say so
+             when a later digit comes from a different one.  */
+          if (digit_optind != 0 && digit_optind != this_option_optind)
+            printf ("digits occur in two different argv-elements.\n");
+          digit_optind = this_option_optind;
+          printf ("option %c\n", c);
+          break;
+
+        case 'a':
+          printf ("option a\n");
+          break;
+
+        case 'b':
+          printf ("option b\n");
+          break;
+
+        case 'c':
+          printf ("option c with value `%s'\n", optarg);
+          break;
+
+        case 'd':
+          printf ("option d with value `%s'\n", optarg);
+          break;
+
+        case '?':
+          break;
+
+        default:
+          printf ("?? getopt returned character code 0%o ??\n", c);
+          break;
+        }
+    }
+
+  if (optind < argc)
+    {
+      printf ("non-option ARGV-elements: ");
+      while (optind < argc)
+        printf ("%s ", argv[optind++]);
+      printf ("\n");
+    }
+
+  /* Returning from main avoids calling `exit' without a declaration in
+     scope when <stdlib.h> has not been included.  */
+  return 0;
+}
+
+#endif /* TEST */
diff --git a/gnu/usr.bin/ptx/mkinstalldirs b/gnu/usr.bin/ptx/mkinstalldirs
new file mode 100755
index 0000000..0e29377
--- /dev/null
+++ b/gnu/usr.bin/ptx/mkinstalldirs
@@ -0,0 +1,35 @@
+#!/bin/sh
+# Make directory hierarchy.
+# Written by Noah Friedman <friedman@prep.ai.mit.edu>
+# Public domain.
+
+defaultIFS='
+'
+IFS="${IFS-${defaultIFS}}"
+
+# Exit status of the whole run, and status of the most recent failed mkdir.
+errstatus=0
+lasterr=0
+
+for file in ${1+"$@"} ; do
+  oIFS="${IFS}"
+  # Some sh's can't handle IFS=/ for some reason.
+  # Turn every `/' into `%' (keeping a leading `/' attached to the first
+  # component) and split on `%' to get the path components.  ${file} is
+  # quoted so that it is not subject to pathname expansion.
+  IFS='%'
+  set - `echo "${file}" | sed -e 's@/@%@g' -e 's@^%@/@'`
+  IFS="${oIFS}"
+
+  pathcomp=''
+
+  for d in ${1+"$@"} ; do
+    pathcomp="${pathcomp}${d}"
+
+    if test ! -d "${pathcomp}"; then
+      echo "mkdir $pathcomp" 1>&2
+      # mkdir may fail merely because another process created the
+      # directory in the meantime; only record an error when the
+      # directory still does not exist afterwards.
+      mkdir "${pathcomp}" || lasterr=$?
+
+      if test ! -d "${pathcomp}"; then
+        errstatus=$lasterr
+      fi
+    fi
+
+    pathcomp="${pathcomp}/"
+  done
+done
+
+exit $errstatus
+
+# eof
diff --git a/gnu/usr.bin/ptx/ptx.c b/gnu/usr.bin/ptx/ptx.c
new file mode 100644
index 0000000..2dc306e
--- /dev/null
+++ b/gnu/usr.bin/ptx/ptx.c
@@ -0,0 +1,2237 @@
+/* Permuted index for GNU, with keywords in their context.
+ Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
+ Francois Pinard <pinard@iro.umontreal.ca>, 1988.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+const char *version_string = "GNU ptx version 0.3";
+
+char *const copyright = "\
+This program is free software; you can redistribute it and/or modify\n\
+it under the terms of the GNU General Public License as published by\n\
+the Free Software Foundation; either version 2, or (at your option)\n\
+any later version.\n\
+\n\
+This program is distributed in the hope that it will be useful,\n\
+but WITHOUT ANY WARRANTY; without even the implied warranty of\n\
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\
+GNU General Public License for more details.\n\
+\n\
+You should have received a copy of the GNU General Public License\n\
+along with this program; if not, write to the Free Software\n\
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n";
+
+/* Reallocation step when swallowing non regular files. The value is not
+ the actual reallocation step, but its base two logarithm. */
+#define SWALLOW_REALLOC_LOG 12
+
+/* Imported from "regex.c". */
+#define Sword 1
+
+#ifdef STDC_HEADERS
+
+#include <stdlib.h>
+#include <ctype.h>
+
+#else /* not STDC_HEADERS */
+
+/* These definitions work, for all 256 characters. */
+#define isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n')
+#define isxdigit(c) \
+ (((unsigned char) (c) >= 'a' && (unsigned char) (c) <= 'f') \
+ || ((unsigned char) (c) >= 'A' && (unsigned char) (c) <= 'F') \
+ || ((unsigned char) (c) >= '0' && (unsigned char) (c) <= '9'))
+#define islower(c) ((unsigned char) (c) >= 'a' && (unsigned char) (c) <= 'z')
+#define isupper(c) ((unsigned char) (c) >= 'A' && (unsigned char) (c) <= 'Z')
+#define isalpha(c) (islower (c) || isupper (c))
+#define toupper(c) (islower (c) ? (c) - 'a' + 'A' : (c))
+
+#endif /* not STDC_HEADERS */
+
+#if !defined (isascii) || defined (STDC_HEADERS)
+#undef isascii
+#define isascii(c) 1
+#endif
+
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+#define ISODIGIT(c) ((c) >= '0' && (c) <= '7')
+#define HEXTOBIN(c) ((c)>='a'&&(c)<='f' ? (c)-'a'+10 : (c)>='A'&&(c)<='F' ? (c)-'A'+10 : (c)-'0')
+#define OCTTOBIN(c) ((c) - '0')
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if !defined(S_ISREG) && defined(S_IFREG)
+#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
+#endif
+
+#ifdef HAVE_STRING_H
+#include <string.h>
+#else /* not HAVE_STRING_H */
+#include <strings.h>
+#define strchr index
+#define strrchr rindex
+#endif /* not HAVE_STRING_H */
+
+#include "getopt.h"
+
+#include <errno.h>
+#ifndef errno
+extern int errno;
+#endif
+
+#include "bumpalloc.h"
+#include "diacrit.h"
+#include "regex.h"
+
+#ifndef __STDC__
+void *xmalloc ();
+void *xrealloc ();
+#else
+void *xmalloc (int);
+void *xrealloc (void *, int);
+#endif
+
+
+/* Global definitions. */
+
+const char *program_name; /* name of this program */
+static int show_help = 0; /* display usage information and exit */
+static int show_version = 0; /* print the version and exit */
+
+/* Program options. */
+
+enum Format
+{
+ DUMB_FORMAT, /* output for a dumb terminal */
+ ROFF_FORMAT, /* output for `troff' or `nroff' */
+ TEX_FORMAT, /* output for `TeX' or `LaTeX' */
+ UNKNOWN_FORMAT /* output format still unknown */
+};
+
+int gnu_extensions = 1; /* trigger all GNU extensions */
+int auto_reference = 0; /* references are `file_name:line_number:' */
+int input_reference = 0; /* references at beginning of input lines */
+int right_reference = 0; /* output references after right context */
+int line_width = 72; /* output line width in characters */
+int gap_size = 3; /* number of spaces between output fields */
+const char *truncation_string = "/";
+ /* string used to mark line truncations */
+const char *macro_name = "xx"; /* macro name for roff or TeX output */
+enum Format output_format = UNKNOWN_FORMAT;
+ /* output format */
+
+int ignore_case = 0; /* fold lower to upper case for sorting */
+const char *context_regex_string = NULL;
+ /* raw regex for end of context */
+const char *word_regex_string = NULL;
+ /* raw regex for a keyword */
+const char *break_file = NULL; /* name of the `Break characters' file */
+const char *only_file = NULL; /* name of the `Only words' file */
+const char *ignore_file = NULL; /* name of the `Ignore words' file */
+
+/* A BLOCK delimits a region in memory of arbitrary size, like the copy of a
+ whole file. A WORD is something smaller: its length should fit in a
+ short integer. A WORD_TABLE may contain several WORDs. */
+
+typedef struct
+ {
+ char *start; /* pointer to beginning of region */
+ char *end; /* pointer to end + 1 of region */
+ }
+BLOCK;
+
+typedef struct
+ {
+ char *start; /* pointer to beginning of region */
+ short size; /* length of the region */
+ }
+WORD;
+
+typedef struct
+ {
+ WORD *start; /* array of WORDs */
+ size_t length; /* number of entries */
+ }
+WORD_TABLE;
+
+/* Pattern description tables. */
+
+/* For each character, provide its folded equivalent. */
+unsigned char folded_chars[CHAR_SET_SIZE];
+
+/* For each character, indicate if it is part of a word. */
+char syntax_table[CHAR_SET_SIZE];
+char *re_syntax_table = syntax_table;
+
+/* Compiled regex for end of context. */
+struct re_pattern_buffer *context_regex;
+
+/* End of context pattern register indices. */
+struct re_registers context_regs;
+
+/* Compiled regex for a keyword. */
+struct re_pattern_buffer *word_regex;
+
+/* Keyword pattern register indices. */
+struct re_registers word_regs;
+
+/* A word characters fastmap is used only when no word regexp has been
+ provided. A word is then made up of a sequence of one or more characters
+ allowed by the fastmap. Contains !0 if character allowed in word. Not
+ only is this faster in most cases, but it also simplifies the
+ implementation of the Break files. */
+char word_fastmap[CHAR_SET_SIZE];
+
+/* Maximum length of any word read. */
+int maximum_word_length;
+
+/* Maximum width of any reference used. */
+int reference_max_width;
+
+
+/* Ignore and Only word tables. */
+
+WORD_TABLE ignore_table; /* table of words to ignore */
+WORD_TABLE only_table; /* table of words to select */
+
+#define ALLOC_NEW_WORD(table) \
+ BUMP_ALLOC ((table)->start, (table)->length, 8, WORD)
+
+/* Source text table, and scanning macros. */
+
+int number_input_files; /* number of text input files */
+int total_line_count; /* total number of lines seen so far */
+const char **input_file_name; /* array of text input file names */
+int *file_line_count; /* array of `total_line_count' values at end */
+
+BLOCK text_buffer; /* file to study */
+char *text_buffer_maxend; /* allocated end of text_buffer */
+
+/* SKIP_NON_WHITE used only for getting or skipping the reference. */
+
+/* All of the scanning macros below modify CURSOR in place and evaluate
+ their arguments several times, so CURSOR must be a plain pointer
+ variable and the arguments must not have side effects. */
+
+/* Advance CURSOR while it is below LIMIT and points at a character for
+ which isspace is false. */
+#define SKIP_NON_WHITE(cursor, limit) \
+ while (cursor < limit && !isspace(*cursor)) \
+ cursor++
+
+/* Advance CURSOR while it is below LIMIT and points at a character for
+ which isspace is true. */
+#define SKIP_WHITE(cursor, limit) \
+ while (cursor < limit && isspace(*cursor)) \
+ cursor++
+
+/* Move CURSOR backwards while it is above START and the character just
+ before it is white space. */
+#define SKIP_WHITE_BACKWARDS(cursor, start) \
+ while (cursor > start && isspace(cursor[-1])) \
+ cursor--
+
+/* Advance CURSOR over one item. When a word regexp string is set, advance
+ by the length re_match reports at CURSOR, or by one character when that
+ result is zero or negative. Otherwise advance over a run of characters
+ allowed by word_fastmap, or over a single character when the current one
+ is not allowed. The initial fastmap test and the single-character steps
+ do not compare CURSOR against LIMIT -- presumably callers guarantee
+ CURSOR < LIMIT on entry; verify against the call sites. */
+#define SKIP_SOMETHING(cursor, limit) \
+ do \
+ if (word_regex_string) \
+ { \
+ int count; \
+ count = re_match (word_regex, cursor, limit - cursor, 0, NULL); \
+ cursor += count <= 0 ? 1 : count; \
+ } \
+ else if (word_fastmap[(unsigned char) *cursor]) \
+ while (cursor < limit && word_fastmap[(unsigned char) *cursor]) \
+ cursor++; \
+ else \
+ cursor++; \
+ while (0)
+
+/* Occurrences table.
+
+ The `keyword' pointer provides the central word, which is surrounded
+ by a left context and a right context. The `keyword' and `length'
+ field allow full 8-bit characters keys, even including NULs. At other
+ places in this program, the name `keyafter' refers to the keyword
+ followed by its right context.
+
+ The left context does not extend, towards the beginning of the file,
+ further than a distance given by the `left' value. This value is
+ relative to the keyword beginning, it is usually negative. This
+ ensures that, except for white space, we will never have to backward
+ scan the source text, when it is time to generate the final output
+ lines.
+
+ The right context, indirectly attainable through the keyword end, does
+ not extend, towards the end of the file, further than a distance given
+ by the `right' value. This value is relative to the keyword
+ beginning, it is usually positive.
+
+ When automatic references are used, the `reference' value is the
+ overall line number in all input files read so far, in this case, it
+ is of type (int). When input references are used, the `reference'
+ value indicates the distance between the keyword beginning and the
+ start of the reference field, it is of type (DELTA) and usually
+ negative. */
+
+typedef short DELTA; /* to hold displacement within one context */
+
+typedef struct
+ {
+ WORD key; /* description of the keyword */
+ DELTA left; /* distance to left context start */
+ DELTA right; /* distance to right context end */
+ int reference; /* reference descriptor */
+ }
+OCCURS;
+
+/* The various OCCURS tables are indexed by the language. But for the time
+ being, there is no such multiple language support. */
+
+OCCURS *occurs_table[1]; /* all words retained from the read text */
+size_t number_of_occurs[1]; /* number of used slots in occurs_table */
+
+#define ALLOC_NEW_OCCURS(language) \
+ BUMP_ALLOC (occurs_table[language], number_of_occurs[language], 9, OCCURS)
+
+
+/* Communication among output routines. */
+
+/* Indicate if special output processing is requested for each character. */
+char edited_flag[CHAR_SET_SIZE];
+
+int half_line_width; /* half of line width, reference excluded */
+int before_max_width; /* maximum width of before field */
+int keyafter_max_width; /* maximum width of keyword-and-after field */
+int truncation_string_length; /* length of string used to flag truncation */
+
+/* When context is limited by lines, wraparound may happen on final output:
+ the `head' pointer gives access to some supplementary left context which
+ will be seen at the end of the output line, the `tail' pointer gives
+ access to some supplementary right context which will be seen at the
+ beginning of the output line. */
+
+BLOCK tail; /* tail field */
+int tail_truncation; /* flag truncation after the tail field */
+
+BLOCK before; /* before field */
+int before_truncation; /* flag truncation before the before field */
+
+BLOCK keyafter; /* keyword-and-after field */
+int keyafter_truncation; /* flag truncation after the keyafter field */
+
+BLOCK head; /* head field */
+int head_truncation; /* flag truncation before the head field */
+
+BLOCK reference; /* reference field for input reference mode */
+
+
+/* Miscellaneous routines. */
+
+/*------------------------------------------------------.
+| Duplicate string STRING, while evaluating \-escapes.  |
+`------------------------------------------------------*/
+
+/* Loosely adapted from GNU shellutils printf.c code.
+
+   Returns a newly allocated (xmalloc), NUL-terminated copy of STRING in
+   which the following escapes have been replaced:
+
+     \xhhh   character with the given hexadecimal value (1 to 3 digits);
+             a `\x' followed by no hexadecimal digit is copied verbatim
+     \0ooo   character with the given octal value (0 to 3 digits)
+     \a \b \f \n \r \t \v   the usual control characters
+     \c      discard the rest of STRING
+
+   Any other escaped character is copied together with its backslash.  A
+   lone backslash at the very end of STRING is copied as is.  */
+
+char *
+copy_unescaped_string (const char *string)
+{
+  char *result;                 /* allocated result */
+  char *cursor;                 /* cursor in result */
+  int value;                    /* value of \nnn escape */
+  int length;                   /* length of \nnn escape */
+
+  /* No escape expands to more bytes than it occupies in STRING, so the
+     length of STRING plus one for the terminator is always enough.  */
+  result = xmalloc (strlen (string) + 1);
+  cursor = result;
+
+  while (*string)
+    if (*string == '\\')
+      {
+        string++;
+        switch (*string)
+          {
+          case '\0':            /* lone backslash ending the string */
+            /* Keep the backslash, and leave STRING on the terminator so
+               that the loop stops instead of running past the end.  */
+            *cursor++ = '\\';
+            break;
+
+          case 'x':             /* \xhhh escape, 3 chars maximum */
+            value = 0;
+            /* Go through unsigned char so that a negative `char' is
+               never handed to the ctype test.  */
+            for (length = 0, string++;
+                 length < 3 && ISXDIGIT ((unsigned char) *string);
+                 length++, string++)
+              value = value * 16 + HEXTOBIN (*string);
+            if (length == 0)
+              {
+                *cursor++ = '\\';
+                *cursor++ = 'x';
+              }
+            else
+              *cursor++ = value;
+            break;
+
+          case '0':             /* \0ooo escape, 3 chars maximum */
+            value = 0;
+            for (length = 0, string++;
+                 length < 3 && ISODIGIT (*string);
+                 length++, string++)
+              value = value * 8 + OCTTOBIN (*string);
+            *cursor++ = value;
+            break;
+
+          case 'a':             /* alert */
+#if __STDC__
+            *cursor++ = '\a';
+#else
+            *cursor++ = 7;
+#endif
+            string++;
+            break;
+
+          case 'b':             /* backspace */
+            *cursor++ = '\b';
+            string++;
+            break;
+
+          case 'c':             /* cancel the rest of the output */
+            while (*string)
+              string++;
+            break;
+
+          case 'f':             /* form feed */
+            *cursor++ = '\f';
+            string++;
+            break;
+
+          case 'n':             /* new line */
+            *cursor++ = '\n';
+            string++;
+            break;
+
+          case 'r':             /* carriage return */
+            *cursor++ = '\r';
+            string++;
+            break;
+
+          case 't':             /* horizontal tab */
+            *cursor++ = '\t';
+            string++;
+            break;
+
+          case 'v':             /* vertical tab */
+#if __STDC__
+            *cursor++ = '\v';
+#else
+            *cursor++ = 11;
+#endif
+            string++;
+            break;
+
+          default:              /* unknown escape, copied verbatim */
+            *cursor++ = '\\';
+            *cursor++ = *string++;
+            break;
+          }
+      }
+    else
+      *cursor++ = *string++;
+
+  *cursor = '\0';
+  return result;
+}
+
+/*--------------------------------------------------------------------.
+| Allocate a pattern buffer and compile the regexp STRING into it.    |
+| Any compilation error is reported through `error' with status 1.    |
+| Returns the newly allocated, compiled pattern buffer.               |
+`--------------------------------------------------------------------*/
+
+struct re_pattern_buffer *
+alloc_and_compile_regex (const char *string)
+{
+  struct re_pattern_buffer *compiled;   /* structure being built */
+  const char *failure;                  /* diagnostic from regex.c, if any */
+
+  compiled = (struct re_pattern_buffer *) xmalloc (sizeof *compiled);
+  memset (compiled, 0, sizeof *compiled);
+
+  /* Start with no pattern buffer at all.  Matching goes through the
+     case folding table only when case is being ignored.  */
+
+  compiled->fastmap = (char *) xmalloc (CHAR_SET_SIZE);
+  compiled->translate = ignore_case ? (char *) folded_chars : NULL;
+  compiled->allocated = 0;
+  compiled->buffer = NULL;
+
+  failure = re_compile_pattern (string, strlen (string), compiled);
+  if (failure != NULL)
+    error (1, 0, "%s (for regexp `%s')", failure, string);
+
+  /* The fastmap should be ready before `re_match' is used.  Building it
+     here is not strictly required, since `re_search' is always called
+     sooner and builds the fastmap itself when that is still to be
+     done.  */
+
+  re_compile_fastmap (compiled);
+
+  /* Give back whatever part of the pattern buffer is not in use.  */
+
+  if (compiled->used < compiled->allocated)
+    {
+      compiled->allocated = compiled->used;
+      compiled->buffer
+        = (unsigned char *) xrealloc (compiled->buffer, compiled->used);
+    }
+
+  return compiled;
+}
+
+/*------------------------------------------------------------------------.
+| This will initialize various tables for pattern match and compiles some |
+| regexps.                                                                |
+`------------------------------------------------------------------------*/
+
+/* Fills `syntax_table', fills `folded_chars' when case is ignored, picks
+   and compiles the end of context regexp into `context_regex', and then
+   either compiles the word regexp into `word_regex' or, when there is no
+   Break file, fills `word_fastmap'.  */
+
+void
+initialize_regex (void)
+{
+  int character;                /* character value */
+
+  /* Initialize the regex syntax table.  */
+
+  for (character = 0; character < CHAR_SET_SIZE; character++)
+    syntax_table[character] = isalpha (character) ? Sword : 0;
+
+  /* Initialize the case folding table.  */
+
+  if (ignore_case)
+    for (character = 0; character < CHAR_SET_SIZE; character++)
+      folded_chars[character] = toupper (character);
+
+  /* Unless the user already provided a description of the end of line or
+     end of sentence sequence, select an end of line sequence to compile.
+     If the user provided an empty definition, thus disabling end of line
+     or sentence feature, make it NULL to speed up tests.  If GNU
+     extensions are enabled, use end of sentence like in GNU emacs.  If
+     disabled, use end of lines.  */
+
+  if (context_regex_string)
+    {
+      if (!*context_regex_string)
+        context_regex_string = NULL;
+    }
+  else if (gnu_extensions && !input_reference)
+    /* An end of sentence is a terminator, optional closing punctuation,
+       and then an end of line, a tab, or two consecutive spaces.  */
+    context_regex_string = "[.?!][]\"')}]*\\($\\|\t\\|  \\)[ \t\n]*";
+  else
+    context_regex_string = "\n";
+
+  if (context_regex_string)
+    context_regex = alloc_and_compile_regex (context_regex_string);
+
+  /* If the user has already provided a non-empty regexp to describe
+     words, compile it.  Else, unless this has already been done through
+     a user provided Break character file, construct a fastmap of
+     characters that may appear in a word.  If GNU extensions enabled,
+     include only letters of the underlying character set.  If disabled,
+     include almost everything, even punctuations; stop only on white
+     space.  */
+
+  if (word_regex_string && *word_regex_string)
+    word_regex = alloc_and_compile_regex (word_regex_string);
+  else if (!break_file)
+    {
+      if (gnu_extensions)
+        {
+
+          /* Simulate \w+.  `isalpha' only promises a nonzero value for
+             letters, and that value need not fit in a `char', so reduce
+             it to 0 or 1 before storing it.  */
+
+          for (character = 0; character < CHAR_SET_SIZE; character++)
+            word_fastmap[character] = isalpha (character) != 0;
+        }
+      else
+        {
+
+          /* Simulate [^ \t\n]+.  */
+
+          memset (word_fastmap, 1, CHAR_SET_SIZE);
+          word_fastmap[' '] = 0;
+          word_fastmap['\t'] = 0;
+          word_fastmap['\n'] = 0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------.
+| This routine will attempt to swallow a whole file named FILE_NAME into  |
+| a contiguous region of memory and return a description of it into       |
+| BLOCK.  Standard input is assumed whenever FILE_NAME is NULL, empty or  |
+| "-".  Any open, stat or read failure is reported through `error' with   |
+| status 1.                                                               |
+|                                                                         |
+| Previously, in some cases, white space compression was attempted while  |
+| inputting text.  This was defeating some regexps like default end of    |
+| sentence, which checks for two consecutive spaces.  If white space      |
+| compression is ever reinstated, it should be in output routines.        |
+`------------------------------------------------------------------------*/
+
+void
+swallow_file_in_memory (const char *file_name, BLOCK *block)
+{
+  int file_handle;              /* file descriptor number */
+  struct stat stat_block;       /* stat block for file */
+  int allocated_length;         /* allocated length of memory buffer */
+  int used_length;              /* used length in memory buffer */
+  int read_length;              /* number of character gotten on last read */
+  const char *diag_name;        /* name shown in diagnostics */
+
+  /* The name is user data and must never be used as the format string
+     itself.  It may also be NULL or empty for standard input; show "-"
+     in that case.  */
+
+  diag_name = (file_name && *file_name) ? file_name : "-";
+
+  /* As special cases, a file name which is NULL, empty or "-" indicates
+     standard input, which is already opened.  In all other cases, open
+     the file from its name.  */
+
+  if (!file_name || !*file_name || strcmp (file_name, "-") == 0)
+    file_handle = fileno (stdin);
+  else if ((file_handle = open (file_name, O_RDONLY)) < 0)
+    error (1, errno, "%s", diag_name);
+
+  /* If the file is a plain, regular file, allocate the memory buffer all at
+     once and swallow the file in one blow.  In other cases, read the file
+     repeatedly in smaller chunks until we have it all, reallocating memory
+     once in a while, as we go.  */
+
+  if (fstat (file_handle, &stat_block) < 0)
+    error (1, errno, "%s", diag_name);
+
+  if (S_ISREG (stat_block.st_mode))
+    {
+      block->start = (char *) xmalloc ((int) stat_block.st_size);
+
+      if (read (file_handle, block->start, (int) stat_block.st_size)
+          != stat_block.st_size)
+        error (1, errno, "%s", diag_name);
+
+      block->end = block->start + stat_block.st_size;
+    }
+  else
+    {
+      block->start = (char *) xmalloc (1 << SWALLOW_REALLOC_LOG);
+      used_length = 0;
+      allocated_length = (1 << SWALLOW_REALLOC_LOG);
+
+      /* Each read fills what is left of the buffer; the buffer grows by
+         one reallocation step every time it becomes full.  */
+
+      while ((read_length = read (file_handle,
+                                  block->start + used_length,
+                                  allocated_length - used_length)) > 0)
+        {
+          used_length += read_length;
+          if (used_length == allocated_length)
+            {
+              allocated_length += (1 << SWALLOW_REALLOC_LOG);
+              block->start
+                = (char *) xrealloc (block->start, allocated_length);
+            }
+        }
+
+      if (read_length < 0)
+        error (1, errno, "%s", diag_name);
+
+      block->end = block->start + used_length;
+    }
+
+  /* Close the file, but only if it was not the standard input.  */
+
+  if (file_handle != fileno (stdin))
+    close (file_handle);
+}
+
+/* Sort and search routines. */
+
+/*--------------------------------------------------------------------------.
+| Order two WORDs, given as VOID_FIRST and VOID_SECOND.  The result is 0   |
+| for identical words, negative when the first word sorts before the       |
+| second, and positive when it sorts after.  Characters are compared as    |
+| unsigned values, through `folded_chars' when case is ignored.            |
+|                                                                          |
+| When one word is a prefix of the other, the shorter one sorts first.     |
+`--------------------------------------------------------------------------*/
+
+int
+compare_words (const void *void_first, const void *void_second)
+{
+  const WORD *lhs = (const WORD *) void_first;
+  const WORD *rhs = (const WORD *) void_second;
+  const unsigned char *left = (const unsigned char *) lhs->start;
+  const unsigned char *right = (const unsigned char *) rhs->start;
+  int common;                   /* length shared by both words */
+  int index;                    /* position inside the words */
+  int difference;               /* result for the current position */
+
+  common = lhs->size;
+  if (rhs->size < common)
+    common = rhs->size;
+
+  for (index = 0; index < common; index++)
+    {
+      if (ignore_case)
+        difference = folded_chars[left[index]] - folded_chars[right[index]];
+      else
+        difference = left[index] - right[index];
+
+      if (difference != 0)
+        return difference;
+    }
+
+  /* The common part is identical, so the lengths decide.  */
+  return lhs->size - rhs->size;
+}
+
+/*-----------------------------------------------------------------------.
+| Decides which of two OCCURS, FIRST or SECOND, should lexicographically |
+| go first.  In case of a tie, preserve the original order by comparing  |
+| the addresses at which the two keywords start.  Returns a negative,    |
+| zero or positive value, as `qsort' expects.                            |
+`-----------------------------------------------------------------------*/
+
+int
+compare_occurs (const void *void_first, const void *void_second)
+{
+  const OCCURS *first = (const OCCURS *) void_first;
+  const OCCURS *second = (const OCCURS *) void_second;
+  int value;
+
+  value = compare_words (&first->key, &second->key);
+  if (value != 0)
+    return value;
+
+  /* Compare the pointers rather than returning their difference: the
+     difference need not fit in an `int', and a truncated value could
+     carry the wrong sign or wrongly read as a tie.  */
+
+  if (first->key.start < second->key.start)
+    return -1;
+  return first->key.start > second->key.start;
+}
+
+/*---------------------------------------------------------------.
+| Binary search for WORD in TABLE, using `compare_words' as the  |
+| ordering.  Returns 1 when an equal entry is found, else 0.     |
+`---------------------------------------------------------------*/
+
+int
+search_table (WORD *word, WORD_TABLE *table)
+{
+  int low = 0;                          /* first index still possible */
+  int limit = (int) table->length;      /* one past the last such index */
+
+  while (low < limit)
+    {
+      int probe = (low + limit - 1) / 2;
+      int order = compare_words (word, table->start + probe);
+
+      if (order == 0)
+        return 1;
+
+      if (order < 0)
+        limit = probe;
+      else
+        low = probe + 1;
+    }
+
+  return 0;
+}
+
+/*---------------------------------------------------------------------.
+| Sort the whole occurs table in memory, using `qsort' together with |
+| `compare_occurs'. That comparison routine settles ties between |
+| equal keys by looking at where each key starts, which is what keeps |
+| the result in a stable order. |
+`---------------------------------------------------------------------*/
+
+void
+sort_found_occurs (void)
+{
+  /* Only one language for the time being, hence the constant index.  */
+  OCCURS *entries = occurs_table[0];
+
+  qsort (entries, number_of_occurs[0], sizeof *entries, compare_occurs);
+}
+
+/* Parameter files reading routines. */
+
+/*----------------------------------------------------------------------.
+| Read the file named FILE_NAME, which holds a set of break characters, |
+| and fill the array word_fastmap accordingly: every character is |
+| accepted inside a word, except those found in the file. A character |
+| may appear several times in the file. |
+`----------------------------------------------------------------------*/
+
+void
+digest_break_file (const char *file_name)
+{
+  BLOCK contents;		/* in-memory copy of the break file */
+  char *scan;			/* walks over the copy */
+
+  swallow_file_in_memory (file_name, &contents);
+
+  /* Start by allowing every character, then clear the entry of each
+     character read from the file.  */
+
+  memset (word_fastmap, 1, CHAR_SET_SIZE);
+  scan = contents.start;
+  while (scan < contents.end)
+    {
+      word_fastmap[(unsigned char) *scan] = 0;
+      scan++;
+    }
+
+  /* Without GNU extensions, space, tab and newline always break words,
+     whether or not the file lists them.  With GNU extensions, they break
+     words only when the file contains them; so the only way to keep
+     newline out of the break set is a file holding no newline at all,
+     not even a final one.  */
+
+  if (!gnu_extensions)
+    word_fastmap[' '] = word_fastmap['\t'] = word_fastmap['\n'] = 0;
+
+  /* The copy of the file is not needed any more.  */
+
+  free (contents.start);
+}
+
+/*-----------------------------------------------------------------------.
+| Read the file named FILE_NAME, holding one word per line, and build in |
+| TABLE a sorted table of WORD descriptors for these words. Empty lines |
+| give no entry. The whole file is swallowed in memory and the |
+| descriptors point straight into that copy, which is thus kept; some |
+| space is lost to the newlines, but the reading is fast. |
+`-----------------------------------------------------------------------*/
+
+void
+digest_word_file (const char *file_name, WORD_TABLE *table)
+{
+  BLOCK contents;		/* in-memory copy of the word file */
+  char *scan;			/* walks over the copy */
+  char *line;			/* where the current line begins */
+
+  swallow_file_in_memory (file_name, &contents);
+
+  table->start = NULL;
+  table->length = 0;
+
+  /* Go through the copy, one line per iteration.  */
+
+  for (scan = contents.start; scan < contents.end; )
+    {
+
+      /* Move to the newline ending this line, or to the end of the
+         copy if the last line is incomplete.  */
+
+      line = scan;
+      for (; scan < contents.end; scan++)
+        if (*scan == '\n')
+          break;
+
+      /* A non-empty line is a word: append its descriptor.  */
+
+      if (scan != line)
+        {
+          ALLOC_NEW_WORD (table);
+          table->start[table->length].start = line;
+          table->start[table->length].size = scan - line;
+          table->length++;
+        }
+
+      /* Step over the newline, if there is one.  */
+
+      if (scan < contents.end)
+        scan++;
+    }
+
+  /* Sort the words, so the table is in `compare_words' order.  */
+
+  qsort (table->start, table->length, (size_t) sizeof (WORD), compare_words);
+}
+
+
+/* Keyword recognition and selection. */
+
+/*----------------------------------------------------------------------.
+| For each keyword in the source text, constructs an OCCURS structure. |
+| |
+| The text in `text_buffer' is cut into contexts (lines or sentences) |
+| with `context_regex' when one is defined; otherwise the whole buffer |
+| is a single context. Within each context, words are located either |
+| with `word_regex' or, failing that, with `word_fastmap'. Every |
+| non-empty word updates `maximum_word_length'. Words falling inside |
+| an input reference, found in the Ignore table, or missing from the |
+| Only table are then skipped. Each remaining word is appended to |
+| `occurs_table[0]' with its key, its reference, and the offsets of |
+| its left and right context boundaries relative to the key start. |
+`----------------------------------------------------------------------*/
+
+void
+find_occurs_in_text (void)
+{
+ char *cursor; /* for scanning the source text */
+ char *scan; /* for scanning the source text also */
+ char *line_start; /* start of the current input line */
+ char *line_scan; /* newlines scanned until this point */
+ int reference_length; /* length of reference in input mode */
+ WORD possible_key; /* possible key, to ease searches */
+ OCCURS *occurs_cursor; /* current OCCURS under construction */
+
+ char *context_start; /* start of left context */
+ char *context_end; /* end of right context */
+ char *word_start; /* start of word */
+ char *word_end; /* end of word */
+ char *next_context_start; /* next start of left context */
+
+ /* reference_length is always used within `if (input_reference)'.
+ However, GNU C diagnoses that it may be used uninitialized. The
+ following assignment is merely to shut it up. */
+
+ reference_length = 0;
+
+ /* Tracking where lines start is helpful for reference processing. In
+ auto reference mode, this allows counting lines. In input reference
+ mode, this permits finding the beginning of the references.
+
+ The first line begins with the file; in input reference mode, skip
+ this very first reference immediately, to help the later rejection of
+ any word found inside it. Also, unconditionally assigning these
+ variables has the happy effect of shutting up lint. */
+
+ line_start = text_buffer.start;
+ line_scan = line_start;
+ if (input_reference)
+ {
+ SKIP_NON_WHITE (line_scan, text_buffer.end);
+ reference_length = line_scan - line_start;
+ SKIP_WHITE (line_scan, text_buffer.end);
+ }
+
+ /* Process the whole buffer, one line or one sentence at a time. */
+
+ for (cursor = text_buffer.start;
+ cursor < text_buffer.end;
+ cursor = next_context_start)
+ {
+
+ /* `context_start' gets initialized before the processing of each
+ line, or once for the whole buffer if no end of line or sentence
+ sequence separator. */
+
+ context_start = cursor;
+
+ /* If an end of line or end of sentence sequence is defined and
+ non-empty, `next_context_start' will be recomputed to be the end of
+ each line or sentence, before each one is processed. If no such
+ sequence, then `next_context_start' is set at the end of the whole
+ buffer, which is then considered to be a single line or sentence.
+ This test also accounts for the case of an incomplete line or
+ sentence at the end of the buffer. */
+
+ if (context_regex_string
+ && (re_search (context_regex, cursor, text_buffer.end - cursor,
+ 0, text_buffer.end - cursor, &context_regs)
+ >= 0))
+ next_context_start = cursor + context_regs.end[0];
+
+ else
+ next_context_start = text_buffer.end;
+
+ /* Include the separator into the right context, but not any suffix
+ white space in this separator; this ensures it will be seen in
+ output and will not take more space than necessary. */
+
+ context_end = next_context_start;
+ SKIP_WHITE_BACKWARDS (context_end, context_start);
+
+ /* Read and process a single input line or sentence, one word at a
+ time. */
+
+ while (1)
+ {
+ if (word_regex)
+
+ /* If a word regexp has been compiled, use it to skip at the
+ beginning of the next word. If there is no such word, exit
+ the loop. */
+
+ {
+ if (re_search (word_regex, cursor, context_end - cursor,
+ 0, context_end - cursor, &word_regs)
+ < 0)
+ break;
+ word_start = cursor + word_regs.start[0];
+ word_end = cursor + word_regs.end[0];
+ }
+ else
+
+ /* Avoid re_search and use the fastmap to skip to the
+ beginning of the next word. If there is no more word in
+ the buffer, exit the loop. */
+
+ {
+ scan = cursor;
+ while (scan < context_end
+ && !word_fastmap[(unsigned char) *scan])
+ scan++;
+
+ if (scan == context_end)
+ break;
+
+ word_start = scan;
+
+ while (scan < context_end
+ && word_fastmap[(unsigned char) *scan])
+ scan++;
+
+ word_end = scan;
+ }
+
+ /* Skip right to the beginning of the found word. */
+
+ cursor = word_start;
+
+ /* Skip any zero length word. Just advance a single position,
+ then go fetch the next word. */
+
+ if (word_end == word_start)
+ {
+ cursor++;
+ continue;
+ }
+
+ /* This is a genuine, non empty word, so save it as a possible
+ key. Then skip over it. Also, maintain the maximum length of
+ all words read so far. It is mandatory to take the maximum
+ length of all words in the file, without considering if they
+ are actually kept or rejected, because backward jumps at output
+ generation time may fall in *any* word. */
+
+ possible_key.start = cursor;
+ possible_key.size = word_end - word_start;
+ cursor += possible_key.size;
+
+ if (possible_key.size > maximum_word_length)
+ maximum_word_length = possible_key.size;
+
+ /* In input reference mode, update `line_start' from its previous
+ value. Count the lines just in case auto reference mode is
+ also selected. After each newline, `line_scan' is moved past
+ the reference which begins the new line; so, if it ends up
+ beyond the start of the word just matched, this word is indeed
+ part of a reference: just ignore it. */
+
+ if (input_reference)
+ {
+ while (line_scan < possible_key.start)
+ if (*line_scan == '\n')
+ {
+ total_line_count++;
+ line_scan++;
+ line_start = line_scan;
+ SKIP_NON_WHITE (line_scan, text_buffer.end);
+ reference_length = line_scan - line_start;
+ }
+ else
+ line_scan++;
+ if (line_scan > possible_key.start)
+ continue;
+ }
+
+ /* Ignore the word if an `Ignore words' table exists and if it is
+ part of it. Also ignore the word if an `Only words' table exists
+ and if it is *not* part of it.
+
+ It is allowed that both tables be used at once, even if this
+ may look strange for now. Just ignore a word that would appear
+ in both. If regexps are eventually implemented for these
+ tables, the Ignore table could then reject words that would
+ have been previously accepted by the Only table. */
+
+ if (ignore_file && search_table (&possible_key, &ignore_table))
+ continue;
+ if (only_file && !search_table (&possible_key, &only_table))
+ continue;
+
+ /* A non-empty word has been found. First of all, ensure
+ proper allocation of the next OCCURS, and make a pointer to
+ where it will be constructed. */
+
+ ALLOC_NEW_OCCURS (0);
+ occurs_cursor = occurs_table[0] + number_of_occurs[0];
+
+ /* Define the reference field, if any. */
+
+ if (auto_reference)
+ {
+
+ /* While auto referencing, update `line_start' from its
+ previous value, counting lines as we go. If input
+ referencing at the same time, `line_start' has been
+ advanced earlier, and the following loop is never really
+ executed. The reference saved is the count of lines seen
+ so far. */
+
+ while (line_scan < possible_key.start)
+ if (*line_scan == '\n')
+ {
+ total_line_count++;
+ line_scan++;
+ line_start = line_scan;
+ SKIP_NON_WHITE (line_scan, text_buffer.end);
+ }
+ else
+ line_scan++;
+
+ occurs_cursor->reference = total_line_count;
+ }
+ else if (input_reference)
+ {
+
+ /* If only input referencing, `line_start' has been computed
+ earlier to detect the case the word matched would be part
+ of the reference. The reference is saved as the offset of
+ `line_start' relative to the start of the key. Also
+ maintain the width of the widest reference met. */
+
+ occurs_cursor->reference
+ = (DELTA) (line_start - possible_key.start);
+ if (reference_length > reference_max_width)
+ reference_max_width = reference_length;
+ }
+
+ /* Exclude the reference from the context in simple cases, that
+ is, when the context starts exactly where the line starts. */
+
+ if (input_reference && line_start == context_start)
+ {
+ SKIP_NON_WHITE (context_start, context_end);
+ SKIP_WHITE (context_start, context_end);
+ }
+
+ /* Completes the OCCURS structure. Both context boundaries are
+ saved as offsets relative to the start of the key. */
+
+ occurs_cursor->key = possible_key;
+ occurs_cursor->left = context_start - possible_key.start;
+ occurs_cursor->right = context_end - possible_key.start;
+
+ number_of_occurs[0]++;
+ }
+ }
+}
+
+/* Formatting and actual output - service routines. */
+
+/*----------------------------------------------------------.
+| Prints some NUMBER of spaces on stdout. Nothing at all is |
+| printed when NUMBER is zero or negative. |
+`----------------------------------------------------------*/
+
+void
+print_spaces (int number)
+{
+  while (number > 0)
+    {
+      putchar (' ');
+      number--;
+    }
+}
+
+/*---------------------------------------------------------------------.
+| Prints the field provided by FIELD on stdout. |
+| |
+| Characters whose entry in `edited_flag' is zero are copied as is. |
+| A flagged character is edited: in TeX format, if `todiac' gives a |
+| nonzero code for it, a TeX control sequence built from `tobase' is |
+| printed; otherwise the quote, the TeX special characters, braces and |
+| the backslash get their escaped form, and any other flagged |
+| character is printed as a single space. |
+`---------------------------------------------------------------------*/
+
+void
+print_field (BLOCK field)
+{
+ char *cursor; /* Cursor in field to print */
+ int character; /* Current character */
+ int base; /* Base character, without diacritic */
+ int diacritic; /* Diacritic code for the character */
+
+ /* Whitespace is not really compressed. Instead, each white space
+ character (tab, vertical tab, form feed, etc.) is printed as one
+ single space. */
+
+ for (cursor = field.start; cursor < field.end; cursor++)
+ {
+ character = (unsigned char) *cursor;
+ if (edited_flag[character])
+ {
+
+ /* First check if this is a diacriticized character.
+
+ This works only for TeX. I do not know how diacriticized
+ letters work with `roff'. Please someone explain it to me!
+
+ The switch on the diacritic code has no default case, so a
+ code other than 1 to 9 prints nothing at all. */
+
+ diacritic = todiac (character);
+ if (diacritic != 0 && output_format == TEX_FORMAT)
+ {
+ base = tobase (character);
+ switch (diacritic)
+ {
+
+ case 1: /* Latin diphthongs */
+ switch (base)
+ {
+ case 'o':
+ printf ("\\oe{}");
+ break;
+
+ case 'O':
+ printf ("\\OE{}");
+ break;
+
+ case 'a':
+ printf ("\\ae{}");
+ break;
+
+ case 'A':
+ printf ("\\AE{}");
+ break;
+
+ default:
+ putchar (' ');
+ }
+ break;
+
+ case 2: /* Acute accent; an `i' base gets a backslash, giving \i */
+ printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base);
+ break;
+
+ case 3: /* Grave accent */
+ printf ("\\`%s%c", (base == 'i' ? "\\" : ""), base);
+ break;
+
+ case 4: /* Circumflex accent */
+ printf ("\\^%s%c", (base == 'i' ? "\\" : ""), base);
+ break;
+
+ case 5: /* Diaeresis */
+ printf ("\\\"%s%c", (base == 'i' ? "\\" : ""), base);
+ break;
+
+ case 6: /* Tilde accent */
+ printf ("\\~%s%c", (base == 'i' ? "\\" : ""), base);
+ break;
+
+ case 7: /* Cedilla */
+ printf ("\\c{%c}", base);
+ break;
+
+ case 8: /* Small circle, printed as \aa or \AA */
+ switch (base)
+ {
+ case 'a':
+ printf ("\\aa{}");
+ break;
+
+ case 'A':
+ printf ("\\AA{}");
+ break;
+
+ default:
+ putchar (' ');
+ }
+ break;
+
+ case 9: /* Strike through */
+ switch (base)
+ {
+ case 'o':
+ printf ("\\o{}");
+ break;
+
+ case 'O':
+ printf ("\\O{}");
+ break;
+
+ default:
+ putchar (' ');
+ }
+ break;
+ }
+ }
+ else
+
+ /* This is not a diacritic character, so handle cases which are
+ really specific to `roff' or TeX. All white space processing
+ is done as the default case of this switch. The output
+ format is not tested here: a case is reached only if the
+ character has been flagged in `edited_flag'. */
+
+ switch (character)
+ {
+ case '"':
+ /* In roff output format, double any quote. */
+ putchar ('"');
+ putchar ('"');
+ break;
+
+ case '$':
+ case '%':
+ case '&':
+ case '#':
+ case '_':
+ /* In TeX output format, precede these with a backslash. */
+ putchar ('\\');
+ putchar (character);
+ break;
+
+ case '{':
+ case '}':
+ /* In TeX output format, precede these with a backslash and
+ force mathematical mode. */
+ printf ("$\\%c$", character);
+ break;
+
+ case '\\':
+ /* In TeX output mode, request production of a backslash. */
+ printf ("\\backslash{}");
+ break;
+
+ default:
+ /* Any other flagged character produces a single space. */
+ putchar (' ');
+ }
+ }
+ else
+ putchar (*cursor);
+ }
+}
+
+
+/* Formatting and actual output - planning routines. */
+
+/*--------------------------------------------------------------------.
+| From information collected from command line options and input file |
+| readings, compute and fix some output parameter values. |
+| |
+| This sets `reference_max_width' and allocates `reference.start' in |
+| auto reference mode, reduces `line_width' for a left reference, |
+| computes `half_line_width', `before_max_width' and |
+| `keyafter_max_width', normalizes `truncation_string', and fills the |
+| `edited_flag' table according to `output_format'. |
+`--------------------------------------------------------------------*/
+
+void
+fix_output_parameters (void)
+{
+ int file_index; /* index in text input file arrays */
+ int line_ordinal; /* line ordinal value for reference */
+ char ordinal_string[12]; /* edited line ordinal for reference */
+ int reference_width; /* width for the whole reference */
+ int character; /* character ordinal */
+ const char *cursor; /* cursor in some constant strings */
+
+ /* In auto reference mode, the maximum width of this field is
+ precomputed and subtracted from the overall line width. Add one for
+ the column which separates the file name from the line number. The
+ buffer then allocated for the reference has one more byte than this
+ maximum width. */
+
+ if (auto_reference)
+ {
+ reference_max_width = 0;
+ for (file_index = 0; file_index < number_input_files; file_index++)
+ {
+ line_ordinal = file_line_count[file_index] + 1;
+ if (file_index > 0)
+ line_ordinal -= file_line_count[file_index - 1];
+ sprintf (ordinal_string, "%d", line_ordinal);
+ reference_width = strlen (ordinal_string);
+ if (input_file_name[file_index])
+ reference_width += strlen (input_file_name[file_index]);
+ if (reference_width > reference_max_width)
+ reference_max_width = reference_width;
+ }
+ reference_max_width++;
+ reference.start = (char *) xmalloc (reference_max_width + 1);
+ }
+
+ /* If the reference appears to the left of the output line, reserve some
+ space for it right away, including one gap size. */
+
+ if ((auto_reference || input_reference) && !right_reference)
+ line_width -= reference_max_width + gap_size;
+
+ /* The output lines, minimally, will contain from left to right a left
+ context, a gap, and a keyword followed by the right context with no
+ special intervening gap. Half of the line width is dedicated to the
+ left context and the gap, the other half is dedicated to the keyword
+ and the right context; these values are computed once and for all here.
+ There also are tail and head wrap around fields, used when the keyword
+ is near the beginning or the end of the line, or when some long word
+ cannot fit in, but leave place from wrapped around shorter words. The
+ maximum width of these fields are recomputed separately for each line,
+ on a case by case basis. It is worth noting that it cannot happen that
+ both the tail and head fields are used at once. */
+
+ half_line_width = line_width / 2;
+ before_max_width = half_line_width - gap_size;
+ keyafter_max_width = half_line_width;
+
+ /* If truncation_string is the empty string, make it NULL to speed up
+ tests. In this case, truncation_string_length will never get used, so
+ there is no need to set it.
+
+ NOTE(review): the width computations just below do read
+ truncation_string_length unconditionally; presumably it is still zero
+ at this point when no truncation string is in effect -- confirm. */
+
+ if (truncation_string && *truncation_string)
+ truncation_string_length = strlen (truncation_string);
+ else
+ truncation_string = NULL;
+
+ if (gnu_extensions)
+ {
+
+ /* When flagging truncation at the left of the keyword, the
+ truncation mark goes at the beginning of the before field,
+ unless there is a head field, in which case the mark goes at the
+ left of the head field. When flagging truncation at the right
+ of the keyword, the mark goes at the end of the keyafter field,
+ unless there is a tail field, in which case the mark goes at the
+ end of the tail field. Only eight combination cases could arise
+ for truncation marks:
+
+ . None.
+ . One beginning the before field.
+ . One beginning the head field.
+ . One ending the keyafter field.
+ . One ending the tail field.
+ . One beginning the before field, another ending the keyafter field.
+ . One ending the tail field, another beginning the before field.
+ . One ending the keyafter field, another beginning the head field.
+
+ So, there are at most two truncation marks, which could appear both
+ on the left side of the center of the output line, both on the
+ right side, or one on either side. Hence room for two marks is
+ reserved on each side. */
+
+ before_max_width -= 2 * truncation_string_length;
+ keyafter_max_width -= 2 * truncation_string_length;
+ }
+ else
+ {
+
+ /* I never figured out exactly how UNIX' ptx plans the output width
+ of its various fields. If GNU extensions are disabled, do not
+ try computing the field widths correctly; instead, use the
+ following formula, which does not completely imitate UNIX' ptx,
+ but almost. */
+
+ keyafter_max_width -= 2 * truncation_string_length + 1;
+ }
+
+ /* Compute which characters need special output processing. Initialize
+ by flagging any white space character. Some systems do not consider
+ form feed as a space character, but we do. */
+
+ for (character = 0; character < CHAR_SET_SIZE; character++)
+ edited_flag[character] = isspace (character);
+ edited_flag['\f'] = 1;
+
+ /* Complete the special character flagging according to selected output
+ format. */
+
+ switch (output_format)
+ {
+ case UNKNOWN_FORMAT:
+ /* Should never happen. Falls through to the dumb format case. */
+
+ case DUMB_FORMAT:
+ break;
+
+ case ROFF_FORMAT:
+
+ /* `Quote' characters should be doubled. */
+
+ edited_flag['"'] = 1;
+ break;
+
+ case TEX_FORMAT:
+
+ /* Various characters need special processing. */
+
+ for (cursor = "$%&#_{}\\"; *cursor; cursor++)
+ edited_flag[*cursor] = 1;
+
+ /* Any character with 8th bit set will print to a single space, unless
+ it is diacriticized.
+
+ NOTE(review): the loop below flags only the characters for which
+ `todiac' is nonzero, and clears the flag of all the others from
+ 0200 up; since `print_field' copies unflagged characters unchanged,
+ it looks like the other 8-bit characters are output as is rather
+ than as a space -- confirm which one is intended. */
+
+ for (character = 0200; character < CHAR_SET_SIZE; character++)
+ edited_flag[character] = todiac (character) != 0;
+ break;
+ }
+}
+
+/*------------------------------------------------------------------.
+| Compute the position and length of all the output fields, given a |
+| pointer to some OCCURS. |
+| |
+| The results are left in the variables `keyafter', `before', `tail', |
+| `head' and `reference', together with the flags |
+| `keyafter_truncation', `before_truncation', `tail_truncation' and |
+| `head_truncation'. A truncation flag may be set only when |
+| `truncation_string' is not NULL. |
+`------------------------------------------------------------------*/
+
+void
+define_all_fields (OCCURS *occurs)
+{
+ int tail_max_width; /* allowable width of tail field */
+ int head_max_width; /* allowable width of head field */
+ char *cursor; /* running cursor in source text */
+ char *left_context_start; /* start of left context */
+ char *right_context_end; /* end of right context */
+ char *left_field_start; /* conservative start for `head'/`before' */
+ int file_index; /* index in text input file arrays */
+ const char *file_name; /* file name for reference */
+ int line_ordinal; /* line ordinal for reference */
+
+ /* Define `keyafter', start of left context and end of right context.
+ `keyafter' starts at the saved position for keyword and extends to the
+ right from the end of the keyword, eating separators or full words, but
+ not beyond maximum allowed width for `keyafter' field or limit for the
+ right context. Suffix spaces will be removed afterwards. */
+
+ keyafter.start = occurs->key.start;
+ keyafter.end = keyafter.start + occurs->key.size;
+ left_context_start = keyafter.start + occurs->left;
+ right_context_end = keyafter.start + occurs->right;
+
+ cursor = keyafter.end;
+ while (cursor < right_context_end
+ && cursor <= keyafter.start + keyafter_max_width)
+ {
+ keyafter.end = cursor;
+ SKIP_SOMETHING (cursor, right_context_end);
+ }
+ if (cursor <= keyafter.start + keyafter_max_width)
+ keyafter.end = cursor;
+
+ keyafter_truncation = truncation_string && keyafter.end < right_context_end;
+
+ SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start);
+
+ /* When the left context is wide, it might take some time to catch up from
+ the left context boundary to the beginning of the `head' or `before'
+ fields. So, in this case, to speed the catchup, we jump back from the
+ keyword, using some secure distance, possibly falling in the middle of
+ a word. A secure backward jump would be at least half the maximum
+ width of a line, plus the size of the longest word met in the whole
+ input. We conclude this backward jump by a skip forward of at least
+ one word. In this manner, we should not inadvertently accept only part
+ of a word. From the reached point, when it will be time to fix the
+ beginning of `head' or `before' fields, we will skip forward words or
+ delimiters until we get sufficiently near. Note that `occurs->left'
+ is an offset relative to the keyword start, hence the negation in the
+ test. */
+
+ if (-occurs->left > half_line_width + maximum_word_length)
+ {
+ left_field_start
+ = keyafter.start - (half_line_width + maximum_word_length);
+ SKIP_SOMETHING (left_field_start, keyafter.start);
+ }
+ else
+ left_field_start = keyafter.start + occurs->left;
+
+ /* `before' certainly ends at the keyword, but not including separating
+ spaces. It starts no earlier than the saved value for the left
+ context, which is advanced until the field falls inside the maximum
+ allowed width for the before field. There will be no prefix spaces
+ either. `before' only advances by skipping single separators or whole
+ words. */
+
+ before.start = left_field_start;
+ before.end = keyafter.start;
+ SKIP_WHITE_BACKWARDS (before.end, before.start);
+
+ while (before.start + before_max_width < before.end)
+ SKIP_SOMETHING (before.start, before.end);
+
+ if (truncation_string)
+ {
+ cursor = before.start;
+ SKIP_WHITE_BACKWARDS (cursor, text_buffer.start);
+ before_truncation = cursor > left_context_start;
+ }
+ else
+ before_truncation = 0;
+
+ SKIP_WHITE (before.start, text_buffer.end);
+
+ /* The tail could not take more columns than what has been left in the
+ left context field, and a gap is mandatory. It starts after the
+ right context, and does not contain prefixed spaces. It ends at
+ the end of line, the end of buffer or when the tail field is full,
+ whichever comes first. It cannot contain only part of a word, and
+ has no suffixed spaces. When a tail is indeed produced, the
+ truncation mark of `keyafter' is cancelled, as the tail carries its
+ own. */
+
+ tail_max_width
+ = before_max_width - (before.end - before.start) - gap_size;
+
+ if (tail_max_width > 0)
+ {
+ tail.start = keyafter.end;
+ SKIP_WHITE (tail.start, text_buffer.end);
+
+ tail.end = tail.start;
+ cursor = tail.end;
+ while (cursor < right_context_end
+ && cursor < tail.start + tail_max_width)
+ {
+ tail.end = cursor;
+ SKIP_SOMETHING (cursor, right_context_end);
+ }
+
+ if (cursor < tail.start + tail_max_width)
+ tail.end = cursor;
+
+ if (tail.end > tail.start)
+ {
+ keyafter_truncation = 0;
+ tail_truncation = truncation_string && tail.end < right_context_end;
+ }
+ else
+ tail_truncation = 0;
+
+ SKIP_WHITE_BACKWARDS (tail.end, tail.start);
+ }
+ else
+ {
+
+ /* No place left for a tail field. */
+
+ tail.start = NULL;
+ tail.end = NULL;
+ tail_truncation = 0;
+ }
+
+ /* `head' could not take more columns than what has been left in the right
+ context field, and a gap is mandatory. It ends before the left
+ context, and does not contain suffixed spaces. Its pointer is advanced
+ until the head field has shrunk to its allowed width. It cannot
+ contain only part of a word, and has no suffixed spaces. When a head
+ is indeed produced, the truncation mark of `before' is cancelled, as
+ the head carries its own. */
+
+ head_max_width
+ = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size;
+
+ if (head_max_width > 0)
+ {
+ head.end = before.start;
+ SKIP_WHITE_BACKWARDS (head.end, text_buffer.start);
+
+ head.start = left_field_start;
+ while (head.start + head_max_width < head.end)
+ SKIP_SOMETHING (head.start, head.end);
+
+ if (head.end > head.start)
+ {
+ before_truncation = 0;
+ head_truncation = (truncation_string
+ && head.start > left_context_start);
+ }
+ else
+ head_truncation = 0;
+
+ SKIP_WHITE (head.start, head.end);
+ }
+ else
+ {
+
+ /* No place left for a head field. */
+
+ head.start = NULL;
+ head.end = NULL;
+ head_truncation = 0;
+ }
+
+ if (auto_reference)
+ {
+
+ /* Construct the reference text in preallocated space from the file
+ name and the line number. Find out in which file the reference
+ occurred. Standard input yields an empty file name. Ensure line
+ numbers are one based, even if they are computed zero based.
+
+ NOTE(review): the search loop below has no upper bound on
+ `file_index'; it presumably relies on the last entry of
+ `file_line_count' being at least `occurs->reference' -- confirm. */
+
+ file_index = 0;
+ while (file_line_count[file_index] < occurs->reference)
+ file_index++;
+
+ file_name = input_file_name[file_index];
+ if (!file_name)
+ file_name = "";
+
+ line_ordinal = occurs->reference + 1;
+ if (file_index > 0)
+ line_ordinal -= file_line_count[file_index - 1];
+
+ sprintf (reference.start, "%s:%d", file_name, line_ordinal);
+ reference.end = reference.start + strlen (reference.start);
+ }
+ else if (input_reference)
+ {
+
+ /* Reference starts at saved position for reference and extends right
+ until some white space is met. The saved position is an offset
+ relative to the start of the keyword. */
+
+ reference.start = keyafter.start + (DELTA) occurs->reference;
+ reference.end = reference.start;
+ SKIP_NON_WHITE (reference.end, right_context_end);
+ }
+}
+
+
+/* Formatting and actual output - control routines. */
+
+/*----------------------------------------------------------------------.
+| Output the current output fields as one line for `troff' or `nroff'. |
+| The line is a call of the macro `macro_name', having for arguments |
+| the quoted `tail', `before', `keyafter' and `head' fields, in this |
+| order, then the quoted `reference' field when references are used. |
+`----------------------------------------------------------------------*/
+
+void
+output_one_roff_line (void)
+{
+  /* Macro call and `tail' field; the truncation mark ends the field.  */
+
+  printf (".%s \"", macro_name);
+  print_field (tail);
+  if (tail_truncation)
+    fputs (truncation_string, stdout);
+  putchar ('"');
+
+  /* `before' field; the truncation mark begins the field.  */
+
+  fputs (" \"", stdout);
+  if (before_truncation)
+    fputs (truncation_string, stdout);
+  print_field (before);
+  putchar ('"');
+
+  /* `keyafter' field; the truncation mark ends the field.  */
+
+  fputs (" \"", stdout);
+  print_field (keyafter);
+  if (keyafter_truncation)
+    fputs (truncation_string, stdout);
+  putchar ('"');
+
+  /* `head' field; the truncation mark begins the field.  */
+
+  fputs (" \"", stdout);
+  if (head_truncation)
+    fputs (truncation_string, stdout);
+  print_field (head);
+  putchar ('"');
+
+  /* `reference' field, only when references are produced.  */
+
+  if (auto_reference || input_reference)
+    {
+      fputs (" \"", stdout);
+      print_field (reference);
+      putchar ('"');
+    }
+
+  putchar ('\n');
+}
+
+/*----------------------------------------------------------------------.
+| Output the current output fields as one line for `TeX'. The line is |
+| a call of the macro `macro_name' with brace delimited arguments: the |
+| tail, the before field, the key, what follows the key, the head and, |
+| when references are used, the reference. No truncation mark is |
+| printed in this format. |
+`----------------------------------------------------------------------*/
+
+void
+output_one_tex_line (void)
+{
+  BLOCK key;			/* the keyword alone */
+  BLOCK after;			/* the rest of `keyafter' */
+  char *boundary;		/* where `key' ends and `after' begins */
+
+  printf ("\\%s ", macro_name);
+  putchar ('{');
+  print_field (tail);
+  fputs ("}{", stdout);
+  print_field (before);
+  fputs ("}{", stdout);
+
+  /* Cut `keyafter' in two, right after its first item.  */
+
+  boundary = keyafter.start;
+  SKIP_SOMETHING (boundary, keyafter.end);
+  key.start = keyafter.start;
+  key.end = boundary;
+  after.start = boundary;
+  after.end = keyafter.end;
+
+  print_field (key);
+  fputs ("}{", stdout);
+  print_field (after);
+  fputs ("}{", stdout);
+  print_field (head);
+  putchar ('}');
+
+  if (auto_reference || input_reference)
+    {
+      putchar ('{');
+      print_field (reference);
+      putchar ('}');
+    }
+  putchar ('\n');
+}
+
+/*-------------------------------------------------------------------.
+| Output the current output fields as one line for a dumb terminal. |
+| From left to right come: the reference and its gap unless the |
+| reference goes to the right, the tail, padding, the before field, |
+| a gap, the keyafter field, padding, the head, and at last the |
+| reference when it goes to the right. |
+`-------------------------------------------------------------------*/
+
+void
+output_one_dumb_line (void)
+{
+  /* Nonzero when a reference has to be printed at the right end.  */
+  int reference_at_right
+    = (auto_reference || input_reference) && right_reference;
+
+  if (!right_reference)
+    {
+
+      /* Output the `reference' field and its following gap.  This is
+         done even if no reference mode is selected; the field is then
+         printed as it stands.  */
+
+      print_field (reference);
+      if (auto_reference)
+        {
+
+          /* Add a colon, so GNU emacs next-error will handle the
+             reference.  This colon is taken from the gap which
+             follows.  */
+
+          putchar (':');
+          print_spaces (reference_max_width
+                        + gap_size
+                        - (reference.end - reference.start)
+                        - 1);
+        }
+      else
+        print_spaces (reference_max_width
+                      + gap_size
+                      - (reference.end - reference.start));
+    }
+
+  /* Output the `tail' field, if any, then pad so that the `before'
+     field ends right before the central gap.  */
+
+  if (tail.start < tail.end)
+    {
+      print_field (tail);
+      if (tail_truncation)
+        fputs (truncation_string, stdout);
+
+      print_spaces (half_line_width - gap_size
+                    - (before.end - before.start)
+                    - (before_truncation ? truncation_string_length : 0)
+                    - (tail.end - tail.start)
+                    - (tail_truncation ? truncation_string_length : 0));
+    }
+  else
+    print_spaces (half_line_width - gap_size
+                  - (before.end - before.start)
+                  - (before_truncation ? truncation_string_length : 0));
+
+  /* Output the `before' field, then the central gap.  */
+
+  if (before_truncation)
+    fputs (truncation_string, stdout);
+  print_field (before);
+
+  print_spaces (gap_size);
+
+  /* Output the `keyafter' field.  */
+
+  print_field (keyafter);
+  if (keyafter_truncation)
+    fputs (truncation_string, stdout);
+
+  if (head.start < head.end)
+    {
+
+      /* Output the `head' field, flushed to the right.  */
+
+      print_spaces (half_line_width
+                    - (keyafter.end - keyafter.start)
+                    - (keyafter_truncation ? truncation_string_length : 0)
+                    - (head.end - head.start)
+                    - (head_truncation ? truncation_string_length : 0));
+      if (head_truncation)
+        fputs (truncation_string, stdout);
+      print_field (head);
+    }
+  else if (reference_at_right)
+
+    /* No head, but padding is still needed before the reference.  */
+
+    print_spaces (half_line_width
+                  - (keyafter.end - keyafter.start)
+                  - (keyafter_truncation ? truncation_string_length : 0));
+
+  if (reference_at_right)
+    {
+
+      /* Output the `reference' field, after a gap.  */
+
+      print_spaces (gap_size);
+      print_field (reference);
+    }
+
+  putchar ('\n');
+}
+
+/*------------------------------------------------------------------------.
+| Scan the whole occurs table and, for each entry, output one line in the |
+| format selected by `output_format'. An unknown format is handled like |
+| the dumb terminal format. |
+`------------------------------------------------------------------------*/
+
+void
+generate_all_output (void)
+{
+  OCCURS *entries;		/* start of the keyword entries */
+  int entry;			/* index of the entry being processed */
+
+  /* Give default values to the tail and head fields, in case line
+     contexts or references are not used, as these variables would then
+     never be computed.  */
+
+  tail.start = tail.end = NULL;
+  tail_truncation = 0;
+
+  head.start = head.end = NULL;
+  head_truncation = 0;
+
+  /* Process all keyword occurrences in turn.  */
+
+  entries = occurs_table[0];
+
+  for (entry = 0; entry < number_of_occurs[0]; entry++)
+    {
+
+      /* Compute the exact size of every field, and whether each
+         truncation flag is set or not.  */
+
+      define_all_fields (entries + entry);
+
+      /* Produce one output line according to the selected format.  */
+
+      if (output_format == ROFF_FORMAT)
+        output_one_roff_line ();
+      else if (output_format == TEX_FORMAT)
+        output_one_tex_line ();
+      else if (output_format == DUMB_FORMAT
+               || output_format == UNKNOWN_FORMAT)
+        output_one_dumb_line ();
+    }
+}
+
+/* Option decoding and main program. */
+
+/*------------------------------------------------------------------------.
+| Print program identification and options, then exit.  When STATUS is   |
+| nonzero, only a hint to try `--help' goes to standard error; otherwise |
+| the full option summary goes to standard output.  In both cases the    |
+| process exits with STATUS, so this function never returns.             |
+`------------------------------------------------------------------------*/
+
+void
+usage (int status)
+{
+ if (status != 0) /* failure: short hint on standard error only */
+ fprintf (stderr, "Try `%s --help' for more information.\n", program_name);
+ else
+ {
+ printf ("\
+Usage: %s [OPTION]... [INPUT]... (without -G)\n\
+ or: %s -G [OPTION]... [INPUT [OUTPUT]]\n", program_name, program_name);
+ printf ("\
+\n\
+ -A, --auto-reference output automatically generated references\n\
+ -C, --copyright display Copyright and copying conditions\n\
+ -G, --traditional behave more like System V `ptx'\n\
+ -F, --flag-truncation=STRING use STRING for flagging line truncations\n\
+ -M, --macro-name=STRING macro name to use instead of `xx'\n\
+ -O, --format=roff generate output as roff directives\n\
+ -R, --right-side-refs put references at right, not counted in -w\n\
+ -S, --sentence-regexp=REGEXP for end of lines or end of sentences\n\
+ -T, --format=tex generate output as TeX directives\n\
+ -W, --word-regexp=REGEXP use REGEXP to match each keyword\n\
+ -b, --break-file=FILE word break characters in this FILE\n\
+ -f, --ignore-case fold lower case to upper case for sorting\n\
+ -g, --gap-size=NUMBER gap size in columns between output fields\n\
+ -i, --ignore-file=FILE read ignore word list from FILE\n\
+ -o, --only-file=FILE read only word list from this FILE\n\
+ -r, --references first field of each line is a reference\n\
+ -t, --typeset-mode - not implemented -\n\
+ -w, --width=NUMBER output width in columns, reference excluded\n\
+ --help display this help and exit\n\
+ --version output version information and exit\n\
+\n\
+With no FILE or if FILE is -, read Standard Input. `-F /' by default.\n");
+ }
+ exit (status); /* reached on both paths; STATUS becomes the exit code */
+}
+
+/*----------------------------------------------------------------------.
+| Main program. Decode ARGC arguments passed through the ARGV array of |
+| strings, then launch execution. |
+`----------------------------------------------------------------------*/
+
+/* Long options equivalences.  Most entries make getopt_long return the
+   character of the matching short option.  `--help' and `--version' have
+   no short form and instead store 1 into show_help or show_version;
+   `--format' returns the code 10, which main decodes by matching its
+   argument against format_args.  The all-zero entry ends the table.  */
+const struct option long_options[] =
+{
+ {"auto-reference", no_argument, NULL, 'A'},
+ {"break-file", required_argument, NULL, 'b'},
+ {"copyright", no_argument, NULL, 'C'},
+ {"flag-truncation", required_argument, NULL, 'F'},
+ {"ignore-case", no_argument, NULL, 'f'},
+ {"gap-size", required_argument, NULL, 'g'},
+ {"help", no_argument, &show_help, 1},
+ {"ignore-file", required_argument, NULL, 'i'},
+ {"macro-name", required_argument, NULL, 'M'},
+ {"only-file", required_argument, NULL, 'o'},
+ {"references", no_argument, NULL, 'r'},
+ {"right-side-refs", no_argument, NULL, 'R'},
+ {"format", required_argument, NULL, 10},
+ {"sentence-regexp", required_argument, NULL, 'S'},
+ {"traditional", no_argument, NULL, 'G'},
+ {"typeset-mode", no_argument, NULL, 't'},
+ {"version", no_argument, &show_version, 1},
+ {"width", required_argument, NULL, 'w'},
+ {"word-regexp", required_argument, NULL, 'W'},
+ {0, 0, 0, 0},
+};
+
+static char const* const format_args[] = /* keywords accepted by --format */
+{
+ "roff", "tex", 0 /* main maps index 0 to ROFF_FORMAT and index 1 to TEX_FORMAT; 0 ends the list */
+};
+
+/* Entry point.  Decode the command line, digest the optional break,
+   ignore and only files, scan every input file for keyword occurrences,
+   then sort the occurrences and print the permuted index.  Usage errors
+   leave through usage (1); a complete run ends with exit (0).  */
+
+int
+main (int argc, char *const argv[])
+{
+  int optchar;                  /* argument character */
+  extern int optind;            /* index of argument */
+  extern char *optarg;          /* value or argument */
+  int file_index;               /* index in text input file arrays */
+
+#ifdef HAVE_MCHECK
+  /* Use GNU malloc checking.  It has proven to be useful!  */
+  mcheck ();
+#endif /* HAVE_MCHECK */
+
+#ifdef STDC_HEADERS
+#ifdef HAVE_SETCHRCLASS
+  setchrclass (NULL);
+#endif
+#endif
+
+  /* Decode program options.  */
+
+  program_name = argv[0];
+
+  while ((optchar = getopt_long (argc, argv, "ACF:GM:ORS:TW:b:i:fg:o:trw:",
+                                 long_options, NULL)),
+         optchar != EOF)
+    {
+      switch (optchar)
+        {
+        default:
+          /* Unrecognized option; usage exits, so nothing falls through.  */
+          usage (1);
+
+        case 0:
+          /* A long option which only stored into its flag variable
+             (--help or --version); it is acted upon after the loop.  */
+          break;
+
+        case 'C':
+          printf ("%s", copyright);
+          exit (0);
+
+        case 'G':
+          gnu_extensions = 0;
+          break;
+
+        case 'b':
+          break_file = optarg;
+          break;
+
+        case 'f':
+          ignore_case = 1;
+          break;
+
+        case 'g':
+          gap_size = atoi (optarg);
+          break;
+
+        case 'i':
+          ignore_file = optarg;
+          break;
+
+        case 'o':
+          only_file = optarg;
+          break;
+
+        case 'r':
+          input_reference = 1;
+          break;
+
+        case 't':
+          /* Typeset mode is accepted but not implemented yet.  */
+          break;
+
+        case 'w':
+          line_width = atoi (optarg);
+          break;
+
+        case 'A':
+          auto_reference = 1;
+          break;
+
+        case 'F':
+          truncation_string = copy_unescaped_string (optarg);
+          break;
+
+        case 'M':
+          macro_name = optarg;
+          break;
+
+        case 'O':
+          output_format = ROFF_FORMAT;
+          break;
+
+        case 'R':
+          right_reference = 1;
+          break;
+
+        case 'S':
+          context_regex_string = copy_unescaped_string (optarg);
+          break;
+
+        case 'T':
+          output_format = TEX_FORMAT;
+          break;
+
+        case 'W':
+          word_regex_string = copy_unescaped_string (optarg);
+          break;
+
+        case 10:
+          /* --format=KEYWORD: the index of KEYWORD within format_args
+             selects the output format.  */
+          switch (argmatch (optarg, format_args))
+            {
+            default:
+              usage (1);
+
+            case 0:
+              output_format = ROFF_FORMAT;
+              break;
+
+            case 1:
+              output_format = TEX_FORMAT;
+              break;
+            }
+          break;
+        }
+    }
+
+  /* Process trivial options.  */
+
+  if (show_help)
+    usage (0);
+
+  if (show_version)
+    {
+      printf ("%s\n", version_string);
+      exit (0);
+    }
+
+  /* Change the default Ignore file if one is defined.  */
+
+#ifdef DEFAULT_IGNORE_FILE
+  if (!ignore_file)
+    ignore_file = DEFAULT_IGNORE_FILE;
+#endif
+
+  /* Process remaining arguments.  If GNU extensions are enabled, process
+     all arguments as input parameters.  If disabled, accept at most two
+     arguments, the second of which is an output parameter.  */
+
+  if (optind == argc)
+    {
+
+      /* No more argument simply means: read standard input.  */
+
+      input_file_name = (const char **) xmalloc (sizeof (const char *));
+      file_line_count = (int *) xmalloc (sizeof (int));
+      number_input_files = 1;
+      input_file_name[0] = NULL;
+    }
+  else if (gnu_extensions)
+    {
+      number_input_files = argc - optind;
+      input_file_name
+        = (const char **) xmalloc (number_input_files * sizeof (const char *));
+      file_line_count
+        = (int *) xmalloc (number_input_files * sizeof (int));
+
+      for (file_index = 0; file_index < number_input_files; file_index++)
+        {
+          /* An empty name or a lone dash stands for standard input, which
+             is recorded as a NULL file name.  Each argument must fill its
+             own slot: writing slot 0 here would replace the first file
+             name with the last one.  */
+
+          if (!*argv[optind] || strcmp (argv[optind], "-") == 0)
+            input_file_name[file_index] = NULL;
+          else
+            input_file_name[file_index] = argv[optind];
+          optind++;
+        }
+    }
+  else
+    {
+
+      /* There is one necessary input file.  */
+
+      number_input_files = 1;
+      input_file_name = (const char **) xmalloc (sizeof (const char *));
+      file_line_count = (int *) xmalloc (sizeof (int));
+      if (!*argv[optind] || strcmp (argv[optind], "-") == 0)
+        input_file_name[0] = NULL;
+      else
+        input_file_name[0] = argv[optind];
+      optind++;
+
+      /* Redirect standard output, only if requested.  The stream has to be
+         reopened in place: closing stdout and opening another stream would
+         leave stdout itself unusable for the output routines.  The file
+         name is passed as an argument of a "%s" format so that a `%' in
+         it is never interpreted.  */
+
+      if (optind < argc)
+        {
+          if (freopen (argv[optind], "w", stdout) == NULL)
+            error (1, errno, "%s", argv[optind]);
+          optind++;
+        }
+
+      /* Diagnose any other argument as an error.  */
+
+      if (optind < argc)
+        usage (1);
+    }
+
+  /* If the output format has not been explicitly selected, choose dumb
+     terminal format if GNU extensions are enabled, else `roff' format.  */
+
+  if (output_format == UNKNOWN_FORMAT)
+    output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT;
+
+  /* Initialize the main tables.  */
+
+  initialize_regex ();
+
+  /* Read `Break character' file, if any.  */
+
+  if (break_file)
+    digest_break_file (break_file);
+
+  /* Read `Ignore words' file and `Only words' files, if any.  If any of
+     these files is empty, reset the name of the file to NULL, to avoid
+     unnecessary calls to search_table.  */
+
+  if (ignore_file)
+    {
+      digest_word_file (ignore_file, &ignore_table);
+      if (ignore_table.length == 0)
+        ignore_file = NULL;
+    }
+
+  if (only_file)
+    {
+      digest_word_file (only_file, &only_table);
+      if (only_table.length == 0)
+        only_file = NULL;
+    }
+
+  /* Prepare to study all the input files.  */
+
+  number_of_occurs[0] = 0;
+  total_line_count = 0;
+  maximum_word_length = 0;
+  reference_max_width = 0;
+
+  for (file_index = 0; file_index < number_input_files; file_index++)
+    {
+
+      /* Read the file in core, then study it.  */
+
+      swallow_file_in_memory (input_file_name[file_index], &text_buffer);
+      find_occurs_in_text ();
+
+      /* Maintain for each file how many lines have been read so far when
+         its end is reached.  Incrementing the count first is a simple
+         kludge to handle a possible incomplete line at end of file.  */
+
+      total_line_count++;
+      file_line_count[file_index] = total_line_count;
+    }
+
+  /* Do the output process phase.  */
+
+  sort_found_occurs ();
+  fix_output_parameters ();
+  generate_all_output ();
+
+  /* All done.  */
+
+  exit (0);
+}
diff --git a/gnu/usr.bin/ptx/ptx.info b/gnu/usr.bin/ptx/ptx.info
new file mode 100644
index 0000000..3bbd1bb
--- /dev/null
+++ b/gnu/usr.bin/ptx/ptx.info
@@ -0,0 +1,496 @@
+This is Info file ptx.info, produced by Makeinfo-1.47 from the input
+file ./ptx.texinfo.
+
+ This file documents the `ptx' command, which has the purpose of
+generating permuted indices for groups of files.
+
+ Copyright (C) 1990, 1991, 1993 by the Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+this manual under the conditions for verbatim copying, provided that
+the entire resulting derived work is distributed under the terms of a
+permission notice identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+manual into another language, under the above conditions for modified
+versions, except that this permission notice may be stated in a
+translation approved by the Foundation.
+
+
+File: ptx.info, Node: Top, Next: Invoking ptx, Prev: (dir), Up: (dir)
+
+Introduction
+************
+
+ This is the 0.3 beta release of `ptx', the GNU version of a permuted
+index generator. This software has the main goal of providing a
+replacement for the traditional `ptx' as found on System V machines,
+able to handle small files quickly, while providing a platform for more
+development.
+
+ This version reimplements and extends traditional `ptx'. Among
+other things, it can produce a readable "KWIC" (keywords in their
+context) without the need of `nroff', there is also an option to
+produce TeX compatible output. This version does not handle huge input
+files, that is, those files which do not fit in memory all at once.
+
+ *Please note* that an overall renaming of all options is
+foreseeable. In fact, GNU ptx specifications are not frozen yet.
+
+* Menu:
+
+* Invoking ptx:: How to use this program
+* Compatibility:: The GNU extensions to `ptx'
+
+ -- The Detailed Node Listing --
+
+How to use this program
+
+* General options:: Options which affect general program behaviour.
+* Charset selection:: Underlying character set considerations.
+* Input processing:: Input fields, contexts, and keyword selection.
+* Output formatting:: Types of output format, and sizing the fields.
+
+
+File: ptx.info, Node: Invoking ptx, Next: Compatibility, Prev: Top, Up: Top
+
+How to use this program
+***********************
+
+ This tool reads a text file and essentially produces a permuted
+index, with each keyword in its context. The calling sketch is one of:
+
+ ptx [OPTION ...] [FILE ...]
+
+ or:
+
+ ptx -G [OPTION ...] [INPUT [OUTPUT]]
+
+ The `-G' (or its equivalent: `--traditional') option disables all
+GNU extensions and reverts to traditional mode, thus introducing some
+limitations, and changes several of the program's default option values.
+When `-G' is not specified, GNU extensions are always enabled. GNU
+extensions to `ptx' are documented wherever appropriate in this
+document. See *Note Compatibility:: for an explicit list of them.
+
+ Individual options are explained later in this document.
+
+ When GNU extensions are enabled, there may be zero, one or several
+FILE after the options. If there is no FILE, the program reads the
+standard input. If there is one or several FILE, they give the name of
+input files which are all read in turn, as if all the input files were
+concatenated. However, there is a full contextual break between each
+file and, when automatic referencing is requested, file names and line
+numbers refer to individual text input files. In all cases, the
+program produces the permuted index onto the standard output.
+
+ When GNU extensions are *not* enabled, that is, when the program
+operates in traditional mode, there may be zero, one or two parameters
+besides the options. If there are no parameters, the program reads the
+standard input and produces the permuted index onto the standard output.
+If there is only one parameter, it names the text INPUT to be read
+instead of the standard input. If two parameters are given, they give
+respectively the name of the INPUT file to read and the name of the
+OUTPUT file to produce. *Be very careful* to note that, in this case,
+the contents of file given by the second parameter is destroyed. This
+behaviour is dictated only by System V `ptx' compatibility, because GNU
+Standards discourage output parameters not introduced by an option.
+
+ Note that for *any* file named as the value of an option or as an
+input text file, a single dash `-' may be used, in which case standard
+input is assumed. However, it would not make sense to use this
+convention more than once per program invocation.
+
+* Menu:
+
+* General options:: Options which affect general program behaviour.
+* Charset selection:: Underlying character set considerations.
+* Input processing:: Input fields, contexts, and keyword selection.
+* Output formatting:: Types of output format, and sizing the fields.
+
+
+File: ptx.info, Node: General options, Next: Charset selection, Prev: Invoking ptx, Up: Invoking ptx
+
+General options
+===============
+
+`-C'
+`--copyright'
+ Prints a short note about the Copyright and copying conditions,
+ then exit without further processing.
+
+`-G'
+`--traditional'
+ As already explained, this option disables all GNU extensions to
+ `ptx' and switches to traditional mode.
+
+`--help'
+ Prints a short help on standard output, then exit without further
+ processing.
+
+`--version'
+ Prints the program version on standard output, then exit without
+ further processing.
+
+
+File: ptx.info, Node: Charset selection, Next: Input processing, Prev: General options, Up: Invoking ptx
+
+Charset selection
+=================
+
+ As it is setup now, the program assumes that the input file is coded
+using 8-bit ISO 8859-1 code, also known as Latin-1 character set,
+*unless* if it is compiled for MS-DOS, in which case it uses the
+character set of the IBM-PC. (GNU `ptx' is not known to work on
+smaller MS-DOS machines anymore.) Compared to 7-bit ASCII, the set of
+characters which are letters is then different, this fact alters the
+behaviour of regular expression matching. Thus, the default regular
+expression for a keyword allows foreign or diacriticized letters.
+Keyword sorting, however, is still crude; it obeys the underlying
+character set ordering quite blindly.
+
+`-f'
+`--ignore-case'
+ Fold lower case letters to upper case for sorting.
+
+
+File: ptx.info, Node: Input processing, Next: Output formatting, Prev: Charset selection, Up: Invoking ptx
+
+Word selection
+==============
+
+`-b FILE'
+`--break-file=FILE'
+ This option is an alternative way to option `-W' for describing
+ which characters make up words. This option introduces the name
+ of a file which contains a list of characters which can*not* be
+ part of one word, this file is called the "Break file". Any
+ character which is not part of the Break file is a word
+ constituent. If both options `-b' and `-W' are specified, then
+ `-W' has precedence and `-b' is ignored.
+
+ When GNU extensions are enabled, the only way to avoid newline as a
+ break character is to write all the break characters in the file
+ with no newline at all, not even at the end of the file. When GNU
+ extensions are disabled, spaces, tabs and newlines are always
+ considered as break characters even if not included in the Break
+ file.
+
+`-i FILE'
+`--ignore-file=FILE'
+ The file associated with this option contains a list of words
+ which will never be taken as keywords in concordance output. It
+ is called the "Ignore file". The file contains exactly one word
+ in each line; the end of line separation of words is not subject
+ to the value of the `-S' option.
+
+ There is a default Ignore file used by `ptx' when this option is
+ not specified, usually found in `/usr/local/lib/eign' if this has
+ not been changed at installation time. If you want to deactivate
+ the default Ignore file, specify `/dev/null' instead.
+
+`-o FILE'
+`--only-file=FILE'
+ The file associated with this option contains a list of words
+ which will be retained in concordance output, any word not
+ mentioned in this file is ignored. The file is called the "Only
+ file". The file contains exactly one word in each line; the end
+ of line separation of words is not subject to the value of the
+ `-S' option.
+
+ There is no default for the Only file. In the case there are both
+ an Only file and an Ignore file, a word will be subject to be a
+ keyword only if it is given in the Only file and not given in the
+ Ignore file.
+
+`-r'
+`--references'
+ On each input line, the leading sequence of non white characters
+ will be taken to be a reference that has the purpose of
+ identifying this input line on the produced permuted index. See
+ *Note Output formatting:: for more information about reference
+ production. Using this option change the default value for option
+ `-S'.
+
+ Using this option, the program does not try very hard to remove
+ references from contexts in output, but it succeeds in doing so
+ *when* the context ends exactly at the newline. If option `-r' is
+ used with `-S' default value, or when GNU extensions are disabled,
+ this condition is always met and references are completely
+ excluded from the output contexts.
+
+`-S REGEXP'
+`--sentence-regexp=REGEXP'
+ This option selects which regular expression will describe the end
+ of a line or the end of a sentence. In fact, there are other
+ distinctions between end of lines and end of sentences than the
+ effect of this regular expression, and input line boundaries have
+ no special significance outside this option. By default, when GNU
+ extensions are enabled and if `-r' option is not used, end of
+ sentences are used. In this case, the precise REGEX is imported
+ from GNU emacs:
+
+ [.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*
+
+ Whenever GNU extensions are disabled or if `-r' option is used, end
+ of lines are used; in this case, the default REGEXP is just:
+
+ \n
+
+ Using an empty REGEXP is equivalent to completely disabling end of
+ line or end of sentence recognition. In this case, the whole file
+ is considered to be a single big line or sentence. The user might
+ want to disallow all truncation flag generation as well, through
+ option `-F ""'. *Note Syntax of Regular Expressions:
+ (emacs)Regexps.
+
+ When the keywords happen to be near the beginning of the input
+ line or sentence, this often creates an unused area at the
+ beginning of the output context line; when the keywords happen to
+ be near the end of the input line or sentence, this often creates
+ an unused area at the end of the output context line. The program
+ tries to fill those unused areas by wrapping around context in
+ them; the tail of the input line or sentence is used to fill the
+ unused area on the left of the output line; the head of the input
+ line or sentence is used to fill the unused area on the right of
+ the output line.
+
+ As a matter of convenience to the user, many usual backslashed
+ escape sequences, as found in the C language, are recognized and
+ converted to the corresponding characters by `ptx' itself.
+
+`-W REGEXP'
+`--word-regexp=REGEXP'
+ This option selects which regular expression will describe each
+ keyword. By default, if GNU extensions are enabled, a word is a
+ sequence of letters; the REGEXP used is `\w+'. When GNU
+ extensions are disabled, a word is by default anything which ends
+ with a space, a tab or a newline; the REGEXP used is `[^ \t\n]+'.
+
+ An empty REGEXP is equivalent to not using this option, letting the
+ default dive in. *Note Syntax of Regular Expressions:
+ (emacs)Regexps.
+
+ As a matter of convenience to the user, many usual backslashed
+ escape sequences, as found in the C language, are recognized and
+ converted to the corresponding characters by `ptx' itself.
+
+
+File: ptx.info, Node: Output formatting, Prev: Input processing, Up: Invoking ptx
+
+Output formatting
+=================
+
+ Output format is mainly controlled by `-O' and `-T' options,
+described in the table below. When neither `-O' nor `-T' is selected,
+and if GNU extensions are enabled, the program chooses an output format
+suited for a dumb terminal. Each keyword occurrence is output to the
+center of one line, surrounded by its left and right contexts. Each
+field is properly justified, so the concordance output could readily be
+observed. As a special feature, if automatic references are selected
+by option `-A' and are output before the left context, that is, if
+option `-R' is *not* selected, then a colon is added after the
+reference; this nicely interfaces with GNU Emacs `next-error'
+processing. In this default output format, each white space character,
+like newline and tab, is merely changed to exactly one space, with no
+special attempt to compress consecutive spaces. This might change in
+the future. Except for those white space characters, every other
+character of the underlying set of 256 characters is transmitted
+verbatim.
+
+ Output format is further controlled by the following options.
+
+`-g NUMBER'
+`--gap-size=NUMBER'
+ Select the size of the minimum white gap between the fields on the
+ output line.
+
+`-w NUMBER'
+`--width=NUMBER'
+ Select the output maximum width of each final line. If references
+ are used, they are included or excluded from the output maximum
+ width depending on the value of option `-R'. If this option is not
+ selected, that is, when references are output before the left
+ context, the output maximum width takes into account the maximum
+ length of all references. If this option is selected, that is,
+ when references are output after the right context, the output
+ maximum width does not take into account the space taken by
+ references, nor the gap that precedes them.
+
+`-A'
+`--auto-reference'
+ Select automatic references. Each input line will have an
+ automatic reference made up of the file name and the line ordinal,
+ with a single colon between them. However, the file name will be
+ empty when standard input is being read. If both `-A' and `-r'
+ are selected, then the input reference is still read and skipped,
+ but the automatic reference is used at output time, overriding the
+ input reference.
+
+`-R'
+`--right-side-refs'
+ In default output format, when option `-R' is not used, any
+ reference produced by the effect of options `-r' or `-A' is given
+ at the beginning of each output line, before the left context. In
+ default output format, when option `-R' is specified, references
+ are rather given to the far right of output lines, after the
+ right context. For any other output format, option `-R' is almost
+ ignored, except for the fact that the width of references is *not*
+ taken into account in total output width given by `-w' whenever
+ `-R' is selected.
+
+ This option is automatically selected whenever GNU extensions are
+ disabled.
+
+`-F STRING'
+`--flag-truncation=STRING'
+ This option will request that any truncation in the output be
+ reported using the string STRING. Most output fields
+ theoretically extend towards the beginning or the end of the
+ current line, or current sentence, as selected with option `-S'.
+ But there is a maximum allowed output line width, changeable
+ through option `-w', which is further divided into space for
+ various output fields. When a field cannot extend all the way to
+ the beginning or the end of the current line because it would not
+ fit in its allotted space, then a truncation occurs. By default, the string
+ used is a single slash, as in `-F /'.
+
+ STRING may have more than one character, as in `-F ...'. Also, in
+ the particular case STRING is empty (`-F ""'), truncation flagging
+ is disabled, and no truncation marks are appended in this case.
+
+ As a matter of convenience to the user, many usual backslashed
+ escape sequences, as found in the C language, are recognized and
+ converted to the corresponding characters by `ptx' itself.
+
+`-M STRING'
+`--macro-name=STRING'
+ Select another STRING to be used instead of `xx', while generating
+ output suitable for `nroff', `troff' or TeX.
+
+`-O'
+`--format=roff'
+ Choose an output format suitable for `nroff' or `troff'
+ processing. Each output line will look like:
+
+ .xx "TAIL" "BEFORE" "KEYWORD_AND_AFTER" "HEAD" "REF"
+
+ so it will be possible to write an `.xx' roff macro to take care of
+ the output typesetting. This is the default output format when GNU
+ extensions are disabled. Option `-M' might be used to change `xx'
+ to another macro name.
+
+ In this output format, each non-graphical character, like newline
+ and tab, is merely changed to exactly one space, with no special
+ attempt to compress consecutive spaces. Each quote character: `"'
+ is doubled so it will be correctly processed by `nroff' or `troff'.
+
+`-T'
+`--format=tex'
+ Choose an output format suitable for TeX processing. Each output
+ line will look like:
+
+ \xx {TAIL}{BEFORE}{KEYWORD}{AFTER}{HEAD}{REF}
+
+ so it will be possible to write a `\xx' definition to take
+ care of the output typesetting. Note that when references are not
+ being produced, that is, neither option `-A' nor option `-r' is
+ selected, the last parameter of each `\xx' call is inhibited.
+ Option `-M' might be used to change `xx' to another macro name.
+
+ In this output format, some special characters, like `$', `%',
+ `&', `#' and `_' are automatically protected with a backslash.
+ Curly brackets `{', `}' are also protected with a backslash, but
+ also enclosed in a pair of dollar signs to force mathematical
+ mode. The backslash itself produces the sequence `\backslash{}'.
+ Circumflex and tilde diacritics produce the sequence `^\{ }' and
+ `~\{ }' respectively. Other diacriticized characters of the
+ underlying character set produce an appropriate TeX sequence as
+ far as possible. The other non-graphical characters, like newline
+ and tab, and all others characters which are not part of ASCII,
+ are merely changed to exactly one space, with no special attempt
+ to compress consecutive spaces. Let me know how to improve this
+ special character processing for TeX.
+
+
+File: ptx.info, Node: Compatibility, Prev: Invoking ptx, Up: Top
+
+The GNU extensions to `ptx'
+***************************
+
+ This version of `ptx' contains a few features which do not exist in
+System V `ptx'. These extra features are suppressed by using the `-G'
+command line option, unless overridden by other command line options.
+Some GNU extensions cannot be recovered by overriding, so the simple
+rule is to avoid `-G' if you care about GNU extensions. Here are the
+differences between this program and System V `ptx'.
+
+ * This program can read many input files at once, it always writes
+ the resulting concordance on standard output. On the other hand,
+ System V `ptx' reads only one file and produces the result on
+ standard output or, if a second FILE parameter is given on the
+ command, to that FILE.
+
+ Having output parameters not introduced by options is a quite
+ dangerous practice which GNU avoids as far as possible. So, for
+ using `ptx' portably between GNU and System V, you should pay
+ attention to always use it with a single input file, and always
+ expect the result on standard output. You might also want to
+ automatically configure in a `-G' option to `ptx' calls in
+ products using `ptx', if the configurator finds that the installed
+ `ptx' accepts `-G'.
+
+ * The only options available in System V `ptx' are options `-b',
+ `-f', `-g', `-i', `-o', `-r', `-t' and `-w'. All other options
+ are GNU extensions and are not repeated in this enumeration.
+ Moreover, some options have a slightly different meaning when GNU
+ extensions are enabled, as explained below.
+
+ * By default, concordance output is not formatted for `troff' or
+ `nroff'. It is rather formatted for a dumb terminal. `troff' or
+ `nroff' output may still be selected through option `-O'.
+
+ * Unless `-R' option is used, the maximum reference width is
+ subtracted from the total output line width. With GNU extensions
+ disabled, width of references is not taken into account in the
+ output line width computations.
+
+ * All 256 characters, even `NUL's, are always read and processed from
+ input file with no adverse effect, even if GNU extensions are
+ disabled. However, System V `ptx' does not accept 8-bit
+ characters, a few control characters are rejected, and the tilde
+ `~' is condemned.
+
+ * Input line length is only limited by available memory, even if GNU
+ extensions are disabled. However, System V `ptx' processes only
+ the first 200 characters in each line.
+
+ * The break (non-word) characters default to be every character
+ except all letters of the underlying character set, diacriticized
+ or not. When GNU extensions are disabled, the break characters
+ default to space, tab and newline only.
+
+ * The program makes better use of output line width. If GNU
+ extensions are disabled, the program rather tries to imitate
+ System V `ptx', but still, there are some slight disposition
+ glitches this program does not completely reproduce.
+
+ * The user can specify both an Ignore file and an Only file. This
+ is not allowed with System V `ptx'.
+
+
+
+Tag Table:
+Node: Top939
+Node: Invoking ptx2298
+Node: General options5025
+Node: Charset selection5639
+Node: Input processing6514
+Node: Output formatting12205
+Node: Compatibility18737
+
+End Tag Table
diff --git a/gnu/usr.bin/ptx/ptx.texinfo b/gnu/usr.bin/ptx/ptx.texinfo
new file mode 100644
index 0000000..e690c55
--- /dev/null
+++ b/gnu/usr.bin/ptx/ptx.texinfo
@@ -0,0 +1,554 @@
+\input texinfo @c -*-texinfo-*-
+@c %**start of header
+@setfilename ptx.info
+@settitle GNU @code{ptx} reference manual
+@finalout
+@c %**end of header
+
+@ifinfo
+This file documents the @code{ptx} command, which has the purpose of
+generating permuted indices for groups of files.
+
+Copyright (C) 1990, 1991, 1993 by the Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end ifinfo
+
+@titlepage
+@title ptx
+@subtitle The GNU permuted indexer
+@subtitle Edition 0.3, for ptx version 0.3
+@subtitle November 1993
+@author by Francois Pinard
+
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1990, 1991, 1993 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided that the entire
+resulting derived work is distributed under the terms of a permission
+notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that this permission notice may be stated in a translation approved
+by the Foundation.
+@end titlepage
+
+@node Top, Invoking ptx, (dir), (dir)
+@chapter Introduction
+
+This is the 0.3 beta release of @code{ptx}, the GNU version of a
+permuted index generator. This software has the main goal of providing
+a replacement for the traditional @code{ptx} as found on System V
+machines, able to handle small files quickly, while providing a platform
+for more development.
+
+This version reimplements and extends traditional @code{ptx}. Among
+other things, it can produce a readable @dfn{KWIC} (keywords in their
+context) without the need of @code{nroff}, there is also an option to
+produce @TeX{} compatible output. This version does not handle huge
+input files, that is, those files which do not fit in memory all at
+once.
+
+@emph{Please note} that an overall renaming of all options is
+foreseeable. In fact, GNU ptx specifications are not frozen yet.
+
+@menu
+* Invoking ptx:: How to use this program
+* Compatibility:: The GNU extensions to @code{ptx}
+
+ --- The Detailed Node Listing ---
+
+How to use this program
+
+* General options:: Options which affect general program behaviour.
+* Charset selection:: Underlying character set considerations.
+* Input processing:: Input fields, contexts, and keyword selection.
+* Output formatting:: Types of output format, and sizing the fields.
+@end menu
+
+@node Invoking ptx, Compatibility, Top, Top
+@chapter How to use this program
+
+This tool reads a text file and essentially produces a permuted index, with
+each keyword in its context. The calling sketch is one of:
+
+@example
+ptx [@var{option} @dots{}] [@var{file} @dots{}]
+@end example
+
+or:
+
+@example
+ptx -G [@var{option} @dots{}] [@var{input} [@var{output}]]
+@end example
+
+The @samp{-G} (or its equivalent: @samp{--traditional}) option disables
+all GNU extensions and reverts to traditional mode, thus introducing some
+limitations, and changes several of the program's default option values.
+When @samp{-G} is not specified, GNU extensions are always enabled. GNU
+extensions to @code{ptx} are documented wherever appropriate in this
+document. @xref{Compatibility}, for an explicit list of them.
+
+Individual options are explained later in this document.
+
+When GNU extensions are enabled, there may be zero, one or several
+@var{file} after the options. If there is no @var{file}, the program
+reads the standard input. If there is one or several @var{file}, they
+give the name of input files which are all read in turn, as if all the
+input files were concatenated. However, there is a full contextual
+break between each file and, when automatic referencing is requested,
+file names and line numbers refer to individual text input files. In
+all cases, the program produces the permuted index onto the standard
+output.
+
+When GNU extensions are @emph{not} enabled, that is, when the program
+operates in traditional mode, there may be zero, one or two parameters
+besides the options. If there are no parameters, the program reads the
+standard input and produces the permuted index onto the standard output.
+If there is only one parameter, it names the text @var{input} to be read
+instead of the standard input. If two parameters are given, they give
+respectively the name of the @var{input} file to read and the name of
+the @var{output} file to produce. @emph{Be very careful} to note that,
+in this case, the contents of the file given by the second parameter are
+destroyed. This behaviour is dictated only by System V @code{ptx}
+compatibility, because GNU Standards discourage output parameters not
+introduced by an option.
+
+Note that for @emph{any} file named as the value of an option or as an
+input text file, a single dash @kbd{-} may be used, in which case
+standard input is assumed. However, it would not make sense to use this
+convention more than once per program invocation.
+
+@menu
+* General options:: Options which affect general program behaviour.
+* Charset selection:: Underlying character set considerations.
+* Input processing:: Input fields, contexts, and keyword selection.
+* Output formatting:: Types of output format, and sizing the fields.
+@end menu
+
+@node General options, Charset selection, Invoking ptx, Invoking ptx
+@section General options
+
+@table @code
+
+@item -C
+@itemx --copyright
+Prints a short note about the Copyright and copying conditions, then
+exits without further processing.
+
+@item -G
+@itemx --traditional
+As already explained, this option disables all GNU extensions to
+@code{ptx} and switches to traditional mode.
+
+@item --help
+Prints a short help on standard output, then exits without further
+processing.
+
+@item --version
+Prints the program version on standard output, then exits without further
+processing.
+
+@end table
+
+@node Charset selection, Input processing, General options, Invoking ptx
+@section Charset selection
+
+As it is setup now, the program assumes that the input file is coded
+using 8-bit ISO 8859-1 code, also known as Latin-1 character set,
+@emph{unless} it is compiled for MS-DOS, in which case it uses the
+character set of the IBM-PC. (GNU @code{ptx} is not known to work on
+smaller MS-DOS machines anymore.) Compared to 7-bit ASCII, the set of
+characters which are letters is then different, this fact alters the
+behaviour of regular expression matching. Thus, the default regular
+expression for a keyword allows foreign or diacriticized letters.
+Keyword sorting, however, is still crude; it obeys the underlying
+character set ordering quite blindly.
+
+@table @code
+
+@item -f
+@itemx --ignore-case
+Fold lower case letters to upper case for sorting.
+
+@end table
+
+@node Input processing, Output formatting, Charset selection, Invoking ptx
+@section Word selection
+
+@table @code
+
+@item -b @var{file}
+@itemx --break-file=@var{file}
+
+This option is an alternative way to option @code{-W} for describing
+which characters make up words. This option introduces the name of a
+file which contains a list of characters which can@emph{not} be part of
+one word, this file is called the @dfn{Break file}. Any character which
+is not part of the Break file is a word constituent. If both options
+@code{-b} and @code{-W} are specified, then @code{-W} has precedence and
+@code{-b} is ignored.
+
+When GNU extensions are enabled, the only way to avoid newline as a
+break character is to write all the break characters in the file with no
+newline at all, not even at the end of the file. When GNU extensions
+are disabled, spaces, tabs and newlines are always considered as break
+characters even if not included in the Break file.
+
+@item -i @var{file}
+@itemx --ignore-file=@var{file}
+
+The file associated with this option contains a list of words which will
+never be taken as keywords in concordance output. It is called the
+@dfn{Ignore file}. The file contains exactly one word in each line; the
+end of line separation of words is not subject to the value of the
+@code{-S} option.
+
+There is a default Ignore file used by @code{ptx} when this option is
+not specified, usually found in @file{/usr/local/lib/eign} if this has
+not been changed at installation time. If you want to deactivate the
+default Ignore file, specify @code{/dev/null} instead.
+
+@item -o @var{file}
+@itemx --only-file=@var{file}
+
+The file associated with this option contains a list of words which will
+be retained in concordance output, any word not mentioned in this file
+is ignored. The file is called the @dfn{Only file}. The file contains
+exactly one word in each line; the end of line separation of words is
+not subject to the value of the @code{-S} option.
+
+There is no default for the Only file. In the case there are both an
+Only file and an Ignore file, a word will be subject to be a keyword
+only if it is given in the Only file and not given in the Ignore file.
+
+@item -r
+@itemx --references
+
+On each input line, the leading sequence of non white characters will be
+taken to be a reference that has the purpose of identifying this input
+line on the produced permuted index. @xref{Output formatting}, for
+more information about reference production. Using this option changes
+the default value for option @code{-S}.
+
+Using this option, the program does not try very hard to remove
+references from contexts in output, but it succeeds in doing so
+@emph{when} the context ends exactly at the newline. If option
+@code{-r} is used with @code{-S} default value, or when GNU extensions
+are disabled, this condition is always met and references are completely
+excluded from the output contexts.
+
+@item -S @var{regexp}
+@itemx --sentence-regexp=@var{regexp}
+
+This option selects which regular expression will describe the end of a
+line or the end of a sentence. In fact, there is other distinction
+between end of lines or end of sentences than the effect of this regular
+expression, and input line boundaries have no special significance
+outside this option. By default, when GNU extensions are enabled and if
+@code{-r} option is not used, end of sentences are used. In this
+case, the precise @var{regexp} is imported from GNU emacs:
+
+@example
+[.?!][]\"')@}]*\\($\\|\t\\| \\)[ \t\n]*
+@end example
+
+Whenever GNU extensions are disabled or if @code{-r} option is used, end
+of lines are used; in this case, the default @var{regexp} is just:
+
+@example
+\n
+@end example
+
+Using an empty @var{regexp} is equivalent to completely disabling end of line or end
+of sentence recognition. In this case, the whole file is considered to
+be a single big line or sentence. The user might want to disallow all
+truncation flag generation as well, through option @code{-F ""}.
+@xref{Regexps, , Syntax of Regular Expressions, emacs, The GNU Emacs
+Manual}.
+
+When the keywords happen to be near the beginning of the input line or
+sentence, this often creates an unused area at the beginning of the
+output context line; when the keywords happen to be near the end of the
+input line or sentence, this often creates an unused area at the end of
+the output context line. The program tries to fill those unused areas
+by wrapping around context in them; the tail of the input line or
+sentence is used to fill the unused area on the left of the output line;
+the head of the input line or sentence is used to fill the unused area
+on the right of the output line.
+
+As a matter of convenience to the user, many usual backslashed escape
+sequences, as found in the C language, are recognized and converted to
+the corresponding characters by @code{ptx} itself.
+
+@item -W @var{regexp}
+@itemx --word-regexp=@var{regexp}
+
+This option selects which regular expression will describe each keyword.
+By default, if GNU extensions are enabled, a word is a sequence of
+letters; the @var{regexp} used is @code{\w+}. When GNU extensions are
+disabled, a word is by default anything which ends with a space, a tab
+or a newline; the @var{regexp} used is @code{[^ \t\n]+}.
+
+An empty @var{regexp} is equivalent to not using this option, letting the
+default take effect. @xref{Regexps, , Syntax of Regular Expressions, emacs,
+The GNU Emacs Manual}.
+
+As a matter of convenience to the user, many usual backslashed escape
+sequences, as found in the C language, are recognized and converted to
+the corresponding characters by @code{ptx} itself.
+
+@end table
+
+@node Output formatting, , Input processing, Invoking ptx
+@section Output formatting
+
+Output format is mainly controlled by @code{-O} and @code{-T} options,
+described in the table below. When neither @code{-O} nor @code{-T} is
+selected, and if GNU extensions are enabled, the program chooses an
+output format suited for a dumb terminal. Each keyword occurrence is
+output to the center of one line, surrounded by its left and right
+contexts. Each field is properly justified, so the concordance output
+could readily be observed. As a special feature, if automatic
+references are selected by option @code{-A} and are output before the
+left context, that is, if option @code{-R} is @emph{not} selected, then
+a colon is added after the reference; this nicely interfaces with GNU
+Emacs @code{next-error} processing. In this default output format, each
+white space character, like newline and tab, is merely changed to
+exactly one space, with no special attempt to compress consecutive
+spaces. This might change in the future. Except for those white space
+characters, every other character of the underlying set of 256
+characters is transmitted verbatim.
+
+Output format is further controlled by the following options.
+
+@table @code
+
+@item -g @var{number}
+@itemx --gap-size=@var{number}
+
+Select the size of the minimum white gap between the fields on the output
+line.
+
+@item -w @var{number}
+@itemx --width=@var{number}
+
+Select the output maximum width of each final line. If references are
+used, they are included or excluded from the output maximum width
+depending on the value of option @code{-R}. If this option is not
+selected, that is, when references are output before the left context,
+the output maximum width takes into account the maximum length of all
+references. If this option is selected, that is, when references are
+output after the right context, the output maximum width does not take
+into account the space taken by references, nor the gap that precedes
+them.
+
+@item -A
+@itemx --auto-reference
+
+Select automatic references. Each input line will have an automatic
+reference made up of the file name and the line ordinal, with a single
+colon between them. However, the file name will be empty when standard
+input is being read. If both @code{-A} and @code{-r} are selected, then
+the input reference is still read and skipped, but the automatic
+reference is used at output time, overriding the input reference.
+
+@item -R
+@itemx --right-side-refs
+
+In default output format, when option @code{-R} is not used, any
+reference produced by the effect of options @code{-r} or @code{-A} is
+given at the beginning of each output line, before the left context. In
+default output format, when option @code{-R} is specified, references
+are rather given to the far right of output lines, after the right
+context. For any other output format, option @code{-R} is almost
+ignored, except for the fact that the width of references is @emph{not}
+taken into account in total output width given by @code{-w} whenever
+@code{-R} is selected.
+
+This option is automatically selected whenever GNU extensions are
+disabled.
+
+@item -F @var{string}
+@itemx --flag-truncation=@var{string}
+
+This option will request that any truncation in the output be reported
+using the string @var{string}. Most output fields theoretically extend
+towards the beginning or the end of the current line, or current
+sentence, as selected with option @code{-S}. But there is a maximum
+allowed output line width, changeable through option @code{-w}, which is
+further divided into space for various output fields. When a field has
+to be truncated because it cannot extend to the beginning or the end of
+the current line and still fit in its space, a truncation occurs. By default,
+the string used is a single slash, as in @code{-F /}.
+
+@var{string} may have more than one character, as in @code{-F ...}.
+Also, in the particular case @var{string} is empty (@code{-F ""}),
+truncation flagging is disabled, and no truncation marks are appended in
+this case.
+
+As a matter of convenience to the user, many usual backslashed escape
+sequences, as found in the C language, are recognized and converted to
+the corresponding characters by @code{ptx} itself.
+
+@item -M @var{string}
+@itemx --macro-name=@var{string}
+
+Select another @var{string} to be used instead of @samp{xx}, while
+generating output suitable for @code{nroff}, @code{troff} or @TeX{}.
+
+@item -O
+@itemx --format=roff
+
+Choose an output format suitable for @code{nroff} or @code{troff}
+processing. Each output line will look like:
+
+@example
+.xx "@var{tail}" "@var{before}" "@var{keyword_and_after}" "@var{head}" "@var{ref}"
+@end example
+
+so it will be possible to write an @samp{.xx} roff macro to take care of
+the output typesetting. This is the default output format when GNU
+extensions are disabled. Option @samp{-M} might be used to change
+@samp{xx} to another macro name.
+
+In this output format, each non-graphical character, like newline and
+tab, is merely changed to exactly one space, with no special attempt to
+compress consecutive spaces. Each quote character: @kbd{"} is doubled
+so it will be correctly processed by @code{nroff} or @code{troff}.
+
+@item -T
+@itemx --format=tex
+
+Choose an output format suitable for @TeX{} processing. Each output
+line will look like:
+
+@example
+\xx @{@var{tail}@}@{@var{before}@}@{@var{keyword}@}@{@var{after}@}@{@var{head}@}@{@var{ref}@}
+@end example
+
+@noindent
+so it will be possible to write a @code{\xx} definition to take
+care of the output typesetting. Note that when references are not being
+produced, that is, neither option @code{-A} nor option @code{-r} is
+selected, the last parameter of each @code{\xx} call is inhibited.
+Option @samp{-M} might be used to change @samp{xx} to another macro
+name.
+
+In this output format, some special characters, like @kbd{$}, @kbd{%},
+@kbd{&}, @kbd{#} and @kbd{_} are automatically protected with a
+backslash. Curly brackets @kbd{@{}, @kbd{@}} are also protected with a
+backslash, but also enclosed in a pair of dollar signs to force
+mathematical mode. The backslash itself produces the sequence
+@code{\backslash@{@}}. Circumflex and tilde diacritics produce the
+sequence @code{^\@{ @}} and @code{~\@{ @}} respectively. Other
+diacriticized characters of the underlying character set produce an
+appropriate @TeX{} sequence as far as possible. The other non-graphical
+characters, like newline and tab, and all other characters which are
+not part of ASCII, are merely changed to exactly one space, with no
+special attempt to compress consecutive spaces. Let me know how to
+improve this special character processing for @TeX{}.
+
+@end table
+
+@node Compatibility, , Invoking ptx, Top
+@chapter The GNU extensions to @code{ptx}
+
+This version of @code{ptx} contains a few features which do not exist in
+System V @code{ptx}. These extra features are suppressed by using the
+@samp{-G} command line option, unless overridden by other command line
+options. Some GNU extensions cannot be recovered by overriding, so the
+simple rule is to avoid @samp{-G} if you care about GNU extensions.
+Here are the differences between this program and System V @code{ptx}.
+
+@itemize @bullet
+
+@item
+This program can read many input files at once, it always writes the
+resulting concordance on standard output. On the other hand, System V
+@code{ptx} reads only one file and produces the result on standard output
+or, if a second @var{file} parameter is given on the command line, to that
+@var{file}.
+
+Having output parameters not introduced by options is a quite dangerous
+practice which GNU avoids as far as possible. So, for using @code{ptx}
+portably between GNU and System V, you should pay attention to always
+use it with a single input file, and always expect the result on
+standard output. You might also want to automatically configure in a
+@samp{-G} option to @code{ptx} calls in products using @code{ptx}, if
+the configurator finds that the installed @code{ptx} accepts @samp{-G}.
+
+@item
+The only options available in System V @code{ptx} are options @samp{-b},
+@samp{-f}, @samp{-g}, @samp{-i}, @samp{-o}, @samp{-r}, @samp{-t} and
+@samp{-w}. All other options are GNU extensions and are not repeated in
+this enumeration. Moreover, some options have a slightly different
+meaning when GNU extensions are enabled, as explained below.
+
+@item
+By default, concordance output is not formatted for @code{troff} or
+@code{nroff}. It is rather formatted for a dumb terminal. @code{troff}
+or @code{nroff} output may still be selected through option @code{-O}.
+
+@item
+Unless @code{-R} option is used, the maximum reference width is
+subtracted from the total output line width. With GNU extensions
+disabled, width of references is not taken into account in the output
+line width computations.
+
+@item
+All 256 characters, even @kbd{NUL}s, are always read and processed from
+input file with no adverse effect, even if GNU extensions are disabled.
+However, System V @code{ptx} does not accept 8-bit characters, a few
+control characters are rejected, and the tilde @kbd{~} is condemned.
+
+@item
+Input line length is only limited by available memory, even if GNU
+extensions are disabled. However, System V @code{ptx} processes only
+the first 200 characters in each line.
+
+@item
+The break (non-word) characters default to be every character except all
+letters of the underlying character set, diacriticized or not. When GNU
+extensions are disabled, the break characters default to space, tab and
+newline only.
+
+@item
+The program makes better use of output line width. If GNU extensions
+are disabled, the program rather tries to imitate System V @code{ptx},
+but still, there are some slight disposition glitches this program does
+not completely reproduce.
+
+@item
+The user can specify both an Ignore file and an Only file. This is not
+allowed with System V @code{ptx}.
+
+@end itemize
+
+@bye
diff --git a/gnu/usr.bin/ptx/xmalloc.c b/gnu/usr.bin/ptx/xmalloc.c
new file mode 100644
index 0000000..58a81b5
--- /dev/null
+++ b/gnu/usr.bin/ptx/xmalloc.c
@@ -0,0 +1,88 @@
+/* xmalloc.c -- malloc with out of memory checking
+ Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifdef HAVE_CONFIG_H
+#if defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+#if __STDC__
+#define VOID void
+#else
+#define VOID char
+#endif
+
+#include <sys/types.h>
+
+#if STDC_HEADERS
+#include <stdlib.h>
+#else
+VOID *malloc ();
+VOID *realloc ();
+void free ();
+#endif
+
+#if __STDC__ && defined (HAVE_VPRINTF)
+void error (int, int, char const *, ...);
+#else
+void error ();
+#endif
+
+/* Allocate N bytes of memory dynamically, with error checking.
+ Return the pointer obtained from malloc (N).
+ If malloc returns a null pointer, call error () with 2 as its first
+ argument and the message "virtual memory exhausted".
+ NOTE(review): the null check does not look at N, so a null return
+ from malloc (0) is reported as exhaustion too -- confirm whether
+ callers can pass N == 0. */
+
+VOID *
+xmalloc (n)
+ size_t n;
+{
+ VOID *p;
+
+ p = malloc (n);
+ if (p == 0)
+ /* Must exit with 2 for `cmp'. */
+ error (2, 0, "virtual memory exhausted");
+ return p;
+}
+
+/* Change the size of an allocated block of memory P to N bytes,
+ with error checking.
+ If P is NULL, run xmalloc.
+ If N is 0, run free and return NULL.
+ P is tested before N, so a null P with N == 0 goes to xmalloc (0).
+ Otherwise return the pointer obtained from realloc (P, N); if that
+ pointer is null, call error () with 2 as its first argument and the
+ message "virtual memory exhausted".
+ NOTE(review): the result of realloc is stored straight into P, so
+ the old block is no longer reachable after a failure; presumably
+ harmless because error () is expected to exit -- confirm. */
+
+VOID *
+xrealloc (p, n)
+ VOID *p;
+ size_t n;
+{
+ if (p == 0)
+ return xmalloc (n);
+ if (n == 0)
+ {
+ free (p);
+ return 0;
+ }
+ p = realloc (p, n);
+ if (p == 0)
+ /* Must exit with 2 for `cmp'. */
+ error (2, 0, "virtual memory exhausted");
+ return p;
+}
OpenPOWER on IntegriCloud