From fb89d535a678f4e5589fdb43996219ebf099abe0 Mon Sep 17 00:00:00 2001 From: alm Date: Fri, 6 May 1994 07:54:54 +0000 Subject: ptx: permuted index generator --- gnu/usr.bin/ptx/.stamp-h.in | 0 gnu/usr.bin/ptx/COPYING | 339 +++++ gnu/usr.bin/ptx/ChangeLog | 546 +++++++ gnu/usr.bin/ptx/Makefile | 7 + gnu/usr.bin/ptx/NEWS | 53 + gnu/usr.bin/ptx/README | 23 + gnu/usr.bin/ptx/THANKS | 23 + gnu/usr.bin/ptx/TODO | 94 ++ gnu/usr.bin/ptx/alloca.c | 484 +++++++ gnu/usr.bin/ptx/argmatch.c | 94 ++ gnu/usr.bin/ptx/bumpalloc.h | 58 + gnu/usr.bin/ptx/check-out | 65 + gnu/usr.bin/ptx/config.h | 57 + gnu/usr.bin/ptx/diacrit.c | 148 ++ gnu/usr.bin/ptx/diacrit.h | 16 + gnu/usr.bin/ptx/error.c | 117 ++ gnu/usr.bin/ptx/examples/README | 21 + gnu/usr.bin/ptx/examples/ajay/Makefile | 28 + gnu/usr.bin/ptx/examples/ajay/README | 41 + gnu/usr.bin/ptx/examples/ajay/footer.tex | 1 + gnu/usr.bin/ptx/examples/ajay/header.tex | 21 + gnu/usr.bin/ptx/examples/ajay/tip.forgptx | 10 + gnu/usr.bin/ptx/examples/ajay/x.pl | 22 + gnu/usr.bin/ptx/examples/ignore/README | 65 + gnu/usr.bin/ptx/examples/ignore/bix | 109 ++ gnu/usr.bin/ptx/examples/ignore/eign | 163 +++ gnu/usr.bin/ptx/examples/include.pl | 79 + gnu/usr.bin/ptx/examples/latex/Makefile | 15 + gnu/usr.bin/ptx/examples/latex/README | 10 + gnu/usr.bin/ptx/examples/latex/latex.tex | 11 + gnu/usr.bin/ptx/examples/latex/table.tex | 65 + gnu/usr.bin/ptx/examples/luke/README | 2 + gnu/usr.bin/ptx/examples/luke/xxroff.sh | 108 ++ gnu/usr.bin/ptx/getopt.c | 757 ++++++++++ gnu/usr.bin/ptx/getopt.h | 129 ++ gnu/usr.bin/ptx/getopt1.c | 187 +++ gnu/usr.bin/ptx/mkinstalldirs | 35 + gnu/usr.bin/ptx/ptx.c | 2237 +++++++++++++++++++++++++++++ gnu/usr.bin/ptx/ptx.info | 496 +++++++ gnu/usr.bin/ptx/ptx.texinfo | 554 +++++++ gnu/usr.bin/ptx/xmalloc.c | 88 ++ 41 files changed, 7378 insertions(+) create mode 100644 gnu/usr.bin/ptx/.stamp-h.in create mode 100644 gnu/usr.bin/ptx/COPYING create mode 100644 gnu/usr.bin/ptx/ChangeLog create mode 100644 
gnu/usr.bin/ptx/Makefile create mode 100644 gnu/usr.bin/ptx/NEWS create mode 100644 gnu/usr.bin/ptx/README create mode 100644 gnu/usr.bin/ptx/THANKS create mode 100644 gnu/usr.bin/ptx/TODO create mode 100644 gnu/usr.bin/ptx/alloca.c create mode 100644 gnu/usr.bin/ptx/argmatch.c create mode 100644 gnu/usr.bin/ptx/bumpalloc.h create mode 100644 gnu/usr.bin/ptx/check-out create mode 100644 gnu/usr.bin/ptx/config.h create mode 100644 gnu/usr.bin/ptx/diacrit.c create mode 100644 gnu/usr.bin/ptx/diacrit.h create mode 100644 gnu/usr.bin/ptx/error.c create mode 100644 gnu/usr.bin/ptx/examples/README create mode 100644 gnu/usr.bin/ptx/examples/ajay/Makefile create mode 100644 gnu/usr.bin/ptx/examples/ajay/README create mode 100644 gnu/usr.bin/ptx/examples/ajay/footer.tex create mode 100644 gnu/usr.bin/ptx/examples/ajay/header.tex create mode 100644 gnu/usr.bin/ptx/examples/ajay/tip.forgptx create mode 100644 gnu/usr.bin/ptx/examples/ajay/x.pl create mode 100644 gnu/usr.bin/ptx/examples/ignore/README create mode 100644 gnu/usr.bin/ptx/examples/ignore/bix create mode 100644 gnu/usr.bin/ptx/examples/ignore/eign create mode 100755 gnu/usr.bin/ptx/examples/include.pl create mode 100644 gnu/usr.bin/ptx/examples/latex/Makefile create mode 100644 gnu/usr.bin/ptx/examples/latex/README create mode 100644 gnu/usr.bin/ptx/examples/latex/latex.tex create mode 100644 gnu/usr.bin/ptx/examples/latex/table.tex create mode 100644 gnu/usr.bin/ptx/examples/luke/README create mode 100644 gnu/usr.bin/ptx/examples/luke/xxroff.sh create mode 100644 gnu/usr.bin/ptx/getopt.c create mode 100644 gnu/usr.bin/ptx/getopt.h create mode 100644 gnu/usr.bin/ptx/getopt1.c create mode 100755 gnu/usr.bin/ptx/mkinstalldirs create mode 100644 gnu/usr.bin/ptx/ptx.c create mode 100644 gnu/usr.bin/ptx/ptx.info create mode 100644 gnu/usr.bin/ptx/ptx.texinfo create mode 100644 gnu/usr.bin/ptx/xmalloc.c (limited to 'gnu/usr.bin/ptx') diff --git a/gnu/usr.bin/ptx/.stamp-h.in b/gnu/usr.bin/ptx/.stamp-h.in new file mode 
100644 index 0000000..e69de29 diff --git a/gnu/usr.bin/ptx/COPYING b/gnu/usr.bin/ptx/COPYING new file mode 100644 index 0000000..a43ea21 --- /dev/null +++ b/gnu/usr.bin/ptx/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 675 Mass Ave, Cambridge, MA 02139, USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + Appendix: How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/gnu/usr.bin/ptx/ChangeLog b/gnu/usr.bin/ptx/ChangeLog new file mode 100644 index 0000000..fffb47f --- /dev/null +++ b/gnu/usr.bin/ptx/ChangeLog @@ -0,0 +1,546 @@ +Fri Nov 5 23:10:07 1993 Francois Pinard (pinard@icule) + + * Version 0.3 + + * check-out: New name for check_out. + * Makefile.in: Change check_out for check-out everywhere. + Reported by Jim Meyering . + + * Makefile.in (realclean): Do not remove .stamp-h.in and + config.h.in. One should not need Autoconf installed. + Reported by Nelson Beebe . + + * ptx.c: Add missing definition of isxdigit. + Reported by Nelson Beebe . + + * ptx.c: Define S_ISREG if not defined, then use it. + Reported by Karl Berry . + +Wed Nov 3 15:53:00 1993 Francois Pinard (pinard@icule) + + * mkinstalldirs: New, from elsewhere. + * Makefile.in: Use it. + +Mon Nov 1 00:48:34 1993 Francois Pinard (pinard@lagrande.IRO.UMontreal.CA) + + * Makefile.in (clean): Delete ptx, not the obsolete $(PROGS). + +Sun Oct 31 15:04:57 1993 Francois Pinard (pinard@raptor.IRO.UMontreal.CA) + + * ptx.c (alloc_and_compile_regex): Zero out the whole allocated + pattern, not just a few fields. + + * ptx.c (alloc_and_compile_regex): Clarify error message. + +Thu Oct 28 08:29:29 1993 Francois Pinard (pinard@compy.IRO.UMontreal.CA) + + * ptx.c (print_copyright): Deleted. Rather use a "copyright" + variable, print to standard output instead of standard error. + + * ptx.c: Use error instead of fprintf (stderr, ...). + + * ptx.c: Rename fold_lower_to_upper to ignore_case. + +Wed Oct 27 18:41:52 1993 Francois Pinard (pinard@lagrande.IRO.UMontreal.CA) + + * ptx.c: Add option -M for using another macro name than "xx". + Reported by Thorsten Ohl . + + * examples/ignore/: New files. + * eign: Linked to examples/ignore/eign. + * Makefile.in: Install and uninstall $(datadir)/eign. + * configure.in: Remove testing of a default ignore file. + Reported by Nelson Beebe . + + * ptx.c (main): Add --help and --version processing. + (print_version): Deleted. 
+ + * ptx.c: Use -traditional instead of --no-gnu-extensions, + --ignore-case instead of --fold-letter-case, --format= + instead of --tex-output and --roff-output. + * argmatch.c: New file. Taken from fileutils/lib. + Reported by Karl Berry . + +Tue Oct 26 08:39:14 1993 Francois Pinard (pinard@icule) + + * ptx.c (usage): New name for usage_and_exit. Accept an exit + status parameter. If zero, print full help on stdout. If + non-zero, print a one-line helper on stderr. + + * ptx.c: Remove sizeof_occurs and OCCURS_ALIGNMENT complexity. + The memory savings did not justify the portability headaches. + + * ptx.c (copy_unescaped_string): New function. + (main): Use it with options -F, -S and -W. + Reported by Dave Cottingham . + + * ptx.c (fix_output_parameters): Force edit of '\f', because some + systems do not consider it to be whitespace. + Reported by Stephane Berube . + + * ptx.c (fix_output_parameters): For roff output, do not disallow + characters with 8th bit set. + Reported by James Clark . + + * Makefile.in (dist): Include examples/ in distribution. + +Mon Oct 25 15:46:16 1993 Francois Pinard (pinard@icule) + + * ptx.c: Change --display-width to --width, for consistency with + other GNU programs. + + * examples/ajay/: New files. + Reported by Ajay Shah . + Reported by Rakesh Chauhan . + + * examples/luke/: New files. + Reported by Luke Kendall . + + * examples/latex/: New files. + + * ptx.c (find_occurs_in_text): Assign 0 to refererence_length so + GNU C will not warn anymore against its uninitialized use. + Reported by Loic Dachary . + + * lib/: Move routines in main directory first, then destroy. + * Makefile.in: Merge lib/Makefile.in, clean up. + * configure.in: Do not create lib/Makefile.in. + + * acconfig.h: New file. + * .stamp-h.in: Used for timestamping autoheader. + * Makefile.in: Use acconfig.h and .stamp-h.in. Force + autoheader whenever acconfig.h is modified. 
+ +Wed Jun 9 15:01:28 1993 Francois Pinard (pinard@icule) + + * Makefile.in (dist): Replace "echo `pwd`" by a mere "pwd". + Create a gzip file. + +Sat May 22 20:18:31 1993 Francois Pinard (pinard@icule) + + * Makefile.in: Replace $(PROGS) by ptx. + + * diacrit.h: Change `c' to `chr', better protect it. + + * lib/COPYING.LIB: Deleted. + * lib/Makefile.in: Adjust accordingly. + +Sat Feb 6 15:03:13 1993 Francois Pinard (pinard@icule) + + * Makefile.in, lib/Makefile.in: In dist goals, ensure 777 mode for + directories, so older tar's will restore file modes properly. + +Sun Jan 17 15:42:35 1993 Francois Pinard (pinard@icule) + + * Makefile.in, lib/Makefile.in: Put $(CFLAGS) after $(CPPFLAGS), + so the installer can override automatically configured choices. + Reported by Karl Berry . + +Tue Jan 12 09:21:22 1993 Francois Pinard (pinard at icule) + + * configure.in: Check for setchrclass(). + * diacrit.[hc]: New file, extracted from my own ctype.[hc]. + * ctype.[hc]: Deleted. + * Makefile.in: Distribute diacrit.[hc], but not ctype.[hc]. + * ptx.c: Include "diacrit.h" rather than "ctype.h". + Include for ANSI C, or else, use our own definitions. + (initialize_regex): Use ctype.h macros for making the folding + table and for making the \w+ fastmap. Previously, was reusing the + regex syntax table or looking at character bit structure. + (main): Execute setchrclass (NULL) if available and ANSI C. + + * Spelling fixes in various files. + Reported by Jim Meyering . + +Thu Jan 7 20:19:25 1993 Francois Pinard (pinard at icule) + + * Makefile.in: Using autoheader, derive config.h.in from + configure.in. Distribute config.h.in. + Use config.status for reconstructing config.h from config.h.in. + Have all $(OBJECTS) depend upon config.h. + Always use -I. calling the C compiler, for config.h to be found. + Remove config.h in distclean-local. + * lib/Makefile.in: Always use -I.. calling the C compiler, for + config.h to be found. Also use $(DEFS). 
+ Have all $(OBJECTS) depend upon ../config.h. + * configure.in: Create config.h from config.h.in. + * ptx.c, ctype.c: Conditionally include config.h. + +Fri Jan 1 19:52:49 1993 Francois Pinard (pinard at icule) + + * Makefile.in, lib/Makefile.in: Reinstate $(CPPFLAGS), use it. + Richard wants it there. Remove $(ALLFLAGS) and reequilibrate. + +Sun Dec 27 05:57:55 1992 Francois Pinard (pinard at icule) + + * ptx.c (find_occurs_in_text): Introduce word_start and word_end + variables, and use them instead of the word_regs structure. This + takes care of the fact newer regex.h does not allocate the arrays + any more, and these were used even when regexps were not compiled. + + * Makefile, lib/Makefile.in: Define CHAR_SET_SIZE for SYNTAX_TABLE + to work correctly. + + * configure.in: Replace AC_USG by AC_HAVE_HEADERS(string.h). + Cleanup and reorganize a little. + + * ptx.c: Renamed from gptx.c. Add -G (--no-gnu-extensions) + and clarify some long option names by making them more + explicit. Remove all PTX_COMPATIBILITY conditionals. + Introduce gnu_extensions variable initialized to 1. Let -G + give it the value 0, but still allow and process GNU specific + options and long option names. The Ignore file is now the same + whatever the value of gnu_extensions. + * ptx.texinfo: Renamed from gptx.texinfo, adjusted. + * Makefile.in, configure.in: Adjusted accordingly. Now + installs only one program under the name $(binprefix)ptx. + + * gptx.c (perror_and_exit): Deleted. Use error() directly. + + * gptx.c: Remove unneeded prototypes for system library routines. + + * gptx.c (compare_words, compare_occurs): #define first and second + instead of using an intermediate variable. + + * configure.in: Use AC_CONST. + * gptx.h: Do not define const. + * Define volatile dependent on __GNUC__, not __STDC__, and define + it to __volatile__. + + * gptx.h, version.c: Deleted, integrated into gptx.c. + * Remove src/ and doc/ subdirectories, merging them in main. 
+ * Move lib/bumpalloc.h, lib/ctype.[ch] in main directory. + * Integrate all ChangeLogs in main ChangeLog. + * Integrate all Makefiles in main Makefile and lib/Makefile, + rewriting them all along the way. + +Fri Nov 13 00:10:31 1992 Francois Pinard (pinard at icule) + + * Makefile.in (dist): chmod a+r before making the tar file. + +Tue Oct 6 12:47:00 1992 Francois Pinard (pinard at icule) + + * {,doc/,lib/,src/}Makefile.in: Use exec_prefix. Add `uninstall'. + +Wed Aug 19 16:02:09 1992 Francois Pinard (pinard at icule) + + * ansi2knr.c: New file, from Ghostscript distribution. + * gptx.c: Get rid of many __STDC__ tests. + * version.c: Idem. + +Fri Aug 14 22:53:05 1992 Francois Pinard (pinard at icule) + + * gptx.c: Use HAVE_MCHECK instead of MCHECK_MISSING. + * configure.in: Use AC_HAVE_FUNCS instead of AC_MISSING_FUNCS. + + * configure.in: Autoconfigure for mcheck and strerror. + Reported by Bernd Nordhausen . + +Thu Jun 18 09:15:12 1992 Francois Pinard (pinard at icule) + + * configure.in, all Makefile's: Adapt to Autoconf 0.118. + +Sun Feb 2 16:23:47 1992 Francois Pinard (pinard at icule) + + * gptx.c (main): Returns int. + +Tue Dec 10 09:53:21 1991 Francois Pinard (pinard at icule) + + * gptx.c (usage_and_exit): Print --OPTION instead of +OPTION. + +Wed Dec 4 10:31:06 1991 Francois Pinard (pinard at icule) + + * gptx.c (compare_occurs, compare_words): Change parameters to + (void *) to comply with qsort ANSI declaration, and cast the true + type inside the function, each time a parameter is used. + Reported by Byron Rakitzis . + +Mon Dec 2 10:41:43 1991 Francois Pinard (pinard at icule) + + * gptx.c: Removed comma at end of enum. + + * version.c: Add a few missing `const's. + + * gptx.c: Add prototypes for close, fstat, open, perror and read + if __STDC__. + + * gptx.c: Remove useless alloca declaration. 
+ +Sat Nov 9 20:03:37 1991 Francois Pinard (pinard at icule) + + * configure.in, all/Makefile.in: Directory reorganization, + including separate src and doc, in addition to lib. Ensure all + Makefile's can be used independently. + +Thu Nov 7 11:20:38 1991 Francois Pinard (pinard at icule) + + * gptx.texinfo: Renamed from gptx.texi. Now `TeX'able. + * Makefile.in: Ensure distributing texinfo.tex. + Reported by Karl Berry . + + * configure.in: Take care of POSIXish ISC. + Reported by Karl Berry . + +Tue Nov 5 09:42:58 1991 Francois Pinard (pinard at icule) + + * configure.in, Makefile.in: Do not absolutize $(srcdir), because + this could create problems with automounters. + + * configure.in, Makefile.in: Remove IF_* devices, they were + solving a problem caused only by non timestamping shars, and + gptx is now distributed in tar format. + +Mon Oct 28 14:39:36 1991 Francois Pinard (pinard at icule) + + * configure.in: New file. + * configure: Automatically generated from file configure.in + and David MacKenzie's autoconf. + +Sat Oct 19 20:06:28 1991 Francois Pinard (pinard at icule) + + * configure: Use ANSI header files if present, even with non ANSI + compilers. + Reported by David MacKenzie . + +Tue Oct 15 08:43:13 1991 Francois Pinard (pinard at icule) + + * Makefile.in: Install gptx and ptx separately. On DEC Ultrix + 4.1, install cannot install more than one file at a time. + Reported by Simon Leinen . + +Fri Oct 11 15:19:42 1991 Francois Pinard (pinard at icule) + + * Makefile.in: `realclean' did not work, because lib/Makefile was + disappearing at `distclean' time. I tried separate doc and src + directories, but this is not worth the heaviness. Split some + goals instead, using _doc, _lib and _src suffixes. + +Fri Oct 10 18:04:21 1991 Francois Pinard (pinard at icule) + + * Version 0.2 + +Wed Oct 9 16:13:42 1991 Francois Pinard (pinard at icule) + + * configure, Makefile.in: New files. + * Makefile, GNUmakefile, Depends: Deleted. 
+ + * gptx.c: Change -A output from `FILE(NN):' to `FILE:NN:'. + + * gptx.c, gptx.h, version.c: Reinstate __STDC__ tests. + +Tue Jun 25 11:35:32 1991 Francois Pinard (pinard at icule) + + * gptx.c: Something is wrong in -r reference allocation, I suspect + casting does not do what I expect. I relax the constraints so as to + make it work for now. To be revisited. + + * gptx.c: Call initialize_regex sooner, to ensure folded_chars is + properly initialized when -f and -i are simultaneously used. + + * gptx.c: Remove -p option and rather compile two separate + programs, one by defining PTX_COMPATIBILITY, to conform to a GNU + standard asking to not depend on the program installed name. This + also removes the -p option, so losing the debatable advantage of + dynamically reverting to ptx compatibility mode. + + * gptx.h: Cleanup. Don't duplicate stdlib.h. + +Wed Dec 5 18:00:23 1990 Francois Pinard (pinard at icule) + + * gptx.c (usage_and_exit): Change -C explanation. + +Sun Oct 28 16:11:36 1990 Francois Pinard (pinard at icule) + + * gptx.h: Remove the PROTO macros and usage. + * gptx.c: Remove all the #ifdef __STDC__ noise. + * version.c: Remove all the #ifdef __STDC__ noise. + +Wed Jul 25 12:20:45 1990 Francois Pinard (pinard at icule) + + * ctype.[ch]: Linked from my library. + +Wed Jul 11 10:53:13 1990 Francois Pinard (pinard at icule) + + * bumpalloc.h: Linked from my library. + +Sun Aug 5 13:17:25 1990 Francois Pinard (pinard at icule) + + * Version 0.1 + + * gptx.c: Implement IGNORE and PIGNORE defines. + + * gptx.c: Implement special character protection for roff and TeX + output, through the edited_flag array. + +Fri Aug 3 12:47:35 1990 Francois Pinard (pinard at icule) + + * gptx.c: Implement new -R option for automatic referencing, with + the possibility of multiple input files in normal mode. Now, + option -r implies ptx compatibility mode default for -S; exclude + reference from context whenever easy to do, and allow coselection + of both -r and -R.
+ +Wed Aug 1 12:00:07 1990 Francois Pinard (pinard at icule) + + * gptx.[hc]: Define and use OCCURS_ALIGNMENT, to avoid those + `Bus error's on Sparcs. + +Fri Jul 27 12:04:40 1990 Francois Pinard (pinard at icule) + + * gptx.c (initialize_regex): Use only isalpha and "ctype.h" to + initialize Sword syntax, getting rid of any other explicit ISO + 8859-1 references. This will make the MS-DOS port easier, + character set wise. + + * gptx.c (swallow_file_in_memory): Revised along the lines of + io.c from GNU diff 1.14, so it could handle stdin and fifos, + and work faster. + + * gptx.c (perror_and_exit): New function, use it where convenient. + +Thu Jul 26 13:28:13 1990 Francois Pinard (pinard at icule) + + * gptx.c (swallow_input_text): Remove white space compression even + if not in ptx compatibility mode. This being out of the way, use + swallow_file_in_memory instead of inputting characters one by one. + +Wed Jul 25 12:20:45 1990 Francois Pinard (pinard at icule) + + * gptx.c (find_occurs_in_text): Include the sentence separator as + part of the right context, except for separator's suffix white + space. Formerly, it was excluded from all contexts. + + * gptx.h: Check STDLIB_PROTO_ALREADY to conditionalize prototype + declarations for standard C library routines; check __GNUC__ + before using `volatile' on function prototypes. + + * gptx.c: (find_occurs_in_text): Maintain the maximum length of + all words read. + (define_all_fields): Optimize scanning longish left contexts by + sometimes doing a backward jump from the keyword instead of always + scanning forward from the left context boundary. + +Sun Jul 22 09:18:21 1990 Francois Pinard (pinard at icule) + + * gptx.c (alloc_and_compile_regex): Realloc out all extra allocated + space. + +Mon Jul 16 09:07:25 1990 Francois Pinard (pinard at icule) + + * gptx.c: In OCCURS structure, modify left, right and reference + pointers and make them displacements, to save some space.
Define + DELTA typedef, use it, make all other necessary changes. + + * gptx.c: Work on portability. Define const and volatile to + nothing if not __STDC__. On BSD, define str[r]chr to be [r]index. + Avoid writings specific to GNU C. + +Sun Jul 15 17:28:39 1990 Francois Pinard (pinard at icule) + + * gptx.c: Add a word_fastmap array and use it if -W has not been + specified, instead of using default regexps. Finish implementing + the Break files. + +Sat Jul 14 10:54:21 1990 Francois Pinard (pinard at icule) + + * gptx.[ch], version.c: Use prototypes in all header + functions. Add some missing const declarations. + +Fri Jul 13 10:16:34 1990 Francois Pinard (pinard at icule) + + * gptx.c: Enforce ptx compatibility mode by disallowing normal + mode extensions. Disallow -p if extensions are used. + + * gptx.c: Finish implementation of Ignore and Only files. + +Wed Jul 11 10:53:13 1990 Francois Pinard (pinard at icule) + + * gptx.c: Revise WORD typedef and use it in OCCURS typedef; + adjust all usages. Add BLOCK and WORD_ARRAY typedefs, revise in + various place to make better usage of these. Use BUMP_ALLOC. + +Tue Jul 10 09:02:26 1990 Francois Pinard (pinard at icule) + + * gptx.c: Add -L option, `latin1_charset' variable and support. + + * gptx.c: Remove old generate_roff and generate_tex variables, + replace with output_format which is of enum type. + +Mon Jul 9 10:40:41 1990 Francois Pinard (pinard at icule) + + * gptx.c (compare_words): Check word_regex.translate and do not + use the translation table if not computed. Also protect against + possible 8-bit problems. + + * gptx.c (alloc_and_compile_regex): New function. + +Sun Jul 8 17:52:14 1990 Francois Pinard (pinard at icule) + + * gptx.c: Make a more systematic use of SKIP_* macros, to get rid + of explicit ' ' references when possible. 
+ + * gptx.c: Replace `head' field by `left' in the OCCURS structure, + delay the `before' computation from find_occurs_in_text to + define_all_fields, and make all necessary adjustments. Also + add a `right' field in the OCCURS structure, use it to get rid of + explicit '\n' references when possible. + + * gptx.c (initialize_regex): New function. Compute the syntax + table for regex. Get rid of previous break_chars_init variable + and break_chars array, use word_regex and word_regex_string + instead. + + * gptx.c: Use re_search to find words and re_match to skip over + them. Add -W option and support. Use re_search to find end of + lines or end of sentences, add -S option and support. + +Sat Jul 7 08:50:40 1990 Francois Pinard (pinard at icule) + + * gptx.c: Change PRINT_SPACES and PRINT_FIELD macros to + print_spaces and print_field routines, respectively. + +Fri Jul 6 09:44:39 1990 Francois Pinard (pinard at icule) + + * gptx.c (generate_output): Split into define_all_fields, + generate_all_output, output_one_roff_line, output_one_tex_line, + and output_one_tty_line. + + * gptx.c: Move the inline code to reallocate the text buffer into + reallocate_text_buffer. Correct a small bug in this area. + + * gptx.c: Modify -F to accept a STRING argument, modify output + routines to handle truncation marks having more than one + character. + +Thu Jul 5 11:08:59 1990 Francois Pinard (pinard at icule) + + * gptx.c: Add -F option and logic. + + * gptx.c: Select ptx compatibility mode if program is + installed under the name `ptx'. Install both gptx and ptx. + +Thu Jun 7 17:21:25 1990 Francois Pinard (pinard at icule) + + * gptx.c: Make each OCCURS a variable size thing, depending on + various options; mark occurs_found table size with an integer + counter instead of an end pointer. + +Sat Apr 14 20:01:09 1990 Francois Pinard (pinard at icule) + + * Version 0.0 + + * gptx.c: Removed limitations on table sizes: it should now go + until an `Out of memory' error. Use xmalloc. 
Rename some + variables. + + * version.c, gptx.c (usage_and_exit): Add -C option to print + Copyright. + +Mon Mar 12 17:59:42 1990 Francois Pinard (pinard at icule) + + * ChangeLog initialisation. Previous experiments towards gptx + were done at the defunct site ora.odyssee.qc.ca, which was a + Sun-3/160 running SunOS 3.0. The files have been stocked for + a long time to kovic.iro archives, then imported to icule. + + * gptx.c: GCC linted. diff --git a/gnu/usr.bin/ptx/Makefile b/gnu/usr.bin/ptx/Makefile new file mode 100644 index 0000000..b778f7a --- /dev/null +++ b/gnu/usr.bin/ptx/Makefile @@ -0,0 +1,7 @@ +PROG= ptx +SRCS= argmatch.c diacrit.c error.c getopt.c getopt1.c ptx.c regex.c xmalloc.c + +MAN1= NOMAN +CFLAGS+= -DHAVE_CONFIG_H -DDEFAULT_IGNORE_FILE=\"/usr/share/dict/eign\" + +.include diff --git a/gnu/usr.bin/ptx/NEWS b/gnu/usr.bin/ptx/NEWS new file mode 100644 index 0000000..6f97bf9 --- /dev/null +++ b/gnu/usr.bin/ptx/NEWS @@ -0,0 +1,53 @@ +GNU permuted indexer NEWS - User visible changes. +Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc. +Francois Pinard , 1992. + +Version 0.3 - 1993-10-??, by Franc,ois Pinard + +* GNU ptx installs as a single program, -G option dynamically reverts +to the System V compatible behaviour, yet being liberal with options. + +* It should install more easily on more systems, source code is +unprotoized on the fly for older C compilers. + +* A default ignore file is installed along with GNU ptx, ptx uses it. + +* Options -F, -S and -W interpret most \-escapes themselves. + +* Option -M can be used to change "xx" to another macro name. + +* CHRCLASS environment variable is obeyed for systems supporting it. + +* Long option names have been cleaned up a little. + +* Some examples are given in the examples/ directory structure. + + +Version 0.2 - 1991-10-10, by Franc,ois Pinard + +* Reference format (with -A) has been modified slightly to better +comply with GNU standards for line reporting.
+ +* Option -p removed, rather compile two separate programs, one with +GNU extensions, the other being strict on System V compatibility. + + +Version 0.1 - 1990-08-05, by Franc,ois Pinard + +* Add many options: -L for Latin1, -R for automatic referencing, -W +for regular expressions describing words, -S for regular expressions +describing end of lines or sentences. Let -F specify the truncation +strings. + +* Implement Ignore files and Only files. + +* Option -p dynamically enforces strict System V compatibility. + +* Correct a few bugs and portability problems, have faster input, +faster processing, and use less memory. + + +Version 0.0 - 1990-04-14, by Franc,ois Pinard + +* Initial release. + diff --git a/gnu/usr.bin/ptx/README b/gnu/usr.bin/ptx/README new file mode 100644 index 0000000..240b7ee --- /dev/null +++ b/gnu/usr.bin/ptx/README @@ -0,0 +1,23 @@ +This is a beta release of GNU ptx, a permuted index generator. GNU +ptx can handle multiple input files at once, produce TeX compatible +output, or a readable KWIC (keywords in their context) without the +need of nroff. This version does not handle huge input files, that +is, those which do not fit in memory all at once. + +The command syntax is not the same as UNIX ptx: all given files are +input files, the results are produced on standard output by default. +The GNU ptx manual is provided in Texinfo format. Calling `ptx --help' +prints an option summary. Please note that an overall renaming of all +options is foreseeable: GNU ptx specifications are not frozen yet. + +See the file COPYING for copying conditions. + +See the file THANKS for a list of contributors. + +See the file NEWS for a list of major changes in the current release. + +See the file INSTALL for compilation and installation instructions. + +Mail suggestions and bug reports (including documentation errors) for +these programs to bug-gnu-utils@prep.ai.mit.edu.
+ diff --git a/gnu/usr.bin/ptx/THANKS b/gnu/usr.bin/ptx/THANKS new file mode 100644 index 0000000..e6a45cf --- /dev/null +++ b/gnu/usr.bin/ptx/THANKS @@ -0,0 +1,23 @@ +GNU permuted indexer has originally been written by François Pinard. +Other people contributed to the GNU permuted index by reporting +problems, suggesting various improvements or submitting actual code. +Here is a list of these people. Help me keep it complete and exempt +of errors. + +Ajay Shah ajayshah@cmie.ernet.in +Bernd Nordhausen bernd@iss.nus.sg +Byron Rakitzis byron@archone.tamu.edu +Dave Cottingham dc@haiti.gsfc.nasa.gov +David J. MacKenzie djm@eng.umd.edu +Francois Pinard pinard@iro.umontreal.ca +Janne Himanka shem@syy.oulu.fi +James Clark jjc@jclark.com +Jim Meyering meyering@comco.com +Karl Berry karl@cs.umb.edu +Loic Dachary L.Dachary@cs.ucl.ac.uk +Luke Kendall luke@research.canon.oz.au +Nelson Beebe beebe@math.utah.edu +Rakesh Chauhan rk@cmie.ernet.in +Simon Leinen simon@liasun1.epfl.ch +Stephane Berube berube@iro.umontreal.ca +Thorsten Ohl ohl@physics.harvard.edu diff --git a/gnu/usr.bin/ptx/TODO b/gnu/usr.bin/ptx/TODO new file mode 100644 index 0000000..6714313 --- /dev/null +++ b/gnu/usr.bin/ptx/TODO @@ -0,0 +1,94 @@ +TODO file for GNU ptx - last revised 05 November 1993. +Copyright (C) 1992, 1993 Free Software Foundation, Inc. +Francois Pinard , 1992. + +The following are more or less in decreasing order of priority. + +* Use rx instead of regex. + +* Correct the infinite loop using -S '$' or -S '^'. + +* Use mmap for swallowing files (maybe wrong when memory edited). + +* Understand and mimic `-t' option, if I can. + +* Sort keywords intelligently for Latin-1 code. See how to interface +this character set with various output formats. Also, introduce +options to inverse-sort and possibly to reverse-sort. + +* Improve speed for Ignore and Only tables. Consider hashing instead +of sorting. Consider playing with obstacks to digest them. 
+ +* Provide better handling of format effectors obtained from input, and +also attempt white space compression on output which would still +maximize full output width usage. + +* See how TeX mode could be made more useful, and if a texinfo mode +would mean something to someone. + +* Provide multiple language support + +Most of the boosting work should go along the line of fast recognition +of multiple and complex boundaries, which define various `languages'. +Each such language has its own rules for words, sentences, paragraphs, +and reporting requests. This is less difficult than I first thought: + + . Recognize language modifiers with each option. At least -b, -i, -o, +-W, -S, and also new language switcher options, will have such +modifiers. Modifiers on language switchers will allow or disallow +language transitions. + + . Complete the transformation of underlying variables into arrays in +the code. + + . Implement a heap of positions in the input file. There is one entry +in the heap for each compiled regexp; it is initialized by a re_search +after each regexp compile. Regexps reschedule themselves in the heap +when their position passes while scanning input. In this way, looking +simultaneously for a lot of regexps should not be too inefficient, +once the scanning starts. If this works ok, maybe consider accepting +regexps in Only and Ignore tables. + + . Merge with language processing boundary processing options, really +integrating -S processing as a special case. Maybe, implement several +level of boundaries. See how to implement a stack of languages, for +handling quotations. See if more sophisticated references could be +handled as another special case of a language. + +* Tackle other aspects, in a more long term view + + . Add options for statistics, frequency lists, referencing, and all +other prescreening tools and subsidiary tasks of concordance +production. + + . Develop an interactive mode. Even better, construct a GNU emacs +interface. 
I'm looking at Gene Myers suffix +arrays as a possible implementation along those ideas. + + . Implement hooks so word classification and tagging should be merged +in. See how to effectively hook in lemmatisation or other +morphological features. It is far from being clear by now how to +interface this correctly, so some experimentation is mandatory. + + . Profile and speed up the whole thing. + + . Make it work on small address space machines. Consider three levels +of hugeness for files, and three corresponding algorithms to make +optimal use of memory. The first case is when all the input files and +all the word references fit in memory: this is the case currently +implemented. The second case is when the files cannot fit all together +in memory, but the word references do. The third case is when even +the word references cannot fit in memory. + + . There also are subsidiary developments for in-core incremental sort +routines as well as for external sort packages. The need for more +flexible sort packages comes partly from the fact that linguists use +kinds of keys which compare in unusual and more sophisticated ways. +GNU `sort' and `ptx' could evolve together. + + +Local Variables: +mode: outline +outline-regexp: " *[-+*.] \\| " +eval: (hide-body) +End: diff --git a/gnu/usr.bin/ptx/alloca.c b/gnu/usr.bin/ptx/alloca.c new file mode 100644 index 0000000..bd4932a --- /dev/null +++ b/gnu/usr.bin/ptx/alloca.c @@ -0,0 +1,484 @@ +/* alloca.c -- allocate automatically reclaimed memory + (Mostly) portable public-domain implementation -- D A Gwyn + + This implementation of the PWB library alloca function, + which is used to allocate space off the run-time stack so + that it is automatically reclaimed upon procedure exit, + was inspired by discussions with J. Q. Johnson of Cornell. + J.Otto Tennant contributed the Cray support. 
+ + There are some preprocessor constants that can + be defined when compiling for your specific system, for + improved efficiency; however, the defaults should be okay. + + The general concept of this implementation is to keep + track of all alloca-allocated blocks, and reclaim any + that are found to be deeper in the stack than the current + invocation. This heuristic does not reclaim storage as + soon as it becomes invalid, but it will do so eventually. + + As a special case, alloca(0) reclaims storage without + allocating any. It is a good idea to use alloca(0) in + your main control loop, etc. to force garbage collection. */ + +#ifdef HAVE_CONFIG_H +#if defined (emacs) || defined (CONFIG_BROKETS) +#include +#else +#include "config.h" +#endif +#endif + +/* If compiling with GCC 2, this file's not needed. */ +#if !defined (__GNUC__) || __GNUC__ < 2 + +/* If someone has defined alloca as a macro, + there must be some other way alloca is supposed to work. */ +#ifndef alloca + +#ifdef emacs +#ifdef static +/* actually, only want this if static is defined as "" + -- this is for usg, in which emacs must undefine static + in order to make unexec workable + */ +#ifndef STACK_DIRECTION +you +lose +-- must know STACK_DIRECTION at compile-time +#endif /* STACK_DIRECTION undefined */ +#endif /* static */ +#endif /* emacs */ + +/* If your stack is a linked list of frames, you have to + provide an "address metric" ADDRESS_FUNCTION macro. */ + +#if defined (CRAY) && defined (CRAY_STACKSEG_END) +long i00afunc (); +#define ADDRESS_FUNCTION(arg) (char *) i00afunc (&(arg)) +#else +#define ADDRESS_FUNCTION(arg) &(arg) +#endif + +#if __STDC__ +typedef void *pointer; +#else +typedef char *pointer; +#endif + +#define NULL 0 + +/* Different portions of Emacs need to call different versions of + malloc. The Emacs executable needs alloca to call xmalloc, because + ordinary malloc isn't protected from input signals. 
On the other + hand, the utilities in lib-src need alloca to call malloc; some of + them are very simple, and don't have an xmalloc routine. + + Non-Emacs programs expect this to call use xmalloc. + + Callers below should use malloc. */ + +#ifndef emacs +#define malloc xmalloc +#endif +extern pointer malloc (); + +/* Define STACK_DIRECTION if you know the direction of stack + growth for your system; otherwise it will be automatically + deduced at run-time. + + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown */ + +#ifndef STACK_DIRECTION +#define STACK_DIRECTION 0 /* Direction unknown. */ +#endif + +#if STACK_DIRECTION != 0 + +#define STACK_DIR STACK_DIRECTION /* Known at compile-time. */ + +#else /* STACK_DIRECTION == 0; need run-time code. */ + +static int stack_dir; /* 1 or -1 once known. */ +#define STACK_DIR stack_dir + +static void +find_stack_direction () +{ + static char *addr = NULL; /* Address of first `dummy', once known. */ + auto char dummy; /* To get stack address. */ + + if (addr == NULL) + { /* Initial entry. */ + addr = ADDRESS_FUNCTION (dummy); + + find_stack_direction (); /* Recurse once. */ + } + else + { + /* Second entry. */ + if (ADDRESS_FUNCTION (dummy) > addr) + stack_dir = 1; /* Stack grew upward. */ + else + stack_dir = -1; /* Stack grew downward. */ + } +} + +#endif /* STACK_DIRECTION == 0 */ + +/* An "alloca header" is used to: + (a) chain together all alloca'ed blocks; + (b) keep track of stack depth. + + It is very important that sizeof(header) agree with malloc + alignment chunk size. The following default should work okay. */ + +#ifndef ALIGN_SIZE +#define ALIGN_SIZE sizeof(double) +#endif + +typedef union hdr +{ + char align[ALIGN_SIZE]; /* To force sizeof(header). */ + struct + { + union hdr *next; /* For chaining headers. */ + char *deep; /* For stack depth measure. 
*/ + } h; +} header; + +static header *last_alloca_header = NULL; /* -> last alloca header. */ + +/* Return a pointer to at least SIZE bytes of storage, + which will be automatically reclaimed upon exit from + the procedure that called alloca. Originally, this space + was supposed to be taken from the current stack frame of the + caller, but that method cannot be made to work for some + implementations of C, for example under Gould's UTX/32. */ + +pointer +alloca (size) + unsigned size; +{ + auto char probe; /* Probes stack depth: */ + register char *depth = ADDRESS_FUNCTION (probe); + +#if STACK_DIRECTION == 0 + if (STACK_DIR == 0) /* Unknown growth direction. */ + find_stack_direction (); +#endif + + /* Reclaim garbage, defined as all alloca'd storage that + was allocated from deeper in the stack than currently. */ + + { + register header *hp; /* Traverses linked list. */ + + for (hp = last_alloca_header; hp != NULL;) + if ((STACK_DIR > 0 && hp->h.deep > depth) + || (STACK_DIR < 0 && hp->h.deep < depth)) + { + register header *np = hp->h.next; + + free ((pointer) hp); /* Collect garbage. */ + + hp = np; /* -> next header. */ + } + else + break; /* Rest are not deeper. */ + + last_alloca_header = hp; /* -> last valid storage. */ + } + + if (size == 0) + return NULL; /* No allocation required. */ + + /* Allocate combined header + user data storage. */ + + { + register pointer new = malloc (sizeof (header) + size); + /* Address of header. */ + + ((header *) new)->h.next = last_alloca_header; + ((header *) new)->h.deep = depth; + + last_alloca_header = (header *) new; + + /* User storage begins just after header. 
*/ + + return (pointer) ((char *) new + sizeof (header)); + } +} + +#if defined (CRAY) && defined (CRAY_STACKSEG_END) + +#ifdef DEBUG_I00AFUNC +#include +#endif + +#ifndef CRAY_STACK +#define CRAY_STACK +#ifndef CRAY2 +/* Stack structures for CRAY-1, CRAY X-MP, and CRAY Y-MP */ +struct stack_control_header + { + long shgrow:32; /* Number of times stack has grown. */ + long shaseg:32; /* Size of increments to stack. */ + long shhwm:32; /* High water mark of stack. */ + long shsize:32; /* Current size of stack (all segments). */ + }; + +/* The stack segment linkage control information occurs at + the high-address end of a stack segment. (The stack + grows from low addresses to high addresses.) The initial + part of the stack segment linkage control information is + 0200 (octal) words. This provides for register storage + for the routine which overflows the stack. */ + +struct stack_segment_linkage + { + long ss[0200]; /* 0200 overflow words. */ + long sssize:32; /* Number of words in this segment. */ + long ssbase:32; /* Offset to stack base. */ + long:32; + long sspseg:32; /* Offset to linkage control of previous + segment of stack. */ + long:32; + long sstcpt:32; /* Pointer to task common address block. */ + long sscsnm; /* Private control structure number for + microtasking. */ + long ssusr1; /* Reserved for user. */ + long ssusr2; /* Reserved for user. */ + long sstpid; /* Process ID for pid based multi-tasking. */ + long ssgvup; /* Pointer to multitasking thread giveup. */ + long sscray[7]; /* Reserved for Cray Research. */ + long ssa0; + long ssa1; + long ssa2; + long ssa3; + long ssa4; + long ssa5; + long ssa6; + long ssa7; + long sss0; + long sss1; + long sss2; + long sss3; + long sss4; + long sss5; + long sss6; + long sss7; + }; + +#else /* CRAY2 */ +/* The following structure defines the vector of words + returned by the STKSTAT library routine. */ +struct stk_stat + { + long now; /* Current total stack size. 
*/ + long maxc; /* Amount of contiguous space which would + be required to satisfy the maximum + stack demand to date. */ + long high_water; /* Stack high-water mark. */ + long overflows; /* Number of stack overflow ($STKOFEN) calls. */ + long hits; /* Number of internal buffer hits. */ + long extends; /* Number of block extensions. */ + long stko_mallocs; /* Block allocations by $STKOFEN. */ + long underflows; /* Number of stack underflow calls ($STKRETN). */ + long stko_free; /* Number of deallocations by $STKRETN. */ + long stkm_free; /* Number of deallocations by $STKMRET. */ + long segments; /* Current number of stack segments. */ + long maxs; /* Maximum number of stack segments so far. */ + long pad_size; /* Stack pad size. */ + long current_address; /* Current stack segment address. */ + long current_size; /* Current stack segment size. This + number is actually corrupted by STKSTAT to + include the fifteen word trailer area. */ + long initial_address; /* Address of initial segment. */ + long initial_size; /* Size of initial segment. */ + }; + +/* The following structure describes the data structure which trails + any stack segment. I think that the description in 'asdef' is + out of date. I only describe the parts that I am sure about. */ + +struct stk_trailer + { + long this_address; /* Address of this block. */ + long this_size; /* Size of this block (does not include + this trailer). */ + long unknown2; + long unknown3; + long link; /* Address of trailer block of previous + segment. */ + long unknown5; + long unknown6; + long unknown7; + long unknown8; + long unknown9; + long unknown10; + long unknown11; + long unknown12; + long unknown13; + long unknown14; + }; + +#endif /* CRAY2 */ +#endif /* not CRAY_STACK */ + +#ifdef CRAY2 +/* Determine a "stack measure" for an arbitrary ADDRESS. + I doubt that "lint" will like this much. 
*/ + +static long +i00afunc (long *address) +{ + struct stk_stat status; + struct stk_trailer *trailer; + long *block, size; + long result = 0; + + /* We want to iterate through all of the segments. The first + step is to get the stack status structure. We could do this + more quickly and more directly, perhaps, by referencing the + $LM00 common block, but I know that this works. */ + + STKSTAT (&status); + + /* Set up the iteration. */ + + trailer = (struct stk_trailer *) (status.current_address + + status.current_size + - 15); + + /* There must be at least one stack segment. Therefore it is + a fatal error if "trailer" is null. */ + + if (trailer == 0) + abort (); + + /* Discard segments that do not contain our argument address. */ + + while (trailer != 0) + { + block = (long *) trailer->this_address; + size = trailer->this_size; + if (block == 0 || size == 0) + abort (); + trailer = (struct stk_trailer *) trailer->link; + if ((block <= address) && (address < (block + size))) + break; + } + + /* Set the result to the offset in this segment and add the sizes + of all predecessor segments. */ + + result = address - block; + + if (trailer == 0) + { + return result; + } + + do + { + if (trailer->this_size <= 0) + abort (); + result += trailer->this_size; + trailer = (struct stk_trailer *) trailer->link; + } + while (trailer != 0); + + /* We are done. Note that if you present a bogus address (one + not in any segment), you will get a different number back, formed + from subtracting the address of the first block. This is probably + not what you want. */ + + return (result); +} + +#else /* not CRAY2 */ +/* Stack address function for a CRAY-1, CRAY X-MP, or CRAY Y-MP. + Determine the number of the cell within the stack, + given the address of the cell. The purpose of this + routine is to linearize, in some sense, stack addresses + for alloca. 
*/ + +static long +i00afunc (long address) +{ + long stkl = 0; + + long size, pseg, this_segment, stack; + long result = 0; + + struct stack_segment_linkage *ssptr; + + /* Register B67 contains the address of the end of the + current stack segment. If you (as a subprogram) store + your registers on the stack and find that you are past + the contents of B67, you have overflowed the segment. + + B67 also points to the stack segment linkage control + area, which is what we are really interested in. */ + + stkl = CRAY_STACKSEG_END (); + ssptr = (struct stack_segment_linkage *) stkl; + + /* If one subtracts 'size' from the end of the segment, + one has the address of the first word of the segment. + + If this is not the first segment, 'pseg' will be + nonzero. */ + + pseg = ssptr->sspseg; + size = ssptr->sssize; + + this_segment = stkl - size; + + /* It is possible that calling this routine itself caused + a stack overflow. Discard stack segments which do not + contain the target address. */ + + while (!(this_segment <= address && address <= stkl)) + { +#ifdef DEBUG_I00AFUNC + fprintf (stderr, "%011o %011o %011o\n", this_segment, address, stkl); +#endif + if (pseg == 0) + break; + stkl = stkl - pseg; + ssptr = (struct stack_segment_linkage *) stkl; + size = ssptr->sssize; + pseg = ssptr->sspseg; + this_segment = stkl - size; + } + + result = address - this_segment; + + /* If you subtract pseg from the current end of the stack, + you get the address of the previous stack segment's end. + This seems a little convoluted to me, but I'll bet you save + a cycle somewhere. 
*/ + + while (pseg != 0) + { +#ifdef DEBUG_I00AFUNC + fprintf (stderr, "%011o %011o\n", pseg, size); +#endif + stkl = stkl - pseg; + ssptr = (struct stack_segment_linkage *) stkl; + size = ssptr->sssize; + pseg = ssptr->sspseg; + result += size; + } + return (result); +} + +#endif /* not CRAY2 */ +#endif /* CRAY */ + +#endif /* no alloca */ +#endif /* not GCC version 2 */ diff --git a/gnu/usr.bin/ptx/argmatch.c b/gnu/usr.bin/ptx/argmatch.c new file mode 100644 index 0000000..17e088b --- /dev/null +++ b/gnu/usr.bin/ptx/argmatch.c @@ -0,0 +1,94 @@ +/* argmatch.c -- find a match for a string in an array + Copyright (C) 1990 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by David MacKenzie */ + +#ifdef HAVE_CONFIG_H +#if defined (CONFIG_BROKETS) +/* We use instead of "config.h" so that a compilation + using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). 
*/ +#include +#else +#include "config.h" +#endif +#endif + +#include +#ifdef STDC_HEADERS +#include +#endif + +extern char *program_name; + +/* If ARG is an unambiguous match for an element of the + null-terminated array OPTLIST, return the index in OPTLIST + of the matched element, else -1 if it does not match any element + or -2 if it is ambiguous (is a prefix of more than one element). */ + +int +argmatch (arg, optlist) + char *arg; + char **optlist; +{ + int i; /* Temporary index in OPTLIST. */ + int arglen; /* Length of ARG. */ + int matchind = -1; /* Index of first nonexact match. */ + int ambiguous = 0; /* If nonzero, multiple nonexact match(es). */ + + arglen = strlen (arg); + + /* Test all elements for either exact match or abbreviated matches. */ + for (i = 0; optlist[i]; i++) + { + if (!strncmp (optlist[i], arg, arglen)) + { + if (strlen (optlist[i]) == arglen) + /* Exact match found. */ + return i; + else if (matchind == -1) + /* First nonexact match found. */ + matchind = i; + else + /* Second nonexact match found. */ + ambiguous = 1; + } + } + if (ambiguous) + return -2; + else + return matchind; +} + +/* Error reporting for argmatch. + KIND is a description of the type of entity that was being matched. + VALUE is the invalid value that was given. + PROBLEM is the return value from argmatch. */ + +void +invalid_arg (kind, value, problem) + char *kind; + char *value; + int problem; +{ + fprintf (stderr, "%s: ", program_name); + if (problem == -1) + fprintf (stderr, "invalid"); + else /* Assume -2. */ + fprintf (stderr, "ambiguous"); + fprintf (stderr, " %s `%s'\n", kind, value); +} diff --git a/gnu/usr.bin/ptx/bumpalloc.h b/gnu/usr.bin/ptx/bumpalloc.h new file mode 100644 index 0000000..bbf901f --- /dev/null +++ b/gnu/usr.bin/ptx/bumpalloc.h @@ -0,0 +1,58 @@ +/* BUMP_ALLOC macro - increase table allocation by one element. + Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc. + Francois Pinard , 1990. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/*-------------------------------------------------------------------------. +| Bump the allocation of the array pointed to by TABLE whenever required. | +| The table already has COUNT elements in it; this macro ensures it | +| has enough space to accommodate at least one more element. Space is | +| allocated (2 ^ EXPONENT) elements at a time. Each element of the array | +| is of type TYPE. | +`-------------------------------------------------------------------------*/ + +/* Routines `xmalloc' and `xrealloc' are called to do the actual memory + management. This implies that the program will abort with a `Virtual + Memory exhausted!' error if any problem arises. + + To work correctly, at least EXPONENT and TYPE should always be the + same for all uses of this macro for any given TABLE. A secure way to + achieve this is to never use this macro directly, but use it to define + other macros, which would then be TABLE-specific. + + The first time through, COUNT is usually zero. Note that COUNT is not + updated by this macro, but it should be updated elsewhere, later. This + is convenient, because it allows TABLE[COUNT] to refer to the new + element at the end. Once its construction is completed, COUNT++ will + record it in the table.
Calling this macro several times in a row + without updating COUNT is a bad thing to do. */ + +#define BUMP_ALLOC(Table, Count, Exponent, Type) \ + BUMP_ALLOC_WITH_SIZE ((Table), (Count), (Exponent), Type, sizeof (Type)) + +/* In cases `sizeof TYPE' would not always yield the correct value for + the size of each element entry, this macro accepts a supplementary + SIZE argument. The EXPONENT, TYPE and SIZE parameters should still + have the same value for all macro calls related to a specific TABLE. The block-boundary test masks COUNT with (1 << EXPONENT) - 1, so no negative value is ever shifted. The expansion ends in a bare `else', so every use must be followed by a semicolon. */ + +#define BUMP_ALLOC_WITH_SIZE(Table, Count, Exponent, Type, Size) \ + if (((Count) & ((1 << (Exponent)) - 1)) == 0) \ + if ((Count) == 0) \ + (Table) = (Type *) xmalloc ((1 << (Exponent)) * (Size)); \ + else \ + (Table) = (Type *) \ + xrealloc ((Table), ((Count) + (1 << (Exponent))) * (Size)); \ + else diff --git a/gnu/usr.bin/ptx/check-out b/gnu/usr.bin/ptx/check-out new file mode 100644 index 0000000..4d13c48 --- /dev/null +++ b/gnu/usr.bin/ptx/check-out @@ -0,0 +1,65 @@ +:30: /ranslate to certain respons ibilities for you if you distr/ +:183: c/ These actions are proh ibited by law if you do not ac +:278: AS BEEN ADVISED OF THE POSS IBILITY OF SUCH DAMAGES. /Y H +:232: /his License may add an expl icit geographical distribution/ +:267: /COST OF ALL NECESSARY SERV ICING, REPAIR OR CORRECTION. +:216: /ht claims or to contest val idity of any such claims; this/ +:45: e/ If the software is mod ified by someone else and pass +:57: pying, distribution and mod ification follow. /for co +:60: /PYING, DISTRIBUTION AND MOD IFICATION 0. This License a/ +:68: /either verbatim or with mod ifications and/or translated i/ +:70: limitation in the term "mod ification".) 
/ithout +:72: /pying, distribution and mod ification are not covered by t/ +:92: /opy and distribute such mod ifications or work under the t/ +:95: /a) You must cause the mod ified files to carry prominent/ +:103: ommands in/ c) If the mod ified program normally reads c +:114: quirements apply to the mod ified work as a whole. /se re +:115: are not derived/ If ident ifiable sections of that work +:156: of the work for making mod ifications to it. /ed form +:243: Lice/ If the Program spec ifies a version number of this +:46: /hat they have is not the or iginal, so that any problems i/ +:47: /will not reflect on the or iginal authors' reputations. +:191: /eives a license from the or iginal licensor to copy, distr/ +:231: /yrighted interfaces, the or iginal copyright holder who pl/ +:265: /ED WARRANTIES OF MERCHANTAB ILITY AND FITNESS FOR A PARTIC/ +:274: /NG OUT OF THE USE OR INAB ILITY TO USE THE PROGRAM (INCL/ +:303: /warranty of MERCHANTAB ILITY or FITNESS FOR A PARTICU/ +:69: /ation is included without l imitation in the term "modific/ +:198: /for any other reason (not l imited to patent issues), cond/ +:232: /geographical distribution l imitation excluding those coun/ +:235: /License incorporates the l imitation as if written in the/ +:239: Such new versions will be s imilar in spirit to the presen/ +:264: /PLIED, INCLUDING, BUT NOT L IMITED TO, THE IMPLIED WARRANT/ +:274: /ROGRAM (INCLUDING BUT NOT L IMITED TO LOSS OF DATA OR DATA/ +:67: /hat is to say, a work conta ining the Program or a portion/ +:158: /ny associated interface def inition files, plus the script/ +:34: /fee, you must give the rec ipients all the rights that yo/ +:46: /passed on, we want its rec ipients to know that what they/ +:84: /nty; and give any other rec ipients of the Program a copy/ +:190: /ed on the Program), the rec ipient automatically receives/ +:193: /her restrictions on the rec ipients' exercise of the right/ +:239: /sions will be similar in sp irit to the present version, b/ 
+:254: o goals of prese/ Our dec ision will be guided by the tw +:273: /OR CONSEQUENTIAL DAMAGES AR ISING OUT OF THE USE OR INAB/ +:315: /teractive mode: Gnomov ision version 69, Copyright (C/ +:316: /y name of author Gnomov ision comes with ABSOLUTELY NO/ +:330: /st in the program `Gnomov ision' (which makes passes at/ +:30: /late to certain responsibil ities for you if you distribut/ +:56: The precise terms and cond itions for copying, distributi/ +:60: /C LICENSE TERMS AND COND ITIONS FOR COPYING, DISTRIBUTI/ +:93: /also meet all of these cond itions: a) You must cause/ +:109: /rogram under these cond itions, and telling the user h/ +:129: ther work not bas/ In add ition, mere aggregation of ano +:186: /and all its terms and cond itions for copying, distributi/ +:192: ect to these terms and cond itions. /am subj +:199: /ted to patent issues), cond itions are imposed on you (whe/ +:200: /e) that contradict the cond itions of this License, they d/ +:201: ot excuse you from the cond itions of this License. /do n +:244: /ollowing the terms and cond itions either of that version/ +:251: /ams whose distribution cond itions are different, write to/ +:262: /WHEN OTHERWISE STATED IN WR ITING THE COPYRIGHT HOLDERS AN/ +:270: /ABLE LAW OR AGREED TO IN WR ITING WILL ANY COPYRIGHT HOLDE/ +:280: ly/ END OF TERMS AND COND ITIONS Appendix: How to App +:318: /e it under certain cond itions; type `show c' for deta/ +:52: /of a free program will ind ividually obtain patent licens/ +:72: stribution and mod/ Act ivities other than copying, di diff --git a/gnu/usr.bin/ptx/config.h b/gnu/usr.bin/ptx/config.h new file mode 100644 index 0000000..93e7ed1 --- /dev/null +++ b/gnu/usr.bin/ptx/config.h @@ -0,0 +1,57 @@ +/* config.h. Generated automatically by configure. */ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define if using alloca.c. */ +/* #undef C_ALLOCA */ + +/* Define if type char is unsigned and you are not using gcc. 
*/ +/* #undef __CHAR_UNSIGNED__ */ + +/* Define to empty if the keyword does not work. */ +/* #undef const */ + +/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. + This function is required for alloca.c support on those systems. */ +/* #undef CRAY_STACKSEG_END */ + +/* Define if you have alloca.h and it should be used (not Ultrix). */ +/* #undef HAVE_ALLOCA_H */ + +/* Define if you don't have vprintf but do have _doprnt. */ +/* #undef HAVE_DOPRNT */ + +/* Define if you have the vprintf function. */ +#define HAVE_VPRINTF 1 + +/* Define if you need to in order for stat and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at run-time. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown + */ +/* #undef STACK_DIRECTION */ + +/* Define if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* In regex, request the capability of modifying the letter syntax. */ +#define SYNTAX_TABLE 1 + +/* In regex, use 8 bits per character. */ +#define CHAR_SET_SIZE 256 + +/* Define if you have mcheck. */ +/* #undef HAVE_MCHECK */ + +/* Define if you have setchrclass. */ +/* #undef HAVE_SETCHRCLASS */ + +/* Define if you have strerror. */ +#define HAVE_STRERROR 1 + +/* Define if you have the header file. */ +#define HAVE_STRING_H 1 diff --git a/gnu/usr.bin/ptx/diacrit.c b/gnu/usr.bin/ptx/diacrit.c new file mode 100644 index 0000000..29e319b --- /dev/null +++ b/gnu/usr.bin/ptx/diacrit.c @@ -0,0 +1,148 @@ +/* Diacritics processing for a few character codes. + Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + Francois Pinard , 1988. + + All this file is a temporary hack, waiting for locales in GNU. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "diacrit.h" + +/* ISO 8859-1 Latin-1 code is used as the underlying character set. If + MSDOS is defined, IBM-PC's character set code is used instead. */ + +/*--------------------------------------------------------------------. +| For each alphabetic character, returns what it would be without its | +| possible diacritic symbol. | +`--------------------------------------------------------------------*/ + +const char diacrit_base[256] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', + 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 0, 0, 0, 0, 0, + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 0, 0, 0, 0, 0, + +#ifdef MSDOS + + 'C', 'u', 'e', 'a', 'a', 'a', 'a', 'c', + 'e', 'e', 'e', 'i', 'i', 'i', 'A', 'A', + 'E', 'e', 'E', 'o', 'o', 'o', 'u', 'u', + 'y', 'O', 'U', 0, 0, 0, 0, 0, + 'a', 'i', 'o', 'u', 'n', 'N', 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + +#else /* not MSDOS */ + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 'A', 'A', 'A', 'A', 'A', 'A', 'A', 'C', + 'E', 'E', 'E', 'E', 'I', 'I', 'I', 'I', + 0, 'N', 'O', 'O', 'O', 'O', 'O', 0, + 'O', 'U', 'U', 'U', 'U', 'Y', 0, 0, + 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c', + 'e', 'e', 'e', 'e', 'i', 
'i', 'i', 'i', + 0, 'n', 'o', 'o', 'o', 'o', 'o', 0, + 'o', 'u', 'u', 'u', 'u', 'y', 0, 'y', + +#endif /* not MSDOS */ +}; + +/*------------------------------------------------------------------------. +| For each alphabetic character, returns a code of what its diacritic is, | +| according to the following codes: 1 (eE) over aA for latin diphtongs; 2 | +| (') acute accent; 3 (`) grave accent; 4 (^) circumflex accent; 5 (") | +| umlaut or diaraesis; 6 (~) tilda; 7 (,) cedilla; 8 (o) covering degree | +| symbol; 9 (|) slashed character. | +`------------------------------------------------------------------------*/ + +const char diacrit_diac[256] = +{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 4, 0, + 3, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 6, 0, + +#ifdef MSDOS + + 7, 5, 2, 4, 5, 3, 8, 7, + 4, 5, 3, 5, 4, 3, 5, 8, + 2, 1, 1, 4, 5, 3, 4, 3, + 5, 5, 5, 0, 0, 0, 0, 0, + 2, 2, 2, 2, 6, 6, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + +#else /* not MSDOS */ + + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 3, 2, 4, 6, 5, 8, 1, 7, + 3, 2, 4, 5, 3, 2, 4, 5, + 0, 6, 3, 2, 4, 6, 5, 0, + 9, 3, 2, 4, 5, 2, 0, 0, + 3, 2, 4, 6, 5, 8, 1, 7, + 3, 2, 4, 5, 3, 2, 4, 5, + 0, 6, 3, 2, 4, 6, 5, 0, + 9, 3, 2, 4, 5, 2, 0, 0, + +#endif /* not MSDOS */ +}; diff --git 
a/gnu/usr.bin/ptx/diacrit.h b/gnu/usr.bin/ptx/diacrit.h new file mode 100644 index 0000000..c880a45 --- /dev/null +++ b/gnu/usr.bin/ptx/diacrit.h @@ -0,0 +1,16 @@ +/* Diacritics processing for a few character codes. + Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + Francois Pinard <pinard@iro.umontreal.ca>, 1988. + + All this file is a temporary hack, waiting for locales in GNU. +*/ + +extern const char diacrit_base[]; /* characters without diacritics */ +extern const char diacrit_diac[]; /* diacritic code for each character */ + +/* Returns CHR without its diacritic. CHR is known to be alphabetic. */ +#define tobase(chr) (diacrit_base[(unsigned char) (chr)]) + +/* Returns a diacritic code for CHR. CHR is known to be alphabetic. */ +#define todiac(chr) (diacrit_diac[(unsigned char) (chr)]) + diff --git a/gnu/usr.bin/ptx/error.c b/gnu/usr.bin/ptx/error.c new file mode 100644 index 0000000..41d66fb --- /dev/null +++ b/gnu/usr.bin/ptx/error.c @@ -0,0 +1,117 @@ +/* error.c -- error handler for noninteractive utilities + Copyright (C) 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by David MacKenzie. */ + +#ifdef HAVE_CONFIG_H +#if defined (CONFIG_BROKETS) +/* We use <config.h> instead of "config.h" so that a compilation + using -I. 
-I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). */ +#include <config.h> +#else +#include "config.h" +#endif +#endif + +#include <stdio.h> + +#ifdef HAVE_VPRINTF + +#if __STDC__ +#include <stdarg.h> +#define VA_START(args, lastarg) va_start(args, lastarg) +#else /* !__STDC__ */ +#include <varargs.h> +#define VA_START(args, lastarg) va_start(args) +#endif /* !__STDC__ */ + +#else /* !HAVE_VPRINTF */ + +#ifdef HAVE_DOPRNT +#define va_alist args +#define va_dcl int args; +#else /* !HAVE_DOPRNT */ +#define va_alist a1, a2, a3, a4, a5, a6, a7, a8 +#define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; +#endif /* !HAVE_DOPRNT */ + +#endif /* !HAVE_VPRINTF */ + +#ifdef STDC_HEADERS +#include <stdlib.h> +#include <string.h> +#else /* !STDC_HEADERS */ +void exit (); +#endif /* !STDC_HEADERS */ + +extern char *program_name; + +#ifndef HAVE_STRERROR +static char * +private_strerror (errnum) + int errnum; +{ + extern char *sys_errlist[]; + extern int sys_nerr; + + if (errnum > 0 && errnum < sys_nerr) + return sys_errlist[errnum]; + return "Unknown system error"; +} +#define strerror private_strerror +#endif /* !HAVE_STRERROR */ + +/* Print the program name and error message MESSAGE, which is a printf-style + format string with optional args. + If ERRNUM is nonzero, print its corresponding system error message. + Exit with status STATUS if it is nonzero. */ +/* VARARGS */ +void +#if defined (HAVE_VPRINTF) && __STDC__ +error (int status, int errnum, char *message, ...) 
+#else /* !HAVE_VPRINTF or !__STDC__ */ +error (status, errnum, message, va_alist) + int status; + int errnum; + char *message; + va_dcl +#endif /* !HAVE_VPRINTF or !__STDC__ */ +{ +#ifdef HAVE_VPRINTF + va_list args; +#endif /* HAVE_VPRINTF */ + + fprintf (stderr, "%s: ", program_name); +#ifdef HAVE_VPRINTF + VA_START (args, message); + vfprintf (stderr, message, args); + va_end (args); +#else /* !HAVE_VPRINTF */ +#ifdef HAVE_DOPRNT + _doprnt (message, &args, stderr); +#else /* !HAVE_DOPRNT */ + fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8); +#endif /* !HAVE_DOPRNT */ +#endif /* !HAVE_VPRINTF */ + if (errnum) + fprintf (stderr, ": %s", strerror (errnum)); + putc ('\n', stderr); + fflush (stderr); + if (status) + exit (status); +} diff --git a/gnu/usr.bin/ptx/examples/README b/gnu/usr.bin/ptx/examples/README new file mode 100644 index 0000000..038034f --- /dev/null +++ b/gnu/usr.bin/ptx/examples/README @@ -0,0 +1,21 @@ +Various examples of GNU ptx usages. +Francois Pinard , 1993. + +This directory contains a few examples contributed by GNU ptx users. +Feel free to look at them for tricks or ideas. When an example +requires many files, a subdirectory is used to hold them together. +I have not necessarily tested these examples recently, if at all. + +If you have examples you would like to share, please submit them to +me. You may also submit corrections to the examples given in this +directory, however, please write to the authors first, since they most +probably will like to have their say about their own contribution. + +* include.pl: A Perl script studying system include files. + +* luke/: A shell script permuting indices for man pages. It contains +two examples of an .xx definition for *roff, one simple, one complex. + +* latex/: A simple example of \xx definition for latex. + +* ajay/: A more complex application of latex with ptx. 
diff --git a/gnu/usr.bin/ptx/examples/ajay/Makefile b/gnu/usr.bin/ptx/examples/ajay/Makefile new file mode 100644 index 0000000..bff099c --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ajay/Makefile @@ -0,0 +1,28 @@ +JUNKFILES = tip-index.ps tip-index.dvi tip-index.tex tip-index.log \ + tip-index.aux + +tip-index.ps : tip-index.dvi + dvips tip-index.dvi + +tip-index.dvi : tip-index.tex + latex tip-index.tex + +tip-index.tex : tip.texified header.tex footer.tex + cat header.tex tip.texified footer.tex > tip-index.tex + +tip.texified : tip.eign tip.forgptx Makefile + gptx -f -r -i ./tip.eign -T < tip.forgptx | x.pl > tip.texified + +tip.eign : /usr/lib/eign exclude-words + cat /usr/lib/eign exclude-words > tip.eign + +screenlist : tip.texified + cat tip.texified \ + | gawk -F\{ '{count[$$4]++} \ + END {for (s in count) printf("%d %20s\n", count[s], s)}' \ + | tr -d '}' \ + | sort -n > screenlist + @echo "Check (say) the last 100 lines of ./screenlist". + +clean : + rm -f tip.eign tip.texified $(JUNKFILES) screenlist diff --git a/gnu/usr.bin/ptx/examples/ajay/README b/gnu/usr.bin/ptx/examples/ajay/README new file mode 100644 index 0000000..7b55ca2 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ajay/README @@ -0,0 +1,41 @@ +To: pinard@iro.umontreal.ca +Subject: Re: Gptx suggestions and help request +Date: Tue, 28 Sep 93 11:30:04 +0500 +From: ajayshah@cmie.ernet.in + +[...] My plaintext input looks like: "pagenum multiword-phrase" where +the multiword phrase is atmost five words. So [...], I'm doing two +columns in small type. + +I got one of the programmers here to write me a tex macro for my +problem. When it goes into production I'll mail you a few files: a +sample input, the gptx command, the output, and the tex macro. If you +find these interesting you can ship them with future gptx releases. + +Thanks a lot for gptx. 
If you have a mailing list of loyal users, +you can add us to it :-) + + +To: pinard@iro.umontreal.ca +Cc: rk@cmie.ernet.in +Subject: All glue code I used with gptx +Date: Tue, 05 Oct 93 15:23:44 +0500 +From: ajayshah@zigma.cmie.ernet.in + +That is a full set of a files for an example of "production use". You +are welcome to post them, or use them as a sample supplied with the +gptx distribution, etc., with absolutely no restrictions on what +anyone does with this. In case you do so, please acknowledge the +contribution of Rakesh Chauhan, rk@cmie.ernet.in, who is the author of +x.pl and header.tex. [...] + +As you can tell, I used it for a 100% realworld problem, and it +worked. Thanks a million. If you'd like, I can send you a hardcopy +of the full finished document (just send me your mailing address). If +you would like to mention the name of this document when you use +these files as a demo, it is + + Trends in Industrial Production + September 1993 + Centre for Monitoring Indian Economy, Bombay, India. 
+ diff --git a/gnu/usr.bin/ptx/examples/ajay/footer.tex b/gnu/usr.bin/ptx/examples/ajay/footer.tex new file mode 100644 index 0000000..6b47932 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ajay/footer.tex @@ -0,0 +1 @@ +\end{document} diff --git a/gnu/usr.bin/ptx/examples/ajay/header.tex b/gnu/usr.bin/ptx/examples/ajay/header.tex new file mode 100644 index 0000000..04a9c64 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ajay/header.tex @@ -0,0 +1,21 @@ +\documentstyle [twocolumn,a4]{article} + +\pagestyle{empty} + +\textwidth 6.8in +\oddsidemargin -.8in +\evensidemargin -.8in +\textheight 10in +\topmargin -1in +% \columnseprule 1pt + +\begin{document} + +\def\xx #1#2#3#4#5#6{\hbox to \hsize{% +\hbox to 1.4in{\hfill #2}\hskip .05in% +\hbox to .8in{\it #3\hfil}\hskip .05in% +\hbox to 1.4in{#4\hfil}\hskip .05in% +\hbox{\hfil #6}\hfil}% +} + +\scriptsize diff --git a/gnu/usr.bin/ptx/examples/ajay/tip.forgptx b/gnu/usr.bin/ptx/examples/ajay/tip.forgptx new file mode 100644 index 0000000..ecf6e0e --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ajay/tip.forgptx @@ -0,0 +1,10 @@ +1 Zinc concentrate +1 Coal +1 Ball clay +1 Non-coking coal +1 Calcareous sand +1 Natural Gas +1 Chalk +1 Bauxite +1 Clay (others) +1 Copper ore diff --git a/gnu/usr.bin/ptx/examples/ajay/x.pl b/gnu/usr.bin/ptx/examples/ajay/x.pl new file mode 100644 index 0000000..e0615ba --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ajay/x.pl @@ -0,0 +1,22 @@ +#! 
/usr/local/bin/perl + +while ($l = <>) +{ +chop $l; + +$l =~ s/\\xx //; +$l =~ s/}{/|/g; +$l =~ s/{//g; +$l =~ s/}//g; +@x = split(/\|/, $l); + +printf ("\\xx "); +for ($i = 0; $i <= $#x; $i++) + { + $v = substr($x[$i], 0, 17); + $v =~ s/\\$//; + printf("{%s}", $v); + } +printf ("\n"); + +} diff --git a/gnu/usr.bin/ptx/examples/ignore/README b/gnu/usr.bin/ptx/examples/ignore/README new file mode 100644 index 0000000..33ee19e --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ignore/README @@ -0,0 +1,65 @@ +From beebe@math.utah.edu Wed Oct 27 19:37:22 1993 +Date: Tue, 26 Oct 93 15:43:19 MDT +From: "Nelson H. F. Beebe" +To: pinard@iro.umontreal.ca +Subject: Re: Another short comment on gptx 0.2 + +/usr/lib/eign: DECstation 5000, ULTRIX 4.3 + HP 9000/735, HP-UX 9.0 + IBM RS/6000, AIX 2.3 + IBM 3090, AIX MP370 2.1 + Stardent 1520, OS 2.2 + Sun SPARCstation, SunOS 4.x + +No eign anywhere on: HP 375, BSD 4.3 (ptx.c is in /usr/src/usr.bin, + and the source code refers to /usr/lib/eign, + but I could not find it in the source tree) + NeXT, Mach 3.0 (though documented in man pages) + Sun SPARCstation, Solaris 2.x + SGI Indigo, IRIX 4.0.x + +The contents of the eign files that I found on the above machines were +almost identical. With the exception of the Stardent and the IBM +3090, there were only two such files, one with 150 words, and the +other with 133, with only a few differences between them (some words +in the 133-word file were not in the 150-word file). I found the +133-word variant in groff-1.06/src/indxbib. I used archie to search +for eign, and it found 7 sites, all with the groff versions. + +The Stardent and IBM 3090 eign files have the same contents as the +150-word version, but have a multiline copyright comment at the +beginning. None of the others contains a copyright. 
+ +I recently had occasion to build a similar list of words for bibindex, +which indexes a BibTeX .bib file, and for which omission of common +words, like articles and prepositions, helps to reduce the size of the +index. I didn't use eign to build that list, but instead, went +through the word lists from 3.8MB of .bib files in the tuglib +collection on ftp.math.utah.edu:pub/tex/bib, and collected words to be +ignored. That list includes words from several languages. I'll leave +it up to you to decide whether you wish to merge them or not; I +suspect it may be a better design choice to keep a separate eign file +for each language, although in my own application of ptx-ing +bibliographies, the titles do occur in multiple languages, so a +mixed-language eign is appropriate. Since there are standard ISO +2-letter abbreviations for every country, perhaps one could have +eign.xy for country xy (of course, only approximately is country == +language). The exact list of words in eign is not so critical; its +only purpose is to reduce the size of the output by not indexing words +that occur very frequently and have little content in themselves. + +I'm enclosing a shar bundle at the end of this message with the merger +of the multiple eign versions (duplicates eliminated, and the list +sorted into 179 unique words), followed by the bibindex list. + + + +======================================================================== +Nelson H. F. 
Beebe Tel: +1 801 581 5254 +Center for Scientific Computing FAX: +1 801 581 4148 +Department of Mathematics, 105 JWB Internet: beebe@math.utah.edu +University of Utah +Salt Lake City, UT 84112, USA +======================================================================== + + diff --git a/gnu/usr.bin/ptx/examples/ignore/bix b/gnu/usr.bin/ptx/examples/ignore/bix new file mode 100644 index 0000000..b9a8ba6 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ignore/bix @@ -0,0 +1,109 @@ +ab +aber +als +an +and +are +as +auf +aus +az +bei +bir +but +da +das +dat +de +dei +dem +den +der +des +det +di +die +dos +een +eene +egy +ei +ein +eine +einen +einer +eines +eit +el +en +er +es +et +ett +eyn +eyne +for +from +fuer +fur +gl +gli +ha +haben +had +hai +has +hat +have +he +heis +hen +hena +henas +het +hin +hinar +hinir +hinn +hith +ho +hoi +il +in +ist +ka +ke +la +las +le +les +lo +los +mia +mit +na +nji +not +oder +of +on +or +os +others +sie +sind +so +ta +the +to +um +uma +un +una +und +une +uno +unter +von +with +yr diff --git a/gnu/usr.bin/ptx/examples/ignore/eign b/gnu/usr.bin/ptx/examples/ignore/eign new file mode 100644 index 0000000..0401245 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/ignore/eign @@ -0,0 +1,163 @@ +a +about +after +against +all +also +an +and +another +any +are +as +at +back +be +because +been +before +being +between +both +but +by +came +can +come +could +current +day +did +do +down +each +end +even +first +for +from +get +go +good +great +had +has +have +he +her +here +him +his +how +i +if +in +into +is +it +its +just +know +last +life +like +little +long +made +make +man +many +may +me +men +might +more +most +mr +much +must +my +name +never +new +no +not +now +of +off +old +on +one +only +or +other +our +out +over +own +part +people +point +right +said +same +say +see +she +should +since +so +some +start +state +still +such +take +than +that +the +their +them +then +there +these +they +this +those +three +through +time +to +too +true +try +two 
+under +up +us +use +used +value +very +was +way +we +well +were +what +when +where +which +while +who +why +will +with +without +work +world +would +year +years +you +your diff --git a/gnu/usr.bin/ptx/examples/include.pl b/gnu/usr.bin/ptx/examples/include.pl new file mode 100755 index 0000000..cb3c0ff --- /dev/null +++ b/gnu/usr.bin/ptx/examples/include.pl @@ -0,0 +1,79 @@ +#!/usr/bin/perl -- # -*-Perl-*- +eval "exec /usr/bin/perl -S $0 $*" + if $running_under_some_shell; + +# Construct a permuted index for all system include files. +# Copyright (C) 1991 Free Software Foundation, Inc. +# Francois Pinard , June 1991. + +# NOTE: about removing asm statements? +# NOTE: about removing strings? +# NOTE: about ignoring 0xHEXDIGITS, unchar/ushort/etc. + +# Construct a sorted list of system include files. + +opendir (DIR, "/usr/include"); +@includes = sort grep (-f "/usr/include/$_", readdir (DIR)); +opendir (DIR, "/usr/include/sys"); +foreach (sort grep (-f "/usr/include/sys/$_", readdir (DIR))) { + push (@includes, "sys/$_"); +} +closedir (DIR); + +# Launch the permuted indexer, with a list of ignore words. + +$ignore = "/tmp/incptx.$$"; +open (IGNORE, "> $ignore"); +print IGNORE join ("\n", split (' ', <) + { + + # Get rid of comments. + + $comment = $next_comment; + if ($comment) + { + $next_comment = !s,^.*\*/,,; + } + else + { + s,/\*.*\*/,,g; + $next_comment = s,/\*.*,,; + } + next if $comment && $next_comment; + + # Remove extraneous white space. + + s/[ \t]+/ /g; + s/ $//; + next if /^$/; + + # Print the line with its reference. + + print "$include($.): ", $_; + } +} + +warn "All read, now ptx' game!\n"; +close OUTPUT || die "ptx failed...\n"; +unlink $ignore; diff --git a/gnu/usr.bin/ptx/examples/latex/Makefile b/gnu/usr.bin/ptx/examples/latex/Makefile new file mode 100644 index 0000000..5f930b2 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/latex/Makefile @@ -0,0 +1,15 @@ +# Example of using ptx with latex. +# Copyright (C) 1993 Free Software Foundation, Inc. 
+# Francois Pinard , 1993. + +PTX = ../ptx +PTX_OPTIONS = -AfTWi.i + +try: latex.dvi + xdvi latex + +latex.dvi: latex.tex table.tex + latex latex + +table.tex: Makefile ../COPYING + $(PTX) $(PTX_OPTIONS) ../COPYING | sed 's/ //' > table.tex diff --git a/gnu/usr.bin/ptx/examples/latex/README b/gnu/usr.bin/ptx/examples/latex/README new file mode 100644 index 0000000..fc5098a --- /dev/null +++ b/gnu/usr.bin/ptx/examples/latex/README @@ -0,0 +1,10 @@ +Date: Sun, 26 Sep 93 19:07:10 EDT +From: Francois Pinard +To: ajayshah@cmie.ernet.in +Subject: Re: Gptx suggestions and help request + + In fact, if you could send me such a macro right now I would be + thrilled :-) + +Ok, I worked out this example for you. Even if a little rude, you can +still start from it for your own need. [...] diff --git a/gnu/usr.bin/ptx/examples/latex/latex.tex b/gnu/usr.bin/ptx/examples/latex/latex.tex new file mode 100644 index 0000000..1f0a2f1 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/latex/latex.tex @@ -0,0 +1,11 @@ +\documentstyle[11pt]{article} +\begin{document} + +\def\xx#1#2#3#4#5#6{\hbox{ + \hbox to2.5in{\hfil#5#2} + \hbox to3.0in{{\sl #3}\,#4#1\hfil} + \hbox to1.5in{\tiny#6\hfil} +}} +\input table + +\end{document} diff --git a/gnu/usr.bin/ptx/examples/latex/table.tex b/gnu/usr.bin/ptx/examples/latex/table.tex new file mode 100644 index 0000000..b68ea38 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/latex/table.tex @@ -0,0 +1,65 @@ +\xx {}{ate to certain respons}{ibi}{lities for you if you}{}{../COPYING:30} +\xx {}{These actions are proh}{ibi}{ted by law if you do n}{}{../COPYING:183} +\xx {}{EN ADVISED OF THE POSS}{IBI}{LITY OF SUCH DAMAGES.}{}{../COPYING:278} +\xx {}{icense may add an expl}{ici}{t geographical distrib}{}{../COPYING:232} +\xx {}{OF ALL NECESSARY SERV}{ICI}{NG, REPAIR OR CORRECTI}{}{../COPYING:267} +\xx {}{aims or to contest val}{idi}{ty of any such claims;}{}{../COPYING:216} +\xx {}{If the software is mod}{ifi}{ed by someone else and}{}{../COPYING:45} +\xx {}{, 
distribution and mod}{ifi}{cation follow.}{pying}{../COPYING:57} +\xx {}{, DISTRIBUTION AND MOD}{IFI}{CATION 0. This Lice}{}{../COPYING:60} +\xx {}{r verbatim or with mod}{ifi}{cations and/or transla}{}{../COPYING:68} +\xx {}{ation in the term "mod}{ifi}{cation".)}{t limit}{../COPYING:70} +\xx {}{, distribution and mod}{ifi}{cation are not covered}{}{../COPYING:72} +\xx {}{nd distribute such mod}{ifi}{cations or work under}{}{../COPYING:92} +\xx {}{You must cause the mod}{ifi}{ed files to carry prom}{}{../COPYING:95} +\xx {ads c}{c) If the mod}{ifi}{ed program normally re}{}{../COPYING:103} +\xx {}{ments apply to the mod}{ifi}{ed work as a whole.}{}{../COPYING:114} +\xx {work are n}{If ident}{ifi}{able sections of that}{}{../COPYING:115} +\xx {}{he work for making mod}{ifi}{cations to it.}{of t}{../COPYING:156} +\xx {}{If the Program spec}{ifi}{es a version number of}{}{../COPYING:243} +\xx {}{hey have is not the or}{igi}{nal, so that any probl}{}{../COPYING:46} +\xx {}{not reflect on the or}{igi}{nal authors' reputatio}{}{../COPYING:47} +\xx {}{a license from the or}{igi}{nal licensor to copy,}{}{../COPYING:191} +\xx {}{ted interfaces, the or}{igi}{nal copyright holder w}{}{../COPYING:231} +\xx {}{RRANTIES OF MERCHANTAB}{ILI}{TY AND FITNESS FOR A P}{}{../COPYING:265} +\xx {}{OUT OF THE USE OR INAB}{ILI}{TY TO USE THE PROGRAM}{}{../COPYING:274} +\xx {}{anty of MERCHANTAB}{ILI}{TY or FITNESS FOR A PA}{}{../COPYING:303} +\xx {}{is included without l}{imi}{tation in the term "mo}{}{../COPYING:69} +\xx {}{ny other reason (not l}{imi}{ted to patent issues),}{}{../COPYING:198} +\xx {}{aphical distribution l}{imi}{tation excluding those}{}{../COPYING:232} +\xx {}{nse incorporates the l}{imi}{tation as if written i}{}{../COPYING:235} +\xx {}{new versions will be s}{imi}{lar in spirit to the p}{}{../COPYING:239} +\xx {}{, INCLUDING, BUT NOT L}{IMI}{TED TO, THE IMPLIED WA}{}{../COPYING:264} +\xx {}{M (INCLUDING BUT NOT L}{IMI}{TED TO LOSS OF DATA OR}{}{../COPYING:274} +\xx 
{}{s to say, a work conta}{ini}{ng the Program or a po}{}{../COPYING:67} +\xx {}{sociated interface def}{ini}{tion files, plus the s}{}{../COPYING:158} +\xx {}{you must give the rec}{ipi}{ents all the rights th}{}{../COPYING:34} +\xx {}{ed on, we want its rec}{ipi}{ents to know that what}{}{../COPYING:46} +\xx {}{and give any other rec}{ipi}{ents of the Program a}{}{../COPYING:84} +\xx {}{the Program), the rec}{ipi}{ent automatically rece}{}{../COPYING:190} +\xx {}{estrictions on the rec}{ipi}{ents' exercise of the}{}{../COPYING:193} +\xx {}{will be similar in sp}{iri}{t to the present versi}{}{../COPYING:239} +\xx {he two goal}{Our dec}{isi}{on will be guided by t}{}{../COPYING:254} +\xx {}{NSEQUENTIAL DAMAGES AR}{ISI}{NG OUT OF THE USE OR I}{}{../COPYING:273} +\xx {}{tive mode: Gnomov}{isi}{on version 69, Copyrig}{}{../COPYING:315} +\xx {}{e of author Gnomov}{isi}{on comes with ABSOLUTE}{}{../COPYING:316} +\xx {}{the program `Gnomov}{isi}{on' (which makes passe}{}{../COPYING:330} +\xx {}{to certain responsibil}{iti}{es for you if you dist}{}{../COPYING:30} +\xx {}{precise terms and cond}{iti}{ons for copying, distr}{}{../COPYING:56} +\xx {}{ENSE TERMS AND COND}{ITI}{ONS FOR COPYING, DISTR}{}{../COPYING:60} +\xx {}{meet all of these cond}{iti}{ons: a) You must}{}{../COPYING:93} +\xx {}{m under these cond}{iti}{ons, and telling the u}{}{../COPYING:109} +\xx {f another wo}{In add}{iti}{on, mere aggregation o}{}{../COPYING:129} +\xx {}{all its terms and cond}{iti}{ons for copying, distr}{}{../COPYING:186} +\xx {}{o these terms and cond}{iti}{ons.}{bject t}{../COPYING:192} +\xx {}{o patent issues), cond}{iti}{ons are imposed on you}{}{../COPYING:199} +\xx {}{at contradict the cond}{iti}{ons of this License, t}{}{../COPYING:200} +\xx {}{cuse you from the cond}{iti}{ons of this License.}{}{../COPYING:201} +\xx {}{ing the terms and cond}{iti}{ons either of that ver}{}{../COPYING:244} +\xx {}{hose distribution cond}{iti}{ons are different, wri}{}{../COPYING:251} +\xx 
{}{OTHERWISE STATED IN WR}{ITI}{NG THE COPYRIGHT HOLDE}{}{../COPYING:262} +\xx {}{LAW OR AGREED TO IN WR}{ITI}{NG WILL ANY COPYRIGHT}{}{../COPYING:270} +\xx {}{END OF TERMS AND COND}{ITI}{ONS Appendix: How t}{}{../COPYING:280} +\xx {}{under certain cond}{iti}{ons; type `show c' for}{}{../COPYING:318} +\xx {}{free program will ind}{ivi}{dually obtain patent l}{}{../COPYING:52} +\xx {g, distribution}{Act}{ivi}{ties other than copyin}{}{../COPYING:72} diff --git a/gnu/usr.bin/ptx/examples/luke/README b/gnu/usr.bin/ptx/examples/luke/README new file mode 100644 index 0000000..6291861 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/luke/README @@ -0,0 +1,2 @@ +From: Luke Kendall +Date: Wed, 16 Oct 91 12:26:39 EST diff --git a/gnu/usr.bin/ptx/examples/luke/xxroff.sh b/gnu/usr.bin/ptx/examples/luke/xxroff.sh new file mode 100644 index 0000000..55ef908 --- /dev/null +++ b/gnu/usr.bin/ptx/examples/luke/xxroff.sh @@ -0,0 +1,108 @@ +#!/bin/sh +# +# Author: Luke Kendall +# +MYNAME=`basename $0` +usage="usage: $MYNAME [man-directory] + (generates permuted index of -man files in directory)" +md=/usr/man +# +if [ $# = 0 ] +then + echo "$MYNAME: no man directory specified: assuming $md" +elif [ $# != 1 ] +then + echo "$usage" + exit 1 +elif [ -d $1 ] +then + md="$1" +else + echo "$usage" + exit 1 +fi +echo "Permuted index of $md:" +out=ptx.tr +# ------ clumsy permuted index macros (replaced by stuff below) ------------ +cat <<'EOF' > $out +.pn 1 +.de xx +\\$1 \\$2 \\fB\\$3\\fR \\$4 \\s-1\\$5\\s0 +.. +.pl 10i +.de NP +.ev 1 +.ft 1 +.ps 10 +.sp 0.75c +.tl '\s-2\\fIpermuted index\\fP\s0'\- \\n% \-'\s-2\\fIpermuted index\\fP\s0' +.pn +1 +.bp +.ev +.. 
+.wh 9i NP +.nf +.na +.ta 6.5i-1.1iR 6.5iR 6.51iR 6.52R +.ll 6.0i +.po 0i +.sp 0.25i +'\" +EOF +# ------ ------- ------- ------- ------- ------- +# ------ alternate permuted index macros (from net) ------------ +cat <<'EOF' > $out +.pl 10i +.de NP +.ev 1 +.ft 1 +.ps 10 +.sp 0.75c +.tl '\s-2\\fIpermuted index\\fP\s0'\- \\n% \-'\s-2\\fIpermuted index\\fP\s0' +.pn +1 +.bp +.ev +.. +.wh 9i NP +.po 0.5i +.sp 0.25i +.tr ~ \" tildes will translate to blanks +'\".ll 80 \" line length of output +.ll 6.0i \" line length of output +.nf \" must be in no-fill mode +.nr )r \n(.lu-10n \" set position of reference in line (10 less than length) +.nr )k \n()ru/2u \" set position of keyword (approx. centered) +.ds s2 ~~~ \" this is the center gap -- 3 spaces +.de xx \"definition of xx macro +.ds s1\" \" initialise to null string +.if \w@\\$2@ .ds s1 ~\" \"set to single blank if there is second arg +.ds s3\" \" initialise to null string +.if \w@\\$4@ .ds s3 ~\" \"set to single blank if there is second arg +.ds s4 ~\" \" set to single blank +.ds s5 ~\" \" set to single blank +.ds y \\*(s4\a\\*(s5\" \" blank, leader, blank +.ta \\n()ru-\w@\\*(s5@u \" set tab just to left of ref +\h@\\n()ku-\w@\\$1\\*(s1\\$2\\*(s2@u@\\$1\\*(s1\\$2\\*(s2\\$3\\*(s3\\$4\\*y\\$5 +.. + ~ +EOF +# ------ ------- ------- ------- ------- ------- +find $md -type f -name "*.[1-8nl]*" -print | +while read f +do + man=`basename $f` + man=`expr "$man" : "\(.*\)\.[^\.]*"` +echo $man: + # + # Use 1st non-"." and non-"'" started line as input to ptx (this + # should be the synopsis after the `.SH NAME'); + # strip any "\-" from it (a silly sort key for ptx to avoid); + # insert a leading man page name for the -r option to find + # + sed -n '/^[^.]/s/\\-//g;/^[^.]/p;/^[^.]/q' $f | sed "s/^/($man) /" +done | ptx -t -f -r >> $out +# +# Turn the troff'able permuted index file into PostScript +# +psroff -t -rL10i $out > ptx.ps +echo "$out and ptx.ps produced from man directory $md." 
diff --git a/gnu/usr.bin/ptx/getopt.c b/gnu/usr.bin/ptx/getopt.c new file mode 100644 index 0000000..7a4673b --- /dev/null +++ b/gnu/usr.bin/ptx/getopt.c @@ -0,0 +1,757 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 1993 + Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifdef HAVE_CONFIG_H +#if defined (emacs) || defined (CONFIG_BROKETS) +/* We use <config.h> instead of "config.h" so that a compilation + using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). */ +#include <config.h> +#else +#include "config.h" +#endif +#endif + +#ifndef __STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. */ +#ifndef _NO_PROTO +#define _NO_PROTO +#endif + +#include <stdio.h> + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions.
Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +#include +#endif /* GNU C library. */ + +/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a + long-named option. Because this is not POSIX.2 compliant, it is + being phased out. */ +/* #define GETOPT_COMPAT */ + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = 0; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. 
+ + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* XXX 1003.2 says this must be 1 before any call. */ +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. + + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. 
We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +#ifdef __GNU_LIBRARY__ +/* We want to avoid inclusion of string.h with non-GNU libraries + because there are many ways it can cause trouble. + On some systems, it contains special magic macros that don't work + in GCC. */ +#include +#define my_index strchr +#else + +/* Avoid depending on library functions or files + whose names are inconsistent. */ + +char *getenv (); + +static char * +my_index (str, chr) + const char *str; + int chr; +{ + while (*str) + { + if (*str == chr) + return (char *) str; + str++; + } + return 0; +} + +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. + (Supposedly there are some machines where it might get a warning, + but changing this conditional to __STDC__ is too risky.) */ +#ifdef __GNUC__ +#ifdef IN_GCC +#include "gstddef.h" +#else +#include +#endif +extern size_t strlen (const char *); +#endif + +#endif /* GNU C library. */ + +/* Handle permutation of arguments. */ + +/* Describe the part of ARGV that contains non-options that have + been skipped. `first_nonopt' is the index in ARGV of the first of them; + `last_nonopt' is the index after the last of them. */ + +static int first_nonopt; +static int last_nonopt; + +/* Exchange two adjacent subsequences of ARGV. + One subsequence is elements [first_nonopt,last_nonopt) + which contains all the non-options that have been skipped so far. 
+ The other is elements [last_nonopt,optind), which contains all + the options processed since those non-options were skipped. + + `first_nonopt' and `last_nonopt' are relocated so that they describe + the new indices of the non-options in ARGV after they are moved. */ + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. 
If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. + + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else in the next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + int option_index; + + optarg = 0; + + /* Initialize the internal data when the first call is made. + Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + if (optind == 0) + { + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + /* Determine how to handle the ordering of options and nonoptions. */ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (getenv ("POSIXLY_CORRECT") != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + } + + if (nextchar == NULL || *nextchar == '\0') + { + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Now skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + optind++; + last_nonopt = optind; + } + + /* Special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0') +#ifdef GETOPT_COMPAT + && (longopts == NULL + || argv[optind][0] != '+' || argv[optind][1] == '\0') +#endif /* GETOPT_COMPAT */ + ) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Start decoding its characters. 
*/ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + if (longopts != NULL + && ((argv[optind][0] == '-' + && (argv[optind][1] == '-' || long_only)) +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + )) + { + const struct option *p; + char *s = nextchar; + int exact = 0; + int ambig = 0; + const struct option *pfound = NULL; + int indfound; + + while (*s && *s != '=') + s++; + + /* Test all options for either exact match or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; + p++, option_index++) + if (!strncmp (p->name, nextchar, s - nextchar)) + { + if (s - nextchar == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, "%s: option `%s' is ambiguous\n", + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*s) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = s + 1; + else + { + if (opterr) + { + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + "%s: option `--%s' doesn't allow an argument\n", + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + "%s: option `%c%s' doesn't allow an argument\n", + argv[0], argv[optind - 1][0], pfound->name); + } + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, "%s: option `%s' requires an argument\n", + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' +#ifdef GETOPT_COMPAT + || argv[optind][0] == '+' +#endif /* GETOPT_COMPAT */ + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, "%s: unrecognized option `--%s'\n", + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, "%s: unrecognized option `%c%s'\n", + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return '?'; + } + } + + /* Look at and handle the next option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. 
*/ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { +#if 0 + if (c < 040 || c >= 0177) + fprintf (stderr, "%s: unrecognized option, character code 0%o\n", + argv[0], c); + else + fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c); +#else + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); +#endif + } + optopt = c; + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = 0; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { +#if 0 + fprintf (stderr, "%s: option `-%c' requires an argument\n", + argv[0], c); +#else + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, "%s: option requires an argument -- %c\n", + argv[0], c); +#endif + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/ptx/getopt.h b/gnu/usr.bin/ptx/getopt.h new file mode 100644 index 0000000..45541f5 --- /dev/null +++ b/gnu/usr.bin/ptx/getopt.h @@ -0,0 +1,129 @@ +/* Declarations for getopt. + Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. 
+ When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. 
*/ + +struct option +{ +#if __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if __STDC__ +#if defined(__GNU_LIBRARY__) +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* not __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* not __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/gnu/usr.bin/ptx/getopt1.c b/gnu/usr.bin/ptx/getopt1.c new file mode 100644 index 0000000..f784b57 --- /dev/null +++ b/gnu/usr.bin/ptx/getopt1.c @@ -0,0 +1,187 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987, 88, 89, 90, 91, 92, 1993 + Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifdef HAVE_CONFIG_H +#if defined (emacs) || defined (CONFIG_BROKETS) +/* We use instead of "config.h" so that a compilation + using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). */ +#include +#else +#include "config.h" +#endif +#endif + +#include "getopt.h" + +#ifndef __STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. 
*/ +#ifdef __GNU_LIBRARY__ +#include +#else +char *getenv (); +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. */ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +#include + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == EOF) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/ptx/mkinstalldirs b/gnu/usr.bin/ptx/mkinstalldirs new file mode 100755 index 0000000..0e29377 --- /dev/null +++ b/gnu/usr.bin/ptx/mkinstalldirs @@ -0,0 +1,35 @@ +#!/bin/sh +# Make directory hierarchy. +# Written by Noah Friedman +# Public domain. + +defaultIFS=' +' +IFS="${IFS-${defaultIFS}}" + +errstatus=0 + +for file in ${1+"$@"} ; do + oIFS="${IFS}" + # Some sh's can't handle IFS=/ for some reason. + IFS='%' + set - `echo ${file} | sed -e 's@/@%@g' -e 's@^%@/@'` + IFS="${oIFS}" + + pathcomp='' + + for d in ${1+"$@"} ; do + pathcomp="${pathcomp}${d}" + + if test ! 
-d "${pathcomp}"; then + echo "mkdir $pathcomp" 1>&2 + mkdir "${pathcomp}" || errstatus=$? + fi + + pathcomp="${pathcomp}/" + done +done + +exit $errstatus + +# eof diff --git a/gnu/usr.bin/ptx/ptx.c b/gnu/usr.bin/ptx/ptx.c new file mode 100644 index 0000000..2dc306e --- /dev/null +++ b/gnu/usr.bin/ptx/ptx.c @@ -0,0 +1,2237 @@ +/* Permuted index for GNU, with keywords in their context. + Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc. + Francois Pinard , 1988. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +const char *version_string = "GNU ptx version 0.3"; + +char *const copyright = "\ +This program is free software; you can redistribute it and/or modify\n\ +it under the terms of the GNU General Public License as published by\n\ +the Free Software Foundation; either version 2, or (at your option)\n\ +any later version.\n\ +\n\ +This program is distributed in the hope that it will be useful,\n\ +but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n\ +GNU General Public License for more details.\n\ +\n\ +You should have received a copy of the GNU General Public License\n\ +along with this program; if not, write to the Free Software\n\ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"; + +/* Reallocation step when swallowing non regular files. The value is not + the actual reallocation step, but its base two logarithm. */ +#define SWALLOW_REALLOC_LOG 12 + +/* Imported from "regex.c". */ +#define Sword 1 + +#ifdef STDC_HEADERS + +#include +#include + +#else /* not STDC_HEADERS */ + +/* These definitions work, for all 256 characters. */ +#define isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n') +#define isxdigit(c) \ + (((unsigned char) (c) >= 'a' && (unsigned char) (c) <= 'f') \ + || ((unsigned char) (c) >= 'A' && (unsigned char) (c) <= 'F') \ + || ((unsigned char) (c) >= '0' && (unsigned char) (c) <= '9')) +#define islower(c) ((unsigned char) (c) >= 'a' && (unsigned char) (c) <= 'z') +#define isupper(c) ((unsigned char) (c) >= 'A' && (unsigned char) (c) <= 'Z') +#define isalpha(c) (islower (c) || isupper (c)) +#define toupper(c) (islower (c) ? (c) - 'a' + 'A' : (c)) + +#endif /* not STDC_HEADERS */ + +#if !defined (isascii) || defined (STDC_HEADERS) +#undef isascii +#define isascii(c) 1 +#endif + +#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) +#define ISODIGIT(c) ((c) >= '0' && (c) <= '7') +#define HEXTOBIN(c) ((c)>='a'&&(c)<='f' ? (c)-'a'+10 : (c)>='A'&&(c)<='F' ? 
(c)-'A'+10 : (c)-'0') +#define OCTTOBIN(c) ((c) - '0') + +#include +#include +#include +#include + +#if !defined(S_ISREG) && defined(S_IFREG) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif + +#ifdef HAVE_STRING_H +#include +#else /* not HAVE_STRING_H */ +#include +#define strchr index +#define strrchr rindex +#endif /* not HAVE_STRING_H */ + +#include "getopt.h" + +#include +#ifndef errno +extern int errno; +#endif + +#include "bumpalloc.h" +#include "diacrit.h" +#include "regex.h" + +#ifndef __STDC__ +void *xmalloc (); +void *xrealloc (); +#else +void *xmalloc (int); +void *xrealloc (void *, int); +#endif + + +/* Global definitions. */ + +const char *program_name; /* name of this program */ +static int show_help = 0; /* display usage information and exit */ +static int show_version = 0; /* print the version and exit */ + +/* Program options. */ + +enum Format +{ + DUMB_FORMAT, /* output for a dumb terminal */ + ROFF_FORMAT, /* output for `troff' or `nroff' */ + TEX_FORMAT, /* output for `TeX' or `LaTeX' */ + UNKNOWN_FORMAT /* output format still unknown */ +}; + +int gnu_extensions = 1; /* trigger all GNU extensions */ +int auto_reference = 0; /* references are `file_name:line_number:' */ +int input_reference = 0; /* references at beginning of input lines */ +int right_reference = 0; /* output references after right context */ +int line_width = 72; /* output line width in characters */ +int gap_size = 3; /* number of spaces between output fields */ +const char *truncation_string = "/"; + /* string used to mark line truncations */ +const char *macro_name = "xx"; /* macro name for roff or TeX output */ +enum Format output_format = UNKNOWN_FORMAT; + /* output format */ + +int ignore_case = 0; /* fold lower to upper case for sorting */ +const char *context_regex_string = NULL; + /* raw regex for end of context */ +const char *word_regex_string = NULL; + /* raw regex for a keyword */ +const char *break_file = NULL; /* name of the `Break characters' file */ 
+const char *only_file = NULL; /* name of the `Only words' file */ +const char *ignore_file = NULL; /* name of the `Ignore words' file */ + +/* A BLOCK delimit a region in memory of arbitrary size, like the copy of a + whole file. A WORD is something smaller, its length should fit in a + short integer. A WORD_TABLE may contain several WORDs. */ + +typedef struct + { + char *start; /* pointer to beginning of region */ + char *end; /* pointer to end + 1 of region */ + } +BLOCK; + +typedef struct + { + char *start; /* pointer to beginning of region */ + short size; /* length of the region */ + } +WORD; + +typedef struct + { + WORD *start; /* array of WORDs */ + size_t length; /* number of entries */ + } +WORD_TABLE; + +/* Pattern description tables. */ + +/* For each character, provide its folded equivalent. */ +unsigned char folded_chars[CHAR_SET_SIZE]; + +/* For each character, indicate if it is part of a word. */ +char syntax_table[CHAR_SET_SIZE]; +char *re_syntax_table = syntax_table; + +/* Compiled regex for end of context. */ +struct re_pattern_buffer *context_regex; + +/* End of context pattern register indices. */ +struct re_registers context_regs; + +/* Compiled regex for a keyword. */ +struct re_pattern_buffer *word_regex; + +/* Keyword pattern register indices. */ +struct re_registers word_regs; + +/* A word characters fastmap is used only when no word regexp has been + provided. A word is then made up of a sequence of one or more characters + allowed by the fastmap. Contains !0 if character allowed in word. Not + only this is faster in most cases, but it simplifies the implementation + of the Break files. */ +char word_fastmap[CHAR_SET_SIZE]; + +/* Maximum length of any word read. */ +int maximum_word_length; + +/* Maximum width of any reference used. */ +int reference_max_width; + + +/* Ignore and Only word tables. 
*/ + +WORD_TABLE ignore_table; /* table of words to ignore */ +WORD_TABLE only_table; /* table of words to select */ + +#define ALLOC_NEW_WORD(table) \ + BUMP_ALLOC ((table)->start, (table)->length, 8, WORD) + +/* Source text table, and scanning macros. */ + +int number_input_files; /* number of text input files */ +int total_line_count; /* total number of lines seen so far */ +const char **input_file_name; /* array of text input file names */ +int *file_line_count; /* array of `total_line_count' values at end */ + +BLOCK text_buffer; /* file to study */ +char *text_buffer_maxend; /* allocated end of text_buffer */ + +/* SKIP_NON_WHITE used only for getting or skipping the reference. */ + +#define SKIP_NON_WHITE(cursor, limit) \ + while (cursor < limit && !isspace(*cursor)) \ + cursor++ + +#define SKIP_WHITE(cursor, limit) \ + while (cursor < limit && isspace(*cursor)) \ + cursor++ + +#define SKIP_WHITE_BACKWARDS(cursor, start) \ + while (cursor > start && isspace(cursor[-1])) \ + cursor-- + +#define SKIP_SOMETHING(cursor, limit) \ + do \ + if (word_regex_string) \ + { \ + int count; \ + count = re_match (word_regex, cursor, limit - cursor, 0, NULL); \ + cursor += count <= 0 ? 1 : count; \ + } \ + else if (word_fastmap[(unsigned char) *cursor]) \ + while (cursor < limit && word_fastmap[(unsigned char) *cursor]) \ + cursor++; \ + else \ + cursor++; \ + while (0) + +/* Occurrences table. + + The `keyword' pointer provides the central word, which is surrounded + by a left context and a right context. The `keyword' and `length' + field allow full 8-bit characters keys, even including NULs. At other + places in this program, the name `keyafter' refers to the keyword + followed by its right context. + + The left context does not extend, towards the beginning of the file, + further than a distance given by the `left' value. This value is + relative to the keyword beginning, it is usually negative. 
This + insures that, except for white space, we will never have to backward + scan the source text, when it is time to generate the final output + lines. + + The right context, indirectly attainable through the keyword end, does + not extend, towards the end of the file, further than a distance given + by the `right' value. This value is relative to the keyword + beginning, it is usually positive. + + When automatic references are used, the `reference' value is the + overall line number in all input files read so far, in this case, it + is of type (int). When input references are used, the `reference' + value indicates the distance between the keyword beginning and the + start of the reference field, it is of type (DELTA) and usually + negative. */ + +typedef short DELTA; /* to hold displacement within one context */ + +typedef struct + { + WORD key; /* description of the keyword */ + DELTA left; /* distance to left context start */ + DELTA right; /* distance to right context end */ + int reference; /* reference descriptor */ + } +OCCURS; + +/* The various OCCURS tables are indexed by the language. But the time + being, there is no such multiple language support. */ + +OCCURS *occurs_table[1]; /* all words retained from the read text */ +size_t number_of_occurs[1]; /* number of used slots in occurs_table */ + +#define ALLOC_NEW_OCCURS(language) \ + BUMP_ALLOC (occurs_table[language], number_of_occurs[language], 9, OCCURS) + + +/* Communication among output routines. */ + +/* Indicate if special output processing is requested for each character. 
*/ +char edited_flag[CHAR_SET_SIZE]; + +int half_line_width; /* half of line width, reference excluded */ +int before_max_width; /* maximum width of before field */ +int keyafter_max_width; /* maximum width of keyword-and-after field */ +int truncation_string_length; /* length of string used to flag truncation */ + +/* When context is limited by lines, wraparound may happen on final output: + the `head' pointer gives access to some supplementary left context which + will be seen at the end of the output line, the `tail' pointer gives + access to some supplementary right context which will be seen at the + beginning of the output line. */ + +BLOCK tail; /* tail field */ +int tail_truncation; /* flag truncation after the tail field */ + +BLOCK before; /* before field */ +int before_truncation; /* flag truncation before the before field */ + +BLOCK keyafter; /* keyword-and-after field */ +int keyafter_truncation; /* flag truncation after the keyafter field */ + +BLOCK head; /* head field */ +int head_truncation; /* flag truncation before the head field */ + +BLOCK reference; /* reference field for input reference mode */ + + +/* Miscellaneous routines. */ + +/*------------------------------------------------------. +| Duplicate string STRING, while evaluating \-escapes. | +`------------------------------------------------------*/ + +/* Loosely adapted from GNU shellutils printf.c code. 
*/
+
+/* Return a freshly xmalloc'ed copy of STRING in which the \-escapes
+   handled below have been replaced by the characters they stand for.
+   An escape that is not recognized is copied unchanged, backslash
+   included.  The result never needs more room than STRING itself, since
+   every escape consumes at least as many input characters as it
+   produces output characters.
+
+   A lone backslash at the very end of STRING is copied literally.  It
+   needs its own case: handled by the default case, the terminating NUL
+   would be copied and then stepped over, and both the scan of STRING and
+   the writes into the result would continue past the end of their
+   buffers.
+
+   NOTE(review): a \x escape accepts up to three hexadecimal digits, so
+   VALUE may exceed what a char can hold; it is stored as is and reduced
+   by the assignment.  */
+
+char *
+copy_unescaped_string (const char *string)
+{
+  char *result;                 /* allocated result */
+  char *cursor;                 /* cursor in result */
+  int value;                    /* value of \nnn escape */
+  int length;                   /* length of \nnn escape */
+
+  result = xmalloc (strlen (string) + 1);
+  cursor = result;
+
+  while (*string)
+    if (*string == '\\')
+      {
+        string++;
+        switch (*string)
+          {
+          case '\0':            /* backslash is the last character */
+            /* Keep the backslash and leave STRING on the terminator, so
+               that the enclosing loop stops here.  */
+            *cursor++ = '\\';
+            break;
+
+          case 'x':             /* \xhhh escape, 3 chars maximum */
+            value = 0;
+            /* The cast keeps the argument of the <ctype.h> test within
+               the unsigned char range even where plain char is signed.  */
+            for (length = 0, string++;
+                 length < 3 && ISXDIGIT ((unsigned char) *string);
+                 length++, string++)
+              value = value * 16 + HEXTOBIN (*string);
+            if (length == 0)
+              {
+                /* No digit followed: keep the two characters as text.  */
+                *cursor++ = '\\';
+                *cursor++ = 'x';
+              }
+            else
+              *cursor++ = value;
+            break;
+
+          case '0':             /* \0ooo escape, 3 chars maximum */
+            value = 0;
+            for (length = 0, string++;
+                 length < 3 && ISODIGIT (*string);
+                 length++, string++)
+              value = value * 8 + OCTTOBIN (*string);
+            *cursor++ = value;
+            break;
+
+          case 'a':             /* alert */
+#if __STDC__
+            *cursor++ = '\a';
+#else
+            *cursor++ = 7;
+#endif
+            string++;
+            break;
+
+          case 'b':             /* backspace */
+            *cursor++ = '\b';
+            string++;
+            break;
+
+          case 'c':             /* cancel the rest of the output */
+            while (*string)
+              string++;
+            break;
+
+          case 'f':             /* form feed */
+            *cursor++ = '\f';
+            string++;
+            break;
+
+          case 'n':             /* new line */
+            *cursor++ = '\n';
+            string++;
+            break;
+
+          case 'r':             /* carriage return */
+            *cursor++ = '\r';
+            string++;
+            break;
+
+          case 't':             /* horizontal tab */
+            *cursor++ = '\t';
+            string++;
+            break;
+
+          case 'v':             /* vertical tab */
+#if __STDC__
+            *cursor++ = '\v';
+#else
+            *cursor++ = 11;
+#endif
+            string++;
+            break;
+
+          default:              /* unknown escape, copied unchanged */
+            *cursor++ = '\\';
+            *cursor++ = *string++;
+            break;
+          }
+      }
+    else
+      *cursor++ = *string++;
+
+  *cursor = '\0';
+  return result;
+}
+
+/*-------------------------------------------------------------------.
+| Compile the regex represented by STRING, diagnose and abort if any |
+| error.  Returns the compiled regex structure.
|
+`-------------------------------------------------------------------*/
+
+struct re_pattern_buffer *
+alloc_and_compile_regex (const char *string)
+{
+  struct re_pattern_buffer *pattern; /* newly allocated structure */
+  const char *message;          /* error message returned by regex.c */
+
+  pattern = (struct re_pattern_buffer *)
+    xmalloc (sizeof (struct re_pattern_buffer));
+  memset (pattern, 0, sizeof (struct re_pattern_buffer));
+
+  /* Case folding is done by the regex code itself, through the same
+     table which compare_words uses.  */
+
+  pattern->buffer = NULL;
+  pattern->allocated = 0;
+  pattern->translate = ignore_case ? (char *) folded_chars : NULL;
+  pattern->fastmap = (char *) xmalloc (CHAR_SET_SIZE);
+
+  message = re_compile_pattern (string, strlen (string), pattern);
+  if (message)
+    error (1, 0, "%s (for regexp `%s')", message, string);
+
+  /* The fastmap should be compiled before `re_match'.  The following
+     call is not mandatory, because `re_search' is always called sooner,
+     and it compiles the fastmap if this has not been done yet.  */
+
+  re_compile_fastmap (pattern);
+
+  /* Do not waste extra allocated space.  */
+
+  if (pattern->allocated > pattern->used)
+    {
+      pattern->buffer
+        = (unsigned char *) xrealloc (pattern->buffer, pattern->used);
+      pattern->allocated = pattern->used;
+    }
+
+  return pattern;
+}
+
+/*------------------------------------------------------------------------.
+| This will initialize various tables for pattern match and compiles some |
+| regexps.                                                                |
+`------------------------------------------------------------------------*/
+
+void
+initialize_regex (void)
+{
+  int character;                /* character value */
+
+  /* Initialize the regex syntax table.  */
+
+  for (character = 0; character < CHAR_SET_SIZE; character++)
+    syntax_table[character] = isalpha (character) ? Sword : 0;
+
+  /* Initialize the case folding table.  */
+
+  if (ignore_case)
+    for (character = 0; character < CHAR_SET_SIZE; character++)
+      folded_chars[character] = toupper (character);
+
+  /* Unless the user already provided a description of the end of line or
+     end of sentence sequence, select an end of line sequence to compile.
+     If the user provided an empty definition, thus disabling end of line
+     or sentence feature, make it NULL to speed up tests.  If GNU
+     extensions are enabled, use end of sentence like in GNU emacs.  If
+     disabled, use end of lines.
+
+     NOTE(review): the description of swallow_file_in_memory speaks of a
+     default end of sentence which checks for two consecutive spaces; the
+     pattern is reproduced here exactly as this copy of the source shows
+     it, with a single space before the closing `\\)'.  Confirm against a
+     pristine copy of the file.  */
+
+  if (context_regex_string)
+    {
+      if (!*context_regex_string)
+        context_regex_string = NULL;
+    }
+  else if (gnu_extensions && !input_reference)
+    context_regex_string = "[.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]*";
+  else
+    context_regex_string = "\n";
+
+  if (context_regex_string)
+    context_regex = alloc_and_compile_regex (context_regex_string);
+
+  /* If the user has already provided a non-empty regexp to describe
+     words, compile it.  Else, unless this has already been done through
+     a user provided Break character file, construct a fastmap of
+     characters that may appear in a word.  If GNU extensions enabled,
+     include only letters of the underlying character set.  If disabled,
+     include almost everything, even punctuations; stop only on white
+     space.  */
+
+  if (word_regex_string && *word_regex_string)
+    word_regex = alloc_and_compile_regex (word_regex_string);
+  else if (!break_file)
+    {
+      if (gnu_extensions)
+        {
+
+          /* Simulate \w+.  `isalpha' only promises a nonzero value for
+             letters, and that value need not fit in a char: reduce it to
+             0 or 1 before storing, otherwise a large flag value could be
+             truncated to 0 and no character would ever start a word.  */
+
+          for (character = 0; character < CHAR_SET_SIZE; character++)
+            word_fastmap[character] = isalpha (character) ? 1 : 0;
+        }
+      else
+        {
+
+          /* Simulate [^ \t\n]+.  */
+
+          memset (word_fastmap, 1, CHAR_SET_SIZE);
+          word_fastmap[' '] = 0;
+          word_fastmap['\t'] = 0;
+          word_fastmap['\n'] = 0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------.
+| This routine will attempt to swallow a whole file name FILE_NAME into a |
+| contiguous region of memory and return a description of it into BLOCK.
|
+| Standard input is assumed whenever FILE_NAME is NULL, empty or "-".     |
+|                                                                         |
+| Previously, in some cases, white space compression was attempted while  |
+| inputting text.  This was defeating some regexps like default end of    |
+| sentence, which checks for two consecutive spaces.  If white space      |
+| compression is ever reinstated, it should be in output routines.        |
+`------------------------------------------------------------------------*/
+
+void
+swallow_file_in_memory (const char *file_name, BLOCK *block)
+{
+  int file_handle;              /* file descriptor number */
+  struct stat stat_block;       /* stat block for file */
+  int allocated_length;         /* allocated length of memory buffer */
+  int used_length;              /* used length in memory buffer */
+  int read_length;              /* number of character gotten on last read */
+  int file_size;                /* size of a regular file, as per fstat */
+  const char *diagnostic_name;  /* file name to show in diagnostics */
+
+  /* FILE_NAME may be NULL or empty when standard input is meant, so it
+     cannot be handed to `error' as is.  It is also never used as the
+     format itself, since a `%' in a file name would then be interpreted
+     as a conversion.  */
+
+  diagnostic_name = file_name && *file_name ? file_name : "-";
+
+  /* As special cases, a file name which is NULL or "-" indicates standard
+     input, which is already opened.  In all other cases, open the file from
+     its name.  */
+
+  if (!file_name || !*file_name || strcmp (file_name, "-") == 0)
+    file_handle = fileno (stdin);
+  else if ((file_handle = open (file_name, O_RDONLY)) < 0)
+    error (1, errno, "%s", diagnostic_name);
+
+  /* If the file is a plain, regular file, allocate the memory buffer all at
+     once and swallow the file in one blow.  In other cases, read the file
+     repeatedly in smaller chunks until we have it all, reallocating memory
+     once in a while, as we go.  */
+
+  if (fstat (file_handle, &stat_block) < 0)
+    error (1, errno, "%s", diagnostic_name);
+
+  if (S_ISREG (stat_block.st_mode))
+    {
+      file_size = (int) stat_block.st_size;
+      block->start = (char *) xmalloc (file_size);
+      used_length = 0;
+      read_length = 0;
+
+      /* `read' is allowed to return less than what was asked for, so keep
+         asking until the whole size has been obtained or end of file is
+         met.  Only a negative return is an error; getting fewer bytes
+         than the size announced merely shortens the block.  */
+
+      while (used_length < file_size
+             && (read_length = read (file_handle,
+                                     block->start + used_length,
+                                     file_size - used_length)) > 0)
+        used_length += read_length;
+
+      if (read_length < 0)
+        error (1, errno, "%s", diagnostic_name);
+
+      block->end = block->start + used_length;
+    }
+  else
+    {
+      block->start = (char *) xmalloc (1 << SWALLOW_REALLOC_LOG);
+      used_length = 0;
+      allocated_length = (1 << SWALLOW_REALLOC_LOG);
+
+      while ((read_length = read (file_handle,
+                                  block->start + used_length,
+                                  allocated_length - used_length)) > 0)
+        {
+          used_length += read_length;
+
+          /* Grow by one more step each time the buffer gets full.  */
+
+          if (used_length == allocated_length)
+            {
+              allocated_length += (1 << SWALLOW_REALLOC_LOG);
+              block->start
+                = (char *) xrealloc (block->start, allocated_length);
+            }
+        }
+
+      if (read_length < 0)
+        error (1, errno, "%s", diagnostic_name);
+
+      block->end = block->start + used_length;
+    }
+
+  /* Close the file, but only if it was not the standard input.  */
+
+  if (file_handle != fileno (stdin))
+    close (file_handle);
+}
+
+/* Sort and search routines.  */
+
+/*--------------------------------------------------------------------------.
+| Compare two words, FIRST and SECOND, and return 0 if they are identical.  |
+| Return less than 0 if the first word goes before the second; return      |
+| greater than 0 if the first word goes after the second.                   |
+|                                                                           |
+| If a word is indeed a prefix of the other, the shorter should go first.   |
+`--------------------------------------------------------------------------*/
+
+int
+compare_words (const void *void_first, const void *void_second)
+{
+#define first ((const WORD *) void_first)
+#define second ((const WORD *) void_second)
+  int length;                   /* minimum of two lengths */
+  int counter;                  /* cursor in words */
+  int value;                    /* value of comparison */
+
+  length = first->size < second->size ? first->size : second->size;
+
+  /* Characters are compared as unsigned values in both cases; when case
+     is ignored, they go through the folding table first.  */
+
+  if (ignore_case)
+    {
+      for (counter = 0; counter < length; counter++)
+        {
+          value = (folded_chars [(unsigned char) (first->start[counter])]
+                   - folded_chars [(unsigned char) (second->start[counter])]);
+          if (value != 0)
+            return value;
+        }
+    }
+  else
+    {
+      for (counter = 0; counter < length; counter++)
+        {
+          value = ((unsigned char) first->start[counter]
+                   - (unsigned char) second->start[counter]);
+          if (value != 0)
+            return value;
+        }
+    }
+
+  /* Equal over the common length: the shorter word goes first.  */
+
+  return first->size - second->size;
+#undef first
+#undef second
+}
+
+/*-----------------------------------------------------------------------.
+| Decides which of two OCCURS, FIRST or SECOND, should lexicographically |
+| go first.  In case of a tie, preserve the original order through a     |
+| pointer comparison.                                                    |
+`-----------------------------------------------------------------------*/
+
+int
+compare_occurs (const void *void_first, const void *void_second)
+{
+#define first ((const OCCURS *) void_first)
+#define second ((const OCCURS *) void_second)
+  int value;
+
+  value = compare_words (&first->key, &second->key);
+  if (value != 0)
+    return value;
+
+  /* Break the tie on the position of the keywords.  The pointers are
+     compared rather than subtracted: their difference need not fit in
+     an int, and a truncated difference could have the wrong sign.  */
+
+  return ((first->key.start > second->key.start)
+          - (first->key.start < second->key.start));
+#undef first
+#undef second
+}
+
+/*------------------------------------------------------------.
+| Return !0 if WORD appears in TABLE.  Uses a binary search.
|
+`------------------------------------------------------------*/
+
+int
+search_table (WORD *word, WORD_TABLE *table)
+{
+  int lowest;                   /* current lowest possible index */
+  int highest;                  /* current highest possible index */
+  int middle;                   /* current middle index */
+  int value;                    /* value from last comparison */
+
+  /* The length is converted before the subtraction, so that an empty
+     table yields -1 directly instead of going through an unsigned
+     wraparound.  The search relies on TABLE being sorted according to
+     compare_words.  */
+
+  lowest = 0;
+  highest = (int) table->length - 1;
+  while (lowest <= highest)
+    {
+
+      /* Written this way, the midpoint computation cannot overflow.  */
+
+      middle = lowest + (highest - lowest) / 2;
+      value = compare_words (word, table->start + middle);
+      if (value < 0)
+        highest = middle - 1;
+      else if (value > 0)
+        lowest = middle + 1;
+      else
+        return 1;
+    }
+  return 0;
+}
+
+/*---------------------------------------------------------------------.
+| Sort the whole occurs table in memory.  Presumably, `qsort' does not |
+| take intermediate copies of table elements, so the sort will be      |
+| stabilized throughout the comparison routine.                        |
+`---------------------------------------------------------------------*/
+
+void
+sort_found_occurs (void)
+{
+
+  /* Only one language for the time being.  */
+
+  qsort (occurs_table[0], number_of_occurs[0], sizeof (OCCURS),
+         compare_occurs);
+}
+
+/* Parameter files reading routines.  */
+
+/*----------------------------------------------------------------------.
+| Read a file named FILE_NAME, containing a set of break characters.    |
+| Build a content to the array word_fastmap in which all characters are |
+| allowed except those found in the file.  Characters may be repeated.  |
+`----------------------------------------------------------------------*/
+
+void
+digest_break_file (const char *file_name)
+{
+  BLOCK file_contents;          /* to receive a copy of the file */
+  char *cursor;                 /* cursor in file copy */
+
+  swallow_file_in_memory (file_name, &file_contents);
+
+  /* Make the fastmap and record the file contents in it.  */
+
+  memset (word_fastmap, 1, CHAR_SET_SIZE);
+  for (cursor = file_contents.start; cursor < file_contents.end; cursor++)
+    word_fastmap[(unsigned char) *cursor] = 0;
+
+  if (!gnu_extensions)
+    {
+
+      /* If GNU extensions are enabled, the only way to avoid newline as
+         a break character is to write all the break characters in the
+         file with no newline at all, not even at the end of the file.
+         If disabled, spaces, tabs and newlines are always considered as
+         break characters even if not included in the break file.  */
+
+      word_fastmap[' '] = 0;
+      word_fastmap['\t'] = 0;
+      word_fastmap['\n'] = 0;
+    }
+
+  /* Return the space of the file, which is no more required.  */
+
+  free (file_contents.start);
+}
+
+/*-----------------------------------------------------------------------.
+| Read a file named FILE_NAME, containing one word per line, then        |
+| construct in TABLE a table of WORD descriptors for them.  The routine  |
+| swallows the whole file in memory; this is at the expense of space     |
+| needed for newlines, which are useless; however, the reading is fast.  |
+`-----------------------------------------------------------------------*/
+
+void
+digest_word_file (const char *file_name, WORD_TABLE *table)
+{
+  BLOCK file_contents;          /* to receive a copy of the file */
+  char *cursor;                 /* cursor in file copy */
+  char *word_start;             /* start of the current word */
+
+  /* The copy of the file is not freed in this routine: the WORD entries
+     recorded below point straight into it.  */
+
+  swallow_file_in_memory (file_name, &file_contents);
+
+  table->start = NULL;
+  table->length = 0;
+
+  /* Read the whole file.  */
+
+  cursor = file_contents.start;
+  while (cursor < file_contents.end)
+    {
+
+      /* Read one line, and save the word it contains.  */
+
+      word_start = cursor;
+      while (cursor < file_contents.end && *cursor != '\n')
+        cursor++;
+
+      /* Record the word in table if it is not empty.
+
+         NOTE(review): the `size' field of a WORD is a short, so the
+         length of a very long line is narrowed by the assignment.  */
+
+      if (cursor > word_start)
+        {
+          ALLOC_NEW_WORD (table);
+          table->start[table->length].start = word_start;
+          table->start[table->length].size = cursor - word_start;
+          table->length++;
+        }
+
+      /* This test allows for an incomplete line at end of file.  */
+
+      if (cursor < file_contents.end)
+        cursor++;
+    }
+
+  /* Finally, sort all the words read.  */
+
+  qsort (table->start, table->length, (size_t) sizeof (WORD), compare_words);
+}
+
+
+/* Keyword recognition and selection.  */
+
+/*----------------------------------------------------------------------.
+| For each keyword in the source text, constructs an OCCURS structure.  |
+`----------------------------------------------------------------------*/
+
+void
+find_occurs_in_text (void)
+{
+  char *cursor;                 /* for scanning the source text */
+  char *scan;                   /* for scanning the source text also */
+  char *line_start;             /* start of the current input line */
+  char *line_scan;              /* newlines scanned until this point */
+  int reference_length;         /* length of reference in input mode */
+  WORD possible_key;            /* possible key, to ease searches */
+  OCCURS *occurs_cursor;        /* current OCCURS under construction */
+
+  char *context_start;          /* start of left context */
+  char *context_end;            /* end of right context */
+  char *word_start;             /* start of word */
+  char *word_end;               /* end of word */
+  char *next_context_start;     /* next start of left context */
+
+  /* reference_length is always used within `if (input_reference)'.
+     However, GNU C diagnoses that it may be used uninitialized.  The
+     following assignment is merely to shut it up.  */
+
+  reference_length = 0;
+
+  /* Tracking where lines start is helpful for reference processing.  In
+     auto reference mode, this allows counting lines.  In input reference
+     mode, this permits finding the beginning of the references.
+
+     The first line begins with the file, skip immediately this very first
+     reference in input reference mode, to help further rejection any word
+     found inside it.
Also, unconditionally assigning these variable has + the happy effect of shutting up lint. */ + + line_start = text_buffer.start; + line_scan = line_start; + if (input_reference) + { + SKIP_NON_WHITE (line_scan, text_buffer.end); + reference_length = line_scan - line_start; + SKIP_WHITE (line_scan, text_buffer.end); + } + + /* Process the whole buffer, one line or one sentence at a time. */ + + for (cursor = text_buffer.start; + cursor < text_buffer.end; + cursor = next_context_start) + { + + /* `context_start' gets initialized before the processing of each + line, or once for the whole buffer if no end of line or sentence + sequence separator. */ + + context_start = cursor; + + /* If a end of line or end of sentence sequence is defined and + non-empty, `next_context_start' will be recomputed to be the end of + each line or sentence, before each one is processed. If no such + sequence, then `next_context_start' is set at the end of the whole + buffer, which is then considered to be a single line or sentence. + This test also accounts for the case of an incomplete line or + sentence at the end of the buffer. */ + + if (context_regex_string + && (re_search (context_regex, cursor, text_buffer.end - cursor, + 0, text_buffer.end - cursor, &context_regs) + >= 0)) + next_context_start = cursor + context_regs.end[0]; + + else + next_context_start = text_buffer.end; + + /* Include the separator into the right context, but not any suffix + white space in this separator; this insures it will be seen in + output and will not take more space than necessary. */ + + context_end = next_context_start; + SKIP_WHITE_BACKWARDS (context_end, context_start); + + /* Read and process a single input line or sentence, one word at a + time. */ + + while (1) + { + if (word_regex) + + /* If a word regexp has been compiled, use it to skip at the + beginning of the next word. If there is no such word, exit + the loop. 
*/ + + { + if (re_search (word_regex, cursor, context_end - cursor, + 0, context_end - cursor, &word_regs) + < 0) + break; + word_start = cursor + word_regs.start[0]; + word_end = cursor + word_regs.end[0]; + } + else + + /* Avoid re_search and use the fastmap to skip to the + beginning of the next word. If there is no more word in + the buffer, exit the loop. */ + + { + scan = cursor; + while (scan < context_end + && !word_fastmap[(unsigned char) *scan]) + scan++; + + if (scan == context_end) + break; + + word_start = scan; + + while (scan < context_end + && word_fastmap[(unsigned char) *scan]) + scan++; + + word_end = scan; + } + + /* Skip right to the beginning of the found word. */ + + cursor = word_start; + + /* Skip any zero length word. Just advance a single position, + then go fetch the next word. */ + + if (word_end == word_start) + { + cursor++; + continue; + } + + /* This is a genuine, non empty word, so save it as a possible + key. Then skip over it. Also, maintain the maximum length of + all words read so far. It is mandatory to take the maximum + length of all words in the file, without considering if they + are actually kept or rejected, because backward jumps at output + generation time may fall in *any* word. */ + + possible_key.start = cursor; + possible_key.size = word_end - word_start; + cursor += possible_key.size; + + if (possible_key.size > maximum_word_length) + maximum_word_length = possible_key.size; + + /* In input reference mode, update `line_start' from its previous + value. Count the lines just in case auto reference mode is + also selected. If it happens that the word just matched is + indeed part of a reference; just ignore it. 
*/ + + if (input_reference) + { + while (line_scan < possible_key.start) + if (*line_scan == '\n') + { + total_line_count++; + line_scan++; + line_start = line_scan; + SKIP_NON_WHITE (line_scan, text_buffer.end); + reference_length = line_scan - line_start; + } + else + line_scan++; + if (line_scan > possible_key.start) + continue; + } + + /* Ignore the word if an `Ignore words' table exists and if it is + part of it. Also ignore the word if an `Only words' table and + if it is *not* part of it. + + It is allowed that both tables be used at once, even if this + may look strange for now. Just ignore a word that would appear + in both. If regexps are eventually implemented for these + tables, the Ignore table could then reject words that would + have been previously accepted by the Only table. */ + + if (ignore_file && search_table (&possible_key, &ignore_table)) + continue; + if (only_file && !search_table (&possible_key, &only_table)) + continue; + + /* A non-empty word has been found. First of all, insure + proper allocation of the next OCCURS, and make a pointer to + where it will be constructed. */ + + ALLOC_NEW_OCCURS (0); + occurs_cursor = occurs_table[0] + number_of_occurs[0]; + + /* Define the refence field, if any. */ + + if (auto_reference) + { + + /* While auto referencing, update `line_start' from its + previous value, counting lines as we go. If input + referencing at the same time, `line_start' has been + advanced earlier, and the following loop is never really + executed. */ + + while (line_scan < possible_key.start) + if (*line_scan == '\n') + { + total_line_count++; + line_scan++; + line_start = line_scan; + SKIP_NON_WHITE (line_scan, text_buffer.end); + } + else + line_scan++; + + occurs_cursor->reference = total_line_count; + } + else if (input_reference) + { + + /* If only input referencing, `line_start' has been computed + earlier to detect the case the word matched would be part + of the reference. 
The reference position is simply the + value of `line_start'. */ + + occurs_cursor->reference + = (DELTA) (line_start - possible_key.start); + if (reference_length > reference_max_width) + reference_max_width = reference_length; + } + + /* Exclude the reference from the context in simple cases. */ + + if (input_reference && line_start == context_start) + { + SKIP_NON_WHITE (context_start, context_end); + SKIP_WHITE (context_start, context_end); + } + + /* Completes the OCCURS structure. */ + + occurs_cursor->key = possible_key; + occurs_cursor->left = context_start - possible_key.start; + occurs_cursor->right = context_end - possible_key.start; + + number_of_occurs[0]++; + } + } +} + +/* Formatting and actual output - service routines. */ + +/*-----------------------------------------. +| Prints some NUMBER of spaces on stdout. | +`-----------------------------------------*/ + +void +print_spaces (int number) +{ + int counter; + + for (counter = number; counter > 0; counter--) + putchar (' '); +} + +/*-------------------------------------. +| Prints the field provided by FIELD. | +`-------------------------------------*/ + +void +print_field (BLOCK field) +{ + char *cursor; /* Cursor in field to print */ + int character; /* Current character */ + int base; /* Base character, without diacritic */ + int diacritic; /* Diacritic code for the character */ + + /* Whitespace is not really compressed. Instead, each white space + character (tab, vt, ht etc.) is printed as one single space. */ + + for (cursor = field.start; cursor < field.end; cursor++) + { + character = (unsigned char) *cursor; + if (edited_flag[character]) + { + + /* First check if this is a diacriticized character. + + This works only for TeX. I do not know how diacriticized + letters work with `roff'. Please someone explain it to me! 
*/ + + diacritic = todiac (character); + if (diacritic != 0 && output_format == TEX_FORMAT) + { + base = tobase (character); + switch (diacritic) + { + + case 1: /* Latin diphthongs */ + switch (base) + { + case 'o': + printf ("\\oe{}"); + break; + + case 'O': + printf ("\\OE{}"); + break; + + case 'a': + printf ("\\ae{}"); + break; + + case 'A': + printf ("\\AE{}"); + break; + + default: + putchar (' '); + } + break; + + case 2: /* Acute accent */ + printf ("\\'%s%c", (base == 'i' ? "\\" : ""), base); + break; + + case 3: /* Grave accent */ + printf ("\\`%s%c", (base == 'i' ? "\\" : ""), base); + break; + + case 4: /* Circumflex accent */ + printf ("\\^%s%c", (base == 'i' ? "\\" : ""), base); + break; + + case 5: /* Diaeresis */ + printf ("\\\"%s%c", (base == 'i' ? "\\" : ""), base); + break; + + case 6: /* Tilde accent */ + printf ("\\~%s%c", (base == 'i' ? "\\" : ""), base); + break; + + case 7: /* Cedilla */ + printf ("\\c{%c}", base); + break; + + case 8: /* Small circle beneath */ + switch (base) + { + case 'a': + printf ("\\aa{}"); + break; + + case 'A': + printf ("\\AA{}"); + break; + + default: + putchar (' '); + } + break; + + case 9: /* Strike through */ + switch (base) + { + case 'o': + printf ("\\o{}"); + break; + + case 'O': + printf ("\\O{}"); + break; + + default: + putchar (' '); + } + break; + } + } + else + + /* This is not a diacritic character, so handle cases which are + really specific to `roff' or TeX. All white space processing + is done as the default case of this switch. */ + + switch (character) + { + case '"': + /* In roff output format, double any quote. */ + putchar ('"'); + putchar ('"'); + break; + + case '$': + case '%': + case '&': + case '#': + case '_': + /* In TeX output format, precede these with a backslash. */ + putchar ('\\'); + putchar (character); + break; + + case '{': + case '}': + /* In TeX output format, precede these with a backslash and + force mathematical mode. 
*/ + printf ("$\\%c$", character); + break; + + case '\\': + /* In TeX output mode, request production of a backslash. */ + printf ("\\backslash{}"); + break; + + default: + /* Any other flagged character produces a single space. */ + putchar (' '); + } + } + else + putchar (*cursor); + } +} + + +/* Formatting and actual output - planning routines. */ + +/*--------------------------------------------------------------------. +| From information collected from command line options and input file | +| readings, compute and fix some output parameter values. | +`--------------------------------------------------------------------*/ + +void +fix_output_parameters (void) +{ + int file_index; /* index in text input file arrays */ + int line_ordinal; /* line ordinal value for reference */ + char ordinal_string[12]; /* edited line ordinal for reference */ + int reference_width; /* width for the whole reference */ + int character; /* character ordinal */ + const char *cursor; /* cursor in some constant strings */ + + /* In auto reference mode, the maximum width of this field is + precomputed and subtracted from the overall line width. Add one for + the column which separate the file name from the line number. */ + + if (auto_reference) + { + reference_max_width = 0; + for (file_index = 0; file_index < number_input_files; file_index++) + { + line_ordinal = file_line_count[file_index] + 1; + if (file_index > 0) + line_ordinal -= file_line_count[file_index - 1]; + sprintf (ordinal_string, "%d", line_ordinal); + reference_width = strlen (ordinal_string); + if (input_file_name[file_index]) + reference_width += strlen (input_file_name[file_index]); + if (reference_width > reference_max_width) + reference_max_width = reference_width; + } + reference_max_width++; + reference.start = (char *) xmalloc (reference_max_width + 1); + } + + /* If the reference appears to the left of the output line, reserve some + space for it right away, including one gap size. 
*/ + + if ((auto_reference || input_reference) && !right_reference) + line_width -= reference_max_width + gap_size; + + /* The output lines, minimally, will contain from left to right a left + context, a gap, and a keyword followed by the right context with no + special intervening gap. Half of the line width is dedicated to the + left context and the gap, the other half is dedicated to the keyword + and the right context; these values are computed once and for all here. + There also are tail and head wrap around fields, used when the keyword + is near the beginning or the end of the line, or when some long word + cannot fit in, but leave place from wrapped around shorter words. The + maximum width of these fields are recomputed separately for each line, + on a case by case basis. It is worth noting that it cannot happen that + both the tail and head fields are used at once. */ + + half_line_width = line_width / 2; + before_max_width = half_line_width - gap_size; + keyafter_max_width = half_line_width; + + /* If truncation_string is the empty string, make it NULL to speed up + tests. In this case, truncation_string_length will never get used, so + there is no need to set it. */ + + if (truncation_string && *truncation_string) + truncation_string_length = strlen (truncation_string); + else + truncation_string = NULL; + + if (gnu_extensions) + { + + /* When flagging truncation at the left of the keyword, the + truncation mark goes at the beginning of the before field, + unless there is a head field, in which case the mark goes at the + left of the head field. When flagging truncation at the right + of the keyword, the mark goes at the end of the keyafter field, + unless there is a tail field, in which case the mark goes at the + end of the tail field. Only eight combination cases could arise + for truncation marks: + + . None. + . One beginning the before field. + . One beginning the head field. + . One ending the keyafter field. + . One ending the tail field. + . 
One beginning the before field, another ending the keyafter field. + . One ending the tail field, another beginning the before field. + . One ending the keyafter field, another beginning the head field. + + So, there is at most two truncation marks, which could appear both + on the left side of the center of the output line, both on the + right side, or one on either side. */ + + before_max_width -= 2 * truncation_string_length; + keyafter_max_width -= 2 * truncation_string_length; + } + else + { + + /* I never figured out exactly how UNIX' ptx plans the output width + of its various fields. If GNU extensions are disabled, do not + try computing the field widths correctly; instead, use the + following formula, which does not completely imitate UNIX' ptx, + but almost. */ + + keyafter_max_width -= 2 * truncation_string_length + 1; + } + + /* Compute which characters need special output processing. Initialize + by flagging any white space character. Some systems do not consider + form feed as a space character, but we do. */ + + for (character = 0; character < CHAR_SET_SIZE; character++) + edited_flag[character] = isspace (character); + edited_flag['\f'] = 1; + + /* Complete the special character flagging according to selected output + format. */ + + switch (output_format) + { + case UNKNOWN_FORMAT: + /* Should never happen. */ + + case DUMB_FORMAT: + break; + + case ROFF_FORMAT: + + /* `Quote' characters should be doubled. */ + + edited_flag['"'] = 1; + break; + + case TEX_FORMAT: + + /* Various characters need special processing. */ + + for (cursor = "$%&#_{}\\"; *cursor; cursor++) + edited_flag[*cursor] = 1; + + /* Any character with 8th bit set will print to a single space, unless + it is diacriticized. */ + + for (character = 0200; character < CHAR_SET_SIZE; character++) + edited_flag[character] = todiac (character) != 0; + break; + } +} + +/*------------------------------------------------------------------. 
+| Compute the position and length of all the output fields, given a | +| pointer to some OCCURS. | +`------------------------------------------------------------------*/ + +void +define_all_fields (OCCURS *occurs) +{ + int tail_max_width; /* allowable width of tail field */ + int head_max_width; /* allowable width of head field */ + char *cursor; /* running cursor in source text */ + char *left_context_start; /* start of left context */ + char *right_context_end; /* end of right context */ + char *left_field_start; /* conservative start for `head'/`before' */ + int file_index; /* index in text input file arrays */ + const char *file_name; /* file name for reference */ + int line_ordinal; /* line ordinal for reference */ + + /* Define `keyafter', start of left context and end of right context. + `keyafter' starts at the saved position for keyword and extend to the + right from the end of the keyword, eating separators or full words, but + not beyond maximum allowed width for `keyafter' field or limit for the + right context. Suffix spaces will be removed afterwards. */ + + keyafter.start = occurs->key.start; + keyafter.end = keyafter.start + occurs->key.size; + left_context_start = keyafter.start + occurs->left; + right_context_end = keyafter.start + occurs->right; + + cursor = keyafter.end; + while (cursor < right_context_end + && cursor <= keyafter.start + keyafter_max_width) + { + keyafter.end = cursor; + SKIP_SOMETHING (cursor, right_context_end); + } + if (cursor <= keyafter.start + keyafter_max_width) + keyafter.end = cursor; + + keyafter_truncation = truncation_string && keyafter.end < right_context_end; + + SKIP_WHITE_BACKWARDS (keyafter.end, keyafter.start); + + /* When the left context is wide, it might take some time to catch up from + the left context boundary to the beginning of the `head' or `before' + fields. 
So, in this case, to speed the catchup, we jump back from the + keyword, using some secure distance, possibly falling in the middle of + a word. A secure backward jump would be at least half the maximum + width of a line, plus the size of the longest word met in the whole + input. We conclude this backward jump by a skip forward of at least + one word. In this manner, we should not inadvertently accept only part + of a word. From the reached point, when it will be time to fix the + beginning of `head' or `before' fields, we will skip forward words or + delimiters until we get sufficiently near. */ + + if (-occurs->left > half_line_width + maximum_word_length) + { + left_field_start + = keyafter.start - (half_line_width + maximum_word_length); + SKIP_SOMETHING (left_field_start, keyafter.start); + } + else + left_field_start = keyafter.start + occurs->left; + + /* `before' certainly ends at the keyword, but not including separating + spaces. It starts after than the saved value for the left context, by + advancing it until it falls inside the maximum allowed width for the + before field. There will be no prefix spaces either. `before' only + advances by skipping single separators or whole words. */ + + before.start = left_field_start; + before.end = keyafter.start; + SKIP_WHITE_BACKWARDS (before.end, before.start); + + while (before.start + before_max_width < before.end) + SKIP_SOMETHING (before.start, before.end); + + if (truncation_string) + { + cursor = before.start; + SKIP_WHITE_BACKWARDS (cursor, text_buffer.start); + before_truncation = cursor > left_context_start; + } + else + before_truncation = 0; + + SKIP_WHITE (before.start, text_buffer.end); + + /* The tail could not take more columns than what has been left in the + left context field, and a gap is mandatory. It starts after the + right context, and does not contain prefixed spaces. It ends at + the end of line, the end of buffer or when the tail field is full, + whichever comes first. 
It cannot contain only part of a word, and + has no suffixed spaces. */ + + tail_max_width + = before_max_width - (before.end - before.start) - gap_size; + + if (tail_max_width > 0) + { + tail.start = keyafter.end; + SKIP_WHITE (tail.start, text_buffer.end); + + tail.end = tail.start; + cursor = tail.end; + while (cursor < right_context_end + && cursor < tail.start + tail_max_width) + { + tail.end = cursor; + SKIP_SOMETHING (cursor, right_context_end); + } + + if (cursor < tail.start + tail_max_width) + tail.end = cursor; + + if (tail.end > tail.start) + { + keyafter_truncation = 0; + tail_truncation = truncation_string && tail.end < right_context_end; + } + else + tail_truncation = 0; + + SKIP_WHITE_BACKWARDS (tail.end, tail.start); + } + else + { + + /* No place left for a tail field. */ + + tail.start = NULL; + tail.end = NULL; + tail_truncation = 0; + } + + /* `head' could not take more columns than what has been left in the right + context field, and a gap is mandatory. It ends before the left + context, and does not contain suffixed spaces. Its pointer is advanced + until the head field has shrunk to its allowed width. It cannot + contain only part of a word, and has no suffixed spaces. */ + + head_max_width + = keyafter_max_width - (keyafter.end - keyafter.start) - gap_size; + + if (head_max_width > 0) + { + head.end = before.start; + SKIP_WHITE_BACKWARDS (head.end, text_buffer.start); + + head.start = left_field_start; + while (head.start + head_max_width < head.end) + SKIP_SOMETHING (head.start, head.end); + + if (head.end > head.start) + { + before_truncation = 0; + head_truncation = (truncation_string + && head.start > left_context_start); + } + else + head_truncation = 0; + + SKIP_WHITE (head.start, head.end); + } + else + { + + /* No place left for a head field. 
*/ + + head.start = NULL; + head.end = NULL; + head_truncation = 0; + } + + if (auto_reference) + { + + /* Construct the reference text in preallocated space from the file + name and the line number. Find out in which file the reference + occurred. Standard input yields an empty file name. Insure line + numbers are one based, even if they are computed zero based. */ + + file_index = 0; + while (file_line_count[file_index] < occurs->reference) + file_index++; + + file_name = input_file_name[file_index]; + if (!file_name) + file_name = ""; + + line_ordinal = occurs->reference + 1; + if (file_index > 0) + line_ordinal -= file_line_count[file_index - 1]; + + sprintf (reference.start, "%s:%d", file_name, line_ordinal); + reference.end = reference.start + strlen (reference.start); + } + else if (input_reference) + { + + /* Reference starts at saved position for reference and extends right + until some white space is met. */ + + reference.start = keyafter.start + (DELTA) occurs->reference; + reference.end = reference.start; + SKIP_NON_WHITE (reference.end, right_context_end); + } +} + + +/* Formatting and actual output - control routines. */ + +/*----------------------------------------------------------------------. +| Output the current output fields as one line for `troff' or `nroff'. | +`----------------------------------------------------------------------*/ + +void +output_one_roff_line (void) +{ + /* Output the `tail' field. */ + + printf (".%s \"", macro_name); + print_field (tail); + if (tail_truncation) + printf ("%s", truncation_string); + putchar ('"'); + + /* Output the `before' field. */ + + printf (" \""); + if (before_truncation) + printf ("%s", truncation_string); + print_field (before); + putchar ('"'); + + /* Output the `keyafter' field. */ + + printf (" \""); + print_field (keyafter); + if (keyafter_truncation) + printf ("%s", truncation_string); + putchar ('"'); + + /* Output the `head' field. 
*/ + + printf (" \""); + if (head_truncation) + printf ("%s", truncation_string); + print_field (head); + putchar ('"'); + + /* Conditionally output the `reference' field. */ + + if (auto_reference || input_reference) + { + printf (" \""); + print_field (reference); + putchar ('"'); + } + + putchar ('\n'); +} + +/*---------------------------------------------------------. +| Output the current output fields as one line for `TeX'. | +`---------------------------------------------------------*/ + +void +output_one_tex_line (void) +{ + BLOCK key; /* key field, isolated */ + BLOCK after; /* after field, isolated */ + char *cursor; /* running cursor in source text */ + + printf ("\\%s ", macro_name); + printf ("{"); + print_field (tail); + printf ("}{"); + print_field (before); + printf ("}{"); + key.start = keyafter.start; + after.end = keyafter.end; + cursor = keyafter.start; + SKIP_SOMETHING (cursor, keyafter.end); + key.end = cursor; + after.start = cursor; + print_field (key); + printf ("}{"); + print_field (after); + printf ("}{"); + print_field (head); + printf ("}"); + if (auto_reference || input_reference) + { + printf ("{"); + print_field (reference); + printf ("}"); + } + printf ("\n"); +} + +/*-------------------------------------------------------------------. +| Output the current output fields as one line for a dumb terminal. | +`-------------------------------------------------------------------*/ + +void +output_one_dumb_line (void) +{ + if (!right_reference) + if (auto_reference) + { + + /* Output the `reference' field, in such a way that GNU emacs + next-error will handle it. The ending colon is taken from the + gap which follows. */ + + print_field (reference); + putchar (':'); + print_spaces (reference_max_width + + gap_size + - (reference.end - reference.start) + - 1); + } + else + { + + /* Output the `reference' field and its following gap. 
*/ + + print_field (reference); + print_spaces (reference_max_width + + gap_size + - (reference.end - reference.start)); + } + + if (tail.start < tail.end) + { + /* Output the `tail' field. */ + + print_field (tail); + if (tail_truncation) + printf ("%s", truncation_string); + + print_spaces (half_line_width - gap_size + - (before.end - before.start) + - (before_truncation ? truncation_string_length : 0) + - (tail.end - tail.start) + - (tail_truncation ? truncation_string_length : 0)); + } + else + print_spaces (half_line_width - gap_size + - (before.end - before.start) + - (before_truncation ? truncation_string_length : 0)); + + /* Output the `before' field. */ + + if (before_truncation) + printf ("%s", truncation_string); + print_field (before); + + print_spaces (gap_size); + + /* Output the `keyafter' field. */ + + print_field (keyafter); + if (keyafter_truncation) + printf ("%s", truncation_string); + + if (head.start < head.end) + { + /* Output the `head' field. */ + + print_spaces (half_line_width + - (keyafter.end - keyafter.start) + - (keyafter_truncation ? truncation_string_length : 0) + - (head.end - head.start) + - (head_truncation ? truncation_string_length : 0)); + if (head_truncation) + printf ("%s", truncation_string); + print_field (head); + } + else + + if ((auto_reference || input_reference) && right_reference) + print_spaces (half_line_width + - (keyafter.end - keyafter.start) + - (keyafter_truncation ? truncation_string_length : 0)); + + if ((auto_reference || input_reference) && right_reference) + { + /* Output the `reference' field. */ + + print_spaces (gap_size); + print_field (reference); + } + + printf ("\n"); +} + +/*------------------------------------------------------------------------. +| Scan the whole occurs table and, for each entry, output one line in the | +| appropriate format. 
| +`------------------------------------------------------------------------*/ + +void +generate_all_output (void) +{ + int occurs_index; /* index of keyword entry being processed */ + OCCURS *occurs_cursor; /* current keyword entry being processed */ + + + /* The following assignments are useful to provide default values in case + line contexts or references are not used, in which case these variables + would never be computed. */ + + tail.start = NULL; + tail.end = NULL; + tail_truncation = 0; + + head.start = NULL; + head.end = NULL; + head_truncation = 0; + + + /* Loop over all keyword occurrences. */ + + occurs_cursor = occurs_table[0]; + + for (occurs_index = 0; occurs_index < number_of_occurs[0]; occurs_index++) + { + /* Compute the exact size of every field and whenever truncation flags + are present or not. */ + + define_all_fields (occurs_cursor); + + /* Produce one output line according to selected format. */ + + switch (output_format) + { + case UNKNOWN_FORMAT: + /* Should never happen. */ + + case DUMB_FORMAT: + output_one_dumb_line (); + break; + + case ROFF_FORMAT: + output_one_roff_line (); + break; + + case TEX_FORMAT: + output_one_tex_line (); + break; + } + + /* Advance the cursor into the occurs table. */ + + occurs_cursor++; + } +} + +/* Option decoding and main program. */ + +/*------------------------------------------------------. +| Print program identification and options, then exit. | +`------------------------------------------------------*/ + +void +usage (int status) +{ + if (status != 0) + fprintf (stderr, "Try `%s --help' for more information.\n", program_name); + else + { + printf ("\ +Usage: %s [OPTION]... [INPUT]... (without -G)\n\ + or: %s -G [OPTION]... 
[INPUT [OUTPUT]]\n", program_name, program_name); + printf ("\ +\n\ + -A, --auto-reference output automatically generated references\n\ + -C, --copyright display Copyright and copying conditions\n\ + -G, --traditional behave more like System V `ptx'\n\ + -F, --flag-truncation=STRING use STRING for flagging line truncations\n\ + -M, --macro-name=STRING macro name to use instead of `xx'\n\ + -O, --format=roff generate output as roff directives\n\ + -R, --right-side-refs put references at right, not counted in -w\n\ + -S, --sentence-regexp=REGEXP for end of lines or end of sentences\n\ + -T, --format=tex generate output as TeX directives\n\ + -W, --word-regexp=REGEXP use REGEXP to match each keyword\n\ + -b, --break-file=FILE word break characters in this FILE\n\ + -f, --ignore-case fold lower case to upper case for sorting\n\ + -g, --gap-size=NUMBER gap size in columns between output fields\n\ + -i, --ignore-file=FILE read ignore word list from FILE\n\ + -o, --only-file=FILE read only word list from this FILE\n\ + -r, --references first field of each line is a reference\n\ + -t, --typeset-mode - not implemented -\n\ + -w, --width=NUMBER output width in columns, reference excluded\n\ + --help display this help and exit\n\ + --version output version information and exit\n\ +\n\ +With no FILE or if FILE is -, read Standard Input. `-F /' by default.\n"); + } + exit (status); +} + +/*----------------------------------------------------------------------. +| Main program. Decode ARGC arguments passed through the ARGV array of | +| strings, then launch execution. | +`----------------------------------------------------------------------*/ + +/* Long options equivalences. 
*/ +const struct option long_options[] = +{ + {"auto-reference", no_argument, NULL, 'A'}, + {"break-file", required_argument, NULL, 'b'}, + {"copyright", no_argument, NULL, 'C'}, + {"flag-truncation", required_argument, NULL, 'F'}, + {"ignore-case", no_argument, NULL, 'f'}, + {"gap-size", required_argument, NULL, 'g'}, + {"help", no_argument, &show_help, 1}, + {"ignore-file", required_argument, NULL, 'i'}, + {"macro-name", required_argument, NULL, 'M'}, + {"only-file", required_argument, NULL, 'o'}, + {"references", no_argument, NULL, 'r'}, + {"right-side-refs", no_argument, NULL, 'R'}, + {"format", required_argument, NULL, 10}, /* decoded by `case 10' in main */ + {"sentence-regexp", required_argument, NULL, 'S'}, + {"traditional", no_argument, NULL, 'G'}, + {"typeset-mode", no_argument, NULL, 't'}, + {"version", no_argument, &show_version, 1}, + {"width", required_argument, NULL, 'w'}, + {"word-regexp", required_argument, NULL, 'W'}, + {0, 0, 0, 0}, +}; + +static char const* const format_args[] = +{ + "roff", "tex", 0 +}; /* index 0 selects roff, index 1 selects TeX; see `case 10' in main */ + +int +main (int argc, char *const argv[]) +{ + int optchar; /* argument character */ + extern int optind; /* index of argument */ + extern char *optarg; /* value or argument */ + int file_index; /* index in text input file arrays */ + +#ifdef HAVE_MCHECK + /* Use GNU malloc checking. It has proven to be useful! */ + mcheck (); +#endif /* HAVE_MCHECK */ + +#ifdef STDC_HEADERS +#ifdef HAVE_SETCHRCLASS + setchrclass (NULL); +#endif +#endif + + /* Decode program options.
*/ + + program_name = argv[0]; + + while ((optchar = getopt_long (argc, argv, "ACF:GM:ORS:TW:b:i:fg:o:trw:", + long_options, NULL)), + optchar != EOF) + { + switch (optchar) + { + default: + usage (1); /* usage () exits, so this never falls through */ + + case 0: /* a long option which only sets a flag (--help, --version) */ + break; + + case 'C': + printf ("%s", copyright); + exit (0); + + case 'G': + gnu_extensions = 0; + break; + + case 'b': + break_file = optarg; + break; + + case 'f': + ignore_case = 1; + break; + + case 'g': + gap_size = atoi (optarg); + break; + + case 'i': + ignore_file = optarg; + break; + + case 'o': + only_file = optarg; + break; + + case 'r': + input_reference = 1; + break; + + case 't': + /* Typeset mode is not implemented; the option is accepted and ignored. */ + break; + + case 'w': + line_width = atoi (optarg); + break; + + case 'A': + auto_reference = 1; + break; + + case 'F': + truncation_string = copy_unescaped_string (optarg); + break; + + case 'M': + macro_name = optarg; + break; + + case 'O': + output_format = ROFF_FORMAT; + break; + + case 'R': + right_reference = 1; + break; + + case 'S': + context_regex_string = copy_unescaped_string (optarg); + break; + + case 'T': + output_format = TEX_FORMAT; + break; + + case 'W': + word_regex_string = copy_unescaped_string (optarg); + break; + + case 10: + switch (argmatch (optarg, format_args)) + { + default: + usage (1); + + case 0: + output_format = ROFF_FORMAT; + break; + + case 1: + output_format = TEX_FORMAT; + break; + } + } + } + + /* Process trivial options. */ + + if (show_help) + usage (0); + + if (show_version) + { + printf ("%s\n", version_string); + exit (0); + } + + /* Change the default Ignore file if one is defined. */ + +#ifdef DEFAULT_IGNORE_FILE + if (!ignore_file) + ignore_file = DEFAULT_IGNORE_FILE; +#endif + + /* Process remaining arguments. If GNU extensions are enabled, process + all arguments as input parameters. If disabled, accept at most two + arguments, the second of which is an output parameter. */ + + if (optind == argc) + { + + /* No more argument simply means: read standard input.
*/ + + input_file_name = (const char **) xmalloc (sizeof (const char *)); + file_line_count = (int *) xmalloc (sizeof (int)); + number_input_files = 1; + input_file_name[0] = NULL; + } + else if (gnu_extensions) + { + number_input_files = argc - optind; + input_file_name + = (const char **) xmalloc (number_input_files * sizeof (const char *)); + file_line_count + = (int *) xmalloc (number_input_files * sizeof (int)); + + for (file_index = 0; file_index < number_input_files; file_index++) + { + /* An empty name or "-" stands for standard input, recorded as NULL in this file's own slot. */ + if (!*argv[optind] || strcmp (argv[optind], "-") == 0) + input_file_name[file_index] = NULL; + else + input_file_name[file_index] = argv[optind]; + optind++; + } + } + else + { + + /* There is one necessary input file. */ + + number_input_files = 1; + input_file_name = (const char **) xmalloc (sizeof (const char *)); + file_line_count = (int *) xmalloc (sizeof (int)); + if (!*argv[optind] || strcmp (argv[optind], "-") == 0) + input_file_name[0] = NULL; + else + input_file_name[0] = argv[optind]; + optind++; + + /* Redirect standard output, only if requested. */ + + if (optind < argc) + { + /* Reopen stdout itself: a stream from a separate fopen would not be the one printf and putchar write to. */ + if (freopen (argv[optind], "w", stdout) == NULL) + error (1, errno, "%s", argv[optind]); + optind++; + } + + /* Diagnose any other argument as an error. */ + + if (optind < argc) + usage (1); + } + + /* If the output format has not been explicitly selected, choose dumb + terminal format if GNU extensions are enabled, else `roff' format. */ + + if (output_format == UNKNOWN_FORMAT) + output_format = gnu_extensions ? DUMB_FORMAT : ROFF_FORMAT; + + /* Initialize the main tables. */ + + initialize_regex (); + + /* Read `Break character' file, if any. */ + + if (break_file) + digest_break_file (break_file); + + /* Read `Ignore words' file and `Only words' files, if any. If any of + these files is empty, reset the name of the file to NULL, to avoid + unnecessary calls to search_table.
*/ + + if (ignore_file) + { + digest_word_file (ignore_file, &ignore_table); + if (ignore_table.length == 0) + ignore_file = NULL; + } + + if (only_file) + { + digest_word_file (only_file, &only_table); + if (only_table.length == 0) + only_file = NULL; + } + + /* Prepare to study all the input files. */ + + number_of_occurs[0] = 0; + total_line_count = 0; + maximum_word_length = 0; + reference_max_width = 0; + + for (file_index = 0; file_index < number_input_files; file_index++) + { + + /* Read the file in core, then study it. */ + + swallow_file_in_memory (input_file_name[file_index], &text_buffer); + find_occurs_in_text (); + + /* Maintain for each file how many lines have been read so far when its + end is reached. Incrementing the count first is a simple kludge to + handle a possible incomplete line at end of file. */ + + total_line_count++; + file_line_count[file_index] = total_line_count; + } + + /* Do the output process phase. */ + + sort_found_occurs (); + fix_output_parameters (); + generate_all_output (); + + /* All done. */ + + exit (0); +} diff --git a/gnu/usr.bin/ptx/ptx.info b/gnu/usr.bin/ptx/ptx.info new file mode 100644 index 0000000..3bbd1bb --- /dev/null +++ b/gnu/usr.bin/ptx/ptx.info @@ -0,0 +1,496 @@ +This is Info file ptx.info, produced by Makeinfo-1.47 from the input +file ./ptx.texinfo. + + This file documents the `ptx' command, which has the purpose of +generated permuted indices for group of files. + + Copyright (C) 1990, 1991, 1993 by the Free Software Foundation, Inc. + + Permission is granted to make and distribute verbatim copies of this +manual provided the copyright notice and this permission notice are +preserved on all copies. + + Permission is granted to copy and distribute modified versions of +this manual under the conditions for verbatim copying, provided that +the entire resulting derived work is distributed under the terms of a +permission notice identical to this one.
+ + Permission is granted to copy and distribute translations of this +manual into another language, under the above conditions for modified +versions, except that this permission notice may be stated in a +translation approved by the Foundation. + + +File: ptx.info, Node: Top, Next: Invoking ptx, Prev: (dir), Up: (dir) + +Introduction +************ + + This is the 0.3 beta release of `ptx', the GNU version of a permuted +index generator. This software has the main goal of providing a +replacement for the traditional `ptx' as found on System V machines, +able to handle small files quickly, while providing a platform for more +development. + + This version reimplements and extends traditional `ptx'. Among +other things, it can produce a readable "KWIC" (keywords in their +context) without the need of `nroff', there is also an option to +produce TeX compatible output. This version does not handle huge input +files, that is, those files which do not fit in memory all at once. + + *Please note* that an overall renaming of all options is +foreseeable. In fact, GNU ptx specifications are not frozen yet. + +* Menu: + +* Invoking ptx:: How to use this program +* Compatibility:: The GNU extensions to `ptx' + + -- The Detailed Node Listing -- + +How to use this program + +* General options:: Options which affect general program behaviour. +* Charset selection:: Underlying character set considerations. +* Input processing:: Input fields, contexts, and keyword selection. +* Output formatting:: Types of output format, and sizing the fields. + + +File: ptx.info, Node: Invoking ptx, Next: Compatibility, Prev: Top, Up: Top + +How to use this program +*********************** + + This tool reads a text file and essentially produces a permuted +index, with each keyword in its context. The calling sketch is one of: + + ptx [OPTION ...] [FILE ...] + + or: + + ptx -G [OPTION ...] 
[INPUT [OUTPUT]] + + The `-G' (or its equivalent: `--traditional') option disables all +GNU extensions and revert to traditional mode, thus introducing some +limitations, and changes several of the program's default option values. +When `-G' is not specified, GNU extensions are always enabled. GNU +extensions to `ptx' are documented wherever appropriate in this +document. See *Note Compatibility:: for an explicit list of them. + + Individual options are explained later in this document. + + When GNU extensions are enabled, there may be zero, one or several +FILE after the options. If there is no FILE, the program reads the +standard input. If there is one or several FILE, they give the name of +input files which are all read in turn, as if all the input files were +concatenated. However, there is a full contextual break between each +file and, when automatic referencing is requested, file names and line +numbers refer to individual text input files. In all cases, the +program produces the permuted index onto the standard output. + + When GNU extensions are *not* enabled, that is, when the program +operates in traditional mode, there may be zero, one or two parameters +besides the options. If there is no parameters, the program reads the +standard input and produces the permuted index onto the standard output. +If there is only one parameter, it names the text INPUT to be read +instead of the standard input. If two parameters are given, they give +respectively the name of the INPUT file to read and the name of the +OUTPUT file to produce. *Be very careful* to note that, in this case, +the contents of file given by the second parameter is destroyed. This +behaviour is dictated only by System V `ptx' compatibility, because GNU +Standards discourage output parameters not introduced by an option. + + Note that for *any* file named as the value of an option or as an +input text file, a single dash `-' may be used, in which case standard +input is assumed. 
However, it would not make sense to use this +convention more than once per program invocation. + +* Menu: + +* General options:: Options which affect general program behaviour. +* Charset selection:: Underlying character set considerations. +* Input processing:: Input fields, contexts, and keyword selection. +* Output formatting:: Types of output format, and sizing the fields. + + +File: ptx.info, Node: General options, Next: Charset selection, Prev: Invoking ptx, Up: Invoking ptx + +General options +=============== + +`-C' +`--copyright' + Prints a short note about the Copyright and copying conditions, + then exits without further processing. + +`-G' +`--traditional' + As already explained, this option disables all GNU extensions to + `ptx' and switches to traditional mode. + +`--help' + Prints a short help on standard output, then exits without further + processing. + +`--version' + Prints the program version on standard output, then exits without + further processing. + + +File: ptx.info, Node: Charset selection, Next: Input processing, Prev: General options, Up: Invoking ptx + +Charset selection +================= + + As it is set up now, the program assumes that the input file is coded +using 8-bit ISO 8859-1 code, also known as Latin-1 character set, +*unless* it is compiled for MS-DOS, in which case it uses the +character set of the IBM-PC. (GNU `ptx' is not known to work on +smaller MS-DOS machines anymore.) Compared to 7-bit ASCII, the set of +characters which are letters is then different; this fact alters the +behaviour of regular expression matching. Thus, the default regular +expression for a keyword allows foreign or diacriticized letters. +Keyword sorting, however, is still crude; it obeys the underlying +character set ordering quite blindly. + +`-f' +`--ignore-case' + Fold lower case letters to upper case for sorting.
+ + +File: ptx.info, Node: Input processing, Next: Output formatting, Prev: Charset selection, Up: Invoking ptx + +Word selection +============== + +`-b FILE' +`--break-file=FILE' + This option is an alternative way to option `-W' for describing + which characters make up words. This option introduces the name + of a file which contains a list of characters which can*not* be + part of one word, this file is called the "Break file". Any + character which is not part of the Break file is a word + constituent. If both options `-b' and `-W' are specified, then + `-W' has precedence and `-b' is ignored. + + When GNU extensions are enabled, the only way to avoid newline as a + break character is to write all the break characters in the file + with no newline at all, not even at the end of the file. When GNU + extensions are disabled, spaces, tabs and newlines are always + considered as break characters even if not included in the Break + file. + +`-i FILE' +`--ignore-file=FILE' + The file associated with this option contains a list of words + which will never be taken as keywords in concordance output. It + is called the "Ignore file". The file contains exactly one word + in each line; the end of line separation of words is not subject + to the value of the `-S' option. + + There is a default Ignore file used by `ptx' when this option is + not specified, usually found in `/usr/local/lib/eign' if this has + not been changed at installation time. If you want to deactivate + the default Ignore file, specify `/dev/null' instead. + +`-o FILE' +`--only-file=FILE' + The file associated with this option contains a list of words + which will be retained in concordance output, any word not + mentioned in this file is ignored. The file is called the "Only + file". The file contains exactly one word in each line; the end + of line separation of words is not subject to the value of the + `-S' option. + + There is no default for the Only file. 
In the case there are both + an Only file and an Ignore file, a word will be subject to be a + keyword only if it is given in the Only file and not given in the + Ignore file. + +`-r' +`--references' + On each input line, the leading sequence of non white characters + will be taken to be a reference that has the purpose of + identifying this input line on the produced permuted index. See + *Note Output formatting:: for more information about reference + production. Using this option changes the default value for option + `-S'. + + Using this option, the program does not try very hard to remove + references from contexts in output, but it succeeds in doing so + *when* the context ends exactly at the newline. If option `-r' is + used with `-S' default value, or when GNU extensions are disabled, + this condition is always met and references are completely + excluded from the output contexts. + +`-S REGEXP' +`--sentence-regexp=REGEXP' + This option selects which regular expression will describe the end + of a line or the end of a sentence. In fact, there is no other + distinction between end of lines or end of sentences than the + effect of this regular expression, and input line boundaries have + no special significance outside this option. By default, when GNU + extensions are enabled and if `-r' option is not used, end of + sentences are used. In this case, the precise REGEXP is imported + from GNU emacs: + + [.?!][]\"')}]*\\($\\|\t\\| \\)[ \t\n]* + + Whenever GNU extensions are disabled or if `-r' option is used, end + of lines are used; in this case, the default REGEXP is just: + + \n + + Using an empty REGEXP is equivalent to completely disabling end of + line or end of sentence recognition. In this case, the whole file + is considered to be a single big line or sentence. The user might + want to disallow all truncation flag generation as well, through + option `-F ""'. *Note Syntax of Regular Expressions: + (emacs)Regexps.
+ + When the keywords happen to be near the beginning of the input + line or sentence, this often creates an unused area at the + beginning of the output context line; when the keywords happen to + be near the end of the input line or sentence, this often creates + an unused area at the end of the output context line. The program + tries to fill those unused areas by wrapping around context in + them; the tail of the input line or sentence is used to fill the + unused area on the left of the output line; the head of the input + line or sentence is used to fill the unused area on the right of + the output line. + + As a matter of convenience to the user, many usual backslashed + escape sequences, as found in the C language, are recognized and + converted to the corresponding characters by `ptx' itself. + +`-W REGEXP' +`--word-regexp=REGEXP' + This option selects which regular expression will describe each + keyword. By default, if GNU extensions are enabled, a word is a + sequence of letters; the REGEXP used is `\w+'. When GNU + extensions are disabled, a word is by default anything which ends + with a space, a tab or a newline; the REGEXP used is `[^ \t\n]+'. + + An empty REGEXP is equivalent to not using this option, letting the + default dive in. *Note Syntax of Regular Expressions: + (emacs)Regexps. + + As a matter of convenience to the user, many usual backslashed + escape sequences, as found in the C language, are recognized and + converted to the corresponding characters by `ptx' itself. + + +File: ptx.info, Node: Output formatting, Prev: Input processing, Up: Invoking ptx + +Output formatting +================= + + Output format is mainly controlled by `-O' and `-T' options, +described in the table below. When neither `-O' nor `-T' is selected, +and if GNU extensions are enabled, the program choose an output format +suited for a dumb terminal. Each keyword occurrence is output to the +center of one line, surrounded by its left and right contexts. 
Each +field is properly justified, so the concordance output could readily be +observed. As a special feature, if automatic references are selected +by option `-A' and are output before the left context, that is, if +option `-R' is *not* selected, then a colon is added after the +reference; this nicely interfaces with GNU Emacs `next-error' +processing. In this default output format, each white space character, +like newline and tab, is merely changed to exactly one space, with no +special attempt to compress consecutive spaces. This might change in +the future. Except for those white space characters, every other +character of the underlying set of 256 characters is transmitted +verbatim. + + Output format is further controlled by the following options. + +`-g NUMBER' +`--gap-size=NUMBER' + Select the size of the minimum white gap between the fields on the + output line. + +`-w NUMBER' +`--width=NUMBER' + Select the output maximum width of each final line. If references + are used, they are included or excluded from the output maximum + width depending on the value of option `-R'. If this option is not + selected, that is, when references are output before the left + context, the output maximum width takes into account the maximum + length of all references. If this options is selected, that is, + when references are output after the right context, the output + maximum width does not take into account the space taken by + references, nor the gap that precedes them. + +`-A' +`--auto-reference' + Select automatic references. Each input line will have an + automatic reference made up of the file name and the line ordinal, + with a single colon between them. However, the file name will be + empty when standard input is being read. If both `-A' and `-r' + are selected, then the input reference is still read and skipped, + but the automatic reference is used at output time, overriding the + input reference. 
+ +`-R' +`--right-side-refs' + In default output format, when option `-R' is not used, any + references produced by the effect of options `-r' or `-A' are given + at the beginning of each output line, before the left context. In + default output format, when option `-R' is specified, references + are rather given to the far right of output lines, after the + right context. For any other output format, option `-R' is almost + ignored, except for the fact that the width of references is *not* + taken into account in total output width given by `-w' whenever + `-R' is selected. + + This option is automatically selected whenever GNU extensions are + disabled. + +`-F STRING' +`--flag-truncation=STRING' + This option will request that any truncation in the output be + reported using the string STRING. Most output fields + theoretically extend towards the beginning or the end of the + current line, or current sentence, as selected with option `-S'. + But there is a maximum allowed output line width, changeable + through option `-w', which is further divided into space for + various output fields. When a field has to be truncated because + it cannot extend to the beginning or the end of the current line + and still fit in its allotted space, then a truncation occurs. By + default, the string used is a single slash, as in `-F /'. + + STRING may have more than one character, as in `-F ...'. Also, in + the particular case STRING is empty (`-F ""'), truncation flagging + is disabled, and no truncation marks are appended in this case. + + As a matter of convenience to the user, many usual backslashed + escape sequences, as found in the C language, are recognized and + converted to the corresponding characters by `ptx' itself. + +`-M STRING' +`--macro-name=STRING' + Select another STRING to be used instead of `xx', while generating + output suitable for `nroff', `troff' or TeX. + +`-O' +`--format=roff' + Choose an output format suitable for `nroff' or `troff' + processing.
Each output line will look like: + + .xx "TAIL" "BEFORE" "KEYWORD_AND_AFTER" "HEAD" "REF" + + so it will be possible to write an `.xx' roff macro to take care of + the output typesetting. This is the default output format when GNU + extensions are disabled. Option `-M' might be used to change `xx' + to another macro name. + + In this output format, each non-graphical character, like newline + and tab, is merely changed to exactly one space, with no special + attempt to compress consecutive spaces. Each quote character: `"' + is doubled so it will be correctly processed by `nroff' or `troff'. + +`-T' +`--format=tex' + Choose an output format suitable for TeX processing. Each output + line will look like: + + \xx {TAIL}{BEFORE}{KEYWORD}{AFTER}{HEAD}{REF} + + so it will be possible to write a `\xx' definition to take + care of the output typesetting. Note that when references are not + being produced, that is, neither option `-A' nor option `-r' is + selected, the last parameter of each `\xx' call is inhibited. + Option `-M' might be used to change `xx' to another macro name. + + In this output format, some special characters, like `$', `%', + `&', `#' and `_' are automatically protected with a backslash. + Curly brackets `{', `}' are also protected with a backslash, but + also enclosed in a pair of dollar signs to force mathematical + mode. The backslash itself produces the sequence `\backslash{}'. + Circumflex and tilde diacritics produce the sequences `^\{ }' and + `~\{ }' respectively. Other diacriticized characters of the + underlying character set produce an appropriate TeX sequence as + far as possible. The other non-graphical characters, like newline + and tab, and all other characters which are not part of ASCII, + are merely changed to exactly one space, with no special attempt + to compress consecutive spaces. Let me know how to improve this + special character processing for TeX.
+ + +File: ptx.info, Node: Compatibility, Prev: Invoking ptx, Up: Top + +The GNU extensions to `ptx' +*************************** + + This version of `ptx' contains a few features which do not exist in +System V `ptx'. These extra features are suppressed by using the `-G' +command line option, unless overridden by other command line options. +Some GNU extensions cannot be recovered by overriding, so the simple +rule is to avoid `-G' if you care about GNU extensions. Here are the +differences between this program and System V `ptx'. + + * This program can read many input files at once, it always writes + the resulting concordance on standard output. On the other end, + System V `ptx' reads only one file and produce the result on + standard output or, if a second FILE parameter is given on the + command, to that FILE. + + Having output parameters not introduced by options is a quite + dangerous practice which GNU avoids as far as possible. So, for + using `ptx' portably between GNU and System V, you should pay + attention to always use it with a single input file, and always + expect the result on standard output. You might also want to + automatically configure in a `-G' option to `ptx' calls in + products using `ptx', if the configurator finds that the installed + `ptx' accepts `-G'. + + * The only options available in System V `ptx' are options `-b', + `-f', `-g', `-i', `-o', `-r', `-t' and `-w'. All other options + are GNU extensions and are not repeated in this enumeration. + Moreover, some options have a slightly different meaning when GNU + extensions are enabled, as explained below. + + * By default, concordance output is not formatted for `troff' or + `nroff'. It is rather formatted for a dumb terminal. `troff' or + `nroff' output may still be selected through option `-O'. + + * Unless `-R' option is used, the maximum reference width is + subtracted from the total output line width. 
With GNU extensions + disabled, width of references is not taken into account in the + output line width computations. + + * All 256 characters, even `NUL's, are always read and processed from + input file with no adverse effect, even if GNU extensions are + disabled. However, System V `ptx' does not accept 8-bit + characters, a few control characters are rejected, and the tilda + `~' is condemned. + + * Input line length is only limited by available memory, even if GNU + extensions are disabled. However, System V `ptx' processes only + the first 200 characters in each line. + + * The break (non-word) characters default to be every character + except all letters of the underlying character set, diacriticized + or not. When GNU extensions are disabled, the break characters + default to space, tab and newline only. + + * The program makes better use of output line width. If GNU + extensions are disabled, the program rather tries to imitate + System V `ptx', but still, there are some slight disposition + glitches this program does not completely reproduce. + + * The user can specify both an Ignore file and an Only file. This + is not allowed with System V `ptx'. + + + +Tag Table: +Node: Top939 +Node: Invoking ptx2298 +Node: General options5025 +Node: Charset selection5639 +Node: Input processing6514 +Node: Output formatting12205 +Node: Compatibility18737 + +End Tag Table diff --git a/gnu/usr.bin/ptx/ptx.texinfo b/gnu/usr.bin/ptx/ptx.texinfo new file mode 100644 index 0000000..e690c55 --- /dev/null +++ b/gnu/usr.bin/ptx/ptx.texinfo @@ -0,0 +1,554 @@ +\input texinfo @c -*-texinfo-*- +@c %**start of header +@setfilename ptx.info +@settitle GNU @code{ptx} reference manual +@finalout +@c %**end of header + +@ifinfo +This file documents the @code{ptx} command, which has the purpose of +generated permuted indices for group of files. + +Copyright (C) 1990, 1991, 1993 by the Free Software Foundation, Inc. 
+ +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +@ignore +Permission is granted to process this file through TeX and print the +results, provided the printed document carries copying permission +notice identical to this one except for the removal of this paragraph +(this paragraph not being relevant to the printed manual). + +@end ignore +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the entire +resulting derived work is distributed under the terms of a permission +notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation approved +by the Foundation. +@end ifinfo + +@titlepage +@title ptx +@subtitle The GNU permuted indexer +@subtitle Edition 0.3, for ptx version 0.3 +@subtitle November 1993 +@author by Francois Pinard + +@page +@vskip 0pt plus 1filll +Copyright @copyright{} 1990, 1991, 1993 Free Software Foundation, Inc. + +Permission is granted to make and distribute verbatim copies of +this manual provided the copyright notice and this permission notice +are preserved on all copies. + +Permission is granted to copy and distribute modified versions of this +manual under the conditions for verbatim copying, provided that the entire +resulting derived work is distributed under the terms of a permission +notice identical to this one. + +Permission is granted to copy and distribute translations of this manual +into another language, under the above conditions for modified versions, +except that this permission notice may be stated in a translation approved +by the Foundation. 
+@end titlepage + +@node Top, Invoking ptx, (dir), (dir) +@chapter Introduction + +This is the 0.3 beta release of @code{ptx}, the GNU version of a +permuted index generator. This software has the main goal of providing +a replacement for the traditional @code{ptx} as found on System V +machines, able to handle small files quickly, while providing a platform +for more development. + +This version reimplements and extends traditional @code{ptx}. Among +other things, it can produce a readable @dfn{KWIC} (keywords in their +context) without the need of @code{nroff}, there is also an option to +produce @TeX{} compatible output. This version does not handle huge +input files, that is, those files which do not fit in memory all at +once. + +@emph{Please note} that an overall renaming of all options is +foreseeable. In fact, GNU ptx specifications are not frozen yet. + +@menu +* Invoking ptx:: How to use this program +* Compatibility:: The GNU extensions to @code{ptx} + + --- The Detailed Node Listing --- + +How to use this program + +* General options:: Options which affect general program behaviour. +* Charset selection:: Underlying character set considerations. +* Input processing:: Input fields, contexts, and keyword selection. +* Output formatting:: Types of output format, and sizing the fields. +@end menu + +@node Invoking ptx, Compatibility, Top, Top +@chapter How to use this program + +This tool reads a text file and essentially produces a permuted index, with +each keyword in its context. The calling sketch is one of: + +@example +ptx [@var{option} @dots{}] [@var{file} @dots{}] +@end example + +or: + +@example +ptx -G [@var{option} @dots{}] [@var{input} [@var{output}]] +@end example + +The @samp{-G} (or its equivalent: @samp{--traditional}) option disables +all GNU extensions and revert to traditional mode, thus introducing some +limitations, and changes several of the program's default option values. 
+When @samp{-G} is not specified, GNU extensions are always enabled. GNU +extensions to @code{ptx} are documented wherever appropriate in this +document. See @xref{Compatibility} for an explicit list of them. + +Individual options are explained later in this document. + +When GNU extensions are enabled, there may be zero, one or several +@var{file} after the options. If there is no @var{file}, the program +reads the standard input. If there is one or several @var{file}, they +give the name of input files which are all read in turn, as if all the +input files were concatenated. However, there is a full contextual +break between each file and, when automatic referencing is requested, +file names and line numbers refer to individual text input files. In +all cases, the program produces the permuted index onto the standard +output. + +When GNU extensions are @emph{not} enabled, that is, when the program +operates in traditional mode, there may be zero, one or two parameters +besides the options. If there is no parameters, the program reads the +standard input and produces the permuted index onto the standard output. +If there is only one parameter, it names the text @var{input} to be read +instead of the standard input. If two parameters are given, they give +respectively the name of the @var{input} file to read and the name of +the @var{output} file to produce. @emph{Be very careful} to note that, +in this case, the contents of file given by the second parameter is +destroyed. This behaviour is dictated only by System V @code{ptx} +compatibility, because GNU Standards discourage output parameters not +introduced by an option. + +Note that for @emph{any} file named as the value of an option or as an +input text file, a single dash @kbd{-} may be used, in which case +standard input is assumed. However, it would not make sense to use this +convention more than once per program invocation. + +@menu +* General options:: Options which affect general program behaviour. 
+* Charset selection:: Underlying character set considerations. +* Input processing:: Input fields, contexts, and keyword selection. +* Output formatting:: Types of output format, and sizing the fields. +@end menu + +@node General options, Charset selection, Invoking ptx, Invoking ptx +@section General options + +@table @code + +@item -C +@itemx --copyright +Prints a short note about the Copyright and copying conditions, then +exits without further processing. + +@item -G +@itemx --traditional +As already explained, this option disables all GNU extensions to +@code{ptx} and switches to traditional mode. + +@item --help +Prints a short help on standard output, then exits without further +processing. + +@item --version +Prints the program version on standard output, then exits without further +processing. + +@end table + +@node Charset selection, Input processing, General options, Invoking ptx +@section Charset selection + +As it is set up now, the program assumes that the input file is coded +using 8-bit ISO 8859-1 code, also known as Latin-1 character set, +@emph{unless} it is compiled for MS-DOS, in which case it uses the +character set of the IBM-PC. (GNU @code{ptx} is not known to work on +smaller MS-DOS machines anymore.) Compared to 7-bit ASCII, the set of +characters which are letters is then different; this fact alters the +behaviour of regular expression matching. Thus, the default regular +expression for a keyword allows foreign or diacriticized letters. +Keyword sorting, however, is still crude; it obeys the underlying +character set ordering quite blindly. + +@table @code + +@item -f +@itemx --ignore-case +Fold lower case letters to upper case for sorting. + +@end table + +@node Input processing, Output formatting, Charset selection, Invoking ptx +@section Word selection + +@table @code + +@item -b @var{file} +@itemx --break-file=@var{file} + +This option is an alternative way to option @code{-W} for describing +which characters make up words.
This option introduces the name of a +file which contains a list of characters which can@emph{not} be part of +one word, this file is called the @dfn{Break file}. Any character which +is not part of the Break file is a word constituent. If both options +@code{-b} and @code{-W} are specified, then @code{-W} has precedence and +@code{-b} is ignored. + +When GNU extensions are enabled, the only way to avoid newline as a +break character is to write all the break characters in the file with no +newline at all, not even at the end of the file. When GNU extensions +are disabled, spaces, tabs and newlines are always considered as break +characters even if not included in the Break file. + +@item -i @var{file} +@itemx --ignore-file=@var{file} + +The file associated with this option contains a list of words which will +never be taken as keywords in concordance output. It is called the +@dfn{Ignore file}. The file contains exactly one word in each line; the +end of line separation of words is not subject to the value of the +@code{-S} option. + +There is a default Ignore file used by @code{ptx} when this option is +not specified, usually found in @file{/usr/local/lib/eign} if this has +not been changed at installation time. If you want to deactivate the +default Ignore file, specify @code{/dev/null} instead. + +@item -o @var{file} +@itemx --only-file=@var{file} + +The file associated with this option contains a list of words which will +be retained in concordance output, any word not mentioned in this file +is ignored. The file is called the @dfn{Only file}. The file contains +exactly one word in each line; the end of line separation of words is +not subject to the value of the @code{-S} option. + +There is no default for the Only file. In the case there are both an +Only file and an Ignore file, a word will be subject to be a keyword +only if it is given in the Only file and not given in the Ignore file. 
+ +@item -r +@itemx --references + +On each input line, the leading sequence of non white characters will be +taken to be a reference that has the purpose of identifying this input +line on the produced permuted index. @xref{Output formatting}, for +more information about reference production. Using this option changes +the default value for option @code{-S}. + +Using this option, the program does not try very hard to remove +references from contexts in output, but it succeeds in doing so +@emph{when} the context ends exactly at the newline. If option +@code{-r} is used with @code{-S} default value, or when GNU extensions +are disabled, this condition is always met and references are completely +excluded from the output contexts. + +@item -S @var{regexp} +@itemx --sentence-regexp=@var{regexp} + +This option selects which regular expression will describe the end of a +line or the end of a sentence. In fact, there is no other distinction +between end of lines or end of sentences than the effect of this regular +expression, and input line boundaries have no special significance +outside this option. By default, when GNU extensions are enabled and if +@code{-r} option is not used, end of sentences are used. In this +case, the precise @var{regexp} is imported from GNU emacs: + +@example +[.?!][]\"')@}]*\\($\\|\t\\| \\)[ \t\n]* +@end example + +Whenever GNU extensions are disabled or if @code{-r} option is used, end +of lines are used; in this case, the default @var{regexp} is just: + +@example +\n +@end example + +Using an empty @var{regexp} is equivalent to completely disabling end of line or end +of sentence recognition. In this case, the whole file is considered to +be a single big line or sentence. The user might want to disallow all +truncation flag generation as well, through option @code{-F ""}. +@xref{Regexps, , Syntax of Regular Expressions, emacs, The GNU Emacs +Manual}.
+ +When the keywords happen to be near the beginning of the input line or +sentence, this often creates an unused area at the beginning of the +output context line; when the keywords happen to be near the end of the +input line or sentence, this often creates an unused area at the end of +the output context line. The program tries to fill those unused areas +by wrapping around context in them; the tail of the input line or +sentence is used to fill the unused area on the left of the output line; +the head of the input line or sentence is used to fill the unused area +on the right of the output line. + +As a matter of convenience to the user, many usual backslashed escape +sequences, as found in the C language, are recognized and converted to +the corresponding characters by @code{ptx} itself. + +@item -W @var{regexp} +@itemx --word-regexp=@var{regexp} + +This option selects which regular expression will describe each keyword. +By default, if GNU extensions are enabled, a word is a sequence of +letters; the @var{regexp} used is @code{\w+}. When GNU extensions are +disabled, a word is by default anything which ends with a space, a tab +or a newline; the @var{regexp} used is @code{[^ \t\n]+}. + +An empty @var{regexp} is equivalent to not using this option, letting the +default take effect. @xref{Regexps, , Syntax of Regular Expressions, emacs, +The GNU Emacs Manual}. + +As a matter of convenience to the user, many usual backslashed escape +sequences, as found in the C language, are recognized and converted to +the corresponding characters by @code{ptx} itself. + +@end table + +@node Output formatting, , Input processing, Invoking ptx +@section Output formatting + +Output format is mainly controlled by @code{-O} and @code{-T} options, +described in the table below. When neither @code{-O} nor @code{-T} is +selected, and if GNU extensions are enabled, the program chooses an +output format suited for a dumb terminal.
Each keyword occurrence is +output to the center of one line, surrounded by its left and right +contexts. Each field is properly justified, so the concordance output +could readily be observed. As a special feature, if automatic +references are selected by option @code{-A} and are output before the +left context, that is, if option @code{-R} is @emph{not} selected, then +a colon is added after the reference; this nicely interfaces with GNU +Emacs @code{next-error} processing. In this default output format, each +white space character, like newline and tab, is merely changed to +exactly one space, with no special attempt to compress consecutive +spaces. This might change in the future. Except for those white space +characters, every other character of the underlying set of 256 +characters is transmitted verbatim. + +Output format is further controlled by the following options. + +@table @code + +@item -g @var{number} +@itemx --gap-size=@var{number} + +Select the size of the minimum white gap between the fields on the output +line. + +@item -w @var{number} +@itemx --width=@var{number} + +Select the output maximum width of each final line. If references are +used, they are included or excluded from the output maximum width +depending on the value of option @code{-R}. If this option is not +selected, that is, when references are output before the left context, +the output maximum width takes into account the maximum length of all +references. If this option is selected, that is, when references are +output after the right context, the output maximum width does not take +into account the space taken by references, nor the gap that precedes +them. + +@item -A +@itemx --auto-reference + +Select automatic references. Each input line will have an automatic +reference made up of the file name and the line ordinal, with a single +colon between them. However, the file name will be empty when standard +input is being read.
If both @code{-A} and @code{-r} are selected, then +the input reference is still read and skipped, but the automatic +reference is used at output time, overriding the input reference. + +@item -R +@itemx --right-side-refs + +In default output format, when option @code{-R} is not used, any +reference produced by the effect of options @code{-r} or @code{-A} is +given at the beginning of each output line, before the left context. In +default output format, when option @code{-R} is specified, references +are rather given to the far right of output lines, after the right +context. For any other output format, option @code{-R} is almost +ignored, except for the fact that the width of references is @emph{not} +taken into account in total output width given by @code{-w} whenever +@code{-R} is selected. + +This option is automatically selected whenever GNU extensions are +disabled. + +@item -F @var{string} +@itemx --flag-truncation=@var{string} + +This option will request that any truncation in the output be reported +using the string @var{string}. Most output fields theoretically extend +towards the beginning or the end of the current line, or current +sentence, as selected with option @code{-S}. But there is a maximum +allowed output line width, changeable through option @code{-w}, which is +further divided into space for various output fields. When a field +cannot extend all the way to the beginning or the end of the current +line and still fit in the space allotted to it, then a truncation occurs. By default, +the string used is a single slash, as in @code{-F /}. + +@var{string} may have more than one character, as in @code{-F ...}. +Also, in the particular case @var{string} is empty (@code{-F ""}), +truncation flagging is disabled, and no truncation marks are appended in +this case. + +As a matter of convenience to the user, many usual backslashed escape +sequences, as found in the C language, are recognized and converted to +the corresponding characters by @code{ptx} itself.
+ +@item -M @var{string} +@itemx --macro-name=@var{string} + +Select another @var{string} to be used instead of @samp{xx}, while +generating output suitable for @code{nroff}, @code{troff} or @TeX{}. + +@item -O +@itemx --format=roff + +Choose an output format suitable for @code{nroff} or @code{troff} +processing. Each output line will look like: + +@example +.xx "@var{tail}" "@var{before}" "@var{keyword_and_after}" "@var{head}" "@var{ref}" +@end example + +so it will be possible to write an @samp{.xx} roff macro to take care of +the output typesetting. This is the default output format when GNU +extensions are disabled. Option @samp{-M} might be used to change +@samp{xx} to another macro name. + +In this output format, each non-graphical character, like newline and +tab, is merely changed to exactly one space, with no special attempt to +compress consecutive spaces. Each quote character: @kbd{"} is doubled +so it will be correctly processed by @code{nroff} or @code{troff}. + +@item -T +@itemx --format=tex + +Choose an output format suitable for @TeX{} processing. Each output +line will look like: + +@example +\xx @{@var{tail}@}@{@var{before}@}@{@var{keyword}@}@{@var{after}@}@{@var{head}@}@{@var{ref}@} +@end example + +@noindent +so it will be possible to write a @code{\xx} definition to take +care of the output typesetting. Note that when references are not being +produced, that is, neither option @code{-A} nor option @code{-r} is +selected, the last parameter of each @code{\xx} call is inhibited. +Option @samp{-M} might be used to change @samp{xx} to another macro +name. + +In this output format, some special characters, like @kbd{$}, @kbd{%}, +@kbd{&}, @kbd{#} and @kbd{_} are automatically protected with a +backslash. Curly brackets @kbd{@{}, @kbd{@}} are also protected with a +backslash, but also enclosed in a pair of dollar signs to force +mathematical mode. The backslash itself produces the sequence +@code{\backslash@{@}}.
Circumflex and tilde diacritics produce the +sequences @code{^\@{ @}} and @code{~\@{ @}} respectively. Other +diacriticized characters of the underlying character set produce an +appropriate @TeX{} sequence as far as possible. The other non-graphical +characters, like newline and tab, and all other characters which are +not part of ASCII, are merely changed to exactly one space, with no +special attempt to compress consecutive spaces. Let me know how to +improve this special character processing for @TeX{}. + +@end table + +@node Compatibility, , Invoking ptx, Top +@chapter The GNU extensions to @code{ptx} + +This version of @code{ptx} contains a few features which do not exist in +System V @code{ptx}. These extra features are suppressed by using the +@samp{-G} command line option, unless overridden by other command line +options. Some GNU extensions cannot be recovered by overriding, so the +simple rule is to avoid @samp{-G} if you care about GNU extensions. +Here are the differences between this program and System V @code{ptx}. + +@itemize @bullet + +@item +This program can read many input files at once, it always writes the +resulting concordance on standard output. On the other hand, System V +@code{ptx} reads only one file and produces the result on standard output +or, if a second @var{file} parameter is given on the command line, to that +@var{file}. + +Having output parameters not introduced by options is a quite dangerous +practice which GNU avoids as far as possible. So, for using @code{ptx} +portably between GNU and System V, you should pay attention to always +use it with a single input file, and always expect the result on +standard output. You might also want to automatically configure in a +@samp{-G} option to @code{ptx} calls in products using @code{ptx}, if +the configurator finds that the installed @code{ptx} accepts @samp{-G}.
+ +@item +The only options available in System V @code{ptx} are options @samp{-b}, +@samp{-f}, @samp{-g}, @samp{-i}, @samp{-o}, @samp{-r}, @samp{-t} and +@samp{-w}. All other options are GNU extensions and are not repeated in +this enumeration. Moreover, some options have a slightly different +meaning when GNU extensions are enabled, as explained below. + +@item +By default, concordance output is not formatted for @code{troff} or +@code{nroff}. It is rather formatted for a dumb terminal. @code{troff} +or @code{nroff} output may still be selected through option @code{-O}. + +@item +Unless @code{-R} option is used, the maximum reference width is +subtracted from the total output line width. With GNU extensions +disabled, width of references is not taken into account in the output +line width computations. + +@item +All 256 characters, even @kbd{NUL}s, are always read and processed from +input file with no adverse effect, even if GNU extensions are disabled. +However, System V @code{ptx} does not accept 8-bit characters, a few +control characters are rejected, and the tilde @kbd{~} is condemned. + +@item +Input line length is only limited by available memory, even if GNU +extensions are disabled. However, System V @code{ptx} processes only +the first 200 characters in each line. + +@item +The break (non-word) characters default to be every character except all +letters of the underlying character set, diacriticized or not. When GNU +extensions are disabled, the break characters default to space, tab and +newline only. + +@item +The program makes better use of output line width. If GNU extensions +are disabled, the program rather tries to imitate System V @code{ptx}, +but still, there are some slight disposition glitches this program does +not completely reproduce. + +@item +The user can specify both an Ignore file and an Only file. This is not +allowed with System V @code{ptx}.
+ +@end itemize + +@bye diff --git a/gnu/usr.bin/ptx/xmalloc.c b/gnu/usr.bin/ptx/xmalloc.c new file mode 100644 index 0000000..58a81b5 --- /dev/null +++ b/gnu/usr.bin/ptx/xmalloc.c @@ -0,0 +1,88 @@ +/* xmalloc.c -- malloc with out of memory checking + Copyright (C) 1990, 1991, 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifdef HAVE_CONFIG_H +#if defined (CONFIG_BROKETS) +/* We use <config.h> instead of "config.h" so that a compilation + using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h + (which it would do because it found this file in $srcdir). */ +#include <config.h> +#else +#include "config.h" +#endif +#endif + +#if __STDC__ +#define VOID void +#else +#define VOID char +#endif + +#include <sys/types.h> + +#if STDC_HEADERS +#include <stdlib.h> +#else +VOID *malloc (); +VOID *realloc (); +void free (); +#endif + +#if __STDC__ && defined (HAVE_VPRINTF) +void error (int, int, char const *, ...); +#else +void error (); +#endif + +/* Allocate N bytes of memory dynamically, with error checking. */ + +VOID * +xmalloc (n) + size_t n; +{ + VOID *p; + + p = malloc (n); + if (p == 0) + /* Must exit with 2 for `cmp'. */ + error (2, 0, "virtual memory exhausted"); + return p; +} + +/* Change the size of an allocated block of memory P to N bytes, + with error checking. + If P is NULL, run xmalloc. + If N is 0, run free and return NULL.
*/ + +VOID * +xrealloc (p, n) + VOID *p; + size_t n; +{ + if (p == 0) + return xmalloc (n); + if (n == 0) + { + free (p); + return 0; + } + p = realloc (p, n); + if (p == 0) + /* Must exit with 2 for `cmp'. */ + error (2, 0, "virtual memory exhausted"); + return p; +} -- cgit v1.1