summary refs log tree commit diff stats
path: root/gnu/lib/libregex
diff options
context:
space:
mode:
author conklin <conklin@FreeBSD.org> 1993-07-30 20:16:53 +0000
committer conklin <conklin@FreeBSD.org> 1993-07-30 20:16:53 +0000
commit 5e0c8d9ee2600d5f7d5c710e34249ae51db60ba7 (patch)
tree e01cf2a5cc6062467dbb628a7beef06eaa39845d /gnu/lib/libregex
parent 605993f7be3eb2154f219e29b739c6d76f373405 (diff)
download FreeBSD-src-5e0c8d9ee2600d5f7d5c710e34249ae51db60ba7.zip
FreeBSD-src-5e0c8d9ee2600d5f7d5c710e34249ae51db60ba7.tar.gz
GNU Regex 0.12
Diffstat (limited to 'gnu/lib/libregex')
-rw-r--r-- gnu/lib/libregex/AUTHORS 10
-rw-r--r-- gnu/lib/libregex/COPYING 339
-rw-r--r-- gnu/lib/libregex/ChangeLog 3030
-rw-r--r-- gnu/lib/libregex/INSTALL 117
-rw-r--r-- gnu/lib/libregex/Makefile 12
-rw-r--r-- gnu/lib/libregex/Makefile.gnu 99
-rw-r--r-- gnu/lib/libregex/Makefile.in 98
-rw-r--r-- gnu/lib/libregex/NEWS 62
-rw-r--r-- gnu/lib/libregex/README 60
-rw-r--r-- gnu/lib/libregex/VERSION 3
-rw-r--r-- gnu/lib/libregex/config.status 59
-rw-r--r-- gnu/lib/libregex/configure 462
-rw-r--r-- gnu/lib/libregex/configure.in 23
-rw-r--r-- gnu/lib/libregex/doc/Makefile 93
-rw-r--r-- gnu/lib/libregex/doc/Makefile.in 92
-rw-r--r-- gnu/lib/libregex/doc/include.awk 19
-rw-r--r-- gnu/lib/libregex/doc/regex.aux 136
-rw-r--r-- gnu/lib/libregex/doc/regex.cps 152
-rw-r--r-- gnu/lib/libregex/doc/regex.info 2836
-rw-r--r-- gnu/lib/libregex/doc/regex.texi 3138
-rw-r--r-- gnu/lib/libregex/doc/xregex.texi 3021
-rw-r--r-- gnu/lib/libregex/regex.c 4948
-rw-r--r-- gnu/lib/libregex/regex.h 490
-rw-r--r-- gnu/lib/libregex/test/ChangeLog 77
-rw-r--r-- gnu/lib/libregex/test/Makefile 169
-rw-r--r-- gnu/lib/libregex/test/Makefile.in 168
-rw-r--r-- gnu/lib/libregex/test/TAGS 373
-rw-r--r-- gnu/lib/libregex/test/alloca.c 194
-rw-r--r-- gnu/lib/libregex/test/bsd-interf.c 38
-rw-r--r-- gnu/lib/libregex/test/debugmalloc.c 273
-rw-r--r-- gnu/lib/libregex/test/emacsmalloc.c 844
-rw-r--r-- gnu/lib/libregex/test/fileregex.c 77
-rw-r--r-- gnu/lib/libregex/test/g++malloc.c 1288
-rw-r--r-- gnu/lib/libregex/test/getpagesize.h 25
-rw-r--r-- gnu/lib/libregex/test/iregex.c 164
-rw-r--r-- gnu/lib/libregex/test/main.c 49
-rw-r--r-- gnu/lib/libregex/test/malloc-test.c 47
-rw-r--r-- gnu/lib/libregex/test/other.c 503
-rw-r--r-- gnu/lib/libregex/test/printchar.c 14
-rw-r--r-- gnu/lib/libregex/test/psx-basic.c 253
-rw-r--r-- gnu/lib/libregex/test/psx-extend.c 1244
-rw-r--r-- gnu/lib/libregex/test/psx-generic.c 336
-rw-r--r-- gnu/lib/libregex/test/psx-group.c 440
-rw-r--r-- gnu/lib/libregex/test/psx-interf.c 624
-rw-r--r-- gnu/lib/libregex/test/psx-interv.c 140
-rw-r--r-- gnu/lib/libregex/test/regexcpp.sed 8
-rw-r--r-- gnu/lib/libregex/test/syntax.skel 74
-rw-r--r-- gnu/lib/libregex/test/test.c 782
-rw-r--r-- gnu/lib/libregex/test/test.h 141
-rw-r--r-- gnu/lib/libregex/test/tregress.c 464
-rw-r--r-- gnu/lib/libregex/test/upcase.c 39
-rw-r--r-- gnu/lib/libregex/test/xmalloc.c 21
52 files changed, 28168 insertions, 0 deletions
diff --git a/gnu/lib/libregex/AUTHORS b/gnu/lib/libregex/AUTHORS
new file mode 100644
index 0000000..058be99
--- /dev/null
+++ b/gnu/lib/libregex/AUTHORS
@@ -0,0 +1,10 @@
+Richard Stallman -- original version and continuing revisions of
+ regex.c and regex.h, and original version of the documentation.
+
+Karl Berry and Kathryn Hargreaves -- extensive modifications to above,
+ and all test files.
+
+Jim Blandy -- original version of re_set_registers, revisions to regex.c.
+
+Joe Arceneaux, David MacKenzie, Mike Haertel, Charles Hannum, and
+probably others -- revisions to regex.c.
diff --git a/gnu/lib/libregex/COPYING b/gnu/lib/libregex/COPYING
new file mode 100644
index 0000000..a43ea21
--- /dev/null
+++ b/gnu/lib/libregex/COPYING
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19yy name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/gnu/lib/libregex/ChangeLog b/gnu/lib/libregex/ChangeLog
new file mode 100644
index 0000000..ef919d2
--- /dev/null
+++ b/gnu/lib/libregex/ChangeLog
@@ -0,0 +1,3030 @@
+Fri Apr 2 17:31:59 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * Released version 0.12.
+
+ * regex.c (regerror): If errcode is zero, that's not a valid
+ error code, according to POSIX, but return "Success."
+
+ * regex.c (regerror): Remember to actually fetch the message
+ from re_error_msg.
+
+ * regex.c (regex_compile): Don't use the trick for ".*\n" on
+ ".+\n". Since the latter involves laying an extra choice
+ point, the backward jump isn't adjusted properly.
+
+Thu Mar 25 21:35:18 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * regex.c (regex_compile): In the handle_open and handle_close
+ sections, clear pending_exact to zero.
+
+Tue Mar 9 12:03:07 1993 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu)
+
+ * regex.c (re_search_2): In the loop which searches forward
+ using fastmap, don't forget to cast the character from the
+ string to an unsigned before using it as an index into the
+ translate map.
+
+Thu Jan 14 15:41:46 1993 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu)
+
+ * regex.h: Never define const; let the callers do it.
+ configure.in: Don't define USING_AUTOCONF.
+
+Wed Jan 6 20:49:29 1993 Jim Blandy (jimb@geech.gnu.ai.mit.edu)
+
+ * regex.c (regerror): Abort if ERRCODE is out of range.
+
+Sun Dec 20 16:19:10 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * configure.in: Arrange to #define USING_AUTOCONF.
+ * regex.h: If USING_AUTOCONF is #defined, don't mess with
+ `const' at all; autoconf has taken care of it.
+
+Mon Dec 14 21:40:39 1992 David J. MacKenzie (djm@kropotkin.gnu.ai.mit.edu)
+
+ * regex.h (RE_SYNTAX_AWK): Fix typo. From Arnold Robbins.
+
+Sun Dec 13 20:35:39 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * regex.c (compile_range): Fetch the range start and end by
+ casting the pattern pointer to an `unsigned char *' before
+ fetching through it.
+
+Sat Dec 12 09:41:01 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * regex.c: Undo change of 12/7/92; it's better for Emacs to
+ #define HAVE_CONFIG_H.
+
+Fri Dec 11 22:00:34 1992 Jim Meyering (meyering@hal.gnu.ai.mit.edu)
+
+ * regex.c: Define and use isascii-protected ctype.h macros.
+
+Fri Dec 11 05:10:38 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * regex.c (re_match_2): Undo Karl's November 10th change; it
+ keeps the group in :\(.*\) from matching :/ properly.
+
+Mon Dec 7 19:44:56 1992 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu)
+
+ * regex.c: #include config.h if either HAVE_CONFIG_H or emacs
+ is #defined.
+
+Tue Dec 1 13:33:17 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * regex.c [HAVE_CONFIG_H]: Include config.h.
+
+Wed Nov 25 23:46:02 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * regex.c (regcomp): Add parens around bitwise & for clarity.
+ Initialize preg->allocated to prevent segv.
+
+Tue Nov 24 09:22:29 1992 David J. MacKenzie (djm@goldman.gnu.ai.mit.edu)
+
+ * regex.c: Use HAVE_STRING_H, not USG.
+ * configure.in: Check for string.h, not USG.
+
+Fri Nov 20 06:33:24 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.c (SIGN_EXTEND_CHAR) [VMS]: Back out of this change,
+ since Roland Roberts now says it was a localism.
+
+Mon Nov 16 07:01:36 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.h (const) [!HAVE_CONST]: Test another cpp symbol (from
+ Autoconf) before zapping const.
+
+Sun Nov 15 05:36:42 1992 Jim Blandy (jimb@wookumz.gnu.ai.mit.edu)
+
+ * regex.c, regex.h: Changes for VMS from Roland B Roberts
+ <roberts@nsrl31.nsrl.rochester.edu>.
+
+Thu Nov 12 11:31:15 1992 Karl Berry (karl@cs.umb.edu)
+
+ * Makefile.in (distfiles): Include INSTALL.
+
+Tue Nov 10 09:29:23 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.c (re_match_2): At maybe_pop_jump, if at end of string
+ and pattern, just quit the matching loop.
+
+ * regex.c (LETTER_P): Rename to `WORDCHAR_P'.
+
+ * regex.c (AT_STRINGS_{BEG,END}): Take `d' as an arg; change
+ callers.
+
+ * regex.c (re_match_2) [!emacs]: In wordchar and notwordchar
+ cases, advance d.
+
+Wed Nov 4 15:43:58 1992 Karl Berry (karl@hal.gnu.ai.mit.edu)
+
+ * regex.h (const) [!__STDC__]: Don't define if it's already defined.
+
+Sat Oct 17 19:28:19 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.c (bcmp, bcopy, bzero): Only #define if they are not
+ already #defined.
+
+ * configure.in: Use AC_CONST.
+
+Thu Oct 15 08:39:06 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.h (const) [!const]: Conditionalize.
+
+Fri Oct 2 13:31:42 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.h (RE_SYNTAX_ED): New definition.
+
+Sun Sep 20 12:53:39 1992 Karl Berry (karl@cs.umb.edu)
+
+ * regex.[ch]: remove traces of `longest_p' -- dumb idea to put
+ this into the pattern buffer, as it means parallelism loses.
+
+ * Makefile.in (config.status): use sh to run configure --no-create.
+
+ * Makefile.in (realclean): OK, don't remove configure.
+
+Sat Sep 19 09:05:08 1992 Karl Berry (karl@hayley)
+
+ * regex.c (PUSH_FAILURE_POINT, POP_FAILURE_POINT) [DEBUG]: keep
+ track of how many failure points we push and pop.
+ (re_match_2) [DEBUG]: declare variables for that, and print results.
+ (DEBUG_PRINT4): new macro.
+
+ * regex.h (re_pattern_buffer): new field `longest_p' (to
+ eliminate backtracking if the user doesn't need it).
+ * regex.c (re_compile_pattern): initialize it (to 1).
+ (re_search_2): set it to zero if register information is not needed.
+ (re_match_2): if it's set, don't backtrack.
+
+ * regex.c (re_search_2): update fastmap only after checking that
+ the pattern is anchored.
+
+ * regex.c (re_match_2): do more debugging at maybe_pop_jump.
+
+ * regex.c (re_search_2): cast result of TRANSLATE for use in
+ array subscript.
+
+Thu Sep 17 19:47:16 1992 Karl Berry (karl@geech.gnu.ai.mit.edu)
+
+ * Version 0.11.
+
+Wed Sep 16 08:17:10 1992 Karl Berry (karl@hayley)
+
+ * regex.c (INIT_FAIL_STACK): rewrite as statements instead of a
+ complicated comma expr, to avoid compiler warnings (and also
+ simplify).
+ (re_compile_fastmap, re_match_2): change callers.
+
+ * regex.c (POP_FAILURE_POINT): cast pop of regstart and regend
+ to avoid compiler warnings.
+
+ * regex.h (RE_NEWLINE_ORDINARY): remove this syntax bit, and
+ remove uses.
+ * regex.c (at_{beg,end}line_loc_p): go the last mile: remove
+ the RE_NEWLINE_ORDINARY case which made the ^ in \n^ be an anchor.
+
+Tue Sep 15 09:55:29 1992 Karl Berry (karl@hayley)
+
+ * regex.c (at_begline_loc_p): new fn.
+ (at_endline_loc_p): simplify at_endline_op_p.
+ (regex_compile): in ^/$ cases, call the above.
+
+ * regex.c (POP_FAILURE_POINT): rewrite the fn as a macro again,
+ as lord's profiling indicates the function is 20% of the time.
+ (re_match_2): callers changed.
+
+ * configure.in (AC_MEMORY_H): remove, since we never use memcpy et al.
+
+Mon Sep 14 17:49:27 1992 Karl Berry (karl@hayley)
+
+ * Makefile.in (makeargs): include MFLAGS.
+
+Sun Sep 13 07:41:45 1992 Karl Berry (karl@hayley)
+
+ * regex.c (regex_compile): in \1..\9 case, make it always
+ invalid to use \<digit> if there is no preceding <digit>th subexpr.
+ * regex.h (RE_NO_MISSING_BK_REF): remove this syntax bit.
+
+ * regex.c (regex_compile): remove support for invalid empty groups.
+ * regex.h (RE_NO_EMPTY_GROUPS): remove this syntax bit.
+
+ * regex.c (FREE_VARIABLES) [!REGEX_MALLOC]: define as alloca (0),
+ to reclaim memory.
+
+ * regex.h (RE_SYNTAX_POSIX_SED): don't bother with this.
+
+Sat Sep 12 13:37:21 1992 Karl Berry (karl@hayley)
+
+ * README: incorporate emacs.diff.
+
+ * regex.h (_RE_ARGS) [!__STDC__]: define as empty parens.
+
+ * configure.in: add AC_ALLOCA.
+
+ * Put test files in subdir test, documentation in subdir doc.
+ Adjust Makefile.in and configure.in accordingly.
+
+Thu Sep 10 10:29:11 1992 Karl Berry (karl@hayley)
+
+ * regex.h (RE_SYNTAX_{POSIX_,}SED): new definitions.
+
+Wed Sep 9 06:27:09 1992 Karl Berry (karl@hayley)
+
+ * Version 0.10.
+
+Tue Sep 8 07:32:30 1992 Karl Berry (karl@hayley)
+
+ * xregex.texinfo: put the day of month into the date.
+
+ * Makefile.in (realclean): remove Texinfo-generated files.
+ (distclean): remove empty sorted index files.
+ (clean): remove dvi files, etc.
+
+ * configure.in: test for more Unix variants.
+
+ * fileregex.c: new file.
+ Makefile.in (fileregex): new target.
+
+ * iregex.c (main): move variable decls to smallest scope.
+
+ * regex.c (FREE_VARIABLES): free reg_{,info_}dummy.
+ (re_match_2): check that the allocation for those two succeeded.
+
+ * regex.c (FREE_VAR): replace FREE_NONNULL with this.
+ (FREE_VARIABLES): call it.
+ (re_match_2) [REGEX_MALLOC]: initialize all our vars to NULL.
+
+ * tregress.c (do_match): generalize simple_match.
+ (SIMPLE_NONMATCH): new macro.
+ (SIMPLE_MATCH): change from routine.
+
+ * Makefile.in (regex.texinfo): make file readonly, so we don't
+ edit it by mistake.
+
+ * many files (re_default_syntax): rename to `re_syntax_options';
+ call re_set_syntax instead of assigning to the variable where
+ possible.
+
+Mon Sep 7 10:12:16 1992 Karl Berry (karl@hayley)
+
+ * syntax.skel: don't use prototypes.
+
+ * {configure,Makefile}.in: new files.
+
+ * regex.c: include <string.h> `#if USG || STDC_HEADERS'; remove
+ obsolete test for `POSIX', and test for BSTRING.
+ Include <strings.h> if we are not USG or STDC_HEADERS.
+ Do not include <unistd.h>. What did we ever need that for?
+
+ * regex.h (RE_NO_EMPTY_ALTS): remove this.
+ (RE_SYNTAX_AWK): remove from here, too.
+ * regex.c (regex_compile): remove the check.
+ * xregex.texinfo (Alternation Operator): update.
+ * other.c (test_others): remove tests for this.
+
+ * regex.h (RE_DUP_MAX): undefine if already defined.
+
+ * regex.h: (RE_SYNTAX_POSIX*): redo to allow more operators, and
+ define new syntaxes with the minimal set.
+
+ * syntax.skel (main): use sscanf instead of scanf.
+
+ * regex.h (RE_SYNTAX_*GREP): new definitions from mike.
+
+ * regex.c (regex_compile): initialize the upper bound of
+ intervals at the beginning of the interval, not the end.
+ (From pclink@qld.tne.oz.au.)
+
+ * regex.c (handle_bar): rename to `handle_alt', for consistency.
+
+ * regex.c ({store,insert}_{op1,op2}): new routines (except the last).
+ ({STORE,INSERT}_JUMP{,2}): macros to replace the old routines,
+ which took arguments in different orders, and were generally weird.
+
+ * regex.c (PAT_PUSH*): rename to `BUF_PUSH*' -- we're not
+ appending info to the pattern!
+
+Sun Sep 6 11:26:49 1992 Karl Berry (karl@hayley)
+
+ * regex.c (regex_compile): delete the variable
+ `following_left_brace', since we never use it.
+
+ * regex.c (print_compiled_pattern): don't print the fastmap if
+ it's null.
+
+ * regex.c (re_compile_fastmap): handle
+ `on_failure_keep_string_jump' like `on_failure_jump'.
+
+ * regex.c (re_match_2): in `charset{,_not}' case, cast the bit
+ count to unsigned, not unsigned char, in case we have a full
+ 32-byte bit list.
+
+ * tregress.c (simple_match): remove.
+ (simple_test): rename as `simple_match'.
+ (simple_compile): print the error string if the compile failed.
+
+ * regex.c (DO_RANGE): rewrite as a function, `compile_range', so
+ we can debug it. Change pattern characters to unsigned char
+ *'s, and change the range variable to an unsigned.
+ (regex_compile): change calls.
+
+Sat Sep 5 17:40:49 1992 Karl Berry (karl@hayley)
+
+ * regex.h (_RE_ARGS): new macro to put in argument lists (if
+ ANSI) or omit them (if K&R); don't declare routines twice.
+
+ * many files (obscure_syntax): rename to `re_default_syntax'.
+
+Fri Sep 4 09:06:53 1992 Karl Berry (karl@hayley)
+
+ * GNUmakefile (extraclean): new target.
+ (realclean): delete the info files.
+
+Wed Sep 2 08:14:42 1992 Karl Berry (karl@hayley)
+
+ * regex.h: doc fix.
+
+Sun Aug 23 06:53:15 1992 Karl Berry (karl@hayley)
+
+ * regex.[ch] (re_comp): no const in the return type (from djm).
+
+Fri Aug 14 07:25:46 1992 Karl Berry (karl@hayley)
+
+ * regex.c (DO_RANGE): declare variables as unsigned chars, not
+ signed chars (from jimb).
+
+Wed Jul 29 18:33:53 1992 Karl Berry (karl@claude.cs.umb.edu)
+
+ * Version 0.9.
+
+ * GNUmakefile (distclean): do not remove regex.texinfo.
+ (realclean): remove it here.
+
+ * tregress.c (simple_test): initialize buf.buffer.
+
+Sun Jul 26 08:59:38 1992 Karl Berry (karl@hayley)
+
+ * regex.c (push_dummy_failure): new opcode and corresponding
+ case in the various routines. Pushed at the end of
+ alternatives.
+
+ * regex.c (jump_past_next_alt): rename to `jump_past_alt', for
+ brevity.
+ (no_pop_jump): rename to `jump'.
+
+ * regex.c (regex_compile) [DEBUG]: terminate printing of pattern
+ with a newline.
+
+ * NEWS: new file.
+
+ * tregress.c (simple_{compile,match,test}): routines to simplify all
+ these little tests.
+
+ * tregress.c: test for matching as much as possible.
+
+Fri Jul 10 06:53:32 1992 Karl Berry (karl@hayley)
+
+ * Version 0.8.
+
+Wed Jul 8 06:39:31 1992 Karl Berry (karl@hayley)
+
+ * regex.c (SIGN_EXTEND_CHAR): #undef any previous definition, as
+ ours should always work properly.
+
+Mon Jul 6 07:10:50 1992 Karl Berry (karl@hayley)
+
+ * iregex.c (main) [DEBUG]: conditionalize the call to
+ print_compiled_pattern.
+
+ * iregex.c (main): initialize buf.buffer to NULL.
+ * tregress.c (test_regress): likewise.
+
+ * regex.c (alloca) [sparc]: #if on HAVE_ALLOCA_H instead.
+
+ * tregress.c (test_regress): didn't have jla's test quite right.
+
+Sat Jul 4 09:02:12 1992 Karl Berry (karl@hayley)
+
+ * regex.c (re_match_2): only REGEX_ALLOCATE all the register
+ vectors if the pattern actually has registers.
+ (match_end): new variable to avoid having to use best_regend[0].
+
+ * regex.c (IS_IN_FIRST_STRING): rename to FIRST_STRING_P.
+
+ * regex.c: doc fixes.
+
+ * tregress.c (test_regress): new fastmap test forwarded by rms.
+
+ * tregress.c (test_regress): initialize the fastmap field.
+
+ * tregress.c (test_regress): new test from jla that aborted
+ in re_search_2.
+
+Fri Jul 3 09:10:05 1992 Karl Berry (karl@hayley)
+
+ * tregress.c (test_regress): add tests for translating charsets,
+ from kaoru.
+
+ * GNUmakefile (common): add alloca.o.
+ * alloca.c: new file, copied from bison.
+
+ * other.c (test_others): remove var `buf', since it's no longer used.
+
+ * Below changes from ro@TechFak.Uni-Bielefeld.DE.
+
+ * tregress.c (test_regress): initialize buf.allocated.
+
+ * regex.c (re_compile_fastmap): initialize `succeed_n_p'.
+
+ * GNUmakefile (regex): depend on $(common).
+
+Wed Jul 1 07:12:46 1992 Karl Berry (karl@hayley)
+
+ * Version 0.7.
+
+ * regex.c: doc fixes.
+
+Mon Jun 29 08:09:47 1992 Karl Berry (karl@fosse)
+
+ * regex.c (pop_failure_point): change string vars to
+ `const char *' from `unsigned char *'.
+
+ * regex.c: consolidate debugging stuff.
+ (print_partial_compiled_pattern): avoid enum clash.
+
+Mon Jun 29 07:50:27 1992 Karl Berry (karl@hayley)
+
+ * xmalloc.c: new file.
+ * GNUmakefile (common): add it.
+
+ * iregex.c (print_regs): new routine (from jimb).
+ (main): call it.
+
+Sat Jun 27 10:50:59 1992 Jim Blandy (jimb@pogo.cs.oberlin.edu)
+
+ * xregex.c (re_match_2): When we have accepted a match and
+ restored d from best_regend[0], we need to set dend
+ appropriately as well.
+
+Sun Jun 28 08:48:41 1992 Karl Berry (karl@hayley)
+
+ * tregress.c: rename from regress.c.
+
+ * regex.c (print_compiled_pattern): improve charset case to ease
+ byte-counting.
+ Also, don't distinguish between Emacs and non-Emacs
+ {not,}wordchar opcodes.
+
+ * regex.c (print_fastmap): move here.
+ * test.c: from here.
+ * regex.c (print_{{partial,}compiled_pattern,double_string}):
+ rename from ..._printer. Change calls here and in test.c.
+
+ * regex.c: create from xregex.c and regexinc.c for once and for
+ all, and change the debug fns to be extern, instead of static.
+ * GNUmakefile: remove traces of xregex.c.
+ * test.c: put in externs, instead of including regexinc.c.
+
+ * xregex.c: move interactive main program and scanstring to iregex.c.
+ * iregex.c: new file.
+ * upcase.c, printchar.c: new files.
+
+ * various doc fixes and other cosmetic changes throughout.
+
+ * regexinc.c (compiled_pattern_printer): change variable name,
+ for consistency.
+ (partial_compiled_pattern_printer): print other info about the
+ compiled pattern, besides just the opcodes.
+ * xregex.c (regex_compile) [DEBUG]: print the compiled pattern
+ when we're done.
+
+ * xregex.c (re_compile_fastmap): in the duplicate case, set
+ `can_be_null' and return.
+ Also, set `bufp->can_be_null' according to a new variable,
+ `path_can_be_null'.
+ Also, rewrite main while loop to not test `p != NULL', since
+ we never set it that way.
+ Also, eliminate special `can_be_null' value for the endline case.
+ (re_search_2): don't test for the special value.
+ * regex.h (struct re_pattern_buffer): remove the definition.
+
+Sat Jun 27 15:00:40 1992 Karl Berry (karl@hayley)
+
+ * xregex.c (re_compile_fastmap): remove the `RE_' from
+ `REG_RE_MATCH_NULL_AT_END'.
+ Also, assert the fastmap in the pattern buffer is non-null.
+ Also, reset `succeed_n_p' after we've
+ paid attention to it, instead of every time through the loop.
+ Also, in the `anychar' case, only clear fastmap['\n'] if the
+ syntax says to, and don't return prematurely.
+ Also, rearrange cases in some semblance of a rational order.
+ * regex.h (REG_RE_MATCH_NULL_AT_END): remove the `RE_' from the name.
+
+ * other.c: take bug reports from here.
+ * regress.c: new file for them.
+ * GNUmakefile (test): add it.
+ * main.c (main): new possible test.
+ * test.h (test_type): new value in enum.
+
+Thu Jun 25 17:37:43 1992 Karl Berry (karl@hayley)
+
+ * xregex.c (scanstring) [test]: new function from jimb to allow some
+ escapes.
+ (main) [test]: call it (on the string, not the pattern).
+
+ * xregex.c (main): make return type `int'.
+
+Wed Jun 24 10:43:03 1992 Karl Berry (karl@hayley)
+
+ * xregex.c (pattern_offset_t): change to `int', for the benefit
+ of patterns which compile to more than 2^15 bytes.
+
+ * xregex.c (GET_BUFFER_SPACE): remove spurious braces.
+
+ * xregex.texinfo (Using Registers): put in a stub to ``document''
+ the new function.
+ * regex.h (re_set_registers) [!__STDC__]: declare.
+ * xregex.c (re_set_registers): declare K&R style (also move to a
+ different place in the file).
+
+Mon Jun 8 18:03:28 1992 Jim Blandy (jimb@pogo.cs.oberlin.edu)
+
+ * regex.h (RE_NREGS): Doc fix.
+
+ * xregex.c (re_set_registers): New function.
+ * regex.h (re_set_registers): Declaration for new function.
+
+Fri Jun 5 06:55:18 1992 Karl Berry (karl@hayley)
+
+ * main.c (main): `return 0' instead of `exit (0)'. (From Paul Eggert)
+
+ * regexinc.c (SIGN_EXTEND_CHAR): cast to unsigned char.
+ (extract_number, EXTRACT_NUMBER): don't bother to cast here.
+
+Tue Jun 2 07:37:53 1992 Karl Berry (karl@hayley)
+
+ * Version 0.6.
+
+ * Change copyrights to `1985, 89, ...'.
+
+ * regex.h (REG_RE_MATCH_NULL_AT_END): new macro.
+ * xregex.c (re_compile_fastmap): initialize `can_be_null' to
+ `p==pend', instead of in the test at the top of the loop (as
+ it was, it was always being set).
+ Also, set `can_be_null'=1 if we would jump to the end of the
+ pattern in the `on_failure_jump' cases.
+ (re_search_2): check if `can_be_null' is 1, not nonzero. This
+ was the original test in rms' regex; why did we change this?
+
+ * xregex.c (re_compile_fastmap): rename `is_a_succeed_n' to
+ `succeed_n_p'.
+
+Sat May 30 08:09:08 1992 Karl Berry (karl@hayley)
+
+ * xregex.c (re_compile_pattern): declare `regnum' as `unsigned',
+ not `regnum_t', for the benefit of those patterns with more
+ than 255 groups.
+
+ * xregex.c: rename `failure_stack' to `fail_stack', for brevity;
+ likewise for `match_nothing' to `match_null'.
+
+ * regexinc.c (REGEX_REALLOCATE): take both the new and old
+ sizes, and copy only the old bytes.
+ * xregex.c (DOUBLE_FAILURE_STACK): pass both old and new.
+ * This change from Thorsten Ohl.
+
+Fri May 29 11:45:22 1992 Karl Berry (karl@hayley)
+
+ * regexinc.c (SIGN_EXTEND_CHAR): define as `(signed char) c'
+ instead of relying on __CHAR_UNSIGNED__, to work with
+ compilers other than GCC. From Per Bothner.
+
+ * main.c (main): change return type to `int'.
+
+Mon May 18 06:37:08 1992 Karl Berry (karl@hayley)
+
+ * regex.h (RE_SYNTAX_AWK): typo in RE_RE_UNMATCHED...
+
+Fri May 15 10:44:46 1992 Karl Berry (karl@hayley)
+
+ * Version 0.5.
+
+Sun May 3 13:54:00 1992 Karl Berry (karl@hayley)
+
+ * regex.h (struct re_pattern_buffer): now it's just `regs_allocated'.
+ (REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED): new constants.
+ * xregex.c (regexec, re_compile_pattern): set the field appropriately.
+ (re_match_2): and use it. bufp can't be const any more.
+
+Fri May 1 15:43:09 1992 Karl Berry (karl@hayley)
+
+ * regexinc.c: unconditionally include <sys/types.h>, first.
+
+ * regex.h (struct re_pattern_buffer): rename
+ `caller_allocated_regs' to `regs_allocated_p'.
+ * xregex.c (re_compile_pattern): same change here.
+ (regexec): and here.
+ (re_match_2): reallocate registers if necessary.
+
+Fri Apr 10 07:46:50 1992 Karl Berry (karl@hayley)
+
+ * regex.h (RE_SYNTAX{_POSIX,}_AWK): new definitions from Arnold.
+
+Sun Mar 15 07:34:30 1992 Karl Berry (karl at hayley)
+
+ * GNUmakefile (dist): versionize regex.{c,h,texinfo}.
+
+Tue Mar 10 07:05:38 1992 Karl Berry (karl at hayley)
+
+ * Version 0.4.
+
+ * xregex.c (PUSH_FAILURE_POINT): always increment the failure id.
+ (DEBUG_STATEMENT) [DEBUG]: execute the statement even if `debug'==0.
+
+ * xregex.c (pop_failure_point): if the saved string location is
+ null, keep the current value.
+ (re_match_2): at fail, test for a dummy failure point by
+ checking the restored pattern value, not string value.
+ (re_match_2): new case, `on_failure_keep_string_jump'.
+ (regex_compile): output this opcode in the .*\n case.
+ * regexinc.c (re_opcode_t): define the opcode.
+ (partial_compiled_pattern_printer): add the new case.
+
+Mon Mar 9 09:09:27 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (regex_compile): optimize .*\n to output an
+ unconditional jump to the ., instead of pushing failure points
+ each time through the loop.
+
+ * xregex.c (DOUBLE_FAILURE_STACK): compute the maximum size
+ ourselves (and correctly); change callers.
+
+Sun Mar 8 17:07:46 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (failure_stack_elt_t): change to `const char *', to
+ avoid warnings.
+
+ * regex.h (re_set_syntax): declare this.
+
+ * xregex.c (pop_failure_point) [DEBUG]: conditionally pass the
+ original strings and sizes; change callers.
+
+Thu Mar 5 16:35:35 1992 Karl Berry (karl at claude.cs.umb.edu)
+
+ * xregex.c (regnum_t): new type for register/group numbers.
+ (compile_stack_elt_t, regex_compile): use it.
+
+ * xregex.c (regexec): declare len as `int' to match re_search.
+
+ * xregex.c (re_match_2): don't declare p1 twice.
+
+ * xregex.c: change `while (1)' to `for (;;)' to avoid silly
+ compiler warnings.
+
+ * regex.h [__STDC__]: use #if, not #ifdef.
+
+ * regexinc.c (REGEX_REALLOCATE): cast the result of alloca to
+ (char *), to avoid warnings.
+
+ * xregex.c (regerror): declare variable as const.
+
+ * xregex.c (re_compile_pattern, re_comp): define as returning a const
+ char *.
+ * regex.h (re_compile_pattern, re_comp): likewise.
+
+Thu Mar 5 15:57:56 1992 Karl Berry (karl@hal)
+
+ * xregex.c (regcomp): declare `syntax' as unsigned.
+
+ * xregex.c (re_match_2): try to avoid compiler warnings about
+ unsigned comparisons.
+
+ * GNUmakefile (test-xlc): new target.
+
+ * regex.h (reg_errcode_t): remove trailing comma from definition.
+ * regexinc.c (re_opcode_t): likewise.
+
+Thu Mar 5 06:56:07 1992 Karl Berry (karl at hayley)
+
+ * GNUmakefile (dist): add version numbers automatically.
+ (versionfiles): new variable.
+ (regex.{c,texinfo}): don't add version numbers here.
+ * regex.h: put in placeholder instead of the version number.
+
+Fri Feb 28 07:11:33 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (re_error_msg): declare const, since it is.
+
+Sun Feb 23 05:41:57 1992 Karl Berry (karl at fosse)
+
+ * xregex.c (PAT_PUSH{,_2,_3}, ...): cast args to avoid warnings.
+ (regex_compile, regexec): return REG_NOERROR, instead
+ of 0, on success.
+ (boolean): define as char, and #define false and true.
+ * regexinc.c (STREQ): cast the result.
+
+Sun Feb 23 07:45:38 1992 Karl Berry (karl at hayley)
+
+ * GNUmakefile (test-cc, test-hc, test-pcc): new targets.
+
+ * regexinc.c (extract_number, extract_number_and_incr) [DEBUG]:
+ only define if we are debugging.
+
+ * xregex.c [_AIX]: do #pragma alloca first if necessary.
+ * regexinc.c [_AIX]: remove the #pragma from here.
+
+ * regex.h (reg_syntax_t): declare as unsigned, and redo the enum
+ as #define's again. Some compilers do stupid things with enums.
+
+Thu Feb 20 07:19:47 1992 Karl Berry (karl at hayley)
+
+ * Version 0.3.
+
+ * xregex.c, regex.h (newline_anchor_match_p): rename to
+ `newline_anchor'; dumb idea to change the name.
+
+Tue Feb 18 07:09:02 1992 Karl Berry (karl at hayley)
+
+ * regexinc.c: go back to original, i.e., don't include
+ <string.h> or define strchr.
+ * xregex.c (regexec): don't bother with adding characters after
+ newlines to the fastmap; instead, just don't use a fastmap.
+ * xregex.c (regcomp): set the buffer and fastmap fields to zero.
+
+ * xregex.texinfo (GNU r.e. compiling): have to initialize more
+ than two fields.
+
+ * regex.h (struct re_pattern_buffer): rename `newline_anchor' to
+ `newline_anchor_match_p', as we're back to two cases.
+ * xregex.c (regcomp, re_compile_pattern, re_comp): change
+ accordingly.
+ (re_match_2): at begline and endline, POSIX is not a special
+ case anymore; just check newline_anchor_match_p.
+
+Thu Feb 13 16:29:33 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (*empty_string*): rename to *null_string*, for brevity.
+
+Wed Feb 12 06:36:22 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (re_compile_fastmap): at endline, don't set fastmap['\n'].
+ (re_match_2): rewrite the begline/endline cases to take account
+ of the new field newline_anchor.
+
+Tue Feb 11 14:34:55 1992 Karl Berry (karl at hayley)
+
+ * regexinc.c [!USG etc.]: include <strings.h> and define strchr
+ as index.
+
+ * xregex.c (re_search_2): when searching backwards, declare `c'
+ as a char and use casts when using it as an array subscript.
+
+ * xregex.c (regcomp): if REG_NEWLINE, set
+ RE_HAT_LISTS_NOT_NEWLINE. Set the `newline_anchor' field
+ appropriately.
+ (regex_compile): compile [^...] as matching a \n according to
+ the syntax bit.
+ (regexec): if doing REG_NEWLINE stuff, compile a fastmap and add
+ characters after any \n's to the fastmap.
+ * regex.h (RE_HAT_LISTS_NOT_NEWLINE): new syntax bit.
+ (struct re_pattern_buffer): rename `posix_newline' to
+ `newline_anchor', define constants for its values.
+
+Mon Feb 10 07:22:50 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (re_compile_fastmap): combine the code at the top and
+ bottom of the loop, as it's essentially identical.
+
+Sun Feb 9 10:02:19 1992 Karl Berry (karl at hayley)
+
+ * xregex.texinfo (POSIX Translate Tables): remove this, as it
+ doesn't match the spec.
+
+ * xregex.c (re_compile_fastmap): if we finish off a path, go
+ back to the top (to set can_be_null) instead of returning
+ immediately.
+
+ * xregex.texinfo: changes from bob.
+
+Sat Feb 1 07:03:25 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (re_search_2): doc fix (from rms).
+
+Fri Jan 31 09:52:04 1992 Karl Berry (karl at hayley)
+
+ * xregex.texinfo (GNU Searching): clarify the range arg.
+
+ * xregex.c (re_match_2, at_endline_op_p): add extra parens to
+ get rid of GCC 2's (silly, IMHO) warning about && within ||.
+
+ * xregex.c (common_op_match_empty_string_p): use
+ MATCH_NOTHING_UNSET_VALUE, not -1.
+
+Thu Jan 16 08:43:02 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (SET_REGS_MATCHED): only set the registers from
+ lowest to highest.
+
+ * regexinc.c (MIN): new macro.
+ * xregex.c (re_match_2): only check min (num_regs,
+ regs->num_regs) when we set the returned regs.
+
+ * xregex.c (re_match_2): set registers after the first
+ num_regs to -1 before we return.
+
+Tue Jan 14 16:01:42 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (re_match_2): initialize max (RE_NREGS, re_nsub + 1)
+ registers (from rms).
+
+ * xregex.c, regex.h: don't abbreviate `19xx' to `xx'.
+
+ * regexinc.c [!emacs]: include <sys/types.h> before <unistd.h>.
+ (from ro@thp.Uni-Koeln.DE).
+
+Thu Jan 9 07:23:00 1992 Karl Berry (karl at hayley)
+
+ * xregex.c (*unmatchable): rename to `match_empty_string_p'.
+ (CAN_MATCH_NOTHING): rename to `REG_MATCH_EMPTY_STRING_P'.
+
+ * regexinc.c (malloc, realloc): remove prototypes, as they can
+ cause clashes (from rms).
+
+Mon Jan 6 12:43:24 1992 Karl Berry (karl at claude.cs.umb.edu)
+
+ * Version 0.2.
+
+Sun Jan 5 10:50:38 1992 Karl Berry (karl at hayley)
+
+ * xregex.texinfo: bring more or less up-to-date.
+ * GNUmakefile (regex.texinfo): generate from regex.h and
+ xregex.texinfo.
+ * include.awk: new file.
+
+ * xregex.c: change all calls to the fn extract_number_and_incr
+ to the macro.
+
+ * xregex.c (re_match_2) [emacs]: in at_dot, use PTR_CHAR_POS + 1,
+ instead of bf_* and sl_*. Cast d to unsigned char *, to match
+ the declaration in Emacs' buffer.h.
+ [emacs19]: in before_dot, at_dot, and after_dot, likewise.
+
+ * regexinc.c: unconditionally include <sys/types.h>.
+
+ * regexinc.c (alloca) [!alloca]: Emacs config files sometimes
+ define this, so don't define it if it's already defined.
+
+Sun Jan 5 06:06:53 1992 Karl Berry (karl at fosse)
+
+ * xregex.c (re_comp): fix type conflicts with regex_compile (we
+ haven't been compiling this).
+
+ * regexinc.c (SIGN_EXTEND_CHAR): use `__CHAR_UNSIGNED__', not
+ `CHAR_UNSIGNED'.
+
+ * regexinc.c (NULL) [!NULL]: define it (as zero).
+
+ * regexinc.c (extract_number): remove the temporaries.
+
+Sun Jan 5 07:50:14 1992 Karl Berry (karl at hayley)
+
+ * regex.h (regerror) [!__STDC__]: return a size_t, not a size_t *.
+
+ * xregex.c (PUSH_FAILURE_POINT, ...): declare `destination' as
+ `char *' instead of `void *', to match alloca declaration.
+
+ * xregex.c (regerror): use `size_t' for the intermediate values
+ as well as the return type.
+
+ * xregex.c (regexec): cast the result of malloc.
+
+ * xregex.c (regexec): don't initialize `private_preg' in the
+ declaration, as old C compilers can't do that.
+
+ * xregex.c (main) [test]: declare printchar void.
+
+ * xregex.c (assert) [!DEBUG]: define this to do nothing, and
+ remove #ifdef DEBUG's from around asserts.
+
+ * xregex.c (re_match_2): remove error message when not debugging.
+
+Sat Jan 4 09:45:29 1992 Karl Berry (karl at hayley)
+
+ * other.c: test the bizarre duplicate case in re_compile_fastmap
+ that I just noticed.
+
+ * test.c (general_test): don't test registers beyond the end of
+ correct_regs, as well as regs.
+
+ * xregex.c (regex_compile): at handle_close, don't assign to
+ *inner_group_loc if we didn't push a start_memory (because the
+ group number was too big). In fact, don't push or pop the
+ inner_group_offset in that case.
+
+ * regex.c: rename to xregex.c, since it's not the whole thing.
+ * regex.texinfo: likewise.
+ * GNUmakefile: change to match.
+
+ * regex.c [DEBUG]: only include <stdio.h> if debugging.
+
+ * regexinc.c (SIGN_EXTEND_CHAR) [CHAR_UNSIGNED]: if it's already
+ defined, don't redefine it.
+
+ * regex.c: define _GNU_SOURCE at the beginning.
+ * regexinc.c (isblank) [!isblank]: define it.
+ (isgraph) [!isgraph]: change conditional to this, and remove the
+ sequent stuff.
+
+ * regex.c (regex_compile): add `blank' character class.
+
+ * regex.c (regex_compile): don't use a uchar variable to loop
+ through all characters.
+
+ * regex.c (regex_compile): at '[', improve logic for checking
+ that we have enough space for the charset.
+
+ * regex.h (struct re_pattern_buffer): declare translate as
+ `char *' again. We only use it as an array subscript once, I think.
+
+ * regex.c (TRANSLATE): new macro to cast the data character
+ before subscripting.
+ (num_internal_regs): rename to `num_regs'.
+
+Fri Jan 3 07:58:01 1992 Karl Berry (karl at hayley)
+
+ * regex.h (struct re_pattern_buffer): declare `allocated' and
+ `used' as unsigned long, since these are never negative.
+
+ * regex.c (compile_stack_element): rename to compile_stack_elt_t.
+ (failure_stack_element): similarly.
+
+ * regexinc.c (TALLOC, RETALLOC): new macros to simplify
+ allocation of arrays.
+
+ * regex.h (re_*) [__STDC__]: don't declare string args unsigned
+ char *; that makes them incompatible with string constants.
+ (struct re_pattern_buffer): declare the pattern and translate
+ table as unsigned char *.
+ * regex.c (most routines): use unsigned char vs. char consistently.
+
+ * regex.h (re_compile_pattern): do not declare the length arg as
+ const.
+ * regex.c (re_compile_pattern): likewise.
+
+ * regex.c (POINTER_TO_REG): rename to `POINTER_TO_OFFSET'.
+
+ * regex.h (re_registers): declare `start' and `end' as
+ `regoff_t', instead of `int'.
+
+ * regex.c (regexec): if either of the malloc's for the register
+ information fail, return failure.
+
+ * regex.h (RE_NREGS): define this again, as 30 (from jla).
+ (RE_ALLOCATE_REGISTERS): remove this.
+ (RE_SYNTAX_*): remove it from definitions.
+ (re_pattern_buffer): remove `return_default_num_regs', add
+ `caller_allocated_regs'.
+ * regex.c (re_compile_pattern): clear no_sub and
+ caller_allocated_regs in the pattern.
+ (regcomp): set caller_allocated_regs.
+ (re_match_2): do all register allocation at the end of the
+ match; implement new semantics.
+
+ * regex.c (MAX_REGNUM): new macro.
+ (regex_compile): at handle_open and handle_close, if the group
+ number is too large, don't push the start/stop memory.
+
+Thu Jan 2 07:56:10 1992 Karl Berry (karl at hayley)
+
+ * regex.c (re_match_2): if the back reference is to a group that
+ never matched, then goto fail, not really_fail. Also, don't
+ test if the pattern can match the empty string. Why did we
+ ever do that?
+ (really_fail): this label no longer needed.
+
+ * regexinc.c [STDC_HEADERS]: use only this to test if we should
+ include <stdlib.h>.
+
+ * regex.c (DO_RANGE, regex_compile): translate in all cases
+ except the single character after a \.
+
+ * regex.h (RE_AWK_CLASS_HACK): rename to
+ RE_BACKSLASH_ESCAPE_IN_LISTS.
+ * regex.c (regex_compile): change use.
+
+ * regex.c (re_compile_fastmap): do not translate the characters
+ again; we already translated them at compilation. (From ylo@ngs.fi.)
+
+ * regex.c (re_match_2): in case for at_dot, invert sense of
+ comparison and find the character number properly. (From
+ worley@compass.com.)
+ (re_match_2) [emacs]: remove the cases for before_dot and
+ after_dot, since there's no way to specify them, and the code
+ is wrong (judging from this change).
+
+Wed Jan 1 09:13:38 1992 Karl Berry (karl at hayley)
+
+ * psx-{interf,basic,extend}.c, other.c: set `t' as the first
+ thing, so that if we run them in succession, general_test's
+ kludge to see if we're doing POSIX tests works.
+
+ * test.h (test_type): add `all_test'.
+ * main.c: add case for `all_test'.
+
+ * regexinc.c (partial_compiled_pattern_printer,
+ double_string_printer): don't print anything if we're passed null.
+
+ * regex.c (PUSH_FAILURE_POINT): do not scan for the highest and
+ lowest active registers.
+ (re_match_2): compute lowest/highest active regs at start_memory and
+ stop_memory.
+ (NO_{LOW,HIGH}EST_ACTIVE_REG): new sentinel values.
+ (pop_failure_point): return the lowest/highest active reg values
+ popped; change calls.
+
+ * regex.c [DEBUG]: include <assert.h>.
+ (various routines) [DEBUG]: change conditionals to assertions.
+
+ * regex.c (DEBUG_STATEMENT): new macro.
+ (PUSH_FAILURE_POINT): use it to increment num_regs_pushed.
+ (re_match_2) [DEBUG]: only declare num_regs_pushed if DEBUG.
+
+ * regex.c (*can_match_nothing): rename to *unmatchable.
+
+ * regex.c (re_match_2): at stop_memory, adjust argument reading.
+
+ * regex.h (re_pattern_buffer): declare `can_be_null' as a 2-bit
+ bit field.
+
+ * regex.h (re_pattern_buffer): declare `buffer' unsigned char *;
+ no, dumb idea. The pattern can have signed numbers.
+
+ * regex.c (re_match_2): in maybe_pop_jump case, skip over the
+ right number of args to the group operators, and don't do
+ anything with endline if posix_newline is not set.
+
+ * regex.c, regexinc.c (all the things we just changed): go back
+ to putting the inner group count after the start_memory,
+ because we need it in the on_failure_jump case in re_match_2.
+ But leave it after the stop_memory also, since we need it
+ there in re_match_2, and we don't have any way of getting back
+ to the start_memory.
+
+ * regexinc.c (partial_compiled_pattern_printer): adjust argument
+ reading for start/stop_memory.
+ * regex.c (re_compile_fastmap, group_can_match_nothing): likewise.
+
+Tue Dec 31 10:15:08 1991 Karl Berry (karl at hayley)
+
+ * regex.c (bits list routines): remove these.
+ (re_match_2): get the number of inner groups from the pattern,
+ instead of keeping track of it at start and stop_memory.
+ Put the count after the stop_memory, not after the
+ start_memory.
+ (compile_stack_element): remove `fixup_inner_group' member,
+ since we now put it in when we can compute it.
+ (regex_compile): at handle_open, don't push the inner group
+ offset, and at handle_close, don't pop it.
+
+ * regex.c (level routines): remove these, and their uses in
+ regex_compile. This was another manifestation of having to find
+ $'s that were endlines.
+
+ * regex.c (regexec): this does searching, not matching (a
+ well-disguised part of the standard). So rewrite to use
+ `re_search' instead of `re_match'.
+ * psx-interf.c (test_regexec): add tests to, uh, match.
+
+ * regex.h (RE_TIGHT_ALT): remove this; nobody uses it.
+ * regex.c: remove the code that was supposed to implement it.
+
+ * other.c (test_others): ^ and $ never match newline characters;
+ RE_CONTEXT_INVALID_OPS doesn't affect anchors.
+
+ * psx-interf.c (test_regerror): update for new error messages.
+
+ * psx-extend.c: it's now ok to have an alternative be just a $,
+ so remove all the tests which supposed that was invalid.
+
+Wed Dec 25 09:00:05 1991 Karl Berry (karl at hayley)
+
+ * regex.c (regex_compile): in handle_open, don't skip over ^ and
+ $ when checking for an empty group. POSIX has changed the
+ grammar.
+ * psx-extend.c (test_posix_extended): thus, move (^$) tests to
+ valid section.
+
+ * regexinc.c (boolean): move from here to test.h and regex.c.
+ * test files: declare verbose, omit_register_tests, and
+ test_should_match as boolean.
+
+ * psx-interf.c (test_posix_c_interface): remove the `c_'.
+ * main.c: likewise.
+
+ * psx-basic.c (test_posix_basic): ^ ($) is an anchor after
+ (before) an open (close) group.
+
+ * regex.c (re_match_2): in endline, correct precedence of
+ posix_newline condition.
+
+Tue Dec 24 06:45:11 1991 Karl Berry (karl at hayley)
+
+ * test.h: incorporate private-tst.h.
+ * test files: include test.h, not private-tst.h.
+
+ * test.c (general_test): set posix_newline to zero if we are
+ doing POSIX tests (unfortunately, it's difficult to call
+ regcomp in this case, which is what we should really be doing).
+
+ * regex.h (reg_syntax_t): make this an enumeration type which
+ defines the syntax bits; renames re_syntax_t.
+
+ * regex.c (at_endline_op_p): don't preincrement p; then if it's
+ not an empty string op, we lose.
+
+ * regex.h (reg_errcode_t): new enumeration type of the error
+ codes.
+ * regex.c (regex_compile): return that type.
+
+ * regex.c (regex_compile): in [, initialize
+ just_had_a_char_class to false; somehow I had changed this to
+ true.
+
+ * regex.h (RE_NO_CONSECUTIVE_REPEATS): remove this, since we
+ don't use it, and POSIX doesn't require this behavior anymore.
+ * regex.c (regex_compile): remove it from here.
+
+ * regex.c (regex_compile): remove the no_op insertions for
+ verify_and_adjust_endlines, since that doesn't exist anymore.
+
+ * regex.c (regex_compile) [DEBUG]: use printchar to print the
+ pattern, so unprintable bytes will print properly.
+
+ * regex.c: move re_error_msg back.
+ * test.c (general_test): print the compile error if the pattern
+ was invalid.
+
+Mon Dec 23 08:54:53 1991 Karl Berry (karl at hayley)
+
+ * regexinc.c: move re_error_msg here.
+
+ * regex.c (re_error_msg): the ``message'' for success must be
+ NULL, to keep the interface to re_compile_pattern the same.
+ (regerror): if the msg is null, use "Success".
+
+ * rename most test files for consistency. Change Makefile
+ correspondingly.
+
+ * test.c (most routines): add casts to (unsigned char *) when we
+ call re_{match,search}{,_2}.
+
+Sun Dec 22 09:26:06 1991 Karl Berry (karl at hayley)
+
+ * regex.c (re_match_2): declare string args as unsigned char *
+ again; don't declare non-pointer args const; declare the
+ pattern buffer const.
+ (re_match): likewise.
+ (re_search_2, re_search): likewise, except don't declare the
+ pattern const, since we make a fastmap.
+ * regex.h [__STDC__]: change prototypes.
+
+ * regex.c (regex_compile): return an error code, not a string.
+ (re_err_list): new table to map from error codes to string.
+ (re_compile_pattern): return an element of re_err_list.
+ (regcomp): don't test all the strings.
+ (regerror): just use the list.
+ (put_in_buffer): remove this.
+
+ * regex.c (equivalent_failure_points): remove this.
+
+ * regex.c (re_match_2): don't copy the string arguments into
+ non-const pointers. We never alter the data.
+
+ * regex.c (re_match_2): move assignment to `is_a_jump_n' out of
+ the main loop. Just initialize it right before we do
+ something with it.
+
+ * regex.[ch] (re_match_2): don't declare the int parameters const.
+
+Sat Dec 21 08:52:20 1991 Karl Berry (karl at hayley)
+
+ * regex.h (re_syntax_t): new type; declare to be unsigned
+ (previously we used int, but since we do bit operations on
+ this, unsigned is better, according to H&S).
+ (obscure_syntax, re_pattern_buffer): use that type.
+ * regex.c (re_set_syntax, regex_compile): likewise.
+
+ * regex.h (re_pattern_buffer): new field `posix_newline'.
+ * regex.c (re_comp, re_compile_pattern): set to zero.
+ (regcomp): set to REG_NEWLINE.
+ * regex.h (RE_HAT_LISTS_NOT_NEWLINE): remove this (we can just
+ check `posix_newline' instead.)
+
+ * regex.c (op_list_type, op_list, add_op): remove these.
+ (verify_and_adjust_endlines): remove this.
+ (pattern_offset_list_type, *pattern_offset* routines): and these.
+ These things all implemented the nonleading/nontrailing position
+ code, which was very long, had a few remaining problems, and
+ is no longer needed. So...
+
+ * regexinc.c (STREQ): new macro to abbreviate strcmp(,)==0, for
+ brevity. Change various places in regex.c to use it.
+
+ * regex{,inc}.c (enum regexpcode): change to a typedef
+ re_opcode_t, for brevity.
+
+ * regex.h (re_syntax_table) [SYNTAX_TABLE]: remove this; it
+ should only be in regex.c, I think, since we don't define it
+ in this case. Maybe it should be conditional on !SYNTAX_TABLE?
+
+ * regexinc.c (partial_compiled_pattern_printer): simplify and
+ distinguish the emacs/not-emacs (not)wordchar cases.
+
+Fri Dec 20 08:11:38 1991 Karl Berry (karl at hayley)
+
+ * regexinc.c (regexpcode) [emacs]: only define the Emacs opcodes
+ if we are ifdef emacs.
+
+ * regex.c (BUF_PUSH*): rename to PAT_PUSH*.
+
+ * regex.c (regex_compile): in $ case, go back to essentially the
+ original code for deciding endline op vs. normal char.
+ (at_endline_op_p): new routine.
+ * regex.h (RE_ANCHORS_ONLY_AT_ENDS, RE_CONTEXT_INVALID_ANCHORS,
+ RE_REPEATED_ANCHORS_AWAY, RE_NO_ANCHOR_AT_NEWLINE): remove
+ these. POSIX has simplified the rules for anchors in draft
+ 11.2.
+ (RE_NEWLINE_ORDINARY): new syntax bit.
+ (RE_CONTEXT_INDEP_ANCHORS): change description to be compatible
+ with POSIX.
+ * regex.texinfo (Syntax Bits): remove the descriptions.
+
+Mon Dec 16 08:12:40 1991 Karl Berry (karl at hayley)
+
+ * regex.c (re_match_2): in jump_past_next_alt, unconditionally
+ goto no_pop. The only register we were finding was one which
+ enclosed the whole alternative expression, not one around an
+ individual alternative. So we were never doing what we
+ thought we were doing, and this way makes (|a) against the
+ empty string fail.
+
+ * regex.c (regex_compile): remove `highest_ever_regnum', and
+ don't restore regnum from the stack; just put it into a
+ temporary to put into the stop_memory. Otherwise, groups
+ aren't numbered consecutively.
+
+ * regex.c (is_in_compile_stack): rename to
+ `group_in_compile_stack'; remove unnecessary test for the
+ stack being empty.
+
+ * regex.c (re_match_2): in on_failure_jump, skip no_op's before
+ checking for the start_memory, in case we were called from
+ succeed_n.
+
+Sun Dec 15 16:20:48 1991 Karl Berry (karl at hayley)
+
+ * regex.c (regex_compile): in duplicate case, use
+ highest_ever_regnum instead of regnum, since the latter is
+ reverted at stop_memory.
+
+ * regex.c (re_match_2): in on_failure_jump, if the * applied to
+ a group, save the information for that group and all inner
+ groups (by making it active), even though we're not inside it
+ yet.
+
+Sat Dec 14 09:50:59 1991 Karl Berry (karl at hayley)
+
+ * regex.c (PUSH_FAILURE_ITEM, POP_FAILURE_ITEM): new macros.
+ Use them instead of copying the stack manipulating a zillion
+ times.
+
+ * regex.c (PUSH_FAILURE_POINT, pop_failure_point) [DEBUG]: save
+ and restore a unique identification value for each failure point.
+
+ * regexinc.c (partial_compiled_pattern_printer): don't print an
+ extra / after duplicate commands.
+
+ * regex.c (regex_compile): in back-reference case, allow a back
+ reference to register `regnum'. Otherwise, even `\(\)\1'
+ fails, since regnum is 1 at the back-reference.
+
+ * regex.c (re_match_2): in fail, don't examine the pattern if we
+ restored to pend.
+
+ * test_private.h: rename to private_tst.h. Change includes.
+
+ * regex.c (extend_bits_list): compute existing size for realloc
+ in bytes, not blocks.
+
+ * regex.c (re_match_2): in jump_past_next_alt, the for loop was
+ missing its (empty) statement. Even so, some register tests
+ still fail, although in a different way than in the previous change.
+
+Fri Dec 13 15:55:08 1991 Karl Berry (karl at hayley)
+
+ * regex.c (re_match_2): in jump_past_next_alt, unconditionally
+ goto no_pop, since we weren't properly detecting if the
+ alternative matched something anyway. No, we need to not jump
+ to keep the register values correct; just change to not look at
+ register zero and not test RE_NO_EMPTY_ALTS (which is a
+ compile-time thing).
+
+ * regex.c (SET_REGS_MATCHED): start the loop at 1, since we never
+ care about register zero until the very end. (I think.)
+
+ * regex.c (PUSH_FAILURE_POINT, pop_failure_point): go back to
+ pushing and popping the active registers, instead of only doing
+ the registers before a group: (fooq|fo|o)*qbar against fooqbar
+ fails, since we restore back into the middle of group 1, yet it
+ isn't active, because the previous restore clobbered the active flag.
+
+Thu Dec 12 17:25:36 1991 Karl Berry (karl at hayley)
+
+ * regex.c (PUSH_FAILURE_POINT): do not call
+ `equivalent_failure_points' after all; it causes the registers
+ to be ``wrong'' (according to POSIX), and an infinite loop on
+ `((a*)*)*' against `ab'.
+
+ * regex.c (re_compile_fastmap): don't push `pend' on the failure
+ stack.
+
+Tue Dec 10 10:30:03 1991 Karl Berry (karl at hayley)
+
+ * regex.c (PUSH_FAILURE_POINT): if pushing same failure point that
+ is on the top of the stack, fail.
+ (equivalent_failure_points): new routine.
+
+ * regex.c (re_match_2): add debug statements for every opcode we
+ execute.
+
+ * regex.c (regex_compile/handle_close): restore
+ `fixup_inner_group_count' and `regnum' from the stack.
+
+Mon Dec 9 13:51:15 1991 Karl Berry (karl at hayley)
+
+ * regex.c (PUSH_FAILURE_POINT): declare `this_reg' as int, so
+ unsigned arithmetic doesn't happen when we don't want to save
+ the registers.
+
+Tue Dec 3 08:11:10 1991 Karl Berry (karl at hayley)
+
+ * regex.c (extend_bits_list): divide size by bits/block.
+
+ * regex.c (init_bits_list): remove redundant assignment to
+ `bits_list_ptr'.
+
+ * regexinc.c (partial_compiled_pattern_printer): don't do *p++
+ twice in the same expr.
+
+ * regex.c (re_match_2): at on_failure_jump, use the correct
+ pattern positions for getting the stuff following the start_memory.
+
+ * regex.c (struct register_info): remove the bits_list for the
+ inner groups; make that a separate variable.
+
+Mon Dec 2 10:42:07 1991 Karl Berry (karl at hayley)
+
+ * regex.c (PUSH_FAILURE_POINT): don't pass `failure_stack' as an
+ arg; change callers.
+
+ * regex.c (PUSH_FAILURE_POINT): print items in order they are
+ pushed.
+ (pop_failure_point): likewise.
+
+ * regex.c (main): prompt for the pattern and string.
+
+ * regex.c (FREE_VARIABLES) [!REGEX_MALLOC]: declare as nothing;
+ remove #ifdefs from around calls.
+
+ * regex.c (extract_number, extract_number_and_incr): declare static.
+
+ * regex.c: remove the canned main program.
+ * main.c: new file.
+ * Makefile (COMMON): add main.o.
+
+Tue Sep 24 06:26:51 1991 Kathy Hargreaves (kathy at fosse)
+
+ * regex.c (re_match_2): Made `pend' and `dend' not register variables.
+ Only set string2 to string1 if string1 isn't null.
+ Send address of p, d, regstart, regend, and reg_info to
+ pop_failure_point.
+ Put in more debug statements.
+
+ * regex.c [debug]: Added global variable.
+ (DEBUG_*PRINT*): Only print if `debug' is true.
+ (DEBUG_DOUBLE_STRING_PRINTER): Changed DEBUG_STRING_PRINTER's
+ name to this.
+ Changed some comments.
+ (PUSH_FAILURE_POINT): Moved and added some debugging statements.
+ Was saving regstart on the stack twice instead of saving both
+ regstart and regend; remedied this.
+ [NUM_REGS_ITEMS]: Changed from 3 to 4, as now save lowest and
+ highest active registers instead of highest used one.
+ [NUM_NON_REG_ITEMS]: Changed name of NUM_OTHER_ITEMS to this.
+ (NUM_FAILURE_ITEMS): Use active registers instead of number 0
+ through highest used one.
+ (re_match_2): Have pop_failure_point put things in the variables.
+ (pop_failure_point): Have it do what the fail case in re_match_2
+ did with the failure stack, instead of throwing away the stuff
+ popped off. re_match_2 can ignore results when it doesn't
+ need them.
+
+
+Thu Sep 5 13:23:28 1991 Kathy Hargreaves (kathy at fosse)
+
+ * regex.c (banner): Changed copyright years to be separate.
+
+ * regex.c [CHAR_UNSIGNED]: Put __ at both ends of this name.
+ [DEBUG, debug_count, *debug_p, DEBUG_PRINT_1, DEBUG_PRINT_2,
+ DEBUG_COMPILED_PATTERN_PRINTER, DEBUG_STRING_PRINTER]:
+ defined these for debugging.
+ (extract_number): Added this (debuggable) routine version of
+ the macro EXTRACT_NUMBER. Ditto for EXTRACT_NUMBER_AND_INCR.
+ (re_compile_pattern): Set return_default_num_regs if the
+ syntax bit RE_ALLOCATE_REGISTERS is set.
+ [REGEX_MALLOC]: Renamed USE_ALLOCA to this.
+ (BUF_POP): Got rid of this, as don't ever use it.
+ (regex_compile): Made the type of `pattern' not be register.
+ If DEBUG, print the pattern to compile.
+ (re_match_2): If had a `$' in the pattern before a `^' then
+ don't record the `^' as an anchor.
+ Put (enum regexpcode) before references to b, as suggested
+ [RE_NO_BK_BRACES]: Changed RE_NO_BK_CURLY_BRACES to this.
+ (remove_pattern_offset): Removed this unused routine.
+ (PUSH_FAILURE_POINT): Changed to only save active registers.
+ Put in debugging statements.
+ (re_compile_fastmap): Made `pattern' not a register variable.
+ Use routine for extracting numbers instead of macro.
+ (re_match_2): Made `p', `mcnt' and `mcnt2' not register variables.
+ Added `num_regs_pushed' for debugging.
+ Only malloc registers if the syntax bit RE_ALLOCATE_REGISTERS is set.
+ Put in debug statements.
+ Put the macro NOTE_INNER_GROUP's code inline, as it was
+ only called in one place.
+ For debugging, extract numbers using routines instead of macros.
+ In case fail: only restore pushed active registers, and added
+ debugging statements.
+ (pop_failure_point): Test for underfull stack.
+ (group_can_match_nothing, common_op_can_match_nothing): For
+ debugging, extract numbers using routines instead of macros.
+ (regexec): Changed formal parameters to not be prototypes.
+ Don't initialize `regs' or `private_preg' in their declarations.
+
+Tue Jul 23 18:38:36 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h [RE_CONTEXT_INDEP_OPS]: Moved the anchor stuff out of
+ this bit.
+ [RE_UNMATCHED_RIGHT_PAREN_ORD]: Defined this bit.
+ [RE_CONTEXT_INVALID_ANCHORS]: Defined this bit.
+ [RE_CONTEXT_INDEP_ANCHORS]: Defined this bit.
+ Added RE_CONTEXT_INDEP_ANCHORS to all syntaxes which had
+ RE_CONTEXT_INDEP_OPS.
+ Took RE_ANCHORS_ONLY_AT_ENDS out of the POSIX basic syntax.
+ Added RE_UNMATCHED_RIGHT_PAREN_ORD to the POSIX extended
+ syntax.
+ Took RE_REPEATED_ANCHORS_AWAY out of the POSIX extended syntax.
+ Defined REG_NOERROR (which will probably have to go away again).
+ Changed the type `off_t' to `regoff_t'.
+
+ * regex.c: Changed some comments.
+ (regex_compile): Added variable `had_an_endline' to keep track
+ of if hit a `$' since the beginning of the pattern or the last
+ alternative (if any).
+ Changed RE_CONTEXT_INVALID_OPS and RE_CONTEXT_INDEP_OPS to
+ RE_CONTEXT_INVALID_ANCHORS and RE_CONTEXT_INDEP_ANCHORS where
+ appropriate.
+ Put a `no_op' in the pattern if a repeat is only zero or one
+ times; in this case and if it is many times (whereupon a jump
+ backwards is pushed instead), keep track of the operator for
+ verify_and_adjust_endlines.
+ If RE_UNMATCHED_RIGHT_PAREN is set, make an unmatched
+ close-group operator match `)'.
+ Changed all error exits to exit (1).
+ (remove_pattern_offset): Added this routine, but don't use it.
+ (verify_and_adjust_endlines): At top of routine, if initialize
+ routines run out of memory, return true after setting
+ enough_memory false.
+ At end of endline, et al. case, don't set *p to no_op.
+ Repetition operators also set the level and active groups'
+ match statuses, unless RE_REPEATED_ANCHORS_AWAY is set.
+ (get_group_match_status): Put a return in front of call to get_bit.
+ (re_compile_fastmap): Changed is_a_succeed_n to a boolean.
+ If at end of pattern, then if the failure stack isn't empty,
+ go back to the failure point.
+ In *jump* case, only pop the stack if what's on top of it is
+ where we've just jumped to.
+ (re_search_2): Return -2 instead of val if val is -2.
+ (group_can_match_nothing, alternative_can_match_nothing,
+ common_op_can_match_nothing): Now pass in reg_info for the
+ `duplicate' case.
+ (re_match_2): Don't skip over the next alternative also if
+ empty alternatives aren't allowed.
+ In fail case, if failed to a backwards jump that's part of a
+ repetition loop, pop the current failure point and use the
+ next one.
+ (pop_failure_point): Check that there's as many register items
+ on the failure stack as the stack says there are.
+ (common_op_can_match_nothing): Added variables `ret' and
+ `reg_no' so can set reg_info for the group encountered.
+ Also break without doing anything if hit a no_op or the other
+ kinds of `endline's.
+ If not done already, set reg_info in start_memory case.
+ Put in no_pop_jump for an optimized succeed_n of zero repetitions.
+ In succeed_n case, if the number isn't zero, then return false.
+ Added `duplicate' case.
+
+Sat Jul 13 11:27:38 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (REG_NOERROR): Added this error code definition.
+
+ * regex.c: Took some redundant parens out of macros.
+ (enum regexpcode): Added jump_past_next_alt.
+ Wrapped some macros in `do..while (0)'.
+ Changed some comments.
+ (regex_compile): Use `fixup_alt_jump' instead of `fixup_jump'.
+ Use `maybe_pop_jump' instead of `maybe_pop_failure_jump'.
+ Use `jump_past_next_alt' instead of `no_pop_jump' when at the
+ end of an alternative.
+ (re_match_2): Used REGEX_ALLOCATE for the registers stuff.
+ In stop_memory case: Add more boolean tests to see if the
+ group is in a loop.
+ Added jump_past_next_alt case, which doesn't jump over the
+ next alternative if the last one didn't match anything.
+ Unfortunately, to make this work with, e.g., `(a+?*|b)*'
+ against `bb', I also had to pop the alternative's failure
+ point, which in turn broke backtracking!
+ In fail case: Detect a dummy failure point by looking at
+ failure_stack.avail - 2, not stack[-2].
+ (pop_failure_point): Only pop if the stack isn't empty; don't
+ give an error if it is. (Not sure yet this is correct.)
+ (group_can_match_nothing): Make it return a boolean instead of int.
+ Make it take an argument indicating the end of where it should look.
+ If find a group that can match nothing, set the pointer
+ argument to past the group in the pattern.
+ Took out cases which can share with alternative_can_match_nothing
+ and call common_op_can_match_nothing.
+ Took ++ out of switch, so could call common_op_can_match_nothing.
+ Wrote lots more for on_failure_jump case to handle alternatives.
+ Main loop now doesn't look for matching stop_memory, but
+ rather the argument END; return true if hit the matching
+ stop_memory; this way can call itself for inner groups.
+ (alternative_can_match_nothing): Added for alternatives.
+ (common_op_can_match_nothing): Added for previous two routines'
+ common operators.
+ (regerror): Returns a message saying there's no error if gets
+ sent REG_NOERROR.
+
+Wed Jul 3 10:43:15 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c: Removed unnecessary enclosing parens from several macros.
+ Put `do..while (0)' around a few.
+ Corrected some comments.
+ (INIT_FAILURE_STACK_SIZE): Deleted in favor of using
+ INIT_FAILURE_ALLOC.
+ (INIT_FAILURE_STACK, DOUBLE_FAILURE_STACK, PUSH_PATTERN_OP,
+ PUSH_FAILURE_POINT): Made routines of the same name (but with all
+ lowercase letters) into these macros, so could use `alloca'
+ when USE_ALLOCA is defined. The reason is stated below for
+ bits lists. Deleted analogous routines.
+ (re_compile_fastmap): Added variable void *destination for
+ PUSH_PATTERN_OP.
+ (re_match_2): Added variable void *destination for REGEX_REALLOCATE.
+ Used the failure stack macros in place of the routines.
+ Detected a dummy failure point by inspecting the failure stack's
+ (avail - 2)th element, not failure_stack.stack[-2]. This bug
+ arose when used the failure stack macros instead of the routines.
+
+ * regex.c [USE_ALLOCA]: Put this conditional around previous
+ alloca stuff and defined these to work differently depending
+ on whether or not USE_ALLOCA is defined:
+ (REGEX_ALLOCATE): Uses either `alloca' or `malloc'.
+ (REGEX_REALLOCATE): Uses either `alloca' or `realloc'.
+ (INIT_BITS_LIST, EXTEND_BITS_LIST, SET_BIT_TO_VALUE): Defined
+ macro versions of routines with the same name (only with all
+ lowercase letters) so could use `alloca' in re_match_2. This
+ is to prevent core leaks when C-g is used in Emacs and to make
+ things faster and avoid storage fragmentation. These things
+ have to be macros because the results of `alloca' go away with
+ the routine by which it's called.
+ (BITS_BLOCK_SIZE, BITS_BLOCK, BITS_MASK): Moved to above the
+ above-mentioned macros instead of before the routines defined
+ below regex_compile.
+ (set_bit_to_value): Compacted some code.
+ (reg_info_type): Changed inner_groups field to be bits_list_type
+ so could be arbitrarily long and thus handle arbitrary nesting.
+ (NOTE_INNER_GROUP): Put `do...while (0)' around it so could
+ use as a statement.
+ Changed code to use bits lists.
+ Added variable void *destination for REGEX_REALLOCATE (whose call
+ is several levels in).
+ Changed variable name of `this_bit' to `this_reg'.
+ (FREE_VARIABLES): Only define and use if USE_ALLOCA is defined.
+ (re_match_2): Use REGEX_ALLOCATE instead of malloc.
+ Instead of setting INNER_GROUPS of reg_info to zero, have to
+ use INIT_BITS_LIST and return -2 (and free variables if
+ USE_ALLOCA isn't defined) if it fails.
+
+Fri Jun 28 13:45:07 1991 Karl Berry (karl at hayley)
+
+ * regex.c (re_match_2): set value of `dend' when we restore `d'.
+
+ * regex.c: remove declaration of alloca.
+
+ * regex.c (MISSING_ISGRAPH): rename to `ISGRAPH_MISSING'.
+
+ * regex.h [_POSIX_SOURCE]: remove these conditionals; always
+ define POSIX stuff.
+ * regex.c (_POSIX_SOURCE): change conditionals to use `POSIX'
+ instead.
+
+Sat Jun 1 16:56:50 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.*: Changed RE_CONTEXTUAL_* to RE_CONTEXT_*,
+ RE_TIGHT_VBAR to RE_TIGHT_ALT, RE_NEWLINE_OR to
+ RE_NEWLINE_ALT, and RE_DOT_MATCHES_NEWLINE to RE_DOT_NEWLINE.
+
+Wed May 29 09:24:11 1991 Karl Berry (karl at hayley)
+
+ * regex.texinfo (POSIX Pattern Buffers): cross-reference the
+ correct node name (Match-beginning-of-line, not ..._line).
+ (Syntax Bits): put @code around all syntax bits.
+
+Sat May 18 16:29:58 1991 Karl Berry (karl at hayley)
+
+ * regex.c (global): add casts to keep broken compilers from
+ complaining about malloc and realloc calls.
+
+ * regex.c (isgraph) [MISSING_ISGRAPH]: change test to this,
+ instead of `#ifndef isgraph', since broken compilers can't
+ have both a macro and a symbol by the same name.
+
+ * regex.c (re_comp, re_exec) [_POSIX_SOURCE]: do not define.
+ (regcomp, regfree, regexec, regerror) [_POSIX_SOURCE && !emacs]:
+ only define in this case.
+
+Mon May 6 17:37:04 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (re_search, re_search_2): Changed BUFFER to not be const.
+
+ * regex.c (re_compile_pattern): `^' is in a leading position if
+ it precedes a newline.
+ (various routines): Added or changed header comments.
+ (double_pattern_offsets_list): Changed name from
+ `extend_pattern_offsets_list'.
+ (adjust_pattern_offsets_list): Changed return value from
+ unsigned to void.
+ (verify_and_adjust_endlines): Now returns `true' and `false'
+ instead of 1 and 0.
+ `$' is in a leading position if it follows a newline.
+ (set_bit_to_value, get_bit_value): Exit with error if POSITION < 0
+ so now calling routines don't have to.
+ (init_failure_stack, inspect_failure_stack_top,
+ pop_failure_stack_top, push_pattern_op, double_failure_stack):
+ Now return value unsigned instead of boolean.
+ (re_search, re_search_2): Changed BUFP to not be const.
+ (re_search_2): Added variable const `private_bufp' to send to
+ re_match_2.
+ (push_failure_point): Made return value unsigned instead of boolean.
+
+Sat May 4 15:32:22 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (re_compile_fastmap): Added extern for this.
+ Changed some comments.
+
+ * regex.c (re_compile_pattern): In case handle_bar: put invalid
+ pattern test before levels matching stuff.
+ Changed some comments.
+ Added optimizing test for detecting an empty alternative that
+ ends with a trailing '$' at the end of the pattern.
+ (re_compile_fastmap): Moved failure_stack stuff to before this
+ so could use it. Made its stack dynamic.
+ Made it return an int so that it could return -2 if its stack
+ couldn't be allocated.
+ Added to header comment (about the return values).
+ (init_failure_stack): Wrote so both re_match_2 and
+ re_compile_fastmap could use it for similar stacks.
+ (double_failure_stack): Added for above reasons.
+ (push_pattern_op): Wrote for re_compile_fastmap.
+ (re_search_2): Now return -2 if re_compile_fastmap does.
+ (re_match_2): Made regstart and regend type failure_stack_element*.
+ (push_failure_point): Made pattern_place and string_place type
+ failure_stack_element*.
+ Call double_failure_stack now.
+ Return true instead of 1.
+
+Wed May 1 12:57:21 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (remove_intervening_anchors): Avoid erroneously making
+ ops into no_op's by making them no_op only when they're beglines.
+ (verify_and_adjust_endlines): Don't make '$' a normal character
+ if it's before a newline.
+ Look for the endline op in *p, not p[1].
+ (failure_stack_element): Added this declaration.
+ (failure_stack_type): Added this declaration.
+ (INIT_FAILURE_STACK_SIZE, FAILURE_STACK_EMPTY,
+ FAILURE_STACK_PTR_EMPTY, REMAINING_AVAIL_SLOTS): Added for
+ failure stack.
+ (FAILURE_ITEM_SIZE, PUSH_FAILURE_POINT): Deleted.
+ (FREE_VARIABLES): Now free failure_stack.stack instead of stackb.
+ (re_match_2): deleted variables `initial_stack', `stackb',
+ `stackp', and `stacke' and added `failure_stack' to replace them.
+ Replaced calls to PUSH_FAILURE_POINT with those to
+ push_failure_point.
+ (push_failure_point): Added for re_match_2.
+ (pop_failure_point): Rewrote to use a failure_stack_type of stack.
+ (can_match_nothing): Moved definition to below re_match_2.
+ (bcmp_translate): Moved definition to below re_match_2.
+
+Mon Apr 29 14:20:54 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (enum regexpcode): Added codes endline_before_newline
+ and repeated_endline_before_newline so could detect these
+ types of endlines in the intermediate stages of a compiled
+ pattern.
+ (INIT_FAILURE_ALLOC): Renamed NFAILURES to this and set it to 5.
+ (BUF_PUSH): Put `do {...} while 0' around this.
+ (BUF_PUSH_2): Defined this to cut down on expansion of EXTEND_BUFFER.
+ (regex_compile): Changed some comments.
+ Now push endline_before_newline if find a `$' before a newline
+ in the pattern.
+ If a `$' might turn into an ordinary character, set laststart
+ to point to it.
+ In '^' case, if syntax bit RE_TIGHT_VBAR is set, then for `^'
+ to be in a leading position, it must be first in the pattern.
+ Don't have to check in one of the else clauses that it's not set.
+ If RE_CONTEXTUAL_INDEP_OPS isn't set but RE_ANCHORS_ONLY_AT_ENDS
+ is, make '^' a normal character if it isn't first in the pattern.
+ Can only detect at the end if a '$' after an alternation op is a
+ trailing one, so can't immediately detect empty alternatives
+ if a '$' follows a vbar.
+ Added a picture of the ``success jumps'' in alternatives.
+ Have to set bufp->used before calling verify_and_adjust_endlines.
+ Also do it before returning all error strings.
+ (remove_intervening_anchors): Now replaces the anchor with
+ repeated_endline_before_newline if it's an endline_before_newline.
+ (verify_and_adjust_endlines): Deleted SYNTAX parameter (could
+ use bufp's) and added GROUP_FORWARD_MATCH_STATUS so could
+ detect back references referring to empty groups.
+ Added variable `bend' to point past the end of the pattern buffer.
+ Added variable `previous_p' so wouldn't have to reinspect the
+ pattern buffer to see what op we just looked at.
+ Added endline_before_newline and repeated_endline_before_newline
+ cases.
+ When checking if in a trailing position, added case where '$'
+ has to be at the pattern's end if either of the syntax bits
+ RE_ANCHORS_ONLY_AT_ENDS or RE_TIGHT_VBAR are set.
+ Since `endline' can have the intermediate form `endline_in_repeat',
+ have to change it to `endline' if RE_REPEATED_ANCHORS_AWAY
+ isn't set.
+ Now disallow empty alternatives with trailing endlines in them
+ if RE_NO_EMPTY_ALTS is set.
+ Now don't make '$' an ordinary character if it precedes a newline.
+ Don't make it an ordinary character if it's before a newline.
+ Back references now affect the level matching something only if
+ they refer to nonempty groups.
+ (can_match_nothing): Now increment p1 in the switch, which
+ changes many of the cases, but makes the code more like what
+ it was derived from.
+ Adjust the return statement to reflect above.
+ (struct register_info): Made `can_match_nothing' field an int
+ instead of a bit so could have -1 in it if never set.
+ (MAX_FAILURE_ITEMS): Changed name from MAX_NUM_FAILURE_ITEMS.
+ (FAILURE_ITEM_SIZE): Defined how much space a failure item uses.
+ (PUSH_FAILURE_POINT): Changed variable `last_used_reg's name
+ to `highest_used_reg'.
+ Added variable `num_stack_items' and changed `len's name to
+ `stack_length'.
+ Test failure stack limit in terms of number of items in it, not
+ in terms of its length. rms' fix tested length against number
+ of items, which was a misunderstanding.
+ Use `realloc' instead of `alloca' to extend the failure stack.
+ Use shifts instead of multiplying by 2.
+ (FREE_VARIABLES): Free `stackb' instead of `initial_stack', as
+ it may have been reallocated.
+ (re_match_2): When mallocing `initial_stack', now multiply
+ the number of items wanted (what was there before) by
+ FAILURE_ITEM_SIZE.
+ (pop_failure_point): Need this procedure form of the macro of
+ the same name for debugging, so left it in and deleted the
+ macro.
+ (recomp): Don't free the pattern buffer's translate field.
+
+Mon Apr 15 09:47:47 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_DUP_MAX): Moved to outside of #ifdef _POSIX_SOURCE.
+ * regex.c (#include <sys/types.h>): Removed #ifdef _POSIX_SOURCE
+ condition.
+ (malloc, realloc): Made return type void* #ifdef __STDC__.
+ (enum regexpcode): Added endline_in_repeat for the compiler's
+ use; this never ends up on the final compiled pattern.
+ (INIT_PATTERN_OFFSETS_LIST_SIZE): Initial size for
+ pattern_offsets_list_type.
+ (pattern_offset_type): Type for pattern offsets.
+ (pattern_offsets_list_type): Type for keeping a list of
+ pattern offsets.
+ (anchor_list_type): Changed to above type.
+ (PATTERN_OFFSETS_LIST_PTR_FULL): Tests if a pattern offsets
+ list is full.
+ (ANCHOR_LIST_PTR_FULL): Changed to above.
+ (BIT_BLOCK_SIZE): Changed to BITS_BLOCK_SIZE and moved to
+ above bits list routines below regex_compile.
+ (op_list_type): Defined to be pattern_offsets_list_type.
+ (compile_stack_type): Changed offsets to be
+ pattern_offset_type instead of unsigned.
+ (pointer): Changed the name of all structure fields from this
+ to `avail'.
+ (COMPILE_STACK_FULL): Changed so the stack is full if `avail'
+ is equal to `size' instead of `size' - 1.
+ (GET_BUFFER_SPACE): Changed `>=' to `>' in the while statement.
+ (regex_compile): Added variable `enough_memory' so could check
+ that routine that verifies '$' positions could return an
+ allocation error.
+ (group_count): Deleted this variable, as `regnum' already does
+ this work.
+ (op_list): Added this variable to keep track of operations
+ needed for verifying '$' positions.
+ (anchor_list): Now initialize using routine
+ `init_pattern_offsets_list'.
+ Consolidated the three bits_list initializations.
+ In case '$': Instead of trying to go past constructs which can
+ follow '$', merely detect the special case where it has to be
+ at the pattern's end, fix up any fixup jumps if necessary,
+ record the anchor if necessary and add an `endline' (and
+ possibly two `no-op's) to the pattern; will call a routine at
+ the end to verify if it's in a valid position or not.
+ (init_pattern_offsets_list): Added to initialize pattern
+ offsets lists.
+ (extend_anchor_list): Renamed this extend_pattern_offsets_list
+ and renamed parameters and internal variables appropriately.
+ (add_pattern_offset): Added this routine which both
+ record_anchor_position and add_op call.
+ (adjust_pattern_offsets_list): Add this routine to adjust by
+ some increment all the pattern offsets a list of such after a
+ given position.
+ (record_anchor_position): Now send in offset instead of
+ calculating it and just call add_pattern_offset.
+ (adjust_anchor_list): Replaced by above routine.
+ (remove_intervening_anchors): If the anchor is an `endline'
+ then replace it with `endline_in_repeat' instead of `no_op'.
+ (add_op): Added this routine to call in regex_compile
+ wherever push something relevant to verifying '$' positions.
+ (verify_and_adjust_endlines): Added routine to (1) verify that
+ '$'s in a pattern buffer (represented by `endline') were in
+ valid positions and (2) whether or not they were anchors.
+ (BITS_BLOCK_SIZE): Renamed BIT_BLOCK_SIZE and moved to right
+ above bits list routines.
+ (BITS_BLOCK): Defines which array element of a bits list the
+ bit corresponding to a given position is in.
+ (BITS_MASK): Has a 1 where the bit (in a bit list array element)
+ for a given position is.
+
+Mon Apr 1 12:09:06 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (BIT_BLOCK_SIZE): Defined this for using with
+ bits_list_type, abstracted from level_list_type so could use
+ for more things than just the level match status.
+ (regex_compile): Renamed `level_list' variable to
+ `level_match_status'.
+ Added variable `group_match_status' of type bits_list_type.
+ Kept track of whether or not for all groups any of them
+ matched other than the empty string, so detect if a back
+ reference in front of a '^' made it nonleading or not.
+ Do this by setting a match status bit for all active groups
+ whenever leave a group that matches other than the empty string.
+ Could detect which groups are active by going through the
+ stack each time, but or-ing a bits list of active groups with
+ a bits list of group match status is faster, so make a bits
+ list of active groups instead.
+ Have to check that '^' isn't in a leading position before
+ going to normal_char.
+ Whenever set level match status of the current level, also set
+ the match status of all active groups.
+ Increase the group count and make that group active whenever
+ open a group.
+ When close a group, only set the next level down if the
+ current level matches other than the empty string, and make
+ the current group inactive.
+ At a back reference, only set a level's match status if the
+ group to which the back reference refers matches other than
+ the empty string.
+ (init_bits_list): Added to initialize a bits list.
+ (get_level_value): Deleted this. (Made into
+ get_level_match_status.)
+ (extend_bits_list): Added to extend a bits list. (Made this
+ from deleted routine `extend_level_list'.)
+ (get_bit): Added to get a bit value from a bits list. (Made
+ this from deleted routine `get_level_value'.)
+ (set_bit_to_value): Added to set a bit in a bits list. (Made
+ this from deleted routine `set_level_value'.)
+ (get_level_match_status): Added this to get the match status
+ of a given level. (Made from get_level_value.)
+ (set_this_level, set_next_lower_level): Made all routines
+ which set bits extend the bits list if necessary, thus they
+ now return an unsigned value to indicate whether or not the
+ reallocation failed.
+ (increase_level): No longer extends the level list.
+ (make_group_active): Added to mark as active a given group in
+ an active groups list.
+ (make_group_inactive): Added to mark as inactive a given group
+ in an active groups list.
+ (set_match_status_of_active_groups): Added to set the match
+ status of all currently active groups.
+ (get_group_match_status): Added to get a given group's match status.
+ (no_levels_match_anything): Removed the parameter LEVEL.
+ (PUSH_FAILURE_POINT): Added rms' bug fix and changed RE_NREGS
+ to num_internal_regs.
+
+Sun Mar 31 09:04:30 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_ANCHORS_ONLY_AT_ENDS): Added syntax so could
+ constrain '^' and '$' to only be anchors if at the beginning
+ and end of the pattern.
+ (RE_SYNTAX_POSIX_BASIC): Added the above bit.
+
+ * regex.c (enum regexcode): Changed `unused' to `no_op'.
+ (this_and_lower_levels_match_nothing): Deleted forward reference.
+ (regex_compile): case '^': if the syntax bit RE_ANCHORS_ONLY_AT_ENDS
+ is set, then '^' is only an anchor if at the beginning of the
+ pattern; only record anchor position if the syntax bit
+ RE_REPEATED_ANCHORS_AWAY is set; the '^' is a normal char if
+ the syntax bit RE_ANCHORS_ONLY_AT_ENDS is set and we're not at
+ the beginning of the pattern (and neither RE_CONTEXTUAL_INDEP_OPS
+ nor RE_CONTEXTUAL_INVALID_OPS syntax bits are set).
+ Only adjust the anchor list if the syntax bit
+ RE_REPEATED_ANCHORS_AWAY is set.
+
+ * regex.c (level_list_type): Use to detect when '^' is
+ in a leading position.
+ (regex_compile): Added level_list_type level_list variable in
+ which we keep track of whether or not a grouping level (in its
+ current or most recent incarnation) matches anything besides the
+ empty string. Set the bit for the i-th level when detect it
+ should match something other than the empty string and the bit
+ for the (i-1)-th level when leave the i-th group. Clear all
+ bits for the i-th and higher levels if none of 0--(i - 1)-th's
+ bits are set when encounter an alternation operator on that
+ level. If no levels are set when hit a '^', then it is in a
+ leading position. We keep track of which level we're at by
+ increasing a variable current_level whenever we encounter an
+ open-group operator and decreasing it whenever we encounter a
+ close-group operator.
+ Have to adjust the anchor list contents whenever insert
+ something ahead of them (such as on_failure_jump's) in the
+ pattern.
+ (adjust_anchor_list): Adjusts the offsets in an anchor list by
+ a given increment starting at a given start position.
+ (get_level_value): Returns the bit setting of a given level.
+ (set_level_value): Sets the bit of a given level to a given value.
+ (set_this_level): Sets (to 1) the bit of a given level.
+ (set_next_lower_level): Sets (to 1) the bit of (LEVEL - 1) for a
+ given LEVEL.
+ (clear_this_and_higher_levels): Clears the bits for a given
+ level and any higher levels.
+ (extend_level_list): Adds sizeof(unsigned) more bits to a level list.
+ (increase_level): Increases by 1 the value of a given level variable.
+ (decrease_level): Decreases by 1 the value of a given level variable.
+ (lower_levels_match_nothing): Checks if any levels lower than
+ the given one match anything.
+ (no_levels_match_anything): Checks if any levels match anything.
+ (re_match_2): At case wordbeg: before looking at d-1, check that
+ we're not at the string's beginning.
+ At case wordend: Added some illuminating parentheses.
+
+Mon Mar 25 13:58:51 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_NO_ANCHOR_AT_NEWLINE): Changed syntax bit name
+ from RE_ANCHOR_NOT_NEWLINE because an anchor never matches the
+ newline itself, just the empty string either before or after it.
+ (RE_REPEATED_ANCHORS_AWAY): Added this syntax bit for ignoring
+ anchors inside groups which are operated on by repetition
+ operators.
+ (RE_DOT_MATCHES_NEWLINE): Added this bit so the match-any-character
+ operator could match a newline when it's set.
+ (RE_SYNTAX_POSIX_BASIC): Set RE_DOT_MATCHES_NEWLINE in this.
+ (RE_SYNTAX_POSIX_EXTENDED): Set RE_DOT_MATCHES_NEWLINE and
+ RE_REPEATED_ANCHORS_AWAY in this.
+ (regerror): Changed prototypes to new POSIX spec.
+
+ * regex.c (anchor_list_type): Added so could null out anchors inside
+ repeated groups.
+ (ANCHOR_LIST_PTR_FULL): Added for above type.
+ (compile_stack_element): Changed name from stack_element.
+ (compile_stack_type): Changed name from compile_stack.
+ (INIT_COMPILE_STACK_SIZE): Changed name from INIT_STACK_SIZE.
+ (COMPILE_STACK_EMPTY): Changed name from STACK_EMPTY.
+ (COMPILE_STACK_FULL): Changed name from STACK_FULL.
+ (regex_compile): Changed SYNTAX parameter to non-const.
+ Changed variable name `stack' to `compile_stack'.
+ If syntax bit RE_REPEATED_ANCHORS_AWAY is set, then naively put
+ anchors in a list when encounter them and then set them to
+ `unused' when detect they are within a group operated on by a
+ repetition operator. Need something more sophisticated than
+ this, as they should only get set to `unused' if they are in
+ positions where they would be anchors. Also need a better way to
+ detect contextually invalid anchors.
+ Changed some comments.
+ (is_in_compile_stack): Changed name from `is_in_stack'.
+ (extend_anchor_list): Added to do anchor stuff.
+ (record_anchor_position): Added to do anchor stuff.
+ (remove_intervening_anchors): Added to do anchor stuff.
+ (re_match_2): Now match a newline with the match-any-character
+ operator if RE_DOT_MATCHES_NEWLINE is set.
+ Compacted some code.
+ (regcomp): Added new POSIX newline information to the header
+ comment.
+ If REG_NEWLINE cflag is set, then now unset RE_DOT_MATCHES_NEWLINE
+ in syntax.
+ (put_in_buffer): Added to do new POSIX regerror spec. Called
+ by regerror.
+ (regerror): Changed to take a pattern buffer, error buffer and
+ its size, and return type `size_t', the size of the full error
+ message, and the first ERRBUF_SIZE - 1 characters of the full
+ error message in the error buffer.
+
+Wed Feb 27 16:38:33 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (#include <sys/types.h>): Removed this as new POSIX
+ standard has the user include it.
+ (RE_SYNTAX_POSIX_BASIC and RE_SYNTAX_POSIX_EXTENDED): Removed
+ RE_HAT_LISTS_NOT_NEWLINE as new POSIX standard has the cflag
+ REG_NEWLINE now set this. Similarly, added syntax bit
+ RE_ANCHOR_NOT_NEWLINE as this is now unset by REG_NEWLINE.
+ (RE_SYNTAX_POSIX_BASIC): Removed syntax bit
+ RE_NO_CONSECUTIVE_REPEATS as POSIX now allows them.
+
+ * regex.c (#include <sys/types.h>): Added this as new POSIX
+ standard has the user include it instead of us putting it in
+ regex.h.
+ (extern char *re_syntax_table): Made into an extern so the
+ user could allocate it.
+ (DO_RANGE): If don't find a range end, now goto invalid_range_end
+ instead of unmatched_left_bracket.
+ (regex_compile): Made variable SYNTAX non-const.????
+ Reformatted some code.
+ (re_compile_fastmap): Moved is_a_succeed_n's declaration to
+ inner braces.
+ Compacted some code.
+ (SET_NEWLINE_FLAG): Removed and put inline.
+ (regcomp): Made variable `syntax' non-const so can unset
+ RE_ANCHOR_NOT_NEWLINE syntax bit if cflag REG_NEWLINE is set.
+ If cflag REG_NEWLINE is set, set the RE_HAT_LISTS_NOT_NEWLINE
+ syntax bit and unset RE_ANCHOR_NOT_NEWLINE one of `syntax'.
+
+Wed Feb 20 16:33:38 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_NO_CONSECUTIVE_REPEATS): Changed name from
+ RE_NO_CONSEC_REPEATS.
+ (REG_ENESTING): Deleted this POSIX return value, as the stack
+ is now unbounded.
+ (struct re_pattern_buffer): Changed some comments.
+ (re_compile_pattern): Changed a comment.
+ Deleted check on stack upper bound and corresponding error.
+ Now when there's no interval contents and it's the end of the
+ pattern, go to unmatched_left_curly_brace instead of end_of_pattern.
+ Removed nesting_too_deep error, as the stack is now unbounded.
+ (regcomp): Removed REG_ENESTING case, as the stack is now unbounded.
+ (regerror): Removed REG_ENESTING case, as the stack is now unbounded.
+
+ * regex.c (MAX_STACK_SIZE): Deleted because don't need upper
+ bound on array indexed with an unsigned number.
+
+Sun Feb 17 15:50:24 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Changed and added some comments.
+
+ * regex.c (init_syntax_once): Made `_' a word character.
+ (re_compile_pattern): Added a comment.
+ (re_match_2): Redid header comment.
+ (regexec): With header comment about PMATCH, corrected and
+ removed details found in regex.h, adding a reference.
+
+Fri Feb 15 09:21:31 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (DO_RANGE): Removed argument parentheses.
+ Now get untranslated range start and end characters and set
+ list bits for the translated (if at all) versions of them and
+ all characters between them.
+ (re_match_2): Now use regs->num_regs instead of num_regs_wanted
+ wherever possible.
+ (regcomp): Now build case-fold translate table using isupper
+ and tolower facilities so will work on foreign language characters.
+
+Sat Feb 9 16:40:03 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_HAT_LISTS_NOT_NEWLINE): Changed syntax bit name
+ from RE_LISTS_NOT_NEWLINE as it only affects nonmatching lists.
+ Changed all references to the match-beginning-of-string
+ operator to match-beginning-of-line operator, as this is what
+ it does.
+ (RE_NO_CONSEC_REPEATS): Added this syntax bit.
+ (RE_SYNTAX_POSIX_BASIC): Added above bit to this.
+ (REG_PREMATURE_END): Changed name to REG_EEND.
+ (REG_EXCESS_NESTING): Changed name to REG_ENESTING.
+ (REG_TOO_BIG): Changed name to REG_ESIZE.
+ (REG_INVALID_PREV_RE): Deleted this return POSIX value.
+ Added and changed some comments.
+
+ * regex.c (re_compile_pattern): Now sets the pattern buffer's
+ `return_default_num_regs' field.
+ (typedef struct stack_element, stack_type, INIT_STACK_SIZE,
+ MAX_STACK_SIZE, STACK_EMPTY, STACK_FULL): Added for regex_compile.
+ (INIT_BUF_SIZE): Changed value from 28 to 32.
+ (BUF_PUSH): Changed name from BUFPUSH.
+ (MAX_BUF_SIZE): Added so could use in many places.
+ (IS_CHAR_CLASS_STRING): Replaced is_char_class with this.
+ (regex_compile): Added a stack which could grow dynamically
+ and which has struct elements.
+ Go back to initializing `zero_times_ok' and `many_times_ok' to
+ 0 and |=ing them inside the loop.
+ Now disallow consecutive repetition operators if the syntax
+ bit RE_NO_CONSEC_REPEATS is set.
+ Now detect trailing backslash when the compiler is expecting a
+ `?' or a `+'.
+ Changed calls to GET_BUFFER_SPACE which asked for 6 to ask for
+ 3, as that's all they needed.
+ Now check for trailing backslash inside lists.
+ Now disallow an empty alternative right before an end-of-line
+ operator.
+ Now get buffer space before leaving space for a fixup jump.
+ Now check if at pattern end when at open-interval operator.
+ Added some comments.
+ Now check if non-interval repetition operators follow an
+ interval one if the syntax bit RE_NO_CONSEC_REPEATS is set.
+ Now only check if what precedes an interval repetition
+ operator isn't a regular expression which matches one
+ character if the syntax bit RE_NO_CONSEC_REPEATS is set.
+ Now return "Unmatched [ or [^" instead of "Unmatched [".
+ (is_in_stack): Added to check if a given register number is in
+ the stack.
+ (re_match_2): If initial variable allocations fail, return -2,
+ instead of -1.
+ Now set reg's `num_regs' field when allocating regs.
+ Now before allocating them, free regs->start and end if they
+ aren't NULL and return -2 if either allocation fails.
+ Now use regs->num_regs instead of num_regs_wanted to control
+ regs loops.
+ Now increment past the newline when matching it with an
+ end-of-line operator.
+ (regcomp): Added to the header comment.
+ Now return REG_ESUBREG if regex_compile returns "Unmatched [
+ or [^" instead of doing so if it returns "Unmatched [".
+ Now return REG_BADRPT if in addition to returning "Missing
+ preceding regular expression", regex_compile returns "Invalid
+ preceding regular expression".
+ Now return new return value names (see regex.h changes).
+ (regexec): Added to header comment.
+ Initialize regs structure.
+ Now match whole string.
+ Now always free regs.start and regs.end instead of just when
+ the string matched.
+ (regerror): Now return "Regex error: Unmatched [ or [^.\n"
+ instead of "Regex error: Unmatched [.\n".
+ Now return "Regex error: Preceding regular expression either
+ missing or not simple.\n" instead of "Regex error: Missing
+ preceding regular expression.\n".
+ Removed REG_INVALID_PREV_RE case (it got subsumed into the
+ REG_BADRPT case).
+
+Thu Jan 17 09:52:35 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Changed a comment.
+
+ * regex.c: Changed and added large header comments.
+ (re_compile_pattern): Now if detect that `laststart' for an
+ interval points to a byte code for a regular expression which
+ matches more than one character, make it an internal error.
+ (regerror): Return error message, don't print it.
+
+Tue Jan 15 15:32:49 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (regcomp return codes): Added GNU ones.
+ Updated some comments.
+
+ * regex.c (DO_RANGE): Changed `obscure_syntax' to `syntax'.
+ (regex_compile): Added `following_left_brace' to keep track of
+ where pseudo interval following a valid interval starts.
+ Changed some instances that returned "Invalid regular
+ expression" to instead return error strings coinciding with
+ POSIX error codes.
+ Changed some comments.
+ Now consider only things between `[:' and `:]' to be possible
+ character class names.
+ Now a character class expression can't end a pattern; at
+ least a `]' must close the list.
+ Now if the syntax bit RE_NO_BK_CURLY_BRACES is set, then a
+ valid interval must be followed by yet another to get an error
+ for preceding an interval (in this case, the second one) with
+ a regular expression that matches more than one character.
+ Now if what follows a valid interval begins with an open
+ interval operator but doesn't begin a valid interval, then set
+ following_left_brace to it, put it in C and go to
+ normal_char label.
+ Added some comments.
+ Return "Invalid character class name" instead of "Invalid
+ character class".
+ (regerror): Return messages for all POSIX error codes except
+ REG_ECOLLATE and REG_ENEWLINE, along with all GNU error codes.
+ Added `break's after all cases.
+ (main): Call re_set_syntax instead of setting `obscure_syntax'
+ directly.
+
+Sat Jan 12 13:37:59 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (Copyright): Updated date.
+ (#include <sys/types.h>): Include unconditionally.
+ (RE_CANNOT_MATCH_NEWLINE): Deleted this syntax bit.
+ (RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_POSIX_EXTENDED): Removed
+ setting the RE_ANCHOR_NOT_NEWLINE syntax bit from these.
+ Changed and added some comments.
+ (struct re_pattern_buffer): Changed some flags from chars to bits.
+ Added field `syntax'; holds which syntax pattern was compiled with.
+ Added bit flag `return_default_num_regs'.
+ (externs for GNU and Berkeley UNIX routines): Added `const's to
+ parameter types to be compatible with POSIX.
+ (#define const): Added to support old C compilers.
+
+ * regex.c (Copyright): Updated date.
+ (enum regexpcode): Deleted `newline'.
+ (regex_compile): Renamed re_compile_pattern to this, added a
+ syntax parameter so it can set the pattern buffer's `syntax'
+ field.
+ Made `pattern', and `size' `const's so could pass to POSIX
+ interface routines; also made `const' whatever interval
+ variables had to be to make this work.
+ Changed references to `obscure_syntax' to new parameter `syntax'.
+ Deleted putting `newline' in buffer when see `\n'.
+ Consider invalid character classes which have nothing wrong
+ except the character class name; if so, return character-class error.
+ (is_char_class): Added routine for regex_compile.
+ (re_compile_pattern): added a new one which calls
+ regex_compile with `obscure_syntax' as the actual parameter
+ for the formal `syntax'.
+ Gave this the old routine's header comments.
+ Made `pattern', and `size' `const's so could use POSIX interface
+ routine parameters.
+ (re_search, re_search_2, re_match, re_match_2): Changed
+ `pbufp' to `bufp'.
+ (re_search_2, re_match_2): Changed `mstop' to `stop'.
+ (re_search, re_search_2): Made all parameters except `regs'
+ `const's so could use POSIX interface routines parameters.
+ (re_search_2): Added private copies of `const' parameters so
+ could change their values.
+ (re_match_2): Made all parameters except `regs' `const's so
+ could use POSIX interface routines parameters.
+ Changed `size1' and `size2' parameters to `size1_arg' and
+ `size2_arg' and so could change; added local `size1' and
+ `size2' and set to these.
+ Added some comments.
+ Deleted `newline' case.
+ `begline' can also possibly match if `d' contains a newline;
+ if it does, we have to increment d to point past the newline.
+ Replaced references to `obscure_syntax' with `bufp->syntax'.
+ (re_comp, re_exec): Made parameter `s' a `const' so could use POSIX
+ interface routines parameters.
+ Now call regex_compile, passing `obscure_syntax' via the
+ `syntax' parameter.
+ (re_exec): Made local `len' a `const' so could pass to re_search.
+ (regcomp): Added header comment.
+ Added local `syntax' to set and pass to regex_compile rather
+ than setting global `obscure_syntax' and passing it.
+ Call regex_compile with its `syntax' parameter rather than
+ re_compile_pattern.
+ Return REG_ECTYPE if character-class error.
+ (regexec): Don't initialize `regs' to anything.
+ Made `private_preg' a nonpointer so could set to what the
+ constant `preg' points.
+ Initialize `private_preg's `return_default_num_regs' field to
+ zero because want to return `nmatch' registers, not however
+ many there are subexpressions in the pattern.
+ Also test if `nmatch' > 0 to see if should pass re_match `regs'.
+
+Tue Jan 8 15:57:17 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (struct re_pattern_buffer): Reworded comment.
+
+ * regex.c (EXTEND_BUFFER): Also reset beg_interval.
+ (re_search_2): Return val if val = -2.
+ (NUM_REG_ITEMS): Listed items in comment.
+ (NUM_OTHER_ITEMS): Defined this for using in > 1 definition.
+ (MAX_NUM_FAILURE_ITEMS): Replaced `+ 2' with NUM_OTHER_ITEMS.
+ (NUM_FAILURE_ITEMS): As with definition above and added to
+ comment.
+ (PUSH_FAILURE_POINT): Replaced `* 2's with `<< 1's.
+ (re_match_2): Test for equality with 1 to see if pbufp->bol and
+ pbufp->eol are set.
+
+Fri Jan 4 15:07:22 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (struct re_pattern_buffer): Reordered some fields.
+ Updated some comments.
+ Added not_bol and not_eol fields.
+ (extern regcomp, regexec, regerror): Added return types.
+ (extern regfree): Added `extern'.
+
+ * regex.c (min): Deleted unused macro.
+ (re_match_2): Compacted some code.
+ Removed call to macro `min' from `for' loop.
+ Fixed so unused registers get filled with -1's.
+ Fail if the pattern buffer's `not_bol' field is set and
+ encounter a `begline'.
+ Fail if the pattern buffer's `not_eol' field is set and
+ encounter an `endline'.
+ Deleted redundant check for empty stack in fail case.
+ Don't free pattern buffer's components in re_comp.
+ (regexec): Initialize variable regs.
+ Added `private_preg' pattern buffer so could set `not_bol' and
+ `not_eol' fields and hand to re_match.
+ Deleted naive attempt to detect anchors.
+ Set private pattern buffer's `not_bol' and `not_eol' fields
+ according to eflags value.
+ `nmatch' must also be > 0 for us to bother allocating
+ registers to send to re_match and filling pmatch
+ with their results after the call to re_match.
+ Send private pattern buffer instead of argument to re_match.
+ If use the registers, always free them and then set them to NULL.
+ (regerror): Added this Posix routine.
+ (regfree): Added this Posix routine.
+
+Tue Jan 1 15:02:45 1991 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_NREGS): Deleted this definition, as now the user
+ can choose how many registers to have.
+ (REG_NOTBOL, REG_NOTEOL): Defined these Posix eflag bits.
+ (REG_NOMATCH, REG_BADPAT, REG_ECOLLATE, REG_ECTYPE,
+ REG_EESCAPE, REG_ESUBREG, REG_EBRACK, REG_EPAREN, REG_EBRACE,
+ REG_BADBR, REG_ERANGE, REG_ESPACE, REG_BADRPT, REG_ENEWLINE):
+ Defined these return values for Posix's regcomp and regexec.
+ Updated some comments.
+ (struct re_pattern_buffer): Now typedef this as regex_t
+ instead of the other way around.
+ (struct re_registers): Added num_regs field. Made start and
+ end fields pointers to char instead of fixed size arrays.
+ (regmatch_t): Added this Posix register type.
+ (regcomp, regexec, regerror, regfree): Added externs for these
+ Posix routines.
+
+ * regex.c (enum boolean): Typedefed this.
+ (re_pattern_buffer): Reformatted some comments.
+ (re_compile_pattern): Updated some comments.
+ Always push start_memory and its attendant number whenever
+ encounter a group, not just when its number is less than the
+ previous maximum number of registers; same for stop_memory.
+ Get 4 bytes of buffer space instead of 2 when pushing a
+ set_number_at.
+ (can_match_nothing): Added this to elaborate on and replace
+ code in re_match_2.
+ (reg_info_type): Made can_match_nothing field a bit instead of int.
+ (MIN): Added for re_match_2.
+ (re_match_2 macros): Changed all `for' loops which used
+ RE_NREGS to now use num_internal_regs as upper bounds.
+ (MAX_NUM_FAILURE_ITEMS): Use num_internal_regs instead of RE_NREGS.
+ (POP_FAILURE_POINT): Added check for empty stack.
+ (FREE_VARIABLES): Added this to free (and set to NULL)
+ variables allocated in re_match_2.
+ (re_match_2): Rearranged parameters to be in order.
+ Added variables num_regs_wanted (how many registers the user wants)
+ and num_internal_regs (how many groups there are).
+ Allocated initial_stack, regstart, regend, old_regstart,
+ old_regend, reginfo, best_regstart, and best_regend---all
+ which used to be fixed size arrays. Free them all and return
+ -1 if any fail.
+ Free above variables if starting position pos isn't valid.
+ Changed all `for' loops which used RE_NREGS to now use
+ num_internal_regs as upper bounds---except for the loops which
+ fill regs; then use num_regs_wanted.
+ Allocate regs if the user has passed it and wants more than 0
+ registers filled.
+ Set regs->start[i] and regs->end[i] to -1 if either
+ regstart[i] or regend[i] equals -1, not just the first.
+ Free allocated variables before returning.
+ Updated some comments.
+ (regcomp): Return REG_ESPACE, REG_BADPAT, REG_EPAREN when
+ appropriate.
+ Free translate array.
+ (regexec): Added this Posix interface routine.
+
+Mon Dec 24 14:21:13 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: If _POSIX_SOURCE is defined then #include <sys/types.h>.
+ Added syntax bit RE_CANNOT_MATCH_NEWLINE.
+ Defined Posix cflags: REG_EXTENDED, REG_NEWLINE, REG_ICASE, and
+ REG_NOSUB.
+ Added fields re_nsub and no_sub to struct re_pattern_buffer.
+ Typedefed regex_t to be `struct re_pattern_buffer'.
+
+ * regex.c (CHAR_SET_SIZE): Defined this to be 256 and replaced
+ incidences of this value with this constant.
+ (re_compile_pattern): Added switch case for `\n' and put
+ `newline' into the pattern buffer when encounter this.
+ Increment the pattern_buffer's `re_nsub' field whenever open a
+ group.
+ (re_match_2): Match a newline with `newline'---provided the
+ syntax bit RE_CANNOT_MATCH_NEWLINE isn't set.
+ (regcomp): Added this Posix interface routine.
+ (enum test_type): Added interface_test tag.
+ (main): Added Posix interface test.
+
+Tue Dec 18 12:58:12 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (struct re_pattern_buffer): reformatted so would fit
+ in texinfo documentation.
+
+Thu Nov 29 15:49:16 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_NO_EMPTY_ALTS): Added this bit.
+ (RE_SYNTAX_POSIX_EXTENDED): Added above bit.
+
+ * regex.c (re_compile_pattern): Disallow empty alternatives only
+ when RE_NO_EMPTY_ALTS is set, not when RE_CONTEXTUAL_INVALID_OPS is.
+ Changed RE_NO_BK_CURLY_BRACES to RE_NO_BK_PARENS when testing
+ for empty groups at label handle_open.
+ At label handle_bar: disallow empty alternatives if RE_NO_EMPTY_ALTS
+ is set.
+ Rewrote some comments.
+
+ (re_compile_fastmap): cleaned up code.
+
+ (re_search_2): Rewrote comment.
+
+ (struct register_info): Added field `inner_groups'; it records
+ which groups are inside of the current one.
+ Added field can_match_nothing; it's set if the current group
+ can match nothing.
+ Added field ever_matched_something; it's set if current group
+ ever matched something.
+
+ (INNER_GROUPS): Added macro to access inner_groups field of
+ struct register_info.
+
+ (CAN_MATCH_NOTHING): Added macro to access can_match_nothing
+ field of struct register_info.
+
+ (EVER_MATCHED_SOMETHING): Added macro to access
+ ever_matched_something field of struct register_info.
+
+ (NOTE_INNER_GROUP): Defined macro to record that a given group
+ is inside of all currently active groups.
+
+ (re_match_2): Added variables *p1 and mcnt2 (multipurpose).
+ Added old_regstart and old_regend arrays to hold previous
+ register values if they need be restored.
+ Initialize added fields and variables.
+ case start_memory: Find out if the group can match nothing.
+ Save previous register values in old_regstart and old_regend.
+ Record that current group is inside of all currently active
+ groups.
+ If the group is inside a loop and it ever matched anything,
+ restore its registers to values before the last failed match.
+ Restore the registers for the inner groups, too.
+ case duplicate: Can back reference to a group that never
+ matched if it can match nothing.
+
+Thu Nov 29 11:12:54 1990 Karl Berry (karl at hayley)
+
+ * regex.c (bcopy, ...): define these if either _POSIX_SOURCE or
+ STDC_HEADERS is defined; same for including <stdlib.h>.
+
+Sat Oct 6 16:04:55 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (struct re_pattern_buffer): Changed field comments.
+
+ * regex.c (re_compile_pattern): Allow a `$' to precede an
+ alternation operator (`|' or `\|').
+ Disallow `^' and/or `$' in empty groups if the syntax bit
+ RE_NO_EMPTY_GROUPS is set.
+ Wait until have parsed a valid `\{...\}' interval expression
+ before testing RE_CONTEXTUAL_INVALID_OPS to see if it's
+ invalidated by that.
+ Don't use RE_NO_BK_CURLY_BRACES to test whether or not a validly
+ parsed interval expression is invalid if it has no preceding re;
+ rather, use RE_CONTEXTUAL_INVALID_OPS.
+ If an interval parses, but there is no preceding regular
+ expression, yet the syntax bit RE_CONTEXTUAL_INDEP_OPS is set,
+ then that interval can match the empty regular expression; if
+ the bit isn't set, then the characters in the interval
+ expression are parsed as themselves (sans the backslashes).
+ In unfetch_interval case: Moved PATFETCH to above the test for
+ RE_NO_BK_CURLY_BRACES being set, which would force a goto
+ normal_backslash; the code at both normal_backslash and normal_char
+ expects a character in `c.'
+
+Sun Sep 30 11:13:48 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Changed some comments to use the terms used in the
+ documentation.
+ (RE_CONTEXTUAL_INDEP_OPS): Changed name from `RE_CONTEXT_INDEP_OPS'.
+ (RE_LISTS_NOT_NEWLINE): Changed name from `RE_HAT_NOT_NEWLINE.'
+ (RE_ANCHOR_NOT_NEWLINE): Added this syntax bit.
+ (RE_NO_EMPTY_GROUPS): Added this syntax bit.
+ (RE_NO_HYPHEN_RANGE_END): Deleted this syntax bit.
+ (RE_SYNTAX_...): Reformatted.
+ (RE_SYNTAX_POSIX_BASIC, RE_SYNTAX_POSIX_EXTENDED): Added syntax bits
+ RE_ANCHOR_NOT_NEWLINE and RE_NO_EMPTY_GROUPS, and deleted
+ RE_NO_HYPHEN_RANGE_END.
+ (RE_SYNTAX_POSIX_EXTENDED): Added syntax bit RE_DOT_NOT_NULL.
+
+ * regex.c (bcopy, bcmp, bzero): Define if _POSIX_SOURCE is defined.
+ (_POSIX_SOURCE): ifdef this, #include <stdlib.h>
+ (#ifdef emacs): Changed comment of the #endif for its #else
+ clause to be `not emacs', not `emacs.'
+ (no_pop_jump): Changed name from `jump'.
+ (pop_failure_jump): Changed name from `finalize_jump.'
+ (maybe_pop_failure_jump): Changed name from `maybe_finalize_jump'.
+ (no_pop_jump_n): Changed name from `jump_n.'
+ (EXTEND_BUFFER): Use shift instead of multiplication to double
+ buf->allocated.
+ (DO_RANGE, recompile_pattern): Added macro to set the list bits
+ for a range.
+ (re_compile_pattern): Fixed grammar problems in some comments.
+ Checked that RE_NO_BK_VBAR is set to make `$' valid before a `|'
+ and not set to make it valid before a `\|'.
+ Checked that RE_NO_BK_PARENS is set to make `$' valid before a ')'
+ and not set to make it valid before a `\)'.
+ Disallow ranges starting with `-', unless the range is the
+ first item in a list, rather than disallowing ranges which end
+ with `-'.
+ Disallow empty groups if the syntax bit RE_NO_EMPTY_GROUPS is set.
+ Disallow nothing preceding `{' and `\{' if they represent the
+ open-interval operator and RE_CONTEXTUAL_INVALID_OPS is set.
+ (register_info_type): typedef-ed this using `struct register_info.'
+ (SET_REGS_MATCHED): Compacted the code.
+ (re_match_2): Made it fail if back reference a group which we've
+ never matched.
+ Made `^' not match a newline if the syntax bit
+ RE_ANCHOR_NOT_NEWLINE is set.
+ (really_fail): Added this label so could force a final fail that
+ would not try to use the failure stack to recover.
+
+Sat Aug 25 14:23:01 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_CONTEXTUAL_OPS): Changed name from RE_CONTEXT_OPS.
+ (global): Rewrote comments and rebroke some syntax #define lines.
+
+ * regex.c (isgraph): Added definition for sequents.
+ (global): Now refer to character set lists as ``lists.''
+ Rewrote comments containing ``\('' or ``\)'' to now refer to
+ ``groups.''
+ (RE_CONTEXTUAL_OPS): Changed name from RE_CONTEXT_OPS.
+
+ (re_compile_pattern): Expanded header comment.
+
+Sun Jul 15 14:50:25 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_CONTEXT_INDEP_OPS): the comment's sense got turned
+ around when we changed how it read; changed it to be correct.
+
+Sat Jul 14 16:38:06 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_NO_EMPTY_BK_REF): changed name to
+ RE_NO_MISSING_BK_REF, as this describes it better.
+
+ * regex.c (re_compile_pattern): changed RE_NO_EMPTY_BK_REF
+ to RE_NO_MISSING_BK_REF, as above.
+
+Thu Jul 12 11:45:05 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h (RE_NO_EMPTY_BRACKETS): removed this syntax bit, as
+ bracket expressions should *never* be empty regardless of the
+ syntax. Removed this bit from RE_SYNTAX_POSIX_BASIC and
+ RE_SYNTAX_POSIX_EXTENDED.
+
+ * regex.c (SET_LIST_BIT): in the comment, now refer to character
+ sets as (non)matching sets, as bracket expressions can now match
+ other things in addition to characters.
+ (re_compile_pattern): refer to groups as such instead of `\(...\)'
+ or somesuch, because groups can now be enclosed in either plain
+ parens or backslashed ones, depending on the syntax.
+ In the '[' case, added a boolean just_had_a_char_class to detect
+ whether or not a character class begins a range (which is invalid).
+ Restore way of breaking out of a bracket expression to original way.
+ Add way to detect a range if the last thing in a bracket
+ expression was a character class.
+ Took out check for c != ']' at the end of a character class in
+ the else clause, as it had already been checked in the if part
+ that also checked the validity of the string.
+ Set or clear just_had_a_char_class as appropriate.
+ Added some comments. Changed references to character sets to
+ ``(non)matching lists.''
+
+Sun Jul 1 12:11:29 1990 Karl Berry (karl at hayley)
+
+ * regex.h (BYTEWIDTH): moved back to regex.c.
+
+ * regex.h (re_compile_fastmap): removed declaration; this
+ shouldn't be advertised.
+
+Mon May 28 15:27:53 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (ifndef Sword): Made comments more specific.
+ (global): include <stdio.h> so can write fatal messages on
+ standard error. Replaced calls to assert with fprintfs to
+ stderr and exit (1)'s.
+ (PREFETCH): Reformatted to make more readable.
+ (AT_STRINGS_BEG): Defined to test if we're at the beginning of
+ the virtual concatenation of string1 and string2.
+ (AT_STRINGS_END): Defined to test if at the end of the virtual
+ concatenation of string1 and string2.
+ (AT_WORD_BOUNDARY): Defined to test if are at a word boundary.
+ (IS_A_LETTER(d)): Defined to test if the contents of the pointer D
+ is a letter.
+ (re_match_2): Rewrote the wordbound, notwordbound, wordbeg, wordend,
+ begbuf, and endbuf cases in terms of the above four new macros.
+ Called SET_REGS_MATCHED in the matchsyntax, matchnotsyntax,
+ wordchar, and notwordchar cases.
+
+Mon May 14 14:49:13 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (re_search_2): Fixed RANGE to not ever take STARTPOS
+ outside of virtual concatenation of STRING1 and STRING2.
+ Updated header comment as to this.
+ (re_match_2): Clarified comment about MSTOP in header.
+
+Sat May 12 15:39:00 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (re_search_2): Checked for out-of-range STARTPOS.
+ Added comments.
+ When searching backwards, not only get the character with which
+ to compare to the fastmap from string2 if the starting position
+ >= size1, but also if size1 is zero; this is so won't get a
+ segmentation fault if string1 is null.
+ Reformatted code at label advance.
+
+Thu Apr 12 20:26:21 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Added #pragma once and #ifdef...endif __REGEXP_LIBRARY.
+ (RE_EXACTN_VALUE): Added for search.c to use.
+ Reworded some comments.
+
+ regex.c: Punctuated some comments correctly.
+ (NULL): Removed this.
+ (RE_EXACTN_VALUE): Added for search.c to use.
+ (<ctype.h>): Moved this include to top of file.
+ (<assert.h>): Added this include.
+ (struct regexpcode): Assigned 0 to unused and 1 to exactn
+ because of RE_EXACTN_VALUE.
+ Added comment.
+ (various macros): Lined up backslashes near end of line.
+ (insert_jump): Cleaned up the header comment.
+ (re_search): Corrected the header comment.
+ (re_search_2): Cleaned up and completed the header comment.
+ (re_max_failures): Updated comment.
+ (struct register_info): Constructed as bits so as to save space
+ on the stack when pushing register information.
+ (IS_ACTIVE): Macro for struct register_info.
+ (MATCHED_SOMETHING): Macro for struct register_info.
+ (NUM_REG_ITEMS): How many register information items for each
+ register we have to push on the stack at each failure.
+ (MAX_NUM_FAILURE_ITEMS): If push all the registers on failure,
+ this is how many items we push on the stack.
+ (PUSH_FAILURE_POINT): Now pushes whether or not the register is
+ currently active, and whether or not it matched something.
+ Checks that there's enough space allocated to accommodate all the
+ items we currently want to push. (Before, a test for an empty
+ stack sufficed because we always pushed and popped the same
+ number of items).
+ Replaced ``2'' with MAX_NUM_FAILURE_POINTS when ``2'' refers
+ to how many things get pushed on the stack each time.
+ When copy the stack into the newly allocated storage, now only copy
+ the area in use.
+ Clarified comment.
+ (POP_FAILURE_POINT): Defined to use in places where put number
+ of registers on the stack into a variable before using it to
+ decrement the stack, so as to not confuse the compiler.
+ (IS_IN_FIRST_STRING): Defined to check if a pointer points into
+ the first string.
+ (SET_REGS_MATCHED): Changed to use the struct register_info
+ bits; also set the matched-something bit to false if the
+ register isn't currently active. (This is a redundant setting.)
+ (re_match_2): Cleaned up and completed the header comment.
+ Updated the failure stack comment.
+ Replaced the ``2'' with MAX_NUM_FAILURE_ITEMS in the static
+ allocation of initial_stack, because now more than two (now up
+ to MAX_FAILURE_ITEMS) items get pushed on the failure stack each
+ time.
+ Ditto for stackb.
+ Trashed restart_seg1, regend_seg1, best_regstart_seg1, and
+ best_regend_seg1 because they could have erroneous information
+ in them, such as when matching ``a'' (in string1) and ``ab'' (in
+ string2) with ``(a)*ab''; before using IS_IN_FIRST_STRING to see
+ whether or not the register starts or ends in string1,
+ regstart[1] pointed past the end of string1, yet regstart_seg1
+ was 0!
+ Added variable reg_info of type struct register_info to keep
+ track of currently active registers and whether or not they
+ currently match anything.
+ Commented best_regs_set.
+ Trashed reg_active and reg_matched_something and put the
+ information they held into reg_info; saves space on the stack.
+ Replaced NULL with '\000'.
+ In begline case, compacted the code.
+ Used assert to exit if had an internal error.
+ In begbuf case, because now force the string we're working on
+ into string2 if there aren't two strings, now allow d == string2
+ if there is no string1 (and the check for that is size1 == 0!);
+ also now succeeds if there aren't any strings at all.
+ (main, ifdef canned): Put test type into a variable so could
+ change it while debugging.
+
+Sat Mar 24 12:24:13 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (GET_UNSIGNED_NUMBER): Deleted references to num_fetches.
+ (re_compile_pattern): Deleted num_fetches because could keep
+ track of the number of fetches done by saving a pointer into the
+ pattern.
+ Added variable beg_interval to be used as a pointer, as above.
+ Assert that beg_interval points to something when it's used as above.
+ Initialize succeed_n's to lower_bound because re_compile_fastmap
+ needs to know it.
+ (re_compile_fastmap): Deleted unnecessary variable is_a_jump_n.
+ Added comment.
+ (re_match_2): Put number of registers on the stack into a
+ variable before using it to decrement the stack, so as to not
+ confuse the compiler.
+ Updated comments.
+ Used error routine instead of printf and exit.
+ In exactn case, restored longer code from ``original'' regex.c
+ which doesn't test translate inside a loop.
+
+ * regex.h: Moved #define NULL and the enum regexpcode definition
+ to regex.c. Changed some comments.
+
+ regex.c (global): Updated comments about compiling and for the
+ re_compile_pattern jump routines.
+ Added #define NULL and the enum regexpcode definition (from
+ regex.h).
+ (enum regexpcode): Added set_number_at to reset the n's of
+ succeed_n's and jump_n's.
+ (re_set_syntax): Updated its comment.
+ (re_compile_pattern): Moved its heading comment to after its macros.
+ Moved its include statement to the top of the file.
+ Commented or added to comments of its macros.
+ In start_memory case: Push laststart value before adding
+ start_memory and its register number to the buffer, as they
+ might not get added.
+ Added code to put a set_number_at before each succeed_n and one
+ after each jump_n; rewrote code in what seemed a more
+ straightforward manner to put all these things in the pattern so
+ the succeed_n's would correctly jump to the set_number_at's of
+ the matching jump_n's, and so the jump_n's would correctly jump
+ to after the set_number_at's of the matching succeed_n's.
+ Initialize succeed_n n's to -1.
+ (insert_op_2): Added this to insert an operation followed by
+ two integers.
+ (re_compile_fastmap): Added set_number_at case.
+ (re_match_2): Moved heading comment to after macros.
+ Added mention of REGS to heading comment.
+ No longer turn a succeed_n with n = 0 into an on_failure_jump,
+ because n needs to be reset each time through a loop.
+ Check to see if a succeed_n's n is set by its set_number_at.
+ Added set_number_at case.
+ Updated some comments.
+ (main): Added another main to run posix tests, which is compiled
+ ifdef both test and canned. (Old main is still compiled ifdef
+ test only).
+
+Tue Mar 19 09:22:55 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.[hc]: Change all instances of the word ``legal'' to
+ ``valid'' and all instances of ``illegal'' to ``invalid.''
+
+Sun Mar 4 12:11:31 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Added syntax bit RE_NO_EMPTY_RANGES which is set if
+ an ending range point has to collate higher or equal to the
+ starting range point.
+ Added syntax bit RE_NO_HYPHEN_RANGE_END which is set if a hyphen
+ can't be an ending range point.
+ Set the two above bits in RE_SYNTAX_POSIX_BASIC and
+ RE_SYNTAX_POSIX_EXTENDED.
+
+ regex.c: (re_compile_pattern): Don't allow empty ranges if the
+ RE_NO_EMPTY_RANGES syntax bit is set.
+ Don't let a hyphen be a range end if the RE_NO_HYPHEN_RANGE_END
+ syntax bit is set.
+ (ESTACK_PUSH_2): renamed this PUSH_FAILURE_POINT and made it
+ push all the used registers on the stack, as well as the number
+ of the highest numbered register used, and (as before) the two
+ failure points.
+ (re_match_2): Fixed up comments.
+ Added arrays best_regstart[], best_regstart_seg1[], best_regend[],
+ and best_regend_seg1[] to keep track of the best match so far
+ whenever reach the end of the pattern but not the end of the
+ string, and there are still failure points on the stack with
+ which to backtrack; if so, do the saving and force a fail.
+ If reach the end of the pattern but not the end of the string,
+ but there are no more failure points to try, restore the best
+ match so far, set the registers and return.
+ Compacted some code.
+ In stop_memory case, if the subexpression we've just left is in
+ a loop, push onto the stack the loop's on_failure_jump failure
+ point along with the current pointer into the string (d).
+ In finalize_jump case, in addition to popping the failure
+ points, pop the saved registers.
+ In the fail case, restore the registers, as well as the failure
+ points.
+
+Sun Feb 18 15:08:10 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c: (global): Defined a macro GET_BUFFER_SPACE which
+ makes sure you have a specified number of buffer bytes
+ allocated.
+ Redefined the macro BUFPUSH to use this.
+ Added comments.
+
+ (re_compile_pattern): Call GET_BUFFER_SPACE before storing or
+ inserting any jumps.
+
+ (re_match_2): Set d to string1 + pos and dend to end_match_1
+ only if string1 isn't null.
+ Force exit from a loop if it's around empty parentheses.
+ In stop_memory case, if found some jumps, increment p2 before
+ extracting address to which to jump. Also, don't need to know
+ how many more times can jump_n.
+ In begline case, d must equal string1 or string2, in that order,
+ only if they are not null.
+ In maybe_finalize_jump case, skip over start_memorys' and
+ stop_memorys' register numbers, too.
+
+Thu Feb 15 15:53:55 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (BUFPUSH): off by one goof in deciding whether to
+ EXTEND_BUFFER.
+
+Wed Jan 24 17:07:46 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Moved definition of NULL to here.
+ Got rid of ``In other words...'' comment.
+ Added to some comments.
+
+ regex.c: (re_compile_pattern): Tried to bulletproof some code,
+ i.e., checked if backward references (e.g., p[-1]) were within
+ the range of pattern.
+
+ (re_compile_fastmap): Fixed a bug in succeed_n part where was
+ getting the amount to jump instead of how many times to jump.
+
+ (re_search_2): Changed the name of the variable ``total'' to
+ ``total_size.''
+ Condensed some code.
+
+ (re_match_2): Moved the comment about duplicate from above the
+ start_memory case to above duplicate case.
+
+ (global): Rewrote some comments.
+ Added commandline arguments to testing.
+
+Wed Jan 17 11:47:27 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c: (global): Defined a macro STORE_NUMBER which stores a
+ number into two contiguous bytes. Also defined STORE_NUMBER_AND_INCR
+ which does the same thing and then increments the pointer to the
+ storage place to point after the number.
+ Defined a macro EXTRACT_NUMBER which extracts a number from two
+ contiguous bytes. Also defined EXTRACT_NUMBER_AND_INCR which
+ does the same thing and then increments the pointer to the
+ source to point to after where the number was.
+
+Tue Jan 16 12:09:19 1990 Kathy Hargreaves (kathy at hayley)
+
+ * regex.h: Incorporated rms' changes.
+ Defined RE_NO_BK_REFS syntax bit which is set when want to
+ interpret back reference patterns as literals.
+ Defined RE_NO_EMPTY_BRACKETS syntax bit which is set when want
+ empty bracket expressions to be illegal.
+ Defined RE_CONTEXTUAL_ILLEGAL_OPS syntax bit which is set when want
+ it to be illegal for *, +, ? and { to be first in an re or come
+ immediately after a | or a (, and for ^ not to appear in a
+ nonleading position and $ in a nontrailing position (outside of
+ bracket expressions, that is).
+ Defined RE_LIMITED_OPS syntax bit which is set when want +, ?
+ and | to always be literals instead of ops.
+ Fixed up the Posix syntax.
+ Changed the syntax bit comments from saying, e.g., ``0 means...''
+ to ``If this bit is set, it means...''.
+ Changed the syntax bit defines to use shifts instead of integers.
+
+ * regex.c: (global): Incorporated rms' changes.
+
+ (re_compile_pattern): Incorporated rms' changes
+ Made it illegal for a $ to appear anywhere but inside a bracket
+ expression or at the end of an re when RE_CONTEXTUAL_ILLEGAL_OPS
+ is set. Made the same hold for ^ except it has to be at the
+ beginning of an re instead of the end.
+ Made the re "[]" illegal if RE_NO_EMPTY_BRACKETS is set.
+ Made it illegal for | to be first or last in an re, or immediately
+ follow another | or a (.
+ Added and embellished some comments.
+ Allowed \{ to be interpreted as a literal if RE_NO_BK_CURLY_BRACES
+ is set.
+ Made it illegal for *, +, ?, and { to appear first in an re, or
+ immediately follow a | or a ( when RE_CONTEXTUAL_ILLEGAL_OPS is set.
+ Made back references interpreted as literals if RE_NO_BK_REFS is set.
+ Made recursive intervals either illegal (if RE_NO_BK_CURLY_BRACES
+ isn't set) or interpreted as literals (if is set), if RE_INTERVALS
+ is set.
+ Made it treat +, ? and | as literals if RE_LIMITED_OPS is set.
+ Cleaned up some code.
+
+Thu Dec 21 15:31:32 1989 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c: (global): Moved RE_DUP_MAX to regex.h and made it
+ equal 2^15 - 1 instead of 1000.
+ Defined NULL to be zero.
+ Moved the definition of BYTEWIDTH to regex.h.
+ Made the global variable obscure_syntax nonstatic so the tests in
+ another file could use it.
+
+ (re_compile_pattern): Defined a maximum length (CHAR_CLASS_MAX_LENGTH)
+ for character class strings (i.e., what's between the [: and the
+ :]'s).
+ Defined a macro SET_LIST_BIT(c) which sets the bit for C in a
+ character set list.
+ Took out comments that EXTEND_BUFFER clobbers C.
+ Made the string "^" match itself, if not RE_CONTEXT_IND_OPS.
+ Added character classes to bracket expressions.
+ Change the laststart pointer saved with the start of each
+ subexpression to point to start_memory instead of after the
+ following register number. This is because the subexpression
+ might be in a loop.
+ Added comments and compacted some code.
+ Made intervals only work if preceded by an re matching a single
+ character or a subexpression.
+ Made back references to nonexistent subexpressions illegal if
+ using POSIX syntax.
+ Made intervals work on the last preceding character of a
+ concatenation of characters, e.g., ab{0,} matches abbb, not abab.
+ Moved macro PREFETCH to outside the routine.
+
+ (re_compile_fastmap): Added succeed_n to work analogously to
+ on_failure_jump if n is zero and jump_n to work analogously to
+ the other backward jumps.
+
+ (re_match_2): Defined macro SET_REGS_MATCHED to set which
+ current subexpressions had matches within them.
+ Changed some comments.
+ Added reg_active and reg_matched_something arrays to keep track
+ of which subexpressions currently have matched something.
+ Defined MATCHING_IN_FIRST_STRING and replaced ``dend == end_match_1''
+ with it to make code easier to understand.
+ Fixed so can apply * and intervals to arbitrarily nested
+ subexpressions. (Lots of previous bugs here.)
+ Changed so won't match a newline if syntax bit RE_DOT_NOT_NULL is set.
+ Made the upcase array nonstatic so the testing file could use it also.
+
+ (main.c): Moved the tests out to another file.
+
+ (tests.c): Moved all the testing stuff here.
+
+Sat Nov 18 19:30:30 1989 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c: (re_compile_pattern): Defined RE_DUP_MAX, the maximum
+ number of times an interval can match a pattern.
+ Added macro GET_UNSIGNED_NUMBER (used to get below):
+ Added variables lower_bound and upper_bound for upper and lower
+ bounds of intervals.
+ Added variable num_fetches so intervals could do backtracking.
+ Added code to handle '{' and "\{" and intervals.
+ Added to comments.
+
+ (store_jump_n): (Added) Stores a jump with a number following the
+ relative address (for intervals).
+
+ (insert_jump_n): (Added) Inserts a jump_n.
+
+ (re_match_2): Defined a macro ESTACK_PUSH_2 for the error stack;
+ it checks for overflow and reallocates if necessary.
+
+ * regex.h: Added bits (RE_INTERVALS and RE_NO_BK_CURLY_BRACES)
+ to obscure syntax to indicate whether or not
+ a syntax handles intervals and recognizes either \{ and
+ \} or { and } as operators. Also added two syntaxes
+ RE_SYNTAX_POSIX_BASIC and RE_SYNTAX_POSIX_EXTENDED and two command codes
+ to the enumeration regexpcode; they are succeed_n and jump_n.
+
+Sat Nov 18 19:30:30 1989 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c: (re_compile_pattern): Defined INIT_BUFF_SIZE to get rid
+ of repeated constants in code. Tested with value 1.
+ Renamed PATPUSH as BUFPUSH, since it pushes things onto the
+ buffer, not the pattern. Also made this macro extend the buffer
+ if it's full (so could do the following):
+ Took out code at top of loop that checks to see if buffer is going
+ to be full after 10 additions (and reallocates if necessary).
+
+ (insert_jump): Rearranged declaration lines so comments would read
+ better.
+
+ (re_match_2): Compacted exactn code and added more comments.
+
+ (main): Defined macros TEST_MATCH and MATCH_SELF to do
+ testing; took out loop so could use these instead.
+
+Tue Oct 24 20:57:18 1989 Kathy Hargreaves (kathy at hayley)
+
+ * regex.c (re_set_syntax): Gave argument `syntax' a type.
+ (store_jump, insert_jump): made them void functions.
+
+Local Variables:
+mode: indented-text
+left-margin: 8
+version-control: never
+End:
diff --git a/gnu/lib/libregex/INSTALL b/gnu/lib/libregex/INSTALL
new file mode 100644
index 0000000..014e0f7
--- /dev/null
+++ b/gnu/lib/libregex/INSTALL
@@ -0,0 +1,117 @@
+This is a generic INSTALL file for utilities distributions.
+If this package does not come with, e.g., installable documentation or
+data files, please ignore the references to them below.
+
+To compile this package:
+
+1. Configure the package for your system. In the directory that this
+file is in, type `./configure'. If you're using `csh' on an old
+version of System V, you might need to type `sh configure' instead to
+prevent `csh' from trying to execute `configure' itself.
+
+The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation, and
+creates the Makefile(s) (one in each subdirectory of the source
+directory). In some packages it creates a C header file containing
+system-dependent definitions. It also creates a file `config.status'
+that you can run in the future to recreate the current configuration.
+
+Running `configure' takes a minute or two. While it is running, it
+prints some messages that tell what it is doing. If you don't want to
+see the messages, run `configure' with its standard output redirected
+to `/dev/null'; for example, `./configure >/dev/null'.
+
+To compile the package in a different directory from the one
+containing the source code, you must use a version of `make' that
+supports the VPATH variable, such as GNU `make'. `cd' to the directory
+where you want the object files and executables to go and run
+`configure'. `configure' automatically checks for the source code in
+the directory that `configure' is in and in `..'. If for some reason
+`configure' is not in the source code directory that you are
+configuring, then it will report that it can't find the source code.
+In that case, run `configure' with the option `--srcdir=DIR', where
+DIR is the directory that contains the source code.
+
+By default, `make install' will install the package's files in
+/usr/local/bin, /usr/local/lib, /usr/local/man, etc. You can specify
+an installation prefix other than /usr/local by giving `configure' the
+option `--prefix=PATH'. Alternately, you can do so by giving a value
+for the `prefix' variable when you run `make', e.g.,
+ make prefix=/usr/gnu
+
+You can specify separate installation prefixes for
+architecture-specific files and architecture-independent files. If
+you give `configure' the option `--exec-prefix=PATH' or set the
+`make' variable `exec_prefix' to PATH, the package will use PATH as
+the prefix for installing programs and libraries. Data files and
+documentation will still use the regular prefix. Normally, all files
+are installed using the regular prefix.
+
+Another `configure' option is useful mainly in `Makefile' rules for
+updating `config.status' and `Makefile'. The `--no-create' option
+figures out the configuration for your system and records it in
+`config.status', without actually configuring the package (creating
+`Makefile's and perhaps a configuration header file). Later, you can
+run `./config.status' to actually configure the package. You can also
+give `config.status' the `--recheck' option, which makes it re-run
+`configure' with the same arguments you used before. This option is
+useful if you change `configure'.
+
+Some packages pay attention to `--with-PACKAGE' options to `configure',
+where PACKAGE is something like `gnu-libc' or `x' (for the X Window System).
+The README should mention any --with- options that the package recognizes.
+
+`configure' ignores any other arguments that you give it.
+
+If your system requires unusual options for compilation or linking
+that `configure' doesn't know about, you can give `configure' initial
+values for some variables by setting them in the environment. In
+Bourne-compatible shells, you can do that on the command line like
+this:
+ CC='gcc -traditional' DEFS=-D_POSIX_SOURCE ./configure
+
+The `make' variables that you might want to override with environment
+variables when running `configure' are:
+
+(For these variables, any value given in the environment overrides the
+value that `configure' would choose:)
+CC C compiler program.
+ Default is `cc', or `gcc' if `gcc' is in your PATH.
+INSTALL Program to use to install files.
+ Default is `install' if you have it, `cp' otherwise.
+
+(For these variables, any value given in the environment is added to
+the value that `configure' chooses:)
+DEFS Configuration options, in the form `-Dfoo -Dbar ...'
+ Do not use this variable in packages that create a
+ configuration header file.
+LIBS Libraries to link with, in the form `-lfoo -lbar ...'
+
+If you need to do unusual things to compile the package, we encourage
+you to figure out how `configure' could check whether to do them, and
+mail diffs or instructions to the address given in the README so we
+can include them in the next release.
+
+2. Type `make' to compile the package. If you want, you can override
+the `make' variables CFLAGS and LDFLAGS like this:
+
+ make CFLAGS=-O2 LDFLAGS=-s
+
+3. If the package comes with self-tests and you want to run them,
+type `make check'. If you're not sure whether there are any, try it;
+if `make' responds with something like
+ make: *** No way to make target `check'. Stop.
+then the package does not come with self-tests.
+
+4. Type `make install' to install programs, data files, and
+documentation.
+
+5. You can remove the program binaries and object files from the
+source directory by typing `make clean'. To also remove the
+Makefile(s), the header file containing system-dependent definitions
+(if the package uses one), and `config.status' (all the files that
+`configure' created), type `make distclean'.
+
+The file `configure.in' is used as a template to create `configure' by
+a program called `autoconf'. You will only need it if you want to
+regenerate `configure' using a newer version of `autoconf'.
diff --git a/gnu/lib/libregex/Makefile b/gnu/lib/libregex/Makefile
new file mode 100644
index 0000000..560e61b
--- /dev/null
+++ b/gnu/lib/libregex/Makefile
@@ -0,0 +1,12 @@
+# $Header: /b/source/CVS/src/gnu/lib/libregex/Makefile,v 1.2 1993/04/10 15:24:44 cgd Exp $
+
+LIB= gnuregex
+CFLAGS+=-DHAVE_STRING_H=1
+SRCS= regex.c
+NOMAN= noman
+
+afterinstall:
+ install -c -o root -g wheel -m 444 ${.CURDIR}/regex.h \
+ ${DESTDIR}/usr/include
+
+.include <bsd.lib.mk>
diff --git a/gnu/lib/libregex/Makefile.gnu b/gnu/lib/libregex/Makefile.gnu
new file mode 100644
index 0000000..0976aa8
--- /dev/null
+++ b/gnu/lib/libregex/Makefile.gnu
@@ -0,0 +1,99 @@
+# Generated automatically from Makefile.in by configure.
+# Makefile for regex.
+#
+# Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+version = 0.12
+
+# You can define CPPFLAGS on the command line. Aside from system-specific
+# flags, you can define:
+# -DREGEX_MALLOC to use malloc/realloc/free instead of alloca.
+# -DDEBUG to enable the compiled pattern disassembler and execution
+# tracing; code runs substantially slower.
+# -DEXTRACT_MACROS to use the macros EXTRACT_* (as opposed to
+# the corresponding C procedures). If not -DDEBUG, the macros
+# are used.
+CPPFLAGS =
+
+# Likewise, you can override CFLAGS to optimize, use -Wall, etc.
+CFLAGS = -g
+
+# Ditto for LDFLAGS and LOADLIBES.
+LDFLAGS =
+LOADLIBES =
+
+srcdir = .
+VPATH = .
+
+CC = gcc
+DEFS = -DHAVE_STRING_H=1
+
+SHELL = /bin/sh
+
+subdirs = doc test
+
+default all:: regex.o
+.PHONY: default all
+
+regex.o: regex.c regex.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. -I$(srcdir) -c $<
+
+clean mostlyclean::
+ rm -f *.o
+
+distclean:: clean
+ rm -f Makefile config.status
+
+extraclean:: distclean
+ rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out
+
+configure: configure.in
+ autoconf
+
+config.status: configure
+ sh configure --no-create
+
+Makefile: Makefile.in config.status
+ sh config.status
+
+makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' \
+DEFS='$(DEFS)' LDFLAGS='$(LDFLAGS)' LOADLIBES='$(LOADLIBES)'
+
+default all install \
+mostlyclean clean distclean extraclean realclean \
+TAGS check::
+ for d in $(subdirs); do (cd $$d; $(MAKE) $(makeargs) $@); done
+.PHONY: install mostlyclean clean distclean extraclean realclean TAGS check
+
+# Prevent GNU make 3 from overflowing arg limit on system V.
+.NOEXPORT:
+
+distfiles = AUTHORS ChangeLog COPYING INSTALL NEWS README \
+ *.in configure regex.c regex.h
+distdir = regex-$(version)
+distargs = version=$(version) distdir=../$(distdir)/$$d
+dist: TAGS configure
+ @echo "Version numbers in: Makefile.in, ChangeLog, NEWS,"
+ @echo " regex.c, regex.h,"
+ @echo " and doc/xregex.texi (if modified)."
+ rm -rf $(distdir)
+ mkdir $(distdir)
+ ln $(distfiles) $(distdir)
+ for d in $(subdirs); do (cd $$d; $(MAKE) $(distargs) dist); done
+ tar czhf $(distdir).tar.Z $(distdir)
+ rm -rf $(distdir)
+.PHONY: dist
diff --git a/gnu/lib/libregex/Makefile.in b/gnu/lib/libregex/Makefile.in
new file mode 100644
index 0000000..836e6de
--- /dev/null
+++ b/gnu/lib/libregex/Makefile.in
@@ -0,0 +1,98 @@
+# Makefile for regex.
+#
+# Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+version = 0.12
+
+# You can define CPPFLAGS on the command line. Aside from system-specific
+# flags, you can define:
+# -DREGEX_MALLOC to use malloc/realloc/free instead of alloca.
+# -DDEBUG to enable the compiled pattern disassembler and execution
+# tracing; code runs substantially slower.
+# -DEXTRACT_MACROS to use the macros EXTRACT_* (as opposed to
+# the corresponding C procedures). If not -DDEBUG, the macros
+# are used.
+CPPFLAGS =
+
+# Likewise, you can override CFLAGS to optimize, use -Wall, etc.
+CFLAGS = -g
+
+# Ditto for LDFLAGS and LOADLIBES.
+LDFLAGS =
+LOADLIBES =
+
+srcdir = @srcdir@
+VPATH = @srcdir@
+
+CC = @CC@
+DEFS = @DEFS@
+
+SHELL = /bin/sh
+
+subdirs = doc test
+
+default all:: regex.o
+.PHONY: default all
+
+regex.o: regex.c regex.h
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) -I. -I$(srcdir) -c $<
+
+clean mostlyclean::
+ rm -f *.o
+
+distclean:: clean
+ rm -f Makefile config.status
+
+extraclean:: distclean
+ rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out
+
+configure: configure.in
+ autoconf
+
+config.status: configure
+ sh configure --no-create
+
+Makefile: Makefile.in config.status
+ sh config.status
+
+makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' \
+DEFS='$(DEFS)' LDFLAGS='$(LDFLAGS)' LOADLIBES='$(LOADLIBES)'
+
+default all install \
+mostlyclean clean distclean extraclean realclean \
+TAGS check::
+ for d in $(subdirs); do (cd $$d; $(MAKE) $(makeargs) $@); done
+.PHONY: install mostlyclean clean distclean extraclean realclean TAGS check
+
+# Prevent GNU make 3 from overflowing arg limit on system V.
+.NOEXPORT:
+
+distfiles = AUTHORS ChangeLog COPYING INSTALL NEWS README \
+ *.in configure regex.c regex.h
+distdir = regex-$(version)
+distargs = version=$(version) distdir=../$(distdir)/$$d
+dist: TAGS configure
+ @echo "Version numbers in: Makefile.in, ChangeLog, NEWS,"
+ @echo " regex.c, regex.h,"
+ @echo " and doc/xregex.texi (if modified)."
+ rm -rf $(distdir)
+ mkdir $(distdir)
+ ln $(distfiles) $(distdir)
+ for d in $(subdirs); do (cd $$d; $(MAKE) $(distargs) dist); done
+ tar czhf $(distdir).tar.Z $(distdir)
+ rm -rf $(distdir)
+.PHONY: dist
diff --git a/gnu/lib/libregex/NEWS b/gnu/lib/libregex/NEWS
new file mode 100644
index 0000000..b3a899b
--- /dev/null
+++ b/gnu/lib/libregex/NEWS
@@ -0,0 +1,62 @@
+Version 0.12
+
+* regex.c does not #define bcmp/bcopy/bzero if they already are.
+
+* regex.h does not redefine `const' if it is already defined, even if
+ __STDC__ is not defined.
+
+* RE_SYNTAX_ED added (same as POSIX BRE's).
+
+* The following bugs have been fixed, among others:
+ * The pattern \w+ no longer loops infinitely.
+ * The pattern ".+\n" is compiled correctly.
+ * Expressions with more than MAX_REGNUM groups are compiled correctly.
+
+* Patterns that end in a repetition operator (e.g., `*') match
+ slightly faster if no looping is actually necessary.
+
+Version 0.11 (17 Sep 92)
+
+* Back-references to nonexistent subexpressions, as in the r.e. `abc\1',
+ are always invalid. Previously, they could match the literal digit,
+ e.g., the stated r.e. might have matched `abc1'.
+
+* Empty subexpressions are always valid (POSIX leaves this undefined).
+
+* Simplified rules for ^ and $ being anchors.
+
+* One minor speedup (rewriting the C procedure `pop_failure_point' as a
+ macro again).
+
+* Bug fixes involving:
+ - Declarations in regex.h and non-ANSI compilers.
+ - Bracket expressions with characters between 0x80-0xff.
+ - Memory leak in re_match_2 on systems requiring `alloca (0)' to
+ free alloca'd storage.
+
+* Test and documentation files moved into subdirectories.
+
+Version 0.10 (9 Sep 92)
+
+* `obscure_syntax' is now called `re_default_syntax'.
+
+* `re_comp's return type is no longer `const', for compatibility with BSD.
+
+* POSIX syntaxes now include as much functionality as possible
+ (consistent with the standard).
+
+* Compilation conditionals normalized to what the rest of GNU is
+ migrating towards these days.
+
+* Bug fixes involving:
+ - Ranges with characters between 0x80 and 0xff, e.g., [\001-\377].
+ - `re_compile_fastmap' and the sequence `.*\n'.
+ - Intervals with exact counts, e.g., a{5}.
+
+* Changed distribution to use a standard Makefile, install the info
+ files, use a configure script, etc.
+
+Version 0.9
+
+* The longest match was not always chosen: `a*|ab' didn't match `aab'.
+
diff --git a/gnu/lib/libregex/README b/gnu/lib/libregex/README
new file mode 100644
index 0000000..918e1a0
--- /dev/null
+++ b/gnu/lib/libregex/README
@@ -0,0 +1,60 @@
+This directory contains the GNU regex library. It is compliant with
+POSIX.2, except for internationalization features.
+
+See the file NEWS for a list of major changes in the current release.
+
+See the file INSTALL for compilation instructions. (The only thing
+installed is the documentation; regex.c is compiled into regex.o, but
+not installed anywhere.)
+
+The subdirectory `doc' contains a (programmers') manual for the library.
+It's probably out-of-date. Improvements are welcome.
+
+The subdirectory `test' contains the various tests we've written.
+
+We know this code is not as fast as it might be. If you have specific
+suggestions, profiling results, or other such useful information to
+report, please do.
+
+Emacs 18 is not going to use this revised regex (but Emacs 19 will). If
+you want to try it with Emacs 18, apply the patch at the end of this
+file first.
+
+Mail bug reports to bug-gnu-utils@prep.ai.mit.edu.
+
+Please include an actual regular expression that fails (and the syntax
+used to compile it); without that, there's no way to reproduce the bug,
+so there's no way we can fix it. Even if you include a patch, also
+include the regular expression in error; otherwise, we can't know for
+sure what you're trying to fix.
+
+Here is the patch to make this version of regex work with Emacs 18.
+
+*** ORIG/search.c Tue Jan 8 13:04:55 1991
+--- search.c Sun Jan 5 10:57:00 1992
+***************
+*** 25,26 ****
+--- 25,28 ----
+ #include "commands.h"
++
++ #include <sys/types.h>
+ #include "regex.h"
+***************
+*** 477,479 ****
+ /* really needed. */
+! && *(searchbuf.buffer) == (char) exactn /* first item is "exact match" */
+ && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
+--- 479,482 ----
+ /* really needed. */
+! /* first item is "exact match" */
+! && *(searchbuf.buffer) == (char) RE_EXACTN_VALUE
+ && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
+***************
+*** 1273,1275 ****
+ searchbuf.allocated = 100;
+! searchbuf.buffer = (char *) malloc (searchbuf.allocated);
+ searchbuf.fastmap = search_fastmap;
+--- 1276,1278 ----
+ searchbuf.allocated = 100;
+! searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated);
+ searchbuf.fastmap = search_fastmap;
diff --git a/gnu/lib/libregex/VERSION b/gnu/lib/libregex/VERSION
new file mode 100644
index 0000000..7182be2
--- /dev/null
+++ b/gnu/lib/libregex/VERSION
@@ -0,0 +1,3 @@
+GNU regex version 0.12
+
+complete, unmodified regex sources are available from prep.ai.mit.edu.
diff --git a/gnu/lib/libregex/config.status b/gnu/lib/libregex/config.status
new file mode 100644
index 0000000..1b276aa
--- /dev/null
+++ b/gnu/lib/libregex/config.status
@@ -0,0 +1,59 @@
+#!/bin/sh
+# Generated automatically by configure.
+# Run this file to recreate the current configuration.
+# This directory was configured as follows,
+# on host sun-lamp.cs.berkeley.edu:
+#
+# configure
+
+for arg
+do
+ case "$arg" in
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ exec /bin/sh configure ;;
+ *) echo "Usage: config.status --recheck" 1>&2; exit 1 ;; # usage error: report it on stderr
+ esac
+done
+
+trap 'rm -f Makefile doc/Makefile test/Makefile; exit 1' 1 3 15
+CC='gcc'
+INSTALL='/usr/bin/install -c'
+INSTALL_PROGRAM='$(INSTALL)'
+INSTALL_DATA='$(INSTALL) -m 644'
+CPP='${CC-cc} -E'
+ALLOCA=''
+LIBS=''
+srcdir='.'
+DEFS=' -DHAVE_STRING_H=1'
+prefix='/usr'
+exec_prefix='${prefix}'
+prsub='s%^prefix\([ ]*\)=\([ ]*\).*$%prefix\1=\2/usr%
+s%^exec_prefix\([ ]*\)=\([ ]*\).*$%exec_prefix\1=\2${prefix}%'
+
+top_srcdir=$srcdir
+for file in .. Makefile doc/Makefile test/Makefile; do if [ "x$file" != "x.." ]; then
+ srcdir=$top_srcdir
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ dir=`echo $file|sed 's%/[^/][^/]*$%%'`
+ if test "$dir" != "$file"; then
+ test "$top_srcdir" != . && srcdir=$top_srcdir/$dir
+ test ! -d $dir && mkdir $dir
+ fi
+ echo creating $file
+ rm -f $file
+ echo "# Generated automatically from `echo $file|sed 's|.*/||'`.in by configure." > $file
+ sed -e "
+$prsub
+s%@CC@%$CC%g
+s%@INSTALL@%$INSTALL%g
+s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g
+s%@INSTALL_DATA@%$INSTALL_DATA%g
+s%@CPP@%$CPP%g
+s%@ALLOCA@%$ALLOCA%g
+s%@LIBS@%$LIBS%g
+s%@srcdir@%$srcdir%g
+s%@DEFS@%$DEFS%
+" $top_srcdir/${file}.in >> $file
+fi; done
+
+exit 0
diff --git a/gnu/lib/libregex/configure b/gnu/lib/libregex/configure
new file mode 100644
index 0000000..29c5b80
--- /dev/null
+++ b/gnu/lib/libregex/configure
@@ -0,0 +1,462 @@
+#!/bin/sh
+# Guess values for system-dependent variables and create Makefiles.
+# Generated automatically using autoconf.
+# Copyright (C) 1991, 1992, 1993 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# Usage: configure [--srcdir=DIR] [--host=HOST] [--gas] [--nfp] [--no-create]
+# [--prefix=PREFIX] [--exec-prefix=PREFIX] [--with-PACKAGE] [TARGET]
+# Ignores all args except --srcdir, --prefix, --exec-prefix, --no-create, and
+# --with-PACKAGE unless this script has special code to handle it.
+
+
+for arg
+do
+ # Handle --exec-prefix with a space before the argument.
+ if test x$next_exec_prefix = xyes; then exec_prefix=$arg; next_exec_prefix=
+ # Handle --host with a space before the argument.
+ elif test x$next_host = xyes; then next_host=
+ # Handle --prefix with a space before the argument.
+ elif test x$next_prefix = xyes; then prefix=$arg; next_prefix=
+ # Handle --srcdir with a space before the argument.
+ elif test x$next_srcdir = xyes; then srcdir=$arg; next_srcdir=
+ else
+ case $arg in
+ # For backward compatibility, also recognize exact --exec_prefix.
+ -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* | --exec=* | --exe=* | --ex=* | --e=*)
+ exec_prefix=`echo $arg | sed 's/[-a-z_]*=//'` ;;
+ -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- | --exec | --exe | --ex | --e)
+ next_exec_prefix=yes ;;
+
+ -gas | --gas | --ga | --g) ;;
+
+ -host=* | --host=* | --hos=* | --ho=* | --h=*) ;;
+ -host | --host | --hos | --ho | --h)
+ next_host=yes ;;
+
+ -nfp | --nfp | --nf) ;;
+
+ -no-create | --no-create | --no-creat | --no-crea | --no-cre | --no-cr | --no-c | --no- | --no)
+ no_create=1 ;;
+
+ -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+ prefix=`echo $arg | sed 's/[-a-z_]*=//'` ;;
+ -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+ next_prefix=yes ;;
+
+ -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=* | --s=*)
+ srcdir=`echo $arg | sed 's/[-a-z_]*=//'` ;;
+ -srcdir | --srcdir | --srcdi | --srcd | --src | --sr | --s)
+ next_srcdir=yes ;;
+
+ -with-* | --with-*)
+ package=`echo $arg|sed 's/-*with-//'`
+ # Delete all the valid chars; see if any are left.
+ if test -n "`echo $package|sed 's/[-a-zA-Z0-9_]*//g'`"; then
+ echo "configure: $package: invalid package name" >&2; exit 1
+ fi
+ eval "with_`echo $package|sed s/-/_/g`=1" ;;
+
+ *) ;;
+ esac
+ fi
+done
+
+trap 'rm -f conftest* core; exit 1' 1 3 15
+
+rm -f conftest*
+compile='${CC-cc} $CFLAGS $DEFS conftest.c -o conftest $LIBS >/dev/null 2>&1'
+
+# A filename unique to this package, relative to the directory that
+# configure is in, which we can look for to find out if srcdir is correct.
+unique_file=regex.c
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+ srcdirdefaulted=yes
+ # Try the directory containing this script, then `..'.
+ prog=$0
+ confdir=`echo $prog|sed 's%/[^/][^/]*$%%'`
+ test "X$confdir" = "X$prog" && confdir=.
+ srcdir=$confdir
+ if test ! -r $srcdir/$unique_file; then
+ srcdir=..
+ fi
+fi
+if test ! -r $srcdir/$unique_file; then
+ if test x$srcdirdefaulted = xyes; then
+ echo "configure: Can not find sources in \`${confdir}' or \`..'." 1>&2
+ else
+ echo "configure: Can not find sources in \`${srcdir}'." 1>&2
+ fi
+ exit 1
+fi
+# Preserve a srcdir of `.' to avoid automounter screwups with pwd.
+# But we can't avoid them for `..', to make subdirectories work.
+case $srcdir in
+ .|/*|~*) ;;
+ *) srcdir=`cd $srcdir; pwd` ;; # Make relative path absolute.
+esac
+
+
+if test -z "$CC"; then
+ echo checking for gcc
+ saveifs="$IFS"; IFS="${IFS}:"
+ for dir in $PATH; do
+ test -z "$dir" && dir=.
+ if test -f $dir/gcc; then
+ CC="gcc"
+ break
+ fi
+ done
+ IFS="$saveifs"
+fi
+test -z "$CC" && CC="cc"
+
+# Find out if we are using GNU C, under whatever name.
+cat > conftest.c <<EOF
+#ifdef __GNUC__
+ yes
+#endif
+EOF
+${CC-cc} -E conftest.c > conftest.out 2>&1
+if egrep yes conftest.out >/dev/null 2>&1; then
+ GCC=1 # For later tests.
+fi
+rm -f conftest*
+
+# Make sure to not get the incompatible SysV /etc/install and
+# /usr/sbin/install, which might be in PATH before a BSD-like install,
+# or the SunOS /usr/etc/install directory, or the AIX /bin/install,
+# or the AFS install, which mishandles nonexistent args. (Sigh.)
+if test -z "$INSTALL"; then
+ echo checking for install
+ saveifs="$IFS"; IFS="${IFS}:"
+ for dir in $PATH; do
+ test -z "$dir" && dir=.
+ case $dir in
+ /etc|/usr/sbin|/usr/etc|/usr/afsws/bin) ;;
+ *)
+ if test -f $dir/install; then
+ if grep dspmsg $dir/install >/dev/null 2>&1; then
+ : # AIX
+ else
+ INSTALL="$dir/install -c"
+ INSTALL_PROGRAM='$(INSTALL)'
+ INSTALL_DATA='$(INSTALL) -m 644'
+ break
+ fi
+ fi
+ ;;
+ esac
+ done
+ IFS="$saveifs"
+fi
+INSTALL=${INSTALL-cp}
+INSTALL_PROGRAM=${INSTALL_PROGRAM-'$(INSTALL)'}
+INSTALL_DATA=${INSTALL_DATA-'$(INSTALL)'}
+
+
+echo checking for AIX
+echo checking how to run the C preprocessor
+if test -z "$CPP"; then
+ CPP='${CC-cc} -E'
+ cat > conftest.c <<EOF
+#include <stdio.h>
+EOF
+err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"`
+if test -z "$err"; then
+ :
+else
+ CPP=/lib/cpp
+fi
+rm -f conftest*
+fi
+
+cat > conftest.c <<EOF
+#ifdef _AIX
+ yes
+#endif
+
+EOF
+eval "$CPP $DEFS conftest.c > conftest.out 2>&1"
+if egrep "yes" conftest.out >/dev/null 2>&1; then
+ DEFS="$DEFS -D_ALL_SOURCE=1"
+fi
+rm -f conftest*
+
+
+echo checking for DYNIX/ptx libseq
+cat > conftest.c <<EOF
+#if defined(_SEQUENT_)
+ yes
+#endif
+
+EOF
+eval "$CPP $DEFS conftest.c > conftest.out 2>&1"
+if egrep "yes" conftest.out >/dev/null 2>&1; then
+ SEQUENT=1
+fi
+rm -f conftest*
+
+test -n "$SEQUENT" && test -f /usr/lib/libseq.a &&
+ LIBS="$LIBS -lseq"
+
+echo checking for POSIXized ISC
+if test -d /etc/conf/kconfig.d &&
+ grep _POSIX_VERSION /usr/include/sys/unistd.h >/dev/null 2>&1
+then
+ ISC=1 # If later tests want to check for ISC.
+ DEFS="$DEFS -D_POSIX_SOURCE=1"
+ if test -n "$GCC"; then
+ CC="$CC -posix"
+ else
+ CC="$CC -Xp"
+ fi
+fi
+
+echo checking for minix/config.h
+cat > conftest.c <<EOF
+#include <minix/config.h>
+EOF
+err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"`
+if test -z "$err"; then
+ MINIX=1
+fi
+rm -f conftest*
+
+# The Minix shell can't assign to the same variable on the same line!
+if test -n "$MINIX"; then
+ DEFS="$DEFS -D_POSIX_SOURCE=1"
+ DEFS="$DEFS -D_POSIX_1_SOURCE=2"
+ DEFS="$DEFS -D_MINIX=1"
+fi
+
+
+echo checking for ANSI C header files
+cat > conftest.c <<EOF
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+EOF
+err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"`
+if test -z "$err"; then
+ # SunOS string.h does not declare mem*, contrary to ANSI.
+echo '#include <string.h>' > conftest.c
+eval "$CPP $DEFS conftest.c > conftest.out 2>&1"
+if egrep "memchr" conftest.out >/dev/null 2>&1; then
+ # SGI's /bin/cc from Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+cat > conftest.c <<EOF
+#include <ctype.h>
+#define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+#define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#define XOR(e,f) (((e) && !(f)) || (!(e) && (f)))
+int main () { int i; for (i = 0; i < 256; i++)
+if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2);
+exit (0); }
+
+EOF
+eval $compile
+if test -s conftest && (./conftest; exit) 2>/dev/null; then
+ DEFS="$DEFS -DSTDC_HEADERS=1"
+fi
+rm -f conftest*
+fi
+rm -f conftest*
+
+fi
+rm -f conftest*
+
+for hdr in string.h
+do
+trhdr=HAVE_`echo $hdr | tr '[a-z]./' '[A-Z]__'`
+echo checking for ${hdr}
+cat > conftest.c <<EOF
+#include <${hdr}>
+EOF
+err=`eval "$CPP $DEFS conftest.c 2>&1 >/dev/null"`
+if test -z "$err"; then
+ DEFS="$DEFS -D${trhdr}=1"
+fi
+rm -f conftest*
+done
+
+
+# The Ultrix 4.2 mips builtin alloca declared by alloca.h only works
+# for constant arguments. Useless!
+echo checking for working alloca.h
+cat > conftest.c <<EOF
+#include <alloca.h>
+main() { exit(0); }
+t() { char *p = alloca(2 * sizeof(int)); }
+EOF
+if eval $compile; then
+ DEFS="$DEFS -DHAVE_ALLOCA_H=1"
+fi
+rm -f conftest*
+
+decl="#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else
+#ifdef _AIX
+ #pragma alloca
+#else
+char *alloca ();
+#endif
+#endif
+#endif
+"
+echo checking for alloca
+cat > conftest.c <<EOF
+$decl
+main() { exit(0); }
+t() { char *p = (char *) alloca(1); }
+EOF
+if eval $compile; then
+ :
+else
+ alloca_missing=1
+fi
+rm -f conftest*
+
+if test -n "$alloca_missing"; then
+ # The SVR3 libPW and SVR4 libucb both contain incompatible functions
+ # that cause trouble. Some versions do not even contain alloca or
+ # contain a buggy version. If you still want to use their alloca,
+ # use ar to extract alloca.o from them instead of compiling alloca.c.
+ ALLOCA=alloca.o
+fi
+
+prog='/* Ultrix mips cc rejects this. */
+typedef int charset[2]; const charset x;
+/* SunOS 4.1.1 cc rejects this. */
+char const *const *p;
+char **p2;
+/* HPUX 7.0 cc rejects these. */
+++p;
+p2 = (char const* const*) p;'
+echo checking for working const
+cat > conftest.c <<EOF
+
+main() { exit(0); }
+t() { $prog }
+EOF
+if eval $compile; then
+ :
+else
+ DEFS="$DEFS -Dconst="
+fi
+rm -f conftest*
+
+
+if test -z "$prefix"
+then
+ echo checking for gcc to derive installation directory prefix
+ saveifs="$IFS"; IFS="$IFS:"
+ for dir in $PATH; do
+ test -z "$dir" && dir=.
+ if test $dir != . && test -f $dir/gcc; then
+ # Not all systems have dirname.
+ prefix=`echo $dir|sed 's%/[^/][^/]*$%%'`
+ break
+ fi
+ done
+ IFS="$saveifs"
+fi
+
+
+if test -n "$prefix"; then
+ test -z "$exec_prefix" && exec_prefix='${prefix}'
+ prsub="s%^prefix\\([ ]*\\)=\\([ ]*\\).*$%prefix\\1=\\2$prefix%"
+fi
+if test -n "$exec_prefix"; then
+ prsub="$prsub
+s%^exec_prefix\\([ ]*\\)=\\([ ]*\\).*$%\
+exec_prefix\\1=\\2$exec_prefix%"
+fi
+
+trap 'rm -f config.status; exit 1' 1 3 15
+echo creating config.status
+rm -f config.status
+cat > config.status <<EOF
+#!/bin/sh
+# Generated automatically by configure.
+# Run this file to recreate the current configuration.
+# This directory was configured as follows,
+# on host `(hostname || uname -n) 2>/dev/null`:
+#
+# $0 $*
+
+for arg
+do
+ case "\$arg" in
+ -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+ exec /bin/sh $0 $* ;;
+ *) echo "Usage: config.status --recheck" 1>&2; exit 1 ;; # usage error: report it on stderr
+ esac
+done
+
+trap 'rm -f Makefile doc/Makefile test/Makefile; exit 1' 1 3 15
+CC='$CC'
+INSTALL='$INSTALL'
+INSTALL_PROGRAM='$INSTALL_PROGRAM'
+INSTALL_DATA='$INSTALL_DATA'
+CPP='$CPP'
+ALLOCA='$ALLOCA'
+LIBS='$LIBS'
+srcdir='$srcdir'
+DEFS='$DEFS'
+prefix='$prefix'
+exec_prefix='$exec_prefix'
+prsub='$prsub'
+EOF
+cat >> config.status <<\EOF
+
+top_srcdir=$srcdir
+for file in .. Makefile doc/Makefile test/Makefile; do if [ "x$file" != "x.." ]; then
+ srcdir=$top_srcdir
+ # Remove last slash and all that follows it. Not all systems have dirname.
+ dir=`echo $file|sed 's%/[^/][^/]*$%%'`
+ if test "$dir" != "$file"; then
+ test "$top_srcdir" != . && srcdir=$top_srcdir/$dir
+ test ! -d $dir && mkdir $dir
+ fi
+ echo creating $file
+ rm -f $file
+ echo "# Generated automatically from `echo $file|sed 's|.*/||'`.in by configure." > $file
+ sed -e "
+$prsub
+s%@CC@%$CC%g
+s%@INSTALL@%$INSTALL%g
+s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g
+s%@INSTALL_DATA@%$INSTALL_DATA%g
+s%@CPP@%$CPP%g
+s%@ALLOCA@%$ALLOCA%g
+s%@LIBS@%$LIBS%g
+s%@srcdir@%$srcdir%g
+s%@DEFS@%$DEFS%
+" $top_srcdir/${file}.in >> $file
+fi; done
+
+exit 0
+EOF
+chmod +x config.status
+test -n "$no_create" || ./config.status
+
diff --git a/gnu/lib/libregex/configure.in b/gnu/lib/libregex/configure.in
new file mode 100644
index 0000000..f0fc780
--- /dev/null
+++ b/gnu/lib/libregex/configure.in
@@ -0,0 +1,23 @@
+dnl Process this file with autoconf to produce a configure script.
+AC_INIT(regex.c)
+
+AC_PROG_CC
+AC_PROG_INSTALL
+
+dnl I'm not sure if AC_AIX and AC_DYNIX_SEQ are really necessary. The
+dnl Autoconf documentation isn't specific about which BSD functions they
+dnl provide.
+AC_AIX
+AC_DYNIX_SEQ
+AC_ISC_POSIX
+AC_MINIX
+
+AC_STDC_HEADERS
+AC_HAVE_HEADERS(string.h)
+
+AC_ALLOCA
+AC_CONST
+
+AC_PREFIX(gcc)
+
+AC_OUTPUT(Makefile doc/Makefile test/Makefile)
diff --git a/gnu/lib/libregex/doc/Makefile b/gnu/lib/libregex/doc/Makefile
new file mode 100644
index 0000000..13753ae
--- /dev/null
+++ b/gnu/lib/libregex/doc/Makefile
@@ -0,0 +1,93 @@
+# Generated automatically from Makefile.in by configure.
+# Makefile for regex documentation.
+#
+# Copyright (C) 1992 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# Installation directories.
+prefix = /usr
+infodir = $(prefix)/info
+
+srcdir = .
+VPATH = .:../.
+
+INSTALL = /usr/bin/install -c
+INSTALL_DATA = $(INSTALL) -m 644
+
+MAKEINFO = makeinfo --no-split
+SHELL = /bin/sh
+TEX = tex
+TEXINDEX = texindex
+
+default all: regex.info regex.dvi
+.PHONY: default all
+
+# We need to include some code from regex.h.
+regex.texi: xregex.texi
+ rm -f $@
+ gawk -f include.awk -vsource=../$(srcdir)/regex.h \
+ <../$(srcdir)/doc/xregex.texi \
+ | expand >$@
+ chmod a-w $@
+
+regex.dvi: regex.cps
+ $(TEX) regex.texi
+regex.cps: regex.cp
+ $(TEXINDEX) regex.??
+regex.cp: regex.texi
+ $(TEX) ../$(srcdir)/doc/regex.texi
+
+regex.info: regex.texi
+ $(MAKEINFO) ../$(srcdir)/doc/regex.texi
+
+# I know of no way to make a good TAGS file from Texinfo source.
+TAGS:
+
+check:
+.PHONY: check
+
+install: regex.info
+ -mkdir $(prefix) $(infodir)
+ for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done
+.PHONY: install
+
+clean mostlyclean:
+ rm -f regex.?? *.dvi *.log *.toc
+
+distclean: clean
+ rm -f Makefile
+ for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done
+
+realclean: distclean
+ rm -f *.info* regex.??? regex.texi TAGS
+
+extraclean: distclean
+ rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out
+.PHONY: mostlyclean clean distclean realclean extraclean
+
+Makefile: Makefile.in ../config.status
+ (cd ..; sh config.status)
+
+# Prevent GNU make 3 from overflowing arg limit on system V.
+.NOEXPORT:
+
+# Assumes $(distdir) is the place to put our files.
+distfiles = Makefile.in *.texi texinfo.tex include.awk \
+ regex.info* regex.aux regex.cps
+dist: Makefile regex.info regex.cps
+ mkdir $(distdir)
+ ln $(distfiles) $(distdir)
+.PHONY: dist
diff --git a/gnu/lib/libregex/doc/Makefile.in b/gnu/lib/libregex/doc/Makefile.in
new file mode 100644
index 0000000..2f5d382
--- /dev/null
+++ b/gnu/lib/libregex/doc/Makefile.in
@@ -0,0 +1,92 @@
+# Makefile for regex documentation.
+#
+# Copyright (C) 1992 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+# Installation directories.
+prefix = /usr/local
+infodir = $(prefix)/info
+
+srcdir = @srcdir@
+VPATH = @srcdir@:../@srcdir@
+
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+
+MAKEINFO = makeinfo --no-split
+SHELL = /bin/sh
+TEX = tex
+TEXINDEX = texindex
+
+default all: regex.info regex.dvi
+.PHONY: default all
+
+# We need to include some code from regex.h.
+regex.texi: xregex.texi
+ rm -f $@
+ gawk -f include.awk -vsource=../$(srcdir)/regex.h \
+ <../$(srcdir)/doc/xregex.texi \
+ | expand >$@
+ chmod a-w $@
+
+regex.dvi: regex.cps
+ $(TEX) regex.texi
+regex.cps: regex.cp
+ $(TEXINDEX) regex.??
+regex.cp: regex.texi
+ $(TEX) ../$(srcdir)/doc/regex.texi
+
+regex.info: regex.texi
+ $(MAKEINFO) ../$(srcdir)/doc/regex.texi
+
+# I know of no way to make a good TAGS file from Texinfo source.
+TAGS:
+
+check:
+.PHONY: check
+
+install: regex.info
+ -mkdir $(prefix) $(infodir)
+ for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done
+.PHONY: install
+
+clean mostlyclean:
+ rm -f regex.?? *.dvi *.log *.toc
+
+distclean: clean
+ rm -f Makefile
+ for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done
+
+realclean: distclean
+ rm -f *.info* regex.??? regex.texi TAGS
+
+extraclean: distclean
+ rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out
+.PHONY: mostlyclean clean distclean realclean extraclean
+
+Makefile: Makefile.in ../config.status
+ (cd ..; sh config.status)
+
+# Prevent GNU make 3 from overflowing arg limit on system V.
+.NOEXPORT:
+
+# Assumes $(distdir) is the place to put our files.
+distfiles = Makefile.in *.texi texinfo.tex include.awk \
+ regex.info* regex.aux regex.cps
+dist: Makefile regex.info regex.cps
+ mkdir $(distdir)
+ ln $(distfiles) $(distdir)
+.PHONY: dist
diff --git a/gnu/lib/libregex/doc/include.awk b/gnu/lib/libregex/doc/include.awk
new file mode 100644
index 0000000..a1df3f8
--- /dev/null
+++ b/gnu/lib/libregex/doc/include.awk
@@ -0,0 +1,19 @@
+# For each input line starting with `[[[', print the lines of `source' that lie between its
+# `[[[begin NAME]]]' and `[[[end NAME]]]' markers; `source' is set with -vsource=filename.
+/^\[\[\[/ { inclusion = $2; # name of the thing to include (second field of the directive line).
+ printing = 0; # nothing is copied until the begin marker has been seen
+ while ((getline line < source) > 0) # read `source' one line at a time until EOF or error
+ {
+ if (match (line, "\\[\\[\\[end " inclusion "\\]\\]\\]")) # tested before printing: the end marker is not copied
+ printing = 0;
+
+ if (printing)
+ print line;
+
+ if (match (line,"\\[\\[\\[begin " inclusion "\\]\\]\\]")) # tested after printing: the begin marker is not copied
+ printing = 1;
+ }
+ close (source); # close it so a later directive's getline starts over from the top of `source'
+ next; # the directive line itself is not printed
+ }
+ { print } # every other input line is passed through unchanged
diff --git a/gnu/lib/libregex/doc/regex.aux b/gnu/lib/libregex/doc/regex.aux
new file mode 100644
index 0000000..fd6a245
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.aux
@@ -0,0 +1,136 @@
+'xrdef {Overview-pg}{1}
+'xrdef {Overview-snt}{Chapter'tie1}
+'xrdef {Regular Expression Syntax-pg}{2}
+'xrdef {Regular Expression Syntax-snt}{Chapter'tie2}
+'xrdef {Syntax Bits-pg}{2}
+'xrdef {Syntax Bits-snt}{Section'tie2.1}
+'xrdef {Predefined Syntaxes-pg}{5}
+'xrdef {Predefined Syntaxes-snt}{Section'tie2.2}
+'xrdef {Collating Elements vs. Characters-pg}{6}
+'xrdef {Collating Elements vs. Characters-snt}{Section'tie2.3}
+'xrdef {The Backslash Character-pg}{7}
+'xrdef {The Backslash Character-snt}{Section'tie2.4}
+'xrdef {Common Operators-pg}{9}
+'xrdef {Common Operators-snt}{Chapter'tie3}
+'xrdef {Match-self Operator-pg}{9}
+'xrdef {Match-self Operator-snt}{Section'tie3.1}
+'xrdef {Match-any-character Operator-pg}{9}
+'xrdef {Match-any-character Operator-snt}{Section'tie3.2}
+'xrdef {Concatenation Operator-pg}{10}
+'xrdef {Concatenation Operator-snt}{Section'tie3.3}
+'xrdef {Repetition Operators-pg}{10}
+'xrdef {Repetition Operators-snt}{Section'tie3.4}
+'xrdef {Match-zero-or-more Operator-pg}{10}
+'xrdef {Match-zero-or-more Operator-snt}{Section'tie3.4.1}
+'xrdef {Match-one-or-more Operator-pg}{11}
+'xrdef {Match-one-or-more Operator-snt}{Section'tie3.4.2}
+'xrdef {Match-zero-or-one Operator-pg}{11}
+'xrdef {Match-zero-or-one Operator-snt}{Section'tie3.4.3}
+'xrdef {Interval Operators-pg}{12}
+'xrdef {Interval Operators-snt}{Section'tie3.4.4}
+'xrdef {Alternation Operator-pg}{13}
+'xrdef {Alternation Operator-snt}{Section'tie3.5}
+'xrdef {List Operators-pg}{13}
+'xrdef {List Operators-snt}{Section'tie3.6}
+'xrdef {Character Class Operators-pg}{14}
+'xrdef {Character Class Operators-snt}{Section'tie3.6.1}
+'xrdef {Range Operator-pg}{15}
+'xrdef {Range Operator-snt}{Section'tie3.6.2}
+'xrdef {Grouping Operators-pg}{16}
+'xrdef {Grouping Operators-snt}{Section'tie3.7}
+'xrdef {Back-reference Operator-pg}{17}
+'xrdef {Back-reference Operator-snt}{Section'tie3.8}
+'xrdef {Anchoring Operators-pg}{18}
+'xrdef {Anchoring Operators-snt}{Section'tie3.9}
+'xrdef {Match-beginning-of-line Operator-pg}{18}
+'xrdef {Match-beginning-of-line Operator-snt}{Section'tie3.9.1}
+'xrdef {Match-end-of-line Operator-pg}{18}
+'xrdef {Match-end-of-line Operator-snt}{Section'tie3.9.2}
+'xrdef {GNU Operators-pg}{20}
+'xrdef {GNU Operators-snt}{Chapter'tie4}
+'xrdef {Word Operators-pg}{20}
+'xrdef {Word Operators-snt}{Section'tie4.1}
+'xrdef {Non-Emacs Syntax Tables-pg}{20}
+'xrdef {Non-Emacs Syntax Tables-snt}{Section'tie4.1.1}
+'xrdef {Match-word-boundary Operator-pg}{20}
+'xrdef {Match-word-boundary Operator-snt}{Section'tie4.1.2}
+'xrdef {Match-within-word Operator-pg}{20}
+'xrdef {Match-within-word Operator-snt}{Section'tie4.1.3}
+'xrdef {Match-beginning-of-word Operator-pg}{21}
+'xrdef {Match-beginning-of-word Operator-snt}{Section'tie4.1.4}
+'xrdef {Match-end-of-word Operator-pg}{21}
+'xrdef {Match-end-of-word Operator-snt}{Section'tie4.1.5}
+'xrdef {Match-word-constituent Operator-pg}{21}
+'xrdef {Match-word-constituent Operator-snt}{Section'tie4.1.6}
+'xrdef {Match-non-word-constituent Operator-pg}{21}
+'xrdef {Match-non-word-constituent Operator-snt}{Section'tie4.1.7}
+'xrdef {Buffer Operators-pg}{21}
+'xrdef {Buffer Operators-snt}{Section'tie4.2}
+'xrdef {Match-beginning-of-buffer Operator-pg}{21}
+'xrdef {Match-beginning-of-buffer Operator-snt}{Section'tie4.2.1}
+'xrdef {Match-end-of-buffer Operator-pg}{21}
+'xrdef {Match-end-of-buffer Operator-snt}{Section'tie4.2.2}
+'xrdef {GNU Emacs Operators-pg}{22}
+'xrdef {GNU Emacs Operators-snt}{Chapter'tie5}
+'xrdef {Syntactic Class Operators-pg}{22}
+'xrdef {Syntactic Class Operators-snt}{Section'tie5.1}
+'xrdef {Emacs Syntax Tables-pg}{22}
+'xrdef {Emacs Syntax Tables-snt}{Section'tie5.1.1}
+'xrdef {Match-syntactic-class Operator-pg}{22}
+'xrdef {Match-syntactic-class Operator-snt}{Section'tie5.1.2}
+'xrdef {Match-not-syntactic-class Operator-pg}{22}
+'xrdef {Match-not-syntactic-class Operator-snt}{Section'tie5.1.3}
+'xrdef {What Gets Matched?-pg}{23}
+'xrdef {What Gets Matched?-snt}{Chapter'tie6}
+'xrdef {Programming with Regex-pg}{24}
+'xrdef {Programming with Regex-snt}{Chapter'tie7}
+'xrdef {GNU Regex Functions-pg}{24}
+'xrdef {GNU Regex Functions-snt}{Section'tie7.1}
+'xrdef {GNU Pattern Buffers-pg}{24}
+'xrdef {GNU Pattern Buffers-snt}{Section'tie7.1.1}
+'xrdef {GNU Regular Expression Compiling-pg}{26}
+'xrdef {GNU Regular Expression Compiling-snt}{Section'tie7.1.2}
+'xrdef {GNU Matching-pg}{27}
+'xrdef {GNU Matching-snt}{Section'tie7.1.3}
+'xrdef {GNU Searching-pg}{28}
+'xrdef {GNU Searching-snt}{Section'tie7.1.4}
+'xrdef {Matching/Searching with Split Data-pg}{29}
+'xrdef {Matching/Searching with Split Data-snt}{Section'tie7.1.5}
+'xrdef {Searching with Fastmaps-pg}{30}
+'xrdef {Searching with Fastmaps-snt}{Section'tie7.1.6}
+'xrdef {GNU Translate Tables-pg}{31}
+'xrdef {GNU Translate Tables-snt}{Section'tie7.1.7}
+'xrdef {Using Registers-pg}{32}
+'xrdef {Using Registers-snt}{Section'tie7.1.8}
+'xrdef {Freeing GNU Pattern Buffers-pg}{34}
+'xrdef {Freeing GNU Pattern Buffers-snt}{Section'tie7.1.9}
+'xrdef {POSIX Regex Functions-pg}{35}
+'xrdef {POSIX Regex Functions-snt}{Section'tie7.2}
+'xrdef {POSIX Pattern Buffers-pg}{35}
+'xrdef {POSIX Pattern Buffers-snt}{Section'tie7.2.1}
+'xrdef {POSIX Regular Expression Compiling-pg}{35}
+'xrdef {POSIX Regular Expression Compiling-snt}{Section'tie7.2.2}
+'xrdef {POSIX Matching-pg}{37}
+'xrdef {POSIX Matching-snt}{Section'tie7.2.3}
+'xrdef {Reporting Errors-pg}{38}
+'xrdef {Reporting Errors-snt}{Section'tie7.2.4}
+'xrdef {Using Byte Offsets-pg}{39}
+'xrdef {Using Byte Offsets-snt}{Section'tie7.2.5}
+'xrdef {Freeing POSIX Pattern Buffers-pg}{39}
+'xrdef {Freeing POSIX Pattern Buffers-snt}{Section'tie7.2.6}
+'xrdef {BSD Regex Functions-pg}{40}
+'xrdef {BSD Regex Functions-snt}{Section'tie7.3}
+'xrdef {BSD Regular Expression Compiling-pg}{40}
+'xrdef {BSD Regular Expression Compiling-snt}{Section'tie7.3.1}
+'xrdef {BSD Searching-pg}{40}
+'xrdef {BSD Searching-snt}{Section'tie7.3.2}
+'xrdef {Copying-pg}{42}
+'xrdef {Copying-snt}{Appendix'tie'char65{}}
+'xrdef {Copying-pg}{42}
+'xrdef {Copying-snt}{}
+'xrdef {Copying-pg}{43}
+'xrdef {Copying-snt}{}
+'xrdef {Copying-pg}{48}
+'xrdef {Copying-snt}{}
+'xrdef {Index-pg}{50}
+'xrdef {Index-snt}{}
diff --git a/gnu/lib/libregex/doc/regex.cps b/gnu/lib/libregex/doc/regex.cps
new file mode 100644
index 0000000..8b2e57c
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.cps
@@ -0,0 +1,152 @@
+\initial {$}
+\entry {\code {$}}{18}
+\initial {(}
+\entry {\code {(}}{16}
+\initial {)}
+\entry {\code {)}}{16}
+\initial {*}
+\entry {\samp {*}}{10}
+\initial {-}
+\entry {\samp {-}}{13}
+\initial {.}
+\entry {\samp {.}}{9}
+\initial {:}
+\entry {\samp {:]} in regex}{14}
+\initial {?}
+\entry {\samp {?}}{11}
+\initial {[}
+\entry {\samp {[}}{13}
+\entry {\samp {[:} in regex}{14}
+\entry {\samp {[{\tt\hat}}}{13}
+\initial {]}
+\entry {\samp {]}}{13}
+\initial {{\tt\char'173}}
+\entry {\samp {{\tt\char'173}}}{12}
+\initial {{\tt\char'174}}
+\entry {\code {{\tt\char'174}}}{13}
+\initial {{\tt\char'175}}
+\entry {\samp {{\tt\char'175}}}{12}
+\initial {{\tt\char43}}
+\entry {\samp {{\tt\char43}}}{11}
+\initial {{\tt\hat}}
+\entry {\samp {{\tt\hat}}}{13}
+\entry {\code {{\tt\hat}}}{18}
+\initial {{\tt\indexbackslash }}
+\entry {{\tt\indexbackslash }}{7}
+\entry {\samp {{\tt\indexbackslash }}}{13}
+\entry {\samp {{\tt\indexbackslash }'}}{21}
+\entry {\code {{\tt\indexbackslash }(}}{16}
+\entry {\code {{\tt\indexbackslash })}}{16}
+\entry {\samp {{\tt\indexbackslash }`}}{21}
+\entry {\samp {{\tt\indexbackslash }{\tt\char'173}}}{12}
+\entry {\code {{\tt\indexbackslash }{\tt\char'174}}}{13}
+\entry {\samp {{\tt\indexbackslash }{\tt\char'175}}}{12}
+\entry {\samp {{\tt\indexbackslash }{\tt\gtr}}}{21}
+\entry {\samp {{\tt\indexbackslash }{\tt\less}}}{21}
+\entry {\samp {{\tt\indexbackslash }b}}{20}
+\entry {\samp {{\tt\indexbackslash }B}}{20}
+\entry {\samp {{\tt\indexbackslash }s}}{22}
+\entry {\samp {{\tt\indexbackslash }S}}{22}
+\entry {\samp {{\tt\indexbackslash }w}}{21}
+\entry {\samp {{\tt\indexbackslash }W}}{21}
+\initial {A}
+\entry {\code {allocated \r {initialization}}}{26}
+\entry {alternation operator}{13}
+\entry {alternation operator and \samp {{\tt\hat}}}{18}
+\entry {anchoring}{18}
+\entry {anchors}{18}
+\entry {Awk}{5}
+\initial {B}
+\entry {back references}{17}
+\entry {backtracking}{10, 13}
+\entry {beginning-of-line operator}{18}
+\entry {bracket expression}{13}
+\entry {\code {buffer \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
+\entry {\code {buffer \r {initialization}}}{26}
+\initial {C}
+\entry {character classes}{14}
+\initial {E}
+\entry {Egrep}{5}
+\entry {Emacs}{5}
+\entry {end-of-line operator}{18}
+\entry {\code {end\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}
+\initial {F}
+\entry {\code {fastmap \r {initialization}}}{26}
+\entry {\code {fastmap{\_}accurate \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
+\entry {fastmaps}{30}
+\initial {G}
+\entry {Grep}{5}
+\entry {grouping}{16}
+\initial {I}
+\entry {ignoring case}{35}
+\entry {interval expression}{12}
+\initial {M}
+\entry {matching list}{13}
+\entry {matching newline}{13}
+\entry {matching with GNU functions}{27}
+\initial {N}
+\entry {\code {newline{\_}anchor \r {field in pattern buffer}}}{18}
+\entry {nonmatching list}{13}
+\entry {\code {not{\_}bol \r {field in pattern buffer}}}{18}
+\entry {\code {num_regs\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}
+\initial {O}
+\entry {open-group operator and \samp {{\tt\hat}}}{18}
+\entry {or operator}{13}
+\initial {P}
+\entry {parenthesizing}{16}
+\entry {pattern buffer initialization}{26}
+\entry {pattern buffer, definition of}{24}
+\entry {POSIX Awk}{5}
+\initial {R}
+\entry {\code {range \r {argument to \code {re{\_}search}}}}{28}
+\entry {\code {re_registers}}{32}
+\entry {\code {RE{\_}BACKSLASH{\_}ESCAPE{\_}IN{\_}LISTS}}{3}
+\entry {\code {RE{\_}BK{\_}PLUS{\_}QM}}{3}
+\entry {\code {RE{\_}CHAR{\_}CLASSES}}{3}
+\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS}}{3}
+\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS \r {(and \samp {{\tt\hat}})}}}{18}
+\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}OPS}}{3}
+\entry {\code {RE{\_}CONTEXT{\_}INVALID{\_}OPS}}{3}
+\entry {\code {RE{\_}DOT{\_}NEWLINE}}{3}
+\entry {\code {RE{\_}DOT{\_}NOT{\_}NULL}}{4}
+\entry {\code {RE{\_}INTERVALS}}{4}
+\entry {\code {RE{\_}LIMITED{\_}OPS}}{4}
+\entry {\code {RE{\_}NEWLINE{\_}ALT}}{4}
+\entry {\code {RE{\_}NO{\_}BK{\_}BRACES}}{4}
+\entry {\code {RE{\_}NO{\_}BK{\_}PARENS}}{4}
+\entry {\code {RE{\_}NO{\_}BK{\_}REFS}}{4}
+\entry {\code {RE{\_}NO{\_}BK{\_}VBAR}}{4}
+\entry {\code {RE{\_}NO{\_}EMPTY{\_}RANGES}}{4}
+\entry {\code {re{\_}nsub \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
+\entry {\code {re{\_}pattern{\_}buffer \r {definition}}}{24}
+\entry {\code {re{\_}syntax{\_}options \r {initialization}}}{26}
+\entry {\code {RE{\_}UNMATCHED{\_}RIGHT{\_}PAREN{\_}ORD}}{4}
+\entry {\code {REG{\_}EXTENDED}}{35}
+\entry {\code {REG{\_}ICASE}}{35}
+\entry {\code {REG{\_}NEWLINE}}{36}
+\entry {\code {REG{\_}NOSUB}}{35}
+\entry {\code {regex.c}}{1}
+\entry {\code {regex.h}}{1}
+\entry {regexp anchoring}{18}
+\entry {\code {regmatch{\_}t}}{39}
+\entry {\code {regs{\_}allocated}}{32}
+\entry {\code {REGS{\_}FIXED}}{33}
+\entry {\code {REGS{\_}REALLOCATE}}{32}
+\entry {\code {REGS{\_}UNALLOCATED}}{32}
+\entry {regular expressions, syntax of}{2}
+\initial {S}
+\entry {searching with GNU functions}{28}
+\entry {\code {start \r {argument to \code {re{\_}search}}}}{28}
+\entry {\code {start\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}
+\entry {\code {struct re{\_}pattern{\_}buffer \r {definition}}}{24}
+\entry {subexpressions}{16}
+\entry {syntax bits}{2}
+\entry {\code {syntax \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
+\entry {syntax initialization}{26}
+\entry {syntax of regular expressions}{2}
+\initial {T}
+\entry {\code {translate \r {initialization}}}{26}
+\initial {U}
+\entry {\code {used \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
+\initial {W}
+\entry {word boundaries, matching}{20}
diff --git a/gnu/lib/libregex/doc/regex.info b/gnu/lib/libregex/doc/regex.info
new file mode 100644
index 0000000..90deede
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.info
@@ -0,0 +1,2836 @@
+This is Info file regex.info, produced by Makeinfo-1.52 from the input
+file .././doc/regex.texi.
+
+ This file documents the GNU regular expression library.
+
+ Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled "GNU General Public License" is included exactly as in
+the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this
+one.
+
+ Permission is granted to copy and distribute translations of this
+manual into another language, under the above conditions for modified
+versions, except that the section entitled "GNU General Public License"
+may be included in a translation approved by the Free Software
+Foundation instead of in the original English.
+
+
+File: regex.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir)
+
+Regular Expression Library
+**************************
+
+ This manual documents how to program with the GNU regular expression
+library. This is edition 0.12a of the manual, 19 September 1992.
+
+ The first part of this master menu lists the major nodes in this Info
+document, including the index. The rest of the menu lists all the
+lower level nodes in the document.
+
+* Menu:
+
+* Overview::
+* Regular Expression Syntax::
+* Common Operators::
+* GNU Operators::
+* GNU Emacs Operators::
+* What Gets Matched?::
+* Programming with Regex::
+* Copying:: Copying and sharing Regex.
+* Index:: General index.
+ -- The Detailed Node Listing --
+
+Regular Expression Syntax
+
+* Syntax Bits::
+* Predefined Syntaxes::
+* Collating Elements vs. Characters::
+* The Backslash Character::
+
+Common Operators
+
+* Match-self Operator:: Ordinary characters.
+* Match-any-character Operator:: .
+* Concatenation Operator:: Juxtaposition.
+* Repetition Operators:: * + ? {}
+* Alternation Operator:: |
+* List Operators:: [...] [^...]
+* Grouping Operators:: (...)
+* Back-reference Operator:: \digit
+* Anchoring Operators:: ^ $
+
+Repetition Operators
+
+* Match-zero-or-more Operator:: *
+* Match-one-or-more Operator:: +
+* Match-zero-or-one Operator:: ?
+* Interval Operators:: {}
+
+List Operators (`[' ... `]' and `[^' ... `]')
+
+* Character Class Operators:: [:class:]
+* Range Operator:: start-end
+
+Anchoring Operators
+
+* Match-beginning-of-line Operator:: ^
+* Match-end-of-line Operator:: $
+
+GNU Operators
+
+* Word Operators::
+* Buffer Operators::
+
+Word Operators
+
+* Non-Emacs Syntax Tables::
+* Match-word-boundary Operator:: \b
+* Match-within-word Operator:: \B
+* Match-beginning-of-word Operator:: \<
+* Match-end-of-word Operator:: \>
+* Match-word-constituent Operator:: \w
+* Match-non-word-constituent Operator:: \W
+
+Buffer Operators
+
+* Match-beginning-of-buffer Operator:: \`
+* Match-end-of-buffer Operator:: \'
+
+GNU Emacs Operators
+
+* Syntactic Class Operators::
+
+Syntactic Class Operators
+
+* Emacs Syntax Tables::
+* Match-syntactic-class Operator:: \sCLASS
+* Match-not-syntactic-class Operator:: \SCLASS
+
+Programming with Regex
+
+* GNU Regex Functions::
+* POSIX Regex Functions::
+* BSD Regex Functions::
+
+GNU Regex Functions
+
+* GNU Pattern Buffers:: The re_pattern_buffer type.
+* GNU Regular Expression Compiling:: re_compile_pattern ()
+* GNU Matching:: re_match ()
+* GNU Searching:: re_search ()
+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
+* Searching with Fastmaps:: re_compile_fastmap ()
+* GNU Translate Tables:: The `translate' field.
+* Using Registers:: The re_registers type and related fns.
+* Freeing GNU Pattern Buffers:: regfree ()
+
+POSIX Regex Functions
+
+* POSIX Pattern Buffers:: The regex_t type.
+* POSIX Regular Expression Compiling:: regcomp ()
+* POSIX Matching:: regexec ()
+* Reporting Errors:: regerror ()
+* Using Byte Offsets:: The regmatch_t type.
+* Freeing POSIX Pattern Buffers:: regfree ()
+
+BSD Regex Functions
+
+* BSD Regular Expression Compiling:: re_comp ()
+* BSD Searching:: re_exec ()
+
+
+File: regex.info, Node: Overview, Next: Regular Expression Syntax, Prev: Top, Up: Top
+
+Overview
+********
+
+ A "regular expression" (or "regexp", or "pattern") is a text string
+that describes some (mathematical) set of strings. A regexp R
+"matches" a string S if S is in the set of strings described by R.
+
+ Using the Regex library, you can:
+
+ * see if a string matches a specified pattern as a whole, and
+
+ * search within a string for a substring matching a specified
+ pattern.
+
+ Some regular expressions match only one string, i.e., the set they
+describe has only one member. For example, the regular expression
+`foo' matches the string `foo' and no others. Other regular
+expressions match more than one string, i.e., the set they describe has
+more than one member. For example, the regular expression `f*' matches
+the set of strings made up of any number (including zero) of `f's. As
+you can see, some characters in regular expressions match themselves
+(such as `f') and some don't (such as `*'); the ones that don't match
+themselves instead let you specify patterns that describe many
+different strings.
+
+ To either match or search for a regular expression with the Regex
+library functions, you must first compile it with a Regex pattern
+compiling function. A "compiled pattern" is a regular expression
+converted to the internal format used by the library functions. Once
+you've compiled a pattern, you can use it for matching or searching any
+number of times.
+
+ The Regex library consists of two source files: `regex.h' and
+`regex.c'. Regex provides three groups of functions with which you can
+operate on regular expressions. One group--the GNU group--is more
+powerful but not completely compatible with the other two, namely the
+POSIX and Berkeley UNIX groups; its interface was designed specifically
+for GNU. The other groups have the same interfaces as do the regular
+expression functions in POSIX and Berkeley UNIX.
+
+ We wrote this chapter with programmers in mind, not users of
+programs--such as Emacs--that use Regex. We describe the Regex library
+in its entirety, not how to write regular expressions that a particular
+program understands.
+
+
+File: regex.info, Node: Regular Expression Syntax, Next: Common Operators, Prev: Overview, Up: Top
+
+Regular Expression Syntax
+*************************
+
+ "Characters" are things you can type. "Operators" are things in a
+regular expression that match one or more characters. You compose
+regular expressions from operators, which in turn you specify using one
+or more characters.
+
+ Most characters represent what we call the match-self operator, i.e.,
+they match themselves; we call these characters "ordinary". Other
+characters represent either all or parts of fancier operators; e.g.,
+`.' represents what we call the match-any-character operator (which, no
+surprise, matches (almost) any character); we call these characters
+"special". Two different things determine what characters represent
+what operators:
+
+ 1. the regular expression syntax your program has told the Regex
+ library to recognize, and
+
+ 2. the context of the character in the regular expression.
+
+ In the following sections, we describe these things in more detail.
+
+* Menu:
+
+* Syntax Bits::
+* Predefined Syntaxes::
+* Collating Elements vs. Characters::
+* The Backslash Character::
+
+
+File: regex.info, Node: Syntax Bits, Next: Predefined Syntaxes, Up: Regular Expression Syntax
+
+Syntax Bits
+===========
+
+ In any particular syntax for regular expressions, some characters are
+always special, others are sometimes special, and others are never
+special. The particular syntax that Regex recognizes for a given
+regular expression depends on the value in the `syntax' field of the
+pattern buffer of that regular expression.
+
+ You get a pattern buffer by compiling a regular expression. *Note
+GNU Pattern Buffers::, and *Note POSIX Pattern Buffers::, for more
+information on pattern buffers. *Note GNU Regular Expression
+Compiling::, *Note POSIX Regular Expression Compiling::, and *Note BSD
+Regular Expression Compiling::, for more information on compiling.
+
+ Regex considers the value of the `syntax' field to be a collection of
+bits; we refer to these bits as "syntax bits". In most cases, they
+affect what characters represent what operators. We describe the
+meanings of the operators to which we refer in *Note Common Operators::,
+*Note GNU Operators::, and *Note GNU Emacs Operators::.
+
+ For reference, here is the complete list of syntax bits, in
+alphabetical order:
+
+`RE_BACKSLASH_ESCAPE_IN_LISTS'
+ If this bit is set, then `\' inside a list (*note List Operators::.)
+ quotes (makes ordinary, if it's special) the following character;
+ if this bit isn't set, then `\' is an ordinary character inside
+ lists. (*Note The Backslash Character::, for what `\' does
+ outside of lists.)
+
+`RE_BK_PLUS_QM'
+ If this bit is set, then `\+' represents the match-one-or-more
+ operator and `\?' represents the match-zero-or-one operator; if
+ this bit isn't set, then `+' represents the match-one-or-more
+ operator and `?' represents the match-zero-or-one operator. This
+ bit is irrelevant if `RE_LIMITED_OPS' is set.
+
+`RE_CHAR_CLASSES'
+ If this bit is set, then you can use character classes in lists;
+ if this bit isn't set, then you can't.
+
+`RE_CONTEXT_INDEP_ANCHORS'
+ If this bit is set, then `^' and `$' are special anywhere outside
+ a list; if this bit isn't set, then these characters are special
+ only in certain contexts. *Note Match-beginning-of-line
+ Operator::, and *Note Match-end-of-line Operator::.
+
+`RE_CONTEXT_INDEP_OPS'
+ If this bit is set, then certain characters are special anywhere
+ outside a list; if this bit isn't set, then those characters are
+ special only in some contexts and are ordinary elsewhere.
+ Specifically, if this bit isn't set then `*', and (if the syntax
+ bit `RE_LIMITED_OPS' isn't set) `+' and `?' (or `\+' and `\?',
+ depending on the syntax bit `RE_BK_PLUS_QM') represent repetition
+ operators only if they're not first in a regular expression or
+ just after an open-group or alternation operator. The same holds
+ for `{' (or `\{', depending on the syntax bit `RE_NO_BK_BRACES') if
+ it is the beginning of a valid interval and the syntax bit
+ `RE_INTERVALS' is set.
+
+`RE_CONTEXT_INVALID_OPS'
+ If this bit is set, then repetition and alternation operators
+ can't be in certain positions within a regular expression.
+ Specifically, the regular expression is invalid if it has:
+
+ * a repetition operator first in the regular expression or just
+ after a match-beginning-of-line, open-group, or alternation
+ operator; or
+
+ * an alternation operator first or last in the regular
+ expression, just before a match-end-of-line operator, or just
+ after an alternation or open-group operator.
+
+ If this bit isn't set, then you can put the characters
+ representing the repetition and alternation characters anywhere in
+ a regular expression. Whether or not they will in fact be
+ operators in certain positions depends on other syntax bits.
+
+`RE_DOT_NEWLINE'
+ If this bit is set, then the match-any-character operator matches
+ a newline; if this bit isn't set, then it doesn't.
+
+`RE_DOT_NOT_NULL'
+ If this bit is set, then the match-any-character operator doesn't
+ match a null character; if this bit isn't set, then it does.
+
+`RE_INTERVALS'
+ If this bit is set, then Regex recognizes interval operators; if
+ this bit isn't set, then it doesn't.
+
+`RE_LIMITED_OPS'
+ If this bit is set, then Regex doesn't recognize the
+ match-one-or-more, match-zero-or-one or alternation operators; if
+ this bit isn't set, then it does.
+
+`RE_NEWLINE_ALT'
+ If this bit is set, then newline represents the alternation
+ operator; if this bit isn't set, then newline is ordinary.
+
+`RE_NO_BK_BRACES'
+ If this bit is set, then `{' represents the open-interval operator
+ and `}' represents the close-interval operator; if this bit isn't
+ set, then `\{' represents the open-interval operator and `\}'
+ represents the close-interval operator. This bit is relevant only
+ if `RE_INTERVALS' is set.
+
+`RE_NO_BK_PARENS'
+ If this bit is set, then `(' represents the open-group operator and
+ `)' represents the close-group operator; if this bit isn't set,
+ then `\(' represents the open-group operator and `\)' represents
+ the close-group operator.
+
+`RE_NO_BK_REFS'
+ If this bit is set, then Regex doesn't recognize `\'DIGIT as the
+ back reference operator; if this bit isn't set, then it does.
+
+`RE_NO_BK_VBAR'
+ If this bit is set, then `|' represents the alternation operator;
+ if this bit isn't set, then `\|' represents the alternation
+ operator. This bit is irrelevant if `RE_LIMITED_OPS' is set.
+
+`RE_NO_EMPTY_RANGES'
+ If this bit is set, then a regular expression with a range whose
+ ending point collates lower than its starting point is invalid; if
+ this bit isn't set, then Regex considers such a range to be empty.
+
+`RE_UNMATCHED_RIGHT_PAREN_ORD'
+ If this bit is set and the regular expression has no matching
+ open-group operator, then Regex considers what would otherwise be
+ a close-group operator (based on how `RE_NO_BK_PARENS' is set) to
+ match `)'.
+
+
+File: regex.info, Node: Predefined Syntaxes, Next: Collating Elements vs. Characters, Prev: Syntax Bits, Up: Regular Expression Syntax
+
+Predefined Syntaxes
+===================
+
+ If you're programming with Regex, you can set a pattern buffer's
+(*note GNU Pattern Buffers::., and *Note POSIX Pattern Buffers::)
+`syntax' field either to an arbitrary combination of syntax bits (*note
+Syntax Bits::.) or else to the configurations defined by Regex. These
+configurations define the syntaxes used by certain programs--GNU Emacs,
+POSIX Awk, traditional Awk, Grep, Egrep--in addition to syntaxes for
+POSIX basic and extended regular expressions.
+
+ The predefined syntaxes--taken directly from `regex.h'--are:
+
+ #define RE_SYNTAX_EMACS 0
+
+ #define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+ #define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+ #define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+ #define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+ #define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+ /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+ #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+ #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+ /* Syntax bits common to both basic and extended POSIX regex syntax. */
+ #define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+ #define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+ /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+ #define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+ #define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+ /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+ #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+
+File: regex.info, Node: Collating Elements vs. Characters, Next: The Backslash Character, Prev: Predefined Syntaxes, Up: Regular Expression Syntax
+
+Collating Elements vs. Characters
+=================================
+
+ POSIX generalizes the notion of a character to that of a collating
+element. It defines a "collating element" to be "a sequence of one or
+more bytes defined in the current collating sequence as a unit of
+collation."
+
+ This generalizes the notion of a character in two ways. First, a
+single character can map into two or more collating elements. For
+example, the German "es-zet" collates as the collating element `s'
+followed by another collating element `s'. Second, two or more
+characters can map into one collating element. For example, the
+Spanish `ll' collates after `l' and before `m'.
+
+ Since POSIX's "collating element" preserves the essential idea of a
+"character," we use the latter, more familiar, term in this document.
+
+
+File: regex.info, Node: The Backslash Character, Prev: Collating Elements vs. Characters, Up: Regular Expression Syntax
+
+The Backslash Character
+=======================
+
+ The `\' character has one of four different meanings, depending on
+the context in which you use it and what syntax bits are set (*note
+Syntax Bits::.). It can: 1) stand for itself, 2) quote the next
+character, 3) introduce an operator, or 4) do nothing.
+
+ 1. It stands for itself inside a list (*note List Operators::.) if
+ the syntax bit `RE_BACKSLASH_ESCAPE_IN_LISTS' is not set. For
+ example, `[\]' would match `\'.
+
+ 2. It quotes (makes ordinary, if it's special) the next character
+ when you use it either:
+
+ * outside a list,(1) or
+
+ * inside a list and the syntax bit
+ `RE_BACKSLASH_ESCAPE_IN_LISTS' is set.
+
+ 3. It introduces an operator when followed by certain ordinary
+ characters--sometimes only when certain syntax bits are set. See
+ the cases `RE_BK_PLUS_QM', `RE_NO_BK_BRACES', `RE_NO_BK_VBAR',
+ `RE_NO_BK_PARENS', `RE_NO_BK_REFS' in *Note Syntax Bits::. Also:
+
+ * `\b' represents the match-word-boundary operator (*note
+ Match-word-boundary Operator::.).
+
+ * `\B' represents the match-within-word operator (*note
+ Match-within-word Operator::.).
+
+ * `\<' represents the match-beginning-of-word operator
+ (*note Match-beginning-of-word Operator::.).
+
+ * `\>' represents the match-end-of-word operator (*note
+ Match-end-of-word Operator::.).
+
+ * `\w' represents the match-word-constituent operator (*note
+ Match-word-constituent Operator::.).
+
+ * `\W' represents the match-non-word-constituent operator
+ (*note Match-non-word-constituent Operator::.).
+
+ * `\`' represents the match-beginning-of-buffer operator and
+ `\'' represents the match-end-of-buffer operator (*note
+ Buffer Operators::.).
+
+ * If Regex was compiled with the C preprocessor symbol `emacs'
+ defined, then `\sCLASS' represents the match-syntactic-class
+ operator and `\SCLASS' represents the
+ match-not-syntactic-class operator (*note Syntactic Class
+ Operators::.).
+
+ 4. In all other cases, Regex ignores `\'. For example, `\n' matches
+ `n'.
+
+
+ ---------- Footnotes ----------
+
+ (1) Sometimes you don't have to explicitly quote special characters
+to make them ordinary. For instance, most characters lose any special
+meaning inside a list (*note List Operators::.). In addition, if the
+syntax bits `RE_CONTEXT_INVALID_OPS' and `RE_CONTEXT_INDEP_OPS' aren't
+set, then (for historical reasons) the matcher considers special
+characters ordinary if they are in contexts where the operations they
+represent make no sense; for example, the match-zero-or-more
+operator (represented by `*') matches itself in the regular expression
+`*foo' because there is no preceding expression on which it can
+operate. It is poor practice, however, to depend on this behavior; if
+you want a special character to be ordinary outside a list, it's better
+to always quote it, regardless.
+
+
+File: regex.info, Node: Common Operators, Next: GNU Operators, Prev: Regular Expression Syntax, Up: Top
+
+Common Operators
+****************
+
+ You compose regular expressions from operators. In the following
+sections, we describe the regular expression operators specified by
+POSIX; GNU also uses these. Most operators have more than one
+representation as characters. *Note Regular Expression Syntax::, for
+what characters represent what operators under what circumstances.
+
+ For most operators that can be represented in two ways, one
+representation is a single character and the other is that character
+preceded by `\'. For example, either `(' or `\(' represents the
+open-group operator. Which one does depends on the setting of a syntax
+bit, in this case `RE_NO_BK_PARENS'. Why is this so? Historical
+reasons dictate some of the varying representations, while POSIX
+dictates others.
+
+ Finally, almost all characters lose any special meaning inside a list
+(*note List Operators::.).
+
+* Menu:
+
+* Match-self Operator:: Ordinary characters.
+* Match-any-character Operator:: .
+* Concatenation Operator:: Juxtaposition.
+* Repetition Operators:: * + ? {}
+* Alternation Operator:: |
+* List Operators:: [...] [^...]
+* Grouping Operators:: (...)
+* Back-reference Operator:: \digit
+* Anchoring Operators:: ^ $
+
+
+File: regex.info, Node: Match-self Operator, Next: Match-any-character Operator, Up: Common Operators
+
+The Match-self Operator (ORDINARY CHARACTER)
+============================================
+
+ This operator matches the character itself. All ordinary characters
+(*note Regular Expression Syntax::.) represent this operator. For
+example, `f' is always an ordinary character, so the regular expression
+`f' matches only the string `f'. In particular, it does *not* match
+the string `ff'.
+
+
+File: regex.info, Node: Match-any-character Operator, Next: Concatenation Operator, Prev: Match-self Operator, Up: Common Operators
+
+The Match-any-character Operator (`.')
+======================================
+
+ This operator matches any single printing or nonprinting character
+except it won't match a:
+
+newline
+ if the syntax bit `RE_DOT_NEWLINE' isn't set.
+
+null
+ if the syntax bit `RE_DOT_NOT_NULL' is set.
+
+ The `.' (period) character represents this operator. For example,
+`a.b' matches any three-character string beginning with `a' and ending
+with `b'.
+
+
+File: regex.info, Node: Concatenation Operator, Next: Repetition Operators, Prev: Match-any-character Operator, Up: Common Operators
+
+The Concatenation Operator
+==========================
+
+ This operator concatenates two regular expressions A and B. No
+character represents this operator; you simply put B after A. The
+result is a regular expression that will match a string if A matches
+its first part and B matches the rest. For example, `xy' (two
+match-self operators) matches `xy'.
+
+
+File: regex.info, Node: Repetition Operators, Next: Alternation Operator, Prev: Concatenation Operator, Up: Common Operators
+
+Repetition Operators
+====================
+
+ Repetition operators repeat the preceding regular expression a
+specified number of times.
+
+* Menu:
+
+* Match-zero-or-more Operator:: *
+* Match-one-or-more Operator:: +
+* Match-zero-or-one Operator:: ?
+* Interval Operators:: {}
+
+
+File: regex.info, Node: Match-zero-or-more Operator, Next: Match-one-or-more Operator, Up: Repetition Operators
+
+The Match-zero-or-more Operator (`*')
+-------------------------------------
+
+ This operator repeats the smallest possible preceding regular
+expression as many times as necessary (including zero) to match the
+pattern. `*' represents this operator. For example, `o*' matches any
+string made up of zero or more `o's. Since this operator operates on
+the smallest preceding regular expression, `fo*' has a repeating `o',
+not a repeating `fo'. So, `fo*' matches `f', `fo', `foo', and so on.
+
+ Since the match-zero-or-more operator is a suffix operator, it may be
+useless as such when no regular expression precedes it. This is the
+case when it:
+
+ * is first in a regular expression, or
+
+ * follows a match-beginning-of-line, open-group, or alternation
+ operator.
+
+Three different things can happen in these cases:
+
+ 1. If the syntax bit `RE_CONTEXT_INVALID_OPS' is set, then the
+ regular expression is invalid.
+
+ 2. If `RE_CONTEXT_INVALID_OPS' isn't set, but `RE_CONTEXT_INDEP_OPS'
+ is, then `*' represents the match-zero-or-more operator (which
+ then operates on the empty string).
+
+ 3. Otherwise, `*' is ordinary.
+
+
+ The matcher processes a match-zero-or-more operator by first matching
+as many repetitions of the smallest preceding regular expression as it
+can. Then it continues to match the rest of the pattern.
+
+ If it can't match the rest of the pattern, it backtracks (as many
+times as necessary), each time discarding one of the matches until it
+can either match the entire pattern or be certain that it cannot get a
+match. For example, when matching `ca*ar' against `caaar', the matcher
+first matches all three `a's of the string with the `a*' of the regular
+expression. However, it cannot then match the final `ar' of the
+regular expression against the final `r' of the string. So it
+backtracks, discarding the match of the last `a' in the string. It can
+then match the remaining `ar'.
+
+
+File: regex.info, Node: Match-one-or-more Operator, Next: Match-zero-or-one Operator, Prev: Match-zero-or-more Operator, Up: Repetition Operators
+
+The Match-one-or-more Operator (`+' or `\+')
+--------------------------------------------
+
+ If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM'
+isn't set, then `+' represents this operator; if it is, then `\+' does.
+
+ This operator is similar to the match-zero-or-more operator except
+that it repeats the preceding regular expression at least once; *note
+Match-zero-or-more Operator::., for what it operates on, how some
+syntax bits affect it, and how Regex backtracks to match it.
+
+ For example, supposing that `+' represents the match-one-or-more
+operator; then `ca+r' matches, e.g., `car' and `caaaar', but not `cr'.
+
+
+File: regex.info, Node: Match-zero-or-one Operator, Next: Interval Operators, Prev: Match-one-or-more Operator, Up: Repetition Operators
+
+The Match-zero-or-one Operator (`?' or `\?')
+--------------------------------------------
+
+ If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM'
+isn't set, then `?' represents this operator; if it is, then `\?' does.
+
+ This operator is similar to the match-zero-or-more operator except
+that it repeats the preceding regular expression once or not at all;
+*note Match-zero-or-more Operator::., to see what it operates on, how
+some syntax bits affect it, and how Regex backtracks to match it.
+
+ For example, supposing that `?' represents the match-zero-or-one
+operator; then `ca?r' matches both `car' and `cr', but nothing else.
+
+
+File: regex.info, Node: Interval Operators, Prev: Match-zero-or-one Operator, Up: Repetition Operators
+
+Interval Operators (`{' ... `}' or `\{' ... `\}')
+-------------------------------------------------
+
+ If the syntax bit `RE_INTERVALS' is set, then Regex recognizes
+"interval expressions". They repeat the smallest possible preceding
+regular expression a specified number of times.
+
+ If the syntax bit `RE_NO_BK_BRACES' is set, `{' represents the
+"open-interval operator" and `}' represents the "close-interval
+operator"; otherwise, `\{' and `\}' do.
+
+ Specifically, supposing that `{' and `}' represent the open-interval
+and close-interval operators; then:
+
+`{COUNT}'
+ matches exactly COUNT occurrences of the preceding regular
+ expression.
+
+`{MIN,}'
+ matches MIN or more occurrences of the preceding regular
+ expression.
+
+`{MIN,MAX}'
+ matches at least MIN but no more than MAX occurrences of the
+ preceding regular expression.
+
+ The interval expression (but not necessarily the regular expression
+that contains it) is invalid if:
+
+ * MIN is greater than MAX, or
+
+ * any of COUNT, MIN, or MAX are outside the range zero to
+ `RE_DUP_MAX' (which symbol `regex.h' defines).
+
+ If the interval expression is invalid and the syntax bit
+`RE_NO_BK_BRACES' is set, then Regex considers all the characters in
+the would-be interval to be ordinary. If that bit isn't set, then the
+regular expression is invalid.
+
+ If the interval expression is valid but there is no preceding regular
+expression on which to operate, then if the syntax bit
+`RE_CONTEXT_INVALID_OPS' is set, the regular expression is invalid. If
+that bit isn't set, then Regex considers all the characters--other than
+backslashes, which it ignores--in the would-be interval to be ordinary.
+
+
+File: regex.info, Node: Alternation Operator, Next: List Operators, Prev: Repetition Operators, Up: Common Operators
+
+The Alternation Operator (`|' or `\|')
+======================================
+
+ If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit `RE_NO_BK_VBAR'
+is set, then `|' represents this operator; otherwise, `\|' does.
+
+ Alternatives match one of a choice of regular expressions: if you put
+the character(s) representing the alternation operator between any two
+regular expressions A and B, the result matches the union of the
+strings that A and B match. For example, supposing that `|' is the
+alternation operator, then `foo|bar|quux' would match any of `foo',
+`bar' or `quux'.
+
+ The alternation operator operates on the *largest* possible
+surrounding regular expressions. (Put another way, it has the lowest
+precedence of any regular expression operator.) Thus, the only way you
+can delimit its arguments is to use grouping. For example, if `(' and
+`)' are the open and close-group operators, then `fo(o|b)ar' would
+match either `fooar' or `fobar'. (`foo|bar' would match `foo' or
+`bar'.)
+
+ The matcher usually tries all combinations of alternatives so as to
+match the longest possible string. For example, when matching
+`(fooq|foo)*(qbarquux|bar)' against `fooqbarquux', it cannot take, say,
+the first ("depth-first") combination it could match, since then it
+would be content to match just `fooqbar'.
+
+
+File: regex.info, Node: List Operators, Next: Grouping Operators, Prev: Alternation Operator, Up: Common Operators
+
+List Operators (`[' ... `]' and `[^' ... `]')
+=============================================
+
+ "Lists", also called "bracket expressions", are a set of one or more
+items. An "item" is a character, a character class expression, or a
+range expression. The syntax bits affect which kinds of items you can
+put in a list. We explain the last two items in subsections below.
+Empty lists are invalid.
+
+ A "matching list" matches a single character represented by one of
+the list items. You form a matching list by enclosing one or more items
+within an "open-matching-list operator" (represented by `[') and a
+"close-list operator" (represented by `]').
+
+ For example, `[ab]' matches either `a' or `b'. `[ad]*' matches the
+empty string and any string composed of just `a's and `d's in any
+order. Regex considers invalid a regular expression with a `[' but no
+matching `]'.
+
+ "Nonmatching lists" are similar to matching lists except that they
+match a single character *not* represented by one of the list items.
+You use an "open-nonmatching-list operator" (represented by `[^'(1))
+instead of an open-matching-list operator to start a nonmatching list.
+
+ For example, `[^ab]' matches any character except `a' or `b'.
+
+ If the `posix_newline' field in the pattern buffer (*note GNU Pattern
+Buffers::.) is set, then nonmatching lists do not match a newline.
+
+ Most characters lose any special meaning inside a list. The special
+characters inside a list follow.
+
+`]'
+ ends the list if it's not the first list item. So, if you want to
+ make the `]' character a list item, you must put it first.
+
+`\'
+ quotes the next character if the syntax bit
+ `RE_BACKSLASH_ESCAPE_IN_LISTS' is set.
+
+`[:'
+ represents the open-character-class operator (*note Character
+ Class Operators::.) if the syntax bit `RE_CHAR_CLASSES' is set and
+ what follows is a valid character class expression.
+
+`:]'
+ represents the close-character-class operator if the syntax bit
+ `RE_CHAR_CLASSES' is set and what precedes it is an
+ open-character-class operator followed by a valid character class
+ name.
+
+`-'
+ represents the range operator (*note Range Operator::.) if it's
+ not first or last in a list or the ending point of a range.
+
+All other characters are ordinary. For example, `[.*]' matches `.' and
+`*'.
+
+* Menu:
+
+* Character Class Operators:: [:class:]
+* Range Operator:: start-end
+
+ ---------- Footnotes ----------
+
+ (1) Regex therefore doesn't consider the `^' to be the first
+character in the list. If you put a `^' character first in (what you
+think is) a matching list, you'll turn it into a nonmatching list.
+
+
+File: regex.info, Node: Character Class Operators, Next: Range Operator, Up: List Operators
+
+Character Class Operators (`[:' ... `:]')
+-----------------------------------------
+
+ If the syntax bit `RE_CHAR_CLASSES' is set, then Regex
+recognizes character class expressions inside lists. A "character
+class expression" matches one character from a given class. You form a
+character class expression by putting a character class name between an
+"open-character-class operator" (represented by `[:') and a
+"close-character-class operator" (represented by `:]'). The character
+class names and their meanings are:
+
+`alnum'
+ letters and digits
+
+`alpha'
+ letters
+
+`blank'
+ system-dependent; for GNU, a space or tab
+
+`cntrl'
+ control characters (in the ASCII encoding, code 0177 and codes
+ less than 040)
+
+`digit'
+ digits
+
+`graph'
+ same as `print' except omits space
+
+`lower'
+ lowercase letters
+
+`print'
+ printable characters (in the ASCII encoding, space through
+ tilde--codes 040 through 0176)
+
+`punct'
+ neither control nor alphanumeric characters
+
+`space'
+ space, tab, carriage return, newline, vertical tab, and form feed
+
+`upper'
+ uppercase letters
+
+`xdigit'
+ hexadecimal digits: `0'-`9', `a'-`f', `A'-`F'
+
+These correspond to the definitions in the C library's `<ctype.h>'
+facility. For example, `[:alpha:]' corresponds to the standard
+facility `isalpha'. Regex recognizes character class expressions only
+inside of lists; so `[[:alpha:]]' matches any letter, but `[:alpha:]'
+outside of a bracket expression and not followed by a repetition
+operator matches just itself.
+
+
+File: regex.info, Node: Range Operator, Prev: Character Class Operators, Up: List Operators
+
+The Range Operator (`-')
+------------------------
+
+ Regex recognizes "range expressions" inside a list. They represent
+those characters that fall between two elements in the current
+collating sequence. You form a range expression by putting a "range
+operator" between two characters.(1) `-' represents the range operator.
+For example, `a-f' within a list represents all the characters from `a'
+through `f' inclusively.
+
+ If the syntax bit `RE_NO_EMPTY_RANGES' is set, then if the range's
+ending point collates less than its starting point, the range (and the
+regular expression containing it) is invalid. For example, the regular
+expression `[z-a]' would be invalid. If this bit isn't set, then Regex
+considers such a range to be empty.
+
+ Since `-' represents the range operator, if you want to make a `-'
+character itself a list item, you must do one of the following:
+
+ * Put the `-' either first or last in the list.
+
+ * Include a range whose starting point collates strictly lower than
+ `-' and whose ending point collates equal or higher. Unless a
+ range is the first item in a list, a `-' can't be its starting
+ point, but *can* be its ending point. That is because Regex
+ considers `-' to be the range operator unless it is preceded by
+ another `-'. For example, in the ASCII encoding, `)', `*', `+',
+ `,', `-', `.', and `/' are contiguous characters in the collating
+ sequence. You might think that `[)-+--/]' has two ranges: `)-+'
+ and `--/'. Rather, it has the ranges `)-+' and `+--', plus the
+ character `/', so it matches, e.g., `,', not `.'.
+
+ * Put a range whose starting point is `-' first in the list.
+
+ For example, `[-a-z]' matches a lowercase letter or a hyphen (in
+English, in ASCII).
+
+ ---------- Footnotes ----------
+
+ (1) You can't use a character class for the starting or ending point
+of a range, since a character class is not a single character.
+
+
+File: regex.info, Node: Grouping Operators, Next: Back-reference Operator, Prev: List Operators, Up: Common Operators
+
+Grouping Operators (`(' ... `)' or `\(' ... `\)')
+=================================================
+
+ A "group", also known as a "subexpression", consists of an
+"open-group operator", any number of other operators, and a
+"close-group operator". Regex treats this sequence as a unit, just as
+mathematics and programming languages treat a parenthesized expression
+as a unit.
+
+ Therefore, using "groups", you can:
+
+ * delimit the argument(s) to an alternation operator (*note
+ Alternation Operator::.) or a repetition operator (*note
+ Repetition Operators::.).
+
+ * keep track of the indices of the substring that matched a given
+ group. *Note Using Registers::, for a precise explanation. This
+ lets you:
+
+ * use the back-reference operator (*note Back-reference
+ Operator::.).
+
+ * use registers (*note Using Registers::.).
+
+ If the syntax bit `RE_NO_BK_PARENS' is set, then `(' represents the
+open-group operator and `)' represents the close-group operator;
+otherwise, `\(' and `\)' do.
+
+ If the syntax bit `RE_UNMATCHED_RIGHT_PAREN_ORD' is set and a
+close-group operator has no matching open-group operator, then Regex
+considers it to match `)'.
+
+
+File: regex.info, Node: Back-reference Operator, Next: Anchoring Operators, Prev: Grouping Operators, Up: Common Operators
+
+The Back-reference Operator ("\"DIGIT)
+======================================
+
+ If the syntax bit `RE_NO_BK_REF' isn't set, then Regex recognizes
+back references. A back reference matches a specified preceding group.
+The back reference operator is represented by `\DIGIT' anywhere after
+the end of a regular expression's DIGIT-th group (*note Grouping
+Operators::.).
+
+ DIGIT must be between `1' and `9'. The matcher assigns numbers 1
+through 9 to the first nine groups it encounters. By using one of `\1'
+through `\9' after the corresponding group's close-group operator, you
+can match a substring identical to the one that the group does.
+
+ Back references match according to the following (in all examples
+below, `(' represents the open-group, `)' the close-group, `{' the
+open-interval and `}' the close-interval operator):
+
+ * If the group matches a substring, the back reference matches an
+ identical substring. For example, `(a)\1' matches `aa' and
+ `(bana)na\1bo\1' matches `bananabanabobana'. Likewise, `(.*)\1'
+ matches any (newline-free if the syntax bit `RE_DOT_NEWLINE' isn't
+ set) string that is composed of two identical halves; the `(.*)'
+ matches the first half and the `\1' matches the second half.
+
+ * If the group matches more than once (as it might if followed by,
+ e.g., a repetition operator), then the back reference matches the
+ substring the group *last* matched. For example, `((a*)b)*\1\2'
+ matches `aabababa'; first group 1 (the outer one) matches `aab'
+ and group 2 (the inner one) matches `aa'. Then group 1 matches
+ `ab' and group 2 matches `a'. So, `\1' matches `ab' and `\2'
+ matches `a'.
+
+ * If the group doesn't participate in a match, i.e., it is part of an
+ alternative not taken or a repetition operator allows zero
+ repetitions of it, then the back reference makes the whole match
+ fail. For example, `(one()|two())-and-(three\2|four\3)' matches
+ `one-and-three' and `two-and-four', but not `one-and-four' or
+ `two-and-three'. For example, if the pattern matches `one-and-',
+ then its group 2 matches the empty string and its group 3 doesn't
+ participate in the match. So, if it then matches `four', then
+ when it tries to back reference group 3--which it will attempt to
+ do because `\3' follows the `four'--the match will fail because
+ group 3 didn't participate in the match.
+
+ You can use a back reference as an argument to a repetition operator.
+For example, `(a(b))\2*' matches `a' followed by one or more `b's.
+Similarly, `(a(b))\2{3}' matches `abbbb'.
+
+ If there is no preceding DIGIT-th subexpression, the regular
+expression is invalid.
+
+
+File: regex.info, Node: Anchoring Operators, Prev: Back-reference Operator, Up: Common Operators
+
+Anchoring Operators
+===================
+
+ These operators can constrain a pattern to match only at the
+beginning or end of the entire string or at the beginning or end of a
+line.
+
+* Menu:
+
+* Match-beginning-of-line Operator:: ^
+* Match-end-of-line Operator:: $
+
+
+File: regex.info, Node: Match-beginning-of-line Operator, Next: Match-end-of-line Operator, Up: Anchoring Operators
+
+The Match-beginning-of-line Operator (`^')
+------------------------------------------
+
+ This operator can match the empty string either at the beginning of
+the string or after a newline character. Thus, it is said to "anchor"
+the pattern to the beginning of a line.
+
+ In the cases following, `^' represents this operator. (Otherwise,
+`^' is ordinary.)
+
+ * It (the `^') is first in the pattern, as in `^foo'.
+
+ * The syntax bit `RE_CONTEXT_INDEP_ANCHORS' is set, and it is outside
+ a bracket expression.
+
+ * It follows an open-group or alternation operator, as in `a\(^b\)'
+ and `a\|^b'. *Note Grouping Operators::, and *Note Alternation
+ Operator::.
+
+ These rules imply that some valid patterns containing `^' cannot be
+matched; for example, `foo^bar' if `RE_CONTEXT_INDEP_ANCHORS' is set.
+
+ If the `not_bol' field is set in the pattern buffer (*note GNU
+Pattern Buffers::.), then `^' fails to match at the beginning of the
+string. *Note POSIX Matching::, for when you might find this useful.
+
+ If the `newline_anchor' field isn't set in the pattern buffer, then
+`^' fails to match after a newline. This is useful when you do not
+regard the string to be matched as broken into lines.
+
+
+File: regex.info, Node: Match-end-of-line Operator, Prev: Match-beginning-of-line Operator, Up: Anchoring Operators
+
+The Match-end-of-line Operator (`$')
+------------------------------------
+
+ This operator can match the empty string either at the end of the
+string or before a newline character in the string. Thus, it is said
+to "anchor" the pattern to the end of a line.
+
+ It is always represented by `$'. For example, `foo$' usually
+matches, e.g., `foo' and, e.g., the first three characters of
+`foo\nbar'.
+
+ Its interaction with the syntax bits and pattern buffer fields is
+exactly the dual of `^''s; see the previous section. (That is,
+"beginning" becomes "end", "next" becomes "previous", and "after"
+becomes "before".)
+
+
+File: regex.info, Node: GNU Operators, Next: GNU Emacs Operators, Prev: Common Operators, Up: Top
+
+GNU Operators
+*************
+
+ Following are operators that GNU defines (and POSIX doesn't).
+
+* Menu:
+
+* Word Operators::
+* Buffer Operators::
+
+
+File: regex.info, Node: Word Operators, Next: Buffer Operators, Up: GNU Operators
+
+Word Operators
+==============
+
+ The operators in this section require Regex to recognize parts of
+words. Regex uses a syntax table to determine whether or not a
+character is part of a word, i.e., whether or not it is
+"word-constituent".
+
+* Menu:
+
+* Non-Emacs Syntax Tables::
+* Match-word-boundary Operator:: \b
+* Match-within-word Operator:: \B
+* Match-beginning-of-word Operator:: \<
+* Match-end-of-word Operator:: \>
+* Match-word-constituent Operator:: \w
+* Match-non-word-constituent Operator:: \W
+
+
+File: regex.info, Node: Non-Emacs Syntax Tables, Next: Match-word-boundary Operator, Up: Word Operators
+
+Non-Emacs Syntax Tables
+-----------------------
+
+ A "syntax table" is an array indexed by the characters in your
+character set. In the ASCII encoding, therefore, a syntax table has
+256 elements. Regex always uses a `char *' variable `re_syntax_table'
+as its syntax table. In some cases, it initializes this variable and
+in others it expects you to initialize it.
+
+ * If Regex is compiled with the preprocessor symbols `emacs' and
+ `SYNTAX_TABLE' both undefined, then Regex allocates
+ `re_syntax_table' and initializes an element I either to `Sword'
+ (which it defines) if I is a letter, number, or `_', or to zero if
+ it's not.
+
+ * If Regex is compiled with `emacs' undefined but `SYNTAX_TABLE'
+ defined, then Regex expects you to define a `char *' variable
+ `re_syntax_table' to be a valid syntax table.
+
+ * *Note Emacs Syntax Tables::, for what happens when Regex is
+ compiled with the preprocessor symbol `emacs' defined.
+
+
+File: regex.info, Node: Match-word-boundary Operator, Next: Match-within-word Operator, Prev: Non-Emacs Syntax Tables, Up: Word Operators
+
+The Match-word-boundary Operator (`\b')
+---------------------------------------
+
+ This operator (represented by `\b') matches the empty string at
+either the beginning or the end of a word. For example, `\brat\b'
+matches the separate word `rat'.
+
+
+File: regex.info, Node: Match-within-word Operator, Next: Match-beginning-of-word Operator, Prev: Match-word-boundary Operator, Up: Word Operators
+
+The Match-within-word Operator (`\B')
+-------------------------------------
+
+ This operator (represented by `\B') matches the empty string within a
+word. For example, `c\Brat\Be' matches `crate', but `dirty \Brat'
+doesn't match `dirty rat'.
+
+
+File: regex.info, Node: Match-beginning-of-word Operator, Next: Match-end-of-word Operator, Prev: Match-within-word Operator, Up: Word Operators
+
+The Match-beginning-of-word Operator (`\<')
+-------------------------------------------
+
+ This operator (represented by `\<') matches the empty string at the
+beginning of a word.
+
+
+File: regex.info, Node: Match-end-of-word Operator, Next: Match-word-constituent Operator, Prev: Match-beginning-of-word Operator, Up: Word Operators
+
+The Match-end-of-word Operator (`\>')
+-------------------------------------
+
+ This operator (represented by `\>') matches the empty string at the
+end of a word.
+
+
+File: regex.info, Node: Match-word-constituent Operator, Next: Match-non-word-constituent Operator, Prev: Match-end-of-word Operator, Up: Word Operators
+
+The Match-word-constituent Operator (`\w')
+------------------------------------------
+
+ This operator (represented by `\w') matches any word-constituent
+character.
+
+
+File: regex.info, Node: Match-non-word-constituent Operator, Prev: Match-word-constituent Operator, Up: Word Operators
+
+The Match-non-word-constituent Operator (`\W')
+----------------------------------------------
+
+ This operator (represented by `\W') matches any character that is not
+word-constituent.
+
+
+File: regex.info, Node: Buffer Operators, Prev: Word Operators, Up: GNU Operators
+
+Buffer Operators
+================
+
+ Following are operators which work on buffers. In Emacs, a "buffer"
+is, naturally, an Emacs buffer. For other programs, Regex considers the
+entire string to be matched as the buffer.
+
+* Menu:
+
+* Match-beginning-of-buffer Operator:: \`
+* Match-end-of-buffer Operator:: \'
+
+
+File: regex.info, Node: Match-beginning-of-buffer Operator, Next: Match-end-of-buffer Operator, Up: Buffer Operators
+
+The Match-beginning-of-buffer Operator (`\`')
+---------------------------------------------
+
+ This operator (represented by `\`') matches the empty string at the
+beginning of the buffer.
+
+
+File: regex.info, Node: Match-end-of-buffer Operator, Prev: Match-beginning-of-buffer Operator, Up: Buffer Operators
+
+The Match-end-of-buffer Operator (`\'')
+---------------------------------------
+
+ This operator (represented by `\'') matches the empty string at the
+end of the buffer.
+
+
+File: regex.info, Node: GNU Emacs Operators, Next: What Gets Matched?, Prev: GNU Operators, Up: Top
+
+GNU Emacs Operators
+*******************
+
+ Following are operators that GNU defines (and POSIX doesn't) that you
+can use only when Regex is compiled with the preprocessor symbol
+`emacs' defined.
+
+* Menu:
+
+* Syntactic Class Operators::
+
+
+File: regex.info, Node: Syntactic Class Operators, Up: GNU Emacs Operators
+
+Syntactic Class Operators
+=========================
+
+ The operators in this section require Regex to recognize the syntactic
+classes of characters. Regex uses a syntax table to determine this.
+
+* Menu:
+
+* Emacs Syntax Tables::
+* Match-syntactic-class Operator:: \sCLASS
+* Match-not-syntactic-class Operator:: \SCLASS
+
+
+File: regex.info, Node: Emacs Syntax Tables, Next: Match-syntactic-class Operator, Up: Syntactic Class Operators
+
+Emacs Syntax Tables
+-------------------
+
+ A "syntax table" is an array indexed by the characters in your
+character set. In the ASCII encoding, therefore, a syntax table has
+256 elements.
+
+ If Regex is compiled with the preprocessor symbol `emacs' defined,
+then Regex expects you to define and initialize the variable
+`re_syntax_table' to be an Emacs syntax table. Emacs' syntax tables
+are more complicated than Regex's own (*note Non-Emacs Syntax
+Tables::.). *Note Syntax: (emacs)Syntax, for a description of Emacs'
+syntax tables.
+
+
+File: regex.info, Node: Match-syntactic-class Operator, Next: Match-not-syntactic-class Operator, Prev: Emacs Syntax Tables, Up: Syntactic Class Operators
+
+The Match-syntactic-class Operator (`\s'CLASS)
+----------------------------------------------
+
+ This operator matches any character whose syntactic class is
+represented by a specified character. `\sCLASS' represents this
+operator where CLASS is the character representing the syntactic class
+you want. For example, `w' represents the syntactic class of
+word-constituent characters, so `\sw' matches any word-constituent
+character.
+
+
+File: regex.info, Node: Match-not-syntactic-class Operator, Prev: Match-syntactic-class Operator, Up: Syntactic Class Operators
+
+The Match-not-syntactic-class Operator (`\S'CLASS)
+--------------------------------------------------
+
+ This operator is similar to the match-syntactic-class operator except
+that it matches any character whose syntactic class is *not*
+represented by the specified character. `\SCLASS' represents this
+operator. For example, `w' represents the syntactic class of
+word-constituent characters, so `\Sw' matches any character that is not
+word-constituent.
+
+
+File: regex.info, Node: What Gets Matched?, Next: Programming with Regex, Prev: GNU Emacs Operators, Up: Top
+
+What Gets Matched?
+******************
+
+ Regex usually matches strings according to the "leftmost longest"
+rule; that is, it chooses the longest of the leftmost matches. This
+does not mean that, for a regular expression containing subexpressions,
+it simply chooses the longest match for each subexpression, left to
+right; the overall match must also be the longest possible one.
+
+ For example, `(ac*)(c*d[ac]*)\1' matches `acdacaaa', not `acdac', as
+it would if it were to choose the longest match for the first
+subexpression.
+
+
+File: regex.info, Node: Programming with Regex, Next: Copying, Prev: What Gets Matched?, Up: Top
+
+Programming with Regex
+**********************
+
+ Here we describe how you use the Regex data structures and functions
+in C programs. Regex has three interfaces: one designed for GNU, one
+compatible with POSIX and one compatible with Berkeley UNIX.
+
+* Menu:
+
+* GNU Regex Functions::
+* POSIX Regex Functions::
+* BSD Regex Functions::
+
+
+File: regex.info, Node: GNU Regex Functions, Next: POSIX Regex Functions, Up: Programming with Regex
+
+GNU Regex Functions
+===================
+
+ If you're writing code that doesn't need to be compatible with either
+POSIX or Berkeley UNIX, you can use these functions. They provide more
+options than the other interfaces.
+
+* Menu:
+
+* GNU Pattern Buffers:: The re_pattern_buffer type.
+* GNU Regular Expression Compiling:: re_compile_pattern ()
+* GNU Matching:: re_match ()
+* GNU Searching:: re_search ()
+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
+* Searching with Fastmaps:: re_compile_fastmap ()
+* GNU Translate Tables:: The `translate' field.
+* Using Registers:: The re_registers type and related fns.
+* Freeing GNU Pattern Buffers:: regfree ()
+
+
+File: regex.info, Node: GNU Pattern Buffers, Next: GNU Regular Expression Compiling, Up: GNU Regex Functions
+
+GNU Pattern Buffers
+-------------------
+
+ To compile, match, or search for a given regular expression, you must
+supply a pattern buffer. A "pattern buffer" holds one compiled regular
+expression.(1)
+
+ You can have several different pattern buffers simultaneously, each
+holding a compiled pattern for a different regular expression.
+
+ `regex.h' defines the pattern buffer `struct' as follows:
+
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+ #define REGS_UNALLOCATED 0
+ #define REGS_REALLOCATE 1
+ #define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+ ---------- Footnotes ----------
+
+ (1) Regular expressions are also referred to as "patterns," hence
+the name "pattern buffer."
+
+
+File: regex.info, Node: GNU Regular Expression Compiling, Next: GNU Matching, Prev: GNU Pattern Buffers, Up: GNU Regex Functions
+
+GNU Regular Expression Compiling
+--------------------------------
+
+ In GNU, you can both match and search for a given regular expression.
+To do either, you must first compile it in a pattern buffer (*note GNU
+Pattern Buffers::.).
+
+ Regular expressions match according to the syntax with which they were
+compiled; with GNU, you indicate what syntax you want by setting the
+variable `re_syntax_options' (declared in `regex.h' and defined in
+`regex.c') before calling the compiling function, `re_compile_pattern'
+(see below). *Note Syntax Bits::, and *Note Predefined Syntaxes::.
+
+ You can change the value of `re_syntax_options' at any time.
+Usually, however, you set its value once and then never change it.
+
+ `re_compile_pattern' takes a pattern buffer as an argument. You must
+initialize the following fields:
+
+`translate initialization'
+`translate'
+ Initialize this to point to a translate table if you want one, or
+ to zero if you don't. We explain translate tables in *Note GNU
+ Translate Tables::.
+
+`fastmap'
+ Initialize this to nonzero if you want a fastmap, or to zero if you
+ don't.
+
+`buffer'
+`allocated'
+ If you want `re_compile_pattern' to allocate memory for the
+ compiled pattern, set both of these to zero. If you have an
+ existing block of memory (allocated with `malloc') you want Regex
+ to use, set `buffer' to its address and `allocated' to its size (in
+ bytes).
+
+ `re_compile_pattern' uses `realloc' to extend the space for the
+ compiled pattern as necessary.
+
+ To compile a pattern buffer, use:
+
+ char *
+ re_compile_pattern (const char *REGEX, const int REGEX_SIZE,
+ struct re_pattern_buffer *PATTERN_BUFFER)
+
+REGEX is the regular expression's address, REGEX_SIZE is its length,
+and PATTERN_BUFFER is the pattern buffer's address.
+
+ If `re_compile_pattern' successfully compiles the regular expression,
+it returns zero and sets `*PATTERN_BUFFER' to the compiled pattern. It
+sets the pattern buffer's fields as follows:
+
+`buffer'
+ to the compiled pattern.
+
+`used'
+ to the number of bytes the compiled pattern in `buffer' occupies.
+
+`syntax'
+ to the current value of `re_syntax_options'.
+
+`re_nsub'
+ to the number of subexpressions in REGEX.
+
+`fastmap_accurate'
+ to zero on the theory that the pattern you're compiling is
+ different than the one previously compiled into `buffer'; in that
+ case (since you can't make a fastmap without a compiled pattern),
+ `fastmap' would either contain an incompatible fastmap, or nothing
+ at all.
+
+ If `re_compile_pattern' can't compile REGEX, it returns an error
+string corresponding to one of the errors listed in *Note POSIX Regular
+Expression Compiling::.
+
+
+File: regex.info, Node: GNU Matching, Next: GNU Searching, Prev: GNU Regular Expression Compiling, Up: GNU Regex Functions
+
+GNU Matching
+------------
+
+ Matching the GNU way means trying to match as much of a string as
+possible starting at a position within it you specify. Once you've
+compiled a pattern into a pattern buffer (*note GNU Regular Expression
+Compiling::.), you can ask the matcher to match that pattern against a
+string using:
+
+ int
+ re_match (struct re_pattern_buffer *PATTERN_BUFFER,
+ const char *STRING, const int SIZE,
+ const int START, struct re_registers *REGS)
+
+PATTERN_BUFFER is the address of a pattern buffer containing a compiled
+pattern. STRING is the string you want to match; it can contain
+newline and null characters. SIZE is the length of that string. START
+is the string index at which you want to begin matching; the first
+character of STRING is at index zero. *Note Using Registers::, for an
+explanation of REGS; you can safely pass zero.
+
+ `re_match' matches the regular expression in PATTERN_BUFFER against
+the string STRING according to the syntax in PATTERN_BUFFER's `syntax'
+field. (*Note GNU Regular Expression Compiling::, for how to set it.)
+The function returns -1 if the compiled pattern does not match any part
+of STRING and -2 if an internal error happens; otherwise, it returns
+how many (possibly zero) characters of STRING the pattern matched.
+
+ An example: suppose PATTERN_BUFFER points to a pattern buffer
+containing the compiled pattern for `a*', and STRING points to `aaaaab'
+(whereupon SIZE should be 6). Then if START is 2, `re_match' returns 3,
+i.e., `a*' would have matched the last three `a's in STRING. If START
+is 0, `re_match' returns 5, i.e., `a*' would have matched all the `a's
+in STRING. If START is either 5 or 6, it returns zero.
+
+ If START is not between zero and SIZE, then `re_match' returns -1.
+
+
+File: regex.info, Node: GNU Searching, Next: Matching/Searching with Split Data, Prev: GNU Matching, Up: GNU Regex Functions
+
+GNU Searching
+-------------
+
+ "Searching" means trying to match starting at successive positions
+within a string. The function `re_search' does this.
+
+ Before calling `re_search', you must compile your regular expression.
+*Note GNU Regular Expression Compiling::.
+
+ Here is the function declaration:
+
+ int
+ re_search (struct re_pattern_buffer *PATTERN_BUFFER,
+ const char *STRING, const int SIZE,
+ const int START, const int RANGE,
+ struct re_registers *REGS)
+
+whose arguments are the same as those to `re_match' (*note GNU
+Matching::.) except that the two arguments START and RANGE replace
+`re_match''s argument START.
+
+ If RANGE is positive, then `re_search' attempts a match starting
+first at index START, then at START + 1 if that fails, and so on, up to
+START + RANGE; if RANGE is negative, then it attempts a match starting
+first at index START, then at START - 1 if that fails, and so on.
+
+ If START is not between zero and SIZE, then `re_search' returns -1.
+When RANGE is positive, `re_search' adjusts RANGE so that START + RANGE
+- 1 is between zero and SIZE, if necessary; that way it won't search
+outside of STRING. Similarly, when RANGE is negative, `re_search'
+adjusts RANGE so that START + RANGE + 1 is between zero and SIZE, if
+necessary.
+
+ If the `fastmap' field of PATTERN_BUFFER is zero, `re_search' matches
+starting at consecutive positions; otherwise, it uses `fastmap' to make
+the search more efficient. *Note Searching with Fastmaps::.
+
+ If no match is found, `re_search' returns -1. If a match is found,
+it returns the index where the match began. If an internal error
+happens, it returns -2.
+
+
+File: regex.info, Node: Matching/Searching with Split Data, Next: Searching with Fastmaps, Prev: GNU Searching, Up: GNU Regex Functions
+
+Matching and Searching with Split Data
+--------------------------------------
+
+ Using the functions `re_match_2' and `re_search_2', you can match or
+search in data that is divided into two strings.
+
+ The function:
+
+ int
+ re_match_2 (struct re_pattern_buffer *BUFFER,
+ const char *STRING1, const int SIZE1,
+ const char *STRING2, const int SIZE2,
+ const int START,
+ struct re_registers *REGS,
+ const int STOP)
+
+is similar to `re_match' (*note GNU Matching::.) except that you pass
+*two* data strings and sizes, and an index STOP beyond which you don't
+want the matcher to try matching. As with `re_match', if it succeeds,
+`re_match_2' returns how many characters of STRING it matched. Regard
+STRING1 and STRING2 as concatenated when you set the arguments START and
+STOP and use the contents of REGS; `re_match_2' never returns a value
+larger than SIZE1 + SIZE2.
+
+ The function:
+
+ int
+ re_search_2 (struct re_pattern_buffer *BUFFER,
+ const char *STRING1, const int SIZE1,
+ const char *STRING2, const int SIZE2,
+ const int START, const int RANGE,
+ struct re_registers *REGS,
+ const int STOP)
+
+is similarly related to `re_search'.
+
+
+File: regex.info, Node: Searching with Fastmaps, Next: GNU Translate Tables, Prev: Matching/Searching with Split Data, Up: GNU Regex Functions
+
+Searching with Fastmaps
+-----------------------
+
+ If you're searching through a long string, you should use a fastmap.
+Without one, the searcher tries to match at consecutive positions in the
+string. Generally, most of the characters in the string could not start
+a match. It takes much longer to try matching at a given position in
+the string than it does to check in a table whether or not the
+character at that position could start a match. A "fastmap" is such a
+table.
+
+ More specifically, a fastmap is an array indexed by the characters in
+your character set. Under the ASCII encoding, therefore, a fastmap has
+256 elements. If you want the searcher to use a fastmap with a given
+pattern buffer, you must allocate the array and assign the array's
+address to the pattern buffer's `fastmap' field. You either can
+compile the fastmap yourself or have `re_search' do it for you; when
+`fastmap' is nonzero, it automatically compiles a fastmap the first
+time you search using a particular compiled pattern.
+
+ To compile a fastmap yourself, use:
+
+ int
+ re_compile_fastmap (struct re_pattern_buffer *PATTERN_BUFFER)
+
+PATTERN_BUFFER is the address of a pattern buffer. If the character C
+could start a match for the pattern, `re_compile_fastmap' makes
+`PATTERN_BUFFER->fastmap[C]' nonzero. It returns 0 if it can compile a
+fastmap and -2 if there is an internal error. For example, if `|' is
+the alternation operator and PATTERN_BUFFER holds the compiled pattern
+for `a|b', then `re_compile_fastmap' sets `fastmap['a']' and
+`fastmap['b']' (and no others).
+
+ `re_search' uses a fastmap as it moves along in the string: it checks
+the string's characters until it finds one that's in the fastmap. Then
+it tries matching at that character. If the match fails, it repeats
+the process. So, by using a fastmap, `re_search' doesn't waste time
+trying to match at positions in the string that couldn't start a match.
+
+ If you don't want `re_search' to use a fastmap, store zero in the
+`fastmap' field of the pattern buffer before calling `re_search'.
+
+ Once you've initialized a pattern buffer's `fastmap' field, you need
+never do so again--even if you compile a new pattern in it--provided
+the way the field is set still reflects whether or not you want a
+fastmap. `re_search' will still either do nothing if `fastmap' is null
+or, if it isn't, compile a new fastmap for the new pattern.
+
+
+File: regex.info, Node: GNU Translate Tables, Next: Using Registers, Prev: Searching with Fastmaps, Up: GNU Regex Functions
+
+GNU Translate Tables
+--------------------
+
+ If you set the `translate' field of a pattern buffer to a translate
+table, then the GNU Regex functions to which you've passed that pattern
+buffer use it to apply a simple transformation to all the regular
+expression and string characters at which they look.
+
+ A "translate table" is an array indexed by the characters in your
+character set. Under the ASCII encoding, therefore, a translate table
+has 256 elements. The array's elements are also characters in your
+character set. When the Regex functions see a character C, they use
+`translate[C]' in its place, with one exception: the character after a
+`\' is not translated. (This ensures that the operators, e.g., `\B'
+and `\b', are always distinguishable.)
+
+ For example, a table that maps all lowercase letters to the
+corresponding uppercase ones would cause the matcher to ignore
+differences in case.(1) Such a table would map all characters except
+lowercase letters to themselves, and lowercase letters to the
+corresponding uppercase ones. Under the ASCII encoding, here's how you
+could initialize such a table (we'll call it `case_fold'):
+
+ for (i = 0; i < 256; i++)
+ case_fold[i] = i;
+ for (i = 'a'; i <= 'z'; i++)
+ case_fold[i] = i - ('a' - 'A');
+
+ You tell Regex to use a translate table on a given pattern buffer by
+assigning that table's address to the `translate' field of that buffer.
+If you don't want Regex to do any translation, put zero into this
+field. You'll get weird results if you change the table's contents
+anytime between compiling the pattern buffer, compiling its fastmap, and
+matching or searching with the pattern buffer.
+
+ ---------- Footnotes ----------
+
+ (1) A table that maps all uppercase letters to the corresponding
+lowercase ones would work just as well for this purpose.
+
+
+File: regex.info, Node: Using Registers, Next: Freeing GNU Pattern Buffers, Prev: GNU Translate Tables, Up: GNU Regex Functions
+
+Using Registers
+---------------
+
+ A group in a regular expression can match a (possibly empty)
+substring of the string that regular expression as a whole matched.
+The matcher remembers the beginning and end of the substring matched by
+each group.
+
+ To find out what they matched, pass a nonzero REGS argument to a GNU
+matching or searching function (*note GNU Matching::. and *Note GNU
+Searching::), i.e., the address of a structure of this type, as defined
+in `regex.h':
+
+ struct re_registers
+ {
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+ };
+
+ Except for (possibly) the NUM_REGS'th element (see below), the Ith
+element of the `start' and `end' arrays records information about the
+Ith group in the pattern. (They're declared as C pointers, but this is
+only because not all C compilers accept zero-length arrays;
+conceptually, it is simplest to think of them as arrays.)
+
+ The `start' and `end' arrays are allocated in various ways, depending
+on the value of the `regs_allocated' field in the pattern buffer passed
+to the matcher.
+
+ The simplest and perhaps most useful is to let the matcher
+(re)allocate enough space to record information for all the groups in
+the regular expression. If `regs_allocated' is `REGS_UNALLOCATED', the
+matcher allocates 1 + RE_NSUB elements (RE_NSUB is another field in the
+pattern buffer; *note GNU Pattern Buffers::.), sets the extra element
+to -1, and sets `regs_allocated' to `REGS_REALLOCATE'. Then on subsequent calls
+with the same pattern buffer and REGS arguments, the matcher
+reallocates more space if necessary.
+
+ It would perhaps be more logical to make the `regs_allocated' field
+part of the `re_registers' structure, instead of part of the pattern
+buffer. But in that case the caller would be forced to initialize the
+structure before passing it. Much existing code doesn't do this
+initialization, and it's arguably better to avoid it anyway.
+
+ `re_compile_pattern' sets `regs_allocated' to `REGS_UNALLOCATED', so
+if you use the GNU regular expression functions, you get this behavior
+by default.
+
+ xx document re_set_registers
+
+ POSIX, on the other hand, requires a different interface: the caller
+is supposed to pass in a fixed-length array which the matcher fills.
+Therefore, if `regs_allocated' is `REGS_FIXED' the matcher simply fills
+that array.
+
+ The following examples illustrate the information recorded in the
+`re_registers' structure. (In all of them, `(' represents the
+open-group and `)' the close-group operator. The first character in
+the string STRING is at index 0.)
+
+ * If the regular expression has an I-th group not contained within
+ another group that matches a substring of STRING, then the
+ function sets `REGS->start[I]' to the index in STRING where the
+ substring matched by the I-th group begins, and `REGS->end[I]' to
+ the index just beyond that substring's end. The function sets
+ `REGS->start[0]' and `REGS->end[0]' to analogous information about
+ the entire pattern.
+
+ For example, when you match `((a)(b))' against `ab', you get:
+
+ * 0 in `REGS->start[0]' and 2 in `REGS->end[0]'
+
+ * 0 in `REGS->start[1]' and 2 in `REGS->end[1]'
+
+ * 0 in `REGS->start[2]' and 1 in `REGS->end[2]'
+
+ * 1 in `REGS->start[3]' and 2 in `REGS->end[3]'
+
+ * If a group matches more than once (as it might if followed by,
+ e.g., a repetition operator), then the function reports the
+ information about what the group *last* matched.
+
+ For example, when you match the pattern `(a)*' against the string
+ `aa', you get:
+
+ * 0 in `REGS->start[0]' and 2 in `REGS->end[0]'
+
+ * 1 in `REGS->start[1]' and 2 in `REGS->end[1]'
+
+ * If the I-th group does not participate in a successful match,
+ e.g., it is an alternative not taken or a repetition operator
+ allows zero repetitions of it, then the function sets
+ `REGS->start[I]' and `REGS->end[I]' to -1.
+
+ For example, when you match the pattern `(a)*b' against the string
+ `b', you get:
+
+ * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'
+
+ * -1 in `REGS->start[1]' and -1 in `REGS->end[1]'
+
+ * If the I-th group matches a zero-length string, then the function
+ sets `REGS->start[I]' and `REGS->end[I]' to the index just beyond
+ that zero-length string.
+
+ For example, when you match the pattern `(a*)b' against the string
+ `b', you get:
+
+ * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'
+
+ * 0 in `REGS->start[1]' and 0 in `REGS->end[1]'
+
+ * If an I-th group contains a J-th group in turn not contained
+ within any other group within group I and the function reports a
+ match of the I-th group, then it records in `REGS->start[J]' and
+ `REGS->end[J]' the last match (if it matched) of the J-th group.
+
+ For example, when you match the pattern `((a*)b)*' against the
+ string `abb', group 2 last matches the empty string, so you get
+ what it previously matched:
+
+ * 0 in `REGS->start[0]' and 3 in `REGS->end[0]'
+
+ * 2 in `REGS->start[1]' and 3 in `REGS->end[1]'
+
+ * 2 in `REGS->start[2]' and 2 in `REGS->end[2]'
+
+ When you match the pattern `((a)*b)*' against the string `abb',
+ group 2 doesn't participate in the last match, so you get:
+
+ * 0 in `REGS->start[0]' and 3 in `REGS->end[0]'
+
+ * 2 in `REGS->start[1]' and 3 in `REGS->end[1]'
+
+ * 0 in `REGS->start[2]' and 1 in `REGS->end[2]'
+
+ * If an I-th group contains a J-th group in turn not contained
+ within any other group within group I and the function sets
+ `REGS->start[I]' and `REGS->end[I]' to -1, then it also sets
+ `REGS->start[J]' and `REGS->end[J]' to -1.
+
+ For example, when you match the pattern `((a)*b)*c' against the
+ string `c', you get:
+
+ * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'
+
+ * -1 in `REGS->start[1]' and -1 in `REGS->end[1]'
+
+ * -1 in `REGS->start[2]' and -1 in `REGS->end[2]'
+
+
+File: regex.info, Node: Freeing GNU Pattern Buffers, Prev: Using Registers, Up: GNU Regex Functions
+
+Freeing GNU Pattern Buffers
+---------------------------
+
+ To free any allocated fields of a pattern buffer, you can use the
+POSIX function described in *Note Freeing POSIX Pattern Buffers::,
+since the type `regex_t'--the type for POSIX pattern buffers--is
+equivalent to the type `re_pattern_buffer'. After freeing a pattern
+buffer, you need to again compile a regular expression in it (*note GNU
+Regular Expression Compiling::.) before passing it to a matching or
+searching function.
+
+
+File: regex.info, Node: POSIX Regex Functions, Next: BSD Regex Functions, Prev: GNU Regex Functions, Up: Programming with Regex
+
+POSIX Regex Functions
+=====================
+
+ If you're writing code that has to be POSIX compatible, you'll need
+to use these functions. Their interfaces are as specified by POSIX,
+draft 1003.2/D11.2.
+
+* Menu:
+
+* POSIX Pattern Buffers:: The regex_t type.
+* POSIX Regular Expression Compiling:: regcomp ()
+* POSIX Matching:: regexec ()
+* Reporting Errors:: regerror ()
+* Using Byte Offsets:: The regmatch_t type.
+* Freeing POSIX Pattern Buffers:: regfree ()
+
+
+File: regex.info, Node: POSIX Pattern Buffers, Next: POSIX Regular Expression Compiling, Up: POSIX Regex Functions
+
+POSIX Pattern Buffers
+---------------------
+
+ To compile or match a given regular expression the POSIX way, you
+must supply a pattern buffer exactly the way you do for GNU (*note GNU
+Pattern Buffers::.). POSIX pattern buffers have type `regex_t', which
+is equivalent to the GNU pattern buffer type `re_pattern_buffer'.
+
+
+File: regex.info, Node: POSIX Regular Expression Compiling, Next: POSIX Matching, Prev: POSIX Pattern Buffers, Up: POSIX Regex Functions
+
+POSIX Regular Expression Compiling
+----------------------------------
+
+ With POSIX, you can only search for a given regular expression; you
+can't match it. To do this, you must first compile it in a pattern
+buffer, using `regcomp'.
+
+ To compile a pattern buffer, use:
+
+ int
+ regcomp (regex_t *PREG, const char *REGEX, int CFLAGS)
+
+PREG is the initialized pattern buffer's address, REGEX is the regular
+expression's address, and CFLAGS is the compilation flags, which Regex
+considers as a collection of bits. Here are the valid bits, as defined
+in `regex.h':
+
+`REG_EXTENDED'
+ says to use POSIX Extended Regular Expression syntax; if this isn't
+ set, then says to use POSIX Basic Regular Expression syntax.
+ `regcomp' sets PREG's `syntax' field accordingly.
+
+`REG_ICASE'
+ says to ignore case; `regcomp' sets PREG's `translate' field to a
+ translate table which ignores case, replacing anything you've put
+ there before.
+
+`REG_NOSUB'
+ says to set PREG's `no_sub' field; *note POSIX Matching::., for
+ what this means.
+
+`REG_NEWLINE'
+ says that a:
+
+ * match-any-character operator (*note Match-any-character
+ Operator::.) doesn't match a newline.
+
+ * nonmatching list not containing a newline (*note List
+ Operators::.) matches a newline.
+
+ * match-beginning-of-line operator (*note
+ Match-beginning-of-line Operator::.) matches the empty string
+ immediately after a newline, regardless of how `REG_NOTBOL'
+ is set (*note POSIX Matching::., for an explanation of
+ `REG_NOTBOL').
+
+ * match-end-of-line operator (*note Match-end-of-line
+ Operator::.) matches the empty string immediately before a
+ newline, regardless of how `REG_NOTEOL' is set (*note POSIX
+ Matching::., for an explanation of `REG_NOTEOL').
+
+ If `regcomp' successfully compiles the regular expression, it returns
+zero and sets `*PREG' to the compiled pattern. Except for
+`syntax' (which it sets as explained above), it also sets the same
+fields the same way as does the GNU compiling function (*note GNU
+Regular Expression Compiling::.).
+
+ If `regcomp' can't compile the regular expression, it returns one of
+the error codes listed here. (Except when noted differently, the
+syntax in all examples below is basic regular expression syntax.)
+
+`REG_BADRPT'
+ For example, the consecutive repetition operators `**' in `a**'
+ are invalid. As another example, if the syntax is extended
+ regular expression syntax, then the repetition operator `*' with
+ nothing on which to operate in `*' is invalid.
+
+`REG_BADBR'
+ For example, the COUNT `-1' in `a\{-1' is invalid.
+
+`REG_EBRACE'
+ For example, `a\{1' is missing a close-interval operator.
+
+`REG_EBRACK'
+ For example, `[a' is missing a close-list operator.
+
+`REG_ERANGE'
+ For example, the range ending point `z' that collates lower than
+ does its starting point `a' in `[z-a]' is invalid. Also, the
+ range with the character class `[:alpha:]' as its starting point in
+ `[[:alpha:]-|]' is invalid.
+
+`REG_ECTYPE'
+ For example, the character class name `foo' in `[[:foo:]]' is
+ invalid.
+
+`REG_EPAREN'
+ For example, `a\)' is missing an open-group operator and `\(a' is
+ missing a close-group operator.
+
+`REG_ESUBREG'
+ For example, the back reference `\2' that refers to a nonexistent
+ subexpression in `\(a\)\2' is invalid.
+
+`REG_EEND'
+ Returned when a regular expression causes no other more specific
+ error.
+
+`REG_EESCAPE'
+ For example, the trailing backslash `\' in `a\' is invalid, as is
+ the one in `\'.
+
+`REG_BADPAT'
+ For example, in the extended regular expression syntax, the empty
+ group `()' in `a()b' is invalid.
+
+`REG_ESIZE'
+ Returned when a regular expression needs a pattern buffer larger
+ than 65536 bytes.
+
+`REG_ESPACE'
+ Returned when a regular expression causes Regex to run out of
+ memory.
+
+
+File: regex.info, Node: POSIX Matching, Next: Reporting Errors, Prev: POSIX Regular Expression Compiling, Up: POSIX Regex Functions
+
+POSIX Matching
+--------------
+
+ Matching the POSIX way means trying to match a null-terminated string
+starting at its first character. Once you've compiled a pattern into a
+pattern buffer (*note POSIX Regular Expression Compiling::.), you can
+ask the matcher to match that pattern against a string using:
+
+ int
+ regexec (const regex_t *PREG, const char *STRING,
+ size_t NMATCH, regmatch_t PMATCH[], int EFLAGS)
+
+PREG is the address of a pattern buffer for a compiled pattern. STRING
+is the string you want to match.
+
+ *Note Using Byte Offsets::, for an explanation of PMATCH. If you
+pass zero for NMATCH or you compiled PREG with the compilation flag
+`REG_NOSUB' set, then `regexec' will ignore PMATCH; otherwise, you must
+allocate it to have at least NMATCH elements. `regexec' will record
+NMATCH byte offsets in PMATCH, and set to -1 any unused elements up to
+`PMATCH[NMATCH - 1]'.
+
+ EFLAGS specifies "execution flags"--namely, the two bits `REG_NOTBOL'
+and `REG_NOTEOL' (defined in `regex.h'). If you set `REG_NOTBOL', then
+the match-beginning-of-line operator (*note Match-beginning-of-line
+Operator::.) always fails to match. This lets you match against pieces
+of a line, as you would need to if, say, searching for repeated
+instances of a given pattern in a line; it would work correctly for
+patterns both with and without match-beginning-of-line operators.
+`REG_NOTEOL' works analogously for the match-end-of-line operator
+(*note Match-end-of-line Operator::.); it exists for symmetry.
+
+ `regexec' tries to find a match for PREG in STRING according to the
+syntax in PREG's `syntax' field. (*Note POSIX Regular Expression
+Compiling::, for how to set it.) The function returns zero if the
+compiled pattern matches STRING and `REG_NOMATCH' (defined in
+`regex.h') if it doesn't.
+
+
+File: regex.info, Node: Reporting Errors, Next: Using Byte Offsets, Prev: POSIX Matching, Up: POSIX Regex Functions
+
+Reporting Errors
+----------------
+
+ If either `regcomp' or `regexec' fails, it returns a nonzero error
+code, the possibilities for which are defined in `regex.h'. *Note
+POSIX Regular Expression Compiling::, and *Note POSIX Matching::, for
+what these codes mean. To get an error string corresponding to these
+codes, you can use:
+
+ size_t
+ regerror (int ERRCODE,
+ const regex_t *PREG,
+ char *ERRBUF,
+ size_t ERRBUF_SIZE)
+
+ERRCODE is an error code, PREG is the address of the pattern buffer
+which provoked the error, ERRBUF is the error buffer, and ERRBUF_SIZE
+is ERRBUF's size.
+
+ `regerror' returns the size in bytes of the error string
+corresponding to ERRCODE (including its terminating null). If ERRBUF
+and ERRBUF_SIZE are nonzero, it also returns in ERRBUF the first
+ERRBUF_SIZE - 1 characters of the error string, followed by a null.
+ERRBUF_SIZE must be a nonnegative number less than or equal to the size
+in bytes of ERRBUF.
+
+ You can call `regerror' with a null ERRBUF and a zero ERRBUF_SIZE to
+determine how large ERRBUF need be to accommodate `regerror''s error
+string.
+
+
+File: regex.info, Node: Using Byte Offsets, Next: Freeing POSIX Pattern Buffers, Prev: Reporting Errors, Up: POSIX Regex Functions
+
+Using Byte Offsets
+------------------
+
+ In POSIX, variables of type `regmatch_t' hold information analogous,
+but not identical, to GNU's registers (*note Using Registers::.).
+To get information about registers in POSIX, pass to `regexec' a
+nonzero PMATCH of type `regmatch_t', i.e., the address of a structure
+of this type, defined in `regex.h':
+
+ typedef struct
+ {
+ regoff_t rm_so;
+ regoff_t rm_eo;
+ } regmatch_t;
+
+ When reading in *Note Using Registers::, about how the matching
+function stores the information into the registers, substitute PMATCH
+for REGS, `PMATCH[I].rm_so' for `REGS->start[I]' and
+`PMATCH[I].rm_eo' for `REGS->end[I]'.
+
+
+File: regex.info, Node: Freeing POSIX Pattern Buffers, Prev: Using Byte Offsets, Up: POSIX Regex Functions
+
+Freeing POSIX Pattern Buffers
+-----------------------------
+
+ To free any allocated fields of a pattern buffer, use:
+
+ void
+ regfree (regex_t *PREG)
+
+PREG is the pattern buffer whose allocated fields you want freed.
+`regfree' also sets PREG's `allocated' and `used' fields to zero.
+After freeing a pattern buffer, you need to again compile a regular
+expression in it (*note POSIX Regular Expression Compiling::.) before
+passing it to the matching function (*note POSIX Matching::.).
+
+
+File: regex.info, Node: BSD Regex Functions, Prev: POSIX Regex Functions, Up: Programming with Regex
+
+BSD Regex Functions
+===================
+
+ If you're writing code that has to be Berkeley UNIX compatible,
+you'll need to use these functions whose interfaces are the same as
+those in Berkeley UNIX.
+
+* Menu:
+
+* BSD Regular Expression Compiling:: re_comp ()
+* BSD Searching:: re_exec ()
+
+
+File: regex.info, Node: BSD Regular Expression Compiling, Next: BSD Searching, Up: BSD Regex Functions
+
+BSD Regular Expression Compiling
+--------------------------------
+
+ With Berkeley UNIX, you can only search for a given regular
+expression; you can't match one. To search for it, you must first
+compile it. Before you compile it, you must indicate the regular
+expression syntax you want it compiled according to by setting the
+variable `re_syntax_options' (declared in `regex.h') to some syntax
+(*note Regular Expression Syntax::.).
+
+ To compile a regular expression use:
+
+ char *
+ re_comp (char *REGEX)
+
+REGEX is the address of a null-terminated regular expression.
+`re_comp' uses an internal pattern buffer, so you can use only the most
+recently compiled pattern buffer. This means that if you want to use a
+given regular expression that you've already compiled--but it isn't the
+latest one you've compiled--you'll have to recompile it. If you call
+`re_comp' with the null string (*not* the empty string) as the
+argument, it doesn't change the contents of the pattern buffer.
+
+ If `re_comp' successfully compiles the regular expression, it returns
+zero. If it can't compile the regular expression, it returns an error
+string. `re_comp''s error messages are identical to those of
+`re_compile_pattern' (*note GNU Regular Expression Compiling::.).
+
+
+File: regex.info, Node: BSD Searching, Prev: BSD Regular Expression Compiling, Up: BSD Regex Functions
+
+BSD Searching
+-------------
+
+ Searching the Berkeley UNIX way means searching in a string starting
+at its first character and trying successive positions within it to
+find a match. Once you've compiled a pattern using `re_comp' (*note
+BSD Regular Expression Compiling::.), you can ask Regex to search for
+that pattern in a string using:
+
+ int
+ re_exec (char *STRING)
+
+STRING is the address of the null-terminated string in which you want
+to search.
+
+ `re_exec' returns either 1 for success or 0 for failure. It
+automatically uses a GNU fastmap (*note Searching with Fastmaps::.).
+
+
+File: regex.info, Node: Copying, Next: Index, Prev: Programming with Regex, Up: Top
+
+GNU GENERAL PUBLIC LICENSE
+**************************
+
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+Preamble
+========
+
+ The licenses for most software are designed to take away your freedom
+to share and change it. By contrast, the GNU General Public License is
+intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it in
+new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 1. This License applies to any program or other work which contains a
+ notice placed by the copyright holder saying it may be distributed
+ under the terms of this General Public License. The "Program",
+ below, refers to any such program or work, and a "work based on
+ the Program" means either the Program or any derivative work under
+ copyright law: that is to say, a work containing the Program or a
+ portion of it, either verbatim or with modifications and/or
+ translated into another language. (Hereinafter, translation is
+ included without limitation in the term "modification".) Each
+ licensee is addressed as "you".
+
+ Activities other than copying, distribution and modification are
+ not covered by this License; they are outside its scope. The act
+ of running the Program is not restricted, and the output from the
+ Program is covered only if its contents constitute a work based on
+ the Program (independent of having been made by running the
+ Program). Whether that is true depends on what the Program does.
+
+ 2. You may copy and distribute verbatim copies of the Program's
+ source code as you receive it, in any medium, provided that you
+ conspicuously and appropriately publish on each copy an appropriate
+ copyright notice and disclaimer of warranty; keep intact all the
+ notices that refer to this License and to the absence of any
+ warranty; and give any other recipients of the Program a copy of
+ this License along with the Program.
+
+ You may charge a fee for the physical act of transferring a copy,
+ and you may at your option offer warranty protection in exchange
+ for a fee.
+
+ 3. You may modify your copy or copies of the Program or any portion
+ of it, thus forming a work based on the Program, and copy and
+ distribute such modifications or work under the terms of Section 1
+ above, provided that you also meet all of these conditions:
+
+ a. You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b. You must cause any work that you distribute or publish, that
+ in whole or in part contains or is derived from the Program
+ or any part thereof, to be licensed as a whole at no charge
+ to all third parties under the terms of this License.
+
+ c. If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display
+ an announcement including an appropriate copyright notice and
+ a notice that there is no warranty (or else, saying that you
+ provide a warranty) and that users may redistribute the
+ program under these conditions, and telling the user how to
+ view a copy of this License. (Exception: if the Program
+ itself is interactive but does not normally print such an
+ announcement, your work based on the Program is not required
+ to print an announcement.)
+
+ These requirements apply to the modified work as a whole. If
+ identifiable sections of that work are not derived from the
+ Program, and can be reasonably considered independent and separate
+ works in themselves, then this License, and its terms, do not
+ apply to those sections when you distribute them as separate
+ works. But when you distribute the same sections as part of a
+ whole which is a work based on the Program, the distribution of
+ the whole must be on the terms of this License, whose permissions
+ for other licensees extend to the entire whole, and thus to each
+ and every part regardless of who wrote it.
+
+ Thus, it is not the intent of this section to claim rights or
+ contest your rights to work written entirely by you; rather, the
+ intent is to exercise the right to control the distribution of
+ derivative or collective works based on the Program.
+
+ In addition, mere aggregation of another work not based on the
+ Program with the Program (or with a work based on the Program) on
+ a volume of a storage or distribution medium does not bring the
+ other work under the scope of this License.
+
+ 4. You may copy and distribute the Program (or a work based on it,
+ under Section 2) in object code or executable form under the terms
+ of Sections 1 and 2 above provided that you also do one of the
+ following:
+
+ a. Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Sections 1 and 2 above on a medium customarily used for
+ software interchange; or,
+
+ b. Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a
+ medium customarily used for software interchange; or,
+
+ c. Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with
+ such an offer, in accord with Subsection b above.)
+
+ The source code for a work means the preferred form of the work for
+ making modifications to it. For an executable work, complete
+ source code means all the source code for all modules it contains,
+ plus any associated interface definition files, plus the scripts
+ used to control compilation and installation of the executable.
+ However, as a special exception, the source code distributed need
+ not include anything that is normally distributed (in either
+ source or binary form) with the major components (compiler,
+ kernel, and so on) of the operating system on which the executable
+ runs, unless that component itself accompanies the executable.
+
+ If distribution of executable or object code is made by offering
+ access to copy from a designated place, then offering equivalent
+ access to copy the source code from the same place counts as
+ distribution of the source code, even though third parties are not
+ compelled to copy the source along with the object code.
+
+ 5. You may not copy, modify, sublicense, or distribute the Program
+ except as expressly provided under this License. Any attempt
+ otherwise to copy, modify, sublicense or distribute the Program is
+ void, and will automatically terminate your rights under this
+ License. However, parties who have received copies, or rights,
+ from you under this License will not have their licenses
+ terminated so long as such parties remain in full compliance.
+
+ 6. You are not required to accept this License, since you have not
+ signed it. However, nothing else grants you permission to modify
+ or distribute the Program or its derivative works. These actions
+ are prohibited by law if you do not accept this License.
+ Therefore, by modifying or distributing the Program (or any work
+ based on the Program), you indicate your acceptance of this
+ License to do so, and all its terms and conditions for copying,
+ distributing or modifying the Program or works based on it.
+
+ 7. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program
+ subject to these terms and conditions. You may not impose any
+ further restrictions on the recipients' exercise of the rights
+ granted herein. You are not responsible for enforcing compliance
+ by third parties to this License.
+
+ 8. If, as a consequence of a court judgment or allegation of patent
+ infringement or for any other reason (not limited to patent
+ issues), conditions are imposed on you (whether by court order,
+ agreement or otherwise) that contradict the conditions of this
+ License, they do not excuse you from the conditions of this
+ License. If you cannot distribute so as to satisfy simultaneously
+ your obligations under this License and any other pertinent
+ obligations, then as a consequence you may not distribute the
+ Program at all. For example, if a patent license would not permit
+ royalty-free redistribution of the Program by all those who
+ receive copies directly or indirectly through you, then the only
+ way you could satisfy both it and this License would be to refrain
+ entirely from distribution of the Program.
+
+ If any portion of this section is held invalid or unenforceable
+ under any particular circumstance, the balance of the section is
+ intended to apply and the section as a whole is intended to apply
+ in other circumstances.
+
+ It is not the purpose of this section to induce you to infringe any
+ patents or other property right claims or to contest validity of
+ any such claims; this section has the sole purpose of protecting
+ the integrity of the free software distribution system, which is
+ implemented by public license practices. Many people have made
+ generous contributions to the wide range of software distributed
+ through that system in reliance on consistent application of that
+ system; it is up to the author/donor to decide if he or she is
+ willing to distribute software through any other system and a
+ licensee cannot impose that choice.
+
+ This section is intended to make thoroughly clear what is believed
+ to be a consequence of the rest of this License.
+
+ 9. If the distribution and/or use of the Program is restricted in
+ certain countries either by patents or by copyrighted interfaces,
+ the original copyright holder who places the Program under this
+ License may add an explicit geographical distribution limitation
+ excluding those countries, so that distribution is permitted only
+ in or among countries not thus excluded. In such case, this
+ License incorporates the limitation as if written in the body of
+ this License.
+
+ 10. The Free Software Foundation may publish revised and/or new
+ versions of the General Public License from time to time. Such
+ new versions will be similar in spirit to the present version, but
+ may differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+ Program specifies a version number of this License which applies
+ to it and "any later version", you have the option of following
+ the terms and conditions either of that version or of any later
+ version published by the Free Software Foundation. If the Program
+ does not specify a version number of this License, you may choose
+ any version ever published by the Free Software Foundation.
+
+ 11. If you wish to incorporate parts of the Program into other free
+ programs whose distribution conditions are different, write to the
+ author to ask for permission. For software which is copyrighted
+ by the Free Software Foundation, write to the Free Software
+ Foundation; we sometimes make exceptions for this. Our decision
+ will be guided by the two goals of preserving the free status of
+ all derivatives of our free software and of promoting the sharing
+ and reuse of software generally.
+
+ NO WARRANTY
+
+ 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
+ WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
+ LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
+ WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
+ NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+ QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
+ SERVICING, REPAIR OR CORRECTION.
+
+ 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+ WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
+ MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
+ LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+ INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
+ INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
+ OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
+ OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+Appendix: How to Apply These Terms to Your New Programs
+=======================================================
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
+ Copyright (C) 19YY NAME OF AUTHOR
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+ Also add information on how to contact you by electronic and paper
+mail.
+
+ If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+ The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+ You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the program,
+if necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ SIGNATURE OF TY COON, 1 April 1989
+ Ty Coon, President of Vice
+
+ This General Public License does not permit incorporating your
+program into proprietary programs. If your program is a subroutine
+library, you may consider it more useful to permit linking proprietary
+applications with the library. If this is what you want to do, use the
+GNU Library General Public License instead of this License.
+
+
+File: regex.info, Node: Index, Prev: Copying, Up: Top
+
+Index
+*****
+
+* Menu:
+
+* $: Match-end-of-line Operator.
+* (: Grouping Operators.
+* ): Grouping Operators.
+* *: Match-zero-or-more Operator.
+* +: Match-one-or-more Operator.
+* -: List Operators.
+* .: Match-any-character Operator.
+* :] in regex: Character Class Operators.
+* ?: Match-zero-or-one Operator.
+* {: Interval Operators.
+* }: Interval Operators.
+* [: in regex: Character Class Operators.
+* [^: List Operators.
+* [: List Operators.
+* \': Match-end-of-buffer Operator.
+* \<: Match-beginning-of-word Operator.
+* \>: Match-end-of-word Operator.
+* \{: Interval Operators.
+* \}: Interval Operators.
+* \b: Match-word-boundary Operator.
+* \B: Match-within-word Operator.
+* \s: Match-syntactic-class Operator.
+* \S: Match-not-syntactic-class Operator.
+* \w: Match-word-constituent Operator.
+* \W: Match-non-word-constituent Operator.
+* \`: Match-beginning-of-buffer Operator.
+* \: List Operators.
+* ]: List Operators.
+* ^: List Operators.
+* allocated initialization: GNU Regular Expression Compiling.
+* alternation operator: Alternation Operator.
+* alternation operator and ^: Match-beginning-of-line Operator.
+* anchoring: Anchoring Operators.
+* anchors: Match-end-of-line Operator.
+* anchors: Match-beginning-of-line Operator.
+* Awk: Predefined Syntaxes.
+* back references: Back-reference Operator.
+* backtracking: Match-zero-or-more Operator.
+* backtracking: Alternation Operator.
+* beginning-of-line operator: Match-beginning-of-line Operator.
+* bracket expression: List Operators.
+* buffer field, set by re_compile_pattern: GNU Regular Expression Compiling.
+* buffer initialization: GNU Regular Expression Compiling.
+* character classes: Character Class Operators.
+* Egrep: Predefined Syntaxes.
+* Emacs: Predefined Syntaxes.
+* end in struct re_registers: Using Registers.
+* end-of-line operator: Match-end-of-line Operator.
+* fastmap initialization: GNU Regular Expression Compiling.
+* fastmaps: Searching with Fastmaps.
+* fastmap_accurate field, set by re_compile_pattern: GNU Regular Expression Compiling.
+* Grep: Predefined Syntaxes.
+* grouping: Grouping Operators.
+* ignoring case: POSIX Regular Expression Compiling.
+* interval expression: Interval Operators.
+* matching list: List Operators.
+* matching newline: List Operators.
+* matching with GNU functions: GNU Matching.
+* newline_anchor field in pattern buffer: Match-beginning-of-line Operator.
+* nonmatching list: List Operators.
+* not_bol field in pattern buffer: Match-beginning-of-line Operator.
+* num_regs in struct re_registers: Using Registers.
+* open-group operator and ^: Match-beginning-of-line Operator.
+* or operator: Alternation Operator.
+* parenthesizing: Grouping Operators.
+* pattern buffer initialization: GNU Regular Expression Compiling.
+* pattern buffer, definition of: GNU Pattern Buffers.
+* POSIX Awk: Predefined Syntaxes.
+* range argument to re_search: GNU Searching.
+* regex.c: Overview.
+* regex.h: Overview.
+* regexp anchoring: Anchoring Operators.
+* regmatch_t: Using Byte Offsets.
+* regs_allocated: Using Registers.
+* REGS_FIXED: Using Registers.
+* REGS_REALLOCATE: Using Registers.
+* REGS_UNALLOCATED: Using Registers.
+* regular expressions, syntax of: Regular Expression Syntax.
+* REG_EXTENDED: POSIX Regular Expression Compiling.
+* REG_ICASE: POSIX Regular Expression Compiling.
+* REG_NEWLINE: POSIX Regular Expression Compiling.
+* REG_NOSUB: POSIX Regular Expression Compiling.
+* RE_BACKSLASH_ESCAPE_IN_LIST: Syntax Bits.
+* RE_BK_PLUS_QM: Syntax Bits.
+* RE_CHAR_CLASSES: Syntax Bits.
+* RE_CONTEXT_INDEP_ANCHORS: Syntax Bits.
+* RE_CONTEXT_INDEP_ANCHORS (and ^): Match-beginning-of-line Operator.
+* RE_CONTEXT_INDEP_OPS: Syntax Bits.
+* RE_CONTEXT_INVALID_OPS: Syntax Bits.
+* RE_DOT_NEWLINE: Syntax Bits.
+* RE_DOT_NOT_NULL: Syntax Bits.
+* RE_INTERVALS: Syntax Bits.
+* RE_LIMITED_OPS: Syntax Bits.
+* RE_NEWLINE_ALT: Syntax Bits.
+* RE_NO_BK_BRACES: Syntax Bits.
+* RE_NO_BK_PARENS: Syntax Bits.
+* RE_NO_BK_REFS: Syntax Bits.
+* RE_NO_BK_VBAR: Syntax Bits.
+* RE_NO_EMPTY_RANGES: Syntax Bits.
+* re_nsub field, set by re_compile_pattern: GNU Regular Expression Compiling.
+* re_pattern_buffer definition: GNU Pattern Buffers.
+* re_registers: Using Registers.
+* re_syntax_options initialization: GNU Regular Expression Compiling.
+* RE_UNMATCHED_RIGHT_PAREN_ORD: Syntax Bits.
+* searching with GNU functions: GNU Searching.
+* start argument to re_search: GNU Searching.
+* start in struct re_registers: Using Registers.
+* struct re_pattern_buffer definition: GNU Pattern Buffers.
+* subexpressions: Grouping Operators.
+* syntax field, set by re_compile_pattern: GNU Regular Expression Compiling.
+* syntax bits: Syntax Bits.
+* syntax initialization: GNU Regular Expression Compiling.
+* syntax of regular expressions: Regular Expression Syntax.
+* translate initialization: GNU Regular Expression Compiling.
+* used field, set by re_compile_pattern: GNU Regular Expression Compiling.
+* word boundaries, matching: Match-word-boundary Operator.
+* \: The Backslash Character.
+* \(: Grouping Operators.
+* \): Grouping Operators.
+* \|: Alternation Operator.
+* ^: Match-beginning-of-line Operator.
+* |: Alternation Operator.
+
+
+
+Tag Table:
+Node: Top1064
+Node: Overview4562
+Node: Regular Expression Syntax6746
+Node: Syntax Bits7916
+Node: Predefined Syntaxes14018
+Node: Collating Elements vs. Characters17872
+Node: The Backslash Character18835
+Node: Common Operators21992
+Node: Match-self Operator23445
+Node: Match-any-character Operator23941
+Node: Concatenation Operator24520
+Node: Repetition Operators25017
+Node: Match-zero-or-more Operator25436
+Node: Match-one-or-more Operator27483
+Node: Match-zero-or-one Operator28341
+Node: Interval Operators29196
+Node: Alternation Operator30991
+Node: List Operators32489
+Node: Character Class Operators35272
+Node: Range Operator36901
+Node: Grouping Operators38930
+Node: Back-reference Operator40251
+Node: Anchoring Operators43073
+Node: Match-beginning-of-line Operator43447
+Node: Match-end-of-line Operator44779
+Node: GNU Operators45518
+Node: Word Operators45767
+Node: Non-Emacs Syntax Tables46391
+Node: Match-word-boundary Operator47465
+Node: Match-within-word Operator47858
+Node: Match-beginning-of-word Operator48255
+Node: Match-end-of-word Operator48588
+Node: Match-word-constituent Operator48908
+Node: Match-non-word-constituent Operator49234
+Node: Buffer Operators49545
+Node: Match-beginning-of-buffer Operator49952
+Node: Match-end-of-buffer Operator50264
+Node: GNU Emacs Operators50558
+Node: Syntactic Class Operators50901
+Node: Emacs Syntax Tables51307
+Node: Match-syntactic-class Operator51963
+Node: Match-not-syntactic-class Operator52560
+Node: What Gets Matched?53150
+Node: Programming with Regex53799
+Node: GNU Regex Functions54237
+Node: GNU Pattern Buffers55078
+Node: GNU Regular Expression Compiling58303
+Node: GNU Matching61181
+Node: GNU Searching63101
+Node: Matching/Searching with Split Data64913
+Node: Searching with Fastmaps66369
+Node: GNU Translate Tables68921
+Node: Using Registers70892
+Node: Freeing GNU Pattern Buffers77000
+Node: POSIX Regex Functions77593
+Node: POSIX Pattern Buffers78266
+Node: POSIX Regular Expression Compiling78709
+Node: POSIX Matching82836
+Node: Reporting Errors84791
+Node: Using Byte Offsets86048
+Node: Freeing POSIX Pattern Buffers86861
+Node: BSD Regex Functions87467
+Node: BSD Regular Expression Compiling87886
+Node: BSD Searching89258
+Node: Copying89960
+Node: Index109122
+
+End Tag Table
diff --git a/gnu/lib/libregex/doc/regex.texi b/gnu/lib/libregex/doc/regex.texi
new file mode 100644
index 0000000..d93953e
--- /dev/null
+++ b/gnu/lib/libregex/doc/regex.texi
@@ -0,0 +1,3138 @@
+\input texinfo
+@c %**start of header
+@setfilename regex.info
+@settitle Regex
+@c %**end of header
+
+@c \\{fill-paragraph} works better (for me, anyway) if the text in the
+@c source file isn't indented.
+@paragraphindent 2
+
+@c Define a new index for our magic constants.
+@defcodeindex cn
+
+@c Put everything in one index (arbitrarily chosen to be the concept index).
+@syncodeindex cn cp
+@syncodeindex ky cp
+@syncodeindex pg cp
+@syncodeindex tp cp
+@syncodeindex vr cp
+
+@c Here is what we use in the Info `dir' file:
+@c * Regex: (regex). Regular expression library.
+
+
+@ifinfo
+This file documents the GNU regular expression library.
+
+Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries a copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+@end ignore
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled ``GNU General Public License'' is included exactly as
+in the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that the section entitled ``GNU General Public License'' may be
+included in a translation approved by the Free Software Foundation
+instead of in the original English.
+@end ifinfo
+
+
+@titlepage
+
+@title Regex
+@subtitle edition 0.12a
+@subtitle 19 September 1992
+@author Kathryn A. Hargreaves
+@author Karl Berry
+
+@page
+
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1992 Free Software Foundation.
+
+Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled ``GNU General Public License'' is included exactly as
+in the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this
+one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that the section entitled ``GNU General Public License'' may be
+included in a translation approved by the Free Software Foundation
+instead of in the original English.
+
+@end titlepage
+
+
+@ifinfo
+@node Top, Overview, (dir), (dir)
+@top Regular Expression Library
+
+This manual documents how to program with the GNU regular expression
+library. This is edition 0.12a of the manual, 19 September 1992.
+
+The first part of this master menu lists the major nodes in this Info
+document, including the index. The rest of the menu lists all the
+lower-level nodes in the document.
+
+@menu
+* Overview::
+* Regular Expression Syntax::
+* Common Operators::
+* GNU Operators::
+* GNU Emacs Operators::
+* What Gets Matched?::
+* Programming with Regex::
+* Copying:: Copying and sharing Regex.
+* Index:: General index.
+ --- The Detailed Node Listing ---
+
+Regular Expression Syntax
+
+* Syntax Bits::
+* Predefined Syntaxes::
+* Collating Elements vs. Characters::
+* The Backslash Character::
+
+Common Operators
+
+* Match-self Operator:: Ordinary characters.
+* Match-any-character Operator:: .
+* Concatenation Operator:: Juxtaposition.
+* Repetition Operators:: * + ? @{@}
+* Alternation Operator:: |
+* List Operators:: [...] [^...]
+* Grouping Operators:: (...)
+* Back-reference Operator:: \digit
+* Anchoring Operators:: ^ $
+
+Repetition Operators
+
+* Match-zero-or-more Operator:: *
+* Match-one-or-more Operator:: +
+* Match-zero-or-one Operator:: ?
+* Interval Operators:: @{@}
+
+List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})
+
+* Character Class Operators:: [:class:]
+* Range Operator:: start-end
+
+Anchoring Operators
+
+* Match-beginning-of-line Operator:: ^
+* Match-end-of-line Operator:: $
+
+GNU Operators
+
+* Word Operators::
+* Buffer Operators::
+
+Word Operators
+
+* Non-Emacs Syntax Tables::
+* Match-word-boundary Operator:: \b
+* Match-within-word Operator:: \B
+* Match-beginning-of-word Operator:: \<
+* Match-end-of-word Operator:: \>
+* Match-word-constituent Operator:: \w
+* Match-non-word-constituent Operator:: \W
+
+Buffer Operators
+
+* Match-beginning-of-buffer Operator:: \`
+* Match-end-of-buffer Operator:: \'
+
+GNU Emacs Operators
+
+* Syntactic Class Operators::
+
+Syntactic Class Operators
+
+* Emacs Syntax Tables::
+* Match-syntactic-class Operator:: \sCLASS
+* Match-not-syntactic-class Operator:: \SCLASS
+
+Programming with Regex
+
+* GNU Regex Functions::
+* POSIX Regex Functions::
+* BSD Regex Functions::
+
+GNU Regex Functions
+
+* GNU Pattern Buffers:: The re_pattern_buffer type.
+* GNU Regular Expression Compiling:: re_compile_pattern ()
+* GNU Matching:: re_match ()
+* GNU Searching:: re_search ()
+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
+* Searching with Fastmaps:: re_compile_fastmap ()
+* GNU Translate Tables:: The `translate' field.
+* Using Registers:: The re_registers type and related fns.
+* Freeing GNU Pattern Buffers:: regfree ()
+
+POSIX Regex Functions
+
+* POSIX Pattern Buffers:: The regex_t type.
+* POSIX Regular Expression Compiling:: regcomp ()
+* POSIX Matching:: regexec ()
+* Reporting Errors:: regerror ()
+* Using Byte Offsets:: The regmatch_t type.
+* Freeing POSIX Pattern Buffers:: regfree ()
+
+BSD Regex Functions
+
+* BSD Regular Expression Compiling:: re_comp ()
+* BSD Searching:: re_exec ()
+@end menu
+@end ifinfo
+@node Overview, Regular Expression Syntax, Top, Top
+@chapter Overview
+
+A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text
+string that describes some (mathematical) set of strings. A regexp
+@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of
+strings described by @var{r}.
+
+Using the Regex library, you can:
+
+@itemize @bullet
+
+@item
+see if a string matches a specified pattern as a whole, and
+
+@item
+search within a string for a substring matching a specified pattern.
+
+@end itemize
+
+Some regular expressions match only one string, i.e., the set they
+describe has only one member. For example, the regular expression
+@samp{foo} matches the string @samp{foo} and no others. Other regular
+expressions match more than one string, i.e., the set they describe has
+more than one member. For example, the regular expression @samp{f*}
+matches the set of strings made up of any number (including zero) of
+@samp{f}s. As you can see, some characters in regular expressions match
+themselves (such as @samp{f}) and some don't (such as @samp{*}); the
+ones that don't match themselves instead let you specify patterns that
+describe many different strings.
+
+To either match or search for a regular expression with the Regex
+library functions, you must first compile it with a Regex pattern
+compiling function. A @dfn{compiled pattern} is a regular expression
+converted to the internal format used by the library functions. Once
+you've compiled a pattern, you can use it for matching or searching any
+number of times.
+
+The Regex library consists of two source files: @file{regex.h} and
+@file{regex.c}.
+@pindex regex.h
+@pindex regex.c
+Regex provides three groups of functions with which you can operate on
+regular expressions. One group---the @sc{gnu} group---is more powerful
+but not completely compatible with the other two, namely the @sc{posix}
+and Berkeley @sc{unix} groups; its interface was designed specifically
+for @sc{gnu}. The other two groups have the same interfaces as the
+regular expression functions in @sc{posix} and Berkeley @sc{unix},
+respectively.
+
+We wrote this manual with programmers in mind, not users of
+programs---such as Emacs---that use Regex. We describe the Regex
+library in its entirety, not how to write regular expressions that a
+particular program understands.
+
+
+@node Regular Expression Syntax, Common Operators, Overview, Top
+@chapter Regular Expression Syntax
+
+@cindex regular expressions, syntax of
+@cindex syntax of regular expressions
+
+@dfn{Characters} are things you can type. @dfn{Operators} are things in
+a regular expression that match one or more characters. You compose
+regular expressions from operators, which in turn you specify using one
+or more characters.
+
+Most characters represent what we call the match-self operator, i.e.,
+they match themselves; we call these characters @dfn{ordinary}. Other
+characters represent either all or parts of fancier operators; e.g.,
+@samp{.} represents what we call the match-any-character operator
+(which, no surprise, matches (almost) any character); we call these
+characters @dfn{special}. Two different things determine what
+characters represent what operators:
+
+@enumerate
+@item
+the regular expression syntax your program has told the Regex library to
+recognize, and
+
+@item
+the context of the character in the regular expression.
+@end enumerate
+
+In the following sections, we describe these things in more detail.
+
+@menu
+* Syntax Bits::
+* Predefined Syntaxes::
+* Collating Elements vs. Characters::
+* The Backslash Character::
+@end menu
+
+
+@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax
+@section Syntax Bits
+
+@cindex syntax bits
+
+In any particular syntax for regular expressions, some characters are
+always special, others are sometimes special, and others are never
+special. The particular syntax that Regex recognizes for a given
+regular expression depends on the value in the @code{syntax} field of
+the pattern buffer of that regular expression.
+
+You get a pattern buffer by compiling a regular expression. @xref{GNU
+Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information
+on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX
+Regular Expression Compiling}, and @ref{BSD Regular Expression
+Compiling}, for more information on compiling.
+
+Regex considers the value of the @code{syntax} field to be a collection
+of bits; we refer to these bits as @dfn{syntax bits}. In most cases,
+they affect what characters represent what operators. We describe the
+meanings of the operators to which we refer in @ref{Common Operators},
+@ref{GNU Operators}, and @ref{GNU Emacs Operators}.
+
+For reference, here is the complete list of syntax bits, in alphabetical
+order:
+
+@table @code
+
+@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS
+@item RE_BACKSLASH_ESCAPE_IN_LISTS
+If this bit is set, then @samp{\} inside a list (@pxref{List Operators})
+quotes (makes ordinary, if it's special) the following character; if
+this bit isn't set, then @samp{\} is an ordinary character inside lists.
+(@xref{The Backslash Character}, for what @samp{\} does outside of lists.)
+
+@cnindex RE_BK_PLUS_QM
+@item RE_BK_PLUS_QM
+If this bit is set, then @samp{\+} represents the match-one-or-more
+operator and @samp{\?} represents the match-zero-or-one operator; if
+this bit isn't set, then @samp{+} represents the match-one-or-more
+operator and @samp{?} represents the match-zero-or-one operator. This
+bit is irrelevant if @code{RE_LIMITED_OPS} is set.
+
+@cnindex RE_CHAR_CLASSES
+@item RE_CHAR_CLASSES
+If this bit is set, then you can use character classes in lists; if this
+bit isn't set, then you can't.
+
+@cnindex RE_CONTEXT_INDEP_ANCHORS
+@item RE_CONTEXT_INDEP_ANCHORS
+If this bit is set, then @samp{^} and @samp{$} are special anywhere outside
+a list; if this bit isn't set, then these characters are special only in
+certain contexts. @xref{Match-beginning-of-line Operator}, and
+@ref{Match-end-of-line Operator}.
+
+@cnindex RE_CONTEXT_INDEP_OPS
+@item RE_CONTEXT_INDEP_OPS
+If this bit is set, then certain characters are special anywhere outside
+a list; if this bit isn't set, then those characters are special only in
+some contexts and are ordinary elsewhere. Specifically, if this bit
+isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS}
+isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending
+on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators
+only if they're not first in a regular expression or just after an
+open-group or alternation operator. The same holds for @samp{@{} (or
+@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if
+it is the beginning of a valid interval and the syntax bit
+@code{RE_INTERVALS} is set.
+
+@cnindex RE_CONTEXT_INVALID_OPS
+@item RE_CONTEXT_INVALID_OPS
+If this bit is set, then repetition and alternation operators can't be
+in certain positions within a regular expression. Specifically, the
+regular expression is invalid if it has:
+
+@itemize @bullet
+
+@item
+a repetition operator first in the regular expression or just after a
+match-beginning-of-line, open-group, or alternation operator; or
+
+@item
+an alternation operator first or last in the regular expression, just
+before a match-end-of-line operator, or just after an alternation or
+open-group operator.
+
+@end itemize
+
+If this bit isn't set, then you can put the characters representing the
+repetition and alternation operators anywhere in a regular expression.
+Whether or not they will in fact be operators in certain positions
+depends on other syntax bits.
+
+@cnindex RE_DOT_NEWLINE
+@item RE_DOT_NEWLINE
+If this bit is set, then the match-any-character operator matches
+a newline; if this bit isn't set, then it doesn't.
+
+@cnindex RE_DOT_NOT_NULL
+@item RE_DOT_NOT_NULL
+If this bit is set, then the match-any-character operator doesn't match
+a null character; if this bit isn't set, then it does.
+
+@cnindex RE_INTERVALS
+@item RE_INTERVALS
+If this bit is set, then Regex recognizes interval operators; if this bit
+isn't set, then it doesn't.
+
+@cnindex RE_LIMITED_OPS
+@item RE_LIMITED_OPS
+If this bit is set, then Regex doesn't recognize the match-one-or-more,
+match-zero-or-one or alternation operators; if this bit isn't set, then
+it does.
+
+@cnindex RE_NEWLINE_ALT
+@item RE_NEWLINE_ALT
+If this bit is set, then newline represents the alternation operator; if
+this bit isn't set, then newline is ordinary.
+
+@cnindex RE_NO_BK_BRACES
+@item RE_NO_BK_BRACES
+If this bit is set, then @samp{@{} represents the open-interval operator
+and @samp{@}} represents the close-interval operator; if this bit isn't
+set, then @samp{\@{} represents the open-interval operator and
+@samp{\@}} represents the close-interval operator. This bit is relevant
+only if @code{RE_INTERVALS} is set.
+
+@cnindex RE_NO_BK_PARENS
+@item RE_NO_BK_PARENS
+If this bit is set, then @samp{(} represents the open-group operator and
+@samp{)} represents the close-group operator; if this bit isn't set, then
+@samp{\(} represents the open-group operator and @samp{\)} represents
+the close-group operator.
+
+@cnindex RE_NO_BK_REFS
+@item RE_NO_BK_REFS
+If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as
+the back reference operator; if this bit isn't set, then it does.
+
+@cnindex RE_NO_BK_VBAR
+@item RE_NO_BK_VBAR
+If this bit is set, then @samp{|} represents the alternation operator;
+if this bit isn't set, then @samp{\|} represents the alternation
+operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set.
+
+@cnindex RE_NO_EMPTY_RANGES
+@item RE_NO_EMPTY_RANGES
+If this bit is set, then a regular expression with a range whose ending
+point collates lower than its starting point is invalid; if this bit
+isn't set, then Regex considers such a range to be empty.
+
+@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD
+@item RE_UNMATCHED_RIGHT_PAREN_ORD
+If this bit is set and the regular expression has no matching open-group
+operator, then Regex considers what would otherwise be a close-group
+operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}.
+
+@end table
+
+
+@node Predefined Syntaxes, Collating Elements vs. Characters, Syntax Bits, Regular Expression Syntax
+@section Predefined Syntaxes
+
+If you're programming with Regex, you can set a pattern buffer's
+(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers})
+@code{syntax} field either to an arbitrary combination of syntax bits
+(@pxref{Syntax Bits}) or else to the configurations defined by Regex.
+These configurations define the syntaxes used by certain
+programs---@sc{gnu} Emacs,
+@cindex Emacs
+@sc{posix} Awk,
+@cindex POSIX Awk
+traditional Awk,
+@cindex Awk
+Grep,
+@cindex Grep
+@cindex Egrep
+Egrep---in addition to syntaxes for @sc{posix} basic and extended
+regular expressions.
+
+The predefined syntaxes---taken directly from @file{regex.h}---are:
+
+@example
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+@end example
+
+@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax
+@section Collating Elements vs.@: Characters
+
+@sc{posix} generalizes the notion of a character to that of a
+collating element. It defines a @dfn{collating element} to be ``a
+sequence of one or more bytes defined in the current collating sequence
+as a unit of collation.''
+
+This generalizes the notion of a character in
+two ways. First, a single character can map into two or more collating
+elements. For example, the German
+@tex
+`\ss'
+@end tex
+@ifinfo
+``es-zet''
+@end ifinfo
+collates as the collating element @samp{s} followed by another collating
+element @samp{s}. Second, two or more characters can map into one
+collating element. For example, the Spanish @samp{ll} collates after
+@samp{l} and before @samp{m}.
+
+Since @sc{posix}'s ``collating element'' preserves the essential idea of
+a ``character,'' we use the latter, more familiar, term in this document.
+
+@node The Backslash Character, , Collating Elements vs. Characters, Regular Expression Syntax
+@section The Backslash Character
+
+@cindex \
+The @samp{\} character has one of four different meanings, depending on
+the context in which you use it and what syntax bits are set
+(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next
+character, 3) introduce an operator, or 4) do nothing.
+
+@enumerate
+@item
+It stands for itself inside a list
+(@pxref{List Operators}) if the syntax bit
+@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]}
+would match @samp{\}.
+
+@item
+It quotes (makes ordinary, if it's special) the next character when you
+use it either:
+
+@itemize @bullet
+@item
+outside a list,@footnote{Sometimes
+you don't have to explicitly quote special characters to make
+them ordinary. For instance, most characters lose any special meaning
+inside a list (@pxref{List Operators}). In addition, if the syntax bits
+@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS}
+aren't set, then (for historical reasons) the matcher considers special
+characters ordinary if they are in contexts where the operations they
+represent make no sense; for example, then the match-zero-or-more
+operator (represented by @samp{*}) matches itself in the regular
+expression @samp{*foo} because there is no preceding expression on which
+it can operate. It is poor practice, however, to depend on this
+behavior; if you want a special character to be ordinary outside a list,
+it's better to always quote it, regardless.} or
+
+@item
+inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set.
+
+@end itemize
+
+@item
+It introduces an operator when followed by certain ordinary
+characters---sometimes only when certain syntax bits are set. See the
+cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR},
+@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. Also:
+
+@itemize @bullet
+@item
+@samp{\b} represents the match-word-boundary operator
+(@pxref{Match-word-boundary Operator}).
+
+@item
+@samp{\B} represents the match-within-word operator
+(@pxref{Match-within-word Operator}).
+
+@item
+@samp{\<} represents the match-beginning-of-word operator @*
+(@pxref{Match-beginning-of-word Operator}).
+
+@item
+@samp{\>} represents the match-end-of-word operator
+(@pxref{Match-end-of-word Operator}).
+
+@item
+@samp{\w} represents the match-word-constituent operator
+(@pxref{Match-word-constituent Operator}).
+
+@item
+@samp{\W} represents the match-non-word-constituent operator
+(@pxref{Match-non-word-constituent Operator}).
+
+@item
+@samp{\`} represents the match-beginning-of-buffer
+operator and @samp{\'} represents the match-end-of-buffer operator
+(@pxref{Buffer Operators}).
+
+@item
+If Regex was compiled with the C preprocessor symbol @code{emacs}
+defined, then @samp{\s@var{class}} represents the match-syntactic-class
+operator and @samp{\S@var{class}} represents the
+match-not-syntactic-class operator (@pxref{Syntactic Class Operators}).
+
+@end itemize
+
+@item
+In all other cases, Regex ignores @samp{\}. For example,
+@samp{\n} matches @samp{n}.
+
+@end enumerate
+
+@node Common Operators, GNU Operators, Regular Expression Syntax, Top
+@chapter Common Operators
+
+You compose regular expressions from operators. In the following
+sections, we describe the regular expression operators specified by
+@sc{posix}; @sc{gnu} also uses these. Most operators have more than one
+representation as characters. @xref{Regular Expression Syntax}, for
+what characters represent what operators under what circumstances.
+
+For most operators that can be represented in two ways, one
+representation is a single character and the other is that character
+preceded by @samp{\}. For example, either @samp{(} or @samp{\(}
+represents the open-group operator. Which one does depends on the
+setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is
+this so? Historical reasons dictate some of the varying
+representations, while @sc{posix} dictates others.
+
+Finally, almost all characters lose any special meaning inside a list
+(@pxref{List Operators}).
+
+@menu
+* Match-self Operator:: Ordinary characters.
+* Match-any-character Operator:: .
+* Concatenation Operator:: Juxtaposition.
+* Repetition Operators:: * + ? @{@}
+* Alternation Operator:: |
+* List Operators:: [...] [^...]
+* Grouping Operators:: (...)
+* Back-reference Operator:: \digit
+* Anchoring Operators:: ^ $
+@end menu
+
+@node Match-self Operator, Match-any-character Operator, , Common Operators
+@section The Match-self Operator (@var{ordinary character})
+
+This operator matches the character itself. All ordinary characters
+(@pxref{Regular Expression Syntax}) represent this operator. For
+example, @samp{f} is always an ordinary character, so the regular
+expression @samp{f} matches only the string @samp{f}. In
+particular, it does @emph{not} match the string @samp{ff}.
+
+@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators
+@section The Match-any-character Operator (@code{.})
+
+@cindex @samp{.}
+
+This operator matches any single printing or nonprinting character
+except it won't match a:
+
+@table @asis
+@item newline
+if the syntax bit @code{RE_DOT_NEWLINE} isn't set.
+
+@item null
+if the syntax bit @code{RE_DOT_NOT_NULL} is set.
+
+@end table
+
+The @samp{.} (period) character represents this operator. For example,
+@samp{a.b} matches any three-character string beginning with @samp{a}
+and ending with @samp{b}.
+
+@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators
+@section The Concatenation Operator
+
+This operator concatenates two regular expressions @var{a} and @var{b}.
+No character represents this operator; you simply put @var{b} after
+@var{a}. The result is a regular expression that will match a string if
+@var{a} matches its first part and @var{b} matches the rest. For
+example, @samp{xy} (two match-self operators) matches @samp{xy}.
+
+@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators
+@section Repetition Operators
+
+Repetition operators repeat the preceding regular expression a specified
+number of times.
+
+@menu
+* Match-zero-or-more Operator:: *
+* Match-one-or-more Operator:: +
+* Match-zero-or-one Operator:: ?
+* Interval Operators:: @{@}
+@end menu
+
+@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators
+@subsection The Match-zero-or-more Operator (@code{*})
+
+@cindex @samp{*}
+
+This operator repeats the smallest possible preceding regular expression
+as many times as necessary (including zero) to match the pattern.
+@samp{*} represents this operator. For example, @samp{o*}
+matches any string made up of zero or more @samp{o}s. Since this
+operator operates on the smallest preceding regular expression,
+@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So,
+@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on.
+
+Since the match-zero-or-more operator is a suffix operator, it may be
+useless as such when no regular expression precedes it. This is the
+case when it:
+
+@itemize @bullet
+@item
+is first in a regular expression, or
+
+@item
+follows a match-beginning-of-line, open-group, or alternation
+operator.
+
+@end itemize
+
+@noindent
+Three different things can happen in these cases:
+
+@enumerate
+@item
+If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the
+regular expression is invalid.
+
+@item
+If @code{RE_CONTEXT_INVALID_OPS} isn't set, but
+@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the
+match-zero-or-more operator (which then operates on the empty string).
+
+@item
+Otherwise, @samp{*} is ordinary.
+
+@end enumerate
+
+@cindex backtracking
+The matcher processes a match-zero-or-more operator by first matching as
+many repetitions of the smallest preceding regular expression as it can.
+Then it continues to match the rest of the pattern.
+
+If it can't match the rest of the pattern, it backtracks (as many times
+as necessary), each time discarding one of the matches until it can
+either match the entire pattern or be certain that it cannot get a
+match. For example, when matching @samp{ca*ar} against @samp{caaar},
+the matcher first matches all three @samp{a}s of the string with the
+@samp{a*} of the regular expression. However, it cannot then match the
+final @samp{ar} of the regular expression against the final @samp{r} of
+the string. So it backtracks, discarding the match of the last @samp{a}
+in the string. It can then match the remaining @samp{ar}.
+
+
+@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators
+@subsection The Match-one-or-more Operator (@code{+} or @code{\+})
+
+@cindex @samp{+}
+
+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize
+this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't
+set, then @samp{+} represents this operator; if it is, then @samp{\+}
+does.
+
+This operator is similar to the match-zero-or-more operator except that
+it repeats the preceding regular expression at least once;
+@pxref{Match-zero-or-more Operator}, for what it operates on, how some
+syntax bits affect it, and how Regex backtracks to match it.
+
+For example, supposing that @samp{+} represents the match-one-or-more
+operator; then @samp{ca+r} matches, e.g., @samp{car} and
+@samp{caaaar}, but not @samp{cr}.
+
+@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators
+@subsection The Match-zero-or-one Operator (@code{?} or @code{\?})
+@cindex @samp{?}
+
+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit
+@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator;
+if it is, then @samp{\?} does.
+
+This operator is similar to the match-zero-or-more operator except that
+it repeats the preceding regular expression once or not at all;
+@pxref{Match-zero-or-more Operator}, to see what it operates on, how
+some syntax bits affect it, and how Regex backtracks to match it.
+
+For example, supposing that @samp{?} represents the match-zero-or-one
+operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but
+nothing else.
+
+@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators
+@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}})
+
+@cindex interval expression
+@cindex @samp{@{}
+@cindex @samp{@}}
+@cindex @samp{\@{}
+@cindex @samp{\@}}
+
+If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes
+@dfn{interval expressions}. They repeat the smallest possible preceding
+regular expression a specified number of times.
+
+If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents
+the @dfn{open-interval operator} and @samp{@}} represents the
+@dfn{close-interval operator}; otherwise, @samp{\@{} and @samp{\@}} do.
+
+Specifically, supposing that @samp{@{} and @samp{@}} represent the
+open-interval and close-interval operators; then:
+
+@table @code
+@item @{@var{count}@}
+matches exactly @var{count} occurrences of the preceding regular
+expression.
+
+@item @{@var{min},@}
+matches @var{min} or more occurrences of the preceding regular
+expression.
+
+@item @{@var{min},@var{max}@}
+matches at least @var{min} but no more than @var{max} occurrences of
+the preceding regular expression.
+
+@end table
+
+The interval expression (but not necessarily the regular expression that
+contains it) is invalid if:
+
+@itemize @bullet
+@item
+@var{min} is greater than @var{max}, or
+
+@item
+any of @var{count}, @var{min}, or @var{max} are outside the range
+zero to @code{RE_DUP_MAX} (which symbol @file{regex.h}
+defines).
+
+@end itemize
+
+If the interval expression is invalid and the syntax bit
+@code{RE_NO_BK_BRACES} is set, then Regex considers all the
+characters in the would-be interval to be ordinary. If that bit
+isn't set, then the regular expression is invalid.
+
+If the interval expression is valid but there is no preceding regular
+expression on which to operate, then if the syntax bit
+@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid.
+If that bit isn't set, then Regex considers all the characters---other
+than backslashes, which it ignores---in the would-be interval to be
+ordinary.
+
+
+@node Alternation Operator, List Operators, Repetition Operators, Common Operators
+@section The Alternation Operator (@code{|} or @code{\|})
+
+@kindex |
+@kindex \|
+@cindex alternation operator
+@cindex or operator
+
+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit
+@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator;
+otherwise, @samp{\|} does.
+
+Alternatives match one of a choice of regular expressions:
+if you put the character(s) representing the alternation operator between
+any two regular expressions @var{a} and @var{b}, the result matches
+the union of the strings that @var{a} and @var{b} match. For
+example, supposing that @samp{|} is the alternation operator, then
+@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or
+@samp{quux}.
+
+@ignore
+@c Nobody needs to disallow empty alternatives any more.
+If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular
+expressions @var{a} or @var{b} is empty, the
+regular expression is invalid. More precisely, if this syntax bit is
+set, then the alternation operator can't:
+
+@itemize @bullet
+@item
+be first or last in a regular expression;
+
+@item
+follow either another alternation operator or an open-group operator
+(@pxref{Grouping Operators}); or
+
+@item
+precede a close-group operator.
+
+@end itemize
+
+@noindent
+For example, supposing @samp{(} and @samp{)} represent the open and
+close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar},
+@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid.
+@end ignore
+
+The alternation operator operates on the @emph{largest} possible
+surrounding regular expressions. (Put another way, it has the lowest
+precedence of any regular expression operator.)
+Thus, the only way you can
+delimit its arguments is to use grouping. For example, if @samp{(} and
+@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar}
+would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would
+match @samp{foo} or @samp{bar}.)
+
+@cindex backtracking
+The matcher usually tries all combinations of alternatives so as to
+match the longest possible string. For example, when matching
+@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot
+take, say, the first (``depth-first'') combination it could match, since
+then it would be content to match just @samp{fooqbar}.
+
+@comment xx something about leftmost-longest
+
+
+@node List Operators, Grouping Operators, Alternation Operator, Common Operators
+@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})
+
+@cindex matching list
+@cindex @samp{[}
+@cindex @samp{]}
+@cindex @samp{^}
+@cindex @samp{-}
+@cindex @samp{\}
+@cindex @samp{[^}
+@cindex nonmatching list
+@cindex matching newline
+@cindex bracket expression
+
+@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or
+more items. An @dfn{item} is a character,
+@ignore
+(These get added when they get implemented.)
+a collating symbol, an equivalence class expression,
+@end ignore
+a character class expression, or a range expression. The syntax bits
+affect which kinds of items you can put in a list. We explain the last
+two items in subsections below. Empty lists are invalid.
+
+A @dfn{matching list} matches a single character represented by one of
+the list items. You form a matching list by enclosing one or more items
+within an @dfn{open-matching-list operator} (represented by @samp{[})
+and a @dfn{close-list operator} (represented by @samp{]}).
+
+For example, @samp{[ab]} matches either @samp{a} or @samp{b}.
+@samp{[ad]*} matches the empty string and any string composed of just
+@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular
+expression with a @samp{[} but no matching
+@samp{]}.
+
+@dfn{Nonmatching lists} are similar to matching lists except that they
+match a single character @emph{not} represented by one of the list
+items. You use an @dfn{open-nonmatching-list operator} (represented by
+@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be
+the first character in the list. If you put a @samp{^} character first
+in (what you think is) a matching list, you'll turn it into a
+nonmatching list.}) instead of an open-matching-list operator to start a
+nonmatching list.
+
+For example, @samp{[^ab]} matches any character except @samp{a} or
+@samp{b}.
+
+If the @code{posix_newline} field in the pattern buffer (@pxref{GNU
+Pattern Buffers}) is set, then nonmatching lists do not match a newline.
+
+Most characters lose any special meaning inside a list. The special
+characters inside a list follow.
+
+@table @samp
+@item ]
+ends the list if it's not the first list item. So, if you want to make
+the @samp{]} character a list item, you must put it first.
+
+@item \
+quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is
+set.
+
+@ignore
+Put these in if they get implemented.
+
+@item [.
+represents the open-collating-symbol operator (@pxref{Collating Symbol
+Operators}).
+
+@item .]
+represents the close-collating-symbol operator.
+
+@item [=
+represents the open-equivalence-class operator (@pxref{Equivalence Class
+Operators}).
+
+@item =]
+represents the close-equivalence-class operator.
+
+@end ignore
+
+@item [:
+represents the open-character-class operator (@pxref{Character Class
+Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what
+follows is a valid character class expression.
+
+@item :]
+represents the close-character-class operator if the syntax bit
+@code{RE_CHAR_CLASSES} is set and what precedes it is an
+open-character-class operator followed by a valid character class name.
+
+@item -
+represents the range operator (@pxref{Range Operator}) if it's
+not first or last in a list or the ending point of a range.
+
+@end table
+
+@noindent
+All other characters are ordinary. For example, @samp{[.*]} matches
+@samp{.} and @samp{*}.
+
+@menu
+* Character Class Operators:: [:class:]
+* Range Operator:: start-end
+@end menu
+
+@ignore
+(If collating symbols and equivalence class expressions get implemented,
+then add this.)
+
+node Collating Symbol Operators
+subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]})
+
+If the syntax bit @code{XX} is set, then you can represent
+collating symbols inside lists. You form a @dfn{collating symbol} by
+putting a collating element between an @dfn{open-collating-symbol
+operator} and a @dfn{close-collating-symbol operator}. @samp{[.}
+represents the open-collating-symbol operator and @samp{.]} represents
+the close-collating-symbol operator. For example, if @samp{ll} is a
+collating element, then @samp{[[.ll.]]} would match @samp{ll}.
+
+node Equivalence Class Operators
+subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]})
+@cindex equivalence class expression in regex
+@cindex @samp{[=} in regex
+@cindex @samp{=]} in regex
+
+If the syntax bit @code{XX} is set, then Regex recognizes equivalence class
+expressions inside lists. An @dfn{equivalence class expression} is a set
+of collating elements which all belong to the same equivalence class.
+You form an equivalence class expression by putting a collating
+element between an @dfn{open-equivalence-class operator} and a
+@dfn{close-equivalence-class operator}. @samp{[=} represents the
+open-equivalence-class operator and @samp{=]} represents the
+close-equivalence-class operator. For example, if @samp{a} and @samp{A}
+were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]}
+would match both @samp{a} and @samp{A}. If the collating element in an
+equivalence class expression isn't part of an equivalence class, then
+the matcher considers the equivalence class expression to be a collating
+symbol.
+
+@end ignore
+
+@node Character Class Operators, Range Operator, , List Operators
+@subsection Character Class Operators (@code{[:} @dots{} @code{:]})
+
+@cindex character classes
+@cindex @samp{[:} in regex
+@cindex @samp{:]} in regex
+
+If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex
+recognizes character class expressions inside lists. A @dfn{character
+class expression} matches one character from a given class. You form a
+character class expression by putting a character class name between an
+@dfn{open-character-class operator} (represented by @samp{[:}) and a
+@dfn{close-character-class operator} (represented by @samp{:]}). The
+character class names and their meanings are:
+
+@table @code
+
+@item alnum
+letters and digits
+
+@item alpha
+letters
+
+@item blank
+system-dependent; for @sc{gnu}, a space or tab
+
+@item cntrl
+control characters (in the @sc{ascii} encoding, code 0177 and codes
+less than 040)
+
+@item digit
+digits
+
+@item graph
+same as @code{print} except omits space
+
+@item lower
+lowercase letters
+
+@item print
+printable characters (in the @sc{ascii} encoding, space through
+tilde---codes 040 through 0176)
+
+@item punct
+neither control nor alphanumeric characters
+
+@item space
+space, horizontal tab, carriage return, newline, vertical tab, and form feed
+
+@item upper
+uppercase letters
+
+@item xdigit
+hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F}
+
+@end table
+
+@noindent
+These correspond to the definitions in the C library's @file{<ctype.h>}
+facility. For example, @samp{[:alpha:]} corresponds to the standard
+facility @code{isalpha}. Regex recognizes character class expressions
+only inside of lists; so @samp{[[:alpha:]]} matches any letter, but
+@samp{[:alpha:]} outside of a bracket expression and not followed by a
+repetition operator matches just itself.
+
+@node Range Operator, , Character Class Operators, List Operators
+@subsection The Range Operator (@code{-})
+
+Regex recognizes @dfn{range expressions} inside a list. They represent
+those characters
+that fall between two elements in the current collating sequence. You
+form a range expression by putting a @dfn{range operator} between two
+@ignore
+(If these get implemented, then substitute this for ``characters.'')
+of any of the following: characters, collating elements, collating symbols,
+and equivalence class expressions. The starting point of the range and
+the ending point of the range don't have to be the same kind of item,
+e.g., the starting point could be a collating element and the ending
+point could be an equivalence class expression. If a range's ending
+point is an equivalence class, then all the collating elements in that
+class will be in the range.
+@end ignore
+characters.@footnote{You can't use a character class for the starting
+or ending point of a range, since a character class is not a single
+character.} @samp{-} represents the range operator. For example,
+@samp{a-f} within a list represents all the characters from @samp{a}
+through @samp{f}
+inclusively.
+
+If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's
+ending point collates less than its starting point, the range (and the
+regular expression containing it) is invalid. For example, the regular
+expression @samp{[z-a]} would be invalid. If this bit isn't set, then
+Regex considers such a range to be empty.
+
+Since @samp{-} represents the range operator, if you want to make a
+@samp{-} character itself
+a list item, you must do one of the following:
+
+@itemize @bullet
+@item
+Put the @samp{-} either first or last in the list.
+
+@item
+Include a range whose starting point collates strictly lower than
+@samp{-} and whose ending point collates equal or higher. Unless a
+range is the first item in a list, a @samp{-} can't be its starting
+point, but @emph{can} be its ending point. That is because Regex
+considers @samp{-} to be the range operator unless it is preceded by
+another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)},
+@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are
+contiguous characters in the collating sequence. You might think that
+@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it
+has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so
+it matches, e.g., @samp{,}, not @samp{.}.
+
+@item
+Put a range whose starting point is @samp{-} first in the list.
+
+@end itemize
+
+For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in
+English, in @sc{ascii}).
+
+
+@node Grouping Operators, Back-reference Operator, List Operators, Common Operators
+@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)})
+
+@kindex (
+@kindex )
+@kindex \(
+@kindex \)
+@cindex grouping
+@cindex subexpressions
+@cindex parenthesizing
+
+A @dfn{group}, also known as a @dfn{subexpression}, consists of an
+@dfn{open-group operator}, any number of other operators, and a
+@dfn{close-group operator}. Regex treats this sequence as a unit, just
+as mathematics and programming languages treat a parenthesized
+expression as a unit.
+
+Therefore, using @dfn{groups}, you can:
+
+@itemize @bullet
+@item
+delimit the argument(s) to an alternation operator (@pxref{Alternation
+Operator}) or a repetition operator (@pxref{Repetition
+Operators}).
+
+@item
+keep track of the indices of the substring that matched a given group.
+@xref{Using Registers}, for a precise explanation.
+This lets you:
+
+@itemize @bullet
+@item
+use the back-reference operator (@pxref{Back-reference Operator}).
+
+@item
+use registers (@pxref{Using Registers}).
+
+@end itemize
+
+@end itemize
+
+If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents
+the open-group operator and @samp{)} represents the
+close-group operator; otherwise, @samp{\(} and @samp{\)} do.
+
+If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a
+close-group operator has no matching open-group operator, then Regex
+considers it to match @samp{)}.
+
+
+@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators
+@section The Back-reference Operator (@code{\}@var{digit})
+
+@cindex back references
+
+If the syntax bit @code{RE_NO_BK_REF} isn't set, then Regex recognizes
+back references. A back reference matches a specified preceding group.
+The back reference operator is represented by @samp{\@var{digit}}
+anywhere after the end of a regular expression's @w{@var{digit}-th}
+group (@pxref{Grouping Operators}).
+
+@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns
+numbers 1 through 9 to the first nine groups it encounters. By using
+one of @samp{\1} through @samp{\9} after the corresponding group's
+close-group operator, you can match a substring identical to the
+one that the group does.
+
+Back references match according to the following (in all examples below,
+@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{}
+the open-interval and @samp{@}} the close-interval operator):
+
+@itemize @bullet
+@item
+If the group matches a substring, the back reference matches an
+identical substring. For example, @samp{(a)\1} matches @samp{aa} and
+@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise,
+@samp{(.*)\1} matches any (newline-free if the syntax bit
+@code{RE_DOT_NEWLINE} isn't set) string that is composed of two
+identical halves; the @samp{(.*)} matches the first half and the
+@samp{\1} matches the second half.
+
+@item
+If the group matches more than once (as it might if followed
+by, e.g., a repetition operator), then the back reference matches the
+substring the group @emph{last} matched. For example,
+@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the
+outer one) matches @samp{aab} and @w{group 2} (the inner one) matches
+@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches
+@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches
+@samp{a}.
+
+@item
+If the group doesn't participate in a match, i.e., it is part of an
+alternative not taken or a repetition operator allows zero repetitions
+of it, then the back reference makes the whole match fail. For example,
+@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three}
+and @samp{two-and-four}, but not @samp{one-and-four} or
+@samp{two-and-three}. For example, if the pattern matches
+@samp{one-and-}, then its @w{group 2} matches the empty string and its
+@w{group 3} doesn't participate in the match. So, if it then matches
+@samp{four}, then when it tries to back reference @w{group 3}---which it
+will attempt to do because @samp{\3} follows the @samp{four}---the match
+will fail because @w{group 3} didn't participate in the match.
+
+@end itemize
+
+You can use a back reference as an argument to a repetition operator. For
+example, @samp{(a(b))\2*} matches @samp{a} followed by one or more
+@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}.
+
+If there is no preceding @w{@var{digit}-th} subexpression, the regular
+expression is invalid.
+
+
+@node Anchoring Operators, , Back-reference Operator, Common Operators
+@section Anchoring Operators
+
+@cindex anchoring
+@cindex regexp anchoring
+
+These operators can constrain a pattern to match only at the beginning or
+end of the entire string or at the beginning or end of a line.
+
+@menu
+* Match-beginning-of-line Operator:: ^
+* Match-end-of-line Operator:: $
+@end menu
+
+
+@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators
+@subsection The Match-beginning-of-line Operator (@code{^})
+
+@kindex ^
+@cindex beginning-of-line operator
+@cindex anchors
+
+This operator can match the empty string either at the beginning of the
+string or after a newline character. Thus, it is said to @dfn{anchor}
+the pattern to the beginning of a line.
+
+In the cases following, @samp{^} represents this operator. (Otherwise,
+@samp{^} is ordinary.)
+
+@itemize @bullet
+
+@item
+It (the @samp{^}) is first in the pattern, as in @samp{^foo}.
+
+@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})}
+@item
+The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside
+a bracket expression.
+
+@cindex open-group operator and @samp{^}
+@cindex alternation operator and @samp{^}
+@item
+It follows an open-group or alternation operator, as in @samp{a\(^b\)}
+and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation
+Operator}.
+
+@end itemize
+
+These rules imply that some valid patterns containing @samp{^} cannot be
+matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS}
+is set.
+
+@vindex not_bol @r{field in pattern buffer}
+If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU
+Pattern Buffers}), then @samp{^} fails to match at the beginning of the
+string. @xref{POSIX Matching}, for when you might find this useful.
+
+@vindex newline_anchor @r{field in pattern buffer}
+If the @code{newline_anchor} field isn't set in the pattern buffer, then
+@samp{^} fails to match after a newline. This is useful when you do not
+regard the string to be matched as broken into lines.
+
+
+@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators
+@subsection The Match-end-of-line Operator (@code{$})
+
+@kindex $
+@cindex end-of-line operator
+@cindex anchors
+
+This operator can match the empty string either at the end of
+the string or before a newline character in the string. Thus, it is
+said to @dfn{anchor} the pattern to the end of a line.
+
+It is always represented by @samp{$}. For example, @samp{foo$} usually
+matches @samp{foo} and the first three characters of
+@samp{foo\nbar}.
+
+Its interaction with the syntax bits and pattern buffer fields is
+exactly the dual of @samp{^}'s; see the previous section. (That is,
+``beginning'' becomes ``end'', ``next'' becomes ``previous'', and
+``after'' becomes ``before''.)
+
+
+@node GNU Operators, GNU Emacs Operators, Common Operators, Top
+@chapter GNU Operators
+
+Following are operators that @sc{gnu} defines (and @sc{posix} doesn't).
+
+@menu
+* Word Operators::
+* Buffer Operators::
+@end menu
+
+@node Word Operators, Buffer Operators, , GNU Operators
+@section Word Operators
+
+The operators in this section require Regex to recognize parts of words.
+Regex uses a syntax table to determine whether or not a character is
+part of a word, i.e., whether or not it is @dfn{word-constituent}.
+
+@menu
+* Non-Emacs Syntax Tables::
+* Match-word-boundary Operator:: \b
+* Match-within-word Operator:: \B
+* Match-beginning-of-word Operator:: \<
+* Match-end-of-word Operator:: \>
+* Match-word-constituent Operator:: \w
+* Match-non-word-constituent Operator:: \W
+@end menu
+
+@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators
+@subsection Non-Emacs Syntax Tables
+
+A @dfn{syntax table} is an array indexed by the characters in your
+character set. In the @sc{ascii} encoding, therefore, a syntax table
+has 256 elements. Regex always uses a @code{char *} variable
+@code{re_syntax_table} as its syntax table. In some cases, it
+initializes this variable and in others it expects you to initialize it.
+
+@itemize @bullet
+@item
+If Regex is compiled with the preprocessor symbols @code{emacs} and
+@code{SYNTAX_TABLE} both undefined, then Regex allocates
+@code{re_syntax_table} and initializes an element @var{i} either to
+@code{Sword} (which it defines) if @var{i} is a letter, number, or
+@samp{_}, or to zero if it's not.
+
+@item
+If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE}
+defined, then Regex expects you to define a @code{char *} variable
+@code{re_syntax_table} to be a valid syntax table.
+
+@item
+@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with
+the preprocessor symbol @code{emacs} defined.
+
+@end itemize
+
+@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators
+@subsection The Match-word-boundary Operator (@code{\b})
+
+@cindex @samp{\b}
+@cindex word boundaries, matching
+
+This operator (represented by @samp{\b}) matches the empty string at
+either the beginning or the end of a word. For example, @samp{\brat\b}
+matches the separate word @samp{rat}.
+
+@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators
+@subsection The Match-within-word Operator (@code{\B})
+
+@cindex @samp{\B}
+
+This operator (represented by @samp{\B}) matches the empty string within
+a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but
+@samp{dirty \Brat} doesn't match @samp{dirty rat}.
+
+@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators
+@subsection The Match-beginning-of-word Operator (@code{\<})
+
+@cindex @samp{\<}
+
+This operator (represented by @samp{\<}) matches the empty string at the
+beginning of a word.
+
+@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators
+@subsection The Match-end-of-word Operator (@code{\>})
+
+@cindex @samp{\>}
+
+This operator (represented by @samp{\>}) matches the empty string at the
+end of a word.
+
+@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators
+@subsection The Match-word-constituent Operator (@code{\w})
+
+@cindex @samp{\w}
+
+This operator (represented by @samp{\w}) matches any word-constituent
+character.
+
+@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators
+@subsection The Match-non-word-constituent Operator (@code{\W})
+
+@cindex @samp{\W}
+
+This operator (represented by @samp{\W}) matches any character that is
+not word-constituent.
+
+
+@node Buffer Operators, , Word Operators, GNU Operators
+@section Buffer Operators
+
+Following are operators which work on buffers. In Emacs, a @dfn{buffer}
+is, naturally, an Emacs buffer. For other programs, Regex considers the
+entire string to be matched as the buffer.
+
+@menu
+* Match-beginning-of-buffer Operator:: \`
+* Match-end-of-buffer Operator:: \'
+@end menu
+
+
+@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators
+@subsection The Match-beginning-of-buffer Operator (@code{\`})
+
+@cindex @samp{\`}
+
+This operator (represented by @samp{\`}) matches the empty string at the
+beginning of the buffer.
+
+@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators
+@subsection The Match-end-of-buffer Operator (@code{\'})
+
+@cindex @samp{\'}
+
+This operator (represented by @samp{\'}) matches the empty string at the
+end of the buffer.
+
+
+@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top
+@chapter GNU Emacs Operators
+
+Following are operators that @sc{gnu} defines (and @sc{posix} doesn't)
+that you can use only when Regex is compiled with the preprocessor
+symbol @code{emacs} defined.
+
+@menu
+* Syntactic Class Operators::
+@end menu
+
+
+@node Syntactic Class Operators, , , GNU Emacs Operators
+@section Syntactic Class Operators
+
+The operators in this section require Regex to recognize the syntactic
+classes of characters. Regex uses a syntax table to determine this.
+
+@menu
+* Emacs Syntax Tables::
+* Match-syntactic-class Operator:: \sCLASS
+* Match-not-syntactic-class Operator:: \SCLASS
+@end menu
+
+@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators
+@subsection Emacs Syntax Tables
+
+A @dfn{syntax table} is an array indexed by the characters in your
+character set. In the @sc{ascii} encoding, therefore, a syntax table
+has 256 elements.
+
+If Regex is compiled with the preprocessor symbol @code{emacs} defined,
+then Regex expects you to define and initialize the variable
+@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax
+tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax
+Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual},
+for a description of Emacs' syntax tables.
+
+@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators
+@subsection The Match-syntactic-class Operator (@code{\s}@var{class})
+
+@cindex @samp{\s}
+
+This operator matches any character whose syntactic class is represented
+by a specified character. @samp{\s@var{class}} represents this operator
+where @var{class} is the character representing the syntactic class you
+want. For example, @samp{w} represents the syntactic
+class of word-constituent characters, so @samp{\sw} matches any
+word-constituent character.
+
+@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators
+@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class})
+
+@cindex @samp{\S}
+
+This operator is similar to the match-syntactic-class operator except
+that it matches any character whose syntactic class is @emph{not}
+represented by the specified character. @samp{\S@var{class}} represents
+this operator. For example, @samp{w} represents the syntactic class of
+word-constituent characters, so @samp{\Sw} matches any character that is
+not word-constituent.
+
+
+@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top
+@chapter What Gets Matched?
+
+Regex usually matches strings according to the ``leftmost longest''
+rule; that is, it chooses the longest of the leftmost matches. This
+does not mean that, for a regular expression containing subexpressions,
+it simply chooses the longest match for each subexpression, left to
+right; the overall match must also be the longest possible one.
+
+For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not
+@samp{acdac}, as it would if it were to choose the longest match for the
+first subexpression.
+
+
+@node Programming with Regex, Copying, What Gets Matched?, Top
+@chapter Programming with Regex
+
+Here we describe how you use the Regex data structures and functions in
+C programs. Regex has three interfaces: one designed for @sc{gnu}, one
+compatible with @sc{posix} and one compatible with Berkeley @sc{unix}.
+
+@menu
+* GNU Regex Functions::
+* POSIX Regex Functions::
+* BSD Regex Functions::
+@end menu
+
+
+@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex
+@section GNU Regex Functions
+
+If you're writing code that doesn't need to be compatible with either
+@sc{posix} or Berkeley @sc{unix}, you can use these functions. They
+provide more options than the other interfaces.
+
+@menu
+* GNU Pattern Buffers:: The re_pattern_buffer type.
+* GNU Regular Expression Compiling:: re_compile_pattern ()
+* GNU Matching:: re_match ()
+* GNU Searching:: re_search ()
+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
+* Searching with Fastmaps:: re_compile_fastmap ()
+* GNU Translate Tables:: The `translate' field.
+* Using Registers:: The re_registers type and related fns.
+* Freeing GNU Pattern Buffers:: regfree ()
+@end menu
+
+
+@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions
+@subsection GNU Pattern Buffers
+
+@cindex pattern buffer, definition of
+@tindex re_pattern_buffer @r{definition}
+@tindex struct re_pattern_buffer @r{definition}
+
+To compile, match, or search for a given regular expression, you must
+supply a pattern buffer. A @dfn{pattern buffer} holds one compiled
+regular expression.@footnote{Regular expressions are also referred to as
+``patterns,'' hence the name ``pattern buffer.''}
+
+You can have several different pattern buffers simultaneously, each
+holding a compiled pattern for a different regular expression.
+
+@file{regex.h} defines the pattern buffer @code{struct} as follows:
+
+@example
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+@end example
+
+
+@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions
+@subsection GNU Regular Expression Compiling
+
+In @sc{gnu}, you can both match and search for a given regular
+expression. To do either, you must first compile it in a pattern buffer
+(@pxref{GNU Pattern Buffers}).
+
+@cindex syntax initialization
+@vindex re_syntax_options @r{initialization}
+Regular expressions match according to the syntax with which they were
+compiled; with @sc{gnu}, you indicate what syntax you want by setting
+the variable @code{re_syntax_options} (declared in @file{regex.h} and
+defined in @file{regex.c}) before calling the compiling function,
+@code{re_compile_pattern} (see below). @xref{Syntax Bits}, and
+@ref{Predefined Syntaxes}.
+
+You can change the value of @code{re_syntax_options} at any time.
+Usually, however, you set its value once and then never change it.
+
+@cindex pattern buffer initialization
+@code{re_compile_pattern} takes a pattern buffer as an argument. You
+must initialize the following fields:
+
+@table @code
+
+@item translate
+@vindex translate @r{initialization}
+Initialize this to point to a translate table if you want one, or to
+zero if you don't. We explain translate tables in @ref{GNU Translate
+Tables}.
+
+@item fastmap
+@vindex fastmap @r{initialization}
+Initialize this to nonzero if you want a fastmap, or to zero if you
+don't.
+
+@item buffer
+@itemx allocated
+@vindex buffer @r{initialization}
+@vindex allocated @r{initialization}
+@findex malloc
+If you want @code{re_compile_pattern} to allocate memory for the
+compiled pattern, set both of these to zero. If you have an existing
+block of memory (allocated with @code{malloc}) you want Regex to use,
+set @code{buffer} to its address and @code{allocated} to its size (in
+bytes).
+
+@code{re_compile_pattern} uses @code{realloc} to extend the space for
+the compiled pattern as necessary.
+
+@end table
+
+To compile a pattern buffer, use:
+
+@findex re_compile_pattern
+@example
+char *
+re_compile_pattern (const char *@var{regex}, const int @var{regex_size},
+ struct re_pattern_buffer *@var{pattern_buffer})
+@end example
+
+@noindent
+@var{regex} is the regular expression's address, @var{regex_size} is its
+length, and @var{pattern_buffer} is the pattern buffer's address.
+
+If @code{re_compile_pattern} successfully compiles the regular
+expression, it returns zero and sets @code{*@var{pattern_buffer}} to the
+compiled pattern. It sets the pattern buffer's fields as follows:
+
+@table @code
+@item buffer
+@vindex buffer @r{field, set by @code{re_compile_pattern}}
+to the compiled pattern.
+
+@item used
+@vindex used @r{field, set by @code{re_compile_pattern}}
+to the number of bytes the compiled pattern in @code{buffer} occupies.
+
+@item syntax
+@vindex syntax @r{field, set by @code{re_compile_pattern}}
+to the current value of @code{re_syntax_options}.
+
+@item re_nsub
+@vindex re_nsub @r{field, set by @code{re_compile_pattern}}
+to the number of subexpressions in @var{regex}.
+
+@item fastmap_accurate
+@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}}
+to zero on the theory that the pattern you're compiling is different
+than the one previously compiled into @code{buffer}; in that case (since
+you can't make a fastmap without a compiled pattern),
+@code{fastmap} would either contain an incompatible fastmap, or nothing
+at all.
+
+@c xx what else?
+@end table
+
+If @code{re_compile_pattern} can't compile @var{regex}, it returns an
+error string corresponding to one of the errors listed in @ref{POSIX
+Regular Expression Compiling}.
+
+
+@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions
+@subsection GNU Matching
+
+@cindex matching with GNU functions
+
+Matching the @sc{gnu} way means trying to match as much of a string as
+possible starting at a position within it you specify. Once you've compiled
+a pattern into a pattern buffer (@pxref{GNU Regular Expression
+Compiling}), you can ask the matcher to match that pattern against a
+string using:
+
+@findex re_match
+@example
+int
+re_match (struct re_pattern_buffer *@var{pattern_buffer},
+ const char *@var{string}, const int @var{size},
+ const int @var{start}, struct re_registers *@var{regs})
+@end example
+
+@noindent
+@var{pattern_buffer} is the address of a pattern buffer containing a
+compiled pattern. @var{string} is the string you want to match; it can
+contain newline and null characters. @var{size} is the length of that
+string. @var{start} is the string index at which you want to
+begin matching; the first character of @var{string} is at index zero.
+@xref{Using Registers}, for an explanation of @var{regs}; you can safely
+pass zero.
+
+@code{re_match} matches the regular expression in @var{pattern_buffer}
+against the string @var{string} according to the syntax in
+@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular
+Expression Compiling}, for how to set it.) The function returns
+@math{-1} if the compiled pattern does not match any part of
+@var{string} and @math{-2} if an internal error happens; otherwise, it
+returns how many (possibly zero) characters of @var{string} the pattern
+matched.
+
+An example: suppose @var{pattern_buffer} points to a pattern buffer
+containing the compiled pattern for @samp{a*}, and @var{string} points
+to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start}
+is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the
+last three @samp{a}s in @var{string}. If @var{start} is 0,
+@code{re_match} returns 5, i.e., @samp{a*} would have matched all the
+@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns
+zero.
+
+If @var{start} is not between zero and @var{size}, then
+@code{re_match} returns @math{-1}.
+
+
+@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions
+@subsection GNU Searching
+
+@cindex searching with GNU functions
+
+@dfn{Searching} means trying to match starting at successive positions
+within a string. The function @code{re_search} does this.
+
+Before calling @code{re_search}, you must compile your regular
+expression. @xref{GNU Regular Expression Compiling}.
+
+Here is the function declaration:
+
+@findex re_search
+@example
+int
+re_search (struct re_pattern_buffer *@var{pattern_buffer},
+ const char *@var{string}, const int @var{size},
+ const int @var{start}, const int @var{range},
+ struct re_registers *@var{regs})
+@end example
+
+@noindent
+@vindex start @r{argument to @code{re_search}}
+@vindex range @r{argument to @code{re_search}}
+whose arguments are the same as those to @code{re_match} (@pxref{GNU
+Matching}) except that the two arguments @var{start} and @var{range}
+replace @code{re_match}'s argument @var{start}.
+
+If @var{range} is positive, then @code{re_search} attempts a match
+starting first at index @var{start}, then at @math{@var{start} + 1} if
+that fails, and so on, up to @math{@var{start} + @var{range}}; if
+@var{range} is negative, then it attempts a match starting first at
+index @var{start}, then at @math{@var{start} - 1} if that fails, and so
+on.
+
+If @var{start} is not between zero and @var{size}, then @code{re_search}
+returns @math{-1}. When @var{range} is positive, @code{re_search}
+adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is
+between zero and @var{size}, if necessary; that way it won't search
+outside of @var{string}. Similarly, when @var{range} is negative,
+@code{re_search} adjusts @var{range} so that @math{@var{start} +
+@var{range} + 1} is between zero and @var{size}, if necessary.
+
+If the @code{fastmap} field of @var{pattern_buffer} is zero,
+@code{re_search} matches starting at consecutive positions; otherwise,
+it uses @code{fastmap} to make the search more efficient.
+@xref{Searching with Fastmaps}.
+
+If no match is found, @code{re_search} returns @math{-1}. If
+a match is found, it returns the index where the match began. If an
+internal error happens, it returns @math{-2}.
+
+
+@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions
+@subsection Matching and Searching with Split Data
+
+Using the functions @code{re_match_2} and @code{re_search_2}, you can
+match or search in data that is divided into two strings.
+
+The function:
+
+@findex re_match_2
+@example
+int
+re_match_2 (struct re_pattern_buffer *@var{buffer},
+ const char *@var{string1}, const int @var{size1},
+ const char *@var{string2}, const int @var{size2},
+ const int @var{start},
+ struct re_registers *@var{regs},
+ const int @var{stop})
+@end example
+
+@noindent
+is similar to @code{re_match} (@pxref{GNU Matching}) except that you
+pass @emph{two} data strings and sizes, and an index @var{stop} beyond
+which you don't want the matcher to try matching. As with
+@code{re_match}, if it succeeds, @code{re_match_2} returns how many
+characters of @var{string} it matched. Regard @var{string1} and
+@var{string2} as concatenated when you set the arguments @var{start} and
+@var{stop} and use the contents of @var{regs}; @code{re_match_2} never
+returns a value larger than @math{@var{size1} + @var{size2}}.
+
+The function:
+
+@findex re_search_2
+@example
+int
+re_search_2 (struct re_pattern_buffer *@var{buffer},
+ const char *@var{string1}, const int @var{size1},
+ const char *@var{string2}, const int @var{size2},
+ const int @var{start}, const int @var{range},
+ struct re_registers *@var{regs},
+ const int @var{stop})
+@end example
+
+@noindent
+is similarly related to @code{re_search}.
+
+
+@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions
+@subsection Searching with Fastmaps
+
+@cindex fastmaps
+If you're searching through a long string, you should use a fastmap.
+Without one, the searcher tries to match at consecutive positions in the
+string. Generally, most of the characters in the string could not start
+a match. It takes much longer to try matching at a given position in the
+string than it does to check in a table whether or not the character at
+that position could start a match. A @dfn{fastmap} is such a table.
+
+More specifically, a fastmap is an array indexed by the characters in
+your character set. Under the @sc{ascii} encoding, therefore, a fastmap
+has 256 elements. If you want the searcher to use a fastmap with a
+given pattern buffer, you must allocate the array and assign the array's
+address to the pattern buffer's @code{fastmap} field. You either can
+compile the fastmap yourself or have @code{re_search} do it for you;
+when @code{fastmap} is nonzero, it automatically compiles a fastmap the
+first time you search using a particular compiled pattern.
+
+To compile a fastmap yourself, use:
+
+@findex re_compile_fastmap
+@example
+int
+re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer})
+@end example
+
+@noindent
+@var{pattern_buffer} is the address of a pattern buffer. If the
+character @var{c} could start a match for the pattern,
+@code{re_compile_fastmap} makes
+@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns
+@math{0} if it can compile a fastmap and @math{-2} if there is an
+internal error. For example, if @samp{|} is the alternation operator
+and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then
+@code{re_compile_fastmap} sets @code{fastmap['a']} and
+@code{fastmap['b']} (and no others).
+
+@code{re_search} uses a fastmap as it moves along in the string: it
+checks the string's characters until it finds one that's in the fastmap.
+Then it tries matching at that character. If the match fails, it
+repeats the process. So, by using a fastmap, @code{re_search} doesn't
+waste time trying to match at positions in the string that couldn't
+start a match.
+
+If you don't want @code{re_search} to use a fastmap,
+store zero in the @code{fastmap} field of the pattern buffer before
+calling @code{re_search}.
+
+Once you've initialized a pattern buffer's @code{fastmap} field, you
+need never do so again---even if you compile a new pattern in
+it---provided the way the field is set still reflects whether or not you
+want a fastmap. @code{re_search} will still either do nothing if
+@code{fastmap} is null or, if it isn't, compile a new fastmap for the
+new pattern.
+
+@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions
+@subsection GNU Translate Tables
+
+If you set the @code{translate} field of a pattern buffer to a translate
+table, then the @sc{gnu} Regex functions to which you've passed that
+pattern buffer use it to apply a simple transformation
+to all the regular expression and string characters at which they look.
+
+A @dfn{translate table} is an array indexed by the characters in your
+character set. Under the @sc{ascii} encoding, therefore, a translate
+table has 256 elements. The array's elements are also characters in
+your character set. When the Regex functions see a character @var{c},
+they use @code{translate[@var{c}]} in its place, with one exception: the
+character after a @samp{\} is not translated. (This ensures that the
+operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.)
+
+For example, a table that maps all lowercase letters to the
+corresponding uppercase ones would cause the matcher to ignore
+differences in case.@footnote{A table that maps all uppercase letters to
+the corresponding lowercase ones would work just as well for this
+purpose.} Such a table would map all characters except lowercase letters
+to themselves, and lowercase letters to the corresponding uppercase
+ones. Under the @sc{ascii} encoding, here's how you could initialize
+such a table (we'll call it @code{case_fold}):
+
+@example
+for (i = 0; i < 256; i++)
+ case_fold[i] = i;
+for (i = 'a'; i <= 'z'; i++)
+ case_fold[i] = i - ('a' - 'A');
+@end example
+
+You tell Regex to use a translate table on a given pattern buffer by
+assigning that table's address to the @code{translate} field of that
+buffer. If you don't want Regex to do any translation, put zero into
+this field. You'll get weird results if you change the table's contents
+anytime between compiling the pattern buffer, compiling its fastmap, and
+matching or searching with the pattern buffer.
+
+@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions
+@subsection Using Registers
+
+A group in a regular expression can match a (possibly empty) substring
+of the string that the regular expression as a whole matched. The matcher
+remembers the beginning and end of the substring matched by
+each group.
+
+To find out what they matched, pass a nonzero @var{regs} argument to a
+@sc{gnu} matching or searching function (@pxref{GNU Matching} and
+@ref{GNU Searching}), i.e., the address of a structure of this type, as
+defined in @file{regex.h}:
+
+@c We don't bother to include this directly from regex.h,
+@c since it changes so rarely.
+@example
+@tindex re_registers
+@vindex num_regs @r{in @code{struct re_registers}}
+@vindex start @r{in @code{struct re_registers}}
+@vindex end @r{in @code{struct re_registers}}
+struct re_registers
+@{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+@};
+@end example
+
+Except for (possibly) the @var{num_regs}'th element (see below), the
+@var{i}th element of the @code{start} and @code{end} arrays records
+information about the @var{i}th group in the pattern. (They're declared
+as C pointers, but this is only because not all C compilers accept
+zero-length arrays; conceptually, it is simplest to think of them as
+arrays.)
+
+The @code{start} and @code{end} arrays are allocated in various ways,
+depending on the value of the @code{regs_allocated}
+@vindex regs_allocated
+field in the pattern buffer passed to the matcher.
+
+The simplest and perhaps most useful is to let the matcher (re)allocate
+enough space to record information for all the groups in the regular
+expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED},
+@vindex REGS_UNALLOCATED
+the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub}
+is another field in the pattern buffer; @pxref{GNU Pattern Buffers}). It
+sets the extra element to @math{-1}, and sets @code{regs_allocated} to @code{REGS_REALLOCATE}.
+@vindex REGS_REALLOCATE
+Then on subsequent calls with the same pattern buffer and @var{regs}
+arguments, the matcher reallocates more space if necessary.
+
+It would perhaps be more logical to make the @code{regs_allocated} field
+part of the @code{re_registers} structure, instead of part of the
+pattern buffer. But in that case the caller would be forced to
+initialize the structure before passing it. Much existing code doesn't
+do this initialization, and it's arguably better to avoid it anyway.
+
+@code{re_compile_pattern} sets @code{regs_allocated} to
+@code{REGS_UNALLOCATED},
+so if you use the GNU regular expression
+functions, you get this behavior by default.
+
+@c xx document re_set_registers
+
+@sc{posix}, on the other hand, requires a different interface: the
+caller is supposed to pass in a fixed-length array which the matcher
+fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED}
+@vindex REGS_FIXED
+the matcher simply fills that array.
+
+The following examples illustrate the information recorded in the
+@code{re_registers} structure. (In all of them, @samp{(} represents the
+open-group and @samp{)} the close-group operator. The first character
+in the string @var{string} is at index 0.)
+
+@c xx i'm not sure this is all true anymore.
+
+@itemize @bullet
+
+@item
+If the regular expression has an @w{@var{i}-th}
+group not contained within another group that matches a
+substring of @var{string}, then the function sets
+@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where
+the substring matched by the @w{@var{i}-th} group begins, and
+@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that
+substring's end. The function sets @code{@w{@var{regs}->}start[0]} and
+@code{@w{@var{regs}->}end[0]} to analogous information about the entire
+pattern.
+
+For example, when you match @samp{((a)(b))} against @samp{ab}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}
+
+@item
+0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}
+
+@item
+0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}
+
+@item
+1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]}
+@end itemize
+
+@item
+If a group matches more than once (as it might if followed by,
+e.g., a repetition operator), then the function reports the information
+about what the group @emph{last} matched.
+
+For example, when you match the pattern @samp{(a)*} against the string
+@samp{aa}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}
+
+@item
+1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}
+@end itemize
+
+@item
+If the @w{@var{i}-th} group does not participate in a
+successful match, e.g., it is an alternative not taken or a
+repetition operator allows zero repetitions of it, then the function
+sets @code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}.
+
+For example, when you match the pattern @samp{(a)*b} against
+the string @samp{b}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
+
+@item
+@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}
+@end itemize
+
+@item
+If the @w{@var{i}-th} group matches a zero-length string, then the
+function sets @code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that
+zero-length string.
+
+For example, when you match the pattern @samp{(a*)b} against the string
+@samp{b}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
+
+@item
+0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}
+@end itemize
+
+@ignore
+The function sets @code{@w{@var{regs}->}start[0]} and
+@code{@w{@var{regs}->}end[0]} to analogous information about the entire
+pattern.
+
+For example, when you match the pattern @samp{(a*)} against the empty
+string, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]}
+
+@item
+0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}
+@end itemize
+@end ignore
+
+@item
+If an @w{@var{i}-th} group contains a @w{@var{j}-th} group
+in turn not contained within any other group within group @var{i} and
+the function reports a match of the @w{@var{i}-th} group, then it
+records in @code{@w{@var{regs}->}start[@var{j}]} and
+@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of
+the @w{@var{j}-th} group.
+
+For example, when you match the pattern @samp{((a*)b)*} against the
+string @samp{abb}, @w{group 2} last matches the empty string, so you
+get what it previously matched:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}
+
+@item
+2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}
+
+@item
+2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]}
+@end itemize
+
+When you match the pattern @samp{((a)*b)*} against the string
+@samp{abb}, @w{group 2} doesn't participate in the last match, so you
+get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}
+
+@item
+2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}
+
+@item
+0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}
+@end itemize
+
+@item
+If an @w{@var{i}-th} group contains a @w{@var{j}-th} group
+in turn not contained within any other group within group @var{i}
+and the function sets
+@code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets
+@code{@w{@var{regs}->}start[@var{j}]} and
+@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}.
+
+For example, when you match the pattern @samp{((a)*b)*c} against the
+string @samp{c}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
+
+@item
+@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}
+
+@item
+@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]}
+@end itemize
+
+@end itemize
+
+@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions
+@subsection Freeing GNU Pattern Buffers
+
+To free any allocated fields of a pattern buffer, you can use the
+@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers},
+since the type @code{regex_t}---the type for @sc{posix} pattern
+buffers---is equivalent to the type @code{re_pattern_buffer}. After
+freeing a pattern buffer, you need to again compile a regular expression
+in it (@pxref{GNU Regular Expression Compiling}) before passing it to
+a matching or searching function.
+
+
+@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex
+@section POSIX Regex Functions
+
+If you're writing code that has to be @sc{posix} compatible, you'll need
+to use these functions. Their interfaces are as specified by @sc{posix},
+draft 1003.2/D11.2.
+
+@menu
+* POSIX Pattern Buffers:: The regex_t type.
+* POSIX Regular Expression Compiling:: regcomp ()
+* POSIX Matching:: regexec ()
+* Reporting Errors:: regerror ()
+* Using Byte Offsets:: The regmatch_t type.
+* Freeing POSIX Pattern Buffers:: regfree ()
+@end menu
+
+
+@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions
+@subsection POSIX Pattern Buffers
+
+To compile or match a given regular expression the @sc{posix} way, you
+must supply a pattern buffer exactly the way you do for @sc{gnu}
+(@pxref{GNU Pattern Buffers}). @sc{posix} pattern buffers have type
+@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer
+type @code{re_pattern_buffer}.
+
+
+@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions
+@subsection POSIX Regular Expression Compiling
+
+With @sc{posix}, you can only search for a given regular expression; you
+can't match it. To do this, you must first compile it in a
+pattern buffer, using @code{regcomp}.
+
+@ignore
+Before calling @code{regcomp}, you must initialize this pattern buffer
+as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See
+below, however, for how to choose a syntax with which to compile.
+@end ignore
+
+To compile a pattern buffer, use:
+
+@findex regcomp
+@example
+int
+regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags})
+@end example
+
+@noindent
+@var{preg} is the initialized pattern buffer's address, @var{regex} is
+the regular expression's address, and @var{cflags} is the compilation
+flags, which Regex considers as a collection of bits. Here are the
+valid bits, as defined in @file{regex.h}:
+
+@table @code
+
+@item REG_EXTENDED
+@vindex REG_EXTENDED
+says to use @sc{posix} Extended Regular Expression syntax; if this isn't
+set, then says to use @sc{posix} Basic Regular Expression syntax.
+@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly.
+
+@item REG_ICASE
+@vindex REG_ICASE
+@cindex ignoring case
+says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate}
+field to a translate table which ignores case, replacing anything you've
+put there before.
+
+@item REG_NOSUB
+@vindex REG_NOSUB
+says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching},
+for what this means.
+
+@item REG_NEWLINE
+@vindex REG_NEWLINE
+says that a:
+
+@itemize @bullet
+
+@item
+match-any-character operator (@pxref{Match-any-character
+Operator}) doesn't match a newline.
+
+@item
+nonmatching list not containing a newline (@pxref{List
+Operators}) matches a newline.
+
+@item
+match-beginning-of-line operator (@pxref{Match-beginning-of-line
+Operator}) matches the empty string immediately after a newline,
+regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for
+an explanation of @code{REG_NOTBOL}).
+
+@item
+match-end-of-line operator (@pxref{Match-end-of-line
+Operator}) matches the empty string immediately before a newline,
+regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching},
+for an explanation of @code{REG_NOTEOL}).
+
+@end itemize
+
+@end table
+
+If @code{regcomp} successfully compiles the regular expression, it
+returns zero and sets @code{*@var{preg}} to the compiled
+pattern. Except for @code{syntax} (which it sets as explained above), it
+also sets the same fields the same way as does the @sc{gnu} compiling
+function (@pxref{GNU Regular Expression Compiling}).
+
+If @code{regcomp} can't compile the regular expression, it returns one
+of the error codes listed here. (Except when noted differently, the
+syntax in all examples below is basic regular expression syntax.)
+
+@table @code
+
+@comment repetitions
+@item REG_BADRPT
+For example, the consecutive repetition operators @samp{**} in
+@samp{a**} are invalid. As another example, if the syntax is extended
+regular expression syntax, then the repetition operator @samp{*} with
+nothing on which to operate in @samp{*} is invalid.
+
+@item REG_BADBR
+For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid.
+
+@item REG_EBRACE
+For example, @samp{a\@{1} is missing a close-interval operator.
+
+@comment lists
+@item REG_EBRACK
+For example, @samp{[a} is missing a close-list operator.
+
+@item REG_ERANGE
+For example, the range ending point @samp{z} that collates lower than
+does its starting point @samp{a} in @samp{[z-a]} is invalid. Also, the
+range with the character class @samp{[:alpha:]} as its starting point in
+@samp{[[:alpha:]-|]} is invalid.
+
+@item REG_ECTYPE
+For example, the character class name @samp{foo} in @samp{[[:foo:]]} is
+invalid.
+
+@comment groups
+@item REG_EPAREN
+For example, @samp{a\)} is missing an open-group operator and @samp{\(a}
+is missing a close-group operator.
+
+@item REG_ESUBREG
+For example, the back reference @samp{\2} that refers to a nonexistent
+subexpression in @samp{\(a\)\2} is invalid.
+
+@comment unfinished business
+
+@item REG_EEND
+Returned when a regular expression causes no other more specific error.
+
+@item REG_EESCAPE
+For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the
+one in @samp{\}.
+
+@comment kitchen sink
+@item REG_BADPAT
+For example, in the extended regular expression syntax, the empty group
+@samp{()} in @samp{a()b} is invalid.
+
+@comment internal
+@item REG_ESIZE
+Returned when a regular expression needs a pattern buffer larger than
+65536 bytes.
+
+@item REG_ESPACE
+Returned when a regular expression causes Regex to run out of memory.
+
+@end table
+
+
+@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions
+@subsection POSIX Matching
+
+Matching the @sc{posix} way means trying to match a null-terminated
+string starting at its first character. Once you've compiled a pattern
+into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you
+can ask the matcher to match that pattern against a string using:
+
+@findex regexec
+@example
+int
+regexec (const regex_t *@var{preg}, const char *@var{string},
+ size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags})
+@end example
+
+@noindent
+@var{preg} is the address of a pattern buffer for a compiled pattern.
+@var{string} is the string you want to match.
+
+@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. If you
+pass zero for @var{nmatch} or you compiled @var{preg} with the
+compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore
+@var{pmatch}; otherwise, you must allocate it to have at least
+@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte
+offsets in @var{pmatch}, and set to @math{-1} any unused elements up to
+@code{@var{pmatch}[@var{nmatch} - 1]}.
+
+@var{eflags} specifies @dfn{execution flags}---namely, the two bits
+@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If
+you set @code{REG_NOTBOL}, then the match-beginning-of-line operator
+(@pxref{Match-beginning-of-line Operator}) always fails to match.
+This lets you match against pieces of a line, as you would need to if,
+say, searching for repeated instances of a given pattern in a line; it
+would work correctly for patterns both with and without
+match-beginning-of-line operators. @code{REG_NOTEOL} works analogously
+for the match-end-of-line operator (@pxref{Match-end-of-line
+Operator}); it exists for symmetry.
+
+@code{regexec} tries to find a match for @var{preg} in @var{string}
+according to the syntax in @var{preg}'s @code{syntax} field.
+(@xref{POSIX Regular Expression Compiling}, for how to set it.) The
+function returns zero if the compiled pattern matches @var{string} and
+@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't.
+
+@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions
+@subsection Reporting Errors
+
+If either @code{regcomp} or @code{regexec} fails, it returns a nonzero
+error code, the possibilities for which are defined in @file{regex.h}.
+@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for
+what these codes mean. To get an error string corresponding to these
+codes, you can use:
+
+@findex regerror
+@example
+size_t
+regerror (int @var{errcode},
+ const regex_t *@var{preg},
+ char *@var{errbuf},
+ size_t @var{errbuf_size})
+@end example
+
+@noindent
+@var{errcode} is an error code, @var{preg} is the address of the pattern
+buffer which provoked the error, @var{errbuf} is the error buffer, and
+@var{errbuf_size} is @var{errbuf}'s size.
+
+@code{regerror} returns the size in bytes of the error string
+corresponding to @var{errcode} (including its terminating null). If
+@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in
+@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the
+error string, followed by a null.
+@var{errbuf_size} must be a nonnegative number less than or equal to the
+size in bytes of @var{errbuf}.
+
+You can call @code{regerror} with a null @var{errbuf} and a zero
+@var{errbuf_size} to determine how large @var{errbuf} need be to
+accommodate @code{regerror}'s error string.
+
+@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions
+@subsection Using Byte Offsets
+
+In @sc{posix}, variables of type @code{regmatch_t} hold information
+analogous, but not identical, to @sc{gnu}'s registers (@pxref{Using
+Registers}). To get information about registers in @sc{posix}, pass to
+@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e.,
+the address of a structure of this type, defined in
+@file{regex.h}:
+
+@tindex regmatch_t
+@example
+typedef struct
+@{
+ regoff_t rm_so;
+ regoff_t rm_eo;
+@} regmatch_t;
+@end example
+
+When reading in @ref{Using Registers}, about how the matching function
+stores the information into the registers, substitute @var{pmatch} for
+@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for
+@code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{pmatch}[@var{i}].}rm_eo} for
+@code{@w{@var{regs}->}end[@var{i}]}.
+
+@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions
+@subsection Freeing POSIX Pattern Buffers
+
+To free any allocated fields of a pattern buffer, use:
+
+@findex regfree
+@example
+void
+regfree (regex_t *@var{preg})
+@end example
+
+@noindent
+@var{preg} is the pattern buffer whose allocated fields you want freed.
+@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used}
+fields to zero. After freeing a pattern buffer, you need to again
+compile a regular expression in it (@pxref{POSIX Regular Expression
+Compiling}) before passing it to the matching function (@pxref{POSIX
+Matching}).
+
+
+@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex
+@section BSD Regex Functions
+
+If you're writing code that has to be Berkeley @sc{unix} compatible,
+you'll need to use these functions, whose interfaces are the same as those
+in Berkeley @sc{unix}.
+
+@menu
+* BSD Regular Expression Compiling:: re_comp ()
+* BSD Searching:: re_exec ()
+@end menu
+
+@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions
+@subsection BSD Regular Expression Compiling
+
+With Berkeley @sc{unix}, you can only search for a given regular
+expression; you can't match one. To search for it, you must first
+compile it. Before you compile it, you must indicate the regular
+expression syntax you want it compiled according to by setting the
+variable @code{re_syntax_options} (declared in @file{regex.h}) to some
+syntax (@pxref{Regular Expression Syntax}).
+
+To compile a regular expression use:
+
+@findex re_comp
+@example
+char *
+re_comp (char *@var{regex})
+@end example
+
+@noindent
+@var{regex} is the address of a null-terminated regular expression.
+@code{re_comp} uses an internal pattern buffer, so you can use only the
+most recently compiled pattern buffer. This means that if you want to
+use a given regular expression that you've already compiled---but it
+isn't the latest one you've compiled---you'll have to recompile it. If
+you call @code{re_comp} with the null string (@emph{not} the empty
+string) as the argument, it doesn't change the contents of the pattern
+buffer.
+
+If @code{re_comp} successfully compiles the regular expression, it
+returns zero. If it can't compile the regular expression, it returns
+an error string. @code{re_comp}'s error messages are identical to those
+of @code{re_compile_pattern} (@pxref{GNU Regular Expression
+Compiling}).
+
+@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions
+@subsection BSD Searching
+
+Searching the Berkeley @sc{unix} way means searching in a string
+starting at its first character and trying successive positions within
+it to find a match. Once you've compiled a pattern using @code{re_comp}
+(@pxref{BSD Regular Expression Compiling}), you can ask Regex
+to search for that pattern in a string using:
+
+@findex re_exec
+@example
+int
+re_exec (char *@var{string})
+@end example
+
+@noindent
+@var{string} is the address of the null-terminated string in which you
+want to search.
+
+@code{re_exec} returns either 1 for success or 0 for failure. It
+automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}).
+
+
+@node Copying, Index, Programming with Regex, Top
+@appendix GNU GENERAL PUBLIC LICENSE
+@center Version 2, June 1991
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+675 Mass Ave, Cambridge, MA 02139, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@unnumberedsec Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term ``modification''.) Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License. (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code. (This alternative is
+allowed only for noncommercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@unnumberedsec Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and a brief idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here is a sample; alter the names:
+
+@example
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+`Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end example
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+
+@node Index, , Copying, Top
+@unnumbered Index
+
+@printindex cp
+
+@contents
+
+@bye
diff --git a/gnu/lib/libregex/doc/xregex.texi b/gnu/lib/libregex/doc/xregex.texi
new file mode 100644
index 0000000..9292b35
--- /dev/null
+++ b/gnu/lib/libregex/doc/xregex.texi
@@ -0,0 +1,3021 @@
+\input texinfo
+@c %**start of header
+@setfilename regex.info
+@settitle Regex
+@c %**end of header
+
+@c \\{fill-paragraph} works better (for me, anyway) if the text in the
+@c source file isn't indented.
+@paragraphindent 2
+
+@c Define a new index for our magic constants.
+@defcodeindex cn
+
+@c Put everything in one index (arbitrarily chosen to be the concept index).
+@syncodeindex cn cp
+@syncodeindex ky cp
+@syncodeindex pg cp
+@syncodeindex tp cp
+@syncodeindex vr cp
+
+@c Here is what we use in the Info `dir' file:
+@c * Regex: (regex). Regular expression library.
+
+
+@ifinfo
+This file documents the GNU regular expression library.
+
+Copyright (C) 1992, 1993 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+@ignore
+Permission is granted to process this file through TeX and print the
+results, provided the printed document carries a copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+@end ignore
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled ``GNU General Public License'' is included exactly as
+in the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that the section entitled ``GNU General Public License'' may be
+included in a translation approved by the Free Software Foundation
+instead of in the original English.
+@end ifinfo
+
+
+@titlepage
+
+@title Regex
+@subtitle edition 0.12a
+@subtitle 19 September 1992
+@author Kathryn A. Hargreaves
+@author Karl Berry
+
+@page
+
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1992 Free Software Foundation.
+
+Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled ``GNU General Public License'' is included exactly as
+in the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this
+one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that the section entitled ``GNU General Public License'' may be
+included in a translation approved by the Free Software Foundation
+instead of in the original English.
+
+@end titlepage
+
+
+@ifinfo
+@node Top, Overview, (dir), (dir)
+@top Regular Expression Library
+
+This manual documents how to program with the GNU regular expression
+library. This is edition 0.12a of the manual, 19 September 1992.
+
+The first part of this master menu lists the major nodes in this Info
+document, including the index. The rest of the menu lists all the
+lower level nodes in the document.
+
+@menu
+* Overview::
+* Regular Expression Syntax::
+* Common Operators::
+* GNU Operators::
+* GNU Emacs Operators::
+* What Gets Matched?::
+* Programming with Regex::
+* Copying:: Copying and sharing Regex.
+* Index:: General index.
+ --- The Detailed Node Listing ---
+
+Regular Expression Syntax
+
+* Syntax Bits::
+* Predefined Syntaxes::
+* Collating Elements vs. Characters::
+* The Backslash Character::
+
+Common Operators
+
+* Match-self Operator:: Ordinary characters.
+* Match-any-character Operator:: .
+* Concatenation Operator:: Juxtaposition.
+* Repetition Operators:: * + ? @{@}
+* Alternation Operator:: |
+* List Operators:: [...] [^...]
+* Grouping Operators:: (...)
+* Back-reference Operator:: \digit
+* Anchoring Operators:: ^ $
+
+Repetition Operators
+
+* Match-zero-or-more Operator:: *
+* Match-one-or-more Operator:: +
+* Match-zero-or-one Operator:: ?
+* Interval Operators:: @{@}
+
+List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})
+
+* Character Class Operators:: [:class:]
+* Range Operator:: start-end
+
+Anchoring Operators
+
+* Match-beginning-of-line Operator:: ^
+* Match-end-of-line Operator:: $
+
+GNU Operators
+
+* Word Operators::
+* Buffer Operators::
+
+Word Operators
+
+* Non-Emacs Syntax Tables::
+* Match-word-boundary Operator:: \b
+* Match-within-word Operator:: \B
+* Match-beginning-of-word Operator:: \<
+* Match-end-of-word Operator:: \>
+* Match-word-constituent Operator:: \w
+* Match-non-word-constituent Operator:: \W
+
+Buffer Operators
+
+* Match-beginning-of-buffer Operator:: \`
+* Match-end-of-buffer Operator:: \'
+
+GNU Emacs Operators
+
+* Syntactic Class Operators::
+
+Syntactic Class Operators
+
+* Emacs Syntax Tables::
+* Match-syntactic-class Operator:: \sCLASS
+* Match-not-syntactic-class Operator:: \SCLASS
+
+Programming with Regex
+
+* GNU Regex Functions::
+* POSIX Regex Functions::
+* BSD Regex Functions::
+
+GNU Regex Functions
+
+* GNU Pattern Buffers:: The re_pattern_buffer type.
+* GNU Regular Expression Compiling:: re_compile_pattern ()
+* GNU Matching:: re_match ()
+* GNU Searching:: re_search ()
+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
+* Searching with Fastmaps:: re_compile_fastmap ()
+* GNU Translate Tables:: The `translate' field.
+* Using Registers:: The re_registers type and related fns.
+* Freeing GNU Pattern Buffers:: regfree ()
+
+POSIX Regex Functions
+
+* POSIX Pattern Buffers:: The regex_t type.
+* POSIX Regular Expression Compiling:: regcomp ()
+* POSIX Matching:: regexec ()
+* Reporting Errors:: regerror ()
+* Using Byte Offsets:: The regmatch_t type.
+* Freeing POSIX Pattern Buffers:: regfree ()
+
+BSD Regex Functions
+
+* BSD Regular Expression Compiling:: re_comp ()
+* BSD Searching:: re_exec ()
+@end menu
+@end ifinfo
+@node Overview, Regular Expression Syntax, Top, Top
+@chapter Overview
+
+A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text
+string that describes some (mathematical) set of strings. A regexp
+@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of
+strings described by @var{r}.
+
+Using the Regex library, you can:
+
+@itemize @bullet
+
+@item
+see if a string matches a specified pattern as a whole, and
+
+@item
+search within a string for a substring matching a specified pattern.
+
+@end itemize
+
+Some regular expressions match only one string, i.e., the set they
+describe has only one member. For example, the regular expression
+@samp{foo} matches the string @samp{foo} and no others. Other regular
+expressions match more than one string, i.e., the set they describe has
+more than one member. For example, the regular expression @samp{f*}
+matches the set of strings made up of any number (including zero) of
+@samp{f}s. As you can see, some characters in regular expressions match
+themselves (such as @samp{f}) and some don't (such as @samp{*}); the
+ones that don't match themselves instead let you specify patterns that
+describe many different strings.
+
+To either match or search for a regular expression with the Regex
+library functions, you must first compile it with a Regex pattern
+compiling function. A @dfn{compiled pattern} is a regular expression
+converted to the internal format used by the library functions. Once
+you've compiled a pattern, you can use it for matching or searching any
+number of times.
+
+The Regex library consists of two source files: @file{regex.h} and
+@file{regex.c}.
+@pindex regex.h
+@pindex regex.c
+Regex provides three groups of functions with which you can operate on
+regular expressions. One group---the @sc{gnu} group---is more powerful
+but not completely compatible with the other two, namely the @sc{posix}
+and Berkeley @sc{unix} groups; its interface was designed specifically
+for @sc{gnu}. The other groups have the same interfaces as do the
+regular expression functions in @sc{posix} and Berkeley
+@sc{unix}.
+
+We wrote this chapter with programmers in mind, not users of
+programs---such as Emacs---that use Regex. We describe the Regex
+library in its entirety, not how to write regular expressions that a
+particular program understands.
+
+
+@node Regular Expression Syntax, Common Operators, Overview, Top
+@chapter Regular Expression Syntax
+
+@cindex regular expressions, syntax of
+@cindex syntax of regular expressions
+
+@dfn{Characters} are things you can type. @dfn{Operators} are things in
+a regular expression that match one or more characters. You compose
+regular expressions from operators, which in turn you specify using one
+or more characters.
+
+Most characters represent what we call the match-self operator, i.e.,
+they match themselves; we call these characters @dfn{ordinary}. Other
+characters represent either all or parts of fancier operators; e.g.,
+@samp{.} represents what we call the match-any-character operator
+(which, no surprise, matches (almost) any character); we call these
+characters @dfn{special}. Two different things determine what
+characters represent what operators:
+
+@enumerate
+@item
+the regular expression syntax your program has told the Regex library to
+recognize, and
+
+@item
+the context of the character in the regular expression.
+@end enumerate
+
+In the following sections, we describe these things in more detail.
+
+@menu
+* Syntax Bits::
+* Predefined Syntaxes::
+* Collating Elements vs. Characters::
+* The Backslash Character::
+@end menu
+
+
+@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax
+@section Syntax Bits
+
+@cindex syntax bits
+
+In any particular syntax for regular expressions, some characters are
+always special, others are sometimes special, and others are never
+special. The particular syntax that Regex recognizes for a given
+regular expression depends on the value in the @code{syntax} field of
+the pattern buffer of that regular expression.
+
+You get a pattern buffer by compiling a regular expression. @xref{GNU
+Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information
+on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX
+Regular Expression Compiling}, and @ref{BSD Regular Expression
+Compiling}, for more information on compiling.
+
+Regex considers the value of the @code{syntax} field to be a collection
+of bits; we refer to these bits as @dfn{syntax bits}. In most cases,
+they affect what characters represent what operators. We describe the
+meanings of the operators to which we refer in @ref{Common Operators},
+@ref{GNU Operators}, and @ref{GNU Emacs Operators}.
+
+For reference, here is the complete list of syntax bits, in alphabetical
+order:
+
+@table @code
+
+@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS
+@item RE_BACKSLASH_ESCAPE_IN_LISTS
+If this bit is set, then @samp{\} inside a list (@pxref{List Operators})
+quotes (makes ordinary, if it's special) the following character; if
+this bit isn't set, then @samp{\} is an ordinary character inside lists.
+(@xref{The Backslash Character}, for what @samp{\} does outside of lists.)
+
+@cnindex RE_BK_PLUS_QM
+@item RE_BK_PLUS_QM
+If this bit is set, then @samp{\+} represents the match-one-or-more
+operator and @samp{\?} represents the match-zero-or-one operator; if
+this bit isn't set, then @samp{+} represents the match-one-or-more
+operator and @samp{?} represents the match-zero-or-one operator. This
+bit is irrelevant if @code{RE_LIMITED_OPS} is set.
+
+@cnindex RE_CHAR_CLASSES
+@item RE_CHAR_CLASSES
+If this bit is set, then you can use character classes in lists; if this
+bit isn't set, then you can't.
+
+@cnindex RE_CONTEXT_INDEP_ANCHORS
+@item RE_CONTEXT_INDEP_ANCHORS
+If this bit is set, then @samp{^} and @samp{$} are special anywhere outside
+a list; if this bit isn't set, then these characters are special only in
+certain contexts. @xref{Match-beginning-of-line Operator}, and
+@ref{Match-end-of-line Operator}.
+
+@cnindex RE_CONTEXT_INDEP_OPS
+@item RE_CONTEXT_INDEP_OPS
+If this bit is set, then certain characters are special anywhere outside
+a list; if this bit isn't set, then those characters are special only in
+some contexts and are ordinary elsewhere. Specifically, if this bit
+isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS}
+isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending
+on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators
+only if they're not first in a regular expression or just after an
+open-group or alternation operator. The same holds for @samp{@{} (or
+@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if
+it is the beginning of a valid interval and the syntax bit
+@code{RE_INTERVALS} is set.
+
+@cnindex RE_CONTEXT_INVALID_OPS
+@item RE_CONTEXT_INVALID_OPS
+If this bit is set, then repetition and alternation operators can't be
+in certain positions within a regular expression. Specifically, the
+regular expression is invalid if it has:
+
+@itemize @bullet
+
+@item
+a repetition operator first in the regular expression or just after a
+match-beginning-of-line, open-group, or alternation operator; or
+
+@item
+an alternation operator first or last in the regular expression, just
+before a match-end-of-line operator, or just after an alternation or
+open-group operator.
+
+@end itemize
+
+If this bit isn't set, then you can put the characters representing the
+repetition and alternation operators anywhere in a regular expression.
+Whether or not they will in fact be operators in certain positions
+depends on other syntax bits.
+
+@cnindex RE_DOT_NEWLINE
+@item RE_DOT_NEWLINE
+If this bit is set, then the match-any-character operator matches
+a newline; if this bit isn't set, then it doesn't.
+
+@cnindex RE_DOT_NOT_NULL
+@item RE_DOT_NOT_NULL
+If this bit is set, then the match-any-character operator doesn't match
+a null character; if this bit isn't set, then it does.
+
+@cnindex RE_INTERVALS
+@item RE_INTERVALS
+If this bit is set, then Regex recognizes interval operators; if this bit
+isn't set, then it doesn't.
+
+@cnindex RE_LIMITED_OPS
+@item RE_LIMITED_OPS
+If this bit is set, then Regex doesn't recognize the match-one-or-more,
+match-zero-or-one or alternation operators; if this bit isn't set, then
+it does.
+
+@cnindex RE_NEWLINE_ALT
+@item RE_NEWLINE_ALT
+If this bit is set, then newline represents the alternation operator; if
+this bit isn't set, then newline is ordinary.
+
+@cnindex RE_NO_BK_BRACES
+@item RE_NO_BK_BRACES
+If this bit is set, then @samp{@{} represents the open-interval operator
+and @samp{@}} represents the close-interval operator; if this bit isn't
+set, then @samp{\@{} represents the open-interval operator and
+@samp{\@}} represents the close-interval operator. This bit is relevant
+only if @code{RE_INTERVALS} is set.
+
+@cnindex RE_NO_BK_PARENS
+@item RE_NO_BK_PARENS
+If this bit is set, then @samp{(} represents the open-group operator and
+@samp{)} represents the close-group operator; if this bit isn't set, then
+@samp{\(} represents the open-group operator and @samp{\)} represents
+the close-group operator.
+
+@cnindex RE_NO_BK_REFS
+@item RE_NO_BK_REFS
+If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as
+the back reference operator; if this bit isn't set, then it does.
+
+@cnindex RE_NO_BK_VBAR
+@item RE_NO_BK_VBAR
+If this bit is set, then @samp{|} represents the alternation operator;
+if this bit isn't set, then @samp{\|} represents the alternation
+operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set.
+
+@cnindex RE_NO_EMPTY_RANGES
+@item RE_NO_EMPTY_RANGES
+If this bit is set, then a regular expression with a range whose ending
+point collates lower than its starting point is invalid; if this bit
+isn't set, then Regex considers such a range to be empty.
+
+@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD
+@item RE_UNMATCHED_RIGHT_PAREN_ORD
+If this bit is set and the regular expression has no matching open-group
+operator, then Regex considers what would otherwise be a close-group
+operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}.
+
+@end table
+
+
+@node Predefined Syntaxes, Collating Elements vs. Characters, Syntax Bits, Regular Expression Syntax
+@section Predefined Syntaxes
+
+If you're programming with Regex, you can set a pattern buffer's
+(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers})
+@code{syntax} field either to an arbitrary combination of syntax bits
+(@pxref{Syntax Bits}) or else to one of the configurations defined by Regex.
+These configurations define the syntaxes used by certain
+programs---@sc{gnu} Emacs,
+@cindex Emacs
+@sc{posix} Awk,
+@cindex POSIX Awk
+traditional Awk,
+@cindex Awk
+Grep,
+@cindex Grep
+@cindex Egrep
+Egrep---in addition to syntaxes for @sc{posix} basic and extended
+regular expressions.
+
+The predefined syntaxes---taken directly from @file{regex.h}---are:
+
+@example
+[[[ syntaxes ]]]
+@end example
+
+@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax
+@section Collating Elements vs.@: Characters
+
+@sc{posix} generalizes the notion of a character to that of a
+collating element. It defines a @dfn{collating element} to be ``a
+sequence of one or more bytes defined in the current collating sequence
+as a unit of collation.''
+
+This generalizes the notion of a character in
+two ways. First, a single character can map into two or more collating
+elements. For example, the German
+@tex
+`\ss'
+@end tex
+@ifinfo
+``es-zet''
+@end ifinfo
+collates as the collating element @samp{s} followed by another collating
+element @samp{s}. Second, two or more characters can map into one
+collating element. For example, the Spanish @samp{ll} collates after
+@samp{l} and before @samp{m}.
+
+Since @sc{posix}'s ``collating element'' preserves the essential idea of
+a ``character,'' we use the latter, more familiar, term in this document.
+
+@node The Backslash Character, , Collating Elements vs. Characters, Regular Expression Syntax
+@section The Backslash Character
+
+@cindex \
+The @samp{\} character has one of four different meanings, depending on
+the context in which you use it and what syntax bits are set
+(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next
+character, 3) introduce an operator, or 4) do nothing.
+
+@enumerate
+@item
+It stands for itself inside a list
+(@pxref{List Operators}) if the syntax bit
+@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]}
+would match @samp{\}.
+
+@item
+It quotes (makes ordinary, if it's special) the next character when you
+use it either:
+
+@itemize @bullet
+@item
+outside a list,@footnote{Sometimes
+you don't have to explicitly quote special characters to make
+them ordinary. For instance, most characters lose any special meaning
+inside a list (@pxref{List Operators}). In addition, if the syntax bits
+@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS}
+aren't set, then (for historical reasons) the matcher considers special
+characters ordinary if they are in contexts where the operations they
+represent make no sense; for example, the match-zero-or-more
+operator (represented by @samp{*}) matches itself in the regular
+expression @samp{*foo} because there is no preceding expression on which
+it can operate. It is poor practice, however, to depend on this
+behavior; if you want a special character to be ordinary outside a list,
+it's better to always quote it, regardless.} or
+
+@item
+inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set.
+
+@end itemize
+
+@item
+It introduces an operator when followed by certain ordinary
+characters---sometimes only when certain syntax bits are set. See the
+cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR},
+@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. Also:
+
+@itemize @bullet
+@item
+@samp{\b} represents the match-word-boundary operator
+(@pxref{Match-word-boundary Operator}).
+
+@item
+@samp{\B} represents the match-within-word operator
+(@pxref{Match-within-word Operator}).
+
+@item
+@samp{\<} represents the match-beginning-of-word operator @*
+(@pxref{Match-beginning-of-word Operator}).
+
+@item
+@samp{\>} represents the match-end-of-word operator
+(@pxref{Match-end-of-word Operator}).
+
+@item
+@samp{\w} represents the match-word-constituent operator
+(@pxref{Match-word-constituent Operator}).
+
+@item
+@samp{\W} represents the match-non-word-constituent operator
+(@pxref{Match-non-word-constituent Operator}).
+
+@item
+@samp{\`} represents the match-beginning-of-buffer
+operator and @samp{\'} represents the match-end-of-buffer operator
+(@pxref{Buffer Operators}).
+
+@item
+If Regex was compiled with the C preprocessor symbol @code{emacs}
+defined, then @samp{\s@var{class}} represents the match-syntactic-class
+operator and @samp{\S@var{class}} represents the
+match-not-syntactic-class operator (@pxref{Syntactic Class Operators}).
+
+@end itemize
+
+@item
+In all other cases, Regex ignores @samp{\}. For example,
+@samp{\n} matches @samp{n}.
+
+@end enumerate
+
+@node Common Operators, GNU Operators, Regular Expression Syntax, Top
+@chapter Common Operators
+
+You compose regular expressions from operators. In the following
+sections, we describe the regular expression operators specified by
+@sc{posix}; @sc{gnu} also uses these. Most operators have more than one
+representation as characters. @xref{Regular Expression Syntax}, for
+what characters represent what operators under what circumstances.
+
+For most operators that can be represented in two ways, one
+representation is a single character and the other is that character
+preceded by @samp{\}. For example, either @samp{(} or @samp{\(}
+represents the open-group operator. Which one does depends on the
+setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is
+this so? Historical reasons dictate some of the varying
+representations, while @sc{posix} dictates others.
+
+Finally, almost all characters lose any special meaning inside a list
+(@pxref{List Operators}).
+
+@menu
+* Match-self Operator:: Ordinary characters.
+* Match-any-character Operator:: .
+* Concatenation Operator:: Juxtaposition.
+* Repetition Operators:: * + ? @{@}
+* Alternation Operator:: |
+* List Operators:: [...] [^...]
+* Grouping Operators:: (...)
+* Back-reference Operator:: \digit
+* Anchoring Operators:: ^ $
+@end menu
+
+@node Match-self Operator, Match-any-character Operator, , Common Operators
+@section The Match-self Operator (@var{ordinary character})
+
+This operator matches the character itself. All ordinary characters
+(@pxref{Regular Expression Syntax}) represent this operator. For
+example, @samp{f} is always an ordinary character, so the regular
+expression @samp{f} matches only the string @samp{f}. In
+particular, it does @emph{not} match the string @samp{ff}.
+
+@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators
+@section The Match-any-character Operator (@code{.})
+
+@cindex @samp{.}
+
+This operator matches any single printing or nonprinting character
+except it won't match a:
+
+@table @asis
+@item newline
+if the syntax bit @code{RE_DOT_NEWLINE} isn't set.
+
+@item null
+if the syntax bit @code{RE_DOT_NOT_NULL} is set.
+
+@end table
+
+The @samp{.} (period) character represents this operator. For example,
+@samp{a.b} matches any three-character string beginning with @samp{a}
+and ending with @samp{b}.
+
+@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators
+@section The Concatenation Operator
+
+This operator concatenates two regular expressions @var{a} and @var{b}.
+No character represents this operator; you simply put @var{b} after
+@var{a}. The result is a regular expression that will match a string if
+@var{a} matches its first part and @var{b} matches the rest. For
+example, @samp{xy} (two match-self operators) matches @samp{xy}.
+
+@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators
+@section Repetition Operators
+
+Repetition operators repeat the preceding regular expression a specified
+number of times.
+
+@menu
+* Match-zero-or-more Operator:: *
+* Match-one-or-more Operator:: +
+* Match-zero-or-one Operator:: ?
+* Interval Operators:: @{@}
+@end menu
+
+@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators
+@subsection The Match-zero-or-more Operator (@code{*})
+
+@cindex @samp{*}
+
+This operator repeats the smallest possible preceding regular expression
+as many times as necessary (including zero) to match the pattern.
+@samp{*} represents this operator. For example, @samp{o*}
+matches any string made up of zero or more @samp{o}s. Since this
+operator operates on the smallest preceding regular expression,
+@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So,
+@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on.
+
+Since the match-zero-or-more operator is a suffix operator, it may be
+useless as such when no regular expression precedes it. This is the
+case when it:
+
+@itemize @bullet
+@item
+is first in a regular expression, or
+
+@item
+follows a match-beginning-of-line, open-group, or alternation
+operator.
+
+@end itemize
+
+@noindent
+Three different things can happen in these cases:
+
+@enumerate
+@item
+If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the
+regular expression is invalid.
+
+@item
+If @code{RE_CONTEXT_INVALID_OPS} isn't set, but
+@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the
+match-zero-or-more operator (which then operates on the empty string).
+
+@item
+Otherwise, @samp{*} is ordinary.
+
+@end enumerate
+
+@cindex backtracking
+The matcher processes a match-zero-or-more operator by first matching as
+many repetitions of the smallest preceding regular expression as it can.
+Then it continues to match the rest of the pattern.
+
+If it can't match the rest of the pattern, it backtracks (as many times
+as necessary), each time discarding one of the matches until it can
+either match the entire pattern or be certain that it cannot get a
+match. For example, when matching @samp{ca*ar} against @samp{caaar},
+the matcher first matches all three @samp{a}s of the string with the
+@samp{a*} of the regular expression. However, it cannot then match the
+final @samp{ar} of the regular expression against the final @samp{r} of
+the string. So it backtracks, discarding the match of the last @samp{a}
+in the string. It can then match the remaining @samp{ar}.
+
+
+@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators
+@subsection The Match-one-or-more Operator (@code{+} or @code{\+})
+
+@cindex @samp{+}
+
+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize
+this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't
+set, then @samp{+} represents this operator; if it is, then @samp{\+}
+does.
+
+This operator is similar to the match-zero-or-more operator except that
+it repeats the preceding regular expression at least once;
+@pxref{Match-zero-or-more Operator}, for what it operates on, how some
+syntax bits affect it, and how Regex backtracks to match it.
+
+For example, supposing that @samp{+} represents the match-one-or-more
+operator; then @samp{ca+r} matches, e.g., @samp{car} and
+@samp{caaaar}, but not @samp{cr}.
+
+@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators
+@subsection The Match-zero-or-one Operator (@code{?} or @code{\?})
+@cindex @samp{?}
+
+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit
+@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator;
+if it is, then @samp{\?} does.
+
+This operator is similar to the match-zero-or-more operator except that
+it repeats the preceding regular expression once or not at all;
+@pxref{Match-zero-or-more Operator}, to see what it operates on, how
+some syntax bits affect it, and how Regex backtracks to match it.
+
+For example, supposing that @samp{?} represents the match-zero-or-one
+operator; then @samp{ca?r} matches both @samp{car} and @samp{cr}, but
+nothing else.
+
+@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators
+@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}})
+
+@cindex interval expression
+@cindex @samp{@{}
+@cindex @samp{@}}
+@cindex @samp{\@{}
+@cindex @samp{\@}}
+
+If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes
+@dfn{interval expressions}. They repeat the smallest possible preceding
+regular expression a specified number of times.
+
+If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents
+the @dfn{open-interval operator} and @samp{@}} represents the
+@dfn{close-interval operator}; otherwise, @samp{\@{} and @samp{\@}} do.
+
+Specifically, supposing that @samp{@{} and @samp{@}} represent the
+open-interval and close-interval operators; then:
+
+@table @code
+@item @{@var{count}@}
+matches exactly @var{count} occurrences of the preceding regular
+expression.
+
+@item @{@var{min},@}
+matches @var{min} or more occurrences of the preceding regular
+expression.
+
+@item @{@var{min},@var{max}@}
+matches at least @var{min} but no more than @var{max} occurrences of
+the preceding regular expression.
+
+@end table
+
+The interval expression (but not necessarily the regular expression that
+contains it) is invalid if:
+
+@itemize @bullet
+@item
+@var{min} is greater than @var{max}, or
+
+@item
+any of @var{count}, @var{min}, or @var{max} is outside the range
+zero to @code{RE_DUP_MAX} (which symbol @file{regex.h}
+defines).
+
+@end itemize
+
+If the interval expression is invalid and the syntax bit
+@code{RE_NO_BK_BRACES} is set, then Regex considers all the
+characters in the would-be interval to be ordinary. If that bit
+isn't set, then the regular expression is invalid.
+
+If the interval expression is valid but there is no preceding regular
+expression on which to operate, then if the syntax bit
+@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid.
+If that bit isn't set, then Regex considers all the characters---other
+than backslashes, which it ignores---in the would-be interval to be
+ordinary.
+
+
+@node Alternation Operator, List Operators, Repetition Operators, Common Operators
+@section The Alternation Operator (@code{|} or @code{\|})
+
+@kindex |
+@kindex \|
+@cindex alternation operator
+@cindex or operator
+
+If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't
+recognize this operator. Otherwise, if the syntax bit
+@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator;
+otherwise, @samp{\|} does.
+
+Alternatives match one of a choice of regular expressions:
+if you put the character(s) representing the alternation operator between
+any two regular expressions @var{a} and @var{b}, the result matches
+the union of the strings that @var{a} and @var{b} match. For
+example, supposing that @samp{|} is the alternation operator, then
+@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or
+@samp{quux}.
+
+@ignore
+@c Nobody needs to disallow empty alternatives any more.
+If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular
+expressions @var{a} or @var{b} is empty, the
+regular expression is invalid. More precisely, if this syntax bit is
+set, then the alternation operator can't:
+
+@itemize @bullet
+@item
+be first or last in a regular expression;
+
+@item
+follow either another alternation operator or an open-group operator
+(@pxref{Grouping Operators}); or
+
+@item
+precede a close-group operator.
+
+@end itemize
+
+@noindent
+For example, supposing @samp{(} and @samp{)} represent the open and
+close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar},
+@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid.
+@end ignore
+
+The alternation operator operates on the @emph{largest} possible
+surrounding regular expressions. (Put another way, it has the lowest
+precedence of any regular expression operator.)
+Thus, the only way you can
+delimit its arguments is to use grouping. For example, if @samp{(} and
+@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar}
+would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would
+match @samp{foo} or @samp{bar}.)
+
+@cindex backtracking
+The matcher usually tries all combinations of alternatives so as to
+match the longest possible string. For example, when matching
+@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot
+take, say, the first (``depth-first'') combination it could match, since
+then it would be content to match just @samp{fooqbar}.
+
+@comment xx something about leftmost-longest
+
+
+@node List Operators, Grouping Operators, Alternation Operator, Common Operators
+@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})
+
+@cindex matching list
+@cindex @samp{[}
+@cindex @samp{]}
+@cindex @samp{^}
+@cindex @samp{-}
+@cindex @samp{\}
+@cindex @samp{[^}
+@cindex nonmatching list
+@cindex matching newline
+@cindex bracket expression
+
+@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or
+more items. An @dfn{item} is a character,
+@ignore
+(These get added when they get implemented.)
+a collating symbol, an equivalence class expression,
+@end ignore
+a character class expression, or a range expression. The syntax bits
+affect which kinds of items you can put in a list. We explain the last
+two items in subsections below. Empty lists are invalid.
+
+A @dfn{matching list} matches a single character represented by one of
+the list items. You form a matching list by enclosing one or more items
+within an @dfn{open-matching-list operator} (represented by @samp{[})
+and a @dfn{close-list operator} (represented by @samp{]}).
+
+For example, @samp{[ab]} matches either @samp{a} or @samp{b}.
+@samp{[ad]*} matches the empty string and any string composed of just
+@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular
+expression with a @samp{[} but no matching
+@samp{]}.
+
+@dfn{Nonmatching lists} are similar to matching lists except that they
+match a single character @emph{not} represented by one of the list
+items. You use an @dfn{open-nonmatching-list operator} (represented by
+@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be
+the first character in the list. If you put a @samp{^} character first
+in (what you think is) a matching list, you'll turn it into a
+nonmatching list.}) instead of an open-matching-list operator to start a
+nonmatching list.
+
+For example, @samp{[^ab]} matches any character except @samp{a} or
+@samp{b}.
+
+If the @code{posix_newline} field in the pattern buffer (@pxref{GNU
+Pattern Buffers}) is set, then nonmatching lists do not match a newline.
+
+Most characters lose any special meaning inside a list. The special
+characters inside a list follow.
+
+@table @samp
+@item ]
+ends the list if it's not the first list item. So, if you want to make
+the @samp{]} character a list item, you must put it first.
+
+@item \
+quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is
+set.
+
+@ignore
+Put these in if they get implemented.
+
+@item [.
+represents the open-collating-symbol operator (@pxref{Collating Symbol
+Operators}).
+
+@item .]
+represents the close-collating-symbol operator.
+
+@item [=
+represents the open-equivalence-class operator (@pxref{Equivalence Class
+Operators}).
+
+@item =]
+represents the close-equivalence-class operator.
+
+@end ignore
+
+@item [:
+represents the open-character-class operator (@pxref{Character Class
+Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what
+follows is a valid character class expression.
+
+@item :]
+represents the close-character-class operator if the syntax bit
+@code{RE_CHAR_CLASSES} is set and what precedes it is an
+open-character-class operator followed by a valid character class name.
+
+@item -
+represents the range operator (@pxref{Range Operator}) if it's
+not first or last in a list or the ending point of a range.
+
+@end table
+
+@noindent
+All other characters are ordinary. For example, @samp{[.*]} matches
+@samp{.} and @samp{*}.
+
+@menu
+* Character Class Operators:: [:class:]
+* Range Operator:: start-end
+@end menu
+
+@ignore
+(If collating symbols and equivalence class expressions get implemented,
+then add this.)
+
+node Collating Symbol Operators
+subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]})
+
+If the syntax bit @code{XX} is set, then you can represent
+collating symbols inside lists. You form a @dfn{collating symbol} by
+putting a collating element between an @dfn{open-collating-symbol
+operator} and an @dfn{close-collating-symbol operator}. @samp{[.}
+represents the open-collating-symbol operator and @samp{.]} represents
+the close-collating-symbol operator. For example, if @samp{ll} is a
+collating element, then @samp{[[.ll.]]} would match @samp{ll}.
+
+node Equivalence Class Operators
+subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]})
+@cindex equivalence class expression in regex
+@cindex @samp{[=} in regex
+@cindex @samp{=]} in regex
+
+If the syntax bit @code{XX} is set, then Regex recognizes equivalence class
+expressions inside lists. A @dfn{equivalence class expression} is a set
+of collating elements which all belong to the same equivalence class.
+You form an equivalence class expression by putting a collating
+element between an @dfn{open-equivalence-class operator} and a
+@dfn{close-equivalence-class operator}. @samp{[=} represents the
+open-equivalence-class operator and @samp{=]} represents the
+close-equivalence-class operator. For example, if @samp{a} and @samp{A}
+were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]}
+would match both @samp{a} and @samp{A}. If the collating element in an
+equivalence class expression isn't part of an equivalence class, then
+the matcher considers the equivalence class expression to be a collating
+symbol.
+
+@end ignore
+
+@node Character Class Operators, Range Operator, , List Operators
+@subsection Character Class Operators (@code{[:} @dots{} @code{:]})
+
+@cindex character classes
+@cindex @samp{[:} in regex
+@cindex @samp{:]} in regex
+
+If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex
+recognizes character class expressions inside lists. A @dfn{character
+class expression} matches one character from a given class. You form a
+character class expression by putting a character class name between an
+@dfn{open-character-class operator} (represented by @samp{[:}) and a
+@dfn{close-character-class operator} (represented by @samp{:]}). The
+character class names and their meanings are:
+
+@table @code
+
+@item alnum
+letters and digits
+
+@item alpha
+letters
+
+@item blank
+system-dependent; for @sc{gnu}, a space or tab
+
+@item cntrl
+control characters (in the @sc{ascii} encoding, code 0177 and codes
+less than 040)
+
+@item digit
+digits
+
+@item graph
+same as @code{print} except omits space
+
+@item lower
+lowercase letters
+
+@item print
+printable characters (in the @sc{ascii} encoding, space through
+tilde---codes 040 through 0176)
+
+@item punct
+neither control nor alphanumeric characters
+
+@item space
+space, horizontal tab, carriage return, newline, vertical tab, and form feed
+
+@item upper
+uppercase letters
+
+@item xdigit
+hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F}
+
+@end table
+
+@noindent
+These correspond to the definitions in the C library's @file{<ctype.h>}
+facility. For example, @samp{[:alpha:]} corresponds to the standard
+facility @code{isalpha}. Regex recognizes character class expressions
+only inside of lists; so @samp{[[:alpha:]]} matches any letter, but
+@samp{[:alpha:]} outside of a bracket expression and not followed by a
+repetition operator matches just itself.
+
+@node Range Operator, , Character Class Operators, List Operators
+@subsection The Range Operator (@code{-})
+
+Regex recognizes @dfn{range expressions} inside a list. They represent
+those characters
+that fall between two elements in the current collating sequence. You
+form a range expression by putting a @dfn{range operator} between two
+@ignore
+(If these get implemented, then substitute this for ``characters.'')
+of any of the following: characters, collating elements, collating symbols,
+and equivalence class expressions. The starting point of the range and
+the ending point of the range don't have to be the same kind of item,
+e.g., the starting point could be a collating element and the ending
+point could be an equivalence class expression. If a range's ending
+point is an equivalence class, then all the collating elements in that
+class will be in the range.
+@end ignore
+characters.@footnote{You can't use a character class for the starting
+or ending point of a range, since a character class is not a single
+character.} @samp{-} represents the range operator. For example,
+@samp{a-f} within a list represents all the characters from @samp{a}
+through @samp{f}
+inclusively.
+
+If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's
+ending point collates less than its starting point, the range (and the
+regular expression containing it) is invalid. For example, the regular
+expression @samp{[z-a]} would be invalid. If this bit isn't set, then
+Regex considers such a range to be empty.
+
+Since @samp{-} represents the range operator, if you want to make a
+@samp{-} character itself
+a list item, you must do one of the following:
+
+@itemize @bullet
+@item
+Put the @samp{-} either first or last in the list.
+
+@item
+Include a range whose starting point collates strictly lower than
+@samp{-} and whose ending point collates equal or higher. Unless a
+range is the first item in a list, a @samp{-} can't be its starting
+point, but @emph{can} be its ending point. That is because Regex
+considers @samp{-} to be the range operator unless it is preceded by
+another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)},
+@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are
+contiguous characters in the collating sequence. You might think that
+@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it
+has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so
+it matches, e.g., @samp{,}, not @samp{.}.
+
+@item
+Put a range whose starting point is @samp{-} first in the list.
+
+@end itemize
+
+For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in
+English, in @sc{ascii}).
+
+
+@node Grouping Operators, Back-reference Operator, List Operators, Common Operators
+@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)})
+
+@kindex (
+@kindex )
+@kindex \(
+@kindex \)
+@cindex grouping
+@cindex subexpressions
+@cindex parenthesizing
+
+A @dfn{group}, also known as a @dfn{subexpression}, consists of an
+@dfn{open-group operator}, any number of other operators, and a
+@dfn{close-group operator}. Regex treats this sequence as a unit, just
+as mathematics and programming languages treat a parenthesized
+expression as a unit.
+
+Therefore, using @dfn{groups}, you can:
+
+@itemize @bullet
+@item
+delimit the argument(s) to an alternation operator (@pxref{Alternation
+Operator}) or a repetition operator (@pxref{Repetition
+Operators}).
+
+@item
+keep track of the indices of the substring that matched a given group.
+@xref{Using Registers}, for a precise explanation.
+This lets you:
+
+@itemize @bullet
+@item
+use the back-reference operator (@pxref{Back-reference Operator}).
+
+@item
+use registers (@pxref{Using Registers}).
+
+@end itemize
+
+@end itemize
+
+If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents
+the open-group operator and @samp{)} represents the
+close-group operator; otherwise, @samp{\(} and @samp{\)} do.
+
+If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a
+close-group operator has no matching open-group operator, then Regex
+considers it to match @samp{)}.
+
+
+@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators
+@section The Back-reference Operator (@code{\}@var{digit})
+
+@cindex back references
+
+If the syntax bit @code{RE_NO_BK_REFS} isn't set, then Regex recognizes
+back references. A back reference matches a specified preceding group.
+The back reference operator is represented by @samp{\@var{digit}}
+anywhere after the end of a regular expression's @w{@var{digit}-th}
+group (@pxref{Grouping Operators}).
+
+@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns
+numbers 1 through 9 to the first nine groups it encounters. By using
+one of @samp{\1} through @samp{\9} after the corresponding group's
+close-group operator, you can match a substring identical to the
+one that the group does.
+
+Back references match according to the following (in all examples below,
+@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{}
+the open-interval and @samp{@}} the close-interval operator):
+
+@itemize @bullet
+@item
+If the group matches a substring, the back reference matches an
+identical substring. For example, @samp{(a)\1} matches @samp{aa} and
+@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise,
+@samp{(.*)\1} matches any (newline-free if the syntax bit
+@code{RE_DOT_NEWLINE} isn't set) string that is composed of two
+identical halves; the @samp{(.*)} matches the first half and the
+@samp{\1} matches the second half.
+
+@item
+If the group matches more than once (as it might if followed
+by, e.g., a repetition operator), then the back reference matches the
+substring the group @emph{last} matched. For example,
+@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the
+outer one) matches @samp{aab} and @w{group 2} (the inner one) matches
+@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches
+@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches
+@samp{a}.
+
+@item
+If the group doesn't participate in a match, i.e., it is part of an
+alternative not taken or a repetition operator allows zero repetitions
+of it, then the back reference makes the whole match fail. For example,
+@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three}
+and @samp{two-and-four}, but not @samp{one-and-four} or
+@samp{two-and-three}. For example, if the pattern matches
+@samp{one-and-}, then its @w{group 2} matches the empty string and its
+@w{group 3} doesn't participate in the match. So, if it then matches
+@samp{four}, then when it tries to back reference @w{group 3}---which it
+will attempt to do because @samp{\3} follows the @samp{four}---the match
+will fail because @w{group 3} didn't participate in the match.
+
+@end itemize
+
+You can use a back reference as an argument to a repetition operator. For
+example, @samp{(a(b))\2*} matches @samp{a} followed by one or more
+@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}.
+
+If there is no preceding @w{@var{digit}-th} subexpression, the regular
+expression is invalid.
+
+
+@node Anchoring Operators, , Back-reference Operator, Common Operators
+@section Anchoring Operators
+
+@cindex anchoring
+@cindex regexp anchoring
+
+These operators can constrain a pattern to match only at the beginning or
+end of the entire string or at the beginning or end of a line.
+
+@menu
+* Match-beginning-of-line Operator:: ^
+* Match-end-of-line Operator:: $
+@end menu
+
+
+@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators
+@subsection The Match-beginning-of-line Operator (@code{^})
+
+@kindex ^
+@cindex beginning-of-line operator
+@cindex anchors
+
+This operator can match the empty string either at the beginning of the
+string or after a newline character. Thus, it is said to @dfn{anchor}
+the pattern to the beginning of a line.
+
+In the cases following, @samp{^} represents this operator. (Otherwise,
+@samp{^} is ordinary.)
+
+@itemize @bullet
+
+@item
+It (the @samp{^}) is first in the pattern, as in @samp{^foo}.
+
+@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})}
+@item
+The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside
+a bracket expression.
+
+@cindex open-group operator and @samp{^}
+@cindex alternation operator and @samp{^}
+@item
+It follows an open-group or alternation operator, as in @samp{a\(^b\)}
+and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation
+Operator}.
+
+@end itemize
+
+These rules imply that some valid patterns containing @samp{^} cannot be
+matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS}
+is set.
+
+@vindex not_bol @r{field in pattern buffer}
+If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU
+Pattern Buffers}), then @samp{^} fails to match at the beginning of the
+string. @xref{POSIX Matching}, for when you might find this useful.
+
+@vindex newline_anchor @r{field in pattern buffer}
+If the @code{newline_anchor} field is not set in the pattern buffer, then
+@samp{^} fails to match after a newline. This is useful when you do not
+regard the string to be matched as broken into lines.
+
+
+@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators
+@subsection The Match-end-of-line Operator (@code{$})
+
+@kindex $
+@cindex end-of-line operator
+@cindex anchors
+
+This operator can match the empty string either at the end of
+the string or before a newline character in the string. Thus, it is
+said to @dfn{anchor} the pattern to the end of a line.
+
+It is always represented by @samp{$}. For example, @samp{foo$} usually
+matches @samp{foo} and the first three characters of
+@samp{foo\nbar}.
+
+Its interaction with the syntax bits and pattern buffer fields is
+exactly the dual of @samp{^}'s; see the previous section. (That is,
+``beginning'' becomes ``end'', ``next'' becomes ``previous'', and
+``after'' becomes ``before''.)
+
+
+@node GNU Operators, GNU Emacs Operators, Common Operators, Top
+@chapter GNU Operators
+
+Following are operators that @sc{gnu} defines (and @sc{posix} doesn't).
+
+@menu
+* Word Operators::
+* Buffer Operators::
+@end menu
+
+@node Word Operators, Buffer Operators, , GNU Operators
+@section Word Operators
+
+The operators in this section require Regex to recognize parts of words.
+Regex uses a syntax table to determine whether or not a character is
+part of a word, i.e., whether or not it is @dfn{word-constituent}.
+
+@menu
+* Non-Emacs Syntax Tables::
+* Match-word-boundary Operator:: \b
+* Match-within-word Operator:: \B
+* Match-beginning-of-word Operator:: \<
+* Match-end-of-word Operator:: \>
+* Match-word-constituent Operator:: \w
+* Match-non-word-constituent Operator:: \W
+@end menu
+
+@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators
+@subsection Non-Emacs Syntax Tables
+
+A @dfn{syntax table} is an array indexed by the characters in your
+character set. In the @sc{ascii} encoding, therefore, a syntax table
+has 256 elements. Regex always uses a @code{char *} variable
+@code{re_syntax_table} as its syntax table. In some cases, it
+initializes this variable and in others it expects you to initialize it.
+
+@itemize @bullet
+@item
+If Regex is compiled with the preprocessor symbols @code{emacs} and
+@code{SYNTAX_TABLE} both undefined, then Regex allocates
+@code{re_syntax_table} and initializes an element @var{i} either to
+@code{Sword} (which it defines) if @var{i} is a letter, number, or
+@samp{_}, or to zero if it's not.
+
+@item
+If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE}
+defined, then Regex expects you to define a @code{char *} variable
+@code{re_syntax_table} to be a valid syntax table.
+
+@item
+@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with
+the preprocessor symbol @code{emacs} defined.
+
+@end itemize
+
+@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators
+@subsection The Match-word-boundary Operator (@code{\b})
+
+@cindex @samp{\b}
+@cindex word boundaries, matching
+
+This operator (represented by @samp{\b}) matches the empty string at
+either the beginning or the end of a word. For example, @samp{\brat\b}
+matches the separate word @samp{rat}.
+
+@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators
+@subsection The Match-within-word Operator (@code{\B})
+
+@cindex @samp{\B}
+
+This operator (represented by @samp{\B}) matches the empty string within
+a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but
+@samp{dirty \Brat} doesn't match @samp{dirty rat}.
+
+@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators
+@subsection The Match-beginning-of-word Operator (@code{\<})
+
+@cindex @samp{\<}
+
+This operator (represented by @samp{\<}) matches the empty string at the
+beginning of a word.
+
+@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators
+@subsection The Match-end-of-word Operator (@code{\>})
+
+@cindex @samp{\>}
+
+This operator (represented by @samp{\>}) matches the empty string at the
+end of a word.
+
+@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators
+@subsection The Match-word-constituent Operator (@code{\w})
+
+@cindex @samp{\w}
+
+This operator (represented by @samp{\w}) matches any word-constituent
+character.
+
+@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators
+@subsection The Match-non-word-constituent Operator (@code{\W})
+
+@cindex @samp{\W}
+
+This operator (represented by @samp{\W}) matches any character that is
+not word-constituent.
+
+
+@node Buffer Operators, , Word Operators, GNU Operators
+@section Buffer Operators
+
+Following are operators which work on buffers. In Emacs, a @dfn{buffer}
+is, naturally, an Emacs buffer. For other programs, Regex considers the
+entire string to be matched as the buffer.
+
+@menu
+* Match-beginning-of-buffer Operator:: \`
+* Match-end-of-buffer Operator:: \'
+@end menu
+
+
+@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators
+@subsection The Match-beginning-of-buffer Operator (@code{\`})
+
+@cindex @samp{\`}
+
+This operator (represented by @samp{\`}) matches the empty string at the
+beginning of the buffer.
+
+@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators
+@subsection The Match-end-of-buffer Operator (@code{\'})
+
+@cindex @samp{\'}
+
+This operator (represented by @samp{\'}) matches the empty string at the
+end of the buffer.
+
+
+@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top
+@chapter GNU Emacs Operators
+
+Following are operators that @sc{gnu} defines (and @sc{posix} doesn't)
+that you can use only when Regex is compiled with the preprocessor
+symbol @code{emacs} defined.
+
+@menu
+* Syntactic Class Operators::
+@end menu
+
+
+@node Syntactic Class Operators, , , GNU Emacs Operators
+@section Syntactic Class Operators
+
+The operators in this section require Regex to recognize the syntactic
+classes of characters. Regex uses a syntax table to determine this.
+
+@menu
+* Emacs Syntax Tables::
+* Match-syntactic-class Operator:: \sCLASS
+* Match-not-syntactic-class Operator:: \SCLASS
+@end menu
+
+@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators
+@subsection Emacs Syntax Tables
+
+A @dfn{syntax table} is an array indexed by the characters in your
+character set. In the @sc{ascii} encoding, therefore, a syntax table
+has 256 elements.
+
+If Regex is compiled with the preprocessor symbol @code{emacs} defined,
+then Regex expects you to define and initialize the variable
+@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax
+tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax
+Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual},
+for a description of Emacs' syntax tables.
+
+@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators
+@subsection The Match-syntactic-class Operator (@code{\s}@var{class})
+
+@cindex @samp{\s}
+
+This operator matches any character whose syntactic class is represented
+by a specified character. @samp{\s@var{class}} represents this operator
+where @var{class} is the character representing the syntactic class you
+want. For example, @samp{w} represents the syntactic
+class of word-constituent characters, so @samp{\sw} matches any
+word-constituent character.
+
+@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators
+@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class})
+
+@cindex @samp{\S}
+
+This operator is similar to the match-syntactic-class operator except
+that it matches any character whose syntactic class is @emph{not}
+represented by the specified character. @samp{\S@var{class}} represents
+this operator. For example, @samp{w} represents the syntactic class of
+word-constituent characters, so @samp{\Sw} matches any character that is
+not word-constituent.
+
+
+@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top
+@chapter What Gets Matched?
+
+Regex usually matches strings according to the ``leftmost longest''
+rule; that is, it chooses the longest of the leftmost matches. This
+does not mean that, for a regular expression containing subexpressions,
+it simply chooses the longest match for each subexpression, left to
+right; the overall match must also be the longest possible one.
+
+For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not
+@samp{acdac}, as it would if it were to choose the longest match for the
+first subexpression.
+
+
+@node Programming with Regex, Copying, What Gets Matched?, Top
+@chapter Programming with Regex
+
+Here we describe how you use the Regex data structures and functions in
+C programs. Regex has three interfaces: one designed for @sc{gnu}, one
+compatible with @sc{posix} and one compatible with Berkeley @sc{unix}.
+
+@menu
+* GNU Regex Functions::
+* POSIX Regex Functions::
+* BSD Regex Functions::
+@end menu
+
+
+@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex
+@section GNU Regex Functions
+
+If you're writing code that doesn't need to be compatible with either
+@sc{posix} or Berkeley @sc{unix}, you can use these functions. They
+provide more options than the other interfaces.
+
+@menu
+* GNU Pattern Buffers:: The re_pattern_buffer type.
+* GNU Regular Expression Compiling:: re_compile_pattern ()
+* GNU Matching:: re_match ()
+* GNU Searching:: re_search ()
+* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
+* Searching with Fastmaps:: re_compile_fastmap ()
+* GNU Translate Tables:: The `translate' field.
+* Using Registers:: The re_registers type and related fns.
+* Freeing GNU Pattern Buffers:: regfree ()
+@end menu
+
+
+@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions
+@subsection GNU Pattern Buffers
+
+@cindex pattern buffer, definition of
+@tindex re_pattern_buffer @r{definition}
+@tindex struct re_pattern_buffer @r{definition}
+
+To compile, match, or search for a given regular expression, you must
+supply a pattern buffer. A @dfn{pattern buffer} holds one compiled
+regular expression.@footnote{Regular expressions are also referred to as
+``patterns,'' hence the name ``pattern buffer.''}
+
+You can have several different pattern buffers simultaneously, each
+holding a compiled pattern for a different regular expression.
+
+@file{regex.h} defines the pattern buffer @code{struct} as follows:
+
+@example
+[[[ pattern_buffer ]]]
+@end example
+
+
+@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions
+@subsection GNU Regular Expression Compiling
+
+In @sc{gnu}, you can both match and search for a given regular
+expression. To do either, you must first compile it in a pattern buffer
+(@pxref{GNU Pattern Buffers}).
+
+@cindex syntax initialization
+@vindex re_syntax_options @r{initialization}
+Regular expressions match according to the syntax with which they were
+compiled; with @sc{gnu}, you indicate what syntax you want by setting
+the variable @code{re_syntax_options} (declared in @file{regex.h} and
+defined in @file{regex.c}) before calling the compiling function,
+@code{re_compile_pattern} (see below). @xref{Syntax Bits}, and
+@ref{Predefined Syntaxes}.
+
+You can change the value of @code{re_syntax_options} at any time.
+Usually, however, you set its value once and then never change it.
+
+@cindex pattern buffer initialization
+@code{re_compile_pattern} takes a pattern buffer as an argument. You
+must initialize the following fields:
+
+@table @code
+
+@item translate
+@vindex translate @r{initialization}
+Initialize this to point to a translate table if you want one, or to
+zero if you don't. We explain translate tables in @ref{GNU Translate
+Tables}.
+
+@item fastmap
+@vindex fastmap @r{initialization}
+Initialize this to nonzero if you want a fastmap, or to zero if you
+don't.
+
+@item buffer
+@itemx allocated
+@vindex buffer @r{initialization}
+@vindex allocated @r{initialization}
+@findex malloc
+If you want @code{re_compile_pattern} to allocate memory for the
+compiled pattern, set both of these to zero. If you have an existing
+block of memory (allocated with @code{malloc}) you want Regex to use,
+set @code{buffer} to its address and @code{allocated} to its size (in
+bytes).
+
+@code{re_compile_pattern} uses @code{realloc} to extend the space for
+the compiled pattern as necessary.
+
+@end table
+
+To compile a pattern buffer, use:
+
+@findex re_compile_pattern
+@example
+char *
+re_compile_pattern (const char *@var{regex}, const int @var{regex_size},
+ struct re_pattern_buffer *@var{pattern_buffer})
+@end example
+
+@noindent
+@var{regex} is the regular expression's address, @var{regex_size} is its
+length, and @var{pattern_buffer} is the pattern buffer's address.
+
+If @code{re_compile_pattern} successfully compiles the regular
+expression, it returns zero and sets @code{*@var{pattern_buffer}} to the
+compiled pattern. It sets the pattern buffer's fields as follows:
+
+@table @code
+@item buffer
+@vindex buffer @r{field, set by @code{re_compile_pattern}}
+to the compiled pattern.
+
+@item used
+@vindex used @r{field, set by @code{re_compile_pattern}}
+to the number of bytes the compiled pattern in @code{buffer} occupies.
+
+@item syntax
+@vindex syntax @r{field, set by @code{re_compile_pattern}}
+to the current value of @code{re_syntax_options}.
+
+@item re_nsub
+@vindex re_nsub @r{field, set by @code{re_compile_pattern}}
+to the number of subexpressions in @var{regex}.
+
+@item fastmap_accurate
+@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}}
+to zero on the theory that the pattern you're compiling is different
+than the one previously compiled into @code{buffer}; in that case (since
+you can't make a fastmap without a compiled pattern),
+@code{fastmap} would either contain an incompatible fastmap, or nothing
+at all.
+
+@c xx what else?
+@end table
+
+If @code{re_compile_pattern} can't compile @var{regex}, it returns an
+error string corresponding to one of the errors listed in @ref{POSIX
+Regular Expression Compiling}.
+
+
+@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions
+@subsection GNU Matching
+
+@cindex matching with GNU functions
+
+Matching the @sc{gnu} way means trying to match as much of a string as
+possible starting at a position within it you specify. Once you've compiled
+a pattern into a pattern buffer (@pxref{GNU Regular Expression
+Compiling}), you can ask the matcher to match that pattern against a
+string using:
+
+@findex re_match
+@example
+int
+re_match (struct re_pattern_buffer *@var{pattern_buffer},
+ const char *@var{string}, const int @var{size},
+ const int @var{start}, struct re_registers *@var{regs})
+@end example
+
+@noindent
+@var{pattern_buffer} is the address of a pattern buffer containing a
+compiled pattern. @var{string} is the string you want to match; it can
+contain newline and null characters. @var{size} is the length of that
+string. @var{start} is the string index at which you want to
+begin matching; the first character of @var{string} is at index zero.
+@xref{Using Registers}, for an explanation of @var{regs}; you can safely
+pass zero.
+
+@code{re_match} matches the regular expression in @var{pattern_buffer}
+against the string @var{string} according to the syntax in
+@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular
+Expression Compiling}, for how to set it.) The function returns
+@math{-1} if the compiled pattern does not match any part of
+@var{string} and @math{-2} if an internal error happens; otherwise, it
+returns how many (possibly zero) characters of @var{string} the pattern
+matched.
+
+An example: suppose @var{pattern_buffer} points to a pattern buffer
+containing the compiled pattern for @samp{a*}, and @var{string} points
+to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start}
+is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the
+last three @samp{a}s in @var{string}. If @var{start} is 0,
+@code{re_match} returns 5, i.e., @samp{a*} would have matched all the
+@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns
+zero.
+
+If @var{start} is not between zero and @var{size}, then
+@code{re_match} returns @math{-1}.
+
+
+@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions
+@subsection GNU Searching
+
+@cindex searching with GNU functions
+
+@dfn{Searching} means trying to match starting at successive positions
+within a string. The function @code{re_search} does this.
+
+Before calling @code{re_search}, you must compile your regular
+expression. @xref{GNU Regular Expression Compiling}.
+
+Here is the function declaration:
+
+@findex re_search
+@example
+int
+re_search (struct re_pattern_buffer *@var{pattern_buffer},
+ const char *@var{string}, const int @var{size},
+ const int @var{start}, const int @var{range},
+ struct re_registers *@var{regs})
+@end example
+
+@noindent
+@vindex start @r{argument to @code{re_search}}
+@vindex range @r{argument to @code{re_search}}
+whose arguments are the same as those to @code{re_match} (@pxref{GNU
+Matching}) except that the two arguments @var{start} and @var{range}
+replace @code{re_match}'s argument @var{start}.
+
+If @var{range} is positive, then @code{re_search} attempts a match
+starting first at index @var{start}, then at @math{@var{start} + 1} if
+that fails, and so on, up to @math{@var{start} + @var{range}}; if
+@var{range} is negative, then it attempts a match starting first at
+index @var{start}, then at @math{@var{start} - 1} if that fails, and so
+on.
+
+If @var{start} is not between zero and @var{size}, then @code{re_search}
+returns @math{-1}. When @var{range} is positive, @code{re_search}
+adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is
+between zero and @var{size}, if necessary; that way it won't search
+outside of @var{string}. Similarly, when @var{range} is negative,
+@code{re_search} adjusts @var{range} so that @math{@var{start} +
+@var{range} + 1} is between zero and @var{size}, if necessary.
+
+If the @code{fastmap} field of @var{pattern_buffer} is zero,
+@code{re_search} matches starting at consecutive positions; otherwise,
+it uses @code{fastmap} to make the search more efficient.
+@xref{Searching with Fastmaps}.
+
+If no match is found, @code{re_search} returns @math{-1}. If
+a match is found, it returns the index where the match began. If an
+internal error happens, it returns @math{-2}.
+
+
+@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions
+@subsection Matching and Searching with Split Data
+
+Using the functions @code{re_match_2} and @code{re_search_2}, you can
+match or search in data that is divided into two strings.
+
+The function:
+
+@findex re_match_2
+@example
+int
+re_match_2 (struct re_pattern_buffer *@var{buffer},
+ const char *@var{string1}, const int @var{size1},
+ const char *@var{string2}, const int @var{size2},
+ const int @var{start},
+ struct re_registers *@var{regs},
+ const int @var{stop})
+@end example
+
+@noindent
+is similar to @code{re_match} (@pxref{GNU Matching}) except that you
+pass @emph{two} data strings and sizes, and an index @var{stop} beyond
+which you don't want the matcher to try matching. As with
+@code{re_match}, if it succeeds, @code{re_match_2} returns how many
+characters of the (concatenated) data it matched. Regard @var{string1} and
+@var{string2} as concatenated when you set the arguments @var{start} and
+@var{stop} and use the contents of @var{regs}; @code{re_match_2} never
+returns a value larger than @math{@var{size1} + @var{size2}}.
+
+The function:
+
+@findex re_search_2
+@example
+int
+re_search_2 (struct re_pattern_buffer *@var{buffer},
+ const char *@var{string1}, const int @var{size1},
+ const char *@var{string2}, const int @var{size2},
+ const int @var{start}, const int @var{range},
+ struct re_registers *@var{regs},
+ const int @var{stop})
+@end example
+
+@noindent
+is similarly related to @code{re_search}.
+
+
+@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions
+@subsection Searching with Fastmaps
+
+@cindex fastmaps
+If you're searching through a long string, you should use a fastmap.
+Without one, the searcher tries to match at consecutive positions in the
+string. Generally, most of the characters in the string could not start
+a match. It takes much longer to try matching at a given position in the
+string than it does to check in a table whether or not the character at
+that position could start a match. A @dfn{fastmap} is such a table.
+
+More specifically, a fastmap is an array indexed by the characters in
+your character set. Under the @sc{ascii} encoding, therefore, a fastmap
+has 256 elements. If you want the searcher to use a fastmap with a
+given pattern buffer, you must allocate the array and assign the array's
+address to the pattern buffer's @code{fastmap} field. You either can
+compile the fastmap yourself or have @code{re_search} do it for you;
+when @code{fastmap} is nonzero, it automatically compiles a fastmap the
+first time you search using a particular compiled pattern.
+
+To compile a fastmap yourself, use:
+
+@findex re_compile_fastmap
+@example
+int
+re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer})
+@end example
+
+@noindent
+@var{pattern_buffer} is the address of a pattern buffer. If the
+character @var{c} could start a match for the pattern,
+@code{re_compile_fastmap} makes
+@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns
+@math{0} if it can compile a fastmap and @math{-2} if there is an
+internal error. For example, if @samp{|} is the alternation operator
+and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then
+@code{re_compile_fastmap} sets @code{fastmap['a']} and
+@code{fastmap['b']} (and no others).
+
+@code{re_search} uses a fastmap as it moves along in the string: it
+checks the string's characters until it finds one that's in the fastmap.
+Then it tries matching at that character. If the match fails, it
+repeats the process. So, by using a fastmap, @code{re_search} doesn't
+waste time trying to match at positions in the string that couldn't
+start a match.
+
+If you don't want @code{re_search} to use a fastmap,
+store zero in the @code{fastmap} field of the pattern buffer before
+calling @code{re_search}.
+
+Once you've initialized a pattern buffer's @code{fastmap} field, you
+need never do so again---even if you compile a new pattern in
+it---provided the way the field is set still reflects whether or not you
+want a fastmap. @code{re_search} will still either do nothing if
+@code{fastmap} is null or, if it isn't, compile a new fastmap for the
+new pattern.
+
+@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions
+@subsection GNU Translate Tables
+
+If you set the @code{translate} field of a pattern buffer to a translate
+table, then the @sc{gnu} Regex functions to which you've passed that
+pattern buffer use it to apply a simple transformation
+to all the regular expression and string characters at which they look.
+
+A @dfn{translate table} is an array indexed by the characters in your
+character set. Under the @sc{ascii} encoding, therefore, a translate
+table has 256 elements. The array's elements are also characters in
+your character set. When the Regex functions see a character @var{c},
+they use @code{translate[@var{c}]} in its place, with one exception: the
+character after a @samp{\} is not translated. (This ensures that the
+operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.)
+
+For example, a table that maps all lowercase letters to the
+corresponding uppercase ones would cause the matcher to ignore
+differences in case.@footnote{A table that maps all uppercase letters to
+the corresponding lowercase ones would work just as well for this
+purpose.} Such a table would map all characters except lowercase letters
+to themselves, and lowercase letters to the corresponding uppercase
+ones. Under the @sc{ascii} encoding, here's how you could initialize
+such a table (we'll call it @code{case_fold}):
+
+@example
+for (i = 0; i < 256; i++)
+ case_fold[i] = i;
+for (i = 'a'; i <= 'z'; i++)
+ case_fold[i] = i - ('a' - 'A');
+@end example
+
+You tell Regex to use a translate table on a given pattern buffer by
+assigning that table's address to the @code{translate} field of that
+buffer. If you don't want Regex to do any translation, put zero into
+this field. You'll get weird results if you change the table's contents
+anytime between compiling the pattern buffer, compiling its fastmap, and
+matching or searching with the pattern buffer.
+
+@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions
+@subsection Using Registers
+
+A group in a regular expression can match a (possibly empty) substring
+of the string that the regular expression as a whole matched. The matcher
+remembers the beginning and end of the substring matched by
+each group.
+
+To find out what they matched, pass a nonzero @var{regs} argument to a
+@sc{gnu} matching or searching function (@pxref{GNU Matching} and
+@ref{GNU Searching}), i.e., the address of a structure of this type, as
+defined in @file{regex.h}:
+
+@c We don't bother to include this directly from regex.h,
+@c since it changes so rarely.
+@example
+@tindex re_registers
+@vindex num_regs @r{in @code{struct re_registers}}
+@vindex start @r{in @code{struct re_registers}}
+@vindex end @r{in @code{struct re_registers}}
+struct re_registers
+@{
+ unsigned num_regs;
+ regoff_t *start;
+ regoff_t *end;
+@};
+@end example
+
+Except for (possibly) the @var{num_regs}'th element (see below), the
+@var{i}th element of the @code{start} and @code{end} arrays records
+information about the @var{i}th group in the pattern. (They're declared
+as C pointers, but this is only because not all C compilers accept
+zero-length arrays; conceptually, it is simplest to think of them as
+arrays.)
+
+The @code{start} and @code{end} arrays are allocated in various ways,
+depending on the value of the @code{regs_allocated}
+@vindex regs_allocated
+field in the pattern buffer passed to the matcher.
+
+The simplest and perhaps most useful is to let the matcher (re)allocate
+enough space to record information for all the groups in the regular
+expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED},
+@vindex REGS_UNALLOCATED
+the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub}
+is another field in the pattern buffer; @pxref{GNU Pattern Buffers}),
+sets the extra element to @math{-1}, and sets @code{regs_allocated} to
+@code{REGS_REALLOCATE}.
+@vindex REGS_REALLOCATE
+Then on subsequent calls with the same pattern buffer and @var{regs}
+arguments, the matcher reallocates more space if necessary.
+
+It would perhaps be more logical to make the @code{regs_allocated} field
+part of the @code{re_registers} structure, instead of part of the
+pattern buffer. But in that case the caller would be forced to
+initialize the structure before passing it. Much existing code doesn't
+do this initialization, and it's arguably better to avoid it anyway.
+
+@code{re_compile_pattern} sets @code{regs_allocated} to
+@code{REGS_UNALLOCATED},
+so if you use the GNU regular expression
+functions, you get this behavior by default.
+
+@c xx document re_set_registers
+
+@sc{posix}, on the other hand, requires a different interface: the
+caller is supposed to pass in a fixed-length array which the matcher
+fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED},
+@vindex REGS_FIXED
+the matcher simply fills that array.
+
+The following examples illustrate the information recorded in the
+@code{re_registers} structure. (In all of them, @samp{(} represents the
+open-group and @samp{)} the close-group operator. The first character
+in the string @var{string} is at index 0.)
+
+@c xx i'm not sure this is all true anymore.
+
+@itemize @bullet
+
+@item
+If the regular expression has an @w{@var{i}-th}
+group not contained within another group that matches a
+substring of @var{string}, then the function sets
+@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where
+the substring matched by the @w{@var{i}-th} group begins, and
+@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that
+substring's end. The function sets @code{@w{@var{regs}->}start[0]} and
+@code{@w{@var{regs}->}end[0]} to analogous information about the entire
+pattern.
+
+For example, when you match @samp{((a)(b))} against @samp{ab}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}
+
+@item
+0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}
+
+@item
+0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}
+
+@item
+1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]}
+@end itemize
+
+@item
+If a group matches more than once (as it might if followed by,
+e.g., a repetition operator), then the function reports the information
+about what the group @emph{last} matched.
+
+For example, when you match the pattern @samp{(a)*} against the string
+@samp{aa}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}
+
+@item
+1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}
+@end itemize
+
+@item
+If the @w{@var{i}-th} group does not participate in a
+successful match, e.g., it is an alternative not taken or a
+repetition operator allows zero repetitions of it, then the function
+sets @code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}.
+
+For example, when you match the pattern @samp{(a)*b} against
+the string @samp{b}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
+
+@item
+@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}
+@end itemize
+
+@item
+If the @w{@var{i}-th} group matches a zero-length string, then the
+function sets @code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that
+zero-length string.
+
+For example, when you match the pattern @samp{(a*)b} against the string
+@samp{b}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
+
+@item
+0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}
+@end itemize
+
+@ignore
+The function sets @code{@w{@var{regs}->}start[0]} and
+@code{@w{@var{regs}->}end[0]} to analogous information about the entire
+pattern.
+
+For example, when you match the pattern @samp{(a*)} against the empty
+string, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]}
+
+@item
+0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}
+@end itemize
+@end ignore
+
+@item
+If an @w{@var{i}-th} group contains a @w{@var{j}-th} group
+in turn not contained within any other group within group @var{i} and
+the function reports a match of the @w{@var{i}-th} group, then it
+records in @code{@w{@var{regs}->}start[@var{j}]} and
+@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of
+the @w{@var{j}-th} group.
+
+For example, when you match the pattern @samp{((a*)b)*} against the
+string @samp{abb}, @w{group 2} last matches the empty string, so you
+get what it previously matched:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}
+
+@item
+2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}
+
+@item
+2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]}
+@end itemize
+
+When you match the pattern @samp{((a)*b)*} against the string
+@samp{abb}, @w{group 2} doesn't participate in the last match, so you
+get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}
+
+@item
+2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}
+
+@item
+0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}
+@end itemize
+
+@item
+If an @w{@var{i}-th} group contains a @w{@var{j}-th} group
+in turn not contained within any other group within group @var{i}
+and the function sets
+@code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets
+@code{@w{@var{regs}->}start[@var{j}]} and
+@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}.
+
+For example, when you match the pattern @samp{((a)*b)*c} against the
+string @samp{c}, you get:
+
+@itemize
+@item
+0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
+
+@item
+@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}
+
+@item
+@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]}
+@end itemize
+
+@end itemize
+
+@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions
+@subsection Freeing GNU Pattern Buffers
+
+To free any allocated fields of a pattern buffer, you can use the
+@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers},
+since the type @code{regex_t}---the type for @sc{posix} pattern
+buffers---is equivalent to the type @code{re_pattern_buffer}. After
+freeing a pattern buffer, you need to again compile a regular expression
+in it (@pxref{GNU Regular Expression Compiling}) before passing it to
+a matching or searching function.
+
+
+@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex
+@section POSIX Regex Functions
+
+If you're writing code that has to be @sc{posix} compatible, you'll need
+to use these functions. Their interfaces are as specified by @sc{posix},
+draft 1003.2/D11.2.
+
+@menu
+* POSIX Pattern Buffers:: The regex_t type.
+* POSIX Regular Expression Compiling:: regcomp ()
+* POSIX Matching:: regexec ()
+* Reporting Errors:: regerror ()
+* Using Byte Offsets:: The regmatch_t type.
+* Freeing POSIX Pattern Buffers:: regfree ()
+@end menu
+
+
+@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions
+@subsection POSIX Pattern Buffers
+
+To compile or match a given regular expression the @sc{posix} way, you
+must supply a pattern buffer exactly the way you do for @sc{gnu}
+(@pxref{GNU Pattern Buffers}). @sc{posix} pattern buffers have type
+@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer
+type @code{re_pattern_buffer}.
+
+
+@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions
+@subsection POSIX Regular Expression Compiling
+
+With @sc{posix}, you can only search for a given regular expression; you
+can't match it. To do this, you must first compile it in a
+pattern buffer, using @code{regcomp}.
+
+@ignore
+Before calling @code{regcomp}, you must initialize this pattern buffer
+as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See
+below, however, for how to choose a syntax with which to compile.
+@end ignore
+
+To compile a pattern buffer, use:
+
+@findex regcomp
+@example
+int
+regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags})
+@end example
+
+@noindent
+@var{preg} is the initialized pattern buffer's address, @var{regex} is
+the regular expression's address, and @var{cflags} is the compilation
+flags, which Regex considers as a collection of bits. Here are the
+valid bits, as defined in @file{regex.h}:
+
+@table @code
+
+@item REG_EXTENDED
+@vindex REG_EXTENDED
+says to use @sc{posix} Extended Regular Expression syntax; if this isn't
+set, then says to use @sc{posix} Basic Regular Expression syntax.
+@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly.
+
+@item REG_ICASE
+@vindex REG_ICASE
+@cindex ignoring case
+says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate}
+field to a translate table which ignores case, replacing anything you've
+put there before.
+
+@item REG_NOSUB
+@vindex REG_NOSUB
+says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching},
+for what this means.
+
+@item REG_NEWLINE
+@vindex REG_NEWLINE
+says that a:
+
+@itemize @bullet
+
+@item
+match-any-character operator (@pxref{Match-any-character
+Operator}) doesn't match a newline.
+
+@item
+nonmatching list not containing a newline (@pxref{List
+Operators}) matches a newline.
+
+@item
+match-beginning-of-line operator (@pxref{Match-beginning-of-line
+Operator}) matches the empty string immediately after a newline,
+regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for
+an explanation of @code{REG_NOTBOL}).
+
+@item
+match-end-of-line operator (@pxref{Match-end-of-line
+Operator}) matches the empty string immediately before a newline,
+regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching},
+for an explanation of @code{REG_NOTEOL}).
+
+@end itemize
+
+@end table
+
+If @code{regcomp} successfully compiles the regular expression, it
+returns zero and sets @code{*@var{preg}} to the compiled
+pattern. Except for @code{syntax} (which it sets as explained above), it
+also sets the same fields the same way as does the @sc{gnu} compiling
+function (@pxref{GNU Regular Expression Compiling}).
+
+If @code{regcomp} can't compile the regular expression, it returns one
+of the error codes listed here. (Except when noted differently, the
+syntax in all examples below is basic regular expression syntax.)
+
+@table @code
+
+@comment repetitions
+@item REG_BADRPT
+For example, the consecutive repetition operators @samp{**} in
+@samp{a**} are invalid. As another example, if the syntax is extended
+regular expression syntax, then the repetition operator @samp{*} with
+nothing on which to operate in @samp{*} is invalid.
+
+@item REG_BADBR
+For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid.
+
+@item REG_EBRACE
+For example, @samp{a\@{1} is missing a close-interval operator.
+
+@comment lists
+@item REG_EBRACK
+For example, @samp{[a} is missing a close-list operator.
+
+@item REG_ERANGE
+For example, the range ending point @samp{z} that collates lower than
+does its starting point @samp{a} in @samp{[z-a]} is invalid. Also, the
+range with the character class @samp{[:alpha:]} as its starting point in
+@samp{[[:alpha:]-|]} is invalid.
+
+@item REG_ECTYPE
+For example, the character class name @samp{foo} in @samp{[[:foo:]} is
+invalid.
+
+@comment groups
+@item REG_EPAREN
+For example, @samp{a\)} is missing an open-group operator and @samp{\(a}
+is missing a close-group operator.
+
+@item REG_ESUBREG
+For example, the back reference @samp{\2} that refers to a nonexistent
+subexpression in @samp{\(a\)\2} is invalid.
+
+@comment unfinished business
+
+@item REG_EEND
+Returned when a regular expression causes no other more specific error.
+
+@item REG_EESCAPE
+For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the
+one in @samp{\}.
+
+@comment kitchen sink
+@item REG_BADPAT
+For example, in the extended regular expression syntax, the empty group
+@samp{()} in @samp{a()b} is invalid.
+
+@comment internal
+@item REG_ESIZE
+Returned when a regular expression needs a pattern buffer larger than
+65536 bytes.
+
+@item REG_ESPACE
+Returned when a regular expression causes Regex to run out of memory.
+
+@end table
+
+
+@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions
+@subsection POSIX Matching
+
+Matching the @sc{posix} way means trying to match a null-terminated
+string starting at its first character. Once you've compiled a pattern
+into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you
+can ask the matcher to match that pattern against a string using:
+
+@findex regexec
+@example
+int
+regexec (const regex_t *@var{preg}, const char *@var{string},
+ size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags})
+@end example
+
+@noindent
+@var{preg} is the address of a pattern buffer for a compiled pattern.
+@var{string} is the string you want to match.
+
+@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. If you
+pass zero for @var{nmatch} or you compiled @var{preg} with the
+compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore
+@var{pmatch}; otherwise, you must allocate it to have at least
+@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte
+offsets in @var{pmatch}, and set to @math{-1} any unused elements up to
+@code{@var{pmatch}[@var{nmatch} - 1]}.
+
+@var{eflags} specifies @dfn{execution flags}---namely, the two bits
+@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If
+you set @code{REG_NOTBOL}, then the match-beginning-of-line operator
+(@pxref{Match-beginning-of-line Operator}) always fails to match.
+This lets you match against pieces of a line, as you would need to if,
+say, searching for repeated instances of a given pattern in a line; it
+would work correctly for patterns both with and without
+match-beginning-of-line operators. @code{REG_NOTEOL} works analogously
+for the match-end-of-line operator (@pxref{Match-end-of-line
+Operator}); it exists for symmetry.
+
+@code{regexec} tries to find a match for @var{preg} in @var{string}
+according to the syntax in @var{preg}'s @code{syntax} field.
+(@xref{POSIX Regular Expression Compiling}, for how to set it.) The
+function returns zero if the compiled pattern matches @var{string} and
+@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't.
+
+@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions
+@subsection Reporting Errors
+
+If either @code{regcomp} or @code{regexec} fails, it returns a nonzero
+error code, the possibilities for which are defined in @file{regex.h}.
+@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for
+what these codes mean. To get an error string corresponding to these
+codes, you can use:
+
+@findex regerror
+@example
+size_t
+regerror (int @var{errcode},
+ const regex_t *@var{preg},
+ char *@var{errbuf},
+ size_t @var{errbuf_size})
+@end example
+
+@noindent
+@var{errcode} is an error code, @var{preg} is the address of the pattern
+buffer which provoked the error, @var{errbuf} is the error buffer, and
+@var{errbuf_size} is @var{errbuf}'s size.
+
+@code{regerror} returns the size in bytes of the error string
+corresponding to @var{errcode} (including its terminating null). If
+@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in
+@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the
+error string, followed by a null.
+@var{errbuf_size} must be a nonnegative number less than or equal to the
+size in bytes of @var{errbuf}.
+
+You can call @code{regerror} with a null @var{errbuf} and a zero
+@var{errbuf_size} to determine how large @var{errbuf} need be to
+accommodate @code{regerror}'s error string.
+
+@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions
+@subsection Using Byte Offsets
+
+In @sc{posix}, variables of type @code{regmatch_t} hold information
+analogous, but not identical, to @sc{gnu}'s registers (@pxref{Using
+Registers}). To get information about registers in @sc{posix}, pass to
+@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e.,
+the address of a structure of this type, defined in
+@file{regex.h}:
+
+@tindex regmatch_t
+@example
+typedef struct
+@{
+ regoff_t rm_so;
+ regoff_t rm_eo;
+@} regmatch_t;
+@end example
+
+When reading in @ref{Using Registers}, about how the matching function
+stores the information into the registers, substitute @var{pmatch} for
+@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for
+@code{@w{@var{regs}->}start[@var{i}]} and
+@code{@w{@var{pmatch}[@var{i}].}rm_eo} for
+@code{@w{@var{regs}->}end[@var{i}]}.
+
+@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions
+@subsection Freeing POSIX Pattern Buffers
+
+To free any allocated fields of a pattern buffer, use:
+
+@findex regfree
+@example
+void
+regfree (regex_t *@var{preg})
+@end example
+
+@noindent
+@var{preg} is the pattern buffer whose allocated fields you want freed.
+@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used}
+fields to zero. After freeing a pattern buffer, you need to again
+compile a regular expression in it (@pxref{POSIX Regular Expression
+Compiling}) before passing it to the matching function (@pxref{POSIX
+Matching}).
+
+
+@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex
+@section BSD Regex Functions
+
+If you're writing code that has to be Berkeley @sc{unix} compatible,
+you'll need to use these functions whose interfaces are the same as those
+in Berkeley @sc{unix}.
+
+@menu
+* BSD Regular Expression Compiling:: re_comp ()
+* BSD Searching:: re_exec ()
+@end menu
+
+@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions
+@subsection BSD Regular Expression Compiling
+
+With Berkeley @sc{unix}, you can only search for a given regular
+expression; you can't match one. To search for it, you must first
+compile it. Before you compile it, you must indicate the regular
+expression syntax you want it compiled according to by setting the
+variable @code{re_syntax_options} (declared in @file{regex.h}) to some
+syntax (@pxref{Regular Expression Syntax}).
+
+To compile a regular expression use:
+
+@findex re_comp
+@example
+char *
+re_comp (char *@var{regex})
+@end example
+
+@noindent
+@var{regex} is the address of a null-terminated regular expression.
+@code{re_comp} uses an internal pattern buffer, so you can use only the
+most recently compiled pattern buffer. This means that if you want to
+use a given regular expression that you've already compiled---but it
+isn't the latest one you've compiled---you'll have to recompile it. If
+you call @code{re_comp} with the null string (@emph{not} the empty
+string) as the argument, it doesn't change the contents of the pattern
+buffer.
+
+If @code{re_comp} successfully compiles the regular expression, it
+returns zero. If it can't compile the regular expression, it returns
+an error string. @code{re_comp}'s error messages are identical to those
+of @code{re_compile_pattern} (@pxref{GNU Regular Expression
+Compiling}).
+
+@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions
+@subsection BSD Searching
+
+Searching the Berkeley @sc{unix} way means searching in a string
+starting at its first character and trying successive positions within
+it to find a match. Once you've compiled a pattern using @code{re_comp}
+(@pxref{BSD Regular Expression Compiling}), you can ask Regex
+to search for that pattern in a string using:
+
+@findex re_exec
+@example
+int
+re_exec (char *@var{string})
+@end example
+
+@noindent
+@var{string} is the address of the null-terminated string in which you
+want to search.
+
+@code{re_exec} returns either 1 for success or 0 for failure. It
+automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}).
+
+
+@node Copying, Index, Programming with Regex, Top
+@appendix GNU GENERAL PUBLIC LICENSE
+@center Version 2, June 1991
+
+@display
+Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
+675 Mass Ave, Cambridge, MA 02139, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@unnumberedsec Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+@end ifinfo
+
+@enumerate
+@item
+This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The ``Program'', below,
+refers to any such program or work, and a ``work based on the Program''
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term ``modification''.) Each licensee is addressed as ``you''.
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+@item
+You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+@item
+You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+@enumerate a
+@item
+You must cause the modified files to carry prominent notices
+stating that you changed the files and the date of any change.
+
+@item
+You must cause any work that you distribute or publish, that in
+whole or in part contains or is derived from the Program or any
+part thereof, to be licensed as a whole at no charge to all third
+parties under the terms of this License.
+
+@item
+If the modified program normally reads commands interactively
+when run, you must cause it, when started running for such
+interactive use in the most ordinary way, to print or display an
+announcement including an appropriate copyright notice and a
+notice that there is no warranty (or else, saying that you provide
+a warranty) and that users may redistribute the program under
+these conditions, and telling the user how to view a copy of this
+License. (Exception: if the Program itself is interactive but
+does not normally print such an announcement, your work based on
+the Program is not required to print an announcement.)
+@end enumerate
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+@item
+You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+@enumerate a
+@item
+Accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of Sections
+1 and 2 above on a medium customarily used for software interchange; or,
+
+@item
+Accompany it with a written offer, valid for at least three
+years, to give any third party, for a charge no more than your
+cost of physically performing source distribution, a complete
+machine-readable copy of the corresponding source code, to be
+distributed under the terms of Sections 1 and 2 above on a medium
+customarily used for software interchange; or,
+
+@item
+Accompany it with the information you received as to the offer
+to distribute corresponding source code. (This alternative is
+allowed only for noncommercial distribution and only if you
+received the program in object code or executable form with such
+an offer, in accord with Subsection b above.)
+@end enumerate
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+@item
+You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+@item
+You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+@item
+If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+@item
+If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@unnumberedsec Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the ``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and a brief idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+@end smallexample
+
+The hypothetical commands @samp{show w} and @samp{show c} should show
+the appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than @samp{show w} and
+@samp{show c}; they could even be mouse-clicks or menu items---whatever
+suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here is a sample; alter the names:
+
+@example
+Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+`Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end example
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
+
+
+@node Index, , Copying, Top
+@unnumbered Index
+
+@printindex cp
+
+@contents
+
+@bye
diff --git a/gnu/lib/libregex/regex.c b/gnu/lib/libregex/regex.c
new file mode 100644
index 0000000..8169880
--- /dev/null
+++ b/gnu/lib/libregex/regex.c
@@ -0,0 +1,4948 @@
+/* Extended regular expression matching and search library,
+ version 0.12.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
+
+ Copyright (C) 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+/* AIX requires this to be the first thing in the file. */
+#if defined (_AIX) && !defined (REGEX_MALLOC)
+ #pragma alloca
+#endif
+
+#define _GNU_SOURCE
+
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+/* The `emacs' switch turns on certain matching commands
+ that make sense only in Emacs. */
+#ifdef emacs
+
+#include "lisp.h"
+#include "buffer.h"
+#include "syntax.h"
+
+/* Emacs uses `NULL' as a predicate. */
+#undef NULL
+
+#else /* not emacs */
+
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ `BSTRING', as far as I know, and neither of them use this code. */
+#if HAVE_STRING_H || STDC_HEADERS
+#include <string.h>
+#ifndef bcmp
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
+#endif
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
+#endif
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+
+/* Define the syntax stuff for \<, \>, etc. */
+
+/* This must be nonzero for the wordchar and notwordchar pattern
+ commands in re_match_2. */
+#ifndef Sword
+#define Sword 1
+#endif
+
+#ifdef SYNTAX_TABLE
+
+extern char *re_syntax_table;
+
+#else /* not SYNTAX_TABLE */
+
+/* How many characters in the character set. */
+#define CHAR_SET_SIZE 256
+
+static char re_syntax_table[CHAR_SET_SIZE];
+
+static void
+init_syntax_once ()
+{
+  /* Nonzero once the table has been filled in.  */
+  static int initialized = 0;
+  register int ch;
+
+  if (!initialized)
+    {
+      /* Clear the table; only the entries set below become Sword.  */
+      bzero (re_syntax_table, sizeof re_syntax_table);
+
+      /* Letters, digits and underscore are marked as word characters.  */
+      for (ch = 'a'; ch <= 'z'; ch++)
+        re_syntax_table[ch] = Sword;
+      for (ch = 'A'; ch <= 'Z'; ch++)
+        re_syntax_table[ch] = Sword;
+      for (ch = '0'; ch <= '9'; ch++)
+        re_syntax_table[ch] = Sword;
+      re_syntax_table['_'] = Sword;
+
+      initialized = 1;
+    }
+}
+
+#endif /* not SYNTAX_TABLE */
+
+#define SYNTAX(c) re_syntax_table[c]
+
+#endif /* not emacs */
+
+/* Get the interface, including the syntax bits. */
+#include "regex.h"
+
+/* isalpha etc. are used for the character classes. */
+#include <ctype.h>
+
+#ifndef isascii
+#define isascii(c) 1
+#endif
+
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
+#endif
+
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
+/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
+ use `alloca' instead of `malloc'. This is because using malloc in
+ re_search* or re_match* could cause memory leaks when C-g is used in
+ Emacs; also, malloc is slower and causes storage fragmentation. On
+ the other hand, malloc is more portable, and easier to debug.
+
+ Because we sometimes use alloca, some routines have to be macros,
+ not functions -- `alloca'-allocated space disappears at the end of the
+ function it is called in. */
+
+#ifdef REGEX_MALLOC
+
+#define REGEX_ALLOCATE malloc
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
+
+#else /* not REGEX_MALLOC */
+
+/* Emacs already defines alloca, sometimes. */
+#ifndef alloca
+
+/* Make alloca work the best possible way. */
+#ifdef __GNUC__
+#define alloca __builtin_alloca
+#else /* not __GNUC__ */
+#if HAVE_ALLOCA_H
+#include <alloca.h>
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
+char *alloca ();
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
+
+#endif /* not alloca */
+
+#define REGEX_ALLOCATE alloca
+
+/* Assumes a `char *destination' variable. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
+ destination)
+
+#endif /* not REGEX_MALLOC */
+
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
+/* (Re)Allocate N items of type T using malloc, or fail. */
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits. */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* These are the command codes that appear in compiled regular
+ expressions. Some opcodes are followed by argument bytes. A
+ command code can specify any interpretation whatsoever for its
+ arguments. Zero bytes may appear in the compiled regular expression.
+
+ The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+ So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+ `exactn' we use here must also be 1. */
+
+typedef enum
+{
+ no_op = 0,
+
+ /* Followed by one byte giving n, then by n literal bytes. */
+ exactn = 1,
+
+ /* Matches any (more or less) character. */
+ anychar,
+
+ /* Matches any one char belonging to specified set. First
+ following byte is number of bitmap bytes. Then come bytes
+ for a bitmap saying which chars are in. Bits in each byte
+ are ordered low-bit-first. A character is in the set if its
+ bit is 1. A character too large to have a bit in the map is
+ automatically not in the set. */
+ charset,
+
+ /* Same parameters as charset, but match any character that is
+ not one of those specified. */
+ charset_not,
+
+ /* Start remembering the text that is matched, for storing in a
+ register. Followed by one byte with the register number, in
+ the range 0 to one less than the pattern buffer's re_nsub
+ field. Then followed by one byte with the number of groups
+ inner to this one. (This last has to be part of the
+ start_memory only because we need it in the on_failure_jump
+ of re_match_2.) */
+ start_memory,
+
+ /* Stop remembering the text that is matched and store it in a
+ memory register. Followed by one byte with the register
+ number, in the range 0 to one less than `re_nsub' in the
+ pattern buffer, and one byte with the number of inner groups,
+ just like `start_memory'. (We need the number of inner
+ groups here because we don't have any easy way of finding the
+ corresponding start_memory when we're at a stop_memory.) */
+ stop_memory,
+
+ /* Match a duplicate of something remembered. Followed by one
+ byte containing the register number. */
+ duplicate,
+
+ /* Fail unless at beginning of line. */
+ begline,
+
+ /* Fail unless at end of line. */
+ endline,
+
+ /* Succeeds if at beginning of buffer (if emacs) or at beginning
+ of string to be matched (if not). */
+ begbuf,
+
+ /* Analogously, for end of buffer/string. */
+ endbuf,
+
+ /* Followed by two-byte relative address to which to jump. */
+ jump,
+
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
+
+ /* Followed by two-byte relative address of place to resume at
+ in case of failure. */
+ on_failure_jump,
+
+ /* Like on_failure_jump, but pushes a placeholder instead of the
+ current string position when executed. */
+ on_failure_keep_string_jump,
+
+ /* Throw away latest failure point and then jump to following
+ two-byte relative address. */
+ pop_failure_jump,
+
+ /* Change to pop_failure_jump if we know we won't have to backtrack
+ to match; otherwise change to jump. This is used to jump
+ back to the beginning of a repeat. If what follows this jump
+ clearly won't match what the repeat does, such that we can be
+ sure that there is no use backtracking out of repetitions
+ already matched, then we change it to a pop_failure_jump.
+ Followed by two-byte address. */
+ maybe_pop_jump,
+
+ /* Jump to following two-byte address, and push a dummy failure
+ point. This failure point will be thrown away if an attempt
+ is made to use it for a failure. A `+' construct makes this
+ before the first repeat. Also used as an intermediary kind
+ of jump when compiling an alternative. */
+ dummy_failure_jump,
+
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
+ succeed_n,
+
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
+
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
+ set_number_at,
+
+ wordchar, /* Matches any word-constituent character. */
+ notwordchar, /* Matches any char that is not a word-constituent. */
+
+ wordbeg, /* Succeeds if at word beginning. */
+ wordend, /* Succeeds if at word end. */
+
+ wordbound, /* Succeeds if at a word boundary. */
+ notwordbound /* Succeeds if not at a word boundary. */
+
+#ifdef emacs
+ ,before_dot, /* Succeeds if before point. */
+ at_dot, /* Succeeds if at point. */
+ after_dot, /* Succeeds if after point. */
+
+ /* Matches any character whose syntax is specified. Followed by
+ a byte which contains a syntax code, e.g., Sword. */
+ syntaxspec,
+
+ /* Matches any character whose syntax is not that specified. */
+ notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern. */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION. */
+
+#define STORE_NUMBER(destination, number) \
+ do { \
+ (destination)[0] = (number) & 0377; \
+ (destination)[1] = (number) >> 8; \
+ } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+ the byte after where the number is stored. Therefore, DESTINATION
+ must be an lvalue. */
+
+#define STORE_NUMBER_AND_INCR(destination, number) \
+ do { \
+ STORE_NUMBER (destination, number); \
+ (destination) += 2; \
+ } while (0)
+
+/* Put into DESTINATION a number stored in two contiguous bytes starting
+ at SOURCE. */
+
+#define EXTRACT_NUMBER(destination, source) \
+ do { \
+ (destination) = *(source) & 0377; \
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
+ } while (0)
+
+#ifdef DEBUG
+static void
+extract_number (dest, source)
+     int *dest;
+     unsigned char *source;
+{
+  /* Low byte comes first; the second byte is the sign-extended high part.  */
+  int high = SIGN_EXTEND_CHAR (source[1]);
+  *dest = (source[0] & 0377) + (high << 8);
+}
+
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
+ SOURCE must be an lvalue. */
+
+#define EXTRACT_NUMBER_AND_INCR(destination, source) \
+ do { \
+ EXTRACT_NUMBER (destination, source); \
+ (source) += 2; \
+ } while (0)
+
+#ifdef DEBUG
+static void
+extract_number_and_incr (destination, source)
+     int *destination;
+     unsigned char **source;
+{
+  extract_number (destination, source[0]);
+  source[0] += 2;	/* Step the caller's cursor past the number.  */
+}
+
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
+
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
+#ifdef DEBUG
+
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
+extern void printchar ();
+
+/* Print the characters whose FASTMAP entry is nonzero, using `printchar';
+   a run of consecutive set entries is shown as FIRST-LAST.  */
+void
+print_fastmap (fastmap)
+     char *fastmap;
+{
+  const unsigned limit = 1 << BYTEWIDTH;
+  unsigned ch = 0;
+
+  while (ch < limit)
+    {
+      unsigned first = ch++;
+      if (!fastmap[first])
+        continue;
+
+      printchar (first);
+
+      /* Swallow the rest of a run of set entries.  */
+      while (ch < limit && fastmap[ch])
+        ch++;
+
+      /* A run longer than one character is printed as a range.  */
+      if (ch - 1 != first)
+        {
+          printf ("-");
+          printchar (ch - 1);
+        }
+    }
+  putchar ('\n');
+}
+
+
+/* Print the compiled pattern between START and END (exclusive) in
+   human-readable form: each command as `/name' plus `/'-separated operands.  */
+
+void
+print_partial_compiled_pattern (start, end)
+     unsigned char *start;
+     unsigned char *end;
+{
+  int mcnt, mcnt2;
+  unsigned char *p = start;
+  unsigned char *pend = end;
+
+  if (start == NULL)
+    {
+      printf ("(null)\n");
+      return;
+    }
+
+  /* Loop over pattern commands.  */
+  while (p < pend)
+    {
+      switch ((re_opcode_t) *p++)
+        {
+        case no_op:
+          printf ("/no_op");
+          break;
+
+        case exactn:
+          mcnt = *p++;
+          printf ("/exactn/%d", mcnt);
+          while (mcnt-- > 0 && p < pend)	/* Safe for a zero count.  */
+            {
+              putchar ('/');
+              printchar (*p++);
+            }
+          break;
+
+        case start_memory:
+          mcnt = *p++;
+          printf ("/start_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case stop_memory:
+          mcnt = *p++;
+          printf ("/stop_memory/%d/%d", mcnt, *p++);
+          break;
+
+        case duplicate:
+          printf ("/duplicate/%d", *p++);
+          break;
+
+        case anychar:
+          printf ("/anychar");
+          break;
+
+        case charset:
+        case charset_not:
+          {
+            register int c;
+
+            printf ("/charset%s",
+                    (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+            assert (p + *p < pend);
+
+            for (c = 0; c < *p; c++)
+              {
+                unsigned bit;
+                unsigned char map_byte = p[1 + c];
+
+                putchar ('/');
+
+                for (bit = 0; bit < BYTEWIDTH; bit++)
+                  if (map_byte & (1 << bit))
+                    printchar (c * BYTEWIDTH + bit);
+              }
+            p += 1 + *p;
+            break;
+          }
+
+        case begline:
+          printf ("/begline");
+          break;
+
+        case endline:
+          printf ("/endline");
+          break;
+
+        case on_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_jump/0/%d", mcnt);
+          break;
+
+        case on_failure_keep_string_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/on_failure_keep_string_jump/0/%d", mcnt);
+          break;
+
+        case dummy_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/dummy_failure_jump/0/%d", mcnt);
+          break;
+
+        case push_dummy_failure:
+          printf ("/push_dummy_failure");
+          break;
+
+        case maybe_pop_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/maybe_pop_jump/0/%d", mcnt);
+          break;
+
+        case pop_failure_jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/pop_failure_jump/0/%d", mcnt);
+          break;
+
+        case jump_past_alt:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump_past_alt/0/%d", mcnt);
+          break;
+
+        case jump:
+          extract_number_and_incr (&mcnt, &p);
+          printf ("/jump/0/%d", mcnt);
+          break;
+
+        case succeed_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case jump_n:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case set_number_at:
+          extract_number_and_incr (&mcnt, &p);
+          extract_number_and_incr (&mcnt2, &p);
+          printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
+          break;
+
+        case wordbound:
+          printf ("/wordbound");
+          break;
+
+        case notwordbound:
+          printf ("/notwordbound");
+          break;
+
+        case wordbeg:
+          printf ("/wordbeg");
+          break;
+
+        case wordend:
+          printf ("/wordend");
+          break;	/* Without this we fell into the next case.  */
+
+#ifdef emacs
+        case before_dot:
+          printf ("/before_dot");
+          break;
+
+        case at_dot:
+          printf ("/at_dot");
+          break;
+
+        case after_dot:
+          printf ("/after_dot");
+          break;
+
+        case syntaxspec:
+          printf ("/syntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+
+        case notsyntaxspec:
+          printf ("/notsyntaxspec");
+          mcnt = *p++;
+          printf ("/%d", mcnt);
+          break;
+#endif /* emacs */
+
+        case wordchar:
+          printf ("/wordchar");
+          break;
+
+        case notwordchar:
+          printf ("/notwordchar");
+          break;
+
+        case begbuf:
+          printf ("/begbuf");
+          break;
+
+        case endbuf:
+          printf ("/endbuf");
+          break;
+
+        default:
+          printf ("?%d", *(p-1));
+        }
+    }
+  printf ("/\n");
+}
+
+/* Print BUFP's compiled pattern, its fastmap if accurate, and its fields.  */
+void
+print_compiled_pattern (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  unsigned long used = bufp->used, allocated = bufp->allocated;
+
+  print_partial_compiled_pattern (bufp->buffer, bufp->buffer + used);
+  printf ("%lu bytes used/%lu bytes allocated.\n", used, allocated);
+
+  if (bufp->fastmap_accurate && bufp->fastmap)
+    {
+      printf ("fastmap: ");
+      print_fastmap (bufp->fastmap);
+    }
+  /* Cast each field so the argument matches its conversion specifier.  */
+  printf ("re_nsub: %lu\t", (unsigned long) bufp->re_nsub);
+  printf ("regs_alloc: %d\t", (int) bufp->regs_allocated);
+  printf ("can_be_null: %d\t", (int) bufp->can_be_null);
+  printf ("newline_anchor: %d\n", (int) bufp->newline_anchor);
+  printf ("no_sub: %d\t", (int) bufp->no_sub);
+  printf ("not_bol: %d\t", (int) bufp->not_bol);
+  printf ("not_eol: %d\t", (int) bufp->not_eol);
+  printf ("syntax: %lu\n", (unsigned long) bufp->syntax);
+  /* Perhaps we should print the translate table?  */
+}
+
+/* Print the text from WHERE to the end of the STRING1/STRING2 pair.  */
+void
+print_double_string (where, string1, size1, string2, size2)
+     const char *where;
+     const char *string1;
+     const char *string2;
+     int size1;
+     int size2;
+{
+  unsigned idx;
+
+  if (where == NULL)
+    {
+      printf ("(null)");
+      return;
+    }
+  /* Starting inside STRING1: print its tail, then all of STRING2.  */
+  if (FIRST_STRING_P (where))
+    {
+      for (idx = where - string1; idx < size1; idx++)
+        printchar (string1[idx]);
+      where = string2;
+    }
+
+  for (idx = where - string2; idx < size2; idx++)
+    printchar (string2[idx]);
+}
+
+#else /* not DEBUG */
+
+#undef assert
+#define assert(e)
+
+#define DEBUG_STATEMENT(e)
+#define DEBUG_PRINT1(x)
+#define DEBUG_PRINT2(x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
+
+#endif /* not DEBUG */
+
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
+
+
+/* Select the regexp syntax to be recognized by later compilations.
+   Various utilities have historically accepted different, incompatible
+   syntaxes, and this is how a caller asks for one of them.
+
+   SYNTAX is a bit mask composed of the syntax bits defined in regex.h.
+   The syntax that was previously in effect is returned.  */
+
+reg_syntax_t
+re_set_syntax (syntax)
+     reg_syntax_t syntax;
+{
+  const reg_syntax_t previous = re_syntax_options;
+
+  re_syntax_options = syntax;
+  return previous;
+}
+
+/* This table gives an error message for each of the error codes listed
+ in regex.h. The entries must appear in the same order as the codes. */
+
+static const char *re_error_msg[] =
+ { NULL, /* REG_NOERROR */
+ "No match", /* REG_NOMATCH */
+ "Invalid regular expression", /* REG_BADPAT */
+ "Invalid collation character", /* REG_ECOLLATE */
+ "Invalid character class name", /* REG_ECTYPE */
+ "Trailing backslash", /* REG_EESCAPE */
+ "Invalid back reference", /* REG_ESUBREG */
+ "Unmatched [ or [^", /* REG_EBRACK */
+ "Unmatched ( or \\(", /* REG_EPAREN */
+ "Unmatched \\{", /* REG_EBRACE */
+ "Invalid content of \\{\\}", /* REG_BADBR */
+ "Invalid range end", /* REG_ERANGE */
+ "Memory exhausted", /* REG_ESPACE */
+ "Invalid preceding regular expression", /* REG_BADRPT */
+ "Premature end of regular expression", /* REG_EEND */
+ "Regular expression too big", /* REG_ESIZE */
+ "Unmatched ) or \\)", /* REG_ERPAREN */
+ };
+
+/* Subroutine declarations and macros for regex_compile. */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+ if necessary. Also cast from a signed character in the constant
+ string passed to us by the user to an unsigned char that we can use
+ as an array index (in, e.g., `translate'). */
+#define PATFETCH(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ if (translate) c = translate[c]; \
+ } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+ translation. */
+#define PATFETCH_RAW(c) \
+ do {if (p == pend) return REG_EEND; \
+ c = (unsigned char) *p++; \
+ } while (0)
+
+/* Go backwards one character in the pattern. */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D. We
+ cast the subscript to translate because some data is declared as
+ `char *', to avoid warnings when a string constant is passed. But
+ when we use a character as a subscript we must make it unsigned. */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'. */
+
+/* If the buffer isn't allocated when it comes in, use this. */
+#define INIT_BUF_SIZE 32
+
+/* Make sure we have at least N more bytes of space in buffer. */
+#define GET_BUFFER_SPACE(n) \
+ while (b - bufp->buffer + (n) > bufp->allocated) \
+ EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it. */
+#define BUF_PUSH(c) \
+ do { \
+ GET_BUFFER_SPACE (1); \
+ *b++ = (unsigned char) (c); \
+ } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
+ do { \
+ GET_BUFFER_SPACE (2); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
+ do { \
+ GET_BUFFER_SPACE (3); \
+ *b++ = (unsigned char) (c1); \
+ *b++ = (unsigned char) (c2); \
+ *b++ = (unsigned char) (c3); \
+ } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Double the buffer via realloc, capped at MAX_BUF_SIZE, and repoint all
+   pointers into it.  Return REG_ESIZE at the cap, REG_ESPACE on failure.  */
+#define EXTEND_BUFFER() \
+  do { \
+    unsigned char *old_buffer = bufp->buffer; \
+    unsigned char *new_buffer; \
+    unsigned long new_size; \
+    if (bufp->allocated == MAX_BUF_SIZE) \
+      return REG_ESIZE; \
+    new_size = MIN (bufp->allocated << 1, MAX_BUF_SIZE); \
+    new_buffer = (unsigned char *) realloc (old_buffer, new_size); \
+    if (new_buffer == NULL) \
+      return REG_ESPACE; /* BUFP still owns the old block and size.  */ \
+    bufp->buffer = new_buffer; \
+    bufp->allocated = new_size; \
+    /* If the buffer moved, move all the pointers into it.  */ \
+    if (old_buffer != new_buffer) \
+      { \
+        b = (b - old_buffer) + new_buffer; \
+        begalt = (begalt - old_buffer) + new_buffer; \
+        if (fixup_alt_jump) \
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + new_buffer; \
+        if (laststart) \
+          laststart = (laststart - old_buffer) + new_buffer; \
+        if (pending_exact) \
+          pending_exact = (pending_exact - old_buffer) + new_buffer; \
+      } \
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+ {start,stop}_memory, the maximum number of groups we can report
+ things about is what fits in that byte. */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack. */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+typedef int pattern_offset_t;
+
+typedef struct
+{
+ pattern_offset_t begalt_offset;
+ pattern_offset_t fixup_alt_jump;
+ pattern_offset_t inner_group_offset;
+ pattern_offset_t laststart_offset;
+ regnum_t regnum;
+} compile_stack_elt_t;
+
+
+typedef struct
+{
+ compile_stack_elt_t *stack;
+ unsigned size;
+ unsigned avail; /* Offset of next open position. */
+} compile_stack_type;
+
+
+#define INIT_COMPILE_STACK_SIZE 32
+
+#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
+#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
+
+/* The next available element. */
+#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
+
+
+/* Set the bit for character C in a list. */
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) c) % BYTEWIDTH))
+
+
+/* Get the next unsigned number in the uncompiled pattern. */
+#define GET_UNSIGNED_NUMBER(num) \
+ { if (p != pend) \
+ { \
+ PATFETCH (c); \
+ while (ISDIGIT (c)) \
+ { \
+ if (num < 0) \
+ num = 0; \
+ num = num * 10 + c - '0'; \
+ if (p == pend) \
+ break; \
+ PATFETCH (c); \
+ } \
+ } \
+ }
+
+#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
+
+#define IS_CHAR_CLASS(string) \
+ (STREQ (string, "alpha") || STREQ (string, "upper") \
+ || STREQ (string, "lower") || STREQ (string, "digit") \
+ || STREQ (string, "alnum") || STREQ (string, "xdigit") \
+ || STREQ (string, "space") || STREQ (string, "print") \
+ || STREQ (string, "punct") || STREQ (string, "graph") \
+ || STREQ (string, "cntrl") || STREQ (string, "blank"))
+
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
+
+ Assumes the `allocated' (and perhaps `buffer') and `translate'
+ fields are set in BUFP on entry.
+
+ If it succeeds, results are put in BUFP (if it returns an error, the
+ contents of BUFP are undefined):
+ `buffer' is the compiled pattern;
+ `syntax' is set to SYNTAX;
+ `used' is set to the length of the compiled pattern;
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
+
+ The `fastmap' and `newline_anchor' fields are neither
+ examined nor set. */
+
+static reg_errcode_t
+regex_compile (pattern, size, syntax, bufp)
+ const char *pattern;
+ int size;
+ reg_syntax_t syntax;
+ struct re_pattern_buffer *bufp;
+{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
+ register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
+ const char *p1;
+
+ /* Points to the end of the buffer, where we should append. */
+ register unsigned char *b;
+
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
+ /* Points to the current (ending) position in the pattern. */
+ const char *p = pattern;
+ const char *pend = pattern + size;
+
+ /* How to translate the characters in the pattern. */
+ char *translate = bufp->translate;
+
+ /* Address of the count-byte of the most recently inserted `exactn'
+ command. This makes it possible to tell if a new exact-match
+ character can be added to that command or if the character requires
+ a new `exactn' command. */
+ unsigned char *pending_exact = 0;
+
+ /* Address of start of the most recently finished expression.
+ This tells, e.g., postfix * where to find the start of its
+ operand. Reset at the beginning of groups and alternatives. */
+ unsigned char *laststart = 0;
+
+ /* Address of beginning of regexp, or inside of last group. */
+ unsigned char *begalt;
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
+ /* Address of the place where a forward jump should go to the end of
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
+ unsigned char *fixup_alt_jump = 0;
+
+ /* Counts open-groups as they are encountered. Remembered for the
+ matching close-group on the compile stack, so the same register
+ number is put in the stop_memory as the start_memory. */
+ regnum_t regnum = 0;
+
+#ifdef DEBUG
+ DEBUG_PRINT1 ("\nCompiling pattern: ");
+ if (debug)
+ {
+ unsigned debug_count;
+
+ for (debug_count = 0; debug_count < size; debug_count++)
+ printchar (pattern[debug_count]);
+ putchar ('\n');
+ }
+#endif /* DEBUG */
+
+ /* Initialize the compile stack. */
+ compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
+ if (compile_stack.stack == NULL)
+ return REG_ESPACE;
+
+ compile_stack.size = INIT_COMPILE_STACK_SIZE;
+ compile_stack.avail = 0;
+
+ /* Initialize the pattern buffer. */
+ bufp->syntax = syntax;
+ bufp->fastmap_accurate = 0;
+ bufp->not_bol = bufp->not_eol = 0;
+
+ /* Set `used' to zero, so that if we return an error, the pattern
+ printer (for debugging) will think there's no pattern. We reset it
+ at the end. */
+ bufp->used = 0;
+
+ /* Always count groups, whether or not bufp->no_sub is set. */
+ bufp->re_nsub = 0;
+
+#if !defined (emacs) && !defined (SYNTAX_TABLE)
+ /* Initialize the syntax table. */
+ init_syntax_once ();
+#endif
+
+ if (bufp->allocated == 0)
+ {
+ if (bufp->buffer)
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
+ RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
+ }
+ else
+ { /* Caller did not allocate a buffer. Do it for them. */
+ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
+ }
+ if (!bufp->buffer) return REG_ESPACE;
+
+ bufp->allocated = INIT_BUF_SIZE;
+ }
+
+ begalt = b = bufp->buffer;
+
+ /* Loop through the uncompiled pattern until we're at the end. */
+ while (p != pend)
+ {
+ PATFETCH (c);
+
+ switch (c)
+ {
+ case '^':
+ {
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '$':
+ {
+ if ( /* If at end of pattern, it's an operator. */
+ p == pend
+ /* If context independent, it's an operator. */
+ || syntax & RE_CONTEXT_INDEP_ANCHORS
+ /* Otherwise, depends on what's next. */
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
+ else
+ goto normal_char;
+ }
+ break;
+
+
+ case '+':
+ case '?':
+ if ((syntax & RE_BK_PLUS_QM)
+ || (syntax & RE_LIMITED_OPS))
+ goto normal_char;
+ handle_plus:
+ case '*':
+ /* If there is no previous pattern... */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (!(syntax & RE_CONTEXT_INDEP_OPS))
+ goto normal_char;
+ }
+
+ {
+ /* Are we optimizing this jump? */
+ boolean keep_string_p = false;
+
+ /* 1 means zero (many) matches is allowed. */
+ char zero_times_ok = 0, many_times_ok = 0;
+
+ /* If there is a sequence of repetition chars, collapse it
+ down to just one (the right one). We can't combine
+ interval operators with these because of, e.g., `a{2}*',
+ which should only match an even number of `a's. */
+
+ for (;;)
+ {
+ zero_times_ok |= c != '+';
+ many_times_ok |= c != '?';
+
+ if (p == pend)
+ break;
+
+ PATFETCH (c);
+
+ if (c == '*'
+ || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
+ ;
+
+ else if (syntax & RE_BK_PLUS_QM && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ if (!(c1 == '+' || c1 == '?'))
+ {
+ PATUNFETCH;
+ PATUNFETCH;
+ break;
+ }
+
+ c = c1;
+ }
+ else
+ {
+ PATUNFETCH;
+ break;
+ }
+
+ /* If we get here, we found another repeat character. */
+ }
+
+ /* Star, etc. applied to an empty pattern is equivalent
+ to an empty pattern. */
+ if (!laststart)
+ break;
+
+ /* Now we know whether or not zero matches is allowed
+ and also whether or not two or more matches is allowed. */
+ if (many_times_ok)
+ { /* More than one repetition is allowed, so put in at the
+ end a backward relative jump from `b' to before the next
+ jump we're going to put in below (which jumps from
+ laststart to after this jump).
+
+ But if we are at the `*' in the exact sequence `.*\n',
+ insert an unconditional jump backwards to the .,
+ instead of the beginning of the loop. This way we only
+ push a failure point once, instead of every time
+ through the loop. */
+ assert (p - 1 > pattern);
+
+ /* Allocate the space for the jump. */
+ GET_BUFFER_SPACE (3);
+
+ /* We know we are not at the first character of the pattern,
+ because laststart was nonzero. And we've already
+ incremented `p', by the way, to be the character after
+ the `*'. Do we have to do something analogous here
+ for null bytes, because of RE_DOT_NOT_NULL? */
+ if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
+ && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
+ && !(syntax & RE_DOT_NEWLINE))
+ { /* We have .*\n. */
+ STORE_JUMP (jump, b, laststart);
+ keep_string_p = true;
+ }
+ else
+ /* Anything else. */
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
+
+ /* We've added more stuff to the buffer. */
+ b += 3;
+ }
+
+ /* On failure, jump from laststart to b + 3, which will be the
+ end of the buffer after this jump is inserted. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
+ : on_failure_jump,
+ laststart, b + 3);
+ pending_exact = 0;
+ b += 3;
+
+ if (!zero_times_ok)
+ {
+ /* At least one repetition is required, so insert a
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
+ b += 3;
+ }
+ }
+ break;
+
+
+ case '.':
+ laststart = b;
+ BUF_PUSH (anychar);
+ break;
+
+
+ case '[':
+ {
+ boolean had_char_class = false;
+
+ if (p == pend) return REG_EBRACK;
+
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
+
+ laststart = b;
+
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
+ if (*p == '^')
+ p++;
+
+ /* Remember the first position in the bracket expression. */
+ p1 = p;
+
+ /* Push the number of bytes in the bitmap. */
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* Clear the whole map. */
+ bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
+
+ /* charset_not matches newline according to a syntax bit. */
+ if ((re_opcode_t) b[-2] == charset_not
+ && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
+ SET_LIST_BIT ('\n');
+
+ /* Read in characters and ranges, setting map bits. */
+ for (;;)
+ {
+ if (p == pend) return REG_EBRACK;
+
+ PATFETCH (c);
+
+ /* \ might escape characters inside [...] and [^...]. */
+ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
+ {
+ if (p == pend) return REG_EESCAPE;
+
+ PATFETCH (c1);
+ SET_LIST_BIT (c1);
+ continue;
+ }
+
+ /* Could be the end of the bracket expression. If it's
+ not (i.e., when the bracket expression is `[]' so
+ far), the ']' character bit gets set way below. */
+ if (c == ']' && p != p1 + 1)
+ break;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character class. */
+ if (had_char_class && c == '-' && *p != ']')
+ return REG_ERANGE;
+
+ /* Look ahead to see if it's a range when the last thing
+ was a character: if this is a hyphen not at the
+ beginning or the end of a list, then it's the range
+ operator. */
+ if (c == '-'
+ && !(p - 2 >= pattern && p[-2] == '[')
+ && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
+ && *p != ']')
+ {
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ else if (p[0] == '-' && p[1] != ']')
+ { /* This handles ranges made up of characters only. */
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
+ }
+
+ /* See if we're at the beginning of a possible character
+ class. */
+
+ else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
+ { /* Leave room for the null. */
+ char str[CHAR_CLASS_MAX_LENGTH + 1];
+
+ PATFETCH (c);
+ c1 = 0;
+
+ /* If pattern is `[[:'. */
+ if (p == pend) return REG_EBRACK;
+
+ for (;;)
+ {
+ PATFETCH (c);
+ if (c == ':' || c == ']' || p == pend
+ || c1 == CHAR_CLASS_MAX_LENGTH)
+ break;
+ str[c1++] = c;
+ }
+ str[c1] = '\0';
+
+ /* If it isn't a word bracketed by `[:' and `:]',
+ undo the ending character, the letters, and leave
+ the leading `:' and `[' (but set bits for them). */
+ if (c == ':' && *p == ']')
+ {
+ int ch;
+ boolean is_alnum = STREQ (str, "alnum");
+ boolean is_alpha = STREQ (str, "alpha");
+ boolean is_blank = STREQ (str, "blank");
+ boolean is_cntrl = STREQ (str, "cntrl");
+ boolean is_digit = STREQ (str, "digit");
+ boolean is_graph = STREQ (str, "graph");
+ boolean is_lower = STREQ (str, "lower");
+ boolean is_print = STREQ (str, "print");
+ boolean is_punct = STREQ (str, "punct");
+ boolean is_space = STREQ (str, "space");
+ boolean is_upper = STREQ (str, "upper");
+ boolean is_xdigit = STREQ (str, "xdigit");
+
+ if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
+
+ /* Throw away the ] at the end of the character
+ class. */
+ PATFETCH (c);
+
+ if (p == pend) return REG_EBRACK;
+
+ for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
+ {
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch))
+ || (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch))
+ || (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
+ SET_LIST_BIT (ch);
+ }
+ had_char_class = true;
+ }
+ else
+ {
+ c1++;
+ while (c1--)
+ PATUNFETCH;
+ SET_LIST_BIT ('[');
+ SET_LIST_BIT (':');
+ had_char_class = false;
+ }
+ }
+ else
+ {
+ had_char_class = false;
+ SET_LIST_BIT (c);
+ }
+ }
+
+ /* Discard any (non)matching list bytes that are all 0 at the
+ end of the map. Decrease the map-length byte too. */
+ while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
+ b[-1]--;
+ b += b[-1];
+ }
+ break;
+
+
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_open;
+ else
+ goto normal_char;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS)
+ goto handle_close;
+ else
+ goto normal_char;
+
+
+ case '\n':
+ if (syntax & RE_NEWLINE_ALT)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '|':
+ if (syntax & RE_NO_BK_VBAR)
+ goto handle_alt;
+ else
+ goto normal_char;
+
+
+ case '{':
+ if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
+ goto handle_interval;
+ else
+ goto normal_char;
+
+
+ case '\\':
+ if (p == pend) return REG_EESCAPE;
+
+ /* Do not translate the character after the \, so that we can
+ distinguish, e.g., \B from \b, even if we normally would
+ translate, e.g., B to b. */
+ PATFETCH_RAW (c);
+
+ switch (c)
+ {
+ case '(':
+ if (syntax & RE_NO_BK_PARENS)
+ goto normal_backslash;
+
+ handle_open:
+ bufp->re_nsub++;
+ regnum++;
+
+ if (COMPILE_STACK_FULL)
+ {
+ RETALLOC (compile_stack.stack, compile_stack.size << 1,
+ compile_stack_elt_t);
+ if (compile_stack.stack == NULL) return REG_ESPACE;
+
+ compile_stack.size <<= 1;
+ }
+
+ /* These are the values to restore when we hit end of this
+ group. They are all relative offsets, so that if the
+ whole pattern moves because of realloc, they will still
+ be valid. */
+ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
+ COMPILE_STACK_TOP.fixup_alt_jump
+ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
+ COMPILE_STACK_TOP.regnum = regnum;
+
+ /* We will eventually replace the 0 with the number of
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
+ if (regnum <= MAX_REGNUM)
+ {
+ COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
+ BUF_PUSH_3 (start_memory, regnum, 0);
+ }
+
+ compile_stack.avail++;
+
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+ break;
+
+
+ case ')':
+ if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
+
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_backslash;
+ else
+ return REG_ERPAREN;
+
+ handle_close:
+ if (fixup_alt_jump)
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
+
+ /* See similar code for backslashed right paren above. */
+ if (COMPILE_STACK_EMPTY)
+ if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
+ goto normal_char;
+ else
+ return REG_ERPAREN;
+
+ /* Since we just checked for an empty stack above, this
+ ``can't happen''. */
+ assert (compile_stack.avail != 0);
+ {
+ /* We don't just want to restore into `regnum', because
+ later groups should continue to be numbered higher,
+ as in `(ab)c(de)' -- the second group is #2. */
+ regnum_t this_group_regnum;
+
+ compile_stack.avail--;
+ begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
+ fixup_alt_jump
+ = COMPILE_STACK_TOP.fixup_alt_jump
+ ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
+ : 0;
+ laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
+ this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this close
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
+
+ /* We're at the end of the group, so now we know how many
+ groups were inside this one. */
+ if (this_group_regnum <= MAX_REGNUM)
+ {
+ unsigned char *inner_group_loc
+ = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
+
+ *inner_group_loc = regnum - this_group_regnum;
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
+ regnum - this_group_regnum);
+ }
+ }
+ break;
+
+
+ case '|': /* `\|'. */
+ if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
+ goto normal_backslash;
+ handle_alt:
+ if (syntax & RE_LIMITED_OPS)
+ goto normal_char;
+
+ /* Insert before the previous alternative a jump which
+ jumps to this alternative if the former fails. */
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
+ pending_exact = 0;
+ b += 3;
+
+ /* The alternative before this one has a jump after it
+ which gets executed if it gets matched. Adjust that
+ jump so it will jump to this alternative's analogous
+ jump (put in below, which in turn will jump to the next
+ (if any) alternative's such jump, etc.). The last such
+ jump jumps to the correct final destination. A picture:
+ _____ _____
+ | | | |
+ | v | v
+ a | b | c
+
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ /* Mark and leave space for a jump after this alternative,
+ to be filled in later either by next alternative or
+ when know we're at the end of a series of alternatives. */
+ fixup_alt_jump = b;
+ GET_BUFFER_SPACE (3);
+ b += 3;
+
+ laststart = 0;
+ begalt = b;
+ break;
+
+
+ case '{':
+ /* If \{ is a literal. */
+ if (!(syntax & RE_INTERVALS)
+ /* If we're at `\{' and it's not the open-interval
+ operator. */
+ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
+ || (p - 2 == pattern && p == pend))
+ goto normal_backslash;
+
+ handle_interval:
+ {
+ /* If got here, then the syntax allows intervals. */
+
+ /* At least (most) this many matches must be made. */
+ int lower_bound = -1, upper_bound = -1;
+
+ beg_interval = p - 1;
+
+ if (p == pend)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_EBRACE;
+ }
+
+ GET_UNSIGNED_NUMBER (lower_bound);
+
+ if (c == ',')
+ {
+ GET_UNSIGNED_NUMBER (upper_bound);
+ if (upper_bound < 0) upper_bound = RE_DUP_MAX;
+ }
+ else
+ /* Interval such as `{1}' => match exactly once. */
+ upper_bound = lower_bound;
+
+ if (lower_bound < 0 || upper_bound > RE_DUP_MAX
+ || lower_bound > upper_bound)
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (c != '\\') return REG_EBRACE;
+
+ PATFETCH (c);
+ }
+
+ if (c != '}')
+ {
+ if (syntax & RE_NO_BK_BRACES)
+ goto unfetch_interval;
+ else
+ return REG_BADBR;
+ }
+
+ /* We just parsed a valid interval. */
+
+ /* If it's invalid to have no preceding re. */
+ if (!laststart)
+ {
+ if (syntax & RE_CONTEXT_INVALID_OPS)
+ return REG_BADRPT;
+ else if (syntax & RE_CONTEXT_INDEP_OPS)
+ laststart = b;
+ else
+ goto unfetch_interval;
+ }
+
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
+ if (upper_bound == 0)
+ {
+ GET_BUFFER_SPACE (3);
+ INSERT_JUMP (jump, laststart, b + 3);
+ b += 3;
+ }
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
+ if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
+ pending_exact = 0;
+ beg_interval = NULL;
+ }
+ break;
+
+ unfetch_interval:
+ /* If an invalid interval, match the characters as literals. */
+ assert (beg_interval);
+ p = beg_interval;
+ beg_interval = NULL;
+
+ /* normal_char and normal_backslash need `c'. */
+ PATFETCH (c);
+
+ if (!(syntax & RE_NO_BK_BRACES))
+ {
+ if (p > pattern && p[-1] == '\\')
+ goto normal_backslash;
+ }
+ goto normal_char;
+
+#ifdef emacs
+ /* There is no way to specify the before_dot and after_dot
+ operators. rms says this is ok. --karl */
+ case '=':
+ BUF_PUSH (at_dot);
+ break;
+
+ case 's':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ break;
+
+ case 'S':
+ laststart = b;
+ PATFETCH (c);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ break;
+#endif /* emacs */
+
+
+ case 'w':
+ laststart = b;
+ BUF_PUSH (wordchar);
+ break;
+
+
+ case 'W':
+ laststart = b;
+ BUF_PUSH (notwordchar);
+ break;
+
+
+ case '<':
+ BUF_PUSH (wordbeg);
+ break;
+
+ case '>':
+ BUF_PUSH (wordend);
+ break;
+
+ case 'b':
+ BUF_PUSH (wordbound);
+ break;
+
+ case 'B':
+ BUF_PUSH (notwordbound);
+ break;
+
+ case '`':
+ BUF_PUSH (begbuf);
+ break;
+
+ case '\'':
+ BUF_PUSH (endbuf);
+ break;
+
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ if (syntax & RE_NO_BK_REFS)
+ goto normal_char;
+
+ c1 = c - '0';
+
+ if (c1 > regnum)
+ return REG_ESUBREG;
+
+ /* Can't back reference to a subexpression if inside of it. */
+ if (group_in_compile_stack (compile_stack, c1))
+ goto normal_char;
+
+ laststart = b;
+ BUF_PUSH_2 (duplicate, c1);
+ break;
+
+
+ case '+':
+ case '?':
+ if (syntax & RE_BK_PLUS_QM)
+ goto handle_plus;
+ else
+ goto normal_backslash;
+
+ default:
+ normal_backslash:
+ /* You might think it would be useful for \ to mean
+ not to translate; but if we don't translate it
+ it will never match anything. */
+ c = TRANSLATE (c);
+ goto normal_char;
+ }
+ break;
+
+
+ default:
+ /* Expects the character in `c'. */
+ normal_char:
+ /* If no exactn currently being built. */
+ if (!pending_exact
+
+ /* If last exactn not at current position. */
+ || pending_exact + *pending_exact + 1 != b
+
+ /* We have only one byte following the exactn for the count. */
+ || *pending_exact == (1 << BYTEWIDTH) - 1
+
+ /* If followed by a repetition operator. */
+ || *p == '*' || *p == '^'
+ || ((syntax & RE_BK_PLUS_QM)
+ ? *p == '\\' && (p[1] == '+' || p[1] == '?')
+ : (*p == '+' || *p == '?'))
+ || ((syntax & RE_INTERVALS)
+ && ((syntax & RE_NO_BK_BRACES)
+ ? *p == '{'
+ : (p[0] == '\\' && p[1] == '{'))))
+ {
+ /* Start building a new exactn. */
+
+ laststart = b;
+
+ BUF_PUSH_2 (exactn, 0);
+ pending_exact = b - 1;
+ }
+
+ BUF_PUSH (c);
+ (*pending_exact)++;
+ break;
+ } /* switch (c) */
+ } /* while p != pend */
+
+
+ /* Through the pattern now. */
+
+ if (fixup_alt_jump)
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
+
+ if (!COMPILE_STACK_EMPTY)
+ return REG_EPAREN;
+
+ free (compile_stack.stack);
+
+ /* We have succeeded; set the length of the buffer. */
+ bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: ");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
+ return REG_NOERROR;
+} /* regex_compile */
+
+/* Subroutines for `regex_compile'. */
+
+/* Write the opcode OP into the byte at LOC, then use STORE_NUMBER to
+   put the two-byte integer parameter ARG in the bytes right after it.  */
+
+static void
+store_op1 (op, loc, arg)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+{
+  unsigned char *operand = loc + 1;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (operand, arg);
+}
+
+
+/* Two-parameter counterpart of `store_op1': the opcode OP goes at LOC,
+   the two-byte parameter ARG1 is stored starting at LOC + 1 and the
+   two-byte parameter ARG2 starting at LOC + 3.  */
+
+static void
+store_op2 (op, loc, arg1, arg2)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+{
+  unsigned char *first_operand = loc + 1;
+  unsigned char *second_operand = loc + 3;
+
+  loc[0] = (unsigned char) op;
+  STORE_NUMBER (first_operand, arg1);
+  STORE_NUMBER (second_operand, arg2);
+}
+
+
+/* Make room at LOC for a three-byte instruction by shifting every byte
+   in [LOC, END) three positions higher, then write OP and its two-byte
+   integer parameter ARG into the gap with `store_op1'.  The shift
+   writes as far as END + 2, so those bytes must be usable.  */
+
+static void
+insert_op1 (op, loc, arg, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg;
+     unsigned char *end;
+{
+  register unsigned char *cursor;
+
+  /* Walk downward from END so no byte is clobbered before it is moved:
+     the byte just below CURSOR lands three places above where it was.  */
+  for (cursor = end; cursor != loc; cursor--)
+    cursor[2] = cursor[-1];
+
+  store_op1 (op, loc, arg);
+}
+
+
+/* Five-byte counterpart of `insert_op1': shift every byte in
+   [LOC, END) five positions higher, then write OP and its two two-byte
+   parameters ARG1 and ARG2 into the gap with `store_op2'.  The shift
+   writes as far as END + 4, so those bytes must be usable.  */
+
+static void
+insert_op2 (op, loc, arg1, arg2, end)
+     re_opcode_t op;
+     unsigned char *loc;
+     int arg1, arg2;
+     unsigned char *end;
+{
+  register unsigned char *cursor;
+
+  /* Walk downward from END so no byte is clobbered before it is moved:
+     the byte just below CURSOR lands five places above where it was.  */
+  for (cursor = end; cursor != loc; cursor--)
+    cursor[4] = cursor[-1];
+
+  store_op2 (op, loc, arg1, arg2);
+}
+
+
+/* P points just past a ^ in PATTERN.  Report whether that ^ directly
+   follows an open-group or an alternation operator, as decided by the
+   SYNTAX bits.  There must be at least one character in front of
+   the ^.  */
+
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+     const char *pattern, *p;
+     reg_syntax_t syntax;
+{
+  /* P[-1] is the ^ itself, so P[-2] is the character before it.  */
+  const char *before = p - 2;
+  /* Is that character itself preceded by a backslash?  */
+  boolean escaped = before > pattern && before[-1] == '\\';
+
+  switch (*before)
+    {
+    case '(':
+      /* After a subexpression opener?  */
+      return (syntax & RE_NO_BK_PARENS) || escaped;
+
+    case '|':
+      /* After an alternative?  */
+      return (syntax & RE_NO_BK_VBAR) || escaped;
+
+    default:
+      return false;
+    }
+}
+
+
+/* Counterpart of at_begline_loc_p for $.  P points just past the $ and
+   PEND at the end of the pattern; report whether the $ is directly
+   followed by a close-group or an alternation operator, as decided by
+   the SYNTAX bits.  There must be at least one character after the $,
+   i.e., `P < PEND'.  */
+
+static boolean
+at_endline_loc_p (p, pend, syntax)
+     const char *p, *pend;
+     int syntax;
+{
+  /* True when P starts a backslash pair lying wholly before PEND.  */
+  boolean escape_pair = *p == '\\' && p + 1 < pend;
+  boolean closes_group;
+  boolean starts_alt;
+
+  /* Before a close-group?  */
+  if (syntax & RE_NO_BK_PARENS)
+    closes_group = *p == ')';
+  else
+    closes_group = escape_pair && p[1] == ')';
+
+  /* Before an alternative?  */
+  if (syntax & RE_NO_BK_VBAR)
+    starts_alt = *p == '|';
+  else
+    starts_alt = escape_pair && p[1] == '|';
+
+  return closes_group || starts_alt;
+}
+
+
+/* Report whether any element currently on COMPILE_STACK records the
+   group number REGNUM: true if one does, false if none does.  */
+
+static boolean
+group_in_compile_stack (compile_stack, regnum)
+     compile_stack_type compile_stack;
+     regnum_t regnum;
+{
+  int slot = compile_stack.avail - 1;
+
+  /* Scan from the element just below `avail' down to element 0.  */
+  while (slot >= 0)
+    {
+      if (compile_stack.stack[slot].regnum == regnum)
+        return true;
+      slot--;
+    }
+
+  return false;
+}
+
+
+/* Compile the range (inside a bracket expression) whose ending
+   character is at *P_PTR in the uncompiled pattern, which stops at
+   PEND.  The starting character is taken from `P[-2]'; `P[-1]' is the
+   `-'.  Every character from the start through the end, inclusive, is
+   passed through TRANSLATE and its bit is set in the compiled
+   pattern B.  *P_PTR is advanced past the ending character.
+
+   Returns REG_ERANGE if the pattern is already exhausted, or if the
+   range is empty and SYNTAX has RE_NO_EMPTY_RANGES; REG_NOERROR
+   otherwise.
+
+   The names `p', `b' and `translate' are kept short so that the same
+   macros as in `regex_compile' itself can be used here.  */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+     const char **p_ptr, *pend;
+     char *translate;
+     reg_syntax_t syntax;
+     unsigned char *b;
+{
+  const char *p = *p_ptr;
+  /* Wider than `unsigned char' on purpose: the loop below is inclusive,
+     so with an upper endpoint of 0xff (8-bit characters) a byte-sized
+     counter could never exceed it and the loop would never end.  */
+  unsigned code;
+  int first, last;
+
+  if (p == pend)
+    return REG_ERANGE;
+
+  /* The pattern is a signed `char *'; read the endpoints through
+     `unsigned char *' so a character with the high bit set does not
+     become a negative endpoint.  They are deliberately fetched
+     untranslated -- translation is applied per character below.  */
+  first = ((unsigned char *) p)[-2];
+  last = ((unsigned char *) p)[0];
+
+  /* Step the caller's pattern pointer over the ending character.  */
+  *p_ptr += 1;
+
+  if (first > last)
+    {
+      /* Start after end: the range is empty.  */
+      if (syntax & RE_NO_EMPTY_RANGES)
+        return REG_ERANGE;
+      return REG_NOERROR;
+    }
+
+  code = first;
+  while (code <= last)
+    {
+      SET_LIST_BIT (TRANSLATE (code));
+      code++;
+    }
+
+  return REG_NOERROR;
+}
+
+/* Failure stack declarations and macros; both re_compile_fastmap and
+ re_match_2 use a failure stack. These have to be macros because of
+ REGEX_ALLOCATE. */
+
+
+/* Number of failure points for which to initially allocate space
+ when matching. If this number is exceeded, we allocate more
+ space, so it is not a hard limit.
+
+ Note that INIT_FAIL_STACK allocates this many fail_stack_elt_t
+ slots, i.e., the count is in stack items rather than in whole
+ failure points. The #ifndef guard lets a definition made before
+ this point take precedence over the default of 5. */
+#ifndef INIT_FAILURE_ALLOC
+#define INIT_FAILURE_ALLOC 5
+#endif
+
+/* Roughly the maximum number of failure points on the stack. Would be
+ exactly that if always used MAX_FAILURE_SPACE each time we failed.
+ This is a variable only so users of regex can assign to it; we never
+ change it ourselves.
+
+ The limit is applied in DOUBLE_FAIL_STACK, which refuses to grow a
+ stack whose size already exceeds re_max_failures * MAX_FAILURE_ITEMS
+ slots. */
+int re_max_failures = 2000;
+
+typedef const unsigned char *fail_stack_elt_t; /* Type of one failure-stack slot. */
+
+typedef struct
+{
+ fail_stack_elt_t *stack; /* The array of slots. */
+ unsigned size; /* Number of slots allocated in `stack'. */
+ unsigned avail; /* Offset of next open position. */
+} fail_stack_type;
+
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) /* Nothing is pushed on the variable `fail_stack'. */
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) /* Same test through the pointer variable `fail_stack_ptr'. */
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) /* Every allocated slot of `fail_stack' is in use. */
+#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) /* NOTE(review): indexes slot `avail', i.e. the next open position, not the last item pushed. */
+
+
+/* Initialize `fail_stack'. Do `return -2' if the alloc fails.
+
+ Operates on the variable named `fail_stack' in the calling scope:
+ allocates INIT_FAILURE_ALLOC slots of fail_stack_elt_t with
+ REGEX_ALLOCATE, records that count in `size' and sets `avail' to 0.
+ Since the expansion contains a `return' statement, it can only be
+ used inside a function for which returning -2 is acceptable. */
+
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
+
+
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
+
+ Return 1 if succeeds, and 0 if either ran out of memory
+ allocating space for it or it was already too large.
+
+ REGEX_REALLOCATE requires `destination' be declared.
+
+ The size test is made against the current size, before doubling, so
+ the stack may end up larger than re_max_failures * MAX_FAILURE_ITEMS
+ slots; hence `approximately'. MAX_FAILURE_ITEMS refers to `num_regs',
+ which must therefore be visible where this macro is expanded.
+
+ The result of REGEX_REALLOCATE is assigned straight to the `stack'
+ member, so when it yields NULL the previous pointer value is no
+ longer held there (NOTE(review): whether that loses memory depends
+ on how REGEX_REALLOCATE is defined -- confirm). `size' is doubled
+ only on success. */
+
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+ ? 0 \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
+ \
+ (fail_stack).stack == NULL \
+ ? 0 \
+ : ((fail_stack).size <<= 1, \
+ 1)))
+
+
+/* Push PATTERN_OP on FAIL_STACK.
+
+ Return 1 if was able to do so and 0 if ran out of memory allocating
+ space to do so.
+
+ The stack is grown with DOUBLE_FAIL_STACK only when it is full, so 0
+ is also the result when DOUBLE_FAIL_STACK declines because the stack
+ is already too large. The fullness test goes through
+ FAIL_STACK_FULL (), which looks at the variable literally named
+ `fail_stack' rather than at the FAIL_STACK argument, so the argument
+ is expected to be that same variable. */
+#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (fail_stack)) \
+ ? 0 \
+ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
+ 1))
+
+/* This pushes an item onto the failure stack. Must be a four-byte
+ value. Assumes the variable `fail_stack'. Probably should only
+ be called from within `PUSH_FAILURE_POINT'.
+
+ No capacity check is made here: ITEM is cast to fail_stack_elt_t and
+ stored at slot `avail', which is then incremented, so a free slot
+ must have been ensured beforehand. ITEM is not parenthesized in the
+ expansion, so with a compound expression the cast applies to its
+ leading operand only. */
+#define PUSH_FAILURE_ITEM(item) \
+ fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
+
+/* The complement operation. Assumes `fail_stack' is nonempty.
+ Decrements `avail' and yields the element stored at that slot. */
+#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
+
+/* Used to omit pushing failure point id's when we're not debugging.
+ With DEBUG defined, DEBUG_PUSH is PUSH_FAILURE_ITEM and DEBUG_POP
+ stores the popped item through ITEM_ADDR; otherwise both expand to
+ nothing, so no id slot is pushed or popped. */
+#ifdef DEBUG
+#define DEBUG_PUSH PUSH_FAILURE_ITEM
+#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
+#else
+#define DEBUG_PUSH(item)
+#define DEBUG_POP(item_addr)
+#endif
+
+
+/* Push the information about the state we will need
+ if we ever fail back to it.
+
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
+ declared.
+
+ Does `return FAILURE_CODE' if runs out of memory.
+
+ The expansion also reads `lowest_active_reg' and
+ `highest_active_reg' from the calling scope, and declares its own
+ `destination' and `this_reg' inside the do-block. The debugging
+ statements additionally name failure_id, nfailure_points_pushed,
+ num_regs_pushed, bufp, pend, string1, size1, string2 and size2.
+
+ The stack is first doubled until NUM_FAILURE_ITEMS slots are free;
+ the `return FAILURE_CODE' happens when DOUBLE_FAIL_STACK yields 0,
+ which covers both allocation failure and a stack already too large.
+
+ Items are pushed in this order:
+ for each register from lowest_active_reg up to highest_active_reg:
+ regstart[reg], regend[reg], reg_info[reg].word;
+ lowest_active_reg;
+ highest_active_reg;
+ PATTERN_PLACE;
+ STRING_PLACE;
+ failure_id (through DEBUG_PUSH, so only when DEBUG is defined). */
+
+#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
+ do { \
+ char *destination; \
+ /* Must be int, so when we don't save any registers, the arithmetic \
+ of 0 + -1 isn't done as unsigned. */ \
+ int this_reg; \
+ \
+ DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
+ DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
+ \
+ DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
+ DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
+ \
+ /* Ensure we have enough space allocated for what we will push. */ \
+ while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
+ { \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
+ return failure_code; \
+ \
+ DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
+ (fail_stack).size); \
+ DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
+ } \
+ \
+ /* Push the info, starting with the registers. */ \
+ DEBUG_PRINT1 ("\n"); \
+ \
+ for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
+ this_reg++) \
+ { \
+ DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \
+ DEBUG_STATEMENT (num_regs_pushed++); \
+ \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ PUSH_FAILURE_ITEM (regstart[this_reg]); \
+ \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ PUSH_FAILURE_ITEM (regend[this_reg]); \
+ \
+ DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
+ DEBUG_PRINT2 (" match_null=%d", \
+ REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" matched_something=%d", \
+ MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT2 (" ever_matched=%d", \
+ EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
+ DEBUG_PRINT1 ("\n"); \
+ PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
+ } \
+ \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
+ PUSH_FAILURE_ITEM (lowest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
+ PUSH_FAILURE_ITEM (highest_active_reg); \
+ \
+ DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
+ PUSH_FAILURE_ITEM (pattern_place); \
+ \
+ DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
+ size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ PUSH_FAILURE_ITEM (string_place); \
+ \
+ DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
+ DEBUG_PUSH (failure_id); \
+ } while (0)
+
+/* This is the number of items that are pushed and popped on the stack
+ for each register: its start pointer, its end pointer, and its
+ reg_info word. */
+#define NUM_REG_ITEMS 3
+
+/* Individual items aside from the registers: the lowest and highest
+ active register numbers, the pattern position and the string
+ position. When debugging, a failure point id is pushed as well. */
+#ifdef DEBUG
+#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */
+#else
+#define NUM_NONREG_ITEMS 4
+#endif
+
+/* We push at most this many items on the stack. Expects a variable
+ `num_regs' to be in scope where the macro is expanded. */
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+
+/* We actually push this many items. Expects `highest_active_reg' and
+ `lowest_active_reg' to be in scope. When no register is active they
+ hold NO_HIGHEST_ACTIVE_REG and NO_LOWEST_ACTIVE_REG, which differ by
+ one so that the register part of this count comes out as zero. */
+#define NUM_FAILURE_ITEMS \
+ ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \
+ + NUM_NONREG_ITEMS)
+
+/* How many items can still be added to the stack without overflowing it.
+ Expects a variable `fail_stack' to be in scope. */
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Items come off in this order: the failure id (through DEBUG_POP), the
+ string position, the pattern position, the highest and then the lowest
+ active register number, and finally, for each register from HIGH_REG
+ down to LOW_REG, its info word, its end and its start.
+
+ If the popped string position is NULL, STR is left unchanged.
+
+ Note that the expansion is a bare brace block, not a
+ `do { ... } while (0)' statement.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ int this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_ITEM (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
+
+/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
+   BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
+   characters can start a string that matches the pattern.  This fastmap
+   is used by re_search to skip quickly over impossible starting points.
+
+   The caller must supply the address of a (1 << BYTEWIDTH)-byte data
+   area as BUFP->fastmap.
+
+   We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+   the pattern buffer.
+
+   The pattern is walked one path at a time.  Each alternative that an
+   `on_failure_jump' would fall back to is remembered on a local failure
+   stack and examined once the current path has either consumed a
+   character or run off the end of the pattern.
+
+   Returns 0 if we succeed, -2 if an internal error (the failure stack
+   could not be grown).
+
+   NOTE(review): none of the return paths release `fail_stack.stack'.
+   If INIT_FAIL_STACK obtains it from malloc (REGEX_MALLOC builds) this
+   presumably leaks -- confirm against the definition of INIT_FAIL_STACK.  */
+
+int
+re_compile_fastmap (bufp)
+     struct re_pattern_buffer *bufp;
+{
+  int j, k;
+  fail_stack_type fail_stack;
+#ifndef REGEX_MALLOC
+  char *destination;
+#endif
+  /* We don't push any register information onto the failure stack.  */
+  unsigned num_regs = 0;
+
+  register char *fastmap = bufp->fastmap;
+  unsigned char *pattern = bufp->buffer;
+  unsigned long size = bufp->used;
+  const unsigned char *p = pattern;
+  register unsigned char *pend = pattern + size;
+
+  /* Assume that each path through the pattern can be null until
+     proven otherwise.  We set this false at the bottom of switch
+     statement, to which we get only if a particular path doesn't
+     match the empty string.  */
+  boolean path_can_be_null = true;
+
+  /* We aren't doing a `succeed_n' to begin with.  */
+  boolean succeed_n_p = false;
+
+  assert (fastmap != NULL && p != NULL);
+
+  INIT_FAIL_STACK ();
+  bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
+  bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
+  bufp->can_be_null = 0;
+
+  while (p != pend || !FAIL_STACK_EMPTY ())
+    {
+      if (p == pend)
+        {
+          bufp->can_be_null |= path_can_be_null;
+
+          /* Reset for next path.  */
+          path_can_be_null = true;
+
+          p = fail_stack.stack[--fail_stack.avail];
+        }
+
+      /* We should never be about to go beyond the end of the pattern.  */
+      assert (p < pend);
+
+#ifdef SWITCH_ENUM_BUG
+      switch ((int) ((re_opcode_t) *p++))
+#else
+      switch ((re_opcode_t) *p++)
+#endif
+        {
+
+        /* I guess the idea here is to simply not bother with a fastmap
+           if a backreference is used, since it's too hard to figure out
+           the fastmap for the corresponding group.  Setting
+           `can_be_null' stops `re_search_2' from using the fastmap, so
+           that is all we do.  */
+        case duplicate:
+          bufp->can_be_null = 1;
+          return 0;
+
+
+        /* Following are the cases which match a character.  These end
+           with `break'.  */
+
+        case exactn:
+          /* p[0] is the character count; p[1] is the first character.  */
+          fastmap[p[1]] = 1;
+          break;
+
+
+        case charset:
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
+              fastmap[j] = 1;
+          break;
+
+
+        case charset_not:
+          /* Chars beyond end of map must be allowed.  */
+          for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
+            fastmap[j] = 1;
+
+          for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
+            if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
+              fastmap[j] = 1;
+          break;
+
+
+        case wordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case notwordchar:
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != Sword)
+              fastmap[j] = 1;
+          break;
+
+
+        case anychar:
+          {
+            /* Another alternative examined earlier may already have
+               recorded that a match can start with newline (e.g. the
+               pattern `\n\|.').  Remember that, so that excluding
+               newline for `.' below cannot revoke it.  */
+            int fastmap_newline = fastmap['\n'];
+
+            /* `.' matches anything ...  */
+            for (j = 0; j < (1 << BYTEWIDTH); j++)
+              fastmap[j] = 1;
+
+            /* ... except perhaps newline.  */
+            if (!(bufp->syntax & RE_DOT_NEWLINE))
+              fastmap['\n'] = fastmap_newline;
+
+            /* Return if we have already set `can_be_null'; if we have,
+               then the fastmap is irrelevant.  Something's wrong here.  */
+            else if (bufp->can_be_null)
+              return 0;
+
+            /* Otherwise, have to check alternative paths.  */
+            break;
+          }
+
+
+#ifdef emacs
+        case syntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) == (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        case notsyntaxspec:
+          k = *p++;
+          for (j = 0; j < (1 << BYTEWIDTH); j++)
+            if (SYNTAX (j) != (enum syntaxcode) k)
+              fastmap[j] = 1;
+          break;
+
+
+        /* All cases after this match the empty string.  These end with
+           `continue'.  */
+
+
+        case before_dot:
+        case at_dot:
+        case after_dot:
+          continue;
+#endif /* emacs */
+
+
+        case no_op:
+        case begline:
+        case endline:
+        case begbuf:
+        case endbuf:
+        case wordbound:
+        case notwordbound:
+        case wordbeg:
+        case wordend:
+        case push_dummy_failure:
+          continue;
+
+
+        case jump_n:
+        case pop_failure_jump:
+        case maybe_pop_jump:
+        case jump:
+        case jump_past_alt:
+        case dummy_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+          if (j > 0)
+            continue;
+
+          /* Jump backward implies we just went through the body of a
+             loop and matched nothing.  Opcode jumped to should be
+             `on_failure_jump' or `succeed_n'.  Just treat it like an
+             ordinary jump.  For a * loop, it has pushed its failure
+             point already; if so, discard that as redundant.  */
+          if ((re_opcode_t) *p != on_failure_jump
+              && (re_opcode_t) *p != succeed_n)
+            continue;
+
+          p++;
+          EXTRACT_NUMBER_AND_INCR (j, p);
+          p += j;
+
+          /* If what's on the stack is where we are now, pop it.  */
+          if (!FAIL_STACK_EMPTY ()
+              && fail_stack.stack[fail_stack.avail - 1] == p)
+            fail_stack.avail--;
+
+          continue;
+
+
+        case on_failure_jump:
+        case on_failure_keep_string_jump:
+        handle_on_failure_jump:
+          EXTRACT_NUMBER_AND_INCR (j, p);
+
+          /* For some patterns, e.g., `(a?)?', `p+j' here points to the
+             end of the pattern.  We don't want to push such a point,
+             since when we restore it above, entering the switch will
+             increment `p' past the end of the pattern.  We don't need
+             to push such a point since we obviously won't find any more
+             fastmap entries beyond `pend'.  Such a pattern can match
+             the null string, though.  */
+          if (p + j < pend)
+            {
+              if (!PUSH_PATTERN_OP (p + j, fail_stack))
+                return -2;
+            }
+          else
+            bufp->can_be_null = 1;
+
+          if (succeed_n_p)
+            {
+              EXTRACT_NUMBER_AND_INCR (k, p);   /* Skip the n.  */
+              succeed_n_p = false;
+            }
+
+          continue;
+
+
+        case succeed_n:
+          /* Get to the number of times to succeed.  */
+          p += 2;
+
+          /* Increment p past the n for when k != 0.  */
+          EXTRACT_NUMBER_AND_INCR (k, p);
+          if (k == 0)
+            {
+              /* Back up to the jump offset and handle this opcode as
+                 an `on_failure_jump'.  */
+              p -= 4;
+              succeed_n_p = true;  /* Spaghetti code alert.  */
+              goto handle_on_failure_jump;
+            }
+          continue;
+
+
+        case set_number_at:
+          p += 4;
+          continue;
+
+
+        case start_memory:
+        case stop_memory:
+          p += 2;
+          continue;
+
+
+        default:
+          abort (); /* We have listed all the cases.  */
+        } /* switch *p++ */
+
+      /* Getting here means we have found the possible starting
+         characters for one path of the pattern -- and that the empty
+         string does not match.  We need not follow this path further.
+         Instead, look at the next alternative (remembered on the
+         stack), or quit if no more.  The test at the top of the loop
+         does these things.  */
+      path_can_be_null = false;
+      p = pend;
+    } /* while p */
+
+  /* Set `can_be_null' for the last path (also the first path, if the
+     pattern is empty).  */
+  bufp->can_be_null |= path_can_be_null;
+  return 0;
+} /* re_compile_fastmap */
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
+   this memory for recording register information.  STARTS and ENDS
+   must be allocated using the malloc library routine, and must each
+   be at least NUM_REGS * sizeof (regoff_t) bytes long.  (The buffer is
+   marked REGS_REALLOCATE, so a later match that needs more registers
+   may reallocate both arrays.)
+
+   If NUM_REGS == 0, then subsequent matches should allocate their own
+   register data: the buffer is marked REGS_UNALLOCATED and REGS is
+   cleared.
+
+   Unless this function is called, the first search or match using
+   PATTERN_BUFFER will allocate its own register data, without
+   freeing the old data.  */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+     struct re_pattern_buffer *bufp;
+     struct re_registers *regs;
+     unsigned num_regs;
+     regoff_t *starts, *ends;
+{
+  if (num_regs)
+    {
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      /* `start' and `end' are pointers, so clear them with a null
+         pointer of the matching type rather than an integer cast.  */
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
+
+/* Searching routines. */
+
+/* Single-string front end to re_search_2: STRING (of length SIZE) is
+   handed over as the second string with an empty first string, and
+   matching is allowed to run all the way to the end of STRING.  */
+
+int
+re_search (bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  /* There is no first string.  */
+  const char *no_string1 = NULL;
+  int no_size1 = 0;
+
+  /* The caller cannot say where to stop, so stop at the end.  */
+  int stop = size;
+
+  return re_search_2 (bufp, no_string1, no_size1, string, size,
+                      startpos, range, regs, stop);
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match the
+   virtual concatenation of STRING1 and STRING2, starting first at index
+   STARTPOS, then at STARTPOS + 1, and so on.
+
+   STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
+
+   RANGE is how far to scan while trying to match.  RANGE = 0 means try
+   only at STARTPOS; in general, the last start tried is STARTPOS +
+   RANGE.  A negative RANGE searches backwards from STARTPOS.  RANGE is
+   clipped so that no start position outside 0 .. SIZE1 + SIZE2 is ever
+   tried.
+
+   In REGS, return the indices of the virtual concatenation of STRING1
+   and STRING2 that matched the entire BUFP->buffer and its contained
+   subexpressions.
+
+   Do not consider matching one past the index STOP in the virtual
+   concatenation of STRING1 and STRING2.
+
+   We return either the position in the strings at which the match was
+   found, -1 if no match, or -2 if error (such as failure
+   stack overflow).  */
+
+int
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
+     struct re_pattern_buffer *bufp;
+     const char *string1, *string2;
+     int size1, size2;
+     int startpos;
+     int range;
+     struct re_registers *regs;
+     int stop;
+{
+  int val;
+  register char *fastmap = bufp->fastmap;
+  register char *translate = bufp->translate;
+  int total_size = size1 + size2;
+  int endpos = startpos + range;
+
+  /* Check for out-of-range STARTPOS.  */
+  if (startpos < 0 || startpos > total_size)
+    return -1;
+
+  /* Fix up RANGE if it might eventually take us outside
+     the virtual concatenation of STRING1 and STRING2.
+     The last position tried must never drop below 0: a start of -1
+     would make the backward fastmap test below read the byte in front
+     of the string and would hand re_match_2 a bogus position.  */
+  if (endpos < 0)
+    range = 0 - startpos;
+  else if (endpos > total_size)
+    range = total_size - startpos;
+
+  /* If the search isn't to be a backwards one, don't waste time in a
+     search for a pattern that must be anchored.  */
+  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
+    {
+      if (startpos > 0)
+        return -1;
+      else
+        range = 1;
+    }
+
+  /* Update the fastmap now if not correct already.  */
+  if (fastmap && !bufp->fastmap_accurate)
+    {
+      if (re_compile_fastmap (bufp) == -2)
+        return -2;
+    }
+
+  /* Loop through the string, looking for a place to start matching.  */
+  for (;;)
+    {
+      /* If a fastmap is supplied, skip quickly over characters that
+         cannot be the start of a match.  If the pattern can match the
+         null string, however, we don't need to skip characters; we want
+         the first null string.  */
+      if (fastmap && startpos < total_size && !bufp->can_be_null)
+        {
+          if (range > 0)        /* Searching forwards.  */
+            {
+              register const char *d;
+              register int lim = 0;
+              int irange = range;
+
+              /* Do not let the scan below run off the end of STRING1
+                 into unrelated memory: stop it at the boundary.  */
+              if (startpos < size1 && startpos + range >= size1)
+                lim = range - (size1 - startpos);
+
+              d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
+
+              /* Written out as an if-else to avoid testing `translate'
+                 inside the loop.  */
+              if (translate)
+                while (range > lim
+                       && !fastmap[(unsigned char)
+                                   translate[(unsigned char) *d++]])
+                  range--;
+              else
+                while (range > lim && !fastmap[(unsigned char) *d++])
+                  range--;
+
+              startpos += irange - range;
+            }
+          else                  /* Searching backwards.  */
+            {
+              register char c = (size1 == 0 || startpos >= size1
+                                 ? string2[startpos - size1]
+                                 : string1[startpos]);
+
+              if (!fastmap[(unsigned char) TRANSLATE (c)])
+                goto advance;
+            }
+        }
+
+      /* If can't match the null string, and that's all we have left, fail.  */
+      if (range >= 0 && startpos == total_size && fastmap
+          && !bufp->can_be_null)
+        return -1;
+
+      val = re_match_2 (bufp, string1, size1, string2, size2,
+                        startpos, regs, stop);
+      if (val >= 0)
+        return startpos;
+
+      if (val == -2)
+        return -2;
+
+    advance:
+      if (!range)
+        break;
+      else if (range > 0)
+        {
+          range--;
+          startpos++;
+        }
+      else
+        {
+          range++;
+          startpos--;
+        }
+    }
+  return -1;
+} /* re_search_2 */
+
+/* Declarations and macros for re_match_2. */
+
+/* Old-style (parameterless) forward declarations of file-local helpers;
+ presumably defined further down in this file. */
+static int bcmp_translate ();
+static boolean alt_match_null_string_p (),
+ common_op_match_null_string_p (),
+ group_match_null_string_p ();
+
+/* Structure for per-register (a.k.a. per-group) information.
+ This must not be longer than one word, because we push this value
+ onto the failure stack. Other register information, such as the
+ starting and ending positions (which are addresses), and the list of
+ inner groups (which is a bits list) are maintained in separate
+ variables.
+
+ We are making a (strictly speaking) nonportable assumption here: that
+ the compiler will pack our bit fields into something that fits into
+ the type of `word', i.e., is something that fits into one item on the
+ failure stack. */
+typedef union
+{
+ /* The whole record viewed as one failure-stack item; this is what
+ gets pushed and popped. */
+ fail_stack_elt_t word;
+ struct
+ {
+ /* This field is one if this group can match the empty string,
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
+ unsigned match_null_string_p : 2;
+ /* Helps keep track of which (possibly nested) subexpressions we are
+ currently in. */
+ unsigned is_active : 1;
+ /* Whether any of the pattern has matched so far this time through
+ the subexpression. */
+ unsigned matched_something : 1;
+ /* Set together with `matched_something' by SET_REGS_MATCHED. */
+ unsigned ever_matched_something : 1;
+ } bits;
+} register_info_type;
+
+/* Accessors for the bit fields of a register_info_type R. Each expands
+ to the field itself, so it can be read or assigned to. */
+#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
+#define IS_ACTIVE(R) ((R).bits.is_active)
+#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
+#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
+
+
+/* To be invoked once a real character has been matched.  Every
+   register from `lowest_active_reg' through `highest_active_reg' (the
+   subexpressions we are currently inside) gets both its
+   `matched_something' and its `ever_matched_something' flag raised.
+   Expects `reg_info' and the two active-register bounds in scope.  */
+#define SET_REGS_MATCHED()                                              \
+  do                                                                    \
+    {                                                                   \
+      unsigned this_active_reg = lowest_active_reg;                     \
+      while (this_active_reg <= highest_active_reg)                     \
+        {                                                               \
+          EVER_MATCHED_SOMETHING (reg_info[this_active_reg]) = 1;       \
+          MATCHED_SOMETHING (reg_info[this_active_reg]) = 1;            \
+          this_active_reg++;                                            \
+        }                                                               \
+    }                                                                   \
+  while (0)
+
+
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2' into an offset from the beginning of that string.
+ For a pointer into `string2' the result is additionally shifted by
+ `size1', i.e. it is an index into the virtual concatenation of the two
+ strings. PTR is evaluated more than once. Expects `string1',
+ `string2' and `size1' in scope, plus whatever FIRST_STRING_P needs. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
+
+/* Registers are set to a sentinel when they haven't yet matched.
+ The sentinel is the pointer value obtained by casting -1; REG_UNSET
+ tests a register start or end pointer against it. */
+#define REG_UNSET_VALUE ((char *) -1)
+#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
+
+
+/* Macros for dealing with the split strings in re_match_2. */
+
+/* True while the current end-of-data pointer `dend' is the match limit
+ of the first string. */
+#define MATCHING_IN_FIRST_STRING (dend == end_match_1)
+
+/* Call before fetching a character with *d. This switches over to
+ string2 if necessary.
+
+ Expands to a bare `while' statement that uses `d', `dend', `string2'
+ and `end_match_2', and jumps to a label `fail' -- which must exist in
+ the enclosing function -- when the data is exhausted. */
+#define PREFETCH() \
+ while (d == dend) \
+ { \
+ /* End of string2 => fail. */ \
+ if (dend == end_match_2) \
+ goto fail; \
+ /* End of string1 => advance to string2. */ \
+ d = string2; \
+ dend = end_match_2; \
+ }
+
+
+/* Test if at very beginning or at very end of the virtual concatenation
+ of `string1' and `string2'. If only one string, it's `string2'.
+ Note that AT_STRINGS_BEG is also true for any D whenever `size2' is
+ zero. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
+
+
+/* Test if D points to a character which is word-constituent. We have
+ two special cases to check for: if past the end of string1, look at
+ the first character in string2; and if before the beginning of
+ string2, look at the last character in string1.
+ D is evaluated more than once. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
+ == Sword)
+
+/* Test if the character before D and the one at D differ with respect
+ to being word-constituent. The very beginning and the very end of
+ the strings always count as a boundary; they are tested first, so
+ WORDCHAR_P is not evaluated there. */
+#define AT_WORD_BOUNDARY(d) \
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
+
+
+/* Free everything we malloc.  */
+#ifdef REGEX_MALLOC
+/* Release VAR if it is non-null, then null it out so that a repeated
+   FREE_VARIABLES is harmless.  VAR is evaluated more than once and must
+   be an lvalue.  Wrapped in `do { ... } while (0)' so that the macro is
+   a single statement even as the body of an unbraced `if' or `else'.  */
+#define FREE_VAR(var)                                                   \
+  do                                                                    \
+    {                                                                   \
+      if (var)                                                          \
+        free (var);                                                     \
+      var = NULL;                                                       \
+    }                                                                   \
+  while (0)
+
+/* Release the failure stack and every register vector of re_match_2;
+   all of those variables must be in scope.  */
+#define FREE_VARIABLES()                                                \
+  do {                                                                  \
+    FREE_VAR (fail_stack.stack);                                        \
+    FREE_VAR (regstart);                                                \
+    FREE_VAR (regend);                                                  \
+    FREE_VAR (old_regstart);                                            \
+    FREE_VAR (old_regend);                                              \
+    FREE_VAR (best_regstart);                                           \
+    FREE_VAR (best_regend);                                             \
+    FREE_VAR (reg_info);                                                \
+    FREE_VAR (reg_dummy);                                               \
+    FREE_VAR (reg_info_dummy);                                          \
+  } while (0)
+#else /* not REGEX_MALLOC */
+/* Some MIPS systems (at least) want this to free alloca'd storage.  */
+#define FREE_VARIABLES() alloca (0)
+#endif /* not REGEX_MALLOC */
+
+
+/* These values must meet several constraints. They must not be valid
+ register values; since we have a limit of 255 registers (because
+ we use only one byte in the pattern for the register number), we can
+ use numbers larger than 255. They must differ by 1, because of
+ NUM_FAILURE_ITEMS above. And the value for the lowest register must
+ be larger than the value for the highest register, so we do not try
+ to actually save any registers when none are active.
+
+ With these values a loop of the form
+ `for (r = lowest; r <= highest; r++)' runs zero times, and
+ `highest - lowest + 1' is zero. */
+#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
+#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
+
+/* Matching routines. */
+
+#ifndef emacs   /* Emacs never uses this.  */
+/* Single-string front end to re_match_2: STRING (of length SIZE) is
+   handed over as the second string with an empty first string, and the
+   match may extend to the end of STRING.  */
+
+int
+re_match (bufp, string, size, pos, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, pos;
+     struct re_registers *regs;
+{
+  /* There is no first string.  */
+  const char *no_string1 = NULL;
+  int no_size1 = 0;
+
+  /* Matching may run up to the end of STRING.  */
+  int stop = size;
+
+  return re_match_2 (bufp, no_string1, no_size1, string, size, pos,
+                     regs, stop);
+}
+#endif /* not emacs */
+
+
+/* re_match_2 matches the compiled pattern in BUFP against the
+ (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
+ and SIZE2, respectively). We start matching at POS, and stop
+ matching at STOP.
+
+ If REGS is non-null and the `no_sub' field of BUFP is zero, we
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
+
+ We return -1 if no match, -2 if an internal error (such as the
+ failure stack overflowing). Otherwise, we return the length of the
+ matched substring. */
+
+int
+re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
+ struct re_pattern_buffer *bufp;
+ const char *string1, *string2;
+ int size1, size2;
+ int pos;
+ struct re_registers *regs;
+ int stop;
+{
+ /* General temporaries. */
+ int mcnt;
+ unsigned char *p1;
+
+ /* Just past the end of the corresponding string. */
+ const char *end1, *end2;
+
+ /* Pointers into string1 and string2, just past the last characters in
+ each to consider matching. */
+ const char *end_match_1, *end_match_2;
+
+ /* Where we are in the data, and the end of the current string. */
+ const char *d, *dend;
+
+ /* Where we are in the pattern, and the end of the pattern. */
+ unsigned char *p = bufp->buffer;
+ register unsigned char *pend = p + bufp->used;
+
+ /* We use this to map every character in the string. */
+ char *translate = bufp->translate;
+
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ regstart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next one is tried. */
+ fail_stack_type fail_stack;
+#ifdef DEBUG
+ static unsigned failure_id = 0;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
+#endif
+
+ /* We fill all the registers internally, independent of what we
+ return, for use in backreferences. The number here includes
+ an element for register zero. */
+ unsigned num_regs = bufp->re_nsub + 1;
+
+ /* The currently active registers. */
+ unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+
+ /* Information on the contents of registers. These are pointers into
+ the input strings; they record just what was matched (on this
+ attempt) by a subexpression part of the pattern, that is, the
+ regnum-th regstart pointer points to where in the pattern we began
+ matching and the regnum-th regend points to right after where we
+ stopped matching the regnum-th subexpression. (The zeroth register
+ keeps track of what the whole pattern matches.) */
+ const char **regstart, **regend;
+
+ /* If a group that's operated upon by a repetition operator fails to
+ match anything, then the register for its start will need to be
+ restored because it will have been set to wherever in the string we
+ are when we last see its open-group operator. Similarly for a
+ register's end. */
+ const char **old_regstart, **old_regend;
+
+ /* The is_active field of reg_info helps us keep track of which (possibly
+ nested) subexpressions we are currently in. The matched_something
+ field of reg_info[reg_num] helps us tell whether or not we have
+ matched any of the pattern so far this time through the reg_num-th
+ subexpression. These two fields get reset each time through any
+ loop their register is in. */
+ register_info_type *reg_info;
+
+ /* The following record the register info as found in the above
+ variables when we find a match better than any we've seen before.
+ This happens as we backtrack through the failure points, which in
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+ const char **best_regstart, **best_regend;
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
+
+ /* Used when we pop values we don't care about. */
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
+
+#ifdef DEBUG
+ /* Counts the total number of registers pushed. */
+ unsigned num_regs_pushed = 0;
+#endif
+
+ DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
+
+ INIT_FAIL_STACK ();
+
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
+ {
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
+ }
+#ifdef REGEX_MALLOC
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* REGEX_MALLOC */
+
+ /* The starting position is bogus. */
+ if (pos < 0 || pos > size1 + size2)
+ {
+ FREE_VARIABLES ();
+ return -1;
+ }
+
+ /* Initialize subexpression text positions to -1 to mark ones that no
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = regend[mcnt]
+ = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
+
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
+ IS_ACTIVE (reg_info[mcnt]) = 0;
+ MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
+ }
+
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
+ if (size2 == 0 && string1 != NULL)
+ {
+ string2 = string1;
+ size2 = size1;
+ string1 = 0;
+ size1 = 0;
+ }
+ end1 = string1 + size1;
+ end2 = string2 + size2;
+
+ /* Compute where to stop matching, within the two strings. */
+ if (stop <= size1)
+ {
+ end_match_1 = string1 + stop;
+ end_match_2 = string2;
+ }
+ else
+ {
+ end_match_1 = end1;
+ end_match_2 = string2 + stop - size1;
+ }
+
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
+ this happens before fetching; therefore, at the beginning of the
+ loop, `d' can be pointing at the end of a string, but it cannot
+ equal `string2'. */
+ if (size1 > 0 && pos <= size1)
+ {
+ d = string1 + pos;
+ dend = end_match_1;
+ }
+ else
+ {
+ d = string2 + pos - size1;
+ dend = end_match_2;
+ }
+
+ DEBUG_PRINT1 ("The compiled pattern is: ");
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
+ DEBUG_PRINT1 ("The string to match is: `");
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
+ DEBUG_PRINT1 ("'\n");
+
+ /* This loops over pattern commands. It exits by returning from the
+ function if the match is complete, or it drops through if the match
+ fails at this starting point in the input data. */
+ for (;;)
+ {
+ DEBUG_PRINT2 ("\n0x%x: ", p);
+
+ if (p == pend)
+ { /* End of pattern means we might have succeeded. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
+ if (d != end_match_2)
+ {
+ DEBUG_PRINT1 ("backtracking.\n");
+
+ if (!FAIL_STACK_EMPTY ())
+ { /* More failure points to try. */
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
+
+ /* If exceeds best match so far, save it. */
+ if (!best_regs_set
+ || (same_str_p && d > match_end)
+ || (!same_str_p && !MATCHING_IN_FIRST_STRING))
+ {
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ best_regstart[mcnt] = regstart[mcnt];
+ best_regend[mcnt] = regend[mcnt];
+ }
+ }
+ goto fail;
+ }
+
+ /* If no failure points, don't restore garbage. */
+ else if (best_regs_set)
+ {
+ restore_best_regs:
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
+
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
+
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
+ {
+ regstart[mcnt] = best_regstart[mcnt];
+ regend[mcnt] = best_regend[mcnt];
+ }
+ }
+ } /* d != end_match_2 */
+
+ DEBUG_PRINT1 ("Accepting match.\n");
+
+ /* If caller wants register contents data back, do it. */
+ if (regs && !bufp->no_sub)
+ {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
+ regs->num_regs = MAX (RE_NREGS, num_regs + 1);
+ regs->start = TALLOC (regs->num_regs, regoff_t);
+ regs->end = TALLOC (regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ bufp->regs_allocated = REGS_REALLOCATE;
+ }
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ }
+ }
+ else
+ assert (bufp->regs_allocated == REGS_FIXED);
+
+ /* Convert the pointer data in `regstart' and `regend' to
+ indices. Register zero has to be set differently,
+ since we haven't kept track of any info for it. */
+ if (regs->num_regs > 0)
+ {
+ regs->start[0] = pos;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+ : d - string2 + size1);
+ }
+
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
+ for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
+ {
+ if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ else
+ {
+ regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
+ regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
+ }
+ }
+
+ /* If the regs structure we return has more elements than
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
+ for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
+ regs->start[mcnt] = regs->end[mcnt] = -1;
+ } /* regs && !bufp->no_sub */
+
+ FREE_VARIABLES ();
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
+
+ mcnt = d - pos - (MATCHING_IN_FIRST_STRING
+ ? string1
+ : string2 - size1);
+
+ DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
+
+ return mcnt;
+ }
+
+ /* Otherwise match next pattern command. */
+#ifdef SWITCH_ENUM_BUG
+ switch ((int) ((re_opcode_t) *p++))
+#else
+ switch ((re_opcode_t) *p++)
+#endif
+ {
+ /* Ignore these. Used to ignore the n of succeed_n's which
+ currently have n == 0. */
+ case no_op:
+ DEBUG_PRINT1 ("EXECUTING no_op.\n");
+ break;
+
+
+ /* Match the next n pattern characters exactly. The following
+ byte in the pattern defines n, and the n bytes after that
+ are the characters to match. */
+ case exactn:
+ mcnt = *p++;
+ DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
+
+ /* This is written out as an if-else so we don't waste time
+ testing `translate' inside the loop. */
+ if (translate)
+ {
+ do
+ {
+ PREFETCH ();
+ if (translate[(unsigned char) *d++] != (char) *p++)
+ goto fail;
+ }
+ while (--mcnt);
+ }
+ else
+ {
+ do
+ {
+ PREFETCH ();
+ if (*d++ != (char) *p++) goto fail;
+ }
+ while (--mcnt);
+ }
+ SET_REGS_MATCHED ();
+ break;
+
+
+ /* Match any character except possibly a newline or a null. */
+ case anychar:
+ DEBUG_PRINT1 ("EXECUTING anychar.\n");
+
+ PREFETCH ();
+
+ if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
+ || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
+ goto fail;
+
+ SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
+ d++;
+ break;
+
+
+ case charset:
+ case charset_not:
+ {
+ register unsigned char c;
+ boolean not = (re_opcode_t) *(p - 1) == charset_not;
+
+ DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
+
+ PREFETCH ();
+ c = TRANSLATE (*d); /* The character to match. */
+
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
+ && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ p += 1 + *p;
+
+ if (!not) goto fail;
+
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+ }
+
+
+ /* The beginning of a group is represented by start_memory.
+ The arguments are the register number in the next byte, and the
+ number of groups inner to this one in the next. The text
+ matched within the group is recorded (in the internal
+ registers data structure) under the register number. */
+ case start_memory:
+ DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
+
+ /* Find out if this group can match the empty string. */
+ p1 = p; /* To send to group_match_null_string_p. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[*p])
+ = group_match_null_string_p (&p1, pend, reg_info);
+
+ /* Save the position in the string where we were the last time
+ we were at this open-group operator in case the group is
+ operated upon by a repetition operator, e.g., with `(a*)*b'
+ against `ab'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
+ : regstart[*p];
+ DEBUG_PRINT2 (" old_regstart: %d\n",
+ POINTER_TO_OFFSET (old_regstart[*p]));
+
+ regstart[*p] = d;
+ DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
+
+ IS_ACTIVE (reg_info[*p]) = 1;
+ MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* This is the new highest active register. */
+ highest_active_reg = *p;
+
+ /* If nothing was active before, this is the new lowest active
+ register. */
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *p;
+
+ /* Move past the register number and inner group count. */
+ p += 2;
+ break;
+
+
+ /* The stop_memory opcode represents the end of a group. Its
+ arguments are the same as start_memory's: the register
+ number, and the number of inner groups. */
+ case stop_memory:
+ DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
+
+ /* We need to save the string position the last time we were at
+ this close-group operator in case the group is operated
+ upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
+ against `aba'; then we want to ignore where we are now in
+ the string in case this attempt to match fails. */
+ old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
+ ? REG_UNSET (regend[*p]) ? d : regend[*p]
+ : regend[*p];
+ DEBUG_PRINT2 (" old_regend: %d\n",
+ POINTER_TO_OFFSET (old_regend[*p]));
+
+ regend[*p] = d;
+ DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
+
+ /* This register isn't active anymore. */
+ IS_ACTIVE (reg_info[*p]) = 0;
+
+ /* If this was the only register active, nothing is active
+ anymore. */
+ if (lowest_active_reg == highest_active_reg)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ { /* We must scan for the new highest active register, since
+ it isn't necessarily one less than now: consider
+ (a(b)c(d(e)f)g). When group 3 ends, after the f), the
+ new highest active register is 1. */
+ unsigned char r = *p - 1;
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
+ r--;
+
+ /* If we end up at register zero, that means that we saved
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
+ registers 1 and 2 as a result of the *, but when we pop
+ back to the second ), we are at the stop_memory 1.
+ Thus, nothing is active. */
+ if (r == 0)
+ {
+ lowest_active_reg = NO_LOWEST_ACTIVE_REG;
+ highest_active_reg = NO_HIGHEST_ACTIVE_REG;
+ }
+ else
+ highest_active_reg = r;
+ }
+
+ /* If just failed to match something this time around with a
+ group that's operated on by a repetition operator, try to
+ force exit from the ``loop'', and restore the register
+ information for this group that we had before trying this
+ last match. */
+ if ((!MATCHED_SOMETHING (reg_info[*p])
+ || (re_opcode_t) p[-3] == start_memory)
+ && (p + 2) < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ p1 = p + 2;
+ mcnt = 0;
+ switch ((re_opcode_t) *p1++)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case pop_failure_jump:
+ case maybe_pop_jump:
+ case jump:
+ case dummy_failure_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (is_a_jump_n)
+ p1 += 2;
+ break;
+
+ default:
+ /* do nothing */ ;
+ }
+ p1 += mcnt;
+
+ /* If the next operation is a jump backwards in the pattern
+ to an on_failure_jump right before the start_memory
+ corresponding to this stop_memory, exit from the loop
+ by forcing a failure after pushing on the stack the
+ on_failure_jump's jump in the pattern, and d. */
+ if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
+ && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
+ {
+ /* If this group ever matched anything, then restore
+ what its registers were before trying this last
+ failed match, e.g., with `(a*)*b' against `ab' for
+ regstart[1], and, e.g., with `((a*)*(b*)*)*'
+ against `aba' for regend[3].
+
+ Also restore the registers for inner groups for,
+ e.g., `((a*)(b*))*' against `aba' (register 3 would
+ otherwise get trashed). */
+
+ if (EVER_MATCHED_SOMETHING (reg_info[*p]))
+ {
+ unsigned r;
+
+ EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
+
+ /* Restore this and inner groups' (if any) registers. */
+ for (r = *p; r < *p + *(p + 1); r++)
+ {
+ regstart[r] = old_regstart[r];
+
+ /* xx why this test? */
+ if ((int) old_regend[r] >= (int) regstart[r])
+ regend[r] = old_regend[r];
+ }
+ }
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
+
+ goto fail;
+ }
+ }
+
+ /* Move past the register number and the inner group count. */
+ p += 2;
+ break;
+
+
+ /* \<digit> has been turned into a `duplicate' command which is
+ followed by the numeric value of <digit> as the register number. */
+ case duplicate:
+ {
+ register const char *d2, *dend2;
+ int regno = *p++; /* Get which register to match against. */
+ DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
+
+ /* Can't back reference a group which we've never matched. */
+ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
+ goto fail;
+
+ /* Where in input to try to start matching. */
+ d2 = regstart[regno];
+
+ /* Where to stop matching; if both the place to start and
+ the place to stop matching are in the same string, then
+ set to the place to stop, otherwise, for now have to use
+ the end of the first string. */
+
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
+ ? regend[regno] : end_match_1);
+ for (;;)
+ {
+ /* If necessary, advance to next segment in register
+ contents. */
+ while (d2 == dend2)
+ {
+ if (dend2 == end_match_2) break;
+ if (dend2 == regend[regno]) break;
+
+ /* End of string1 => advance to string2. */
+ d2 = string2;
+ dend2 = regend[regno];
+ }
+ /* At end of register contents => success */
+ if (d2 == dend2) break;
+
+ /* If necessary, advance to next segment in data. */
+ PREFETCH ();
+
+ /* How many characters left in this segment to match. */
+ mcnt = dend - d;
+
+ /* Want how many consecutive characters we can match in
+ one shot, so, if necessary, adjust the count. */
+ if (mcnt > dend2 - d2)
+ mcnt = dend2 - d2;
+
+ /* Compare that many; failure if mismatch, else move
+ past them. */
+ if (translate
+ ? bcmp_translate (d, d2, mcnt, translate)
+ : bcmp (d, d2, mcnt))
+ goto fail;
+ d += mcnt, d2 += mcnt;
+ }
+ }
+ break;
+
+
+ /* begline matches the empty string at the beginning of the string
+ (unless `not_bol' is set in `bufp'), and, if
+ `newline_anchor' is set, after newlines. */
+ case begline:
+ DEBUG_PRINT1 ("EXECUTING begline.\n");
+
+ if (AT_STRINGS_BEG (d))
+ {
+ if (!bufp->not_bol) break;
+ }
+ else if (d[-1] == '\n' && bufp->newline_anchor)
+ {
+ break;
+ }
+ /* In all other cases, we fail. */
+ goto fail;
+
+
+ /* endline is the dual of begline. */
+ case endline:
+ DEBUG_PRINT1 ("EXECUTING endline.\n");
+
+ if (AT_STRINGS_END (d))
+ {
+ if (!bufp->not_eol) break;
+ }
+
+ /* We have to ``prefetch'' the next character. */
+ else if ((d == end1 ? *string2 : *d) == '\n'
+ && bufp->newline_anchor)
+ {
+ break;
+ }
+ goto fail;
+
+
+ /* Match at the very beginning of the data. */
+ case begbuf:
+ DEBUG_PRINT1 ("EXECUTING begbuf.\n");
+ if (AT_STRINGS_BEG (d))
+ break;
+ goto fail;
+
+
+ /* Match at the very end of the data. */
+ case endbuf:
+ DEBUG_PRINT1 ("EXECUTING endbuf.\n");
+ if (AT_STRINGS_END (d))
+ break;
+ goto fail;
+
+
+ /* on_failure_keep_string_jump is used to optimize `.*\n'. It
+ pushes NULL as the value for the string on the stack. Then
+ `pop_failure_point' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
+
+ Because this is used only in specific cases, we don't need to
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ `anychar's code to do something besides goto fail in this
+ case; that seems worse than this. */
+ case on_failure_keep_string_jump:
+ DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
+
+ PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
+ break;
+
+
+ /* Uses of on_failure_jump:
+
+ Each alternative starts with an on_failure_jump that points
+ to the beginning of the next alternative. Each alternative
+ except the last ends with a jump that in effect jumps past
+ the rest of the alternatives. (They really jump to the
+ ending jump of the following alternative, because tensioning
+ these jumps is a hassle.)
+
+ Repeats start with an on_failure_jump that points past both
+ the repetition text and either the following jump or
+ pop_failure_jump back to this on_failure_jump. */
+ case on_failure_jump:
+ on_failure:
+ DEBUG_PRINT1 ("EXECUTING on_failure_jump");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
+
+ /* If this on_failure_jump comes right before a group (i.e.,
+ the original * applied to a group), save the information
+ for that group and all inner ones, so that if we fail back
+ to this point, the group's information will be correct.
+ For example, in \(a*\)*\1, we need the preceding group,
+ and in \(\(a*\)b*\)\2, we need the inner group. */
+
+ /* We can't use `p' to check ahead because we push
+ a failure point to `p + mcnt' after we do this. */
+ p1 = p;
+
+ /* We need to skip no_op's before we look for the
+ start_memory in case this on_failure_jump is happening as
+ the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
+ against aba. */
+ while (p1 < pend && (re_opcode_t) *p1 == no_op)
+ p1++;
+
+ if (p1 < pend && (re_opcode_t) *p1 == start_memory)
+ {
+ /* We have a new highest active register now. This will
+ get reset at the start_memory we are about to get to,
+ but we will have saved all the registers relevant to
+ this repetition op, as described above. */
+ highest_active_reg = *(p1 + 1) + *(p1 + 2);
+ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
+ lowest_active_reg = *(p1 + 1);
+ }
+
+ DEBUG_PRINT1 (":\n");
+ PUSH_FAILURE_POINT (p + mcnt, d, -2);
+ break;
+
+
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
+ case maybe_pop_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
+ {
+ register unsigned char *p2 = p;
+
+ /* Compare the beginning of the repeat with what in the
+ pattern follows its end. If we can establish that there
+ is nothing that they would both match, i.e., that we
+ would have to backtrack because of (as in, e.g., `a*a')
+ then we can change to pop_failure_jump, because we'll
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
+
+ /* Skip over open/close-group commands. */
+ while (p2 + 2 < pend
+ && ((re_opcode_t) *p2 == stop_memory
+ || (re_opcode_t) *p2 == start_memory))
+ p2 += 3; /* Skip over args, too. */
+
+ /* If we're at the end of the pattern, we can change. */
+ if (p2 == pend)
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
+
+ else if ((re_opcode_t) *p2 == exactn
+ || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
+ {
+ register unsigned char c
+ = *p2 == (unsigned char) endline ? '\n' : p2[2];
+ p1 = p + mcnt;
+
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_finalize_jump' of this case. Examine what
+ follows. */
+ if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
+ else if ((re_opcode_t) p1[3] == charset
+ || (re_opcode_t) p1[3] == charset_not)
+ {
+ int not = (re_opcode_t) p1[3] == charset_not;
+
+ if (c < (unsigned char) (p1[4] * BYTEWIDTH)
+ && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
+ not = !not;
+
+ /* `not' is equal to 1 if c would match, which means
+ that we can't change to pop_failure_jump. */
+ if (!not)
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
+ }
+ }
+ }
+ p -= 2; /* Point at relative address again. */
+ if ((re_opcode_t) p[-1] != pop_failure_jump)
+ {
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
+ }
+ /* Note fall through. */
+
+
+ /* The end of a simple repeat has a pop_failure_jump back to
+ its matching on_failure_jump, where the latter will push a
+ failure point. The pop_failure_jump takes off failure
+ points put on by this pop_failure_jump's matching
+ on_failure_jump; we got through the pattern to here from the
+ matching on_failure_jump, so didn't fail. */
+ case pop_failure_jump:
+ {
+ /* We need to pass separate storage for the lowest and
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `pop_failure_point'. */
+ unsigned dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
+
+ DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
+ }
+ /* Note fall through. */
+
+
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ unconditional_jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
+ p += mcnt; /* Do the jump. */
+ DEBUG_PRINT2 ("(to 0x%x).\n", p);
+ break;
+
+
+ /* We need this opcode so we can detect where alternatives end
+ in `group_match_null_string_p' et al. */
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
+
+
+ /* Normally, the on_failure_jump pushes a failure point, which
+ then gets popped at pop_failure_jump. We will end up at
+ pop_failure_jump, also, and with a pattern of, say, `a+', we
+ are skipping over the on_failure_jump, so we have to push
+ something meaningless for pop_failure_jump to pop. */
+ case dummy_failure_jump:
+ DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
+ /* It doesn't matter what we push for the string here. What
+ the code at `fail' tests is the value for the pattern. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
+
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
+ case succeed_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+
+ assert (mcnt >= 0);
+ /* Originally, this is how many times we HAVE to succeed. */
+ if (mcnt > 0)
+ {
+ mcnt--;
+ p += 2;
+ STORE_NUMBER_AND_INCR (p, mcnt);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
+ }
+ else if (mcnt == 0)
+ {
+ DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
+ p[2] = (unsigned char) no_op;
+ p[3] = (unsigned char) no_op;
+ goto on_failure;
+ }
+ break;
+
+ case jump_n:
+ EXTRACT_NUMBER (mcnt, p + 2);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
+
+ /* Originally, this is how many times we CAN jump. */
+ if (mcnt)
+ {
+ mcnt--;
+ STORE_NUMBER (p + 2, mcnt);
+ goto unconditional_jump;
+ }
+ /* If don't have to jump any more, skip over the rest of command. */
+ else
+ p += 4;
+ break;
+
+ case set_number_at:
+ {
+ DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
+
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ p1 = p + mcnt;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
+ STORE_NUMBER (p1, mcnt);
+ break;
+ }
+
+ case wordbound:
+ DEBUG_PRINT1 ("EXECUTING wordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ break;
+ goto fail;
+
+ case notwordbound:
+ DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
+ if (AT_WORD_BOUNDARY (d))
+ goto fail;
+ break;
+
+ case wordbeg:
+ DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
+ break;
+ goto fail;
+
+ case wordend:
+ DEBUG_PRINT1 ("EXECUTING wordend.\n");
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
+ break;
+ goto fail;
+
+#ifdef emacs
+#ifdef emacs19
+ case before_dot:
+ DEBUG_PRINT1 ("EXECUTING before_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) >= point)
+ goto fail;
+ break;
+
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) != point)
+ goto fail;
+ break;
+
+ case after_dot:
+ DEBUG_PRINT1 ("EXECUTING after_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) <= point)
+ goto fail;
+ break;
+#else /* not emacs19 */
+ case at_dot:
+ DEBUG_PRINT1 ("EXECUTING at_dot.\n");
+ if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
+ goto fail;
+ break;
+#endif /* not emacs19 */
+
+ case syntaxspec:
+ DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchsyntax;
+
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
+ mcnt = (int) Sword;
+ matchsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+ case notsyntaxspec:
+ DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
+ mcnt = *p++;
+ goto matchnotsyntax;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
+ mcnt = (int) Sword;
+ matchnotsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
+ goto fail;
+ SET_REGS_MATCHED ();
+ break;
+
+#else /* not emacs */
+ case wordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+
+ case notwordchar:
+ DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
+ PREFETCH ();
+ if (WORDCHAR_P (d))
+ goto fail;
+ SET_REGS_MATCHED ();
+ d++;
+ break;
+#endif /* not emacs */
+
+ default:
+ abort ();
+ }
+ continue; /* Successfully executed one pattern command; keep going. */
+
+
+ /* We goto here if a matching operation fails. */
+ fail:
+ if (!FAIL_STACK_EMPTY ())
+ { /* A restart point is known. Restore to that state. */
+ DEBUG_PRINT1 ("\nFAIL:\n");
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
+
+ /* If this failure point is a dummy, try the next one. */
+ if (!p)
+ goto fail;
+
+ /* If we failed to the end of the pattern, don't examine *p. */
+ assert (p <= pend);
+ if (p < pend)
+ {
+ boolean is_a_jump_n = false;
+
+ /* If failed to a backwards jump that's part of a repetition
+ loop, need to pop this failure point and use the next one. */
+ switch ((re_opcode_t) *p)
+ {
+ case jump_n:
+ is_a_jump_n = true;
+ case maybe_pop_jump:
+ case pop_failure_jump:
+ case jump:
+ p1 = p + 1;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+
+ if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
+ || (!is_a_jump_n
+ && (re_opcode_t) *p1 == on_failure_jump))
+ goto fail;
+ break;
+ default:
+ /* do nothing */ ;
+ }
+ }
+
+ if (d >= string1 && d <= end1)
+ dend = end_match_1;
+ }
+ else
+ break; /* Matching at this starting point really fails. */
+ } /* for (;;) */
+
+ if (best_regs_set)
+ goto restore_best_regs;
+
+ FREE_VARIABLES ();
+
+ return -1; /* Failure to match. */
+} /* re_match_2 */
+
+/* Subroutine definitions for re_match_2. */
+
+
+/* Decide whether a group can match the empty string.
+
+ We are passed P pointing to a register number after a start_memory.
+ The scan starts two bytes further on and runs while the cursor is
+ below END. REG_INFO is handed on unchanged to
+ alt_match_null_string_p and common_op_match_null_string_p.
+
+ Return true if the pattern up to the corresponding stop_memory can
+ match the empty string, and false otherwise (this includes reaching
+ END without meeting a stop_memory).
+
+ If we find the matching stop_memory, sets P to point to one past its number.
+ On every other return path P is left exactly as it was passed in.
+
+ NOTE(review): re_match_2 steps over two bytes after a stop_memory
+ opcode (register number and inner-group count), so the position
+ stored in P looks like the count byte rather than the next opcode;
+ confirm that callers which keep scanning from P cope with that.
+
+ We don't handle duplicates properly (yet). */
+
+static boolean
+group_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ /* Point to after the args to the start_memory. */
+ unsigned char *p1 = *p + 2;
+
+ while (p1 < end)
+ {
+ /* Skip over opcodes that can match nothing, and return true or
+ false, as appropriate, when we get to one that can't, or to the
+ matching stop_memory. */
+
+ switch ((re_opcode_t) *p1)
+ {
+ /* Could be either a loop or a series of alternatives. */
+ case on_failure_jump:
+ p1++; /* Step from the opcode to its operand. */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1); /* mcnt is the jump offset; p1
+ is presumably left just past the operand, as the macro's name
+ and its use in re_match_2 suggest. */
+
+ /* If the next operation is not a jump backwards in the
+ pattern. */
+
+ if (mcnt >= 0)
+ {
+ /* Go through the on_failure_jumps of the alternatives,
+ seeing if any of the alternatives cannot match nothing.
+ The last alternative starts with only a jump,
+ whereas the rest start with on_failure_jump and end
+ with a jump, e.g., here is the pattern for `a|b|c':
+
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
+ /exactn/1/c
+
+ So, we have to first go through the first (n-1)
+ alternatives and then deal with the last one separately. */
+
+
+ /* Deal with the first (n-1) alternatives, which start
+ with an on_failure_jump (see above) that jumps to right
+ past a jump_past_alt. The test below looks three bytes
+ before the jump target (p1 + mcnt) for that
+ jump_past_alt opcode. */
+
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
+ {
+ /* `mcnt' holds how many bytes long the alternative
+ is, including the ending `jump_past_alt' and
+ its number. */
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
+ reg_info))
+ return false;
+
+ /* Move to right after this alternative, including the
+ jump_past_alt. */
+ p1 += mcnt;
+
+ /* Break if it's the beginning of an n-th alternative
+ that doesn't begin with an on_failure_jump. */
+ if ((re_opcode_t) *p1 != on_failure_jump)
+ break;
+
+ /* Still have to check that it's not an n-th
+ alternative that starts with an on_failure_jump. */
+ p1++;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
+ {
+ /* Get to the beginning of the n-th alternative: undo
+ the three bytes (opcode and operand) just stepped over. */
+ p1 -= 3;
+ break;
+ }
+ }
+
+ /* Deal with the last alternative: go back and get number
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
+ EXTRACT_NUMBER (mcnt, p1 - 2);
+
+ if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
+ return false;
+
+ p1 += mcnt; /* Get past the n-th alternative. */
+ } /* if mcnt >= 0 */
+ break; /* For a negative offset nothing else is done here; the
+ outer loop carries on from wherever p1 was left. */
+
+
+ case stop_memory:
+ assert (p1[1] == **p); /* The closing register number must be the
+ one P pointed at on entry. */
+ *p = p1 + 2; /* The only place where P is written. */
+ return true;
+
+
+ default: /* Everything else is judged by the shared helper, which
+ also advances p1 when it returns true. */
+ if (!common_op_match_null_string_p (&p1, end, reg_info))
+ return false;
+ }
+ } /* while p1 < end */
+
+ return false; /* Reached END without meeting a stop_memory. */
+} /* group_match_null_string_p */
+
+
+/* Report whether a single alternative can match the empty string.
+
+   P is the first byte of the alternative and END is one byte past its
+   last.  Unlike group_match_null_string_p, this routine never walks a
+   chain of alternatives.  The alternative may contain groups; those,
+   like every opcode other than on_failure_jump, are judged by
+   common_op_match_null_string_p, to which REG_INFO is passed as is.
+
+   Returns true once the scan has reached END, and false as soon as
+   common_op_match_null_string_p rejects an opcode.  */
+
+static boolean
+alt_match_null_string_p (p, end, reg_info)
+     unsigned char *p, *end;
+     register_info_type *reg_info;
+{
+  unsigned char *scan;
+  int offset;
+
+  for (scan = p; scan < end; )
+    {
+      if ((re_opcode_t) *scan == on_failure_jump)
+        {
+          /* It's a loop: read the jump's number and continue from
+             the place it designates.  */
+          scan++;
+          EXTRACT_NUMBER_AND_INCR (offset, scan);
+          scan += offset;
+        }
+      else if (!common_op_match_null_string_p (&scan, end, reg_info))
+        {
+          /* Met something that cannot match nothing.  */
+          return false;
+        }
+    }
+
+  return true;
+} /* alt_match_null_string_p */
+
+
+/* Deals with the ops common to group_match_null_string_p and
+ alt_match_null_string_p.
+
+ *P is the opcode to examine. END and REG_INFO are forwarded to
+ group_match_null_string_p when a nested group is met; REG_INFO is
+ also consulted for back references and updated for nested groups.
+
+ Returns true if the op is accepted as able to match the empty
+ string, false otherwise.
+
+ Sets P to one after the op and its arguments, if any. P is written
+ only on the true return; a false return leaves it as passed in. */
+
+static boolean
+common_op_match_null_string_p (p, end, reg_info)
+ unsigned char **p, *end;
+ register_info_type *reg_info;
+{
+ int mcnt;
+ boolean ret;
+ int reg_no;
+ unsigned char *p1 = *p;
+
+ switch ((re_opcode_t) *p1++) /* p1 now points just past the opcode. */
+ {
+ case no_op:
+ case begline:
+ case endline:
+ case begbuf:
+ case endbuf:
+ case wordbeg:
+ case wordend:
+ case wordbound:
+ case notwordbound:
+#ifdef emacs
+ case before_dot:
+ case at_dot:
+ case after_dot:
+#endif
+ break; /* Accepted with nothing further to skip: re_match_2 reads
+ no argument bytes for any of these opcodes. */
+
+ case start_memory:
+ reg_no = *p1; /* Register number of the nested group. */
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
+ ret = group_match_null_string_p (&p1, end, reg_info); /* Moves p1
+ only when it finds the group's stop_memory. */
+
+ /* Have to set this here in case we're checking a group which
+ contains a group and a back reference to it. An answer that
+ was recorded earlier is not overwritten. */
+
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
+ REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
+
+ if (!ret)
+ return false;
+ break;
+
+ /* If this is an optimized succeed_n for zero times, make the jump. */
+ case jump:
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ if (mcnt >= 0)
+ p1 += mcnt;
+ else
+ return false; /* A backward jump is rejected outright. */
+ break;
+
+ case succeed_n:
+ /* Get to the number of times to succeed. */
+ p1 += 2;
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+
+ if (mcnt == 0)
+ {
+ p1 -= 4; /* Back to the first number after the opcode, which
+ re_match_2 uses as the jump offset for succeed_n. */
+ EXTRACT_NUMBER_AND_INCR (mcnt, p1);
+ p1 += mcnt;
+ }
+ else
+ return false; /* Must still succeed at least once. */
+ break;
+
+ case duplicate: /* NOTE(review): re_match_2 reads one register-number
+ byte after this opcode (`*p++'), but p1 is not stepped over it
+ here, so on the true return P seems to point at that byte rather
+ than at the next opcode; confirm. */
+ if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
+ return false;
+ break;
+
+ case set_number_at:
+ p1 += 4; /* NOTE(review): control falls into `default' below and
+ returns false, so this adjustment is never stored in P;
+ confirm whether a `break' was intended. */
+
+ default:
+ /* All other opcodes mean we cannot match the empty string. */
+ return false;
+ }
+
+ *p = p1; /* Publish the new position only for accepted ops. */
+ return true;
+} /* common_op_match_null_string_p */
+
+
+/* Compare S1 with S2 after mapping each byte through TRANSLATE.
+
+   Returns zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for
+   LEN bytes; nonzero (1) at the first position where they differ.  A
+   LEN of zero compares nothing and yields zero.  */
+
+static int
+bcmp_translate (s1, s2, len, translate)
+     unsigned char *s1, *s2;
+     register int len;
+     char *translate;
+{
+  register unsigned char *left = s1;
+  register unsigned char *right = s2;
+
+  for (; len; len--)
+    {
+      if (translate[*left] != translate[*right])
+        return 1;
+
+      left++;
+      right++;
+    }
+
+  return 0;
+}
+
+/* Entry points for GNU code. */
+
+/* re_compile_pattern is the regular expression compiler used by GNU
+   code.
+
+   PATTERN (LENGTH bytes long) is compiled into BUFP with the syntax
+   currently held in `re_syntax_options'; regex_compile does the
+   actual compilation.
+
+   Assumes the `allocated' (and perhaps `buffer') and `translate'
+   fields are set in BUFP on entry.
+
+   Returns the `re_error_msg' entry selected by regex_compile's
+   result: 0 if the pattern was valid, otherwise an error string.  */
+
+const char *
+re_compile_pattern (pattern, length, bufp)
+     const char *pattern;
+     int length;
+     struct re_pattern_buffer *bufp;
+{
+  /* Anchors match at newlines as well.  */
+  bufp->newline_anchor = 1;
+
+  /* GNU code decides whether it wants register information by passing
+     null (or not) for the REGS argument to re_match and friends, not
+     by setting no_sub, so keep no_sub clear.  */
+  bufp->no_sub = 0;
+
+  /* GNU code is written to assume at least RE_NREGS registers will be
+     set (and at least one extra will be -1).  */
+  bufp->regs_allocated = REGS_UNALLOCATED;
+
+  return re_error_msg[(int) regex_compile (pattern, length,
+                                           re_syntax_options, bufp)];
+}
+
+/* Entry points compatible with 4.2 BSD regex library. We don't define
+ them if this is an Emacs or POSIX compilation. */
+
+#if !defined (emacs) && !defined (_POSIX_SOURCE)
+
+/* BSD has one and only one pattern buffer.  re_comp compiles into it
+   and re_exec searches with it.  */
+static struct re_pattern_buffer re_comp_buf;
+
+/* 4.2 BSD-style compile: compile S into the single static pattern
+   buffer, using the syntax in `re_syntax_options'.
+
+   If S is null nothing is compiled; the call only reports whether the
+   static buffer has been set up by an earlier call.
+
+   Returns 0 when S is null and the buffer exists, "No previous regular
+   expression" when S is null and it does not, "Memory exhausted" when
+   the first-time allocations fail, and otherwise the `re_error_msg'
+   entry selected by regex_compile's result.
+
+   The pattern buffer and the fastmap are allocated together the first
+   time through.  If the fastmap cannot be obtained the pattern buffer
+   is released again, so a failed call leaves no half-initialized
+   state: a later call retries both allocations, and re_comp (NULL)
+   keeps reporting that there is no previous expression.  */
+char *
+re_comp (s)
+     const char *s;
+{
+  reg_errcode_t ret;
+
+  if (!s)
+    {
+      if (!re_comp_buf.buffer)
+        return "No previous regular expression";
+      return 0;
+    }
+
+  if (!re_comp_buf.buffer)
+    {
+      re_comp_buf.buffer = (unsigned char *) malloc (200);
+      if (re_comp_buf.buffer == NULL)
+        return "Memory exhausted";
+      re_comp_buf.allocated = 200;
+
+      re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
+      if (re_comp_buf.fastmap == NULL)
+        {
+          /* Give the pattern buffer back.  Keeping it would make every
+             later call skip this block (so the fastmap would never be
+             allocated) and would make re_comp (NULL) claim that a
+             previous expression exists.  */
+          free (re_comp_buf.buffer);
+          re_comp_buf.buffer = NULL;
+          re_comp_buf.allocated = 0;
+          return "Memory exhausted";
+        }
+    }
+
+  /* Since `re_exec' always passes NULL for the `regs' argument, we
+     don't need to initialize the pattern buffer fields which affect it.  */
+
+  /* Match anchors at newlines.  */
+  re_comp_buf.newline_anchor = 1;
+
+  ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
+
+  /* Yes, we're discarding `const' here.  */
+  return (char *) re_error_msg[(int) ret];
+}
+
+
+/* Search all of S with the pattern held in `re_comp_buf'.  The result
+   is 1 when re_search reports a nonnegative position and 0 otherwise.
+   No register information is requested.  */
+
+int
+re_exec (s)
+     const char *s;
+{
+  const int len = strlen (s);
+  int where;
+
+  where = re_search (&re_comp_buf, s, len, 0, len,
+                     (struct re_registers *) 0);
+
+  return where >= 0;
+}
+#endif /* not emacs and not _POSIX_SOURCE */
+
+/* POSIX.2 functions. Don't define these for Emacs. */
+
+#ifndef emacs
+
+/* regcomp takes a regular expression as a string and compiles it.
+
+ PREG is a regex_t *. We do not expect any fields to be initialized,
+ since POSIX says we shouldn't. Thus, we set
+
+ `buffer' to the compiled pattern;
+ `used' to the length of the compiled pattern;
+ `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
+ REG_EXTENDED bit in CFLAGS is set; otherwise, to
+ RE_SYNTAX_POSIX_BASIC;
+ `newline_anchor' to REG_NEWLINE being set in CFLAGS;
+ `fastmap' and `fastmap_accurate' to zero;
+ `re_nsub' to the number of subexpressions in PATTERN.
+
+ PATTERN is the address of the pattern string.
+
+ CFLAGS is a series of bits which affect compilation.
+
+ If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
+ use POSIX basic syntax.
+
+ If REG_NEWLINE is set, then . and [^...] don't match newline.
+ Also, regexec will try a match beginning after every newline.
+
+ If REG_ICASE is set, then we consider upper- and lowercase
+ versions of letters to be equivalent when matching.
+
+ If REG_NOSUB is set, then when PREG is passed to regexec, that
+ routine will report only success or failure, and nothing about the
+ registers.
+
+ It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
+ the return codes and their meanings.) */
+
+int
+regcomp (preg, pattern, cflags)
+     regex_t *preg;
+     const char *pattern;
+     int cflags;
+{
+  reg_errcode_t status;
+  unsigned syntax;
+
+  /* REG_EXTENDED selects between the two POSIX syntaxes.  */
+  if (cflags & REG_EXTENDED)
+    syntax = RE_SYNTAX_POSIX_EXTENDED;
+  else
+    syntax = RE_SYNTAX_POSIX_BASIC;
+
+  /* Start with no storage; regex_compile allocates the space for the
+     compiled pattern itself.  */
+  preg->buffer = 0;
+  preg->allocated = 0;
+
+  /* Searching is done without a fastmap.  With REG_NEWLINE a fastmap
+     would have to contain every character that can follow a newline;
+     trying every position instead keeps that case simple.  */
+  preg->fastmap = 0;
+
+  /* REG_ICASE: build a table folding uppercase onto lowercase.  */
+  preg->translate = NULL;
+  if (cflags & REG_ICASE)
+    {
+      unsigned c;
+
+      preg->translate = (char *) malloc (CHAR_SET_SIZE);
+      if (preg->translate == NULL)
+        return (int) REG_ESPACE;
+
+      for (c = 0; c < CHAR_SET_SIZE; c++)
+        {
+          if (ISUPPER (c))
+            preg->translate[c] = tolower (c);
+          else
+            preg->translate[c] = c;
+        }
+    }
+
+  /* REG_NEWLINE: neither . nor [^...] matches a newline, and anchors
+     match at newlines while searching.  */
+  preg->newline_anchor = 0;
+  if (cflags & REG_NEWLINE)
+    {
+      syntax &= ~RE_DOT_NEWLINE;
+      syntax |= RE_HAT_LISTS_NOT_NEWLINE;
+      preg->newline_anchor = 1;
+    }
+
+  preg->no_sub = (cflags & REG_NOSUB) != 0;
+
+  /* POSIX says a null character in the pattern terminates it, so
+     strlen gives the pattern length.  */
+  status = regex_compile (pattern, strlen (pattern), syntax, preg);
+
+  /* POSIX has a single code, REG_EPAREN, for both an unmatched
+     open-group and an unmatched close-group.  */
+  if (status == REG_ERPAREN)
+    status = REG_EPAREN;
+
+  return (int) status;
+}
+
+
+/* regexec searches for a given pattern, specified by PREG, in the
+   string STRING.
+
+   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
+   `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
+   least NMATCH elements, and we set them to the offsets of the
+   corresponding matched substrings.
+
+   EFLAGS specifies `execution flags' which affect matching: if
+   REG_NOTBOL is set, then ^ does not match at the beginning of the
+   string; if REG_NOTEOL is set, then $ does not match at the end.
+
+   We return 0 if we find a match and REG_NOMATCH if not.  REG_NOMATCH
+   is also returned if the temporary register arrays cannot be
+   allocated; in that case PMATCH is left untouched and nothing is
+   leaked.  */
+
+int
+regexec (preg, string, nmatch, pmatch, eflags)
+     const regex_t *preg;
+     const char *string;
+     size_t nmatch;
+     regmatch_t pmatch[];
+     int eflags;
+{
+  int ret;
+  struct re_registers regs;
+  regex_t private_preg;
+  int len = strlen (string);
+  boolean want_reg_info = !preg->no_sub && nmatch > 0;
+
+  /* PREG is const, so the per-call flags are set on a private copy.  */
+  private_preg = *preg;
+
+  private_preg.not_bol = !!(eflags & REG_NOTBOL);
+  private_preg.not_eol = !!(eflags & REG_NOTEOL);
+
+  /* The user has told us exactly how many registers to return
+     information about, via `nmatch'.  We have to pass that on to the
+     matching routines.  */
+  private_preg.regs_allocated = REGS_FIXED;
+
+  if (want_reg_info)
+    {
+      regs.num_regs = nmatch;
+      regs.start = TALLOC (nmatch, regoff_t);
+      regs.end = TALLOC (nmatch, regoff_t);
+      if (regs.start == NULL || regs.end == NULL)
+        {
+          /* Only one of the two allocations may have failed; release
+             whichever one succeeded so it is not leaked.  */
+          if (regs.start != NULL)
+            free (regs.start);
+          if (regs.end != NULL)
+            free (regs.end);
+          return (int) REG_NOMATCH;
+        }
+    }
+
+  /* Perform the searching operation.  */
+  ret = re_search (&private_preg, string, len,
+                   /* start: */ 0, /* range: */ len,
+                   want_reg_info ? &regs : (struct re_registers *) 0);
+
+  /* Copy the register information to the POSIX structure.  */
+  if (want_reg_info)
+    {
+      if (ret >= 0)
+        {
+          unsigned r;
+
+          for (r = 0; r < nmatch; r++)
+            {
+              pmatch[r].rm_so = regs.start[r];
+              pmatch[r].rm_eo = regs.end[r];
+            }
+        }
+
+      /* If we needed the temporary register info, free the space now.  */
+      free (regs.start);
+      free (regs.end);
+    }
+
+  /* We want zero return to mean success, unlike `re_search'.  */
+  return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
+}
+
+
+/* Returns a message corresponding to an error code, ERRCODE, returned
+ from either regcomp or regexec. We don't use PREG here. */
+
+size_t
+regerror (errcode, preg, errbuf, errbuf_size)
+ int errcode;
+ const regex_t *preg;
+ char *errbuf;
+ size_t errbuf_size;
+{
+ const char *msg;
+ size_t msg_size;
+
+ if (errcode < 0
+ || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ msg = re_error_msg[errcode];
+
+ /* POSIX doesn't require that we do anything in this case, but why
+ not be nice. */
+ if (! msg)
+ msg = "Success";
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
+
+ if (errbuf_size != 0)
+ {
+ if (msg_size > errbuf_size)
+ {
+ strncpy (errbuf, msg, errbuf_size - 1);
+ errbuf[errbuf_size - 1] = 0;
+ }
+ else
+ strcpy (errbuf, msg);
+ }
+
+ return msg_size;
+}
+
+
+/* Release the compiled pattern, the fastmap and the translate table
+   that PREG points to, then reset the pointers and the associated
+   size and accuracy fields so PREG holds no stale references.  */
+
+void
+regfree (preg)
+     regex_t *preg;
+{
+  /* Each pointer is checked before being handed to free.  */
+  if (preg->buffer)
+    free (preg->buffer);
+  if (preg->fastmap)
+    free (preg->fastmap);
+  if (preg->translate)
+    free (preg->translate);
+
+  preg->buffer = NULL;
+  preg->fastmap = NULL;
+  preg->translate = NULL;
+
+  preg->allocated = 0;
+  preg->used = 0;
+  preg->fastmap_accurate = 0;
+}
+
+#endif /* not emacs */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/regex.h b/gnu/lib/libregex/regex.h
new file mode 100644
index 0000000..408dd21
--- /dev/null
+++ b/gnu/lib/libregex/regex.h
@@ -0,0 +1,490 @@
+/* Definitions for data structures and routines for the regular
+ expression library, version 0.12.
+
+ Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+
+#ifndef __REGEXP_LIBRARY_H__
+#define __REGEXP_LIBRARY_H__
+
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
+
+/* The following bits are used to determine the regexp syntax we
+ recognize. The set/not-set meanings are chosen so that Emacs syntax
+ remains the value 0. The bits are given in alphabetical order, and
+ the definitions shifted by one from the previous bit; thus, when we
+ add or remove a bit, only one other definition need change. */
+typedef unsigned reg_syntax_t;
+
+/* If this bit is not set, then \ inside a bracket expression is literal.
+ If set, then such a \ quotes the following character. */
+#define RE_BACKSLASH_ESCAPE_IN_LISTS (1)
+
+/* If this bit is not set, then + and ? are operators, and \+ and \? are
+ literals.
+ If set, then \+ and \? are operators and + and ? are literals. */
+#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1)
+
+/* If this bit is set, then character classes are supported. They are:
+ [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:],
+ [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
+ If not set, then character classes are not supported. */
+#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
+
+/* If this bit is set, then ^ and $ are always anchors (outside bracket
+ expressions, of course).
+ If this bit is not set, then it depends:
+ ^ is an anchor if it is at the beginning of a regular
+ expression or after an open-group or an alternation operator;
+ $ is an anchor if it is at the end of a regular expression, or
+ before a close-group or an alternation operator.
+
+ This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
+#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
+
+/* If this bit is set, then special characters are always special
+ regardless of where they are in the pattern.
+ If this bit is not set, then special characters are special only in
+ some contexts; otherwise they are ordinary. Specifically,
+ * + ? and intervals are only special when not after the beginning,
+ open-group, or alternation operator. */
+#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
+
+/* If this bit is set, then *, +, ?, and { cannot be first in an re or
+ immediately after an alternation or begin-group operator. */
+#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
+
+/* If this bit is set, then . matches newline.
+ If not set, then it doesn't. */
+#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
+
+/* If this bit is set, then . doesn't match NUL.
+ If not set, then it does. */
+#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
+
+/* If this bit is set, nonmatching lists [^...] do not match newline.
+ If not set, they do. */
+#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1)
+
+/* If this bit is set, either \{...\} or {...} defines an
+ interval, depending on RE_NO_BK_BRACES.
+ If not set, \{, \}, {, and } are literals. */
+#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
+
+/* If this bit is set, +, ? and | aren't recognized as operators.
+ If not set, they are. */
+#define RE_LIMITED_OPS (RE_INTERVALS << 1)
+
+/* If this bit is set, newline is an alternation operator.
+ If not set, newline is literal. */
+#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
+
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
+
+/* If this bit is set, (...) defines a group, and \( and \) are literals.
+ If not set, \(...\) defines a group, and ( and ) are literals. */
+#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
+
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
+#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
+
+/* If this bit is set, then | is an alternation operator, and \| is literal.
+ If not set, then \| is an alternation operator, and | is literal. */
+#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
+
+/* If this bit is set, then an ending range point collating higher
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates higher than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
+
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
+
+/* This global variable defines the particular regexp syntax to use (for
+ some interfaces). When a regexp is compiled, the syntax used is
+ stored in the pattern buffer, so changing this does not affect
+ already-compiled regexps. */
+extern reg_syntax_t re_syntax_options;
+
+/* Define combinations of the above bits for the standard possibilities.
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
+/* [[[begin syntaxes]]] */
+#define RE_SYNTAX_EMACS 0
+
+#define RE_SYNTAX_AWK \
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
+
+#define RE_SYNTAX_GREP \
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
+
+#define RE_SYNTAX_EGREP \
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
+
+#define RE_SYNTAX_POSIX_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
+
+#define RE_SYNTAX_POSIX_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
+
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
+#undef RE_DUP_MAX
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
+
+
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
+
+/* If this bit is set, then use extended regular expression syntax.
+ If not set, then use basic regular expression syntax. */
+#define REG_EXTENDED 1
+
+/* If this bit is set, then ignore case when matching.
+ If not set, then case is significant. */
+#define REG_ICASE (REG_EXTENDED << 1)
+
+/* If this bit is set, then anchors also match at newline
+ characters in the string, and neither . nor [^...] matches a newline.
+ If not set, then anchors do not match at newlines. */
+#define REG_NEWLINE (REG_ICASE << 1)
+
+/* If this bit is set, then report only success or fail in regexec.
+ If not set, then regexec also reports the offsets of the matched
+ substrings (when its NMATCH argument is nonzero). */
+
+
+/* POSIX `eflags' bits (i.e., information for regexec). */
+
+/* If this bit is set, then the beginning-of-line operator doesn't match
+ the beginning of the string (presumably because it's not the
+ beginning of a line).
+ If not set, then the beginning-of-line operator does match the
+ beginning of the string. */
+#define REG_NOTBOL 1
+
+/* Like REG_NOTBOL, except for the end-of-line. */
+#define REG_NOTEOL (1 << 1)
+
+
+/* If any error codes are removed, changed, or added, update the
+ `re_error_msg' table in regex.c. */
+typedef enum
+{
+ REG_NOERROR = 0, /* Success. */
+ REG_NOMATCH, /* Didn't find a match (for regexec). */
+
+ /* POSIX regcomp return error codes. (In the order listed in the
+ standard.) */
+ REG_BADPAT, /* Invalid pattern. */
+ REG_ECOLLATE, /* Not implemented. */
+ REG_ECTYPE, /* Invalid character class name. */
+ REG_EESCAPE, /* Trailing backslash. */
+ REG_ESUBREG, /* Invalid back reference. */
+ REG_EBRACK, /* Unmatched left bracket. */
+ REG_EPAREN, /* Parenthesis imbalance. */
+ REG_EBRACE, /* Unmatched \{. */
+ REG_BADBR, /* Invalid contents of \{\}. */
+ REG_ERANGE, /* Invalid range end. */
+ REG_ESPACE, /* Ran out of memory. */
+ REG_BADRPT, /* No preceding re for repetition op. */
+
+ /* Error codes we've added. */
+ REG_EEND, /* Premature end. */
+ REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
+ REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
+} reg_errcode_t;
+
+/* This data structure represents a compiled pattern. Before calling
+ the pattern compiler, the fields `buffer', `allocated', `fastmap',
+ `translate', and `no_sub' can be set. After the pattern has been
+ compiled, the `re_nsub' field is available. All other fields are
+ private to the regex routines. */
+
+struct re_pattern_buffer
+{
+/* [[[begin pattern_buffer]]] */
+ /* Space that holds the compiled pattern. It is declared as
+ `unsigned char *' because its elements are
+ sometimes used as array indexes. */
+ unsigned char *buffer;
+
+ /* Number of bytes to which `buffer' points. */
+ unsigned long allocated;
+
+ /* Number of bytes actually used in `buffer'. */
+ unsigned long used;
+
+ /* Syntax setting with which the pattern was compiled. */
+ reg_syntax_t syntax;
+
+ /* Pointer to a fastmap, if any, otherwise zero. re_search uses
+ the fastmap, if there is one, to skip over impossible
+ starting points for matches. */
+ char *fastmap;
+
+ /* Either a translate table to apply to all characters before
+ comparing them, or zero for no translation. The translation
+ is applied to a pattern when it is compiled and to a string
+ when it is matched. */
+ char *translate;
+
+ /* Number of subexpressions found by the compiler. */
+ size_t re_nsub;
+
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
+ unsigned fastmap_accurate : 1;
+
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
+ unsigned no_sub : 1;
+
+ /* If set, a beginning-of-line anchor doesn't match at the
+ beginning of the string. */
+ unsigned not_bol : 1;
+
+ /* Similarly for an end-of-line anchor. */
+ unsigned not_eol : 1;
+
+ /* If true, an anchor at a newline matches. */
+ unsigned newline_anchor : 1;
+
+/* [[[end pattern_buffer]]] */
+};
+
+typedef struct re_pattern_buffer regex_t;
+
+
+/* search.c (search_buffer) in Emacs needs this one opcode value. It is
+ defined both in `regex.c' and here. */
+#define RE_EXACTN_VALUE 1
+
+/* Type for byte offsets within the string. POSIX mandates this. */
+typedef int regoff_t;
+
+
+/* This is the structure we store register match data in. See
+ regex.texinfo for a full description of what registers match.
+ It is a structure of arrays: entry I of `start' and entry I of `end'
+ together describe register I. */
+struct re_registers
+{
+ unsigned num_regs; /* Number of entries in `start' and in `end'. */
+ regoff_t *start; /* Byte offsets at which the matched substrings start. */
+ regoff_t *end; /* Byte offsets at which the matched substrings end. */
+};
+
+
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
+#ifndef RE_NREGS
+#define RE_NREGS 30
+#endif
+
+
+/* POSIX specification for registers. Aside from the different names than
+ `re_registers', POSIX uses an array of structures, instead of a
+ structure of arrays. */
+typedef struct
+{
+ regoff_t rm_so; /* Byte offset from string's start to substring's start. */
+ regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
+} regmatch_t;
+
+/* Declarations for routines. */
+
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
+#if __STDC__
+
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
+
+/* Compile the regular expression PATTERN, with length LENGTH
+ and syntax given by the global `re_syntax_options', into the buffer
+ BUFFER. Return NULL if successful, and an error string if not. */
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
+
+
+/* Compile a fastmap for the compiled pattern in BUFFER; used to
+ accelerate searches. Return 0 if successful and -2 if there was an
+ internal error. */
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
+
+
+/* Search in the string STRING (with length LENGTH) for the pattern
+ compiled into BUFFER. Start searching at position START, for RANGE
+ characters. Return the starting position of the match, -1 for no
+ match, or -2 for an internal error. Also return register
+ information in REGS (if REGS is nonzero and BUFFER->no_sub is zero). */
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
+
+
+/* Like `re_search', but search in the concatenation of STRING1 and
+ STRING2. Also, stop searching at index START + STOP. */
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
+
+
+/* Like `re_search', but return how many characters in STRING the regexp
+ in BUFFER matched, starting at position START. */
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
+
+
+/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
+
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
+
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
+
+/* POSIX compatibility. */
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
+
+#endif /* not __REGEXP_LIBRARY_H__ */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/ChangeLog b/gnu/lib/libregex/test/ChangeLog
new file mode 100644
index 0000000..f0265bb
--- /dev/null
+++ b/gnu/lib/libregex/test/ChangeLog
@@ -0,0 +1,77 @@
+Thu Mar 25 21:23:43 1993 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * debugmalloc.c: #include <string.h>, and remove declaration of
+ memcpy.
+
+Sun Dec 13 20:59:32 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * tregress.c (test_regress): Add regression test for matching
+ "[a-a]" against "a" with the upcase translation map.
+
+ * iregex.c (print_regs): Don't print a newline after the register
+ contents.
+ (main): Instead, write out newlines here after printing match and
+ search results; this way, we get a newline whether or not the
+ pattern matched.
+
+Fri Dec 11 03:30:50 1992 Jim Blandy (jimb@totoro.cs.oberlin.edu)
+
+ * tregress.c (test_regress): Add new test to catch bug fixed by
+ change to regex.c today.
+
+ * Makefile.in (dregex.o): Depend on `../regex.[ch]', not `regex.[ch]'.
+
+Sun Nov 15 07:51:40 1992 Karl Berry (karl@cs.umb.edu)
+
+ * debugmalloc.c (memcpy): Declare; also, include <assert.h>.
+
+ * psx-interf.c (fill_pmatch): Declare offsets as `regoff_t'
+ instead of `off_t'.
+
+Thu Nov 12 11:29:58 1992 Karl Berry (karl@cs.umb.edu)
+
+ * iregex.c (main): Remove unused variable `c'; initialize
+ the char array in C code; only call print_regs if the match and
+ search succeeded.
+ (strlen): Declare.
+
+ * tregress.c (test_regress): Bug from enami.
+
+Tue Nov 10 10:36:53 1992 Karl Berry (karl@cs.umb.edu)
+
+ * tregress.c (test_regress): Remove Emacs 19 diff bug from rms, as
+ it was never the right thing to test anyway, and the test itself
+ had bugs in it.
+
+Mon Nov 9 10:09:40 1992 Karl Berry (karl@cs.umb.edu)
+
+ * tregress.c (test_regress): Bug from meyering.
+
+Thu Sep 24 10:48:34 1992 Karl Berry (karl@cs.umb.edu)
+
+ * Makefile.in: avoid $< (except in implicit rule).
+
+Sat Sep 19 15:38:29 1992 Karl Berry (karl@hayley)
+
+ * Makefile.in (TAGS): include regex.c and regex.h.
+
+Wed Sep 16 09:29:27 1992 Karl Berry (karl@hayley)
+
+ * xmalloc.c (xmalloc): use char *, not void *, as some compilers
+ bomb out on the latter.
+
+ * Makefile.in (LOADLIBES): use LIBS instead, as that's what
+ Autoconf wants to define.
+
+ * other.c: remove tests for ^/$ around newlines.
+
+Tue Sep 15 11:01:15 1992 Karl Berry (karl@hayley)
+
+ * fileregex.c (main): call re_search_2 instead of re_search.
+
+ * Makefile.in (regex.o): make target dregex.o, so VPATH doesn't
+ find ../regex.o.
+
+Sun Sep 13 06:50:03 1992 Karl Berry (karl@hayley)
+
+ * Created.
diff --git a/gnu/lib/libregex/test/Makefile b/gnu/lib/libregex/test/Makefile
new file mode 100644
index 0000000..5a8656a
--- /dev/null
+++ b/gnu/lib/libregex/test/Makefile
@@ -0,0 +1,169 @@
+# Generated automatically from Makefile.in by configure.
+# Makefile for regex testing.
+#
+# Copyright (C) 1992 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CPPFLAGS =
+CFLAGS = -g
+LDFLAGS =
+
+srcdir = .
+VPATH = .:../.
+
+CC = gcc
+DEFS = -DHAVE_STRING_H=1
+LIBS = $(LOADLIBES)
+
+ETAGS = etags
+SHELL = /bin/sh
+
+debug = -DDEBUG
+ALL_CPPFLAGS = -I. -I$(srcdir) -I../$(srcdir) $(DEFS) $(CPPFLAGS) $(debug)
+
+.c.o:
+ $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c $<
+
+
+# Define this as `../regex.o' to get the optimized version.
+regex_o = dregex.o
+test_h = test.h
+test_o = test.o bsd-interf.o other.o tregress.o psx-basic.o psx-extend.o \
+ psx-generic.o psx-group.o psx-interf.o psx-interv.o
+common_o = printchar.o upcase.o xmalloc.o $(malloc)
+
+# We have a lot of mallocs we can try when we run afoul of strange bugs.
+malloc =
+#malloc = # the libc malloc
+#malloc = g++malloc.o
+#malloc = debugmalloc.o
+#malloc = emacsmalloc.o
+emacsmallocflags = -Drcheck -Dbotch=abort -DUSG
+
+# default is to do nothing.
+default:
+
+all: regex syntax
+
+regex: $(regex_o) $(common_o) $(test_o) main.o
+ $(CC) -o $@ $(LDFLAGS) $^ $(LIBS)
+
+# As long as we're doing tests, we enable debugging.
+dregex.o: ../regex.c ../regex.h
+ rm -f $@
+ $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c ../$(srcdir)/regex.c
+ mv regex.o $@
+
+# iregex is the interactive regex.
+iregex: $(common_o) $(regex_o) iregex.o
+ $(CC) -o $@ $(LDFLAGS) $^ $(LIBS)
+
+# fileregex searches for an r.e. in every line of a given file.
+fileregex_o = fileregex.o printchar.o $(regex_o)
+fileregex: $(fileregex_o)
+ $(CC) -o $@ $(LDFLAGS) $(fileregex_o) $(LIBS)
+
+# cppregex is regex with a preprocessed regex.c. Useful when the
+# problem is inside some macro.
+cppregex: regexcpp.o $(common_o) $(test_o) main.o
+ $(CC) -o $@ $(LDFLAGS) $^ $(LIBS)
+
+regexcpp.o: regexcpp.c
+
+regexcpp.c: regex.c regexcpp.sed
+ rm -f regexcpp.c
+ $(CC) -E $(ALL_CPPFLAGS) ../$(srcdir)/regex.c \
+ | egrep -v '^#|^ *$$' \
+ | sed -f regexcpp.sed \
+ > regexcpp.c
+ chmod a-w regexcpp.c
+
+# Have to give this malloc special flags.
+emacsmalloc.o: emacsmalloc.c
+ $(CC) -c $(CFLAGS) $(ALL_CPPFLAGS) $(emacsmallocflags) \
+ ../$(srcdir)/test/emacsmalloc.c
+
+syntax: syntax.o
+ $(CC) $(CFLAGS) -o $@ syntax.o
+
+syntax.c: syntax.skel bits
+ sed '/\[\[\[replace.*\]\]\]/r bits' syntax.skel > $@
+
+bits: regex.h
+ sed -n 1,/RE_SYNTAX_EMACS/p ../$(srcdir)/regex.h \
+ | grep "#define RE_.*1" \
+ | sed 's/^#define \(RE_[A-Z_]*\) .*/ TEST_BIT (\1);/' > $@
+
+check: regex
+ ./regex
+
+TAGS: regex.c regex.h *.h *.c
+ $(ETAGS) -t $^
+
+depend:
+ gcc -MM $(ALL_CPPFLAGS) *.c > /tmp/depend
+.PHONY: depend
+
+install:
+.PHONY: install
+
+clean mostlyclean:
+ rm -f *.o regex cppregex iregex fileregex regexcpp.c syntax
+
+distclean: clean
+ rm -f bits syntax.c Makefile
+
+extraclean: distclean
+ rm -f *~* *\#* patch* *.orig *.rej *.bak core a.out
+
+realclean: distclean
+ rm -f TAGS
+
+Makefile: Makefile.in ../config.status
+ (cd ..; sh config.status)
+
+# Prevent GNU make 3 from overflowing arg limit on system V.
+.NOEXPORT:
+
+# Assumes $(distdir) is the place to put our files.
+distfiles = ChangeLog TAGS *.in *.c *.h regexcpp.sed syntax.skel
+dist: Makefile TAGS
+ mkdir $(distdir)
+ ln $(distfiles) $(distdir)
+
+# Automatically-generated dependencies below here.
+alloca.o : alloca.c
+bsd-interf.o : bsd-interf.c
+debugmalloc.o : debugmalloc.c
+emacsmalloc.o : emacsmalloc.c getpagesize.h
+fileregex.o : fileregex.c .././regex.h
+g++malloc.o : g++malloc.c //usr/include/stdio.h getpagesize.h
+iregex.o : iregex.c .././regex.h
+main.o : main.c test.h .././regex.h
+malloc-test.o : malloc-test.c
+other.o : other.c test.h .././regex.h
+printchar.o : printchar.c
+psx-basic.o : psx-basic.c test.h .././regex.h
+psx-extend.o : psx-extend.c test.h .././regex.h
+psx-generic.o : psx-generic.c test.h .././regex.h
+psx-group.o : psx-group.c test.h .././regex.h
+psx-interf.o : psx-interf.c test.h .././regex.h
+psx-interv.o : psx-interv.c test.h .././regex.h
+syntax.o : syntax.c .././regex.h
+test.o : test.c test.h .././regex.h
+tregress.o : tregress.c test.h .././regex.h
+upcase.o : upcase.c
+xmalloc.o : xmalloc.c
diff --git a/gnu/lib/libregex/test/Makefile.in b/gnu/lib/libregex/test/Makefile.in
new file mode 100644
index 0000000..b6a4133
--- /dev/null
+++ b/gnu/lib/libregex/test/Makefile.in
@@ -0,0 +1,168 @@
+# Makefile for regex testing.
+#
+# Copyright (C) 1992 Free Software Foundation, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+CPPFLAGS =
+CFLAGS = -g
+LDFLAGS =
+
+srcdir = @srcdir@
+VPATH = @srcdir@:../@srcdir@
+
+CC = @CC@
+DEFS = @DEFS@
+LIBS = @LIBS@ $(LOADLIBES)
+
+ETAGS = etags
+SHELL = /bin/sh
+
+debug = -DDEBUG
+ALL_CPPFLAGS = -I. -I$(srcdir) -I../$(srcdir) $(DEFS) $(CPPFLAGS) $(debug)
+
+.c.o:
+ $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c $<
+
+
+# Define this as `../regex.o' to get the optimized version.
+regex_o = dregex.o
+test_h = test.h
+test_o = test.o bsd-interf.o other.o tregress.o psx-basic.o psx-extend.o \
+ psx-generic.o psx-group.o psx-interf.o psx-interv.o
+common_o = printchar.o upcase.o xmalloc.o $(malloc)
+
+# We have a lot of mallocs we can try when we run afoul of strange bugs.
+malloc = @ALLOCA@
+#malloc = # the libc malloc
+#malloc = g++malloc.o
+#malloc = debugmalloc.o
+#malloc = emacsmalloc.o
+emacsmallocflags = -Drcheck -Dbotch=abort -DUSG
+
+# default is to do nothing.
+default:
+
+all: regex syntax
+
+regex: $(regex_o) $(common_o) $(test_o) main.o
+ $(CC) -o $@ $(LDFLAGS) $^ $(LIBS)
+
+# As long as we're doing tests, we enable debugging.
+dregex.o: ../regex.c ../regex.h
+ rm -f $@
+ $(CC) $(ALL_CPPFLAGS) $(CFLAGS) -c ../$(srcdir)/regex.c
+ mv regex.o $@
+
+# iregex is the interactive regex.
+iregex: $(common_o) $(regex_o) iregex.o
+ $(CC) -o $@ $(LDFLAGS) $^ $(LIBS)
+
+# fileregex searches for an r.e. in every line of a given file.
+fileregex_o = fileregex.o printchar.o $(regex_o)
+fileregex: $(fileregex_o)
+ $(CC) -o $@ $(LDFLAGS) $(fileregex_o) $(LIBS)
+
+# cppregex is regex with a preprocessed regex.c. Useful when the
+# problem is inside some macro.
+cppregex: regexcpp.o $(common_o) $(test_o) main.o
+ $(CC) -o $@ $(LDFLAGS) $^ $(LIBS)
+
+regexcpp.o: regexcpp.c
+
+regexcpp.c: regex.c regexcpp.sed
+ rm -f regexcpp.c
+ $(CC) -E $(ALL_CPPFLAGS) ../$(srcdir)/regex.c \
+ | egrep -v '^#|^ *$$' \
+ | sed -f regexcpp.sed \
+ > regexcpp.c
+ chmod a-w regexcpp.c
+
+# Have to give this malloc special flags.
+emacsmalloc.o: emacsmalloc.c
+ $(CC) -c $(CFLAGS) $(ALL_CPPFLAGS) $(emacsmallocflags) \
+ ../$(srcdir)/test/emacsmalloc.c
+
+syntax: syntax.o
+ $(CC) $(CFLAGS) -o $@ syntax.o
+
+# syntax.c is syntax.skel with the contents of the file `bits' read in
+# right after the line that matches `[[[replace...]]]'.
+syntax.c: syntax.skel bits
+ sed '/\[\[\[replace.*\]\]\]/r bits' syntax.skel > $@
+
+# `bits' is derived from regex.h: take the file from line 1 through the
+# first line mentioning RE_SYNTAX_EMACS, keep the `#define RE_...' lines
+# that contain a `1', and turn each into a ` TEST_BIT (NAME);' statement.
+bits: regex.h
+ sed -n 1,/RE_SYNTAX_EMACS/p ../$(srcdir)/regex.h \
+ | grep "#define RE_.*1" \
+ | sed 's/^#define \(RE_[A-Z_]*\) .*/ TEST_BIT (\1);/' > $@
+
+check: regex
+ ./regex
+
+TAGS: regex.c regex.h *.h *.c
+ $(ETAGS) -t $^
+
+depend:
+ gcc -MM $(ALL_CPPFLAGS) *.c > /tmp/depend
+.PHONY: depend
+
+install:
+.PHONY: install
+
+clean mostlyclean:
+ rm -f *.o regex cppregex iregex fileregex regexcpp.c syntax
+
+distclean: clean
+ rm -f bits syntax.c Makefile
+
+extraclean: distclean
+ rm -f *~* *\#* patch* *.orig *.rej *.bak core a.out
+
+realclean: distclean
+ rm -f TAGS
+
+Makefile: Makefile.in ../config.status
+ (cd ..; sh config.status)
+
+# Prevent GNU make 3 from overflowing arg limit on system V.
+.NOEXPORT:
+
+# Assumes $(distdir) is the place to put our files.
+distfiles = ChangeLog TAGS *.in *.c *.h regexcpp.sed syntax.skel
+dist: Makefile TAGS
+ mkdir $(distdir)
+ ln $(distfiles) $(distdir)
+
+# Automatically-generated dependencies below here.
+alloca.o : alloca.c
+bsd-interf.o : bsd-interf.c
+debugmalloc.o : debugmalloc.c
+emacsmalloc.o : emacsmalloc.c getpagesize.h
+fileregex.o : fileregex.c .././regex.h
+g++malloc.o : g++malloc.c //usr/include/stdio.h getpagesize.h
+iregex.o : iregex.c .././regex.h
+main.o : main.c test.h .././regex.h
+malloc-test.o : malloc-test.c
+other.o : other.c test.h .././regex.h
+printchar.o : printchar.c
+psx-basic.o : psx-basic.c test.h .././regex.h
+psx-extend.o : psx-extend.c test.h .././regex.h
+psx-generic.o : psx-generic.c test.h .././regex.h
+psx-group.o : psx-group.c test.h .././regex.h
+psx-interf.o : psx-interf.c test.h .././regex.h
+psx-interv.o : psx-interv.c test.h .././regex.h
+syntax.o : syntax.c .././regex.h
+test.o : test.c test.h .././regex.h
+tregress.o : tregress.c test.h .././regex.h
+upcase.o : upcase.c
+xmalloc.o : xmalloc.c
diff --git a/gnu/lib/libregex/test/TAGS b/gnu/lib/libregex/test/TAGS
new file mode 100644
index 0000000..d3aad75
--- /dev/null
+++ b/gnu/lib/libregex/test/TAGS
@@ -0,0 +1,373 @@
+
+.././regex.c,4137
+#define AT_STRINGS_BEG(3078,98376
+#define AT_STRINGS_END(3079,98449
+#define AT_WORD_BOUNDARY(3093,99002
+#define BUF_PUSH(887,24995
+#define BUF_PUSH_2(895,25208
+#define BUF_PUSH_3(904,25437
+#define DEBUG_POP(2336,74614
+#define DEBUG_PRINT1(471,14296
+#define DEBUG_PRINT1(785,21263
+#define DEBUG_PRINT2(472,14342
+#define DEBUG_PRINT3(473,14398
+#define DEBUG_PRINT3(787,21316
+#define DEBUG_PRINT4(474,14462
+#define DEBUG_PRINT_COMPILED_PATTERN(475,14534
+#define DEBUG_PRINT_COMPILED_PATTERN(789,21386
+#define DEBUG_PRINT_DOUBLE_STRING(477,14637
+#define DEBUG_PUSH(2338,74684
+#define DEBUG_STATEMENT(470,14267
+#define DOUBLE_FAIL_STACK(2299,73230
+#define EVER_MATCHED_SOMETHING(3028,96680
+#define EXTEND_BUFFER(941,26834
+#define EXTRACT_NUMBER(403,12499
+#define EXTRACT_NUMBER(422,12960
+#define EXTRACT_NUMBER_AND_INCR(430,13181
+#define EXTRACT_NUMBER_AND_INCR(448,13583
+#define FAIL_STACK_EMPTY(2271,72289
+#define FAIL_STACK_FULL(2273,72404
+#define FAIL_STACK_PTR_EMPTY(2272,72344
+#define FAIL_STACK_TOP(2274,72473
+#define FIRST_STRING_P(221,5848
+#define FREE_VAR(3100,99186
+#define FREE_VARIABLES(3101,99240
+#define FREE_VARIABLES(3116,99751
+#define GET_BUFFER_SPACE(882,24802
+#define GET_UNSIGNED_NUMBER(1017,29312
+#define INIT_FAIL_STACK(2279,72612
+#define INSERT_JUMP(923,26079
+#define INSERT_JUMP2(927,26236
+#define ISALNUM(147,3407
+#define ISALPHA(148,3455
+#define ISBLANK(135,3062
+#define ISBLANK(137,3116
+#define ISCNTRL(149,3503
+#define ISDIGIT(146,3359
+#define ISGRAPH(140,3185
+#define ISGRAPH(142,3239
+#define ISLOWER(150,3551
+#define ISPRINT(145,3311
+#define ISPUNCT(151,3599
+#define ISSPACE(152,3647
+#define ISUPPER(153,3695
+#define ISXDIGIT(154,3743
+#define IS_ACTIVE(3026,96578
+#define IS_CHAR_CLASS(1035,29793
+#define MATCHED_SOMETHING(3027,96621
+#define MAX(233,6292
+#define MIN(234,6334
+#define PATFETCH(852,23769
+#define PATFETCH_RAW(860,24020
+#define POINTER_TO_OFFSET(3050,97433
+#define POP_FAILURE_ITEM(2331,74426
+#define POP_FAILURE_POINT(2461,79538
+#define PREFETCH(3064,97916
+#define PUSH_FAILURE_ITEM(2327,74253
+#define PUSH_FAILURE_POINT(2352,75048
+#define PUSH_PATTERN_OP(2317,73841
+#define REGEX_REALLOCATE(185,4875
+#define REGEX_REALLOCATE(210,5495
+#define REGEX_TALLOC(227,6137
+#define REG_MATCH_NULL_STRING_P(3025,96511
+#define REG_UNSET(3055,97649
+#define RETALLOC(226,6058
+#define SET_LIST_BIT(1011,29089
+#define SET_REGS_MATCHED(3034,96936
+#define SIGN_EXTEND_CHAR(166,4109
+#define SIGN_EXTEND_CHAR(169,4217
+#define STORE_JUMP(915,25800
+#define STORE_JUMP2(919,25917
+#define STORE_NUMBER(384,11919
+#define STORE_NUMBER_AND_INCR(394,12242
+#define STREQ(231,6244
+#define SYNTAX(120,2790
+#define TALLOC(225,6003
+#define TRANSLATE(873,24503
+#define WORDCHAR_P(3086,98755
+alt_match_null_string_p 4466,149039
+#define assert(782,21217
+at_begline_loc_p 2131,67979
+at_endline_loc_p 2150,68557
+#define bcmp(54,1656
+bcmp_translate 4591,151831
+#define bcopy(57,1726
+typedef char boolean;236,6377
+#define bzero(60,1793
+common_op_match_null_string_p 4503,149895
+compile_range 2200,69997
+} compile_stack_elt_t;990,28602
+} compile_stack_type;998,28748
+extract_number 411,12714
+extract_number_and_incr 438,13370
+} fail_stack_type;2269,72269
+group_in_compile_stack 2172,69174
+group_match_null_string_p 4357,145267
+init_syntax_once 94,2365
+insert_op1 2091,67107
+insert_op2 2110,67475
+#define isascii(131,3018
+typedef int pattern_offset_t;981,28388
+print_compiled_pattern 726,19792
+print_double_string 753,20605
+print_fastmap 486,14835
+print_partial_compiled_pattern 518,15475
+re_comp 4650,153479
+re_compile_fastmap 2532,82428
+re_compile_pattern 4617,152520
+re_exec 4688,154373
+re_match 3136,100557
+re_match_2 3161,101399
+} re_opcode_t;378,11781
+re_search 2844,90872
+re_search_2 2877,91998
+re_set_registers 2817,90247
+re_set_syntax 808,22087
+regcomp 4736,155972
+regerror 4876,160188
+regex_compile 1062,30922
+regexec 4811,158371
+regfree 4920,161247
+} register_info_type;3023,96488
+typedef unsigned regnum_t;974,28172
+store_op1 2063,66535
+store_op2 2076,66768
+typedef const unsigned 2262,72103
+
+.././regex.h,230
+#define _RE_ARGS(394,14981
+#define _RE_ARGS(398,15036
+} reg_errcode_t;270,10874
+typedef unsigned reg_syntax_t;38,1503
+typedef struct re_pattern_buffer regex_t;346,13556
+} regmatch_t;382,14634
+typedef int regoff_t;354,13814
+
+getpagesize.h,84
+#define getpagesize(12,137
+#define getpagesize(15,191
+#define getpagesize(20,302
+
+test.h,436
+#define BRACES_TO_OPS(107,3169
+#define INVALID_PATTERN(110,3328
+#define MATCH_SELF(114,3429
+#define PARENS_TO_OPS(108,3248
+#define SAFE_STRLEN(14,201
+#define TEST_POSITIONED_MATCH(116,3470
+#define TEST_REGISTERS(104,3011
+#define TEST_REGISTERS_2(97,2703
+#define TEST_SEARCH(127,3875
+#define TEST_SEARCH_2(123,3720
+#define TEST_TRUNCATED_MATCH(120,3608
+typedef enum { false = 0, true = 1 } boolean;16,255
+} test_type;33,572
+
+alloca.c,128
+alloca 141,3996
+find_stack_direction 85,2553
+} header;127,3538
+typedef void *pointer;51,1721
+typedef char *pointer;53,1778
+
+bsd-interf.c,51
+test_berk_search 8,106
+test_bsd_interface 33,738
+
+debugmalloc.c,395
+#define TRACE(8,143
+#define TRACE1(9,197
+#define TRACE2(10,254
+#define TRACE3(11,319
+#define TRACE4(12,392
+#define USER_ALLOC(61,1440
+typedef char *address;15,480
+} *chunk;54,1225
+chunk_delete 115,2778
+chunk_insert 96,2294
+chunk_to_mem 79,1916
+free 261,5604
+free_list_available 175,3947
+malloc 203,4343
+mem_to_chunk 68,1703
+realloc 242,5309
+validate_list 153,3478
+xsbrk 21,545
+
+emacsmalloc.c,574
+#define ASSERT(178,5884
+#define ASSERT(181,5985
+#define CHAIN(166,5430
+#define bcmp(73,2821
+#define bcopy(72,2777
+#define bzero(74,2868
+calloc 603,15983
+free 484,13255
+get_lim_data 736,18517
+get_lim_data 752,18767
+get_lim_data 759,18860
+getpool 374,10263
+malloc 413,11133
+malloc_init 218,6863
+malloc_mem_free 707,17940
+malloc_mem_used 688,17683
+malloc_stats 663,17320
+malloc_usable_size 233,7147
+memalign 618,16164
+morecore 244,7380
+realloc 541,14424
+#define start_of_data(110,3486
+#define start_of_data(115,3546
+sys_sbrk 815,20804
+valloc 645,17031
+
+fileregex.c,13
+main 11,156
+
+g++malloc.c,1543
+#define UPDATE_STATS(33,1090
+#define UPDATE_STATS(35,1131
+static inline int aligned_OK(343,11189
+void* calloc(1039,28692
+void cfree(1048,28894
+static inline void* chunk2mem(619,19336
+#define clear_inuse(592,18767
+static inline void consollink(716,21398
+static void do_free_stats(544,18016
+static void do_malloc_stats(534,17741
+766,22304
+extern 762,22235
+ for 1260,34165
+void free(1028,28553
+static inline void frontlink(732,21717
+static unsigned int gcd(557,18251
+ if 1212,32427
+ if 1216,32582
+ if 1220,32737
+ if 1224,32880
+ if 1229,33094
+ if 1233,33251
+ if 1238,33463
+ if 1242,33609
+ if 1247,33739
+#define inuse(590,18680
+static inline unsigned int lcm(580,18540
+void* malloc(939,26370
+static mchunkptr malloc_find_space(858,24561
+void malloc_stats(1201,32256
+unsigned int malloc_usable_size(1054,28936
+static volatile void malloc_user_error(286,9757
+static void malloc_user_error(288,9804
+typedef struct malloc_bin* mbinptr;320,10636
+typedef struct malloc_chunk* mchunkptr;309,10247
+static inline mchunkptr mem2chunk(643,19759
+void* memalign(1118,30363
+#define next_chunk(600,18910
+#define prev_chunk(604,19023
+void* realloc(1071,29263
+static inline unsigned int request2size(335,10993
+mchunkptr sanity_check(628,19486
+#define set_inuse(591,18723
+static inline void set_size(609,19149
+static inline mbinptr size2bin(499,16914
+static inline void split(685,20463
+static 768,22312
+static inline void unlink(671,20263
+void* valloc(1194,32107
+typedef volatile void 760,22184
+764,22271
+
+iregex.c,54
+main 20,390
+print_regs 141,2638
+scanstring 87,1839
+
+main.c,13
+main 12,242
+
+malloc-test.c,112
+#define BITS_BLOCK(12,168
+#define BITS_MASK(13,228
+} bits_list_type;6,56
+init_bits_list 16,311
+main(32,621
+
+other.c,18
+test_others 6,96
+
+printchar.c,15
+printchar 2,5
+
+psx-basic.c,23
+test_posix_basic 7,84
+
+psx-extend.c,26
+test_posix_extended 7,88
+
+psx-generic.c,26
+test_posix_generic 8,117
+
+psx-group.c,20
+test_grouping 7,92
+
+psx-interf.c,416
+fill_pmatch 174,4802
+get_error_string 18,260
+init_pattern_buffer 49,1434
+test_compile 67,1925
+test_eflags 245,6876
+test_error_code_allocation 562,16619
+test_error_code_message 524,15247
+test_ignore_case 303,8525
+test_newline 330,9199
+test_nsub 117,3319
+test_pmatch 188,5121
+test_posix_interface 614,18719
+test_posix_match 359,9938
+test_regcomp 138,3725
+test_regerror 592,17621
+test_regexec 394,10783
+
+psx-interv.c,21
+test_intervals 6,93
+
+test.c,607
+#define SET_FASTMAP(447,13999
+#define bcmp(18,362
+#define bcopy(19,415
+#define bzero(20,473
+compile_and_print_pattern 666,19653
+concat 97,2673
+delimiters_to_ops 571,17477
+general_test 115,2996
+invalid_pattern 542,16821
+#define memcmp(26,611
+#define memcpy(27,660
+print_pattern_info 635,18998
+set_all_registers 58,1390
+test_all_registers 506,15567
+test_case_fold 682,19993
+test_fastmap 460,14363
+test_fastmap_search 474,14668
+test_match 776,22235
+test_match_2 766,22040
+test_match_n_times 715,20798
+test_search_return 408,13011
+valid_nonposix_pattern 646,19239
+valid_pattern 557,17182
+
+tregress.c,208
+#define SIMPLE_MATCH(74,1463
+#define SIMPLE_NONMATCH(75,1528
+do_match 78,1599
+itoa 10,199
+simple_compile 44,882
+simple_fail 21,353
+simple_fastmap 55,1115
+simple_search 100,2020
+test_regress 124,2513
+
+upcase.c,0
+
+xmalloc.c,14
+xmalloc 9,87
diff --git a/gnu/lib/libregex/test/alloca.c b/gnu/lib/libregex/test/alloca.c
new file mode 100644
index 0000000..c1ff222
--- /dev/null
+++ b/gnu/lib/libregex/test/alloca.c
@@ -0,0 +1,194 @@
+/*
+ alloca -- (mostly) portable public-domain implementation -- D A Gwyn
+
+ last edit: 86/05/30 rms
+ include config.h, since on VMS it renames some symbols.
+ Use xmalloc instead of malloc.
+
+ This implementation of the PWB library alloca() function,
+ which is used to allocate space off the run-time stack so
+ that it is automatically reclaimed upon procedure exit,
+ was inspired by discussions with J. Q. Johnson of Cornell.
+
+ It should work under any C implementation that uses an
+ actual procedure stack (as opposed to a linked list of
+ frames). There are some preprocessor constants that can
+ be defined when compiling for your specific system, for
+ improved efficiency; however, the defaults should be okay.
+
+ The general concept of this implementation is to keep
+ track of all alloca()-allocated blocks, and reclaim any
+ that are found to be deeper in the stack than the current
+ invocation. This heuristic does not reclaim storage as
+ soon as it becomes invalid, but it will do so eventually.
+
+ As a special case, alloca(0) reclaims storage without
+ allocating any. It is a good idea to use alloca(0) in
+ your main control loop, etc. to force garbage collection.
+*/
+#ifndef lint
+static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */
+#endif
+
+#ifdef emacs
+#include "config.h"
+#ifdef static
+/* actually, only want this if static is defined as ""
+ -- this is for usg, in which emacs must undefine static
+ in order to make unexec workable
+ */
+#ifndef STACK_DIRECTION
+you
+lose
+-- must know STACK_DIRECTION at compile-time
+#endif /* STACK_DIRECTION undefined */
+#endif /* static */
+#endif /* emacs */
+
+#ifndef alloca /* If compiling with GCC, this file's not needed. */
+
+#ifdef __STDC__
+typedef void *pointer; /* generic pointer type */
+#else
+typedef char *pointer; /* generic pointer type */
+#endif
+
+#define NULL 0 /* null pointer constant */
+
+extern void free();
+extern pointer xmalloc();
+
+/*
+ Define STACK_DIRECTION if you know the direction of stack
+ growth for your system; otherwise it will be automatically
+ deduced at run-time.
+
+ STACK_DIRECTION > 0 => grows toward higher addresses
+ STACK_DIRECTION < 0 => grows toward lower addresses
+ STACK_DIRECTION = 0 => direction of growth unknown
+*/
+
+#ifndef STACK_DIRECTION
+#define STACK_DIRECTION 0 /* direction unknown */
+#endif
+
+#if STACK_DIRECTION != 0
+
+#define STACK_DIR STACK_DIRECTION /* known at compile-time */
+
+#else /* STACK_DIRECTION == 0; need run-time code */
+
+static int stack_dir; /* 1 or -1 once known */
+#define STACK_DIR stack_dir
+
+/* Work out which way the stack grows and record the answer in
+   stack_dir (1 = toward higher addresses, -1 = toward lower ones).
+   The function calls itself exactly once and compares the address of
+   a local in the inner call with the same local in the outer call.  */
+static void
+find_stack_direction (/* void */)
+{
+  /* Address of the outer call's `dummy'; NULL until that call has
+     recorded it.  */
+  static char *outer_dummy = NULL;
+  auto char dummy;              /* its address marks this frame */
+
+  if (outer_dummy != NULL)
+    {
+      /* Inner call: a higher address here means the stack grew upward.  */
+      stack_dir = (&dummy > outer_dummy) ? 1 : -1;
+      return;
+    }
+
+  /* Outer call: remember our local, then recurse once.  */
+  outer_dummy = &dummy;
+  find_stack_direction ();
+}
+
+#endif /* STACK_DIRECTION == 0 */
+
+/*
+ An "alloca header" is used to:
+ (a) chain together all alloca()ed blocks;
+ (b) keep track of stack depth.
+
+ It is very important that sizeof(header) agree with malloc()
+ alignment chunk size. The following default should work okay.
+*/
+
+#ifndef ALIGN_SIZE
+#define ALIGN_SIZE sizeof(double)
+#endif
+
+/* Bookkeeping record that alloca() stores in front of each block it
+   returns.  The `align' member makes sizeof (header) at least
+   ALIGN_SIZE bytes; the `h' member holds the actual bookkeeping.  */
+typedef union hdr
+{
+ char align[ALIGN_SIZE]; /* to force sizeof(header) */
+ struct
+ {
+ union hdr *next; /* for chaining headers */
+ char *deep; /* for stack depth measure */
+ } h;
+} header;
+
+/*
+ alloca( size ) returns a pointer to at least `size' bytes of
+ storage which will be automatically reclaimed upon exit from
+ the procedure that called alloca(). Originally, this space
+ was supposed to be taken from the current stack frame of the
+ caller, but that method cannot be made to work for some
+ implementations of C, for example under Gould's UTX/32.
+*/
+
+static header *last_alloca_header = NULL; /* -> last alloca header */
+
+/* Return a pointer to at least SIZE bytes obtained from xmalloc, after
+   first freeing every earlier block whose recorded stack depth is
+   deeper than the current caller's.  alloca (0) only performs that
+   reclamation and returns NULL.  */
+pointer
+alloca (size)                   /* returns pointer to storage */
+     unsigned size;             /* # bytes to allocate */
+{
+  auto char probe;              /* its address gauges the stack depth */
+  register char *depth = &probe;
+  register header *hp;          /* walks the chain of live blocks */
+  register header *fresh;       /* header of the block handed out */
+
+#if STACK_DIRECTION == 0
+  /* Growth direction was not fixed at compile time: measure it the
+     first time through.  */
+  if (STACK_DIR == 0)
+    find_stack_direction ();
+#endif
+
+  /* Reclaim garbage: the chain starts with the most recent block, so
+     free from the front for as long as the recorded depth is deeper
+     than where we are now, and stop at the first block that is not.  */
+  hp = last_alloca_header;
+  while (hp != NULL
+         && ((STACK_DIR > 0 && hp->h.deep > depth)
+             || (STACK_DIR < 0 && hp->h.deep < depth)))
+    {
+      register header *np = hp->h.next;
+
+      free ((pointer) hp);
+      hp = np;
+    }
+  last_alloca_header = hp;      /* first surviving block, or NULL */
+
+  /* A zero-byte request is only a request to collect garbage.  */
+  if (size == 0)
+    return NULL;
+
+  /* One allocation holds the header followed by the user's storage.  */
+  fresh = (header *) xmalloc (sizeof (header) + size);
+  fresh->h.next = last_alloca_header;
+  fresh->h.deep = depth;
+  last_alloca_header = fresh;
+
+  /* The caller's storage begins just past the header.  */
+  return (pointer) ((char *) fresh + sizeof (header));
+}
+
+#endif /* no alloca */
diff --git a/gnu/lib/libregex/test/bsd-interf.c b/gnu/lib/libregex/test/bsd-interf.c
new file mode 100644
index 0000000..56f9e2a
--- /dev/null
+++ b/gnu/lib/libregex/test/bsd-interf.c
@@ -0,0 +1,38 @@
+/* bsd-interf.c: test BSD interface. */
+
+#ifndef _POSIX_SOURCE /* whole file */
+
+#include "test.h"
+
+/* Compile PATTERN with re_comp.  If that fails, print the pattern and
+   the message re_comp returned.  Otherwise, when test_should_match is
+   nonzero, run re_exec on STRING and print a diagnostic if its result
+   differs from strlen (STRING).  */
+void
+test_berk_search (pattern, string)
+     const char *pattern;
+     char *string;
+{
+  const char *return_value = re_comp (pattern);
+
+  if (return_value != 0)
+    {
+      printf ("This didn't compile: `%s'.\n", pattern);
+      printf (" The error message was: `%s'.\n", return_value);
+    }
+  else
+    /* NOTE(review): the BSD re_exec interface is documented as returning
+       1, 0 or -1 rather than a match length, so comparing it with
+       strlen (string) looks suspect -- confirm against regex.c.  Note
+       also that STRING reaches strlen before the null check below.  */
+    if (test_should_match && re_exec (string) != strlen (string))
+      {
+        printf ("Should have matched but didn't:\n");
+        printf (" The pattern was: %s.\n", pattern);
+        if (string)
+          /* The line used to end in the two characters 'n instead of a
+             newline escape.  */
+          printf (" The string was: `%s'.\n", string);
+        else
+          printf (" The string was empty.\n");
+      }
+}
+
+
+/* Run the BSD-interface check on one fixed pattern/string pair.  */
+void
+test_bsd_interface ()
+{
+  const char *pattern = "a";
+  char *subject = "ab";
+
+  test_berk_search (pattern, subject);
+}
+
+#endif /* _POSIX_SOURCE */
diff --git a/gnu/lib/libregex/test/debugmalloc.c b/gnu/lib/libregex/test/debugmalloc.c
new file mode 100644
index 0000000..5c468e2
--- /dev/null
+++ b/gnu/lib/libregex/test/debugmalloc.c
@@ -0,0 +1,273 @@
+/* debugmalloc.c: a malloc for debugging purposes. */
+
+#include <stdio.h>
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+
+static unsigned trace = 0;
+#define TRACE(s) if (trace) fprintf (stderr, "%s", s)
+#define TRACE1(s, e1) if (trace) fprintf (stderr, s, e1)
+#define TRACE2(s, e1, e2) if (trace) fprintf (stderr, s, e1, e2)
+#define TRACE3(s, e1, e2, e3) if (trace) fprintf (stderr, s, e1, e2, e3)
+#define TRACE4(s, e1, e2, e3, e4) \
+ if (trace) fprintf (stderr, s, e1, e2, e3, e4)
+
+typedef char *address;
+
+
+/* Wrap our calls to sbrk. */
+
+/* Call sbrk (INCR) and return what it returns.  If sbrk reports failure
+   by returning (address) -1, print the reason with perror and abort
+   instead of returning.  */
+address
+xsbrk (incr)
+     int incr;
+{
+  extern char *sbrk ();
+  address old_break;
+
+  old_break = sbrk (incr);
+  if (old_break != (address) -1)
+    return old_break;
+
+  perror ("sbrk");  /* Actually, we should return NULL, not quit.  */
+  abort ();
+
+  return old_break;  /* not reached */
+}
+
+
+
+/* Header laid down at the start of every block this malloc manages.
+   Note that `chunk' is a pointer to the structure, not the structure
+   itself.  */
+typedef struct chunk_struct
+{
+ /* This is the size (in bytes) that has actually been
+ allocated, not the size that the user requested. */
+ unsigned alloc_size;
+
+ /* This is the size the user requested. */
+ unsigned user_size;
+
+ /* Points to the next block in one of the lists. */
+ struct chunk_struct *next;
+
+ /* Now comes the user's memory.  This member only marks where it
+ begins: chunk_to_mem returns the address of this member. */
+ address user_mem;
+
+ /* After the user's memory is a constant: malloc writes the four
+ bytes "Karl" at user memory + user_size. */
+} *chunk;
+
+/* Number of bytes in a chunk that precede the user's memory.  Taken
+   from the structure layout instead of being hard-coded, so it remains
+   correct when pointers are wider than `unsigned'.  */
+#define CHUNK_HEADER_SIZE \
+  ((unsigned) offsetof (struct chunk_struct, user_mem))
+
+/* Total bookkeeping per chunk: the header in front of the user's
+   memory plus the 4-byte constant stored after it.  This is 16 when
+   `unsigned' and pointers are both 4 bytes.  */
+#define MALLOC_OVERHEAD (CHUNK_HEADER_SIZE + 4)
+
+/* We might play around with the `user_size' field, but the amount of
+   memory that is actually available in the chunk is always the size
+   allocated minus the overhead.  */
+#define USER_ALLOC(c) ((c)->alloc_size - MALLOC_OVERHEAD)
+
+/* Given a pointer to a malloc-allocated block, the beginning of the
+   chunk is always CHUNK_HEADER_SIZE bytes back, since the only
+   overhead after the user memory is the constant.  This is the exact
+   inverse of chunk_to_mem.  */
+
+chunk
+mem_to_chunk (mem)
+     address mem;
+{
+  return (chunk) (mem - CHUNK_HEADER_SIZE);
+}
+
+
+/* The other direction is even easier, since the user's memory starts at
+ the `user_mem' member in the chunk. */
+
+/* Return the address of C's `user_mem' member, which is where the
+   user's memory starts.  */
+address
+chunk_to_mem (c)
+     chunk c;
+{
+  address user_start = (address) &c->user_mem;
+
+  return user_start;
+}
+
+
+
+/* We keep both all the allocated chunks and all the free chunks on
+ lists. Since we put the next pointers in the chunk structure, we
+ don't need a separate chunk_list structure. */
+chunk alloc_list = NULL, free_list = NULL;
+
+
+/* A new chunk always goes on the front of the list: NEW_C becomes the
+   head of *CHUNK_LIST and the previous head becomes its successor.  */
+
+void
+chunk_insert (chunk_list, new_c)
+     chunk *chunk_list;
+     chunk new_c;
+{
+  chunk old_head = *chunk_list;
+
+  TRACE3 (" Inserting 0x%x at the beginning of 0x%x, before 0x%x.\n",
+          new_c, chunk_list, old_head);
+
+  *chunk_list = new_c;
+  new_c->next = old_head;
+}
+
+
+/* Thus, removing an element means we have to search until we find it.
+ Have to delete before we insert, since insertion changes the next
+ pointer, which we need to put it on the other list. */
+
+/* Unlink DEAD_C from *CHUNK_LIST.  The list is searched from the head;
+   if DEAD_C is not on it, a message goes to stderr and the program
+   aborts.  DEAD_C's own `next' field is left untouched.  */
+void
+chunk_delete (chunk_list, dead_c)
+     chunk *chunk_list;
+     chunk dead_c;
+{
+  chunk cur;
+  chunk before = NULL;          /* element preceding CUR, if any */
+
+  TRACE2 (" Deleting 0x%x from 0x%x:", dead_c, chunk_list);
+
+  for (cur = *chunk_list; cur != dead_c && cur != NULL; cur = cur->next)
+    {
+      TRACE1 (" 0x%x", cur);
+      before = cur;
+    }
+
+  /* Ran off the end without meeting DEAD_C.  */
+  if (cur == NULL)
+    {
+      fprintf (stderr, "Chunk at 0x%x not found on list.\n", dead_c);
+      abort ();
+    }
+
+  if (before != NULL)
+    {
+      /* Somewhere past the head: bypass it.  */
+      TRACE2 (".\n Linking next(0x%x) to 0x%x.\n", before, cur->next);
+      before->next = cur->next;
+    }
+  else
+    {
+      /* It was the head: the list now starts at its successor.  */
+      TRACE1 (".\n Setting head to 0x%x.\n", cur->next);
+      *chunk_list = cur->next;
+    }
+}
+
+
+/* See if a list is hunky-dory. */
+
+/* Walk *CHUNK_LIST and assert, for every chunk, that the user size is
+   smaller than the allocated size and that the 4-byte constant "Karl"
+   which malloc stores right after the user's memory is still intact.  */
+void
+validate_list (chunk_list)
+     chunk *chunk_list;
+{
+  chunk c;
+
+  TRACE1 (" Validating list at 0x%x:", chunk_list);
+
+  for (c = *chunk_list; c != NULL; c = c->next)
+    {
+      assert (c->user_size < c->alloc_size);
+      /* memcmp yields 0 when the bytes are equal, so an intact guard is
+         `== 0'.  The bare memcmp result used here before made the
+         assertion fail on healthy chunks and pass on trampled ones.  */
+      assert (memcmp (chunk_to_mem (c) + c->user_size, "Karl", 4) == 0);
+      TRACE2 (" 0x%x/%d", c, c->user_size);
+    }
+
+  TRACE (".\n");
+}
+
+
+/* See if we have a free chunk of a given size. We'll take the first
+ one that is big enough. */
+
+/* Return the first chunk on the free list whose usable size is at
+   least NEEDED bytes, or NULL if there is none.  The chunk is not
+   removed from the list.  */
+chunk
+free_list_available (needed)
+     unsigned needed;
+{
+  chunk candidate;
+
+  TRACE1 (" Checking free list for %d bytes:", needed);
+
+  /* Empty list: nothing to scan (and no "Returning" trace line).  */
+  if (free_list == NULL)
+    return NULL;
+
+  for (candidate = free_list;
+       candidate != NULL && USER_ALLOC (candidate) < needed;
+       candidate = candidate->next)
+    {
+      TRACE2 (" 0x%x/%d", candidate, USER_ALLOC (candidate));
+    }
+
+  TRACE1 ("\n Returning 0x%x.\n", candidate);
+  return candidate;
+}
+
+
+
+
+/* Allocate N bytes.  Both lists are validated first.  A large-enough
+   chunk from the free list is reused whole; otherwise a new chunk of
+   N + MALLOC_OVERHEAD bytes is obtained from xsbrk.  The chunk is put
+   on the allocated list and the guard constant "Karl" is written
+   immediately after the N user bytes.  */
+address
+malloc (n)
+     unsigned n;
+{
+  chunk c;
+  address user_start;
+
+  TRACE1 ("Mallocing %d bytes.\n", n);
+
+  validate_list (&free_list);
+  validate_list (&alloc_list);
+
+  c = free_list_available (n);
+  if (c != NULL)
+    {
+      /* Found something on the free list; use it as is, unsplit.  */
+      TRACE (" found on free list.\n");
+      chunk_delete (&free_list, c);
+    }
+  else
+    {
+      /* Nothing suitable on the free list: get a brand-new chunk.  */
+      TRACE (" not on free list.\n");
+      c = (chunk) xsbrk (n + MALLOC_OVERHEAD);
+      c->alloc_size = n + MALLOC_OVERHEAD;
+    }
+
+  /* A recycled chunk may have served a different request size, so the
+     user size and the guard position are (re)established every time.  */
+  c->user_size = n;
+  user_start = chunk_to_mem (c);
+  memcpy (user_start + n, "Karl", 4);
+  chunk_insert (&alloc_list, c);
+
+  TRACE2 ("Malloc returning 0x%x (chunk 0x%x).\n", user_start, c);
+  return user_start;
+}
+
+
+/* Resize the block at MEM to N bytes by allocating a fresh block,
+   copying the old contents into it and freeing the old block.  Returns
+   the new block.  A null MEM is treated as a plain malloc (N).  */
+address
+realloc (mem, n)
+     address mem;
+     unsigned n;
+{
+  void free ();
+  chunk c;
+  address new_mem;
+  unsigned keep;                /* how many old bytes survive */
+
+  /* There is no old chunk to inspect or release.  */
+  if (mem == NULL)
+    return malloc (n);
+
+  c = mem_to_chunk (mem);
+
+  TRACE3 ("Reallocing %d bytes at 0x%x (chunk 0x%x).\n", n, mem, c);
+
+  new_mem = malloc (n);
+
+  /* Copy no more than the new block can hold: when shrinking, copying
+     the full old size would run past the N bytes just allocated.  */
+  keep = c->user_size < n ? c->user_size : n;
+  memcpy (new_mem, mem, keep);
+  free (mem);
+
+  return new_mem;
+}
+
+
+/* Release the block at MEM: after validating both lists, move its chunk
+   from the allocated list to the free list.  A null MEM is ignored.
+   chunk_delete aborts if the chunk is not on the allocated list.  */
+void
+free (mem)
+     address mem;
+{
+  chunk c;
+
+  /* free (NULL) is a no-op; without this check a bogus chunk address
+     would be computed from the null pointer and chunk_delete would
+     abort on it.  */
+  if (mem == NULL)
+    return;
+
+  c = mem_to_chunk (mem);
+
+  TRACE2 ("Freeing memory at 0x%x (chunk at 0x%x).\n", mem, c);
+
+  validate_list (&free_list);
+  validate_list (&alloc_list);
+
+  /* Delete before inserting: insertion overwrites the `next' pointer
+     that deletion still needs.  */
+  chunk_delete (&alloc_list, c);
+  chunk_insert (&free_list, c);
+}
diff --git a/gnu/lib/libregex/test/emacsmalloc.c b/gnu/lib/libregex/test/emacsmalloc.c
new file mode 100644
index 0000000..6eee1fa
--- /dev/null
+++ b/gnu/lib/libregex/test/emacsmalloc.c
@@ -0,0 +1,844 @@
+/* dynamic memory allocation for GNU.
+ Copyright (C) 1985, 1987 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+In other words, you are welcome to use, share and improve this program.
+You are forbidden to forbid anyone else to use, share and improve
+what you give them. Help stamp out software-hoarding! */
+
+
+/*
+ * @(#)nmalloc.c 1 (Caltech) 2/21/82
+ *
+ * U of M Modified: 20 Jun 1983 ACT: strange hacks for Emacs
+ *
+ * Nov 1983, Mike@BRL, Added support for 4.1C/4.2 BSD.
+ *
+ * This is a very fast storage allocator. It allocates blocks of a small
+ * number of different sizes, and keeps free lists of each size. Blocks
+ * that don't exactly fit are passed up to the next larger size. In this
+ * implementation, the available sizes are (2^n)-4 (or -16) bytes long.
+ * This is designed for use in a program that uses vast quantities of
+ * memory, but bombs when it runs out. To make it a little better, it
+ * warns the user when he starts to get near the end.
+ *
+ * June 84, ACT: modified rcheck code to check the range given to malloc,
+ * rather than the range determined by the 2-power used.
+ *
+ * Jan 85, RMS: calls malloc_warning to issue warning on nearly full.
+ * No longer Emacs-specific; can serve as all-purpose malloc for GNU.
+ * You should call malloc_init to reinitialize after loading dumped Emacs.
+ * Call malloc_stats to get info on memory stats if MSTATS turned on.
+ * realloc knows how to return same block given, just changing its size,
+ * if the power of 2 is correct.
+ */
+
+/*
+ * nextf[i] is the pointer to the next free block of size 2^(i+3). The
+ * smallest allocatable block is 8 bytes. The overhead information will
+ * go in the first int of the block, and the returned pointer will point
+ * to the second.
+ *
+#ifdef MSTATS
+ * nmalloc[i] is the difference between the number of mallocs and frees
+ * for a given block size.
+#endif MSTATS
+ */
+
+#ifdef emacs
+/* config.h specifies which kind of system this is. */
+#include "config.h"
+#include <signal.h>
+#else
+
+/* Determine which kind of system this is. */
+#include <sys/types.h>
+#include <signal.h>
+
+#include <string.h>
+#define bcopy(s,d,n) memcpy ((d), (s), (n))
+#define bcmp(s1,s2,n) memcmp ((s1), (s2), (n))
+#define bzero(s,n) memset ((s), 0, (n))
+
+#ifndef SIGTSTP
+#ifndef VMS
+#ifndef USG
+#define USG
+#endif
+#endif /* not VMS */
+#else /* SIGTSTP */
+#ifdef SIGIO
+#define BSD4_2
+#endif /* SIGIO */
+#endif /* SIGTSTP */
+
+#endif /* not emacs */
+
+/* Define getpagesize () if the system does not. */
+#include "getpagesize.h"
+
+#ifdef BSD
+#ifdef BSD4_1
+#include <sys/vlimit.h> /* warn the user when near the end */
+#else /* if 4.2 or newer */
+#include <sys/time.h>
+#include <sys/resource.h>
+#endif /* if 4.2 or newer */
+#endif
+
+#ifdef VMS
+#include "vlimit.h"
+#endif
+
+/* start_of_data () yields the address used as the default start of the
+ data space when malloc_init has not supplied one (see morecore). */
+extern char *start_of_data ();
+
+/* On BSD without DATA_SEG_BITS, and whenever this is not built as part
+ of Emacs, start_of_data () is simply the address of etext. */
+#ifdef BSD
+#ifndef DATA_SEG_BITS
+#define start_of_data() &etext
+#endif
+#endif
+
+#ifndef emacs
+#define start_of_data() &etext
+#endif
+
+/* Values stored in the mh_alloc byte of a block header. */
+#define ISALLOC ((char) 0xf7) /* magic byte that implies allocation */
+#define ISFREE ((char) 0x54) /* magic byte that implies free block */
+ /* this is for error checking only */
+#define ISMEMALIGN ((char) 0xd6) /* Stored before the value returned by
+ memalign, with the rest of the word
+ being the distance to the true
+ beginning of the block. */
+
+extern char etext;
+
+/* These two are for user programs to look at, when they are interested. */
+
+unsigned int malloc_sbrk_used; /* amount of data space used now */
+unsigned int malloc_sbrk_unused; /* amount more we can have */
+
+/* start of data space; can be changed by calling malloc_init */
+static char *data_space_start;
+
+#ifdef MSTATS
+/* nmalloc[i] is incremented by malloc and decremented by free for size
+ index i; nmal and nfre count all mallocs and all frees. */
+static int nmalloc[30];
+static int nmal, nfre;
+#endif /* MSTATS */
+
+/* If range checking is not turned on, all we have is a flag indicating
+ whether memory is allocated, an index in nextf[], and a size field; to
+ realloc() memory we copy either size bytes or 1<<(index+3) bytes depending
+ on whether the former can hold the exact size (given the value of
+ 'index'). If range checking is on, we always need to know how much space
+ is allocated, so the 'size' field is never used. */
+
+/* Header kept in front of every block. The user's memory begins
+ (sizeof (struct mhead) + 7) & ~7 bytes after the start of the header,
+ which is the offset malloc, free and realloc all apply. */
+struct mhead {
+ char mh_alloc; /* ISALLOC or ISFREE */
+ char mh_index; /* index in nextf[] */
+/* Remainder are valid only when block is allocated */
+ unsigned short mh_size; /* size, if < 0x10000 */
+ /* memalign also uses mh_size, in the fake
+ ISMEMALIGN header it writes, to hold the
+ distance back to the real block. */
+#ifdef rcheck
+ unsigned mh_nbytes; /* number of bytes allocated */
+ int mh_magic4; /* should be == MAGIC4 */
+#endif /* rcheck */
+};
+
+/* Access free-list pointer of a block.
+ It is stored at block + sizeof (char *), i.e. block + 4 where
+ pointers are 4 bytes wide.
+ This is not a field in the mhead structure
+ because we want sizeof (struct mhead)
+ to describe the overhead for when the block is in use,
+ and we do not want the free-list pointer to count in that.
+ NOTE(review): with 8-byte pointers the link occupies bytes 8..15 of
+ the block, which is beyond the end of an 8-byte (index 0) block --
+ confirm before relying on this allocator on such a machine. */
+
+#define CHAIN(a) \
+ (*(struct mhead **) (sizeof (char *) + (char *) (a)))
+
+#ifdef rcheck
+
+/* To implement range checking, we write magic values in at the beginning and
+   end of each allocated block, and make sure they are undisturbed whenever a
+   free or a realloc occurs.  */
+/* Written in each of the 4 bytes following the block's real space */
+#define MAGIC1 0x55
+/* Written in the 4 bytes before the block's real space */
+#define MAGIC4 0x55555555
+/* ASSERT (p) calls botch with the text of the failed condition.
+   A traditional preprocessor substitutes the argument inside the
+   string literal "p"; an ISO C preprocessor does not, so there the
+   stringizing operator is needed or every failure reports just "p".
+   The trailing `else' lets the macro be followed by a semicolon
+   without capturing a following `else'.  */
+#ifdef __STDC__
+#define ASSERT(p) if (!(p)) botch (#p); else
+#else
+#define ASSERT(p) if (!(p)) botch ("p"); else
+#endif
+#define EXTRA 4 /* 4 bytes extra for MAGIC1s */
+#else
+/* Without range checking a failed assertion simply aborts, and no
+   extra trailing bytes are reserved.  */
+#define ASSERT(p) if (!(p)) abort (); else
+#define EXTRA 0
+#endif /* rcheck */
+
+
+/* nextf[i] is free list of blocks of size 2**(i + 3) */
+
+static struct mhead *nextf[30];
+
+/* busy[i] is nonzero while allocation of block size i is in progress. */
+
+static char busy[30];
+
+/* Number of bytes of writable memory we can expect to be able to get */
+/* Zero means "not yet computed"; morecore then calls get_lim_data. */
+static unsigned int lim_data;
+
+/* Level number of warnings already issued.
+ 0 -- no warnings issued.
+ 1 -- 75% warning already issued.
+ 2 -- 85% warning already issued.
+ 3 -- 95% warning already issued.
+*/
+static int warnlevel;
+
+/* Function to call to issue a warning;
+ 0 means don't issue them. */
+static void (*warnfunction) ();
+
+/* nonzero once initial bunch of free blocks made */
+static int gotpool;
+
+/* Set once, by the first getpool call, to the break value it read
+ with sbrk (0). */
+char *_malloc_base;
+
+static void getpool ();
+
+/* Reinitialize the allocator's limit bookkeeping.
+
+   START, when non-null, becomes the recorded start of the data space;
+   a null START leaves the current value alone.  WARNFUN becomes the
+   function called to report memory-limit warnings (0 disables them).
+   The cached limit and the warning level are both reset to zero, so
+   the limit is recomputed the next time morecore runs.  */
+void
+malloc_init (start, warnfun)
+     char *start;
+     void (*warnfun) ();
+{
+  warnfunction = warnfun;
+  warnlevel = 0;
+  lim_data = 0;
+
+  if (start != 0)
+    data_space_start = start;
+}
+
+/* Return the maximum size to which MEM can be realloc'd
+   without actually requiring copying.
+
+   MEM must be a pointer returned by malloc.  The block holding it is
+   8 << mh_index bytes long, of which the header occupies
+   (sizeof (struct mhead) + 7) & ~7 bytes -- the same rounded offset
+   malloc and realloc use -- and EXTRA bytes are reserved for the
+   range-check trailer.  Subtracting the unrounded sizeof instead
+   would report bytes that realloc cannot actually keep in place.  */
+int
+malloc_usable_size (mem)
+     char *mem;
+{
+  struct mhead *p
+    = (struct mhead *) (mem - ((sizeof (struct mhead) + 7) & ~7));
+  int blocksize = 8 << p->mh_index;
+
+  return blocksize - ((sizeof (struct mhead) + 7) & ~7) - EXTRA;
+}
+
+/* Get more memory from the system with sbrk and chain it onto nextf[NU]
+ as free blocks of size 1 << (NU + 3).
+
+ On first use this also fills in data_space_start and lim_data and calls
+ getpool twice. When a warning function is installed, it is called as
+ the break passes 75%, 85% and 95% of lim_data. On BSD other than 4.1,
+ all signals except SIGTRAP, SIGILL, SIGTSTP and SIGSTOP are blocked with
+ sigsetmask for the duration of the call and the old mask is restored on
+ both exits.
+
+ Nothing is returned: if sbrk fails, nextf[NU] is left as it was, which is
+ what malloc checks after calling this. */
+static void
+morecore (nu) /* ask system for more memory */
+ register int nu; /* size index to get more of */
+{
+ char *sbrk ();
+ register char *cp;
+ register int nblks;
+ register unsigned int siz;
+ int oldmask;
+
+#ifdef BSD
+#ifndef BSD4_1
+ int newmask = -1;
+ /* Blocking these signals interferes with debugging, at least on BSD on
+ the HP 9000/300. */
+#ifdef SIGTRAP
+ newmask &= ~(1 << SIGTRAP);
+#endif
+#ifdef SIGILL
+ newmask &= ~(1 << SIGILL);
+#endif
+#ifdef SIGTSTP
+ newmask &= ~(1 << SIGTSTP);
+#endif
+#ifdef SIGSTOP
+ newmask &= ~(1 << SIGSTOP);
+#endif
+ oldmask = sigsetmask (newmask);
+#endif
+#endif
+
+ if (!data_space_start)
+ {
+ data_space_start = start_of_data ();
+ }
+
+ if (lim_data == 0)
+ get_lim_data ();
+
+ /* On initial startup, get two blocks of each size up to 1k bytes */
+ if (!gotpool)
+ { getpool (); getpool (); gotpool = 1; }
+
+ /* Find current end of memory and issue warning if getting near max */
+
+#ifndef VMS
+ /* Maximum virtual memory on VMS is difficult to calculate since it
+ * depends on several dynamically changing things. Also, alignment
+ * isn't that important. That is why much of the code here is ifdef'ed
+ * out for VMS systems.
+ */
+ cp = sbrk (0);
+ /* Bytes of data space in use: current break minus recorded start. */
+ siz = cp - data_space_start;
+
+ if (warnfunction)
+ switch (warnlevel)
+ {
+ case 0:
+ if (siz > (lim_data / 4) * 3)
+ {
+ warnlevel++;
+ (*warnfunction) ("Warning: past 75% of memory limit");
+ }
+ break;
+ case 1:
+ if (siz > (lim_data / 20) * 17)
+ {
+ warnlevel++;
+ (*warnfunction) ("Warning: past 85% of memory limit");
+ }
+ break;
+ case 2:
+ if (siz > (lim_data / 20) * 19)
+ {
+ warnlevel++;
+ (*warnfunction) ("Warning: past 95% of memory limit");
+ }
+ break;
+ }
+
+ if ((int) cp & 0x3ff) /* land on 1K boundaries */
+ sbrk (1024 - ((int) cp & 0x3ff));
+#endif /* not VMS */
+
+ /* Take at least 2k, and figure out how many blocks of the desired size
+ we're about to get */
+ /* From here on siz is a size index: NU itself, or 8 (a 2k request)
+ when NU is smaller, in which case the 2k holds 1 << (8 - NU) blocks. */
+ nblks = 1;
+ if ((siz = nu) < 8)
+ nblks = 1 << ((siz = 8) - nu);
+
+ if ((cp = sbrk (1 << (siz + 3))) == (char *) -1)
+ {
+#ifdef BSD
+#ifndef BSD4_1
+ sigsetmask (oldmask);
+#endif
+#endif
+ return; /* no more room! */
+ }
+ /* NOTE(review): siz holds the size index assigned just above, not the
+ byte count computed from sbrk (0) earlier, so these two variables do
+ not receive byte amounts -- confirm what was intended. */
+ malloc_sbrk_used = siz;
+ malloc_sbrk_unused = lim_data - siz;
+
+#ifndef VMS
+ if ((int) cp & 7)
+ { /* shouldn't happen, but just in case */
+ cp = (char *) (((int) cp + 8) & ~7);
+ nblks--;
+ }
+#endif /* not VMS */
+
+ /* save new header and link the nblks blocks together */
+ nextf[nu] = (struct mhead *) cp;
+ siz = 1 << (nu + 3);
+ while (1)
+ {
+ ((struct mhead *) cp) -> mh_alloc = ISFREE;
+ ((struct mhead *) cp) -> mh_index = nu;
+ if (--nblks <= 0) break;
+ CHAIN ((struct mhead *) cp) = (struct mhead *) (cp + siz);
+ cp += siz;
+ }
+ /* The last block ends the chain. */
+ CHAIN ((struct mhead *) cp) = 0;
+
+#ifdef BSD
+#ifndef BSD4_1
+ sigsetmask (oldmask);
+#endif
+#endif
+}
+
+/* Seed the free lists with a small pool of blocks.
+
+ Moves the break up to a 1K boundary, requests 2k from sbrk, and pushes
+ free blocks cut from that memory onto nextf[0] through nextf[6].
+ Returns without doing anything further if sbrk fails. */
+static void
+getpool ()
+{
+ register int nu;
+ char * sbrk ();
+ register char *cp = sbrk (0);
+
+ if ((int) cp & 0x3ff) /* land on 1K boundaries */
+ sbrk (1024 - ((int) cp & 0x3ff));
+
+ /* Record address of start of space allocated by malloc. */
+ /* The value stored is the break read above, before the 1K rounding. */
+ if (_malloc_base == 0)
+ _malloc_base = cp;
+
+ /* Get 2k of storage */
+
+ cp = sbrk (04000);
+ if (cp == (char *) -1)
+ return;
+
+ /* Divide it into an initial 8-byte block (index 0)
+ plus one block of size 8 << nu for nu = 0 ... 6.
+ NOTE(review): those blocks add up to 1024 bytes, so the second
+ half of the 2k obtained above is not put on any list here. */
+
+ CHAIN (cp) = nextf[0];
+ nextf[0] = (struct mhead *) cp;
+ ((struct mhead *) cp) -> mh_alloc = ISFREE;
+ ((struct mhead *) cp) -> mh_index = 0;
+ cp += 8;
+
+ for (nu = 0; nu < 7; nu++)
+ {
+ CHAIN (cp) = nextf[nu];
+ nextf[nu] = (struct mhead *) cp;
+ ((struct mhead *) cp) -> mh_alloc = ISFREE;
+ ((struct mhead *) cp) -> mh_index = nu;
+ cp += 8 << nu;
+ }
+}
+
+/* Allocate a block able to hold N bytes and return the address of its
+ user area, or 0 if no block could be obtained.
+
+ The block comes from the free list nextf[nunits], where 8 << nunits is
+ the smallest block size that holds N plus the rounded header plus EXTRA;
+ morecore is called when that list is empty. Aborts (or calls botch
+ under rcheck) if the block taken from the list is not marked ISFREE
+ with the expected index. */
+char *
+malloc (n) /* get a block */
+ unsigned n;
+{
+ register struct mhead *p;
+ register unsigned int nbytes;
+ register int nunits = 0;
+
+ /* Figure out how many bytes are required, rounding up to the nearest
+ multiple of 8, then figure out which nextf[] area to use.
+ Both the beginning of the header and the beginning of the
+ block should be on an eight byte boundary. */
+ nbytes = (n + ((sizeof *p + 7) & ~7) + EXTRA + 7) & ~7;
+ {
+ register unsigned int shiftr = (nbytes - 1) >> 2;
+
+ while (shiftr >>= 1)
+ nunits++;
+ }
+
+ /* In case this is reentrant use of malloc from signal handler,
+ pick a block size that no other malloc level is currently
+ trying to allocate. That's the easiest harmless way not to
+ interfere with the other level of execution. */
+ while (busy[nunits]) nunits++;
+ busy[nunits] = 1;
+
+ /* If there are no blocks of the appropriate size, go get some */
+ /* COULD SPLIT UP A LARGER BLOCK HERE ... ACT */
+ if (nextf[nunits] == 0)
+ morecore (nunits);
+
+ /* Get one block off the list, and set the new list head */
+ if ((p = nextf[nunits]) == 0)
+ {
+ busy[nunits] = 0;
+ return 0;
+ }
+ nextf[nunits] = CHAIN (p);
+ busy[nunits] = 0;
+
+ /* Check for free block clobbered */
+ /* If not for this check, we would gobble a clobbered free chain ptr */
+ /* and bomb out on the NEXT allocate of this size block */
+ if (p -> mh_alloc != ISFREE || p -> mh_index != nunits)
+#ifdef rcheck
+ botch ("block on free list clobbered");
+#else /* not rcheck */
+ abort ();
+#endif /* not rcheck */
+
+ /* Fill in the info, and if range checking, set up the magic numbers */
+ p -> mh_alloc = ISALLOC;
+#ifdef rcheck
+ p -> mh_nbytes = n;
+ p -> mh_magic4 = MAGIC4;
+ {
+ /* Get the location n after the beginning of the user's space. */
+ register char *m = (char *) p + ((sizeof *p + 7) & ~7) + n;
+
+ *m++ = MAGIC1, *m++ = MAGIC1, *m++ = MAGIC1, *m = MAGIC1;
+ }
+#else /* not rcheck */
+ /* mh_size is an unsigned short, so this keeps only the low 16 bits of
+ n; realloc does not read it for blocks with index >= 13. */
+ p -> mh_size = n;
+#endif /* not rcheck */
+#ifdef MSTATS
+ nmalloc[nunits]++;
+ nmal++;
+#endif /* MSTATS */
+ return (char *) p + ((sizeof *p + 7) & ~7);
+}
+
+/* Return the block whose user area starts at MEM to its free list.
+
+ A null MEM is ignored. If the header found in front of MEM is the
+ ISMEMALIGN marker written by memalign, its mh_size is used to step back
+ to the block malloc originally returned. The block must be marked
+ ISALLOC, otherwise this aborts (or calls botch under rcheck, which also
+ verifies the magic values before and after the user area). The block
+ is then marked ISFREE and pushed on nextf[mh_index].
+
+ Declared without a return type and returns no value. */
+free (mem)
+ char *mem;
+{
+ register struct mhead *p;
+ {
+ register char *ap = mem;
+
+ if (ap == 0)
+ return;
+
+ p = (struct mhead *) (ap - ((sizeof *p + 7) & ~7));
+ if (p -> mh_alloc == ISMEMALIGN)
+ {
+ ap -= p->mh_size;
+ p = (struct mhead *) (ap - ((sizeof *p + 7) & ~7));
+ }
+
+#ifndef rcheck
+ if (p -> mh_alloc != ISALLOC)
+ abort ();
+
+#else rcheck
+ if (p -> mh_alloc != ISALLOC)
+ {
+ if (p -> mh_alloc == ISFREE)
+ botch ("free: Called with already freed block argument\n");
+ else
+ botch ("free: Called with bad argument\n");
+ }
+
+ ASSERT (p -> mh_magic4 == MAGIC4);
+ /* Step to the four MAGIC1 bytes that follow the user's data. */
+ ap += p -> mh_nbytes;
+ ASSERT (*ap++ == MAGIC1); ASSERT (*ap++ == MAGIC1);
+ ASSERT (*ap++ == MAGIC1); ASSERT (*ap == MAGIC1);
+#endif /* rcheck */
+ }
+ {
+ register int nunits = p -> mh_index;
+
+ ASSERT (nunits <= 29);
+ p -> mh_alloc = ISFREE;
+
+ /* Protect against signal handlers calling malloc. */
+ busy[nunits] = 1;
+ /* Put this block on the free list. */
+ CHAIN (p) = nextf[nunits];
+ nextf[nunits] = p;
+ busy[nunits] = 0;
+
+#ifdef MSTATS
+ nmalloc[nunits]--;
+ nfre++;
+#endif /* MSTATS */
+ }
+}
+
+/* Change the size of the block at MEM to N bytes and return its
+ (possibly new) address, or 0 if a new block was needed and malloc
+ failed, in which case MEM is left allocated.
+
+ A null MEM is handled as malloc (N). If N rounds to the block size
+ MEM already has, the same block is returned with only its recorded
+ size (and, under rcheck, its trailing magic bytes) updated. Otherwise
+ a new block is allocated, min (old size, N) bytes are copied, and MEM
+ is freed.
+
+ NOTE(review): unlike free, this does not look for the ISMEMALIGN
+ marker, so a pointer obtained from memalign presumably fails the
+ ISALLOC assertion -- confirm. */
+char *
+realloc (mem, n)
+ char *mem;
+ register unsigned n;
+{
+ register struct mhead *p;
+ register unsigned int tocopy;
+ register unsigned int nbytes;
+ register int nunits;
+
+ if (mem == 0)
+ return malloc (n);
+ p = (struct mhead *) (mem - ((sizeof *p + 7) & ~7));
+ nunits = p -> mh_index;
+ ASSERT (p -> mh_alloc == ISALLOC);
+#ifdef rcheck
+ ASSERT (p -> mh_magic4 == MAGIC4);
+ {
+ register char *m = mem + (tocopy = p -> mh_nbytes);
+ ASSERT (*m++ == MAGIC1); ASSERT (*m++ == MAGIC1);
+ ASSERT (*m++ == MAGIC1); ASSERT (*m == MAGIC1);
+ }
+#else /* not rcheck */
+ /* Blocks of index >= 13 are 64k or larger, too big for the 16-bit
+ mh_size, so the whole user area of the block is taken as the old
+ size instead. */
+ if (p -> mh_index >= 13)
+ tocopy = (1 << (p -> mh_index + 3)) - ((sizeof *p + 7) & ~7);
+ else
+ tocopy = p -> mh_size;
+#endif /* not rcheck */
+
+ /* See if desired size rounds to same power of 2 as actual size. */
+ nbytes = (n + ((sizeof *p + 7) & ~7) + EXTRA + 7) & ~7;
+
+ /* If ok, use the same block, just marking its size as changed. */
+ /* That is: it fits in this block but would not fit in one half as big. */
+ if (nbytes > (4 << nunits) && nbytes <= (8 << nunits))
+ {
+#ifdef rcheck
+ /* Clear the old trailing magic bytes, then write new ones after the
+ new end of the user's data. */
+ register char *m = mem + tocopy;
+ *m++ = 0; *m++ = 0; *m++ = 0; *m++ = 0;
+ p-> mh_nbytes = n;
+ m = mem + n;
+ *m++ = MAGIC1; *m++ = MAGIC1; *m++ = MAGIC1; *m++ = MAGIC1;
+#else /* not rcheck */
+ p -> mh_size = n;
+#endif /* not rcheck */
+ return mem;
+ }
+
+ if (n < tocopy)
+ tocopy = n;
+ {
+ register char *new;
+
+ if ((new = malloc (n)) == 0)
+ return 0;
+ bcopy (mem, new, tocopy);
+ free (mem);
+ return new;
+ }
+}
+
+/* This is in case something linked with Emacs calls calloc.
+
+   Allocate zero-filled space for NUM objects of SIZE bytes each.
+   Returns 0 if malloc fails or if NUM * SIZE does not fit in an
+   unsigned; without that check the product would wrap and a block
+   much smaller than requested would be returned.  */
+
+char *
+calloc (num, size)
+     unsigned num, size;
+{
+  register char *mem;
+  register unsigned total;
+
+  if (size != 0 && num > (unsigned) -1 / size)
+    return 0;
+
+  total = num * size;
+  mem = malloc (total);
+  if (mem != 0)
+    bzero (mem, total);
+  return mem;
+}
+
+#ifndef VMS
+
+/* Allocate SIZE bytes at an address that is a multiple of ALIGNMENT.
+
+ SIZE + ALIGNMENT bytes are requested from malloc. If the result is
+ already aligned it is returned as is; otherwise the next aligned
+ address inside the block is returned, and a fake header carrying
+ ISMEMALIGN and the distance back to malloc's pointer is written just
+ before it so that free can find the real block. Returns 0 if malloc
+ fails.
+
+ NOTE(review): the mask arithmetic (alignment - 1, -alignment)
+ presumably assumes ALIGNMENT is a power of two; the distance is
+ stored in the unsigned short mh_size; and SIZE + ALIGNMENT is not
+ checked for wraparound -- confirm callers stay within those limits. */
+char *
+memalign (alignment, size)
+ unsigned alignment, size;
+{
+ register char *ptr = malloc (size + alignment);
+ register char *aligned;
+ register struct mhead *p;
+
+ if (ptr == 0)
+ return 0;
+ /* If entire block has the desired alignment, just accept it. */
+ if (((int) ptr & (alignment - 1)) == 0)
+ return ptr;
+ /* Otherwise, get address of byte in the block that has that alignment. */
+ aligned = (char *) (((int) ptr + alignment - 1) & -alignment);
+
+ /* Store a suitable indication of how to free the block,
+ so that free can find the true beginning of it. */
+ p = (struct mhead *) (aligned - ((7 + sizeof (struct mhead)) & ~7));
+ p -> mh_size = aligned - ptr;
+ p -> mh_alloc = ISMEMALIGN;
+ return aligned;
+}
+
+#ifndef HPUX
+/* This runs into trouble with getpagesize on HPUX.
+ Patching out seems cleaner than the ugly fix needed. */
+/* Allocate SIZE bytes aligned to the value of getpagesize (), by
+   handing both to memalign; returns whatever memalign returns.  */
+char *
+valloc (size)
+     int size;
+{
+  return memalign (getpagesize (), size);
+}
+#endif /* not HPUX */
+#endif /* not VMS */
+
+#ifdef MSTATS
+/* Return statistics describing allocation of blocks of size 2**n.  */
+
+/* What malloc_stats reports about one size index.  */
+struct mstats_value
+  {
+    int blocksize;		/* bytes per block: 1 << (index + 3) */
+    int nfree;			/* blocks currently on the free list */
+    int nused;			/* nmalloc[] count for this index */
+  };
+
+/* Return the statistics for size index SIZE.  An index outside
+   0 ... 29 yields a value whose three fields are all zero.
+   NOTE(review): blocksize is an int, so for the largest indices the
+   shift can exceed what an int holds -- confirm callers only ask
+   about indices that are actually in use.  */
+struct mstats_value
+malloc_stats (size)
+     int size;
+{
+  struct mstats_value v;
+  register struct mhead *p;
+
+  v.nfree = 0;
+
+  if (size < 0 || size >= 30)
+    {
+      v.blocksize = 0;
+      v.nused = 0;
+      return v;
+    }
+
+  v.blocksize = 1 << (size + 3);
+  v.nused = nmalloc[size];
+
+  /* Count the blocks chained on this index's free list.  */
+  for (p = nextf[size]; p; p = CHAIN (p))
+    v.nfree++;
+
+  return v;
+}
+/* Return the number of bytes in blocks currently allocated: for each
+   size index, nmalloc[i] times the block size 1 << (i + 3).
+
+   The block size is computed only for indices that have a nonzero
+   count, so the shift is never evaluated for the large, unused
+   indices where it would not fit in an int.  */
+int
+malloc_mem_used ()
+{
+  int i;
+  int size_used;
+
+  size_used = 0;
+
+  for (i = 0; i < 30; i++)
+    {
+      if (nmalloc[i] != 0)
+	size_used += nmalloc[i] * (1 << (i + 3));
+    }
+
+  return size_used;
+}
+
+/* Return the number of bytes sitting on the free lists: the block
+   size 1 << (i + 3) summed over every block chained on nextf[i].
+
+   The block size is computed only while walking a non-empty list, so
+   the shift is never evaluated for the large, unused indices where
+   it would not fit in an int.  */
+int
+malloc_mem_free ()
+{
+  int i;
+  int size_unused;
+
+  size_unused = 0;
+
+  for (i = 0; i < 30; i++)
+    {
+      struct mhead *p;
+
+      for (p = nextf[i]; p; p = CHAIN (p))
+	size_unused += 1 << (i + 3);
+    }
+
+  return size_unused;
+}
+#endif /* MSTATS */
+
+/*
+ * This function returns the total number of bytes that the process
+ * will be allowed to allocate via the sbrk(2) system call. On
+ * BSD systems this is the total space allocatable to stack and
+ * data. On USG systems this is the data space only.
+ */
+
+/* Exactly one of the three get_lim_data definitions below is compiled.
+ Each stores its result in lim_data and returns no value; the functions
+ are declared without a return type, and morecore calls get_lim_data
+ before any declaration of it is in scope. */
+#ifdef USG
+
+/* USG: ULIMIT_BREAK_VALUE if defined, else ulimit (3, 0), minus the
+ start of the data space. */
+get_lim_data ()
+{
+ extern long ulimit ();
+
+#ifdef ULIMIT_BREAK_VALUE
+ lim_data = ULIMIT_BREAK_VALUE;
+#else
+ lim_data = ulimit (3, 0);
+#endif
+
+ lim_data -= (long) data_space_start;
+}
+
+#else /* not USG */
+#if defined (BSD4_1) || defined (VMS)
+
+/* 4.1 BSD and VMS: the value vlimit reports for LIM_DATA. */
+get_lim_data ()
+{
+ lim_data = vlimit (LIM_DATA, -1);
+}
+
+#else /* not BSD4_1 and not VMS */
+
+/* Otherwise: the soft RLIMIT_DATA limit from getrlimit, masked with
+ RLIM_INFINITY when that macro exists. The return value of getrlimit
+ is not checked. */
+get_lim_data ()
+{
+ struct rlimit XXrlimit;
+
+ getrlimit (RLIMIT_DATA, &XXrlimit);
+#ifdef RLIM_INFINITY
+ lim_data = XXrlimit.rlim_cur & RLIM_INFINITY; /* soft limit */
+#else
+ lim_data = XXrlimit.rlim_cur; /* soft limit */
+#endif
+}
+
+#endif /* not BSD4_1 and not VMS */
+#endif /* not USG */
+
+#ifdef VMS
+/* There is a problem when dumping and restoring things on VMS. Calls
+ * to SBRK don't necessarily result in contiguous allocation. Dumping
+ * doesn't work when it isn't. Therefore, we make the initial
+ * allocation contiguous by allocating a big chunk, and do SBRKs from
+ * there. Once Emacs has dumped there is no reason to continue
+ * contiguous allocation, malloc doesn't depend on it.
+ *
+ * There is a further problem of using brk and sbrk while using VMS C
+ * run time library routines malloc, calloc, etc. The documentation
+ * says that this is a no-no, although I'm not sure why this would be
+ * a problem. In any case, we remove the necessity to call brk and
+ * sbrk, by calling calloc (to assure zero filled data) rather than
+ * sbrk.
+ *
+ * VMS_ALLOCATION_SIZE is the size of the allocation array. This
+ * should be larger than the malloc size before dumping. Making this
+ * too large will result in the startup procedure slowing down since
+ * it will require more space and time to map it in.
+ *
+ * The value for VMS_ALLOCATION_SIZE in the following define was determined
+ * by running emacs linked (and a large allocation) with the debugger and
+ * looking to see how much storage was used. The allocation was 201 pages,
+ * so I rounded it up to a power of two.
+ */
+/* Size in bytes of the static buffer that sys_sbrk hands out first. */
+#ifndef VMS_ALLOCATION_SIZE
+#define VMS_ALLOCATION_SIZE (512*256)
+#endif
+
+/* Use VMS RTL definitions */
+#undef sbrk
+#undef brk
+#undef malloc
+/* Nonzero once a request has not fit in vms_initial_buffer; from then
+ on sys_sbrk takes every request from malloc. */
+int vms_out_initial = 0;
+char vms_initial_buffer[VMS_ALLOCATION_SIZE];
+/* Next unused byte of vms_initial_buffer. */
+static char *vms_current_brk = &vms_initial_buffer;
+/* Address of the last byte of vms_initial_buffer; sys_sbrk never lets
+ vms_current_brk move past it. */
+static char *vms_end_brk = &vms_initial_buffer[VMS_ALLOCATION_SIZE-1];
+
+#include <stdio.h>
+
+/* sbrk substitute for VMS.
+
+   While the static buffer still has room, INCR bytes are handed out
+   of it and the old position within it is returned.  The first
+   request that would move past vms_end_brk sets vms_out_initial;
+   that request and all later ones are passed to malloc.  Returns
+   (char *) -1 when malloc returns 0.  */
+char *
+sys_sbrk (incr)
+     int incr;
+{
+  char *sbrk (), *result, *new_brk;
+
+  if (!vms_out_initial)
+    {
+      new_brk = vms_current_brk + incr;	/* new current_brk */
+      if (new_brk <= vms_end_brk)
+	{
+	  /* Still inside the initial allocation.  */
+	  result = vms_current_brk;
+	  vms_current_brk = new_brk;
+	  return result;
+	}
+
+      vms_out_initial = 1;	/* mark as out of initial allocation */
+    }
+
+  /* Out of the initial allocation.  */
+  result = malloc (incr);
+  if (result == 0)
+    result = (char *) -1;
+  return result;
+}
+#endif /* VMS */
diff --git a/gnu/lib/libregex/test/fileregex.c b/gnu/lib/libregex/test/fileregex.c
new file mode 100644
index 0000000..2c27a0f
--- /dev/null
+++ b/gnu/lib/libregex/test/fileregex.c
@@ -0,0 +1,77 @@
+#include <sys/types.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "regex.h"
+
+#define BYTEWIDTH 8
+
+/* Sorry, but this is just a test program. */
+#define LINE_MAX 500
+
+/* Read one pattern from standard input, compile it, and count the
+   lines of the file named on the command line that it matches.
+
+   Exits with status 1 on a usage error, if the file cannot be opened,
+   if the pattern does not compile, or if a search reports an internal
+   error (-2).  Exits with status 0 without searching if no pattern
+   can be read.  Otherwise prints the match count and returns 0.  */
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  FILE *f;
+  char *filename;
+  char pat[500];		/* Sorry for that maximum size, too.  */
+  char line[LINE_MAX];
+  struct re_pattern_buffer buf;
+  /* Declared once, outside the loop, because the searcher keeps and
+     reuses the register arrays it allocates on the first call.  */
+  struct re_registers regs;
+  char fastmap[(1 << BYTEWIDTH)];
+  const char *compile_ret;
+  unsigned lineno = 1;
+  unsigned nfound = 0;
+
+  /* Actually, it might be useful to allow the data file to be standard
+     input, and to specify the pattern on the command line.  */
+  if (argc != 2)
+    {
+      fprintf (stderr, "Usage: %s <filename>.\n", argv[0]);
+      exit (1);
+    }
+
+  filename = argv[1];
+  f = fopen (filename, "r");
+  if (f == NULL)
+    {
+      /* Nothing to search; carrying on would read a null stream.  */
+      perror (filename);
+      exit (1);
+    }
+
+  buf.allocated = 0;
+  buf.buffer = NULL;
+  buf.fastmap = fastmap;
+  /* The compiler reads this field, so it must not be left
+     indeterminate; null means no translation.  */
+  buf.translate = NULL;
+
+  printf ("Pattern = ");
+
+  /* fgets is bounded by the buffer size, unlike gets.  */
+  if (fgets (pat, sizeof pat, stdin) == NULL)
+    {
+      putchar ('\n');
+      exit (0);
+    }
+  /* gets dropped the newline; do the same so it is not part of the
+     pattern.  */
+  pat[strcspn (pat, "\n")] = '\0';
+
+  compile_ret = re_compile_pattern (pat, strlen (pat), &buf);
+  if (compile_ret != NULL)
+    {
+      fprintf (stderr, "%s: %s\n", pat, compile_ret);
+      exit (1);
+    }
+
+  while (fgets (line, LINE_MAX, f) != NULL)
+    {
+      size_t len = strlen (line);
+      int search_ret
+	= re_search_2 (&buf, NULL, 0, line, len, 0, len, &regs, len);
+
+      if (search_ret == -2)
+	{
+	  fprintf (stderr, "%s:%u: re_search failed.\n", filename, lineno);
+	  exit (1);
+	}
+
+      /* -1 means no match; anything else is a match position.  */
+      nfound += search_ret != -1;
+      lineno++;
+    }
+
+  fclose (f);
+
+  printf ("Matches found: %u (out of %u lines).\n", nfound, lineno - 1);
+  return 0;
+}
diff --git a/gnu/lib/libregex/test/g++malloc.c b/gnu/lib/libregex/test/g++malloc.c
new file mode 100644
index 0000000..d55ce45
--- /dev/null
+++ b/gnu/lib/libregex/test/g++malloc.c
@@ -0,0 +1,1288 @@
+#define inline
+
+/*
+Copyright (C) 1989 Free Software Foundation
+ written by Doug Lea (dl@oswego.edu)
+
+This file is part of GNU CC.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY. No author or distributor
+accepts responsibility to anyone for the consequences of using it
+or for whether it serves any particular purpose or works at all,
+unless he says so in writing. Refer to the GNU CC General Public
+License for full details.
+
+Everyone is granted permission to copy, modify and redistribute
+GNU CC, but only under the conditions described in the
+GNU CC General Public License. A copy of this license is
+supposed to have been given to you along with GNU CC so you
+can know your rights and responsibilities. It should be in a
+file named COPYING. Among other things, the copyright notice
+and this notice must be preserved on all copies.
+*/
+
+
+
+#ifndef NO_LIBGXX_MALLOC /* ignore whole file otherwise */
+
+/* compile with -DMALLOC_STATS to collect statistics */
+/* collecting statistics slows down malloc by at least 15% */
+
+/* UPDATE_STATS (ARGS) expands to the block {ARGS;} when MALLOC_STATS is
+ defined and to nothing otherwise. */
+#ifdef MALLOC_STATS
+#define UPDATE_STATS(ARGS) {ARGS;}
+#else
+#define UPDATE_STATS(ARGS)
+#endif
+
+/* History
+
+
+ Tue Jan 16 04:54:27 1990 Doug Lea (dl at g.oswego.edu)
+
+ version 1 released in libg++
+
+ Sun Jan 21 05:52:47 1990 Doug Lea (dl at g.oswego.edu)
+
+ bins are now their own struct, for sanity.
+
+ new victim search strategy: scan up and consolidate.
+ Both faster and less fragmentation.
+
+ refined when to scan bins for consolidation, via consollink, etc.
+
+ realloc: always try to expand chunk, avoiding some fragmentation.
+
+ changed a few inlines into macros
+
+ hardwired SBRK_UNIT to 4096 for uniformity across systems
+
+ Tue Mar 20 14:18:23 1990 Doug Lea (dl at g.oswego.edu)
+
+ calloc and cfree now correctly parameterized.
+
+ Sun Apr 1 10:00:48 1990 Doug Lea (dl at g.oswego.edu)
+
+ added memalign and valloc.
+
+ Sun Jun 24 05:46:48 1990 Doug Lea (dl at g.oswego.edu)
+
+ #include getpagesize.h only ifndef sun
+ cache pagesize after first call
+
+ Wed Jul 25 08:35:19 1990 Doug Lea (dl at g.oswego.edu)
+
+ No longer rely on a `designated victim':
+
+ 1. It sometimes caused splits of large chunks
+ when smaller ones would do, leading to
+ bad worst-case fragmentation.
+
+ 2. Scanning through the av array is fast anyway,
+ so the overhead isn't worth it.
+
+ To compensate, several other minor changes:
+
+ 1. Unusable chunks are checked for consolidation during
+ searches inside bins, better distributing chunks
+ across bins.
+
+ 2. Chunks are returned when found in malloc_find_space,
+ rather than finishing cleaning everything up, to
+ avoid wasted iterations due to (1).
+*/
+
+/*
+ A version of malloc/free/realloc tuned for C++ applications.
+
+ Here's what you probably want to know first:
+
+ In various tests, this appears to be about as fast as,
+ and usually substantially less memory-wasteful than BSD/GNUemacs malloc.
+
+ Generally, it is slower (by perhaps 20%) than bsd-style malloc
+ only when bsd malloc would waste a great deal of space in
+ fragmented blocks, which this malloc recovers; or when, by
+ chance or design, nearly all requests are near the bsd malloc
+ power-of-2 allocation bin boundaries, and as many chunks are
+ used as are allocated.
+
+ It uses more space than bsd malloc only when, again by chance
+ or design, only bsdmalloc bin-sized requests are malloced, or when
+ little dynamic space is malloced, since this malloc may grab larger
+ chunks from the system at a time than bsd.
+
+ In other words, this malloc seems generally superior to bsd
+ except perhaps for programs that are specially tuned to
+ deal with bsdmalloc's characteristics. But even here, the
+ performance differences are slight.
+
+
+ This malloc, like any other, is a compromised design.
+
+
+ Chunks of memory are maintained using a `boundary tag' method as
+ described in e.g., Knuth or Standish. This means that the size of
+ the chunk is stored both in the front of the chunk and at the end.
+ This makes consolidating fragmented chunks into bigger chunks very fast.
+ The size field is also used to hold bits representing whether a
+ chunk is free or in use.
+
+ Malloced chunks have space overhead of 8 bytes: The preceding
+ and trailing size fields. When they are freed, the list pointer
+ fields are also needed.
+
+ Available chunks are kept in doubly linked lists. The lists are
+ maintained in an array of bins using a power-of-two method, except
+ that instead of 32 bins (one for each 1 << i), there are 128: each
+ power of two is split in quarters. The use of very fine bin sizes
+ closely approximates the use of one bin per actually used size,
+ without necessitating the overhead of locating such bins. It is
+ especially desirable in common C++ applications where large numbers
+ of identically-sized blocks are malloced/freed in some dynamic
+ manner, and then later are all freed. The finer bin sizes make
+ finding blocks fast, with little wasted overallocation. The
+ consolidation methods ensure that once the collection of blocks is
+ no longer useful, fragments are gathered into bigger chunks awaiting new
+ roles.
+
+ The bins av[i] serve as heads of the lists. Bins contain a dummy
+ header for the chunk lists, and a `dirty' field used to indicate
+ whether the list may need to be scanned for consolidation.
+
+ On allocation, the bin corresponding to the request size is
+ scanned, and if there is a chunk with size >= requested, it
+ is split, if too big, and used. Chunks on the list which are
+ too small are examined for consolidation during this traversal.
+
+ If no chunk exists in the list bigger bins are scanned in search of
+ a victim.
+
+ If no victim can be found, then smaller bins are examined for
+ consolidation in order to construct a victim.
+
+ Finally, if consolidation fails to come up with a usable chunk,
+ more space is obtained from the system.
+
+ After a split, the remainder is placed on
+ the back of the appropriate bin list. (All freed chunks are placed
+ on fronts of lists. All remaindered or consolidated chunks are
+ placed on the rear. Correspondingly, searching within a bin
+ starts at the front, but finding victims is from the back. All
+ of this approximates the effect of having 2 kinds of lists per
+ bin: returned chunks vs unallocated chunks, but without the overhead
+ of maintaining 2 lists.)
+
+ Deallocation (free) consists only of placing the chunk on
+ a list.
+
+ Reallocation proceeds in the usual way. If a chunk can be extended,
+ it is, else a malloc-copy-free sequence is taken.
+
+ memalign requests more than enough space from malloc, finds a
+ spot within that chunk that meets the alignment request, and
+ then possibly frees the leading and trailing space. Overreliance
+ on memalign is a sure way to fragment space.
+
+
+ Some other implementation matters:
+
+ 8 byte alignment is currently hardwired into the design. Calling
+ memalign will return a chunk that is both 8-byte aligned, and
+ meets the requested alignment.
+
+ The basic overhead of a used chunk is 8 bytes: 4 at the front and
+ 4 at the end.
+
+ When a chunk is free, 8 additional bytes are needed for free list
+ pointers. Thus, the minimum allocatable size is 16 bytes.
+
+ The existence of front and back overhead permits some reasonably
+ effective fence-bashing checks: The front and back fields must
+ be identical. This is checked only within free() and realloc().
+ The checks are fast enough to be made non-optional.
+
+ The overwriting of parts of freed memory with the freelist pointers
+ can also be very effective (albeit in an annoying way) in helping
+ users track down dangling pointers.
+
+ User overwriting of freed space will often result in crashes
+ within malloc or free.
+
+ These routines are also tuned to C++ in that free(0) is a noop and
+ a failed malloc automatically calls (*new_handler)().
+
+ malloc(0) returns a pointer to something of the minimum allocatable size.
+
+ Additional memory is gathered from the system (via sbrk) in a
+ way that allows chunks obtained across different sbrk calls to
+ be consolidated, but does not require contiguous memory: Thus,
+ it should be safe to intersperse mallocs with other sbrk calls.
+
+ This malloc is NOT designed to work in multiprocessing applications.
+ No semaphores or other concurrency control are provided to ensure
+ that multiple malloc or free calls don't run at the same time,
+ which could be disastrous.
+
+ VERY heavy use of inlines is made, for clarity. If this malloc
+ is ported via a compiler without inlining capabilities, all
+ inlines should be transformed into macros -- making them non-inline
+ makes malloc at least twice as slow.
+
+
+*/
+
+
+/* preliminaries */
+
+#ifdef __cplusplus
+#include <stdio.h>
+#else
+#include "//usr/include/stdio.h" /* needed for error reporting */
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef USG
+extern void* memset(void*, int, int);
+extern void* memcpy(void*, const void*, int);
+/*inline void bzero(void* s, int l) { memset(s, 0, l); }*/
+#else
+/*extern void bzero(void*, unsigned int);*/
+#endif
+
+/*extern void bcopy(void*, void*, unsigned int);*/
+
+extern void* sbrk(unsigned int);
+
+/* Put this in instead of commented out stuff above. */
+#define bcopy(s,d,n) memcpy((d),(s),(n))
+#define bcmp(s1,s2,n) memcmp((s1),(s2),(n))
+#define bzero(s,n) memset((s),0,(n))
+
+
+#ifdef __GNUC__
+extern volatile void abort();
+#else
+extern void abort();
+#endif
+
+#ifdef __cplusplus
+}; /* end of extern "C" */
+#endif
+
+
+/* A good multiple to call sbrk with */
+
+#define SBRK_UNIT 4096
+
+
+
+/* how to die on detected error */
+
+/* Print a fixed "clobbered space detected" message on stderr, then */
+/* call abort().  Takes no arguments. */
+/* NOTE(review): `volatile void' under __GNUC__ is presumably the old */
+/* GCC way of marking a function that does not return -- confirm. */
+#ifdef __GNUC__
+static volatile void malloc_user_error()
+#else
+static void malloc_user_error()
+#endif
+{
+ fputs("malloc/free/realloc: clobbered space detected\n", stderr); abort();
+}
+
+
+
+/* Basic overhead for each malloc'ed chunk */
+
+
+struct malloc_chunk
+{
+ unsigned int size; /* Size in bytes, including overhead. */
+ /* Or'ed with INUSE if in use. */
+
+ struct malloc_chunk* fd; /* double links -- used only if free. */
+ struct malloc_chunk* bk;
+
+};
+
+typedef struct malloc_chunk* mchunkptr;
+
+struct malloc_bin
+{
+ struct malloc_chunk hd; /* dummy list header */
+ unsigned int dirty; /* True if maybe consolidatable */
+ /* Wasting a word here makes */
+ /* sizeof(bin) a power of 2, */
+ /* which makes size2bin() faster */
+};
+
+typedef struct malloc_bin* mbinptr;
+
+
+/* sizes, alignments */
+
+
+#define SIZE_SZ (sizeof(unsigned int))
+#define MALLOC_MIN_OVERHEAD (SIZE_SZ + SIZE_SZ)
+#define MALLOC_ALIGN_MASK (MALLOC_MIN_OVERHEAD - 1)
+
+#define MINSIZE (sizeof(struct malloc_chunk) + SIZE_SZ) /* MUST == 16! */
+
+
+/* pad request bytes into a usable size */
+
+static inline unsigned int request2size(unsigned int request)
+{
+  /* A zero-byte request still receives the smallest real chunk. */
+  if (request == 0)
+    return MINSIZE;
+
+  /* Add room for both size words, then round up to the alignment. */
+  return (request + MALLOC_MIN_OVERHEAD + MALLOC_ALIGN_MASK)
+         & ~(MALLOC_ALIGN_MASK);
+}
+
+
+static inline int aligned_OK(void* m)
+{
+  /* Aligned means none of the low mask bits of the address are set. */
+  unsigned int low_bits = (unsigned int)(m) & (MALLOC_ALIGN_MASK);
+
+  return low_bits == 0;
+}
+
+
+/* size field or'd with INUSE when in use */
+#define INUSE 0x1
+
+
+
+/* the bins, initialized to have null double linked lists */
+
+#define MAXBIN 120 /* 1 more than needed for 32 bit addresses */
+
+#define FIRSTBIN (&(av[0]))
+
+static struct malloc_bin av[MAXBIN] =
+{
+ { { 0, &(av[0].hd), &(av[0].hd) }, 0 },
+ { { 0, &(av[1].hd), &(av[1].hd) }, 0 },
+ { { 0, &(av[2].hd), &(av[2].hd) }, 0 },
+ { { 0, &(av[3].hd), &(av[3].hd) }, 0 },
+ { { 0, &(av[4].hd), &(av[4].hd) }, 0 },
+ { { 0, &(av[5].hd), &(av[5].hd) }, 0 },
+ { { 0, &(av[6].hd), &(av[6].hd) }, 0 },
+ { { 0, &(av[7].hd), &(av[7].hd) }, 0 },
+ { { 0, &(av[8].hd), &(av[8].hd) }, 0 },
+ { { 0, &(av[9].hd), &(av[9].hd) }, 0 },
+
+ { { 0, &(av[10].hd), &(av[10].hd) }, 0 },
+ { { 0, &(av[11].hd), &(av[11].hd) }, 0 },
+ { { 0, &(av[12].hd), &(av[12].hd) }, 0 },
+ { { 0, &(av[13].hd), &(av[13].hd) }, 0 },
+ { { 0, &(av[14].hd), &(av[14].hd) }, 0 },
+ { { 0, &(av[15].hd), &(av[15].hd) }, 0 },
+ { { 0, &(av[16].hd), &(av[16].hd) }, 0 },
+ { { 0, &(av[17].hd), &(av[17].hd) }, 0 },
+ { { 0, &(av[18].hd), &(av[18].hd) }, 0 },
+ { { 0, &(av[19].hd), &(av[19].hd) }, 0 },
+
+ { { 0, &(av[20].hd), &(av[20].hd) }, 0 },
+ { { 0, &(av[21].hd), &(av[21].hd) }, 0 },
+ { { 0, &(av[22].hd), &(av[22].hd) }, 0 },
+ { { 0, &(av[23].hd), &(av[23].hd) }, 0 },
+ { { 0, &(av[24].hd), &(av[24].hd) }, 0 },
+ { { 0, &(av[25].hd), &(av[25].hd) }, 0 },
+ { { 0, &(av[26].hd), &(av[26].hd) }, 0 },
+ { { 0, &(av[27].hd), &(av[27].hd) }, 0 },
+ { { 0, &(av[28].hd), &(av[28].hd) }, 0 },
+ { { 0, &(av[29].hd), &(av[29].hd) }, 0 },
+
+ { { 0, &(av[30].hd), &(av[30].hd) }, 0 },
+ { { 0, &(av[31].hd), &(av[31].hd) }, 0 },
+ { { 0, &(av[32].hd), &(av[32].hd) }, 0 },
+ { { 0, &(av[33].hd), &(av[33].hd) }, 0 },
+ { { 0, &(av[34].hd), &(av[34].hd) }, 0 },
+ { { 0, &(av[35].hd), &(av[35].hd) }, 0 },
+ { { 0, &(av[36].hd), &(av[36].hd) }, 0 },
+ { { 0, &(av[37].hd), &(av[37].hd) }, 0 },
+ { { 0, &(av[38].hd), &(av[38].hd) }, 0 },
+ { { 0, &(av[39].hd), &(av[39].hd) }, 0 },
+
+ { { 0, &(av[40].hd), &(av[40].hd) }, 0 },
+ { { 0, &(av[41].hd), &(av[41].hd) }, 0 },
+ { { 0, &(av[42].hd), &(av[42].hd) }, 0 },
+ { { 0, &(av[43].hd), &(av[43].hd) }, 0 },
+ { { 0, &(av[44].hd), &(av[44].hd) }, 0 },
+ { { 0, &(av[45].hd), &(av[45].hd) }, 0 },
+ { { 0, &(av[46].hd), &(av[46].hd) }, 0 },
+ { { 0, &(av[47].hd), &(av[47].hd) }, 0 },
+ { { 0, &(av[48].hd), &(av[48].hd) }, 0 },
+ { { 0, &(av[49].hd), &(av[49].hd) }, 0 },
+
+ { { 0, &(av[50].hd), &(av[50].hd) }, 0 },
+ { { 0, &(av[51].hd), &(av[51].hd) }, 0 },
+ { { 0, &(av[52].hd), &(av[52].hd) }, 0 },
+ { { 0, &(av[53].hd), &(av[53].hd) }, 0 },
+ { { 0, &(av[54].hd), &(av[54].hd) }, 0 },
+ { { 0, &(av[55].hd), &(av[55].hd) }, 0 },
+ { { 0, &(av[56].hd), &(av[56].hd) }, 0 },
+ { { 0, &(av[57].hd), &(av[57].hd) }, 0 },
+ { { 0, &(av[58].hd), &(av[58].hd) }, 0 },
+ { { 0, &(av[59].hd), &(av[59].hd) }, 0 },
+
+ { { 0, &(av[60].hd), &(av[60].hd) }, 0 },
+ { { 0, &(av[61].hd), &(av[61].hd) }, 0 },
+ { { 0, &(av[62].hd), &(av[62].hd) }, 0 },
+ { { 0, &(av[63].hd), &(av[63].hd) }, 0 },
+ { { 0, &(av[64].hd), &(av[64].hd) }, 0 },
+ { { 0, &(av[65].hd), &(av[65].hd) }, 0 },
+ { { 0, &(av[66].hd), &(av[66].hd) }, 0 },
+ { { 0, &(av[67].hd), &(av[67].hd) }, 0 },
+ { { 0, &(av[68].hd), &(av[68].hd) }, 0 },
+ { { 0, &(av[69].hd), &(av[69].hd) }, 0 },
+
+ { { 0, &(av[70].hd), &(av[70].hd) }, 0 },
+ { { 0, &(av[71].hd), &(av[71].hd) }, 0 },
+ { { 0, &(av[72].hd), &(av[72].hd) }, 0 },
+ { { 0, &(av[73].hd), &(av[73].hd) }, 0 },
+ { { 0, &(av[74].hd), &(av[74].hd) }, 0 },
+ { { 0, &(av[75].hd), &(av[75].hd) }, 0 },
+ { { 0, &(av[76].hd), &(av[76].hd) }, 0 },
+ { { 0, &(av[77].hd), &(av[77].hd) }, 0 },
+ { { 0, &(av[78].hd), &(av[78].hd) }, 0 },
+ { { 0, &(av[79].hd), &(av[79].hd) }, 0 },
+
+ { { 0, &(av[80].hd), &(av[80].hd) }, 0 },
+ { { 0, &(av[81].hd), &(av[81].hd) }, 0 },
+ { { 0, &(av[82].hd), &(av[82].hd) }, 0 },
+ { { 0, &(av[83].hd), &(av[83].hd) }, 0 },
+ { { 0, &(av[84].hd), &(av[84].hd) }, 0 },
+ { { 0, &(av[85].hd), &(av[85].hd) }, 0 },
+ { { 0, &(av[86].hd), &(av[86].hd) }, 0 },
+ { { 0, &(av[87].hd), &(av[87].hd) }, 0 },
+ { { 0, &(av[88].hd), &(av[88].hd) }, 0 },
+ { { 0, &(av[89].hd), &(av[89].hd) }, 0 },
+
+ { { 0, &(av[90].hd), &(av[90].hd) }, 0 },
+ { { 0, &(av[91].hd), &(av[91].hd) }, 0 },
+ { { 0, &(av[92].hd), &(av[92].hd) }, 0 },
+ { { 0, &(av[93].hd), &(av[93].hd) }, 0 },
+ { { 0, &(av[94].hd), &(av[94].hd) }, 0 },
+ { { 0, &(av[95].hd), &(av[95].hd) }, 0 },
+ { { 0, &(av[96].hd), &(av[96].hd) }, 0 },
+ { { 0, &(av[97].hd), &(av[97].hd) }, 0 },
+ { { 0, &(av[98].hd), &(av[98].hd) }, 0 },
+ { { 0, &(av[99].hd), &(av[99].hd) }, 0 },
+
+ { { 0, &(av[100].hd), &(av[100].hd) }, 0 },
+ { { 0, &(av[101].hd), &(av[101].hd) }, 0 },
+ { { 0, &(av[102].hd), &(av[102].hd) }, 0 },
+ { { 0, &(av[103].hd), &(av[103].hd) }, 0 },
+ { { 0, &(av[104].hd), &(av[104].hd) }, 0 },
+ { { 0, &(av[105].hd), &(av[105].hd) }, 0 },
+ { { 0, &(av[106].hd), &(av[106].hd) }, 0 },
+ { { 0, &(av[107].hd), &(av[107].hd) }, 0 },
+ { { 0, &(av[108].hd), &(av[108].hd) }, 0 },
+ { { 0, &(av[109].hd), &(av[109].hd) }, 0 },
+
+ { { 0, &(av[110].hd), &(av[110].hd) }, 0 },
+ { { 0, &(av[111].hd), &(av[111].hd) }, 0 },
+ { { 0, &(av[112].hd), &(av[112].hd) }, 0 },
+ { { 0, &(av[113].hd), &(av[113].hd) }, 0 },
+ { { 0, &(av[114].hd), &(av[114].hd) }, 0 },
+ { { 0, &(av[115].hd), &(av[115].hd) }, 0 },
+ { { 0, &(av[116].hd), &(av[116].hd) }, 0 },
+ { { 0, &(av[117].hd), &(av[117].hd) }, 0 },
+ { { 0, &(av[118].hd), &(av[118].hd) }, 0 },
+ { { 0, &(av[119].hd), &(av[119].hd) }, 0 }
+};
+
+/*
+ indexing into bins
+*/
+
+/* Return the bin in av[] that holds free chunks of SZ bytes. */
+/* Every halving of SZ needed to bring it below 2*MINSIZE advances */
+/* four bins; what is then left above MINSIZE, in 4-byte steps, picks */
+/* one bin within that group of four. */
+/* Callers are assumed to pass SZ >= MINSIZE; a smaller value would */
+/* make the unsigned subtraction below wrap around. */
+static inline mbinptr size2bin(unsigned int sz)
+{
+ mbinptr b = av;
+ while (sz >= (MINSIZE * 2)) { b += 4; sz >>= 1; } /* find power of 2 */
+ b += (sz - MINSIZE) >> 2; /* find quadrant */
+ return b;
+}
+
+
+
+/* counts maintained if MALLOC_STATS defined */
+
+#ifdef MALLOC_STATS
+
+static unsigned int sbrked_mem;
+static unsigned int requested_mem;
+static unsigned int malloced_mem;
+static unsigned int freed_mem;
+static unsigned int max_used_mem;
+
+static unsigned int n_sbrks;
+static unsigned int n_mallocs;
+static unsigned int n_frees;
+static unsigned int n_reallocs;
+static unsigned int n_reallocs_with_copy;
+static unsigned int n_avail;
+static unsigned int max_inuse;
+
+static unsigned int n_malloc_chunks;
+static unsigned int n_malloc_bins;
+
+static unsigned int n_split;
+static unsigned int n_consol;
+
+
+static void do_malloc_stats(const mchunkptr p)
+{
+ ++n_mallocs;
+ if ((n_mallocs-n_frees) > max_inuse)
+ max_inuse = n_mallocs - n_frees;
+ malloced_mem += (p->size & ~(INUSE));
+ if (malloced_mem - freed_mem > max_used_mem)
+ max_used_mem = malloced_mem - freed_mem;
+}
+
+static void do_free_stats(const mchunkptr p)
+{
+ ++n_frees;
+ freed_mem += (p->size & ~(INUSE));
+}
+
+#endif
+
+
+
+/* Utilities needed below for memalign */
+/* This is redundant with libg++ support, but not if used stand-alone */
+
+/* Greatest common divisor of A and B by Euclid's algorithm. */
+/* gcd(x, 0) and gcd(0, x) both yield x.  The loop stops as soon as */
+/* the remainder drops to 1, because the answer is then 1. */
+static unsigned int gcd(unsigned int a, unsigned int b)
+{
+  /* Keep the larger operand in A. */
+  if (a < b)
+    {
+      unsigned int swap = a;
+      a = b;
+      b = swap;
+    }
+
+  while (b > 1)
+    {
+      unsigned int rem = a % b;
+      a = b;
+      b = rem;
+    }
+
+  return (b == 0) ? a : 1;
+}
+
+/* Least common multiple of X and Y.  The division is done before */
+/* the multiplication so the intermediate value stays small. */
+static inline unsigned int lcm(unsigned int x, unsigned int y)
+{
+  unsigned int divisor = gcd(x, y);
+
+  return (x / divisor) * y;
+}
+
+
+
+/* maintaining INUSE via size field */
+
+
+/* Each macro takes a pointer P to a malloc_chunk and works on the */
+/* INUSE bit of its size word: inuse tests it, set_inuse turns it on, */
+/* clear_inuse turns it off.  All three name their parameter P so the */
+/* expansion acts on the argument actually passed in. */
+#define inuse(p) ((p)->size & INUSE)
+#define set_inuse(p) ((p)->size |= INUSE)
+#define clear_inuse(p) ((p)->size &= ~INUSE)
+
+
+/* operations on malloc_chunk addresses */
+
+
+/* return ptr to next physical malloc_chunk */
+
+#define next_chunk(p) ((mchunkptr)((char*)(p) + (p)->size))
+
+/* return ptr to previous physical malloc_chunk */
+
+#define prev_chunk(p) ((mchunkptr)((char*)(p)-((((int*)(p))[-1]) & ~(INUSE))))
+
+/* place size at front and back of chunk */
+
+
+static inline void set_size(mchunkptr p, unsigned int sz)
+{
+  /* The final word of a chunk mirrors the size kept in its first word. */
+  int* tail = (int*)((char*)(p) + sz - SIZE_SZ);
+
+  *tail = sz;
+  p->size = sz;
+}
+
+
+
+
+/* conversion from malloc headers to user pointers, and back */
+
+static inline void* chunk2mem(mchunkptr p)
+{
+  /* The chunk is being handed to the user: flag it as allocated. */
+  set_inuse(p);
+
+  /* User data starts right after the leading size word. */
+  return (void*)((char*)(p) + SIZE_SZ);
+}
+
+/* xxxx my own */
+mchunkptr sanity_check(void* mem)
+{
+  mchunkptr chunk = (mchunkptr)((char*)(mem) - SIZE_SZ);
+  unsigned int bare = chunk->size & ~(INUSE);   /* size without the flag */
+
+  /* The trailing word is only read once the INUSE test has passed. */
+  if (chunk->size == bare)
+    malloc_user_error();        /* not marked as in use */
+  else if (bare != *((int*)((char*)(chunk) + bare - SIZE_SZ)))
+    malloc_user_error();        /* front and back sizes disagree */
+
+  return chunk;
+}
+
+
+
+
+/* Convert a user pointer MEM back to its chunk header, validating it. */
+/* malloc_user_error() is called when the INUSE bit is missing from */
+/* the size word, or when the size stored in the last word of the */
+/* chunk differs from the one at the front.  On return the INUSE bit */
+/* has been cleared from the chunk's size word. */
+static inline mchunkptr mem2chunk(void* mem)
+{
+ mchunkptr p = (mchunkptr)((char*)(mem) - SIZE_SZ);
+
+ /* a quick sanity check */
+ unsigned int sz = p->size & ~(INUSE);
+ if (p->size == sz || sz != *((int*)((char*)(p) + sz - SIZE_SZ)))
+ malloc_user_error();
+
+ p->size = sz; /* clears INUSE */
+ return p;
+}
+
+
+
+/* maintaining bins & pointers */
+
+
+/* maximum bin actually used */
+
+static mbinptr malloc_maxbin = FIRSTBIN;
+
+
+/* operations on lists inside bins */
+
+
+/* take a chunk off a list */
+
+static inline void unlink(mchunkptr p)
+{
+  mchunkptr after = p->fd;
+  mchunkptr before = p->bk;
+
+  /* Splice P out by joining its two neighbours to each other. */
+  /* P's own links are left as they were. */
+  after->bk = before;
+  before->fd = after;
+
+  UPDATE_STATS (--n_avail);
+}
+
+
+
+/* split a chunk and place on the back of a list */
+
+/* Trim chunk P to OFFSET bytes.  When the leftover is at least */
+/* MINSIZE it becomes a separate chunk that is appended to the back of */
+/* the bin for its size, and P's front and back size words are */
+/* rewritten to OFFSET.  Otherwise P is left exactly as it was. */
+/* Callers are assumed to pass OFFSET <= p->size; the subtraction */
+/* below is unsigned. */
+static inline void split(mchunkptr p, unsigned int offset)
+{
+ unsigned int room = p->size - offset;
+ if (room >= MINSIZE)
+ {
+ mbinptr bn = size2bin(room); /* new bin */
+ mchunkptr h = &(bn->hd); /* its head */
+ mchunkptr b = h->bk; /* old back element */
+ mchunkptr t = (mchunkptr)((char*)(p) + offset); /* remaindered chunk */
+
+ /* set size */
+ t->size = *((int*)((char*)(t) + room - SIZE_SZ)) = room;
+
+ /* link up */
+ t->bk = b; t->fd = h; h->bk = b->fd = t;
+
+ /* adjust maxbin (h == b means was empty) */
+ if (h == b && bn > malloc_maxbin) malloc_maxbin = bn;
+
+ /* adjust size of chunk to be returned */
+ p->size = *((int*)((char*)(p) + offset - SIZE_SZ)) = offset;
+
+ UPDATE_STATS ((++n_split, ++n_avail));
+ }
+}
+
+
+
+/* place a consolidated chunk on the back of a list */
+/* like above, except no split */
+
+static inline void consollink(mchunkptr p)
+{
+  mbinptr bin = size2bin(p->size);
+  mchunkptr head = &(bin->hd);
+  mchunkptr tail = head->bk;          /* current last element */
+
+  /* Insert P between the old tail and the list head. */
+  p->bk = tail;
+  p->fd = head;
+  tail->fd = p;
+  head->bk = p;
+
+  /* head == tail means the bin was empty before this insert. */
+  if (head == tail && bin > malloc_maxbin)
+    malloc_maxbin = bin;
+
+  UPDATE_STATS(++n_avail);
+}
+
+
+/* place a freed chunk on the front of a list */
+
+static inline void frontlink(mchunkptr p)
+{
+  mbinptr bin = size2bin(p->size);
+  mchunkptr head = &(bin->hd);
+  mchunkptr first = head->fd;         /* current first element */
+
+  /* Insert P between the list head and the old first element. */
+  p->bk = head;
+  p->fd = first;
+  head->fd = p;
+  first->bk = p;
+
+  /* head == first means the bin was empty before this insert. */
+  if (head == first && bin > malloc_maxbin)
+    malloc_maxbin = bin;
+
+  /* A freshly freed chunk may be mergeable with its neighbours. */
+  bin->dirty = 1;
+
+  UPDATE_STATS(++n_avail);
+}
+
+
+
+/* Dealing with sbrk */
+
+
+/* To link consecutive sbrk regions when possible */
+
+static int* last_sbrk_end;
+
+
+/* who to call when sbrk returns failure */
+
+#ifndef NO_NEW_HANDLER
+typedef volatile void (*vfp)();
+#ifdef __cplusplus
+extern "C" vfp __new_handler;
+#else
+extern vfp __new_handler;
+#endif
+#endif
+
+/* Obtain fresh memory from the system for a request of NB bytes. */
+/* The amount passed to sbrk is NB plus two size words, rounded up to */
+/* a multiple of SBRK_UNIT.  If sbrk fails, __new_handler is called */
+/* (unless NO_NEW_HANDLER is defined) and 0 is returned.  When the new */
+/* region starts right after the previous end marker it is joined to */
+/* the earlier region, and to a free chunk in front of it if there is */
+/* one; otherwise an in-use marker word is written at its front. */
+/* An in-use marker word is always written after the resulting chunk, */
+/* which is returned linked to itself so the caller can unlink() it. */
+static mchunkptr malloc_from_sys(unsigned nb)
+{
+ mchunkptr p;
+ unsigned int sbrk_size;
+ int* ip;
+
+ /* Minimally, we need to pad with enough space */
+ /* to place dummy size/use fields to ends if needed */
+
+ sbrk_size = ((nb + SBRK_UNIT - 1 + SIZE_SZ + SIZE_SZ)
+ / SBRK_UNIT) * SBRK_UNIT;
+
+ ip = (int*)(sbrk(sbrk_size));
+ if ((char*)ip == (char*)(-1)) /* sbrk returns -1 on failure */
+ {
+#ifndef NO_NEW_HANDLER
+ (*__new_handler) ();
+#endif
+ return 0;
+ }
+
+ UPDATE_STATS ((++n_sbrks, sbrked_mem += sbrk_size));
+
+
+ if (last_sbrk_end != &ip[-1])
+ {
+ /* It's either first time through or someone else called sbrk. */
+ /* Arrange end-markers at front & back */
+
+ /* Shouldn't be necessary, but better to be safe */
+ while (!aligned_OK(ip)) { ++ip; sbrk_size -= SIZE_SZ; }
+
+
+ /* Mark the front as in use to prevent merging. */
+ /* Note we can get away with only 1 word, not MINSIZE overhead here */
+
+ *ip++ = SIZE_SZ | INUSE;
+
+ p = (mchunkptr)ip;
+ set_size(p,sbrk_size - (SIZE_SZ + SIZE_SZ));
+
+ }
+ else
+ {
+ mchunkptr l;
+
+ /* We can safely make the header start at end of prev sbrked chunk. */
+ /* We will still have space left at the end from a previous call */
+ /* to place the end marker, below */
+
+ p = (mchunkptr)(last_sbrk_end);
+ set_size(p, sbrk_size);
+
+
+ /* Even better, maybe we can merge with last fragment: */
+
+ l = prev_chunk(p);
+ if (!inuse(l))
+ {
+ unlink(l);
+ set_size(l, p->size + l->size);
+ p = l;
+ }
+
+ }
+
+ /* mark the end of sbrked space as in use to prevent merging */
+
+ last_sbrk_end = (int*)((char*)p + p->size);
+ *last_sbrk_end = SIZE_SZ | INUSE;
+
+ UPDATE_STATS((++n_avail, ++n_malloc_chunks));
+
+ /* make it safe to unlink in malloc */
+ UPDATE_STATS(++n_avail);
+ p->fd = p->bk = p;
+
+ return p;
+}
+
+
+
+/* Consolidate dirty bins. */
+/* Stop if found a chunk big enough to satisfy current malloc request */
+
+/* (It requires much less bookkeeping to consolidate entire bins */
+/* at once than to keep records of which chunks might be */
+/* consolidatable. So long as the lists are short, which we */
+/* try to ensure via small bin ranges, there is little wasted effort.) */
+
+static mchunkptr malloc_find_space(unsigned int nb)
+{
+ mbinptr b;
+
+ /* first, re-adjust max used bin */
+
+ while (malloc_maxbin >= FIRSTBIN &&
+ malloc_maxbin->hd.bk == &(malloc_maxbin->hd))
+ {
+ malloc_maxbin->dirty = 0;
+ --malloc_maxbin;
+ }
+
+ for (b = malloc_maxbin; b >= FIRSTBIN; --b)
+ {
+ UPDATE_STATS(++n_malloc_bins);
+
+ if (b->dirty)
+ {
+ mchunkptr h = &(b->hd); /* head of list */
+ mchunkptr p = h->fd; /* chunk traverser */
+
+ while (p != h)
+ {
+ mchunkptr nextp = p->fd; /* save, in case of relinks */
+ int consolidated = 0; /* only unlink/relink if consolidated */
+
+ mchunkptr t;
+
+ UPDATE_STATS(++n_malloc_chunks);
+
+ while (!inuse(t = prev_chunk(p))) /* consolidate backward */
+ {
+ if (!consolidated) { consolidated = 1; unlink(p); }
+ if (t == nextp) nextp = t->fd;
+ unlink(t);
+ set_size(t, t->size + p->size);
+ p = t;
+ UPDATE_STATS (++n_consol);
+ }
+
+ while (!inuse(t = next_chunk(p))) /* consolidate forward */
+ {
+ if (!consolidated) { consolidated = 1; unlink(p); }
+ if (t == nextp) nextp = t->fd;
+ unlink(t);
+ set_size(p, p->size + t->size);
+ UPDATE_STATS (++n_consol);
+ }
+
+ if (consolidated)
+ {
+ if (p->size >= nb)
+ {
+ /* make it safe to unlink in malloc */
+ UPDATE_STATS(++n_avail);
+ p->fd = p->bk = p;
+ return p;
+ }
+ else
+ consollink(p);
+ }
+
+ p = nextp;
+
+ }
+
+ b->dirty = 0;
+
+ }
+ }
+
+ /* nothing available - sbrk some more */
+
+ return malloc_from_sys(nb);
+}
+
+
+
+/* Finally, the user-level functions */
+
+void* malloc(unsigned int bytes)
+{
+ unsigned int nb = request2size(bytes); /* padded request size */
+ mbinptr b = size2bin(nb); /* corresponding bin */
+ mchunkptr hd = &(b->hd); /* head of its list */
+ mchunkptr p = hd->fd; /* chunk traverser */
+
+ UPDATE_STATS((requested_mem+=bytes, ++n_malloc_bins));
+
+ /* Try a (near) exact match in own bin */
+ /* clean out unusable but consolidatable chunks in bin while traversing */
+
+ while (p != hd)
+ {
+ UPDATE_STATS(++n_malloc_chunks);
+ if (p->size >= nb)
+ goto found;
+ else /* try to consolidate; same code as malloc_find_space */
+ {
+ mchunkptr nextp = p->fd; /* save, in case of relinks */
+ int consolidated = 0; /* only unlink/relink if consolidated */
+
+ mchunkptr t;
+
+ while (!inuse(t = prev_chunk(p))) /* consolidate backward */
+ {
+ if (!consolidated) { consolidated = 1; unlink(p); }
+ if (t == nextp) nextp = t->fd;
+ unlink(t);
+ set_size(t, t->size + p->size);
+ p = t;
+ UPDATE_STATS (++n_consol);
+ }
+
+ while (!inuse(t = next_chunk(p))) /* consolidate forward */
+ {
+ if (!consolidated) { consolidated = 1; unlink(p); }
+ if (t == nextp) nextp = t->fd;
+ unlink(t);
+ set_size(p, p->size + t->size);
+ UPDATE_STATS (++n_consol);
+ }
+
+ if (consolidated)
+ {
+ if (p->size >= nb)
+ {
+ /* make it safe to unlink again below */
+ UPDATE_STATS(++n_avail);
+ p->fd = p->bk = p;
+ goto found;
+ }
+ else
+ consollink(p);
+ }
+
+ p = nextp;
+
+ }
+ }
+
+ b->dirty = 0; /* true if got here */
+
+ /* Scan bigger bins for a victim */
+
+ while (++b <= malloc_maxbin)
+ {
+ UPDATE_STATS(++n_malloc_bins);
+ if ((p = b->hd.bk) != &(b->hd)) /* no need to check size */
+ goto found;
+ }
+
+ /* Consolidate or sbrk */
+
+ p = malloc_find_space(nb);
+
+ if (p == 0) return 0; /* allocation failure */
+
+ found: /* Use what we found */
+
+ unlink(p);
+ split(p, nb);
+ UPDATE_STATS(do_malloc_stats(p));
+ return chunk2mem(p);
+}
+
+
+
+
+/* Return MEM to the allocator.  A null pointer is ignored; anything */
+/* else is validated by mem2chunk() and then placed at the front of */
+/* the free list for its size. */
+void free(void* mem)
+{
+  mchunkptr p;
+
+  if (mem == 0)
+    return;
+
+  p = mem2chunk(mem);
+  UPDATE_STATS(do_free_stats(p));
+  frontlink(p);
+}
+
+
+/* Allocate zero-filled storage for N elements of ELEM_SIZE bytes each. */
+/* Returns 0, without touching any memory, when N * ELEM_SIZE does not */
+/* fit in an unsigned int or when malloc cannot satisfy the request. */
+void* calloc(unsigned int n, unsigned int elem_size)
+{
+  unsigned int sz;
+  void* p;
+
+  /* Reject products that would wrap around to a too-small size. */
+  if (elem_size != 0 && n > (unsigned int)(-1) / elem_size)
+    return 0;
+
+  sz = n * elem_size;
+  p = malloc(sz);
+
+  /* Only clear the block if the allocation succeeded. */
+  if (p != 0)
+    bzero(p, sz);
+
+  return p;
+}
+
+/* This is here for compatibility with older systems. */
+/* It simply forwards MEM to free(). */
+void cfree(void *mem)
+{
+ free(mem);
+}
+
+
+/* Report how many bytes the caller may use in the block at MEM. */
+/* Yields 0 for a null pointer, for a chunk not marked in use, and */
+/* for a chunk whose front and back size words disagree. */
+unsigned int malloc_usable_size(void* mem)
+{
+  mchunkptr chunk;
+  unsigned int total;
+
+  if (mem == 0)
+    return 0;
+
+  chunk = (mchunkptr)((char*)(mem) - SIZE_SZ);
+  total = chunk->size & ~(INUSE);
+
+  if (chunk->size == total
+      || total != *((int*)((char*)(chunk) + total - SIZE_SZ)))
+    return 0;
+
+  return total - MALLOC_MIN_OVERHEAD;
+}
+
+
+
+/* Resize the block at MEM to hold at least BYTES bytes. */
+/* A null MEM behaves like malloc(BYTES).  Otherwise the chunk first */
+/* absorbs any free chunks that physically follow it; if it is then */
+/* large enough it is trimmed and returned in place.  Failing that, a */
+/* new block is allocated, the old contents are copied over and the */
+/* old block is freed.  Returns 0 when that allocation fails, in which */
+/* case the original block stays allocated and MEM remains valid. */
+void* realloc(void* mem, unsigned int bytes)
+{
+  if (mem == 0)
+    return malloc(bytes);
+  else
+    {
+      unsigned int nb = request2size(bytes);
+      mchunkptr p = mem2chunk(mem);
+      unsigned int oldsize = p->size;
+      int room;
+      mchunkptr nxt;
+
+      UPDATE_STATS((++n_reallocs, requested_mem += bytes-oldsize));
+
+      /* try to expand (even if already big enough), to clean up chunk */
+
+      while (!inuse(nxt = next_chunk(p)))
+        {
+          UPDATE_STATS ((malloced_mem += nxt->size, ++n_consol));
+          unlink(nxt);
+          set_size(p, p->size + nxt->size);
+        }
+
+      room = p->size - nb;
+      if (room >= 0)
+        {
+          split(p, nb);
+          UPDATE_STATS(malloced_mem -= room);
+          return chunk2mem(p);
+        }
+      else /* do the obvious */
+        {
+          void* newmem;
+          set_inuse(p);    /* don't let malloc consolidate us yet! */
+          newmem = malloc(nb);
+
+          /* On failure leave the caller's block intact and in use; */
+          /* copying to a null pointer and freeing MEM would lose it. */
+          if (newmem == 0)
+            return 0;
+
+          bcopy(mem, newmem, oldsize - SIZE_SZ);
+          free(mem);
+          UPDATE_STATS(++n_reallocs_with_copy);
+          return newmem;
+        }
+    }
+}
+
+
+
+/* return a pointer to space with at least the alignment requested */
+
+/* Allocate BYTES bytes at an address suitable for both ALIGNMENT and */
+/* the allocator's own alignment (their least common multiple). */
+/* A padded block is obtained from malloc; if its address is not */
+/* already aligned, the leading gap is given back as a free chunk via */
+/* consollink(), and any spare room at the end is given back via */
+/* split().  Returns 0 if malloc fails. */
+/* NOTE(review): the `mask' and `& -align' arithmetic below looks like */
+/* it assumes ALIGNMENT is a nonzero power of two -- confirm callers. */
+void* memalign(unsigned int alignment, unsigned int bytes)
+{
+ mchunkptr p;
+ unsigned int nb = request2size(bytes);
+
+ /* find an alignment that both we and the user can live with: */
+ /* least common multiple guarantees mutual happiness */
+ unsigned int align = lcm(alignment, MALLOC_MIN_OVERHEAD);
+ unsigned int mask = align - 1;
+
+ /* call malloc with worst case padding to hit alignment; */
+ /* we will give back extra */
+
+ unsigned int req = nb + align + MINSIZE;
+ void* m = malloc(req);
+
+ if (m == 0) return m;
+
+ p = mem2chunk(m);
+
+ /* keep statistics on track */
+
+ UPDATE_STATS(--n_mallocs);
+ UPDATE_STATS(malloced_mem -= p->size);
+ UPDATE_STATS(requested_mem -= req);
+ UPDATE_STATS(requested_mem += bytes);
+
+ if (((int)(m) & (mask)) != 0) /* misaligned */
+ {
+
+ /* find an aligned spot inside chunk */
+
+ mchunkptr ap = (mchunkptr)(( ((int)(m) + mask) & -align) - SIZE_SZ);
+
+ unsigned int gap = (unsigned int)(ap) - (unsigned int)(p);
+ unsigned int room;
+
+ /* we need to give back leading space in a chunk of at least MINSIZE */
+
+ if (gap < MINSIZE)
+ {
+ /* This works since align >= MINSIZE */
+ /* and we've malloc'd enough total room */
+
+ ap = (mchunkptr)( (int)(ap) + align );
+ gap += align;
+ }
+
+ if (gap + nb > p->size) /* can't happen unless chunk sizes corrupted */
+ malloc_user_error();
+
+ room = p->size - gap;
+
+ /* give back leader */
+ set_size(p, gap);
+ consollink(p);
+
+ /* use the rest */
+ p = ap;
+ set_size(p, room);
+ }
+
+ /* also give back spare room at the end */
+
+ split(p, nb);
+ UPDATE_STATS(do_malloc_stats(p));
+ return chunk2mem(p);
+
+}
+
+#ifndef sun
+#include "getpagesize.h"
+#endif
+
+static unsigned int malloc_pagesize = 0;
+
+/* Allocate BYTES bytes aligned to the page size by way of memalign(). */
+/* The page size comes from getpagesize() on the first call and is */
+/* cached in malloc_pagesize for later calls. */
+void* valloc(unsigned int bytes)
+{
+ if (malloc_pagesize == 0) malloc_pagesize = getpagesize();
+ return memalign (malloc_pagesize, bytes);
+}
+
+
+void malloc_stats()
+{
+#ifndef MALLOC_STATS
+}
+#else
+ int i;
+ mchunkptr p;
+ double nm = (double)(n_mallocs + n_reallocs);
+
+ fprintf(stderr, "\nmalloc statistics\n\n");
+
+ if (n_mallocs != 0)
+ fprintf(stderr, "requests = %10u total size = %10u\tave = %10u\n",
+ n_mallocs, requested_mem, requested_mem/n_mallocs);
+
+ if (n_mallocs != 0)
+ fprintf(stderr, "mallocs = %10u total size = %10u\tave = %10u\n",
+ n_mallocs, malloced_mem, malloced_mem/n_mallocs);
+
+ if (n_frees != 0)
+ fprintf(stderr, "frees = %10u total size = %10u\tave = %10u\n",
+ n_frees, freed_mem, freed_mem/n_frees);
+
+ if (n_mallocs-n_frees != 0)
+ fprintf(stderr, "in use = %10u total size = %10u\tave = %10u\n",
+ n_mallocs-n_frees, malloced_mem-freed_mem,
+ (malloced_mem-freed_mem) / (n_mallocs-n_frees));
+
+ if (max_inuse != 0)
+ fprintf(stderr, "max in use= %10u total size = %10u\tave = %10u\n",
+ max_inuse, max_used_mem, max_used_mem / max_inuse);
+
+ if (n_avail != 0)
+ fprintf(stderr, "available = %10u total size = %10u\tave = %10u\n",
+ n_avail, sbrked_mem - (malloced_mem-freed_mem),
+ (sbrked_mem - (malloced_mem-freed_mem)) / n_avail);
+
+ if (n_sbrks != 0)
+ fprintf(stderr, "sbrks = %10u total size = %10u\tave = %10u\n\n",
+ n_sbrks, sbrked_mem, sbrked_mem/ n_sbrks);
+
+ if (n_reallocs != 0)
+ fprintf(stderr, "reallocs = %10u with copy = %10u\n\n",
+ n_reallocs, n_reallocs_with_copy);
+
+
+ if (nm != 0)
+ {
+ fprintf(stderr, "chunks scanned per malloc = %6.3f\n",
+ n_malloc_chunks / nm);
+ fprintf(stderr, "bins scanned per malloc = %6.3f\n",
+ n_malloc_bins / nm);
+ fprintf(stderr, "splits per malloc = %6.3f\n",
+ n_split / nm);
+ fprintf(stderr, "consolidations per malloc = %6.3f\n",
+ n_consol / nm);
+ }
+
+ fprintf(stderr, "\nfree chunks:\n");
+ for (i = 0; i < MAXBIN; ++i)
+ {
+ p = av[i].hd.fd;
+ if (p != &(av[i].hd))
+ {
+ unsigned int count = 1;
+ unsigned int sz = p->size;
+ for (p = p->fd; p != &(av[i].hd); p = p->fd)
+ {
+ if (p->size == sz)
+ ++count;
+ else
+ {
+ fprintf(stderr, "\tsize = %10u count = %5u\n", sz, count);
+ count = 1;
+ sz = p->size;
+ }
+ }
+
+ fprintf(stderr, "\tsize = %10u count = %5u\n", sz, count);
+
+ }
+ }
+}
+#endif /* MALLOC_STATS */
+
+#endif /* NO_LIBGXX_MALLOC */
+
+
diff --git a/gnu/lib/libregex/test/getpagesize.h b/gnu/lib/libregex/test/getpagesize.h
new file mode 100644
index 0000000..32adae6
--- /dev/null
+++ b/gnu/lib/libregex/test/getpagesize.h
@@ -0,0 +1,25 @@
+/* Supply a getpagesize() macro on systems that lack the function. */
+/* BSD systems other than 4.1 are taken to provide the real thing. */
+/* Elsewhere the value comes from <sys/param.h>: EXEC_PAGESIZE if it */
+/* is defined, else NBPG * CLSIZE, else NBPC. */
+#ifdef BSD
+#ifndef BSD4_1
+#define HAVE_GETPAGESIZE
+#endif
+#endif
+
+#ifndef HAVE_GETPAGESIZE
+
+#include <sys/param.h>
+
+/* Every expansion is parenthesized so that getpagesize() acts as a */
+/* single operand inside a larger expression such as x % getpagesize(). */
+#ifdef EXEC_PAGESIZE
+#define getpagesize() (EXEC_PAGESIZE)
+#else
+#ifdef NBPG
+#ifndef CLSIZE
+#define CLSIZE 1
+#endif /* no CLSIZE */
+#define getpagesize() (NBPG * CLSIZE)
+#else /* no NBPG */
+#define getpagesize() (NBPC)
+#endif /* no NBPG */
+#endif /* no EXEC_PAGESIZE */
+
+#endif /* not HAVE_GETPAGESIZE */
+
diff --git a/gnu/lib/libregex/test/iregex.c b/gnu/lib/libregex/test/iregex.c
new file mode 100644
index 0000000..2346d44
--- /dev/null
+++ b/gnu/lib/libregex/test/iregex.c
@@ -0,0 +1,164 @@
+/* Main program for interactive testing. For maximum output, compile
+ this and regex.c with -DDEBUG. */
+
+#include <stdio.h>
+#include <sys/types.h>
+#include "regex.h"
+
+/* Don't bother to guess about <string.h> vs <strings.h>, etc. */
+extern int strlen ();
+
+#define BYTEWIDTH 8
+
+extern void printchar ();
+extern char upcase[];
+
+static void scanstring ();
+static void print_regs ();
+
+/* Read one line from stdin into BUF, which holds SIZE bytes, and drop
+   the trailing newline.  BUF becomes the empty string at end of file
+   or on a read error.  Unlike `gets', this never writes past the end
+   of BUF; an over-long line is split across successive reads.  */
+
+static void
+read_line (buf, size)
+     char *buf;
+     int size;
+{
+  int len;
+
+  if (fgets (buf, size, stdin) == NULL)
+    {
+      buf[0] = '\0';
+      return;
+    }
+
+  len = strlen (buf);
+  if (len > 0 && buf[len - 1] == '\n')
+    buf[len - 1] = '\0';
+}
+
+/* Interactive driver: repeatedly prompt for a pattern and a string,
+   then print the results of re_match and re_search.  An empty pattern
+   line skips compilation, so the previously compiled pattern is used
+   again.  Exits with status 0 once stdin reaches end of file.  */
+
+int
+main (argc, argv)
+     int argc;
+     char **argv;
+{
+  int i;
+  struct re_pattern_buffer buf;
+  char fastmap[(1 << BYTEWIDTH)];
+
+  /* Allow a command argument to specify the style of syntax.  You can
+     use the `syntax' program to decode integer syntax values.  */
+  if (argc > 1)
+    re_set_syntax (atoi (argv[1]));
+
+  buf.allocated = 0;
+  buf.buffer = NULL;
+  buf.fastmap = fastmap;
+  buf.translate = upcase;
+
+  for (;;)
+    {
+      char pat[500], str[500];
+      struct re_registers regs;
+
+      /* Some C compilers don't like `char pat[500] = ""'.  */
+      pat[0] = 0;
+
+      printf ("Pattern (%s) = ", pat);
+      /* The size is cast because read_line has no prototype.  */
+      read_line (pat, (int) sizeof pat);
+      scanstring (pat);
+
+      if (feof (stdin))
+        {
+          putchar ('\n');
+          exit (0);
+        }
+
+      if (*pat)
+        {
+          re_compile_pattern (pat, strlen (pat), &buf);
+          re_compile_fastmap (&buf);
+#ifdef DEBUG
+          print_compiled_pattern (&buf);
+#endif
+        }
+
+      printf ("String = ");
+      /* Now read the string to match against.  */
+      read_line (str, (int) sizeof str);
+      scanstring (str);
+
+      i = re_match (&buf, str, strlen (str), 0, &regs);
+      printf ("Match value %d.\t", i);
+      if (i >= 0)
+        print_regs (regs);
+      putchar ('\n');
+
+      i = re_search (&buf, str, strlen (str), 0, strlen (str), &regs);
+      printf ("Search value %d.\t", i);
+      if (i >= 0)
+        print_regs (regs);
+      putchar ('\n');
+    }
+
+  /* We never get here, but what the heck.  */
+  return 0;
+}
+
+/* Rewrite S in place, replacing backslash escapes with the characters
+   they denote: `\n' is newline, `\t' is tab, `\' followed by one to
+   three octal digits is the character with that code, and `\' before
+   any other character is that character itself.  A lone backslash at
+   the end of the string is dropped.  Only the digits 0 through 7 are
+   accepted in an octal escape, so `\8' and `\9' give the plain digit.
+   The result is never longer than the input.  */
+
+void
+scanstring (s)
+     char *s;
+{
+  char *write = s;
+
+  while (*s != '\0')
+    {
+      if (*s == '\\')
+        {
+          s++;
+
+          switch (*s)
+            {
+            case '\0':
+              /* Trailing backslash: the outer loop ends here.  */
+              break;
+
+            case '0': case '1': case '2': case '3':
+            case '4': case '5': case '6': case '7':
+              {
+                /* Accumulate in an int; store the byte only once.  */
+                int value = *s++ - '0';
+
+                if ('0' <= *s && *s <= '7')
+                  {
+                    value = (value << 3) + (*s++ - '0');
+                    if ('0' <= *s && *s <= '7')
+                      value = (value << 3) + (*s++ - '0');
+                  }
+                *write++ = value;
+              }
+              break;
+
+            case 'n':
+              *write++ = '\n';
+              s++;
+              break;
+
+            case 't':
+              *write++ = '\t';
+              s++;
+              break;
+
+            default:
+              *write++ = *s++;
+              break;
+            }
+        }
+      else
+        *write++ = *s++;
+    }
+
+  *write++ = '\0';
+}
+
+/* Print REGS in human-readable form. */
+
+void
+print_regs (regs)
+     struct re_registers regs;
+{
+  int last, r;
+
+  printf ("Registers: ");
+
+  /* Nothing to show if there are no registers or register 0 is unset.  */
+  if (regs.num_regs == 0 || regs.start[0] == -1)
+    {
+      printf ("(none)");
+      return;
+    }
+
+  /* Walk back from the top to the last register pair that matched.  */
+  last = regs.num_regs - 1;
+  while (last >= 0 && regs.start[last] == -1)
+    last--;
+
+  /* Register 0 brackets the list; the groups go in between.  */
+  printf ("[%d ", regs.start[0]);
+  for (r = 1; r <= last; r++)
+    printf ("(%d %d) ", regs.start[r], regs.end[r]);
+  printf ("%d]", regs.end[0]);
+}
diff --git a/gnu/lib/libregex/test/main.c b/gnu/lib/libregex/test/main.c
new file mode 100644
index 0000000..28ae315
--- /dev/null
+++ b/gnu/lib/libregex/test/main.c
@@ -0,0 +1,49 @@
+/* Main routine for running various tests. Meant only to be linked with
+ all the auxiliary test source files, with `test' undefined. */
+
+#include "test.h"
+
+test_type t = all_test;
+
+
+/* Use this to run the tests we've thought of. */
+
+/* Dispatch on the global T: all_test runs every suite in turn, each
+   of the other known values runs its single suite, and any other value
+   is reported on stderr.  Returns 0 in every case.  */
+int
+main ()
+{
+ switch (t)
+ {
+ case all_test:
+ test_regress ();
+ test_others ();
+ test_posix_basic ();
+ test_posix_extended ();
+ test_posix_interface ();
+ break;
+
+ case other_test:
+ test_others ();
+ break;
+
+ case posix_basic_test:
+ test_posix_basic ();
+ break;
+
+ case posix_extended_test:
+ test_posix_extended ();
+ break;
+
+ case posix_interface_test:
+ test_posix_interface ();
+ break;
+
+ case regress_test:
+ test_regress ();
+ break;
+
+ default:
+ fprintf (stderr, "Unknown test %d.\n", t);
+ }
+
+ return 0;
+}
diff --git a/gnu/lib/libregex/test/malloc-test.c b/gnu/lib/libregex/test/malloc-test.c
new file mode 100644
index 0000000..7e27a15
--- /dev/null
+++ b/gnu/lib/libregex/test/malloc-test.c
@@ -0,0 +1,47 @@
+
+
+typedef struct {
+ unsigned *bits;
+ unsigned size;
+} bits_list_type;
+
+#define BYTEWIDTH 8
+#define NULL 0
+
+#define BITS_BLOCK_SIZE (sizeof (unsigned) * BYTEWIDTH)
+#define BITS_BLOCK(position) ((position) / BITS_BLOCK_SIZE)
+#define BITS_MASK(position) (1 << ((position) % BITS_BLOCK_SIZE))
+
+/* Give BITS_LIST_PTR a freshly malloc'ed vector of one `unsigned',
+   cleared to zero, and record its size as BITS_BLOCK_SIZE bits.
+   Returns 1 on success, or 0 if malloc returns NULL.  */
+static unsigned
+init_bits_list (bits_list_ptr)
+ bits_list_type *bits_list_ptr;
+{
+ bits_list_ptr->bits = NULL;
+ bits_list_ptr->bits = (unsigned *) malloc (sizeof (unsigned));
+
+ if (bits_list_ptr->bits == NULL)
+ return 0;
+
+ bits_list_ptr->bits[0] = (unsigned)0;
+ bits_list_ptr->size = BITS_BLOCK_SIZE;
+
+ return 1;
+}
+
+
+/* Initialize four bit lists one after another, printing a progress
+   line on stdout after each.  Returns 0 so the exit status is well
+   defined.  */
+int
+main()
+{
+  bits_list_type dummy;
+  bits_list_type dummy_1;
+  bits_list_type dummy_2;
+  bits_list_type dummy_3;
+
+  init_bits_list (&dummy);
+  printf("init 1\n");
+  init_bits_list (&dummy_1);
+  printf("init 2\n");
+  init_bits_list (&dummy_2);
+  printf("init 3\n");
+  init_bits_list (&dummy_3);
+  printf("init 4\n");
+
+  return 0;
+}
diff --git a/gnu/lib/libregex/test/other.c b/gnu/lib/libregex/test/other.c
new file mode 100644
index 0000000..d2ceb38
--- /dev/null
+++ b/gnu/lib/libregex/test/other.c
@@ -0,0 +1,503 @@
+/* other.c: test (not exhaustively) non-POSIX regular expressions. */
+
+#include "test.h"
+
+void
+test_others ()
+{
+ struct re_registers regs;
+
+ printf ("\nStarting non-POSIX tests.\n");
+ t = other_test;
+
+ test_should_match = true;
+
+ /* The big question: does the group participate in the match, or match
+ the empty string? */
+ re_set_syntax (RE_NO_BK_PARENS);
+ test_match ("(a*)*ab", "ab");
+ TEST_REGISTERS ("(a*)*ab", "ab", 0, 2, 0, 0, -1, -1);
+ test_match ("(a*)*", "");
+ TEST_REGISTERS ("(a*)*ab", "ab", 0, 0, 0, 0, -1, -1);
+
+ /* This tests finding the highest and lowest active registers. */
+ test_match ("(a(b)c(d(e)f)g)h(i(j)k(l(m)n)o)\\1\\2\\3\\4\\5\\6\\7\\8",
+ "abcdefghijklmnoabcdefgbdefeijklmnojlmnm");
+
+ /* Test that \< and \> match at the beginning and end of the string. */
+ test_match ("\\<abc\\>", "abc");
+
+ /* May as well test \` and \' while we're at it. */
+ test_match ("\\`abc\\'", "abc");
+
+#if 0
+ /* Test backreferencing and the fastmap -- which doesn't work. */
+ test_fastmap ("(a)*\\1", "a", 0, 0);
+#endif
+
+ /* But at least we shouldn't search improperly. */
+ test_search_return (-1, "(a)\\1", "");
+
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ MATCH_SELF("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+ MATCH_SELF ("a^");
+ MATCH_SELF ("a^b");
+ MATCH_SELF ("$a");
+ MATCH_SELF ("a$b");
+
+ re_set_syntax (RE_BACKSLASH_ESCAPE_IN_LISTS);
+ test_match ("[\\^a]", "a");
+ test_match ("[\\^a]", "^");
+
+ /* These op characters should be ordinary if RE_CONTEXT_INVALID_OPS
+ isn't set. */
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_BRACES | RE_INTERVALS
+ | RE_NO_BK_PARENS);
+ MATCH_SELF ("*");
+ test_match ("a|*", "*");
+ test_match ("(*)", "*");
+
+ MATCH_SELF ("+");
+ test_match ("a|+", "+");
+ test_match ("(+)", "+");
+
+ MATCH_SELF ("?");
+ test_match ("a|?", "?");
+ test_match ("(?)", "?");
+
+ MATCH_SELF ("{1}");
+ test_match ("a|{1}", "a");
+ test_match ("a|{1}", "{1}");
+ test_match ("({1})", "{1}");
+
+ test_match ("\\{", "{");
+
+
+ re_set_syntax (RE_LIMITED_OPS);
+ MATCH_SELF ("|");
+ MATCH_SELF ("a|");
+ MATCH_SELF ("a|");
+ MATCH_SELF ("a||");
+ MATCH_SELF ("a||");
+ MATCH_SELF ("(|)");
+
+ re_set_syntax (RE_SYNTAX_EMACS);
+ TEST_SEARCH ("^a", "b\na", 0, 3);
+ TEST_SEARCH ("b$", "b\na", 0, 3);
+
+#if 0
+ /* Newline is no longer special for anchors (16 Sep 92). --karl */
+ test_match_2 ("a\n^b", "a", "\nb");
+ test_match_2 ("a$\nb", "a\n", "b");
+#endif
+
+ /* Test grouping. */
+ re_set_syntax (RE_NO_BK_PARENS);
+
+ test_match ("()", "");
+ test_fastmap ("()", "", 0, 0);
+ TEST_REGISTERS ("()", "", 0, 0, 0, 0, -1, -1);
+
+ test_match ("((((((((()))))))))", "");
+ test_fastmap ("((((((((()))))))))", "", 0, 0);
+ test_match ("a()b", "ab");
+ TEST_REGISTERS ("a()b", "ab", 0, 2, 1, 1, -1, -1);
+
+ test_match ("(((((((((())))))))))", "");
+ test_fastmap ("(((((((((())))))))))", "", 0, 0);
+
+ test_match ("()*", "");
+ TEST_REGISTERS ("()*", "", 0, 0, 0, 0, -1, -1); /* empty string */
+ test_match ("(())*", "");
+
+ re_set_syntax (RE_CONTEXT_INDEP_OPS);
+ test_match ("*", "");
+
+ re_set_syntax (RE_INTERVALS | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES);
+ test_match ("{1}", ""); /* Should remain an interval. */
+ MATCH_SELF ("{1"); /* Not a valid interval. */
+
+ re_set_syntax (RE_NEWLINE_ALT);
+ test_match ("a\nb", "a");
+ test_match ("a\nb", "b");
+
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS);
+ test_match ("^a", "a");
+ test_match ("(^a)", "a");
+ test_match ("(a|^b)", "b");
+ test_match ("a$", "a");
+ test_match ("(a$)", "a");
+ test_match ("a$|b", "a");
+
+ /* You should be able to have empty alternatives if RE_NO_EMPTY_ALTS
+ isn't set. */
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS);
+
+ test_match ("|", "");
+ test_match ("^|a", "");
+ test_match ("^|a", "a");
+ test_match ("a|", "");
+ test_match ("a|", "a");
+ test_match ("a|$", "");
+ test_match ("a|$", "a");
+ test_match ("a||b", "a");
+ test_match ("a||b", "");
+ test_match ("a||b", "b");
+ test_match ("(|a)", "");
+ test_match ("(|a)", "a");
+ test_match ("(a|)", "");
+ test_match ("(a|)", "a");
+
+ TEST_SEARCH ("a|$", "xa", 0, 2);
+ TEST_SEARCH ("a|$", "x", 0, 1);
+ TEST_SEARCH ("$|b", "x", 0, 1);
+ TEST_SEARCH ("$|b", "xb", 0, 2);
+ TEST_SEARCH ("c(a|$)", "xca", 0, 3);
+ TEST_SEARCH ("c(a|$)", "xc", 0, 2);
+ TEST_SEARCH ("c($|b)", "xcb", 0, 3);
+ TEST_SEARCH ("c($|b)", "xc", 0, 2);
+ TEST_SEARCH ("c($|b$)", "xcb", 0, 3);
+ TEST_SEARCH ("c($|b$)", "xc", 0, 2);
+ TEST_SEARCH ("c(a$|$)", "xca", 0, 3);
+ TEST_SEARCH ("c(a$|$)", "xc", 0, 2);
+ TEST_SEARCH ("(a$|b$)|$", "x", 0, 1);
+ TEST_SEARCH ("(a$|b$)|$", "xa", 0, 2);
+ TEST_SEARCH ("(a$|b$)|$", "xb", 0, 2);
+ TEST_SEARCH ("(a$|$)|c$", "x", 0, 1);
+ TEST_SEARCH ("(a$|$)|c$", "xa", 0, 2);
+ TEST_SEARCH ("(a$|$)|c$", "xc", 0, 2);
+ TEST_SEARCH ("($|b$)|c$", "x", 0, 1);
+ TEST_SEARCH ("($|b$)|c$", "xb", 0, 2);
+ TEST_SEARCH ("($|b$)|c$", "xc", 0, 2);
+ TEST_SEARCH ("c$|(a$|$)", "x", 0, 1);
+ TEST_SEARCH ("c$|(a$|$)", "xa", 0, 2);
+ TEST_SEARCH ("c$|(a$|$)", "xc", 0, 2);
+ TEST_SEARCH ("c$|($|b$)", "x", 0, 1);
+ TEST_SEARCH ("c$|($|b$)", "xb", 0, 2);
+ TEST_SEARCH ("c$|($|b$)", "xc", 0, 2);
+ TEST_SEARCH ("$|(a$|b$)", "x", 0, 1);
+ TEST_SEARCH ("$|(a$|b$)", "xa", 0, 2);
+ TEST_SEARCH ("$|(a$|b$)", "xb", 0, 2);
+ TEST_SEARCH ("c(a$|b$)|$", "x", 0, 1);
+ TEST_SEARCH ("c(a$|b$)|$", "xca", 0, 3);
+ TEST_SEARCH ("c(a$|b$)|$", "xcb", 0, 3);
+ TEST_SEARCH ("c(a$|$)|d$", "xc", 0, 2);
+ TEST_SEARCH ("c(a$|$)|d$", "xca", 0, 3);
+ TEST_SEARCH ("c(a$|$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("c($|b$)|d$", "xc", 0, 2);
+ TEST_SEARCH ("c($|b$)|d$", "xcb", 0, 3);
+ TEST_SEARCH ("c($|b$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("d(c$|e((a$|$)))", "xdc", 0, 3);
+ TEST_SEARCH ("d(c$|e((a$|$)))", "xde", 0, 3);
+ TEST_SEARCH ("d(c$|e((a$|$)))", "xdea", 0, 4);
+ TEST_SEARCH ("d(c$|e(($|b$)))", "xdc", 0, 3);
+ TEST_SEARCH ("d(c$|e(($|b$)))", "xde", 0, 3);
+ TEST_SEARCH ("d(c$|e(($|b$)))", "xdeb", 0, 4);
+ TEST_SEARCH ("d($|e((a$|b$)))", "xd", 0, 2);
+ TEST_SEARCH ("d($|e((a$|b$)))", "xdea", 0, 4);
+ TEST_SEARCH ("d($|e((a$|b$)))", "xdeb", 0, 4);
+ TEST_SEARCH ("a(b$|c$)|$", "x", 0, 1);
+ TEST_SEARCH ("a(b$|c$)|$", "xab", 0, 3);
+ TEST_SEARCH ("a(b$|c$)|$", "xac", 0, 3);
+ TEST_SEARCH ("a(b$|$)|d$", "xa", 0, 2);
+ TEST_SEARCH ("a(b$|$)|d$", "xab", 0, 3);
+ TEST_SEARCH ("a(b$|$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("a($|c$)|d$", "xa", 0, 2);
+ TEST_SEARCH ("a($|c$)|d$", "xac", 0, 3);
+ TEST_SEARCH ("a($|c$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("d$|a(b$|$)", "xd", 0, 2);
+ TEST_SEARCH ("d$|a(b$|$)", "xa", 0, 2);
+ TEST_SEARCH ("d$|a(b$|$)", "xab", 0, 3);
+ TEST_SEARCH ("d$|a($|c$)", "xd", 0, 2);
+ TEST_SEARCH ("d$|a($|c$)", "xa", 0, 2);
+ TEST_SEARCH ("d$|a($|c$)", "xac", 0, 3);
+ TEST_SEARCH ("$|a(b$|c$)", "x", 0, 1);
+ TEST_SEARCH ("$|a(b$|c$)", "xab", 0, 3);
+ TEST_SEARCH ("$|a(b$|c$)", "xac", 0, 3);
+ TEST_SEARCH ("(a)(b$|c$)|d$", "xab", 0, 3);
+ TEST_SEARCH ("(a)(b$|c$)|d$", "xac", 0, 3);
+ TEST_SEARCH ("(a)(b$|c$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("(a)(b$|$)|d$", "xa", 0, 2);
+ TEST_SEARCH ("(a)(b$|$)|d$", "xab", 0, 3);
+ TEST_SEARCH ("(a)(b$|$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("(a)($|c$)|d$", "xa", 0, 2);
+ TEST_SEARCH ("(a)($|c$)|d$", "xac", 0, 3);
+ TEST_SEARCH ("(a)($|c$)|d$", "xd", 0, 2);
+ TEST_SEARCH ("d$|(a)(b$|$)", "xd", 0, 2);
+ TEST_SEARCH ("d$|(a)(b$|$)", "xa", 0, 2);
+ TEST_SEARCH ("d$|(a)(b$|$)", "xab", 0, 3);
+ TEST_SEARCH ("d$|(a)($|c$)", "xd", 0, 2);
+ TEST_SEARCH ("d$|(a)($|c$)", "xa", 0, 2);
+ TEST_SEARCH ("d$|(a)($|c$)", "xac", 0, 3);
+ TEST_SEARCH ("$|(a)(b$|c$)", "x", 0, 1);
+ TEST_SEARCH ("$|(a)(b$|c$)", "xab", 0, 3);
+ TEST_SEARCH ("$|(a)(b$|c$)", "xac", 0, 3);
+ TEST_SEARCH ("d$|(c$|(a$|$))", "x", 0, 1);
+ TEST_SEARCH ("d$|(c$|(a$|$))", "xd", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a$|$))", "xc", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a$|$))", "xa", 0, 2);
+ TEST_SEARCH ("d$|(c$|($|b$))", "x", 0, 1);
+ TEST_SEARCH ("d$|(c$|($|b$))", "xd", 0, 2);
+ TEST_SEARCH ("d$|(c$|($|b$))", "xc", 0, 2);
+ TEST_SEARCH ("d$|(c$|($|b$))", "xb", 0, 2);
+ TEST_SEARCH ("d$|($|(a$|b$))", "x", 0, 1);
+ TEST_SEARCH ("d$|($|(a$|b$))", "xd", 0, 2);
+ TEST_SEARCH ("d$|($|(a$|b$))", "xa", 0, 2);
+ TEST_SEARCH ("d$|($|(a$|b$))", "xb", 0, 2);
+ TEST_SEARCH ("$|(c$|(a$|b$))", "x", 0, 1);
+ TEST_SEARCH ("$|(c$|(a$|b$))", "xc", 0, 2);
+ TEST_SEARCH ("$|(c$|(a$|b$))", "xa", 0, 2);
+ TEST_SEARCH ("$|(c$|(a$|b$))", "xb", 0, 2);
+ TEST_SEARCH ("d$|c(a$|$)", "xd", 0, 2);
+ TEST_SEARCH ("d$|c(a$|$)", "xc", 0, 2);
+ TEST_SEARCH ("d$|c(a$|$)", "xca", 0, 3);
+ TEST_SEARCH ("d$|c($|b$)", "xd", 0, 2);
+ TEST_SEARCH ("d$|c($|b$)", "xc", 0, 2);
+ TEST_SEARCH ("d$|c($|b$)", "xcb", 0, 3);
+ TEST_SEARCH ("$|c(a$|b$)", "x", 0, 1);
+ TEST_SEARCH ("$|c(a$|b$)", "xca", 0, 3);
+ TEST_SEARCH ("$|c(a$|b$)", "xcb", 0, 3);
+ TEST_SEARCH ("e(d$|c((a$|$)))", "xed", 0, 3);
+ TEST_SEARCH ("e(d$|c((a$|$)))", "xec", 0, 3);
+ TEST_SEARCH ("e(d$|c((a$|$)))", "xeca", 0, 3);
+ TEST_SEARCH ("e(d$|c(($|b$)))", "xed", 0, 3);
+ TEST_SEARCH ("e(d$|c(($|b$)))", "xec", 0, 3);
+ TEST_SEARCH ("e(d$|c(($|b$)))", "xecb", 0, 4);
+ TEST_SEARCH ("e($|c((a$|b$)))", "xe", 0, 2);
+ TEST_SEARCH ("e($|c((a$|b$)))", "xeca", 0, 4);
+ TEST_SEARCH ("e($|c((a$|b$)))", "xecb", 0, 4);
+ TEST_SEARCH ("ed$|(c((a$|$)))", "xed", 0, 3);
+ TEST_SEARCH ("ed$|(c((a$|$)))", "xc", 0, 2);
+ TEST_SEARCH ("ed$|(c((a$|$)))", "xca", 0, 3);
+ TEST_SEARCH ("ed$|(c(($|b$)))", "xed", 0, 3);
+ TEST_SEARCH ("ed$|(c(($|b$)))", "xc", 0, 2);
+ TEST_SEARCH ("ed$|(c(($|b$)))", "xcb", 0, 3);
+ TEST_SEARCH ("$|(c((a$|b$)))", "x", 0, 1);
+ TEST_SEARCH ("$|(c((a$|b$)))", "xca", 0, 3);
+ TEST_SEARCH ("$|(c((a$|b$)))", "xcb", 0, 3);
+ TEST_SEARCH ("d$|($|(a|b)$)", "x", 0, 1);
+ TEST_SEARCH ("d$|($|(a|b)$)", "xa", 0, 2);
+ TEST_SEARCH ("d$|($|(a|b)$)", "xb", 0, 2);
+ TEST_SEARCH ("$|(c$|(a|b)$)", "x", 0, 1);
+ TEST_SEARCH ("$|(c$|(a|b)$)", "xc", 0, 2);
+ TEST_SEARCH ("$|(c$|(a|b)$)", "xa", 0, 2);
+ TEST_SEARCH ("$|(c$|(a|b)$)", "xb", 0, 2);
+
+ re_set_syntax (0);
+ test_match ("[^\n]", "a");
+ test_match ("[^a]", "\n");
+
+ TEST_SEARCH ("^a", "b\na", 0, 3);
+ TEST_SEARCH ("b$", "b\na", 0, 3);
+
+ test_case_fold ("[!-`]", "A");
+ test_case_fold ("[!-`]", "a");
+
+ re_set_syntax (RE_CONTEXT_INDEP_OPS | RE_NO_BK_VBAR | RE_NO_BK_PARENS
+ | RE_NO_BK_BRACES | RE_INTERVALS);
+ valid_nonposix_pattern ("()^a");
+ valid_nonposix_pattern ("()\\1^a");
+
+ /* Per Cederqvist (cedar@lysator.liu.se) bug. */
+
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ /* One `a' before the \n and 638 a's after it. */
+ test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "a\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+
+ /* No a's before the \n and 639 a's after it. */
+ test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+
+ /* One `a' before the \n and 639 a's after it. */
+ test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "a\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+
+ /* No a's before the \n and 640 a's after it. */
+ test_search_return (0, "\\(.*\\)\n\\(\\(.\\|\n\\)*\\)$", "\naaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
+
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS);
+ TEST_SEARCH ("^(^a)", "ab", 0, 2);
+ TEST_SEARCH ("(a$)$", "ba", 0, 2);
+ test_match ("a|$b", "$b");
+
+ /* Mike's curiosity item. */
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS);
+ test_all_registers ("(foo|foobar)(foo|bar)*\\1(foo|bar)*",
+ "foobarfoobar", "",
+ 0, 12, 0, 3, 3, 6, 9, 12, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1);
+
+ /* Another one from Mike. */
+ test_match ("(foo|foobarfoo)(bar)*", "foobarfoo");
+
+ /* And another. */
+ test_match("(foo|foobar)(bar|barfoo)?\\1", "foobarfoobar");
+
+ re_set_syntax (RE_NO_BK_PARENS | RE_INTERVALS | RE_NO_BK_VBAR
+ | RE_NO_BK_BRACES); /* xx get new ones from ext.*/
+ test_match ("((a{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)*", "bb");
+ test_all_registers ("((a{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)*", "", "bb",
+ 0, 2, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1);
+
+ test_match ("((a+?*{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)", "b");
+ test_all_registers ("((a+?*{0,}{0,0}()\\3\\b\\B\\<\\>\\`\\')|b)", "", "b",
+ 0, 1, 0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1);
+
+ /* Valid anchoring. */
+ /* See generic_test.c and extended_test.c for more search
+ tests. xx Not sure all these tests are represented in the
+ search tests. */
+
+ re_set_syntax (RE_NO_BK_PARENS | RE_NO_BK_VBAR);
+ valid_nonposix_pattern
+ ("(((((((((((((((((((((((((((((((((^a)))))))))))))))))))))))))))))))))");
+ valid_nonposix_pattern
+ ("(((((((((((((((((((((((((((((((((a$)))))))))))))))))))))))))))))))))");
+ valid_nonposix_pattern ("\\b\\B\\<\\>\\`\\'^a");
+ valid_nonposix_pattern ("a$\\b\\B\\<\\>\\`\\'");
+ valid_nonposix_pattern ("(^a)");
+ valid_nonposix_pattern ("(a$)");
+ valid_nonposix_pattern ("(^a)b");
+ valid_nonposix_pattern ("b(a$)");
+ valid_nonposix_pattern ("(^a|^b)c");
+ valid_nonposix_pattern ("c(a$|b$)");
+ valid_nonposix_pattern ("(^a|^b)|^c");
+ valid_nonposix_pattern ("(a$|b$)|c$");
+ valid_nonposix_pattern ("^c|(^a|^b)");
+ valid_nonposix_pattern ("c$|(a$|b$)");
+ valid_nonposix_pattern ("(^a|^b)c|^d");
+ valid_nonposix_pattern ("c(a$|b$)|d$");
+ valid_nonposix_pattern ("(((^a|^b))c|^d)e");
+ valid_nonposix_pattern ("(c((a|b))|d)e$");
+ valid_nonposix_pattern ("^d(c|e((a|b)))");
+ valid_nonposix_pattern ("d(c$|e((a$|b$)))");
+ valid_nonposix_pattern ("(((^a|^b))c)|^de");
+ valid_nonposix_pattern ("(((a|b))c$)|de$");
+
+ valid_nonposix_pattern ("((a$)$)$");
+ valid_nonposix_pattern ("^(^(^a))");
+
+ valid_nonposix_pattern ("^de|^(c((a|b)))");
+ valid_nonposix_pattern ("^de|(^c((a|b)))");
+ valid_nonposix_pattern ("de$|(c((a|b)$))");
+ valid_nonposix_pattern ("de$|(c((a|b))$)");
+ valid_nonposix_pattern ("de$|(c((a|b)))$");
+
+ valid_nonposix_pattern ("^a(b|c)|^d");
+ valid_nonposix_pattern ("a(b$|c$)|d$");
+ valid_nonposix_pattern ("^d|^a(b|c)");
+ valid_nonposix_pattern ("d$|a(b$|c$)");
+ valid_nonposix_pattern ("^d|^(b|c)a");
+ valid_nonposix_pattern ("d$|(b|c)a$");
+ valid_nonposix_pattern ("^(a)(b|c)|^d");
+ valid_nonposix_pattern ("(a)(b|c)$|d$");
+ valid_nonposix_pattern ("(^a)(b|c)|^d");
+ valid_nonposix_pattern ("(a)(b$|c$)|d$");
+ valid_nonposix_pattern ("^d|^(b|c)(a)");
+ valid_nonposix_pattern ("d$|(b|c)(a)$");
+ valid_nonposix_pattern ("^d|(^b|^c)(a)");
+ valid_nonposix_pattern ("d$|(b|c)(a$)");
+ valid_nonposix_pattern ("^d|^(a)(b|c)");
+ valid_nonposix_pattern ("^d|(^a)(b|c)");
+ valid_nonposix_pattern ("d$|(a)(b$|c$)");
+ valid_nonposix_pattern ("((^a|^b)|^c)|^d");
+ valid_nonposix_pattern ("d$|(c$|(a$|b$))");
+
+
+ /* Tests shouldn't match. */
+ test_should_match = false;
+
+ /* Test that RE_CONTEXT_INVALID_OPS has precedence over
+ RE_CONTEXT_INDEP_OPS. */
+
+ re_set_syntax (RE_CONTEXT_INDEP_OPS | RE_CONTEXT_INVALID_OPS
+ | RE_NO_BK_VBAR | RE_NO_BK_PARENS
+ | RE_NO_BK_BRACES | RE_INTERVALS);
+ INVALID_PATTERN ("*");
+ INVALID_PATTERN ("^*");
+ INVALID_PATTERN ("a|*");
+ INVALID_PATTERN ("(*)");
+
+ INVALID_PATTERN ("^+");
+ INVALID_PATTERN ("+");
+ INVALID_PATTERN ("a|+");
+ INVALID_PATTERN ("(+)");
+
+ INVALID_PATTERN ("^?");
+ INVALID_PATTERN ("?");
+ INVALID_PATTERN ("a|?");
+ INVALID_PATTERN ("(?)");
+
+ INVALID_PATTERN ("^{1}");
+ INVALID_PATTERN ("{1}");
+ INVALID_PATTERN ("a|{1}");
+ INVALID_PATTERN ("({1})");
+
+#if 0
+ /* No longer have this syntax option -- POSIX says empty alternatives
+ are undefined as of draft 11.2. */
+
+ /* You can't have empty alternatives if RE_NO_EMPTY_ALTS is set. */
+
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_ALTS);
+
+ INVALID_PATTERN ("|");
+ INVALID_PATTERN ("^|a");
+ INVALID_PATTERN ("a|");
+ INVALID_PATTERN ("a||");
+ INVALID_PATTERN ("a||b");
+ INVALID_PATTERN ("(|a)");
+ INVALID_PATTERN ("(a|)");
+ INVALID_PATTERN ("(a|)");
+
+
+ /* Test above with `\(' and `\)'. */
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_EMPTY_ALTS);
+ INVALID_PATTERN ("\\(|a\\)");
+ INVALID_PATTERN ("\\(a|\\)");
+
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_ALTS);
+ INVALID_PATTERN ("(|)()$|d$");
+#endif
+
+ /* Test grouping. */
+ test_match ("()", "a");
+
+ /* Test backslashed intervals that are CONTEXTly invalid if have
+ nothing on which to operate. */
+
+ re_set_syntax (RE_INTERVALS | RE_CONTEXT_INVALID_OPS);
+ INVALID_PATTERN ("\\{1\\}");
+
+ re_set_syntax (0);
+ test_match ("z-a", "a");
+
+ re_set_syntax (RE_BK_PLUS_QM);
+ INVALID_PATTERN ("a*\\");
+
+ re_set_syntax (0);
+ INVALID_PATTERN ("a*\\");
+
+ re_set_syntax (RE_BACKSLASH_ESCAPE_IN_LISTS);
+ INVALID_PATTERN ("[\\");
+
+#if 0
+ /* Empty groups are always ok now. (13 Sep 92) */
+ re_set_syntax (RE_NO_BK_VBAR | RE_NO_BK_PARENS | RE_NO_EMPTY_GROUPS);
+ INVALID_PATTERN ("(|)()$|d$");
+#endif
+
+ printf ("\nFinished non-POSIX tests.\n");
+}
+
+
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/printchar.c b/gnu/lib/libregex/test/printchar.c
new file mode 100644
index 0000000..1b756f4
--- /dev/null
+++ b/gnu/lib/libregex/test/printchar.c
@@ -0,0 +1,14 @@
+/* Write C to standard output.  A value in the range 040 through 0176 is
+   written unchanged; any other value is written as a backslash followed
+   by three octal digits taken from its low eight bits.  */
+void
+printchar (c)
+     char c;
+{
+  int shift;
+
+  if (c >= 040 && c < 0177)
+    {
+      putchar (c);
+      return;
+    }
+
+  putchar ('\\');
+  /* Digits go out most significant first: bits 7-6, then 5-3, then 2-0.  */
+  for (shift = 6; shift >= 0; shift -= 3)
+    putchar (((c >> shift) & (shift == 6 ? 3 : 7)) + '0');
+}
diff --git a/gnu/lib/libregex/test/psx-basic.c b/gnu/lib/libregex/test/psx-basic.c
new file mode 100644
index 0000000..52535b6
--- /dev/null
+++ b/gnu/lib/libregex/test/psx-basic.c
@@ -0,0 +1,253 @@
+/* psx-basic.c: Test POSIX basic regular expressions. */
+
+#include "test.h"
+
+
+/* Run the POSIX basic regular expression checks. The function stores
+ posix_basic_test in `t', selects RE_SYNTAX_POSIX_MINIMAL_BASIC, calls
+ test_posix_generic, and then issues the checks that are specific to
+ basic syntax: first mostly with test_should_match set to true, and at
+ the end with it set to false. A banner line is printed on entry,
+ after the generic tests, and on exit. */
+void
+test_posix_basic ()
+{
+ /* Intervals can only match up to RE_DUP_MAX occurrences of anything. */
+ /* Six bytes leave room for five digits and the terminating NUL; this
+ assumes RE_DUP_MAX + 1 prints in at most five characters -- TODO
+ confirm against the definition of RE_DUP_MAX. */
+ char dup_max_plus_one[6];
+ sprintf (dup_max_plus_one, "%d", RE_DUP_MAX + 1);
+
+ printf ("\nStarting POSIX basic tests.\n");
+ t = posix_basic_test;
+
+ re_set_syntax (RE_SYNTAX_POSIX_MINIMAL_BASIC);
+
+ test_posix_generic ();
+
+ printf ("\nContinuing POSIX basic tests.\n");
+
+/* Grouping tests that are not the same. */
+
+ test_should_match = false;
+ invalid_pattern (REG_EPAREN, PARENS_TO_OPS ("a)"));
+
+ test_should_match = true;
+ /* Special characters. */
+ MATCH_SELF ("*");
+ test_match ("\\(*\\)", "*");
+ test_match ("\\(^*\\)", "*");
+ test_match ("**", "***");
+ test_match ("***", "****");
+
+ MATCH_SELF ("{"); /* of extended... */
+ MATCH_SELF ("()"); /* also non-Posix. */
+ MATCH_SELF ("a+");
+ MATCH_SELF ("a?");
+ MATCH_SELF ("a|b");
+ MATCH_SELF ("a|"); /* No alternations, */
+ MATCH_SELF ("|a"); /* so OK if empty. */
+ MATCH_SELF ("a||");
+ test_match ("\\(|a\\)", "|a");
+ test_match ("\\(a|\\)", "a|");
+ test_match ("a\\+", "a+");
+ test_match ("a\\?", "a?");
+ test_match ("a\\|b", "a|b");
+ test_match ("^*", "*");
+ test_match ("^+", "+");
+ test_match ("^?", "?");
+ test_match ("^{", "{");
+ /* Valid subexpressions
+ (empty) in basic only. */
+ test_match ("\\(\\)", "");
+
+ test_match ("a\\(\\)", "a");
+ test_match ("\\(\\)b", "b");
+ test_match ("a\\(\\)b", "ab");
+ TEST_REGISTERS ("a\\(\\)b", "ab", 0, 2, 1, 1, -1, -1);
+
+ test_match ("\\(\\)*", "");
+ test_match ("\\(\\(\\)\\)*", "");
+ /* Valid back references. */
+
+ /* N.B.: back references to subexpressions that include a * are
+ undefined in the spec. The tests are in here to see if we handle
+ the situation consistently, but if it fails any of them, it doesn't
+ matter. */
+
+ test_match ("\\(\\)\\1", "");
+ TEST_REGISTERS ("\\(\\)\\1", "", 0, 0, 0, 0, -1, -1);
+
+ test_match ("\\(\\(\\)\\)\\(\\)\\2", "");
+
+ test_match ("\\(a\\)\\1", "aa");
+ TEST_REGISTERS ("\\(a\\)\\1", "aa", 0, 2, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a\\)\\1", "xaax", 1, 3, 1, 2, -1, -1);
+
+ test_match ("\\(\\(a\\)\\)\\1", "aa");
+ test_match ("\\(a\\)\\(b\\)\\2\\1", "abba");
+
+ test_match ("\\(a\\)*\\1", "aa");
+ TEST_REGISTERS ("\\(a\\)*\\1", "aa", 0, 2, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a\\)*\\1", "xaax", 0, 0, -1, -1, -1, -1);
+
+ test_match ("\\(\\(a\\)\\2b\\)*", "aab");
+ TEST_REGISTERS ("\\(\\(a\\)\\2b\\)*", "aab", 0, 3, 0, 3, 0, 1);
+ TEST_REGISTERS ("\\(\\(a\\)\\2b\\)*", "xaabx", 0, 0, -1, -1, -1, -1);
+
+ test_match ("\\(a*\\)*\\1", "");
+ test_match ("\\(a*\\)*\\1", "aa");
+ TEST_REGISTERS ("\\(a*\\)*\\1", "aa", 0, 2, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a*\\)*\\1", "xaax", 0, 0, 0, 0, -1, -1);
+
+ test_match ("\\(a*\\)*\\1", "");
+ test_match ("\\(a*\\)*\\1", "aa");
+ test_match ("\\(\\(a*\\)*\\)*\\1", "aa");
+ test_match ("\\(ab*\\)*\\1", "abab");
+ TEST_REGISTERS ("\\(ab*\\)*\\1", "abab", 0, 4, 0, 2, -1, -1);
+ TEST_REGISTERS ("\\(ab*\\)*\\1", "xababx", 0, 0, -1, -1, -1, -1);
+
+ test_match ("\\(a*\\)ab\\1", "aaba");
+ TEST_REGISTERS ("\\(a*\\)ab\\1", "aaba", 0, 4, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a*\\)ab\\1", "xaabax", 1, 5, 1, 2, -1, -1);
+
+ test_match ("\\(a*\\)*ab\\1", "aaba");
+ TEST_REGISTERS ("\\(a*\\)*ab\\1", "aaba", 0, 4, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a*\\)*ab\\1", "xaabax", 1, 5, 1, 2, -1, -1);
+
+ test_match ("\\(\\(a*\\)b\\)*\\2", "abb");
+ TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "abb", 0, 3, 2, 3, 2, 2);
+ TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "xabbx", 0, 0, -1, -1, -1, -1);
+
+ /* Different from above. */
+ test_match ("\\(\\(a*\\)b*\\)*\\2", "aa");
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aa", 0, 2, 0, 1, 0, 1);
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaax", 0, 0, 0, 0, 0, 0);
+
+ test_match ("\\(\\(a*\\)b*\\)*\\2", "aba");
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aba", 0, 3, 0, 2, 0, 1);
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xabax", 0, 0, 0, 0, 0, 0);
+
+ test_match ("\\(\\(a*\\)b\\)*\\2", "aababa");
+ TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "aababa", 0, 6, 3, 5, 3, 4);
+ TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2", "xaababax", 0, 0, -1, -1, -1, -1);
+
+ test_match ("\\(\\(a*\\)b*\\)*\\2", "aabaa");
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aabaa", 0, 5, 0, 3, 0, 2);
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaabaax", 0, 0, 0, 0, 0, 0);
+
+ test_match ("\\(\\(a*\\)b*\\)*\\2", "aabbaa");
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "aabbaa", 0, 6, 0, 4, 0, 2);
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaabbaax", 0, 0, 0, 0, 0, 0);
+
+ test_match ("\\(\\(a*\\)b*\\)*\\2", "abaabaa");
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "abaabaa", 0, 7, 2, 5, 2, 4);
+ /* NOTE(review): the subject below is "xaababaax", which is not the
+ "abaabaa" of the two calls above wrapped in x's; confirm which
+ string was intended. */
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2", "xaababaax", 0, 0, 0, 0, 0, 0);
+
+ /* NOTE(review): in the next group the test_match pattern has the
+ extra `a' after the outer group, while both TEST_REGISTERS patterns
+ have it inside the group; the last subject also has one `a' fewer
+ than "aabaaa". Confirm that these differences are intended. */
+ test_match ("\\(\\(a*\\)b*\\)*a\\2", "aabaaa");
+ TEST_REGISTERS ("\\(\\(a*\\)b*a\\)*\\2", "aabaaa", 0, 6, 0, 3, 0, 2);
+ TEST_REGISTERS ("\\(\\(a*\\)b*a\\)*\\2", "xaabaax", 0, 0, -1, -1, -1, -1);
+
+ test_match ("\\(\\(a*\\)b*\\)*\\2a", "aabaaa");
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2a", "aabaaa", 0, 6, 0, 3, 0, 2);
+ TEST_REGISTERS ("\\(\\(a*\\)b*\\)*\\2a", "xaabaaax", 1, 7, 1, 4, 1, 3);
+
+ test_match ("\\(\\(a*\\)b\\)*\\2\\1", "abaabaaaab");
+ TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2\\1", "abaabaaaab", 0, 10, 2, 5, 2, 4);
+ /* We are matching the empty string here. */
+ TEST_REGISTERS ("\\(\\(a*\\)b\\)*\\2\\1", "xabaabaaaabx", 0, 0, -1, -1, -1, -1);
+
+ test_match ("\\(a*b\\)\\1", "abab");
+ test_match ("\\(a\\)\\1\\1", "aaa");
+ test_match ("\\(a\\(c\\)d\\)\\1\\2", "acdacdc");
+
+ test_match ("\\(a\\)\\1*", "aaa");
+ TEST_REGISTERS ("\\(a\\)\\1*", "aaa", 0, 3, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a\\)\\1*", "xaaax", 1, 4, 1, 2, -1, -1);
+
+ test_match ("\\(a\\)\\{1,3\\}b\\1", "aba");
+ TEST_REGISTERS ("\\(a\\)\\{1,3\\}b\\1", "aba", 0, 3, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a\\)\\{1,3\\}b\\1", "xabax", 1, 4, 1, 2, -1, -1);
+
+ test_match ("\\(\\(a\\)\\2\\)*", "aaaa"); /* rms? */
+ TEST_REGISTERS ("\\(\\(a*b\\)\\2\\)*", "bbabab", 0, 6, 2, 6, 2, 4); /* rms? */
+
+ test_match ("\\(\\(a\\)\\1\\)*", "a1a1");
+
+ test_match ("\\(\\(a\\)\\2\\)\\1", "aaaa");
+
+ test_match ("\\(\\(a*\\)\\2\\)\\1", "aaaa");
+ TEST_REGISTERS ("\\(\\(a*\\)\\2\\)\\1", "aaaa", 0, 4, 0, 2, 0, 1);
+ TEST_REGISTERS ("\\(\\(a*\\)\\2\\)\\1", "xaaaax", 0, 0, 0, 0, 0, 0);
+
+ test_match ("\\{1\\}", "{1}");
+ test_match ("^\\{1\\}", "{1}");
+
+ test_match ("\\(a\\)\\1\\{1,2\\}", "aaa");
+ TEST_REGISTERS ("\\(a\\)\\1\\{1,2\\}", "aaa", 0, 3, 0, 1, -1, -1);
+ TEST_REGISTERS ("\\(a\\)\\1\\{1,2\\}", "xaaax", 1, 4, 1, 2, -1, -1);
+
+
+ /* Per POSIX D11.1 p. 109, leftmost longest match. */
+
+ test_match (PARENS_TO_OPS ("(.*).*\\1"), "abcabc");
+
+
+ /* Per POSIX D11.1, p. 125, leftmost longest match. */
+
+ test_match (PARENS_TO_OPS ("(ac*)c*d[ac]*\\1"), "acdacaaa");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ac*)c*d[ac]*\\1"), "acdacaaa",
+ 0, 8, 0, 1, -1, -1);
+
+ /* Anchors become ordinary, sometimes. */
+ MATCH_SELF ("a^");
+ MATCH_SELF ("$a");
+ MATCH_SELF ("$^");
+ test_fastmap ("$a^", "$", 0, 0);
+ test_match ("$^*", "$^^");
+ test_match ("\\($^\\)", "$^");
+ test_match ("$*", "$$");
+ /* xx -- known bug, solution pending test_match ("^^$", "^"); */
+ test_match ("$\\{0,\\}", "$$");
+ TEST_SEARCH ("^$*", "$$", 0, 2);
+ TEST_SEARCH ("^$\\{0,\\}", "$$", 0, 2);
+ MATCH_SELF ("2^10");
+ MATCH_SELF ("$HOME");
+ MATCH_SELF ("$1.35");
+
+
+ /* Basic regular expressions, continued; these don't match their strings. */
+ test_should_match = false;
+
+ invalid_pattern (REG_EESCAPE, "\\(a\\");
+ /* Invalid back references. */
+ test_match ("\\(a\\)\\1", "ab");
+ test_match ("\\(a\\)\\1\\1", "aab");
+ test_match ("\\(a\\)\\(b\\)\\2\\1", "abab");
+ test_match ("\\(a\\(c\\)d\\)\\1\\2", "acdc");
+ test_match ("\\(a*b\\)\\1", "abaab");
+ test_match ("\\(a\\)\\1*", "aaaaaaaaaab");
+ test_match ("\\(\\(a\\)\\1\\)*", "aaa");
+ invalid_pattern (REG_ESUBREG, "\\1");
+ invalid_pattern (REG_ESUBREG, "\\(a\\)\\2");
+ test_match ("\\(\\(a\\)\\2\\)*", "abaa");
+ test_match ("\\(\\(a\\)\\1\\)*", "a");
+ test_match ("\\(\\(a\\)\\2\\)\\1", "abaa");
+ test_match ("\\(\\(a*\\)\\2\\)\\1", "abaa");
+ /* Invalid intervals. */
+ invalid_pattern (REG_EBRACE, "a\\{");
+
+ invalid_pattern (REG_BADBR, "a\\{-1");
+ /* The next two patterns use a repeat count of RE_DUP_MAX + 1. */
+ invalid_pattern (REG_BADBR, concat ("a\\{", (char *)dup_max_plus_one));
+ invalid_pattern (REG_BADBR, concat (concat ("a\\{", (char *)dup_max_plus_one), ","));
+ invalid_pattern (REG_BADBR, "a\\{1,0");
+
+ invalid_pattern (REG_EBRACE, "a\\{1");
+ invalid_pattern (REG_EBRACE, "a\\{0,");
+ invalid_pattern (REG_EBRACE, "a\\{0,1");
+ invalid_pattern (REG_EBRACE, "a\\{0,1}");
+
+ printf ("\nFinished POSIX basic tests.\n");
+}
+
+
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/psx-extend.c b/gnu/lib/libregex/test/psx-extend.c
new file mode 100644
index 0000000..6f02d67
--- /dev/null
+++ b/gnu/lib/libregex/test/psx-extend.c
@@ -0,0 +1,1244 @@
+/* psx-extend.c: Test POSIX extended regular expressions. */
+
+#include "test.h"
+
+
+void
+test_posix_extended ()
+{
+ /* Intervals can only match up to RE_DUP_MAX occurrences of anything. */
+ char dup_max_plus_one[6];
+ sprintf (dup_max_plus_one, "%d", RE_DUP_MAX + 1);
+
+
+ printf ("\nStarting POSIX extended tests.\n");
+ t = posix_extended_test;
+
+ re_set_syntax (RE_SYNTAX_POSIX_MINIMAL_EXTENDED);
+
+ test_posix_generic ();
+
+ printf ("\nContinuing POSIX extended tests.\n");
+
+ /* Grouping tests that differ from basic's. */
+
+ test_should_match = true;
+ MATCH_SELF ("a)");
+
+ /* Valid use of special characters. */
+ test_match ("\\(a", "(a");
+ test_match ("a\\+", "a+");
+ test_match ("a\\?", "a?");
+ test_match ("\\{a", "{a");
+ test_match ("\\|a", "|a");
+ test_match ("a\\|b", "a|b");
+ test_match ("a\\|?", "a");
+ test_match ("a\\|?", "a|");
+ test_match ("a\\|*", "a");
+ test_match ("a\\|*", "a||");
+ test_match ("\\(*\\)", ")");
+ test_match ("\\(*\\)", "(()");
+ test_match ("a\\|+", "a|");
+ test_match ("a\\|+", "a||");
+ test_match ("\\(+\\)", "()");
+ test_match ("\\(+\\)", "(()");
+ test_match ("a\\||b", "a|");
+ test_match ("\\(?\\)", ")");
+ test_match ("\\(?\\)", "()");
+
+ test_match ("a+", "a");
+ test_match ("a+", "aa");
+ test_match ("a?", "");
+ test_match ("a?", "a");
+
+ /* Bracket expressions. */
+ test_match ("[(]", "(");
+ test_match ("[+]", "+");
+ test_match ("[?]", "?");
+ test_match ("[{]", "{");
+ test_match ("[|]", "|");
+ /* Subexpressions. */
+ test_match ("(a+)*", "");
+ test_match ("(a+)*", "aa");
+ test_match ("(a?)*", "");
+ test_match ("(a?)*", "aa");
+ /* (No) back references. */
+ test_match ("(a)\\1", "a1");
+ /* Invalid as intervals,
+ but are valid patterns. */
+ MATCH_SELF ("{");
+ test_match ("^{", "{");
+ test_match ("a|{", "{");
+ test_match ("({)", "{");
+ MATCH_SELF ("a{");
+ MATCH_SELF ("a{}");
+ MATCH_SELF ("a{-1");
+ MATCH_SELF ("a{-1}");
+ MATCH_SELF ("a{0");
+ MATCH_SELF ("a{0,");
+ MATCH_SELF (concat ("a{", dup_max_plus_one));
+ MATCH_SELF (concat (concat ("a{", dup_max_plus_one), ","));
+ MATCH_SELF ("a{1,0");
+ MATCH_SELF ("a{1,0}");
+ MATCH_SELF ("a{0,1");
+ test_match ("[a{0,1}]", "}");
+ test_match ("a{1,3}{-1}", "aaa{-1}");
+ test_match (concat ("a{1,3}{", dup_max_plus_one),
+ concat ("aaa{", dup_max_plus_one));
+ test_match ("a{1,3}{2,1}", "aaa{2,1}");
+ test_match ("a{1,3}{1,2", "aaa{1,2");
+ /* Valid consecutive repetitions. */
+ test_match ("a*+", "a");
+ test_match ("a*?", "a");
+ test_match ("a++", "a");
+ test_match ("a+*", "a");
+ test_match ("a+?", "a");
+ test_match ("a??", "a");
+ test_match ("a?*", "a");
+ test_match ("a?+", "a");
+
+ test_match ("a{2}?", "");
+ test_match ("a{2}?", "aa");
+ test_match ("a{2}+", "aa");
+ test_match ("a{2}{2}", "aaaa");
+
+ test_match ("a{1}?*", "");
+ test_match ("a{1}?*", "aa");
+
+ test_match ("(a?){0,3}b", "aaab");
+ test_fastmap ("(a?){0,3}b", "ab", 0, 0);
+ test_match ("(a+){0,3}b", "b");
+ test_fastmap ("(a+){0,3}b", "ab", 0, 0);
+ test_match ("(a+){0,3}b", "ab");
+ test_fastmap ("(a+){0,3}b", "ab", 0, 0);
+ test_match ("(a+){1,3}b", "aaab");
+ test_match ("(a?){1,3}b", "aaab");
+
+ test_match ("\\\\{1}", "\\"); /* Extended only. */
+
+ test_match ("(a?)?", "a");
+ test_match ("(a?b)?c", "abc");
+ test_match ("(a+)*b", "b");
+ /* Alternatives. */
+ test_match ("a|b", "a");
+ test_match ("a|b", "b");
+ test_fastmap ("a|b", "ab", 0, 0);
+
+ TEST_SEARCH ("a|b", "cb", 0, 2);
+ TEST_SEARCH ("a|b", "cb", 0, 2);
+
+ test_match ("(a|b|c)", "a");
+ test_match ("(a|b|c)", "b");
+ test_match ("(a|b|c)", "c");
+
+ test_match ("(a|b|c)*", "abccba");
+
+ test_match ("(a(b*))|c", "a"); /* xx do registers. */
+ test_match ("(a(b*))|c", "ab");
+ test_match ("(a(b*))|c", "c");
+
+ test_fastmap ("(a+?*|b)", "ab", 0, 0);
+ test_match ("(a+?*|b)", "b");
+ TEST_REGISTERS ("(a+?*|b)", "b", 0, 1, 0, 1, -1, -1);
+
+ test_fastmap ("(a+?*|b)*", "ab", 0, 0);
+ test_match ("(a+?*|b)*", "bb");
+ TEST_REGISTERS ("(a+?*|b)*", "bb", 0, 2, 1, 2, -1, -1);
+
+ test_fastmap ("(a*|b)*", "ab", 0, 0);
+ test_match ("(a*|b)*", "bb");
+ TEST_REGISTERS ("(a*|b)*", "bb", 0, 2, 1, 2, -1, -1);
+
+ test_fastmap ("((a*)|b)*", "ab", 0, 0);
+ test_match ("((a*)|b)*", "bb");
+ TEST_REGISTERS ("((a*)|b)*", "bb", 0, 2, 1, 2, 1, 1);
+
+ test_fastmap ("(a{0,}|b)*", "ab", 0, 0);
+ test_match ("(a{0,}|b)*", "bb");
+ TEST_REGISTERS ("(a{0,}|b)*", "bb", 0, 2, 1, 2, -1, -1);
+
+ test_fastmap ("((a{0,})|b)*", "ab", 0, 0);
+ test_match ("((a{0,})|b)*", "bb");
+ TEST_REGISTERS ("((a{0,})|b)*", "bb", 0, 2, 1, 2, 1, 1);
+
+ /* With c's */
+ test_fastmap ("(a+?*|b)c", "abc", 0, 0);
+ test_match ("(a+?*|b)c", "bc");
+ TEST_REGISTERS ("(a+?*|b)c", "bc", 0, 2, 0, 1, -1, -1);
+
+ test_fastmap ("(a+?*|b)*c", "abc", 0, 0);
+ test_match ("(a+?*|b)*c", "bbc");
+ TEST_REGISTERS ("(a+?*|b)*c", "bbc", 0, 3, 1, 2, -1, -1);
+
+ test_fastmap ("(a*|b)*c", "abc", 0, 0);
+ test_match ("(a*|b)*c", "bbc");
+ TEST_REGISTERS ("(a*|b)*c", "bbc", 0, 3, 1, 2, -1, -1);
+
+ test_fastmap ("((a*)|b)*c", "abc", 0, 0);
+ test_match ("((a*)|b)*c", "bbc");
+ TEST_REGISTERS ("((a*)|b)*c", "bbc", 0, 3, 1, 2, 1, 1);
+
+ test_fastmap ("(a{0,}|b)*c", "abc", 0, 0);
+ test_match ("(a{0,}|b)*c", "bbc");
+ TEST_REGISTERS ("(a{0,}|b)*c", "bbc", 0, 3, 1, 2, -1, -1);
+
+ test_fastmap ("((a{0,})|b)*c", "abc", 0, 0);
+ test_match ("((a{0,})|b)*c", "bbc");
+ TEST_REGISTERS ("((a{0,})|b)*c", "bbc", 0, 3, 1, 2, 1, 1);
+
+
+ test_fastmap ("((a{0,}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a{0,}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a{0,}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a{0,}\\b\\<)|b)*", "ab", 0, 0);
+ test_match ("((a{0,}\\b\\<)|b)*", "b");
+ TEST_REGISTERS ("((a{0,}\\b\\<)|b)*", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,1}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,1}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,1}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,2}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,2}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,2}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+
+ test_fastmap ("((a+?*{0,4095}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,4095}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,4095}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,5119}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,5119}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,5119}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,6143}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,6143}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,6143}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,8191}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,8191}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,8191}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,16383}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,16383}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,16383}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+
+ test_fastmap ("((a+?*{0,}\\b\\<)|b)", "ab", 0, 0);
+ test_match ("((a+?*{0,}\\b\\<)|b)", "b");
+ TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,}\\b\\<)|b)*", "ab", 0, 0);
+ test_match ("((a+?*{0,}\\b\\<)|b)*", "b");
+ TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)*", "b",
+ 0, 1, 0, 1, 0, 0);
+
+ test_fastmap ("((a+?*{0,}\\b\\<)|b)*", "ab", 0, 0);
+ test_match ("((a+?*{0,}\\b\\<)|b)*", "bb");
+ TEST_REGISTERS ("((a+?*{0,}\\b\\<)|b)*", "bb",
+ 0, 2, 1, 2, 0, 0);
+
+
+ /* `*' after group. */
+ test_match ("(a*|b*)*c", "c");
+ TEST_REGISTERS ("(a*|b*)*c", "c", 0, 1, 0, 0, -1, -1);
+
+ test_match ("(a*|b*)*c", "ac");
+ TEST_REGISTERS ("(a*|b*)*c", "ac", 0, 2, 0, 1, -1, -1);
+
+ test_match ("(a*|b*)*c", "aac");
+ TEST_REGISTERS ("(a*|b*)*c", "aac", 0, 3, 0, 2, -1, -1);
+
+ test_match ("(a*|b*)*c", "bbc");
+ TEST_REGISTERS ("(a*|b*)*c", "bbc", 0, 3, 0, 2, -1, -1);
+
+ test_match ("(a*|b*)*c", "abc");
+ TEST_REGISTERS ("(a*|b*)*c", "abc", 0, 3, 1, 2, -1, -1);
+
+ /* No `*' after group. */
+ test_match ("(a*|b*)c", "c");
+ TEST_REGISTERS ("(a*|b*)c", "c", 0, 1, 0, 0, -1, -1);
+
+ test_match ("(a*|b*)c", "ac");
+ TEST_REGISTERS ("(a*|b*)c", "ac", 0, 2, 0, 1, -1, -1);
+
+ test_match ("(a*|b*)c", "bc");
+ TEST_REGISTERS ("(a*|b*)c", "bc", 0, 2, 0, 1, -1, -1);
+
+ test_match ("(a*|b*)c", "aac");
+ TEST_REGISTERS ("(a*|b*)c", "aac", 0, 3, 0, 2, -1, -1);
+
+ /* Same as above, but with no `*'s in alternatives. */
+
+ test_match ("(a|b)*c", "c"); /* `*' after group. */
+ TEST_REGISTERS ("(a|b)*c", "c", 0, 1, -1, -1, -1, -1);
+
+ test_match ("(a|b)*c", "ac");
+ TEST_REGISTERS ("(a|b)*c", "ac", 0, 2, 0, 1, -1, -1);
+
+ test_match ("(a|b)*c", "bc");
+ TEST_REGISTERS ("(a|b)*c", "bc", 0, 2, 0, 1, -1, -1);
+
+ test_match ("(a|b)*c", "abc");
+ TEST_REGISTERS ("(a|b)*c", "abc", 0, 3, 1, 2, -1, -1);
+
+
+ test_match ("(a*|b*)c", "bbc");
+ TEST_REGISTERS ("(a*|b*)c", "bbc", 0, 3, 0, 2, -1, -1);
+
+ /* Complicated second alternative. */
+
+ test_match ("(a*|(b*)*)*c", "bc");
+ TEST_REGISTERS ("(a*|(b*)*)*c", "bc", 0, 2, 0, 1, 0, 1);
+
+ test_match ("(a*|(b*|c*)*)*d", "bd");
+ TEST_REGISTERS ("(a*|(b*|c*)*)*d", "bd", 0, 2, 0, 1, 0, 1);
+
+ test_match ("(a*|(b*|c*)*)*d", "bbd");
+ TEST_REGISTERS ("(a*|(b*|c*)*)*d", "bbd", 0, 3, 0, 2, 0, 2);
+
+ test_match ("(a*|(b*|c*)*)*d", "cd");
+ TEST_REGISTERS ("(a*|(b*|c*)*)*d", "cd", 0, 2, 0, 1, 0, 1);
+
+ test_match ("(a*|(b*|c*)*)*d", "ccd");
+ TEST_REGISTERS ("(a*|(b*|c*)*)*d", "ccd", 0, 3, 0, 2, 0, 2);
+
+ test_match ("(a*|b*|c*)*d", "aad");
+ TEST_REGISTERS ("(a*|b*|c*)*d", "aad", 0, 3, 0, 2, 0, 2);
+
+ test_match ("(a*|b*|c*)*d", "bbd");
+ TEST_REGISTERS ("(a*|b*|c*)*d", "bbd", 0, 3, 0, 2, 0, 2);
+
+ test_match ("(a*|b*|c*)*d", "ccd");
+ TEST_REGISTERS ("(a*|b*|c*)*d", "ccd", 0, 3, 0, 2, 0, 2);
+
+ /* Valid anchoring. */
+ valid_pattern ("a^");
+ valid_pattern ("a^b");
+ valid_pattern ("$a");
+ valid_pattern ("a$b");
+ valid_pattern ("foo^bar");
+ valid_pattern ("foo$bar");
+ valid_pattern ("(^)");
+ valid_pattern ("($)");
+ valid_pattern ("(^$)");
+
+ /* These are the same (but valid) as those (invalid) in other_test.c. */
+ valid_pattern
+ ("(((((((((((((((((((((((((((((((((a^)))))))))))))))))))))))))))))))))");
+ valid_pattern
+ ("((((((((((((((((((((((((((((((((($a)))))))))))))))))))))))))))))))))");
+ valid_pattern ("\\(^a\\)");
+ valid_pattern ("a\\|^b");
+ valid_pattern ("\\w^a");
+ valid_pattern ("\\W^a");
+ valid_pattern ("(a^)");
+ valid_pattern ("($a)");
+ valid_pattern ("a(^b)");
+ valid_pattern ("a$(b)");
+ valid_pattern ("(a)^b");
+ valid_pattern ("(a)$b");
+ valid_pattern ("(a)(^b)");
+ valid_pattern ("(a$)(b)");
+ valid_pattern ("(a|b)^c");
+ valid_pattern ("(a|b)$c");
+ valid_pattern ("(a$|b)c");
+ valid_pattern ("(a|b$)c");
+ valid_pattern ("a(b|^c)");
+ valid_pattern ("a(^b|c)");
+ valid_pattern ("a$(b|c)");
+ valid_pattern ("(a)(^b|c)");
+ valid_pattern ("(a)(b|^c)");
+ valid_pattern ("(b$|c)(a)");
+ valid_pattern ("(b|c$)(a)");
+ valid_pattern ("(a(^b|c))");
+ valid_pattern ("(a(b|^c))");
+ valid_pattern ("((b$|c)a)");
+ valid_pattern ("((b|c$)a)");
+ valid_pattern ("((^a|^b)^c)");
+ valid_pattern ("(c$(a$|b$))");
+ valid_pattern ("((^a|^b)^c)");
+ valid_pattern ("((a$|b$)c)");
+ valid_pattern ("(c$(a$|b$))");
+ valid_pattern ("((^a|^b)|^c)^d");
+ valid_pattern ("((a$|b$)|c$)d$");
+ valid_pattern ("d$(c$|(a$|b$))");
+ valid_pattern ("((^a|^b)|^c)(^d)");
+ valid_pattern ("((a$|b$)|c$)(d$)");
+ valid_pattern ("(d$)((a$|b$)|c$)");
+ valid_pattern ("((^a|^b)|^c)((^d))");
+ valid_pattern ("((a$|b$)|c$)((d$))");
+ valid_pattern ("((d$))((a$|b$)|c$)");
+ valid_pattern ("(((^a|^b))c|^d)^e");
+ valid_pattern ("(((a$|b$))c|d$)$e$");
+ valid_pattern ("e$(d$|c((a$|b$)))");
+ valid_pattern ("(^a)((^b))");
+ valid_pattern ("(a$)((b$))");
+ valid_pattern ("((^a))(^b)");
+ valid_pattern ("((a$))(b$)");
+ valid_pattern ("((^a))((^b))");
+ valid_pattern ("((a$))((b$))");
+ valid_pattern ("((^a)^b)");
+ valid_pattern ("((a$)b$)");
+ valid_pattern ("(b$(a$))");
+ valid_pattern ("(((^a)b)^c)");
+ valid_pattern ("(((a$)b)c$)");
+ valid_pattern ("(c$(b(a$)))");
+ valid_pattern ("(((^a)b)c)^d");
+ valid_pattern ("(((a$)b)c)d$");
+ valid_pattern ("d$(c(b(a$)))");
+ valid_pattern (".^a");
+ valid_pattern ("a$.");
+ valid_pattern ("[a]^b");
+ valid_pattern ("b$[a]");
+ valid_pattern ("\\(a$\\)");
+ valid_pattern ("a$\\|b");
+ valid_pattern ("(^a|^b)^c");
+ valid_pattern ("c$(a$|b$)");
+ valid_pattern ("(^a|^b)^|^c");
+ valid_pattern ("(a$|b$)$|$c$");
+ valid_pattern ("(a$|$b$)$|c$");
+ valid_pattern ("($a$|b$)$|c$");
+ valid_pattern ("$(a$|b$)$|c$");
+ valid_pattern ("^c|d(^a|^b)");
+ valid_pattern ("(^a|^b)|d^c");
+ valid_pattern ("c$|(a$|b$)d");
+ valid_pattern ("c$d|(a$|b$)");
+ valid_pattern ("c(^a|^b)|^d");
+ valid_pattern ("(a$|b$)c|d$");
+ valid_pattern ("c(((^a|^b))|^d)e");
+ valid_pattern ("(c((^a|^b))|^d)e");
+ valid_pattern ("((c(^a|^b))|^d)e");
+ valid_pattern ("(((^a|^b))|c^d)e");
+ valid_pattern ("(((^a|^b))|^d)^e");
+ valid_pattern ("(c$((a|b))|d)e$");
+ valid_pattern ("(c((a$|b$))|d)e$");
+ valid_pattern ("(c((a|b)$)|d)e$");
+ valid_pattern ("(c((a|b))|d$)e$");
+ valid_pattern ("^d(^c|e((a|b)))");
+ valid_pattern ("^d(c|^e((a|b)))");
+ valid_pattern ("^d(c|e(^(a|b)))");
+ valid_pattern ("^d(c|e((^a|b)))");
+ valid_pattern ("^d(c|e((a|^b)))");
+ valid_pattern ("^d(c|e((a|b^)))");
+ valid_pattern ("^d(c|e((a|b)^))");
+ valid_pattern ("^d(c|e((a|b))^)");
+ valid_pattern ("^d(c|e((a|b)))^");
+ valid_pattern ("d$(c$|e((a$|b$)))");
+ valid_pattern ("d(c$|e$((a$|b$)))");
+ valid_pattern ("(((^a|^b))^c)|^de");
+ valid_pattern ("(((^a|^b))c)|^d^e");
+ valid_pattern ("(((a$|b))c$)|de$");
+ valid_pattern ("(((a|b$))c$)|de$");
+ valid_pattern ("(((a|b))c$)|d$e$");
+ valid_pattern ("^d^e|^(c((a|b)))");
+ valid_pattern ("^de|^(c^((a|b)))");
+ valid_pattern ("^de|^(c(^(a|b)))");
+ valid_pattern ("^de|^(c((^a|b)))");
+ valid_pattern ("^de|^(c((a|^b)))");
+ valid_pattern ("^de|(^c(^(a|b)))");
+ valid_pattern ("^de|(^c((^a|b)))");
+ valid_pattern ("^de|(^c((a|^b)))");
+ valid_pattern ("de$|(c($(a|b)$))");
+ valid_pattern ("de$|(c$((a|b)$))");
+ valid_pattern ("de$|($c((a|b)$))");
+ valid_pattern ("de$|$(c((a|b)$))");
+ valid_pattern ("de$|(c($(a|b))$)");
+ valid_pattern ("de$|(c$((a|b))$)");
+ valid_pattern ("de$|$(c((a|b))$)");
+ valid_pattern ("de$|(c($(a|b)))$");
+ valid_pattern ("de$|(c$((a|b)))$");
+ valid_pattern ("de$|($c((a|b)))$");
+ valid_pattern ("de$|$(c((a|b)))$");
+ valid_pattern ("^a(^b|c)|^d");
+ valid_pattern ("^a(b|^c)|^d");
+ valid_pattern ("^a(b|c^)|^d");
+ valid_pattern ("^a(b|c)^|^d");
+ valid_pattern ("a$(b$|c$)|d$");
+ valid_pattern ("^d|^a(^b|c)");
+ valid_pattern ("^d|^a(b|^c)");
+ valid_pattern ("d$|a$(b$|c$)");
+ valid_pattern ("^d|^(b|c)^a");
+ valid_pattern ("d$|(b|c$)a$");
+ valid_pattern ("d$|(b$|c)a$");
+ valid_pattern ("^(a)^(b|c)|^d");
+ valid_pattern ("^(a)(^b|c)|^d");
+ valid_pattern ("^(a)(b|^c)|^d");
+ valid_pattern ("(a)$(b|c)$|d$");
+ valid_pattern ("(a$)(b|c)$|d$");
+ valid_pattern ("(^a)(^b|c)|^d");
+ valid_pattern ("(^a)(b|^c)|^d");
+ valid_pattern ("(a)$(b$|c$)|d$");
+ valid_pattern ("(a$)(b$|c$)|d$");
+ valid_pattern ("^d|^(b|c)^(a)");
+ valid_pattern ("^d|^(b|c)(^a)");
+ valid_pattern ("d$|(b|c$)(a)$");
+ valid_pattern ("d$|(b$|c)(a)$");
+ valid_pattern ("^d|(^b|^c)^(a)");
+ valid_pattern ("^d|(^b|^c)(^a)");
+ valid_pattern ("d$|(b|c)$(a$)");
+ valid_pattern ("d$|(b|c$)(a$)");
+ valid_pattern ("d$|(b$|c)(a$)");
+ valid_pattern ("^d|^(a)^(b|c)");
+ valid_pattern ("^d|^(a)(^b|c)");
+ valid_pattern ("^d|^(a)(b|^c)");
+ valid_pattern ("^d|(^a)^(b|c)");
+ valid_pattern ("^d|(^a)(^b|c)");
+ valid_pattern ("^d|(^a)(b|^c)");
+ valid_pattern ("d$|(a)$(b$|c$)");
+ valid_pattern ("d$|(a$)(b$|c$)");
+ valid_pattern ("((e^a|^b)|^c)|^d");
+ valid_pattern ("((^a|e^b)|^c)|^d");
+ valid_pattern ("((^a|^b)|e^c)|^d");
+ valid_pattern ("((^a|^b)|^c)|e^d");
+ valid_pattern ("d$e|(c$|(a$|b$))");
+ valid_pattern ("d$|(c$e|(a$|b$))");
+ valid_pattern ("d$|(c$|(a$e|b$))");
+ valid_pattern ("d$|(c$|(a$|b$e))");
+ valid_pattern ("d$|(c$|(a$|b$)e)");
+ valid_pattern ("d$|(c$|(a$|b$))e");
+ valid_pattern ("(a|b)^|c");
+ valid_pattern ("(a|b)|c^");
+ valid_pattern ("$(a|b)|c");
+ valid_pattern ("(a|b)|$c");
+ valid_pattern ("(a^|^b)|^c");
+ valid_pattern ("(^a|b^)|^c");
+ valid_pattern ("(^a|^b)|c^");
+ valid_pattern ("($a|b$)|c$");
+ valid_pattern ("(a$|$b)|c$");
+ valid_pattern ("(a$|b$)|$c");
+ valid_pattern ("c^|(^a|^b)");
+ valid_pattern ("^c|(a^|^b)");
+ valid_pattern ("^c|(^a|b^)");
+ valid_pattern ("$c|(a$|b$)");
+ valid_pattern ("c$|($a|b$)");
+ valid_pattern ("c$|(a$|$b)");
+ valid_pattern ("c^|^(a|b)");
+ valid_pattern ("^c|(a|b)^");
+ valid_pattern ("$c|(a|b)$");
+ valid_pattern ("c$|$(a|b)");
+ valid_pattern ("(a^|^b)c|^d");
+ valid_pattern ("(^a|b^)c|^d");
+ valid_pattern ("(^a|^b)c|d^");
+ valid_pattern ("(^a|^b)^c|^d");
+ valid_pattern ("(a|b)c$|$d");
+ valid_pattern ("(a|b)$c$|d$");
+ valid_pattern ("(a|b)$c$|d$");
+ valid_pattern ("(a|b$)c$|d$");
+ valid_pattern ("(a$|b)c$|d$");
+ valid_pattern ("($a|b)c$|d$");
+ valid_pattern ("$(a|b)c$|d$");
+ valid_pattern ("^d|^c^(a|b)");
+ valid_pattern ("^d|^c(^a|b)");
+ valid_pattern ("^d|^c(a|^b)");
+ valid_pattern ("^d|^c(a|b^)");
+ valid_pattern ("^d|^c(a|b)^");
+ valid_pattern ("$d|c(a$|b$)");
+ valid_pattern ("d$|c($a$|b$)");
+ valid_pattern ("d$|c$(a$|b$)");
+ valid_pattern ("d$|$c(a$|b$)");
+
+ valid_pattern ("(((a^|^b))c|^d)e");
+ valid_pattern ("(((^a|b^))c|^d)e");
+ valid_pattern ("(((^a|^b))^c|^d)e");
+ valid_pattern ("((^(a|b))c|d^)e");
+ valid_pattern ("(^((a|b))c|^d)^e");
+ valid_pattern ("(^((a|b)^)c|^d)e");
+ valid_pattern ("(^((a^|b))c|^d)e");
+ valid_pattern ("(^((a|b^))c|^d)e");
+ valid_pattern ("(^((a|b)^)c|^d)e");
+ valid_pattern ("(^((a|b))^c|^d)e");
+ valid_pattern ("(^((a|b))c^|^d)e");
+ valid_pattern ("(^((a|b))c|^d^)e");
+ valid_pattern ("(^((a|b))c|^d)^e");
+ valid_pattern ("(((a|b))c|d)$e$");
+ valid_pattern ("(((a|b))c|d$)e$");
+ valid_pattern ("(((a|b))c|$d)e$");
+ valid_pattern ("(((a|b))c$|d)e$");
+ valid_pattern ("(((a|b))$c|d)e$");
+ valid_pattern ("(((a|b)$)c|d)e$");
+ valid_pattern ("(((a|b$))c|d)e$");
+ valid_pattern ("(((a$|b))c|d)e$");
+ valid_pattern ("((($a|b))c|d)e$");
+ valid_pattern ("(($(a|b))c|d)e$");
+ valid_pattern ("($((a|b))c|d)e$");
+ valid_pattern ("$(((a|b))c|d)e$");
+ valid_pattern ("(^((a|b)^)c|^d)e");
+ valid_pattern ("(^((a|b))^c|^d)e");
+ valid_pattern ("(^((a|b))c|^d^)e");
+ valid_pattern ("(^((a|b))c|^d)^e");
+
+ valid_pattern ("^e(^d|c((a|b)))");
+ valid_pattern ("^e(d|^c((a|b)))");
+ valid_pattern ("^e(d|c^((a|b)))");
+ valid_pattern ("^e(d|c(^(a|b)))");
+ valid_pattern ("^e(d|c((^a|b)))");
+ valid_pattern ("^e(d|c((a|^b)))");
+ valid_pattern ("^e(d|c((a|b^)))");
+ valid_pattern ("^e(d|c((a|b)^))");
+ valid_pattern ("^e(d|c((a|b))^)");
+ valid_pattern ("^e(d|c((a|b)))^");
+ valid_pattern ("e$(d$|c((a$|b$)))");
+ valid_pattern ("e(d$|c$((a$|b$)))");
+ valid_pattern ("e(d$|c($(a$|b$)))");
+ valid_pattern ("e(d$|c(($a$|b$)))");
+ valid_pattern ("e$(d$|c((a|b)$))");
+ valid_pattern ("e($d$|c((a|b)$))");
+ valid_pattern ("e(d$|$c((a|b)$))");
+ valid_pattern ("e(d$|c$((a|b)$))");
+ valid_pattern ("e(d$|c($(a|b)$))");
+ valid_pattern ("e(d$|c(($a|b)$))");
+ valid_pattern ("e(d$|c((a|$b)$))");
+ valid_pattern ("e(d$|c((a$|$b$)))");
+
+ valid_pattern ("e$(d$|c((a|b))$)");
+ valid_pattern ("e($d$|c((a|b))$)");
+ valid_pattern ("e(d$|$c((a|b))$)");
+ valid_pattern ("e(d$|c$((a|b))$)");
+ valid_pattern ("e(d$|c($(a|b))$)");
+ valid_pattern ("e(d$|c(($a|b))$)");
+ valid_pattern ("e(d$|c((a|$b))$)");
+ valid_pattern ("e$(d$|c((a|b)))$");
+ valid_pattern ("e($d$|c((a|b)))$");
+ valid_pattern ("e(d$|$c((a|b)))$");
+ valid_pattern ("e(d$|c$((a|b)))$");
+ valid_pattern ("e(d$|c($(a|b)))$");
+ valid_pattern ("e(d$|c(($a|b)))$");
+ valid_pattern ("e(d$|c((a|$b)))$");
+ valid_pattern ("(((^a|^b)^)c)|^de");
+ valid_pattern ("(((^a|^b))^c)|^de");
+ valid_pattern ("(((^a|^b))c)^|^de");
+ valid_pattern ("$(((a|b))c$)|de$");
+ valid_pattern ("($((a|b))c$)|de$");
+ valid_pattern ("(($(a|b))c$)|de$");
+ valid_pattern ("((($a|b))c$)|de$");
+ valid_pattern ("(((a|$b))c$)|de$");
+ valid_pattern ("(((a|b)$)c$)|de$");
+ valid_pattern ("(((a|b))$c$)|de$");
+ valid_pattern ("$(((a|b))c)$|de$");
+ valid_pattern ("($((a|b))c)$|de$");
+ valid_pattern ("(($(a|b))c)$|de$");
+ valid_pattern ("((($a|b))c)$|de$");
+ valid_pattern ("(((a|$b))c)$|de$");
+ valid_pattern ("(((a|b)$)c)$|de$");
+ valid_pattern ("(((a|b))$c)$|de$");
+ valid_pattern ("^ed|^(c((a|b)))^");
+ valid_pattern ("^ed|^(c((a|b))^)");
+ valid_pattern ("^ed|^(c((a|b)^))");
+ valid_pattern ("^ed|^(c((a|b^)))");
+ valid_pattern ("^ed|^(c((a^|b)))");
+ valid_pattern ("^ed|^(c((^a|b)))");
+ valid_pattern ("^ed|^(c(^(a|b)))");
+ valid_pattern ("^ed|^(c^((a|b)))");
+ valid_pattern ("^ed|(^c((a|b)))^");
+ valid_pattern ("^ed|(^c((a|b))^)");
+ valid_pattern ("^ed|(^c((a|b)^))");
+ valid_pattern ("^ed|(^c((a|b^)))");
+ valid_pattern ("^ed|(^c((a|^b)))");
+ valid_pattern ("^ed|(^c((a^|b)))");
+ valid_pattern ("^ed|(^c((^a|b)))");
+ valid_pattern ("^ed|(^c(^(a|b)))");
+ valid_pattern ("^ed|(^c(^(a|b)))");
+ valid_pattern ("^ed|(^c^((a|b)))");
+ valid_pattern ("ed$|$(c((a|b)))$");
+ valid_pattern ("ed$|($c((a|b)))$");
+ valid_pattern ("ed$|(c$((a|b)))$");
+ valid_pattern ("ed$|(c($(a|b)))$");
+ valid_pattern ("ed$|(c(($a|b)))$");
+ valid_pattern ("ed$|(c((a|$b)))$");
+ valid_pattern ("ed$|$(c((a|b))$)");
+ valid_pattern ("ed$|($c((a|b))$)");
+ valid_pattern ("ed$|(c$((a|b))$)");
+ valid_pattern ("ed$|(c($(a|b))$)");
+ valid_pattern ("ed$|(c(($a|b))$)");
+ valid_pattern ("ed$|(c((a|$b))$)");
+ valid_pattern ("ed$|$(c((a|b)$))");
+ valid_pattern ("ed$|($c((a|b)$))");
+ valid_pattern ("ed$|(c$((a|b)$))");
+ valid_pattern ("ed$|(c($(a|b)$))");
+ valid_pattern ("ed$|(c(($a|b)$))");
+ valid_pattern ("ed$|(c((a|$b)$))");
+ valid_pattern ("ed$|$(c((a|b)$))");
+ valid_pattern ("ed$|($c((a|b)$))");
+ valid_pattern ("ed$|(c$((a|b)$))");
+ valid_pattern ("ed$|(c($(a|b)$))");
+ valid_pattern ("ed$|(c(($a|b)$))");
+ valid_pattern ("ed$|(c((a|$b)$))");
+ valid_pattern ("ed$|$(c((a|b)$))");
+ valid_pattern ("ed$|($c((a|b)$))");
+ valid_pattern ("ed$|(c$((a|b)$))");
+ valid_pattern ("ed$|(c($(a|b)$))");
+ valid_pattern ("ed$|(c(($a|b)$))");
+ valid_pattern ("ed$|(c((a|$b)$))");
+ valid_pattern ("ed$|$(c((a|b)$))");
+ valid_pattern ("ed$|($c((a|b)$))");
+ valid_pattern ("ed$|(c$((a|b)$))");
+ valid_pattern ("ed$|(c($(a|b)$))");
+ valid_pattern ("ed$|(c(($a|b)$))");
+ valid_pattern ("ed$|(c((a|$b)$))");
+ valid_pattern ("ed$|$(c((a|b)$))");
+ valid_pattern ("ed$|($c((a|b)$))");
+ valid_pattern ("ed$|(c$((a|b)$))");
+ valid_pattern ("ed$|(c($(a|b)$))");
+ valid_pattern ("ed$|(c(($a|b)$))");
+ valid_pattern ("ed$|(c((a|$b)$))");
+ valid_pattern ("ed$|$(c((a$|b$)))");
+ valid_pattern ("ed$|($c((a$|b$)))");
+ valid_pattern ("ed$|(c$((a$|b$)))");
+ valid_pattern ("ed$|(c($(a$|b$)))");
+ valid_pattern ("ed$|(c(($a$|b$)))");
+ valid_pattern ("ed$|(c((a$|$b$)))");
+ valid_pattern ("^a(b|c)^|^d");
+ valid_pattern ("^a(b|c^)|^d");
+ valid_pattern ("^a(b|^c)|^d");
+ valid_pattern ("^a(b^|c)|^d");
+ valid_pattern ("^a(^b|c)|^d");
+ valid_pattern ("^a^(b|c)|^d");
+ valid_pattern ("$a(b$|c$)|d$");
+ valid_pattern ("a$(b$|c$)|d$");
+ valid_pattern ("a($b$|c$)|d$");
+ valid_pattern ("a(b$|$c$)|d$");
+ valid_pattern ("a(b$|c$)|$d$");
+ valid_pattern ("^(a^)(b|c)|^d");
+ valid_pattern ("^(a)^(b|c)|^d");
+ valid_pattern ("^(a)(^b|c)|^d");
+ valid_pattern ("^(a)(b^|c)|^d");
+ valid_pattern ("^(a)(b|^c)|^d");
+ valid_pattern ("^(a)(b|c^)|^d");
+ valid_pattern ("^(a)(b|c)^|^d");
+ valid_pattern ("(^a^)(b|c)|^d");
+ valid_pattern ("(^a)^(b|c)|^d");
+ valid_pattern ("(^a)(^b|c)|^d");
+ valid_pattern ("(^a)(b^|c)|^d");
+ valid_pattern ("(^a)(b|^c)|^d");
+ valid_pattern ("(^a)(b|c^)|^d");
+ valid_pattern ("(^a)(b|c)^|^d");
+
+ valid_pattern ("(a)(b$|c$)d$");
+ valid_pattern ("(a)(b|$c)$|d$");
+ valid_pattern ("(a)($b|c)$|d$");
+ valid_pattern ("(a)$(b|c)$|d$");
+ valid_pattern ("(a$)(b|c)$|d$");
+ valid_pattern ("($a)(b|c)$|d$");
+ valid_pattern ("$(a)(b|c)$|d$");
+ valid_pattern ("(b|c)($a)$|d$");
+ valid_pattern ("(b|c)$(a)$|d$");
+ valid_pattern ("(b|c$)(a)$|d$");
+ valid_pattern ("(b|$c)(a)$|d$");
+ valid_pattern ("(b$|c)(a)$|d$");
+ valid_pattern ("($b|c)(a)$|d$");
+ valid_pattern ("$(b|c)(a)$|d$");
+ valid_pattern ("(b|c)($a$)|d$");
+ valid_pattern ("(b|c)$(a$)|d$");
+ valid_pattern ("(b|c$)(a$)|d$");
+ valid_pattern ("(b|$c)(a$)|d$");
+ valid_pattern ("(b$|c)(a$)|d$");
+ valid_pattern ("($b|c)(a$)|d$");
+ valid_pattern ("$(b|c)(a$)|d$");
+ valid_pattern ("(a)$(b$|c$)|d$");
+ valid_pattern ("(a$)(b$|c$)|d$");
+ valid_pattern ("($a)(b$|c$)|d$");
+ valid_pattern ("$(a)(b$|c$)|d$");
+ valid_pattern ("^d|^(b^|c)(a)");
+ valid_pattern ("^d|^(b|c^)(a)");
+ valid_pattern ("^d|^(b|c)^(a)");
+ valid_pattern ("^d|^(b|c)(^a)");
+ valid_pattern ("^d|^(b|c)(a^)");
+ valid_pattern ("^d|^(b|c)(a)^");
+ valid_pattern ("^d|(^b|^c^)(a)");
+ valid_pattern ("^d|(^b|^c)^(a)");
+ valid_pattern ("^d|(^b|^c)(^a)");
+ valid_pattern ("^d|(^b|^c)(a^)");
+ valid_pattern ("^d|(^b|^c)(a)^");
+ valid_pattern ("d$|(b|c)($a$)");
+ valid_pattern ("d$|(b|c)$(a$)");
+ valid_pattern ("d$|(b|c$)(a$)");
+ valid_pattern ("d$|(b$|c)(a$)");
+ valid_pattern ("d$|($b|c)(a$)");
+ valid_pattern ("d$|$(b|c)(a$)");
+ valid_pattern ("d$|(b|c)($a)$");
+ valid_pattern ("d$|(b|c)$(a)$");
+ valid_pattern ("d$|(b|c$)(a)$");
+ valid_pattern ("d$|(b$|c)(a)$");
+ valid_pattern ("d$|($b|c)(a)$");
+ valid_pattern ("d$|$(b|c)(a)$");
+ valid_pattern ("^d|^(a^)(b|c)");
+ valid_pattern ("^d|^(a)^(b|c)");
+ valid_pattern ("^d|^(a)(^b|c)");
+ valid_pattern ("^d|^(a)(b^|c)");
+ valid_pattern ("^d|^(a)(b|^c)");
+ valid_pattern ("^d|^(a)(b|c^)");
+ valid_pattern ("^d|^(a)(b|c)^");
+ valid_pattern ("^d|(^a^)(b|c)");
+ valid_pattern ("^d|(^a)^(b|c)");
+ valid_pattern ("^d|(^a)(^b|c)");
+ valid_pattern ("^d|(^a)(b^|c)");
+ valid_pattern ("^d|(^a)(b|^c)");
+ valid_pattern ("^d|(^a)(b|c^)");
+ valid_pattern ("^d|(^a)(b|c)^");
+ valid_pattern ("d$|(a)$(b$|c$)");
+ valid_pattern ("d$|(a$)(b$|c$)");
+ valid_pattern ("d$|($a)(b$|c$)");
+ valid_pattern ("d$|$(a)(b$|c$)");
+ valid_pattern ("d$|(a)(b|$c)$");
+ valid_pattern ("d$|(a)($b|c)$");
+ valid_pattern ("d$|(a)$(b|c)$");
+ valid_pattern ("d$|(a$)(b|c)$");
+ valid_pattern ("d$|($a)(b|c)$");
+ valid_pattern ("d$|$(a)(b|c)$");
+ valid_pattern ("((^a|^b)|^c)|^d^");
+ valid_pattern ("((^a|^b)|^c)^|^d");
+ valid_pattern ("((^a|^b)|^c^)|^d");
+ valid_pattern ("((^a|^b)^|^c)|^d");
+ valid_pattern ("((^a|^b^)|^c)|^d");
+ valid_pattern ("((^a^|^b)|^c)|^d");
+ valid_pattern ("((a|b)|c)|$d$");
+ valid_pattern ("((a|b)|$c)|d$");
+ valid_pattern ("((a|$b)|c)|d$");
+ valid_pattern ("(($a|b)|c)|d$");
+ valid_pattern ("($(a|b)|c)|d$");
+ valid_pattern ("$((a|b)|c)|d$");
+ valid_pattern ("^d^|(c|(a|b))");
+ valid_pattern ("^d|(c^|(a|b))");
+ valid_pattern ("^d|(c|(a^|b))");
+ valid_pattern ("^d|(c|(a|b^))");
+ valid_pattern ("^d|(c|(a|b)^)");
+ valid_pattern ("^d|(c|(a|b))^");
+ valid_pattern ("d$|(c$|(a$|$b$))");
+ valid_pattern ("d$|(c$|($a$|b$))");
+ valid_pattern ("d$|($c$|(a$|b$))");
+ valid_pattern ("d$|$(c$|(a$|b$))");
+ valid_pattern ("$d$|(c$|(a$|b$))");
+ valid_pattern ("d$|(c$|(a|$b)$)");
+ valid_pattern ("d$|(c$|($a|b)$)");
+ valid_pattern ("d$|($c$|(a|b)$)");
+ valid_pattern ("d$|$(c$|(a|b)$)");
+ valid_pattern ("$d$|(c$|(a|b)$)");
+ valid_pattern ("d$|(c$|(a|$b))$");
+ valid_pattern ("d$|(c$|($a|b))$");
+ valid_pattern ("d$|($c$|(a|b))$");
+ valid_pattern ("d$|$(c$|(a|b))$");
+ valid_pattern ("$d$|(c$|(a|b))$");
+ valid_pattern ("^c^|(^a|^b)");
+ valid_pattern ("^c|(^a^|^b)");
+ valid_pattern ("^c|(^a|^b^)");
+ valid_pattern ("^c|(^a|^b)^");
+ valid_pattern ("c$|(a$|$b$)");
+ valid_pattern ("c$|($a$|b$)");
+ valid_pattern ("c$|$(a$|b$)");
+ valid_pattern ("$c$|(a$|b$)");
+ valid_pattern ("^d^(c|e((a|b)))");
+ valid_pattern ("^d(^c|e((a|b)))");
+ valid_pattern ("^d(c^|e((a|b)))");
+ valid_pattern ("^d(c|^e((a|b)))");
+ valid_pattern ("^d(c|e^((a|b)))");
+ valid_pattern ("^d(c|e(^(a|b)))");
+ valid_pattern ("^d(c|e((^a|b)))");
+ valid_pattern ("^d(c|e((a|^b)))");
+ valid_pattern ("^d(c|e((a|b^)))");
+ valid_pattern ("^d(c|e((a|b)^))");
+ valid_pattern ("^d(c|e((a|b))^)");
+ valid_pattern ("^d(c|e((a|b)))^");
+ valid_pattern ("d(c$|e($(a$|b$)))");
+ valid_pattern ("d(c$|e$((a$|b$)))");
+ valid_pattern ("d(c$|$e((a$|b$)))");
+ valid_pattern ("d($c$|e((a$|b$)))");
+ valid_pattern ("d$(c$|e((a$|b$)))");
+ valid_pattern ("$d(c$|e((a$|b$)))");
+ valid_pattern ("^d|^a^(b|c)");
+ valid_pattern ("^d|^a(^b|c)");
+ valid_pattern ("^d|^a(b^|c)");
+ valid_pattern ("^d|^a(b|^c)");
+ valid_pattern ("^d|^a(b|c^)");
+ valid_pattern ("^d|^a(b|c)^");
+ valid_pattern ("d$|a($b$|c$)");
+ valid_pattern ("d$|a$(b$|c$)");
+ valid_pattern ("d$|$a(b$|c$)");
+ valid_pattern ("$d$|a(b$|c$)");
+ valid_pattern ("^d|^(b^|c)a");
+ valid_pattern ("^d|^(b|c^)a");
+ valid_pattern ("^d|^(b|c)^a");
+ valid_pattern ("^d|^(b|c)a^");
+ valid_pattern ("d$|(b|c)$a$");
+ valid_pattern ("d$|(b|c$)a$");
+ valid_pattern ("d$|(b|$c)a$");
+ valid_pattern ("d$|(b$|c)a$");
+ valid_pattern ("d$|($b|c)a$");
+ valid_pattern ("d$|$(b|c)a$");
+ valid_pattern ("$d$|(b|c)a$");
+
+ /* xx Do these use all the valid_nonposix_pattern ones in other_test.c? */
+
+ TEST_SEARCH ("(^a|^b)c", "ac", 0, 2);
+ TEST_SEARCH ("(^a|^b)c", "bc", 0, 2);
+ TEST_SEARCH ("c(a$|b$)", "ca", 0, 2);
+ TEST_SEARCH ("c(a$|b$)", "cb", 0, 2);
+ TEST_SEARCH ("^(a|b)|^c", "ad", 0, 2);
+ TEST_SEARCH ("^(a|b)|^c", "bd", 0, 2);
+ TEST_SEARCH ("(a|b)$|c$", "da", 0, 2);
+ TEST_SEARCH ("(a|b)$|c$", "db", 0, 2);
+ TEST_SEARCH ("(a|b)$|c$", "dc", 0, 2);
+ TEST_SEARCH ("(^a|^b)|^c", "ad", 0, 2);
+ TEST_SEARCH ("(^a|^b)|^c", "bd", 0, 2);
+ TEST_SEARCH ("(^a|^b)|^c", "cd", 0, 2);
+ TEST_SEARCH ("(a$|b$)|c$", "da", 0, 2);
+ TEST_SEARCH ("(a$|b$)|c$", "db", 0, 2);
+ TEST_SEARCH ("(a$|b$)|c$", "dc", 0, 2);
+ TEST_SEARCH ("^c|(^a|^b)", "ad", 0, 2);
+ TEST_SEARCH ("^c|(^a|^b)", "bd", 0, 2);
+ TEST_SEARCH ("^c|(^a|^b)", "cd", 0, 2);
+ TEST_SEARCH ("c$|(a$|b$)", "da", 0, 2);
+ TEST_SEARCH ("c$|(a$|b$)", "db", 0, 2);
+ TEST_SEARCH ("c$|(a$|b$)", "dc", 0, 2);
+ TEST_SEARCH ("^c|^(a|b)", "ad", 0, 2);
+ TEST_SEARCH ("^c|^(a|b)", "bd", 0, 2);
+ TEST_SEARCH ("^c|^(a|b)", "cd", 0, 2);
+ TEST_SEARCH ("c$|(a|b)$", "da", 0, 2);
+ TEST_SEARCH ("c$|(a|b)$", "db", 0, 2);
+ TEST_SEARCH ("c$|(a|b)$", "dc", 0, 2);
+ TEST_SEARCH ("(^a|^b)c|^d", "ace", 0, 3);
+ TEST_SEARCH ("(^a|^b)c|^d", "bce", 0, 3);
+ TEST_SEARCH ("(^a|^b)c|^d", "de", 0, 2);
+ TEST_SEARCH ("(a|b)c$|d$", "eac", 0, 3);
+ TEST_SEARCH ("(a|b)c$|d$", "ebc", 0, 3);
+ TEST_SEARCH ("(a|b)c$|d$", "ed", 0, 3);
+ TEST_SEARCH ("^d|^c(a|b)", "cae", 0, 3);
+ TEST_SEARCH ("^d|^c(a|b)", "cbe", 0, 3);
+ TEST_SEARCH ("^d|^c(a|b)", "de", 0, 3);
+ TEST_SEARCH ("d$|c(a$|b$)", "eca", 0, 3);
+ TEST_SEARCH ("d$|c(a$|b$)", "ecb", 0, 3);
+ TEST_SEARCH ("d$|c(a$|b$)", "ed", 0, 3);
+
+ TEST_SEARCH ("(((^a|^b))c|^d)e", "acef", 0, 4);
+ TEST_SEARCH ("(((^a|^b))c|^d)e", "bcef", 0, 4);
+ TEST_SEARCH ("(((^a|^b))c|^d)e", "def", 0, 3);
+
+ TEST_SEARCH ("((^(a|b))c|^d)e", "acef", 0, 4);
+ TEST_SEARCH ("((^(a|b))c|^d)e", "bcef", 0, 4);
+ TEST_SEARCH ("((^(a|b))c|^d)e", "def", 0, 3);
+
+ TEST_SEARCH ("(^((a|b))c|^d)e", "acef", 0, 4);
+ TEST_SEARCH ("(^((a|b))c|^d)e", "bcef", 0, 4);
+ TEST_SEARCH ("(^((a|b))c|^d)e", "def", 0, 3);
+
+ TEST_SEARCH ("(((a|b))c|d)e$", "face", 0, 4);
+ TEST_SEARCH ("(((a|b))c|d)e$", "fbce", 0, 4);
+ TEST_SEARCH ("(((a|b))c|d)e$", "fde", 0, 3);
+
+ TEST_SEARCH ("^e(d|c((a|b)))", "edf", 0, 3);
+ TEST_SEARCH ("^e(d|c((a|b)))", "ecaf", 0, 4);
+ TEST_SEARCH ("^e(d|c((a|b)))", "ecbf", 0, 4);
+
+ TEST_SEARCH ("e(d$|c((a$|b$)))", "fed", 0, 3);
+ TEST_SEARCH ("e(d$|c((a$|b$)))", "feca", 0, 4);
+ TEST_SEARCH ("e(d$|c((a$|b$)))", "fecb", 0, 4);
+
+ TEST_SEARCH ("e(d$|c((a|b)$))", "fed", 0, 3);
+ TEST_SEARCH ("e(d$|c((a|b)$))", "feca", 0, 4);
+ TEST_SEARCH ("e(d$|c((a|b)$))", "fecb", 0, 4);
+
+ TEST_SEARCH ("e(d$|c((a|b))$)", "fed", 0, 3);
+ TEST_SEARCH ("e(d$|c((a|b))$)", "feca", 0, 3);
+ TEST_SEARCH ("e(d$|c((a|b))$)", "fecb", 0, 3);
+
+ TEST_SEARCH ("e(d$|c((a|b)))$", "fed", 0, 3);
+ TEST_SEARCH ("e(d$|c((a|b)))$", "feca", 0, 3);
+ TEST_SEARCH ("e(d$|c((a|b)))$", "fecb", 0, 3);
+
+ TEST_SEARCH ("(((^a|^b))c)|^de", "acf", 0, 3);
+ TEST_SEARCH ("(((^a|^b))c)|^de", "bcf", 0, 3);
+ TEST_SEARCH ("(((^a|^b))c)|^de", "def", 0, 3);
+
+ TEST_SEARCH ("(((a|b))c$)|de$", "fac", 0, 3);
+ TEST_SEARCH ("(((a|b))c$)|de$", "fbc", 0, 3);
+ TEST_SEARCH ("(((a|b))c$)|de$", "fde", 0, 3);
+
+ TEST_SEARCH ("(((a|b))c)$|de$", "fac", 0, 3);
+ TEST_SEARCH ("(((a|b))c)$|de$", "fbc", 0, 3);
+ TEST_SEARCH ("(((a|b))c)$|de$", "fde", 0, 3);
+
+ TEST_SEARCH ("^ed|^(c((a|b)))", "edf", 0, 3);
+ TEST_SEARCH ("^ed|^(c((a|b)))", "caf", 0, 3);
+ TEST_SEARCH ("^ed|^(c((a|b)))", "cbf", 0, 3);
+
+ TEST_SEARCH ("^ed|(^c((a|b)))", "edf", 0, 3);
+ TEST_SEARCH ("^ed|(^c((a|b)))", "caf", 0, 3);
+ TEST_SEARCH ("^ed|(^c((a|b)))", "cbf", 0, 3);
+
+ TEST_SEARCH ("ed$|(c((a|b)))$", "fed", 0, 3);
+ TEST_SEARCH ("ed$|(c((a|b)))$", "fca", 0, 3);
+ TEST_SEARCH ("ed$|(c((a|b)))$", "fcb", 0, 3);
+
+ TEST_SEARCH ("ed$|(c((a|b))$)", "fed", 0, 3);
+ TEST_SEARCH ("ed$|(c((a|b))$)", "fca", 0, 3);
+ TEST_SEARCH ("ed$|(c((a|b))$)", "fcb", 0, 3);
+
+ TEST_SEARCH ("ed$|(c((a|b)$))", "fed", 0, 3);
+ TEST_SEARCH ("ed$|(c((a|b)$))", "fca", 0, 3);
+ TEST_SEARCH ("ed$|(c((a|b)$))", "fcb", 0, 3);
+
+ TEST_SEARCH ("ed$|(c((a$|b$)))", "fed", 0, 3);
+ TEST_SEARCH ("ed$|(c((a$|b$)))", "fca", 0, 3);
+ TEST_SEARCH ("ed$|(c((a$|b$)))", "fcb", 0, 3);
+
+ TEST_SEARCH ("^a(b|c)|^d", "abe", 0, 3);
+ TEST_SEARCH ("^a(b|c)|^d", "ace", 0, 3);
+ TEST_SEARCH ("^a(b|c)|^d", "df", 0, 2);
+
+ TEST_SEARCH ("a(b$|c$)|d$", "fab", 0, 3);
+ TEST_SEARCH ("a(b$|c$)|d$", "fac", 0, 3);
+ TEST_SEARCH ("a(b$|c$)|d$", "fd", 0, 2);
+
+ TEST_SEARCH ("^(a)(b|c)|^d", "abe", 0, 3);
+ TEST_SEARCH ("^(a)(b|c)|^d", "ace", 0, 3);
+ TEST_SEARCH ("^(a)(b|c)|^d", "df", 0, 2);
+
+ TEST_SEARCH ("(^a)(b|c)|^d", "abe", 0, 3);
+ TEST_SEARCH ("(^a)(b|c)|^d", "ace", 0, 3);
+ TEST_SEARCH ("(^a)(b|c)|^d", "df", 0, 2);
+
+ TEST_SEARCH ("(a)(b|c)$|d$", "fab", 0, 3);
+ TEST_SEARCH ("(a)(b|c)$|d$", "fac", 0, 3);
+ TEST_SEARCH ("(a)(b|c)$|d$", "fd", 0, 2);
+
+ TEST_SEARCH ("(b|c)(a)$|d$", "fba", 0, 3);
+ TEST_SEARCH ("(b|c)(a)$|d$", "fca", 0, 3);
+ TEST_SEARCH ("(b|c)(a)$|d$", "fd", 0, 2);
+
+ TEST_SEARCH ("(b|c)(a$)|d$", "fba", 0, 3);
+ TEST_SEARCH ("(b|c)(a$)|d$", "fca", 0, 3);
+ TEST_SEARCH ("(b|c)(a$)|d$", "fd", 0, 2);
+
+ TEST_SEARCH ("(a)(b$|c$)|d$", "fab", 0, 3);
+ TEST_SEARCH ("(a)(b$|c$)|d$", "fac", 0, 3);
+ TEST_SEARCH ("(a)(b$|c$)|d$", "fd", 0, 2);
+
+ TEST_SEARCH ("^d|^(b|c)(a)", "df", 0, 2);
+ TEST_SEARCH ("^d|^(b|c)(a)", "baf", 0, 3);
+ TEST_SEARCH ("^d|^(b|c)(a)", "caf", 0, 3);
+
+ TEST_SEARCH ("^d|(^b|^c)(a)", "df", 0, 2);
+ TEST_SEARCH ("^d|(^b|^c)(a)", "baf", 0, 3);
+ TEST_SEARCH ("^d|(^b|^c)(a)", "caf", 0, 3);
+
+ TEST_SEARCH ("d$|(b|c)(a$)", "fd", 0, 2);
+ TEST_SEARCH ("d$|(b|c)(a$)", "fba", 0, 3);
+ TEST_SEARCH ("d$|(b|c)(a$)", "fca", 0, 3);
+
+ TEST_SEARCH ("d$|(b|c)(a)$", "fd", 0, 2);
+ TEST_SEARCH ("d$|(b|c)(a)$", "fba", 0, 3);
+ TEST_SEARCH ("d$|(b|c)(a)$", "fca", 0, 3);
+
+ TEST_SEARCH ("d$|(b|c)(a$)", "fd", 0, 2);
+ TEST_SEARCH ("d$|(b|c)(a$)", "fba", 0, 3);
+ TEST_SEARCH ("d$|(b|c)(a$)", "fca", 0, 3);
+
+ TEST_SEARCH ("^d|^(a)(b|c)", "df", 0, 2);
+ TEST_SEARCH ("^d|^(a)(b|c)", "abf", 0, 3);
+ TEST_SEARCH ("^d|^(a)(b|c)", "acf", 0, 3);
+
+ TEST_SEARCH ("^d|(^a)(b|c)", "df", 0, 2);
+ TEST_SEARCH ("^d|(^a)(b|c)", "abf", 0, 3);
+ TEST_SEARCH ("^d|(^a)(b|c)", "acf", 0, 3);
+
+ TEST_SEARCH ("d$|(a)(b$|c$)", "fd", 0, 2);
+ TEST_SEARCH ("d$|(a)(b$|c$)", "fab", 0, 3);
+ TEST_SEARCH ("d$|(a)(b$|c$)", "fac", 0, 3);
+
+ TEST_SEARCH ("d$|(a)(b|c)$", "fd", 0, 2);
+ TEST_SEARCH ("d$|(a)(b|c)$", "fab", 0, 3);
+ TEST_SEARCH ("d$|(a)(b|c)$", "fac", 0, 3);
+
+ TEST_SEARCH ("((^a|^b)|^c)|^d", "ae", 0, 2);
+ TEST_SEARCH ("((^a|^b)|^c)|^d", "be", 0, 2);
+ TEST_SEARCH ("((^a|^b)|^c)|^d", "ce", 0, 2);
+ TEST_SEARCH ("((^a|^b)|^c)|^d", "de", 0, 2);
+
+ TEST_SEARCH ("((a|b)|c)|d$", "ed", 0, 2);
+ TEST_SEARCH ("((a|b)|c)|d$", "ea", 0, 2);
+ TEST_SEARCH ("((a|b)|c)|d$", "eb", 0, 2);
+ TEST_SEARCH ("((a|b)|c)|d$", "ec", 0, 2);
+
+ TEST_SEARCH ("^d|(c|(a|b))", "de", 0, 2);
+
+ TEST_SEARCH ("d$|(c$|(a$|b$))", "ed", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a$|b$))", "ec", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a$|b$))", "ea", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a$|b$))", "eb", 0, 2);
+
+ TEST_SEARCH ("d$|(c$|(a|b)$)", "ed", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a|b)$)", "ec", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a|b)$)", "ea", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a|b)$)", "eb", 0, 2);
+
+ TEST_SEARCH ("d$|(c$|(a|b))$", "ed", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a|b))$", "ec", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a|b))$", "ea", 0, 2);
+ TEST_SEARCH ("d$|(c$|(a|b))$", "eb", 0, 2);
+
+ test_match ("a|^b", "b");
+ test_match ("a|b$", "b");
+ test_match ("^b|a", "b");
+ test_match ("b$|a", "b");
+ test_match ("(^a)", "a");
+ test_match ("(a$)", "a");
+ TEST_SEARCH ("c|^ab", "aba", 0, 3);
+ TEST_SEARCH ("c|ba$", "aba", 0, 3);
+ TEST_SEARCH ("^ab|c", "aba", 0, 3);
+ TEST_SEARCH ("ba$|c", "aba", 0, 3);
+ TEST_SEARCH ("(^a)", "ab", 0, 2);
+ TEST_SEARCH ("(a$)", "ba", 0, 2);
+
+ TEST_SEARCH ("(^a$)", "a", 0, 1);
+ TEST_SEARCH ("(^a)", "ab", 0, 2);
+ TEST_SEARCH ("(b$)", "ab", 0, 2);
+
+ /* Backtracking. */
+ /* Per POSIX D11.1 p. 108, leftmost longest match. */
+ test_match ("(wee|week)(knights|night)", "weeknights");
+
+ test_match ("(fooq|foo)qbar", "fooqbar");
+ test_match ("(fooq|foo)(qbarx|bar)", "fooqbarx");
+
+ /* Take first alternative that does the longest match. */
+ test_all_registers ("(fooq|(foo)|(fo))((qbarx)|(oqbarx)|bar)", "fooqbarx",
+ "", 0, 8, 0, 3, 0, 3, -1, -1, 3, 8, 3, 8, -1, -1, -1, -1, -1, -1,
+ -1, -1);
+
+ test_match ("(fooq|foo)*qbar", "fooqbar");
+ test_match ("(fooq|foo)*(qbar)", "fooqbar");
+ test_match ("(fooq|foo)*(qbar)*", "fooqbar");
+
+ test_match ("(fooq|fo|o)*qbar", "fooqbar");
+ test_match ("(fooq|fo|o)*(qbar)", "fooqbar");
+ test_match ("(fooq|fo|o)*(qbar)*", "fooqbar");
+
+ test_match ("(fooq|fo|o)*(qbar|q)*", "fooqbar");
+ test_match ("(fooq|foo)*(qbarx|bar)", "fooqbarx");
+ test_match ("(fooq|foo)*(qbarx|bar)*", "fooqbarx");
+
+ test_match ("(fooq|fo|o)+(qbar|q)+", "fooqbar");
+ test_match ("(fooq|foo)+(qbarx|bar)", "fooqbarx");
+ test_match ("(fooq|foo)+(qbarx|bar)+", "fooqbarx");
+
+ /* Per Mike Haertel. */
+ test_match ("(foo|foobarfoo)(bar)*", "foobarfoo");
+
+ /* Combination. */
+ test_match ("[ab]?c", "ac");
+ test_match ("[ab]*c", "ac");
+ test_match ("[ab]+c", "ac");
+ test_match ("(a|b)?c", "ac");
+ test_match ("(a|b)*c", "ac");
+ test_match ("(a|b)+c", "ac");
+ test_match ("(a*c)?b", "b");
+ test_match ("(a*c)+b", "aacb");
+ /* Registers. */
+ /* Per David A. Willcox. */
+ test_match ("a((b)|(c))d", "acd");
+ test_all_registers ("a((b)|(c))d", "acd", "", 0, 3, 1, 2, -1, -1, 1, 2,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+
+ /* Extended regular expressions, continued; these don't match their strings. */
+ test_should_match = false;
+
+#if 0
+ /* Invalid use of special characters. */
+ /* These are not invalid anymore, since POSIX says the behavior is
+ undefined, and we prefer context-independent to context-invalid. */
+ invalid_pattern (REG_BADRPT, "*");
+ invalid_pattern (REG_BADRPT, "a|*");
+ invalid_pattern (REG_BADRPT, "(*)");
+ invalid_pattern (REG_BADRPT, "^*");
+ invalid_pattern (REG_BADRPT, "+");
+ invalid_pattern (REG_BADRPT, "a|+");
+ invalid_pattern (REG_BADRPT, "(+)");
+ invalid_pattern (REG_BADRPT, "^+");
+
+ invalid_pattern (REG_BADRPT, "?");
+ invalid_pattern (REG_BADRPT, "a|?");
+ invalid_pattern (REG_BADRPT, "(?)");
+ invalid_pattern (REG_BADRPT, "^?");
+
+ invalid_pattern (REG_BADPAT, "|");
+ invalid_pattern (REG_BADPAT, "a|");
+ invalid_pattern (REG_BADPAT, "a||");
+ invalid_pattern (REG_BADPAT, "(|a)");
+ invalid_pattern (REG_BADPAT, "(a|)");
+
+ invalid_pattern (REG_BADPAT, PARENS_TO_OPS ("(|)"));
+
+ invalid_pattern (REG_BADRPT, "{1}");
+ invalid_pattern (REG_BADRPT, "a|{1}");
+ invalid_pattern (REG_BADRPT, "^{1}");
+ invalid_pattern (REG_BADRPT, "({1})");
+
+ invalid_pattern (REG_BADPAT, "|b");
+
+ invalid_pattern (REG_BADRPT, "^{0,}*");
+ invalid_pattern (REG_BADRPT, "$*");
+ invalid_pattern (REG_BADRPT, "${0,}*");
+#endif /* 0 */
+
+ invalid_pattern (REG_EESCAPE, "\\");
+
+ test_match ("a?b", "a");
+
+
+ test_match ("a+", "");
+ test_match ("a+b", "a");
+ test_match ("a?", "b");
+
+#if 0
+ /* We make empty groups valid now, since they are undefined in POSIX.
+ (13 Sep 92) */
+ /* Subexpressions. */
+ invalid_pattern (REG_BADPAT, "()");
+ invalid_pattern (REG_BADPAT, "a()");
+ invalid_pattern (REG_BADPAT, "()b");
+ invalid_pattern (REG_BADPAT, "a()b");
+ invalid_pattern (REG_BADPAT, "()*");
+ invalid_pattern (REG_BADPAT, "(()*");
+#endif
+ /* Invalid intervals. */
+ test_match ("a{2}*", "aaa");
+ test_match ("a{2}?", "aaa");
+ test_match ("a{2}+", "aaa");
+ test_match ("a{2}{2}", "aaa");
+ test_match ("a{1}{1}{2}", "aaa");
+ test_match ("a{1}{1}{2}", "a");
+ /* Invalid alternation. */
+ test_match ("a|b", "c");
+
+ TEST_SEARCH ("c|^ba", "aba", 0, 3);
+ TEST_SEARCH ("c|ab$", "aba", 0, 3);
+ TEST_SEARCH ("^ba|c", "aba", 0, 3);
+ TEST_SEARCH ("ab$|c", "aba", 0, 3);
+ /* Invalid anchoring. */
+ TEST_SEARCH ("(^a)", "ba", 0, 2);
+ TEST_SEARCH ("(b$)", "ba", 0, 2);
+
+ printf ("\nFinished POSIX extended tests.\n");
+}
+
+
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/psx-generic.c b/gnu/lib/libregex/test/psx-generic.c
new file mode 100644
index 0000000..340e938
--- /dev/null
+++ b/gnu/lib/libregex/test/psx-generic.c
@@ -0,0 +1,336 @@
+/* psx-generic.c: test POSIX re's independent of us using basic or
+ extended syntax. */
+
+#include "test.h"
+
+
+void
+test_posix_generic ()
+{
+ int omit_generic_tests = 0; /* reset in debugger to skip */
+
+ if (omit_generic_tests)
+ return;
+ /* Tests somewhat in the order of P1003.2. */
+
+ /* Both posix basic and extended; should match. */
+
+ printf ("\nStarting generic POSIX tests.\n");
+ test_grouping ();
+ test_intervals ();
+
+ test_should_match = true;
+ /* Ordinary characters. */
+ printf ("\nContinuing generic POSIX tests.\n");
+
+ MATCH_SELF ("");
+ test_fastmap ("", "", 0, 0);
+ test_fastmap_search ("", "", "", 0, 0, 2, 0, 0);
+ TEST_REGISTERS ("", "", 0, 0, -1, -1, -1, -1);
+ TEST_SEARCH ("", "", 0, 0);
+ TEST_SEARCH_2 ("", "", "", 0, 1, 0);
+
+ MATCH_SELF ("abc");
+ test_fastmap ("abc", "a", 0, 0);
+ TEST_REGISTERS ("abc", "abc", 0, 3, -1, -1, -1, -1);
+ TEST_REGISTERS ("abc", "xabcx", 1, 4, -1, -1, -1, -1);
+
+ test_match ("\\a","a");
+ test_match ("\\0", "0");
+
+ TEST_SEARCH ("a", "ab", 0, 2);
+ TEST_SEARCH ("b", "ab", 0, 2);
+ TEST_SEARCH ("a", "ab", 1, -2);
+ TEST_SEARCH_2 ("a", "a", "b", 0, 2, 2);
+ TEST_SEARCH_2 ("b", "a", "b", 0, 2, 2);
+ TEST_SEARCH_2 ("a", "a", "b", 1, -2, 2);
+
+ test_match ("\n", "\n");
+ test_match ("a\n", "a\n");
+ test_match ("\nb", "\nb");
+ test_match ("a\nb", "a\nb");
+
+ TEST_SEARCH ("b", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 236, -237);
+ /* Valid use of special characters. */
+ test_match ("a*", "aa");
+ test_fastmap ("a*", "a", 0, 0);
+ TEST_REGISTERS ("a*", "aa", 0, 2, -1, -1, -1, -1);
+
+ test_match ("a*b", "aab");
+ test_fastmap ("a*b", "ab", 0, 0);
+
+ test_match ("a*ab", "aab");
+ TEST_REGISTERS ("a*a", "aa", 0, 2, -1, -1, -1, -1);
+ TEST_REGISTERS ("a*a", "xaax", 1, 3, -1, -1, -1, -1);
+
+ test_match ("\\{", "{");
+ test_match ("\\^", "^");
+ test_match ("\\.", ".");
+ test_match ("\\*", "*");
+ test_match ("\\[", "[");
+ test_match ("\\$", "$");
+ test_match ("\\\\", "\\");
+
+ test_match ("ab*", "a");
+ test_match ("ab*", "abb");
+
+ /* Valid consecutive repetitions. */
+ test_match ("a**", "a");
+ /* Valid period. */
+ test_match (".", "a");
+ TEST_REGISTERS (".", "a", 0, 1, -1, -1, -1, -1);
+ test_match (".", "\004");
+ test_match (".", "\n");
+ /* Valid bracket expressions. */
+ test_match ("[ab]", "a");
+ test_match ("[ab]", "b");
+ test_fastmap ("[ab]", "ab", 0, 0);
+ TEST_REGISTERS ("[ab]", "a", 0, 1, -1, -1, -1, -1);
+ TEST_REGISTERS ("[ab]", "xax", 1, 2, -1, -1, -1, -1);
+
+ test_fastmap ("[^ab]", "ab", 1, 1);
+ test_match ("[^ab]", "c");
+ test_match ("[^a]", "\n");
+
+ test_match ("[a]*a", "aa");
+
+ test_match ("[[]", "[");
+ test_match ("[]]", "]");
+ test_match ("[.]", ".");
+ test_match ("[*]", "*");
+ test_match ("[\\]", "\\");
+ test_match ("[\\(]", "(");
+ test_match ("[\\)]", ")");
+ test_match ("[^]]", "a");
+ test_match ("[a^]", "^");
+ test_match ("[a$]", "$");
+ test_match ("[]a]", "]");
+ test_match ("[a][]]", "a]");
+ test_match ("[\n]", "\n");
+ test_match ("[^a]", "\n");
+ test_match ("[a-]", "a");
+
+ TEST_REGISTERS ("\\`[ \t\n]*", " karl (Karl Berry)", 0, 1, -1, -1, -1, -1);
+ TEST_REGISTERS ("[ \t\n]*\\'", " karl (Karl Berry)", 18, 18, -1, -1, -1, -1);
+
+ /* Collating, noncollating,
+ equivalence classes aren't
+ implemented yet. */
+
+
+ /* Character classes. */
+ test_match ("[:alpha:]", "p");
+ test_match ("[[:alpha:]]", "a");
+ test_match ("[[:alpha:]]", "z");
+ test_match ("[[:alpha:]]", "A");
+ test_match ("[[:alpha:]]", "Z");
+ test_match ("[[:upper:]]", "A");
+ test_match ("[[:upper:]]", "Z");
+ test_match ("[[:lower:]]", "a");
+ test_match ("[[:lower:]]", "z");
+
+ test_match ("[[:digit:]]", "0");
+ test_match ("[[:digit:]]", "9");
+ test_fastmap ("[[:digit:]]", "0123456789", 0, 0);
+
+ test_match ("[[:alnum:]]", "0");
+ test_match ("[[:alnum:]]", "9");
+ test_match ("[[:alnum:]]", "a");
+ test_match ("[[:alnum:]]", "z");
+ test_match ("[[:alnum:]]", "A");
+ test_match ("[[:alnum:]]", "Z");
+ test_match ("[[:xdigit:]]", "0");
+ test_match ("[[:xdigit:]]", "9");
+ test_match ("[[:xdigit:]]", "A");
+ test_match ("[[:xdigit:]]", "F");
+ test_match ("[[:xdigit:]]", "a");
+ test_match ("[[:xdigit:]]", "f");
+ test_match ("[[:space:]]", " ");
+ test_match ("[[:print:]]", " ");
+ test_match ("[[:print:]]", "~");
+ test_match ("[[:punct:]]", ",");
+ test_match ("[[:graph:]]", "!");
+ test_match ("[[:graph:]]", "~");
+ test_match ("[[:cntrl:]]", "\177");
+ test_match ("[[:digit:]a]", "a");
+ test_match ("[[:digit:]a]", "2");
+ test_match ("[a[:digit:]]", "a");
+ test_match ("[a[:digit:]]", "2");
+ test_match ("[[:]", "[");
+ test_match ("[:]", ":");
+ test_match ("[[:a]", "[");
+ test_match ("[[:alpha:a]", "[");
+ /* Valid ranges. */
+ test_match ("[a-a]", "a");
+ test_fastmap ("[a-a]", "a", 0, 0);
+ TEST_REGISTERS ("[a-a]", "xax", 1, 2, -1, -1, -1, -1);
+
+ test_match ("[a-z]", "z");
+ test_fastmap ("[a-z]", "abcdefghijklmnopqrstuvwxyz", 0, 0);
+ test_match ("[-a]", "-"); /* First */
+ test_match ("[-a]", "a");
+ test_match ("[a-]", "-"); /* Last */
+ test_match ("[a-]", "a");
+ test_match ("[--@]", "@"); /* First and starting point. */
+
+ test_match ("[%--a]", "%"); /* Ending point. */
+ test_match ("[%--a]", "-"); /* Ditto. */
+
+ test_match ("[a%--]", "%"); /* Both ending point and last. */
+ test_match ("[a%--]", "-");
+ test_match ("[%--a]", "a"); /* Ending point only. */
+ test_match ("[a-c-f]", "e"); /* Piggyback. */
+
+ test_match ("[)-+--/]", "*");
+ test_match ("[)-+--/]", ",");
+ test_match ("[)-+--/]", "/");
+ test_match ("[[:digit:]-]", "-");
+ /* Concatenation ????*/
+ test_match ("[ab][cd]", "ac");
+ test_fastmap ("[ab][cd]", "ab", 0, 0);
+ TEST_REGISTERS ("[ab][cd]", "ad", 0, 2, -1, -1, -1, -1);
+ TEST_REGISTERS ("[ab][cd]", "xadx", 1, 3, -1, -1, -1, -1);
+
+ /* Valid expression anchoring. */
+ test_match ("^a", "a");
+ test_fastmap ("^a", "a", 0, 0);
+ TEST_REGISTERS ("^a", "ax", 0, 1, -1, -1, -1, -1);
+
+ test_match ("^", "");
+ TEST_REGISTERS ("^", "", 0, 0, -1, -1, -1, -1);
+ test_match ("$", "");
+ TEST_REGISTERS ("$", "", 0, 0, -1, -1, -1, -1);
+
+ test_match ("a$", "a");
+ test_fastmap ("a$", "a", 0, 0);
+ TEST_REGISTERS ("a$", "xa", 1, 2, -1, -1, -1, -1);
+
+ test_match ("^ab$", "ab");
+ test_fastmap ("^ab$", "a", 0, 0);
+ TEST_REGISTERS ("^a$", "a", 0, 1, -1, -1, -1, -1);
+
+ test_fastmap ("^$", "", 0, 0);
+ test_match ("^$", "");
+ TEST_REGISTERS ("^$", "", 0, 0, -1, -1, -1, -1);
+
+ TEST_SEARCH (PARENS_TO_OPS ("(^a)"), "ab", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ba", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("^(^a)"), "ab", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("(a$)$"), "ba", 0, 2);
+
+ /* Two strings. */
+ test_match_2 ("ab", "a", "b");
+ TEST_REGISTERS_2 ("ab", "a", "b", 0, 2, -1, -1, -1, -1);
+
+ test_match_2 ("a", "", "a");
+ test_match_2 ("a", "a", "");
+ test_match_2 ("ab", "a", "b");
+ /* (start)pos. */
+ TEST_POSITIONED_MATCH ("b", "ab", 1);
+ /* mstop. */
+ TEST_TRUNCATED_MATCH ("a", "ab", 1);
+
+
+ /* Both basic and extended, continued; should not match. */
+
+ test_should_match = false;
+ /* Ordinary characters. */
+ test_match ("abc", "ab");
+
+ TEST_SEARCH ("c", "ab", 0, 2);
+ TEST_SEARCH ("c", "ab", 0, 2);
+ TEST_SEARCH ("c", "ab", 1, -2);
+ TEST_SEARCH ("c", "ab", 0, 10);
+ TEST_SEARCH ("c", "ab", 1, -10);
+ TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2);
+ TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2);
+ TEST_SEARCH_2 ("c", "a", "b", 0, 2, 2);
+ TEST_SEARCH_2 ("c", "a", "b", 1, -2, 2);
+ TEST_SEARCH_2 ("c", "a", "b", 1, -2, 2);
+
+ TEST_SEARCH ("c", "baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 236, -237);
+
+ /* Invalid use of special characters. */
+ invalid_pattern (REG_EESCAPE, "\\");
+ invalid_pattern (REG_EESCAPE, "a\\");
+ invalid_pattern (REG_EESCAPE, "a*\\");
+ /* Invalid period. */
+ test_match (".", "");
+ /* Invalid bracket expressions. */
+ test_match ("[ab]", "c");
+ test_match ("[^b]", "b");
+ test_match ("[^]]", "]");
+
+ invalid_pattern (REG_EBRACK, "[");
+ invalid_pattern (REG_EBRACK, "[^");
+ invalid_pattern (REG_EBRACK, "[a");
+ invalid_pattern (REG_EBRACK, "[]");
+ invalid_pattern (REG_EBRACK, "[]a");
+ invalid_pattern (REG_EBRACK, "a[]a");
+
+
+ test_match ("[:alpha:]", "q"); /* Character classes. */
+ test_match ("[[:alpha:]]", "2");
+ test_match ("[[:upper:]]", "a");
+ test_match ("[[:lower:]]", "A");
+ test_match ("[[:digit:]]", "a");
+ test_match ("[[:alnum:]]", ":");
+ test_match ("[[:xdigit:]]", "g");
+ test_match ("[[:space:]]", "a");
+ test_match ("[[:print:]]", "\177");
+ test_match ("[[:punct:]]", "a");
+ test_match ("[[:graph:]]", " ");
+ test_match ("[[:cntrl:]]", "a");
+ invalid_pattern (REG_EBRACK, "[[:");
+ invalid_pattern (REG_EBRACK, "[[:alpha:");
+ invalid_pattern (REG_EBRACK, "[[:alpha:]");
+ invalid_pattern (REG_ECTYPE, "[[::]]");
+ invalid_pattern (REG_ECTYPE, "[[:a:]]");
+ invalid_pattern (REG_ECTYPE, "[[:alpo:]]");
+ invalid_pattern (REG_ECTYPE, "[[:a:]");
+
+ test_match ("[a-z]", "2"); /* Invalid ranges. */
+ test_match ("[^-a]", "-");
+ test_match ("[^a-]", "-");
+ test_match ("[)-+--/]", ".");
+ invalid_pattern (REG_ERANGE, "[z-a]"); /* Empty */
+ invalid_pattern (REG_ERANGE, "[a--]"); /* Empty */
+ invalid_pattern (REG_ERANGE, "[[:digit:]-9]");
+ invalid_pattern (REG_ERANGE, "[a-[:alpha:]]");
+ invalid_pattern (REG_ERANGE, "[a-");
+ invalid_pattern (REG_EBRACK, "[a-z");
+
+ test_match ("[ab][cd]", "ae"); /* Concatenation. */
+ test_match ("b*c", "b"); /* Star. */
+
+ /* Invalid anchoring. */
+ test_match ("^", "a");
+ test_match ("^a", "ba");
+ test_match ("$", "b");
+ test_match ("a$", "ab");
+ test_match ("^$", "a");
+ test_match ("^ab$", "a");
+
+ TEST_SEARCH ("^a", "b\na", 0, 3);
+ TEST_SEARCH ("b$", "b\na", 0, 3);
+
+ test_match_2 ("^a", "\n", "a");
+ test_match_2 ("a$", "a", "\n");
+
+ TEST_SEARCH (PARENS_TO_OPS ("(^a)"), "ba", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ab", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("^(^a)"), "ba", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("(a$)$"), "ab", 0, 2);
+
+ printf ("\nFinished generic POSIX tests.\n");
+}
+
+
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/psx-group.c b/gnu/lib/libregex/test/psx-group.c
new file mode 100644
index 0000000..08ae8a2
--- /dev/null
+++ b/gnu/lib/libregex/test/psx-group.c
@@ -0,0 +1,440 @@
+/* psx-group.c: test POSIX grouping, both basic and extended. */
+
+#include "test.h"
+
+
+void
+test_grouping ()
+{
+ printf ("\nStarting POSIX grouping tests.\n");
+
+ test_should_match = true;
+
+ test_fastmap (PARENS_TO_OPS ("(a)"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a)"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)"), "a", 0, 1, 0, 1, -1, -1);
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)"), "xax", 1, 2, 1, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a))"), "a");
+ test_fastmap (PARENS_TO_OPS ("((a))"), "a", 0, 0);
+ TEST_REGISTERS (PARENS_TO_OPS ("((a))"), "a", 0, 1, 0, 1, 0, 1);
+ TEST_REGISTERS (PARENS_TO_OPS ("((a))"), "xax", 1, 2, 1, 2, 1, 2);
+
+ test_fastmap (PARENS_TO_OPS ("(a)(b)"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a)(b)"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)(b)"), "ab", 0, 2, 0, 1, 1, 2);
+
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)(b)"), "xabx", 1, 3, 1, 2, 2, 3);
+
+ test_all_registers (PARENS_TO_OPS ("((a)(b))"), "ab", "", 0, 2, 0, 2, 0, 1,
+ 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+
+ /* Test that we simply ignore groups past the 255th. */
+ test_match (PARENS_TO_OPS ("((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((a))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))"), "a");
+
+
+ /* Per POSIX D11.1, p. 125. */
+
+ test_fastmap (PARENS_TO_OPS ("(a)*"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*"), "", 0, 0, -1, -1, -1, -1);
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*"), "aa", 0, 2, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)"), "", 0, 0, 0, 0, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)"), "a", 0, 1, 0, 1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)b"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)b"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)b"), "b", 0, 1, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*)b"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)b"), "ab", 0, 2, 0, 1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a*)b)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a*)b)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "", 0, 0, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a*)b)*"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "ab", 0, 2, 0, 2, 0, 1);
+
+ test_match (PARENS_TO_OPS ("((a*)b)*"), "abb");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abb", 0, 3, 2, 3, 2, 2);
+
+ test_match (PARENS_TO_OPS ("((a*)b)*"), "aabab");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "aabab", 0, 5, 3, 5, 3, 4);
+
+ test_match (PARENS_TO_OPS ("((a*)b)*"), "abbab");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abbab", 0, 5, 3, 5, 3, 4);
+
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "xabbabx", 0, 0, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a*)b)*"), "abaabaaaab");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b)*"), "abaabaaab", 0, 9, 5, 9, 5, 8);
+
+ test_fastmap (PARENS_TO_OPS ("(ab)*"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(ab)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "", 0, 0, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(ab)*"), "abab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "abab", 0, 4, 2, 4, -1, -1);
+
+ /* We match the empty string here. */
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab)*"), "xababx", 0, 0, -1, -1, -1, -1);
+
+ /* Per David A. Willcox. */
+ TEST_REGISTERS (PARENS_TO_OPS ("a(b*)c"), "ac", 0, 2, 1, 1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a)*b"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(a)*b"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "b", 0, 1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a)*b"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "ab", 0, 2, 0, 1, -1, -1);
+
+ test_match_2 (PARENS_TO_OPS ("(a)*b"), "a", "ab");
+ TEST_REGISTERS_2 (PARENS_TO_OPS ("(a)*b"), "a", "ab", 0, 3, 1, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a)*b"), "aab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*b"), "aab", 0, 3, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a)*a"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a)*a"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*a"), "a", 0, 1, -1, -1, -1, -1);
+
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "", 0, 0, 0, 0, 0, 0);
+
+ test_match (PARENS_TO_OPS ("((a*))*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*))*"), "", 0, 0, 0, 0, 0, 0);
+ test_match (PARENS_TO_OPS ("((a*))*"), "aa");
+
+ test_fastmap (PARENS_TO_OPS ("(a*)*b"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)*b"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "b", 0, 1, 0, 0, -1, -1);
+
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "xbx", 1, 2, 1, 1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*)*b"), "ab"); /* Per rms. */
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "ab", 0, 2, 0, 1, -1, -1);
+
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "xabx", 1, 3, 1, 2, -1, -1);
+
+ /* Test register restores. */
+ test_match (PARENS_TO_OPS ("(a*)*b"), "aab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b"), "aab", 0, 3, 0, 2, -1, -1);
+
+ TEST_REGISTERS_2 (PARENS_TO_OPS ("(a*)*b"), "a", "ab", 0, 3, 0, 2, -1, -1);
+
+ /* We are matching the empty string, with backtracking. */
+ test_fastmap (PARENS_TO_OPS ("(a*)a"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)a"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)a"), "a", 0, 1, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*)a"), "aa");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)a"), "aa", 0, 2, 0, 1, -1, -1);
+
+ /* We are matching the empty string, with backtracking. */
+/*fails test_match (PARENS_TO_OPS ("(a*)*a"), "a"); */
+ test_match (PARENS_TO_OPS ("(a*)*a"), "aa");
+ /* Match the empty string. */
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "a", 0, 1, 0, 0, -1, -1);
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "xax", 1, 2, 1, 1, -1, -1);
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "aa", 0, 2, 0, 1, -1, -1);
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*a"), "xaax", 1, 3, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a)*ab"), "a", 0 , 0);
+ test_match (PARENS_TO_OPS ("(a)*ab"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*ab"), "ab", 0, 2, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a)*ab"), "aab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*ab"), "aab", 0, 3, 0, 1, -1, -1);
+
+ TEST_REGISTERS (PARENS_TO_OPS("(a)*ab"), "xaabx", 1, 4, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)ab"), "a", 0 , 0);
+ test_match (PARENS_TO_OPS ("(a*)ab"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "ab", 0, 2, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*)ab"), "aab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "aab", 0, 3, 0, 1, -1, -1);
+
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)ab"), "xaabx", 1, 4, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)*ab"), "a", 0 , 0);
+ test_match (PARENS_TO_OPS ("(a*)*ab"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*ab"), "ab", 0, 2, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*)*ab"), "aab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*ab"), "aab", 0, 3, 0, 1, -1, -1);
+
+ TEST_REGISTERS (PARENS_TO_OPS("(a*)*ab"), "xaabx", 1, 4, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)*b*c"), "abc", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)*b*c"), "c");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*b*c"), "c", 0, 1, 0, 0, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a)*(ab)*"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a)*(ab)*"), "ab");
+ /* Register 1 doesn't match at all (vs. matching the empty string)
+ because of backtracking, hence -1's. */
+ TEST_REGISTERS (PARENS_TO_OPS ("(a)*(ab)*"), "ab", 0, 2, -1, -1, 0, 2);
+
+ test_match (PARENS_TO_OPS ("(a*)*(ab)*"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*(ab)*"), "ab", 0, 2, 0, 0, 0, 2);
+
+ test_fastmap (PARENS_TO_OPS ("(a*b)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*b)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "", 0, 0, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b)*"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "b", 0, 1, 0, 1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b)*"), "baab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*"), "baab", 0, 4, 1, 4, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "", 0, 0, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "a", 0, 1, 0, 1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "ba");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "ba", 0, 2, 1, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 2, 0, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "aa");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "aa", 0, 2, 0, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "bb");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "bb", 0, 2, 0, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "aba");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "aba", 0, 3, 2, 3, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b*)b"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)b"), "b", 0, 1, 0, 0, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a*)*(b*)*)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "");
+ test_all_registers (PARENS_TO_OPS ("((a*)*(b*)*)*"), "", "", 0, 0, 0, 0,
+ 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "aba");
+ /* Perhaps register 3 should be 3/3 here? Not sure if standard
+ specifies this. xx*/
+ test_all_registers (PARENS_TO_OPS ("((a*)*(b*)*)*"), "aba", "", 0, 3, 2, 3,
+ 2, 3, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a*)(b*))*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a*)(b*))*"), "");
+
+ test_all_registers (PARENS_TO_OPS ("((a*)(b*))*"), "", "", 0, 0, 0, 0,
+ 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "");
+
+ test_match (PARENS_TO_OPS ("((a*)(b*))*"), "aba");
+ test_all_registers (PARENS_TO_OPS ("((a*)(b*))*"), "aba", "", 0, 3, 2, 3,
+ 2, 3, 3, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a)*(b)*)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a)*(b)*)*"), "");
+ test_all_registers (PARENS_TO_OPS ("((a)*(b)*)*"), "", "", 0, 0, 0, 0,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a)*(b)*)*"), "aba");
+
+ test_all_registers (PARENS_TO_OPS ("((a)*(b)*)*"), "aba", "", 0, 3, 2, 3,
+ 2, 3, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c", 0, 0);
+ test_match (PARENS_TO_OPS ("(c(a)*(b)*)*"), "");
+ test_all_registers (PARENS_TO_OPS ("(c(a)*(b)*)*"), "", "", 0, 0, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c");
+ test_all_registers (PARENS_TO_OPS ("(c(a)*(b)*)*"), "c", "", 0, 1, 0, 1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("c((a)*(b)*)*"), "c", 0, 0);
+ test_match (PARENS_TO_OPS ("c((a)*(b)*)*"), "c");
+ test_all_registers (PARENS_TO_OPS ("c((a)*(b)*)*"), "c", "", 0, 1, 1, 1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(((a)*(b)*)*)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(((a)*(b)*)*)*"), "");
+ test_all_registers (PARENS_TO_OPS ("(((a)*(b)*)*)*"), "", "", 0, 0, 0, 0,
+ 0, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "");
+ test_fastmap (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "c", 0, 0);
+
+ test_all_registers (PARENS_TO_OPS ("(c(c(a)*(b)*)*)*"), "", "", 0, 0, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a)*b)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a)*b)*"), "");
+
+ test_match (PARENS_TO_OPS ("((a)*b)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "", 0, 0, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a)*b)*"), "abb");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "abb", 0, 3, 2, 3, 0, 1); /*zz*/
+
+ test_match (PARENS_TO_OPS ("((a)*b)*"), "abbab");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "abbab", 0, 5, 3, 5, 3, 4);
+
+ /* We match the empty string here. */
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*"), "xabbabx", 0, 0, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*)*"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "", 0, 0, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*)*"), "aa");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*)*"), "aa", 0, 2, 0, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a*)*)*"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("((a*)*)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)*)*"), "", 0, 0, 0, 0, 0, 0);
+
+ test_match (PARENS_TO_OPS ("((a*)*)*"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)*)*"), "a", 0, 1, 0, 1, 0, 1);
+
+ test_fastmap (PARENS_TO_OPS ("(ab*)*"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("(ab*)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*"), "", 0, 0, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(ab*)*"), "aa");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*"), "aa", 0, 2, 1, 2, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(ab*)*c"), "ac", 0, 0);
+ test_match (PARENS_TO_OPS ("(ab*)*c"), "c");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "c", 0, 1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(ab*)*c"), "abbac");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "abbac", 0, 5, 3, 4, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(ab*)*c"), "abac");
+ TEST_REGISTERS (PARENS_TO_OPS ("(ab*)*c"), "abac", 0, 4, 2, 3, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*b)*c"), "abc", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*b)*c"), "c");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "c", 0, 1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b)*c"), "bbc");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "bbc", 0, 3, 1, 2, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b)*c"), "aababc");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "aababc", 0, 6, 3, 5, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(a*b)*c"), "aabaabc");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b)*c"), "aabaabc", 0, 7, 3, 6, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a*)b*)"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a*)b*)"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "", 0, 0, 0, 0, 0, 0);
+
+ test_match (PARENS_TO_OPS ("((a*)b*)"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "a", 0, 1, 0, 1, 0, 1);
+
+ test_match (PARENS_TO_OPS ("((a*)b*)"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)"), "b", 0, 1, 0, 1, 0, 0);
+
+ test_fastmap (PARENS_TO_OPS ("((a)*b*)"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("((a)*b*)"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "", 0, 0, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a)*b*)"), "a");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "a", 0, 1, 0, 1, 0, 1);
+
+ test_match (PARENS_TO_OPS ("((a)*b*)"), "b");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "b", 0, 1, 0, 1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("((a)*b*)"), "ab");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)"), "ab", 0, 2, 0, 2, 0, 1);
+
+ test_fastmap (PARENS_TO_OPS ("((a*)b*)c"), "abc", 0, 0);
+ test_match (PARENS_TO_OPS ("((a*)b*)c"), "c");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a*)b*)c"), "c", 0, 1, 0, 0, 0, 0);
+
+ test_fastmap (PARENS_TO_OPS ("((a)*b*)c"), "abc", 0, 0);
+ test_match (PARENS_TO_OPS ("((a)*b*)c"), "c");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b*)c"), "c", 0, 1, 0, 0, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(a*b*)*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "");
+ TEST_REGISTERS (PARENS_TO_OPS ("(a*b*)*"), "", 0, 0, 0, 0, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(((a*))((b*)))*"), "ab", 0, 0);
+ test_match (PARENS_TO_OPS ("(((a*))((b*)))*"), "");
+ test_all_registers (PARENS_TO_OPS ("(((a*))((b*)))*"), "", "", 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "abcde", 0, 0);
+ test_match (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "");
+ test_all_registers (PARENS_TO_OPS ("(c*((a*))d*((b*))e*)*"), "", "", 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+
+ test_fastmap (PARENS_TO_OPS ("((a)*b)*c"), "abc", 0, 0);
+ test_match (PARENS_TO_OPS ("((a)*b)*c"), "c");
+ TEST_REGISTERS (PARENS_TO_OPS ("((a)*b)*c"), "c", 0, 1, -1, -1, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(ab)*"), "");
+ test_match (PARENS_TO_OPS ("((ab)*)"), "");
+ test_match (PARENS_TO_OPS ("(((ab)*))"), "");
+ test_match (PARENS_TO_OPS ("((((ab)*)))"), "");
+ test_match (PARENS_TO_OPS ("(((((ab)*))))"), "");
+ test_match (PARENS_TO_OPS ("((((((ab)*)))))"), "");
+ test_match (PARENS_TO_OPS ("(((((((ab)*))))))"), "");
+ test_match (PARENS_TO_OPS ("((((((((ab)*)))))))"), "");
+ test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "");
+
+
+ test_fastmap (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "a", 0, 0);
+ test_match (PARENS_TO_OPS ("((((((((((ab)*)))))))))"), "");
+ test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "");
+ test_all_registers (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "", NULL,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1);
+
+ test_match (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "abab");
+ test_all_registers (PARENS_TO_OPS ("(((((((((ab)*))))))))"), "abab", NULL,
+ 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 0, 4, 2, 4);
+
+
+ test_should_match = false;
+
+ invalid_pattern (REG_EPAREN, PARENS_TO_OPS ("(a"));
+
+ test_match (PARENS_TO_OPS ("(a)"), "");
+ test_match (PARENS_TO_OPS ("((a))"), "b");
+ test_match (PARENS_TO_OPS ("(a)(b)"), "ac");
+ test_match (PARENS_TO_OPS ("(ab)*"), "acab");
+ test_match (PARENS_TO_OPS ("(a*)*b"), "c");
+ test_match (PARENS_TO_OPS ("(a*b)*"), "baa");
+ test_match (PARENS_TO_OPS ("(a*b)*"), "baabc");
+ test_match (PARENS_TO_OPS ("(a*b*)*"), "c");
+ test_match (PARENS_TO_OPS ("((a*)*(b*)*)*"), "c");
+ test_match (PARENS_TO_OPS ("(a*)*"), "ab");
+ test_match (PARENS_TO_OPS ("((a*)*)*"), "ab");
+ test_match (PARENS_TO_OPS ("((a*)*)*"), "b");
+ test_match (PARENS_TO_OPS ("(ab*)*"), "abc");
+ test_match (PARENS_TO_OPS ("(ab*)*c"), "abbad");
+ test_match (PARENS_TO_OPS ("(a*c)*b"), "aacaacd");
+ test_match (PARENS_TO_OPS ("(a*)"), "b");
+ test_match (PARENS_TO_OPS ("((a*)b*)"), "c");
+
+ /* Expression anchoring. */
+ TEST_SEARCH (PARENS_TO_OPS ("(^b)"), "ab", 0, 2);
+ TEST_SEARCH (PARENS_TO_OPS ("(a$)"), "ab", 0, 2);
+
+ printf ("\nFinished POSIX grouping tests.\n");
+}
diff --git a/gnu/lib/libregex/test/psx-interf.c b/gnu/lib/libregex/test/psx-interf.c
new file mode 100644
index 0000000..8312d5e
--- /dev/null
+++ b/gnu/lib/libregex/test/psx-interf.c
@@ -0,0 +1,624 @@
+/* psx-interf.c: test POSIX interface. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "test.h"
+
+#define ERROR_CODE_LENGTH 20
+#define TEST_ERRBUF_SIZE 15
+
+
+void test_compile ();
+
+
+/* Copy the symbolic name of ERROR_CODE (for example "REG_EPAREN") into
+   ANSWER and return ANSWER.  Zero yields "No error"; a value that is
+   not one of the codes listed below yields "Bad error code".
+
+   ANSWER should be at least ERROR_CODE_LENGTH long.  */
+
+static char *
+get_error_string (error_code, answer)
+    int error_code;
+    char answer[];
+{
+  switch (error_code)
+    {
+    case 0:            strcpy (answer, "No error"); break;
+    case REG_NOMATCH:  strcpy (answer, "REG_NOMATCH"); break;
+    case REG_BADPAT:   strcpy (answer, "REG_BADPAT"); break;
+    case REG_EPAREN:   strcpy (answer, "REG_EPAREN"); break;
+    case REG_ESPACE:   strcpy (answer, "REG_ESPACE"); break;
+    case REG_ECOLLATE: strcpy (answer, "REG_ECOLLATE"); break;
+    case REG_ECTYPE:   strcpy (answer, "REG_ECTYPE"); break;
+    case REG_EESCAPE:  strcpy (answer, "REG_EESCAPE"); break;
+    case REG_ESUBREG:  strcpy (answer, "REG_ESUBREG"); break;
+    case REG_EBRACK:   strcpy (answer, "REG_EBRACK"); break;
+    case REG_EBRACE:   strcpy (answer, "REG_EBRACE"); break;
+    case REG_BADBR:    strcpy (answer, "REG_BADBR"); break;
+    case REG_ERANGE:   strcpy (answer, "REG_ERANGE"); break;
+    case REG_BADRPT:   strcpy (answer, "REG_BADRPT"); break;
+    case REG_EEND:     strcpy (answer, "REG_EEND"); break;
+    /* test_regerror passes these two codes as well, so give them their
+       real names instead of letting them fall into the default.  */
+    case REG_ESIZE:    strcpy (answer, "REG_ESIZE"); break;
+    case REG_ERPAREN:  strcpy (answer, "REG_ERPAREN"); break;
+    default:           strcpy (answer, "Bad error code"); break;
+    }
+  return answer;
+}
+
+
+/* Put *PATTERN_BUFFER_PTR into a known empty state before it is handed
+   to regcomp: the pointer members are set to NULL and the size, count
+   and flag members to zero.  (Note kept from karl: probably not all of
+   these actually need to be initialized.)  */
+
+void
+init_pattern_buffer (pattern_buffer_ptr)
+    regex_t *pattern_buffer_ptr;
+{
+  regex_t *pb = pattern_buffer_ptr;
+
+  /* Pointer members.  */
+  pb->buffer = NULL;
+  pb->fastmap = NULL;
+  pb->translate = NULL;
+
+  /* Sizes and counts.  */
+  pb->allocated = 0;
+  pb->used = 0;
+  pb->re_nsub = 0;
+
+  /* Flags.  */
+  pb->fastmap_accurate = 0;
+  pb->can_be_null = 0;
+  pb->no_sub = 0;
+  pb->not_bol = 0;
+  pb->not_eol = 0;
+}
+
+
+/* Compile PATTERN into *PATTERN_BUFFER_PTR with regcomp, using CFLAGS,
+   and print a diagnostic for each of these discrepancies:
+     - VALID_PATTERN is nonzero but regcomp returned an error;
+     - VALID_PATTERN is zero but regcomp returned success;
+     - regcomp's return value differs from ERROR_CODE_EXPECTED.
+   After any diagnostic the pattern is dumped with print_pattern_info.
+
+   The buffer is reinitialized with init_pattern_buffer first.  This
+   function never calls regfree; releasing the buffer is left to the
+   caller.  */
+
+void
+test_compile (valid_pattern, error_code_expected, pattern,
+              pattern_buffer_ptr, cflags)
+    unsigned valid_pattern;
+    int error_code_expected;
+    const char *pattern;
+    regex_t *pattern_buffer_ptr;
+    int cflags;
+{
+  int error_code_returned;
+  boolean error = false;
+  char errbuf[TEST_ERRBUF_SIZE];
+
+  init_pattern_buffer (pattern_buffer_ptr);
+  error_code_returned = regcomp (pattern_buffer_ptr, pattern, cflags);
+
+  if (valid_pattern && error_code_returned)
+    {
+      printf ("\nShould have been a valid pattern but wasn't.\n");
+      regerror (error_code_returned, pattern_buffer_ptr, errbuf,
+                sizeof errbuf);
+      /* regerror is only given TEST_ERRBUF_SIZE bytes, so the text may
+         be cut short.  End the line so that the next diagnostic does
+         not run into it.  */
+      printf ("%s\n", errbuf);
+      error = true;
+    }
+
+  if (!valid_pattern && !error_code_returned)
+    {
+      printf ("\n\nInvalid pattern compiled as valid:\n");
+      error = true;
+    }
+
+  if (error_code_returned != error_code_expected)
+    {
+      char expected_error_string[ERROR_CODE_LENGTH];
+      char returned_error_string[ERROR_CODE_LENGTH];
+
+      get_error_string (error_code_expected, expected_error_string);
+      get_error_string (error_code_returned, returned_error_string);
+
+      printf (" Expected error code %s but got `%s'.\n",
+              expected_error_string, returned_error_string);
+
+      error = true;
+    }
+
+  if (error)
+    print_pattern_info (pattern, pattern_buffer_ptr);
+}
+
+
+/* Compile PATTERN with CFLAGS (it is expected to be valid) and print a
+   diagnostic if the compiled buffer's re_nsub field differs from
+   SUB_COUNT.  The buffer is released with regfree before returning.  */
+
+static void
+test_nsub (sub_count, pattern, cflags)
+    unsigned sub_count;
+    char *pattern;
+    int cflags;
+{
+  regex_t pattern_buffer;
+
+  test_compile (1, 0, pattern, &pattern_buffer, cflags);
+
+  if (pattern_buffer.re_nsub != sub_count)
+    {
+      /* Neither value is an `int', so `%d' was the wrong conversion.
+         Casting both to unsigned long makes the arguments match `%lu'
+         whatever unsigned type re_nsub has.  */
+      printf ("\nShould have counted %lu subexpressions but counted %lu instead.\n",
+              (unsigned long) sub_count,
+              (unsigned long) pattern_buffer.re_nsub);
+    }
+
+  regfree (&pattern_buffer);
+}
+
+
+static void
+test_regcomp ()
+{
+ regex_t pattern_buffer;
+ int cflags = 0;
+
+
+ printf ("\nStarting regcomp tests.\n");
+
+ cflags = 0;
+ test_compile (0, REG_ESUBREG, "\\(a\\)\\2", &pattern_buffer, cflags);
+ test_compile (0, REG_EBRACE, "a\\{", &pattern_buffer, cflags);
+ test_compile (0, REG_BADBR, "a\\{-1\\}", &pattern_buffer, cflags);
+ test_compile (0, REG_EBRACE, "a\\{", &pattern_buffer, cflags);
+ test_compile (0, REG_EBRACE, "a\\{1", &pattern_buffer, cflags);
+
+ cflags = REG_EXTENDED;
+ test_compile (0, REG_ECTYPE, "[[:alpo:]]", &pattern_buffer, cflags);
+ test_compile (0, REG_EESCAPE, "\\", &pattern_buffer, cflags);
+ test_compile (0, REG_EBRACK, "[a", &pattern_buffer, cflags);
+ test_compile (0, REG_EPAREN, "(", &pattern_buffer, cflags);
+ test_compile (0, REG_ERANGE, "[z-a]", &pattern_buffer, cflags);
+
+ test_nsub (1, "(a)", cflags);
+ test_nsub (2, "((a))", cflags);
+ test_nsub (2, "(a)(b)", cflags);
+
+ cflags = REG_EXTENDED | REG_NOSUB;
+ test_nsub (1, "(a)", cflags);
+
+ regfree (&pattern_buffer);
+
+ printf ("\nFinished regcomp tests.\n");
+}
+
+
+/* Store three start/end offset pairs into PMATCH[0], PMATCH[1] and
+   PMATCH[2]: (START0, END0), (START1, END1) and (START2, END2).
+   PMATCH must therefore have room for at least three elements.  */
+
+static void
+fill_pmatch (pmatch, start0, end0, start1, end1, start2, end2)
+    regmatch_t pmatch[];
+    regoff_t start0, end0, start1, end1, start2, end2;
+{
+  regmatch_t *slot = pmatch;
+
+  slot->rm_so = start0;
+  slot->rm_eo = end0;
+  slot++;
+
+  slot->rm_so = start1;
+  slot->rm_eo = end1;
+  slot++;
+
+  slot->rm_so = start2;
+  slot->rm_eo = end2;
+}
+
+
+/* Compile PATTERN with CFLAGS, run regexec on STRING asking for NMATCH
+   entries in PMATCH, and compare the first NMATCH entries of PMATCH
+   with those of CORRECT_PMATCH.
+
+   If regexec returns REG_NOMATCH a one-line message is printed and no
+   comparison is made.  Otherwise every differing start or end offset
+   is reported, followed by the request size, the string and the
+   pattern information.  The compiled pattern is always released.  */
+
+static void
+test_pmatch (pattern, string, nmatch, pmatch, correct_pmatch, cflags)
+    char *pattern;
+    char *string;
+    unsigned nmatch;
+    regmatch_t pmatch[];
+    regmatch_t correct_pmatch[];
+    int cflags;
+{
+  regex_t compiled;
+  unsigned i;
+  boolean any_wrong = false;
+
+  test_compile (1, 0, pattern, &compiled, cflags);
+
+  if (regexec (&compiled, string, nmatch, pmatch, 0) == REG_NOMATCH)
+    {
+      printf ("Matching failed in test_pmatch.\n");
+      regfree (&compiled);
+      return;
+    }
+
+  for (i = 0; i < nmatch; i++)
+    {
+      regmatch_t *got = &pmatch[i];
+      regmatch_t *want = &correct_pmatch[i];
+
+      if (got->rm_so != want->rm_so)
+        {
+          /* A blank line separates the first complaint from whatever
+             was printed before it.  */
+          if (!any_wrong)
+            printf ("\n");
+
+          printf ("Pmatch start %d wrong: was %d when should have been %d.\n",
+                  i, got->rm_so, want->rm_so);
+          any_wrong = true;
+        }
+
+      if (got->rm_eo != want->rm_eo)
+        {
+          if (!any_wrong)
+            printf ("\n");
+
+          printf ("Pmatch end %d wrong: was %d when should have been %d.\n",
+                  i, got->rm_eo, want->rm_eo);
+          any_wrong = true;
+        }
+    }
+
+  if (any_wrong)
+    {
+      printf (" The number of pmatches requested was: %d.\n", nmatch);
+      printf (" The string to match was: `%s'.\n", string);
+      print_pattern_info (pattern, &compiled);
+    }
+
+  regfree (&compiled);
+}
+
+
+/* Check that regexec honors the REG_NOTBOL and REG_NOTEOL bits in EFLAGS.
+
+   MUST_MATCH_BOL says that PATTERN can only match STRING by matching a
+   beginning-of-line anchor; MUST_MATCH_EOL says the same for an
+   end-of-line anchor.  PATTERN is compiled with CFLAGS and executed on
+   STRING with EFLAGS.  A failure to match is expected exactly when a
+   required anchor has been disabled by the corresponding eflag; any
+   other outcome is reported together with the string, the pattern
+   information and the eflags that were set.  */
+
+static void
+test_eflags (must_match_bol, must_match_eol, pattern, string, cflags, eflags)
+    boolean must_match_bol;
+    boolean must_match_eol;
+    char *pattern;
+    char *string;
+    int cflags;
+    int eflags;
+{
+  regex_t pattern_buffer;
+  int error_code_returned;
+  int nomatch_expected;
+  boolean was_error = false;
+
+  test_compile (1, 0, pattern, &pattern_buffer, cflags);
+  error_code_returned = regexec (&pattern_buffer, string, 0, 0, eflags);
+
+  /* The match must fail when an anchor the pattern depends on is turned
+     off by its eflag.  Each flag is tested with `&&' on its truth
+     value: comparing the boolean with the masked bit using `==' is
+     also true when both are zero, and is never true for a flag whose
+     mask value is not 1.  */
+  nomatch_expected = ((must_match_bol && (eflags & REG_NOTBOL))
+                      || (must_match_eol && (eflags & REG_NOTEOL)));
+
+  if (error_code_returned == REG_NOMATCH)
+    {
+      if (!nomatch_expected)
+        {
+          printf ("\nEflags test failed: didn't match when should have.\n");
+          was_error = true;
+        }
+    }
+  else /* We got a match. */
+    {
+      if (nomatch_expected)
+        {
+          printf ("\nEflags test failed: matched when shouldn't have.\n");
+          was_error = true;
+        }
+    }
+
+  if (was_error)
+    {
+      printf (" The string to match was: `%s'.\n", string);
+      print_pattern_info (pattern, &pattern_buffer);
+
+      /* Report the flags under their real names.  */
+      if (eflags & REG_NOTBOL)
+        printf (" The eflag REG_NOTBOL was set.\n");
+      if (eflags & REG_NOTEOL)
+        printf (" The eflag REG_NOTEOL was set.\n");
+    }
+
+  regfree (&pattern_buffer);
+}
+
+
+/* Compile PATTERN with CFLAGS and run it on STRING.  A failure is
+   reported only when SHOULD_MATCH is true and regexec returns
+   REG_NOMATCH; when SHOULD_MATCH is false the pattern is still
+   compiled and executed but the outcome is not checked.  */
+
+static void
+test_ignore_case (should_match, pattern, string, cflags)
+    boolean should_match;
+    char *pattern;
+    char *string;
+    int cflags;
+{
+  regex_t compiled;
+  int no_match;
+
+  test_compile (1, 0, pattern, &compiled, cflags);
+  no_match = regexec (&compiled, string, 0, 0, 0) == REG_NOMATCH;
+
+  if (no_match && should_match)
+    {
+      printf ("\nIgnore-case test failed:\n");
+      printf (" The string to match was: `%s'.\n", string);
+      print_pattern_info (pattern, &compiled);
+
+      if (cflags & REG_ICASE)
+        printf (" The cflag REG_ICASE was set.\n");
+    }
+
+  regfree (&compiled);
+}
+
+
+/* Compile PATTERN with CFLAGS and run it on STRING.  A failure is
+   reported only when SHOULD_MATCH is true and regexec returns
+   REG_NOMATCH; the report says whether REG_NEWLINE was among CFLAGS.
+   When SHOULD_MATCH is false the outcome is not checked.  */
+
+static void
+test_newline (should_match, pattern, string, cflags)
+    boolean should_match;
+    char *pattern;
+    char *string;
+    int cflags;
+{
+  regex_t compiled;
+  int no_match;
+
+  test_compile (1, 0, pattern, &compiled, cflags);
+  no_match = regexec (&compiled, string, 0, 0, 0) == REG_NOMATCH;
+
+  if (no_match && should_match)
+    {
+      printf ("\nNewline test failed:\n");
+      printf (" The string to match was: `%s'.\n", string);
+      print_pattern_info (pattern, &compiled);
+
+      printf ((cflags & REG_NEWLINE)
+              ? " The cflag REG_NEWLINE was set.\n"
+              : " The cflag REG_NEWLINE wasn't set.\n");
+    }
+
+  regfree (&compiled);
+}
+
+
+/* Compile PATTERN with CFLAGS, run it on STRING, and report when the
+   outcome disagrees with SHOULD_MATCH in either direction: no match
+   although one was wanted, or a match although none was wanted.  */
+
+static void
+test_posix_match (should_match, pattern, string, cflags)
+    boolean should_match;
+    char *pattern;
+    char *string;
+    int cflags;
+{
+  regex_t compiled;
+  int matched;
+  const char *complaint = NULL;
+
+  test_compile (1, 0, pattern, &compiled, cflags);
+  matched = regexec (&compiled, string, 0, 0, 0) != REG_NOMATCH;
+
+  if (should_match && !matched)
+    complaint = "\nShould have matched but didn't:\n";
+  else if (!should_match && matched)
+    complaint = "\nShould not have matched but did:\n";
+
+  if (complaint != NULL)
+    {
+      printf ("%s", complaint);
+      printf (" The string to match was: `%s'.\n", string);
+      print_pattern_info (pattern, &compiled);
+    }
+
+  regfree (&compiled);
+}
+
+
+/* Exercise regexec through the helpers above, in this order: pmatch
+ filling for several NMATCH values, the REG_NOTBOL/REG_NOTEOL eflags,
+ REG_ICASE, REG_NEWLINE, and plain match/no-match results. The
+ helpers print a message for each failure they detect. */
+static void
+test_regexec ()
+{
+ regmatch_t pmatch[3];
+ regmatch_t correct_pmatch[3];
+ int cflags = 0;
+ int eflags = 0;
+
+ printf ("\nStarting regexec tests.\n");
+
+ cflags = REG_NOSUB; /* shouldn't look at any of pmatch. */
+ test_pmatch ("a", "a", 0, pmatch, correct_pmatch, cflags);
+
+ /* Ask for fewer `pmatch'es than there are pattern subexpressions.
+ (Shouldn't look at pmatch[2].) The 100/101 placed in
+ correct_pmatch[2] are never compared, because test_pmatch only
+ examines the first NMATCH (here 2) entries. */
+ cflags = REG_EXTENDED;
+ fill_pmatch (correct_pmatch, 0, 1, 0, 1, 100, 101);
+ test_pmatch ("((a))", "a", 2, pmatch, correct_pmatch, cflags);
+
+ /* Ask for same number of `pmatch'es as there are pattern subexpressions. */
+ cflags = REG_EXTENDED;
+ fill_pmatch(correct_pmatch, 0, 1, 0, 1, -1, -1);
+ test_pmatch ("(a)", "a", 2, pmatch, correct_pmatch, cflags);
+
+ /* Ask for more `pmatch'es than there are pattern subexpressions. */
+ cflags = REG_EXTENDED;
+ fill_pmatch (correct_pmatch, 0, 1, -1, -1, -1, -1);
+ test_pmatch ("a", "a", 2, pmatch, correct_pmatch, cflags);
+
+ /* The eflags tests below still run with cflags == REG_EXTENDED. */
+ eflags = REG_NOTBOL;
+ test_eflags (true, false, "^a", "a", cflags, eflags);
+ test_eflags (true, false, "(^a)", "a", cflags, eflags);
+ test_eflags (true, false, "a|^b", "b", cflags, eflags);
+ test_eflags (true, false, "^b|a", "b", cflags, eflags);
+
+ eflags = REG_NOTEOL;
+ test_eflags (false, true, "a$", "a", cflags, eflags);
+ test_eflags (false, true, "(a$)", "a", cflags, eflags);
+ test_eflags (false, true, "a|b$", "b", cflags, eflags);
+ test_eflags (false, true, "b$|a", "b", cflags, eflags);
+
+ eflags = REG_NOTBOL | REG_NOTEOL;
+ test_eflags (true, true, "^a$", "a", cflags, eflags);
+ test_eflags (true, true, "(^a$)", "a", cflags, eflags);
+ test_eflags (true, true, "a|(^b$)", "b", cflags, eflags);
+ test_eflags (true, true, "(^b$)|a", "b", cflags, eflags);
+
+ /* Case-insensitive matching (REG_ICASE only, so REG_EXTENDED is no
+ longer set). */
+ cflags = REG_ICASE;
+ test_ignore_case (true, "a", "a", cflags);
+ test_ignore_case (true, "A", "A", cflags);
+ test_ignore_case (true, "A", "a", cflags);
+ test_ignore_case (true, "a", "A", cflags);
+
+ test_ignore_case (true, "@", "@", cflags);
+ test_ignore_case (true, "\\[", "[", cflags);
+ test_ignore_case (true, "`", "`", cflags);
+ test_ignore_case (true, "{", "{", cflags);
+
+ test_ignore_case (true, "[!-`]", "A", cflags);
+ test_ignore_case (true, "[!-`]", "a", cflags);
+
+ /* Same patterns without REG_ICASE. Note that test_ignore_case only
+ reports a failure when its first argument is true, so the `false'
+ calls below are compiled and executed but their outcome is not
+ checked. */
+ cflags = 0;
+ test_ignore_case (false, "a", "a", cflags);
+ test_ignore_case (false, "A", "A", cflags);
+ test_ignore_case (false, "A", "a", cflags);
+ test_ignore_case (false, "a", "A", cflags);
+
+ test_ignore_case (true, "@", "@", cflags);
+ test_ignore_case (true, "\\[", "[", cflags);
+ test_ignore_case (true, "`", "`", cflags);
+ test_ignore_case (true, "{", "{", cflags);
+
+ test_ignore_case (true, "[!-`]", "A", cflags);
+ test_ignore_case (false, "[!-`]", "a", cflags);
+
+
+ /* Test newline stuff. As with test_ignore_case, test_newline only
+ reports a failure when its first argument is true. */
+ cflags = REG_EXTENDED | REG_NEWLINE;
+ test_newline (true, "\n", "\n", cflags);
+ test_newline (true, "a\n", "a\n", cflags);
+ test_newline (true, "\nb", "\nb", cflags);
+ test_newline (true, "a\nb", "a\nb", cflags);
+
+ test_newline (false, ".", "\n", cflags);
+ test_newline (false, "[^a]", "\n", cflags);
+
+ test_newline (true, "\n^a", "\na", cflags);
+ test_newline (true, "\n(^a|b)", "\na", cflags);
+ test_newline (true, "a$\n", "a\n", cflags);
+ test_newline (true, "(a$|b)\n", "a\n", cflags);
+ test_newline (true, "(a$|b|c)\n", "a\n", cflags);
+ test_newline (true, "((a$|b|c)$)\n", "a\n", cflags);
+ test_newline (true, "((a$|b|c)$)\n", "b\n", cflags);
+ test_newline (true, "(a$|b)\n|a\n", "a\n", cflags);
+
+ test_newline (true, "^a", "\na", cflags);
+ test_newline (true, "a$", "a\n", cflags);
+
+ /* Now test normal behavior. */
+ cflags = REG_EXTENDED;
+ test_newline (true, "\n", "\n", cflags);
+ test_newline (true, "a\n", "a\n", cflags);
+ test_newline (true, "\nb", "\nb", cflags);
+ test_newline (true, "a\nb", "a\nb", cflags);
+
+ test_newline (true, ".", "\n", cflags);
+ test_newline (true, "[^a]", "\n", cflags);
+
+ test_newline (false, "\n^a", "\na", cflags);
+ test_newline (false, "a$\n", "a\n", cflags);
+
+ test_newline (false, "^a", "\na", cflags);
+ test_newline (false, "a$", "a\n", cflags);
+
+
+ /* Test that matches whole string only. */
+ cflags = 0;
+ test_posix_match (true, "a", "a", cflags);
+
+ /* Tests that match substrings. */
+ test_posix_match (true, "a", "ab", cflags);
+ test_posix_match (true, "b", "ab", cflags);
+
+ /* Test that doesn't match. */
+ test_posix_match (false, "a", "b", cflags);
+
+ printf ("\nFinished regexec tests.\n");
+}
+
+
+/* Check regerror's result for ERROR_CODE against EXPECTED_ERROR_MESSAGE,
+   using a buffer of only TEST_ERRBUF_SIZE bytes.  Two things are
+   verified, and a diagnostic is printed for each that fails:
+     - regerror's return value equals
+       strlen (EXPECTED_ERROR_MESSAGE) + 1;
+     - the first TEST_ERRBUF_SIZE - 1 characters of the expected text
+       and of the text regerror stored compare equal.  */
+
+static void
+test_error_code_message (error_code, expected_error_message)
+    int error_code;
+    char *expected_error_message;
+{
+  char returned_error_message[TEST_ERRBUF_SIZE];
+  char error_code_string[ERROR_CODE_LENGTH];
+  size_t expected_error_message_length = strlen (expected_error_message) + 1;
+  size_t returned_error_message_length = regerror (error_code, 0,
+                                                   returned_error_message,
+                                                   TEST_ERRBUF_SIZE);
+
+  if (returned_error_message_length != expected_error_message_length)
+    {
+      printf ("\n\n Testing returned error codes, with expected error message `%s':\n",
+              expected_error_message);
+
+      printf ("\n\n and returned error message `%s':\n",
+              returned_error_message);
+      /* size_t is not `int'; cast so the arguments match `%lu'.  */
+      printf (" should have returned a length of %lu but returned %lu.\n",
+              (unsigned long) expected_error_message_length,
+              (unsigned long) returned_error_message_length);
+    }
+
+  if (strncmp (expected_error_message, returned_error_message,
+               TEST_ERRBUF_SIZE - 1) != 0)
+    {
+      get_error_string (error_code, error_code_string);
+
+      printf ("\n\n With error code %s (%d), expected error message:\n",
+              error_code_string, error_code);
+
+      printf (" `%s'\n", expected_error_message);
+      printf (" but got:\n");
+      printf (" `%s'\n", returned_error_message);
+    }
+}
+
+
+/* Check the "ask for the size, then allocate" way of calling regerror.
+   regerror is first called with a null buffer of size zero; its return
+   value is used to size a buffer obtained from xmalloc, and a second
+   call fills that buffer.  A diagnostic is printed when the stored
+   text differs from EXPECTED_ERROR_MESSAGE.  */
+
+static void
+test_error_code_allocation (error_code, expected_error_message)
+    int error_code;
+    char *expected_error_message;
+{
+  char *returned_error_message = NULL;
+  char error_code_string[ERROR_CODE_LENGTH];
+  size_t returned_error_message_length = regerror (error_code, 0,
+                                                   returned_error_message,
+                                                   (size_t)0);
+
+  returned_error_message = xmalloc (returned_error_message_length + 1);
+
+  regerror (error_code, 0, returned_error_message,
+            returned_error_message_length);
+
+  if (strcmp (expected_error_message, returned_error_message) != 0)
+    {
+      get_error_string (error_code, error_code_string);
+
+      printf ("\n\n Testing error code allocation,\n");
+      printf ("with error code %s (%d), expected error message:\n",
+              error_code_string, error_code);
+      printf (" `%s'\n", expected_error_message);
+      printf (" but got:\n");
+      printf (" `%s'\n", returned_error_message);
+    }
+
+  /* Release the message buffer, which was previously leaked on every
+     call.  NOTE(review): this assumes xmalloc hands out malloc-backed
+     memory -- confirm against its definition.  */
+  free (returned_error_message);
+}
+
+
+static void
+test_regerror ()
+{
+ test_error_code_message (REG_NOMATCH, "No match");
+ test_error_code_message (REG_BADPAT, "Invalid regular expression");
+ test_error_code_message (REG_ECOLLATE, "Invalid collation character");
+ test_error_code_message (REG_ECTYPE, "Invalid character class name");
+ test_error_code_message (REG_EESCAPE, "Trailing backslash");
+ test_error_code_message (REG_ESUBREG, "Invalid back reference");
+ test_error_code_message (REG_EBRACK, "Unmatched [ or [^");
+ test_error_code_message (REG_EPAREN, "Unmatched ( or \\(");
+ test_error_code_message (REG_EBRACE, "Unmatched \\{");
+ test_error_code_message (REG_BADBR, "Invalid content of \\{\\}");
+ test_error_code_message (REG_ERANGE, "Invalid range end");
+ test_error_code_message (REG_ESPACE, "Memory exhausted");
+ test_error_code_message (REG_BADRPT, "Invalid preceding regular expression");
+ test_error_code_message (REG_EEND, "Premature end of regular expression");
+ test_error_code_message (REG_ESIZE, "Regular expression too big");
+ test_error_code_allocation (REG_ERPAREN, "Unmatched ) or \\)");
+}
+
+
+/* Entry point for the POSIX interface tests: stores
+ posix_interface_test in `t' (a variable declared outside this
+ function), then runs the regcomp, regexec and regerror test groups
+ in that order, bracketed by start/finish banners. */
+void
+test_posix_interface ()
+{
+ printf ("\nStarting POSIX interface tests.\n");
+ t = posix_interface_test;
+
+ test_regcomp ();
+ test_regexec ();
+ test_regerror ();
+
+ printf ("\nFinished POSIX interface tests.\n");
+}
diff --git a/gnu/lib/libregex/test/psx-interv.c b/gnu/lib/libregex/test/psx-interv.c
new file mode 100644
index 0000000..6725c38
--- /dev/null
+++ b/gnu/lib/libregex/test/psx-interv.c
@@ -0,0 +1,140 @@
+/* psx-interv.c: test POSIX intervals, both basic and extended. */
+
+#include "test.h"
+
+/* Run the POSIX interval ({m,n}) tests. Patterns are written with
+ plain braces and parentheses and passed through BRACES_TO_OPS /
+ PARENS_TO_OPS before use. The checks use test_match, test_fastmap
+ and TEST_REGISTERS. The first part runs with test_should_match set
+ to true, the second with it set to false (presumably telling
+ test_match whether a match is expected -- confirm in the test
+ driver). */
+void
+test_intervals ()
+{
+ printf ("\nStarting POSIX interval tests.\n");
+
+ test_should_match = true;
+ /* Valid intervals. */
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), "abaab");
+ test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")), "a", 0, 0);
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,2}b)*")),
+ "abaab", 0, 5, 2, 5, -1, -1);
+
+ test_match (BRACES_TO_OPS ("a{0}"), "");
+ test_fastmap (BRACES_TO_OPS ("a{0}"), "", 0, 0);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{0}"), "", 0, 0, -1, -1, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{0}"), "x", 0, 0, -1, -1, -1, -1);
+
+ test_match (BRACES_TO_OPS ("a{0,}"), "");
+ test_match (BRACES_TO_OPS ("a{0,}"), "a");
+ test_fastmap (BRACES_TO_OPS ("a{0,}"), "a", 0, 0);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{0,}"), "a", 0, 1, -1, -1, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{0,}"), "xax", 0, 0, -1, -1, -1, -1);
+
+ test_match (BRACES_TO_OPS ("a{1}"), "a");
+ test_match (BRACES_TO_OPS ("a{1,}"), "a");
+ test_match (BRACES_TO_OPS ("a{1,}"), "aa");
+ test_match (BRACES_TO_OPS ("a{0,0}"), "");
+ test_match (BRACES_TO_OPS ("a{0,1}"), "");
+ test_match (BRACES_TO_OPS ("a{0,1}"), "a");
+ test_match (BRACES_TO_OPS ("a{1,3}"), "a");
+ test_match (BRACES_TO_OPS ("a{1,3}"), "aa");
+ test_match (BRACES_TO_OPS ("a{1,3}"), "aaa");
+ TEST_REGISTERS (BRACES_TO_OPS ("a{1,3}"), "aaa", 0, 3, -1, -1, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{1,3}"), "xaaax", 1, 4, -1, -1, -1, -1);
+
+ test_match (BRACES_TO_OPS ("a{0,3}b"), "b");
+ test_match (BRACES_TO_OPS ("a{0,3}b"), "aaab");
+ test_fastmap (BRACES_TO_OPS ("a{0,3}b"), "ab", 0, 0);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{0,3}b"), "b", 0, 1, -1, -1, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS ("a{0,3}b"), "xbx", 1, 2, -1, -1, -1, -1);
+
+ test_match (BRACES_TO_OPS ("a{1,3}b"), "ab");
+ test_match (BRACES_TO_OPS ("a{1,3}b"), "aaab");
+ test_match (BRACES_TO_OPS ("ab{1,3}c"), "abbbc");
+
+ /* Intervals applied to a parenthesized group. */
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "b");
+ test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "ab", 0, 0);
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "b", 0, 1, -1, -1, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "ab", 0, 2, 0, 1, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){0,3}b")), "xabx", 1, 3, 1, 2, -1, -1);
+
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "ab");
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaab");
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaab", 0, 4, 2, 3, -1, -1);
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "xaaabx", 1, 5, 3, 4, -1, -1);
+
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "aaaab");
+ test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "ab", 0, 0);
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){0,3}b")), "aaaab", 0, 5, 4, 4, -1, -1);
+
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "b");
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "aaab");
+ test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "ab", 0, 0);
+
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,1}ab")), "aaaab");
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,1}ab")), "aaaab", 0, 5, 0, 3, -1, -1);
+
+ /* Intervals applied to `.', bracket expressions and an escaped `*'. */
+ test_match (BRACES_TO_OPS (".{0,3}b"), "b");
+ test_match (BRACES_TO_OPS (".{0,3}b"), "ab");
+
+ test_match (BRACES_TO_OPS ("[a]{0,3}b"), "b");
+ test_match (BRACES_TO_OPS ("[a]{0,3}b"), "aaab");
+ test_fastmap (BRACES_TO_OPS ("[a]{0,3}b"), "ab", 0, 0);
+ test_match (BRACES_TO_OPS ("[^a]{0,3}b"), "bcdb");
+ test_match (BRACES_TO_OPS ("ab{0,3}c"), "abbbc");
+ test_match (BRACES_TO_OPS ("[[:digit:]]{0,3}d"), "123d");
+ test_fastmap (BRACES_TO_OPS ("[[:digit:]]{0,3}d"), "0123456789d", 0, 0);
+
+ test_match (BRACES_TO_OPS ("\\*{0,3}a"), "***a");
+ test_match (BRACES_TO_OPS (".{0,3}b"), "aaab");
+ test_match (BRACES_TO_OPS ("a{0,3}a"), "aaa");
+ /* Backtracking. */
+ test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a", 0, 0);
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a");
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{1,})*a")), "a", 0, 1, -1, -1, -1, -1);
+
+ test_fastmap (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa", 0, 0);
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa");
+ TEST_REGISTERS (BRACES_TO_OPS (PARENS_TO_OPS ("(a{2,})*aa")), "aa", 0, 2, -1, -1, -1, -1);
+
+ /* An interval followed directly by `*' or by another interval. */
+ test_match (BRACES_TO_OPS ("a{2}*"), "");
+ test_match (BRACES_TO_OPS ("a{2}*"), "aa");
+
+ test_match (BRACES_TO_OPS ("a{1}*"), "");
+ test_match (BRACES_TO_OPS ("a{1}*"), "a");
+ test_match (BRACES_TO_OPS ("a{1}*"), "aa");
+
+ test_match (BRACES_TO_OPS ("a{1}{1}"), "a");
+
+ test_match (BRACES_TO_OPS ("a{1}{1}{1}"), "a");
+ test_match (BRACES_TO_OPS ("a{1}{1}{2}"), "aa");
+
+ test_match (BRACES_TO_OPS ("a{1}{1}*"), "");
+ test_match (BRACES_TO_OPS ("a{1}{1}*"), "a");
+ test_match (BRACES_TO_OPS ("a{1}{1}*"), "aa");
+ test_match (BRACES_TO_OPS ("a{1}{1}*"), "aaa");
+
+ test_match (BRACES_TO_OPS ("a{1}{2}"), "aa");
+ test_match (BRACES_TO_OPS ("a{2}{1}"), "aa");
+
+
+ /* From here on the pattern/string pairs are run with
+ test_should_match set to false. */
+ test_should_match = false;
+
+ test_match (BRACES_TO_OPS ("a{0}"), "a");
+ test_match (BRACES_TO_OPS ("a{0,}"), "b");
+ test_match (BRACES_TO_OPS ("a{1}"), "");
+ test_match (BRACES_TO_OPS ("a{1}"), "aa");
+ test_match (BRACES_TO_OPS ("a{1,}"), "");
+ test_match (BRACES_TO_OPS ("a{1,}"), "b");
+ test_match (BRACES_TO_OPS ("a{0,0}"), "a");
+ test_match (BRACES_TO_OPS ("a{0,1}"), "aa");
+ test_match (BRACES_TO_OPS ("a{0,1}"), "b");
+ test_match (BRACES_TO_OPS ("a{1,3}"), "");
+ test_match (BRACES_TO_OPS ("a{1,3}"), "aaaa");
+ test_match (BRACES_TO_OPS ("a{1,3}"), "b");
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a){1,3}b")), "aaaab");
+ test_match (BRACES_TO_OPS (PARENS_TO_OPS ("(a*){1,3}b")), "bb");
+ test_match (BRACES_TO_OPS ("[a]{0,3}"), "aaaa");
+ test_match (BRACES_TO_OPS ("[^a]{0,3}b"), "ab");
+ test_match (BRACES_TO_OPS ("ab{0,3}c"), "abababc");
+ test_match (BRACES_TO_OPS ("[:alpha:]{0,3}d"), "123d");
+ test_match (BRACES_TO_OPS ("\\^{1,3}a"), "a");
+ test_match (BRACES_TO_OPS (".{0,3}b"), "aaaab");
+
+ printf ("\nFinished POSIX interval tests.\n");
+}
diff --git a/gnu/lib/libregex/test/regexcpp.sed b/gnu/lib/libregex/test/regexcpp.sed
new file mode 100644
index 0000000..082c136
--- /dev/null
+++ b/gnu/lib/libregex/test/regexcpp.sed
@@ -0,0 +1,8 @@
+# Split long lines by inserting newlines after selected punctuation.
+# NOTE(review): presumably run over preprocessed regex source to make it
+# readable -- confirm against the Makefile that invokes this script.
+#
+# On lines where a `;' is followed by at least one more character,
+# put a newline after every `;'.
+/;..*$/s/;/;\
+/g
+# On lines containing `{' followed by a space, put a newline after
+# every `{'.
+/{ .*$/s/{/{\
+/g
+# On lines matching the address below, put a newline after every `?'.
+# NOTE(review): the address looks intended to find a `?' operator that
+# is not part of a character constant -- confirm how this sed treats `\?'.
+/ \?[^'] /s/?/?\
+/g
+# On lines containing ` : ', put a newline after every `:'.
+/ : /s/:/:\
+/g
diff --git a/gnu/lib/libregex/test/syntax.skel b/gnu/lib/libregex/test/syntax.skel
new file mode 100644
index 0000000..a3fbf64
--- /dev/null
+++ b/gnu/lib/libregex/test/syntax.skel
@@ -0,0 +1,74 @@
+/* Print which syntax bits are set. */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include "regex.h"
+
+/* It's coincidental that these two are currently the same. */
+#define LONGEST_BIT_NAME "RE_UNMATCHED_RIGHT_PAREN_ORD"
+#define LAST_BIT RE_UNMATCHED_RIGHT_PAREN_ORD
+
+/* Sum of above, when printed. Assigned in main. */
+static unsigned longest;
+
+
+/* Print one line saying whether BIT is set in SYNTAX.
+
+   SYNTAX  the syntax value being examined.
+   BIT     the single syntax bit to test.
+   NAME    the bit's name as it should appear in the output.
+
+   The line has the form `NAME<padding> (BIT=0xBIT): y' (or `n'), where
+   the padding aligns the parenthesized part with that of the longest
+   name; `longest' holds the printed width of that longest entry.  */
+
+static void
+test_bit (syntax, bit, name)
+    reg_syntax_t syntax;
+    unsigned bit;
+    char *name;
+{
+  char test_str[100];
+  int padding_count;
+
+  /* Format this entry the way `longest' was computed, to learn how
+     many columns it is short of the longest entry.  */
+  sprintf (test_str, "%s (%d=0x%x)", name, bit, bit);
+  padding_count = (int) longest - (int) strlen (test_str);
+
+  /* An entry wider than `longest' (or `longest' not yet assigned) must
+     not yield a negative count: that used to index before the start of
+     a fixed-size padding buffer.  */
+  if (padding_count < 0)
+    padding_count = 0;
+
+  /* `%*s' with an empty string emits exactly PADDING_COUNT spaces, so
+     no padding buffer (and no bound on its size) is needed.  */
+  printf ("%s%*s (%d=0x%x): %c\n",
+          name, padding_count, "", bit, bit, syntax & bit ? 'y' : 'n');
+}
+
+
+/* Macro to abbreviate the constant arguments.  It relies on a variable
+   named `syntax' being in scope at the point of use.  */
+#define TEST_BIT(bit) test_bit (syntax, bit, #bit)
+
+/* Read a syntax value, either from the single command-line argument or
+   (with no arguments) from standard input, and report which syntax
+   bits are set in it.  The value is parsed with `%i', so decimal,
+   octal (leading 0) and hexadecimal (leading 0x) are all accepted.
+
+   This file is a skeleton: the marker comment near the end of `main'
+   is replaced by one TEST_BIT call per syntax bit, so it must be kept
+   exactly as it is.
+
+   Exits with status 1 on a usage error or an unparsable value.  */
+
+int
+main (argc, argv)
+    int argc;
+    char *argv[];
+{
+  reg_syntax_t syntax;
+  int syntax_value;
+  char syntax_str[1000], test_str[100];
+  char *syntax_arg;
+
+  switch (argc)
+    {
+    case 1:
+      printf ("Syntax? ");
+      /* Limit the read to the buffer size (999 characters plus the
+         terminating null) and notice end-of-file.  */
+      if (scanf ("%999s", syntax_str) != 1)
+        {
+          fprintf (stderr, "syntax: no syntax value given.\n");
+          exit (1);
+        }
+      syntax_arg = syntax_str;
+      break;
+
+    case 2:
+      /* Parse the argument in place; copying it into a fixed-size
+         buffer could overflow on a long argument.  */
+      syntax_arg = argv[1];
+      break;
+
+    default:
+      fprintf (stderr, "Usage: syntax [syntax].\n");
+      exit (1);
+    }
+
+  /* `%i' stores through an `int *', so parse into an int and convert,
+     and refuse to go on with an uninitialized value.  */
+  if (sscanf (syntax_arg, "%i", &syntax_value) != 1)
+    {
+      fprintf (stderr, "syntax: `%s' is not a number.\n", syntax_arg);
+      exit (1);
+    }
+  syntax = syntax_value;
+
+  /* Figure out the longest name, so we can align the output nicely. */
+  sprintf (test_str, "%s (%d=0x%x)", LONGEST_BIT_NAME, LAST_BIT, LAST_BIT);
+  longest = strlen (test_str);
+
+  /* [[[replace with bit tests]]] */
+
+  return 0;
+}
diff --git a/gnu/lib/libregex/test/test.c b/gnu/lib/libregex/test/test.c
new file mode 100644
index 0000000..a8de23e
--- /dev/null
+++ b/gnu/lib/libregex/test/test.c
@@ -0,0 +1,782 @@
+/* test.c: testing routines for regex.c. */
+
+#include <assert.h>
+
+#ifdef STDC_HEADERS
+#include <stdlib.h>
+#else
+char *malloc ();
+char *realloc ();
+#endif
+
+/* Just to be complete, we make both the system V/ANSI and the BSD
+ versions of the string functions available. */
+#if USG || STDC_HEADERS
+#include <string.h>
+#define index strchr
+#define rindex strrchr
+#define bcmp(s1, s2, len) memcmp ((s1), (s2), (len))
+#define bcopy(from, to, len) memcpy ((to), (from), (len))
+#define bzero(s, len) memset ((s), 0, (len))
+#else
+#include <strings.h>
+#define strchr index
+#define strrchr rindex
+#ifndef NEED_MEMORY_H
+#define memcmp(s1, s2, n) bcmp ((s1), (s2), (n))
+#define memcpy(to, from, len) bcopy ((from), (to), (len))
+#endif
+extern char *strtok ();
+extern char *strstr ();
+#endif /* not USG or STDC_HEADERS */
+
+/* SunOS 4.1 declares memchr in <memory.h>, not <string.h>. I don't
+ understand why. */
+#if NEED_MEMORY_H
+#include <memory.h>
+#endif
+
+#include "test.h"
+
+#define BYTEWIDTH 8
+
+extern void print_partial_compiled_pattern ();
+extern void print_compiled_pattern ();
+extern void print_double_string ();
+
+/* If nonzero, the results of every test are displayed. */
+boolean verbose = false;
+
+/* If nonzero, don't do register testing. */
+boolean omit_register_tests = true;
+
+/* Says whether the current test should match or fail to match. */
+boolean test_should_match;
+
+
+/* Store the ten given start/end pairs in registers 0 through 9 of
+   REGS, and mark every register from 10 up to (but not including)
+   REGS->num_regs as unused by setting its start and end to -1.  The
+   first ten registers are written unconditionally.  */
+
+static void
+set_all_registers (start0, end0, start1, end1,
+                   start2, end2, start3, end3,
+                   start4, end4, start5, end5,
+                   start6, end6, start7, end7,
+                   start8, end8, start9, end9, regs)
+
+    int start0; int end0; int start1; int end1;
+    int start2; int end2; int start3; int end3;
+    int start4; int end4; int start5; int end5;
+    int start6; int end6; int start7; int end7;
+    int start8; int end8; int start9; int end9;
+    struct re_registers *regs;
+
+{
+  int wanted_start[10];
+  int wanted_end[10];
+  unsigned reg;
+
+  /* Gather the explicit values so they can be copied in a loop.  */
+  wanted_start[0] = start0;  wanted_end[0] = end0;
+  wanted_start[1] = start1;  wanted_end[1] = end1;
+  wanted_start[2] = start2;  wanted_end[2] = end2;
+  wanted_start[3] = start3;  wanted_end[3] = end3;
+  wanted_start[4] = start4;  wanted_end[4] = end4;
+  wanted_start[5] = start5;  wanted_end[5] = end5;
+  wanted_start[6] = start6;  wanted_end[6] = end6;
+  wanted_start[7] = start7;  wanted_end[7] = end7;
+  wanted_start[8] = start8;  wanted_end[8] = end8;
+  wanted_start[9] = start9;  wanted_end[9] = end9;
+
+  for (reg = 0; reg < 10; reg++)
+    {
+      regs->start[reg] = wanted_start[reg];
+      regs->end[reg] = wanted_end[reg];
+    }
+
+  /* Everything after the explicit ten is an unused register.  */
+  while (reg < regs->num_regs)
+    {
+      regs->start[reg] = -1;
+      regs->end[reg] = -1;
+      reg++;
+    }
+}
+
+
+
+/* Return a newly allocated string holding S1 followed by S2.  The
+   storage comes from xmalloc.  (A variable-argument version would be
+   the natural generalization.)  */
+
+char *
+concat (s1, s2)
+    char *s1;
+    char *s2;
+{
+  char *joined;
+
+  joined = xmalloc (strlen (s1) + strlen (s2) + 1);
+
+  /* Copy S1, then put S2 (with its terminating null) right after it.  */
+  strcpy (joined, s1);
+  strcpy (joined + strlen (s1), s2);
+
+  return joined;
+}
+
+
+#define OK_TO_SEARCH (nonconst_buf.fastmap_accurate && (str1 || str2))
+
+/* Compile PAT and, when it compiles as expected, check its fastmap and
+   run it against STR1 and STR2, printing a report on stdout for
+   anything unexpected (or for every test, when `verbose' is set).
+
+   PATTERN_SHOULD_BE_VALID  nonzero if PAT is expected to compile.
+   MATCH_WHOLE_STRING       nonzero to use re_match/re_match_2 and
+                            require the match to cover the whole text;
+                            zero to use re_search/re_search_2.
+   STR1, STR2               the text; either may be NULL.  Nothing is
+                            matched when both are NULL.
+   START, RANGE, END        passed through to the matching routines.
+   CORRECT_FASTMAP          the expected fastmap (1 << BYTEWIDTH
+                            bytes), or NULL to skip that comparison.
+   CORRECT_REGS             the expected registers, or NULL to skip
+                            the register comparison.
+   CAN_BE_NULL              ignored.  Should just be removed.
+
+   Whether a match is the expected outcome is taken from the global
+   `test_should_match'.  */
+
+void
+general_test (pattern_should_be_valid, match_whole_string,
+              pat, str1, str2, start, range, end, correct_fastmap,
+              correct_regs, can_be_null)
+    unsigned pattern_should_be_valid;
+    unsigned match_whole_string;
+    const char *pat;
+    char *str1, *str2;
+    int start, range, end;
+    char *correct_fastmap;
+    struct re_registers *correct_regs;
+    int can_be_null;
+{
+  struct re_pattern_buffer nonconst_buf;
+  struct re_pattern_buffer old_buf;
+  struct re_registers regs;
+  const char *r;
+  char fastmap[1 << BYTEWIDTH];
+  unsigned *regs_correct = NULL;
+  unsigned num_regs_checked = 0;
+  unsigned all_regs_correct = 1;
+  boolean fastmap_internal_error = false;
+  unsigned match = 0;
+  unsigned match_1 = 0;
+  unsigned match_2 = 0;
+  unsigned invalid_pattern = 0;
+  boolean internal_error_1 = false;
+  boolean internal_error_2 = false;
+
+  /* Start everything that is inspected or freed later in a known
+     state: the pre-match copy of the pattern exists only if we get as
+     far as searching, and the matcher fills in the registers only
+     when it is asked to and succeeds.  */
+  old_buf.buffer = NULL;
+  regs.num_regs = 0;
+  regs.start = regs.end = NULL;
+
+  nonconst_buf.allocated = 8;
+  nonconst_buf.buffer = xmalloc (nonconst_buf.allocated);
+  nonconst_buf.fastmap = fastmap;
+  nonconst_buf.translate = 0;
+
+  assert (pat != NULL);
+  r = re_compile_pattern (pat, strlen (pat), &nonconst_buf);
+
+  /* Kludge: if we are doing POSIX testing, we really should have
+     called regcomp, not re_compile_pattern.  As it happens, the only
+     way in which it matters is that re_compile_pattern sets the
+     newline/anchor field for matching (part of what happens when
+     REG_NEWLINE is given to regcomp).  We have to undo that for POSIX
+     matching.  */
+  if (t == posix_basic_test || t == posix_extended_test)
+    nonconst_buf.newline_anchor = 0;
+
+  invalid_pattern = r != NULL;
+
+  if (!r)
+    {
+      /* Return value of the most recent match/search call.  */
+      int ret;
+
+      if (!pattern_should_be_valid)
+        printf ("\nShould have been an invalid pattern but wasn't:\n");
+      else
+        {
+          fastmap_internal_error = (re_compile_fastmap (&nonconst_buf) == -2);
+
+          /* When the caller supplied the expected fastmap, reuse the
+             `fastmap_accurate' field to record whether the computed
+             one agrees with it.  */
+          if (correct_fastmap)
+            nonconst_buf.fastmap_accurate =
+              memcmp (nonconst_buf.fastmap, correct_fastmap, 1 << BYTEWIDTH)
+              == 0;
+
+          if (OK_TO_SEARCH)
+            {
+              /* Keep a copy of the compiled pattern, so we can tell
+                 afterwards whether matching changed it.  */
+              old_buf = nonconst_buf;
+              old_buf.buffer = (unsigned char *) xmalloc (nonconst_buf.used);
+              memcpy (old_buf.buffer, nonconst_buf.buffer, nonconst_buf.used);
+
+              /* If only one string is null, call re_match or re_search,
+                 which is what the user would probably do.  */
+              if (str1 == NULL && str2 != NULL
+                  || str2 == NULL && str1 != NULL)
+                {
+                  char *the_str = str1 == NULL ? str2 : str1;
+
+                  match_1
+                    = match_whole_string
+                      ? (ret = re_match (&nonconst_buf, the_str,
+                                         strlen (the_str), start, &regs))
+                        == strlen (the_str)
+                      : (ret = re_search (&nonconst_buf,
+                                          the_str, strlen (the_str),
+                                          start, range, &regs))
+                        >= 0;
+
+                  if (ret == -2)
+                    internal_error_1 = true;
+                }
+              else
+                match_1 = 1;
+
+              /* Also call with re_match_2 or re_search_2, as they might
+                 do this.  (Also can check calling with either string1
+                 or string2 or both null.)  */
+              if (match_whole_string)
+                {
+                  ret = re_match_2 (&nonconst_buf,
+                                    str1, SAFE_STRLEN (str1),
+                                    str2, SAFE_STRLEN (str2),
+                                    start, &regs, end);
+                  match_2 = ret == SAFE_STRLEN (str1) + SAFE_STRLEN (str2);
+                }
+              else
+                {
+                  ret = re_search_2 (&nonconst_buf,
+                                     str1, SAFE_STRLEN (str1),
+                                     str2, SAFE_STRLEN (str2),
+                                     start, range, &regs, end);
+                  match_2 = ret >= 0;
+                }
+
+              if (ret == -2)
+                internal_error_2 = true;
+
+              match = match_1 & match_2;
+
+              if (correct_regs)
+                {
+                  unsigned reg;
+
+                  /* Only registers present in both sets can be
+                     compared.  REGS.num_regs is still zero if no call
+                     above filled the registers in.  */
+                  num_regs_checked
+                    = regs.num_regs < correct_regs->num_regs
+                      ? regs.num_regs : correct_regs->num_regs;
+
+                  if (num_regs_checked > 0)
+                    regs_correct
+                      = (unsigned *) xmalloc (num_regs_checked
+                                              * sizeof (unsigned));
+
+                  for (reg = 0; reg < num_regs_checked; reg++)
+                    {
+                      regs_correct[reg]
+                        = (regs.start[reg] == correct_regs->start[reg]
+                           && regs.end[reg] == correct_regs->end[reg])
+#ifdef EMPTY_REGS_CONFUSED
+                          /* There is confusion in the standard about
+                             the registers in some patterns which can
+                             match either the empty string or not match.
+                             For example, in `((a*))*' against the empty
+                             string, the two registers can either match
+                             the empty string (be 0/0), or not match
+                             (because of the outer *) (be -1/-1).  (Or
+                             one can do one and one can do the other.)  */
+                          || (regs.start[reg] == -1 && regs.end[reg] == -1
+                              && correct_regs->start[reg]
+                                 == correct_regs->end[reg])
+#endif
+                          ;
+
+                      all_regs_correct &= regs_correct[reg];
+                    }
+                }
+            } /* OK_TO_SEARCH */
+        }
+    }
+
+  if (fastmap_internal_error)
+    printf ("\n\nInternal error in re_compile_fastmap:");
+
+  if (internal_error_1)
+    {
+      if (!fastmap_internal_error)
+        printf ("\n");
+
+      printf ("\nInternal error in re_match or re_search:");
+    }
+
+  if (internal_error_2)
+    {
+      if (!internal_error_1)
+        printf ("\n");
+
+      printf ("\nInternal error in re_match_2 or re_search_2:");
+    }
+
+  if ((OK_TO_SEARCH && ((match && !test_should_match)
+                        || (!match && test_should_match))
+       || (correct_regs && !all_regs_correct))
+      || !nonconst_buf.fastmap_accurate
+      || invalid_pattern
+      || !pattern_should_be_valid
+      || internal_error_1 || internal_error_2
+      || verbose)
+    {
+      if (OK_TO_SEARCH && match && !test_should_match)
+        {
+          printf ("\n\nMatched but shouldn't have:\n");
+          if (match_1)
+            printf ("The single match/search succeeded.\n");
+
+          if (match_2)
+            printf ("The double match/search succeeded.\n");
+        }
+      else if (OK_TO_SEARCH && !match && test_should_match)
+        {
+          printf ("\n\nDidn't match but should have:\n");
+          if (!match_1)
+            printf ("The single match/search failed.\n");
+
+          if (!match_2)
+            printf ("The double match/search failed.\n");
+        }
+      else if (invalid_pattern && pattern_should_be_valid)
+        printf ("\n\nInvalid pattern (%s):\n", r);
+      else if (!nonconst_buf.fastmap_accurate && pattern_should_be_valid)
+        printf ("\n\nIncorrect fastmap:\n");
+      else if (OK_TO_SEARCH && correct_regs && !all_regs_correct)
+        printf ("\n\nNot all registers were correct:\n");
+      else if (verbose)
+        printf ("\n\nTest was OK:\n");
+
+
+      if ((!(invalid_pattern && !pattern_should_be_valid)) || verbose)
+        printf (" Pattern: `%s'.\n", pat);
+
+      if (pattern_should_be_valid || verbose
+          || internal_error_1 || internal_error_2)
+        {
+          printf(" Strings: ");
+          printf ("`%s' and ", str1 == NULL ? "NULL" : str1);
+          printf ("`%s'.\n", str2 == NULL ? "NULL" : str2);
+
+          if ((OK_TO_SEARCH || verbose || internal_error_1 || internal_error_2)
+              && !invalid_pattern)
+            {
+              /* There is a "before" pattern to compare against only if
+                 we actually searched; in verbose mode we can get here
+                 without having done so.  */
+              if (old_buf.buffer != NULL
+                  && memcmp (old_buf.buffer, nonconst_buf.buffer,
+                             nonconst_buf.used) != 0
+                  && !invalid_pattern)
+                {
+                  printf(" (%s)\n", r ? r : "Valid regular expression");
+                  printf ("\n Compiled pattern before matching: ");
+                  print_compiled_pattern (&old_buf);
+                  printf ("\n Compiled pattern after matching: ");
+                }
+              else
+                printf ("\n Compiled pattern: ");
+
+              print_compiled_pattern (&nonconst_buf);
+            }
+
+          if (correct_fastmap && (!nonconst_buf.fastmap_accurate || verbose))
+            {
+              printf ("\n The fastmap should have been: ");
+              print_fastmap (correct_fastmap);
+
+              printf ("\n Fastmap: ");
+              print_fastmap (fastmap);
+
+              printf ("\n Compiled pattern before matching: ");
+              print_compiled_pattern (&nonconst_buf);
+            }
+
+          if ((!all_regs_correct || verbose) && correct_regs)
+            {
+              unsigned this_reg;
+              printf ("\n Incorrect registers:");
+
+              /* Report only on registers that were compared above;
+                 nothing beyond that was ever stored in REGS_CORRECT.  */
+              for (this_reg = 0; this_reg < num_regs_checked; this_reg++)
+                {
+                  if (!regs_correct[this_reg])
+                    {
+                      printf ("\n Register %d's start was %2d. ", this_reg,
+                              regs.start[this_reg]);
+                      printf ("\tIt should have been %d.\n",
+                              correct_regs->start[this_reg]);
+                      printf (" Register %d's end was %2d. ", this_reg,
+                              regs.end[this_reg]);
+                      printf ("\tIt should have been %d.\n",
+                              correct_regs->end[this_reg]);
+                    }
+                }
+            }
+        }
+    }
+
+  /* Release everything acquired above.  Each pointer is NULL unless
+     the corresponding allocation really happened.  */
+  if (nonconst_buf.buffer != NULL)
+    free (nonconst_buf.buffer);
+
+  if (old_buf.buffer != NULL)
+    free (old_buf.buffer);
+
+  if (regs_correct != NULL)
+    free (regs_correct);
+
+  /* The register arrays are allocated by the matching routines.  */
+  if (regs.start != NULL)
+    free (regs.start);
+
+  if (regs.end != NULL)
+    free (regs.end);
+
+} /* general_test */
+
+
+/* Check that searching STRING for PATTERN reports a match starting at
+   MATCH_START_WANTED (the value re_search is expected to return).
+   Complaints go to stdout; nothing is printed on success.  */
+
+void
+test_search_return (match_start_wanted, pattern, string)
+    int match_start_wanted;
+    const char *pattern;
+    char *string;
+{
+  struct re_pattern_buffer buf;
+  char fastmap[1 << BYTEWIDTH];
+  const char *compile_return;
+  int match_start;
+
+  buf.allocated = 1;
+  buf.buffer = xmalloc (buf.allocated);
+  buf.fastmap = fastmap;
+
+  assert (pattern != NULL);
+  buf.translate = 0;
+  compile_return = re_compile_pattern (pattern, strlen (pattern), &buf);
+
+  if (compile_return)
+    {
+      printf ("\n\nInvalid pattern in test_search_return:\n");
+      printf ("%s\n", compile_return);
+    }
+  else
+    {
+      match_start = re_search (&buf, string, strlen (string),
+                               0, strlen (string), 0);
+
+      /* The wanted position goes first, to agree with the wording of
+         the message.  */
+      if (match_start != match_start_wanted)
+        printf ("\nWanted search to start at %d but started at %d.\n",
+                match_start_wanted, match_start);
+    }
+  free (buf.buffer);
+  buf.buffer = NULL;
+}
+
+
+/* Fill in the expected fastmap from variables the invoking function
+   must have in scope: `correct_fastmap' (an array of 1 << BYTEWIDTH
+   chars), `fastmap_string', `invert' and `match_newline'.  Every entry
+   starts out as INVERT, the entry for each character of FASTMAP_STRING
+   becomes !INVERT, and the entry for newline becomes MATCH_NEWLINE.
+
+   Characters are converted to `unsigned char' before being used as
+   indices, so that ones with the high bit set cannot index before the
+   start of the array where plain `char' is signed.  The do/while
+   wrapper makes `SET_FASTMAP ();' a single statement.  */
+#define SET_FASTMAP()                                                   \
+  do                                                                    \
+    {                                                                   \
+      unsigned this_char;                                               \
+      unsigned fastmap_string_length = strlen (fastmap_string);         \
+                                                                        \
+      memset (correct_fastmap, invert, (1 << BYTEWIDTH));               \
+                                                                        \
+      for (this_char = 0; this_char < fastmap_string_length;            \
+           this_char++)                                                 \
+        correct_fastmap[(unsigned char) fastmap_string[this_char]]      \
+          = !invert;                                                    \
+      correct_fastmap['\n'] = match_newline;                            \
+    }                                                                   \
+  while (0)
+
+
+/* Check that the fastmap computed for PAT is the one described by
+   FASTMAP_STRING, INVERT and MATCH_NEWLINE (see SET_FASTMAP).  No
+   strings are supplied, so general_test does no matching.  */
+
+void
+test_fastmap (pat, fastmap_string, invert, match_newline)
+    const char *pat;
+    char *fastmap_string;
+    unsigned invert;
+    unsigned match_newline;
+{
+  char correct_fastmap[(1 << BYTEWIDTH)];
+  char *no_string = NULL;
+  struct re_registers *no_regs = NULL;
+
+  SET_FASTMAP ();
+
+  general_test (1, 0, pat, no_string, no_string,
+                -1, 0, -1,
+                correct_fastmap, no_regs, -1);
+}
+
+
+/* Search STR for PAT, checking both the fastmap (described by
+   FASTMAP_STRING, INVERT and MATCH_NEWLINE; see SET_FASTMAP) and
+   register 0, which should come out as START0/END0.  All other
+   registers are expected to be -1.  CAN_BE_NULL is handed on to
+   general_test.  */
+
+void
+test_fastmap_search (pat, str, fastmap_string, invert, match_newline,
+                     can_be_null, start0, end0)
+    const char *pat;
+    char *str;
+    char *fastmap_string;
+    unsigned invert;
+    unsigned match_newline;
+    int can_be_null;
+    int start0;
+    int end0;
+{
+  char correct_fastmap[(1 << BYTEWIDTH)];
+  struct re_registers correct_regs;
+  int str_length = SAFE_STRLEN (str);
+
+  /* Build the expected register set.  */
+  correct_regs.num_regs = RE_NREGS;
+  correct_regs.start = (int *) xmalloc (RE_NREGS * sizeof (int));
+  correct_regs.end = (int *) xmalloc (RE_NREGS * sizeof (int));
+  set_all_registers (start0, end0,
+                     -1, -1, -1, -1, -1, -1,
+                     -1, -1, -1, -1, -1, -1,
+                     -1, -1, -1, -1, -1, -1,
+                     &correct_regs);
+
+  /* Build the expected fastmap.  */
+  SET_FASTMAP ();
+
+  /* Search the whole of STR, starting at its beginning.  */
+  general_test (1, 0, pat, str, NULL, 0, str_length, str_length,
+                correct_fastmap, &correct_regs, can_be_null);
+
+  free (correct_regs.start);
+  free (correct_regs.end);
+}
+
+
+
+
+/* Search the concatenation of STR1 and STR2 for PAT and check that
+   registers 0 through 9 come out as the given start/end pairs, with
+   any further registers at -1.  Does nothing when the global
+   `omit_register_tests' is set.  */
+
+void
+test_all_registers (pat, str1, str2,
+                    start0, end0, start1, end1,
+                    start2, end2, start3, end3,
+                    start4, end4, start5, end5,
+                    start6, end6, start7, end7,
+                    start8, end8, start9, end9)
+    char *pat; char *str1; char *str2;
+    int start0; int end0; int start1; int end1;
+    int start2; int end2; int start3; int end3;
+    int start4; int end4; int start5; int end5;
+    int start6; int end6; int start7; int end7;
+    int start8; int end8; int start9; int end9;
+{
+  if (!omit_register_tests)
+    {
+      struct re_registers correct_regs;
+      int total_length = SAFE_STRLEN (str1) + SAFE_STRLEN (str2);
+
+      correct_regs.num_regs = RE_NREGS;
+      correct_regs.start = (int *) xmalloc (RE_NREGS * sizeof (int));
+      correct_regs.end = (int *) xmalloc (RE_NREGS * sizeof (int));
+
+      set_all_registers (start0, end0, start1, end1,
+                         start2, end2, start3, end3,
+                         start4, end4, start5, end5,
+                         start6, end6, start7, end7,
+                         start8, end8, start9, end9,
+                         &correct_regs);
+
+      /* Search all of the text; there is no fastmap to check.  */
+      general_test (1, 0, pat, str1, str2, 0, total_length, total_length,
+                    NULL, &correct_regs, -1);
+
+      free (correct_regs.start);
+      free (correct_regs.end);
+    }
+}
+
+
+/* Check that compiling PATTERN fails with ERROR_CODE_EXPECTED.  The
+   pattern is compiled with REG_EXTENDED when the current syntax is
+   one of the POSIX extended ones, and with no flags otherwise.  */
+
+void
+invalid_pattern (error_code_expected, pattern)
+    int error_code_expected;
+    char *pattern;
+{
+  regex_t pattern_buffer;
+  int cflags;
+
+  if (re_syntax_options == RE_SYNTAX_POSIX_EXTENDED
+      || re_syntax_options == RE_SYNTAX_POSIX_MINIMAL_EXTENDED)
+    cflags = REG_EXTENDED;
+  else
+    cflags = 0;
+
+  test_compile (0, error_code_expected, pattern, &pattern_buffer, cflags);
+}
+
+
+void
+valid_pattern (pattern)
+ char *pattern;
+{
+ regex_t pattern_buffer;
+ int cflags
+ = re_syntax_options == RE_SYNTAX_POSIX_EXTENDED
+ || re_syntax_options == RE_SYNTAX_POSIX_MINIMAL_EXTENDED
+ ? REG_EXTENDED : 0;
+
+ test_compile (1, 0, pattern, &pattern_buffer, cflags);
+}
+
+
+/* Return SOURCE rewritten so that LEFT_DELIMITER and RIGHT_DELIMITER
+   act as operators under the current syntax: if the syntax wants those
+   delimiters backslashed (RE_NO_BK_PARENS resp. RE_NO_BK_BRACES is not
+   set), each occurrence gets a backslash put in front of it; otherwise
+   SOURCE is copied unchanged.  LEFT_DELIMITER must be `(' or `{'; for
+   anything else a complaint is printed and the program exits.
+
+   The result lives in a single static buffer that is freed and
+   reallocated on every call, so it is only good until the next call.
+   Passing the previous result back in as SOURCE is allowed (this is
+   what nesting BRACES_TO_OPS and PARENS_TO_OPS does).  */
+
+char *
+delimiters_to_ops (source, left_delimiter, right_delimiter)
+    char *source;
+    char left_delimiter;
+    char right_delimiter;
+{
+  static char *answer = NULL;
+  char *source_copy = NULL;
+  boolean double_size = false;
+  unsigned source_length;
+  unsigned source_char;
+  unsigned answer_char = 0;
+
+  assert (source != NULL);
+
+  switch (left_delimiter)
+    {
+      case '(': if (!(re_syntax_options & RE_NO_BK_PARENS))
+                  double_size = true;
+                break;
+      case '{': if (!(re_syntax_options & RE_NO_BK_BRACES))
+                  double_size = true;
+                break;
+      default: printf ("Found strange delimiter %c in delimiters_to_ops.\n",
+                       left_delimiter);
+               printf ("The source was `%s'\n", source);
+               exit (0);
+    }
+
+  /* SOURCE is our own previous result: work from a private copy, since
+     the old result is about to be freed.  */
+  if (answer == source)
+    {
+      source_copy = (char *) xmalloc (strlen (source) + 1);
+      strcpy (source_copy, source);
+      source = source_copy;
+    }
+
+  if (answer)
+    {
+      free (answer);
+      answer = NULL;
+    }
+
+  /* In the worst case every character needs a backslash.  */
+  source_length = strlen (source);
+  answer = (char *) xmalloc ((double_size
+                              ? source_length << 1
+                              : source_length)
+                             + 1);
+  if (!double_size)
+    strcpy (answer, source);
+  else
+    {
+      for (source_char = 0; source_char < source_length; source_char++)
+        {
+          if (source[source_char] == left_delimiter
+              || source[source_char] == right_delimiter)
+            answer[answer_char++] = '\\';
+
+          answer[answer_char++] = source[source_char];
+        }
+      answer[answer_char] = 0;
+    }
+
+  /* The private copy has served its purpose; don't leak it.  */
+  if (source_copy != NULL)
+    free (source_copy);
+
+  return answer;
+}
+
+
+/* Print PATTERN in its source form, followed by the compiled form
+   held in *PATTERN_BUFFER_PTR (via print_compiled_pattern).  */
+
+void
+print_pattern_info (pattern, pattern_buffer_ptr)
+    const char *pattern;
+    regex_t *pattern_buffer_ptr;
+{
+  printf (" Pattern: `%s'.\n", pattern);
+
+  fputs (" Compiled pattern: ", stdout);
+  print_compiled_pattern (pattern_buffer_ptr);
+}
+
+
+/* Check that PATTERN compiles with re_compile_pattern under the
+   current syntax.  If it does not, say so and print what is known
+   about the pattern.  Nothing is printed on success.  */
+
+void
+valid_nonposix_pattern (pattern)
+    char *pattern;
+{
+  struct re_pattern_buffer nonconst_buf;
+
+  /* Leave no field the library or the printing code might look at
+     uninitialized.  */
+  nonconst_buf.allocated = 0;
+  nonconst_buf.buffer = NULL;
+  nonconst_buf.fastmap = NULL;
+  nonconst_buf.translate = NULL;
+
+  assert (pattern != NULL);
+
+  if (re_compile_pattern (pattern, strlen (pattern), &nonconst_buf))
+    {
+      printf ("Couldn't compile the pattern.\n");
+      print_pattern_info (pattern, &nonconst_buf);
+    }
+
+  /* The compiled pattern was allocated on our behalf; release it.  */
+  if (nonconst_buf.buffer != NULL)
+    free (nonconst_buf.buffer);
+}
+
+
+/* Compile PATTERN with re_compile_pattern under the current syntax
+   and print both its source and compiled forms.  A compilation
+   failure is reported, and the pattern information is printed all the
+   same.  */
+
+void
+compile_and_print_pattern (pattern)
+    char *pattern;
+{
+  struct re_pattern_buffer nonconst_buf;
+
+  nonconst_buf.allocated = 0;
+  nonconst_buf.buffer = NULL;
+  nonconst_buf.fastmap = NULL;
+  /* The translate table must be set before compiling; the other
+     routines in this file that compile a pattern all do so.  It used
+     to be left uninitialized here.  */
+  nonconst_buf.translate = NULL;
+
+  if (re_compile_pattern (pattern, strlen (pattern), &nonconst_buf))
+    printf ("Couldn't compile the pattern.\n");
+
+  print_pattern_info (pattern, &nonconst_buf);
+
+  /* The compiled pattern was allocated on our behalf; release it.  */
+  if (nonconst_buf.buffer != NULL)
+    free (nonconst_buf.buffer);
+}
+
+
+/* Compile PATTERN with the `upcase' translate table and, if the
+   global `test_should_match' is set, check that it matches all of
+   STRING.  A pattern that fails to compile is reported.  Nothing is
+   checked about the match when `test_should_match' is clear.  */
+
+void
+test_case_fold (pattern, string)
+    const char *pattern;
+    char* string;
+{
+  struct re_pattern_buffer nonconst_buf;
+  const char *compile_error;
+
+  init_pattern_buffer (&nonconst_buf);
+  nonconst_buf.translate = upcase;
+
+  assert (pattern != NULL);
+  compile_error = re_compile_pattern (pattern, strlen (pattern),
+                                      &nonconst_buf);
+
+  if (compile_error != NULL)
+    {
+      printf ("\nShould have been a valid pattern but wasn't.\n");
+      print_pattern_info (pattern, &nonconst_buf);
+      return;
+    }
+
+  /* Only an expected match is verified.  */
+  if (!test_should_match)
+    return;
+
+  if (re_match (&nonconst_buf, string, strlen (string), 0, 0)
+      == strlen (string))
+    return;
+
+  printf ("Match failed for case fold.\n");
+  printf (" Pattern: `%s'.\n", pattern);
+  printf (" String: `%s'.\n", string == NULL ? "NULL" : string);
+}
+
+
+/* Compile PATTERN once and match it against all of STRING N times
+   with the same pattern buffer.  The outcome of the last attempt is
+   compared with the global `test_should_match', and a disagreement is
+   reported on stdout together with the string and the pattern.  With
+   N zero nothing is matched, which counts as "didn't match".  */
+
+void
+test_match_n_times (n, pattern, string)
+    unsigned n;
+    char* pattern;
+    char* string;
+{
+  struct re_pattern_buffer buf;
+  const char *r;
+  unsigned match = 0;
+  unsigned this_match;
+
+  buf.allocated = 0;
+  buf.buffer = NULL;
+  buf.translate = 0;
+
+  assert (pattern != NULL);
+
+  r = re_compile_pattern (pattern, strlen (pattern), &buf);
+  if (r)
+    {
+      printf ("Didn't compile.\n");
+      printf (" Pattern: %s.\n", pattern);
+    }
+  else
+    {
+      for (this_match = 1; this_match <= n; this_match++)
+        match = (re_match (&buf, string, strlen (string),
+                           0, 0)
+                 == strlen (string));
+
+      if (match && !test_should_match)
+        printf ("\n\nMatched but shouldn't have:\n");
+      else if (!match && test_should_match)
+        printf ("\n\nDidn't match but should have:\n");
+
+      if ((match && !test_should_match) || (!match && test_should_match))
+        {
+          /* There is only one string here, so end the line after it
+             rather than promising a second one with "and".  */
+          printf(" The string to match was: ");
+          if (string)
+            printf ("`%s'.\n", string);
+          else
+            printf ("`'.\n");
+
+          printf (" Pattern: %s.\n", pattern);
+          /* The compiled form itself follows this label.  */
+          printf (" Compiled pattern: ");
+          print_compiled_pattern (&buf);
+        }
+    }
+
+  /* The compiled pattern was allocated on our behalf; release it.  */
+  if (buf.buffer != NULL)
+    free (buf.buffer);
+}
+
+
+/* Check PAT against the text made up of STR1 followed by STR2 (either
+   may be NULL), requiring the match to cover all of it.  Whether a
+   match is the expected outcome comes from `test_should_match'.  */
+
+void
+test_match_2 (pat, str1, str2)
+    const char *pat;
+    char *str1;
+    char *str2;
+{
+  int one_past_end = SAFE_STRLEN (str1) + SAFE_STRLEN (str2);
+
+  general_test (1, 1, pat, str1, str2, 0, 1, one_past_end, NULL, 0, -1);
+}
+
+/* Check PAT against STR twice: once with STR as the first of the two
+   strings handed to test_match_2 and once with it as the second, the
+   other string being NULL each time.  */
+
+void
+test_match (pat, str)
+    const char *pat;
+    char *str;
+{
+  char *no_string = NULL;
+
+  test_match_2 (pat, str, no_string);
+  test_match_2 (pat, no_string, str);
+}
diff --git a/gnu/lib/libregex/test/test.h b/gnu/lib/libregex/test/test.h
new file mode 100644
index 0000000..fb67126
--- /dev/null
+++ b/gnu/lib/libregex/test/test.h
@@ -0,0 +1,141 @@
+/* test.h: for Regex testing. */
+
+#ifndef TEST_H
+#define TEST_H
+
+#include <stdio.h>
+#include <assert.h>
+
+#include <sys/types.h>
+#include "regex.h"
+
+
+/* A strlen that works even on a null pointer.  The argument is
+   evaluated twice, so it must not have side effects.  */
+#define SAFE_STRLEN(s) ((s) == NULL ? 0 : strlen (s))
+
+typedef enum { false = 0, true = 1 } boolean;
+
+/* Says whether the current test should match or fail to match.  */
+extern boolean test_should_match;
+
+/* If nonzero, don't do register testing.  */
+extern boolean omit_register_tests;
+
+extern void *xmalloc ();
+
+/* Defined in upcase.c. */
+extern char upcase[];
+
+/* The group of tests being run; see the variable `t' below.  */
+typedef enum
+{
+  all_test,
+  other_test,
+  posix_basic_test,
+  posix_extended_test,
+  posix_interface_test,
+  regress_test
+} test_type;
+
+extern test_type t;
+
+
+#if __STDC__
+
+extern char *concat (char *, char *);
+
+extern void general_test (unsigned pattern_should_be_valid,
+                          unsigned match_whole_string,
+                          const char *pat, char *str1, char *str2,
+                          int start, int range, int end,
+                          char *correct_fastmap,
+                          struct re_registers *correct_regs, int can_be_null);
+
+
+extern void init_pattern_buffer (regex_t *pattern_buffer_ptr);
+
+extern void test_compile (unsigned valid_pattern, int error_code_expected,
+                          const char *pattern, regex_t *pattern_buffer_ptr,
+                          int cflags);
+
+/* This is the function BRACES_TO_OPS and PARENS_TO_OPS call.  It is
+   defined old-style with `char' delimiter parameters, and a prototype
+   for such a definition has to name the promoted type, hence `int'.  */
+extern char *delimiters_to_ops (char *source, int left_delimiter,
+                                int right_delimiter);
+
+
+extern void test_search_return (int, const char *, char *);
+
+extern void test_berk_search (const char *pattern, char *string);
+
+extern void test_fastmap (const char *pat, char *fastmap_string, unsigned invert,
+                          unsigned match_newline);
+
+extern void test_fastmap_search (const char *pat, char *str, char *fastmap_string,
+                                 unsigned invert, unsigned match_newline,
+                                 int can_be_null, int start0, int end0);
+
+extern void test_all_registers (char *pat, char *str1, char *str2,
+                                int start0, int end0, int start1, int end1,
+                                int start2, int end2, int start3, int end3,
+                                int start4, int end4, int start5, int end5,
+                                int start6, int end6, int start7, int end7,
+                                int start8, int end8, int start9, int end9);
+
+extern void print_pattern_info (const char *pattern, regex_t *pattern_buffer_ptr);
+extern void compile_and_print_pattern (char *pattern);
+
+extern void test_case_fold (const char *pattern, char* string);
+
+extern void test_posix_generic ();
+
+extern void test_grouping ();
+
+extern void invalid_pattern (int error_code_expected, char *pattern);
+extern void valid_nonposix_pattern (char *pattern);
+extern void valid_pattern (char *pattern);
+
+extern void test_match_2 (const char *pat, char *str1, char *str2);
+extern void test_match (const char *pat, char *str);
+
+#else /* not __STDC__ */
+
+/* Without this, the function would be taken to return `int', which
+   the casts in BRACES_TO_OPS and PARENS_TO_OPS cannot repair.  */
+extern char *delimiters_to_ops ();
+
+#endif /* not __STDC__ */
+
+
+/* Check registers 0 through 2 after searching STR1 followed by STR2
+   for PAT; the remaining registers are expected to be -1.  Wrapped in
+   do/while so that an invocation is a single statement and a
+   following `else' cannot attach to the `if' inside.  */
+#define TEST_REGISTERS_2(pat, str1, str2, start0, end0, start1, end1, start2, end2)\
+  do                                                                    \
+    {                                                                   \
+      if (!omit_register_tests)                                         \
+        test_all_registers (pat, str1, str2, start0, end0, start1, end1, \
+                            start2, end2, -1, -1,  -1, -1, -1, -1, -1, -1,\
+                            -1, -1, -1, -1, -1, -1);                    \
+    }                                                                   \
+  while (0)
+
+
+/* Like TEST_REGISTERS_2, with STR as the only string.  */
+#define TEST_REGISTERS(pat, str, start0, end0, start1, end1, start2, end2) \
+  TEST_REGISTERS_2 (pat, str, NULL, start0, end0, start1, end1, start2, end2)
+
+/* Rewrite STRING so that braces (resp. parentheses) are operators in
+   the current syntax.  The result is only good until the next use of
+   either macro.  */
+#define BRACES_TO_OPS(string)  ((char *) delimiters_to_ops (string, '{', '}'))
+#define PARENS_TO_OPS(string)  ((char *) delimiters_to_ops (string, '(', ')'))
+
+/* Check that PAT does not compile.  */
+#define INVALID_PATTERN(pat)                                            \
+  general_test (0, 0, pat, NULL, NULL, -1, 0, -1, NULL, 0, -1)
+
+
+/* Check P against itself, with test_match.  */
+#define MATCH_SELF(p) test_match (p, p)
+
+/* Search STR for PAT with a starting position of START and a range
+   of 1.  */
+#define TEST_POSITIONED_MATCH(pat, str, start)                          \
+  general_test (1, 0, pat, str, NULL, start, 1, SAFE_STRLEN (str),      \
+                NULL, 0, -1)
+
+/* Search STR for PAT from position 0 with a range of 1, passing END
+   as the end argument.  */
+#define TEST_TRUNCATED_MATCH(pat, str, end)                             \
+  general_test (1, 0, pat, str, NULL, 0, 1, end, NULL, 0, -1)
+
+/* Search STR1 followed by STR2 for PAT with the given START, RANGE
+   and end position.  */
+#define TEST_SEARCH_2(pat, str1, str2, start, range, one_past_end)      \
+  general_test (1, 0, pat, str1, str2, start, range, one_past_end,      \
+                NULL, 0, -1)
+
+/* Do TEST_SEARCH_2 with STR first as the first string and then as the
+   second, the other one being NULL.  Wrapped in do/while so that an
+   invocation followed by a semicolon is a single statement.  */
+#define TEST_SEARCH(pat, str, start, range)                             \
+  do                                                                    \
+    {                                                                   \
+      TEST_SEARCH_2 (pat, str, NULL, start, range, SAFE_STRLEN (str));  \
+      TEST_SEARCH_2 (pat, NULL, str, start, range, SAFE_STRLEN (str));  \
+    }                                                                   \
+  while (0)
+
+#endif /* TEST_H */
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/tregress.c b/gnu/lib/libregex/test/tregress.c
new file mode 100644
index 0000000..7858cac
--- /dev/null
+++ b/gnu/lib/libregex/test/tregress.c
@@ -0,0 +1,464 @@
+/* tregress.c: reported bugs. The `t' just makes the filename not have
+ a common prefix with `regex.c', so completion works better. */
+
+#include "test.h"
+
+
+boolean pause_at_error = true;
+
+/* Return I as a decimal string in storage obtained from xmalloc.  */
+
+char *
+itoa (i)
+    int i;
+{
+  char *digits;
+
+  /* sign + 19 digits (enough for 64 bits) + null */
+  digits = xmalloc (21);
+  (void) sprintf (digits, "%d", i);
+
+  return digits;
+}
+
+
+/* Report on stderr that ROUTINE failed, giving RET (its return value
+   as text), STR if it is neither null nor empty, and PAT; then print
+   the compiled pattern in BUF.  If the global `pause_at_error' is
+   set, wait for a character on standard input before returning.  */
+
+static void
+simple_fail (routine, pat, buf, str, ret)
+    const char *routine;
+    const char *pat;
+    struct re_pattern_buffer *buf;
+    const char *str;
+    char *ret;
+{
+  boolean have_string = (str != NULL && *str != '\0');
+
+  fprintf (stderr, "Failed %s (return = %s).\n", routine, ret);
+  if (have_string)
+    fprintf (stderr, " String = %s\n", str);
+  fprintf (stderr, " Pattern = %s\n", pat);
+  print_compiled_pattern (buf);
+
+  if (!pause_at_error)
+    return;
+
+  fprintf (stderr, "RET to continue: ");
+  (void) getchar ();
+}
+
+
+/* The routines from here on abbreviate the most common calls.  */
+
+/* Compile PAT into BUF, reporting a failure through simple_fail.  */
+
+static void
+simple_compile (pat, buf)
+    const char *pat;
+    struct re_pattern_buffer *buf;
+{
+  const char *compile_error;
+
+  compile_error = re_compile_pattern (pat, strlen (pat), buf);
+  if (compile_error == NULL)
+    return;
+
+  simple_fail ("compile", pat, buf, NULL, compile_error);
+}
+
+
+/* Compile PAT and then its fastmap, reporting through simple_fail if
+   re_compile_fastmap returns anything but zero.  */
+
+static void
+simple_fastmap (pat)
+    const char *pat;
+{
+  struct re_pattern_buffer buf;
+  char fastmap[256];
+  int ret;
+
+  buf.allocated = 0;
+  buf.buffer = NULL;
+  buf.translate = NULL;
+  buf.fastmap = fastmap;
+
+  simple_compile (pat, &buf);
+
+  ret = re_compile_fastmap (&buf);
+
+  if (ret != 0)
+    {
+      /* itoa hands back allocated storage; release it once it has
+         been printed.  */
+      char *ret_text = itoa (ret);
+
+      simple_fail ("fastmap compile", pat, &buf, NULL, ret_text);
+      free (ret_text);
+    }
+
+  /* The compiled pattern was allocated on our behalf; release it.  */
+  if (buf.buffer != NULL)
+    free (buf.buffer);
+}
+
+
+/* Expect PAT to match all of STR, resp. not to match it at all.  */
+#define SIMPLE_MATCH(pat, str) do_match (pat, str, strlen (str))
+#define SIMPLE_NONMATCH(pat, str) do_match (pat, str, -1)
+
+/* Compile PAT, match it at the start of STR (passed to re_match_2 as
+   the second string, the first being null), and report through
+   simple_fail unless the return value is EXPECTED.  */
+
+static void
+do_match (pat, str, expected)
+    const char *pat, *str;
+    int expected;
+{
+  int ret;
+  unsigned len;
+  struct re_pattern_buffer buf;
+
+  buf.allocated = 0;
+  buf.buffer = NULL;
+  buf.translate = NULL;
+  buf.fastmap = NULL;
+
+  simple_compile (pat, &buf);
+
+  len = strlen (str);
+
+  ret = re_match_2 (&buf, NULL, 0, str, len, 0, NULL, len);
+
+  if (ret != expected)
+    {
+      /* itoa hands back allocated storage; release it once it has
+         been printed.  */
+      char *ret_text = itoa (ret);
+
+      simple_fail ("match", pat, &buf, str, ret_text);
+      free (ret_text);
+    }
+
+  /* The compiled pattern was allocated on our behalf; release it.  */
+  if (buf.buffer != NULL)
+    free (buf.buffer);
+}
+
+
+/* Compile PAT, search all of STR for it (passed to re_search_2 as the
+   second string, the first being null), and report through
+   simple_fail unless the return value is CORRECT_STARTPOS.  */
+
+static void
+simple_search (pat, str, correct_startpos)
+    const char *pat, *str;
+    int correct_startpos;
+{
+  int ret;
+  unsigned len;
+  struct re_pattern_buffer buf;
+
+  buf.allocated = 0;
+  buf.buffer = NULL;
+  buf.translate = NULL;
+  buf.fastmap = NULL;
+
+  simple_compile (pat, &buf);
+
+  len = strlen (str);
+
+  ret = re_search_2 (&buf, NULL, 0, str, len, 0, len, NULL, len);
+
+  if (ret != correct_startpos)
+    {
+      /* itoa hands back allocated storage; release it once it has
+         been printed.  */
+      char *ret_text = itoa (ret);
+
+      /* Say "search", so that the report can be told apart from one
+         made by do_match.  */
+      simple_fail ("search", pat, &buf, str, ret_text);
+      free (ret_text);
+    }
+
+  /* The compiled pattern was allocated on our behalf; release it.  */
+  if (buf.buffer != NULL)
+    free (buf.buffer);
+}
+
+/* Past bugs people have reported. */
+
+void
+test_regress ()
+{
+ extern char upcase[];
+ struct re_pattern_buffer buf;
+ unsigned len;
+ struct re_registers regs;
+ int ret;
+ char *fastmap = xmalloc (256);
+
+ buf.translate = NULL;
+ buf.fastmap = NULL;
+ buf.allocated = 0;
+ buf.buffer = NULL;
+
+ printf ("\nStarting regression tests.\n");
+ t = regress_test;
+
+ test_should_match = true;
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ /* enami@sys.ptg.sony.co.jp 10 Nov 92 15:19:02 JST */
+ buf.translate = upcase;
+ SIMPLE_MATCH ("[A-[]", "A");
+ buf.translate = NULL;
+
+ /* meyering@cs.utexas.edu Nov 6 22:34:41 1992 */
+ simple_search ("\\w+", "a", 0);
+
+ /* jimb@occs.cs.oberlin.edu 10 Sep 92 00:42:33 */
+ buf.translate = upcase;
+ SIMPLE_MATCH ("[\001-\377]", "\001");
+ SIMPLE_MATCH ("[\001-\377]", "a");
+ SIMPLE_MATCH ("[\001-\377]", "\377");
+ buf.translate = NULL;
+
+ /* mike@skinner.cs.uoregon.edu 1 Sep 92 01:45:22 */
+ SIMPLE_MATCH ("^^$", "^");
+
+ /* pclink@qld.tne.oz.au Sep 7 22:42:36 1992 */
+ re_set_syntax (RE_INTERVALS);
+ SIMPLE_MATCH ("^a\\{3\\}$", "aaa");
+ SIMPLE_NONMATCH ("^a\\{3\\}$", "aa");
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ /* pclink@qld.tne.oz.au, 31 Aug 92. (conjecture) */
+ re_set_syntax (RE_INTERVALS);
+ simple_search ("a\\{1,3\\}b", "aaab", 0);
+ simple_search ("a\\{1,3\\}b", "aaaab", 1);
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ /* trq@dionysos.thphys.ox.ac.uk, 31 Aug 92. (simplified) */
+ simple_fastmap ("^.*\n[ ]*");
+
+ /* wind!greg@plains.NoDak.edu, 25 Aug 92. (simplified) */
+ re_set_syntax (RE_INTERVALS);
+ SIMPLE_MATCH ("[a-zA-Z]*.\\{5\\}", "xN0000");
+ SIMPLE_MATCH ("[a-zA-Z]*.\\{5\\}$", "systemxN0000");
+ SIMPLE_MATCH ("\\([a-zA-Z]*\\).\\{5\\}$", "systemxN0000");
+ re_set_syntax (RE_SYNTAX_EMACS);
+
+ /* jimb, 18 Aug 92. Don't use \000, so `strlen' (in our testing
+ routines) will work. (This still tickles the bug jimb reported.) */
+ SIMPLE_MATCH ("[\001-\377]", "\001");
+ SIMPLE_MATCH ("[\001-\377]", "a");
+ SIMPLE_MATCH ("[\001-\377]", "\377");
+
+ /* jimb, 13 Aug 92. */
+ SIMPLE_MATCH ("[\001-\177]", "\177");
+
+ /* Tests based on bwoelfel's below. */
+ SIMPLE_MATCH ("\\(a\\|ab\\)*", "aab");
+ SIMPLE_MATCH ("\\(a\\|ab\\)+", "aab");
+ SIMPLE_MATCH ("\\(a*\\|ab\\)+", "aab");
+ SIMPLE_MATCH ("\\(a+\\|ab\\)+", "aab");
+ SIMPLE_MATCH ("\\(a?\\|ab\\)+", "aab");
+
+ /* bwoelfel@widget.seas.upenn.edu, 25 Jul 92. */
+ SIMPLE_MATCH ("^\\([ab]+\\|bc\\)+", "abc");
+
+ /* jla, 3 Jul 92. Core dump in re_search_2. */
+ buf.fastmap = fastmap;
+ buf.translate = upcase;
+#define DATEDUMP_PATTERN " *[0-9]*:"
+ if (re_compile_pattern (DATEDUMP_PATTERN, strlen (DATEDUMP_PATTERN), &buf)
+ != NULL)
+ printf ("date dump compile failed.\n");
+ regs.num_regs = 0;
+ regs.start = regs.end = NULL;
+ if (re_search_2 (&buf, NULL, 0, "Thu Jul 2 18:34:18 1992",
+ 24, 3, 21, &regs, 24) != 10)
+ printf ("date dump search failed.\n");
+ buf.fastmap = 0;
+ buf.translate = 0;
+
+
+ /* rms, 4 Jul 1992. Pattern is much slower in Emacs 19. Fastmap
+ should be only a backslash. */
+#define BEGINEND_PATTERN "\\(\\\\begin\\s *{\\)\\|\\(\\\\end\\s *{\\)"
+ test_fastmap (BEGINEND_PATTERN, "\\", false, 0);
+
+
+ /* kaoru@is.s.u-tokyo.ac.jp, 27 Jun 1992. Code for [a-z] (in regex.c)
+ should translate the whole set. */
+ buf.translate = upcase;
+#define CASE_SET_PATTERN "[ -`]"
+ if (re_compile_pattern (CASE_SET_PATTERN, strlen (CASE_SET_PATTERN), &buf)
+ != NULL)
+ printf ("case set compile failed.\n");
+ if (re_match_2 (&buf, "K", 1, "", 0, 0, NULL, 1) != 1)
+ printf ("case set match failed.\n");
+
+#define CASE_SET_PATTERN2 "[`-|]"
+ if (re_compile_pattern (CASE_SET_PATTERN2, strlen (CASE_SET_PATTERN2), &buf)
+ != NULL)
+ printf ("case set2 compile failed.\n");
+ if (re_match_2 (&buf, "K", 1, "", 0, 0, NULL, 1) != 1)
+ printf ("case set2 match failed.\n");
+
+ buf.translate = NULL;
+
+
+ /* jimb, 27 Jun 92. Problems with gaps in the string. */
+#define GAP_PATTERN "x.*y.*z"
+ if (re_compile_pattern (GAP_PATTERN, strlen (GAP_PATTERN), &buf) != NULL)
+ printf ("gap didn't compile.\n");
+ if (re_match_2 (&buf, "x-", 2, "y-z-", 4, 0, NULL, 6) != 5)
+ printf ("gap match failed.\n");
+
+
+ /* jimb, 19 Jun 92. Since `beginning of word' matches at the
+ beginning of the string, then searching ought to find it there.
+ If `re_compile_fastmap' is not called, then it works ok. */
+ buf.fastmap = fastmap;
+#define BOW_BEG_PATTERN "\\<"
+ if (re_compile_pattern (BOW_BEG_PATTERN, strlen (BOW_BEG_PATTERN), &buf)
+ != NULL)
+ printf ("begword-begstring didn't compile.\n");
+ if (re_search (&buf, "foo", 3, 0, 3, NULL) != 0)
+ printf ("begword-begstring search failed.\n");
+
+ /* Same bug report, different null-matching pattern. */
+#define EMPTY_ANCHOR_PATTERN "^$"
+ if (re_compile_pattern (EMPTY_ANCHOR_PATTERN, strlen (EMPTY_ANCHOR_PATTERN),
+ &buf) != NULL)
+ printf ("empty anchor didn't compile.\n");
+ if (re_search (&buf, "foo\n\nbar", 8, 0, 8, NULL) != 4)
+ printf ("empty anchor search failed.\n");
+
+ /* jimb@occs.cs.oberlin.edu, 21 Apr 92. After we first allocate
+ registers for a particular re_pattern_buffer, we might have to
+ reallocate more registers on subsequent calls -- and we should be
+ reusing the same memory. */
+#define ALLOC_REG_PATTERN "\\(abc\\)"
+ free (buf.fastmap);
+ buf.fastmap = 0;
+ if (re_compile_pattern (ALLOC_REG_PATTERN, strlen (ALLOC_REG_PATTERN), &buf)
+ != NULL)
+ printf ("register allocation didn't compile.\n");
+ if (re_match (&buf, "abc", 3, 0, &regs) != 3)
+ printf ("register allocation didn't match.\n");
+ if (regs.start[1] != 0 || regs.end[1] != 3)
+ printf ("register allocation reg #1 wrong.\n");
+
+ {
+ int *old_regstart = regs.start;
+ int *old_regend = regs.end;
+
+ if (re_match (&buf, "abc", 3, 0, &regs) != 3)
+ printf ("register reallocation didn't match.\n");
+ if (regs.start[1] != 0 || regs.end[1] != 3
+ || old_regstart[1] != 0 || old_regend[1] != 3
+ || regs.start != old_regstart || regs.end != old_regend)
+ printf ("register reallocation registers wrong.\n");
+ }
+
+ /* jskudlarek@std.MENTORG.COM, 21 Apr 92 (string-match). */
+#define JSKUD_PATTERN "[^/]+\\(/[^/.]+\\)?/[0-9]+$"
+ if (re_compile_pattern (JSKUD_PATTERN, strlen (JSKUD_PATTERN), &buf) != NULL)
+ printf ("jskud test didn't compile.\n");
+ if (re_search (&buf, "a/1", 3, 0, 3, &regs) != 0)
+ printf ("jskud test didn't match.\n");
+ if (regs.start[1] != -1 || regs.end[1] != -1)
+ printf ("jskud test, reg #1 wrong.\n");
+
+ /* jla's bug (with string-match), 5 Feb 92. */
+ TEST_SEARCH ("\\`[ \t\n]*", "jla@challenger (Joseph Arceneaux)", 0, 100);
+
+ /* jwz@lucid.com, 8 March 1992 (re-search-forward). (His is the
+ second.) These are not supposed to match. */
+#if 0
+ /* This one fails quickly, because we can change the maybe_pop_jump
+ from the + to a pop_failure_pop, because of the c's. */
+ TEST_SEARCH ("^\\(To\\|CC\\):\\([^c]*\\)+co",
+"To: hbs%titanic@lucid.com (Harlan Sexton)\n\
+Cc: eb@thalidomide, jlm@thalidomide\n\
+Subject: Re: so is this really as horrible an idea as it seems to me?\n\
+In-Reply-To: Harlan Sexton's message of Sun 8-Mar-92 11:00:06 PST <9203081900.AA24794@titanic.lucid>\n\
+References: <9203080736.AA05869@thalidomide.lucid>\n\
+ <9203081900.AA24794@titanic.lucid>", 0, 5000);
+
+ /* This one takes a long, long time to complete, because we have to
+ keep the failure points around because we might backtrack. */
+ TEST_SEARCH ("^\\(To\\|CC\\):\\(.*\n.*\\)+co",
+ /* "X-Windows: The joke that kills.\n\
+FCC: /u/jwz/VM/inbox\n\
+From: Jamie Zawinski <jwz@lucid.com>\n\ */
+"To: hbs%titanic@lucid.com (Harlan Sexton)\n\
+Cc: eb@thalidomide, jlm@thalidomide\n\
+Subject: Re: so is this really as horrible an idea as it seems to me?\n\
+In-Reply-To: Harlan Sexton's message of Sun 8-Mar-92 11:00:06 PST <9203081900.AA24794@titanic.lucid>\n\
+References: <9203080736.AA05869@thalidomide.lucid>\n\
+ <9203081900.AA24794@titanic.lucid>", 0, 5000);
+#endif /* 0 [failed searches] */
+
+
+ /* macrakis' bugs. */
+ buf.translate = upcase; /* message of 24 Jan 91 */
+ if (re_compile_pattern ("[!-`]", 5, &buf) != NULL)
+ printf ("Range test didn't compile.\n");
+ if (re_match (&buf, "A", 1, 0, NULL) != 1)
+ printf ("Range test #1 didn't match.\n");
+ if (re_match (&buf, "a", 1, 0, NULL) != 1)
+ printf ("Range test #2 didn't match.\n");
+
+ buf.translate = 0;
+#define FAO_PATTERN "\\(f\\(.\\)o\\)+"
+ if (re_compile_pattern (FAO_PATTERN, strlen (FAO_PATTERN), &buf) != NULL)
+ printf ("faofdx test didn't compile.\n");
+ if (re_search (&buf, "faofdx", 6, 0, 6, &regs) != 0)
+ printf ("faofdx test didn't match.\n");
+ if (regs.start[1] != 0 || regs.end[1] != 3)
+ printf ("faofdx test, reg #1 wrong.\n");
+ if (regs.start[2] != 1 || regs.end[2] != 2)
+ printf ("faofdx test, reg #2 wrong.\n");
+
+ TEST_REGISTERS ("\\(a\\)*a", "aaa", 0, 3, 1, 2, -1, -1);
+ test_fastmap ("^\\([^ \n]+:\n\\)+\\([^ \n]+:\\)", " \n", 1, 0);
+
+ /* 40 lines, 48 a's in each line. */
+ test_match ("^\\([^ \n]+:\n\\)+\\([^ \n]+:\\)",
+ "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:\n\
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:");
+
+ /* 640 a's followed by one b, twice. */
+ test_match ("\\(.*\\)\\1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab");
+
+ /* 640 a's followed by two b's, twice. */
+ test_match ("\\(.*\\)\\1", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabb");
+
+
+ /* Dave G. bug: Reference to a subexpression which didn't match.
+ Should fail. */
+ re_set_syntax (RE_NO_BK_PARENS | RE_NO_BK_VBAR);
+ test_match ("(ooooooooooone())-annnnnnnnnnnd-(twooooooooooo\\2)",
+ "ooooooooooone-annnnnnnnnnnd-twooooooooooo");
+ test_match ("(o|t)", "o");
+ test_match ("(o()|t)", "o");
+ test_match ("(o|t)", "o");
+ test_match ("(ooooooooooooooo|tttttttttttttttt())", "ooooooooooooooo");
+ test_match ("(o|t())", "o");
+ test_match ("(o()|t())", "o");
+ test_match ("(ooooooooooooooooooooooooone()|twooooooooooooooooooooooooo())", "ooooooooooooooooooooooooone");
+ test_match ("(o()|t())-a-(t\\2|f\\3)", "o-a-t");
+ test_match ("(o()|t())-a-(t\\2|f\\3)", "t-a-f");
+
+ test_should_match = 0;
+ test_match ("(foo(bar)|second)\\2", "second");
+ test_match ("(o()|t())-a-(t\\2|f\\3)", "t-a-t");
+ test_match ("(o()|t())-a-(t\\2|f\\3)", "o-a-f");
+
+ re_set_syntax (RE_SYNTAX_EMACS);
+ test_match ("\\(foo\\(bar\\)\\|second\\)\\2", "secondbar");
+ test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)",
+ "one-and-four");
+ test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)",
+ "two-and-three");
+
+ test_should_match = 1;
+ re_set_syntax (RE_SYNTAX_EMACS);
+ test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)",
+ "one-and-three");
+ test_match ("\\(one\\(\\)\\|two\\(\\)\\)-and-\\(three\\2\\|four\\3\\)",
+ "two-and-four");
+
+ TEST_REGISTERS (":\\(.*\\)", ":/", 0, 2, 1, 2, -1, -1);
+
+ /* Bug with `upcase' translation table, from Nico Josuttis
+ <nico@bredex.de> */
+ test_should_match = 1;
+ test_case_fold ("[a-a]", "a");
+
+ printf ("\nFinished regression tests.\n");
+}
+
+
+
+/*
+Local variables:
+make-backup-files: t
+version-control: t
+trim-versions-without-asking: nil
+End:
+*/
diff --git a/gnu/lib/libregex/test/upcase.c b/gnu/lib/libregex/test/upcase.c
new file mode 100644
index 0000000..5147b81
--- /dev/null
+++ b/gnu/lib/libregex/test/upcase.c
@@ -0,0 +1,39 @@
+/* Indexed by a character code (0 through 0377), gives the upper case
+ equivalent of the character: ASCII a-z (0141-0172) map to A-Z
+ (0101-0132); every other code maps to itself. */
+char upcase[0400] =
+ { 000, 001, 002, 003, 004, 005, 006, 007,
+ 010, 011, 012, 013, 014, 015, 016, 017,
+ 020, 021, 022, 023, 024, 025, 026, 027,
+ 030, 031, 032, 033, 034, 035, 036, 037,
+ 040, 041, 042, 043, 044, 045, 046, 047,
+ 050, 051, 052, 053, 054, 055, 056, 057,
+ 060, 061, 062, 063, 064, 065, 066, 067,
+ 070, 071, 072, 073, 074, 075, 076, 077,
+ 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
+ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
+ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
+ 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
+ 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107, /* 0140 unchanged; a-g -> A-G */
+ 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117, /* h-o -> H-O */
+ 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127, /* p-w -> P-W */
+ 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177, /* x-z -> X-Z; 0173-0177 unchanged */
+ 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207, /* NOTE(review): 0200-0377 exceed CHAR_MAX if plain char is signed */
+ 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
+ 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
+ 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
+ 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
+ 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
+ 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
+ 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
+ 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
+ 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
+ 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
+ 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
+ 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
+ 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
+ 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
+ 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
+ };
+
+
diff --git a/gnu/lib/libregex/test/xmalloc.c b/gnu/lib/libregex/test/xmalloc.c
new file mode 100644
index 0000000..88be1a6
--- /dev/null
+++ b/gnu/lib/libregex/test/xmalloc.c
@@ -0,0 +1,21 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Return a pointer to SIZE bytes of fresh storage obtained from malloc.
+   Never returns NULL: if malloc fails, report the request on stderr and
+   abort.  A zero SIZE is rounded up to one byte, because malloc (0) may
+   legitimately return NULL and that is not an allocation failure.  */
+void *
+xmalloc (size)
+     unsigned size;
+{
+  void *new_mem = malloc (size ? size : 1);
+
+  if (new_mem == NULL)
+    {
+      fprintf (stderr, "xmalloc: request for %u bytes failed.\n", size);
+      abort ();
+    }
+
+  return new_mem;
+}
OpenPOWER on IntegriCloud