summary refs log tree commit diff stats
path: root/gnu/lib/libregex
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/lib/libregex')
-rw-r--r-- gnu/lib/libregex/doc/Makefile.in 92
-rw-r--r-- gnu/lib/libregex/doc/regex.aux 136
-rw-r--r-- gnu/lib/libregex/doc/regex.cps 152
-rw-r--r-- gnu/lib/libregex/doc/regex.info 2836
-rw-r--r-- gnu/lib/libregex/doc/regex.texi 3138
-rw-r--r-- gnu/lib/libregex/test/TAGS 373
6 files changed, 0 insertions, 6727 deletions
diff --git a/gnu/lib/libregex/doc/Makefile.in b/gnu/lib/libregex/doc/Makefile.in
deleted file mode 100644
index 2f5d382..0000000
--- a/gnu/lib/libregex/doc/Makefile.in
+++ /dev/null
@@ -1,92 +0,0 @@
-# Makefile for regex documentation.
-#
-# Copyright (C) 1992 Free Software Foundation, Inc.
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2, or (at your option)
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-# Installation directories.
-prefix = /usr/local
-infodir = $(prefix)/info
-
-srcdir = @srcdir@
-VPATH = @srcdir@:../@srcdir@
-
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-
-MAKEINFO = makeinfo --no-split
-SHELL = /bin/sh
-TEX = tex
-TEXINDEX = texindex
-
-default all: regex.info regex.dvi
-.PHONY: default all
-
-# We need to include some code from regex.h.
-regex.texi: xregex.texi
- rm -f $@
- gawk -f include.awk -vsource=../$(srcdir)/regex.h \
- <../$(srcdir)/doc/xregex.texi \
- | expand >$@
- chmod a-w $@
-
-regex.dvi: regex.cps
- $(TEX) regex.texi
-regex.cps: regex.cp
- $(TEXINDEX) regex.??
-regex.cp: regex.texi
- $(TEX) ../$(srcdir)/doc/regex.texi
-
-regex.info: regex.texi
- $(MAKEINFO) ../$(srcdir)/doc/regex.texi
-
-# I know of no way to make a good TAGS file from Texinfo source.
-TAGS:
-
-check:
-.PHONY: check
-
-install: regex.info
- -mkdir $(prefix) $(infodir)
- for i in *.info*; do $(INSTALL_DATA) $$i $(infodir)/$$i; done
-.PHONY: install
-
-clean mostlyclean:
- rm -f regex.?? *.dvi *.log *.toc
-
-distclean: clean
- rm -f Makefile
- for f in regex.??s; do if test -z "`cat $$f`"; then rm -f $$f; fi; done
-
-realclean: distclean
- rm -f *.info* regex.??? regex.texi TAGS
-
-extraclean: distclean
- rm -f patch* *~* *\#* *.orig *.rej *.bak core a.out
-.PHONY: mostlyclean clean distclean realclean extraclean
-
-Makefile: Makefile.in ../config.status
- (cd ..; sh config.status)
-
-# Prevent GNU make 3 from overflowing arg limit on System V.
-.NOEXPORT:
-
-# Assumes $(distdir) is the place to put our files.
-distfiles = Makefile.in *.texi texinfo.tex include.awk \
- regex.info* regex.aux regex.cps
-dist: Makefile regex.info regex.cps
- mkdir $(distdir)
- ln $(distfiles) $(distdir)
-.PHONY: dist
diff --git a/gnu/lib/libregex/doc/regex.aux b/gnu/lib/libregex/doc/regex.aux
deleted file mode 100644
index fd6a245..0000000
--- a/gnu/lib/libregex/doc/regex.aux
+++ /dev/null
@@ -1,136 +0,0 @@
-'xrdef {Overview-pg}{1}
-'xrdef {Overview-snt}{Chapter'tie1}
-'xrdef {Regular Expression Syntax-pg}{2}
-'xrdef {Regular Expression Syntax-snt}{Chapter'tie2}
-'xrdef {Syntax Bits-pg}{2}
-'xrdef {Syntax Bits-snt}{Section'tie2.1}
-'xrdef {Predefined Syntaxes-pg}{5}
-'xrdef {Predefined Syntaxes-snt}{Section'tie2.2}
-'xrdef {Collating Elements vs. Characters-pg}{6}
-'xrdef {Collating Elements vs. Characters-snt}{Section'tie2.3}
-'xrdef {The Backslash Character-pg}{7}
-'xrdef {The Backslash Character-snt}{Section'tie2.4}
-'xrdef {Common Operators-pg}{9}
-'xrdef {Common Operators-snt}{Chapter'tie3}
-'xrdef {Match-self Operator-pg}{9}
-'xrdef {Match-self Operator-snt}{Section'tie3.1}
-'xrdef {Match-any-character Operator-pg}{9}
-'xrdef {Match-any-character Operator-snt}{Section'tie3.2}
-'xrdef {Concatenation Operator-pg}{10}
-'xrdef {Concatenation Operator-snt}{Section'tie3.3}
-'xrdef {Repetition Operators-pg}{10}
-'xrdef {Repetition Operators-snt}{Section'tie3.4}
-'xrdef {Match-zero-or-more Operator-pg}{10}
-'xrdef {Match-zero-or-more Operator-snt}{Section'tie3.4.1}
-'xrdef {Match-one-or-more Operator-pg}{11}
-'xrdef {Match-one-or-more Operator-snt}{Section'tie3.4.2}
-'xrdef {Match-zero-or-one Operator-pg}{11}
-'xrdef {Match-zero-or-one Operator-snt}{Section'tie3.4.3}
-'xrdef {Interval Operators-pg}{12}
-'xrdef {Interval Operators-snt}{Section'tie3.4.4}
-'xrdef {Alternation Operator-pg}{13}
-'xrdef {Alternation Operator-snt}{Section'tie3.5}
-'xrdef {List Operators-pg}{13}
-'xrdef {List Operators-snt}{Section'tie3.6}
-'xrdef {Character Class Operators-pg}{14}
-'xrdef {Character Class Operators-snt}{Section'tie3.6.1}
-'xrdef {Range Operator-pg}{15}
-'xrdef {Range Operator-snt}{Section'tie3.6.2}
-'xrdef {Grouping Operators-pg}{16}
-'xrdef {Grouping Operators-snt}{Section'tie3.7}
-'xrdef {Back-reference Operator-pg}{17}
-'xrdef {Back-reference Operator-snt}{Section'tie3.8}
-'xrdef {Anchoring Operators-pg}{18}
-'xrdef {Anchoring Operators-snt}{Section'tie3.9}
-'xrdef {Match-beginning-of-line Operator-pg}{18}
-'xrdef {Match-beginning-of-line Operator-snt}{Section'tie3.9.1}
-'xrdef {Match-end-of-line Operator-pg}{18}
-'xrdef {Match-end-of-line Operator-snt}{Section'tie3.9.2}
-'xrdef {GNU Operators-pg}{20}
-'xrdef {GNU Operators-snt}{Chapter'tie4}
-'xrdef {Word Operators-pg}{20}
-'xrdef {Word Operators-snt}{Section'tie4.1}
-'xrdef {Non-Emacs Syntax Tables-pg}{20}
-'xrdef {Non-Emacs Syntax Tables-snt}{Section'tie4.1.1}
-'xrdef {Match-word-boundary Operator-pg}{20}
-'xrdef {Match-word-boundary Operator-snt}{Section'tie4.1.2}
-'xrdef {Match-within-word Operator-pg}{20}
-'xrdef {Match-within-word Operator-snt}{Section'tie4.1.3}
-'xrdef {Match-beginning-of-word Operator-pg}{21}
-'xrdef {Match-beginning-of-word Operator-snt}{Section'tie4.1.4}
-'xrdef {Match-end-of-word Operator-pg}{21}
-'xrdef {Match-end-of-word Operator-snt}{Section'tie4.1.5}
-'xrdef {Match-word-constituent Operator-pg}{21}
-'xrdef {Match-word-constituent Operator-snt}{Section'tie4.1.6}
-'xrdef {Match-non-word-constituent Operator-pg}{21}
-'xrdef {Match-non-word-constituent Operator-snt}{Section'tie4.1.7}
-'xrdef {Buffer Operators-pg}{21}
-'xrdef {Buffer Operators-snt}{Section'tie4.2}
-'xrdef {Match-beginning-of-buffer Operator-pg}{21}
-'xrdef {Match-beginning-of-buffer Operator-snt}{Section'tie4.2.1}
-'xrdef {Match-end-of-buffer Operator-pg}{21}
-'xrdef {Match-end-of-buffer Operator-snt}{Section'tie4.2.2}
-'xrdef {GNU Emacs Operators-pg}{22}
-'xrdef {GNU Emacs Operators-snt}{Chapter'tie5}
-'xrdef {Syntactic Class Operators-pg}{22}
-'xrdef {Syntactic Class Operators-snt}{Section'tie5.1}
-'xrdef {Emacs Syntax Tables-pg}{22}
-'xrdef {Emacs Syntax Tables-snt}{Section'tie5.1.1}
-'xrdef {Match-syntactic-class Operator-pg}{22}
-'xrdef {Match-syntactic-class Operator-snt}{Section'tie5.1.2}
-'xrdef {Match-not-syntactic-class Operator-pg}{22}
-'xrdef {Match-not-syntactic-class Operator-snt}{Section'tie5.1.3}
-'xrdef {What Gets Matched?-pg}{23}
-'xrdef {What Gets Matched?-snt}{Chapter'tie6}
-'xrdef {Programming with Regex-pg}{24}
-'xrdef {Programming with Regex-snt}{Chapter'tie7}
-'xrdef {GNU Regex Functions-pg}{24}
-'xrdef {GNU Regex Functions-snt}{Section'tie7.1}
-'xrdef {GNU Pattern Buffers-pg}{24}
-'xrdef {GNU Pattern Buffers-snt}{Section'tie7.1.1}
-'xrdef {GNU Regular Expression Compiling-pg}{26}
-'xrdef {GNU Regular Expression Compiling-snt}{Section'tie7.1.2}
-'xrdef {GNU Matching-pg}{27}
-'xrdef {GNU Matching-snt}{Section'tie7.1.3}
-'xrdef {GNU Searching-pg}{28}
-'xrdef {GNU Searching-snt}{Section'tie7.1.4}
-'xrdef {Matching/Searching with Split Data-pg}{29}
-'xrdef {Matching/Searching with Split Data-snt}{Section'tie7.1.5}
-'xrdef {Searching with Fastmaps-pg}{30}
-'xrdef {Searching with Fastmaps-snt}{Section'tie7.1.6}
-'xrdef {GNU Translate Tables-pg}{31}
-'xrdef {GNU Translate Tables-snt}{Section'tie7.1.7}
-'xrdef {Using Registers-pg}{32}
-'xrdef {Using Registers-snt}{Section'tie7.1.8}
-'xrdef {Freeing GNU Pattern Buffers-pg}{34}
-'xrdef {Freeing GNU Pattern Buffers-snt}{Section'tie7.1.9}
-'xrdef {POSIX Regex Functions-pg}{35}
-'xrdef {POSIX Regex Functions-snt}{Section'tie7.2}
-'xrdef {POSIX Pattern Buffers-pg}{35}
-'xrdef {POSIX Pattern Buffers-snt}{Section'tie7.2.1}
-'xrdef {POSIX Regular Expression Compiling-pg}{35}
-'xrdef {POSIX Regular Expression Compiling-snt}{Section'tie7.2.2}
-'xrdef {POSIX Matching-pg}{37}
-'xrdef {POSIX Matching-snt}{Section'tie7.2.3}
-'xrdef {Reporting Errors-pg}{38}
-'xrdef {Reporting Errors-snt}{Section'tie7.2.4}
-'xrdef {Using Byte Offsets-pg}{39}
-'xrdef {Using Byte Offsets-snt}{Section'tie7.2.5}
-'xrdef {Freeing POSIX Pattern Buffers-pg}{39}
-'xrdef {Freeing POSIX Pattern Buffers-snt}{Section'tie7.2.6}
-'xrdef {BSD Regex Functions-pg}{40}
-'xrdef {BSD Regex Functions-snt}{Section'tie7.3}
-'xrdef {BSD Regular Expression Compiling-pg}{40}
-'xrdef {BSD Regular Expression Compiling-snt}{Section'tie7.3.1}
-'xrdef {BSD Searching-pg}{40}
-'xrdef {BSD Searching-snt}{Section'tie7.3.2}
-'xrdef {Copying-pg}{42}
-'xrdef {Copying-snt}{Appendix'tie'char65{}}
-'xrdef {Copying-pg}{42}
-'xrdef {Copying-snt}{}
-'xrdef {Copying-pg}{43}
-'xrdef {Copying-snt}{}
-'xrdef {Copying-pg}{48}
-'xrdef {Copying-snt}{}
-'xrdef {Index-pg}{50}
-'xrdef {Index-snt}{}
diff --git a/gnu/lib/libregex/doc/regex.cps b/gnu/lib/libregex/doc/regex.cps
deleted file mode 100644
index 8b2e57c..0000000
--- a/gnu/lib/libregex/doc/regex.cps
+++ /dev/null
@@ -1,152 +0,0 @@
-\initial {$}
-\entry {\code {$}}{18}
-\initial {(}
-\entry {\code {(}}{16}
-\initial {)}
-\entry {\code {)}}{16}
-\initial {*}
-\entry {\samp {*}}{10}
-\initial {-}
-\entry {\samp {-}}{13}
-\initial {.}
-\entry {\samp {.}}{9}
-\initial {:}
-\entry {\samp {:]} in regex}{14}
-\initial {?}
-\entry {\samp {?}}{11}
-\initial {[}
-\entry {\samp {[}}{13}
-\entry {\samp {[:} in regex}{14}
-\entry {\samp {[{\tt\hat}}}{13}
-\initial {]}
-\entry {\samp {]}}{13}
-\initial {{\tt\char'173}}
-\entry {\samp {{\tt\char'173}}}{12}
-\initial {{\tt\char'174}}
-\entry {\code {{\tt\char'174}}}{13}
-\initial {{\tt\char'175}}
-\entry {\samp {{\tt\char'175}}}{12}
-\initial {{\tt\char43}}
-\entry {\samp {{\tt\char43}}}{11}
-\initial {{\tt\hat}}
-\entry {\samp {{\tt\hat}}}{13}
-\entry {\code {{\tt\hat}}}{18}
-\initial {{\tt\indexbackslash }}
-\entry {{\tt\indexbackslash }}{7}
-\entry {\samp {{\tt\indexbackslash }}}{13}
-\entry {\samp {{\tt\indexbackslash }'}}{21}
-\entry {\code {{\tt\indexbackslash }(}}{16}
-\entry {\code {{\tt\indexbackslash })}}{16}
-\entry {\samp {{\tt\indexbackslash }`}}{21}
-\entry {\samp {{\tt\indexbackslash }{\tt\char'173}}}{12}
-\entry {\code {{\tt\indexbackslash }{\tt\char'174}}}{13}
-\entry {\samp {{\tt\indexbackslash }{\tt\char'175}}}{12}
-\entry {\samp {{\tt\indexbackslash }{\tt\gtr}}}{21}
-\entry {\samp {{\tt\indexbackslash }{\tt\less}}}{21}
-\entry {\samp {{\tt\indexbackslash }b}}{20}
-\entry {\samp {{\tt\indexbackslash }B}}{20}
-\entry {\samp {{\tt\indexbackslash }s}}{22}
-\entry {\samp {{\tt\indexbackslash }S}}{22}
-\entry {\samp {{\tt\indexbackslash }w}}{21}
-\entry {\samp {{\tt\indexbackslash }W}}{21}
-\initial {A}
-\entry {\code {allocated \r {initialization}}}{26}
-\entry {alternation operator}{13}
-\entry {alternation operator and \samp {{\tt\hat}}}{18}
-\entry {anchoring}{18}
-\entry {anchors}{18}
-\entry {Awk}{5}
-\initial {B}
-\entry {back references}{17}
-\entry {backtracking}{10, 13}
-\entry {beginning-of-line operator}{18}
-\entry {bracket expression}{13}
-\entry {\code {buffer \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
-\entry {\code {buffer \r {initialization}}}{26}
-\initial {C}
-\entry {character classes}{14}
-\initial {E}
-\entry {Egrep}{5}
-\entry {Emacs}{5}
-\entry {end-of-line operator}{18}
-\entry {\code {end\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}
-\initial {F}
-\entry {\code {fastmap \r {initialization}}}{26}
-\entry {\code {fastmap{\_}accurate \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
-\entry {fastmaps}{30}
-\initial {G}
-\entry {Grep}{5}
-\entry {grouping}{16}
-\initial {I}
-\entry {ignoring case}{35}
-\entry {interval expression}{12}
-\initial {M}
-\entry {matching list}{13}
-\entry {matching newline}{13}
-\entry {matching with GNU functions}{27}
-\initial {N}
-\entry {\code {newline{\_}anchor \r {field in pattern buffer}}}{18}
-\entry {nonmatching list}{13}
-\entry {\code {not{\_}bol \r {field in pattern buffer}}}{18}
-\entry {\code {num_regs\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}
-\initial {O}
-\entry {open-group operator and \samp {{\tt\hat}}}{18}
-\entry {or operator}{13}
-\initial {P}
-\entry {parenthesizing}{16}
-\entry {pattern buffer initialization}{26}
-\entry {pattern buffer, definition of}{24}
-\entry {POSIX Awk}{5}
-\initial {R}
-\entry {\code {range \r {argument to \code {re{\_}search}}}}{28}
-\entry {\code {re_registers}}{32}
-\entry {\code {RE{\_}BACKSLASH{\_}ESCAPE{\_}IN{\_}LIST}}{3}
-\entry {\code {RE{\_}BK{\_}PLUS{\_}QM}}{3}
-\entry {\code {RE{\_}CHAR{\_}CLASSES}}{3}
-\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS}}{3}
-\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}ANCHORS \r {(and \samp {{\tt\hat}})}}}{18}
-\entry {\code {RE{\_}CONTEXT{\_}INDEP{\_}OPS}}{3}
-\entry {\code {RE{\_}CONTEXT{\_}INVALID{\_}OPS}}{3}
-\entry {\code {RE{\_}DOT{\_}NEWLINE}}{3}
-\entry {\code {RE{\_}DOT{\_}NOT{\_}NULL}}{4}
-\entry {\code {RE{\_}INTERVALS}}{4}
-\entry {\code {RE{\_}LIMITED{\_}OPS}}{4}
-\entry {\code {RE{\_}NEWLINE{\_}ALT}}{4}
-\entry {\code {RE{\_}NO{\_}BK{\_}BRACES}}{4}
-\entry {\code {RE{\_}NO{\_}BK{\_}PARENS}}{4}
-\entry {\code {RE{\_}NO{\_}BK{\_}REFS}}{4}
-\entry {\code {RE{\_}NO{\_}BK{\_}VBAR}}{4}
-\entry {\code {RE{\_}NO{\_}EMPTY{\_}RANGES}}{4}
-\entry {\code {re{\_}nsub \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
-\entry {\code {re{\_}pattern{\_}buffer \r {definition}}}{24}
-\entry {\code {re{\_}syntax{\_}options \r {initialization}}}{26}
-\entry {\code {RE{\_}UNMATCHED{\_}RIGHT{\_}PAREN{\_}ORD}}{4}
-\entry {\code {REG{\_}EXTENDED}}{35}
-\entry {\code {REG{\_}ICASE}}{35}
-\entry {\code {REG{\_}NEWLINE}}{36}
-\entry {\code {REG{\_}NOSUB}}{35}
-\entry {\code {regex.c}}{1}
-\entry {\code {regex.h}}{1}
-\entry {regexp anchoring}{18}
-\entry {\code {regmatch{\_}t}}{39}
-\entry {\code {regs{\_}allocated}}{32}
-\entry {\code {REGS{\_}FIXED}}{33}
-\entry {\code {REGS{\_}REALLOCATE}}{32}
-\entry {\code {REGS{\_}UNALLOCATED}}{32}
-\entry {regular expressions, syntax of}{2}
-\initial {S}
-\entry {searching with GNU functions}{28}
-\entry {\code {start \r {argument to \code {re{\_}search}}}}{28}
-\entry {\code {start\penalty 10000{\spaceskip = 0pt{} }\r {in\penalty 10000{\spaceskip = 0pt{} }\code {struct\penalty 10000{\spaceskip = 0pt{} }re_registers}}}}{32}
-\entry {\code {struct re{\_}pattern{\_}buffer \r {definition}}}{24}
-\entry {subexpressions}{16}
-\entry {syntax bits}{2}
-\entry {\code {syntax \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
-\entry {syntax initialization}{26}
-\entry {syntax of regular expressions}{2}
-\initial {T}
-\entry {\code {translate \r {initialization}}}{26}
-\initial {U}
-\entry {\code {used \r {field, set by \code {re{\_}compile{\_}pattern}}}}{27}
-\initial {W}
-\entry {word boundaries, matching}{20}
diff --git a/gnu/lib/libregex/doc/regex.info b/gnu/lib/libregex/doc/regex.info
deleted file mode 100644
index 90deede..0000000
--- a/gnu/lib/libregex/doc/regex.info
+++ /dev/null
@@ -1,2836 +0,0 @@
-This is Info file regex.info, produced by Makeinfo-1.52 from the input
-file .././doc/regex.texi.
-
- This file documents the GNU regular expression library.
-
- Copyright (C) 1992, 1993 Free Software Foundation, Inc.
-
- Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
- Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided also that the
-section entitled "GNU General Public License" is included exactly as in
-the original, and provided that the entire resulting derived work is
-distributed under the terms of a permission notice identical to this
-one.
-
- Permission is granted to copy and distribute translations of this
-manual into another language, under the above conditions for modified
-versions, except that the section entitled "GNU General Public License"
-may be included in a translation approved by the Free Software
-Foundation instead of in the original English.
-
-
-File: regex.info, Node: Top, Next: Overview, Prev: (dir), Up: (dir)
-
-Regular Expression Library
-**************************
-
- This manual documents how to program with the GNU regular expression
-library. This is edition 0.12a of the manual, 19 September 1992.
-
- The first part of this master menu lists the major nodes in this Info
-document, including the index. The rest of the menu lists all the
-lower level nodes in the document.
-
-* Menu:
-
-* Overview::
-* Regular Expression Syntax::
-* Common Operators::
-* GNU Operators::
-* GNU Emacs Operators::
-* What Gets Matched?::
-* Programming with Regex::
-* Copying:: Copying and sharing Regex.
-* Index:: General index.
- -- The Detailed Node Listing --
-
-Regular Expression Syntax
-
-* Syntax Bits::
-* Predefined Syntaxes::
-* Collating Elements vs. Characters::
-* The Backslash Character::
-
-Common Operators
-
-* Match-self Operator:: Ordinary characters.
-* Match-any-character Operator:: .
-* Concatenation Operator:: Juxtaposition.
-* Repetition Operators:: * + ? {}
-* Alternation Operator:: |
-* List Operators:: [...] [^...]
-* Grouping Operators:: (...)
-* Back-reference Operator:: \digit
-* Anchoring Operators:: ^ $
-
-Repetition Operators
-
-* Match-zero-or-more Operator:: *
-* Match-one-or-more Operator:: +
-* Match-zero-or-one Operator:: ?
-* Interval Operators:: {}
-
-List Operators (`[' ... `]' and `[^' ... `]')
-
-* Character Class Operators:: [:class:]
-* Range Operator:: start-end
-
-Anchoring Operators
-
-* Match-beginning-of-line Operator:: ^
-* Match-end-of-line Operator:: $
-
-GNU Operators
-
-* Word Operators::
-* Buffer Operators::
-
-Word Operators
-
-* Non-Emacs Syntax Tables::
-* Match-word-boundary Operator:: \b
-* Match-within-word Operator:: \B
-* Match-beginning-of-word Operator:: \<
-* Match-end-of-word Operator:: \>
-* Match-word-constituent Operator:: \w
-* Match-non-word-constituent Operator:: \W
-
-Buffer Operators
-
-* Match-beginning-of-buffer Operator:: \`
-* Match-end-of-buffer Operator:: \'
-
-GNU Emacs Operators
-
-* Syntactic Class Operators::
-
-Syntactic Class Operators
-
-* Emacs Syntax Tables::
-* Match-syntactic-class Operator:: \sCLASS
-* Match-not-syntactic-class Operator:: \SCLASS
-
-Programming with Regex
-
-* GNU Regex Functions::
-* POSIX Regex Functions::
-* BSD Regex Functions::
-
-GNU Regex Functions
-
-* GNU Pattern Buffers:: The re_pattern_buffer type.
-* GNU Regular Expression Compiling:: re_compile_pattern ()
-* GNU Matching:: re_match ()
-* GNU Searching:: re_search ()
-* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
-* Searching with Fastmaps:: re_compile_fastmap ()
-* GNU Translate Tables:: The `translate' field.
-* Using Registers:: The re_registers type and related fns.
-* Freeing GNU Pattern Buffers:: regfree ()
-
-POSIX Regex Functions
-
-* POSIX Pattern Buffers:: The regex_t type.
-* POSIX Regular Expression Compiling:: regcomp ()
-* POSIX Matching:: regexec ()
-* Reporting Errors:: regerror ()
-* Using Byte Offsets:: The regmatch_t type.
-* Freeing POSIX Pattern Buffers:: regfree ()
-
-BSD Regex Functions
-
-* BSD Regular Expression Compiling:: re_comp ()
-* BSD Searching:: re_exec ()
-
-
-File: regex.info, Node: Overview, Next: Regular Expression Syntax, Prev: Top, Up: Top
-
-Overview
-********
-
- A "regular expression" (or "regexp", or "pattern") is a text string
-that describes some (mathematical) set of strings. A regexp R
-"matches" a string S if S is in the set of strings described by R.
-
- Using the Regex library, you can:
-
- * see if a string matches a specified pattern as a whole, and
-
- * search within a string for a substring matching a specified
- pattern.
-
- Some regular expressions match only one string, i.e., the set they
-describe has only one member. For example, the regular expression
-`foo' matches the string `foo' and no others. Other regular
-expressions match more than one string, i.e., the set they describe has
-more than one member. For example, the regular expression `f*' matches
-the set of strings made up of any number (including zero) of `f's. As
-you can see, some characters in regular expressions match themselves
-(such as `f') and some don't (such as `*'); the ones that don't match
-themselves instead let you specify patterns that describe many
-different strings.
-
- To either match or search for a regular expression with the Regex
-library functions, you must first compile it with a Regex pattern
-compiling function. A "compiled pattern" is a regular expression
-converted to the internal format used by the library functions. Once
-you've compiled a pattern, you can use it for matching or searching any
-number of times.
-
- The Regex library consists of two source files: `regex.h' and
-`regex.c'. Regex provides three groups of functions with which you can
-operate on regular expressions. One group--the GNU group--is more
-powerful but not completely compatible with the other two, namely the
-POSIX and Berkeley UNIX groups; its interface was designed specifically
-for GNU. The other groups have the same interfaces as do the regular
-expression functions in POSIX and Berkeley UNIX.
-
- We wrote this chapter with programmers in mind, not users of
-programs--such as Emacs--that use Regex. We describe the Regex library
-in its entirety, not how to write regular expressions that a particular
-program understands.
-
-
-File: regex.info, Node: Regular Expression Syntax, Next: Common Operators, Prev: Overview, Up: Top
-
-Regular Expression Syntax
-*************************
-
- "Characters" are things you can type. "Operators" are things in a
-regular expression that match one or more characters. You compose
-regular expressions from operators, which in turn you specify using one
-or more characters.
-
- Most characters represent what we call the match-self operator, i.e.,
-they match themselves; we call these characters "ordinary". Other
-characters represent either all or parts of fancier operators; e.g.,
-`.' represents what we call the match-any-character operator (which, no
-surprise, matches (almost) any character); we call these characters
-"special". Two different things determine what characters represent
-what operators:
-
- 1. the regular expression syntax your program has told the Regex
- library to recognize, and
-
- 2. the context of the character in the regular expression.
-
- In the following sections, we describe these things in more detail.
-
-* Menu:
-
-* Syntax Bits::
-* Predefined Syntaxes::
-* Collating Elements vs. Characters::
-* The Backslash Character::
-
-
-File: regex.info, Node: Syntax Bits, Next: Predefined Syntaxes, Up: Regular Expression Syntax
-
-Syntax Bits
-===========
-
- In any particular syntax for regular expressions, some characters are
-always special, others are sometimes special, and others are never
-special. The particular syntax that Regex recognizes for a given
-regular expression depends on the value in the `syntax' field of the
-pattern buffer of that regular expression.
-
- You get a pattern buffer by compiling a regular expression. *Note
-GNU Pattern Buffers::, and *Note POSIX Pattern Buffers::, for more
-information on pattern buffers. *Note GNU Regular Expression
-Compiling::, *Note POSIX Regular Expression Compiling::, and *Note BSD
-Regular Expression Compiling::, for more information on compiling.
-
- Regex considers the value of the `syntax' field to be a collection of
-bits; we refer to these bits as "syntax bits". In most cases, they
-affect what characters represent what operators. We describe the
-meanings of the operators to which we refer in *Note Common Operators::,
-*Note GNU Operators::, and *Note GNU Emacs Operators::.
-
- For reference, here is the complete list of syntax bits, in
-alphabetical order:
-
-`RE_BACKSLASH_ESCAPE_IN_LISTS'
- If this bit is set, then `\' inside a list (*note List Operators::.)
- quotes (makes ordinary, if it's special) the following character;
- if this bit isn't set, then `\' is an ordinary character inside
- lists. (*Note The Backslash Character::, for what `\' does
- outside of lists.)
-
-`RE_BK_PLUS_QM'
- If this bit is set, then `\+' represents the match-one-or-more
- operator and `\?' represents the match-zero-or-one operator; if
- this bit isn't set, then `+' represents the match-one-or-more
- operator and `?' represents the match-zero-or-one operator. This
- bit is irrelevant if `RE_LIMITED_OPS' is set.
-
-`RE_CHAR_CLASSES'
- If this bit is set, then you can use character classes in lists;
- if this bit isn't set, then you can't.
-
-`RE_CONTEXT_INDEP_ANCHORS'
- If this bit is set, then `^' and `$' are special anywhere outside
- a list; if this bit isn't set, then these characters are special
- only in certain contexts. *Note Match-beginning-of-line
- Operator::, and *Note Match-end-of-line Operator::.
-
-`RE_CONTEXT_INDEP_OPS'
- If this bit is set, then certain characters are special anywhere
- outside a list; if this bit isn't set, then those characters are
- special only in some contexts and are ordinary elsewhere.
- Specifically, if this bit isn't set then `*', and (if the syntax
- bit `RE_LIMITED_OPS' isn't set) `+' and `?' (or `\+' and `\?',
- depending on the syntax bit `RE_BK_PLUS_QM') represent repetition
- operators only if they're not first in a regular expression or
- just after an open-group or alternation operator. The same holds
- for `{' (or `\{', depending on the syntax bit `RE_NO_BK_BRACES') if
- it is the beginning of a valid interval and the syntax bit
- `RE_INTERVALS' is set.
-
-`RE_CONTEXT_INVALID_OPS'
- If this bit is set, then repetition and alternation operators
- can't be in certain positions within a regular expression.
- Specifically, the regular expression is invalid if it has:
-
- * a repetition operator first in the regular expression or just
- after a match-beginning-of-line, open-group, or alternation
- operator; or
-
- * an alternation operator first or last in the regular
- expression, just before a match-end-of-line operator, or just
- after an alternation or open-group operator.
-
- If this bit isn't set, then you can put the characters
- representing the repetition and alternation operators anywhere in
- a regular expression. Whether or not they will in fact be
- operators in certain positions depends on other syntax bits.
-
-`RE_DOT_NEWLINE'
- If this bit is set, then the match-any-character operator matches
- a newline; if this bit isn't set, then it doesn't.
-
-`RE_DOT_NOT_NULL'
- If this bit is set, then the match-any-character operator doesn't
- match a null character; if this bit isn't set, then it does.
-
-`RE_INTERVALS'
- If this bit is set, then Regex recognizes interval operators; if
- this bit isn't set, then it doesn't.
-
-`RE_LIMITED_OPS'
- If this bit is set, then Regex doesn't recognize the
- match-one-or-more, match-zero-or-one or alternation operators; if
- this bit isn't set, then it does.
-
-`RE_NEWLINE_ALT'
- If this bit is set, then newline represents the alternation
- operator; if this bit isn't set, then newline is ordinary.
-
-`RE_NO_BK_BRACES'
- If this bit is set, then `{' represents the open-interval operator
- and `}' represents the close-interval operator; if this bit isn't
- set, then `\{' represents the open-interval operator and `\}'
- represents the close-interval operator. This bit is relevant only
- if `RE_INTERVALS' is set.
-
-`RE_NO_BK_PARENS'
- If this bit is set, then `(' represents the open-group operator and
- `)' represents the close-group operator; if this bit isn't set,
- then `\(' represents the open-group operator and `\)' represents
- the close-group operator.
-
-`RE_NO_BK_REFS'
- If this bit is set, then Regex doesn't recognize `\'DIGIT as the
- back reference operator; if this bit isn't set, then it does.
-
-`RE_NO_BK_VBAR'
- If this bit is set, then `|' represents the alternation operator;
- if this bit isn't set, then `\|' represents the alternation
- operator. This bit is irrelevant if `RE_LIMITED_OPS' is set.
-
-`RE_NO_EMPTY_RANGES'
- If this bit is set, then a regular expression with a range whose
- ending point collates lower than its starting point is invalid; if
- this bit isn't set, then Regex considers such a range to be empty.
-
-`RE_UNMATCHED_RIGHT_PAREN_ORD'
- If this bit is set and the regular expression has no matching
- open-group operator, then Regex considers what would otherwise be
- a close-group operator (based on how `RE_NO_BK_PARENS' is set) to
- match `)'.
-
-
-File: regex.info, Node: Predefined Syntaxes, Next: Collating Elements vs. Characters, Prev: Syntax Bits, Up: Regular Expression Syntax
-
-Predefined Syntaxes
-===================
-
- If you're programming with Regex, you can set a pattern buffer's
-(*note GNU Pattern Buffers::., and *Note POSIX Pattern Buffers::)
-`syntax' field either to an arbitrary combination of syntax bits (*note
-Syntax Bits::.) or else to the configurations defined by Regex. These
-configurations define the syntaxes used by certain programs--GNU Emacs,
-POSIX Awk, traditional Awk, Grep, Egrep--in addition to syntaxes for
-POSIX basic and extended regular expressions.
-
- The predefined syntaxes--taken directly from `regex.h'--are:
-
- #define RE_SYNTAX_EMACS 0
-
- #define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
- #define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
-
- #define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
- | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
-
- #define RE_SYNTAX_EGREP \
- (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
- | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
-
- #define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
-
- /* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
- #define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
-
- #define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
-
- /* Syntax bits common to both basic and extended POSIX regex syntax. */
- #define _RE_SYNTAX_POSIX_COMMON \
- (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
-
- #define RE_SYNTAX_POSIX_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
-
- /* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
- RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
- isn't minimal, since other operators, such as \`, aren't disabled. */
- #define RE_SYNTAX_POSIX_MINIMAL_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
-
- #define RE_SYNTAX_POSIX_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
- /* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
- replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
- #define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-
-File: regex.info, Node: Collating Elements vs. Characters, Next: The Backslash Character, Prev: Predefined Syntaxes, Up: Regular Expression Syntax
-
-Collating Elements vs. Characters
-=================================
-
- POSIX generalizes the notion of a character to that of a collating
-element. It defines a "collating element" to be "a sequence of one or
-more bytes defined in the current collating sequence as a unit of
-collation."
-
- This generalizes the notion of a character in two ways. First, a
-single character can map into two or more collating elements. For
-example, the German "es-zet" collates as the collating element `s'
-followed by another collating element `s'. Second, two or more
-characters can map into one collating element. For example, the
-Spanish `ll' collates after `l' and before `m'.
-
- Since POSIX's "collating element" preserves the essential idea of a
-"character," we use the latter, more familiar, term in this document.
-
-
-File: regex.info, Node: The Backslash Character, Prev: Collating Elements vs. Characters, Up: Regular Expression Syntax
-
-The Backslash Character
-=======================
-
- The `\' character has one of four different meanings, depending on
-the context in which you use it and what syntax bits are set (*note
-Syntax Bits::.). It can: 1) stand for itself, 2) quote the next
-character, 3) introduce an operator, or 4) do nothing.
-
- 1. It stands for itself inside a list (*note List Operators::.) if
- the syntax bit `RE_BACKSLASH_ESCAPE_IN_LISTS' is not set. For
- example, `[\]' would match `\'.
-
- 2. It quotes (makes ordinary, if it's special) the next character
- when you use it either:
-
- * outside a list,(1) or
-
- * inside a list and the syntax bit
- `RE_BACKSLASH_ESCAPE_IN_LISTS' is set.
-
- 3. It introduces an operator when followed by certain ordinary
- characters--sometimes only when certain syntax bits are set. See
- the cases `RE_BK_PLUS_QM', `RE_NO_BK_BRACES', `RE_NO_BK_VBAR',
- `RE_NO_BK_PARENS', `RE_NO_BK_REFS' in *Note Syntax Bits::. Also:
-
- * `\b' represents the match-word-boundary operator (*note
- Match-word-boundary Operator::.).
-
- * `\B' represents the match-within-word operator (*note
- Match-within-word Operator::.).
-
- * `\<' represents the match-beginning-of-word operator
- (*note Match-beginning-of-word Operator::.).
-
- * `\>' represents the match-end-of-word operator (*note
- Match-end-of-word Operator::.).
-
- * `\w' represents the match-word-constituent operator (*note
- Match-word-constituent Operator::.).
-
- * `\W' represents the match-non-word-constituent operator
- (*note Match-non-word-constituent Operator::.).
-
- * `\`' represents the match-beginning-of-buffer operator and
- `\'' represents the match-end-of-buffer operator (*note
- Buffer Operators::.).
-
- * If Regex was compiled with the C preprocessor symbol `emacs'
- defined, then `\sCLASS' represents the match-syntactic-class
- operator and `\SCLASS' represents the
- match-not-syntactic-class operator (*note Syntactic Class
- Operators::.).
-
- 4. In all other cases, Regex ignores `\'. For example, `\n' matches
- `n'.
-
-
- ---------- Footnotes ----------
-
- (1) Sometimes you don't have to explicitly quote special characters
-to make them ordinary. For instance, most characters lose any special
-meaning inside a list (*note List Operators::.). In addition, if the
-syntax bits `RE_CONTEXT_INVALID_OPS' and `RE_CONTEXT_INDEP_OPS' aren't
-set, then (for historical reasons) the matcher considers special
-characters ordinary if they are in contexts where the operations they
-represent make no sense; for example, the match-zero-or-more
-operator (represented by `*') matches itself in the regular expression
-`*foo' because there is no preceding expression on which it can
-operate. It is poor practice, however, to depend on this behavior; if
-you want a special character to be ordinary outside a list, it's better
-to always quote it, regardless.
-
-
-File: regex.info, Node: Common Operators, Next: GNU Operators, Prev: Regular Expression Syntax, Up: Top
-
-Common Operators
-****************
-
- You compose regular expressions from operators. In the following
-sections, we describe the regular expression operators specified by
-POSIX; GNU also uses these. Most operators have more than one
-representation as characters. *Note Regular Expression Syntax::, for
-what characters represent what operators under what circumstances.
-
- For most operators that can be represented in two ways, one
-representation is a single character and the other is that character
-preceded by `\'. For example, either `(' or `\(' represents the
-open-group operator. Which one does depends on the setting of a syntax
-bit, in this case `RE_NO_BK_PARENS'. Why is this so? Historical
-reasons dictate some of the varying representations, while POSIX
-dictates others.
-
- Finally, almost all characters lose any special meaning inside a list
-(*note List Operators::.).
-
-* Menu:
-
-* Match-self Operator:: Ordinary characters.
-* Match-any-character Operator:: .
-* Concatenation Operator:: Juxtaposition.
-* Repetition Operators:: * + ? {}
-* Alternation Operator:: |
-* List Operators:: [...] [^...]
-* Grouping Operators:: (...)
-* Back-reference Operator:: \digit
-* Anchoring Operators:: ^ $
-
-
-File: regex.info, Node: Match-self Operator, Next: Match-any-character Operator, Up: Common Operators
-
-The Match-self Operator (ORDINARY CHARACTER)
-============================================
-
- This operator matches the character itself. All ordinary characters
-(*note Regular Expression Syntax::.) represent this operator. For
-example, `f' is always an ordinary character, so the regular expression
-`f' matches only the string `f'. In particular, it does *not* match
-the string `ff'.
-
-
-File: regex.info, Node: Match-any-character Operator, Next: Concatenation Operator, Prev: Match-self Operator, Up: Common Operators
-
-The Match-any-character Operator (`.')
-======================================
-
- This operator matches any single printing or nonprinting character
-except it won't match a:
-
-newline
- if the syntax bit `RE_DOT_NEWLINE' isn't set.
-
-null
- if the syntax bit `RE_DOT_NOT_NULL' is set.
-
- The `.' (period) character represents this operator. For example,
-`a.b' matches any three-character string beginning with `a' and ending
-with `b'.
-
-
-File: regex.info, Node: Concatenation Operator, Next: Repetition Operators, Prev: Match-any-character Operator, Up: Common Operators
-
-The Concatenation Operator
-==========================
-
- This operator concatenates two regular expressions A and B. No
-character represents this operator; you simply put B after A. The
-result is a regular expression that will match a string if A matches
-its first part and B matches the rest. For example, `xy' (two
-match-self operators) matches `xy'.
-
-
-File: regex.info, Node: Repetition Operators, Next: Alternation Operator, Prev: Concatenation Operator, Up: Common Operators
-
-Repetition Operators
-====================
-
- Repetition operators repeat the preceding regular expression a
-specified number of times.
-
-* Menu:
-
-* Match-zero-or-more Operator:: *
-* Match-one-or-more Operator:: +
-* Match-zero-or-one Operator:: ?
-* Interval Operators:: {}
-
-
-File: regex.info, Node: Match-zero-or-more Operator, Next: Match-one-or-more Operator, Up: Repetition Operators
-
-The Match-zero-or-more Operator (`*')
--------------------------------------
-
- This operator repeats the smallest possible preceding regular
-expression as many times as necessary (including zero) to match the
-pattern. `*' represents this operator. For example, `o*' matches any
-string made up of zero or more `o's. Since this operator operates on
-the smallest preceding regular expression, `fo*' has a repeating `o',
-not a repeating `fo'. So, `fo*' matches `f', `fo', `foo', and so on.
-
- Since the match-zero-or-more operator is a suffix operator, it may be
-useless as such when no regular expression precedes it. This is the
-case when it:
-
- * is first in a regular expression, or
-
- * follows a match-beginning-of-line, open-group, or alternation
- operator.
-
-Three different things can happen in these cases:
-
- 1. If the syntax bit `RE_CONTEXT_INVALID_OPS' is set, then the
- regular expression is invalid.
-
- 2. If `RE_CONTEXT_INVALID_OPS' isn't set, but `RE_CONTEXT_INDEP_OPS'
- is, then `*' represents the match-zero-or-more operator (which
- then operates on the empty string).
-
- 3. Otherwise, `*' is ordinary.
-
-
- The matcher processes a match-zero-or-more operator by first matching
-as many repetitions of the smallest preceding regular expression as it
-can. Then it continues to match the rest of the pattern.
-
- If it can't match the rest of the pattern, it backtracks (as many
-times as necessary), each time discarding one of the matches until it
-can either match the entire pattern or be certain that it cannot get a
-match. For example, when matching `ca*ar' against `caaar', the matcher
-first matches all three `a's of the string with the `a*' of the regular
-expression. However, it cannot then match the final `ar' of the
-regular expression against the final `r' of the string. So it
-backtracks, discarding the match of the last `a' in the string. It can
-then match the remaining `ar'.
-
-
-File: regex.info, Node: Match-one-or-more Operator, Next: Match-zero-or-one Operator, Prev: Match-zero-or-more Operator, Up: Repetition Operators
-
-The Match-one-or-more Operator (`+' or `\+')
---------------------------------------------
-
- If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't
-recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM'
-isn't set, then `+' represents this operator; if it is, then `\+' does.
-
- This operator is similar to the match-zero-or-more operator except
-that it repeats the preceding regular expression at least once; *note
-Match-zero-or-more Operator::., for what it operates on, how some
-syntax bits affect it, and how Regex backtracks to match it.
-
- For example, supposing that `+' represents the match-one-or-more
-operator; then `ca+r' matches, e.g., `car' and `caaaar', but not `cr'.
-
-
-File: regex.info, Node: Match-zero-or-one Operator, Next: Interval Operators, Prev: Match-one-or-more Operator, Up: Repetition Operators
-
-The Match-zero-or-one Operator (`?' or `\?')
---------------------------------------------
-
- If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't
-recognize this operator. Otherwise, if the syntax bit `RE_BK_PLUS_QM'
-isn't set, then `?' represents this operator; if it is, then `\?' does.
-
- This operator is similar to the match-zero-or-more operator except
-that it repeats the preceding regular expression once or not at all;
-*note Match-zero-or-more Operator::., to see what it operates on, how
-some syntax bits affect it, and how Regex backtracks to match it.
-
- For example, supposing that `?' represents the match-zero-or-one
-operator; then `ca?r' matches both `car' and `cr', but nothing else.
-
-
-File: regex.info, Node: Interval Operators, Prev: Match-zero-or-one Operator, Up: Repetition Operators
-
-Interval Operators (`{' ... `}' or `\{' ... `\}')
--------------------------------------------------
-
- If the syntax bit `RE_INTERVALS' is set, then Regex recognizes
-"interval expressions". They repeat the smallest possible preceding
-regular expression a specified number of times.
-
- If the syntax bit `RE_NO_BK_BRACES' is set, `{' represents the
-"open-interval operator" and `}' represents the "close-interval
-operator"; otherwise, `\{' and `\}' do.
-
- Specifically, supposing that `{' and `}' represent the open-interval
-and close-interval operators; then:
-
-`{COUNT}'
- matches exactly COUNT occurrences of the preceding regular
- expression.
-
-`{MIN,}'
- matches MIN or more occurrences of the preceding regular
- expression.
-
-`{MIN,MAX}'
- matches at least MIN but no more than MAX occurrences of the
- preceding regular expression.
-
- The interval expression (but not necessarily the regular expression
-that contains it) is invalid if:
-
- * MIN is greater than MAX, or
-
- * any of COUNT, MIN, or MAX are outside the range zero to
- `RE_DUP_MAX' (which symbol `regex.h' defines).
-
- If the interval expression is invalid and the syntax bit
-`RE_NO_BK_BRACES' is set, then Regex considers all the characters in
-the would-be interval to be ordinary. If that bit isn't set, then the
-regular expression is invalid.
-
- If the interval expression is valid but there is no preceding regular
-expression on which to operate, then if the syntax bit
-`RE_CONTEXT_INVALID_OPS' is set, the regular expression is invalid. If
-that bit isn't set, then Regex considers all the characters--other than
-backslashes, which it ignores--in the would-be interval to be ordinary.
-
-
-File: regex.info, Node: Alternation Operator, Next: List Operators, Prev: Repetition Operators, Up: Common Operators
-
-The Alternation Operator (`|' or `\|')
-======================================
-
- If the syntax bit `RE_LIMITED_OPS' is set, then Regex doesn't
-recognize this operator. Otherwise, if the syntax bit `RE_NO_BK_VBAR'
-is set, then `|' represents this operator; otherwise, `\|' does.
-
- Alternatives match one of a choice of regular expressions: if you put
-the character(s) representing the alternation operator between any two
-regular expressions A and B, the result matches the union of the
-strings that A and B match. For example, supposing that `|' is the
-alternation operator, then `foo|bar|quux' would match any of `foo',
-`bar' or `quux'.
-
- The alternation operator operates on the *largest* possible
-surrounding regular expressions. (Put another way, it has the lowest
-precedence of any regular expression operator.) Thus, the only way you
-can delimit its arguments is to use grouping. For example, if `(' and
-`)' are the open and close-group operators, then `fo(o|b)ar' would
-match either `fooar' or `fobar'. (`foo|bar' would match `foo' or
-`bar'.)
-
- The matcher usually tries all combinations of alternatives so as to
-match the longest possible string. For example, when matching
-`(fooq|foo)*(qbarquux|bar)' against `fooqbarquux', it cannot take, say,
-the first ("depth-first") combination it could match, since then it
-would be content to match just `fooqbar'.
-
-
-File: regex.info, Node: List Operators, Next: Grouping Operators, Prev: Alternation Operator, Up: Common Operators
-
-List Operators (`[' ... `]' and `[^' ... `]')
-=============================================
-
- "Lists", also called "bracket expressions", are a set of one or more
-items. An "item" is a character, a character class expression, or a
-range expression. The syntax bits affect which kinds of items you can
-put in a list. We explain the last two items in subsections below.
-Empty lists are invalid.
-
- A "matching list" matches a single character represented by one of
-the list items. You form a matching list by enclosing one or more items
-within an "open-matching-list operator" (represented by `[') and a
-"close-list operator" (represented by `]').
-
- For example, `[ab]' matches either `a' or `b'. `[ad]*' matches the
-empty string and any string composed of just `a's and `d's in any
-order. Regex considers invalid a regular expression with a `[' but no
-matching `]'.
-
- "Nonmatching lists" are similar to matching lists except that they
-match a single character *not* represented by one of the list items.
-You use an "open-nonmatching-list operator" (represented by `[^'(1))
-instead of an open-matching-list operator to start a nonmatching list.
-
- For example, `[^ab]' matches any character except `a' or `b'.
-
- If the `posix_newline' field in the pattern buffer (*note GNU Pattern
-Buffers::.) is set, then nonmatching lists do not match a newline.
-
- Most characters lose any special meaning inside a list. The special
-characters inside a list follow.
-
-`]'
- ends the list if it's not the first list item. So, if you want to
- make the `]' character a list item, you must put it first.
-
-`\'
- quotes the next character if the syntax bit
- `RE_BACKSLASH_ESCAPE_IN_LISTS' is set.
-
-`[:'
- represents the open-character-class operator (*note Character
- Class Operators::.) if the syntax bit `RE_CHAR_CLASSES' is set and
- what follows is a valid character class expression.
-
-`:]'
- represents the close-character-class operator if the syntax bit
- `RE_CHAR_CLASSES' is set and what precedes it is an
- open-character-class operator followed by a valid character class
- name.
-
-`-'
- represents the range operator (*note Range Operator::.) if it's
- not first or last in a list or the ending point of a range.
-
-All other characters are ordinary. For example, `[.*]' matches `.' and
-`*'.
-
-* Menu:
-
-* Character Class Operators:: [:class:]
-* Range Operator:: start-end
-
- ---------- Footnotes ----------
-
- (1) Regex therefore doesn't consider the `^' to be the first
-character in the list. If you put a `^' character first in (what you
-think is) a matching list, you'll turn it into a nonmatching list.
-
-
-File: regex.info, Node: Character Class Operators, Next: Range Operator, Up: List Operators
-
-Character Class Operators (`[:' ... `:]')
------------------------------------------
-
- If the syntax bit `RE_CHAR_CLASSES' is set, then Regex
-recognizes character class expressions inside lists. A "character
-class expression" matches one character from a given class. You form a
-character class expression by putting a character class name between an
-"open-character-class operator" (represented by `[:') and a
-"close-character-class operator" (represented by `:]'). The character
-class names and their meanings are:
-
-`alnum'
- letters and digits
-
-`alpha'
- letters
-
-`blank'
- system-dependent; for GNU, a space or tab
-
-`cntrl'
- control characters (in the ASCII encoding, code 0177 and codes
- less than 040)
-
-`digit'
- digits
-
-`graph'
- same as `print' except omits space
-
-`lower'
- lowercase letters
-
-`print'
- printable characters (in the ASCII encoding, space through
- tilde--codes 040 through 0176)
-
-`punct'
- neither control nor alphanumeric characters
-
-`space'
- space, horizontal tab, carriage return, newline, vertical tab, and
- form feed
-
-`upper'
- uppercase letters
-
-`xdigit'
- hexadecimal digits: `0'-`9', `a'-`f', `A'-`F'
-
-These correspond to the definitions in the C library's `<ctype.h>'
-facility. For example, `[:alpha:]' corresponds to the standard
-facility `isalpha'. Regex recognizes character class expressions only
-inside of lists; so `[[:alpha:]]' matches any letter, but `[:alpha:]'
-outside of a bracket expression and not followed by a repetition
-operator matches just itself.
-
-
-File: regex.info, Node: Range Operator, Prev: Character Class Operators, Up: List Operators
-
-The Range Operator (`-')
-------------------------
-
- Regex recognizes "range expressions" inside a list. They represent
-those characters that fall between two elements in the current
-collating sequence. You form a range expression by putting a "range
-operator" between two characters.(1) `-' represents the range operator.
-For example, `a-f' within a list represents all the characters from `a'
-through `f' inclusively.
-
- If the syntax bit `RE_NO_EMPTY_RANGES' is set, then if the range's
-ending point collates less than its starting point, the range (and the
-regular expression containing it) is invalid. For example, the regular
-expression `[z-a]' would be invalid. If this bit isn't set, then Regex
-considers such a range to be empty.
-
- Since `-' represents the range operator, if you want to make a `-'
-character itself a list item, you must do one of the following:
-
- * Put the `-' either first or last in the list.
-
- * Include a range whose starting point collates strictly lower than
- `-' and whose ending point collates equal or higher. Unless a
- range is the first item in a list, a `-' can't be its starting
- point, but *can* be its ending point. That is because Regex
- considers `-' to be the range operator unless it is preceded by
- another `-'. For example, in the ASCII encoding, `)', `*', `+',
- `,', `-', `.', and `/' are contiguous characters in the collating
- sequence. You might think that `[)-+--/]' has two ranges: `)-+'
- and `--/'. Rather, it has the ranges `)-+' and `+--', plus the
- character `/', so it matches, e.g., `,', not `.'.
-
- * Put a range whose starting point is `-' first in the list.
-
- For example, `[-a-z]' matches a lowercase letter or a hyphen (in
-English, in ASCII).
-
- ---------- Footnotes ----------
-
- (1) You can't use a character class for the starting or ending point
-of a range, since a character class is not a single character.
-
-
-File: regex.info, Node: Grouping Operators, Next: Back-reference Operator, Prev: List Operators, Up: Common Operators
-
-Grouping Operators (`(' ... `)' or `\(' ... `\)')
-=================================================
-
- A "group", also known as a "subexpression", consists of an
-"open-group operator", any number of other operators, and a
-"close-group operator". Regex treats this sequence as a unit, just as
-mathematics and programming languages treat a parenthesized expression
-as a unit.
-
- Therefore, using "groups", you can:
-
- * delimit the argument(s) to an alternation operator (*note
- Alternation Operator::.) or a repetition operator (*note
- Repetition Operators::.).
-
- * keep track of the indices of the substring that matched a given
- group. *Note Using Registers::, for a precise explanation. This
- lets you:
-
- * use the back-reference operator (*note Back-reference
- Operator::.).
-
- * use registers (*note Using Registers::.).
-
- If the syntax bit `RE_NO_BK_PARENS' is set, then `(' represents the
-open-group operator and `)' represents the close-group operator;
-otherwise, `\(' and `\)' do.
-
- If the syntax bit `RE_UNMATCHED_RIGHT_PAREN_ORD' is set and a
-close-group operator has no matching open-group operator, then Regex
-considers it to match `)'.
-
-
-File: regex.info, Node: Back-reference Operator, Next: Anchoring Operators, Prev: Grouping Operators, Up: Common Operators
-
-The Back-reference Operator ("\"DIGIT)
-======================================
-
- If the syntax bit `RE_NO_BK_REFS' isn't set, then Regex recognizes
-back references. A back reference matches a specified preceding group.
-The back reference operator is represented by `\DIGIT' anywhere after
-the end of a regular expression's DIGIT-th group (*note Grouping
-Operators::.).
-
- DIGIT must be between `1' and `9'. The matcher assigns numbers 1
-through 9 to the first nine groups it encounters. By using one of `\1'
-through `\9' after the corresponding group's close-group operator, you
-can match a substring identical to the one that the group does.
-
- Back references match according to the following (in all examples
-below, `(' represents the open-group, `)' the close-group, `{' the
-open-interval and `}' the close-interval operator):
-
- * If the group matches a substring, the back reference matches an
- identical substring. For example, `(a)\1' matches `aa' and
- `(bana)na\1bo\1' matches `bananabanabobana'. Likewise, `(.*)\1'
- matches any (newline-free if the syntax bit `RE_DOT_NEWLINE' isn't
- set) string that is composed of two identical halves; the `(.*)'
- matches the first half and the `\1' matches the second half.
-
- * If the group matches more than once (as it might if followed by,
- e.g., a repetition operator), then the back reference matches the
- substring the group *last* matched. For example, `((a*)b)*\1\2'
- matches `aabababa'; first group 1 (the outer one) matches `aab'
- and group 2 (the inner one) matches `aa'. Then group 1 matches
- `ab' and group 2 matches `a'. So, `\1' matches `ab' and `\2'
- matches `a'.
-
- * If the group doesn't participate in a match, i.e., it is part of an
- alternative not taken or a repetition operator allows zero
- repetitions of it, then the back reference makes the whole match
- fail. For example, `(one()|two())-and-(three\2|four\3)' matches
- `one-and-three' and `two-and-four', but not `one-and-four' or
- `two-and-three'. For example, if the pattern matches `one-and-',
- then its group 2 matches the empty string and its group 3 doesn't
- participate in the match. So, if it then matches `four', then
- when it tries to back reference group 3--which it will attempt to
- do because `\3' follows the `four'--the match will fail because
- group 3 didn't participate in the match.
-
- You can use a back reference as an argument to a repetition operator.
-For example, `(a(b))\2*' matches `a' followed by one or more `b's.
-Similarly, `(a(b))\2{3}' matches `abbbb'.
-
- If there is no preceding DIGIT-th subexpression, the regular
-expression is invalid.
-
-
-File: regex.info, Node: Anchoring Operators, Prev: Back-reference Operator, Up: Common Operators
-
-Anchoring Operators
-===================
-
- These operators can constrain a pattern to match only at the
-beginning or end of the entire string or at the beginning or end of a
-line.
-
-* Menu:
-
-* Match-beginning-of-line Operator:: ^
-* Match-end-of-line Operator:: $
-
-
-File: regex.info, Node: Match-beginning-of-line Operator, Next: Match-end-of-line Operator, Up: Anchoring Operators
-
-The Match-beginning-of-line Operator (`^')
-------------------------------------------
-
- This operator can match the empty string either at the beginning of
-the string or after a newline character. Thus, it is said to "anchor"
-the pattern to the beginning of a line.
-
- In the cases following, `^' represents this operator. (Otherwise,
-`^' is ordinary.)
-
- * It (the `^') is first in the pattern, as in `^foo'.
-
- * The syntax bit `RE_CONTEXT_INDEP_ANCHORS' is set, and it is outside
- a bracket expression.
-
- * It follows an open-group or alternation operator, as in `a\(^b\)'
- and `a\|^b'. *Note Grouping Operators::, and *Note Alternation
- Operator::.
-
- These rules imply that some valid patterns containing `^' cannot be
-matched; for example, `foo^bar' if `RE_CONTEXT_INDEP_ANCHORS' is set.
-
- If the `not_bol' field is set in the pattern buffer (*note GNU
-Pattern Buffers::.), then `^' fails to match at the beginning of the
-string. *Note POSIX Matching::, for when you might find this useful.
-
- If the `newline_anchor' field isn't set in the pattern buffer, then
-`^' fails to match after a newline. This is useful when you do not
-regard the string to be matched as broken into lines.
-
-
-File: regex.info, Node: Match-end-of-line Operator, Prev: Match-beginning-of-line Operator, Up: Anchoring Operators
-
-The Match-end-of-line Operator (`$')
-------------------------------------
-
- This operator can match the empty string either at the end of the
-string or before a newline character in the string. Thus, it is said
-to "anchor" the pattern to the end of a line.
-
- It is always represented by `$'. For example, `foo$' usually
-matches, e.g., `foo' and, e.g., the first three characters of
-`foo\nbar'.
-
- Its interaction with the syntax bits and pattern buffer fields is
-exactly the dual of `^''s; see the previous section. (That is,
-"beginning" becomes "end", "next" becomes "previous", and "after"
-becomes "before".)
-
-
-File: regex.info, Node: GNU Operators, Next: GNU Emacs Operators, Prev: Common Operators, Up: Top
-
-GNU Operators
-*************
-
- Following are operators that GNU defines (and POSIX doesn't).
-
-* Menu:
-
-* Word Operators::
-* Buffer Operators::
-
-
-File: regex.info, Node: Word Operators, Next: Buffer Operators, Up: GNU Operators
-
-Word Operators
-==============
-
- The operators in this section require Regex to recognize parts of
-words. Regex uses a syntax table to determine whether or not a
-character is part of a word, i.e., whether or not it is
-"word-constituent".
-
-* Menu:
-
-* Non-Emacs Syntax Tables::
-* Match-word-boundary Operator:: \b
-* Match-within-word Operator:: \B
-* Match-beginning-of-word Operator:: \<
-* Match-end-of-word Operator:: \>
-* Match-word-constituent Operator:: \w
-* Match-non-word-constituent Operator:: \W
-
-
-File: regex.info, Node: Non-Emacs Syntax Tables, Next: Match-word-boundary Operator, Up: Word Operators
-
-Non-Emacs Syntax Tables
------------------------
-
- A "syntax table" is an array indexed by the characters in your
-character set. In the ASCII encoding, therefore, a syntax table has
-256 elements. Regex always uses a `char *' variable `re_syntax_table'
-as its syntax table. In some cases, it initializes this variable and
-in others it expects you to initialize it.
-
- * If Regex is compiled with the preprocessor symbols `emacs' and
- `SYNTAX_TABLE' both undefined, then Regex allocates
- `re_syntax_table' and initializes an element I either to `Sword'
- (which it defines) if I is a letter, number, or `_', or to zero if
- it's not.
-
- * If Regex is compiled with `emacs' undefined but `SYNTAX_TABLE'
- defined, then Regex expects you to define a `char *' variable
- `re_syntax_table' to be a valid syntax table.
-
- * *Note Emacs Syntax Tables::, for what happens when Regex is
- compiled with the preprocessor symbol `emacs' defined.
-
-
-File: regex.info, Node: Match-word-boundary Operator, Next: Match-within-word Operator, Prev: Non-Emacs Syntax Tables, Up: Word Operators
-
-The Match-word-boundary Operator (`\b')
----------------------------------------
-
- This operator (represented by `\b') matches the empty string at
-either the beginning or the end of a word. For example, `\brat\b'
-matches the separate word `rat'.
-
-
-File: regex.info, Node: Match-within-word Operator, Next: Match-beginning-of-word Operator, Prev: Match-word-boundary Operator, Up: Word Operators
-
-The Match-within-word Operator (`\B')
--------------------------------------
-
- This operator (represented by `\B') matches the empty string within a
-word. For example, `c\Brat\Be' matches `crate', but `dirty \Brat'
-doesn't match `dirty rat'.
-
-
-File: regex.info, Node: Match-beginning-of-word Operator, Next: Match-end-of-word Operator, Prev: Match-within-word Operator, Up: Word Operators
-
-The Match-beginning-of-word Operator (`\<')
--------------------------------------------
-
- This operator (represented by `\<') matches the empty string at the
-beginning of a word.
-
-
-File: regex.info, Node: Match-end-of-word Operator, Next: Match-word-constituent Operator, Prev: Match-beginning-of-word Operator, Up: Word Operators
-
-The Match-end-of-word Operator (`\>')
--------------------------------------
-
- This operator (represented by `\>') matches the empty string at the
-end of a word.
-
-
-File: regex.info, Node: Match-word-constituent Operator, Next: Match-non-word-constituent Operator, Prev: Match-end-of-word Operator, Up: Word Operators
-
-The Match-word-constituent Operator (`\w')
-------------------------------------------
-
- This operator (represented by `\w') matches any word-constituent
-character.
-
-
-File: regex.info, Node: Match-non-word-constituent Operator, Prev: Match-word-constituent Operator, Up: Word Operators
-
-The Match-non-word-constituent Operator (`\W')
-----------------------------------------------
-
- This operator (represented by `\W') matches any character that is not
-word-constituent.
-
-
-File: regex.info, Node: Buffer Operators, Prev: Word Operators, Up: GNU Operators
-
-Buffer Operators
-================
-
- Following are operators which work on buffers. In Emacs, a "buffer"
-is, naturally, an Emacs buffer. For other programs, Regex considers the
-entire string to be matched as the buffer.
-
-* Menu:
-
-* Match-beginning-of-buffer Operator:: \`
-* Match-end-of-buffer Operator:: \'
-
-
-File: regex.info, Node: Match-beginning-of-buffer Operator, Next: Match-end-of-buffer Operator, Up: Buffer Operators
-
-The Match-beginning-of-buffer Operator (`\`')
----------------------------------------------
-
- This operator (represented by `\`') matches the empty string at the
-beginning of the buffer.
-
-
-File: regex.info, Node: Match-end-of-buffer Operator, Prev: Match-beginning-of-buffer Operator, Up: Buffer Operators
-
-The Match-end-of-buffer Operator (`\'')
----------------------------------------
-
- This operator (represented by `\'') matches the empty string at the
-end of the buffer.
-
-
-File: regex.info, Node: GNU Emacs Operators, Next: What Gets Matched?, Prev: GNU Operators, Up: Top
-
-GNU Emacs Operators
-*******************
-
- Following are operators that GNU defines (and POSIX doesn't) that you
-can use only when Regex is compiled with the preprocessor symbol
-`emacs' defined.
-
-* Menu:
-
-* Syntactic Class Operators::
-
-
-File: regex.info, Node: Syntactic Class Operators, Up: GNU Emacs Operators
-
-Syntactic Class Operators
-=========================
-
- The operators in this section require Regex to recognize the syntactic
-classes of characters. Regex uses a syntax table to determine this.
-
-* Menu:
-
-* Emacs Syntax Tables::
-* Match-syntactic-class Operator:: \sCLASS
-* Match-not-syntactic-class Operator:: \SCLASS
-
-
-File: regex.info, Node: Emacs Syntax Tables, Next: Match-syntactic-class Operator, Up: Syntactic Class Operators
-
-Emacs Syntax Tables
--------------------
-
- A "syntax table" is an array indexed by the characters in your
-character set. In the ASCII encoding, therefore, a syntax table has
-256 elements.
-
- If Regex is compiled with the preprocessor symbol `emacs' defined,
-then Regex expects you to define and initialize the variable
-`re_syntax_table' to be an Emacs syntax table. Emacs' syntax tables
-are more complicated than Regex's own (*note Non-Emacs Syntax
-Tables::.). *Note Syntax: (emacs)Syntax, for a description of Emacs'
-syntax tables.
-
-
-File: regex.info, Node: Match-syntactic-class Operator, Next: Match-not-syntactic-class Operator, Prev: Emacs Syntax Tables, Up: Syntactic Class Operators
-
-The Match-syntactic-class Operator (`\s'CLASS)
-----------------------------------------------
-
- This operator matches any character whose syntactic class is
-represented by a specified character. `\sCLASS' represents this
-operator where CLASS is the character representing the syntactic class
-you want. For example, `w' represents the syntactic class of
-word-constituent characters, so `\sw' matches any word-constituent
-character.
-
-
-File: regex.info, Node: Match-not-syntactic-class Operator, Prev: Match-syntactic-class Operator, Up: Syntactic Class Operators
-
-The Match-not-syntactic-class Operator (`\S'CLASS)
---------------------------------------------------
-
- This operator is similar to the match-syntactic-class operator except
-that it matches any character whose syntactic class is *not*
-represented by the specified character. `\SCLASS' represents this
-operator. For example, `w' represents the syntactic class of
-word-constituent characters, so `\Sw' matches any character that is not
-word-constituent.
-
-
-File: regex.info, Node: What Gets Matched?, Next: Programming with Regex, Prev: GNU Emacs Operators, Up: Top
-
-What Gets Matched?
-******************
-
- Regex usually matches strings according to the "leftmost longest"
-rule; that is, it chooses the longest of the leftmost matches. This
-does not mean that for a regular expression containing subexpressions
-it simply chooses the longest match for each subexpression, left to
-right; the overall match must also be the longest possible one.
-
- For example, `(ac*)(c*d[ac]*)\1' matches `acdacaaa', not `acdac', as
-it would if it were to choose the longest match for the first
-subexpression.
-
-
-File: regex.info, Node: Programming with Regex, Next: Copying, Prev: What Gets Matched?, Up: Top
-
-Programming with Regex
-**********************
-
- Here we describe how you use the Regex data structures and functions
-in C programs. Regex has three interfaces: one designed for GNU, one
-compatible with POSIX and one compatible with Berkeley UNIX.
-
-* Menu:
-
-* GNU Regex Functions::
-* POSIX Regex Functions::
-* BSD Regex Functions::
-
-
-File: regex.info, Node: GNU Regex Functions, Next: POSIX Regex Functions, Up: Programming with Regex
-
-GNU Regex Functions
-===================
-
- If you're writing code that doesn't need to be compatible with either
-POSIX or Berkeley UNIX, you can use these functions. They provide more
-options than the other interfaces.
-
-* Menu:
-
-* GNU Pattern Buffers:: The re_pattern_buffer type.
-* GNU Regular Expression Compiling:: re_compile_pattern ()
-* GNU Matching:: re_match ()
-* GNU Searching:: re_search ()
-* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
-* Searching with Fastmaps:: re_compile_fastmap ()
-* GNU Translate Tables:: The `translate' field.
-* Using Registers:: The re_registers type and related fns.
-* Freeing GNU Pattern Buffers:: regfree ()
-
-
-File: regex.info, Node: GNU Pattern Buffers, Next: GNU Regular Expression Compiling, Up: GNU Regex Functions
-
-GNU Pattern Buffers
--------------------
-
- To compile, match, or search for a given regular expression, you must
-supply a pattern buffer. A "pattern buffer" holds one compiled regular
-expression.(1)
-
- You can have several different pattern buffers simultaneously, each
-holding a compiled pattern for a different regular expression.
-
- `regex.h' defines the pattern buffer `struct' as follows:
-
- /* Space that holds the compiled pattern. It is declared as
- `unsigned char *' because its elements are
- sometimes used as array indexes. */
- unsigned char *buffer;
-
- /* Number of bytes to which `buffer' points. */
- unsigned long allocated;
-
- /* Number of bytes actually used in `buffer'. */
- unsigned long used;
-
- /* Syntax setting with which the pattern was compiled. */
- reg_syntax_t syntax;
-
- /* Pointer to a fastmap, if any, otherwise zero. re_search uses
- the fastmap, if there is one, to skip over impossible
- starting points for matches. */
- char *fastmap;
-
- /* Either a translate table to apply to all characters before
- comparing them, or zero for no translation. The translation
- is applied to a pattern when it is compiled and to a string
- when it is matched. */
- char *translate;
-
- /* Number of subexpressions found by the compiler. */
- size_t re_nsub;
-
- /* Zero if this pattern cannot match the empty string, one else.
- Well, in truth it's used only in `re_search_2', to see
- whether or not we should use the fastmap, so we don't set
- this absolutely perfectly; see `re_compile_fastmap' (the
- `duplicate' case). */
- unsigned can_be_null : 1;
-
- /* If REGS_UNALLOCATED, allocate space in the `regs' structure
- for `max (RE_NREGS, re_nsub + 1)' groups.
- If REGS_REALLOCATE, reallocate space if necessary.
- If REGS_FIXED, use what's there. */
- #define REGS_UNALLOCATED 0
- #define REGS_REALLOCATE 1
- #define REGS_FIXED 2
- unsigned regs_allocated : 2;
-
- /* Set to zero when `regex_compile' compiles a pattern; set to one
- by `re_compile_fastmap' if it updates the fastmap. */
- unsigned fastmap_accurate : 1;
-
- /* If set, `re_match_2' does not return information about
- subexpressions. */
- unsigned no_sub : 1;
-
- /* If set, a beginning-of-line anchor doesn't match at the
- beginning of the string. */
- unsigned not_bol : 1;
-
- /* Similarly for an end-of-line anchor. */
- unsigned not_eol : 1;
-
- /* If true, an anchor at a newline matches. */
- unsigned newline_anchor : 1;
-
- ---------- Footnotes ----------
-
- (1) Regular expressions are also referred to as "patterns," hence
-the name "pattern buffer."
-
-
-File: regex.info, Node: GNU Regular Expression Compiling, Next: GNU Matching, Prev: GNU Pattern Buffers, Up: GNU Regex Functions
-
-GNU Regular Expression Compiling
---------------------------------
-
- In GNU, you can both match and search for a given regular expression.
-To do either, you must first compile it in a pattern buffer (*note GNU
-Pattern Buffers::.).
-
- Regular expressions match according to the syntax with which they were
-compiled; with GNU, you indicate what syntax you want by setting the
-variable `re_syntax_options' (declared in `regex.h' and defined in
-`regex.c') before calling the compiling function, `re_compile_pattern'
-(see below). *Note Syntax Bits::, and *Note Predefined Syntaxes::.
-
- You can change the value of `re_syntax_options' at any time.
-Usually, however, you set its value once and then never change it.
-
- `re_compile_pattern' takes a pattern buffer as an argument. You must
-initialize the following fields:
-
-`translate initialization'
-`translate'
- Initialize this to point to a translate table if you want one, or
- to zero if you don't. We explain translate tables in *Note GNU
- Translate Tables::.
-
-`fastmap'
- Initialize this to nonzero if you want a fastmap, or to zero if you
- don't.
-
-`buffer'
-`allocated'
- If you want `re_compile_pattern' to allocate memory for the
- compiled pattern, set both of these to zero. If you have an
- existing block of memory (allocated with `malloc') you want Regex
- to use, set `buffer' to its address and `allocated' to its size (in
- bytes).
-
- `re_compile_pattern' uses `realloc' to extend the space for the
- compiled pattern as necessary.
-
- To compile a pattern buffer, use:
-
- char *
- re_compile_pattern (const char *REGEX, const int REGEX_SIZE,
- struct re_pattern_buffer *PATTERN_BUFFER)
-
-REGEX is the regular expression's address, REGEX_SIZE is its length,
-and PATTERN_BUFFER is the pattern buffer's address.
-
- If `re_compile_pattern' successfully compiles the regular expression,
-it returns zero and sets `*PATTERN_BUFFER' to the compiled pattern. It
-sets the pattern buffer's fields as follows:
-
-`buffer'
- to the compiled pattern.
-
-`used'
- to the number of bytes the compiled pattern in `buffer' occupies.
-
-`syntax'
- to the current value of `re_syntax_options'.
-
-`re_nsub'
- to the number of subexpressions in REGEX.
-
-`fastmap_accurate'
- to zero on the theory that the pattern you're compiling is
- different than the one previously compiled into `buffer'; in that
- case (since you can't make a fastmap without a compiled pattern),
- `fastmap' would either contain an incompatible fastmap, or nothing
- at all.
-
- If `re_compile_pattern' can't compile REGEX, it returns an error
-string corresponding to one of the errors listed in *Note POSIX Regular
-Expression Compiling::.
-
-
-File: regex.info, Node: GNU Matching, Next: GNU Searching, Prev: GNU Regular Expression Compiling, Up: GNU Regex Functions
-
-GNU Matching
-------------
-
- Matching the GNU way means trying to match as much of a string as
-possible starting at a position within it you specify. Once you've
-compiled a pattern into a pattern buffer (*note GNU Regular Expression
-Compiling::.), you can ask the matcher to match that pattern against a
-string using:
-
- int
- re_match (struct re_pattern_buffer *PATTERN_BUFFER,
- const char *STRING, const int SIZE,
- const int START, struct re_registers *REGS)
-
-PATTERN_BUFFER is the address of a pattern buffer containing a compiled
-pattern. STRING is the string you want to match; it can contain
-newline and null characters. SIZE is the length of that string. START
-is the string index at which you want to begin matching; the first
-character of STRING is at index zero. *Note Using Registers::, for an
-explanation of REGS; you can safely pass zero.
-
- `re_match' matches the regular expression in PATTERN_BUFFER against
-the string STRING according to the syntax in PATTERN_BUFFER's `syntax'
-field. (*Note GNU Regular Expression Compiling::, for how to set it.)
-The function returns -1 if the compiled pattern does not match any part
-of STRING and -2 if an internal error happens; otherwise, it returns
-how many (possibly zero) characters of STRING the pattern matched.
-
- An example: suppose PATTERN_BUFFER points to a pattern buffer
-containing the compiled pattern for `a*', and STRING points to `aaaaab'
-(whereupon SIZE should be 6). Then if START is 2, `re_match' returns 3,
-i.e., `a*' would have matched the last three `a's in STRING. If START
-is 0, `re_match' returns 5, i.e., `a*' would have matched all the `a's
-in STRING. If START is either 5 or 6, it returns zero.
-
- If START is not between zero and SIZE, then `re_match' returns -1.
-
-
-File: regex.info, Node: GNU Searching, Next: Matching/Searching with Split Data, Prev: GNU Matching, Up: GNU Regex Functions
-
-GNU Searching
--------------
-
- "Searching" means trying to match starting at successive positions
-within a string. The function `re_search' does this.
-
- Before calling `re_search', you must compile your regular expression.
-*Note GNU Regular Expression Compiling::.
-
- Here is the function declaration:
-
- int
- re_search (struct re_pattern_buffer *PATTERN_BUFFER,
- const char *STRING, const int SIZE,
- const int START, const int RANGE,
- struct re_registers *REGS)
-
-whose arguments are the same as those to `re_match' (*note GNU
-Matching::.) except that the two arguments START and RANGE replace
-`re_match''s argument START.
-
- If RANGE is positive, then `re_search' attempts a match starting
-first at index START, then at START + 1 if that fails, and so on, up to
-START + RANGE; if RANGE is negative, then it attempts a match starting
-first at index START, then at START - 1 if that fails, and so on.
-
- If START is not between zero and SIZE, then `re_search' returns -1.
-When RANGE is positive, `re_search' adjusts RANGE so that START + RANGE
-- 1 is between zero and SIZE, if necessary; that way it won't search
-outside of STRING. Similarly, when RANGE is negative, `re_search'
-adjusts RANGE so that START + RANGE + 1 is between zero and SIZE, if
-necessary.
-
- If the `fastmap' field of PATTERN_BUFFER is zero, `re_search' matches
-starting at consecutive positions; otherwise, it uses `fastmap' to make
-the search more efficient. *Note Searching with Fastmaps::.
-
- If no match is found, `re_search' returns -1. If a match is found,
-it returns the index where the match began. If an internal error
-happens, it returns -2.
-
-
-File: regex.info, Node: Matching/Searching with Split Data, Next: Searching with Fastmaps, Prev: GNU Searching, Up: GNU Regex Functions
-
-Matching and Searching with Split Data
---------------------------------------
-
- Using the functions `re_match_2' and `re_search_2', you can match or
-search in data that is divided into two strings.
-
- The function:
-
- int
- re_match_2 (struct re_pattern_buffer *BUFFER,
- const char *STRING1, const int SIZE1,
- const char *STRING2, const int SIZE2,
- const int START,
- struct re_registers *REGS,
- const int STOP)
-
-is similar to `re_match' (*note GNU Matching::.) except that you pass
-*two* data strings and sizes, and an index STOP beyond which you don't
-want the matcher to try matching. As with `re_match', if it succeeds,
-`re_match_2' returns how many characters of STRING it matched. Regard
-STRING1 and STRING2 as concatenated when you set the arguments START and
-STOP and use the contents of REGS; `re_match_2' never returns a value
-larger than SIZE1 + SIZE2.
-
- The function:
-
- int
- re_search_2 (struct re_pattern_buffer *BUFFER,
- const char *STRING1, const int SIZE1,
- const char *STRING2, const int SIZE2,
- const int START, const int RANGE,
- struct re_registers *REGS,
- const int STOP)
-
-is similarly related to `re_search'.
-
-
-File: regex.info, Node: Searching with Fastmaps, Next: GNU Translate Tables, Prev: Matching/Searching with Split Data, Up: GNU Regex Functions
-
-Searching with Fastmaps
------------------------
-
- If you're searching through a long string, you should use a fastmap.
-Without one, the searcher tries to match at consecutive positions in the
-string. Generally, most of the characters in the string could not start
-a match. It takes much longer to try matching at a given position in
-the string than it does to check in a table whether or not the
-character at that position could start a match. A "fastmap" is such a
-table.
-
- More specifically, a fastmap is an array indexed by the characters in
-your character set. Under the ASCII encoding, therefore, a fastmap has
-256 elements. If you want the searcher to use a fastmap with a given
-pattern buffer, you must allocate the array and assign the array's
-address to the pattern buffer's `fastmap' field. You either can
-compile the fastmap yourself or have `re_search' do it for you; when
-`fastmap' is nonzero, it automatically compiles a fastmap the first
-time you search using a particular compiled pattern.
-
- To compile a fastmap yourself, use:
-
- int
- re_compile_fastmap (struct re_pattern_buffer *PATTERN_BUFFER)
-
-PATTERN_BUFFER is the address of a pattern buffer. If the character C
-could start a match for the pattern, `re_compile_fastmap' makes
-`PATTERN_BUFFER->fastmap[C]' nonzero. It returns 0 if it can compile a
-fastmap and -2 if there is an internal error. For example, if `|' is
-the alternation operator and PATTERN_BUFFER holds the compiled pattern
-for `a|b', then `re_compile_fastmap' sets `fastmap['a']' and
-`fastmap['b']' (and no others).
-
- `re_search' uses a fastmap as it moves along in the string: it checks
-the string's characters until it finds one that's in the fastmap. Then
-it tries matching at that character. If the match fails, it repeats
-the process. So, by using a fastmap, `re_search' doesn't waste time
-trying to match at positions in the string that couldn't start a match.
-
- If you don't want `re_search' to use a fastmap, store zero in the
-`fastmap' field of the pattern buffer before calling `re_search'.
-
- Once you've initialized a pattern buffer's `fastmap' field, you need
-never do so again--even if you compile a new pattern in it--provided
-the way the field is set still reflects whether or not you want a
-fastmap. `re_search' will still either do nothing if `fastmap' is null
-or, if it isn't, compile a new fastmap for the new pattern.
-
-
-File: regex.info, Node: GNU Translate Tables, Next: Using Registers, Prev: Searching with Fastmaps, Up: GNU Regex Functions
-
-GNU Translate Tables
---------------------
-
- If you set the `translate' field of a pattern buffer to a translate
-table, then the GNU Regex functions to which you've passed that pattern
-buffer use it to apply a simple transformation to all the regular
-expression and string characters at which they look.
-
- A "translate table" is an array indexed by the characters in your
-character set. Under the ASCII encoding, therefore, a translate table
-has 256 elements. The array's elements are also characters in your
-character set. When the Regex functions see a character C, they use
-`translate[C]' in its place, with one exception: the character after a
-`\' is not translated. (This ensures that the operators, e.g., `\B'
-and `\b', are always distinguishable.)
-
- For example, a table that maps all lowercase letters to the
-corresponding uppercase ones would cause the matcher to ignore
-differences in case.(1) Such a table would map all characters except
-lowercase letters to themselves, and lowercase letters to the
-corresponding uppercase ones. Under the ASCII encoding, here's how you
-could initialize such a table (we'll call it `case_fold'):
-
- for (i = 0; i < 256; i++)
- case_fold[i] = i;
- for (i = 'a'; i <= 'z'; i++)
- case_fold[i] = i - ('a' - 'A');
-
- You tell Regex to use a translate table on a given pattern buffer by
-assigning that table's address to the `translate' field of that buffer.
-If you don't want Regex to do any translation, put zero into this
-field. You'll get weird results if you change the table's contents
-anytime between compiling the pattern buffer, compiling its fastmap, and
-matching or searching with the pattern buffer.
-
- ---------- Footnotes ----------
-
- (1) A table that maps all uppercase letters to the corresponding
-lowercase ones would work just as well for this purpose.
-
-
-File: regex.info, Node: Using Registers, Next: Freeing GNU Pattern Buffers, Prev: GNU Translate Tables, Up: GNU Regex Functions
-
-Using Registers
----------------
-
- A group in a regular expression can match a (possibly empty)
-substring of the string that regular expression as a whole matched.
-The matcher remembers the beginning and end of the substring matched by
-each group.
-
- To find out what they matched, pass a nonzero REGS argument to a GNU
-matching or searching function (*note GNU Matching::. and *Note GNU
-Searching::), i.e., the address of a structure of this type, as defined
-in `regex.h':
-
- struct re_registers
- {
- unsigned num_regs;
- regoff_t *start;
- regoff_t *end;
- };
-
- Except for (possibly) the NUM_REGS'th element (see below), the Ith
-element of the `start' and `end' arrays records information about the
-Ith group in the pattern. (They're declared as C pointers, but this is
-only because not all C compilers accept zero-length arrays;
-conceptually, it is simplest to think of them as arrays.)
-
- The `start' and `end' arrays are allocated in various ways, depending
-on the value of the `regs_allocated' field in the pattern buffer passed
-to the matcher.
-
- The simplest and perhaps most useful is to let the matcher
-(re)allocate enough space to record information for all the groups in
-the regular expression. If `regs_allocated' is `REGS_UNALLOCATED', the
-matcher allocates 1 + RE_NSUB elements (RE_NSUB is another field in the
-pattern buffer; *note GNU Pattern Buffers::.). It sets the extra element
-to -1 and sets `regs_allocated' to `REGS_REALLOCATE'. Then on subsequent calls
-with the same pattern buffer and REGS arguments, the matcher
-reallocates more space if necessary.
-
- It would perhaps be more logical to make the `regs_allocated' field
-part of the `re_registers' structure, instead of part of the pattern
-buffer. But in that case the caller would be forced to initialize the
-structure before passing it. Much existing code doesn't do this
-initialization, and it's arguably better to avoid it anyway.
-
- `re_compile_pattern' sets `regs_allocated' to `REGS_UNALLOCATED', so
-if you use the GNU regular expression functions, you get this behavior
-by default.
-
- xx document re_set_registers
-
- POSIX, on the other hand, requires a different interface: the caller
-is supposed to pass in a fixed-length array which the matcher fills.
-Therefore, if `regs_allocated' is `REGS_FIXED' the matcher simply fills
-that array.
-
- The following examples illustrate the information recorded in the
-`re_registers' structure. (In all of them, `(' represents the
-open-group and `)' the close-group operator. The first character in
-the string STRING is at index 0.)
-
- * If the regular expression has an I-th group not contained within
- another group that matches a substring of STRING, then the
- function sets `REGS->start[I]' to the index in STRING where the
- substring matched by the I-th group begins, and `REGS->end[I]' to
- the index just beyond that substring's end. The function sets
- `REGS->start[0]' and `REGS->end[0]' to analogous information about
- the entire pattern.
-
- For example, when you match `((a)(b))' against `ab', you get:
-
- * 0 in `REGS->start[0]' and 2 in `REGS->end[0]'
-
- * 0 in `REGS->start[1]' and 2 in `REGS->end[1]'
-
- * 0 in `REGS->start[2]' and 1 in `REGS->end[2]'
-
- * 1 in `REGS->start[3]' and 2 in `REGS->end[3]'
-
- * If a group matches more than once (as it might if followed by,
- e.g., a repetition operator), then the function reports the
- information about what the group *last* matched.
-
- For example, when you match the pattern `(a)*' against the string
- `aa', you get:
-
- * 0 in `REGS->start[0]' and 2 in `REGS->end[0]'
-
- * 1 in `REGS->start[1]' and 2 in `REGS->end[1]'
-
- * If the I-th group does not participate in a successful match,
- e.g., it is an alternative not taken or a repetition operator
- allows zero repetitions of it, then the function sets
- `REGS->start[I]' and `REGS->end[I]' to -1.
-
- For example, when you match the pattern `(a)*b' against the string
- `b', you get:
-
- * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'
-
- * -1 in `REGS->start[1]' and -1 in `REGS->end[1]'
-
- * If the I-th group matches a zero-length string, then the function
- sets `REGS->start[I]' and `REGS->end[I]' to the index just beyond
- that zero-length string.
-
- For example, when you match the pattern `(a*)b' against the string
- `b', you get:
-
- * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'
-
- * 0 in `REGS->start[1]' and 0 in `REGS->end[1]'
-
- * If an I-th group contains a J-th group in turn not contained
- within any other group within group I and the function reports a
- match of the I-th group, then it records in `REGS->start[J]' and
- `REGS->end[J]' the last match (if it matched) of the J-th group.
-
- For example, when you match the pattern `((a*)b)*' against the
- string `abb', group 2 last matches the empty string, so you get
- what it previously matched:
-
- * 0 in `REGS->start[0]' and 3 in `REGS->end[0]'
-
- * 2 in `REGS->start[1]' and 3 in `REGS->end[1]'
-
- * 2 in `REGS->start[2]' and 2 in `REGS->end[2]'
-
- When you match the pattern `((a)*b)*' against the string `abb',
- group 2 doesn't participate in the last match, so you get:
-
- * 0 in `REGS->start[0]' and 3 in `REGS->end[0]'
-
- * 2 in `REGS->start[1]' and 3 in `REGS->end[1]'
-
- * 0 in `REGS->start[2]' and 1 in `REGS->end[2]'
-
- * If an I-th group contains a J-th group in turn not contained
- within any other group within group I and the function sets
- `REGS->start[I]' and `REGS->end[I]' to -1, then it also sets
- `REGS->start[J]' and `REGS->end[J]' to -1.
-
- For example, when you match the pattern `((a)*b)*c' against the
- string `c', you get:
-
- * 0 in `REGS->start[0]' and 1 in `REGS->end[0]'
-
- * -1 in `REGS->start[1]' and -1 in `REGS->end[1]'
-
- * -1 in `REGS->start[2]' and -1 in `REGS->end[2]'
-
-
-File: regex.info, Node: Freeing GNU Pattern Buffers, Prev: Using Registers, Up: GNU Regex Functions
-
-Freeing GNU Pattern Buffers
----------------------------
-
- To free any allocated fields of a pattern buffer, you can use the
-POSIX function described in *Note Freeing POSIX Pattern Buffers::,
-since the type `regex_t'--the type for POSIX pattern buffers--is
-equivalent to the type `re_pattern_buffer'. After freeing a pattern
-buffer, you need to again compile a regular expression in it (*note GNU
-Regular Expression Compiling::.) before passing it to a matching or
-searching function.
-
-
-File: regex.info, Node: POSIX Regex Functions, Next: BSD Regex Functions, Prev: GNU Regex Functions, Up: Programming with Regex
-
-POSIX Regex Functions
-=====================
-
- If you're writing code that has to be POSIX compatible, you'll need
-to use these functions. Their interfaces are as specified by POSIX,
-draft 1003.2/D11.2.
-
-* Menu:
-
-* POSIX Pattern Buffers:: The regex_t type.
-* POSIX Regular Expression Compiling:: regcomp ()
-* POSIX Matching:: regexec ()
-* Reporting Errors:: regerror ()
-* Using Byte Offsets:: The regmatch_t type.
-* Freeing POSIX Pattern Buffers:: regfree ()
-
-
-File: regex.info, Node: POSIX Pattern Buffers, Next: POSIX Regular Expression Compiling, Up: POSIX Regex Functions
-
-POSIX Pattern Buffers
----------------------
-
- To compile or match a given regular expression the POSIX way, you
-must supply a pattern buffer exactly the way you do for GNU (*note GNU
-Pattern Buffers::.). POSIX pattern buffers have type `regex_t', which
-is equivalent to the GNU pattern buffer type `re_pattern_buffer'.
-
-
-File: regex.info, Node: POSIX Regular Expression Compiling, Next: POSIX Matching, Prev: POSIX Pattern Buffers, Up: POSIX Regex Functions
-
-POSIX Regular Expression Compiling
-----------------------------------
-
- With POSIX, you can only search for a given regular expression; you
-can't match it. To do this, you must first compile it in a pattern
-buffer, using `regcomp'.
-
- To compile a pattern buffer, use:
-
- int
- regcomp (regex_t *PREG, const char *REGEX, int CFLAGS)
-
-PREG is the initialized pattern buffer's address, REGEX is the regular
-expression's address, and CFLAGS is the compilation flags, which Regex
-considers as a collection of bits. Here are the valid bits, as defined
-in `regex.h':
-
-`REG_EXTENDED'
- says to use POSIX Extended Regular Expression syntax; if this isn't
- set, then says to use POSIX Basic Regular Expression syntax.
- `regcomp' sets PREG's `syntax' field accordingly.
-
-`REG_ICASE'
- says to ignore case; `regcomp' sets PREG's `translate' field to a
- translate table which ignores case, replacing anything you've put
- there before.
-
-`REG_NOSUB'
- says to set PREG's `no_sub' field; *note POSIX Matching::., for
- what this means.
-
-`REG_NEWLINE'
- says that a:
-
- * match-any-character operator (*note Match-any-character
- Operator::.) doesn't match a newline.
-
- * nonmatching list not containing a newline (*note List
- Operators::.) matches a newline.
-
- * match-beginning-of-line operator (*note
- Match-beginning-of-line Operator::.) matches the empty string
- immediately after a newline, regardless of how `REG_NOTBOL'
- is set (*note POSIX Matching::., for an explanation of
- `REG_NOTBOL').
-
- * match-end-of-line operator (*note Match-end-of-line
- Operator::.) matches the empty string immediately before a
- newline, regardless of how `REG_NOTEOL' is set (*note POSIX
- Matching::., for an explanation of `REG_NOTEOL').
-
- If `regcomp' successfully compiles the regular expression, it returns
-zero and sets `*PREG' to the compiled pattern. Except for
-`syntax' (which it sets as explained above), it also sets the same
-fields the same way as does the GNU compiling function (*note GNU
-Regular Expression Compiling::.).
-
- If `regcomp' can't compile the regular expression, it returns one of
-the error codes listed here. (Except when noted differently, the
-syntax in all examples below is basic regular expression syntax.)
-
-`REG_BADRPT'
- For example, the consecutive repetition operators `**' in `a**'
- are invalid. As another example, if the syntax is extended
- regular expression syntax, then the repetition operator `*' with
- nothing on which to operate in `*' is invalid.
-
-`REG_BADBR'
- For example, the COUNT `-1' in `a\{-1' is invalid.
-
-`REG_EBRACE'
- For example, `a\{1' is missing a close-interval operator.
-
-`REG_EBRACK'
- For example, `[a' is missing a close-list operator.
-
-`REG_ERANGE'
- For example, the range ending point `a' that collates lower than
- does its starting point `z' in `[z-a]' is invalid. Also, the
- range with the character class `[:alpha:]' as its starting point in
- `[[:alpha:]-|]' is invalid.
-
-`REG_ECTYPE'
- For example, the character class name `foo' in `[[:foo:]]' is
- invalid.
-
-`REG_EPAREN'
- For example, `a\)' is missing an open-group operator and `\(a' is
- missing a close-group operator.
-
-`REG_ESUBREG'
- For example, the back reference `\2' that refers to a nonexistent
- subexpression in `\(a\)\2' is invalid.
-
-`REG_EEND'
- Returned when a regular expression causes no other more specific
- error.
-
-`REG_EESCAPE'
- For example, the trailing backslash `\' in `a\' is invalid, as is
- the one in `\'.
-
-`REG_BADPAT'
- For example, in the extended regular expression syntax, the empty
- group `()' in `a()b' is invalid.
-
-`REG_ESIZE'
- Returned when a regular expression needs a pattern buffer larger
- than 65536 bytes.
-
-`REG_ESPACE'
- Returned when a regular expression causes Regex to run out of
- memory.
-
-
-File: regex.info, Node: POSIX Matching, Next: Reporting Errors, Prev: POSIX Regular Expression Compiling, Up: POSIX Regex Functions
-
-POSIX Matching
---------------
-
- Matching the POSIX way means trying to match a null-terminated string
-starting at its first character. Once you've compiled a pattern into a
-pattern buffer (*note POSIX Regular Expression Compiling::.), you can
-ask the matcher to match that pattern against a string using:
-
- int
- regexec (const regex_t *PREG, const char *STRING,
- size_t NMATCH, regmatch_t PMATCH[], int EFLAGS)
-
-PREG is the address of a pattern buffer for a compiled pattern. STRING
-is the string you want to match.
-
- *Note Using Byte Offsets::, for an explanation of PMATCH. If you
-pass zero for NMATCH or you compiled PREG with the compilation flag
-`REG_NOSUB' set, then `regexec' will ignore PMATCH; otherwise, you must
-allocate it to have at least NMATCH elements. `regexec' will record
-NMATCH byte offsets in PMATCH, and set to -1 any unused elements up to
-PMATCH`[NMATCH - 1]'.
-
- EFLAGS specifies "execution flags"--namely, the two bits `REG_NOTBOL'
-and `REG_NOTEOL' (defined in `regex.h'). If you set `REG_NOTBOL', then
-the match-beginning-of-line operator (*note Match-beginning-of-line
-Operator::.) always fails to match. This lets you match against pieces
-of a line, as you would need to if, say, searching for repeated
-instances of a given pattern in a line; it would work correctly for
-patterns both with and without match-beginning-of-line operators.
-`REG_NOTEOL' works analogously for the match-end-of-line operator
-(*note Match-end-of-line Operator::.); it exists for symmetry.
-
- `regexec' tries to find a match for PREG in STRING according to the
-syntax in PREG's `syntax' field. (*Note POSIX Regular Expression
-Compiling::, for how to set it.) The function returns zero if the
-compiled pattern matches STRING and `REG_NOMATCH' (defined in
-`regex.h') if it doesn't.
-
-
-File: regex.info, Node: Reporting Errors, Next: Using Byte Offsets, Prev: POSIX Matching, Up: POSIX Regex Functions
-
-Reporting Errors
-----------------
-
- If either `regcomp' or `regexec' fails, it returns a nonzero error
-code, the possibilities for which are defined in `regex.h'. *Note
-POSIX Regular Expression Compiling::, and *Note POSIX Matching::, for
-what these codes mean. To get an error string corresponding to these
-codes, you can use:
-
- size_t
- regerror (int ERRCODE,
- const regex_t *PREG,
- char *ERRBUF,
- size_t ERRBUF_SIZE)
-
-ERRCODE is an error code, PREG is the address of the pattern buffer
-which provoked the error, ERRBUF is the error buffer, and ERRBUF_SIZE
-is ERRBUF's size.
-
- `regerror' returns the size in bytes of the error string
-corresponding to ERRCODE (including its terminating null). If ERRBUF
-and ERRBUF_SIZE are nonzero, it also returns in ERRBUF the first
-ERRBUF_SIZE - 1 characters of the error string, followed by a null.
-ERRBUF_SIZE must be a nonnegative number less than or equal to the size
-in bytes of ERRBUF.
-
- You can call `regerror' with a null ERRBUF and a zero ERRBUF_SIZE to
-determine how large ERRBUF need be to accommodate `regerror''s error
-string.
-
-
-File: regex.info, Node: Using Byte Offsets, Next: Freeing POSIX Pattern Buffers, Prev: Reporting Errors, Up: POSIX Regex Functions
-
-Using Byte Offsets
-------------------
-
- In POSIX, variables of type `regmatch_t' hold information analogous
-to, but not identical to, GNU's registers (*note Using Registers::.).
-To get information about registers in POSIX, pass to `regexec' a
-nonzero PMATCH of type `regmatch_t', i.e., the address of a structure
-of this type, defined in `regex.h':
-
- typedef struct
- {
- regoff_t rm_so;
- regoff_t rm_eo;
- } regmatch_t;
-
- When reading in *Note Using Registers::, about how the matching
-function stores the information into the registers, substitute PMATCH
-for REGS, `PMATCH[I].rm_so' for `REGS->start[I]' and
-`PMATCH[I].rm_eo' for `REGS->end[I]'.
-
-
-File: regex.info, Node: Freeing POSIX Pattern Buffers, Prev: Using Byte Offsets, Up: POSIX Regex Functions
-
-Freeing POSIX Pattern Buffers
------------------------------
-
- To free any allocated fields of a pattern buffer, use:
-
- void
- regfree (regex_t *PREG)
-
-PREG is the pattern buffer whose allocated fields you want freed.
-`regfree' also sets PREG's `allocated' and `used' fields to zero.
-After freeing a pattern buffer, you need to again compile a regular
-expression in it (*note POSIX Regular Expression Compiling::.) before
-passing it to the matching function (*note POSIX Matching::.).
-
-
-File: regex.info, Node: BSD Regex Functions, Prev: POSIX Regex Functions, Up: Programming with Regex
-
-BSD Regex Functions
-===================
-
- If you're writing code that has to be Berkeley UNIX compatible,
-you'll need to use these functions whose interfaces are the same as
-those in Berkeley UNIX.
-
-* Menu:
-
-* BSD Regular Expression Compiling:: re_comp ()
-* BSD Searching:: re_exec ()
-
-
-File: regex.info, Node: BSD Regular Expression Compiling, Next: BSD Searching, Up: BSD Regex Functions
-
-BSD Regular Expression Compiling
---------------------------------
-
- With Berkeley UNIX, you can only search for a given regular
-expression; you can't match one. To search for it, you must first
-compile it. Before you compile it, you must indicate the regular
-expression syntax you want it compiled according to by setting the
-variable `re_syntax_options' (declared in `regex.h') to some syntax
-(*note Regular Expression Syntax::.).
-
- To compile a regular expression use:
-
- char *
- re_comp (char *REGEX)
-
-REGEX is the address of a null-terminated regular expression.
-`re_comp' uses an internal pattern buffer, so you can use only the most
-recently compiled pattern buffer. This means that if you want to use a
-given regular expression that you've already compiled--but it isn't the
-latest one you've compiled--you'll have to recompile it. If you call
-`re_comp' with the null string (*not* the empty string) as the
-argument, it doesn't change the contents of the pattern buffer.
-
- If `re_comp' successfully compiles the regular expression, it returns
-zero. If it can't compile the regular expression, it returns an error
-string. `re_comp''s error messages are identical to those of
-`re_compile_pattern' (*note GNU Regular Expression Compiling::.).
-
-
-File: regex.info, Node: BSD Searching, Prev: BSD Regular Expression Compiling, Up: BSD Regex Functions
-
-BSD Searching
--------------
-
- Searching the Berkeley UNIX way means searching in a string starting
-at its first character and trying successive positions within it to
-find a match. Once you've compiled a pattern using `re_comp' (*note
-BSD Regular Expression Compiling::.), you can ask Regex to search for
-that pattern in a string using:
-
- int
- re_exec (char *STRING)
-
-STRING is the address of the null-terminated string in which you want
-to search.
-
- `re_exec' returns either 1 for success or 0 for failure. It
-automatically uses a GNU fastmap (*note Searching with Fastmaps::.).
-
-
-File: regex.info, Node: Copying, Next: Index, Prev: Programming with Regex, Up: Top
-
-GNU GENERAL PUBLIC LICENSE
-**************************
-
- Version 2, June 1991
-
- Copyright (C) 1989, 1991 Free Software Foundation, Inc.
- 675 Mass Ave, Cambridge, MA 02139, USA
-
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-Preamble
-========
-
- The licenses for most software are designed to take away your freedom
-to share and change it. By contrast, the GNU General Public License is
-intended to guarantee your freedom to share and change free
-software--to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it in
-new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
- 1. This License applies to any program or other work which contains a
- notice placed by the copyright holder saying it may be distributed
- under the terms of this General Public License. The "Program",
- below, refers to any such program or work, and a "work based on
- the Program" means either the Program or any derivative work under
- copyright law: that is to say, a work containing the Program or a
- portion of it, either verbatim or with modifications and/or
- translated into another language. (Hereinafter, translation is
- included without limitation in the term "modification".) Each
- licensee is addressed as "you".
-
- Activities other than copying, distribution and modification are
- not covered by this License; they are outside its scope. The act
- of running the Program is not restricted, and the output from the
- Program is covered only if its contents constitute a work based on
- the Program (independent of having been made by running the
- Program). Whether that is true depends on what the Program does.
-
- 2. You may copy and distribute verbatim copies of the Program's
- source code as you receive it, in any medium, provided that you
- conspicuously and appropriately publish on each copy an appropriate
- copyright notice and disclaimer of warranty; keep intact all the
- notices that refer to this License and to the absence of any
- warranty; and give any other recipients of the Program a copy of
- this License along with the Program.
-
- You may charge a fee for the physical act of transferring a copy,
- and you may at your option offer warranty protection in exchange
- for a fee.
-
- 3. You may modify your copy or copies of the Program or any portion
- of it, thus forming a work based on the Program, and copy and
- distribute such modifications or work under the terms of Section 1
- above, provided that you also meet all of these conditions:
-
- a. You must cause the modified files to carry prominent notices
- stating that you changed the files and the date of any change.
-
- b. You must cause any work that you distribute or publish, that
- in whole or in part contains or is derived from the Program
- or any part thereof, to be licensed as a whole at no charge
- to all third parties under the terms of this License.
-
- c. If the modified program normally reads commands interactively
- when run, you must cause it, when started running for such
- interactive use in the most ordinary way, to print or display
- an announcement including an appropriate copyright notice and
- a notice that there is no warranty (or else, saying that you
- provide a warranty) and that users may redistribute the
- program under these conditions, and telling the user how to
- view a copy of this License. (Exception: if the Program
- itself is interactive but does not normally print such an
- announcement, your work based on the Program is not required
- to print an announcement.)
-
- These requirements apply to the modified work as a whole. If
- identifiable sections of that work are not derived from the
- Program, and can be reasonably considered independent and separate
- works in themselves, then this License, and its terms, do not
- apply to those sections when you distribute them as separate
- works. But when you distribute the same sections as part of a
- whole which is a work based on the Program, the distribution of
- the whole must be on the terms of this License, whose permissions
- for other licensees extend to the entire whole, and thus to each
- and every part regardless of who wrote it.
-
- Thus, it is not the intent of this section to claim rights or
- contest your rights to work written entirely by you; rather, the
- intent is to exercise the right to control the distribution of
- derivative or collective works based on the Program.
-
- In addition, mere aggregation of another work not based on the
- Program with the Program (or with a work based on the Program) on
- a volume of a storage or distribution medium does not bring the
- other work under the scope of this License.
-
- 4. You may copy and distribute the Program (or a work based on it,
- under Section 2) in object code or executable form under the terms
- of Sections 1 and 2 above provided that you also do one of the
- following:
-
- a. Accompany it with the complete corresponding machine-readable
- source code, which must be distributed under the terms of
- Sections 1 and 2 above on a medium customarily used for
- software interchange; or,
-
- b. Accompany it with a written offer, valid for at least three
- years, to give any third party, for a charge no more than your
- cost of physically performing source distribution, a complete
- machine-readable copy of the corresponding source code, to be
- distributed under the terms of Sections 1 and 2 above on a
- medium customarily used for software interchange; or,
-
- c. Accompany it with the information you received as to the offer
- to distribute corresponding source code. (This alternative is
- allowed only for noncommercial distribution and only if you
- received the program in object code or executable form with
- such an offer, in accord with Subsection b above.)
-
- The source code for a work means the preferred form of the work for
- making modifications to it. For an executable work, complete
- source code means all the source code for all modules it contains,
- plus any associated interface definition files, plus the scripts
- used to control compilation and installation of the executable.
- However, as a special exception, the source code distributed need
- not include anything that is normally distributed (in either
- source or binary form) with the major components (compiler,
- kernel, and so on) of the operating system on which the executable
- runs, unless that component itself accompanies the executable.
-
- If distribution of executable or object code is made by offering
- access to copy from a designated place, then offering equivalent
- access to copy the source code from the same place counts as
- distribution of the source code, even though third parties are not
- compelled to copy the source along with the object code.
-
- 5. You may not copy, modify, sublicense, or distribute the Program
- except as expressly provided under this License. Any attempt
- otherwise to copy, modify, sublicense or distribute the Program is
- void, and will automatically terminate your rights under this
- License. However, parties who have received copies, or rights,
- from you under this License will not have their licenses
- terminated so long as such parties remain in full compliance.
-
- 6. You are not required to accept this License, since you have not
- signed it. However, nothing else grants you permission to modify
- or distribute the Program or its derivative works. These actions
- are prohibited by law if you do not accept this License.
- Therefore, by modifying or distributing the Program (or any work
- based on the Program), you indicate your acceptance of this
- License to do so, and all its terms and conditions for copying,
- distributing or modifying the Program or works based on it.
-
- 7. Each time you redistribute the Program (or any work based on the
- Program), the recipient automatically receives a license from the
- original licensor to copy, distribute or modify the Program
- subject to these terms and conditions. You may not impose any
- further restrictions on the recipients' exercise of the rights
- granted herein. You are not responsible for enforcing compliance
- by third parties to this License.
-
- 8. If, as a consequence of a court judgment or allegation of patent
- infringement or for any other reason (not limited to patent
- issues), conditions are imposed on you (whether by court order,
- agreement or otherwise) that contradict the conditions of this
- License, they do not excuse you from the conditions of this
- License. If you cannot distribute so as to satisfy simultaneously
- your obligations under this License and any other pertinent
- obligations, then as a consequence you may not distribute the
- Program at all. For example, if a patent license would not permit
- royalty-free redistribution of the Program by all those who
- receive copies directly or indirectly through you, then the only
- way you could satisfy both it and this License would be to refrain
- entirely from distribution of the Program.
-
- If any portion of this section is held invalid or unenforceable
- under any particular circumstance, the balance of the section is
- intended to apply and the section as a whole is intended to apply
- in other circumstances.
-
- It is not the purpose of this section to induce you to infringe any
- patents or other property right claims or to contest validity of
- any such claims; this section has the sole purpose of protecting
- the integrity of the free software distribution system, which is
- implemented by public license practices. Many people have made
- generous contributions to the wide range of software distributed
- through that system in reliance on consistent application of that
- system; it is up to the author/donor to decide if he or she is
- willing to distribute software through any other system and a
- licensee cannot impose that choice.
-
- This section is intended to make thoroughly clear what is believed
- to be a consequence of the rest of this License.
-
- 9. If the distribution and/or use of the Program is restricted in
- certain countries either by patents or by copyrighted interfaces,
- the original copyright holder who places the Program under this
- License may add an explicit geographical distribution limitation
- excluding those countries, so that distribution is permitted only
- in or among countries not thus excluded. In such case, this
- License incorporates the limitation as if written in the body of
- this License.
-
- 10. The Free Software Foundation may publish revised and/or new
- versions of the General Public License from time to time. Such
- new versions will be similar in spirit to the present version, but
- may differ in detail to address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
- Program specifies a version number of this License which applies
- to it and "any later version", you have the option of following
- the terms and conditions either of that version or of any later
- version published by the Free Software Foundation. If the Program
- does not specify a version number of this License, you may choose
- any version ever published by the Free Software Foundation.
-
- 11. If you wish to incorporate parts of the Program into other free
- programs whose distribution conditions are different, write to the
- author to ask for permission. For software which is copyrighted
- by the Free Software Foundation, write to the Free Software
- Foundation; we sometimes make exceptions for this. Our decision
- will be guided by the two goals of preserving the free status of
- all derivatives of our free software and of promoting the sharing
- and reuse of software generally.
-
- NO WARRANTY
-
- 12. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
- WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
- LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
- HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
- WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
- NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
- FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
- QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
- PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
- SERVICING, REPAIR OR CORRECTION.
-
- 13. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
- WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
- MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
- LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
- INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
- INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
- DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
- OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
- OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
- ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
-
- END OF TERMS AND CONDITIONS
-
-Appendix: How to Apply These Terms to Your New Programs
-=======================================================
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these
-terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
- Copyright (C) 19YY NAME OF AUTHOR
-
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
- Also add information on how to contact you by electronic and paper
-mail.
-
- If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
- Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
- Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
- The hypothetical commands `show w' and `show c' should show the
-appropriate parts of the General Public License. Of course, the
-commands you use may be called something other than `show w' and `show
-c'; they could even be mouse-clicks or menu items--whatever suits your
-program.
-
- You should also get your employer (if you work as a programmer) or
-your school, if any, to sign a "copyright disclaimer" for the program,
-if necessary. Here is a sample; alter the names:
-
- Yoyodyne, Inc., hereby disclaims all copyright interest in the program
- `Gnomovision' (which makes passes at compilers) written by James Hacker.
-
- SIGNATURE OF TY COON, 1 April 1989
- Ty Coon, President of Vice
-
- This General Public License does not permit incorporating your
-program into proprietary programs. If your program is a subroutine
-library, you may consider it more useful to permit linking proprietary
-applications with the library. If this is what you want to do, use the
-GNU Library General Public License instead of this License.
-
-
-File: regex.info, Node: Index, Prev: Copying, Up: Top
-
-Index
-*****
-
-* Menu:
-
-* $: Match-end-of-line Operator.
-* (: Grouping Operators.
-* ): Grouping Operators.
-* *: Match-zero-or-more Operator.
-* +: Match-one-or-more Operator.
-* -: List Operators.
-* .: Match-any-character Operator.
-* :] in regex: Character Class Operators.
-* ?: Match-zero-or-one Operator.
-* {: Interval Operators.
-* }: Interval Operators.
-* [: in regex: Character Class Operators.
-* [^: List Operators.
-* [: List Operators.
-* \': Match-end-of-buffer Operator.
-* \<: Match-beginning-of-word Operator.
-* \>: Match-end-of-word Operator.
-* \{: Interval Operators.
-* \}: Interval Operators.
-* \b: Match-word-boundary Operator.
-* \B: Match-within-word Operator.
-* \s: Match-syntactic-class Operator.
-* \S: Match-not-syntactic-class Operator.
-* \w: Match-word-constituent Operator.
-* \W: Match-non-word-constituent Operator.
-* \`: Match-beginning-of-buffer Operator.
-* \: List Operators.
-* ]: List Operators.
-* ^: List Operators.
-* allocated initialization: GNU Regular Expression Compiling.
-* alternation operator: Alternation Operator.
-* alternation operator and ^: Match-beginning-of-line Operator.
-* anchoring: Anchoring Operators.
-* anchors: Match-end-of-line Operator.
-* anchors: Match-beginning-of-line Operator.
-* Awk: Predefined Syntaxes.
-* back references: Back-reference Operator.
-* backtracking: Match-zero-or-more Operator.
-* backtracking: Alternation Operator.
-* beginning-of-line operator: Match-beginning-of-line Operator.
-* bracket expression: List Operators.
-* buffer field, set by re_compile_pattern: GNU Regular Expression Compiling.
-* buffer initialization: GNU Regular Expression Compiling.
-* character classes: Character Class Operators.
-* Egrep: Predefined Syntaxes.
-* Emacs: Predefined Syntaxes.
-* end in struct re_registers: Using Registers.
-* end-of-line operator: Match-end-of-line Operator.
-* fastmap initialization: GNU Regular Expression Compiling.
-* fastmaps: Searching with Fastmaps.
-* fastmap_accurate field, set by re_compile_pattern: GNU Regular Expression Compiling.
-* Grep: Predefined Syntaxes.
-* grouping: Grouping Operators.
-* ignoring case: POSIX Regular Expression Compiling.
-* interval expression: Interval Operators.
-* matching list: List Operators.
-* matching newline: List Operators.
-* matching with GNU functions: GNU Matching.
-* newline_anchor field in pattern buffer: Match-beginning-of-line Operator.
-* nonmatching list: List Operators.
-* not_bol field in pattern buffer: Match-beginning-of-line Operator.
-* num_regs in struct re_registers: Using Registers.
-* open-group operator and ^: Match-beginning-of-line Operator.
-* or operator: Alternation Operator.
-* parenthesizing: Grouping Operators.
-* pattern buffer initialization: GNU Regular Expression Compiling.
-* pattern buffer, definition of: GNU Pattern Buffers.
-* POSIX Awk: Predefined Syntaxes.
-* range argument to re_search: GNU Searching.
-* regex.c: Overview.
-* regex.h: Overview.
-* regexp anchoring: Anchoring Operators.
-* regmatch_t: Using Byte Offsets.
-* regs_allocated: Using Registers.
-* REGS_FIXED: Using Registers.
-* REGS_REALLOCATE: Using Registers.
-* REGS_UNALLOCATED: Using Registers.
-* regular expressions, syntax of: Regular Expression Syntax.
-* REG_EXTENDED: POSIX Regular Expression Compiling.
-* REG_ICASE: POSIX Regular Expression Compiling.
-* REG_NEWLINE: POSIX Regular Expression Compiling.
-* REG_NOSUB: POSIX Regular Expression Compiling.
-* RE_BACKSLASH_ESCAPE_IN_LIST: Syntax Bits.
-* RE_BK_PLUS_QM: Syntax Bits.
-* RE_CHAR_CLASSES: Syntax Bits.
-* RE_CONTEXT_INDEP_ANCHORS: Syntax Bits.
-* RE_CONTEXT_INDEP_ANCHORS (and ^): Match-beginning-of-line Operator.
-* RE_CONTEXT_INDEP_OPS: Syntax Bits.
-* RE_CONTEXT_INVALID_OPS: Syntax Bits.
-* RE_DOT_NEWLINE: Syntax Bits.
-* RE_DOT_NOT_NULL: Syntax Bits.
-* RE_INTERVALS: Syntax Bits.
-* RE_LIMITED_OPS: Syntax Bits.
-* RE_NEWLINE_ALT: Syntax Bits.
-* RE_NO_BK_BRACES: Syntax Bits.
-* RE_NO_BK_PARENS: Syntax Bits.
-* RE_NO_BK_REFS: Syntax Bits.
-* RE_NO_BK_VBAR: Syntax Bits.
-* RE_NO_EMPTY_RANGES: Syntax Bits.
-* re_nsub field, set by re_compile_pattern: GNU Regular Expression Compiling.
-* re_pattern_buffer definition: GNU Pattern Buffers.
-* re_registers: Using Registers.
-* re_syntax_options initialization: GNU Regular Expression Compiling.
-* RE_UNMATCHED_RIGHT_PAREN_ORD: Syntax Bits.
-* searching with GNU functions: GNU Searching.
-* start argument to re_search: GNU Searching.
-* start in struct re_registers: Using Registers.
-* struct re_pattern_buffer definition: GNU Pattern Buffers.
-* subexpressions: Grouping Operators.
-* syntax field, set by re_compile_pattern: GNU Regular Expression Compiling.
-* syntax bits: Syntax Bits.
-* syntax initialization: GNU Regular Expression Compiling.
-* syntax of regular expressions: Regular Expression Syntax.
-* translate initialization: GNU Regular Expression Compiling.
-* used field, set by re_compile_pattern: GNU Regular Expression Compiling.
-* word boundaries, matching: Match-word-boundary Operator.
-* \: The Backslash Character.
-* \(: Grouping Operators.
-* \): Grouping Operators.
-* \|: Alternation Operator.
-* ^: Match-beginning-of-line Operator.
-* |: Alternation Operator.
-
-
-
-Tag Table:
-Node: Top1064
-Node: Overview4562
-Node: Regular Expression Syntax6746
-Node: Syntax Bits7916
-Node: Predefined Syntaxes14018
-Node: Collating Elements vs. Characters17872
-Node: The Backslash Character18835
-Node: Common Operators21992
-Node: Match-self Operator23445
-Node: Match-any-character Operator23941
-Node: Concatenation Operator24520
-Node: Repetition Operators25017
-Node: Match-zero-or-more Operator25436
-Node: Match-one-or-more Operator27483
-Node: Match-zero-or-one Operator28341
-Node: Interval Operators29196
-Node: Alternation Operator30991
-Node: List Operators32489
-Node: Character Class Operators35272
-Node: Range Operator36901
-Node: Grouping Operators38930
-Node: Back-reference Operator40251
-Node: Anchoring Operators43073
-Node: Match-beginning-of-line Operator43447
-Node: Match-end-of-line Operator44779
-Node: GNU Operators45518
-Node: Word Operators45767
-Node: Non-Emacs Syntax Tables46391
-Node: Match-word-boundary Operator47465
-Node: Match-within-word Operator47858
-Node: Match-beginning-of-word Operator48255
-Node: Match-end-of-word Operator48588
-Node: Match-word-constituent Operator48908
-Node: Match-non-word-constituent Operator49234
-Node: Buffer Operators49545
-Node: Match-beginning-of-buffer Operator49952
-Node: Match-end-of-buffer Operator50264
-Node: GNU Emacs Operators50558
-Node: Syntactic Class Operators50901
-Node: Emacs Syntax Tables51307
-Node: Match-syntactic-class Operator51963
-Node: Match-not-syntactic-class Operator52560
-Node: What Gets Matched?53150
-Node: Programming with Regex53799
-Node: GNU Regex Functions54237
-Node: GNU Pattern Buffers55078
-Node: GNU Regular Expression Compiling58303
-Node: GNU Matching61181
-Node: GNU Searching63101
-Node: Matching/Searching with Split Data64913
-Node: Searching with Fastmaps66369
-Node: GNU Translate Tables68921
-Node: Using Registers70892
-Node: Freeing GNU Pattern Buffers77000
-Node: POSIX Regex Functions77593
-Node: POSIX Pattern Buffers78266
-Node: POSIX Regular Expression Compiling78709
-Node: POSIX Matching82836
-Node: Reporting Errors84791
-Node: Using Byte Offsets86048
-Node: Freeing POSIX Pattern Buffers86861
-Node: BSD Regex Functions87467
-Node: BSD Regular Expression Compiling87886
-Node: BSD Searching89258
-Node: Copying89960
-Node: Index109122
-
-End Tag Table
diff --git a/gnu/lib/libregex/doc/regex.texi b/gnu/lib/libregex/doc/regex.texi
deleted file mode 100644
index d93953e..0000000
--- a/gnu/lib/libregex/doc/regex.texi
+++ /dev/null
@@ -1,3138 +0,0 @@
-\input texinfo
-@c %**start of header
-@setfilename regex.info
-@settitle Regex
-@c %**end of header
-
-@c \\{fill-paragraph} works better (for me, anyway) if the text in the
-@c source file isn't indented.
-@paragraphindent 2
-
-@c Define a new index for our magic constants.
-@defcodeindex cn
-
-@c Put everything in one index (arbitrarily chosen to be the concept index).
-@syncodeindex cn cp
-@syncodeindex ky cp
-@syncodeindex pg cp
-@syncodeindex tp cp
-@syncodeindex vr cp
-
-@c Here is what we use in the Info `dir' file:
-@c * Regex: (regex). Regular expression library.
-
-
-@ifinfo
-This file documents the GNU regular expression library.
-
-Copyright (C) 1992, 1993 Free Software Foundation, Inc.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-@ignore
-Permission is granted to process this file through TeX and print the
-results, provided the printed document carries a copying permission
-notice identical to this one except for the removal of this paragraph
-(this paragraph not being relevant to the printed manual).
-@end ignore
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided also that the
-section entitled ``GNU General Public License'' is included exactly as
-in the original, and provided that the entire resulting derived work is
-distributed under the terms of a permission notice identical to this one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that the section entitled ``GNU General Public License'' may be
-included in a translation approved by the Free Software Foundation
-instead of in the original English.
-@end ifinfo
-
-
-@titlepage
-
-@title Regex
-@subtitle edition 0.12a
-@subtitle 19 September 1992
-@author Kathryn A. Hargreaves
-@author Karl Berry
-
-@page
-
-@vskip 0pt plus 1filll
-Copyright @copyright{} 1992 Free Software Foundation.
-
-Permission is granted to make and distribute verbatim copies of this
-manual provided the copyright notice and this permission notice are
-preserved on all copies.
-
-Permission is granted to copy and distribute modified versions of this
-manual under the conditions for verbatim copying, provided also that the
-section entitled ``GNU General Public License'' is included exactly as
-in the original, and provided that the entire resulting derived work is
-distributed under the terms of a permission notice identical to this
-one.
-
-Permission is granted to copy and distribute translations of this manual
-into another language, under the above conditions for modified versions,
-except that the section entitled ``GNU General Public License'' may be
-included in a translation approved by the Free Software Foundation
-instead of in the original English.
-
-@end titlepage
-
-
-@ifinfo
-@node Top, Overview, (dir), (dir)
-@top Regular Expression Library
-
-This manual documents how to program with the GNU regular expression
-library. This is edition 0.12a of the manual, 19 September 1992.
-
-The first part of this master menu lists the major nodes in this Info
-document, including the index. The rest of the menu lists all the
-lower level nodes in the document.
-
-@menu
-* Overview::
-* Regular Expression Syntax::
-* Common Operators::
-* GNU Operators::
-* GNU Emacs Operators::
-* What Gets Matched?::
-* Programming with Regex::
-* Copying:: Copying and sharing Regex.
-* Index:: General index.
- --- The Detailed Node Listing ---
-
-Regular Expression Syntax
-
-* Syntax Bits::
-* Predefined Syntaxes::
-* Collating Elements vs. Characters::
-* The Backslash Character::
-
-Common Operators
-
-* Match-self Operator:: Ordinary characters.
-* Match-any-character Operator:: .
-* Concatenation Operator:: Juxtaposition.
-* Repetition Operators:: * + ? @{@}
-* Alternation Operator:: |
-* List Operators:: [...] [^...]
-* Grouping Operators:: (...)
-* Back-reference Operator:: \digit
-* Anchoring Operators:: ^ $
-
-Repetition Operators
-
-* Match-zero-or-more Operator:: *
-* Match-one-or-more Operator:: +
-* Match-zero-or-one Operator:: ?
-* Interval Operators:: @{@}
-
-List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})
-
-* Character Class Operators:: [:class:]
-* Range Operator:: start-end
-
-Anchoring Operators
-
-* Match-beginning-of-line Operator:: ^
-* Match-end-of-line Operator:: $
-
-GNU Operators
-
-* Word Operators::
-* Buffer Operators::
-
-Word Operators
-
-* Non-Emacs Syntax Tables::
-* Match-word-boundary Operator:: \b
-* Match-within-word Operator:: \B
-* Match-beginning-of-word Operator:: \<
-* Match-end-of-word Operator:: \>
-* Match-word-constituent Operator:: \w
-* Match-non-word-constituent Operator:: \W
-
-Buffer Operators
-
-* Match-beginning-of-buffer Operator:: \`
-* Match-end-of-buffer Operator:: \'
-
-GNU Emacs Operators
-
-* Syntactic Class Operators::
-
-Syntactic Class Operators
-
-* Emacs Syntax Tables::
-* Match-syntactic-class Operator:: \sCLASS
-* Match-not-syntactic-class Operator:: \SCLASS
-
-Programming with Regex
-
-* GNU Regex Functions::
-* POSIX Regex Functions::
-* BSD Regex Functions::
-
-GNU Regex Functions
-
-* GNU Pattern Buffers:: The re_pattern_buffer type.
-* GNU Regular Expression Compiling:: re_compile_pattern ()
-* GNU Matching:: re_match ()
-* GNU Searching:: re_search ()
-* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
-* Searching with Fastmaps:: re_compile_fastmap ()
-* GNU Translate Tables:: The `translate' field.
-* Using Registers:: The re_registers type and related fns.
-* Freeing GNU Pattern Buffers:: regfree ()
-
-POSIX Regex Functions
-
-* POSIX Pattern Buffers:: The regex_t type.
-* POSIX Regular Expression Compiling:: regcomp ()
-* POSIX Matching:: regexec ()
-* Reporting Errors:: regerror ()
-* Using Byte Offsets:: The regmatch_t type.
-* Freeing POSIX Pattern Buffers:: regfree ()
-
-BSD Regex Functions
-
-* BSD Regular Expression Compiling:: re_comp ()
-* BSD Searching:: re_exec ()
-@end menu
-@end ifinfo
-@node Overview, Regular Expression Syntax, Top, Top
-@chapter Overview
-
-A @dfn{regular expression} (or @dfn{regexp}, or @dfn{pattern}) is a text
-string that describes some (mathematical) set of strings. A regexp
-@var{r} @dfn{matches} a string @var{s} if @var{s} is in the set of
-strings described by @var{r}.
-
-Using the Regex library, you can:
-
-@itemize @bullet
-
-@item
-see if a string matches a specified pattern as a whole, and
-
-@item
-search within a string for a substring matching a specified pattern.
-
-@end itemize
-
-Some regular expressions match only one string, i.e., the set they
-describe has only one member. For example, the regular expression
-@samp{foo} matches the string @samp{foo} and no others. Other regular
-expressions match more than one string, i.e., the set they describe has
-more than one member. For example, the regular expression @samp{f*}
-matches the set of strings made up of any number (including zero) of
-@samp{f}s. As you can see, some characters in regular expressions match
-themselves (such as @samp{f}) and some don't (such as @samp{*}); the
-ones that don't match themselves instead let you specify patterns that
-describe many different strings.
-
-To either match or search for a regular expression with the Regex
-library functions, you must first compile it with a Regex pattern
-compiling function. A @dfn{compiled pattern} is a regular expression
-converted to the internal format used by the library functions. Once
-you've compiled a pattern, you can use it for matching or searching any
-number of times.
-
-The Regex library consists of two source files: @file{regex.h} and
-@file{regex.c}.
-@pindex regex.h
-@pindex regex.c
-Regex provides three groups of functions with which you can operate on
-regular expressions. One group---the @sc{gnu} group---is more powerful
-but not completely compatible with the other two, namely the @sc{posix}
-and Berkeley @sc{unix} groups; its interface was designed specifically
-for @sc{gnu}. The other groups have the same interfaces as do the
-regular expression functions in @sc{posix} and Berkeley
-@sc{unix}.
-
-We wrote this manual with programmers in mind, not users of
-programs---such as Emacs---that use Regex. We describe the Regex
-library in its entirety, not how to write regular expressions that a
-particular program understands.
-
-
-@node Regular Expression Syntax, Common Operators, Overview, Top
-@chapter Regular Expression Syntax
-
-@cindex regular expressions, syntax of
-@cindex syntax of regular expressions
-
-@dfn{Characters} are things you can type. @dfn{Operators} are things in
-a regular expression that match one or more characters. You compose
-regular expressions from operators, which in turn you specify using one
-or more characters.
-
-Most characters represent what we call the match-self operator, i.e.,
-they match themselves; we call these characters @dfn{ordinary}. Other
-characters represent either all or parts of fancier operators; e.g.,
-@samp{.} represents what we call the match-any-character operator
-(which, no surprise, matches (almost) any character); we call these
-characters @dfn{special}. Two different things determine what
-characters represent what operators:
-
-@enumerate
-@item
-the regular expression syntax your program has told the Regex library to
-recognize, and
-
-@item
-the context of the character in the regular expression.
-@end enumerate
-
-In the following sections, we describe these things in more detail.
-
-@menu
-* Syntax Bits::
-* Predefined Syntaxes::
-* Collating Elements vs. Characters::
-* The Backslash Character::
-@end menu
-
-
-@node Syntax Bits, Predefined Syntaxes, , Regular Expression Syntax
-@section Syntax Bits
-
-@cindex syntax bits
-
-In any particular syntax for regular expressions, some characters are
-always special, others are sometimes special, and others are never
-special. The particular syntax that Regex recognizes for a given
-regular expression depends on the value in the @code{syntax} field of
-the pattern buffer of that regular expression.
-
-You get a pattern buffer by compiling a regular expression. @xref{GNU
-Pattern Buffers}, and @ref{POSIX Pattern Buffers}, for more information
-on pattern buffers. @xref{GNU Regular Expression Compiling}, @ref{POSIX
-Regular Expression Compiling}, and @ref{BSD Regular Expression
-Compiling}, for more information on compiling.
-
-Regex considers the value of the @code{syntax} field to be a collection
-of bits; we refer to these bits as @dfn{syntax bits}. In most cases,
-they affect what characters represent what operators. We describe the
-meanings of the operators to which we refer in @ref{Common Operators},
-@ref{GNU Operators}, and @ref{GNU Emacs Operators}.
-
-For reference, here is the complete list of syntax bits, in alphabetical
-order:
-
-@table @code
-
-@cnindex RE_BACKSLASH_ESCAPE_IN_LISTS
-@item RE_BACKSLASH_ESCAPE_IN_LISTS
-If this bit is set, then @samp{\} inside a list (@pxref{List Operators})
-quotes (makes ordinary, if it's special) the following character; if
-this bit isn't set, then @samp{\} is an ordinary character inside lists.
-(@xref{The Backslash Character}, for what @samp{\} does outside of lists.)
-
-@cnindex RE_BK_PLUS_QM
-@item RE_BK_PLUS_QM
-If this bit is set, then @samp{\+} represents the match-one-or-more
-operator and @samp{\?} represents the match-zero-or-one operator; if
-this bit isn't set, then @samp{+} represents the match-one-or-more
-operator and @samp{?} represents the match-zero-or-one operator. This
-bit is irrelevant if @code{RE_LIMITED_OPS} is set.
-
-@cnindex RE_CHAR_CLASSES
-@item RE_CHAR_CLASSES
-If this bit is set, then you can use character classes in lists; if this
-bit isn't set, then you can't.
-
-@cnindex RE_CONTEXT_INDEP_ANCHORS
-@item RE_CONTEXT_INDEP_ANCHORS
-If this bit is set, then @samp{^} and @samp{$} are special anywhere outside
-a list; if this bit isn't set, then these characters are special only in
-certain contexts. @xref{Match-beginning-of-line Operator}, and
-@ref{Match-end-of-line Operator}.
-
-@cnindex RE_CONTEXT_INDEP_OPS
-@item RE_CONTEXT_INDEP_OPS
-If this bit is set, then certain characters are special anywhere outside
-a list; if this bit isn't set, then those characters are special only in
-some contexts and are ordinary elsewhere. Specifically, if this bit
-isn't set then @samp{*}, and (if the syntax bit @code{RE_LIMITED_OPS}
-isn't set) @samp{+} and @samp{?} (or @samp{\+} and @samp{\?}, depending
-on the syntax bit @code{RE_BK_PLUS_QM}) represent repetition operators
-only if they're not first in a regular expression or just after an
-open-group or alternation operator. The same holds for @samp{@{} (or
-@samp{\@{}, depending on the syntax bit @code{RE_NO_BK_BRACES}) if
-it is the beginning of a valid interval and the syntax bit
-@code{RE_INTERVALS} is set.
-
-@cnindex RE_CONTEXT_INVALID_OPS
-@item RE_CONTEXT_INVALID_OPS
-If this bit is set, then repetition and alternation operators can't be
-in certain positions within a regular expression. Specifically, the
-regular expression is invalid if it has:
-
-@itemize @bullet
-
-@item
-a repetition operator first in the regular expression or just after a
-match-beginning-of-line, open-group, or alternation operator; or
-
-@item
-an alternation operator first or last in the regular expression, just
-before a match-end-of-line operator, or just after an alternation or
-open-group operator.
-
-@end itemize
-
-If this bit isn't set, then you can put the characters representing the
-repetition and alternation operators anywhere in a regular expression.
-Whether or not they will in fact be operators in certain positions
-depends on other syntax bits.
-
-@cnindex RE_DOT_NEWLINE
-@item RE_DOT_NEWLINE
-If this bit is set, then the match-any-character operator matches
-a newline; if this bit isn't set, then it doesn't.
-
-@cnindex RE_DOT_NOT_NULL
-@item RE_DOT_NOT_NULL
-If this bit is set, then the match-any-character operator doesn't match
-a null character; if this bit isn't set, then it does.
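-
-@cnindex RE_HAT_LISTS_NOT_NEWLINE
-@item RE_HAT_LISTS_NOT_NEWLINE
-If this bit is set, then nonmatching lists (@pxref{List Operators})
-don't match a newline; if this bit isn't set, then they do.
-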
-@cnindex RE_INTERVALS
-@item RE_INTERVALS
-If this bit is set, then Regex recognizes interval operators; if this bit
-isn't set, then it doesn't.
-
-@cnindex RE_LIMITED_OPS
-@item RE_LIMITED_OPS
-If this bit is set, then Regex doesn't recognize the match-one-or-more,
-match-zero-or-one or alternation operators; if this bit isn't set, then
-it does.
-
-@cnindex RE_NEWLINE_ALT
-@item RE_NEWLINE_ALT
-If this bit is set, then newline represents the alternation operator; if
-this bit isn't set, then newline is ordinary.
-
-@cnindex RE_NO_BK_BRACES
-@item RE_NO_BK_BRACES
-If this bit is set, then @samp{@{} represents the open-interval operator
-and @samp{@}} represents the close-interval operator; if this bit isn't
-set, then @samp{\@{} represents the open-interval operator and
-@samp{\@}} represents the close-interval operator. This bit is relevant
-only if @code{RE_INTERVALS} is set.
-
-@cnindex RE_NO_BK_PARENS
-@item RE_NO_BK_PARENS
-If this bit is set, then @samp{(} represents the open-group operator and
-@samp{)} represents the close-group operator; if this bit isn't set, then
-@samp{\(} represents the open-group operator and @samp{\)} represents
-the close-group operator.
-
-@cnindex RE_NO_BK_REFS
-@item RE_NO_BK_REFS
-If this bit is set, then Regex doesn't recognize @samp{\}@var{digit} as
-the back reference operator; if this bit isn't set, then it does.
-
-@cnindex RE_NO_BK_VBAR
-@item RE_NO_BK_VBAR
-If this bit is set, then @samp{|} represents the alternation operator;
-if this bit isn't set, then @samp{\|} represents the alternation
-operator. This bit is irrelevant if @code{RE_LIMITED_OPS} is set.
-
-@cnindex RE_NO_EMPTY_RANGES
-@item RE_NO_EMPTY_RANGES
-If this bit is set, then a regular expression with a range whose ending
-point collates lower than its starting point is invalid; if this bit
-isn't set, then Regex considers such a range to be empty.
-
-@cnindex RE_UNMATCHED_RIGHT_PAREN_ORD
-@item RE_UNMATCHED_RIGHT_PAREN_ORD
-If this bit is set and the regular expression has no matching open-group
-operator, then Regex considers what would otherwise be a close-group
-operator (based on how @code{RE_NO_BK_PARENS} is set) to match @samp{)}.
-
-@end table
-
-
-@node Predefined Syntaxes, Collating Elements vs. Characters, Syntax Bits, Regular Expression Syntax
-@section Predefined Syntaxes
-
-If you're programming with Regex, you can set a pattern buffer's
-(@pxref{GNU Pattern Buffers}, and @ref{POSIX Pattern Buffers})
-@code{syntax} field either to an arbitrary combination of syntax bits
-(@pxref{Syntax Bits}) or else to one of the configurations defined by Regex.
-These configurations define the syntaxes used by certain
-programs---@sc{gnu} Emacs,
-@cindex Emacs
-@sc{posix} Awk,
-@cindex POSIX Awk
-traditional Awk,
-@cindex Awk
-Grep,
-@cindex Grep
-@cindex Egrep
-Egrep---in addition to syntaxes for @sc{posix} basic and extended
-regular expressions.
-
-The predefined syntaxes---taken directly from @file{regex.h}---are:
-
-@example
-#define RE_SYNTAX_EMACS 0
-
-#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-#define RE_SYNTAX_POSIX_AWK \
- (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
-
-#define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
- | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
- | RE_NEWLINE_ALT)
-
-#define RE_SYNTAX_EGREP \
- (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
- | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
-
-#define RE_SYNTAX_POSIX_EGREP \
- (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
-
-/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
-#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
-
-#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
-
-/* Syntax bits common to both basic and extended POSIX regex syntax. */
-#define _RE_SYNTAX_POSIX_COMMON \
- (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NO_EMPTY_RANGES)
-
-#define RE_SYNTAX_POSIX_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
-
-/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
- RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
- isn't minimal, since other operators, such as \`, aren't disabled. */
-#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
- (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
-
-#define RE_SYNTAX_POSIX_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
- | RE_UNMATCHED_RIGHT_PAREN_ORD)
-
-/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
- replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
-#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
- (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS \
- | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
-@end example
-
-@node Collating Elements vs. Characters, The Backslash Character, Predefined Syntaxes, Regular Expression Syntax
-@section Collating Elements vs.@: Characters
-
-@sc{posix} generalizes the notion of a character to that of a
-collating element. It defines a @dfn{collating element} to be ``a
-sequence of one or more bytes defined in the current collating sequence
-as a unit of collation.''
-
-This generalizes the notion of a character in
-two ways. First, a single character can map into two or more collating
-elements. For example, the German
-@tex
-`\ss'
-@end tex
-@ifinfo
-``es-zet''
-@end ifinfo
-collates as the collating element @samp{s} followed by another collating
-element @samp{s}. Second, two or more characters can map into one
-collating element. For example, the Spanish @samp{ll} collates after
-@samp{l} and before @samp{m}.
-
-Since @sc{posix}'s ``collating element'' preserves the essential idea of
-a ``character,'' we use the latter, more familiar, term in this document.
-
-@node The Backslash Character, , Collating Elements vs. Characters, Regular Expression Syntax
-@section The Backslash Character
-
-@cindex \
-The @samp{\} character has one of four different meanings, depending on
-the context in which you use it and what syntax bits are set
-(@pxref{Syntax Bits}). It can: 1) stand for itself, 2) quote the next
-character, 3) introduce an operator, or 4) do nothing.
-
-@enumerate
-@item
-It stands for itself inside a list
-(@pxref{List Operators}) if the syntax bit
-@code{RE_BACKSLASH_ESCAPE_IN_LISTS} is not set. For example, @samp{[\]}
-would match @samp{\}.
-
-@item
-It quotes (makes ordinary, if it's special) the next character when you
-use it either:
-
-@itemize @bullet
-@item
-outside a list,@footnote{Sometimes
-you don't have to explicitly quote special characters to make
-them ordinary. For instance, most characters lose any special meaning
-inside a list (@pxref{List Operators}). In addition, if the syntax bits
-@code{RE_CONTEXT_INVALID_OPS} and @code{RE_CONTEXT_INDEP_OPS}
-aren't set, then (for historical reasons) the matcher considers special
-characters ordinary if they are in contexts where the operations they
-represent make no sense; for example, @samp{*} (which would otherwise
-represent the match-zero-or-more operator) matches itself in the regular
-expression @samp{*foo} because there is no preceding expression on which
-it can operate. It is poor practice, however, to depend on this
-behavior; if you want a special character to be ordinary outside a list,
-it's better to always quote it, regardless.} or
-
-@item
-inside a list and the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is set.
-
-@end itemize
-
-@item
-It introduces an operator when followed by certain ordinary
-characters---sometimes only when certain syntax bits are set. See the
-cases @code{RE_BK_PLUS_QM}, @code{RE_NO_BK_BRACES}, @code{RE_NO_BK_VBAR},
-@code{RE_NO_BK_PARENS}, @code{RE_NO_BK_REFS} in @ref{Syntax Bits}. Also:
-
-@itemize @bullet
-@item
-@samp{\b} represents the match-word-boundary operator
-(@pxref{Match-word-boundary Operator}).
-
-@item
-@samp{\B} represents the match-within-word operator
-(@pxref{Match-within-word Operator}).
-
-@item
-@samp{\<} represents the match-beginning-of-word operator @*
-(@pxref{Match-beginning-of-word Operator}).
-
-@item
-@samp{\>} represents the match-end-of-word operator
-(@pxref{Match-end-of-word Operator}).
-
-@item
-@samp{\w} represents the match-word-constituent operator
-(@pxref{Match-word-constituent Operator}).
-
-@item
-@samp{\W} represents the match-non-word-constituent operator
-(@pxref{Match-non-word-constituent Operator}).
-
-@item
-@samp{\`} represents the match-beginning-of-buffer
-operator and @samp{\'} represents the match-end-of-buffer operator
-(@pxref{Buffer Operators}).
-
-@item
-If Regex was compiled with the C preprocessor symbol @code{emacs}
-defined, then @samp{\s@var{class}} represents the match-syntactic-class
-operator and @samp{\S@var{class}} represents the
-match-not-syntactic-class operator (@pxref{Syntactic Class Operators}).
-
-@end itemize
-
-@item
-In all other cases, Regex ignores @samp{\}. For example,
-@samp{\n} matches @samp{n}.
-
-@end enumerate
-
-@node Common Operators, GNU Operators, Regular Expression Syntax, Top
-@chapter Common Operators
-
-You compose regular expressions from operators. In the following
-sections, we describe the regular expression operators specified by
-@sc{posix}; @sc{gnu} also uses these. Most operators have more than one
-representation as characters. @xref{Regular Expression Syntax}, for
-what characters represent what operators under what circumstances.
-
-For most operators that can be represented in two ways, one
-representation is a single character and the other is that character
-preceded by @samp{\}. For example, either @samp{(} or @samp{\(}
-represents the open-group operator. Which one does depends on the
-setting of a syntax bit, in this case @code{RE_NO_BK_PARENS}. Why is
-this so? Historical reasons dictate some of the varying
-representations, while @sc{posix} dictates others.
-
-Finally, almost all characters lose any special meaning inside a list
-(@pxref{List Operators}).
-
-@menu
-* Match-self Operator:: Ordinary characters.
-* Match-any-character Operator:: .
-* Concatenation Operator:: Juxtaposition.
-* Repetition Operators:: * + ? @{@}
-* Alternation Operator:: |
-* List Operators:: [...] [^...]
-* Grouping Operators:: (...)
-* Back-reference Operator:: \digit
-* Anchoring Operators:: ^ $
-@end menu
-
-@node Match-self Operator, Match-any-character Operator, , Common Operators
-@section The Match-self Operator (@var{ordinary character})
-
-This operator matches the character itself. All ordinary characters
-(@pxref{Regular Expression Syntax}) represent this operator. For
-example, @samp{f} is always an ordinary character, so the regular
-expression @samp{f} matches only the string @samp{f}. In
-particular, it does @emph{not} match the string @samp{ff}.
-
-@node Match-any-character Operator, Concatenation Operator, Match-self Operator, Common Operators
-@section The Match-any-character Operator (@code{.})
-
-@cindex @samp{.}
-
-This operator matches any single printing or nonprinting character
-except it won't match a:
-
-@table @asis
-@item newline
-if the syntax bit @code{RE_DOT_NEWLINE} isn't set.
-
-@item null
-if the syntax bit @code{RE_DOT_NOT_NULL} is set.
-
-@end table
-
-The @samp{.} (period) character represents this operator. For example,
-@samp{a.b} matches any three-character string beginning with @samp{a}
-and ending with @samp{b}.
-
-@node Concatenation Operator, Repetition Operators, Match-any-character Operator, Common Operators
-@section The Concatenation Operator
-
-This operator concatenates two regular expressions @var{a} and @var{b}.
-No character represents this operator; you simply put @var{b} after
-@var{a}. The result is a regular expression that will match a string if
-@var{a} matches its first part and @var{b} matches the rest. For
-example, @samp{xy} (two match-self operators) matches @samp{xy}.
-
-@node Repetition Operators, Alternation Operator, Concatenation Operator, Common Operators
-@section Repetition Operators
-
-Repetition operators repeat the preceding regular expression a specified
-number of times.
-
-@menu
-* Match-zero-or-more Operator:: *
-* Match-one-or-more Operator:: +
-* Match-zero-or-one Operator:: ?
-* Interval Operators:: @{@}
-@end menu
-
-@node Match-zero-or-more Operator, Match-one-or-more Operator, , Repetition Operators
-@subsection The Match-zero-or-more Operator (@code{*})
-
-@cindex @samp{*}
-
-This operator repeats the smallest possible preceding regular expression
-as many times as necessary (including zero) to match the pattern.
-@samp{*} represents this operator. For example, @samp{o*}
-matches any string made up of zero or more @samp{o}s. Since this
-operator operates on the smallest preceding regular expression,
-@samp{fo*} has a repeating @samp{o}, not a repeating @samp{fo}. So,
-@samp{fo*} matches @samp{f}, @samp{fo}, @samp{foo}, and so on.
-
-Since the match-zero-or-more operator is a suffix operator, it may be
-useless as such when no regular expression precedes it. This is the
-case when it:
-
-@itemize @bullet
-@item
-is first in a regular expression, or
-
-@item
-follows a match-beginning-of-line, open-group, or alternation
-operator.
-
-@end itemize
-
-@noindent
-Three different things can happen in these cases:
-
-@enumerate
-@item
-If the syntax bit @code{RE_CONTEXT_INVALID_OPS} is set, then the
-regular expression is invalid.
-
-@item
-If @code{RE_CONTEXT_INVALID_OPS} isn't set, but
-@code{RE_CONTEXT_INDEP_OPS} is, then @samp{*} represents the
-match-zero-or-more operator (which then operates on the empty string).
-
-@item
-Otherwise, @samp{*} is ordinary.
-
-@end enumerate
-
-@cindex backtracking
-The matcher processes a match-zero-or-more operator by first matching as
-many repetitions of the smallest preceding regular expression as it can.
-Then it continues to match the rest of the pattern.
-
-If it can't match the rest of the pattern, it backtracks (as many times
-as necessary), each time discarding one of the matches until it can
-either match the entire pattern or be certain that it cannot get a
-match. For example, when matching @samp{ca*ar} against @samp{caaar},
-the matcher first matches all three @samp{a}s of the string with the
-@samp{a*} of the regular expression. However, it cannot then match the
-final @samp{ar} of the regular expression against the final @samp{r} of
-the string. So it backtracks, discarding the match of the last @samp{a}
-in the string. It can then match the remaining @samp{ar}.
-
-
-@node Match-one-or-more Operator, Match-zero-or-one Operator, Match-zero-or-more Operator, Repetition Operators
-@subsection The Match-one-or-more Operator (@code{+} or @code{\+})
-
-@cindex @samp{+}
-
-If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't recognize
-this operator. Otherwise, if the syntax bit @code{RE_BK_PLUS_QM} isn't
-set, then @samp{+} represents this operator; if it is, then @samp{\+}
-does.
-
-This operator is similar to the match-zero-or-more operator except that
-it repeats the preceding regular expression at least once;
-@pxref{Match-zero-or-more Operator}, for what it operates on, how some
-syntax bits affect it, and how Regex backtracks to match it.
-
-For example, supposing that @samp{+} represents the match-one-or-more
-operator, then @samp{ca+r} matches, e.g., @samp{car} and
-@samp{caaaar}, but not @samp{cr}.
-
-@node Match-zero-or-one Operator, Interval Operators, Match-one-or-more Operator, Repetition Operators
-@subsection The Match-zero-or-one Operator (@code{?} or @code{\?})
-@cindex @samp{?}
-
-If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't
-recognize this operator. Otherwise, if the syntax bit
-@code{RE_BK_PLUS_QM} isn't set, then @samp{?} represents this operator;
-if it is, then @samp{\?} does.
-
-This operator is similar to the match-zero-or-more operator except that
-it repeats the preceding regular expression once or not at all;
-@pxref{Match-zero-or-more Operator}, to see what it operates on, how
-some syntax bits affect it, and how Regex backtracks to match it.
-
-For example, supposing that @samp{?} represents the match-zero-or-one
-operator, then @samp{ca?r} matches both @samp{car} and @samp{cr}, but
-nothing else.
-
-@node Interval Operators, , Match-zero-or-one Operator, Repetition Operators
-@subsection Interval Operators (@code{@{} @dots{} @code{@}} or @code{\@{} @dots{} @code{\@}})
-
-@cindex interval expression
-@cindex @samp{@{}
-@cindex @samp{@}}
-@cindex @samp{\@{}
-@cindex @samp{\@}}
-
-If the syntax bit @code{RE_INTERVALS} is set, then Regex recognizes
-@dfn{interval expressions}. They repeat the smallest possible preceding
-regular expression a specified number of times.
-
-If the syntax bit @code{RE_NO_BK_BRACES} is set, @samp{@{} represents
-the @dfn{open-interval operator} and @samp{@}} represents the
-@dfn{close-interval operator}; otherwise, @samp{\@{} and @samp{\@}} do.
-
-Specifically, supposing that @samp{@{} and @samp{@}} represent the
-open-interval and close-interval operators, then:
-
-@table @code
-@item @{@var{count}@}
-matches exactly @var{count} occurrences of the preceding regular
-expression.
-
-@item @{@var{min},@}
-matches @var{min} or more occurrences of the preceding regular
-expression.
-
-@item @{@var{min}, @var{max}@}
-matches at least @var{min} but no more than @var{max} occurrences of
-the preceding regular expression.
-
-@end table
-
-The interval expression (but not necessarily the regular expression that
-contains it) is invalid if:
-
-@itemize @bullet
-@item
-@var{min} is greater than @var{max}, or
-
-@item
-any of @var{count}, @var{min}, or @var{max} are outside the range
-zero to @code{RE_DUP_MAX} (which symbol @file{regex.h}
-defines).
-
-@end itemize
-
-If the interval expression is invalid and the syntax bit
-@code{RE_NO_BK_BRACES} is set, then Regex considers all the
-characters in the would-be interval to be ordinary. If that bit
-isn't set, then the regular expression is invalid.
-
-If the interval expression is valid but there is no preceding regular
-expression on which to operate, then if the syntax bit
-@code{RE_CONTEXT_INVALID_OPS} is set, the regular expression is invalid.
-If that bit isn't set, then Regex considers all the characters---other
-than backslashes, which it ignores---in the would-be interval to be
-ordinary.
-
-
-@node Alternation Operator, List Operators, Repetition Operators, Common Operators
-@section The Alternation Operator (@code{|} or @code{\|})
-
-@kindex |
-@kindex \|
-@cindex alternation operator
-@cindex or operator
-
-If the syntax bit @code{RE_LIMITED_OPS} is set, then Regex doesn't
-recognize this operator. Otherwise, if the syntax bit
-@code{RE_NO_BK_VBAR} is set, then @samp{|} represents this operator;
-otherwise, @samp{\|} does.
-
-Alternatives match one of a choice of regular expressions:
-if you put the character(s) representing the alternation operator between
-any two regular expressions @var{a} and @var{b}, the result matches
-the union of the strings that @var{a} and @var{b} match. For
-example, supposing that @samp{|} is the alternation operator, then
-@samp{foo|bar|quux} would match any of @samp{foo}, @samp{bar} or
-@samp{quux}.
-
-@ignore
-@c Nobody needs to disallow empty alternatives any more.
-If the syntax bit @code{RE_NO_EMPTY_ALTS} is set, then if either of the regular
-expressions @var{a} or @var{b} is empty, the
-regular expression is invalid. More precisely, if this syntax bit is
-set, then the alternation operator can't:
-
-@itemize @bullet
-@item
-be first or last in a regular expression;
-
-@item
-follow either another alternation operator or an open-group operator
-(@pxref{Grouping Operators}); or
-
-@item
-precede a close-group operator.
-
-@end itemize
-
-@noindent
-For example, supposing @samp{(} and @samp{)} represent the open and
-close-group operators, then @samp{|foo}, @samp{foo|}, @samp{foo||bar},
-@samp{foo(|bar)}, and @samp{(foo|)bar} would all be invalid.
-@end ignore
-
-The alternation operator operates on the @emph{largest} possible
-surrounding regular expressions. (Put another way, it has the lowest
-precedence of any regular expression operator.)
-Thus, the only way you can
-delimit its arguments is to use grouping. For example, if @samp{(} and
-@samp{)} are the open and close-group operators, then @samp{fo(o|b)ar}
-would match either @samp{fooar} or @samp{fobar}. (@samp{foo|bar} would
-match @samp{foo} or @samp{bar}.)
-
-@cindex backtracking
-The matcher usually tries all combinations of alternatives so as to
-match the longest possible string. For example, when matching
-@samp{(fooq|foo)*(qbarquux|bar)} against @samp{fooqbarquux}, it cannot
-take, say, the first (``depth-first'') combination it could match, since
-then it would be content to match just @samp{fooqbar}.
-
-@comment xx something about leftmost-longest
-
-
-@node List Operators, Grouping Operators, Alternation Operator, Common Operators
-@section List Operators (@code{[} @dots{} @code{]} and @code{[^} @dots{} @code{]})
-
-@cindex matching list
-@cindex @samp{[}
-@cindex @samp{]}
-@cindex @samp{^}
-@cindex @samp{-}
-@cindex @samp{\}
-@cindex @samp{[^}
-@cindex nonmatching list
-@cindex matching newline
-@cindex bracket expression
-
-@dfn{Lists}, also called @dfn{bracket expressions}, are a set of one or
-more items. An @dfn{item} is a character,
-@ignore
-(These get added when they get implemented.)
-a collating symbol, an equivalence class expression,
-@end ignore
-a character class expression, or a range expression. The syntax bits
-affect which kinds of items you can put in a list. We explain the last
-two items in subsections below. Empty lists are invalid.
-
-A @dfn{matching list} matches a single character represented by one of
-the list items. You form a matching list by enclosing one or more items
-within an @dfn{open-matching-list operator} (represented by @samp{[})
-and a @dfn{close-list operator} (represented by @samp{]}).
-
-For example, @samp{[ab]} matches either @samp{a} or @samp{b}.
-@samp{[ad]*} matches the empty string and any string composed of just
-@samp{a}s and @samp{d}s in any order. Regex considers invalid a regular
-expression with a @samp{[} but no matching
-@samp{]}.
-
-@dfn{Nonmatching lists} are similar to matching lists except that they
-match a single character @emph{not} represented by one of the list
-items. You use an @dfn{open-nonmatching-list operator} (represented by
-@samp{[^}@footnote{Regex therefore doesn't consider the @samp{^} to be
-the first character in the list. If you put a @samp{^} character first
-in (what you think is) a matching list, you'll turn it into a
-nonmatching list.}) instead of an open-matching-list operator to start a
-nonmatching list.
-
-For example, @samp{[^ab]} matches any character except @samp{a} or
-@samp{b}.
-
-If the @code{posix_newline} field in the pattern buffer (@pxref{GNU
-Pattern Buffers}) is set, then nonmatching lists do not match a newline.
-
-Most characters lose any special meaning inside a list. The special
-characters inside a list follow.
-
-@table @samp
-@item ]
-ends the list if it's not the first list item. So, if you want to make
-the @samp{]} character a list item, you must put it first.
-
-@item \
-quotes the next character if the syntax bit @code{RE_BACKSLASH_ESCAPE_IN_LISTS} is
-set.
-
-@ignore
-Put these in if they get implemented.
-
-@item [.
-represents the open-collating-symbol operator (@pxref{Collating Symbol
-Operators}).
-
-@item .]
-represents the close-collating-symbol operator.
-
-@item [=
-represents the open-equivalence-class operator (@pxref{Equivalence Class
-Operators}).
-
-@item =]
-represents the close-equivalence-class operator.
-
-@end ignore
-
-@item [:
-represents the open-character-class operator (@pxref{Character Class
-Operators}) if the syntax bit @code{RE_CHAR_CLASSES} is set and what
-follows is a valid character class expression.
-
-@item :]
-represents the close-character-class operator if the syntax bit
-@code{RE_CHAR_CLASSES} is set and what precedes it is an
-open-character-class operator followed by a valid character class name.
-
-@item -
-represents the range operator (@pxref{Range Operator}) if it's
-not first or last in a list or the ending point of a range.
-
-@end table
-
-@noindent
-All other characters are ordinary. For example, @samp{[.*]} matches
-@samp{.} and @samp{*}.
-
-@menu
-* Character Class Operators:: [:class:]
-* Range Operator:: start-end
-@end menu
-
-@ignore
-(If collating symbols and equivalence class expressions get implemented,
-then add this.)
-
-node Collating Symbol Operators
-subsubsection Collating Symbol Operators (@code{[.} @dots{} @code{.]})
-
-If the syntax bit @code{XX} is set, then you can represent
-collating symbols inside lists. You form a @dfn{collating symbol} by
-putting a collating element between an @dfn{open-collating-symbol
-operator} and a @dfn{close-collating-symbol operator}. @samp{[.}
-represents the open-collating-symbol operator and @samp{.]} represents
-the close-collating-symbol operator. For example, if @samp{ll} is a
-collating element, then @samp{[[.ll.]]} would match @samp{ll}.
-
-node Equivalence Class Operators
-subsubsection Equivalence Class Operators (@code{[=} @dots{} @code{=]})
-@cindex equivalence class expression in regex
-@cindex @samp{[=} in regex
-@cindex @samp{=]} in regex
-
-If the syntax bit @code{XX} is set, then Regex recognizes equivalence class
-expressions inside lists. An @dfn{equivalence class expression} is a set
-of collating elements which all belong to the same equivalence class.
-You form an equivalence class expression by putting a collating
-element between an @dfn{open-equivalence-class operator} and a
-@dfn{close-equivalence-class operator}. @samp{[=} represents the
-open-equivalence-class operator and @samp{=]} represents the
-close-equivalence-class operator. For example, if @samp{a} and @samp{A}
-were an equivalence class, then both @samp{[[=a=]]} and @samp{[[=A=]]}
-would match both @samp{a} and @samp{A}. If the collating element in an
-equivalence class expression isn't part of an equivalence class, then
-the matcher considers the equivalence class expression to be a collating
-symbol.
-
-@end ignore
-
-@node Character Class Operators, Range Operator, , List Operators
-@subsection Character Class Operators (@code{[:} @dots{} @code{:]})
-
-@cindex character classes
-@cindex @samp{[:} in regex
-@cindex @samp{:]} in regex
-
-If the syntax bit @code{RE_CHAR_CLASSES} is set, then Regex
-recognizes character class expressions inside lists. A @dfn{character
-class expression} matches one character from a given class. You form a
-character class expression by putting a character class name between an
-@dfn{open-character-class operator} (represented by @samp{[:}) and a
-@dfn{close-character-class operator} (represented by @samp{:]}). The
-character class names and their meanings are:
-
-@table @code
-
-@item alnum
-letters and digits
-
-@item alpha
-letters
-
-@item blank
-system-dependent; for @sc{gnu}, a space or tab
-
-@item cntrl
-control characters (in the @sc{ascii} encoding, code 0177 and codes
-less than 040)
-
-@item digit
-digits
-
-@item graph
-same as @code{print} except omits space
-
-@item lower
-lowercase letters
-
-@item print
-printable characters (in the @sc{ascii} encoding, space through
-tilde---codes 040 through 0176)
-
-@item punct
-neither control nor alphanumeric characters
-
-@item space
-space, horizontal tab, carriage return, newline, vertical tab, and form feed
-
-@item upper
-uppercase letters
-
-@item xdigit
-hexadecimal digits: @code{0}--@code{9}, @code{a}--@code{f}, @code{A}--@code{F}
-
-@end table
-
-@noindent
-These correspond to the definitions in the C library's @file{<ctype.h>}
-facility. For example, @samp{[:alpha:]} corresponds to the standard
-facility @code{isalpha}. Regex recognizes character class expressions
-only inside of lists; so @samp{[[:alpha:]]} matches any letter, but
-@samp{[:alpha:]} outside of a bracket expression and not followed by a
-repetition operator matches just itself.
-
-@node Range Operator, , Character Class Operators, List Operators
-@subsection The Range Operator (@code{-})
-
-Regex recognizes @dfn{range expressions} inside a list. They represent
-those characters
-that fall between two elements in the current collating sequence. You
-form a range expression by putting a @dfn{range operator} between two
-@ignore
-(If these get implemented, then substitute this for ``characters.'')
-of any of the following: characters, collating elements, collating symbols,
-and equivalence class expressions. The starting point of the range and
-the ending point of the range don't have to be the same kind of item,
-e.g., the starting point could be a collating element and the ending
-point could be an equivalence class expression. If a range's ending
-point is an equivalence class, then all the collating elements in that
-class will be in the range.
-@end ignore
-characters.@footnote{You can't use a character class for the starting
-or ending point of a range, since a character class is not a single
-character.} @samp{-} represents the range operator. For example,
-@samp{a-f} within a list represents all the characters from @samp{a}
-through @samp{f}
-inclusively.
-
-If the syntax bit @code{RE_NO_EMPTY_RANGES} is set, then if the range's
-ending point collates less than its starting point, the range (and the
-regular expression containing it) is invalid. For example, the regular
-expression @samp{[z-a]} would be invalid. If this bit isn't set, then
-Regex considers such a range to be empty.
-
-Since @samp{-} represents the range operator, if you want to make a
-@samp{-} character itself
-a list item, you must do one of the following:
-
-@itemize @bullet
-@item
-Put the @samp{-} either first or last in the list.
-
-@item
-Include a range whose starting point collates strictly lower than
-@samp{-} and whose ending point collates equal or higher. Unless a
-range is the first item in a list, a @samp{-} can't be its starting
-point, but @emph{can} be its ending point. That is because Regex
-considers @samp{-} to be the range operator unless it is preceded by
-another @samp{-}. For example, in the @sc{ascii} encoding, @samp{)},
-@samp{*}, @samp{+}, @samp{,}, @samp{-}, @samp{.}, and @samp{/} are
-contiguous characters in the collating sequence. You might think that
-@samp{[)-+--/]} has two ranges: @samp{)-+} and @samp{--/}. Rather, it
-has the ranges @samp{)-+} and @samp{+--}, plus the character @samp{/}, so
-it matches, e.g., @samp{,}, not @samp{.}.
-
-@item
-Put a range whose starting point is @samp{-} first in the list.
-
-@end itemize
-
-For example, @samp{[-a-z]} matches a lowercase letter or a hyphen (in
-English, in @sc{ascii}).
-
-
-@node Grouping Operators, Back-reference Operator, List Operators, Common Operators
-@section Grouping Operators (@code{(} @dots{} @code{)} or @code{\(} @dots{} @code{\)})
-
-@kindex (
-@kindex )
-@kindex \(
-@kindex \)
-@cindex grouping
-@cindex subexpressions
-@cindex parenthesizing
-
-A @dfn{group}, also known as a @dfn{subexpression}, consists of an
-@dfn{open-group operator}, any number of other operators, and a
-@dfn{close-group operator}. Regex treats this sequence as a unit, just
-as mathematics and programming languages treat a parenthesized
-expression as a unit.
-
-Therefore, using @dfn{groups}, you can:
-
-@itemize @bullet
-@item
-delimit the argument(s) to an alternation operator (@pxref{Alternation
-Operator}) or a repetition operator (@pxref{Repetition
-Operators}).
-
-@item
-keep track of the indices of the substring that matched a given group.
-@xref{Using Registers}, for a precise explanation.
-This lets you:
-
-@itemize @bullet
-@item
-use the back-reference operator (@pxref{Back-reference Operator}).
-
-@item
-use registers (@pxref{Using Registers}).
-
-@end itemize
-
-@end itemize
-
-If the syntax bit @code{RE_NO_BK_PARENS} is set, then @samp{(} represents
-the open-group operator and @samp{)} represents the
-close-group operator; otherwise, @samp{\(} and @samp{\)} do.
-
-If the syntax bit @code{RE_UNMATCHED_RIGHT_PAREN_ORD} is set and a
-close-group operator has no matching open-group operator, then Regex
-considers it to match @samp{)}.
-
-
-@node Back-reference Operator, Anchoring Operators, Grouping Operators, Common Operators
-@section The Back-reference Operator (@code{\}@var{digit})
-
-@cindex back references
-
-If the syntax bit @code{RE_NO_BK_REF} isn't set, then Regex recognizes
-back references. A back reference matches a specified preceding group.
-The back reference operator is represented by @samp{\@var{digit}}
-anywhere after the end of a regular expression's @w{@var{digit}-th}
-group (@pxref{Grouping Operators}).
-
-@var{digit} must be between @samp{1} and @samp{9}. The matcher assigns
-numbers 1 through 9 to the first nine groups it encounters. By using
-one of @samp{\1} through @samp{\9} after the corresponding group's
-close-group operator, you can match a substring identical to the
-one that the group does.
-
-Back references match according to the following (in all examples below,
-@samp{(} represents the open-group, @samp{)} the close-group, @samp{@{}
-the open-interval and @samp{@}} the close-interval operator):
-
-@itemize @bullet
-@item
-If the group matches a substring, the back reference matches an
-identical substring. For example, @samp{(a)\1} matches @samp{aa} and
-@samp{(bana)na\1bo\1} matches @samp{bananabanabobana}. Likewise,
-@samp{(.*)\1} matches any (newline-free if the syntax bit
-@code{RE_DOT_NEWLINE} isn't set) string that is composed of two
-identical halves; the @samp{(.*)} matches the first half and the
-@samp{\1} matches the second half.
-
-@item
-If the group matches more than once (as it might if followed
-by, e.g., a repetition operator), then the back reference matches the
-substring the group @emph{last} matched. For example,
-@samp{((a*)b)*\1\2} matches @samp{aabababa}; first @w{group 1} (the
-outer one) matches @samp{aab} and @w{group 2} (the inner one) matches
-@samp{aa}. Then @w{group 1} matches @samp{ab} and @w{group 2} matches
-@samp{a}. So, @samp{\1} matches @samp{ab} and @samp{\2} matches
-@samp{a}.
-
-@item
-If the group doesn't participate in a match, i.e., it is part of an
-alternative not taken or a repetition operator allows zero repetitions
-of it, then the back reference makes the whole match fail. For example,
-@samp{(one()|two())-and-(three\2|four\3)} matches @samp{one-and-three}
-and @samp{two-and-four}, but not @samp{one-and-four} or
-@samp{two-and-three}. For example, if the pattern matches
-@samp{one-and-}, then its @w{group 2} matches the empty string and its
-@w{group 3} doesn't participate in the match. So, if it then matches
-@samp{four}, then when it tries to back reference @w{group 3}---which it
-will attempt to do because @samp{\3} follows the @samp{four}---the match
-will fail because @w{group 3} didn't participate in the match.
-
-@end itemize
-
-You can use a back reference as an argument to a repetition operator. For
-example, @samp{(a(b))\2*} matches @samp{a} followed by one or more
-@samp{b}s. Similarly, @samp{(a(b))\2@{3@}} matches @samp{abbbb}.
-
-If there is no preceding @w{@var{digit}-th} subexpression, the regular
-expression is invalid.
-
-
-@node Anchoring Operators, , Back-reference Operator, Common Operators
-@section Anchoring Operators
-
-@cindex anchoring
-@cindex regexp anchoring
-
-These operators can constrain a pattern to match only at the beginning or
-end of the entire string or at the beginning or end of a line.
-
-@menu
-* Match-beginning-of-line Operator:: ^
-* Match-end-of-line Operator:: $
-@end menu
-
-
-@node Match-beginning-of-line Operator, Match-end-of-line Operator, , Anchoring Operators
-@subsection The Match-beginning-of-line Operator (@code{^})
-
-@kindex ^
-@cindex beginning-of-line operator
-@cindex anchors
-
-This operator can match the empty string either at the beginning of the
-string or after a newline character. Thus, it is said to @dfn{anchor}
-the pattern to the beginning of a line.
-
-In the cases following, @samp{^} represents this operator. (Otherwise,
-@samp{^} is ordinary.)
-
-@itemize @bullet
-
-@item
-It (the @samp{^}) is first in the pattern, as in @samp{^foo}.
-
-@cnindex RE_CONTEXT_INDEP_ANCHORS @r{(and @samp{^})}
-@item
-The syntax bit @code{RE_CONTEXT_INDEP_ANCHORS} is set, and it is outside
-a bracket expression.
-
-@cindex open-group operator and @samp{^}
-@cindex alternation operator and @samp{^}
-@item
-It follows an open-group or alternation operator, as in @samp{a\(^b\)}
-and @samp{a\|^b}. @xref{Grouping Operators}, and @ref{Alternation
-Operator}.
-
-@end itemize
-
-These rules imply that some valid patterns containing @samp{^} cannot be
-matched; for example, @samp{foo^bar} if @code{RE_CONTEXT_INDEP_ANCHORS}
-is set.
-
-@vindex not_bol @r{field in pattern buffer}
-If the @code{not_bol} field is set in the pattern buffer (@pxref{GNU
-Pattern Buffers}), then @samp{^} fails to match at the beginning of the
-string. @xref{POSIX Matching}, for when you might find this useful.
-
-@vindex newline_anchor @r{field in pattern buffer}
-If the @code{newline_anchor} field isn't set in the pattern buffer, then
-@samp{^} fails to match after a newline. This is useful when you do not
-regard the string to be matched as broken into lines.
-
-
-@node Match-end-of-line Operator, , Match-beginning-of-line Operator, Anchoring Operators
-@subsection The Match-end-of-line Operator (@code{$})
-
-@kindex $
-@cindex end-of-line operator
-@cindex anchors
-
-This operator can match the empty string either at the end of
-the string or before a newline character in the string. Thus, it is
-said to @dfn{anchor} the pattern to the end of a line.
-
-It is always represented by @samp{$}. For example, @samp{foo$} usually
-matches, e.g., @samp{foo} and, e.g., the first three characters of
-@samp{foo\nbar}.
-
-Its interaction with the syntax bits and pattern buffer fields is
-exactly the dual of @samp{^}'s; see the previous section. (That is,
-``beginning'' becomes ``end'', ``next'' becomes ``previous'', and
-``after'' becomes ``before''.)
-
-
-@node GNU Operators, GNU Emacs Operators, Common Operators, Top
-@chapter GNU Operators
-
-Following are operators that @sc{gnu} defines (and @sc{posix} doesn't).
-
-@menu
-* Word Operators::
-* Buffer Operators::
-@end menu
-
-@node Word Operators, Buffer Operators, , GNU Operators
-@section Word Operators
-
-The operators in this section require Regex to recognize parts of words.
-Regex uses a syntax table to determine whether or not a character is
-part of a word, i.e., whether or not it is @dfn{word-constituent}.
-
-@menu
-* Non-Emacs Syntax Tables::
-* Match-word-boundary Operator:: \b
-* Match-within-word Operator:: \B
-* Match-beginning-of-word Operator:: \<
-* Match-end-of-word Operator:: \>
-* Match-word-constituent Operator:: \w
-* Match-non-word-constituent Operator:: \W
-@end menu
-
-@node Non-Emacs Syntax Tables, Match-word-boundary Operator, , Word Operators
-@subsection Non-Emacs Syntax Tables
-
-A @dfn{syntax table} is an array indexed by the characters in your
-character set. In the @sc{ascii} encoding, therefore, a syntax table
-has 256 elements. Regex always uses a @code{char *} variable
-@code{re_syntax_table} as its syntax table. In some cases, it
-initializes this variable and in others it expects you to initialize it.
-
-@itemize @bullet
-@item
-If Regex is compiled with the preprocessor symbols @code{emacs} and
-@code{SYNTAX_TABLE} both undefined, then Regex allocates
-@code{re_syntax_table} and initializes an element @var{i} either to
-@code{Sword} (which it defines) if @var{i} is a letter, number, or
-@samp{_}, or to zero if it's not.
-
-@item
-If Regex is compiled with @code{emacs} undefined but @code{SYNTAX_TABLE}
-defined, then Regex expects you to define a @code{char *} variable
-@code{re_syntax_table} to be a valid syntax table.
-
-@item
-@xref{Emacs Syntax Tables}, for what happens when Regex is compiled with
-the preprocessor symbol @code{emacs} defined.
-
-@end itemize
-
-@node Match-word-boundary Operator, Match-within-word Operator, Non-Emacs Syntax Tables, Word Operators
-@subsection The Match-word-boundary Operator (@code{\b})
-
-@cindex @samp{\b}
-@cindex word boundaries, matching
-
-This operator (represented by @samp{\b}) matches the empty string at
-either the beginning or the end of a word. For example, @samp{\brat\b}
-matches the separate word @samp{rat}.
-
-@node Match-within-word Operator, Match-beginning-of-word Operator, Match-word-boundary Operator, Word Operators
-@subsection The Match-within-word Operator (@code{\B})
-
-@cindex @samp{\B}
-
-This operator (represented by @samp{\B}) matches the empty string within
-a word. For example, @samp{c\Brat\Be} matches @samp{crate}, but
-@samp{dirty \Brat} doesn't match @samp{dirty rat}.
-
-@node Match-beginning-of-word Operator, Match-end-of-word Operator, Match-within-word Operator, Word Operators
-@subsection The Match-beginning-of-word Operator (@code{\<})
-
-@cindex @samp{\<}
-
-This operator (represented by @samp{\<}) matches the empty string at the
-beginning of a word.
-
-@node Match-end-of-word Operator, Match-word-constituent Operator, Match-beginning-of-word Operator, Word Operators
-@subsection The Match-end-of-word Operator (@code{\>})
-
-@cindex @samp{\>}
-
-This operator (represented by @samp{\>}) matches the empty string at the
-end of a word.
-
-@node Match-word-constituent Operator, Match-non-word-constituent Operator, Match-end-of-word Operator, Word Operators
-@subsection The Match-word-constituent Operator (@code{\w})
-
-@cindex @samp{\w}
-
-This operator (represented by @samp{\w}) matches any word-constituent
-character.
-
-@node Match-non-word-constituent Operator, , Match-word-constituent Operator, Word Operators
-@subsection The Match-non-word-constituent Operator (@code{\W})
-
-@cindex @samp{\W}
-
-This operator (represented by @samp{\W}) matches any character that is
-not word-constituent.
-
-
-@node Buffer Operators, , Word Operators, GNU Operators
-@section Buffer Operators
-
-Following are operators which work on buffers. In Emacs, a @dfn{buffer}
-is, naturally, an Emacs buffer. For other programs, Regex considers the
-entire string to be matched as the buffer.
-
-@menu
-* Match-beginning-of-buffer Operator:: \`
-* Match-end-of-buffer Operator:: \'
-@end menu
-
-
-@node Match-beginning-of-buffer Operator, Match-end-of-buffer Operator, , Buffer Operators
-@subsection The Match-beginning-of-buffer Operator (@code{\`})
-
-@cindex @samp{\`}
-
-This operator (represented by @samp{\`}) matches the empty string at the
-beginning of the buffer.
-
-@node Match-end-of-buffer Operator, , Match-beginning-of-buffer Operator, Buffer Operators
-@subsection The Match-end-of-buffer Operator (@code{\'})
-
-@cindex @samp{\'}
-
-This operator (represented by @samp{\'}) matches the empty string at the
-end of the buffer.
-
-
-@node GNU Emacs Operators, What Gets Matched?, GNU Operators, Top
-@chapter GNU Emacs Operators
-
-Following are operators that @sc{gnu} defines (and @sc{posix} doesn't)
-that you can use only when Regex is compiled with the preprocessor
-symbol @code{emacs} defined.
-
-@menu
-* Syntactic Class Operators::
-@end menu
-
-
-@node Syntactic Class Operators, , , GNU Emacs Operators
-@section Syntactic Class Operators
-
-The operators in this section require Regex to recognize the syntactic
-classes of characters. Regex uses a syntax table to determine this.
-
-@menu
-* Emacs Syntax Tables::
-* Match-syntactic-class Operator:: \sCLASS
-* Match-not-syntactic-class Operator:: \SCLASS
-@end menu
-
-@node Emacs Syntax Tables, Match-syntactic-class Operator, , Syntactic Class Operators
-@subsection Emacs Syntax Tables
-
-A @dfn{syntax table} is an array indexed by the characters in your
-character set. In the @sc{ascii} encoding, therefore, a syntax table
-has 256 elements.
-
-If Regex is compiled with the preprocessor symbol @code{emacs} defined,
-then Regex expects you to define and initialize the variable
-@code{re_syntax_table} to be an Emacs syntax table. Emacs' syntax
-tables are more complicated than Regex's own (@pxref{Non-Emacs Syntax
-Tables}). @xref{Syntax, , Syntax, emacs, The GNU Emacs User's Manual},
-for a description of Emacs' syntax tables.
-
-@node Match-syntactic-class Operator, Match-not-syntactic-class Operator, Emacs Syntax Tables, Syntactic Class Operators
-@subsection The Match-syntactic-class Operator (@code{\s}@var{class})
-
-@cindex @samp{\s}
-
-This operator matches any character whose syntactic class is represented
-by a specified character. @samp{\s@var{class}} represents this operator
-where @var{class} is the character representing the syntactic class you
-want. For example, @samp{w} represents the syntactic
-class of word-constituent characters, so @samp{\sw} matches any
-word-constituent character.
-
-@node Match-not-syntactic-class Operator, , Match-syntactic-class Operator, Syntactic Class Operators
-@subsection The Match-not-syntactic-class Operator (@code{\S}@var{class})
-
-@cindex @samp{\S}
-
-This operator is similar to the match-syntactic-class operator except
-that it matches any character whose syntactic class is @emph{not}
-represented by the specified character. @samp{\S@var{class}} represents
-this operator. For example, @samp{w} represents the syntactic class of
-word-constituent characters, so @samp{\Sw} matches any character that is
-not word-constituent.
-
-
-@node What Gets Matched?, Programming with Regex, GNU Emacs Operators, Top
-@chapter What Gets Matched?
-
-Regex usually matches strings according to the ``leftmost longest''
-rule; that is, it chooses the longest of the leftmost matches. This
-does not mean that, for a regular expression containing subexpressions,
-it simply chooses the longest match for each subexpression, left to
-right; the overall match must also be the longest possible one.
-
-For example, @samp{(ac*)(c*d[ac]*)\1} matches @samp{acdacaaa}, not
-@samp{acdac}, as it would if it were to choose the longest match for the
-first subexpression.
-
-
-@node Programming with Regex, Copying, What Gets Matched?, Top
-@chapter Programming with Regex
-
-Here we describe how you use the Regex data structures and functions in
-C programs. Regex has three interfaces: one designed for @sc{gnu}, one
-compatible with @sc{posix} and one compatible with Berkeley @sc{unix}.
-
-@menu
-* GNU Regex Functions::
-* POSIX Regex Functions::
-* BSD Regex Functions::
-@end menu
-
-
-@node GNU Regex Functions, POSIX Regex Functions, , Programming with Regex
-@section GNU Regex Functions
-
-If you're writing code that doesn't need to be compatible with either
-@sc{posix} or Berkeley @sc{unix}, you can use these functions. They
-provide more options than the other interfaces.
-
-@menu
-* GNU Pattern Buffers:: The re_pattern_buffer type.
-* GNU Regular Expression Compiling:: re_compile_pattern ()
-* GNU Matching:: re_match ()
-* GNU Searching:: re_search ()
-* Matching/Searching with Split Data:: re_match_2 (), re_search_2 ()
-* Searching with Fastmaps:: re_compile_fastmap ()
-* GNU Translate Tables:: The `translate' field.
-* Using Registers:: The re_registers type and related fns.
-* Freeing GNU Pattern Buffers:: regfree ()
-@end menu
-
-
-@node GNU Pattern Buffers, GNU Regular Expression Compiling, , GNU Regex Functions
-@subsection GNU Pattern Buffers
-
-@cindex pattern buffer, definition of
-@tindex re_pattern_buffer @r{definition}
-@tindex struct re_pattern_buffer @r{definition}
-
-To compile, match, or search for a given regular expression, you must
-supply a pattern buffer. A @dfn{pattern buffer} holds one compiled
-regular expression.@footnote{Regular expressions are also referred to as
-``patterns,'' hence the name ``pattern buffer.''}
-
-You can have several different pattern buffers simultaneously, each
-holding a compiled pattern for a different regular expression.
-
-@file{regex.h} defines the pattern buffer @code{struct} as follows:
-
-@example
- /* Space that holds the compiled pattern. It is declared as
- `unsigned char *' because its elements are
- sometimes used as array indexes. */
- unsigned char *buffer;
-
- /* Number of bytes to which `buffer' points. */
- unsigned long allocated;
-
- /* Number of bytes actually used in `buffer'. */
- unsigned long used;
-
- /* Syntax setting with which the pattern was compiled. */
- reg_syntax_t syntax;
-
- /* Pointer to a fastmap, if any, otherwise zero. re_search uses
- the fastmap, if there is one, to skip over impossible
- starting points for matches. */
- char *fastmap;
-
- /* Either a translate table to apply to all characters before
- comparing them, or zero for no translation. The translation
- is applied to a pattern when it is compiled and to a string
- when it is matched. */
- char *translate;
-
- /* Number of subexpressions found by the compiler. */
- size_t re_nsub;
-
- /* Zero if this pattern cannot match the empty string, one else.
- Well, in truth it's used only in `re_search_2', to see
- whether or not we should use the fastmap, so we don't set
- this absolutely perfectly; see `re_compile_fastmap' (the
- `duplicate' case). */
- unsigned can_be_null : 1;
-
- /* If REGS_UNALLOCATED, allocate space in the `regs' structure
- for `max (RE_NREGS, re_nsub + 1)' groups.
- If REGS_REALLOCATE, reallocate space if necessary.
- If REGS_FIXED, use what's there. */
-#define REGS_UNALLOCATED 0
-#define REGS_REALLOCATE 1
-#define REGS_FIXED 2
- unsigned regs_allocated : 2;
-
- /* Set to zero when `regex_compile' compiles a pattern; set to one
- by `re_compile_fastmap' if it updates the fastmap. */
- unsigned fastmap_accurate : 1;
-
- /* If set, `re_match_2' does not return information about
- subexpressions. */
- unsigned no_sub : 1;
-
- /* If set, a beginning-of-line anchor doesn't match at the
- beginning of the string. */
- unsigned not_bol : 1;
-
- /* Similarly for an end-of-line anchor. */
- unsigned not_eol : 1;
-
- /* If true, an anchor at a newline matches. */
- unsigned newline_anchor : 1;
-
-@end example
-
-
-@node GNU Regular Expression Compiling, GNU Matching, GNU Pattern Buffers, GNU Regex Functions
-@subsection GNU Regular Expression Compiling
-
-In @sc{gnu}, you can both match and search for a given regular
-expression. To do either, you must first compile it in a pattern buffer
-(@pxref{GNU Pattern Buffers}).
-
-@cindex syntax initialization
-@vindex re_syntax_options @r{initialization}
-Regular expressions match according to the syntax with which they were
-compiled; with @sc{gnu}, you indicate what syntax you want by setting
-the variable @code{re_syntax_options} (declared in @file{regex.h} and
-defined in @file{regex.c}) before calling the compiling function,
-@code{re_compile_pattern} (see below). @xref{Syntax Bits}, and
-@ref{Predefined Syntaxes}.
-
-You can change the value of @code{re_syntax_options} at any time.
-Usually, however, you set its value once and then never change it.
-
-@cindex pattern buffer initialization
-@code{re_compile_pattern} takes a pattern buffer as an argument. You
-must initialize the following fields:
-
-@table @code
-
-@item translate
-@vindex translate @r{initialization}
-Initialize this to point to a translate table if you want one, or to
-zero if you don't. We explain translate tables in @ref{GNU Translate
-Tables}.
-
-@item fastmap
-@vindex fastmap @r{initialization}
-Initialize this to nonzero if you want a fastmap, or to zero if you
-don't.
-
-@item buffer
-@itemx allocated
-@vindex buffer @r{initialization}
-@vindex allocated @r{initialization}
-@findex malloc
-If you want @code{re_compile_pattern} to allocate memory for the
-compiled pattern, set both of these to zero. If you have an existing
-block of memory (allocated with @code{malloc}) you want Regex to use,
-set @code{buffer} to its address and @code{allocated} to its size (in
-bytes).
-
-@code{re_compile_pattern} uses @code{realloc} to extend the space for
-the compiled pattern as necessary.
-
-@end table
-
-To compile a pattern buffer, use:
-
-@findex re_compile_pattern
-@example
-char *
-re_compile_pattern (const char *@var{regex}, const int @var{regex_size},
- struct re_pattern_buffer *@var{pattern_buffer})
-@end example
-
-@noindent
-@var{regex} is the regular expression's address, @var{regex_size} is its
-length, and @var{pattern_buffer} is the pattern buffer's address.
-
-If @code{re_compile_pattern} successfully compiles the regular
-expression, it returns zero and sets @code{*@var{pattern_buffer}} to the
-compiled pattern. It sets the pattern buffer's fields as follows:
-
-@table @code
-@item buffer
-@vindex buffer @r{field, set by @code{re_compile_pattern}}
-to the compiled pattern.
-
-@item used
-@vindex used @r{field, set by @code{re_compile_pattern}}
-to the number of bytes the compiled pattern in @code{buffer} occupies.
-
-@item syntax
-@vindex syntax @r{field, set by @code{re_compile_pattern}}
-to the current value of @code{re_syntax_options}.
-
-@item re_nsub
-@vindex re_nsub @r{field, set by @code{re_compile_pattern}}
-to the number of subexpressions in @var{regex}.
-
-@item fastmap_accurate
-@vindex fastmap_accurate @r{field, set by @code{re_compile_pattern}}
-to zero on the theory that the pattern you're compiling is different
-than the one previously compiled into @code{buffer}; in that case (since
-you can't make a fastmap without a compiled pattern),
-@code{fastmap} would either contain an incompatible fastmap, or nothing
-at all.
-
-@c xx what else?
-@end table
-
-If @code{re_compile_pattern} can't compile @var{regex}, it returns an
-error string corresponding to one of the errors listed in @ref{POSIX
-Regular Expression Compiling}.
-
-
-@node GNU Matching, GNU Searching, GNU Regular Expression Compiling, GNU Regex Functions
-@subsection GNU Matching
-
-@cindex matching with GNU functions
-
-Matching the @sc{gnu} way means trying to match as much of a string as
-possible starting at a position within it you specify. Once you've compiled
-a pattern into a pattern buffer (@pxref{GNU Regular Expression
-Compiling}), you can ask the matcher to match that pattern against a
-string using:
-
-@findex re_match
-@example
-int
-re_match (struct re_pattern_buffer *@var{pattern_buffer},
- const char *@var{string}, const int @var{size},
- const int @var{start}, struct re_registers *@var{regs})
-@end example
-
-@noindent
-@var{pattern_buffer} is the address of a pattern buffer containing a
-compiled pattern. @var{string} is the string you want to match; it can
-contain newline and null characters. @var{size} is the length of that
-string. @var{start} is the string index at which you want to
-begin matching; the first character of @var{string} is at index zero.
-@xref{Using Registers}, for an explanation of @var{regs}; you can safely
-pass zero.
-
-@code{re_match} matches the regular expression in @var{pattern_buffer}
-against the string @var{string} according to the syntax in
-@var{pattern_buffer}'s @code{syntax} field. (@xref{GNU Regular
-Expression Compiling}, for how to set it.) The function returns
-@math{-1} if the compiled pattern does not match any part of
-@var{string} and @math{-2} if an internal error happens; otherwise, it
-returns how many (possibly zero) characters of @var{string} the pattern
-matched.
-
-An example: suppose @var{pattern_buffer} points to a pattern buffer
-containing the compiled pattern for @samp{a*}, and @var{string} points
-to @samp{aaaaab} (whereupon @var{size} should be 6). Then if @var{start}
-is 2, @code{re_match} returns 3, i.e., @samp{a*} would have matched the
-last three @samp{a}s in @var{string}. If @var{start} is 0,
-@code{re_match} returns 5, i.e., @samp{a*} would have matched all the
-@samp{a}s in @var{string}. If @var{start} is either 5 or 6, it returns
-zero.
-
-If @var{start} is not between zero and @var{size}, then
-@code{re_match} returns @math{-1}.
-
-
-@node GNU Searching, Matching/Searching with Split Data, GNU Matching, GNU Regex Functions
-@subsection GNU Searching
-
-@cindex searching with GNU functions
-
-@dfn{Searching} means trying to match starting at successive positions
-within a string. The function @code{re_search} does this.
-
-Before calling @code{re_search}, you must compile your regular
-expression. @xref{GNU Regular Expression Compiling}.
-
-Here is the function declaration:
-
-@findex re_search
-@example
-int
-re_search (struct re_pattern_buffer *@var{pattern_buffer},
- const char *@var{string}, const int @var{size},
- const int @var{start}, const int @var{range},
- struct re_registers *@var{regs})
-@end example
-
-@noindent
-@vindex start @r{argument to @code{re_search}}
-@vindex range @r{argument to @code{re_search}}
-whose arguments are the same as those to @code{re_match} (@pxref{GNU
-Matching}) except that the two arguments @var{start} and @var{range}
-replace @code{re_match}'s argument @var{start}.
-
-If @var{range} is positive, then @code{re_search} attempts a match
-starting first at index @var{start}, then at @math{@var{start} + 1} if
-that fails, and so on, up to @math{@var{start} + @var{range}}; if
-@var{range} is negative, then it attempts a match starting first at
-index @var{start}, then at @math{@var{start} -1} if that fails, and so
-on.
-
-If @var{start} is not between zero and @var{size}, then @code{re_search}
-returns @math{-1}. When @var{range} is positive, @code{re_search}
-adjusts @var{range} so that @math{@var{start} + @var{range} - 1} is
-between zero and @var{size}, if necessary; that way it won't search
-outside of @var{string}. Similarly, when @var{range} is negative,
-@code{re_search} adjusts @var{range} so that @math{@var{start} +
-@var{range} + 1} is between zero and @var{size}, if necessary.
-
-If the @code{fastmap} field of @var{pattern_buffer} is zero,
-@code{re_search} matches starting at consecutive positions; otherwise,
-it uses @code{fastmap} to make the search more efficient.
-@xref{Searching with Fastmaps}.
-
-If no match is found, @code{re_search} returns @math{-1}. If
-a match is found, it returns the index where the match began. If an
-internal error happens, it returns @math{-2}.
-
-
-@node Matching/Searching with Split Data, Searching with Fastmaps, GNU Searching, GNU Regex Functions
-@subsection Matching and Searching with Split Data
-
-Using the functions @code{re_match_2} and @code{re_search_2}, you can
-match or search in data that is divided into two strings.
-
-The function:
-
-@findex re_match_2
-@example
-int
-re_match_2 (struct re_pattern_buffer *@var{buffer},
- const char *@var{string1}, const int @var{size1},
- const char *@var{string2}, const int @var{size2},
- const int @var{start},
- struct re_registers *@var{regs},
- const int @var{stop})
-@end example
-
-@noindent
-is similar to @code{re_match} (@pxref{GNU Matching}) except that you
-pass @emph{two} data strings and sizes, and an index @var{stop} beyond
-which you don't want the matcher to try matching. As with
-@code{re_match}, if it succeeds, @code{re_match_2} returns how many
-characters of @var{string} it matched. Regard @var{string1} and
-@var{string2} as concatenated when you set the arguments @var{start} and
-@var{stop} and use the contents of @var{regs}; @code{re_match_2} never
-returns a value larger than @math{@var{size1} + @var{size2}}.
-
-The function:
-
-@findex re_search_2
-@example
-int
-re_search_2 (struct re_pattern_buffer *@var{buffer},
- const char *@var{string1}, const int @var{size1},
- const char *@var{string2}, const int @var{size2},
- const int @var{start}, const int @var{range},
- struct re_registers *@var{regs},
- const int @var{stop})
-@end example
-
-@noindent
-is similarly related to @code{re_search}.
-
-
-@node Searching with Fastmaps, GNU Translate Tables, Matching/Searching with Split Data, GNU Regex Functions
-@subsection Searching with Fastmaps
-
-@cindex fastmaps
-If you're searching through a long string, you should use a fastmap.
-Without one, the searcher tries to match at consecutive positions in the
-string. Generally, most of the characters in the string could not start
-a match. It takes much longer to try matching at a given position in the
-string than it does to check in a table whether or not the character at
-that position could start a match. A @dfn{fastmap} is such a table.
-
-More specifically, a fastmap is an array indexed by the characters in
-your character set. Under the @sc{ascii} encoding, therefore, a fastmap
-has 256 elements. If you want the searcher to use a fastmap with a
-given pattern buffer, you must allocate the array and assign the array's
-address to the pattern buffer's @code{fastmap} field. You either can
-compile the fastmap yourself or have @code{re_search} do it for you;
-when @code{fastmap} is nonzero, it automatically compiles a fastmap the
-first time you search using a particular compiled pattern.
-
-To compile a fastmap yourself, use:
-
-@findex re_compile_fastmap
-@example
-int
-re_compile_fastmap (struct re_pattern_buffer *@var{pattern_buffer})
-@end example
-
-@noindent
-@var{pattern_buffer} is the address of a pattern buffer. If the
-character @var{c} could start a match for the pattern,
-@code{re_compile_fastmap} makes
-@code{@var{pattern_buffer}->fastmap[@var{c}]} nonzero. It returns
-@math{0} if it can compile a fastmap and @math{-2} if there is an
-internal error. For example, if @samp{|} is the alternation operator
-and @var{pattern_buffer} holds the compiled pattern for @samp{a|b}, then
-@code{re_compile_fastmap} sets @code{fastmap['a']} and
-@code{fastmap['b']} (and no others).
-
-@code{re_search} uses a fastmap as it moves along in the string: it
-checks the string's characters until it finds one that's in the fastmap.
-Then it tries matching at that character. If the match fails, it
-repeats the process. So, by using a fastmap, @code{re_search} doesn't
-waste time trying to match at positions in the string that couldn't
-start a match.
-
-If you don't want @code{re_search} to use a fastmap,
-store zero in the @code{fastmap} field of the pattern buffer before
-calling @code{re_search}.
-
-Once you've initialized a pattern buffer's @code{fastmap} field, you
-need never do so again---even if you compile a new pattern in
-it---provided the way the field is set still reflects whether or not you
-want a fastmap. @code{re_search} will still either do nothing if
-@code{fastmap} is null or, if it isn't, compile a new fastmap for the
-new pattern.
-
-@node GNU Translate Tables, Using Registers, Searching with Fastmaps, GNU Regex Functions
-@subsection GNU Translate Tables
-
-If you set the @code{translate} field of a pattern buffer to a translate
-table, then the @sc{gnu} Regex functions to which you've passed that
-pattern buffer use it to apply a simple transformation
-to all the regular expression and string characters at which they look.
-
-A @dfn{translate table} is an array indexed by the characters in your
-character set. Under the @sc{ascii} encoding, therefore, a translate
-table has 256 elements. The array's elements are also characters in
-your character set. When the Regex functions see a character @var{c},
-they use @code{translate[@var{c}]} in its place, with one exception: the
-character after a @samp{\} is not translated. (This ensures that the
-operators, e.g., @samp{\B} and @samp{\b}, are always distinguishable.)
-
-For example, a table that maps all lowercase letters to the
-corresponding uppercase ones would cause the matcher to ignore
-differences in case.@footnote{A table that maps all uppercase letters to
-the corresponding lowercase ones would work just as well for this
-purpose.} Such a table would map all characters except lowercase letters
-to themselves, and lowercase letters to the corresponding uppercase
-ones. Under the @sc{ascii} encoding, here's how you could initialize
-such a table (we'll call it @code{case_fold}):
-
-@example
-for (i = 0; i < 256; i++)
- case_fold[i] = i;
-for (i = 'a'; i <= 'z'; i++)
- case_fold[i] = i - ('a' - 'A');
-@end example
-
-You tell Regex to use a translate table on a given pattern buffer by
-assigning that table's address to the @code{translate} field of that
-buffer. If you don't want Regex to do any translation, put zero into
-this field. You'll get weird results if you change the table's contents
-anytime between compiling the pattern buffer, compiling its fastmap, and
-matching or searching with the pattern buffer.
-
-@node Using Registers, Freeing GNU Pattern Buffers, GNU Translate Tables, GNU Regex Functions
-@subsection Using Registers
-
-A group in a regular expression can match a (possibly empty) substring
-of the string that the regular expression as a whole matched. The matcher
-remembers the beginning and end of the substring matched by
-each group.
-
-To find out what they matched, pass a nonzero @var{regs} argument to a
-@sc{gnu} matching or searching function (@pxref{GNU Matching} and
-@ref{GNU Searching}), i.e., the address of a structure of this type, as
-defined in @file{regex.h}:
-
-@c We don't bother to include this directly from regex.h,
-@c since it changes so rarely.
-@example
-@tindex re_registers
-@vindex num_regs @r{in @code{struct re_registers}}
-@vindex start @r{in @code{struct re_registers}}
-@vindex end @r{in @code{struct re_registers}}
-struct re_registers
-@{
- unsigned num_regs;
- regoff_t *start;
- regoff_t *end;
-@};
-@end example
-
-Except for (possibly) the @var{num_regs}'th element (see below), the
-@var{i}th element of the @code{start} and @code{end} arrays records
-information about the @var{i}th group in the pattern. (They're declared
-as C pointers, but this is only because not all C compilers accept
-zero-length arrays; conceptually, it is simplest to think of them as
-arrays.)
-
-The @code{start} and @code{end} arrays are allocated in various ways,
-depending on the value of the @code{regs_allocated}
-@vindex regs_allocated
-field in the pattern buffer passed to the matcher.
-
-The simplest and perhaps most useful is to let the matcher (re)allocate
-enough space to record information for all the groups in the regular
-expression. If @code{regs_allocated} is @code{REGS_UNALLOCATED},
-@vindex REGS_UNALLOCATED
-the matcher allocates @math{1 + @var{re_nsub}} elements (@code{re_nsub}
-is another field in the pattern buffer; @pxref{GNU Pattern Buffers}),
-sets the extra element to @math{-1}, and sets @code{regs_allocated} to
-@code{REGS_REALLOCATE}.
-@vindex REGS_REALLOCATE
-Then on subsequent calls with the same pattern buffer and @var{regs}
-arguments, the matcher reallocates more space if necessary.
-
-It would perhaps be more logical to make the @code{regs_allocated} field
-part of the @code{re_registers} structure, instead of part of the
-pattern buffer. But in that case the caller would be forced to
-initialize the structure before passing it. Much existing code doesn't
-do this initialization, and it's arguably better to avoid it anyway.
-
-@code{re_compile_pattern} sets @code{regs_allocated} to
-@code{REGS_UNALLOCATED},
-so if you use the GNU regular expression
-functions, you get this behavior by default.
-
-@c xx document re_set_registers
-
-@sc{posix}, on the other hand, requires a different interface: the
-caller is supposed to pass in a fixed-length array which the matcher
-fills. Therefore, if @code{regs_allocated} is @code{REGS_FIXED},
-@vindex REGS_FIXED
-the matcher simply fills that array.
-
-The following examples illustrate the information recorded in the
-@code{re_registers} structure. (In all of them, @samp{(} represents the
-open-group and @samp{)} the close-group operator. The first character
-in the string @var{string} is at index 0.)
-
-@c xx i'm not sure this is all true anymore.
-
-@itemize @bullet
-
-@item
-If the regular expression has an @w{@var{i}-th}
-group not contained within another group that matches a
-substring of @var{string}, then the function sets
-@code{@w{@var{regs}->}start[@var{i}]} to the index in @var{string} where
-the substring matched by the @w{@var{i}-th} group begins, and
-@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that
-substring's end. The function sets @code{@w{@var{regs}->}start[0]} and
-@code{@w{@var{regs}->}end[0]} to analogous information about the entire
-pattern.
-
-For example, when you match @samp{((a)(b))} against @samp{ab}, you get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}
-
-@item
-0 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}
-
-@item
-0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}
-
-@item
-1 in @code{@w{@var{regs}->}start[3]} and 2 in @code{@w{@var{regs}->}end[3]}
-@end itemize
-
-@item
-If a group matches more than once (as it might if followed by,
-e.g., a repetition operator), then the function reports the information
-about what the group @emph{last} matched.
-
-For example, when you match the pattern @samp{(a)*} against the string
-@samp{aa}, you get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 2 in @code{@w{@var{regs}->}end[0]}
-
-@item
-1 in @code{@w{@var{regs}->}start[1]} and 2 in @code{@w{@var{regs}->}end[1]}
-@end itemize
-
-@item
-If the @w{@var{i}-th} group does not participate in a
-successful match, e.g., it is an alternative not taken or a
-repetition operator allows zero repetitions of it, then the function
-sets @code{@w{@var{regs}->}start[@var{i}]} and
-@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}.
-
-For example, when you match the pattern @samp{(a)*b} against
-the string @samp{b}, you get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
-
-@item
-@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}
-@end itemize
-
-@item
-If the @w{@var{i}-th} group matches a zero-length string, then the
-function sets @code{@w{@var{regs}->}start[@var{i}]} and
-@code{@w{@var{regs}->}end[@var{i}]} to the index just beyond that
-zero-length string.
-
-For example, when you match the pattern @samp{(a*)b} against the string
-@samp{b}, you get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
-
-@item
-0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}
-@end itemize
-
-@ignore
-The function sets @code{@w{@var{regs}->}start[0]} and
-@code{@w{@var{regs}->}end[0]} to analogous information about the entire
-pattern.
-
-For example, when you match the pattern @samp{(a*)} against the empty
-string, you get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 0 in @code{@w{@var{regs}->}end[0]}
-
-@item
-0 in @code{@w{@var{regs}->}start[1]} and 0 in @code{@w{@var{regs}->}end[1]}
-@end itemize
-@end ignore
-
-@item
-If an @w{@var{i}-th} group contains a @w{@var{j}-th} group
-in turn not contained within any other group within group @var{i} and
-the function reports a match of the @w{@var{i}-th} group, then it
-records in @code{@w{@var{regs}->}start[@var{j}]} and
-@code{@w{@var{regs}->}end[@var{j}]} the last match (if it matched) of
-the @w{@var{j}-th} group.
-
-For example, when you match the pattern @samp{((a*)b)*} against the
-string @samp{abb}, @w{group 2} last matches the empty string, so you
-get what it previously matched:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}
-
-@item
-2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}
-
-@item
-2 in @code{@w{@var{regs}->}start[2]} and 2 in @code{@w{@var{regs}->}end[2]}
-@end itemize
-
-When you match the pattern @samp{((a)*b)*} against the string
-@samp{abb}, @w{group 2} doesn't participate in the last match, so you
-get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 3 in @code{@w{@var{regs}->}end[0]}
-
-@item
-2 in @code{@w{@var{regs}->}start[1]} and 3 in @code{@w{@var{regs}->}end[1]}
-
-@item
-0 in @code{@w{@var{regs}->}start[2]} and 1 in @code{@w{@var{regs}->}end[2]}
-@end itemize
-
-@item
-If an @w{@var{i}-th} group contains a @w{@var{j}-th} group
-in turn not contained within any other group within group @var{i}
-and the function sets
-@code{@w{@var{regs}->}start[@var{i}]} and
-@code{@w{@var{regs}->}end[@var{i}]} to @math{-1}, then it also sets
-@code{@w{@var{regs}->}start[@var{j}]} and
-@code{@w{@var{regs}->}end[@var{j}]} to @math{-1}.
-
-For example, when you match the pattern @samp{((a)*b)*c} against the
-string @samp{c}, you get:
-
-@itemize
-@item
-0 in @code{@w{@var{regs}->}start[0]} and 1 in @code{@w{@var{regs}->}end[0]}
-
-@item
-@math{-1} in @code{@w{@var{regs}->}start[1]} and @math{-1} in @code{@w{@var{regs}->}end[1]}
-
-@item
-@math{-1} in @code{@w{@var{regs}->}start[2]} and @math{-1} in @code{@w{@var{regs}->}end[2]}
-@end itemize
-
-@end itemize
-
-@node Freeing GNU Pattern Buffers, , Using Registers, GNU Regex Functions
-@subsection Freeing GNU Pattern Buffers
-
-To free any allocated fields of a pattern buffer, you can use the
-@sc{posix} function described in @ref{Freeing POSIX Pattern Buffers},
-since the type @code{regex_t}---the type for @sc{posix} pattern
-buffers---is equivalent to the type @code{re_pattern_buffer}. After
-freeing a pattern buffer, you need to again compile a regular expression
-in it (@pxref{GNU Regular Expression Compiling}) before passing it to
-a matching or searching function.
-
-
-@node POSIX Regex Functions, BSD Regex Functions, GNU Regex Functions, Programming with Regex
-@section POSIX Regex Functions
-
-If you're writing code that has to be @sc{posix} compatible, you'll need
-to use these functions. Their interfaces are as specified by @sc{posix},
-draft 1003.2/D11.2.
-
-@menu
-* POSIX Pattern Buffers:: The regex_t type.
-* POSIX Regular Expression Compiling:: regcomp ()
-* POSIX Matching:: regexec ()
-* Reporting Errors:: regerror ()
-* Using Byte Offsets:: The regmatch_t type.
-* Freeing POSIX Pattern Buffers:: regfree ()
-@end menu
-
-
-@node POSIX Pattern Buffers, POSIX Regular Expression Compiling, , POSIX Regex Functions
-@subsection POSIX Pattern Buffers
-
-To compile or match a given regular expression the @sc{posix} way, you
-must supply a pattern buffer exactly the way you do for @sc{gnu}
-(@pxref{GNU Pattern Buffers}). @sc{posix} pattern buffers have type
-@code{regex_t}, which is equivalent to the @sc{gnu} pattern buffer
-type @code{re_pattern_buffer}.
-
-
-@node POSIX Regular Expression Compiling, POSIX Matching, POSIX Pattern Buffers, POSIX Regex Functions
-@subsection POSIX Regular Expression Compiling
-
-With @sc{posix}, you can only search for a given regular expression; you
-can't match it. To do this, you must first compile it in a
-pattern buffer, using @code{regcomp}.
-
-@ignore
-Before calling @code{regcomp}, you must initialize this pattern buffer
-as you do for @sc{gnu} (@pxref{GNU Regular Expression Compiling}). See
-below, however, for how to choose a syntax with which to compile.
-@end ignore
-
-To compile a pattern buffer, use:
-
-@findex regcomp
-@example
-int
-regcomp (regex_t *@var{preg}, const char *@var{regex}, int @var{cflags})
-@end example
-
-@noindent
-@var{preg} is the initialized pattern buffer's address, @var{regex} is
-the regular expression's address, and @var{cflags} is the compilation
-flags, which Regex considers as a collection of bits. Here are the
-valid bits, as defined in @file{regex.h}:
-
-@table @code
-
-@item REG_EXTENDED
-@vindex REG_EXTENDED
-says to use @sc{posix} Extended Regular Expression syntax; if this isn't
-set, then says to use @sc{posix} Basic Regular Expression syntax.
-@code{regcomp} sets @var{preg}'s @code{syntax} field accordingly.
-
-@item REG_ICASE
-@vindex REG_ICASE
-@cindex ignoring case
-says to ignore case; @code{regcomp} sets @var{preg}'s @code{translate}
-field to a translate table which ignores case, replacing anything you've
-put there before.
-
-@item REG_NOSUB
-@vindex REG_NOSUB
-says to set @var{preg}'s @code{no_sub} field; @pxref{POSIX Matching},
-for what this means.
-
-@item REG_NEWLINE
-@vindex REG_NEWLINE
-says that a:
-
-@itemize @bullet
-
-@item
-match-any-character operator (@pxref{Match-any-character
-Operator}) doesn't match a newline.
-
-@item
-nonmatching list not containing a newline (@pxref{List
-Operators}) matches a newline.
-
-@item
-match-beginning-of-line operator (@pxref{Match-beginning-of-line
-Operator}) matches the empty string immediately after a newline,
-regardless of how @code{REG_NOTBOL} is set (@pxref{POSIX Matching}, for
-an explanation of @code{REG_NOTBOL}).
-
-@item
-match-end-of-line operator (@pxref{Match-end-of-line
-Operator}) matches the empty string immediately before a newline,
-regardless of how @code{REG_NOTEOL} is set (@pxref{POSIX Matching},
-for an explanation of @code{REG_NOTEOL}).
-
-@end itemize
-
-@end table
-
-If @code{regcomp} successfully compiles the regular expression, it
-returns zero and sets @code{*@var{preg}} to the compiled
-pattern. Except for @code{syntax} (which it sets as explained above), it
-also sets the same fields the same way as does the @sc{gnu} compiling
-function (@pxref{GNU Regular Expression Compiling}).
-
-If @code{regcomp} can't compile the regular expression, it returns one
-of the error codes listed here. (Except when noted differently, the
-syntax in all examples below is basic regular expression syntax.)
-
-@table @code
-
-@comment repetitions
-@item REG_BADRPT
-For example, the consecutive repetition operators @samp{**} in
-@samp{a**} are invalid. As another example, if the syntax is extended
-regular expression syntax, then the repetition operator @samp{*} with
-nothing on which to operate in @samp{*} is invalid.
-
-@item REG_BADBR
-For example, the @var{count} @samp{-1} in @samp{a\@{-1} is invalid.
-
-@item REG_EBRACE
-For example, @samp{a\@{1} is missing a close-interval operator.
-
-@comment lists
-@item REG_EBRACK
-For example, @samp{[a} is missing a close-list operator.
-
-@item REG_ERANGE
-For example, the range ending point @samp{a} that collates lower than
-does its starting point @samp{z} in @samp{[z-a]} is invalid. Also, the
-range with the character class @samp{[:alpha:]} as its starting point in
-@samp{[[:alpha:]-|]} is invalid.
-
-@item REG_ECTYPE
-For example, the character class name @samp{foo} in @samp{[[:foo:]]} is
-invalid.
-
-@comment groups
-@item REG_EPAREN
-For example, @samp{a\)} is missing an open-group operator and @samp{\(a}
-is missing a close-group operator.
-
-@item REG_ESUBREG
-For example, the back reference @samp{\2} that refers to a nonexistent
-subexpression in @samp{\(a\)\2} is invalid.
-
-@comment unfinished business
-
-@item REG_EEND
-Returned when a regular expression causes no other more specific error.
-
-@item REG_EESCAPE
-For example, the trailing backslash @samp{\} in @samp{a\} is invalid, as is the
-one in @samp{\}.
-
-@comment kitchen sink
-@item REG_BADPAT
-For example, in the extended regular expression syntax, the empty group
-@samp{()} in @samp{a()b} is invalid.
-
-@comment internal
-@item REG_ESIZE
-Returned when a regular expression needs a pattern buffer larger than
-65536 bytes.
-
-@item REG_ESPACE
-Returned when a regular expression causes Regex to run out of memory.
-
-@end table
-
-
-@node POSIX Matching, Reporting Errors, POSIX Regular Expression Compiling, POSIX Regex Functions
-@subsection POSIX Matching
-
-Matching the @sc{posix} way means trying to match a null-terminated
-string starting at its first character. Once you've compiled a pattern
-into a pattern buffer (@pxref{POSIX Regular Expression Compiling}), you
-can ask the matcher to match that pattern against a string using:
-
-@findex regexec
-@example
-int
-regexec (const regex_t *@var{preg}, const char *@var{string},
- size_t @var{nmatch}, regmatch_t @var{pmatch}[], int @var{eflags})
-@end example
-
-@noindent
-@var{preg} is the address of a pattern buffer for a compiled pattern.
-@var{string} is the string you want to match.
-
-@xref{Using Byte Offsets}, for an explanation of @var{pmatch}. If you
-pass zero for @var{nmatch} or you compiled @var{preg} with the
-compilation flag @code{REG_NOSUB} set, then @code{regexec} will ignore
-@var{pmatch}; otherwise, you must allocate it to have at least
-@var{nmatch} elements. @code{regexec} will record @var{nmatch} byte
-offsets in @var{pmatch}, and set to @math{-1} any unused elements up to
-@code{@var{pmatch}[@var{nmatch} - 1]}.
-
-@var{eflags} specifies @dfn{execution flags}---namely, the two bits
-@code{REG_NOTBOL} and @code{REG_NOTEOL} (defined in @file{regex.h}). If
-you set @code{REG_NOTBOL}, then the match-beginning-of-line operator
-(@pxref{Match-beginning-of-line Operator}) always fails to match.
-This lets you match against pieces of a line, as you would need to if,
-say, searching for repeated instances of a given pattern in a line; it
-would work correctly for patterns both with and without
-match-beginning-of-line operators. @code{REG_NOTEOL} works analogously
-for the match-end-of-line operator (@pxref{Match-end-of-line
-Operator}); it exists for symmetry.
-
-@code{regexec} tries to find a match for @var{preg} in @var{string}
-according to the syntax in @var{preg}'s @code{syntax} field.
-(@xref{POSIX Regular Expression Compiling}, for how to set it.) The
-function returns zero if the compiled pattern matches @var{string} and
-@code{REG_NOMATCH} (defined in @file{regex.h}) if it doesn't.
-
-@node Reporting Errors, Using Byte Offsets, POSIX Matching, POSIX Regex Functions
-@subsection Reporting Errors
-
-If either @code{regcomp} or @code{regexec} fails, it returns a nonzero
-error code, the possibilities for which are defined in @file{regex.h}.
-@xref{POSIX Regular Expression Compiling}, and @ref{POSIX Matching}, for
-what these codes mean. To get an error string corresponding to these
-codes, you can use:
-
-@findex regerror
-@example
-size_t
-regerror (int @var{errcode},
- const regex_t *@var{preg},
- char *@var{errbuf},
- size_t @var{errbuf_size})
-@end example
-
-@noindent
-@var{errcode} is an error code, @var{preg} is the address of the pattern
-buffer which provoked the error, @var{errbuf} is the error buffer, and
-@var{errbuf_size} is @var{errbuf}'s size.
-
-@code{regerror} returns the size in bytes of the error string
-corresponding to @var{errcode} (including its terminating null). If
-@var{errbuf} and @var{errbuf_size} are nonzero, it also returns in
-@var{errbuf} the first @math{@var{errbuf_size} - 1} characters of the
-error string, followed by a null.
-@var{errbuf_size} must be a nonnegative number less than or equal to the
-size in bytes of @var{errbuf}.
-
-You can call @code{regerror} with a null @var{errbuf} and a zero
-@var{errbuf_size} to determine how large @var{errbuf} need be to
-accommodate @code{regerror}'s error string.
-
-@node Using Byte Offsets, Freeing POSIX Pattern Buffers, Reporting Errors, POSIX Regex Functions
-@subsection Using Byte Offsets
-
-In @sc{posix}, variables of type @code{regmatch_t} hold information
-analogous, but not identical, to @sc{gnu}'s registers (@pxref{Using
-Registers}). To get information about registers in @sc{posix}, pass to
-@code{regexec} a nonzero @var{pmatch} of type @code{regmatch_t}, i.e.,
-the address of a structure of this type, defined in
-@file{regex.h}:
-
-@tindex regmatch_t
-@example
-typedef struct
-@{
- regoff_t rm_so;
- regoff_t rm_eo;
-@} regmatch_t;
-@end example
-
-When reading in @ref{Using Registers}, about how the matching function
-stores the information into the registers, substitute @var{pmatch} for
-@var{regs}, @code{@w{@var{pmatch}[@var{i}].}rm_so} for
-@code{@w{@var{regs}->}start[@var{i}]} and
-@code{@w{@var{pmatch}[@var{i}].}rm_eo} for
-@code{@w{@var{regs}->}end[@var{i}]}.
-
-@node Freeing POSIX Pattern Buffers, , Using Byte Offsets, POSIX Regex Functions
-@subsection Freeing POSIX Pattern Buffers
-
-To free any allocated fields of a pattern buffer, use:
-
-@findex regfree
-@example
-void
-regfree (regex_t *@var{preg})
-@end example
-
-@noindent
-@var{preg} is the pattern buffer whose allocated fields you want freed.
-@code{regfree} also sets @var{preg}'s @code{allocated} and @code{used}
-fields to zero. After freeing a pattern buffer, you need to again
-compile a regular expression in it (@pxref{POSIX Regular Expression
-Compiling}) before passing it to the matching function (@pxref{POSIX
-Matching}).
-
-
-@node BSD Regex Functions, , POSIX Regex Functions, Programming with Regex
-@section BSD Regex Functions
-
-If you're writing code that has to be Berkeley @sc{unix} compatible,
-you'll need to use these functions, whose interfaces are the same as those
-in Berkeley @sc{unix}.
-
-@menu
-* BSD Regular Expression Compiling:: re_comp ()
-* BSD Searching:: re_exec ()
-@end menu
-
-@node BSD Regular Expression Compiling, BSD Searching, , BSD Regex Functions
-@subsection BSD Regular Expression Compiling
-
-With Berkeley @sc{unix}, you can only search for a given regular
-expression; you can't match one. To search for it, you must first
-compile it. Before you compile it, you must indicate the regular
-expression syntax you want it compiled according to by setting the
-variable @code{re_syntax_options} (declared in @file{regex.h}) to some
-syntax (@pxref{Regular Expression Syntax}).
-
-To compile a regular expression use:
-
-@findex re_comp
-@example
-char *
-re_comp (char *@var{regex})
-@end example
-
-@noindent
-@var{regex} is the address of a null-terminated regular expression.
-@code{re_comp} uses an internal pattern buffer, so you can use only the
-most recently compiled pattern buffer. This means that if you want to
-use a given regular expression that you've already compiled---but it
-isn't the latest one you've compiled---you'll have to recompile it. If
-you call @code{re_comp} with the null string (@emph{not} the empty
-string) as the argument, it doesn't change the contents of the pattern
-buffer.
-
-If @code{re_comp} successfully compiles the regular expression, it
-returns zero. If it can't compile the regular expression, it returns
-an error string. @code{re_comp}'s error messages are identical to those
-of @code{re_compile_pattern} (@pxref{GNU Regular Expression
-Compiling}).
-
-@node BSD Searching, , BSD Regular Expression Compiling, BSD Regex Functions
-@subsection BSD Searching
-
-Searching the Berkeley @sc{unix} way means searching in a string
-starting at its first character and trying successive positions within
-it to find a match. Once you've compiled a pattern using @code{re_comp}
-(@pxref{BSD Regular Expression Compiling}), you can ask Regex
-to search for that pattern in a string using:
-
-@findex re_exec
-@example
-int
-re_exec (char *@var{string})
-@end example
-
-@noindent
-@var{string} is the address of the null-terminated string in which you
-want to search.
-
-@code{re_exec} returns either 1 for success or 0 for failure. It
-automatically uses a @sc{gnu} fastmap (@pxref{Searching with Fastmaps}).
-
-
-@node Copying, Index, Programming with Regex, Top
-@appendix GNU GENERAL PUBLIC LICENSE
-@center Version 2, June 1991
-
-@display
-Copyright @copyright{} 1989, 1991 Free Software Foundation, Inc.
-675 Mass Ave, Cambridge, MA 02139, USA
-
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-@end display
-
-@unnumberedsec Preamble
-
- The licenses for most software are designed to take away your
-freedom to share and change it. By contrast, the GNU General Public
-License is intended to guarantee your freedom to share and change free
-software---to make sure the software is free for all its users. This
-General Public License applies to most of the Free Software
-Foundation's software and to any other program whose authors commit to
-using it. (Some other Free Software Foundation software is covered by
-the GNU Library General Public License instead.) You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-this service if you wish), that you receive source code or can get it
-if you want it, that you can change the software or use pieces of it
-in new free programs; and that you know you can do these things.
-
- To protect your rights, we need to make restrictions that forbid
-anyone to deny you these rights or to ask you to surrender the rights.
-These restrictions translate to certain responsibilities for you if you
-distribute copies of the software, or if you modify it.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must give the recipients all the rights that
-you have. You must make sure that they, too, receive or can get the
-source code. And you must show them these terms so they know their
-rights.
-
- We protect your rights with two steps: (1) copyright the software, and
-(2) offer you this license which gives you legal permission to copy,
-distribute and/or modify the software.
-
- Also, for each author's protection and ours, we want to make certain
-that everyone understands that there is no warranty for this free
-software. If the software is modified by someone else and passed on, we
-want its recipients to know that what they have is not the original, so
-that any problems introduced by others will not reflect on the original
-authors' reputations.
-
- Finally, any free program is threatened constantly by software
-patents. We wish to avoid the danger that redistributors of a free
-program will individually obtain patent licenses, in effect making the
-program proprietary. To prevent this, we have made it clear that any
-patent must be licensed for everyone's free use or not licensed at all.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
-@iftex
-@unnumberedsec TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-@end iftex
-@ifinfo
-@center TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-@end ifinfo
-
-@enumerate
-@item
-This License applies to any program or other work which contains
-a notice placed by the copyright holder saying it may be distributed
-under the terms of this General Public License. The ``Program'', below,
-refers to any such program or work, and a ``work based on the Program''
-means either the Program or any derivative work under copyright law:
-that is to say, a work containing the Program or a portion of it,
-either verbatim or with modifications and/or translated into another
-language. (Hereinafter, translation is included without limitation in
-the term ``modification''.) Each licensee is addressed as ``you''.
-
-Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope. The act of
-running the Program is not restricted, and the output from the Program
-is covered only if its contents constitute a work based on the
-Program (independent of having been made by running the Program).
-Whether that is true depends on what the Program does.
-
-@item
-You may copy and distribute verbatim copies of the Program's
-source code as you receive it, in any medium, provided that you
-conspicuously and appropriately publish on each copy an appropriate
-copyright notice and disclaimer of warranty; keep intact all the
-notices that refer to this License and to the absence of any warranty;
-and give any other recipients of the Program a copy of this License
-along with the Program.
-
-You may charge a fee for the physical act of transferring a copy, and
-you may at your option offer warranty protection in exchange for a fee.
-
-@item
-You may modify your copy or copies of the Program or any portion
-of it, thus forming a work based on the Program, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-@enumerate a
-@item
-You must cause the modified files to carry prominent notices
-stating that you changed the files and the date of any change.
-
-@item
-You must cause any work that you distribute or publish, that in
-whole or in part contains or is derived from the Program or any
-part thereof, to be licensed as a whole at no charge to all third
-parties under the terms of this License.
-
-@item
-If the modified program normally reads commands interactively
-when run, you must cause it, when started running for such
-interactive use in the most ordinary way, to print or display an
-announcement including an appropriate copyright notice and a
-notice that there is no warranty (or else, saying that you provide
-a warranty) and that users may redistribute the program under
-these conditions, and telling the user how to view a copy of this
-License. (Exception: if the Program itself is interactive but
-does not normally print such an announcement, your work based on
-the Program is not required to print an announcement.)
-@end enumerate
-
-These requirements apply to the modified work as a whole. If
-identifiable sections of that work are not derived from the Program,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works. But when you
-distribute the same sections as part of a whole which is a work based
-on the Program, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Program.
-
-In addition, mere aggregation of another work not based on the Program
-with the Program (or with a work based on the Program) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-@item
-You may copy and distribute the Program (or a work based on it,
-under Section 2) in object code or executable form under the terms of
-Sections 1 and 2 above provided that you also do one of the following:
-
-@enumerate a
-@item
-Accompany it with the complete corresponding machine-readable
-source code, which must be distributed under the terms of Sections
-1 and 2 above on a medium customarily used for software interchange; or,
-
-@item
-Accompany it with a written offer, valid for at least three
-years, to give any third party, for a charge no more than your
-cost of physically performing source distribution, a complete
-machine-readable copy of the corresponding source code, to be
-distributed under the terms of Sections 1 and 2 above on a medium
-customarily used for software interchange; or,
-
-@item
-Accompany it with the information you received as to the offer
-to distribute corresponding source code. (This alternative is
-allowed only for noncommercial distribution and only if you
-received the program in object code or executable form with such
-an offer, in accord with Subsection b above.)
-@end enumerate
-
-The source code for a work means the preferred form of the work for
-making modifications to it. For an executable work, complete source
-code means all the source code for all modules it contains, plus any
-associated interface definition files, plus the scripts used to
-control compilation and installation of the executable. However, as a
-special exception, the source code distributed need not include
-anything that is normally distributed (in either source or binary
-form) with the major components (compiler, kernel, and so on) of the
-operating system on which the executable runs, unless that component
-itself accompanies the executable.
-
-If distribution of executable or object code is made by offering
-access to copy from a designated place, then offering equivalent
-access to copy the source code from the same place counts as
-distribution of the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-@item
-You may not copy, modify, sublicense, or distribute the Program
-except as expressly provided under this License. Any attempt
-otherwise to copy, modify, sublicense or distribute the Program is
-void, and will automatically terminate your rights under this License.
-However, parties who have received copies, or rights, from you under
-this License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-@item
-You are not required to accept this License, since you have not
-signed it. However, nothing else grants you permission to modify or
-distribute the Program or its derivative works. These actions are
-prohibited by law if you do not accept this License. Therefore, by
-modifying or distributing the Program (or any work based on the
-Program), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Program or works based on it.
-
-@item
-Each time you redistribute the Program (or any work based on the
-Program), the recipient automatically receives a license from the
-original licensor to copy, distribute or modify the Program subject to
-these terms and conditions. You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties to
-this License.
-
-@item
-If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Program at all. For example, if a patent
-license would not permit royalty-free redistribution of the Program by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Program.
-
-If any portion of this section is held invalid or unenforceable under
-any particular circumstance, the balance of the section is intended to
-apply and the section as a whole is intended to apply in other
-circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system, which is
-implemented by public license practices. Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-@item
-If the distribution and/or use of the Program is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Program under this License
-may add an explicit geographical distribution limitation excluding
-those countries, so that distribution is permitted only in or among
-countries not thus excluded. In such case, this License incorporates
-the limitation as if written in the body of this License.
-
-@item
-The Free Software Foundation may publish revised and/or new versions
-of the General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
-Each version is given a distinguishing version number. If the Program
-specifies a version number of this License which applies to it and ``any
-later version'', you have the option of following the terms and conditions
-either of that version or of any later version published by the Free
-Software Foundation. If the Program does not specify a version number of
-this License, you may choose any version ever published by the Free Software
-Foundation.
-
-@item
-If you wish to incorporate parts of the Program into other free
-programs whose distribution conditions are different, write to the author
-to ask for permission. For software which is copyrighted by the Free
-Software Foundation, write to the Free Software Foundation; we sometimes
-make exceptions for this. Our decision will be guided by the two goals
-of preserving the free status of all derivatives of our free software and
-of promoting the sharing and reuse of software generally.
-
-@iftex
-@heading NO WARRANTY
-@end iftex
-@ifinfo
-@center NO WARRANTY
-@end ifinfo
-
-@item
-BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
-FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
-OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
-PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
-OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
-TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
-PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
-REPAIR OR CORRECTION.
-
-@item
-IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
-REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
-INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
-OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
-TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
-YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
-PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
-POSSIBILITY OF SUCH DAMAGES.
-@end enumerate
-
-@iftex
-@heading END OF TERMS AND CONDITIONS
-@end iftex
-@ifinfo
-@center END OF TERMS AND CONDITIONS
-@end ifinfo
-
-@page
-@unnumberedsec Appendix: How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least
-the ``copyright'' line and a pointer to where the full notice is found.
-
-@smallexample
-@var{one line to give the program's name and a brief idea of what it does.}
-Copyright (C) 19@var{yy} @var{name of author}
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-@end smallexample
-
-Also add information on how to contact you by electronic and paper mail.
-
-If the program is interactive, make it output a short notice like this
-when it starts in an interactive mode:
-
-@smallexample
-Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
-Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
-This is free software, and you are welcome to redistribute it
-under certain conditions; type `show c' for details.
-@end smallexample
-
-The hypothetical commands @samp{show w} and @samp{show c} should show
-the appropriate parts of the General Public License. Of course, the
-commands you use may be called something other than @samp{show w} and
-@samp{show c}; they could even be mouse-clicks or menu items---whatever
-suits your program.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a ``copyright disclaimer'' for the program, if
-necessary. Here is a sample; alter the names:
-
-@example
-Yoyodyne, Inc., hereby disclaims all copyright interest in the program
-`Gnomovision' (which makes passes at compilers) written by James Hacker.
-
-@var{signature of Ty Coon}, 1 April 1989
-Ty Coon, President of Vice
-@end example
-
-This General Public License does not permit incorporating your program into
-proprietary programs. If your program is a subroutine library, you may
-consider it more useful to permit linking proprietary applications with the
-library. If this is what you want to do, use the GNU Library General
-Public License instead of this License.
-
-
-@node Index, , Copying, Top
-@unnumbered Index
-
-@printindex cp
-
-@contents
-
-@bye
diff --git a/gnu/lib/libregex/test/TAGS b/gnu/lib/libregex/test/TAGS
deleted file mode 100644
index d3aad75..0000000
--- a/gnu/lib/libregex/test/TAGS
+++ /dev/null
@@ -1,373 +0,0 @@
-
-.././regex.c,4137
-#define AT_STRINGS_BEG(3078,98376
-#define AT_STRINGS_END(3079,98449
-#define AT_WORD_BOUNDARY(3093,99002
-#define BUF_PUSH(887,24995
-#define BUF_PUSH_2(895,25208
-#define BUF_PUSH_3(904,25437
-#define DEBUG_POP(2336,74614
-#define DEBUG_PRINT1(471,14296
-#define DEBUG_PRINT1(785,21263
-#define DEBUG_PRINT2(472,14342
-#define DEBUG_PRINT3(473,14398
-#define DEBUG_PRINT3(787,21316
-#define DEBUG_PRINT4(474,14462
-#define DEBUG_PRINT_COMPILED_PATTERN(475,14534
-#define DEBUG_PRINT_COMPILED_PATTERN(789,21386
-#define DEBUG_PRINT_DOUBLE_STRING(477,14637
-#define DEBUG_PUSH(2338,74684
-#define DEBUG_STATEMENT(470,14267
-#define DOUBLE_FAIL_STACK(2299,73230
-#define EVER_MATCHED_SOMETHING(3028,96680
-#define EXTEND_BUFFER(941,26834
-#define EXTRACT_NUMBER(403,12499
-#define EXTRACT_NUMBER(422,12960
-#define EXTRACT_NUMBER_AND_INCR(430,13181
-#define EXTRACT_NUMBER_AND_INCR(448,13583
-#define FAIL_STACK_EMPTY(2271,72289
-#define FAIL_STACK_FULL(2273,72404
-#define FAIL_STACK_PTR_EMPTY(2272,72344
-#define FAIL_STACK_TOP(2274,72473
-#define FIRST_STRING_P(221,5848
-#define FREE_VAR(3100,99186
-#define FREE_VARIABLES(3101,99240
-#define FREE_VARIABLES(3116,99751
-#define GET_BUFFER_SPACE(882,24802
-#define GET_UNSIGNED_NUMBER(1017,29312
-#define INIT_FAIL_STACK(2279,72612
-#define INSERT_JUMP(923,26079
-#define INSERT_JUMP2(927,26236
-#define ISALNUM(147,3407
-#define ISALPHA(148,3455
-#define ISBLANK(135,3062
-#define ISBLANK(137,3116
-#define ISCNTRL(149,3503
-#define ISDIGIT(146,3359
-#define ISGRAPH(140,3185
-#define ISGRAPH(142,3239
-#define ISLOWER(150,3551
-#define ISPRINT(145,3311
-#define ISPUNCT(151,3599
-#define ISSPACE(152,3647
-#define ISUPPER(153,3695
-#define ISXDIGIT(154,3743
-#define IS_ACTIVE(3026,96578
-#define IS_CHAR_CLASS(1035,29793
-#define MATCHED_SOMETHING(3027,96621
-#define MAX(233,6292
-#define MIN(234,6334
-#define PATFETCH(852,23769
-#define PATFETCH_RAW(860,24020
-#define POINTER_TO_OFFSET(3050,97433
-#define POP_FAILURE_ITEM(2331,74426
-#define POP_FAILURE_POINT(2461,79538
-#define PREFETCH(3064,97916
-#define PUSH_FAILURE_ITEM(2327,74253
-#define PUSH_FAILURE_POINT(2352,75048
-#define PUSH_PATTERN_OP(2317,73841
-#define REGEX_REALLOCATE(185,4875
-#define REGEX_REALLOCATE(210,5495
-#define REGEX_TALLOC(227,6137
-#define REG_MATCH_NULL_STRING_P(3025,96511
-#define REG_UNSET(3055,97649
-#define RETALLOC(226,6058
-#define SET_LIST_BIT(1011,29089
-#define SET_REGS_MATCHED(3034,96936
-#define SIGN_EXTEND_CHAR(166,4109
-#define SIGN_EXTEND_CHAR(169,4217
-#define STORE_JUMP(915,25800
-#define STORE_JUMP2(919,25917
-#define STORE_NUMBER(384,11919
-#define STORE_NUMBER_AND_INCR(394,12242
-#define STREQ(231,6244
-#define SYNTAX(120,2790
-#define TALLOC(225,6003
-#define TRANSLATE(873,24503
-#define WORDCHAR_P(3086,98755
-alt_match_null_string_p 4466,149039
-#define assert(782,21217
-at_begline_loc_p 2131,67979
-at_endline_loc_p 2150,68557
-#define bcmp(54,1656
-bcmp_translate 4591,151831
-#define bcopy(57,1726
-typedef char boolean;236,6377
-#define bzero(60,1793
-common_op_match_null_string_p 4503,149895
-compile_range 2200,69997
-} compile_stack_elt_t;990,28602
-} compile_stack_type;998,28748
-extract_number 411,12714
-extract_number_and_incr 438,13370
-} fail_stack_type;2269,72269
-group_in_compile_stack 2172,69174
-group_match_null_string_p 4357,145267
-init_syntax_once 94,2365
-insert_op1 2091,67107
-insert_op2 2110,67475
-#define isascii(131,3018
-typedef int pattern_offset_t;981,28388
-print_compiled_pattern 726,19792
-print_double_string 753,20605
-print_fastmap 486,14835
-print_partial_compiled_pattern 518,15475
-re_comp 4650,153479
-re_compile_fastmap 2532,82428
-re_compile_pattern 4617,152520
-re_exec 4688,154373
-re_match 3136,100557
-re_match_2 3161,101399
-} re_opcode_t;378,11781
-re_search 2844,90872
-re_search_2 2877,91998
-re_set_registers 2817,90247
-re_set_syntax 808,22087
-regcomp 4736,155972
-regerror 4876,160188
-regex_compile 1062,30922
-regexec 4811,158371
-regfree 4920,161247
-} register_info_type;3023,96488
-typedef unsigned regnum_t;974,28172
-store_op1 2063,66535
-store_op2 2076,66768
-typedef const unsigned 2262,72103
-
-.././regex.h,230
-#define _RE_ARGS(394,14981
-#define _RE_ARGS(398,15036
-} reg_errcode_t;270,10874
-typedef unsigned reg_syntax_t;38,1503
-typedef struct re_pattern_buffer regex_t;346,13556
-} regmatch_t;382,14634
-typedef int regoff_t;354,13814
-
-getpagesize.h,84
-#define getpagesize(12,137
-#define getpagesize(15,191
-#define getpagesize(20,302
-
-test.h,436
-#define BRACES_TO_OPS(107,3169
-#define INVALID_PATTERN(110,3328
-#define MATCH_SELF(114,3429
-#define PARENS_TO_OPS(108,3248
-#define SAFE_STRLEN(14,201
-#define TEST_POSITIONED_MATCH(116,3470
-#define TEST_REGISTERS(104,3011
-#define TEST_REGISTERS_2(97,2703
-#define TEST_SEARCH(127,3875
-#define TEST_SEARCH_2(123,3720
-#define TEST_TRUNCATED_MATCH(120,3608
-typedef enum { false = 0, true = 1 } boolean;16,255
-} test_type;33,572
-
-alloca.c,128
-alloca 141,3996
-find_stack_direction 85,2553
-} header;127,3538
-typedef void *pointer;51,1721
-typedef char *pointer;53,1778
-
-bsd-interf.c,51
-test_berk_search 8,106
-test_bsd_interface 33,738
-
-debugmalloc.c,395
-#define TRACE(8,143
-#define TRACE1(9,197
-#define TRACE2(10,254
-#define TRACE3(11,319
-#define TRACE4(12,392
-#define USER_ALLOC(61,1440
-typedef char *address;15,480
-} *chunk;54,1225
-chunk_delete 115,2778
-chunk_insert 96,2294
-chunk_to_mem 79,1916
-free 261,5604
-free_list_available 175,3947
-malloc 203,4343
-mem_to_chunk 68,1703
-realloc 242,5309
-validate_list 153,3478
-xsbrk 21,545
-
-emacsmalloc.c,574
-#define ASSERT(178,5884
-#define ASSERT(181,5985
-#define CHAIN(166,5430
-#define bcmp(73,2821
-#define bcopy(72,2777
-#define bzero(74,2868
-calloc 603,15983
-free 484,13255
-get_lim_data 736,18517
-get_lim_data 752,18767
-get_lim_data 759,18860
-getpool 374,10263
-malloc 413,11133
-malloc_init 218,6863
-malloc_mem_free 707,17940
-malloc_mem_used 688,17683
-malloc_stats 663,17320
-malloc_usable_size 233,7147
-memalign 618,16164
-morecore 244,7380
-realloc 541,14424
-#define start_of_data(110,3486
-#define start_of_data(115,3546
-sys_sbrk 815,20804
-valloc 645,17031
-
-fileregex.c,13
-main 11,156
-
-g++malloc.c,1543
-#define UPDATE_STATS(33,1090
-#define UPDATE_STATS(35,1131
-static inline int aligned_OK(343,11189
-void* calloc(1039,28692
-void cfree(1048,28894
-static inline void* chunk2mem(619,19336
-#define clear_inuse(592,18767
-static inline void consollink(716,21398
-static void do_free_stats(544,18016
-static void do_malloc_stats(534,17741
-766,22304
-extern 762,22235
- for 1260,34165
-void free(1028,28553
-static inline void frontlink(732,21717
-static unsigned int gcd(557,18251
- if 1212,32427
- if 1216,32582
- if 1220,32737
- if 1224,32880
- if 1229,33094
- if 1233,33251
- if 1238,33463
- if 1242,33609
- if 1247,33739
-#define inuse(590,18680
-static inline unsigned int lcm(580,18540
-void* malloc(939,26370
-static mchunkptr malloc_find_space(858,24561
-void malloc_stats(1201,32256
-unsigned int malloc_usable_size(1054,28936
-static volatile void malloc_user_error(286,9757
-static void malloc_user_error(288,9804
-typedef struct malloc_bin* mbinptr;320,10636
-typedef struct malloc_chunk* mchunkptr;309,10247
-static inline mchunkptr mem2chunk(643,19759
-void* memalign(1118,30363
-#define next_chunk(600,18910
-#define prev_chunk(604,19023
-void* realloc(1071,29263
-static inline unsigned int request2size(335,10993
-mchunkptr sanity_check(628,19486
-#define set_inuse(591,18723
-static inline void set_size(609,19149
-static inline mbinptr size2bin(499,16914
-static inline void split(685,20463
-static 768,22312
-static inline void unlink(671,20263
-void* valloc(1194,32107
-typedef volatile void 760,22184
-764,22271
-
-iregex.c,54
-main 20,390
-print_regs 141,2638
-scanstring 87,1839
-
-main.c,13
-main 12,242
-
-malloc-test.c,112
-#define BITS_BLOCK(12,168
-#define BITS_MASK(13,228
-} bits_list_type;6,56
-init_bits_list 16,311
-main(32,621
-
-other.c,18
-test_others 6,96
-
-printchar.c,15
-printchar 2,5
-
-psx-basic.c,23
-test_posix_basic 7,84
-
-psx-extend.c,26
-test_posix_extended 7,88
-
-psx-generic.c,26
-test_posix_generic 8,117
-
-psx-group.c,20
-test_grouping 7,92
-
-psx-interf.c,416
-fill_pmatch 174,4802
-get_error_string 18,260
-init_pattern_buffer 49,1434
-test_compile 67,1925
-test_eflags 245,6876
-test_error_code_allocation 562,16619
-test_error_code_message 524,15247
-test_ignore_case 303,8525
-test_newline 330,9199
-test_nsub 117,3319
-test_pmatch 188,5121
-test_posix_interface 614,18719
-test_posix_match 359,9938
-test_regcomp 138,3725
-test_regerror 592,17621
-test_regexec 394,10783
-
-psx-interv.c,21
-test_intervals 6,93
-
-test.c,607
-#define SET_FASTMAP(447,13999
-#define bcmp(18,362
-#define bcopy(19,415
-#define bzero(20,473
-compile_and_print_pattern 666,19653
-concat 97,2673
-delimiters_to_ops 571,17477
-general_test 115,2996
-invalid_pattern 542,16821
-#define memcmp(26,611
-#define memcpy(27,660
-print_pattern_info 635,18998
-set_all_registers 58,1390
-test_all_registers 506,15567
-test_case_fold 682,19993
-test_fastmap 460,14363
-test_fastmap_search 474,14668
-test_match 776,22235
-test_match_2 766,22040
-test_match_n_times 715,20798
-test_search_return 408,13011
-valid_nonposix_pattern 646,19239
-valid_pattern 557,17182
-
-tregress.c,208
-#define SIMPLE_MATCH(74,1463
-#define SIMPLE_NONMATCH(75,1528
-do_match 78,1599
-itoa 10,199
-simple_compile 44,882
-simple_fail 21,353
-simple_fastmap 55,1115
-simple_search 100,2020
-test_regress 124,2513
-
-upcase.c,0
-
-xmalloc.c,14
-xmalloc 9,87
OpenPOWER on IntegriCloud