diff options
author | sjg <sjg@FreeBSD.org> | 2013-09-05 20:18:59 +0000 |
---|---|---|
committer | sjg <sjg@FreeBSD.org> | 2013-09-05 20:18:59 +0000 |
commit | 62bb1062226d3ce6a2350808256a25508978352d (patch) | |
tree | 22b131dceb13c3df96da594fbaadb693504797c7 /gnu | |
parent | 72ab90509b3a51ab361bf710338f2ef44a4e360d (diff) | |
parent | 04932445481c2cb89ff69a83b961bdef3d64757e (diff) | |
download | FreeBSD-src-62bb1062226d3ce6a2350808256a25508978352d.zip FreeBSD-src-62bb1062226d3ce6a2350808256a25508978352d.tar.gz |
Merge from head
Diffstat (limited to 'gnu')
77 files changed, 238 insertions, 17827 deletions
diff --git a/gnu/lib/libdialog/Makefile b/gnu/lib/libdialog/Makefile index f0979ac..395b02d 100644 --- a/gnu/lib/libdialog/Makefile +++ b/gnu/lib/libdialog/Makefile @@ -3,17 +3,17 @@ DIALOG= ${.CURDIR}/../../../contrib/dialog LIB= dialog -SHLIB_MAJOR= 7 -SRCS= argv.c arrows.c buttons.c calendar.c checklist.c columns.c \ - dlg_keys.c editbox.c fselect.c formbox.c guage.c help.c \ - inputbox.c inputstr.c menubox.c mixedform.c mixedgauge.c \ - mouse.c mousewget.c msgbox.c pause.c prgbox.c progressbox.c \ - rc.c tailbox.c textbox.c timebox.c trace.c ui_getc.c util.c \ - version.c yesno.c +SHLIB_MAJOR= 8 +SRCS= argv.c arrows.c buildlist.c buttons.c calendar.c checklist.c \ + columns.c dlg_keys.c editbox.c fselect.c formbox.c guage.c \ + help.c inputbox.c inputstr.c menubox.c mixedform.c \ + mixedgauge.c mouse.c mousewget.c msgbox.c pause.c prgbox.c \ + progressbox.c rangebox.c rc.c tailbox.c textbox.c timebox.c \ + trace.c treeview.c ui_getc.c util.c version.c yesno.c INCS= dialog.h dlg_colors.h dlg_config.h dlg_keys.h MAN= dialog.3 -CFLAGS+= -I${.CURDIR} -I${DIALOG} -D_XOPEN_SOURCE_EXTENDED +CFLAGS+= -I${.CURDIR} -I${DIALOG} -D_XOPEN_SOURCE_EXTENDED -DGCC_UNUSED=__unused .PATH: ${DIALOG} WARNS?= 1 diff --git a/gnu/lib/libdialog/dlg_config.h b/gnu/lib/libdialog/dlg_config.h index 4dd6ab4..ccb4c30 100644 --- a/gnu/lib/libdialog/dlg_config.h +++ b/gnu/lib/libdialog/dlg_config.h @@ -5,9 +5,12 @@ * $FreeBSD$ */ -#define DIALOG_PATCHDATE 20100428 -#define DIALOG_VERSION "1.1" +#define CURSES_WACS_ARRAY _nc_wacs +#define CURSES_WACS_SYMBOLS 1 +#define DIALOG_PATCHDATE 20130523 +#define DIALOG_VERSION "1.2" #define HAVE_ALLOCA 1 +#define HAVE_BTOWC 1 #define HAVE_COLOR 1 #define HAVE_DIRENT_H 1 #define HAVE_DLG_FORMBOX 1 @@ -18,6 +21,7 @@ #define HAVE_FEOF_UNLOCKED 1 #define HAVE_FLUSHINP 1 #define HAVE_FSEEKO 1 +#define HAVE_GETATTRS 1 #define HAVE_GETBEGX 1 #define HAVE_GETBEGY 1 #define HAVE_GETBEGYX 1 @@ -42,7 +46,11 @@ #define HAVE_LIBNCURSESW 1 #define HAVE_LIMITS_H 1 #define HAVE_LOCALE_H 1 +#define HAVE_MBLEN 1 +#define HAVE_MBRLEN 1 +#define HAVE_MBRTOWC 1 #define HAVE_MBSTATE_T 1 +#define HAVE_MBTOWC 1 #define HAVE_MEMORY_H 1 #define HAVE_MIXEDGAUGE 1 #define HAVE_MMAP 1 @@ -75,8 +83,17 @@ #define HAVE_UNISTD_H 1 #define HAVE_USE_DEFAULT_COLORS 1 #define HAVE_WAITPID 1 -#define HAVE_WGET_WCH 1 +#define HAVE_WCHGAT 1 +#define HAVE_WCSRTOMBS 1 +#define HAVE_WCSTOMBS 1 +#define HAVE_WCTOB 1 +#define HAVE_WCTOMB 1 +#define HAVE_WCURSYNCUP 1 +#define HAVE_WGETPARENT 1 +#define HAVE_WHIPTAIL 1 +#define HAVE_WSYNCUP 1 #define HAVE_XDIALOG 1 +#define HAVE_XDIALOG2 1 #define HAVE__NC_FREE_AND_EXIT 1 #define ICONV_CONST const #define MIXEDCASE_FILENAMES 1 @@ -85,7 +102,7 @@ #define PACKAGE "dialog" #define RETSIGTYPE void #define STDC_HEADERS 1 -#define SYSTEM_NAME "freebsd9.0" +#define SYSTEM_NAME "freebsd10.0" #define TIME_WITH_SYS_TIME 1 #define TYPE_CHTYPE_IS_SCALAR 1 #define USE_WIDE_CURSES 1 diff --git a/gnu/lib/libgcc/Makefile b/gnu/lib/libgcc/Makefile index a06daa7..092e321 100644 --- a/gnu/lib/libgcc/Makefile +++ b/gnu/lib/libgcc/Makefile @@ -19,10 +19,6 @@ MK_SSP= no CFLAGS+= -DTARGET_ARM_EABI .endif -.if ${TARGET_CPUARCH} == "mips" -LIB= gcc -.endif - .PATH: ${GCCDIR}/config/${GCC_CPU} ${GCCDIR}/config ${GCCDIR} CFLAGS+= -DIN_GCC -DIN_LIBGCC2 -D__GCC_FLOAT_NOT_NEEDED \ diff --git a/gnu/lib/libregex/FREEBSD-upgrade b/gnu/lib/libregex/FREEBSD-upgrade deleted file mode 100644 index d072aaf..0000000 --- a/gnu/lib/libregex/FREEBSD-upgrade +++ /dev/null @@ -1,18 +0,0 @@ -$FreeBSD$ - -GNU regex (from glibc): - -Imported by: - -cvs -z 9 -d :pserver:anoncvs@sources.redhat.com:/cvs/glibc login -# enter "anoncvs" as the password -cvs -z 9 -d :pserver:anoncvs@sources.redhat.com:/cvs/glibc \ - co -r fedora-glibc-2_3_4-21 libc/posix libc/include -mkdir regex regex/posix -cd libc/posix -cp regcomp.c regex.c regex_internal.c regex_internal.h regexec.c $OLDPWD/regex/ -cp regex.h $OLDPWD/regex/posix/ -cd - -cp libc/include/regex.h regex/ -cd regex -cvs import src/gnu/lib/libregex FSF fedora-glibc-2_3_4-21 diff --git a/gnu/lib/libregex/Makefile b/gnu/lib/libregex/Makefile index 39692d4..db87416 100644 --- a/gnu/lib/libregex/Makefile +++ b/gnu/lib/libregex/Makefile @@ -5,16 +5,20 @@ SUBDIR= doc LIB= gnuregex SHLIB_MAJOR= 5 +REGEXDIR= ${.CURDIR}/../../../contrib/libgnuregex +.PATH: ${REGEXDIR} + SRCS= gnuregex.c INCSGROUPS= INCS WRINCS PXINCS INCS= regex.h.patched INCSNAME= regex.h INCSDIR= ${INCLUDEDIR}/gnu WRINCS= gnuregex.h -PXINCS= posix/regex.h +PXINCS= ${REGEXDIR}/regex.h PXINCSDIR= ${INCSDIR}/posix -CFLAGS+=-DHAVE_CONFIG_H -I${.CURDIR} +CFLAGS+= -D__attribute_warn_unused_result__="" +CFLAGS+= -DHAVE_CONFIG_H -I${.CURDIR} -I${REGEXDIR} CLEANFILES= regex.h.patched gnuregex.c regex.h.patched: regex.h diff --git a/gnu/lib/libregex/config.h b/gnu/lib/libregex/config.h index bf7f0a0..0076f3d 100644 --- a/gnu/lib/libregex/config.h +++ b/gnu/lib/libregex/config.h @@ -10,3 +10,6 @@ #define HAVE_WCRTOMB 1 #define HAVE_MBRTOWC 1 #define HAVE_WCSCOLL 1 +#define HAVE_ALLOCA 1 +#define HAVE_STDBOOL_H 1 +#define HAVE_STDINT_H 1 diff --git a/gnu/lib/libregex/posix/regex.h b/gnu/lib/libregex/posix/regex.h deleted file mode 100644 index b2d9a62..0000000 --- a/gnu/lib/libregex/posix/regex.h +++ /dev/null @@ -1,593 +0,0 @@ -/* Definitions for data structures and routines for the regular - expression library. - Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003 - Free Software Foundation, Inc. - This file is part of the GNU C Library. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _REGEX_H -#define _REGEX_H 1 - -#include <sys/types.h> - -/* Allow the use in C++ code. */ -#ifdef __cplusplus -extern "C" { -#endif - -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. */ - -#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS -/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it - should be there. */ -# include <stddef.h> -#endif - -/* The following two types have to be signed and unsigned integer type - wide enough to hold a value of a pointer. For most ANSI compilers - ptrdiff_t and size_t should be likely OK. Still size of these two - types is 2 for Microsoft C. Ugh... */ -typedef long int s_reg_t; -typedef unsigned long int active_reg_t; - -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings are chosen so that Emacs syntax - remains the value 0. The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. */ -typedef unsigned long int reg_syntax_t; - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. */ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. */ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \<digit> matches <digit>. - If not set, then \<digit> is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* If this bit is set, succeed as soon as we match the whole pattern, - without further backtracking. */ -#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) - -/* If this bit is set, do not process the GNU regex operators. - If not set, then the GNU regex operators are recognized. */ -#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) - -/* If this bit is set, turn on internal regex debugging. - If not set, and debugging was on, turn it off. - This only works if regex.c is compiled -DDEBUG. - We define this bit always, so that all that's needed to turn on - debugging is to recompile regex.c; the calling code can always have - this bit set, and it won't affect anything in the normal case. */ -#define RE_DEBUG (RE_NO_GNU_OPS << 1) - -/* If this bit is set, a syntactically invalid interval is treated as - a string of ordinary characters. For example, the ERE 'a{1' is - treated as 'a\{1'. */ -#define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) - -/* This bit is used internally like RE_CONTEXT_INDEP_ANCHORS but only - for ^, because it is difficult to scan the regex backwards to find - whether ^ should be special. */ -#define RE_CARET_ANCHORS_HERE (RE_ICASE << 1) - -/* If this bit is set, then \{ cannot be first in an bre or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_DUP (RE_CARET_ANCHORS_HERE << 1) - -/* If this bit is set, then no_sub will be set to 1 during - re_compile_pattern. */ -#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1) - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -extern reg_syntax_t re_syntax_options; - -/* Define combinations of the above bits for the standard possibilities. - (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ -/* [[[begin syntaxes]]] */ -#define RE_SYNTAX_EMACS 0 - -#define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ - | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) - -#define RE_SYNTAX_GNU_AWK \ - ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ - & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS \ - | RE_CONTEXT_INVALID_OPS )) - -#define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ - | RE_INTERVALS | RE_NO_GNU_OPS) - -#define RE_SYNTAX_GREP \ - (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ - | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ - | RE_NEWLINE_ALT) - -#define RE_SYNTAX_EGREP \ - (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ - | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ - | RE_NO_BK_VBAR) - -#define RE_SYNTAX_POSIX_EGREP \ - (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES \ - | RE_INVALID_INTERVAL_ORD) - -/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ -#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC - -#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC - -/* Syntax bits common to both basic and extended POSIX regex syntax. */ -#define _RE_SYNTAX_POSIX_COMMON \ - (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ - | RE_INTERVALS | RE_NO_EMPTY_RANGES) - -#define RE_SYNTAX_POSIX_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM | RE_CONTEXT_INVALID_DUP) - -/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes - RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this - isn't minimal, since other operators, such as \`, aren't disabled. */ -#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ - (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) - -#define RE_SYNTAX_POSIX_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ - | RE_CONTEXT_INVALID_OPS | RE_UNMATCHED_RIGHT_PAREN_ORD) - -/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INDEP_OPS is - removed and RE_NO_BK_REFS is added. */ -#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ - (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ - | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) -/* [[[end syntaxes]]] */ - -/* Maximum number of duplicates an interval can allow. Some systems - (erroneously) define this in other header files, but we want our - value, so remove any previous define. */ -#ifdef RE_DUP_MAX -# undef RE_DUP_MAX -#endif -/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ -#define RE_DUP_MAX (0x7fff) - - -/* POSIX `cflags' bits (i.e., information for `regcomp'). */ - -/* If this bit is set, then use extended regular expression syntax. - If not set, then use basic regular expression syntax. */ -#define REG_EXTENDED 1 - -/* If this bit is set, then ignore case when matching. - If not set, then case is significant. */ -#define REG_ICASE (REG_EXTENDED << 1) - -/* If this bit is set, then anchors do not match at newline - characters in the string. - If not set, then anchors do match at newlines. */ -#define REG_NEWLINE (REG_ICASE << 1) - -/* If this bit is set, then report only success or fail in regexec. - If not set, then returns differ between not matching and errors. */ -#define REG_NOSUB (REG_NEWLINE << 1) - - -/* POSIX `eflags' bits (i.e., information for regexec). */ - -/* If this bit is set, then the beginning-of-line operator doesn't match - the beginning of the string (presumably because it's not the - beginning of a line). - If not set, then the beginning-of-line operator does match the - beginning of the string. */ -#define REG_NOTBOL 1 - -/* Like REG_NOTBOL, except for the end-of-line. */ -#define REG_NOTEOL (1 << 1) - -/* Use PMATCH[0] to delimit the start and end of the search in the - buffer. */ -#define REG_STARTEND (1 << 2) - - -/* If any error codes are removed, changed, or added, update the - `re_error_msg' table in regex.c. */ -typedef enum -{ -#ifdef _XOPEN_SOURCE - REG_ENOSYS = -1, /* This will never happen for this implementation. */ -#endif - - REG_NOERROR = 0, /* Success. */ - REG_NOMATCH, /* Didn't find a match (for regexec). */ - - /* POSIX regcomp return error codes. (In the order listed in the - standard.) */ - REG_BADPAT, /* Invalid pattern. */ - REG_ECOLLATE, /* Inalid collating element. */ - REG_ECTYPE, /* Invalid character class name. */ - REG_EESCAPE, /* Trailing backslash. */ - REG_ESUBREG, /* Invalid back reference. */ - REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ - REG_EBRACE, /* Unmatched \{. */ - REG_BADBR, /* Invalid contents of \{\}. */ - REG_ERANGE, /* Invalid range end. */ - REG_ESPACE, /* Ran out of memory. */ - REG_BADRPT, /* No preceding re for repetition op. */ - - /* Error codes we've added. */ - REG_EEND, /* Premature end. */ - REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ - REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ -} reg_errcode_t; - -/* This data structure represents a compiled pattern. Before calling - the pattern compiler, the fields `buffer', `allocated', `fastmap', - `translate', and `no_sub' can be set. After the pattern has been - compiled, the `re_nsub' field is available. All other fields are - private to the regex routines. */ - -#ifndef RE_TRANSLATE_TYPE -# define RE_TRANSLATE_TYPE char * -#endif - -struct re_pattern_buffer -{ -/* [[[begin pattern_buffer]]] */ - /* Space that holds the compiled pattern. It is declared as - `unsigned char *' because its elements are - sometimes used as array indexes. */ - unsigned char *buffer; - - /* Number of bytes to which `buffer' points. */ - unsigned long int allocated; - - /* Number of bytes actually used in `buffer'. */ - unsigned long int used; - - /* Syntax setting with which the pattern was compiled. */ - reg_syntax_t syntax; - - /* Pointer to a fastmap, if any, otherwise zero. re_search uses - the fastmap, if there is one, to skip over impossible - starting points for matches. */ - char *fastmap; - - /* Either a translate table to apply to all characters before - comparing them, or zero for no translation. The translation - is applied to a pattern when it is compiled and to a string - when it is matched. */ - RE_TRANSLATE_TYPE translate; - - /* Number of subexpressions found by the compiler. */ - size_t re_nsub; - - /* Zero if this pattern cannot match the empty string, one else. - Well, in truth it's used only in `re_search_2', to see - whether or not we should use the fastmap, so we don't set - this absolutely perfectly; see `re_compile_fastmap' (the - `duplicate' case). */ - unsigned can_be_null : 1; - - /* If REGS_UNALLOCATED, allocate space in the `regs' structure - for `max (RE_NREGS, re_nsub + 1)' groups. - If REGS_REALLOCATE, reallocate space if necessary. - If REGS_FIXED, use what's there. */ -#define REGS_UNALLOCATED 0 -#define REGS_REALLOCATE 1 -#define REGS_FIXED 2 - unsigned regs_allocated : 2; - - /* Set to zero when `regex_compile' compiles a pattern; set to one - by `re_compile_fastmap' if it updates the fastmap. */ - unsigned fastmap_accurate : 1; - - /* If set, `re_match_2' does not return information about - subexpressions. */ - unsigned no_sub : 1; - - /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ - unsigned not_bol : 1; - - /* Similarly for an end-of-line anchor. */ - unsigned not_eol : 1; - - /* If true, an anchor at a newline matches. */ - unsigned newline_anchor : 1; - -/* [[[end pattern_buffer]]] */ -}; - -typedef struct re_pattern_buffer regex_t; - -/* Type for byte offsets within the string. POSIX mandates this. */ -typedef int regoff_t; - - -/* This is the structure we store register match data in. See - regex.texinfo for a full description of what registers match. */ -struct re_registers -{ - unsigned num_regs; - regoff_t *start; - regoff_t *end; -}; - - -/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, - `re_match_2' returns information about at least this many registers - the first time a `regs' structure is passed. */ -#ifndef RE_NREGS -# define RE_NREGS 30 -#endif - - -/* POSIX specification for registers. Aside from the different names than - `re_registers', POSIX uses an array of structures, instead of a - structure of arrays. */ -typedef struct -{ - regoff_t rm_so; /* Byte offset from string's start to substring's start. */ - regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ -} regmatch_t; - -/* Declarations for routines. */ - -/* To avoid duplicating every routine declaration -- once with a - prototype (if we are ANSI), and once without (if we aren't) -- we - use the following macro to declare argument types. This - unfortunately clutters up the declarations a bit, but I think it's - worth it. */ - -#if __STDC__ - -# define _RE_ARGS(args) args - -#else /* not __STDC__ */ - -# define _RE_ARGS(args) () - -#endif /* not __STDC__ */ - -/* Sets the current default syntax to SYNTAX, and return the old syntax. - You can also simply assign to the `re_syntax_options' variable. */ -extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); - -/* Compile the regular expression PATTERN, with length LENGTH - and syntax given by the global `re_syntax_options', into the buffer - BUFFER. Return NULL if successful, and an error string if not. */ -extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, size_t length, - struct re_pattern_buffer *buffer)); - - -/* Compile a fastmap for the compiled pattern in BUFFER; used to - accelerate searches. Return 0 if successful and -2 if was an - internal error. */ -extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); - - -/* Search in the string STRING (with length LENGTH) for the pattern - compiled into BUFFER. Start searching at position START, for RANGE - characters. Return the starting position of the match, -1 for no - match, or -2 for an internal error. Also return register - information in REGS (if REGS and BUFFER->no_sub are nonzero). */ -extern int re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); - - -/* Like `re_search', but search in the concatenation of STRING1 and - STRING2. Also, stop searching at index START + STOP. */ -extern int re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); - - -/* Like `re_search', but return how many characters in STRING the regexp - in BUFFER matched, starting at position START. */ -extern int re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); - - -/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); - - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using BUFFER and REGS will use this memory - for recording register information. STARTS and ENDS must be - allocated with malloc, and must each be at least `NUM_REGS * sizeof - (regoff_t)' bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ -extern void re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); - -#if defined _REGEX_RE_COMP || defined _LIBC -# ifndef _CRAY -/* 4.2 bsd compatibility. */ -extern char *re_comp _RE_ARGS ((const char *)); -extern int re_exec _RE_ARGS ((const char *)); -# endif -#endif - -/* GCC 2.95 and later have "__restrict"; C99 compilers have - "restrict", and "configure" may have defined "restrict". */ -#ifndef __restrict -# if ! (2 < __GNUC__ || (2 == __GNUC__ && 95 <= __GNUC_MINOR__)) -# if defined restrict || 199901L <= __STDC_VERSION__ -# define __restrict restrict -# else -# define __restrict -# endif -# endif -#endif -/* gcc 3.1 and up support the [restrict] syntax. */ -#ifndef __restrict_arr -# if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1) -# define __restrict_arr __restrict -# else -# define __restrict_arr -# endif -#endif - -/* POSIX compatibility. */ -extern int regcomp _RE_ARGS ((regex_t *__restrict __preg, - const char *__restrict __pattern, - int __cflags)); - -extern int regexec _RE_ARGS ((const regex_t *__restrict __preg, - const char *__restrict __string, size_t __nmatch, - regmatch_t __pmatch[__restrict_arr], - int __eflags)); - -extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg, - char *__errbuf, size_t __errbuf_size)); - -extern void regfree _RE_ARGS ((regex_t *__preg)); - - -#ifdef __cplusplus -} -#endif /* C++ */ - -#endif /* regex.h */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/lib/libregex/regcomp.c b/gnu/lib/libregex/regcomp.c deleted file mode 100644 index 68e2bda..0000000 --- a/gnu/lib/libregex/regcomp.c +++ /dev/null @@ -1,3924 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern, - int length, reg_syntax_t syntax); -static void re_compile_fastmap_iter (regex_t *bufp, - const re_dfastate_t *init_state, - char *fastmap); -static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len); -static void init_word_char (re_dfa_t *dfa); -#ifdef RE_ENABLE_I18N -static void free_charset (re_charset_t *cset); -#endif /* RE_ENABLE_I18N */ -static void free_workarea_compile (regex_t *preg); -static reg_errcode_t create_initial_state (re_dfa_t *dfa); -#ifdef RE_ENABLE_I18N -static void optimize_utf8 (re_dfa_t *dfa); -#endif -static reg_errcode_t analyze (regex_t *preg); -static reg_errcode_t create_initial_state (re_dfa_t *dfa); -static reg_errcode_t preorder (bin_tree_t *root, - reg_errcode_t (fn (void *, bin_tree_t *)), - void *extra); -static reg_errcode_t postorder (bin_tree_t *root, - reg_errcode_t (fn (void *, bin_tree_t *)), - void *extra); -static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node); -static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node); -static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg, - bin_tree_t *node); -static reg_errcode_t calc_first (void *extra, bin_tree_t *node); -static reg_errcode_t calc_next (void *extra, bin_tree_t *node); -static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node); -static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node, - int top_clone_node, int root_node, - unsigned int constraint); -static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx, - unsigned int constraint); -static int search_duplicated_node (re_dfa_t *dfa, int org_node, - unsigned int constraint); -static reg_errcode_t calc_eclosure (re_dfa_t *dfa); -static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, - int node, int root); -static reg_errcode_t calc_inveclosure (re_dfa_t *dfa); -static int fetch_number (re_string_t *input, re_token_t *token, - reg_syntax_t syntax); -static void fetch_token (re_token_t *result, re_string_t *input, - reg_syntax_t syntax); -static int peek_token (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); -static int peek_token_bracket (re_token_t *token, re_string_t *input, - reg_syntax_t syntax); -static bin_tree_t *parse (re_string_t *regexp, regex_t *preg, - reg_syntax_t syntax, reg_errcode_t *err); -static bin_tree_t *parse_reg_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_branch (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_expression (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_sub_exp (re_string_t *regexp, regex_t *preg, - re_token_t *token, reg_syntax_t syntax, - int nest, reg_errcode_t *err); -static bin_tree_t *parse_dup_op (bin_tree_t *dup_elem, re_string_t *regexp, - re_dfa_t *dfa, re_token_t *token, - reg_syntax_t syntax, reg_errcode_t *err); -static bin_tree_t *parse_bracket_exp (re_string_t *regexp, re_dfa_t *dfa, - re_token_t *token, reg_syntax_t syntax, - reg_errcode_t *err); -static reg_errcode_t parse_bracket_element (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token, int token_len, - re_dfa_t *dfa, - reg_syntax_t syntax, - int accept_hyphen); -static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem, - re_string_t *regexp, - re_token_t *token); -#ifndef _LIBC -# ifdef RE_ENABLE_I18N -static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, int *range_alloc, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *coll_sym_alloc, - const unsigned char *name); -# else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_range_exp (re_bitset_ptr_t sbcset, - bracket_elem_t *start_elem, - bracket_elem_t *end_elem); -static reg_errcode_t build_collating_symbol (re_bitset_ptr_t sbcset, - const unsigned char *name); -# endif /* not RE_ENABLE_I18N */ -#endif /* not _LIBC */ -#ifdef RE_ENABLE_I18N -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *equiv_class_alloc, - const unsigned char *name); -static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, - re_charset_t *mbcset, - int *char_class_alloc, - const unsigned char *class_name, - reg_syntax_t syntax); -#else /* not RE_ENABLE_I18N */ -static reg_errcode_t build_equiv_class (re_bitset_ptr_t sbcset, - const unsigned char *name); -static reg_errcode_t build_charclass (unsigned RE_TRANSLATE_TYPE trans, - re_bitset_ptr_t sbcset, - const unsigned char *class_name, - reg_syntax_t syntax); -#endif /* not RE_ENABLE_I18N */ -static bin_tree_t *build_charclass_op (re_dfa_t *dfa, - unsigned RE_TRANSLATE_TYPE trans, - const unsigned char *class_name, - const unsigned char *extra, - int non_match, reg_errcode_t *err); -static bin_tree_t *create_tree (re_dfa_t *dfa, - bin_tree_t *left, bin_tree_t *right, - re_token_type_t type); -static bin_tree_t *create_token_tree (re_dfa_t *dfa, - bin_tree_t *left, bin_tree_t *right, - const re_token_t *token); -static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa); -static void free_token (re_token_t *node); -static reg_errcode_t free_tree (void *extra, bin_tree_t *node); -static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node); - -/* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. - POSIX doesn't require that we do anything for REG_NOERROR, - but why not be nice? */ - -const char __re_error_msgid[] attribute_hidden = - { -#define REG_NOERROR_IDX 0 - gettext_noop ("Success") /* REG_NOERROR */ - "\0" -#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") - gettext_noop ("No match") /* REG_NOMATCH */ - "\0" -#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") - gettext_noop ("Invalid regular expression") /* REG_BADPAT */ - "\0" -#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") - gettext_noop ("Invalid collation character") /* REG_ECOLLATE */ - "\0" -#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") - gettext_noop ("Invalid character class name") /* REG_ECTYPE */ - "\0" -#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") - gettext_noop ("Trailing backslash") /* REG_EESCAPE */ - "\0" -#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") - gettext_noop ("Invalid back reference") /* REG_ESUBREG */ - "\0" -#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") - gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */ - "\0" -#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") - gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */ - "\0" -#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") - gettext_noop ("Unmatched \\{") /* REG_EBRACE */ - "\0" -#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") - gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */ - "\0" -#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") - gettext_noop ("Invalid range end") /* REG_ERANGE */ - "\0" -#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") - gettext_noop ("Memory exhausted") /* REG_ESPACE */ - "\0" -#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") - gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */ - "\0" -#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") - gettext_noop ("Premature end of regular expression") /* REG_EEND */ - "\0" -#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") - gettext_noop ("Regular expression too big") /* REG_ESIZE */ - "\0" -#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") - gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */ - }; - -const size_t __re_error_msgid_idx[] attribute_hidden = - { - REG_NOERROR_IDX, - REG_NOMATCH_IDX, - REG_BADPAT_IDX, - REG_ECOLLATE_IDX, - REG_ECTYPE_IDX, - REG_EESCAPE_IDX, - REG_ESUBREG_IDX, - REG_EBRACK_IDX, - REG_EPAREN_IDX, - REG_EBRACE_IDX, - REG_BADBR_IDX, - REG_ERANGE_IDX, - REG_ESPACE_IDX, - REG_BADRPT_IDX, - REG_EEND_IDX, - REG_ESIZE_IDX, - REG_ERPAREN_IDX - }; - -/* Entry points for GNU code. */ - -/* re_compile_pattern is the GNU regular expression compiler: it - compiles PATTERN (of length LENGTH) and puts the result in BUFP. - Returns 0 if the pattern was valid, otherwise an error string. - - Assumes the `allocated' (and perhaps `buffer') and `translate' fields - are set in BUFP on entry. */ - -const char * -re_compile_pattern (pattern, length, bufp) - const char *pattern; - size_t length; - struct re_pattern_buffer *bufp; -{ - reg_errcode_t ret; - - /* And GNU code determines whether or not to get register information - by passing null for the REGS argument to re_match, etc., not by - setting no_sub, unless RE_NO_SUB is set. */ - bufp->no_sub = !!(re_syntax_options & RE_NO_SUB); - - /* Match anchors at newline. */ - bufp->newline_anchor = 1; - - ret = re_compile_internal (bufp, pattern, length, re_syntax_options); - - if (!ret) - return NULL; - return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); -} -#ifdef _LIBC -weak_alias (__re_compile_pattern, re_compile_pattern) -#endif - -/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can - also be assigned to arbitrarily: each pattern buffer stores its own - syntax, so it can be changed between regex compilations. */ -/* This has no initializer because initialized variables in Emacs - become read-only after dumping. */ -reg_syntax_t re_syntax_options; - - -/* Specify the precise syntax of regexps for compilation. This provides - for compatibility for various utilities which historically have - different, incompatible syntaxes. - - The argument SYNTAX is a bit mask comprised of the various bits - defined in regex.h. We return the old syntax. */ - -reg_syntax_t -re_set_syntax (syntax) - reg_syntax_t syntax; -{ - reg_syntax_t ret = re_syntax_options; - - re_syntax_options = syntax; - return ret; -} -#ifdef _LIBC -weak_alias (__re_set_syntax, re_set_syntax) -#endif - -int -re_compile_fastmap (bufp) - struct re_pattern_buffer *bufp; -{ - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; - char *fastmap = bufp->fastmap; - - memset (fastmap, '\0', sizeof (char) * SBC_MAX); - re_compile_fastmap_iter (bufp, dfa->init_state, fastmap); - if (dfa->init_state != dfa->init_state_word) - re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap); - if (dfa->init_state != dfa->init_state_nl) - re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap); - if (dfa->init_state != dfa->init_state_begbuf) - re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap); - bufp->fastmap_accurate = 1; - return 0; -} -#ifdef _LIBC -weak_alias (__re_compile_fastmap, re_compile_fastmap) -#endif - -static inline void -__attribute ((always_inline)) -re_set_fastmap (char *fastmap, int icase, int ch) -{ - fastmap[ch] = 1; - if (icase) - fastmap[tolower (ch)] = 1; -} - -/* Helper function for re_compile_fastmap. - Compile fastmap for the initial_state INIT_STATE. */ - -static void -re_compile_fastmap_iter (bufp, init_state, fastmap) - regex_t *bufp; - const re_dfastate_t *init_state; - char *fastmap; -{ - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; - int node_cnt; - int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE)); - for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt) - { - int node = init_state->nodes.elems[node_cnt]; - re_token_type_t type = dfa->nodes[node].type; - - if (type == CHARACTER) - { - re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c); -#ifdef RE_ENABLE_I18N - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) - { - unsigned char *buf = alloca (dfa->mb_cur_max), *p; - wchar_t wc; - mbstate_t state; - - p = buf; - *p++ = dfa->nodes[node].opr.c; - while (++node < dfa->nodes_len - && dfa->nodes[node].type == CHARACTER - && dfa->nodes[node].mb_partial) - *p++ = dfa->nodes[node].opr.c; - memset (&state, 0, sizeof (state)); - if (mbrtowc (&wc, (const char *) buf, p - buf, - &state) == p - buf - && (__wcrtomb ((char *) buf, towlower (wc), &state) - != (size_t) -1)) - re_set_fastmap (fastmap, 0, buf[0]); - } -#endif - } - else if (type == SIMPLE_BRACKET) - { - int i, j, ch; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (dfa->nodes[node].opr.sbcset[i] & (1 << j)) - re_set_fastmap (fastmap, icase, ch); - } -#ifdef RE_ENABLE_I18N - else if (type == COMPLEX_BRACKET) - { - int i; - re_charset_t *cset = dfa->nodes[node].opr.mbcset; - if (cset->non_match || cset->ncoll_syms || cset->nequiv_classes - || cset->nranges || cset->nchar_classes) - { -# ifdef _LIBC - if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0) - { - /* In this case we want to catch the bytes which are - the first byte of any collation elements. - e.g. In da_DK, we want to catch 'a' since "aa" - is a valid collation element, and don't catch - 'b' since 'b' is the only collation element - which starts from 'b'. */ - int j, ch; - const int32_t *table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (table[ch] < 0) - re_set_fastmap (fastmap, icase, ch); - } -# else - if (dfa->mb_cur_max > 1) - for (i = 0; i < SBC_MAX; ++i) - if (__btowc (i) == WEOF) - re_set_fastmap (fastmap, icase, i); -# endif /* not _LIBC */ - } - for (i = 0; i < cset->nmbchars; ++i) - { - char buf[256]; - mbstate_t state; - memset (&state, '\0', sizeof (state)); - if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1) - re_set_fastmap (fastmap, icase, *(unsigned char *) buf); - if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1) - { - if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state) - != (size_t) -1) - re_set_fastmap (fastmap, 0, *(unsigned char *) buf); - } - } - } -#endif /* RE_ENABLE_I18N */ - else if (type == OP_PERIOD -#ifdef RE_ENABLE_I18N - || type == OP_UTF8_PERIOD -#endif /* RE_ENABLE_I18N */ - || type == END_OF_RE) - { - memset (fastmap, '\1', sizeof (char) * SBC_MAX); - if (type == END_OF_RE) - bufp->can_be_null = 1; - return; - } - } -} - -/* Entry point for POSIX code. */ -/* regcomp takes a regular expression as a string and compiles it. - - PREG is a regex_t *. We do not expect any fields to be initialized, - since POSIX says we shouldn't. Thus, we set - - `buffer' to the compiled pattern; - `used' to the length of the compiled pattern; - `syntax' to RE_SYNTAX_POSIX_EXTENDED if the - REG_EXTENDED bit in CFLAGS is set; otherwise, to - RE_SYNTAX_POSIX_BASIC; - `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' to an allocated space for the fastmap; - `fastmap_accurate' to zero; - `re_nsub' to the number of subexpressions in PATTERN. - - PATTERN is the address of the pattern string. - - CFLAGS is a series of bits which affect compilation. - - If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we - use POSIX basic syntax. - - If REG_NEWLINE is set, then . and [^...] don't match newline. - Also, regexec will try a match beginning after every newline. - - If REG_ICASE is set, then we considers upper- and lowercase - versions of letters to be equivalent when matching. - - If REG_NOSUB is set, then when PREG is passed to regexec, that - routine will report only success or failure, and nothing about the - registers. - - It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for - the return codes and their meanings.) */ - -int -regcomp (preg, pattern, cflags) - regex_t *__restrict preg; - const char *__restrict pattern; - int cflags; -{ - reg_errcode_t ret; - reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED - : RE_SYNTAX_POSIX_BASIC); - - preg->buffer = NULL; - preg->allocated = 0; - preg->used = 0; - - /* Try to allocate space for the fastmap. */ - preg->fastmap = re_malloc (char, SBC_MAX); - if (BE (preg->fastmap == NULL, 0)) - return REG_ESPACE; - - syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0; - - /* If REG_NEWLINE is set, newlines are treated differently. */ - if (cflags & REG_NEWLINE) - { /* REG_NEWLINE implies neither . nor [^...] match newline. */ - syntax &= ~RE_DOT_NEWLINE; - syntax |= RE_HAT_LISTS_NOT_NEWLINE; - /* It also changes the matching behavior. */ - preg->newline_anchor = 1; - } - else - preg->newline_anchor = 0; - preg->no_sub = !!(cflags & REG_NOSUB); - preg->translate = NULL; - - ret = re_compile_internal (preg, pattern, strlen (pattern), syntax); - - /* POSIX doesn't distinguish between an unmatched open-group and an - unmatched close-group: both are REG_EPAREN. */ - if (ret == REG_ERPAREN) - ret = REG_EPAREN; - - /* We have already checked preg->fastmap != NULL. */ - if (BE (ret == REG_NOERROR, 1)) - /* Compute the fastmap now, since regexec cannot modify the pattern - buffer. This function never fails in this implementation. */ - (void) re_compile_fastmap (preg); - else - { - /* Some error occurred while compiling the expression. */ - re_free (preg->fastmap); - preg->fastmap = NULL; - } - - return (int) ret; -} -#ifdef _LIBC -weak_alias (__regcomp, regcomp) -#endif - -/* Returns a message corresponding to an error code, ERRCODE, returned - from either regcomp or regexec. We don't use PREG here. */ - -size_t -regerror (errcode, preg, errbuf, errbuf_size) - int errcode; - const regex_t *preg; - char *errbuf; - size_t errbuf_size; -{ - const char *msg; - size_t msg_size; - - if (BE (errcode < 0 - || errcode >= (int) (sizeof (__re_error_msgid_idx) - / sizeof (__re_error_msgid_idx[0])), 0)) - /* Only error codes returned by the rest of the code should be passed - to this routine. If we are given anything else, or if other regex - code generates an invalid error code, then the program has a bug. - Dump core so we can fix it. */ - abort (); - - msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]); - - msg_size = strlen (msg) + 1; /* Includes the null. */ - - if (BE (errbuf_size != 0, 1)) - { - if (BE (msg_size > errbuf_size, 0)) - { -#if defined HAVE_MEMPCPY || defined _LIBC - *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; -#else - memcpy (errbuf, msg, errbuf_size - 1); - errbuf[errbuf_size - 1] = 0; -#endif - } - else - memcpy (errbuf, msg, msg_size); - } - - return msg_size; -} -#ifdef _LIBC -weak_alias (__regerror, regerror) -#endif - - -#ifdef RE_ENABLE_I18N -/* This static array is used for the map to single-byte characters when - UTF-8 is used. Otherwise we would allocate memory just to initialize - it the same all the time. UTF-8 is the preferred encoding so this is - a worthwhile optimization. */ -static const bitset utf8_sb_map = -{ - /* Set the first 128 bits. */ -# if UINT_MAX == 0xffffffff - 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff -# else -# error "Add case for new unsigned int size" -# endif -}; -#endif - - -static void -free_dfa_content (re_dfa_t *dfa) -{ - int i, j; - - if (dfa->nodes) - for (i = 0; i < dfa->nodes_len; ++i) - free_token (dfa->nodes + i); - re_free (dfa->nexts); - for (i = 0; i < dfa->nodes_len; ++i) - { - if (dfa->eclosures != NULL) - re_node_set_free (dfa->eclosures + i); - if (dfa->inveclosures != NULL) - re_node_set_free (dfa->inveclosures + i); - if (dfa->edests != NULL) - re_node_set_free (dfa->edests + i); - } - re_free (dfa->edests); - re_free (dfa->eclosures); - re_free (dfa->inveclosures); - re_free (dfa->nodes); - - if (dfa->state_table) - for (i = 0; i <= dfa->state_hash_mask; ++i) - { - struct re_state_table_entry *entry = dfa->state_table + i; - for (j = 0; j < entry->num; ++j) - { - re_dfastate_t *state = entry->array[j]; - free_state (state); - } - re_free (entry->array); - } - re_free (dfa->state_table); -#ifdef RE_ENABLE_I18N - if (dfa->sb_char != utf8_sb_map) - re_free (dfa->sb_char); -#endif - re_free (dfa->subexp_map); -#ifdef DEBUG - re_free (dfa->re_str); -#endif - - re_free (dfa); -} - - -/* Free dynamically allocated space used by PREG. */ - -void -regfree (preg) - regex_t *preg; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - if (BE (dfa != NULL, 1)) - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - - re_free (preg->fastmap); - preg->fastmap = NULL; - - re_free (preg->translate); - preg->translate = NULL; -} -#ifdef _LIBC -weak_alias (__regfree, regfree) -#endif - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC - -/* BSD has one and only one pattern buffer. */ -static struct re_pattern_buffer re_comp_buf; - -char * -# ifdef _LIBC -/* Make these definitions weak in libc, so POSIX programs can redefine - these names if they don't use our functions, and still use - regcomp/regexec above without link errors. */ -weak_function -# endif -re_comp (s) - const char *s; -{ - reg_errcode_t ret; - char *fastmap; - - if (!s) - { - if (!re_comp_buf.buffer) - return gettext ("No previous regular expression"); - return 0; - } - - if (re_comp_buf.buffer) - { - fastmap = re_comp_buf.fastmap; - re_comp_buf.fastmap = NULL; - __regfree (&re_comp_buf); - memset (&re_comp_buf, '\0', sizeof (re_comp_buf)); - re_comp_buf.fastmap = fastmap; - } - - if (re_comp_buf.fastmap == NULL) - { - re_comp_buf.fastmap = (char *) malloc (SBC_MAX); - if (re_comp_buf.fastmap == NULL) - return (char *) gettext (__re_error_msgid - + __re_error_msgid_idx[(int) REG_ESPACE]); - } - - /* Since `re_exec' always passes NULL for the `regs' argument, we - don't need to initialize the pattern buffer fields which affect it. */ - - /* Match anchors at newlines. */ - re_comp_buf.newline_anchor = 1; - - ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options); - - if (!ret) - return NULL; - - /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ - return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]); -} - -#ifdef _LIBC -libc_freeres_fn (free_mem) -{ - __regfree (&re_comp_buf); -} -#endif - -#endif /* _REGEX_RE_COMP */ - -/* Internal entry point. - Compile the regular expression PATTERN, whose length is LENGTH. - SYNTAX indicate regular expression's syntax. */ - -static reg_errcode_t -re_compile_internal (preg, pattern, length, syntax) - regex_t *preg; - const char * pattern; - int length; - reg_syntax_t syntax; -{ - reg_errcode_t err = REG_NOERROR; - re_dfa_t *dfa; - re_string_t regexp; - - /* Initialize the pattern buffer. */ - preg->fastmap_accurate = 0; - preg->syntax = syntax; - preg->not_bol = preg->not_eol = 0; - preg->used = 0; - preg->re_nsub = 0; - preg->can_be_null = 0; - preg->regs_allocated = REGS_UNALLOCATED; - - /* Initialize the dfa. */ - dfa = (re_dfa_t *) preg->buffer; - if (BE (preg->allocated < sizeof (re_dfa_t), 0)) - { - /* If zero allocated, but buffer is non-null, try to realloc - enough space. This loses if buffer's address is bogus, but - that is the user's responsibility. If ->buffer is NULL this - is a simple allocation. */ - dfa = re_realloc (preg->buffer, re_dfa_t, 1); - if (dfa == NULL) - return REG_ESPACE; - preg->allocated = sizeof (re_dfa_t); - preg->buffer = (unsigned char *) dfa; - } - preg->used = sizeof (re_dfa_t); - - err = init_dfa (dfa, length); - if (BE (err != REG_NOERROR, 0)) - { - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - return err; - } -#ifdef DEBUG - dfa->re_str = re_malloc (char, length + 1); - strncpy (dfa->re_str, pattern, length + 1); -#endif - - err = re_string_construct (®exp, pattern, length, preg->translate, - syntax & RE_ICASE, dfa); - if (BE (err != REG_NOERROR, 0)) - { - re_compile_internal_free_return: - free_workarea_compile (preg); - re_string_destruct (®exp); - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - return err; - } - - /* Parse the regular expression, and build a structure tree. */ - preg->re_nsub = 0; - dfa->str_tree = parse (®exp, preg, syntax, &err); - if (BE (dfa->str_tree == NULL, 0)) - goto re_compile_internal_free_return; - - /* Analyze the tree and create the nfa. */ - err = analyze (preg); - if (BE (err != REG_NOERROR, 0)) - goto re_compile_internal_free_return; - -#ifdef RE_ENABLE_I18N - /* If possible, do searching in single byte encoding to speed things up. */ - if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL) - optimize_utf8 (dfa); -#endif - - /* Then create the initial state of the dfa. */ - err = create_initial_state (dfa); - - /* Release work areas. */ - free_workarea_compile (preg); - re_string_destruct (®exp); - - if (BE (err != REG_NOERROR, 0)) - { - free_dfa_content (dfa); - preg->buffer = NULL; - preg->allocated = 0; - } - - return err; -} - -/* Initialize DFA. We use the length of the regular expression PAT_LEN - as the initial length of some arrays. */ - -static reg_errcode_t -init_dfa (dfa, pat_len) - re_dfa_t *dfa; - int pat_len; -{ - int table_size; -#ifndef _LIBC - char *codeset_name; -#endif - - memset (dfa, '\0', sizeof (re_dfa_t)); - - /* Force allocation of str_tree_storage the first time. */ - dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; - - dfa->nodes_alloc = pat_len + 1; - dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc); - - dfa->states_alloc = pat_len + 1; - - /* table_size = 2 ^ ceil(log pat_len) */ - for (table_size = 1; table_size > 0; table_size <<= 1) - if (table_size > pat_len) - break; - - dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); - dfa->state_hash_mask = table_size - 1; - - dfa->mb_cur_max = MB_CUR_MAX; -#ifdef _LIBC - if (dfa->mb_cur_max == 6 - && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) - dfa->is_utf8 = 1; - dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) - != 0); -#else -# ifdef HAVE_LANGINFO_CODESET - codeset_name = nl_langinfo (CODESET); -# else - codeset_name = getenv ("LC_ALL"); - if (codeset_name == NULL || codeset_name[0] == '\0') - codeset_name = getenv ("LC_CTYPE"); - if (codeset_name == NULL || codeset_name[0] == '\0') - codeset_name = getenv ("LANG"); - if (codeset_name == NULL) - codeset_name = ""; - else if (strchr (codeset_name, '.') != NULL) - codeset_name = strchr (codeset_name, '.') + 1; -# endif - - if (strcasecmp (codeset_name, "UTF-8") == 0 - || strcasecmp (codeset_name, "UTF8") == 0) - dfa->is_utf8 = 1; - - /* We check exhaustively in the loop below if this charset is a - superset of ASCII. */ - dfa->map_notascii = 0; -#endif - -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - if (dfa->is_utf8) - dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map; - else - { - int i, j, ch; - - dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1); - if (BE (dfa->sb_char == NULL, 0)) - return REG_ESPACE; - - /* Clear all bits by, then set those corresponding to single - byte chars. */ - bitset_empty (dfa->sb_char); - - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - { - wchar_t wch = __btowc (ch); - if (wch != WEOF) - dfa->sb_char[i] |= 1 << j; -# ifndef _LIBC - if (isascii (ch) && wch != (wchar_t) ch) - dfa->map_notascii = 1; -# endif - } - } - } -#endif - - if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0)) - return REG_ESPACE; - return REG_NOERROR; -} - -/* Initialize WORD_CHAR table, which indicate which character is - "word". In this case "word" means that it is the word construction - character used by some operators like "\<", "\>", etc. */ - -static void -init_word_char (dfa) - re_dfa_t *dfa; -{ - int i, j, ch; - dfa->word_ops_used = 1; - for (i = 0, ch = 0; i < BITSET_UINTS; ++i) - for (j = 0; j < UINT_BITS; ++j, ++ch) - if (isalnum (ch) || ch == '_') - dfa->word_char[i] |= 1 << j; -} - -/* Free the work area which are only used while compiling. */ - -static void -free_workarea_compile (preg) - regex_t *preg; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_storage_t *storage, *next; - for (storage = dfa->str_tree_storage; storage; storage = next) - { - next = storage->next; - re_free (storage); - } - dfa->str_tree_storage = NULL; - dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE; - dfa->str_tree = NULL; - re_free (dfa->org_indices); - dfa->org_indices = NULL; -} - -/* Create initial states for all contexts. */ - -static reg_errcode_t -create_initial_state (dfa) - re_dfa_t *dfa; -{ - int first, i; - reg_errcode_t err; - re_node_set init_nodes; - - /* Initial states have the epsilon closure of the node which is - the first node of the regular expression. */ - first = dfa->str_tree->first->node_idx; - dfa->init_node = first; - err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* The back-references which are in initial states can epsilon transit, - since in this case all of the subexpressions can be null. - Then we add epsilon closures of the nodes which are the next nodes of - the back-references. */ - if (dfa->nbackref > 0) - for (i = 0; i < init_nodes.nelem; ++i) - { - int node_idx = init_nodes.elems[i]; - re_token_type_t type = dfa->nodes[node_idx].type; - - int clexp_idx; - if (type != OP_BACK_REF) - continue; - for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx) - { - re_token_t *clexp_node; - clexp_node = dfa->nodes + init_nodes.elems[clexp_idx]; - if (clexp_node->type == OP_CLOSE_SUBEXP - && clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx) - break; - } - if (clexp_idx == init_nodes.nelem) - continue; - - if (type == OP_BACK_REF) - { - int dest_idx = dfa->edests[node_idx].elems[0]; - if (!re_node_set_contains (&init_nodes, dest_idx)) - { - re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx); - i = 0; - } - } - } - - /* It must be the first time to invoke acquire_state. */ - dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0); - /* We don't check ERR here, since the initial state must not be NULL. */ - if (BE (dfa->init_state == NULL, 0)) - return err; - if (dfa->init_state->has_constraint) - { - dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_WORD); - dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes, - CONTEXT_NEWLINE); - dfa->init_state_begbuf = re_acquire_state_context (&err, dfa, - &init_nodes, - CONTEXT_NEWLINE - | CONTEXT_BEGBUF); - if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return err; - } - else - dfa->init_state_word = dfa->init_state_nl - = dfa->init_state_begbuf = dfa->init_state; - - re_node_set_free (&init_nodes); - return REG_NOERROR; -} - -#ifdef RE_ENABLE_I18N -/* If it is possible to do searching in single byte encoding instead of UTF-8 - to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change - DFA nodes where needed. */ - -static void -optimize_utf8 (dfa) - re_dfa_t *dfa; -{ - int node, i, mb_chars = 0, has_period = 0; - - for (node = 0; node < dfa->nodes_len; ++node) - switch (dfa->nodes[node].type) - { - case CHARACTER: - if (dfa->nodes[node].opr.c >= 0x80) - mb_chars = 1; - break; - case ANCHOR: - switch (dfa->nodes[node].opr.idx) - { - case LINE_FIRST: - case LINE_LAST: - case BUF_FIRST: - case BUF_LAST: - break; - default: - /* Word anchors etc. cannot be handled. */ - return; - } - break; - case OP_PERIOD: - has_period = 1; - break; - case OP_BACK_REF: - case OP_ALT: - case END_OF_RE: - case OP_DUP_ASTERISK: - case OP_OPEN_SUBEXP: - case OP_CLOSE_SUBEXP: - break; - case COMPLEX_BRACKET: - return; - case SIMPLE_BRACKET: - /* Just double check. */ - for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i) - if (dfa->nodes[node].opr.sbcset[i]) - return; - break; - default: - abort (); - } - - if (mb_chars || has_period) - for (node = 0; node < dfa->nodes_len; ++node) - { - if (dfa->nodes[node].type == CHARACTER - && dfa->nodes[node].opr.c >= 0x80) - dfa->nodes[node].mb_partial = 0; - else if (dfa->nodes[node].type == OP_PERIOD) - dfa->nodes[node].type = OP_UTF8_PERIOD; - } - - /* The search can be in single byte locale. */ - dfa->mb_cur_max = 1; - dfa->is_utf8 = 0; - dfa->has_mb_node = dfa->nbackref > 0 || has_period; -} -#endif - -/* Analyze the structure tree, and calculate "first", "next", "edest", - "eclosure", and "inveclosure". */ - -static reg_errcode_t -analyze (preg) - regex_t *preg; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - reg_errcode_t ret; - - /* Allocate arrays. */ - dfa->nexts = re_malloc (int, dfa->nodes_alloc); - dfa->org_indices = re_malloc (int, dfa->nodes_alloc); - dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc); - dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc); - if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL - || dfa->eclosures == NULL, 0)) - return REG_ESPACE; - - dfa->subexp_map = re_malloc (int, preg->re_nsub); - if (dfa->subexp_map != NULL) - { - int i; - for (i = 0; i < preg->re_nsub; i++) - dfa->subexp_map[i] = i; - preorder (dfa->str_tree, optimize_subexps, dfa); - for (i = 0; i < preg->re_nsub; i++) - if (dfa->subexp_map[i] != i) - break; - if (i == preg->re_nsub) - { - free (dfa->subexp_map); - dfa->subexp_map = NULL; - } - } - - ret = postorder (dfa->str_tree, lower_subexps, preg); - if (BE (ret != REG_NOERROR, 0)) - return ret; - ret = postorder (dfa->str_tree, calc_first, dfa); - if (BE (ret != REG_NOERROR, 0)) - return ret; - preorder (dfa->str_tree, calc_next, dfa); - ret = preorder (dfa->str_tree, link_nfa_nodes, dfa); - if (BE (ret != REG_NOERROR, 0)) - return ret; - ret = calc_eclosure (dfa); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - /* We only need this during the prune_impossible_nodes pass in regexec.c; - skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */ - if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match) - || dfa->nbackref) - { - dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len); - if (BE (dfa->inveclosures == NULL, 0)) - return REG_ESPACE; - ret = calc_inveclosure (dfa); - } - - return ret; -} - -/* Our parse trees are very unbalanced, so we cannot use a stack to - implement parse tree visits. Instead, we use parent pointers and - some hairy code in these two functions. */ -static reg_errcode_t -postorder (root, fn, extra) - bin_tree_t *root; - reg_errcode_t (fn (void *, bin_tree_t *)); - void *extra; -{ - bin_tree_t *node, *prev; - - for (node = root; ; ) - { - /* Descend down the tree, preferably to the left (or to the right - if that's the only child). */ - while (node->left || node->right) - if (node->left) - node = node->left; - else - node = node->right; - - do - { - reg_errcode_t err = fn (extra, node); - if (BE (err != REG_NOERROR, 0)) - return err; - if (node->parent == NULL) - return REG_NOERROR; - prev = node; - node = node->parent; - } - /* Go up while we have a node that is reached from the right. */ - while (node->right == prev || node->right == NULL); - node = node->right; - } -} - -static reg_errcode_t -preorder (root, fn, extra) - bin_tree_t *root; - reg_errcode_t (fn (void *, bin_tree_t *)); - void *extra; -{ - bin_tree_t *node; - - for (node = root; ; ) - { - reg_errcode_t err = fn (extra, node); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* Go to the left node, or up and to the right. */ - if (node->left) - node = node->left; - else - { - bin_tree_t *prev = NULL; - while (node->right == prev || node->right == NULL) - { - prev = node; - node = node->parent; - if (!node) - return REG_NOERROR; - } - node = node->right; - } - } -} - -/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell - re_search_internal to map the inner one's opr.idx to this one's. Adjust - backreferences as well. Requires a preorder visit. */ -static reg_errcode_t -optimize_subexps (extra, node) - void *extra; - bin_tree_t *node; -{ - re_dfa_t *dfa = (re_dfa_t *) extra; - - if (node->token.type == OP_BACK_REF && dfa->subexp_map) - { - int idx = node->token.opr.idx; - node->token.opr.idx = dfa->subexp_map[idx]; - dfa->used_bkref_map |= 1 << node->token.opr.idx; - } - - else if (node->token.type == SUBEXP - && node->left && node->left->token.type == SUBEXP) - { - int other_idx = node->left->token.opr.idx; - - node->left = node->left->left; - if (node->left) - node->left->parent = node; - - dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx]; - if (other_idx < 8 * sizeof (dfa->used_bkref_map)) - dfa->used_bkref_map &= ~(1 << other_idx); - } - - return REG_NOERROR; -} - -/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation - of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */ -static reg_errcode_t -lower_subexps (extra, node) - void *extra; - bin_tree_t *node; -{ - regex_t *preg = (regex_t *) extra; - reg_errcode_t err = REG_NOERROR; - - if (node->left && node->left->token.type == SUBEXP) - { - node->left = lower_subexp (&err, preg, node->left); - if (node->left) - node->left->parent = node; - } - if (node->right && node->right->token.type == SUBEXP) - { - node->right = lower_subexp (&err, preg, node->right); - if (node->right) - node->right->parent = node; - } - - return err; -} - -static bin_tree_t * -lower_subexp (err, preg, node) - reg_errcode_t *err; - regex_t *preg; - bin_tree_t *node; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *body = node->left; - bin_tree_t *op, *cls, *tree1, *tree; - - if (preg->no_sub - /* We do not optimize empty subexpressions, because otherwise we may - have bad CONCAT nodes with NULL children. This is obviously not - very common, so we do not lose much. An example that triggers - this case is the sed "script" /\(\)/x. */ - && node->left != NULL - && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map) - || !(dfa->used_bkref_map & (1 << node->token.opr.idx)))) - return node->left; - - /* Convert the SUBEXP node to the concatenation of an - OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */ - op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP); - cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP); - tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls; - tree = create_tree (dfa, op, tree1, CONCAT); - if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - - op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx; - op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp; - return tree; -} - -/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton - nodes. Requires a postorder visit. */ -static reg_errcode_t -calc_first (extra, node) - void *extra; - bin_tree_t *node; -{ - re_dfa_t *dfa = (re_dfa_t *) extra; - if (node->token.type == CONCAT) - { - node->first = node->left->first; - node->node_idx = node->left->node_idx; - } - else - { - node->first = node; - node->node_idx = re_dfa_add_node (dfa, node->token); - if (BE (node->node_idx == -1, 0)) - return REG_ESPACE; - } - return REG_NOERROR; -} - -/* Pass 2: compute NEXT on the tree. Preorder visit. */ -static reg_errcode_t -calc_next (extra, node) - void *extra; - bin_tree_t *node; -{ - switch (node->token.type) - { - case OP_DUP_ASTERISK: - node->left->next = node; - break; - case CONCAT: - node->left->next = node->right->first; - node->right->next = node->next; - break; - default: - if (node->left) - node->left->next = node->next; - if (node->right) - node->right->next = node->next; - break; - } - return REG_NOERROR; -} - -/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */ -static reg_errcode_t -link_nfa_nodes (extra, node) - void *extra; - bin_tree_t *node; -{ - re_dfa_t *dfa = (re_dfa_t *) extra; - int idx = node->node_idx; - reg_errcode_t err = REG_NOERROR; - - switch (node->token.type) - { - case CONCAT: - break; - - case END_OF_RE: - assert (node->next == NULL); - break; - - case OP_DUP_ASTERISK: - case OP_ALT: - { - int left, right; - dfa->has_plural_match = 1; - if (node->left != NULL) - left = node->left->first->node_idx; - else - left = node->next->node_idx; - if (node->right != NULL) - right = node->right->first->node_idx; - else - right = node->next->node_idx; - assert (left > -1); - assert (right > -1); - err = re_node_set_init_2 (dfa->edests + idx, left, right); - } - break; - - case ANCHOR: - case OP_OPEN_SUBEXP: - case OP_CLOSE_SUBEXP: - err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx); - break; - - case OP_BACK_REF: - dfa->nexts[idx] = node->next->node_idx; - if (node->token.type == OP_BACK_REF) - re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]); - break; - - default: - assert (!IS_EPSILON_NODE (node->token.type)); - dfa->nexts[idx] = node->next->node_idx; - break; - } - - return err; -} - -/* Duplicate the epsilon closure of the node ROOT_NODE. - Note that duplicated nodes have constraint INIT_CONSTRAINT in addition - to their own constraint. */ - -static reg_errcode_t -duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node, - init_constraint) - re_dfa_t *dfa; - int top_org_node, top_clone_node, root_node; - unsigned int init_constraint; -{ - reg_errcode_t err; - int org_node, clone_node, ret; - unsigned int constraint = init_constraint; - for (org_node = top_org_node, clone_node = top_clone_node;;) - { - int org_dest, clone_dest; - if (dfa->nodes[org_node].type == OP_BACK_REF) - { - /* If the back reference epsilon-transit, its destination must - also have the constraint. Then duplicate the epsilon closure - of the destination of the back reference, and store it in - edests of the back reference. */ - org_dest = dfa->nexts[org_node]; - re_node_set_empty (dfa->edests + clone_node); - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - dfa->nexts[clone_node] = dfa->nexts[org_node]; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - else if (dfa->edests[org_node].nelem == 0) - { - /* In case of the node can't epsilon-transit, don't duplicate the - destination and store the original destination as the - destination of the node. */ - dfa->nexts[clone_node] = dfa->nexts[org_node]; - break; - } - else if (dfa->edests[org_node].nelem == 1) - { - /* In case of the node can epsilon-transit, and it has only one - destination. */ - org_dest = dfa->edests[org_node].elems[0]; - re_node_set_empty (dfa->edests + clone_node); - if (dfa->nodes[org_node].type == ANCHOR) - { - /* In case of the node has another constraint, append it. */ - if (org_node == root_node && clone_node != org_node) - { - /* ...but if the node is root_node itself, it means the - epsilon closure have a loop, then tie it to the - destination of the root_node. */ - ret = re_node_set_insert (dfa->edests + clone_node, - org_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - break; - } - constraint |= dfa->nodes[org_node].opr.ctx_type; - } - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - else /* dfa->edests[org_node].nelem == 2 */ - { - /* In case of the node can epsilon-transit, and it has two - destinations. In the bin_tree_t and DFA, that's '|' and '*'. */ - org_dest = dfa->edests[org_node].elems[0]; - re_node_set_empty (dfa->edests + clone_node); - /* Search for a duplicated node which satisfies the constraint. */ - clone_dest = search_duplicated_node (dfa, org_dest, constraint); - if (clone_dest == -1) - { - /* There are no such a duplicated node, create a new one. */ - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - err = duplicate_node_closure (dfa, org_dest, clone_dest, - root_node, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - { - /* There are a duplicated node which satisfy the constraint, - use it to avoid infinite loop. */ - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - - org_dest = dfa->edests[org_node].elems[1]; - err = duplicate_node (&clone_dest, dfa, org_dest, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - ret = re_node_set_insert (dfa->edests + clone_node, clone_dest); - if (BE (ret < 0, 0)) - return REG_ESPACE; - } - org_node = org_dest; - clone_node = clone_dest; - } - return REG_NOERROR; -} - -/* Search for a node which is duplicated from the node ORG_NODE, and - satisfies the constraint CONSTRAINT. */ - -static int -search_duplicated_node (dfa, org_node, constraint) - re_dfa_t *dfa; - int org_node; - unsigned int constraint; -{ - int idx; - for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx) - { - if (org_node == dfa->org_indices[idx] - && constraint == dfa->nodes[idx].constraint) - return idx; /* Found. */ - } - return -1; /* Not found. */ -} - -/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT. - The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded, - otherwise return the error code. */ - -static reg_errcode_t -duplicate_node (new_idx, dfa, org_idx, constraint) - re_dfa_t *dfa; - int *new_idx, org_idx; - unsigned int constraint; -{ - int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]); - if (BE (dup_idx == -1, 0)) - return REG_ESPACE; - dfa->nodes[dup_idx].constraint = constraint; - if (dfa->nodes[org_idx].type == ANCHOR) - dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type; - dfa->nodes[dup_idx].duplicated = 1; - - /* Store the index of the original node. */ - dfa->org_indices[dup_idx] = org_idx; - *new_idx = dup_idx; - return REG_NOERROR; -} - -static reg_errcode_t -calc_inveclosure (dfa) - re_dfa_t *dfa; -{ - int src, idx, ret; - for (idx = 0; idx < dfa->nodes_len; ++idx) - re_node_set_init_empty (dfa->inveclosures + idx); - - for (src = 0; src < dfa->nodes_len; ++src) - { - int *elems = dfa->eclosures[src].elems; - for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx) - { - ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src); - if (BE (ret == -1, 0)) - return REG_ESPACE; - } - } - - return REG_NOERROR; -} - -/* Calculate "eclosure" for all the node in DFA. */ - -static reg_errcode_t -calc_eclosure (dfa) - re_dfa_t *dfa; -{ - int node_idx, incomplete; -#ifdef DEBUG - assert (dfa->nodes_len > 0); -#endif - incomplete = 0; - /* For each nodes, calculate epsilon closure. */ - for (node_idx = 0; ; ++node_idx) - { - reg_errcode_t err; - re_node_set eclosure_elem; - if (node_idx == dfa->nodes_len) - { - if (!incomplete) - break; - incomplete = 0; - node_idx = 0; - } - -#ifdef DEBUG - assert (dfa->eclosures[node_idx].nelem != -1); -#endif - - /* If we have already calculated, skip it. */ - if (dfa->eclosures[node_idx].nelem != 0) - continue; - /* Calculate epsilon closure of `node_idx'. */ - err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (dfa->eclosures[node_idx].nelem == 0) - { - incomplete = 1; - re_node_set_free (&eclosure_elem); - } - } - return REG_NOERROR; -} - -/* Calculate epsilon closure of NODE. */ - -static reg_errcode_t -calc_eclosure_iter (new_set, dfa, node, root) - re_node_set *new_set; - re_dfa_t *dfa; - int node, root; -{ - reg_errcode_t err; - unsigned int constraint; - int i, incomplete; - re_node_set eclosure; - incomplete = 0; - err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* This indicates that we are calculating this node now. - We reference this value to avoid infinite loop. */ - dfa->eclosures[node].nelem = -1; - - constraint = ((dfa->nodes[node].type == ANCHOR) - ? dfa->nodes[node].opr.ctx_type : 0); - /* If the current node has constraints, duplicate all nodes. - Since they must inherit the constraints. */ - if (constraint - && dfa->edests[node].nelem - && !dfa->nodes[dfa->edests[node].elems[0]].duplicated) - { - int org_node, cur_node; - org_node = cur_node = node; - err = duplicate_node_closure (dfa, node, node, node, constraint); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Expand each epsilon destination nodes. */ - if (IS_EPSILON_NODE(dfa->nodes[node].type)) - for (i = 0; i < dfa->edests[node].nelem; ++i) - { - re_node_set eclosure_elem; - int edest = dfa->edests[node].elems[i]; - /* If calculating the epsilon closure of `edest' is in progress, - return intermediate result. */ - if (dfa->eclosures[edest].nelem == -1) - { - incomplete = 1; - continue; - } - /* If we haven't calculated the epsilon closure of `edest' yet, - calculate now. Otherwise use calculated epsilon closure. */ - if (dfa->eclosures[edest].nelem == 0) - { - err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0); - if (BE (err != REG_NOERROR, 0)) - return err; - } - else - eclosure_elem = dfa->eclosures[edest]; - /* Merge the epsilon closure of `edest'. */ - re_node_set_merge (&eclosure, &eclosure_elem); - /* If the epsilon closure of `edest' is incomplete, - the epsilon closure of this node is also incomplete. */ - if (dfa->eclosures[edest].nelem == 0) - { - incomplete = 1; - re_node_set_free (&eclosure_elem); - } - } - - /* Epsilon closures include itself. */ - re_node_set_insert (&eclosure, node); - if (incomplete && !root) - dfa->eclosures[node].nelem = 0; - else - dfa->eclosures[node] = eclosure; - *new_set = eclosure; - return REG_NOERROR; -} - -/* Functions for token which are used in the parser. */ - -/* Fetch a token from INPUT. - We must not use this function inside bracket expressions. */ - -static void -fetch_token (result, input, syntax) - re_token_t *result; - re_string_t *input; - reg_syntax_t syntax; -{ - re_string_skip_bytes (input, peek_token (result, input, syntax)); -} - -/* Peek a token from INPUT, and return the length of the token. - We must not use this function inside bracket expressions. */ - -static int -peek_token (token, input, syntax) - re_token_t *token; - re_string_t *input; - reg_syntax_t syntax; -{ - unsigned char c; - - if (re_string_eoi (input)) - { - token->type = END_OF_RE; - return 0; - } - - c = re_string_peek_byte (input, 0); - token->opr.c = c; - - token->word_char = 0; -#ifdef RE_ENABLE_I18N - token->mb_partial = 0; - if (input->mb_cur_max > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) - { - token->type = CHARACTER; - token->mb_partial = 1; - return 1; - } -#endif - if (c == '\\') - { - unsigned char c2; - if (re_string_cur_idx (input) + 1 >= re_string_length (input)) - { - token->type = BACK_SLASH; - return 1; - } - - c2 = re_string_peek_byte_case (input, 1); - token->opr.c = c2; - token->type = CHARACTER; -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) - { - wint_t wc = re_string_wchar_at (input, - re_string_cur_idx (input) + 1); - token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; - } - else -#endif - token->word_char = IS_WORD_CHAR (c2) != 0; - - switch (c2) - { - case '|': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR)) - token->type = OP_ALT; - break; - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - if (!(syntax & RE_NO_BK_REFS)) - { - token->type = OP_BACK_REF; - token->opr.idx = c2 - '1'; - } - break; - case '<': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = WORD_FIRST; - } - break; - case '>': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = WORD_LAST; - } - break; - case 'b': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = WORD_DELIM; - } - break; - case 'B': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = NOT_WORD_DELIM; - } - break; - case 'w': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_WORD; - break; - case 'W': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_NOTWORD; - break; - case 's': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_SPACE; - break; - case 'S': - if (!(syntax & RE_NO_GNU_OPS)) - token->type = OP_NOTSPACE; - break; - case '`': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = BUF_FIRST; - } - break; - case '\'': - if (!(syntax & RE_NO_GNU_OPS)) - { - token->type = ANCHOR; - token->opr.ctx_type = BUF_LAST; - } - break; - case '(': - if (!(syntax & RE_NO_BK_PARENS)) - token->type = OP_OPEN_SUBEXP; - break; - case ')': - if (!(syntax & RE_NO_BK_PARENS)) - token->type = OP_CLOSE_SUBEXP; - break; - case '+': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_PLUS; - break; - case '?': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_QUESTION; - break; - case '{': - if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) - token->type = OP_OPEN_DUP_NUM; - break; - case '}': - if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES))) - token->type = OP_CLOSE_DUP_NUM; - break; - default: - break; - } - return 2; - } - - token->type = CHARACTER; -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) - { - wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input)); - token->word_char = IS_WIDE_WORD_CHAR (wc) != 0; - } - else -#endif - token->word_char = IS_WORD_CHAR (token->opr.c); - - switch (c) - { - case '\n': - if (syntax & RE_NEWLINE_ALT) - token->type = OP_ALT; - break; - case '|': - if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR)) - token->type = OP_ALT; - break; - case '*': - token->type = OP_DUP_ASTERISK; - break; - case '+': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_PLUS; - break; - case '?': - if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM)) - token->type = OP_DUP_QUESTION; - break; - case '{': - if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - token->type = OP_OPEN_DUP_NUM; - break; - case '}': - if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) - token->type = OP_CLOSE_DUP_NUM; - break; - case '(': - if (syntax & RE_NO_BK_PARENS) - token->type = OP_OPEN_SUBEXP; - break; - case ')': - if (syntax & RE_NO_BK_PARENS) - token->type = OP_CLOSE_SUBEXP; - break; - case '[': - token->type = OP_OPEN_BRACKET; - break; - case '.': - token->type = OP_PERIOD; - break; - case '^': - if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) && - re_string_cur_idx (input) != 0) - { - char prev = re_string_peek_byte (input, -1); - if (!(syntax & RE_NEWLINE_ALT) || prev != '\n') - break; - } - token->type = ANCHOR; - token->opr.ctx_type = LINE_FIRST; - break; - case '$': - if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) && - re_string_cur_idx (input) + 1 != re_string_length (input)) - { - re_token_t next; - re_string_skip_bytes (input, 1); - peek_token (&next, input, syntax); - re_string_skip_bytes (input, -1); - if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP) - break; - } - token->type = ANCHOR; - token->opr.ctx_type = LINE_LAST; - break; - default: - break; - } - return 1; -} - -/* Peek a token from INPUT, and return the length of the token. - We must not use this function out of bracket expressions. */ - -static int -peek_token_bracket (token, input, syntax) - re_token_t *token; - re_string_t *input; - reg_syntax_t syntax; -{ - unsigned char c; - if (re_string_eoi (input)) - { - token->type = END_OF_RE; - return 0; - } - c = re_string_peek_byte (input, 0); - token->opr.c = c; - -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1 && - !re_string_first_byte (input, re_string_cur_idx (input))) - { - token->type = CHARACTER; - return 1; - } -#endif /* RE_ENABLE_I18N */ - - if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) - && re_string_cur_idx (input) + 1 < re_string_length (input)) - { - /* In this case, '\' escape a character. */ - unsigned char c2; - re_string_skip_bytes (input, 1); - c2 = re_string_peek_byte (input, 0); - token->opr.c = c2; - token->type = CHARACTER; - return 1; - } - if (c == '[') /* '[' is a special char in a bracket exps. */ - { - unsigned char c2; - int token_len; - if (re_string_cur_idx (input) + 1 < re_string_length (input)) - c2 = re_string_peek_byte (input, 1); - else - c2 = 0; - token->opr.c = c2; - token_len = 2; - switch (c2) - { - case '.': - token->type = OP_OPEN_COLL_ELEM; - break; - case '=': - token->type = OP_OPEN_EQUIV_CLASS; - break; - case ':': - if (syntax & RE_CHAR_CLASSES) - { - token->type = OP_OPEN_CHAR_CLASS; - break; - } - /* else fall through. */ - default: - token->type = CHARACTER; - token->opr.c = c; - token_len = 1; - break; - } - return token_len; - } - switch (c) - { - case '-': - token->type = OP_CHARSET_RANGE; - break; - case ']': - token->type = OP_CLOSE_BRACKET; - break; - case '^': - token->type = OP_NON_MATCH_LIST; - break; - default: - token->type = CHARACTER; - } - return 1; -} - -/* Functions for parser. */ - -/* Entry point of the parser. - Parse the regular expression REGEXP and return the structure tree. - If an error is occured, ERR is set by error code, and return NULL. - This function build the following tree, from regular expression <reg_exp>: - CAT - / \ - / \ - <reg_exp> EOR - - CAT means concatenation. - EOR means end of regular expression. */ - -static bin_tree_t * -parse (regexp, preg, syntax, err) - re_string_t *regexp; - regex_t *preg; - reg_syntax_t syntax; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *eor, *root; - re_token_t current_token; - dfa->syntax = syntax; - fetch_token (¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE); - tree = parse_reg_exp (regexp, preg, ¤t_token, syntax, 0, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - eor = create_tree (dfa, NULL, NULL, END_OF_RE); - if (tree != NULL) - root = create_tree (dfa, tree, eor, CONCAT); - else - root = eor; - if (BE (eor == NULL || root == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - return root; -} - -/* This function build the following tree, from regular expression - <branch1>|<branch2>: - ALT - / \ - / \ - <branch1> <branch2> - - ALT means alternative, which represents the operator `|'. */ - -static bin_tree_t * -parse_reg_exp (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree, *branch = NULL; - tree = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - - while (token->type == OP_ALT) - { - fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); - if (token->type != OP_ALT && token->type != END_OF_RE - && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) - { - branch = parse_branch (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && branch == NULL, 0)) - return NULL; - } - else - branch = NULL; - tree = create_tree (dfa, tree, branch, OP_ALT); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - return tree; -} - -/* This function build the following tree, from regular expression - <exp1><exp2>: - CAT - / \ - / \ - <exp1> <exp2> - - CAT means concatenation. */ - -static bin_tree_t * -parse_branch (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - bin_tree_t *tree, *exp; - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - tree = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - - while (token->type != OP_ALT && token->type != END_OF_RE - && (nest == 0 || token->type != OP_CLOSE_SUBEXP)) - { - exp = parse_expression (regexp, preg, token, syntax, nest, err); - if (BE (*err != REG_NOERROR && exp == NULL, 0)) - { - return NULL; - } - if (tree != NULL && exp != NULL) - { - tree = create_tree (dfa, tree, exp, CONCAT); - if (tree == NULL) - { - *err = REG_ESPACE; - return NULL; - } - } - else if (tree == NULL) - tree = exp; - /* Otherwise exp == NULL, we don't need to create new tree. */ - } - return tree; -} - -/* This function build the following tree, from regular expression a*: - * - | - a -*/ - -static bin_tree_t * -parse_expression (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree; - switch (token->type) - { - case CHARACTER: - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - while (!re_string_eoi (regexp) - && !re_string_first_byte (regexp, re_string_cur_idx (regexp))) - { - bin_tree_t *mbc_remain; - fetch_token (token, regexp, syntax); - mbc_remain = create_token_tree (dfa, NULL, NULL, token); - tree = create_tree (dfa, tree, mbc_remain, CONCAT); - if (BE (mbc_remain == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - } -#endif - break; - case OP_OPEN_SUBEXP: - tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_OPEN_BRACKET: - tree = parse_bracket_exp (regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_BACK_REF: - if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1)) - { - *err = REG_ESUBREG; - return NULL; - } - dfa->used_bkref_map |= 1 << token->opr.idx; - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - ++dfa->nbackref; - dfa->has_mb_node = 1; - break; - case OP_OPEN_DUP_NUM: - if (syntax & RE_CONTEXT_INVALID_DUP) - { - *err = REG_BADRPT; - return NULL; - } - /* FALLTHROUGH */ - case OP_DUP_ASTERISK: - case OP_DUP_PLUS: - case OP_DUP_QUESTION: - if (syntax & RE_CONTEXT_INVALID_OPS) - { - *err = REG_BADRPT; - return NULL; - } - else if (syntax & RE_CONTEXT_INDEP_OPS) - { - fetch_token (token, regexp, syntax); - return parse_expression (regexp, preg, token, syntax, nest, err); - } - /* else fall through */ - case OP_CLOSE_SUBEXP: - if ((token->type == OP_CLOSE_SUBEXP) && - !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)) - { - *err = REG_ERPAREN; - return NULL; - } - /* else fall through */ - case OP_CLOSE_DUP_NUM: - /* We treat it as a normal character. */ - - /* Then we can these characters as normal characters. */ - token->type = CHARACTER; - /* mb_partial and word_char bits should be initialized already - by peek_token. */ - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - break; - case ANCHOR: - if ((token->opr.ctx_type - & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST)) - && dfa->word_ops_used == 0) - init_word_char (dfa); - if (token->opr.ctx_type == WORD_DELIM - || token->opr.ctx_type == NOT_WORD_DELIM) - { - bin_tree_t *tree_first, *tree_last; - if (token->opr.ctx_type == WORD_DELIM) - { - token->opr.ctx_type = WORD_FIRST; - tree_first = create_token_tree (dfa, NULL, NULL, token); - token->opr.ctx_type = WORD_LAST; - } - else - { - token->opr.ctx_type = INSIDE_WORD; - tree_first = create_token_tree (dfa, NULL, NULL, token); - token->opr.ctx_type = INSIDE_NOTWORD; - } - tree_last = create_token_tree (dfa, NULL, NULL, token); - tree = create_tree (dfa, tree_first, tree_last, OP_ALT); - if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - else - { - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - } - /* We must return here, since ANCHORs can't be followed - by repetition operators. - eg. RE"^*" is invalid or "<ANCHOR(^)><CHAR(*)>", - it must not be "<ANCHOR(^)><REPEAT(*)>". */ - fetch_token (token, regexp, syntax); - return tree; - case OP_PERIOD: - tree = create_token_tree (dfa, NULL, NULL, token); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - if (dfa->mb_cur_max > 1) - dfa->has_mb_node = 1; - break; - case OP_WORD: - case OP_NOTWORD: - tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "alnum", - (const unsigned char *) "_", - token->type == OP_NOTWORD, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_SPACE: - case OP_NOTSPACE: - tree = build_charclass_op (dfa, regexp->trans, - (const unsigned char *) "space", - (const unsigned char *) "", - token->type == OP_NOTSPACE, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - break; - case OP_ALT: - case END_OF_RE: - return NULL; - case BACK_SLASH: - *err = REG_EESCAPE; - return NULL; - default: - /* Must not happen? */ -#ifdef DEBUG - assert (0); -#endif - return NULL; - } - fetch_token (token, regexp, syntax); - - while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS - || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM) - { - tree = parse_dup_op (tree, regexp, dfa, token, syntax, err); - if (BE (*err != REG_NOERROR && tree == NULL, 0)) - return NULL; - /* In BRE consecutive duplications are not allowed. */ - if ((syntax & RE_CONTEXT_INVALID_DUP) - && (token->type == OP_DUP_ASTERISK - || token->type == OP_OPEN_DUP_NUM)) - { - *err = REG_BADRPT; - return NULL; - } - } - - return tree; -} - -/* This function build the following tree, from regular expression - (<reg_exp>): - SUBEXP - | - <reg_exp> -*/ - -static bin_tree_t * -parse_sub_exp (regexp, preg, token, syntax, nest, err) - re_string_t *regexp; - regex_t *preg; - re_token_t *token; - reg_syntax_t syntax; - int nest; - reg_errcode_t *err; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - bin_tree_t *tree; - size_t cur_nsub; - cur_nsub = preg->re_nsub++; - - fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE); - - /* The subexpression may be a null string. */ - if (token->type == OP_CLOSE_SUBEXP) - tree = NULL; - else - { - tree = parse_reg_exp (regexp, preg, token, syntax, nest, err); - if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0)) - *err = REG_EPAREN; - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - dfa->completed_bkref_map |= 1 << cur_nsub; - - tree = create_tree (dfa, tree, NULL, SUBEXP); - if (BE (tree == NULL, 0)) - { - *err = REG_ESPACE; - return NULL; - } - tree->token.opr.idx = cur_nsub; - return tree; -} - -/* This function parse repetition operators like "*", "+", "{1,3}" etc. */ - -static bin_tree_t * -parse_dup_op (elem, regexp, dfa, token, syntax, err) - bin_tree_t *elem; - re_string_t *regexp; - re_dfa_t *dfa; - re_token_t *token; - reg_syntax_t syntax; - reg_errcode_t *err; -{ - bin_tree_t *tree = NULL, *old_tree = NULL; - int i, start, end, start_idx = re_string_cur_idx (regexp); - re_token_t start_token = *token; - - if (token->type == OP_OPEN_DUP_NUM) - { - end = 0; - start = fetch_number (regexp, token, syntax); - if (start == -1) - { - if (token->type == CHARACTER && token->opr.c == ',') - start = 0; /* We treat "{,m}" as "{0,m}". */ - else - { - *err = REG_BADBR; /* <re>{} is invalid. */ - return NULL; - } - } - if (BE (start != -2, 1)) - { - /* We treat "{n}" as "{n,n}". */ - end = ((token->type == OP_CLOSE_DUP_NUM) ? start - : ((token->type == CHARACTER && token->opr.c == ',') - ? fetch_number (regexp, token, syntax) : -2)); - } - if (BE (start == -2 || end == -2, 0)) - { - /* Invalid sequence. */ - if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0)) - { - if (token->type == END_OF_RE) - *err = REG_EBRACE; - else - *err = REG_BADBR; - - return NULL; - } - - /* If the syntax bit is set, rollback. */ - re_string_set_index (regexp, start_idx); - *token = start_token; - token->type = CHARACTER; - /* mb_partial and word_char bits should be already initialized by - peek_token. */ - return elem; - } - - if (BE (end != -1 && start > end, 0)) - { - /* First number greater than second. */ - *err = REG_BADBR; - return NULL; - } - } - else - { - start = (token->type == OP_DUP_PLUS) ? 1 : 0; - end = (token->type == OP_DUP_QUESTION) ? 1 : -1; - } - - fetch_token (token, regexp, syntax); - - if (BE (elem == NULL, 0)) - return NULL; - if (BE (start == 0 && end == 0, 0)) - { - postorder (elem, free_tree, NULL); - return NULL; - } - - /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */ - if (BE (start > 0, 0)) - { - tree = elem; - for (i = 2; i <= start; ++i) - { - elem = duplicate_tree (elem, dfa); - tree = create_tree (dfa, tree, elem, CONCAT); - if (BE (elem == NULL || tree == NULL, 0)) - goto parse_dup_op_espace; - } - - if (start == end) - return tree; - - /* Duplicate ELEM before it is marked optional. */ - elem = duplicate_tree (elem, dfa); - old_tree = tree; - } - else - old_tree = NULL; - - if (elem->token.type == SUBEXP) - postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx); - - tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT)); - if (BE (tree == NULL, 0)) - goto parse_dup_op_espace; - - /* This loop is actually executed only when end != -1, - to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have - already created the start+1-th copy. */ - for (i = start + 2; i <= end; ++i) - { - elem = duplicate_tree (elem, dfa); - tree = create_tree (dfa, tree, elem, CONCAT); - if (BE (elem == NULL || tree == NULL, 0)) - goto parse_dup_op_espace; - - tree = create_tree (dfa, tree, NULL, OP_ALT); - if (BE (tree == NULL, 0)) - goto parse_dup_op_espace; - } - - if (old_tree) - tree = create_tree (dfa, old_tree, tree, CONCAT); - - return tree; - - parse_dup_op_espace: - *err = REG_ESPACE; - return NULL; -} - -/* Size of the names for collating symbol/equivalence_class/character_class. - I'm not sure, but maybe enough. */ -#define BRACKET_NAME_BUF_SIZE 32 - -#ifndef _LIBC - /* Local function for parse_bracket_exp only used in case of NOT _LIBC. - Build the range expression which starts from START_ELEM, and ends - at END_ELEM. The result are written to MBCSET and SBCSET. - RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may - update it. */ - -static reg_errcode_t -# ifdef RE_ENABLE_I18N -build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; -# else /* not RE_ENABLE_I18N */ -build_range_exp (sbcset, start_elem, end_elem) -# endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - bracket_elem_t *start_elem, *end_elem; -{ - unsigned int start_ch, end_ch; - /* Equivalence Classes and Character Classes can't be a range start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) - return REG_ERANGE; - - /* We can handle no multi character collating elements without libc - support. */ - if (BE ((start_elem->type == COLL_SYM - && strlen ((char *) start_elem->opr.name) > 1) - || (end_elem->type == COLL_SYM - && strlen ((char *) end_elem->opr.name) > 1), 0)) - return REG_ECOLLATE; - -# ifdef RE_ENABLE_I18N - { - wchar_t wc, start_wc, end_wc; - wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - - start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch - : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] - : 0)); - end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch - : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] - : 0)); - start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM) - ? __btowc (start_ch) : start_elem->opr.wch); - end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM) - ? __btowc (end_ch) : end_elem->opr.wch); - if (start_wc == WEOF || end_wc == WEOF) - return REG_ECOLLATE; - cmp_buf[0] = start_wc; - cmp_buf[4] = end_wc; - if (wcscoll (cmp_buf, cmp_buf + 4) > 0) - return REG_ERANGE; - - /* Got valid collation sequence values, add them as a new entry. - However, for !_LIBC we have no collation elements: if the - character set is single byte, the single byte character set - that we build below suffices. parse_bracket_exp passes - no MBCSET if dfa->mb_cur_max == 1. */ - if (mbcset) - { - /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) - { - /* There is not enough space, need realloc. */ - wchar_t *new_array_start, *new_array_end; - int new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - /* Use realloc since mbcset->range_starts and mbcset->range_ends - are NULL if *range_alloc == 0. */ - new_array_start = re_realloc (mbcset->range_starts, wchar_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, wchar_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } - - mbcset->range_starts[mbcset->nranges] = start_wc; - mbcset->range_ends[mbcset->nranges++] = end_wc; - } - - /* Build the table for single byte characters. */ - for (wc = 0; wc < SBC_MAX; ++wc) - { - cmp_buf[2] = wc; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) - bitset_set (sbcset, wc); - } - } -# else /* not RE_ENABLE_I18N */ - { - unsigned int ch; - start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch - : ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0] - : 0)); - end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch - : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0] - : 0)); - if (start_ch > end_ch) - return REG_ERANGE; - /* Build the table for single byte characters. */ - for (ch = 0; ch < SBC_MAX; ++ch) - if (start_ch <= ch && ch <= end_ch) - bitset_set (sbcset, ch); - } -# endif /* not RE_ENABLE_I18N */ - return REG_NOERROR; -} -#endif /* not _LIBC */ - -#ifndef _LIBC -/* Helper function for parse_bracket_exp only used in case of NOT _LIBC.. - Build the collating element which is represented by NAME. - The result are written to MBCSET and SBCSET. - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument since we may update it. */ - -static reg_errcode_t -# ifdef RE_ENABLE_I18N -build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; -# else /* not RE_ENABLE_I18N */ -build_collating_symbol (sbcset, name) -# endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; -{ - size_t name_len = strlen ((const char *) name); - if (BE (name_len != 1, 0)) - return REG_ECOLLATE; - else - { - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } -} -#endif /* not _LIBC */ - -/* This function parse bracket expression like "[abc]", "[a-c]", - "[[.a-a.]]" etc. */ - -static bin_tree_t * -parse_bracket_exp (regexp, dfa, token, syntax, err) - re_string_t *regexp; - re_dfa_t *dfa; - re_token_t *token; - reg_syntax_t syntax; - reg_errcode_t *err; -{ -#ifdef _LIBC - const unsigned char *collseqmb; - const char *collseqwc; - uint32_t nrules; - int32_t table_size; - const int32_t *symb_table; - const unsigned char *extra; - - /* Local function for parse_bracket_exp used in _LIBC environement. - Seek the collating symbol entry correspondings to NAME. - Return the index of the symbol in the SYMB_TABLE. */ - - auto inline int32_t - __attribute ((always_inline)) - seek_collating_symbol_entry (name, name_len) - const unsigned char *name; - size_t name_len; - { - int32_t hash = elem_hash ((const char *) name, name_len); - int32_t elem = hash % table_size; - int32_t second = hash % (table_size - 2); - while (symb_table[2 * elem] != 0) - { - /* First compare the hashing value. */ - if (symb_table[2 * elem] == hash - /* Compare the length of the name. */ - && name_len == extra[symb_table[2 * elem + 1]] - /* Compare the name. */ - && memcmp (name, &extra[symb_table[2 * elem + 1] + 1], - name_len) == 0) - { - /* Yep, this is the entry. */ - break; - } - - /* Next entry. */ - elem += second; - } - return elem; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Look up the collation sequence value of BR_ELEM. - Return the value if succeeded, UINT_MAX otherwise. */ - - auto inline unsigned int - __attribute ((always_inline)) - lookup_collation_sequence_value (br_elem) - bracket_elem_t *br_elem; - { - if (br_elem->type == SB_CHAR) - { - /* - if (MB_CUR_MAX == 1) - */ - if (nrules == 0) - return collseqmb[br_elem->opr.ch]; - else - { - wint_t wc = __btowc (br_elem->opr.ch); - return __collseq_table_lookup (collseqwc, wc); - } - } - else if (br_elem->type == MB_CHAR) - { - return __collseq_table_lookup (collseqwc, br_elem->opr.wch); - } - else if (br_elem->type == COLL_SYM) - { - size_t sym_name_len = strlen ((char *) br_elem->opr.name); - if (nrules != 0) - { - int32_t elem, idx; - elem = seek_collating_symbol_entry (br_elem->opr.name, - sym_name_len); - if (symb_table[2 * elem] != 0) - { - /* We found the entry. */ - idx = symb_table[2 * elem + 1]; - /* Skip the name of collating element name. */ - idx += 1 + extra[idx]; - /* Skip the byte sequence of the collating element. */ - idx += 1 + extra[idx]; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - /* Skip the multibyte collation sequence value. */ - idx += sizeof (unsigned int); - /* Skip the wide char sequence of the collating element. */ - idx += sizeof (unsigned int) * - (1 + *(unsigned int *) (extra + idx)); - /* Return the collation sequence value. */ - return *(unsigned int *) (extra + idx); - } - else if (symb_table[2 * elem] == 0 && sym_name_len == 1) - { - /* No valid character. Match it as a single byte - character. */ - return collseqmb[br_elem->opr.name[0]]; - } - } - else if (sym_name_len == 1) - return collseqmb[br_elem->opr.name[0]]; - } - return UINT_MAX; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Build the range expression which starts from START_ELEM, and ends - at END_ELEM. The result are written to MBCSET and SBCSET. - RANGE_ALLOC is the allocated size of mbcset->range_starts, and - mbcset->range_ends, is a pointer argument sinse we may - update it. */ - - auto inline reg_errcode_t - __attribute ((always_inline)) - build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem) - re_charset_t *mbcset; - int *range_alloc; - re_bitset_ptr_t sbcset; - bracket_elem_t *start_elem, *end_elem; - { - unsigned int ch; - uint32_t start_collseq; - uint32_t end_collseq; - - /* Equivalence Classes and Character Classes can't be a range - start/end. */ - if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS - || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS, - 0)) - return REG_ERANGE; - - start_collseq = lookup_collation_sequence_value (start_elem); - end_collseq = lookup_collation_sequence_value (end_elem); - /* Check start/end collation sequence values. */ - if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0)) - return REG_ECOLLATE; - if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0)) - return REG_ERANGE; - - /* Got valid collation sequence values, add them as a new entry. - However, if we have no collation elements, and the character set - is single byte, the single byte character set that we - build below suffices. */ - if (nrules > 0 || dfa->mb_cur_max > 1) - { - /* Check the space of the arrays. */ - if (BE (*range_alloc == mbcset->nranges, 0)) - { - /* There is not enough space, need realloc. */ - uint32_t *new_array_start; - uint32_t *new_array_end; - int new_nranges; - - /* +1 in case of mbcset->nranges is 0. */ - new_nranges = 2 * mbcset->nranges + 1; - new_array_start = re_realloc (mbcset->range_starts, uint32_t, - new_nranges); - new_array_end = re_realloc (mbcset->range_ends, uint32_t, - new_nranges); - - if (BE (new_array_start == NULL || new_array_end == NULL, 0)) - return REG_ESPACE; - - mbcset->range_starts = new_array_start; - mbcset->range_ends = new_array_end; - *range_alloc = new_nranges; - } - - mbcset->range_starts[mbcset->nranges] = start_collseq; - mbcset->range_ends[mbcset->nranges++] = end_collseq; - } - - /* Build the table for single byte characters. */ - for (ch = 0; ch < SBC_MAX; ch++) - { - uint32_t ch_collseq; - /* - if (MB_CUR_MAX == 1) - */ - if (nrules == 0) - ch_collseq = collseqmb[ch]; - else - ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch)); - if (start_collseq <= ch_collseq && ch_collseq <= end_collseq) - bitset_set (sbcset, ch); - } - return REG_NOERROR; - } - - /* Local function for parse_bracket_exp used in _LIBC environement. - Build the collating element which is represented by NAME. - The result are written to MBCSET and SBCSET. - COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a - pointer argument sinse we may update it. */ - - auto inline reg_errcode_t - __attribute ((always_inline)) - build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name) - re_charset_t *mbcset; - int *coll_sym_alloc; - re_bitset_ptr_t sbcset; - const unsigned char *name; - { - int32_t elem, idx; - size_t name_len = strlen ((const char *) name); - if (nrules != 0) - { - elem = seek_collating_symbol_entry (name, name_len); - if (symb_table[2 * elem] != 0) - { - /* We found the entry. */ - idx = symb_table[2 * elem + 1]; - /* Skip the name of collating element name. */ - idx += 1 + extra[idx]; - } - else if (symb_table[2 * elem] == 0 && name_len == 1) - { - /* No valid character, treat it as a normal - character. */ - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } - else - return REG_ECOLLATE; - - /* Got valid collation sequence, add it as a new entry. */ - /* Check the space of the arrays. */ - if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0)) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->ncoll_syms is 0. */ - int new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1; - /* Use realloc since mbcset->coll_syms is NULL - if *alloc == 0. */ - int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t, - new_coll_sym_alloc); - if (BE (new_coll_syms == NULL, 0)) - return REG_ESPACE; - mbcset->coll_syms = new_coll_syms; - *coll_sym_alloc = new_coll_sym_alloc; - } - mbcset->coll_syms[mbcset->ncoll_syms++] = idx; - return REG_NOERROR; - } - else - { - if (BE (name_len != 1, 0)) - return REG_ECOLLATE; - else - { - bitset_set (sbcset, name[0]); - return REG_NOERROR; - } - } - } -#endif - - re_token_t br_token; - re_bitset_ptr_t sbcset; -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; - int coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0; - int equiv_class_alloc = 0, char_class_alloc = 0; -#endif /* not RE_ENABLE_I18N */ - int non_match = 0; - bin_tree_t *work_tree; - int token_len; - int first_round = 1; -#ifdef _LIBC - collseqmb = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); - nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules) - { - /* - if (MB_CUR_MAX > 1) - */ - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); - table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB); - symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_TABLEMB); - extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_SYMB_EXTRAMB); - } -#endif - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else - if (BE (sbcset == NULL, 0)) -#endif /* RE_ENABLE_I18N */ - { - *err = REG_ESPACE; - return NULL; - } - - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_NON_MATCH_LIST) - { -#ifdef RE_ENABLE_I18N - mbcset->non_match = 1; -#endif /* not RE_ENABLE_I18N */ - non_match = 1; - if (syntax & RE_HAT_LISTS_NOT_NEWLINE) - bitset_set (sbcset, '\0'); - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - token_len = peek_token_bracket (token, regexp, syntax); - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_BADPAT; - goto parse_bracket_exp_free_return; - } - } - - /* We treat the first ']' as a normal character. */ - if (token->type == OP_CLOSE_BRACKET) - token->type = CHARACTER; - - while (1) - { - bracket_elem_t start_elem, end_elem; - unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE]; - unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE]; - reg_errcode_t ret; - int token_len2 = 0, is_range_exp = 0; - re_token_t token2; - - start_elem.opr.name = start_name_buf; - ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa, - syntax, first_round); - if (BE (ret != REG_NOERROR, 0)) - { - *err = ret; - goto parse_bracket_exp_free_return; - } - first_round = 0; - - /* Get information about the next token. We need it in any case. */ - token_len = peek_token_bracket (token, regexp, syntax); - - /* Do not check for ranges if we know they are not allowed. */ - if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS) - { - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_EBRACK; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_CHARSET_RANGE) - { - re_string_skip_bytes (regexp, token_len); /* Skip '-'. */ - token_len2 = peek_token_bracket (&token2, regexp, syntax); - if (BE (token2.type == END_OF_RE, 0)) - { - *err = REG_EBRACK; - goto parse_bracket_exp_free_return; - } - if (token2.type == OP_CLOSE_BRACKET) - { - /* We treat the last '-' as a normal character. */ - re_string_skip_bytes (regexp, -token_len); - token->type = CHARACTER; - } - else - is_range_exp = 1; - } - } - - if (is_range_exp == 1) - { - end_elem.opr.name = end_name_buf; - ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2, - dfa, syntax, 1); - if (BE (ret != REG_NOERROR, 0)) - { - *err = ret; - goto parse_bracket_exp_free_return; - } - - token_len = peek_token_bracket (token, regexp, syntax); - -#ifdef _LIBC - *err = build_range_exp (sbcset, mbcset, &range_alloc, - &start_elem, &end_elem); -#else -# ifdef RE_ENABLE_I18N - *err = build_range_exp (sbcset, - dfa->mb_cur_max > 1 ? mbcset : NULL, - &range_alloc, &start_elem, &end_elem); -# else - *err = build_range_exp (sbcset, &start_elem, &end_elem); -# endif -#endif /* RE_ENABLE_I18N */ - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - } - else - { - switch (start_elem.type) - { - case SB_CHAR: - bitset_set (sbcset, start_elem.opr.ch); - break; -#ifdef RE_ENABLE_I18N - case MB_CHAR: - /* Check whether the array has enough space. */ - if (BE (mbchar_alloc == mbcset->nmbchars, 0)) - { - wchar_t *new_mbchars; - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nmbchars is 0. */ - mbchar_alloc = 2 * mbcset->nmbchars + 1; - /* Use realloc since array is NULL if *alloc == 0. */ - new_mbchars = re_realloc (mbcset->mbchars, wchar_t, - mbchar_alloc); - if (BE (new_mbchars == NULL, 0)) - goto parse_bracket_exp_espace; - mbcset->mbchars = new_mbchars; - } - mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch; - break; -#endif /* RE_ENABLE_I18N */ - case EQUIV_CLASS: - *err = build_equiv_class (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &equiv_class_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - case COLL_SYM: - *err = build_collating_symbol (sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &coll_sym_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - case CHAR_CLASS: - *err = build_charclass (regexp->trans, sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &char_class_alloc, -#endif /* RE_ENABLE_I18N */ - start_elem.opr.name, syntax); - if (BE (*err != REG_NOERROR, 0)) - goto parse_bracket_exp_free_return; - break; - default: - assert (0); - break; - } - } - if (BE (token->type == END_OF_RE, 0)) - { - *err = REG_EBRACK; - goto parse_bracket_exp_free_return; - } - if (token->type == OP_CLOSE_BRACKET) - break; - } - - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - - /* If it is non-matching list. */ - if (non_match) - bitset_not (sbcset); - -#ifdef RE_ENABLE_I18N - /* Ensure only single byte characters are set. */ - if (dfa->mb_cur_max > 1) - bitset_mask (sbcset, dfa->sb_char); - - if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes - || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes - || mbcset->non_match))) - { - bin_tree_t *mbc_tree; - int sbc_idx; - /* Build a tree for complex bracket. */ - dfa->has_mb_node = 1; - br_token.type = COMPLEX_BRACKET; - br_token.opr.mbcset = mbcset; - mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) - goto parse_bracket_exp_espace; - for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx) - if (sbcset[sbc_idx]) - break; - /* If there are no bits set in sbcset, there is no point - of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */ - if (sbc_idx < BITSET_UINTS) - { - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - work_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - - /* Then join them by ALT node. */ - work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - } - else - { - re_free (sbcset); - work_tree = mbc_tree; - } - } - else -#endif /* not RE_ENABLE_I18N */ - { -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - work_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (work_tree == NULL, 0)) - goto parse_bracket_exp_espace; - } - return work_tree; - - parse_bracket_exp_espace: - *err = REG_ESPACE; - parse_bracket_exp_free_return: - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - return NULL; -} - -/* Parse an element in the bracket expression. */ - -static reg_errcode_t -parse_bracket_element (elem, regexp, token, token_len, dfa, syntax, - accept_hyphen) - bracket_elem_t *elem; - re_string_t *regexp; - re_token_t *token; - int token_len; - re_dfa_t *dfa; - reg_syntax_t syntax; - int accept_hyphen; -{ -#ifdef RE_ENABLE_I18N - int cur_char_size; - cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp)); - if (cur_char_size > 1) - { - elem->type = MB_CHAR; - elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp)); - re_string_skip_bytes (regexp, cur_char_size); - return REG_NOERROR; - } -#endif /* RE_ENABLE_I18N */ - re_string_skip_bytes (regexp, token_len); /* Skip a token. */ - if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS - || token->type == OP_OPEN_EQUIV_CLASS) - return parse_bracket_symbol (elem, regexp, token); - if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen) - { - /* A '-' must only appear as anything but a range indicator before - the closing bracket. Everything else is an error. */ - re_token_t token2; - (void) peek_token_bracket (&token2, regexp, syntax); - if (token2.type != OP_CLOSE_BRACKET) - /* The actual error value is not standardized since this whole - case is undefined. But ERANGE makes good sense. */ - return REG_ERANGE; - } - elem->type = SB_CHAR; - elem->opr.ch = token->opr.c; - return REG_NOERROR; -} - -/* Parse a bracket symbol in the bracket expression. Bracket symbols are - such as [:<character_class>:], [.<collating_element>.], and - [=<equivalent_class>=]. */ - -static reg_errcode_t -parse_bracket_symbol (elem, regexp, token) - bracket_elem_t *elem; - re_string_t *regexp; - re_token_t *token; -{ - unsigned char ch, delim = token->opr.c; - int i = 0; - if (re_string_eoi(regexp)) - return REG_EBRACK; - for (;; ++i) - { - if (i >= BRACKET_NAME_BUF_SIZE) - return REG_EBRACK; - if (token->type == OP_OPEN_CHAR_CLASS) - ch = re_string_fetch_byte_case (regexp); - else - ch = re_string_fetch_byte (regexp); - if (re_string_eoi(regexp)) - return REG_EBRACK; - if (ch == delim && re_string_peek_byte (regexp, 0) == ']') - break; - elem->opr.name[i] = ch; - } - re_string_skip_bytes (regexp, 1); - elem->opr.name[i] = '\0'; - switch (token->type) - { - case OP_OPEN_COLL_ELEM: - elem->type = COLL_SYM; - break; - case OP_OPEN_EQUIV_CLASS: - elem->type = EQUIV_CLASS; - break; - case OP_OPEN_CHAR_CLASS: - elem->type = CHAR_CLASS; - break; - default: - break; - } - return REG_NOERROR; -} - - /* Helper function for parse_bracket_exp. - Build the equivalence class which is represented by NAME. - The result are written to MBCSET and SBCSET. - EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes, - is a pointer argument sinse we may update it. */ - -static reg_errcode_t -#ifdef RE_ENABLE_I18N -build_equiv_class (sbcset, mbcset, equiv_class_alloc, name) - re_charset_t *mbcset; - int *equiv_class_alloc; -#else /* not RE_ENABLE_I18N */ -build_equiv_class (sbcset, name) -#endif /* not RE_ENABLE_I18N */ - re_bitset_ptr_t sbcset; - const unsigned char *name; -{ -#if defined _LIBC - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules != 0) - { - const int32_t *table, *indirect; - const unsigned char *weights, *extra, *cp; - unsigned char char_buf[2]; - int32_t idx1, idx2; - unsigned int ch; - size_t len; - /* This #include defines a local function! */ -# include <locale/weight.h> - /* Calculate the index for equivalence class. */ - cp = name; - table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_INDIRECTMB); - idx1 = findidx (&cp); - if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0)) - /* This isn't a valid character. */ - return REG_ECOLLATE; - - /* Build single byte matcing table for this equivalence class. */ - char_buf[1] = (unsigned char) '\0'; - len = weights[idx1]; - for (ch = 0; ch < SBC_MAX; ++ch) - { - char_buf[0] = ch; - cp = char_buf; - idx2 = findidx (&cp); -/* - idx2 = table[ch]; -*/ - if (idx2 == 0) - /* This isn't a valid character. */ - continue; - if (len == weights[idx2]) - { - int cnt = 0; - while (cnt <= len && - weights[idx1 + 1 + cnt] == weights[idx2 + 1 + cnt]) - ++cnt; - - if (cnt > len) - bitset_set (sbcset, ch); - } - } - /* Check whether the array has enough space. */ - if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0)) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nequiv_classes is 0. */ - int new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1; - /* Use realloc since the array is NULL if *alloc == 0. */ - int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes, - int32_t, - new_equiv_class_alloc); - if (BE (new_equiv_classes == NULL, 0)) - return REG_ESPACE; - mbcset->equiv_classes = new_equiv_classes; - *equiv_class_alloc = new_equiv_class_alloc; - } - mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1; - } - else -#endif /* _LIBC */ - { - if (BE (strlen ((const char *) name) != 1, 0)) - return REG_ECOLLATE; - bitset_set (sbcset, *name); - } - return REG_NOERROR; -} - - /* Helper function for parse_bracket_exp. - Build the character class which is represented by NAME. - The result are written to MBCSET and SBCSET. - CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes, - is a pointer argument sinse we may update it. */ - -static reg_errcode_t -#ifdef RE_ENABLE_I18N -build_charclass (trans, sbcset, mbcset, char_class_alloc, class_name, syntax) - re_charset_t *mbcset; - int *char_class_alloc; -#else /* not RE_ENABLE_I18N */ -build_charclass (trans, sbcset, class_name, syntax) -#endif /* not RE_ENABLE_I18N */ - unsigned RE_TRANSLATE_TYPE trans; - re_bitset_ptr_t sbcset; - const unsigned char *class_name; - reg_syntax_t syntax; -{ - int i; - const char *name = (const char *) class_name; - - /* In case of REG_ICASE "upper" and "lower" match the both of - upper and lower cases. */ - if ((syntax & RE_ICASE) - && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0)) - name = "alpha"; - -#ifdef RE_ENABLE_I18N - /* Check the space of the arrays. */ - if (BE (*char_class_alloc == mbcset->nchar_classes, 0)) - { - /* Not enough, realloc it. */ - /* +1 in case of mbcset->nchar_classes is 0. */ - int new_char_class_alloc = 2 * mbcset->nchar_classes + 1; - /* Use realloc since array is NULL if *alloc == 0. */ - wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t, - new_char_class_alloc); - if (BE (new_char_classes == NULL, 0)) - return REG_ESPACE; - mbcset->char_classes = new_char_classes; - *char_class_alloc = new_char_class_alloc; - } - mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name); -#endif /* RE_ENABLE_I18N */ - -#define BUILD_CHARCLASS_LOOP(ctype_func) \ - for (i = 0; i < SBC_MAX; ++i) \ - { \ - if (ctype_func (i)) \ - { \ - int ch = trans ? trans[i] : i; \ - bitset_set (sbcset, ch); \ - } \ - } - - if (strcmp (name, "alnum") == 0) - BUILD_CHARCLASS_LOOP (isalnum) - else if (strcmp (name, "cntrl") == 0) - BUILD_CHARCLASS_LOOP (iscntrl) - else if (strcmp (name, "lower") == 0) - BUILD_CHARCLASS_LOOP (islower) - else if (strcmp (name, "space") == 0) - BUILD_CHARCLASS_LOOP (isspace) - else if (strcmp (name, "alpha") == 0) - BUILD_CHARCLASS_LOOP (isalpha) - else if (strcmp (name, "digit") == 0) - BUILD_CHARCLASS_LOOP (isdigit) - else if (strcmp (name, "print") == 0) - BUILD_CHARCLASS_LOOP (isprint) - else if (strcmp (name, "upper") == 0) - BUILD_CHARCLASS_LOOP (isupper) - else if (strcmp (name, "blank") == 0) - BUILD_CHARCLASS_LOOP (isblank) - else if (strcmp (name, "graph") == 0) - BUILD_CHARCLASS_LOOP (isgraph) - else if (strcmp (name, "punct") == 0) - BUILD_CHARCLASS_LOOP (ispunct) - else if (strcmp (name, "xdigit") == 0) - BUILD_CHARCLASS_LOOP (isxdigit) - else - return REG_ECTYPE; - - return REG_NOERROR; -} - -static bin_tree_t * -build_charclass_op (dfa, trans, class_name, extra, non_match, err) - re_dfa_t *dfa; - unsigned RE_TRANSLATE_TYPE trans; - const unsigned char *class_name; - const unsigned char *extra; - int non_match; - reg_errcode_t *err; -{ - re_bitset_ptr_t sbcset; -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; - int alloc = 0; -#endif /* not RE_ENABLE_I18N */ - reg_errcode_t ret; - re_token_t br_token; - bin_tree_t *tree; - - sbcset = (re_bitset_ptr_t) calloc (sizeof (unsigned int), BITSET_UINTS); -#ifdef RE_ENABLE_I18N - mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); -#endif /* RE_ENABLE_I18N */ - -#ifdef RE_ENABLE_I18N - if (BE (sbcset == NULL || mbcset == NULL, 0)) -#else /* not RE_ENABLE_I18N */ - if (BE (sbcset == NULL, 0)) -#endif /* not RE_ENABLE_I18N */ - { - *err = REG_ESPACE; - return NULL; - } - - if (non_match) - { -#ifdef RE_ENABLE_I18N - /* - if (syntax & RE_HAT_LISTS_NOT_NEWLINE) - bitset_set(cset->sbcset, '\0'); - */ - mbcset->non_match = 1; -#endif /* not RE_ENABLE_I18N */ - } - - /* We don't care the syntax in this case. */ - ret = build_charclass (trans, sbcset, -#ifdef RE_ENABLE_I18N - mbcset, &alloc, -#endif /* RE_ENABLE_I18N */ - class_name, 0); - - if (BE (ret != REG_NOERROR, 0)) - { - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - *err = ret; - return NULL; - } - /* \w match '_' also. */ - for (; *extra; extra++) - bitset_set (sbcset, *extra); - - /* If it is non-matching list. */ - if (non_match) - bitset_not (sbcset); - -#ifdef RE_ENABLE_I18N - /* Ensure only single byte characters are set. */ - if (dfa->mb_cur_max > 1) - bitset_mask (sbcset, dfa->sb_char); -#endif - - /* Build a tree for simple bracket. */ - br_token.type = SIMPLE_BRACKET; - br_token.opr.sbcset = sbcset; - tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (tree == NULL, 0)) - goto build_word_op_espace; - -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - bin_tree_t *mbc_tree; - /* Build a tree for complex bracket. */ - br_token.type = COMPLEX_BRACKET; - br_token.opr.mbcset = mbcset; - dfa->has_mb_node = 1; - mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token); - if (BE (mbc_tree == NULL, 0)) - goto build_word_op_espace; - /* Then join them by ALT node. */ - tree = create_tree (dfa, tree, mbc_tree, OP_ALT); - if (BE (mbc_tree != NULL, 1)) - return tree; - } - else - { - free_charset (mbcset); - return tree; - } -#else /* not RE_ENABLE_I18N */ - return tree; -#endif /* not RE_ENABLE_I18N */ - - build_word_op_espace: - re_free (sbcset); -#ifdef RE_ENABLE_I18N - free_charset (mbcset); -#endif /* RE_ENABLE_I18N */ - *err = REG_ESPACE; - return NULL; -} - -/* This is intended for the expressions like "a{1,3}". - Fetch a number from `input', and return the number. - Return -1, if the number field is empty like "{,1}". - Return -2, If an error is occured. */ - -static int -fetch_number (input, token, syntax) - re_string_t *input; - re_token_t *token; - reg_syntax_t syntax; -{ - int num = -1; - unsigned char c; - while (1) - { - fetch_token (token, input, syntax); - c = token->opr.c; - if (BE (token->type == END_OF_RE, 0)) - return -2; - if (token->type == OP_CLOSE_DUP_NUM || c == ',') - break; - num = ((token->type != CHARACTER || c < '0' || '9' < c || num == -2) - ? -2 : ((num == -1) ? c - '0' : num * 10 + c - '0')); - num = (num > RE_DUP_MAX) ? -2 : num; - } - return num; -} - -#ifdef RE_ENABLE_I18N -static void -free_charset (re_charset_t *cset) -{ - re_free (cset->mbchars); -# ifdef _LIBC - re_free (cset->coll_syms); - re_free (cset->equiv_classes); - re_free (cset->range_starts); - re_free (cset->range_ends); -# endif - re_free (cset->char_classes); - re_free (cset); -} -#endif /* RE_ENABLE_I18N */ - -/* Functions for binary tree operation. */ - -/* Create a tree node. */ - -static bin_tree_t * -create_tree (dfa, left, right, type) - re_dfa_t *dfa; - bin_tree_t *left; - bin_tree_t *right; - re_token_type_t type; -{ - re_token_t t; - t.type = type; - return create_token_tree (dfa, left, right, &t); -} - -static bin_tree_t * -create_token_tree (dfa, left, right, token) - re_dfa_t *dfa; - bin_tree_t *left; - bin_tree_t *right; - const re_token_t *token; -{ - bin_tree_t *tree; - if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0)) - { - bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1); - - if (storage == NULL) - return NULL; - storage->next = dfa->str_tree_storage; - dfa->str_tree_storage = storage; - dfa->str_tree_storage_idx = 0; - } - tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++]; - - tree->parent = NULL; - tree->left = left; - tree->right = right; - tree->token = *token; - tree->token.duplicated = 0; - tree->token.opt_subexp = 0; - tree->first = NULL; - tree->next = NULL; - tree->node_idx = -1; - - if (left != NULL) - left->parent = tree; - if (right != NULL) - right->parent = tree; - return tree; -} - -/* Mark the tree SRC as an optional subexpression. - To be called from preorder or postorder. */ - -static reg_errcode_t -mark_opt_subexp (extra, node) - void *extra; - bin_tree_t *node; -{ - int idx = (int) (long) extra; - if (node->token.type == SUBEXP && node->token.opr.idx == idx) - node->token.opt_subexp = 1; - - return REG_NOERROR; -} - -/* Free the allocated memory inside NODE. */ - -static void -free_token (re_token_t *node) -{ -#ifdef RE_ENABLE_I18N - if (node->type == COMPLEX_BRACKET && node->duplicated == 0) - free_charset (node->opr.mbcset); - else -#endif /* RE_ENABLE_I18N */ - if (node->type == SIMPLE_BRACKET && node->duplicated == 0) - re_free (node->opr.sbcset); -} - -/* Worker function for tree walking. Free the allocated memory inside NODE - and its children. */ - -static reg_errcode_t -free_tree (void *extra, bin_tree_t *node) -{ - free_token (&node->token); - return REG_NOERROR; -} - - -/* Duplicate the node SRC, and return new node. This is a preorder - visit similar to the one implemented by the generic visitor, but - we need more infrastructure to maintain two parallel trees --- so, - it's easier to duplicate. */ - -static bin_tree_t * -duplicate_tree (root, dfa) - const bin_tree_t *root; - re_dfa_t *dfa; -{ - const bin_tree_t *node; - bin_tree_t *dup_root; - bin_tree_t **p_new = &dup_root, *dup_node = root->parent; - - for (node = root; ; ) - { - /* Create a new tree and link it back to the current parent. */ - *p_new = create_token_tree (dfa, NULL, NULL, &node->token); - if (*p_new == NULL) - return NULL; - (*p_new)->parent = dup_node; - (*p_new)->token.duplicated = 1; - dup_node = *p_new; - - /* Go to the left node, or up and to the right. */ - if (node->left) - { - node = node->left; - p_new = &dup_node->left; - } - else - { - const bin_tree_t *prev = NULL; - while (node->right == prev || node->right == NULL) - { - prev = node; - node = node->parent; - dup_node = dup_node->parent; - if (!node) - return dup_root; - } - node = node->right; - p_new = &dup_node->right; - } - } -} diff --git a/gnu/lib/libregex/regex.c b/gnu/lib/libregex/regex.c deleted file mode 100644 index df7abe2..0000000 --- a/gnu/lib/libregex/regex.c +++ /dev/null @@ -1,98 +0,0 @@ -/* $FreeBSD$ */ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef _AIX -#pragma alloca -#else -# ifndef allocax /* predefined by HP cc +Olibcalls */ -# ifdef __GNUC__ -# define alloca(size) __builtin_alloca (size) -# else -# if HAVE_ALLOCA_H -# include <alloca.h> -# else -# ifdef __hpux - void *alloca (); -# else -# if !defined __OS2__ && !defined WIN32 - char *alloca (); -# else -# include <malloc.h> /* OS/2 defines alloca in here */ -# endif -# endif -# endif -# endif -# endif -#endif - -#ifdef _LIBC -/* We have to keep the namespace clean. */ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(errcode, preg, errbuf, errbuf_size) \ - __regerror(errcode, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ - __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ - __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ - __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ - __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ - __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ - __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) - -# include "../locale/localeinfo.h" -#endif - -/* POSIX says that <sys/types.h> must be included (by the caller) before - <regex.h>. */ -#include <sys/types.h> - -/* On some systems, limits.h sets RE_DUP_MAX to a lower value than - GNU regex allows. Include it before <regex.h>, which correctly - #undefs RE_DUP_MAX and sets it to the right value. */ -#include <limits.h> - -#include <regex.h> -#include "regex_internal.h" - -#include "regex_internal.c" -#include "regcomp.c" -#include "regexec.c" - -/* Binary backward compatibility. */ -#if _LIBC -# include <shlib-compat.h> -# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3) -link_warning (re_max_failures, "the 're_max_failures' variable is obsolete and will go away.") -int re_max_failures = 2000; -# endif -#endif diff --git a/gnu/lib/libregex/regex.h b/gnu/lib/libregex/regex.h index 364966d..85410a2 100644 --- a/gnu/lib/libregex/regex.h +++ b/gnu/lib/libregex/regex.h @@ -1,47 +1,54 @@ /* $FreeBSD$ */ #ifndef _REGEX_H + +#ifndef __USE_GNU +#define __USE_GNU +#endif + #include <posix/regex.h> /* Document internal interfaces. */ -extern reg_syntax_t __re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); +extern reg_syntax_t __re_set_syntax (reg_syntax_t __syntax); -extern const char *__re_compile_pattern - _RE_ARGS ((const char *pattern, size_t length, - struct re_pattern_buffer *buffer)); +extern const char *__re_compile_pattern (const char *__pattern, size_t __length, + struct re_pattern_buffer *__buffer); -extern int __re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); +extern int __re_compile_fastmap (struct re_pattern_buffer *__buffer); -extern int __re_search - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, int range, struct re_registers *regs)); +extern int __re_search (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, int __range, + struct re_registers *__regs); -extern int __re_search_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, int range, struct re_registers *regs, int stop)); +extern int __re_search_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + int __range, struct re_registers *__regs, int __stop); -extern int __re_match - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, - int length, int start, struct re_registers *regs)); +extern int __re_match (struct re_pattern_buffer *__buffer, const char *__string, + int __length, int __start, struct re_registers *__regs); -extern int __re_match_2 - _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, - int length1, const char *string2, int length2, - int start, struct re_registers *regs, int stop)); +extern int __re_match_2 (struct re_pattern_buffer *__buffer, + const char *__string1, int __length1, + const char *__string2, int __length2, int __start, + struct re_registers *__regs, int __stop); -extern void __re_set_registers - _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, - unsigned num_regs, regoff_t *starts, regoff_t *ends)); +extern void __re_set_registers (struct re_pattern_buffer *__buffer, + struct re_registers *__regs, + unsigned int __num_regs, + regoff_t *__starts, regoff_t *__ends); -extern int __regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern, - int __cflags)); +extern int __regcomp (regex_t *__restrict __preg, + const char *__restrict __pattern, + int __cflags); -extern int __regexec _RE_ARGS ((const regex_t *__preg, - const char *__string, size_t __nmatch, - regmatch_t __pmatch[], int __eflags)); +extern int __regexec (const regex_t *__restrict __preg, + const char *__restrict __string, size_t __nmatch, + regmatch_t __pmatch[__restrict_arr], + int __eflags); -extern size_t __regerror _RE_ARGS ((int __errcode, const regex_t *__preg, - char *__errbuf, size_t __errbuf_size)); +extern size_t __regerror (int __errcode, const regex_t *__restrict __preg, + char *__restrict __errbuf, size_t __errbuf_size); -extern void __regfree _RE_ARGS ((regex_t *__preg)); -#endif +extern void __regfree (regex_t *__preg); + +#endif /* _REGEX_H */ diff --git a/gnu/lib/libregex/regex_internal.c b/gnu/lib/libregex/regex_internal.c deleted file mode 100644 index b3d44c3..0000000 --- a/gnu/lib/libregex/regex_internal.c +++ /dev/null @@ -1,1674 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -static void re_string_construct_common (const char *str, int len, - re_string_t *pstr, - RE_TRANSLATE_TYPE trans, int icase, - const re_dfa_t *dfa) internal_function; -#ifdef RE_ENABLE_I18N -static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, - wint_t *last_wc) internal_function; -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t register_state (re_dfa_t *dfa, re_dfastate_t *newstate, - unsigned int hash) internal_function; -static re_dfastate_t *create_ci_newstate (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int hash) internal_function; -static re_dfastate_t *create_cd_newstate (re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context, - unsigned int hash) internal_function; -static unsigned int inline calc_state_hash (const re_node_set *nodes, - unsigned int context) internal_function; - -/* Functions for string operation. */ - -/* This function allocate the buffers. It is necessary to call - re_string_reconstruct before using the object. */ - -static reg_errcode_t -re_string_allocate (pstr, str, len, init_len, trans, icase, dfa) - re_string_t *pstr; - const char *str; - int len, init_len, icase; - RE_TRANSLATE_TYPE trans; - const re_dfa_t *dfa; -{ - reg_errcode_t ret; - int init_buf_len; - - /* Ensure at least one character fits into the buffers. */ - if (init_len < dfa->mb_cur_max) - init_len = dfa->mb_cur_max; - init_buf_len = (len + 1 < init_len) ? len + 1: init_len; - re_string_construct_common (str, len, pstr, trans, icase, dfa); - - ret = re_string_realloc_buffers (pstr, init_buf_len); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - pstr->word_char = dfa->word_char; - pstr->word_ops_used = dfa->word_ops_used; - pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; - pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len; - pstr->valid_raw_len = pstr->valid_len; - return REG_NOERROR; -} - -/* This function allocate the buffers, and initialize them. */ - -static reg_errcode_t -re_string_construct (pstr, str, len, trans, icase, dfa) - re_string_t *pstr; - const char *str; - int len, icase; - RE_TRANSLATE_TYPE trans; - const re_dfa_t *dfa; -{ - reg_errcode_t ret; - memset (pstr, '\0', sizeof (re_string_t)); - re_string_construct_common (str, len, pstr, trans, icase, dfa); - - if (len > 0) - { - ret = re_string_realloc_buffers (pstr, len + 1); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str; - - if (icase) - { -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - { - while (1) - { - ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) - return ret; - if (pstr->valid_raw_len >= len) - break; - if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max) - break; - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - } - else -#endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); - } - else - { -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - build_wcs_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - { - if (trans != NULL) - re_string_translate_buffer (pstr); - else - { - pstr->valid_len = pstr->bufs_len; - pstr->valid_raw_len = pstr->bufs_len; - } - } - } - - return REG_NOERROR; -} - -/* Helper functions for re_string_allocate, and re_string_construct. */ - -static reg_errcode_t -re_string_realloc_buffers (pstr, new_buf_len) - re_string_t *pstr; - int new_buf_len; -{ -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - wint_t *new_array = re_realloc (pstr->wcs, wint_t, new_buf_len); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - pstr->wcs = new_array; - if (pstr->offsets != NULL) - { - int *new_array = re_realloc (pstr->offsets, int, new_buf_len); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - pstr->offsets = new_array; - } - } -#endif /* RE_ENABLE_I18N */ - if (pstr->mbs_allocated) - { - unsigned char *new_array = re_realloc (pstr->mbs, unsigned char, - new_buf_len); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - pstr->mbs = new_array; - } - pstr->bufs_len = new_buf_len; - return REG_NOERROR; -} - - -static void -re_string_construct_common (str, len, pstr, trans, icase, dfa) - const char *str; - int len; - re_string_t *pstr; - RE_TRANSLATE_TYPE trans; - int icase; - const re_dfa_t *dfa; -{ - pstr->raw_mbs = (const unsigned char *) str; - pstr->len = len; - pstr->raw_len = len; - pstr->trans = (unsigned RE_TRANSLATE_TYPE) trans; - pstr->icase = icase ? 1 : 0; - pstr->mbs_allocated = (trans != NULL || icase); - pstr->mb_cur_max = dfa->mb_cur_max; - pstr->is_utf8 = dfa->is_utf8; - pstr->map_notascii = dfa->map_notascii; - pstr->stop = pstr->len; - pstr->raw_stop = pstr->stop; -} - -#ifdef RE_ENABLE_I18N - -/* Build wide character buffer PSTR->WCS. - If the byte sequence of the string are: - <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3> - Then wide character buffer will be: - <wc1> , WEOF , <wc2> , WEOF , <wc3> - We use WEOF for padding, they indicate that the position isn't - a first byte of a multibyte character. - - Note that this function assumes PSTR->VALID_LEN elements are already - built and starts from PSTR->VALID_LEN. */ - -static void -build_wcs_buffer (pstr) - re_string_t *pstr; -{ -#ifdef _LIBC - unsigned char buf[MB_CUR_MAX]; - assert (MB_CUR_MAX >= pstr->mb_cur_max); -#else - unsigned char buf[64]; -#endif - mbstate_t prev_st; - int byte_idx, end_idx, remain_len; - size_t mbclen; - - /* Build the buffers from pstr->valid_len to either pstr->len or - pstr->bufs_len. */ - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - for (byte_idx = pstr->valid_len; byte_idx < end_idx;) - { - wchar_t wc; - const char *p; - - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - /* Apply the translation if we need. */ - if (BE (pstr->trans != NULL, 0)) - { - int i, ch; - - for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) - { - ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i]; - buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch]; - } - p = (const char *) buf; - } - else - p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx; - mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - if (BE (pstr->trans != NULL, 0)) - wc = pstr->trans[wc]; - pstr->cur_state = prev_st; - } - - /* Write wide character and padding. */ - pstr->wcs[byte_idx++] = wc; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - pstr->valid_len = byte_idx; - pstr->valid_raw_len = byte_idx; -} - -/* Build wide character buffer PSTR->WCS like build_wcs_buffer, - but for REG_ICASE. */ - -static int -build_wcs_upper_buffer (pstr) - re_string_t *pstr; -{ - mbstate_t prev_st; - int src_idx, byte_idx, end_idx, remain_len; - size_t mbclen; -#ifdef _LIBC - char buf[MB_CUR_MAX]; - assert (MB_CUR_MAX >= pstr->mb_cur_max); -#else - char buf[64]; -#endif - - byte_idx = pstr->valid_len; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - /* The following optimization assumes that ASCII characters can be - mapped to wide characters with a simple cast. */ - if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed) - { - while (byte_idx < end_idx) - { - wchar_t wc; - - if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) - && mbsinit (&pstr->cur_state)) - { - /* In case of a singlebyte character. */ - pstr->mbs[byte_idx] - = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); - /* The next step uses the assumption that wchar_t is encoded - ASCII-safe: all ASCII values can be converted like this. */ - pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; - ++byte_idx; - continue; - } - - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, - ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); - if (BE (mbclen + 2 > 2, 1)) - { - wchar_t wcu = wc; - if (iswlower (wc)) - { - size_t mbcdlen; - - wcu = towupper (wc); - mbcdlen = wcrtomb (buf, wcu, &prev_st); - if (BE (mbclen == mbcdlen, 1)) - memcpy (pstr->mbs + byte_idx, buf, mbclen); - else - { - src_idx = byte_idx; - goto offsets_needed; - } - } - else - memcpy (pstr->mbs + byte_idx, - pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); - pstr->wcs[byte_idx++] = wcu; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - else if (mbclen == (size_t) -1 || mbclen == 0) - { - /* It is an invalid character or '\0'. Just use the byte. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - pstr->mbs[byte_idx] = ch; - /* And also cast it to wide char. */ - pstr->wcs[byte_idx++] = (wchar_t) ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } - else - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - } - pstr->valid_len = byte_idx; - pstr->valid_raw_len = byte_idx; - return REG_NOERROR; - } - else - for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;) - { - wchar_t wc; - const char *p; - offsets_needed: - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - if (BE (pstr->trans != NULL, 0)) - { - int i, ch; - - for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i) - { - ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i]; - buf[i] = pstr->trans[ch]; - } - p = (const char *) buf; - } - else - p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx; - mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state); - if (BE (mbclen + 2 > 2, 1)) - { - wchar_t wcu = wc; - if (iswlower (wc)) - { - size_t mbcdlen; - - wcu = towupper (wc); - mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st); - if (BE (mbclen == mbcdlen, 1)) - memcpy (pstr->mbs + byte_idx, buf, mbclen); - else if (mbcdlen != (size_t) -1) - { - size_t i; - - if (byte_idx + mbcdlen > pstr->bufs_len) - { - pstr->cur_state = prev_st; - break; - } - - if (pstr->offsets == NULL) - { - pstr->offsets = re_malloc (int, pstr->bufs_len); - - if (pstr->offsets == NULL) - return REG_ESPACE; - } - if (!pstr->offsets_needed) - { - for (i = 0; i < (size_t) byte_idx; ++i) - pstr->offsets[i] = i; - pstr->offsets_needed = 1; - } - - memcpy (pstr->mbs + byte_idx, buf, mbcdlen); - pstr->wcs[byte_idx] = wcu; - pstr->offsets[byte_idx] = src_idx; - for (i = 1; i < mbcdlen; ++i) - { - pstr->offsets[byte_idx + i] - = src_idx + (i < mbclen ? i : mbclen - 1); - pstr->wcs[byte_idx + i] = WEOF; - } - pstr->len += mbcdlen - mbclen; - if (pstr->raw_stop > src_idx) - pstr->stop += mbcdlen - mbclen; - end_idx = (pstr->bufs_len > pstr->len) - ? pstr->len : pstr->bufs_len; - byte_idx += mbcdlen; - src_idx += mbclen; - continue; - } - else - memcpy (pstr->mbs + byte_idx, p, mbclen); - } - else - memcpy (pstr->mbs + byte_idx, p, mbclen); - - if (BE (pstr->offsets_needed != 0, 0)) - { - size_t i; - for (i = 0; i < mbclen; ++i) - pstr->offsets[byte_idx + i] = src_idx + i; - } - src_idx += mbclen; - - pstr->wcs[byte_idx++] = wcu; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; - } - else if (mbclen == (size_t) -1 || mbclen == 0) - { - /* It is an invalid character or '\0'. Just use the byte. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx]; - - if (BE (pstr->trans != NULL, 0)) - ch = pstr->trans [ch]; - pstr->mbs[byte_idx] = ch; - - if (BE (pstr->offsets_needed != 0, 0)) - pstr->offsets[byte_idx] = src_idx; - ++src_idx; - - /* And also cast it to wide char. */ - pstr->wcs[byte_idx++] = (wchar_t) ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } - else - { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; - } - } - pstr->valid_len = byte_idx; - pstr->valid_raw_len = src_idx; - return REG_NOERROR; -} - -/* Skip characters until the index becomes greater than NEW_RAW_IDX. - Return the index. */ - -static int -re_string_skip_chars (pstr, new_raw_idx, last_wc) - re_string_t *pstr; - int new_raw_idx; - wint_t *last_wc; -{ - mbstate_t prev_st; - int rawbuf_idx; - size_t mbclen; - wchar_t wc = 0; - - /* Skip the characters which are not necessary to check. */ - for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len; - rawbuf_idx < new_raw_idx;) - { - int remain_len; - remain_len = pstr->len - rawbuf_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, (const char *) pstr->raw_mbs + rawbuf_idx, - remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0)) - { - /* We treat these cases as a singlebyte character. */ - mbclen = 1; - pstr->cur_state = prev_st; - } - /* Then proceed the next character. */ - rawbuf_idx += mbclen; - } - *last_wc = (wint_t) wc; - return rawbuf_idx; -} -#endif /* RE_ENABLE_I18N */ - -/* Build the buffer PSTR->MBS, and apply the translation if we need. - This function is used in case of REG_ICASE. */ - -static void -build_upper_buffer (pstr) - re_string_t *pstr; -{ - int char_idx, end_idx; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx) - { - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx]; - if (BE (pstr->trans != NULL, 0)) - ch = pstr->trans[ch]; - if (islower (ch)) - pstr->mbs[char_idx] = toupper (ch); - else - pstr->mbs[char_idx] = ch; - } - pstr->valid_len = char_idx; - pstr->valid_raw_len = char_idx; -} - -/* Apply TRANS to the buffer in PSTR. */ - -static void -re_string_translate_buffer (pstr) - re_string_t *pstr; -{ - int buf_idx, end_idx; - end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len; - - for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx) - { - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx]; - pstr->mbs[buf_idx] = pstr->trans[ch]; - } - - pstr->valid_len = buf_idx; - pstr->valid_raw_len = buf_idx; -} - -/* This function re-construct the buffers. - Concretely, convert to wide character in case of pstr->mb_cur_max > 1, - convert to upper case in case of REG_ICASE, apply translation. */ - -static reg_errcode_t -re_string_reconstruct (pstr, idx, eflags) - re_string_t *pstr; - int idx, eflags; -{ - int offset = idx - pstr->raw_mbs_idx; - if (BE (offset < 0, 0)) - { - /* Reset buffer. */ -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - memset (&pstr->cur_state, '\0', sizeof (mbstate_t)); -#endif /* RE_ENABLE_I18N */ - pstr->len = pstr->raw_len; - pstr->stop = pstr->raw_stop; - pstr->valid_len = 0; - pstr->raw_mbs_idx = 0; - pstr->valid_raw_len = 0; - pstr->offsets_needed = 0; - pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF); - if (!pstr->mbs_allocated) - pstr->mbs = (unsigned char *) pstr->raw_mbs; - offset = idx; - } - - if (BE (offset != 0, 1)) - { - /* Are the characters which are already checked remain? */ - if (BE (offset < pstr->valid_raw_len, 1) -#ifdef RE_ENABLE_I18N - /* Handling this would enlarge the code too much. - Accept a slowdown in that case. */ - && pstr->offsets_needed == 0 -#endif - ) - { - /* Yes, move them to the front of the buffer. */ - pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags); -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - memmove (pstr->wcs, pstr->wcs + offset, - (pstr->valid_len - offset) * sizeof (wint_t)); -#endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) - memmove (pstr->mbs, pstr->mbs + offset, - pstr->valid_len - offset); - pstr->valid_len -= offset; - pstr->valid_raw_len -= offset; -#if DEBUG - assert (pstr->valid_len > 0); -#endif - } - else - { - /* No, skip all characters until IDX. */ -#ifdef RE_ENABLE_I18N - if (BE (pstr->offsets_needed, 0)) - { - pstr->len = pstr->raw_len - idx + offset; - pstr->stop = pstr->raw_stop - idx + offset; - pstr->offsets_needed = 0; - } -#endif - pstr->valid_len = 0; - pstr->valid_raw_len = 0; -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - int wcs_idx; - wint_t wc = WEOF; - - if (pstr->is_utf8) - { - const unsigned char *raw, *p, *q, *end; - - /* Special case UTF-8. Multi-byte chars start with any - byte other than 0x80 - 0xbf. */ - raw = pstr->raw_mbs + pstr->raw_mbs_idx; - end = raw + (offset - pstr->mb_cur_max); - for (p = raw + offset - 1; p >= end; --p) - if ((*p & 0xc0) != 0x80) - { - mbstate_t cur_state; - wchar_t wc2; - int mlen = raw + pstr->len - p; - unsigned char buf[6]; - - q = p; - if (BE (pstr->trans != NULL, 0)) - { - int i = mlen < 6 ? mlen : 6; - while (--i >= 0) - buf[i] = pstr->trans[p[i]]; - q = buf; - } - /* XXX Don't use mbrtowc, we know which conversion - to use (UTF-8 -> UCS4). */ - memset (&cur_state, 0, sizeof (cur_state)); - mlen = (mbrtowc (&wc2, (const char *) p, mlen, - &cur_state) - - (raw + offset - p)); - if (mlen >= 0) - { - memset (&pstr->cur_state, '\0', - sizeof (mbstate_t)); - pstr->valid_len = mlen; - wc = wc2; - } - break; - } - } - - if (wc == WEOF) - pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx; - if (BE (pstr->valid_len, 0)) - { - for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx) - pstr->wcs[wcs_idx] = WEOF; - if (pstr->mbs_allocated) - memset (pstr->mbs, 255, pstr->valid_len); - } - pstr->valid_raw_len = pstr->valid_len; - pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0) - && IS_WIDE_WORD_CHAR (wc)) - ? CONTEXT_WORD - : ((IS_WIDE_NEWLINE (wc) - && pstr->newline_anchor) - ? CONTEXT_NEWLINE : 0)); - } - else -#endif /* RE_ENABLE_I18N */ - { - int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1]; - if (pstr->trans) - c = pstr->trans[c]; - pstr->tip_context = (bitset_contain (pstr->word_char, c) - ? CONTEXT_WORD - : ((IS_NEWLINE (c) && pstr->newline_anchor) - ? CONTEXT_NEWLINE : 0)); - } - } - if (!BE (pstr->mbs_allocated, 0)) - pstr->mbs += offset; - } - pstr->raw_mbs_idx = idx; - pstr->len -= offset; - pstr->stop -= offset; - - /* Then build the buffers. */ -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - if (pstr->icase) - { - int ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - else - build_wcs_buffer (pstr); - } - else -#endif /* RE_ENABLE_I18N */ - if (BE (pstr->mbs_allocated, 0)) - { - if (pstr->icase) - build_upper_buffer (pstr); - else if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - } - else - pstr->valid_len = pstr->len; - - pstr->cur_idx = 0; - return REG_NOERROR; -} - -static unsigned char -re_string_peek_byte_case (pstr, idx) - const re_string_t *pstr; - int idx; -{ - int ch, off; - - /* Handle the common (easiest) cases first. */ - if (BE (!pstr->mbs_allocated, 1)) - return re_string_peek_byte (pstr, idx); - -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1 - && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx)) - return re_string_peek_byte (pstr, idx); -#endif - - off = pstr->cur_idx + idx; -#ifdef RE_ENABLE_I18N - if (pstr->offsets_needed) - off = pstr->offsets[off]; -#endif - - ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; - -#ifdef RE_ENABLE_I18N - /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I - this function returns CAPITAL LETTER I instead of first byte of - DOTLESS SMALL LETTER I. The latter would confuse the parser, - since peek_byte_case doesn't advance cur_idx in any way. */ - if (pstr->offsets_needed && !isascii (ch)) - return re_string_peek_byte (pstr, idx); -#endif - - return ch; -} - -static unsigned char -re_string_fetch_byte_case (pstr) - re_string_t *pstr; -{ - if (BE (!pstr->mbs_allocated, 1)) - return re_string_fetch_byte (pstr); - -#ifdef RE_ENABLE_I18N - if (pstr->offsets_needed) - { - int off, ch; - - /* For tr_TR.UTF-8 [[:islower:]] there is - [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip - in that case the whole multi-byte character and return - the original letter. On the other side, with - [[: DOTLESS SMALL LETTER I return [[:I, as doing - anything else would complicate things too much. */ - - if (!re_string_first_byte (pstr, pstr->cur_idx)) - return re_string_fetch_byte (pstr); - - off = pstr->offsets[pstr->cur_idx]; - ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; - - if (! isascii (ch)) - return re_string_fetch_byte (pstr); - - re_string_skip_bytes (pstr, - re_string_char_size_at (pstr, pstr->cur_idx)); - return ch; - } -#endif - - return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++]; -} - -static void -re_string_destruct (pstr) - re_string_t *pstr; -{ -#ifdef RE_ENABLE_I18N - re_free (pstr->wcs); - re_free (pstr->offsets); -#endif /* RE_ENABLE_I18N */ - if (pstr->mbs_allocated) - re_free (pstr->mbs); -} - -/* Return the context at IDX in INPUT. */ - -static unsigned int -re_string_context_at (input, idx, eflags) - const re_string_t *input; - int idx, eflags; -{ - int c; - if (BE (idx < 0, 0)) - /* In this case, we use the value stored in input->tip_context, - since we can't know the character in input->mbs[-1] here. */ - return input->tip_context; - if (BE (idx == input->len, 0)) - return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF - : CONTEXT_NEWLINE | CONTEXT_ENDBUF); -#ifdef RE_ENABLE_I18N - if (input->mb_cur_max > 1) - { - wint_t wc; - int wc_idx = idx; - while(input->wcs[wc_idx] == WEOF) - { -#ifdef DEBUG - /* It must not happen. */ - assert (wc_idx >= 0); -#endif - --wc_idx; - if (wc_idx < 0) - return input->tip_context; - } - wc = input->wcs[wc_idx]; - if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) - return CONTEXT_WORD; - return (IS_WIDE_NEWLINE (wc) && input->newline_anchor - ? CONTEXT_NEWLINE : 0); - } - else -#endif - { - c = re_string_byte_at (input, idx); - if (bitset_contain (input->word_char, c)) - return CONTEXT_WORD; - return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; - } -} - -/* Functions for set operation. */ - -static reg_errcode_t -re_node_set_alloc (set, size) - re_node_set *set; - int size; -{ - set->alloc = size; - set->nelem = 0; - set->elems = re_malloc (int, size); - if (BE (set->elems == NULL, 0)) - return REG_ESPACE; - return REG_NOERROR; -} - -static reg_errcode_t -re_node_set_init_1 (set, elem) - re_node_set *set; - int elem; -{ - set->alloc = 1; - set->nelem = 1; - set->elems = re_malloc (int, 1); - if (BE (set->elems == NULL, 0)) - { - set->alloc = set->nelem = 0; - return REG_ESPACE; - } - set->elems[0] = elem; - return REG_NOERROR; -} - -static reg_errcode_t -re_node_set_init_2 (set, elem1, elem2) - re_node_set *set; - int elem1, elem2; -{ - set->alloc = 2; - set->elems = re_malloc (int, 2); - if (BE (set->elems == NULL, 0)) - return REG_ESPACE; - if (elem1 == elem2) - { - set->nelem = 1; - set->elems[0] = elem1; - } - else - { - set->nelem = 2; - if (elem1 < elem2) - { - set->elems[0] = elem1; - set->elems[1] = elem2; - } - else - { - set->elems[0] = elem2; - set->elems[1] = elem1; - } - } - return REG_NOERROR; -} - -static reg_errcode_t -re_node_set_init_copy (dest, src) - re_node_set *dest; - const re_node_set *src; -{ - dest->nelem = src->nelem; - if (src->nelem > 0) - { - dest->alloc = dest->nelem; - dest->elems = re_malloc (int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - { - dest->alloc = dest->nelem = 0; - return REG_ESPACE; - } - memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); - } - else - re_node_set_init_empty (dest); - return REG_NOERROR; -} - -/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. - Note: We assume dest->elems is NULL, when dest->alloc is 0. */ - -static reg_errcode_t -re_node_set_add_intersect (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; -{ - int i1, i2, is, id, delta, sbase; - if (src1->nelem == 0 || src2->nelem == 0) - return REG_NOERROR; - - /* We need dest->nelem + 2 * elems_in_intersection; this is a - conservative estimate. */ - if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) - { - int new_alloc = src1->nelem + src2->nelem + dest->alloc; - int *new_elems = re_realloc (dest->elems, int, new_alloc); - if (BE (new_elems == NULL, 0)) - return REG_ESPACE; - dest->elems = new_elems; - dest->alloc = new_alloc; - } - - /* Find the items in the intersection of SRC1 and SRC2, and copy - into the top of DEST those that are not already in DEST itself. */ - sbase = dest->nelem + src1->nelem + src2->nelem; - i1 = src1->nelem - 1; - i2 = src2->nelem - 1; - id = dest->nelem - 1; - for (;;) - { - if (src1->elems[i1] == src2->elems[i2]) - { - /* Try to find the item in DEST. Maybe we could binary search? */ - while (id >= 0 && dest->elems[id] > src1->elems[i1]) - --id; - - if (id < 0 || dest->elems[id] != src1->elems[i1]) - dest->elems[--sbase] = src1->elems[i1]; - - if (--i1 < 0 || --i2 < 0) - break; - } - - /* Lower the highest of the two items. */ - else if (src1->elems[i1] < src2->elems[i2]) - { - if (--i2 < 0) - break; - } - else - { - if (--i1 < 0) - break; - } - } - - id = dest->nelem - 1; - is = dest->nelem + src1->nelem + src2->nelem - 1; - delta = is - sbase + 1; - - /* Now copy. When DELTA becomes zero, the remaining - DEST elements are already in place; this is more or - less the same loop that is in re_node_set_merge. */ - dest->nelem += delta; - if (delta > 0 && id >= 0) - for (;;) - { - if (dest->elems[is] > dest->elems[id]) - { - /* Copy from the top. */ - dest->elems[id + delta--] = dest->elems[is--]; - if (delta == 0) - break; - } - else - { - /* Slide from the bottom. */ - dest->elems[id + delta] = dest->elems[id]; - if (--id < 0) - break; - } - } - - /* Copy remaining SRC elements. */ - memcpy (dest->elems, dest->elems + sbase, delta * sizeof (int)); - - return REG_NOERROR; -} - -/* Calculate the union set of the sets SRC1 and SRC2. And store it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ - -static reg_errcode_t -re_node_set_init_union (dest, src1, src2) - re_node_set *dest; - const re_node_set *src1, *src2; -{ - int i1, i2, id; - if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) - { - dest->alloc = src1->nelem + src2->nelem; - dest->elems = re_malloc (int, dest->alloc); - if (BE (dest->elems == NULL, 0)) - return REG_ESPACE; - } - else - { - if (src1 != NULL && src1->nelem > 0) - return re_node_set_init_copy (dest, src1); - else if (src2 != NULL && src2->nelem > 0) - return re_node_set_init_copy (dest, src2); - else - re_node_set_init_empty (dest); - return REG_NOERROR; - } - for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) - { - if (src1->elems[i1] > src2->elems[i2]) - { - dest->elems[id++] = src2->elems[i2++]; - continue; - } - if (src1->elems[i1] == src2->elems[i2]) - ++i2; - dest->elems[id++] = src1->elems[i1++]; - } - if (i1 < src1->nelem) - { - memcpy (dest->elems + id, src1->elems + i1, - (src1->nelem - i1) * sizeof (int)); - id += src1->nelem - i1; - } - else if (i2 < src2->nelem) - { - memcpy (dest->elems + id, src2->elems + i2, - (src2->nelem - i2) * sizeof (int)); - id += src2->nelem - i2; - } - dest->nelem = id; - return REG_NOERROR; -} - -/* Calculate the union set of the sets DEST and SRC. And store it to - DEST. Return value indicate the error code or REG_NOERROR if succeeded. */ - -static reg_errcode_t -re_node_set_merge (dest, src) - re_node_set *dest; - const re_node_set *src; -{ - int is, id, sbase, delta; - if (src == NULL || src->nelem == 0) - return REG_NOERROR; - if (dest->alloc < 2 * src->nelem + dest->nelem) - { - int new_alloc = 2 * (src->nelem + dest->alloc); - int *new_buffer = re_realloc (dest->elems, int, new_alloc); - if (BE (new_buffer == NULL, 0)) - return REG_ESPACE; - dest->elems = new_buffer; - dest->alloc = new_alloc; - } - - if (BE (dest->nelem == 0, 0)) - { - dest->nelem = src->nelem; - memcpy (dest->elems, src->elems, src->nelem * sizeof (int)); - return REG_NOERROR; - } - - /* Copy into the top of DEST the items of SRC that are not - found in DEST. Maybe we could binary search in DEST? */ - for (sbase = dest->nelem + 2 * src->nelem, - is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; ) - { - if (dest->elems[id] == src->elems[is]) - is--, id--; - else if (dest->elems[id] < src->elems[is]) - dest->elems[--sbase] = src->elems[is--]; - else /* if (dest->elems[id] > src->elems[is]) */ - --id; - } - - if (is >= 0) - { - /* If DEST is exhausted, the remaining items of SRC must be unique. */ - sbase -= is + 1; - memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (int)); - } - - id = dest->nelem - 1; - is = dest->nelem + 2 * src->nelem - 1; - delta = is - sbase + 1; - if (delta == 0) - return REG_NOERROR; - - /* Now copy. When DELTA becomes zero, the remaining - DEST elements are already in place. */ - dest->nelem += delta; - for (;;) - { - if (dest->elems[is] > dest->elems[id]) - { - /* Copy from the top. */ - dest->elems[id + delta--] = dest->elems[is--]; - if (delta == 0) - break; - } - else - { - /* Slide from the bottom. */ - dest->elems[id + delta] = dest->elems[id]; - if (--id < 0) - { - /* Copy remaining SRC elements. */ - memcpy (dest->elems, dest->elems + sbase, - delta * sizeof (int)); - break; - } - } - } - - return REG_NOERROR; -} - -/* Insert the new element ELEM to the re_node_set* SET. - SET should not already have ELEM. - return -1 if an error is occured, return 1 otherwise. */ - -static int -re_node_set_insert (set, elem) - re_node_set *set; - int elem; -{ - int idx; - /* In case the set is empty. */ - if (set->alloc == 0) - { - if (BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1)) - return 1; - else - return -1; - } - - if (BE (set->nelem, 0) == 0) - { - /* We already guaranteed above that set->alloc != 0. */ - set->elems[0] = elem; - ++set->nelem; - return 1; - } - - /* Realloc if we need. */ - if (set->alloc == set->nelem) - { - int *new_array; - set->alloc = set->alloc * 2; - new_array = re_realloc (set->elems, int, set->alloc); - if (BE (new_array == NULL, 0)) - return -1; - set->elems = new_array; - } - - /* Move the elements which follows the new element. Test the - first element separately to skip a check in the inner loop. */ - if (elem < set->elems[0]) - { - idx = 0; - for (idx = set->nelem; idx > 0; idx--) - set->elems[idx] = set->elems[idx - 1]; - } - else - { - for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) - set->elems[idx] = set->elems[idx - 1]; - } - - /* Insert the new element. */ - set->elems[idx] = elem; - ++set->nelem; - return 1; -} - -/* Insert the new element ELEM to the re_node_set* SET. - SET should not already have any element greater than or equal to ELEM. - Return -1 if an error is occured, return 1 otherwise. */ - -static int -re_node_set_insert_last (set, elem) - re_node_set *set; - int elem; -{ - /* Realloc if we need. */ - if (set->alloc == set->nelem) - { - int *new_array; - set->alloc = (set->alloc + 1) * 2; - new_array = re_realloc (set->elems, int, set->alloc); - if (BE (new_array == NULL, 0)) - return -1; - set->elems = new_array; - } - - /* Insert the new element. */ - set->elems[set->nelem++] = elem; - return 1; -} - -/* Compare two node sets SET1 and SET2. - return 1 if SET1 and SET2 are equivalent, return 0 otherwise. */ - -static int -re_node_set_compare (set1, set2) - const re_node_set *set1, *set2; -{ - int i; - if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem) - return 0; - for (i = set1->nelem ; --i >= 0 ; ) - if (set1->elems[i] != set2->elems[i]) - return 0; - return 1; -} - -/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */ - -static int -re_node_set_contains (set, elem) - const re_node_set *set; - int elem; -{ - unsigned int idx, right, mid; - if (set->nelem <= 0) - return 0; - - /* Binary search the element. */ - idx = 0; - right = set->nelem - 1; - while (idx < right) - { - mid = (idx + right) / 2; - if (set->elems[mid] < elem) - idx = mid + 1; - else - right = mid; - } - return set->elems[idx] == elem ? idx + 1 : 0; -} - -static void -re_node_set_remove_at (set, idx) - re_node_set *set; - int idx; -{ - if (idx < 0 || idx >= set->nelem) - return; - --set->nelem; - for (; idx < set->nelem; idx++) - set->elems[idx] = set->elems[idx + 1]; -} - - -/* Add the token TOKEN to dfa->nodes, and return the index of the token. - Or return -1, if an error will be occured. */ - -static int -re_dfa_add_node (dfa, token) - re_dfa_t *dfa; - re_token_t token; -{ - int type = token.type; - if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0)) - { - int new_nodes_alloc = dfa->nodes_alloc * 2; - int *new_nexts, *new_indices; - re_node_set *new_edests, *new_eclosures; - - re_token_t *new_array = re_realloc (dfa->nodes, re_token_t, - new_nodes_alloc); - if (BE (new_array == NULL, 0)) - return -1; - dfa->nodes = new_array; - new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc); - new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc); - new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc); - new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc); - if (BE (new_nexts == NULL || new_indices == NULL - || new_edests == NULL || new_eclosures == NULL, 0)) - return -1; - dfa->nexts = new_nexts; - dfa->org_indices = new_indices; - dfa->edests = new_edests; - dfa->eclosures = new_eclosures; - dfa->nodes_alloc = new_nodes_alloc; - } - dfa->nodes[dfa->nodes_len] = token; - dfa->nodes[dfa->nodes_len].constraint = 0; -#ifdef RE_ENABLE_I18N - dfa->nodes[dfa->nodes_len].accept_mb = - (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET; -#endif - dfa->nexts[dfa->nodes_len] = -1; - re_node_set_init_empty (dfa->edests + dfa->nodes_len); - re_node_set_init_empty (dfa->eclosures + dfa->nodes_len); - return dfa->nodes_len++; -} - -static unsigned int inline -calc_state_hash (nodes, context) - const re_node_set *nodes; - unsigned int context; -{ - unsigned int hash = nodes->nelem + context; - int i; - for (i = 0 ; i < nodes->nelem ; i++) - hash += nodes->elems[i]; - return hash; -} - -/* Search for the state whose node_set is equivalent to NODES. - Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. In case of an error - return NULL and set the error code in ERR. - Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. */ - -static re_dfastate_t* -re_acquire_state (err, dfa, nodes) - reg_errcode_t *err; - re_dfa_t *dfa; - const re_node_set *nodes; -{ - unsigned int hash; - re_dfastate_t *new_state; - struct re_state_table_entry *spot; - int i; - if (BE (nodes->nelem == 0, 0)) - { - *err = REG_NOERROR; - return NULL; - } - hash = calc_state_hash (nodes, 0); - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - for (i = 0 ; i < spot->num ; i++) - { - re_dfastate_t *state = spot->array[i]; - if (hash != state->hash) - continue; - if (re_node_set_compare (&state->nodes, nodes)) - return state; - } - - /* There are no appropriate state in the dfa, create the new one. */ - new_state = create_ci_newstate (dfa, nodes, hash); - if (BE (new_state != NULL, 1)) - return new_state; - else - { - *err = REG_ESPACE; - return NULL; - } -} - -/* Search for the state whose node_set is equivalent to NODES and - whose context is equivalent to CONTEXT. - Return the pointer to the state, if we found it in the DFA. - Otherwise create the new one and return it. In case of an error - return NULL and set the error code in ERR. - Note: - We assume NULL as the invalid state, then it is possible that - return value is NULL and ERR is REG_NOERROR. - - We never return non-NULL value in case of any errors, it is for - optimization. */ - -static re_dfastate_t* -re_acquire_state_context (err, dfa, nodes, context) - reg_errcode_t *err; - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int context; -{ - unsigned int hash; - re_dfastate_t *new_state; - struct re_state_table_entry *spot; - int i; - if (nodes->nelem == 0) - { - *err = REG_NOERROR; - return NULL; - } - hash = calc_state_hash (nodes, context); - spot = dfa->state_table + (hash & dfa->state_hash_mask); - - for (i = 0 ; i < spot->num ; i++) - { - re_dfastate_t *state = spot->array[i]; - if (state->hash == hash - && state->context == context - && re_node_set_compare (state->entrance_nodes, nodes)) - return state; - } - /* There are no appropriate state in `dfa', create the new one. */ - new_state = create_cd_newstate (dfa, nodes, context, hash); - if (BE (new_state != NULL, 1)) - return new_state; - else - { - *err = REG_ESPACE; - return NULL; - } -} - -/* Finish initialization of the new state NEWSTATE, and using its hash value - HASH put in the appropriate bucket of DFA's state table. Return value - indicates the error code if failed. */ - -static reg_errcode_t -register_state (dfa, newstate, hash) - re_dfa_t *dfa; - re_dfastate_t *newstate; - unsigned int hash; -{ - struct re_state_table_entry *spot; - reg_errcode_t err; - int i; - - newstate->hash = hash; - err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem); - if (BE (err != REG_NOERROR, 0)) - return REG_ESPACE; - for (i = 0; i < newstate->nodes.nelem; i++) - { - int elem = newstate->nodes.elems[i]; - if (!IS_EPSILON_NODE (dfa->nodes[elem].type)) - re_node_set_insert_last (&newstate->non_eps_nodes, elem); - } - - spot = dfa->state_table + (hash & dfa->state_hash_mask); - if (BE (spot->alloc <= spot->num, 0)) - { - int new_alloc = 2 * spot->num + 2; - re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *, - new_alloc); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - spot->array = new_array; - spot->alloc = new_alloc; - } - spot->array[spot->num++] = newstate; - return REG_NOERROR; -} - -/* Create the new state which is independ of contexts. - Return the new state if succeeded, otherwise return NULL. */ - -static re_dfastate_t * -create_ci_newstate (dfa, nodes, hash) - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int hash; -{ - int i; - reg_errcode_t err; - re_dfastate_t *newstate; - - newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) - return NULL; - err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_free (newstate); - return NULL; - } - - newstate->entrance_nodes = &newstate->nodes; - for (i = 0 ; i < nodes->nelem ; i++) - { - re_token_t *node = dfa->nodes + nodes->elems[i]; - re_token_type_t type = node->type; - if (type == CHARACTER && !node->constraint) - continue; -#ifdef RE_ENABLE_I18N - newstate->accept_mb |= node->accept_mb; -#endif /* RE_ENABLE_I18N */ - - /* If the state has the halt node, the state is a halt state. */ - if (type == END_OF_RE) - newstate->halt = 1; - else if (type == OP_BACK_REF) - newstate->has_backref = 1; - else if (type == ANCHOR || node->constraint) - newstate->has_constraint = 1; - } - err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) - { - free_state (newstate); - newstate = NULL; - } - return newstate; -} - -/* Create the new state which is depend on the context CONTEXT. - Return the new state if succeeded, otherwise return NULL. */ - -static re_dfastate_t * -create_cd_newstate (dfa, nodes, context, hash) - re_dfa_t *dfa; - const re_node_set *nodes; - unsigned int context, hash; -{ - int i, nctx_nodes = 0; - reg_errcode_t err; - re_dfastate_t *newstate; - - newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1); - if (BE (newstate == NULL, 0)) - return NULL; - err = re_node_set_init_copy (&newstate->nodes, nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_free (newstate); - return NULL; - } - - newstate->context = context; - newstate->entrance_nodes = &newstate->nodes; - - for (i = 0 ; i < nodes->nelem ; i++) - { - unsigned int constraint = 0; - re_token_t *node = dfa->nodes + nodes->elems[i]; - re_token_type_t type = node->type; - if (node->constraint) - constraint = node->constraint; - - if (type == CHARACTER && !constraint) - continue; -#ifdef RE_ENABLE_I18N - newstate->accept_mb |= node->accept_mb; -#endif /* RE_ENABLE_I18N */ - - /* If the state has the halt node, the state is a halt state. */ - if (type == END_OF_RE) - newstate->halt = 1; - else if (type == OP_BACK_REF) - newstate->has_backref = 1; - else if (type == ANCHOR) - constraint = node->opr.ctx_type; - - if (constraint) - { - if (newstate->entrance_nodes == &newstate->nodes) - { - newstate->entrance_nodes = re_malloc (re_node_set, 1); - if (BE (newstate->entrance_nodes == NULL, 0)) - { - free_state (newstate); - return NULL; - } - re_node_set_init_copy (newstate->entrance_nodes, nodes); - nctx_nodes = 0; - newstate->has_constraint = 1; - } - - if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context)) - { - re_node_set_remove_at (&newstate->nodes, i - nctx_nodes); - ++nctx_nodes; - } - } - } - err = register_state (dfa, newstate, hash); - if (BE (err != REG_NOERROR, 0)) - { - free_state (newstate); - newstate = NULL; - } - return newstate; -} - -static void -free_state (state) - re_dfastate_t *state; -{ - re_node_set_free (&state->non_eps_nodes); - re_node_set_free (&state->inveclosure); - if (state->entrance_nodes != &state->nodes) - { - re_node_set_free (state->entrance_nodes); - re_free (state->entrance_nodes); - } - re_node_set_free (&state->nodes); - re_free (state->word_trtable); - re_free (state->trtable); - re_free (state); -} diff --git a/gnu/lib/libregex/regex_internal.h b/gnu/lib/libregex/regex_internal.h deleted file mode 100644 index 58fa749..0000000 --- a/gnu/lib/libregex/regex_internal.h +++ /dev/null @@ -1,798 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -#ifndef _REGEX_INTERNAL_H -#define _REGEX_INTERNAL_H 1 - -#include <assert.h> -#include <ctype.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC -# include <langinfo.h> -#endif -#if defined HAVE_LOCALE_H || defined _LIBC -# include <locale.h> -#endif -#if defined HAVE_WCHAR_H || defined _LIBC -# include <wchar.h> -#endif /* HAVE_WCHAR_H || _LIBC */ -#if defined HAVE_WCTYPE_H || defined _LIBC -# include <wctype.h> -#endif /* HAVE_WCTYPE_H || _LIBC */ - -/* In case that the system doesn't have isblank(). */ -#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank -# define isblank(ch) ((ch) == ' ' || (ch) == '\t') -#endif - -#ifdef _LIBC -# ifndef _RE_DEFINE_LOCALE_FUNCTIONS -# define _RE_DEFINE_LOCALE_FUNCTIONS 1 -# include <locale/localeinfo.h> -# include <locale/elem-hash.h> -# include <locale/coll-lookup.h> -# endif -#endif - -/* This is for other GNU distributions with internationalized messages. */ -#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC -# include <libintl.h> -# ifdef _LIBC -# undef gettext -# define gettext(msgid) \ - INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES) -# endif -#else -# define gettext(msgid) (msgid) -#endif - -#ifndef gettext_noop -/* This define is so xgettext can find the internationalizable - strings. */ -# define gettext_noop(String) String -#endif - -#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC -# define RE_ENABLE_I18N -#endif - -#if __GNUC__ >= 3 -# define BE(expr, val) __builtin_expect (expr, val) -#else -# define BE(expr, val) (expr) -# define inline -#endif - -/* Number of bits in a byte. */ -#define BYTE_BITS 8 -/* Number of single byte character. */ -#define SBC_MAX 256 - -#define COLL_ELEM_LEN_MAX 8 - -/* The character which represents newline. */ -#define NEWLINE_CHAR '\n' -#define WIDE_NEWLINE_CHAR L'\n' - -/* Rename to standard API for using out of glibc. */ -#ifndef _LIBC -# define __wctype wctype -# define __iswctype iswctype -# define __btowc btowc -# define __mempcpy mempcpy -# define __wcrtomb wcrtomb -# define __regfree regfree -# define attribute_hidden -#endif /* not _LIBC */ - -#ifdef __GNUC__ -# define __attribute(arg) __attribute__ (arg) -#else -# define __attribute(arg) -#endif - -extern const char __re_error_msgid[] attribute_hidden; -extern const size_t __re_error_msgid_idx[] attribute_hidden; - -/* Number of bits in an unsinged int. */ -#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS) -/* Number of unsigned int in an bit_set. */ -#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS) -typedef unsigned int bitset[BITSET_UINTS]; -typedef unsigned int *re_bitset_ptr_t; -typedef const unsigned int *re_const_bitset_ptr_t; - -#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS) -#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS)) -#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS)) -#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_set_all(set) \ - memset (set, 255, sizeof (unsigned int) * BITSET_UINTS) -#define bitset_copy(dest,src) \ - memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS) -static inline void bitset_not (bitset set); -static inline void bitset_merge (bitset dest, const bitset src); -static inline void bitset_not_merge (bitset dest, const bitset src); -static inline void bitset_mask (bitset dest, const bitset src); - -#define PREV_WORD_CONSTRAINT 0x0001 -#define PREV_NOTWORD_CONSTRAINT 0x0002 -#define NEXT_WORD_CONSTRAINT 0x0004 -#define NEXT_NOTWORD_CONSTRAINT 0x0008 -#define PREV_NEWLINE_CONSTRAINT 0x0010 -#define NEXT_NEWLINE_CONSTRAINT 0x0020 -#define PREV_BEGBUF_CONSTRAINT 0x0040 -#define NEXT_ENDBUF_CONSTRAINT 0x0080 -#define WORD_DELIM_CONSTRAINT 0x0100 -#define NOT_WORD_DELIM_CONSTRAINT 0x0200 - -typedef enum -{ - INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, - WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, - WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, - INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, - LINE_FIRST = PREV_NEWLINE_CONSTRAINT, - LINE_LAST = NEXT_NEWLINE_CONSTRAINT, - BUF_FIRST = PREV_BEGBUF_CONSTRAINT, - BUF_LAST = NEXT_ENDBUF_CONSTRAINT, - WORD_DELIM = WORD_DELIM_CONSTRAINT, - NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT -} re_context_type; - -typedef struct -{ - int alloc; - int nelem; - int *elems; -} re_node_set; - -typedef enum -{ - NON_TYPE = 0, - - /* Node type, These are used by token, node, tree. */ - CHARACTER = 1, - END_OF_RE = 2, - SIMPLE_BRACKET = 3, - OP_BACK_REF = 4, - OP_PERIOD = 5, -#ifdef RE_ENABLE_I18N - COMPLEX_BRACKET = 6, - OP_UTF8_PERIOD = 7, -#endif /* RE_ENABLE_I18N */ - - /* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used - when the debugger shows values of this enum type. */ -#define EPSILON_BIT 8 - OP_OPEN_SUBEXP = EPSILON_BIT | 0, - OP_CLOSE_SUBEXP = EPSILON_BIT | 1, - OP_ALT = EPSILON_BIT | 2, - OP_DUP_ASTERISK = EPSILON_BIT | 3, - ANCHOR = EPSILON_BIT | 4, - - /* Tree type, these are used only by tree. */ - CONCAT = 16, - SUBEXP = 17, - - /* Token type, these are used only by token. */ - OP_DUP_PLUS = 18, - OP_DUP_QUESTION, - OP_OPEN_BRACKET, - OP_CLOSE_BRACKET, - OP_CHARSET_RANGE, - OP_OPEN_DUP_NUM, - OP_CLOSE_DUP_NUM, - OP_NON_MATCH_LIST, - OP_OPEN_COLL_ELEM, - OP_CLOSE_COLL_ELEM, - OP_OPEN_EQUIV_CLASS, - OP_CLOSE_EQUIV_CLASS, - OP_OPEN_CHAR_CLASS, - OP_CLOSE_CHAR_CLASS, - OP_WORD, - OP_NOTWORD, - OP_SPACE, - OP_NOTSPACE, - BACK_SLASH - -} re_token_type_t; - -#ifdef RE_ENABLE_I18N -typedef struct -{ - /* Multibyte characters. */ - wchar_t *mbchars; - - /* Collating symbols. */ -# ifdef _LIBC - int32_t *coll_syms; -# endif - - /* Equivalence classes. */ -# ifdef _LIBC - int32_t *equiv_classes; -# endif - - /* Range expressions. */ -# ifdef _LIBC - uint32_t *range_starts; - uint32_t *range_ends; -# else /* not _LIBC */ - wchar_t *range_starts; - wchar_t *range_ends; -# endif /* not _LIBC */ - - /* Character classes. */ - wctype_t *char_classes; - - /* If this character set is the non-matching list. */ - unsigned int non_match : 1; - - /* # of multibyte characters. */ - int nmbchars; - - /* # of collating symbols. */ - int ncoll_syms; - - /* # of equivalence classes. */ - int nequiv_classes; - - /* # of range expressions. */ - int nranges; - - /* # of character classes. */ - int nchar_classes; -} re_charset_t; -#endif /* RE_ENABLE_I18N */ - -typedef struct -{ - union - { - unsigned char c; /* for CHARACTER */ - re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ -#ifdef RE_ENABLE_I18N - re_charset_t *mbcset; /* for COMPLEX_BRACKET */ -#endif /* RE_ENABLE_I18N */ - int idx; /* for BACK_REF */ - re_context_type ctx_type; /* for ANCHOR */ - } opr; -#if __GNUC__ >= 2 - re_token_type_t type : 8; -#else - re_token_type_t type; -#endif - unsigned int constraint : 10; /* context constraint */ - unsigned int duplicated : 1; - unsigned int opt_subexp : 1; -#ifdef RE_ENABLE_I18N - unsigned int accept_mb : 1; - /* These 2 bits can be moved into the union if needed (e.g. if running out - of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ - unsigned int mb_partial : 1; -#endif - unsigned int word_char : 1; -} re_token_t; - -#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) - -struct re_string_t -{ - /* Indicate the raw buffer which is the original string passed as an - argument of regexec(), re_search(), etc.. */ - const unsigned char *raw_mbs; - /* Store the multibyte string. In case of "case insensitive mode" like - REG_ICASE, upper cases of the string are stored, otherwise MBS points - the same address that RAW_MBS points. */ - unsigned char *mbs; -#ifdef RE_ENABLE_I18N - /* Store the wide character string which is corresponding to MBS. */ - wint_t *wcs; - int *offsets; - mbstate_t cur_state; -#endif - /* Index in RAW_MBS. Each character mbs[i] corresponds to - raw_mbs[raw_mbs_idx + i]. */ - int raw_mbs_idx; - /* The length of the valid characters in the buffers. */ - int valid_len; - /* The corresponding number of bytes in raw_mbs array. */ - int valid_raw_len; - /* The length of the buffers MBS and WCS. */ - int bufs_len; - /* The index in MBS, which is updated by re_string_fetch_byte. */ - int cur_idx; - /* length of RAW_MBS array. */ - int raw_len; - /* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ - int len; - /* End of the buffer may be shorter than its length in the cases such - as re_match_2, re_search_2. Then, we use STOP for end of the buffer - instead of LEN. */ - int raw_stop; - /* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ - int stop; - - /* The context of mbs[0]. We store the context independently, since - the context of mbs[0] may be different from raw_mbs[0], which is - the beginning of the input string. */ - unsigned int tip_context; - /* The translation passed as a part of an argument of re_compile_pattern. */ - unsigned RE_TRANSLATE_TYPE trans; - /* Copy of re_dfa_t's word_char. */ - re_const_bitset_ptr_t word_char; - /* 1 if REG_ICASE. */ - unsigned char icase; - unsigned char is_utf8; - unsigned char map_notascii; - unsigned char mbs_allocated; - unsigned char offsets_needed; - unsigned char newline_anchor; - unsigned char word_ops_used; - int mb_cur_max; -}; -typedef struct re_string_t re_string_t; - - -struct re_dfa_t; -typedef struct re_dfa_t re_dfa_t; - -#ifndef _LIBC -# ifdef __i386__ -# define internal_function __attribute ((regparm (3), stdcall)) -# else -# define internal_function -# endif -#endif - -#ifndef RE_NO_INTERNAL_PROTOTYPES -static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str, - int len, int init_len, - RE_TRANSLATE_TYPE trans, int icase, - const re_dfa_t *dfa) - internal_function; -static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str, - int len, RE_TRANSLATE_TYPE trans, - int icase, const re_dfa_t *dfa) - internal_function; -static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, - int eflags) internal_function; -static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, - int new_buf_len) - internal_function; -# ifdef RE_ENABLE_I18N -static void build_wcs_buffer (re_string_t *pstr) internal_function; -static int build_wcs_upper_buffer (re_string_t *pstr) internal_function; -# endif /* RE_ENABLE_I18N */ -static void build_upper_buffer (re_string_t *pstr) internal_function; -static void re_string_translate_buffer (re_string_t *pstr) internal_function; -static void re_string_destruct (re_string_t *pstr) internal_function; -# ifdef RE_ENABLE_I18N -static int re_string_elem_size_at (const re_string_t *pstr, int idx) - internal_function __attribute ((pure)); -static inline int re_string_char_size_at (const re_string_t *pstr, int idx) - internal_function __attribute ((pure)); -static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx) - internal_function __attribute ((pure)); -# endif /* RE_ENABLE_I18N */ -static unsigned int re_string_context_at (const re_string_t *input, int idx, - int eflags) - internal_function __attribute ((pure)); -static unsigned char re_string_peek_byte_case (const re_string_t *pstr, - int idx) - internal_function __attribute ((pure)); -static unsigned char re_string_fetch_byte_case (re_string_t *pstr) - internal_function __attribute ((pure)); -#endif -#define re_string_peek_byte(pstr, offset) \ - ((pstr)->mbs[(pstr)->cur_idx + offset]) -#define re_string_fetch_byte(pstr) \ - ((pstr)->mbs[(pstr)->cur_idx++]) -#define re_string_first_byte(pstr, idx) \ - ((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != WEOF) -#define re_string_is_single_byte_char(pstr, idx) \ - ((pstr)->wcs[idx] != WEOF && ((pstr)->valid_len == (idx) + 1 \ - || (pstr)->wcs[(idx) + 1] != WEOF)) -#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) -#define re_string_cur_idx(pstr) ((pstr)->cur_idx) -#define re_string_get_buffer(pstr) ((pstr)->mbs) -#define re_string_length(pstr) ((pstr)->len) -#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) -#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) -#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) - -#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) -#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) -#define re_free(p) free (p) - -struct bin_tree_t -{ - struct bin_tree_t *parent; - struct bin_tree_t *left; - struct bin_tree_t *right; - struct bin_tree_t *first; - struct bin_tree_t *next; - - re_token_t token; - - /* `node_idx' is the index in dfa->nodes, if `type' == 0. - Otherwise `type' indicate the type of this node. */ - int node_idx; -}; -typedef struct bin_tree_t bin_tree_t; - -#define BIN_TREE_STORAGE_SIZE \ - ((1024 - sizeof (void *)) / sizeof (bin_tree_t)) - -struct bin_tree_storage_t -{ - struct bin_tree_storage_t *next; - bin_tree_t data[BIN_TREE_STORAGE_SIZE]; -}; -typedef struct bin_tree_storage_t bin_tree_storage_t; - -#define CONTEXT_WORD 1 -#define CONTEXT_NEWLINE (CONTEXT_WORD << 1) -#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1) -#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1) - -#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD) -#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE) -#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF) -#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF) -#define IS_ORDINARY_CONTEXT(c) ((c) == 0) - -#define IS_WORD_CHAR(ch) (isalnum (ch) || (ch) == '_') -#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR) -#define IS_WIDE_WORD_CHAR(ch) (iswalnum (ch) || (ch) == L'_') -#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR) - -#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \ - ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ - || ((constraint & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ - || ((constraint & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\ - || ((constraint & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context))) - -#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \ - ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \ - || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \ - || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \ - || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context))) - -struct re_dfastate_t -{ - unsigned int hash; - re_node_set nodes; - re_node_set non_eps_nodes; - re_node_set inveclosure; - re_node_set *entrance_nodes; - struct re_dfastate_t **trtable, **word_trtable; - unsigned int context : 4; - unsigned int halt : 1; - /* If this state can accept `multi byte'. - Note that we refer to multibyte characters, and multi character - collating elements as `multi byte'. */ - unsigned int accept_mb : 1; - /* If this state has backreference node(s). */ - unsigned int has_backref : 1; - unsigned int has_constraint : 1; -}; -typedef struct re_dfastate_t re_dfastate_t; - -struct re_state_table_entry -{ - int num; - int alloc; - re_dfastate_t **array; -}; - -/* Array type used in re_sub_match_last_t and re_sub_match_top_t. */ - -typedef struct -{ - int next_idx; - int alloc; - re_dfastate_t **array; -} state_array_t; - -/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP. */ - -typedef struct -{ - int node; - int str_idx; /* The position NODE match at. */ - state_array_t path; -} re_sub_match_last_t; - -/* Store information about the node NODE whose type is OP_OPEN_SUBEXP. - And information about the node, whose type is OP_CLOSE_SUBEXP, - corresponding to NODE is stored in LASTS. */ - -typedef struct -{ - int str_idx; - int node; - int next_last_offset; - state_array_t *path; - int alasts; /* Allocation size of LASTS. */ - int nlasts; /* The number of LASTS. */ - re_sub_match_last_t **lasts; -} re_sub_match_top_t; - -struct re_backref_cache_entry -{ - int node; - int str_idx; - int subexp_from; - int subexp_to; - char more; - char unused; - unsigned short int eps_reachable_subexps_map; -}; - -typedef struct -{ - /* The string object corresponding to the input string. */ - re_string_t input; -#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) - re_dfa_t *const dfa; -#else - re_dfa_t *dfa; -#endif - /* EFLAGS of the argument of regexec. */ - int eflags; - /* Where the matching ends. */ - int match_last; - int last_node; - /* The state log used by the matcher. */ - re_dfastate_t **state_log; - int state_log_top; - /* Back reference cache. */ - int nbkref_ents; - int abkref_ents; - struct re_backref_cache_entry *bkref_ents; - int max_mb_elem_len; - int nsub_tops; - int asub_tops; - re_sub_match_top_t **sub_tops; -} re_match_context_t; - -typedef struct -{ - re_dfastate_t **sifted_states; - re_dfastate_t **limited_states; - int last_node; - int last_str_idx; - re_node_set limits; -} re_sift_context_t; - -struct re_fail_stack_ent_t -{ - int idx; - int node; - regmatch_t *regs; - re_node_set eps_via_nodes; -}; - -struct re_fail_stack_t -{ - int num; - int alloc; - struct re_fail_stack_ent_t *stack; -}; - -struct re_dfa_t -{ - re_token_t *nodes; - int nodes_alloc; - int nodes_len; - int *nexts; - int *org_indices; - re_node_set *edests; - re_node_set *eclosures; - re_node_set *inveclosures; - struct re_state_table_entry *state_table; - re_dfastate_t *init_state; - re_dfastate_t *init_state_word; - re_dfastate_t *init_state_nl; - re_dfastate_t *init_state_begbuf; - bin_tree_t *str_tree; - bin_tree_storage_t *str_tree_storage; - re_bitset_ptr_t sb_char; - int str_tree_storage_idx; - - /* number of subexpressions `re_nsub' is in regex_t. */ - unsigned int state_hash_mask; - int states_alloc; - int init_node; - int nbackref; /* The number of backreference in this dfa. */ - - /* Bitmap expressing which backreference is used. */ - unsigned int used_bkref_map; - unsigned int completed_bkref_map; - - unsigned int has_plural_match : 1; - /* If this dfa has "multibyte node", which is a backreference or - a node which can accept multibyte character or multi character - collating element. */ - unsigned int has_mb_node : 1; - unsigned int is_utf8 : 1; - unsigned int map_notascii : 1; - unsigned int word_ops_used : 1; - int mb_cur_max; - bitset word_char; - reg_syntax_t syntax; - int *subexp_map; -#ifdef DEBUG - char* re_str; -#endif -}; - -#ifndef RE_NO_INTERNAL_PROTOTYPES -static reg_errcode_t re_node_set_alloc (re_node_set *set, int size) internal_function; -static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem) internal_function; -static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1, - int elem2) internal_function; -#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) -static reg_errcode_t re_node_set_init_copy (re_node_set *dest, - const re_node_set *src) internal_function; -static reg_errcode_t re_node_set_add_intersect (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2) internal_function; -static reg_errcode_t re_node_set_init_union (re_node_set *dest, - const re_node_set *src1, - const re_node_set *src2) internal_function; -static reg_errcode_t re_node_set_merge (re_node_set *dest, - const re_node_set *src) internal_function; -static int re_node_set_insert (re_node_set *set, int elem) internal_function; -static int re_node_set_insert_last (re_node_set *set, - int elem) internal_function; -static int re_node_set_compare (const re_node_set *set1, - const re_node_set *set2) - internal_function __attribute ((pure)); -static int re_node_set_contains (const re_node_set *set, int elem) - internal_function __attribute ((pure)); -static void re_node_set_remove_at (re_node_set *set, int idx) internal_function; -#define re_node_set_remove(set,id) \ - (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1)) -#define re_node_set_empty(p) ((p)->nelem = 0) -#define re_node_set_free(set) re_free ((set)->elems) -static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function; -static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa, - const re_node_set *nodes) internal_function; -static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err, - re_dfa_t *dfa, - const re_node_set *nodes, - unsigned int context) internal_function; -static void free_state (re_dfastate_t *state) internal_function; -#endif - - -typedef enum -{ - SB_CHAR, - MB_CHAR, - EQUIV_CLASS, - COLL_SYM, - CHAR_CLASS -} bracket_elem_type; - -typedef struct -{ - bracket_elem_type type; - union - { - unsigned char ch; - unsigned char *name; - wchar_t wch; - } opr; -} bracket_elem_t; - - -/* Inline functions for bitset operation. */ -static inline void -bitset_not (bitset set) -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - set[bitset_i] = ~set[bitset_i]; -} - -static inline void -bitset_merge (bitset dest, const bitset src) -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - dest[bitset_i] |= src[bitset_i]; -} - -static inline void -bitset_not_merge (bitset dest, const bitset src) -{ - int i; - for (i = 0; i < BITSET_UINTS; ++i) - dest[i] |= ~src[i]; -} - -static inline void -bitset_mask (bitset dest, const bitset src) -{ - int bitset_i; - for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i) - dest[bitset_i] &= src[bitset_i]; -} - -#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES -/* Inline functions for re_string. */ -static inline int -internal_function -re_string_char_size_at (const re_string_t *pstr, int idx) -{ - int byte_idx; - if (pstr->mb_cur_max == 1) - return 1; - for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) - if (pstr->wcs[idx + byte_idx] != WEOF) - break; - return byte_idx; -} - -static inline wint_t -internal_function -re_string_wchar_at (const re_string_t *pstr, int idx) -{ - if (pstr->mb_cur_max == 1) - return (wint_t) pstr->mbs[idx]; - return (wint_t) pstr->wcs[idx]; -} - -static int -internal_function -re_string_elem_size_at (const re_string_t *pstr, int idx) -{ -#ifdef _LIBC - const unsigned char *p, *extra; - const int32_t *table, *indirect; - int32_t tmp; -# include <locale/weight.h> - uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - - if (nrules != 0) - { - table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, - _NL_COLLATE_INDIRECTMB); - p = pstr->mbs + idx; - tmp = findidx (&p); - return p - pstr->mbs - idx; - } - else -#endif /* _LIBC */ - return 1; -} -#endif /* RE_ENABLE_I18N */ - -#endif /* _REGEX_INTERNAL_H */ diff --git a/gnu/lib/libregex/regexec.c b/gnu/lib/libregex/regexec.c deleted file mode 100644 index 3c226e3..0000000 --- a/gnu/lib/libregex/regexec.c +++ /dev/null @@ -1,4327 +0,0 @@ -/* Extended regular expression matching and search library. - Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc. - This file is part of the GNU C Library. - Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - - The GNU C Library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License as published by the Free Software Foundation; either - version 2.1 of the License, or (at your option) any later version. - - The GNU C Library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. - - You should have received a copy of the GNU Lesser General Public - License along with the GNU C Library; if not, write to the Free - Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ - -static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, - int n) internal_function; -static void match_ctx_clean (re_match_context_t *mctx) internal_function; -static void match_ctx_free (re_match_context_t *cache) internal_function; -static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node, - int str_idx, int from, int to) - internal_function; -static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx) - internal_function; -static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node, - int str_idx) internal_function; -static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop, - int node, int str_idx) - internal_function; -static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts, - re_dfastate_t **limited_sts, int last_node, - int last_str_idx) - internal_function; -static reg_errcode_t re_search_internal (const regex_t *preg, - const char *string, int length, - int start, int range, int stop, - size_t nmatch, regmatch_t pmatch[], - int eflags) internal_function; -static int re_search_2_stub (struct re_pattern_buffer *bufp, - const char *string1, int length1, - const char *string2, int length2, - int start, int range, struct re_registers *regs, - int stop, int ret_len) internal_function; -static int re_search_stub (struct re_pattern_buffer *bufp, - const char *string, int length, int start, - int range, int stop, struct re_registers *regs, - int ret_len) internal_function; -static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - int nregs, int regs_allocated) internal_function; -static inline re_dfastate_t *acquire_init_state_context - (reg_errcode_t *err, const re_match_context_t *mctx, int idx) - __attribute ((always_inline)) internal_function; -static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) - internal_function; -static int check_matching (re_match_context_t *mctx, int fl_longest_match, - int *p_match_first) - internal_function; -static int check_halt_node_context (const re_dfa_t *dfa, int node, - unsigned int context) internal_function; -static int check_halt_state_context (const re_match_context_t *mctx, - const re_dfastate_t *state, int idx) - internal_function; -static void update_regs (re_dfa_t *dfa, regmatch_t *pmatch, - regmatch_t *prev_idx_match, int cur_node, - int cur_idx, int nmatch) internal_function; -static int proceed_next_node (const re_match_context_t *mctx, - int nregs, regmatch_t *regs, - int *pidx, int node, re_node_set *eps_via_nodes, - struct re_fail_stack_t *fs) internal_function; -static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, - int str_idx, int dest_node, int nregs, - regmatch_t *regs, - re_node_set *eps_via_nodes) internal_function; -static int pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs, - regmatch_t *regs, re_node_set *eps_via_nodes) internal_function; -static reg_errcode_t set_regs (const regex_t *preg, - const re_match_context_t *mctx, - size_t nmatch, regmatch_t *pmatch, - int fl_backtrack) internal_function; -static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs) internal_function; - -#ifdef RE_ENABLE_I18N -static int sift_states_iter_mb (const re_match_context_t *mctx, - re_sift_context_t *sctx, - int node_idx, int str_idx, int max_str_idx) internal_function; -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t sift_states_backward (re_match_context_t *mctx, - re_sift_context_t *sctx) internal_function; -static reg_errcode_t build_sifted_states (re_match_context_t *mctx, - re_sift_context_t *sctx, int str_idx, - re_node_set *cur_dest) internal_function; -static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx, - re_sift_context_t *sctx, - int str_idx, - re_node_set *dest_nodes) internal_function; -static reg_errcode_t add_epsilon_src_nodes (re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates) internal_function; -static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node, - re_node_set *dest_nodes, - const re_node_set *and_nodes) internal_function; -static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits, - int dst_node, int dst_idx, int src_node, - int src_idx) internal_function; -static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx, - int boundaries, int subexp_idx, - int from_node, int bkref_idx) internal_function; -static int check_dst_limits_calc_pos (re_match_context_t *mctx, - int limit, int subexp_idx, - int node, int str_idx, - int bkref_idx) internal_function; -static reg_errcode_t check_subexp_limits (re_dfa_t *dfa, - re_node_set *dest_nodes, - const re_node_set *candidates, - re_node_set *limits, - struct re_backref_cache_entry *bkref_ents, - int str_idx) internal_function; -static reg_errcode_t sift_states_bkref (re_match_context_t *mctx, - re_sift_context_t *sctx, - int str_idx, const re_node_set *candidates) internal_function; -static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx, - int next_state_log_idx) internal_function; -static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst, - re_dfastate_t **src, int num) internal_function; -static re_dfastate_t *find_recover_state (reg_errcode_t *err, - re_match_context_t *mctx) internal_function; -static re_dfastate_t *transit_state (reg_errcode_t *err, - re_match_context_t *mctx, - re_dfastate_t *state) internal_function; -static re_dfastate_t *merge_state_with_log (reg_errcode_t *err, - re_match_context_t *mctx, - re_dfastate_t *next_state) internal_function; -static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx, - re_node_set *cur_nodes, - int str_idx) internal_function; -#if 0 -static re_dfastate_t *transit_state_sb (reg_errcode_t *err, - re_match_context_t *mctx, - re_dfastate_t *pstate) internal_function; -#endif -#ifdef RE_ENABLE_I18N -static reg_errcode_t transit_state_mb (re_match_context_t *mctx, - re_dfastate_t *pstate) internal_function; -#endif /* RE_ENABLE_I18N */ -static reg_errcode_t transit_state_bkref (re_match_context_t *mctx, - const re_node_set *nodes) internal_function; -static reg_errcode_t get_subexp (re_match_context_t *mctx, - int bkref_node, int bkref_str_idx) internal_function; -static reg_errcode_t get_subexp_sub (re_match_context_t *mctx, - const re_sub_match_top_t *sub_top, - re_sub_match_last_t *sub_last, - int bkref_node, int bkref_str) internal_function; -static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes, - int subexp_idx, int type) internal_function; -static reg_errcode_t check_arrival (re_match_context_t *mctx, - state_array_t *path, int top_node, - int top_str, int last_node, int last_str, - int type) internal_function; -static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx, - int str_idx, - re_node_set *cur_nodes, - re_node_set *next_nodes) internal_function; -static reg_errcode_t check_arrival_expand_ecl (re_dfa_t *dfa, - re_node_set *cur_nodes, - int ex_subexp, int type) internal_function; -static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa, - re_node_set *dst_nodes, - int target, int ex_subexp, - int type) internal_function; -static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx, - re_node_set *cur_nodes, int cur_str, - int subexp_num, int type) internal_function; -static int build_trtable (re_dfa_t *dfa, - re_dfastate_t *state) internal_function; -#ifdef RE_ENABLE_I18N -static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx, - const re_string_t *input, int idx) internal_function; -# ifdef _LIBC -static unsigned int find_collation_sequence_value (const unsigned char *mbs, - size_t name_len) internal_function; -# endif /* _LIBC */ -#endif /* RE_ENABLE_I18N */ -static int group_nodes_into_DFAstates (re_dfa_t *dfa, - const re_dfastate_t *state, - re_node_set *states_node, - bitset *states_ch) internal_function; -static int check_node_accept (const re_match_context_t *mctx, - const re_token_t *node, int idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) internal_function; - -/* Entry point for POSIX code. */ - -/* regexec searches for a given pattern, specified by PREG, in the - string STRING. - - If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at - least NMATCH elements, and we set them to the offsets of the - corresponding matched substrings. - - EFLAGS specifies `execution flags' which affect matching: if - REG_NOTBOL is set, then ^ does not match at the beginning of the - string; if REG_NOTEOL is set, then $ does not match at the end. - - We return 0 if we find a match and REG_NOMATCH if not. */ - -int -regexec (preg, string, nmatch, pmatch, eflags) - const regex_t *__restrict preg; - const char *__restrict string; - size_t nmatch; - regmatch_t pmatch[]; - int eflags; -{ - reg_errcode_t err; - int start, length; - - if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) - return REG_BADPAT; - - if (eflags & REG_STARTEND) - { - start = pmatch[0].rm_so; - length = pmatch[0].rm_eo; - } - else - { - start = 0; - length = strlen (string); - } - if (preg->no_sub) - err = re_search_internal (preg, string, length, start, length - start, - length, 0, NULL, eflags); - else - err = re_search_internal (preg, string, length, start, length - start, - length, nmatch, pmatch, eflags); - return err != REG_NOERROR; -} - -#ifdef _LIBC -# include <shlib-compat.h> -versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4); - -# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4) -__typeof__ (__regexec) __compat_regexec; - -int -attribute_compat_text_section -__compat_regexec (const regex_t *__restrict preg, - const char *__restrict string, size_t nmatch, - regmatch_t pmatch[], int eflags) -{ - return regexec (preg, string, nmatch, pmatch, - eflags & (REG_NOTBOL | REG_NOTEOL)); -} -compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0); -# endif -#endif - -/* Entry points for GNU code. */ - -/* re_match, re_search, re_match_2, re_search_2 - - The former two functions operate on STRING with length LENGTH, - while the later two operate on concatenation of STRING1 and STRING2 - with lengths LENGTH1 and LENGTH2, respectively. - - re_match() matches the compiled pattern in BUFP against the string, - starting at index START. - - re_search() first tries matching at index START, then it tries to match - starting from index START + 1, and so on. The last start position tried - is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same - way as re_match().) - - The parameter STOP of re_{match,search}_2 specifies that no match exceeding - the first STOP characters of the concatenation of the strings should be - concerned. - - If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match - and all groups is stroed in REGS. (For the "_2" variants, the offsets are - computed relative to the concatenation, not relative to the individual - strings.) - - On success, re_match* functions return the length of the match, re_search* - return the position of the start of the match. Return value -1 means no - match was found and -2 indicates an internal error. */ - -int -re_match (bufp, string, length, start, regs) - struct re_pattern_buffer *bufp; - const char *string; - int length, start; - struct re_registers *regs; -{ - return re_search_stub (bufp, string, length, start, 0, length, regs, 1); -} -#ifdef _LIBC -weak_alias (__re_match, re_match) -#endif - -int -re_search (bufp, string, length, start, range, regs) - struct re_pattern_buffer *bufp; - const char *string; - int length, start, range; - struct re_registers *regs; -{ - return re_search_stub (bufp, string, length, start, range, length, regs, 0); -} -#ifdef _LIBC -weak_alias (__re_search, re_search) -#endif - -int -re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int length1, length2, start, stop; - struct re_registers *regs; -{ - return re_search_2_stub (bufp, string1, length1, string2, length2, - start, 0, regs, stop, 1); -} -#ifdef _LIBC -weak_alias (__re_match_2, re_match_2) -#endif - -int -re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int length1, length2, start, range, stop; - struct re_registers *regs; -{ - return re_search_2_stub (bufp, string1, length1, string2, length2, - start, range, regs, stop, 0); -} -#ifdef _LIBC -weak_alias (__re_search_2, re_search_2) -#endif - -static int -re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs, - stop, ret_len) - struct re_pattern_buffer *bufp; - const char *string1, *string2; - int length1, length2, start, range, stop, ret_len; - struct re_registers *regs; -{ - const char *str; - int rval; - int len = length1 + length2; - int free_str = 0; - - if (BE (length1 < 0 || length2 < 0 || stop < 0, 0)) - return -2; - - /* Concatenate the strings. */ - if (length2 > 0) - if (length1 > 0) - { - char *s = re_malloc (char, len); - - if (BE (s == NULL, 0)) - return -2; - memcpy (s, string1, length1); - memcpy (s + length1, string2, length2); - str = s; - free_str = 1; - } - else - str = string2; - else - str = string1; - - rval = re_search_stub (bufp, str, len, start, range, stop, regs, - ret_len); - if (free_str) - re_free ((char *) str); - return rval; -} - -/* The parameters have the same meaning as those of re_search. - Additional parameters: - If RET_LEN is nonzero the length of the match is returned (re_match style); - otherwise the position of the match is returned. */ - -static int -re_search_stub (bufp, string, length, start, range, stop, regs, ret_len) - struct re_pattern_buffer *bufp; - const char *string; - int length, start, range, stop, ret_len; - struct re_registers *regs; -{ - reg_errcode_t result; - regmatch_t *pmatch; - int nregs, rval; - int eflags = 0; - - /* Check for out-of-range. */ - if (BE (start < 0 || start > length, 0)) - return -1; - if (BE (start + range > length, 0)) - range = length - start; - else if (BE (start + range < 0, 0)) - range = -start; - - eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; - eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; - - /* Compile fastmap if we haven't yet. */ - if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate) - re_compile_fastmap (bufp); - - if (BE (bufp->no_sub, 0)) - regs = NULL; - - /* We need at least 1 register. */ - if (regs == NULL) - nregs = 1; - else if (BE (bufp->regs_allocated == REGS_FIXED && - regs->num_regs < bufp->re_nsub + 1, 0)) - { - nregs = regs->num_regs; - if (BE (nregs < 1, 0)) - { - /* Nothing can be copied to regs. */ - regs = NULL; - nregs = 1; - } - } - else - nregs = bufp->re_nsub + 1; - pmatch = re_malloc (regmatch_t, nregs); - if (BE (pmatch == NULL, 0)) - return -2; - - result = re_search_internal (bufp, string, length, start, range, stop, - nregs, pmatch, eflags); - - rval = 0; - - /* I hope we needn't fill ther regs with -1's when no match was found. */ - if (result != REG_NOERROR) - rval = -1; - else if (regs != NULL) - { - /* If caller wants register contents data back, copy them. */ - bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs, - bufp->regs_allocated); - if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0)) - rval = -2; - } - - if (BE (rval == 0, 1)) - { - if (ret_len) - { - assert (pmatch[0].rm_so == start); - rval = pmatch[0].rm_eo - start; - } - else - rval = pmatch[0].rm_so; - } - re_free (pmatch); - return rval; -} - -static unsigned -re_copy_regs (regs, pmatch, nregs, regs_allocated) - struct re_registers *regs; - regmatch_t *pmatch; - int nregs, regs_allocated; -{ - int rval = REGS_REALLOCATE; - int i; - int need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code - uses. */ - - /* Have the register data arrays been allocated? */ - if (regs_allocated == REGS_UNALLOCATED) - { /* No. So allocate them with malloc. */ - regs->start = re_malloc (regoff_t, need_regs); - regs->end = re_malloc (regoff_t, need_regs); - if (BE (regs->start == NULL, 0) || BE (regs->end == NULL, 0)) - return REGS_UNALLOCATED; - regs->num_regs = need_regs; - } - else if (regs_allocated == REGS_REALLOCATE) - { /* Yes. If we need more elements than were already - allocated, reallocate them. If we need fewer, just - leave it alone. */ - if (BE (need_regs > regs->num_regs, 0)) - { - regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs); - regoff_t *new_end = re_realloc (regs->end, regoff_t, need_regs); - if (BE (new_start == NULL, 0) || BE (new_end == NULL, 0)) - return REGS_UNALLOCATED; - regs->start = new_start; - regs->end = new_end; - regs->num_regs = need_regs; - } - } - else - { - assert (regs_allocated == REGS_FIXED); - /* This function may not be called with REGS_FIXED and nregs too big. */ - assert (regs->num_regs >= nregs); - rval = REGS_FIXED; - } - - /* Copy the regs. */ - for (i = 0; i < nregs; ++i) - { - regs->start[i] = pmatch[i].rm_so; - regs->end[i] = pmatch[i].rm_eo; - } - for ( ; i < regs->num_regs; ++i) - regs->start[i] = regs->end[i] = -1; - - return rval; -} - -/* Set REGS to hold NUM_REGS registers, storing them in STARTS and - ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use - this memory for recording register information. STARTS and ENDS - must be allocated using the malloc library routine, and must each - be at least NUM_REGS * sizeof (regoff_t) bytes long. - - If NUM_REGS == 0, then subsequent matches should allocate their own - register data. - - Unless this function is called, the first search or match using - PATTERN_BUFFER will allocate its own register data, without - freeing the old data. */ - -void -re_set_registers (bufp, regs, num_regs, starts, ends) - struct re_pattern_buffer *bufp; - struct re_registers *regs; - unsigned num_regs; - regoff_t *starts, *ends; -{ - if (num_regs) - { - bufp->regs_allocated = REGS_REALLOCATE; - regs->num_regs = num_regs; - regs->start = starts; - regs->end = ends; - } - else - { - bufp->regs_allocated = REGS_UNALLOCATED; - regs->num_regs = 0; - regs->start = regs->end = (regoff_t *) 0; - } -} -#ifdef _LIBC -weak_alias (__re_set_registers, re_set_registers) -#endif - -/* Entry points compatible with 4.2 BSD regex library. We don't define - them unless specifically requested. */ - -#if defined _REGEX_RE_COMP || defined _LIBC -int -# ifdef _LIBC -weak_function -# endif -re_exec (s) - const char *s; -{ - return 0 == regexec (&re_comp_buf, s, 0, NULL, 0); -} -#endif /* _REGEX_RE_COMP */ - -/* Internal entry point. */ - -/* Searches for a compiled pattern PREG in the string STRING, whose - length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same - mingings with regexec. START, and RANGE have the same meanings - with re_search. - Return REG_NOERROR if we find a match, and REG_NOMATCH if not, - otherwise return the error code. - Note: We assume front end functions already check ranges. - (START + RANGE >= 0 && START + RANGE <= LENGTH) */ - -static reg_errcode_t -re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, - eflags) - const regex_t *preg; - const char *string; - int length, start, range, stop, eflags; - size_t nmatch; - regmatch_t pmatch[]; -{ - reg_errcode_t err; - re_dfa_t *dfa = (re_dfa_t *)preg->buffer; - int left_lim, right_lim, incr; - int fl_longest_match, match_first, match_kind, match_last = -1; - int extra_nmatch; - int sb, ch; -#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L) - re_match_context_t mctx = { .dfa = dfa }; -#else - re_match_context_t mctx; -#endif - char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate - && range && !preg->can_be_null) ? preg->fastmap : NULL; - unsigned RE_TRANSLATE_TYPE t = (unsigned RE_TRANSLATE_TYPE) preg->translate; - -#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)) - memset (&mctx, '\0', sizeof (re_match_context_t)); - mctx.dfa = dfa; -#endif - - extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0; - nmatch -= extra_nmatch; - - /* Check if the DFA haven't been compiled. */ - if (BE (preg->used == 0 || dfa->init_state == NULL - || dfa->init_state_word == NULL || dfa->init_state_nl == NULL - || dfa->init_state_begbuf == NULL, 0)) - return REG_NOMATCH; - -#ifdef DEBUG - /* We assume front-end functions already check them. */ - assert (start + range >= 0 && start + range <= length); -#endif - - /* If initial states with non-begbuf contexts have no elements, - the regex must be anchored. If preg->newline_anchor is set, - we'll never use init_state_nl, so do not check it. */ - if (dfa->init_state->nodes.nelem == 0 - && dfa->init_state_word->nodes.nelem == 0 - && (dfa->init_state_nl->nodes.nelem == 0 - || !preg->newline_anchor)) - { - if (start != 0 && start + range != 0) - return REG_NOMATCH; - start = range = 0; - } - - /* We must check the longest matching, if nmatch > 0. */ - fl_longest_match = (nmatch != 0 || dfa->nbackref); - - err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE, dfa); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - mctx.input.stop = stop; - mctx.input.raw_stop = stop; - mctx.input.newline_anchor = preg->newline_anchor; - - err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* We will log all the DFA states through which the dfa pass, - if nmatch > 1, or this dfa has "multibyte node", which is a - back-reference or a node which can accept multibyte character or - multi character collating element. */ - if (nmatch > 1 || dfa->has_mb_node) - { - mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1); - if (BE (mctx.state_log == NULL, 0)) - { - err = REG_ESPACE; - goto free_return; - } - } - else - mctx.state_log = NULL; - - match_first = start; - mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF - : CONTEXT_NEWLINE | CONTEXT_BEGBUF; - - /* Check incrementally whether of not the input string match. */ - incr = (range < 0) ? -1 : 1; - left_lim = (range < 0) ? start + range : start; - right_lim = (range < 0) ? start : start + range; - sb = dfa->mb_cur_max == 1; - match_kind = - (fastmap - ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0) - | (range >= 0 ? 2 : 0) - | (t != NULL ? 1 : 0)) - : 8); - - for (;; match_first += incr) - { - err = REG_NOMATCH; - if (match_first < left_lim || right_lim < match_first) - goto free_return; - - /* Advance as rapidly as possible through the string, until we - find a plausible place to start matching. This may be done - with varying efficiency, so there are various possibilities: - only the most common of them are specialized, in order to - save on code size. We use a switch statement for speed. */ - switch (match_kind) - { - case 8: - /* No fastmap. */ - break; - - case 7: - /* Fastmap with single-byte translation, match forward. */ - while (BE (match_first < right_lim, 1) - && !fastmap[t[(unsigned char) string[match_first]]]) - ++match_first; - goto forward_match_found_start_or_reached_end; - - case 6: - /* Fastmap without translation, match forward. */ - while (BE (match_first < right_lim, 1) - && !fastmap[(unsigned char) string[match_first]]) - ++match_first; - - forward_match_found_start_or_reached_end: - if (BE (match_first == right_lim, 0)) - { - ch = match_first >= length - ? 0 : (unsigned char) string[match_first]; - if (!fastmap[t ? t[ch] : ch]) - goto free_return; - } - break; - - case 4: - case 5: - /* Fastmap without multi-byte translation, match backwards. */ - while (match_first >= left_lim) - { - ch = match_first >= length - ? 0 : (unsigned char) string[match_first]; - if (fastmap[t ? t[ch] : ch]) - break; - --match_first; - } - if (match_first < left_lim) - goto free_return; - break; - - default: - /* In this case, we can't determine easily the current byte, - since it might be a component byte of a multibyte - character. Then we use the constructed buffer instead. */ - for (;;) - { - /* If MATCH_FIRST is out of the valid range, reconstruct the - buffers. */ - unsigned int offset = match_first - mctx.input.raw_mbs_idx; - if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0)) - { - err = re_string_reconstruct (&mctx.input, match_first, - eflags); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - offset = match_first - mctx.input.raw_mbs_idx; - } - /* If MATCH_FIRST is out of the buffer, leave it as '\0'. - Note that MATCH_FIRST must not be smaller than 0. */ - ch = (match_first >= length - ? 0 : re_string_byte_at (&mctx.input, offset)); - if (fastmap[ch]) - break; - match_first += incr; - if (match_first < left_lim || match_first > right_lim) - { - err = REG_NOMATCH; - goto free_return; - } - } - break; - } - - /* Reconstruct the buffers so that the matcher can assume that - the matching starts from the beginning of the buffer. */ - err = re_string_reconstruct (&mctx.input, match_first, eflags); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - -#ifdef RE_ENABLE_I18N - /* Don't consider this char as a possible match start if it part, - yet isn't the head, of a multibyte character. */ - if (!sb && !re_string_first_byte (&mctx.input, 0)) - continue; -#endif - - /* It seems to be appropriate one, then use the matcher. */ - /* We assume that the matching starts from 0. */ - mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0; - match_last = check_matching (&mctx, fl_longest_match, - range >= 0 ? &match_first : NULL); - if (match_last != -1) - { - if (BE (match_last == -2, 0)) - { - err = REG_ESPACE; - goto free_return; - } - else - { - mctx.match_last = match_last; - if ((!preg->no_sub && nmatch > 1) || dfa->nbackref) - { - re_dfastate_t *pstate = mctx.state_log[match_last]; - mctx.last_node = check_halt_state_context (&mctx, pstate, - match_last); - } - if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match) - || dfa->nbackref) - { - err = prune_impossible_nodes (&mctx); - if (err == REG_NOERROR) - break; - if (BE (err != REG_NOMATCH, 0)) - goto free_return; - match_last = -1; - } - else - break; /* We found a match. */ - } - } - - match_ctx_clean (&mctx); - } - -#ifdef DEBUG - assert (match_last != -1); - assert (err == REG_NOERROR); -#endif - - /* Set pmatch[] if we need. */ - if (nmatch > 0) - { - int reg_idx; - - /* Initialize registers. */ - for (reg_idx = 1; reg_idx < nmatch; ++reg_idx) - pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1; - - /* Set the points where matching start/end. */ - pmatch[0].rm_so = 0; - pmatch[0].rm_eo = mctx.match_last; - - if (!preg->no_sub && nmatch > 1) - { - err = set_regs (preg, &mctx, nmatch, pmatch, - dfa->has_plural_match && dfa->nbackref > 0); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - - /* At last, add the offset to the each registers, since we slided - the buffers so that we could assume that the matching starts - from 0. */ - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - if (pmatch[reg_idx].rm_so != -1) - { -#ifdef RE_ENABLE_I18N - if (BE (mctx.input.offsets_needed != 0, 0)) - { - if (pmatch[reg_idx].rm_so == mctx.input.valid_len) - pmatch[reg_idx].rm_so += mctx.input.valid_raw_len - mctx.input.valid_len; - else - pmatch[reg_idx].rm_so = mctx.input.offsets[pmatch[reg_idx].rm_so]; - if (pmatch[reg_idx].rm_eo == mctx.input.valid_len) - pmatch[reg_idx].rm_eo += mctx.input.valid_raw_len - mctx.input.valid_len; - else - pmatch[reg_idx].rm_eo = mctx.input.offsets[pmatch[reg_idx].rm_eo]; - } -#else - assert (mctx.input.offsets_needed == 0); -#endif - pmatch[reg_idx].rm_so += match_first; - pmatch[reg_idx].rm_eo += match_first; - } - for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx) - { - pmatch[nmatch + reg_idx].rm_so = -1; - pmatch[nmatch + reg_idx].rm_eo = -1; - } - - if (dfa->subexp_map) - for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++) - if (dfa->subexp_map[reg_idx] != reg_idx) - { - pmatch[reg_idx + 1].rm_so - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so; - pmatch[reg_idx + 1].rm_eo - = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo; - } - } - - free_return: - re_free (mctx.state_log); - if (dfa->nbackref) - match_ctx_free (&mctx); - re_string_destruct (&mctx.input); - return err; -} - -static reg_errcode_t -prune_impossible_nodes (mctx) - re_match_context_t *mctx; -{ - re_dfa_t *const dfa = mctx->dfa; - int halt_node, match_last; - reg_errcode_t ret; - re_dfastate_t **sifted_states; - re_dfastate_t **lim_states = NULL; - re_sift_context_t sctx; -#ifdef DEBUG - assert (mctx->state_log != NULL); -#endif - match_last = mctx->match_last; - halt_node = mctx->last_node; - sifted_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (sifted_states == NULL, 0)) - { - ret = REG_ESPACE; - goto free_return; - } - if (dfa->nbackref) - { - lim_states = re_malloc (re_dfastate_t *, match_last + 1); - if (BE (lim_states == NULL, 0)) - { - ret = REG_ESPACE; - goto free_return; - } - while (1) - { - memset (lim_states, '\0', - sizeof (re_dfastate_t *) * (match_last + 1)); - sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, - match_last); - ret = sift_states_backward (mctx, &sctx); - re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - if (sifted_states[0] != NULL || lim_states[0] != NULL) - break; - do - { - --match_last; - if (match_last < 0) - { - ret = REG_NOMATCH; - goto free_return; - } - } while (mctx->state_log[match_last] == NULL - || !mctx->state_log[match_last]->halt); - halt_node = check_halt_state_context (mctx, - mctx->state_log[match_last], - match_last); - } - ret = merge_state_array (dfa, sifted_states, lim_states, - match_last + 1); - re_free (lim_states); - lim_states = NULL; - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - } - else - { - sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last); - ret = sift_states_backward (mctx, &sctx); - re_node_set_free (&sctx.limits); - if (BE (ret != REG_NOERROR, 0)) - goto free_return; - } - re_free (mctx->state_log); - mctx->state_log = sifted_states; - sifted_states = NULL; - mctx->last_node = halt_node; - mctx->match_last = match_last; - ret = REG_NOERROR; - free_return: - re_free (sifted_states); - re_free (lim_states); - return ret; -} - -/* Acquire an initial state and return it. - We must select appropriate initial state depending on the context, - since initial states may have constraints like "\<", "^", etc.. */ - -static inline re_dfastate_t * -acquire_init_state_context (err, mctx, idx) - reg_errcode_t *err; - const re_match_context_t *mctx; - int idx; -{ - re_dfa_t *const dfa = mctx->dfa; - if (dfa->init_state->has_constraint) - { - unsigned int context; - context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags); - if (IS_WORD_CONTEXT (context)) - return dfa->init_state_word; - else if (IS_ORDINARY_CONTEXT (context)) - return dfa->init_state; - else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context)) - return dfa->init_state_begbuf; - else if (IS_NEWLINE_CONTEXT (context)) - return dfa->init_state_nl; - else if (IS_BEGBUF_CONTEXT (context)) - { - /* It is relatively rare case, then calculate on demand. */ - return re_acquire_state_context (err, dfa, - dfa->init_state->entrance_nodes, - context); - } - else - /* Must not happen? */ - return dfa->init_state; - } - else - return dfa->init_state; -} - -/* Check whether the regular expression match input string INPUT or not, - and return the index where the matching end, return -1 if not match, - or return -2 in case of an error. - FL_LONGEST_MATCH means we want the POSIX longest matching. - If P_MATCH_FIRST is not NULL, and the match fails, it is set to the - next place where we may want to try matching. - Note that the matcher assume that the maching starts from the current - index of the buffer. */ - -static int -check_matching (mctx, fl_longest_match, p_match_first) - re_match_context_t *mctx; - int fl_longest_match; - int *p_match_first; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - int match = 0; - int match_last = -1; - int cur_str_idx = re_string_cur_idx (&mctx->input); - re_dfastate_t *cur_state; - int at_init_state = p_match_first != NULL; - int next_start_idx = cur_str_idx; - - err = REG_NOERROR; - cur_state = acquire_init_state_context (&err, mctx, cur_str_idx); - /* An initial state must not be NULL (invalid). */ - if (BE (cur_state == NULL, 0)) - { - assert (err == REG_ESPACE); - return -2; - } - - if (mctx->state_log != NULL) - { - mctx->state_log[cur_str_idx] = cur_state; - - /* Check OP_OPEN_SUBEXP in the initial state in case that we use them - later. E.g. Processing back references. */ - if (BE (dfa->nbackref, 0)) - { - at_init_state = 0; - err = check_subexp_matching_top (mctx, &cur_state->nodes, 0); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (cur_state->has_backref) - { - err = transit_state_bkref (mctx, &cur_state->nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - } - - /* If the RE accepts NULL string. */ - if (BE (cur_state->halt, 0)) - { - if (!cur_state->has_constraint - || check_halt_state_context (mctx, cur_state, cur_str_idx)) - { - if (!fl_longest_match) - return cur_str_idx; - else - { - match_last = cur_str_idx; - match = 1; - } - } - } - - while (!re_string_eoi (&mctx->input)) - { - re_dfastate_t *old_state = cur_state; - int next_char_idx = re_string_cur_idx (&mctx->input) + 1; - - if (BE (next_char_idx >= mctx->input.bufs_len, 0) - || (BE (next_char_idx >= mctx->input.valid_len, 0) - && mctx->input.valid_len < mctx->input.len)) - { - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - { - assert (err == REG_ESPACE); - return -2; - } - } - - cur_state = transit_state (&err, mctx, cur_state); - if (mctx->state_log != NULL) - cur_state = merge_state_with_log (&err, mctx, cur_state); - - if (cur_state == NULL) - { - /* Reached the invalid state or an error. Try to recover a valid - state using the state log, if available and if we have not - already found a valid (even if not the longest) match. */ - if (BE (err != REG_NOERROR, 0)) - return -2; - - if (mctx->state_log == NULL - || (match && !fl_longest_match) - || (cur_state = find_recover_state (&err, mctx)) == NULL) - break; - } - - if (BE (at_init_state, 0)) - { - if (old_state == cur_state) - next_start_idx = next_char_idx; - else - at_init_state = 0; - } - - if (cur_state->halt) - { - /* Reached a halt state. - Check the halt state can satisfy the current context. */ - if (!cur_state->has_constraint - || check_halt_state_context (mctx, cur_state, - re_string_cur_idx (&mctx->input))) - { - /* We found an appropriate halt state. */ - match_last = re_string_cur_idx (&mctx->input); - match = 1; - - /* We found a match, do not modify match_first below. */ - p_match_first = NULL; - if (!fl_longest_match) - break; - } - } - } - - if (p_match_first) - *p_match_first += next_start_idx; - - return match_last; -} - -/* Check NODE match the current context. */ - -static int check_halt_node_context (dfa, node, context) - const re_dfa_t *dfa; - int node; - unsigned int context; -{ - re_token_type_t type = dfa->nodes[node].type; - unsigned int constraint = dfa->nodes[node].constraint; - if (type != END_OF_RE) - return 0; - if (!constraint) - return 1; - if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context)) - return 0; - return 1; -} - -/* Check the halt state STATE match the current context. - Return 0 if not match, if the node, STATE has, is a halt node and - match the context, return the node. */ - -static int -check_halt_state_context (mctx, state, idx) - const re_match_context_t *mctx; - const re_dfastate_t *state; - int idx; -{ - int i; - unsigned int context; -#ifdef DEBUG - assert (state->halt); -#endif - context = re_string_context_at (&mctx->input, idx, mctx->eflags); - for (i = 0; i < state->nodes.nelem; ++i) - if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context)) - return state->nodes.elems[i]; - return 0; -} - -/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA - corresponding to the DFA). - Return the destination node, and update EPS_VIA_NODES, return -1 in case - of errors. */ - -static int -proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs) - const re_match_context_t *mctx; - regmatch_t *regs; - int nregs, *pidx, node; - re_node_set *eps_via_nodes; - struct re_fail_stack_t *fs; -{ - re_dfa_t *const dfa = mctx->dfa; - int i, err, dest_node; - dest_node = -1; - if (IS_EPSILON_NODE (dfa->nodes[node].type)) - { - re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; - re_node_set *edests = &dfa->edests[node]; - int dest_node; - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) - return -2; - /* Pick up a valid destination, or return -1 if none is found. */ - for (dest_node = -1, i = 0; i < edests->nelem; ++i) - { - int candidate = edests->elems[i]; - if (!re_node_set_contains (cur_nodes, candidate)) - continue; - if (dest_node == -1) - dest_node = candidate; - - else - { - /* In order to avoid infinite loop like "(a*)*", return the second - epsilon-transition if the first was already considered. */ - if (re_node_set_contains (eps_via_nodes, dest_node)) - return candidate; - - /* Otherwise, push the second epsilon-transition on the fail stack. */ - else if (fs != NULL - && push_fail_stack (fs, *pidx, candidate, nregs, regs, - eps_via_nodes)) - return -2; - - /* We know we are going to exit. */ - break; - } - } - return dest_node; - } - else - { - int naccepted = 0; - re_token_type_t type = dfa->nodes[node].type; - -#ifdef RE_ENABLE_I18N - if (dfa->nodes[node].accept_mb) - naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx); - else -#endif /* RE_ENABLE_I18N */ - if (type == OP_BACK_REF) - { - int subexp_idx = dfa->nodes[node].opr.idx + 1; - naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so; - if (fs != NULL) - { - if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1) - return -1; - else if (naccepted) - { - char *buf = (char *) re_string_get_buffer (&mctx->input); - if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx, - naccepted) != 0) - return -1; - } - } - - if (naccepted == 0) - { - err = re_node_set_insert (eps_via_nodes, node); - if (BE (err < 0, 0)) - return -2; - dest_node = dfa->edests[node].elems[0]; - if (re_node_set_contains (&mctx->state_log[*pidx]->nodes, - dest_node)) - return dest_node; - } - } - - if (naccepted != 0 - || check_node_accept (mctx, dfa->nodes + node, *pidx)) - { - dest_node = dfa->nexts[node]; - *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted; - if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL - || !re_node_set_contains (&mctx->state_log[*pidx]->nodes, - dest_node))) - return -1; - re_node_set_empty (eps_via_nodes); - return dest_node; - } - } - return -1; -} - -static reg_errcode_t -push_fail_stack (fs, str_idx, dest_node, nregs, regs, eps_via_nodes) - struct re_fail_stack_t *fs; - int str_idx, dest_node, nregs; - regmatch_t *regs; - re_node_set *eps_via_nodes; -{ - reg_errcode_t err; - int num = fs->num++; - if (fs->num == fs->alloc) - { - struct re_fail_stack_ent_t *new_array; - new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t) - * fs->alloc * 2)); - if (new_array == NULL) - return REG_ESPACE; - fs->alloc *= 2; - fs->stack = new_array; - } - fs->stack[num].idx = str_idx; - fs->stack[num].node = dest_node; - fs->stack[num].regs = re_malloc (regmatch_t, nregs); - if (fs->stack[num].regs == NULL) - return REG_ESPACE; - memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); - err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); - return err; -} - -static int -pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes) - struct re_fail_stack_t *fs; - int *pidx, nregs; - regmatch_t *regs; - re_node_set *eps_via_nodes; -{ - int num = --fs->num; - assert (num >= 0); - *pidx = fs->stack[num].idx; - memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); - re_node_set_free (eps_via_nodes); - re_free (fs->stack[num].regs); - *eps_via_nodes = fs->stack[num].eps_via_nodes; - return fs->stack[num].node; -} - -/* Set the positions where the subexpressions are starts/ends to registers - PMATCH. - Note: We assume that pmatch[0] is already set, and - pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */ - -static reg_errcode_t -set_regs (preg, mctx, nmatch, pmatch, fl_backtrack) - const regex_t *preg; - const re_match_context_t *mctx; - size_t nmatch; - regmatch_t *pmatch; - int fl_backtrack; -{ - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; - int idx, cur_node; - re_node_set eps_via_nodes; - struct re_fail_stack_t *fs; - struct re_fail_stack_t fs_body = { 0, 2, NULL }; - regmatch_t *prev_idx_match; - -#ifdef DEBUG - assert (nmatch > 1); - assert (mctx->state_log != NULL); -#endif - if (fl_backtrack) - { - fs = &fs_body; - fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc); - if (fs->stack == NULL) - return REG_ESPACE; - } - else - fs = NULL; - - cur_node = dfa->init_node; - re_node_set_init_empty (&eps_via_nodes); - - prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch); - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); - - for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;) - { - update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); - - if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) - { - int reg_idx; - if (fs) - { - for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) - if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) - break; - if (reg_idx == nmatch) - { - re_node_set_free (&eps_via_nodes); - return free_fail_stack_return (fs); - } - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); - } - else - { - re_node_set_free (&eps_via_nodes); - return REG_NOERROR; - } - } - - /* Proceed to next node. */ - cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, - &eps_via_nodes, fs); - - if (BE (cur_node < 0, 0)) - { - if (BE (cur_node == -2, 0)) - { - re_node_set_free (&eps_via_nodes); - free_fail_stack_return (fs); - return REG_ESPACE; - } - if (fs) - cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, - &eps_via_nodes); - else - { - re_node_set_free (&eps_via_nodes); - return REG_NOMATCH; - } - } - } - re_node_set_free (&eps_via_nodes); - return free_fail_stack_return (fs); -} - -static reg_errcode_t -free_fail_stack_return (fs) - struct re_fail_stack_t *fs; -{ - if (fs) - { - int fs_idx; - for (fs_idx = 0; fs_idx < fs->num; ++fs_idx) - { - re_node_set_free (&fs->stack[fs_idx].eps_via_nodes); - re_free (fs->stack[fs_idx].regs); - } - re_free (fs->stack); - } - return REG_NOERROR; -} - -static void -update_regs (dfa, pmatch, prev_idx_match, cur_node, cur_idx, nmatch) - re_dfa_t *dfa; - regmatch_t *pmatch, *prev_idx_match; - int cur_node, cur_idx, nmatch; -{ - int type = dfa->nodes[cur_node].type; - if (type == OP_OPEN_SUBEXP) - { - int reg_num = dfa->nodes[cur_node].opr.idx + 1; - - /* We are at the first node of this sub expression. */ - if (reg_num < nmatch) - { - pmatch[reg_num].rm_so = cur_idx; - pmatch[reg_num].rm_eo = -1; - } - } - else if (type == OP_CLOSE_SUBEXP) - { - int reg_num = dfa->nodes[cur_node].opr.idx + 1; - if (reg_num < nmatch) - { - /* We are at the last node of this sub expression. */ - if (pmatch[reg_num].rm_so < cur_idx) - { - pmatch[reg_num].rm_eo = cur_idx; - /* This is a non-empty match or we are not inside an optional - subexpression. Accept this right away. */ - memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch); - } - else - { - if (dfa->nodes[cur_node].opt_subexp - && prev_idx_match[reg_num].rm_so != -1) - /* We transited through an empty match for an optional - subexpression, like (a?)*, and this is not the subexp's - first match. Copy back the old content of the registers - so that matches of an inner subexpression are undone as - well, like in ((a?))*. */ - memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch); - else - /* We completed a subexpression, but it may be part of - an optional one, so do not update PREV_IDX_MATCH. */ - pmatch[reg_num].rm_eo = cur_idx; - } - } - } -} - -/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0 - and sift the nodes in each states according to the following rules. - Updated state_log will be wrote to STATE_LOG. - - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... - 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': - i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - thrown away, we throw away the node `a'. - 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': - i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. - ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, - we throw away the node `a'. */ - -#define STATE_NODE_CONTAINS(state,node) \ - ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) - -static reg_errcode_t -sift_states_backward (mctx, sctx) - re_match_context_t *mctx; - re_sift_context_t *sctx; -{ - reg_errcode_t err; - int null_cnt = 0; - int str_idx = sctx->last_str_idx; - re_node_set cur_dest; - -#ifdef DEBUG - assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL); -#endif - - /* Build sifted state_log[str_idx]. It has the nodes which can epsilon - transit to the last_node and the last_node itself. */ - err = re_node_set_init_1 (&cur_dest, sctx->last_node); - if (BE (err != REG_NOERROR, 0)) - return err; - err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* Then check each states in the state_log. */ - while (str_idx > 0) - { - /* Update counters. */ - null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0; - if (null_cnt > mctx->max_mb_elem_len) - { - memset (sctx->sifted_states, '\0', - sizeof (re_dfastate_t *) * str_idx); - re_node_set_free (&cur_dest); - return REG_NOERROR; - } - re_node_set_empty (&cur_dest); - --str_idx; - - if (mctx->state_log[str_idx]) - { - err = build_sifted_states (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - - /* Add all the nodes which satisfy the following conditions: - - It can epsilon transit to a node in CUR_DEST. - - It is in CUR_SRC. - And update state_log. */ - err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - err = REG_NOERROR; - free_return: - re_node_set_free (&cur_dest); - return err; -} - -static reg_errcode_t -build_sifted_states (mctx, sctx, str_idx, cur_dest) - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - re_node_set *cur_dest; -{ - re_dfa_t *const dfa = mctx->dfa; - re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes; - int i; - - /* Then build the next sifted state. - We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. - Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]' - (with the epsilon nodes pre-filtered out). */ - for (i = 0; i < cur_src->nelem; i++) - { - int prev_node = cur_src->elems[i]; - int naccepted = 0; - int ret; - -#ifdef DEBUG - re_token_type_t type = dfa->nodes[prev_node].type; - assert (!IS_EPSILON_NODE (type)); -#endif -#ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ - if (dfa->nodes[prev_node].accept_mb) - naccepted = sift_states_iter_mb (mctx, sctx, prev_node, - str_idx, sctx->last_str_idx); -#endif /* RE_ENABLE_I18N */ - - /* We don't check backreferences here. - See update_cur_sifted_state(). */ - if (!naccepted - && check_node_accept (mctx, dfa->nodes + prev_node, str_idx) - && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1], - dfa->nexts[prev_node])) - naccepted = 1; - - if (naccepted == 0) - continue; - - if (sctx->limits.nelem) - { - int to_idx = str_idx + naccepted; - if (check_dst_limits (mctx, &sctx->limits, - dfa->nexts[prev_node], to_idx, - prev_node, str_idx)) - continue; - } - ret = re_node_set_insert (cur_dest, prev_node); - if (BE (ret == -1, 0)) - return REG_ESPACE; - } - - return REG_NOERROR; -} - -/* Helper functions. */ - -static reg_errcode_t -clean_state_log_if_needed (mctx, next_state_log_idx) - re_match_context_t *mctx; - int next_state_log_idx; -{ - int top = mctx->state_log_top; - - if (next_state_log_idx >= mctx->input.bufs_len - || (next_state_log_idx >= mctx->input.valid_len - && mctx->input.valid_len < mctx->input.len)) - { - reg_errcode_t err; - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (top < next_state_log_idx) - { - memset (mctx->state_log + top + 1, '\0', - sizeof (re_dfastate_t *) * (next_state_log_idx - top)); - mctx->state_log_top = next_state_log_idx; - } - return REG_NOERROR; -} - -static reg_errcode_t -merge_state_array (dfa, dst, src, num) - re_dfa_t *dfa; - re_dfastate_t **dst; - re_dfastate_t **src; - int num; -{ - int st_idx; - reg_errcode_t err; - for (st_idx = 0; st_idx < num; ++st_idx) - { - if (dst[st_idx] == NULL) - dst[st_idx] = src[st_idx]; - else if (src[st_idx] != NULL) - { - re_node_set merged_set; - err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes, - &src[st_idx]->nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - dst[st_idx] = re_acquire_state (&err, dfa, &merged_set); - re_node_set_free (&merged_set); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -static reg_errcode_t -update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes) - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - re_node_set *dest_nodes; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - const re_node_set *candidates; - candidates = ((mctx->state_log[str_idx] == NULL) ? NULL - : &mctx->state_log[str_idx]->nodes); - - if (dest_nodes->nelem == 0) - sctx->sifted_states[str_idx] = NULL; - else - { - if (candidates) - { - /* At first, add the nodes which can epsilon transit to a node in - DEST_NODE. */ - err = add_epsilon_src_nodes (dfa, dest_nodes, candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - - /* Then, check the limitations in the current sift_context. */ - if (sctx->limits.nelem) - { - err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits, - mctx->bkref_ents, str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - - sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (candidates && mctx->state_log[str_idx]->has_backref) - { - err = sift_states_bkref (mctx, sctx, str_idx, candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - return REG_NOERROR; -} - -static reg_errcode_t -add_epsilon_src_nodes (dfa, dest_nodes, candidates) - re_dfa_t *dfa; - re_node_set *dest_nodes; - const re_node_set *candidates; -{ - reg_errcode_t err = REG_NOERROR; - int i; - - re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - - if (!state->inveclosure.alloc) - { - err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) - return REG_ESPACE; - for (i = 0; i < dest_nodes->nelem; i++) - re_node_set_merge (&state->inveclosure, - dfa->inveclosures + dest_nodes->elems[i]); - } - return re_node_set_add_intersect (dest_nodes, candidates, - &state->inveclosure); -} - -static reg_errcode_t -sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates) - re_dfa_t *dfa; - int node; - re_node_set *dest_nodes; - const re_node_set *candidates; -{ - int ecl_idx; - reg_errcode_t err; - re_node_set *inv_eclosure = dfa->inveclosures + node; - re_node_set except_nodes; - re_node_set_init_empty (&except_nodes); - for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) - { - int cur_node = inv_eclosure->elems[ecl_idx]; - if (cur_node == node) - continue; - if (IS_EPSILON_NODE (dfa->nodes[cur_node].type)) - { - int edst1 = dfa->edests[cur_node].elems[0]; - int edst2 = ((dfa->edests[cur_node].nelem > 1) - ? dfa->edests[cur_node].elems[1] : -1); - if ((!re_node_set_contains (inv_eclosure, edst1) - && re_node_set_contains (dest_nodes, edst1)) - || (edst2 > 0 - && !re_node_set_contains (inv_eclosure, edst2) - && re_node_set_contains (dest_nodes, edst2))) - { - err = re_node_set_add_intersect (&except_nodes, candidates, - dfa->inveclosures + cur_node); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&except_nodes); - return err; - } - } - } - } - for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx) - { - int cur_node = inv_eclosure->elems[ecl_idx]; - if (!re_node_set_contains (&except_nodes, cur_node)) - { - int idx = re_node_set_contains (dest_nodes, cur_node) - 1; - re_node_set_remove_at (dest_nodes, idx); - } - } - re_node_set_free (&except_nodes); - return REG_NOERROR; -} - -static int -check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx) - re_match_context_t *mctx; - re_node_set *limits; - int dst_node, dst_idx, src_node, src_idx; -{ - re_dfa_t *const dfa = mctx->dfa; - int lim_idx, src_pos, dst_pos; - - int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx); - int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx); - for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) - { - int subexp_idx; - struct re_backref_cache_entry *ent; - ent = mctx->bkref_ents + limits->elems[lim_idx]; - subexp_idx = dfa->nodes[ent->node].opr.idx; - - dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], - subexp_idx, dst_node, dst_idx, - dst_bkref_idx); - src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx], - subexp_idx, src_node, src_idx, - src_bkref_idx); - - /* In case of: - <src> <dst> ( <subexp> ) - ( <subexp> ) <src> <dst> - ( <subexp1> <src> <subexp2> <dst> <subexp3> ) */ - if (src_pos == dst_pos) - continue; /* This is unrelated limitation. */ - else - return 1; - } - return 0; -} - -static int -check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) - re_match_context_t *mctx; - int boundaries, subexp_idx, from_node, bkref_idx; -{ - re_dfa_t *const dfa = mctx->dfa; - re_node_set *eclosures = dfa->eclosures + from_node; - int node_idx; - - /* Else, we are on the boundary: examine the nodes on the epsilon - closure. */ - for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx) - { - int node = eclosures->elems[node_idx]; - switch (dfa->nodes[node].type) - { - case OP_BACK_REF: - if (bkref_idx != -1) - { - struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx; - do - { - int dst, cpos; - - if (ent->node != node) - continue; - - if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map) - && !(ent->eps_reachable_subexps_map & (1 << subexp_idx))) - continue; - - /* Recurse trying to reach the OP_OPEN_SUBEXP and - OP_CLOSE_SUBEXP cases below. But, if the - destination node is the same node as the source - node, don't recurse because it would cause an - infinite loop: a regex that exhibits this behavior - is ()\1*\1* */ - dst = dfa->edests[node].elems[0]; - if (dst == from_node) - { - if (boundaries & 1) - return -1; - else /* if (boundaries & 2) */ - return 0; - } - - cpos = - check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, - dst, bkref_idx); - if (cpos == -1 /* && (boundaries & 1) */) - return -1; - if (cpos == 0 && (boundaries & 2)) - return 0; - - ent->eps_reachable_subexps_map &= ~(1 << subexp_idx); - } - while (ent++->more); - } - break; - - case OP_OPEN_SUBEXP: - if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx) - return -1; - break; - - case OP_CLOSE_SUBEXP: - if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx) - return 0; - break; - - default: - break; - } - } - - return (boundaries & 2) ? 1 : 0; -} - -static int -check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx) - re_match_context_t *mctx; - int limit, subexp_idx, from_node, str_idx, bkref_idx; -{ - struct re_backref_cache_entry *lim = mctx->bkref_ents + limit; - int boundaries; - - /* If we are outside the range of the subexpression, return -1 or 1. */ - if (str_idx < lim->subexp_from) - return -1; - - if (lim->subexp_to < str_idx) - return 1; - - /* If we are within the subexpression, return 0. */ - boundaries = (str_idx == lim->subexp_from); - boundaries |= (str_idx == lim->subexp_to) << 1; - if (boundaries == 0) - return 0; - - /* Else, examine epsilon closure. */ - return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, - from_node, bkref_idx); -} - -/* Check the limitations of sub expressions LIMITS, and remove the nodes - which are against limitations from DEST_NODES. */ - -static reg_errcode_t -check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx) - re_dfa_t *dfa; - re_node_set *dest_nodes; - const re_node_set *candidates; - re_node_set *limits; - struct re_backref_cache_entry *bkref_ents; - int str_idx; -{ - reg_errcode_t err; - int node_idx, lim_idx; - - for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx) - { - int subexp_idx; - struct re_backref_cache_entry *ent; - ent = bkref_ents + limits->elems[lim_idx]; - - if (str_idx <= ent->subexp_from || ent->str_idx < str_idx) - continue; /* This is unrelated limitation. */ - - subexp_idx = dfa->nodes[ent->node].opr.idx; - if (ent->subexp_to == str_idx) - { - int ops_node = -1; - int cls_node = -1; - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - int node = dest_nodes->elems[node_idx]; - re_token_type_t type = dfa->nodes[node].type; - if (type == OP_OPEN_SUBEXP - && subexp_idx == dfa->nodes[node].opr.idx) - ops_node = node; - else if (type == OP_CLOSE_SUBEXP - && subexp_idx == dfa->nodes[node].opr.idx) - cls_node = node; - } - - /* Check the limitation of the open subexpression. */ - /* Note that (ent->subexp_to = str_idx != ent->subexp_from). */ - if (ops_node >= 0) - { - err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - - /* Check the limitation of the close subexpression. */ - if (cls_node >= 0) - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - int node = dest_nodes->elems[node_idx]; - if (!re_node_set_contains (dfa->inveclosures + node, - cls_node) - && !re_node_set_contains (dfa->eclosures + node, - cls_node)) - { - /* It is against this limitation. - Remove it form the current sifted state. */ - err = sub_epsilon_src_nodes (dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - --node_idx; - } - } - } - else /* (ent->subexp_to != str_idx) */ - { - for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx) - { - int node = dest_nodes->elems[node_idx]; - re_token_type_t type = dfa->nodes[node].type; - if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP) - { - if (subexp_idx != dfa->nodes[node].opr.idx) - continue; - /* It is against this limitation. - Remove it form the current sifted state. */ - err = sub_epsilon_src_nodes (dfa, node, dest_nodes, - candidates); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - } - } - return REG_NOERROR; -} - -static reg_errcode_t -sift_states_bkref (mctx, sctx, str_idx, candidates) - re_match_context_t *mctx; - re_sift_context_t *sctx; - int str_idx; - const re_node_set *candidates; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - int node_idx, node; - re_sift_context_t local_sctx; - int first_idx = search_cur_bkref_entry (mctx, str_idx); - - if (first_idx == -1) - return REG_NOERROR; - - local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */ - - for (node_idx = 0; node_idx < candidates->nelem; ++node_idx) - { - int enabled_idx; - re_token_type_t type; - struct re_backref_cache_entry *entry; - node = candidates->elems[node_idx]; - type = dfa->nodes[node].type; - /* Avoid infinite loop for the REs like "()\1+". */ - if (node == sctx->last_node && str_idx == sctx->last_str_idx) - continue; - if (type != OP_BACK_REF) - continue; - - entry = mctx->bkref_ents + first_idx; - enabled_idx = first_idx; - do - { - int subexp_len, to_idx, dst_node; - re_dfastate_t *cur_state; - - if (entry->node != node) - continue; - subexp_len = entry->subexp_to - entry->subexp_from; - to_idx = str_idx + subexp_len; - dst_node = (subexp_len ? dfa->nexts[node] - : dfa->edests[node].elems[0]); - - if (to_idx > sctx->last_str_idx - || sctx->sifted_states[to_idx] == NULL - || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node) - || check_dst_limits (mctx, &sctx->limits, node, - str_idx, dst_node, to_idx)) - continue; - - if (local_sctx.sifted_states == NULL) - { - local_sctx = *sctx; - err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - local_sctx.last_node = node; - local_sctx.last_str_idx = str_idx; - err = re_node_set_insert (&local_sctx.limits, enabled_idx); - if (BE (err < 0, 0)) - { - err = REG_ESPACE; - goto free_return; - } - cur_state = local_sctx.sifted_states[str_idx]; - err = sift_states_backward (mctx, &local_sctx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - if (sctx->limited_states != NULL) - { - err = merge_state_array (dfa, sctx->limited_states, - local_sctx.sifted_states, - str_idx + 1); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - local_sctx.sifted_states[str_idx] = cur_state; - re_node_set_remove (&local_sctx.limits, enabled_idx); - - /* mctx->bkref_ents may have changed, reload the pointer. */ - entry = mctx->bkref_ents + enabled_idx; - } - while (enabled_idx++, entry++->more); - } - err = REG_NOERROR; - free_return: - if (local_sctx.sifted_states != NULL) - { - re_node_set_free (&local_sctx.limits); - } - - return err; -} - - -#ifdef RE_ENABLE_I18N -static int -sift_states_iter_mb (mctx, sctx, node_idx, str_idx, max_str_idx) - const re_match_context_t *mctx; - re_sift_context_t *sctx; - int node_idx, str_idx, max_str_idx; -{ - re_dfa_t *const dfa = mctx->dfa; - int naccepted; - /* Check the node can accept `multi byte'. */ - naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); - if (naccepted > 0 && str_idx + naccepted <= max_str_idx && - !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], - dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the - destination was already thrown away, then the node - could't accept the current input `multi byte'. */ - naccepted = 0; - /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ - return naccepted; -} -#endif /* RE_ENABLE_I18N */ - - -/* Functions for state transition. */ - -/* Return the next state to which the current state STATE will transit by - accepting the current input byte, and update STATE_LOG if necessary. - If STATE can accept a multibyte char/collating element/back reference - update the destination of STATE_LOG. */ - -static re_dfastate_t * -transit_state (err, mctx, state) - reg_errcode_t *err; - re_match_context_t *mctx; - re_dfastate_t *state; -{ - re_dfastate_t **trtable; - unsigned char ch; - -#ifdef RE_ENABLE_I18N - /* If the current state can accept multibyte. */ - if (BE (state->accept_mb, 0)) - { - *err = transit_state_mb (mctx, state); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } -#endif /* RE_ENABLE_I18N */ - - /* Then decide the next state with the single byte. */ -#if 0 - if (0) - /* don't use transition table */ - return transit_state_sb (err, mctx, state); -#endif - - /* Use transition table */ - ch = re_string_fetch_byte (&mctx->input); - for (;;) - { - trtable = state->trtable; - if (BE (trtable != NULL, 1)) - return trtable[ch]; - - trtable = state->word_trtable; - if (BE (trtable != NULL, 1)) - { - unsigned int context; - context - = re_string_context_at (&mctx->input, - re_string_cur_idx (&mctx->input) - 1, - mctx->eflags); - if (IS_WORD_CONTEXT (context)) - return trtable[ch + SBC_MAX]; - else - return trtable[ch]; - } - - if (!build_trtable (mctx->dfa, state)) - { - *err = REG_ESPACE; - return NULL; - } - - /* Retry, we now have a transition table. */ - } -} - -/* Update the state_log if we need */ -re_dfastate_t * -merge_state_with_log (err, mctx, next_state) - reg_errcode_t *err; - re_match_context_t *mctx; - re_dfastate_t *next_state; -{ - re_dfa_t *const dfa = mctx->dfa; - int cur_idx = re_string_cur_idx (&mctx->input); - - if (cur_idx > mctx->state_log_top) - { - mctx->state_log[cur_idx] = next_state; - mctx->state_log_top = cur_idx; - } - else if (mctx->state_log[cur_idx] == 0) - { - mctx->state_log[cur_idx] = next_state; - } - else - { - re_dfastate_t *pstate; - unsigned int context; - re_node_set next_nodes, *log_nodes, *table_nodes = NULL; - /* If (state_log[cur_idx] != 0), it implies that cur_idx is - the destination of a multibyte char/collating element/ - back reference. Then the next state is the union set of - these destinations and the results of the transition table. */ - pstate = mctx->state_log[cur_idx]; - log_nodes = pstate->entrance_nodes; - if (next_state != NULL) - { - table_nodes = next_state->entrance_nodes; - *err = re_node_set_init_union (&next_nodes, table_nodes, - log_nodes); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - } - else - next_nodes = *log_nodes; - /* Note: We already add the nodes of the initial state, - then we don't need to add them here. */ - - context = re_string_context_at (&mctx->input, - re_string_cur_idx (&mctx->input) - 1, - mctx->eflags); - next_state = mctx->state_log[cur_idx] - = re_acquire_state_context (err, dfa, &next_nodes, context); - /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ - - if (table_nodes != NULL) - re_node_set_free (&next_nodes); - } - - if (BE (dfa->nbackref, 0) && next_state != NULL) - { - /* Check OP_OPEN_SUBEXP in the current state in case that we use them - later. We must check them here, since the back references in the - next state might use them. */ - *err = check_subexp_matching_top (mctx, &next_state->nodes, - cur_idx); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - - /* If the next state has back references. */ - if (next_state->has_backref) - { - *err = transit_state_bkref (mctx, &next_state->nodes); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - next_state = mctx->state_log[cur_idx]; - } - } - - return next_state; -} - -/* Skip bytes in the input that correspond to part of a - multi-byte match, then look in the log for a state - from which to restart matching. */ -re_dfastate_t * -find_recover_state (err, mctx) - reg_errcode_t *err; - re_match_context_t *mctx; -{ - re_dfastate_t *cur_state = NULL; - do - { - int max = mctx->state_log_top; - int cur_str_idx = re_string_cur_idx (&mctx->input); - - do - { - if (++cur_str_idx > max) - return NULL; - re_string_skip_bytes (&mctx->input, 1); - } - while (mctx->state_log[cur_str_idx] == NULL); - - cur_state = merge_state_with_log (err, mctx, NULL); - } - while (err == REG_NOERROR && cur_state == NULL); - return cur_state; -} - -/* Helper functions for transit_state. */ - -/* From the node set CUR_NODES, pick up the nodes whose types are - OP_OPEN_SUBEXP and which have corresponding back references in the regular - expression. And register them to use them later for evaluating the - correspoding back references. */ - -static reg_errcode_t -check_subexp_matching_top (mctx, cur_nodes, str_idx) - re_match_context_t *mctx; - re_node_set *cur_nodes; - int str_idx; -{ - re_dfa_t *const dfa = mctx->dfa; - int node_idx; - reg_errcode_t err; - - /* TODO: This isn't efficient. - Because there might be more than one nodes whose types are - OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all - nodes. - E.g. RE: (a){2} */ - for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx) - { - int node = cur_nodes->elems[node_idx]; - if (dfa->nodes[node].type == OP_OPEN_SUBEXP - && dfa->nodes[node].opr.idx < (8 * sizeof (dfa->used_bkref_map)) - && dfa->used_bkref_map & (1 << dfa->nodes[node].opr.idx)) - { - err = match_ctx_add_subtop (mctx, node, str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - } - } - return REG_NOERROR; -} - -#if 0 -/* Return the next state to which the current state STATE will transit by - accepting the current input byte. */ - -static re_dfastate_t * -transit_state_sb (err, mctx, state) - reg_errcode_t *err; - re_match_context_t *mctx; - re_dfastate_t *state; -{ - re_dfa_t *const dfa = mctx->dfa; - re_node_set next_nodes; - re_dfastate_t *next_state; - int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input); - unsigned int context; - - *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1); - if (BE (*err != REG_NOERROR, 0)) - return NULL; - for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt) - { - int cur_node = state->nodes.elems[node_cnt]; - if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx)) - { - *err = re_node_set_merge (&next_nodes, - dfa->eclosures + dfa->nexts[cur_node]); - if (BE (*err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return NULL; - } - } - } - context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags); - next_state = re_acquire_state_context (err, dfa, &next_nodes, context); - /* We don't need to check errors here, since the return value of - this function is next_state and ERR is already set. */ - - re_node_set_free (&next_nodes); - re_string_skip_bytes (&mctx->input, 1); - return next_state; -} -#endif - -#ifdef RE_ENABLE_I18N -static reg_errcode_t -transit_state_mb (mctx, pstate) - re_match_context_t *mctx; - re_dfastate_t *pstate; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - int i; - - for (i = 0; i < pstate->nodes.nelem; ++i) - { - re_node_set dest_nodes, *new_nodes; - int cur_node_idx = pstate->nodes.elems[i]; - int naccepted, dest_idx; - unsigned int context; - re_dfastate_t *dest_state; - - if (!dfa->nodes[cur_node_idx].accept_mb) - continue; - - if (dfa->nodes[cur_node_idx].constraint) - { - context = re_string_context_at (&mctx->input, - re_string_cur_idx (&mctx->input), - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint, - context)) - continue; - } - - /* How many bytes the node can accept? */ - naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input, - re_string_cur_idx (&mctx->input)); - if (naccepted == 0) - continue; - - /* The node can accepts `naccepted' bytes. */ - dest_idx = re_string_cur_idx (&mctx->input) + naccepted; - mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted - : mctx->max_mb_elem_len); - err = clean_state_log_if_needed (mctx, dest_idx); - if (BE (err != REG_NOERROR, 0)) - return err; -#ifdef DEBUG - assert (dfa->nexts[cur_node_idx] != -1); -#endif - new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx]; - - dest_state = mctx->state_log[dest_idx]; - if (dest_state == NULL) - dest_nodes = *new_nodes; - else - { - err = re_node_set_init_union (&dest_nodes, - dest_state->entrance_nodes, new_nodes); - if (BE (err != REG_NOERROR, 0)) - return err; - } - context = re_string_context_at (&mctx->input, dest_idx - 1, mctx->eflags); - mctx->state_log[dest_idx] - = re_acquire_state_context (&err, dfa, &dest_nodes, context); - if (dest_state != NULL) - re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0)) - return err; - } - return REG_NOERROR; -} -#endif /* RE_ENABLE_I18N */ - -static reg_errcode_t -transit_state_bkref (mctx, nodes) - re_match_context_t *mctx; - const re_node_set *nodes; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - int i; - int cur_str_idx = re_string_cur_idx (&mctx->input); - - for (i = 0; i < nodes->nelem; ++i) - { - int dest_str_idx, prev_nelem, bkc_idx; - int node_idx = nodes->elems[i]; - unsigned int context; - const re_token_t *node = dfa->nodes + node_idx; - re_node_set *new_dest_nodes; - - /* Check whether `node' is a backreference or not. */ - if (node->type != OP_BACK_REF) - continue; - - if (node->constraint) - { - context = re_string_context_at (&mctx->input, cur_str_idx, - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - continue; - } - - /* `node' is a backreference. - Check the substring which the substring matched. */ - bkc_idx = mctx->nbkref_ents; - err = get_subexp (mctx, node_idx, cur_str_idx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - - /* And add the epsilon closures (which is `new_dest_nodes') of - the backreference to appropriate state_log. */ -#ifdef DEBUG - assert (dfa->nexts[node_idx] != -1); -#endif - for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx) - { - int subexp_len; - re_dfastate_t *dest_state; - struct re_backref_cache_entry *bkref_ent; - bkref_ent = mctx->bkref_ents + bkc_idx; - if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx) - continue; - subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from; - new_dest_nodes = (subexp_len == 0 - ? dfa->eclosures + dfa->edests[node_idx].elems[0] - : dfa->eclosures + dfa->nexts[node_idx]); - dest_str_idx = (cur_str_idx + bkref_ent->subexp_to - - bkref_ent->subexp_from); - context = re_string_context_at (&mctx->input, dest_str_idx - 1, - mctx->eflags); - dest_state = mctx->state_log[dest_str_idx]; - prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 - : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ - if (dest_state == NULL) - { - mctx->state_log[dest_str_idx] - = re_acquire_state_context (&err, dfa, new_dest_nodes, - context); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) - goto free_return; - } - else - { - re_node_set dest_nodes; - err = re_node_set_init_union (&dest_nodes, - dest_state->entrance_nodes, - new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&dest_nodes); - goto free_return; - } - mctx->state_log[dest_str_idx] - = re_acquire_state_context (&err, dfa, &dest_nodes, context); - re_node_set_free (&dest_nodes); - if (BE (mctx->state_log[dest_str_idx] == NULL - && err != REG_NOERROR, 0)) - goto free_return; - } - /* We need to check recursively if the backreference can epsilon - transit. */ - if (subexp_len == 0 - && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem) - { - err = check_subexp_matching_top (mctx, new_dest_nodes, - cur_str_idx); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - err = transit_state_bkref (mctx, new_dest_nodes); - if (BE (err != REG_NOERROR, 0)) - goto free_return; - } - } - } - err = REG_NOERROR; - free_return: - return err; -} - -/* Enumerate all the candidates which the backreference BKREF_NODE can match - at BKREF_STR_IDX, and register them by match_ctx_add_entry(). - Note that we might collect inappropriate candidates here. - However, the cost of checking them strictly here is too high, then we - delay these checking for prune_impossible_nodes(). */ - -static reg_errcode_t -get_subexp (mctx, bkref_node, bkref_str_idx) - re_match_context_t *mctx; - int bkref_node, bkref_str_idx; -{ - re_dfa_t *const dfa = mctx->dfa; - int subexp_num, sub_top_idx; - const char *buf = (const char *) re_string_get_buffer (&mctx->input); - /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */ - int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx); - if (cache_idx != -1) - { - const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx; - do - if (entry->node == bkref_node) - return REG_NOERROR; /* We already checked it. */ - while (entry++->more); - } - - subexp_num = dfa->nodes[bkref_node].opr.idx; - - /* For each sub expression */ - for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx) - { - reg_errcode_t err; - re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx]; - re_sub_match_last_t *sub_last; - int sub_last_idx, sl_str, bkref_str_off; - - if (dfa->nodes[sub_top->node].opr.idx != subexp_num) - continue; /* It isn't related. */ - - sl_str = sub_top->str_idx; - bkref_str_off = bkref_str_idx; - /* At first, check the last node of sub expressions we already - evaluated. */ - for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx) - { - int sl_str_diff; - sub_last = sub_top->lasts[sub_last_idx]; - sl_str_diff = sub_last->str_idx - sl_str; - /* The matched string by the sub expression match with the substring - at the back reference? */ - if (sl_str_diff > 0) - { - if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0)) - { - /* Not enough chars for a successful match. */ - if (bkref_str_off + sl_str_diff > mctx->input.len) - break; - - err = clean_state_log_if_needed (mctx, - bkref_str_off - + sl_str_diff); - if (BE (err != REG_NOERROR, 0)) - return err; - buf = (const char *) re_string_get_buffer (&mctx->input); - } - if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0) - break; /* We don't need to search this sub expression any more. */ - } - bkref_str_off += sl_str_diff; - sl_str += sl_str_diff; - err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, - bkref_str_idx); - - /* Reload buf, since the preceding call might have reallocated - the buffer. */ - buf = (const char *) re_string_get_buffer (&mctx->input); - - if (err == REG_NOMATCH) - continue; - if (BE (err != REG_NOERROR, 0)) - return err; - } - - if (sub_last_idx < sub_top->nlasts) - continue; - if (sub_last_idx > 0) - ++sl_str; - /* Then, search for the other last nodes of the sub expression. */ - for (; sl_str <= bkref_str_idx; ++sl_str) - { - int cls_node, sl_str_off; - const re_node_set *nodes; - sl_str_off = sl_str - sub_top->str_idx; - /* The matched string by the sub expression match with the substring - at the back reference? */ - if (sl_str_off > 0) - { - if (BE (bkref_str_off >= mctx->input.valid_len, 0)) - { - /* If we are at the end of the input, we cannot match. */ - if (bkref_str_off >= mctx->input.len) - break; - - err = extend_buffers (mctx); - if (BE (err != REG_NOERROR, 0)) - return err; - - buf = (const char *) re_string_get_buffer (&mctx->input); - } - if (buf [bkref_str_off++] != buf[sl_str - 1]) - break; /* We don't need to search this sub expression - any more. */ - } - if (mctx->state_log[sl_str] == NULL) - continue; - /* Does this state have a ')' of the sub expression? */ - nodes = &mctx->state_log[sl_str]->nodes; - cls_node = find_subexp_node (dfa, nodes, subexp_num, OP_CLOSE_SUBEXP); - if (cls_node == -1) - continue; /* No. */ - if (sub_top->path == NULL) - { - sub_top->path = calloc (sizeof (state_array_t), - sl_str - sub_top->str_idx + 1); - if (sub_top->path == NULL) - return REG_ESPACE; - } - /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node - in the current context? */ - err = check_arrival (mctx, sub_top->path, sub_top->node, - sub_top->str_idx, cls_node, sl_str, OP_CLOSE_SUBEXP); - if (err == REG_NOMATCH) - continue; - if (BE (err != REG_NOERROR, 0)) - return err; - sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str); - if (BE (sub_last == NULL, 0)) - return REG_ESPACE; - err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node, - bkref_str_idx); - if (err == REG_NOMATCH) - continue; - } - } - return REG_NOERROR; -} - -/* Helper functions for get_subexp(). */ - -/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR. - If it can arrive, register the sub expression expressed with SUB_TOP - and SUB_LAST. */ - -static reg_errcode_t -get_subexp_sub (mctx, sub_top, sub_last, bkref_node, bkref_str) - re_match_context_t *mctx; - const re_sub_match_top_t *sub_top; - re_sub_match_last_t *sub_last; - int bkref_node, bkref_str; -{ - reg_errcode_t err; - int to_idx; - /* Can the subexpression arrive the back reference? */ - err = check_arrival (mctx, &sub_last->path, sub_last->node, - sub_last->str_idx, bkref_node, bkref_str, OP_OPEN_SUBEXP); - if (err != REG_NOERROR) - return err; - err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx, - sub_last->str_idx); - if (BE (err != REG_NOERROR, 0)) - return err; - to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx; - return clean_state_log_if_needed (mctx, to_idx); -} - -/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX. - Search '(' if FL_OPEN, or search ')' otherwise. - TODO: This function isn't efficient... - Because there might be more than one nodes whose types are - OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all - nodes. - E.g. RE: (a){2} */ - -static int -find_subexp_node (dfa, nodes, subexp_idx, type) - const re_dfa_t *dfa; - const re_node_set *nodes; - int subexp_idx, type; -{ - int cls_idx; - for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx) - { - int cls_node = nodes->elems[cls_idx]; - const re_token_t *node = dfa->nodes + cls_node; - if (node->type == type - && node->opr.idx == subexp_idx) - return cls_node; - } - return -1; -} - -/* Check whether the node TOP_NODE at TOP_STR can arrive to the node - LAST_NODE at LAST_STR. We record the path onto PATH since it will be - heavily reused. - Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ - -static reg_errcode_t -check_arrival (mctx, path, top_node, top_str, last_node, last_str, - type) - re_match_context_t *mctx; - state_array_t *path; - int top_node, top_str, last_node, last_str, type; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - int subexp_num, backup_cur_idx, str_idx, null_cnt; - re_dfastate_t *cur_state = NULL; - re_node_set *cur_nodes, next_nodes; - re_dfastate_t **backup_state_log; - unsigned int context; - - subexp_num = dfa->nodes[top_node].opr.idx; - /* Extend the buffer if we need. */ - if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0)) - { - re_dfastate_t **new_array; - int old_alloc = path->alloc; - path->alloc += last_str + mctx->max_mb_elem_len + 1; - new_array = re_realloc (path->array, re_dfastate_t *, path->alloc); - if (new_array == NULL) - { - path->alloc = old_alloc; - return REG_ESPACE; - } - path->array = new_array; - memset (new_array + old_alloc, '\0', - sizeof (re_dfastate_t *) * (path->alloc - old_alloc)); - } - - str_idx = path->next_idx == 0 ? top_str : path->next_idx; - - /* Temporary modify MCTX. */ - backup_state_log = mctx->state_log; - backup_cur_idx = mctx->input.cur_idx; - mctx->state_log = path->array; - mctx->input.cur_idx = str_idx; - - /* Setup initial node set. */ - context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); - if (str_idx == top_str) - { - err = re_node_set_init_1 (&next_nodes, top_node); - if (BE (err != REG_NOERROR, 0)) - return err; - err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - else - { - cur_state = mctx->state_log[str_idx]; - if (cur_state && cur_state->has_backref) - { - err = re_node_set_init_copy (&next_nodes, &cur_state->nodes); - if (BE ( err != REG_NOERROR, 0)) - return err; - } - else - re_node_set_init_empty (&next_nodes); - } - if (str_idx == top_str || (cur_state && cur_state->has_backref)) - { - if (next_nodes.nelem) - { - err = expand_bkref_cache (mctx, &next_nodes, str_idx, - subexp_num, type); - if (BE ( err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - mctx->state_log[str_idx] = cur_state; - } - - for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;) - { - re_node_set_empty (&next_nodes); - if (mctx->state_log[str_idx + 1]) - { - err = re_node_set_merge (&next_nodes, - &mctx->state_log[str_idx + 1]->nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - if (cur_state) - { - err = check_arrival_add_next_nodes (mctx, str_idx, - &cur_state->non_eps_nodes, &next_nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - ++str_idx; - if (next_nodes.nelem) - { - err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - err = expand_bkref_cache (mctx, &next_nodes, str_idx, - subexp_num, type); - if (BE ( err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - } - context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags); - cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context); - if (BE (cur_state == NULL && err != REG_NOERROR, 0)) - { - re_node_set_free (&next_nodes); - return err; - } - mctx->state_log[str_idx] = cur_state; - null_cnt = cur_state == NULL ? null_cnt + 1 : 0; - } - re_node_set_free (&next_nodes); - cur_nodes = (mctx->state_log[last_str] == NULL ? NULL - : &mctx->state_log[last_str]->nodes); - path->next_idx = str_idx; - - /* Fix MCTX. */ - mctx->state_log = backup_state_log; - mctx->input.cur_idx = backup_cur_idx; - - /* Then check the current node set has the node LAST_NODE. */ - if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node)) - return REG_NOERROR; - - return REG_NOMATCH; -} - -/* Helper functions for check_arrival. */ - -/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them - to NEXT_NODES. - TODO: This function is similar to the functions transit_state*(), - however this function has many additional works. - Can't we unify them? */ - -static reg_errcode_t -check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes) - re_match_context_t *mctx; - int str_idx; - re_node_set *cur_nodes, *next_nodes; -{ - re_dfa_t *const dfa = mctx->dfa; - int result; - int cur_idx; - reg_errcode_t err; - re_node_set union_set; - re_node_set_init_empty (&union_set); - for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx) - { - int naccepted = 0; - int cur_node = cur_nodes->elems[cur_idx]; -#ifdef DEBUG - re_token_type_t type = dfa->nodes[cur_node].type; - assert (!IS_EPSILON_NODE (type)); -#endif -#ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ - if (dfa->nodes[cur_node].accept_mb) - { - naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, - str_idx); - if (naccepted > 1) - { - re_dfastate_t *dest_state; - int next_node = dfa->nexts[cur_node]; - int next_idx = str_idx + naccepted; - dest_state = mctx->state_log[next_idx]; - re_node_set_empty (&union_set); - if (dest_state) - { - err = re_node_set_merge (&union_set, &dest_state->nodes); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&union_set); - return err; - } - } - result = re_node_set_insert (&union_set, next_node); - if (BE (result < 0, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - mctx->state_log[next_idx] = re_acquire_state (&err, dfa, - &union_set); - if (BE (mctx->state_log[next_idx] == NULL - && err != REG_NOERROR, 0)) - { - re_node_set_free (&union_set); - return err; - } - } - } -#endif /* RE_ENABLE_I18N */ - if (naccepted - || check_node_accept (mctx, dfa->nodes + cur_node, str_idx)) - { - result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]); - if (BE (result < 0, 0)) - { - re_node_set_free (&union_set); - return REG_ESPACE; - } - } - } - re_node_set_free (&union_set); - return REG_NOERROR; -} - -/* For all the nodes in CUR_NODES, add the epsilon closures of them to - CUR_NODES, however exclude the nodes which are: - - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN. - - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN. -*/ - -static reg_errcode_t -check_arrival_expand_ecl (dfa, cur_nodes, ex_subexp, type) - re_dfa_t *dfa; - re_node_set *cur_nodes; - int ex_subexp, type; -{ - reg_errcode_t err; - int idx, outside_node; - re_node_set new_nodes; -#ifdef DEBUG - assert (cur_nodes->nelem); -#endif - err = re_node_set_alloc (&new_nodes, cur_nodes->nelem); - if (BE (err != REG_NOERROR, 0)) - return err; - /* Create a new node set NEW_NODES with the nodes which are epsilon - closures of the node in CUR_NODES. */ - - for (idx = 0; idx < cur_nodes->nelem; ++idx) - { - int cur_node = cur_nodes->elems[idx]; - re_node_set *eclosure = dfa->eclosures + cur_node; - outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type); - if (outside_node == -1) - { - /* There are no problematic nodes, just merge them. */ - err = re_node_set_merge (&new_nodes, eclosure); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&new_nodes); - return err; - } - } - else - { - /* There are problematic nodes, re-calculate incrementally. */ - err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node, - ex_subexp, type); - if (BE (err != REG_NOERROR, 0)) - { - re_node_set_free (&new_nodes); - return err; - } - } - } - re_node_set_free (cur_nodes); - *cur_nodes = new_nodes; - return REG_NOERROR; -} - -/* Helper function for check_arrival_expand_ecl. - Check incrementally the epsilon closure of TARGET, and if it isn't - problematic append it to DST_NODES. */ - -static reg_errcode_t -check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type) - re_dfa_t *dfa; - int target, ex_subexp, type; - re_node_set *dst_nodes; -{ - int cur_node; - for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);) - { - int err; - - if (dfa->nodes[cur_node].type == type - && dfa->nodes[cur_node].opr.idx == ex_subexp) - { - if (type == OP_CLOSE_SUBEXP) - { - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) - return REG_ESPACE; - } - break; - } - err = re_node_set_insert (dst_nodes, cur_node); - if (BE (err == -1, 0)) - return REG_ESPACE; - if (dfa->edests[cur_node].nelem == 0) - break; - if (dfa->edests[cur_node].nelem == 2) - { - err = check_arrival_expand_ecl_sub (dfa, dst_nodes, - dfa->edests[cur_node].elems[1], - ex_subexp, type); - if (BE (err != REG_NOERROR, 0)) - return err; - } - cur_node = dfa->edests[cur_node].elems[0]; - } - return REG_NOERROR; -} - - -/* For all the back references in the current state, calculate the - destination of the back references by the appropriate entry - in MCTX->BKREF_ENTS. */ - -static reg_errcode_t -expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num, - type) - re_match_context_t *mctx; - int cur_str, subexp_num, type; - re_node_set *cur_nodes; -{ - re_dfa_t *const dfa = mctx->dfa; - reg_errcode_t err; - int cache_idx_start = search_cur_bkref_entry (mctx, cur_str); - struct re_backref_cache_entry *ent; - - if (cache_idx_start == -1) - return REG_NOERROR; - - restart: - ent = mctx->bkref_ents + cache_idx_start; - do - { - int to_idx, next_node; - - /* Is this entry ENT is appropriate? */ - if (!re_node_set_contains (cur_nodes, ent->node)) - continue; /* No. */ - - to_idx = cur_str + ent->subexp_to - ent->subexp_from; - /* Calculate the destination of the back reference, and append it - to MCTX->STATE_LOG. */ - if (to_idx == cur_str) - { - /* The backreference did epsilon transit, we must re-check all the - node in the current state. */ - re_node_set new_dests; - reg_errcode_t err2, err3; - next_node = dfa->edests[ent->node].elems[0]; - if (re_node_set_contains (cur_nodes, next_node)) - continue; - err = re_node_set_init_1 (&new_dests, next_node); - err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type); - err3 = re_node_set_merge (cur_nodes, &new_dests); - re_node_set_free (&new_dests); - if (BE (err != REG_NOERROR || err2 != REG_NOERROR - || err3 != REG_NOERROR, 0)) - { - err = (err != REG_NOERROR ? err - : (err2 != REG_NOERROR ? err2 : err3)); - return err; - } - /* TODO: It is still inefficient... */ - goto restart; - } - else - { - re_node_set union_set; - next_node = dfa->nexts[ent->node]; - if (mctx->state_log[to_idx]) - { - int ret; - if (re_node_set_contains (&mctx->state_log[to_idx]->nodes, - next_node)) - continue; - err = re_node_set_init_copy (&union_set, - &mctx->state_log[to_idx]->nodes); - ret = re_node_set_insert (&union_set, next_node); - if (BE (err != REG_NOERROR || ret < 0, 0)) - { - re_node_set_free (&union_set); - err = err != REG_NOERROR ? err : REG_ESPACE; - return err; - } - } - else - { - err = re_node_set_init_1 (&union_set, next_node); - if (BE (err != REG_NOERROR, 0)) - return err; - } - mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set); - re_node_set_free (&union_set); - if (BE (mctx->state_log[to_idx] == NULL - && err != REG_NOERROR, 0)) - return err; - } - } - while (ent++->more); - return REG_NOERROR; -} - -/* Build transition table for the state. - Return 1 if succeeded, otherwise return NULL. */ - -static int -build_trtable (dfa, state) - re_dfa_t *dfa; - re_dfastate_t *state; -{ - reg_errcode_t err; - int i, j, ch, need_word_trtable = 0; - unsigned int elem, mask; - int dests_node_malloced = 0, dest_states_malloced = 0; - int ndests; /* Number of the destination states from `state'. */ - re_dfastate_t **trtable; - re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; - re_node_set follows, *dests_node; - bitset *dests_ch; - bitset acceptable; - - /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the - characters which i-th destination state accepts. */ -#ifdef _LIBC - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX)) - dests_node = (re_node_set *) - alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - else -#endif - { - dests_node = (re_node_set *) - malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX); - if (BE (dests_node == NULL, 0)) - return 0; - dests_node_malloced = 1; - } - dests_ch = (bitset *) (dests_node + SBC_MAX); - - /* Initialize transiton table. */ - state->word_trtable = state->trtable = NULL; - - /* At first, group all nodes belonging to `state' into several - destinations. */ - ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); - if (BE (ndests <= 0, 0)) - { - if (dests_node_malloced) - free (dests_node); - /* Return 0 in case of an error, 1 otherwise. */ - if (ndests == 0) - { - state->trtable = (re_dfastate_t **) - calloc (sizeof (re_dfastate_t *), SBC_MAX); - return 1; - } - return 0; - } - - err = re_node_set_alloc (&follows, ndests + 1); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - -#ifdef _LIBC - if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX - + ndests * 3 * sizeof (re_dfastate_t *))) - dest_states = (re_dfastate_t **) - alloca (ndests * 3 * sizeof (re_dfastate_t *)); - else -#endif - { - dest_states = (re_dfastate_t **) - malloc (ndests * 3 * sizeof (re_dfastate_t *)); - if (BE (dest_states == NULL, 0)) - { -out_free: - if (dest_states_malloced) - free (dest_states); - re_node_set_free (&follows); - for (i = 0; i < ndests; ++i) - re_node_set_free (dests_node + i); - if (dests_node_malloced) - free (dests_node); - return 0; - } - dest_states_malloced = 1; - } - dest_states_word = dest_states + ndests; - dest_states_nl = dest_states_word + ndests; - bitset_empty (acceptable); - - /* Then build the states for all destinations. */ - for (i = 0; i < ndests; ++i) - { - int next_node; - re_node_set_empty (&follows); - /* Merge the follows of this destination states. */ - for (j = 0; j < dests_node[i].nelem; ++j) - { - next_node = dfa->nexts[dests_node[i].elems[j]]; - if (next_node != -1) - { - err = re_node_set_merge (&follows, dfa->eclosures + next_node); - if (BE (err != REG_NOERROR, 0)) - goto out_free; - } - } - dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0); - if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - /* If the new state has context constraint, - build appropriate states for these contexts. */ - if (dest_states[i]->has_constraint) - { - dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows, - CONTEXT_WORD); - if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - - if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1) - need_word_trtable = 1; - - dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows, - CONTEXT_NEWLINE); - if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0)) - goto out_free; - } - else - { - dest_states_word[i] = dest_states[i]; - dest_states_nl[i] = dest_states[i]; - } - bitset_merge (acceptable, dests_ch[i]); - } - - if (!BE (need_word_trtable, 0)) - { - /* We don't care about whether the following character is a word - character, or we are in a single-byte character set so we can - discern by looking at the character code: allocate a - 256-entry transition table. */ - trtable = state->trtable = - (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX); - if (BE (trtable == NULL, 0)) - goto out_free; - - /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; - elem; - mask <<= 1, elem >>= 1, ++ch) - if (BE (elem & 1, 0)) - { - /* There must be exactly one destination which accepts - character ch. See group_nodes_into_DFAstates. */ - for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) - ; - - /* j-th destination accepts the word character ch. */ - if (dfa->word_char[i] & mask) - trtable[ch] = dest_states_word[j]; - else - trtable[ch] = dest_states[j]; - } - } - else - { - /* We care about whether the following character is a word - character, and we are in a multi-byte character set: discern - by looking at the character code: build two 256-entry - transition tables, one starting at trtable[0] and one - starting at trtable[SBC_MAX]. */ - trtable = state->word_trtable = - (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX); - if (BE (trtable == NULL, 0)) - goto out_free; - - /* For all characters ch...: */ - for (i = 0; i < BITSET_UINTS; ++i) - for (ch = i * UINT_BITS, elem = acceptable[i], mask = 1; - elem; - mask <<= 1, elem >>= 1, ++ch) - if (BE (elem & 1, 0)) - { - /* There must be exactly one destination which accepts - character ch. See group_nodes_into_DFAstates. */ - for (j = 0; (dests_ch[j][i] & mask) == 0; ++j) - ; - - /* j-th destination accepts the word character ch. */ - trtable[ch] = dest_states[j]; - trtable[ch + SBC_MAX] = dest_states_word[j]; - } - } - - /* new line */ - if (bitset_contain (acceptable, NEWLINE_CHAR)) - { - /* The current state accepts newline character. */ - for (j = 0; j < ndests; ++j) - if (bitset_contain (dests_ch[j], NEWLINE_CHAR)) - { - /* k-th destination accepts newline character. */ - trtable[NEWLINE_CHAR] = dest_states_nl[j]; - if (need_word_trtable) - trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j]; - /* There must be only one destination which accepts - newline. See group_nodes_into_DFAstates. */ - break; - } - } - - if (dest_states_malloced) - free (dest_states); - - re_node_set_free (&follows); - for (i = 0; i < ndests; ++i) - re_node_set_free (dests_node + i); - - if (dests_node_malloced) - free (dests_node); - - return 1; -} - -/* Group all nodes belonging to STATE into several destinations. - Then for all destinations, set the nodes belonging to the destination - to DESTS_NODE[i] and set the characters accepted by the destination - to DEST_CH[i]. This function return the number of destinations. */ - -static int -group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch) - re_dfa_t *dfa; - const re_dfastate_t *state; - re_node_set *dests_node; - bitset *dests_ch; -{ - reg_errcode_t err; - int result; - int i, j, k; - int ndests; /* Number of the destinations from `state'. */ - bitset accepts; /* Characters a node can accept. */ - const re_node_set *cur_nodes = &state->nodes; - bitset_empty (accepts); - ndests = 0; - - /* For all the nodes belonging to `state', */ - for (i = 0; i < cur_nodes->nelem; ++i) - { - re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; - re_token_type_t type = node->type; - unsigned int constraint = node->constraint; - - /* Enumerate all single byte character this node can accept. */ - if (type == CHARACTER) - bitset_set (accepts, node->opr.c); - else if (type == SIMPLE_BRACKET) - { - bitset_merge (accepts, node->opr.sbcset); - } - else if (type == OP_PERIOD) - { -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - bitset_merge (accepts, dfa->sb_char); - else -#endif - bitset_set_all (accepts); - if (!(dfa->syntax & RE_DOT_NEWLINE)) - bitset_clear (accepts, '\n'); - if (dfa->syntax & RE_DOT_NOT_NULL) - bitset_clear (accepts, '\0'); - } -#ifdef RE_ENABLE_I18N - else if (type == OP_UTF8_PERIOD) - { - memset (accepts, 255, sizeof (unsigned int) * BITSET_UINTS / 2); - if (!(dfa->syntax & RE_DOT_NEWLINE)) - bitset_clear (accepts, '\n'); - if (dfa->syntax & RE_DOT_NOT_NULL) - bitset_clear (accepts, '\0'); - } -#endif - else - continue; - - /* Check the `accepts' and sift the characters which are not - match it the context. */ - if (constraint) - { - if (constraint & NEXT_NEWLINE_CONSTRAINT) - { - int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR); - bitset_empty (accepts); - if (accepts_newline) - bitset_set (accepts, NEWLINE_CHAR); - else - continue; - } - if (constraint & NEXT_ENDBUF_CONSTRAINT) - { - bitset_empty (accepts); - continue; - } - - if (constraint & NEXT_WORD_CONSTRAINT) - { - unsigned int any_set = 0; - if (type == CHARACTER && !node->word_char) - { - bitset_empty (accepts); - continue; - } -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) - any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j])); - else -#endif - for (j = 0; j < BITSET_UINTS; ++j) - any_set |= (accepts[j] &= dfa->word_char[j]); - if (!any_set) - continue; - } - if (constraint & NEXT_NOTWORD_CONSTRAINT) - { - unsigned int any_set = 0; - if (type == CHARACTER && node->word_char) - { - bitset_empty (accepts); - continue; - } -#ifdef RE_ENABLE_I18N - if (dfa->mb_cur_max > 1) - for (j = 0; j < BITSET_UINTS; ++j) - any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j])); - else -#endif - for (j = 0; j < BITSET_UINTS; ++j) - any_set |= (accepts[j] &= ~dfa->word_char[j]); - if (!any_set) - continue; - } - } - - /* Then divide `accepts' into DFA states, or create a new - state. Above, we make sure that accepts is not empty. */ - for (j = 0; j < ndests; ++j) - { - bitset intersec; /* Intersection sets, see below. */ - bitset remains; - /* Flags, see below. */ - int has_intersec, not_subset, not_consumed; - - /* Optimization, skip if this state doesn't accept the character. */ - if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) - continue; - - /* Enumerate the intersection set of this state and `accepts'. */ - has_intersec = 0; - for (k = 0; k < BITSET_UINTS; ++k) - has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; - /* And skip if the intersection set is empty. */ - if (!has_intersec) - continue; - - /* Then check if this state is a subset of `accepts'. */ - not_subset = not_consumed = 0; - for (k = 0; k < BITSET_UINTS; ++k) - { - not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k]; - not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; - } - - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. */ - if (not_subset) - { - bitset_copy (dests_ch[ndests], remains); - bitset_copy (dests_ch[j], intersec); - err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]); - if (BE (err != REG_NOERROR, 0)) - goto error_return; - ++ndests; - } - - /* Put the position in the current group. */ - result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]); - if (BE (result < 0, 0)) - goto error_return; - - /* If all characters are consumed, go to next node. */ - if (!not_consumed) - break; - } - /* Some characters remain, create a new group. */ - if (j == ndests) - { - bitset_copy (dests_ch[ndests], accepts); - err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]); - if (BE (err != REG_NOERROR, 0)) - goto error_return; - ++ndests; - bitset_empty (accepts); - } - } - return ndests; - error_return: - for (j = 0; j < ndests; ++j) - re_node_set_free (dests_node + j); - return -1; -} - -#ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. - Return the number of the bytes the node accepts. - STR_IDX is the current index of the input string. - - This function handles the nodes which can accept one character, or - one collating element like '.', '[a-z]', opposite to the other nodes - can only accept one byte. */ - -static int -check_node_accept_bytes (dfa, node_idx, input, str_idx) - re_dfa_t *dfa; - int node_idx, str_idx; - const re_string_t *input; -{ - const re_token_t *node = dfa->nodes + node_idx; - int char_len, elem_len; - int i; - - if (BE (node->type == OP_UTF8_PERIOD, 0)) - { - unsigned char c = re_string_byte_at (input, str_idx), d; - if (BE (c < 0xc2, 1)) - return 0; - - if (str_idx + 2 > input->len) - return 0; - - d = re_string_byte_at (input, str_idx + 1); - if (c < 0xe0) - return (d < 0x80 || d > 0xbf) ? 0 : 2; - else if (c < 0xf0) - { - char_len = 3; - if (c == 0xe0 && d < 0xa0) - return 0; - } - else if (c < 0xf8) - { - char_len = 4; - if (c == 0xf0 && d < 0x90) - return 0; - } - else if (c < 0xfc) - { - char_len = 5; - if (c == 0xf8 && d < 0x88) - return 0; - } - else if (c < 0xfe) - { - char_len = 6; - if (c == 0xfc && d < 0x84) - return 0; - } - else - return 0; - - if (str_idx + char_len > input->len) - return 0; - - for (i = 1; i < char_len; ++i) - { - d = re_string_byte_at (input, str_idx + i); - if (d < 0x80 || d > 0xbf) - return 0; - } - return char_len; - } - - char_len = re_string_char_size_at (input, str_idx); - if (node->type == OP_PERIOD) - { - if (char_len <= 1) - return 0; - /* FIXME: I don't think this if is needed, as both '\n' - and '\0' are char_len == 1. */ - /* '.' accepts any one character except the following two cases. */ - if ((!(dfa->syntax & RE_DOT_NEWLINE) && - re_string_byte_at (input, str_idx) == '\n') || - ((dfa->syntax & RE_DOT_NOT_NULL) && - re_string_byte_at (input, str_idx) == '\0')) - return 0; - return char_len; - } - - elem_len = re_string_elem_size_at (input, str_idx); - if ((elem_len <= 1 && char_len <= 1) || char_len == 0) - return 0; - - if (node->type == COMPLEX_BRACKET) - { - const re_charset_t *cset = node->opr.mbcset; -# ifdef _LIBC - const unsigned char *pin - = ((const unsigned char *) re_string_get_buffer (input) + str_idx); - int j; - uint32_t nrules; -# endif /* _LIBC */ - int match_len = 0; - wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars) - ? re_string_wchar_at (input, str_idx) : 0); - - /* match with multibyte character? */ - for (i = 0; i < cset->nmbchars; ++i) - if (wc == cset->mbchars[i]) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - /* match with character_class? */ - for (i = 0; i < cset->nchar_classes; ++i) - { - wctype_t wt = cset->char_classes[i]; - if (__iswctype (wc, wt)) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - } - -# ifdef _LIBC - nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules != 0) - { - unsigned int in_collseq = 0; - const int32_t *table, *indirect; - const unsigned char *weights, *extra; - const char *collseqwc; - int32_t idx; - /* This #include defines a local function! */ -# include <locale/weight.h> - - /* match with collating_symbol? */ - if (cset->ncoll_syms) - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - for (i = 0; i < cset->ncoll_syms; ++i) - { - const unsigned char *coll_sym = extra + cset->coll_syms[i]; - /* Compare the length of input collating element and - the length of current collating element. */ - if (*coll_sym != elem_len) - continue; - /* Compare each bytes. */ - for (j = 0; j < *coll_sym; j++) - if (pin[j] != coll_sym[1 + j]) - break; - if (j == *coll_sym) - { - /* Match if every bytes is equal. */ - match_len = j; - goto check_node_accept_bytes_match; - } - } - - if (cset->nranges) - { - if (elem_len <= char_len) - { - collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC); - in_collseq = __collseq_table_lookup (collseqwc, wc); - } - else - in_collseq = find_collation_sequence_value (pin, elem_len); - } - /* match with range expression? */ - for (i = 0; i < cset->nranges; ++i) - if (cset->range_starts[i] <= in_collseq - && in_collseq <= cset->range_ends[i]) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - - /* match with equivalence_class? */ - if (cset->nequiv_classes) - { - const unsigned char *cp = pin; - table = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); - weights = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); - extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); - indirect = (const int32_t *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - idx = findidx (&cp); - if (idx > 0) - for (i = 0; i < cset->nequiv_classes; ++i) - { - int32_t equiv_class_idx = cset->equiv_classes[i]; - size_t weight_len = weights[idx]; - if (weight_len == weights[equiv_class_idx]) - { - int cnt = 0; - while (cnt <= weight_len - && (weights[equiv_class_idx + 1 + cnt] - == weights[idx + 1 + cnt])) - ++cnt; - if (cnt > weight_len) - { - match_len = elem_len; - goto check_node_accept_bytes_match; - } - } - } - } - } - else -# endif /* _LIBC */ - { - /* match with range expression? */ -#if __GNUC__ >= 2 - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif - for (i = 0; i < cset->nranges; ++i) - { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) - { - match_len = char_len; - goto check_node_accept_bytes_match; - } - } - } - check_node_accept_bytes_match: - if (!cset->non_match) - return match_len; - else - { - if (match_len > 0) - return 0; - else - return (elem_len > char_len) ? elem_len : char_len; - } - } - return 0; -} - -# ifdef _LIBC -static unsigned int -find_collation_sequence_value (mbs, mbs_len) - const unsigned char *mbs; - size_t mbs_len; -{ - uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); - if (nrules == 0) - { - if (mbs_len == 1) - { - /* No valid character. Match it as a single byte character. */ - const unsigned char *collseq = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB); - return collseq[mbs[0]]; - } - return UINT_MAX; - } - else - { - int32_t idx; - const unsigned char *extra = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB); - int32_t extrasize = (const unsigned char *) - _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra; - - for (idx = 0; idx < extrasize;) - { - int mbs_cnt, found = 0; - int32_t elem_mbs_len; - /* Skip the name of collating element name. */ - idx = idx + extra[idx] + 1; - elem_mbs_len = extra[idx++]; - if (mbs_len == elem_mbs_len) - { - for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt) - if (extra[idx + mbs_cnt] != mbs[mbs_cnt]) - break; - if (mbs_cnt == elem_mbs_len) - /* Found the entry. */ - found = 1; - } - /* Skip the byte sequence of the collating element. */ - idx += elem_mbs_len; - /* Adjust for the alignment. */ - idx = (idx + 3) & ~3; - /* Skip the collation sequence value. */ - idx += sizeof (uint32_t); - /* Skip the wide char sequence of the collating element. */ - idx = idx + sizeof (uint32_t) * (extra[idx] + 1); - /* If we found the entry, return the sequence value. */ - if (found) - return *(uint32_t *) (extra + idx); - /* Skip the collation sequence value. */ - idx += sizeof (uint32_t); - } - return UINT_MAX; - } -} -# endif /* _LIBC */ -#endif /* RE_ENABLE_I18N */ - -/* Check whether the node accepts the byte which is IDX-th - byte of the INPUT. */ - -static int -check_node_accept (mctx, node, idx) - const re_match_context_t *mctx; - const re_token_t *node; - int idx; -{ - unsigned char ch; - ch = re_string_byte_at (&mctx->input, idx); - switch (node->type) - { - case CHARACTER: - if (node->opr.c != ch) - return 0; - break; - - case SIMPLE_BRACKET: - if (!bitset_contain (node->opr.sbcset, ch)) - return 0; - break; - -#ifdef RE_ENABLE_I18N - case OP_UTF8_PERIOD: - if (ch >= 0x80) - return 0; - /* FALLTHROUGH */ -#endif - case OP_PERIOD: - if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE)) - || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL))) - return 0; - break; - - default: - return 0; - } - - if (node->constraint) - { - /* The node has constraints. Check whether the current context - satisfies the constraints. */ - unsigned int context = re_string_context_at (&mctx->input, idx, - mctx->eflags); - if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context)) - return 0; - } - - return 1; -} - -/* Extend the buffers, if the buffers have run out. */ - -static reg_errcode_t -extend_buffers (mctx) - re_match_context_t *mctx; -{ - reg_errcode_t ret; - re_string_t *pstr = &mctx->input; - - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); - if (BE (ret != REG_NOERROR, 0)) - return ret; - - if (mctx->state_log != NULL) - { - /* And double the length of state_log. */ - /* XXX We have no indication of the size of this buffer. If this - allocation fail we have no indication that the state_log array - does not have the right size. */ - re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *, - pstr->bufs_len + 1); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - mctx->state_log = new_array; - } - - /* Then reconstruct the buffers. */ - if (pstr->icase) - { -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - { - ret = build_wcs_upper_buffer (pstr); - if (BE (ret != REG_NOERROR, 0)) - return ret; - } - else -#endif /* RE_ENABLE_I18N */ - build_upper_buffer (pstr); - } - else - { -#ifdef RE_ENABLE_I18N - if (pstr->mb_cur_max > 1) - build_wcs_buffer (pstr); - else -#endif /* RE_ENABLE_I18N */ - { - if (pstr->trans != NULL) - re_string_translate_buffer (pstr); - } - } - return REG_NOERROR; -} - - -/* Functions for matching context. */ - -/* Initialize MCTX. */ - -static reg_errcode_t -match_ctx_init (mctx, eflags, n) - re_match_context_t *mctx; - int eflags, n; -{ - mctx->eflags = eflags; - mctx->match_last = -1; - if (n > 0) - { - mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); - mctx->sub_tops = re_malloc (re_sub_match_top_t *, n); - if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0)) - return REG_ESPACE; - } - /* Already zero-ed by the caller. - else - mctx->bkref_ents = NULL; - mctx->nbkref_ents = 0; - mctx->nsub_tops = 0; */ - mctx->abkref_ents = n; - mctx->max_mb_elem_len = 1; - mctx->asub_tops = n; - return REG_NOERROR; -} - -/* Clean the entries which depend on the current input in MCTX. - This function must be invoked when the matcher changes the start index - of the input, or changes the input string. */ - -static void -match_ctx_clean (mctx) - re_match_context_t *mctx; -{ - int st_idx; - for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx) - { - int sl_idx; - re_sub_match_top_t *top = mctx->sub_tops[st_idx]; - for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx) - { - re_sub_match_last_t *last = top->lasts[sl_idx]; - re_free (last->path.array); - re_free (last); - } - re_free (top->lasts); - if (top->path) - { - re_free (top->path->array); - re_free (top->path); - } - free (top); - } - - mctx->nsub_tops = 0; - mctx->nbkref_ents = 0; -} - -/* Free all the memory associated with MCTX. */ - -static void -match_ctx_free (mctx) - re_match_context_t *mctx; -{ - /* First, free all the memory associated with MCTX->SUB_TOPS. */ - match_ctx_clean (mctx); - re_free (mctx->sub_tops); - re_free (mctx->bkref_ents); -} - -/* Add a new backreference entry to MCTX. - Note that we assume that caller never call this function with duplicate - entry, and call with STR_IDX which isn't smaller than any existing entry. -*/ - -static reg_errcode_t -match_ctx_add_entry (mctx, node, str_idx, from, to) - re_match_context_t *mctx; - int node, str_idx, from, to; -{ - if (mctx->nbkref_ents >= mctx->abkref_ents) - { - struct re_backref_cache_entry* new_entry; - new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry, - mctx->abkref_ents * 2); - if (BE (new_entry == NULL, 0)) - { - re_free (mctx->bkref_ents); - return REG_ESPACE; - } - mctx->bkref_ents = new_entry; - memset (mctx->bkref_ents + mctx->nbkref_ents, '\0', - sizeof (struct re_backref_cache_entry) * mctx->abkref_ents); - mctx->abkref_ents *= 2; - } - if (mctx->nbkref_ents > 0 - && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx) - mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1; - - mctx->bkref_ents[mctx->nbkref_ents].node = node; - mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; - mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; - mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; - - /* This is a cache that saves negative results of check_dst_limits_calc_pos. - If bit N is clear, means that this entry won't epsilon-transition to - an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If - it is set, check_dst_limits_calc_pos_1 will recurse and try to find one - such node. - - A backreference does not epsilon-transition unless it is empty, so set - to all zeros if FROM != TO. */ - mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map - = (from == to ? ~0 : 0); - - mctx->bkref_ents[mctx->nbkref_ents++].more = 0; - if (mctx->max_mb_elem_len < to - from) - mctx->max_mb_elem_len = to - from; - return REG_NOERROR; -} - -/* Search for the first entry which has the same str_idx, or -1 if none is - found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */ - -static int -search_cur_bkref_entry (mctx, str_idx) - re_match_context_t *mctx; - int str_idx; -{ - int left, right, mid, last; - last = right = mctx->nbkref_ents; - for (left = 0; left < right;) - { - mid = (left + right) / 2; - if (mctx->bkref_ents[mid].str_idx < str_idx) - left = mid + 1; - else - right = mid; - } - if (left < last && mctx->bkref_ents[left].str_idx == str_idx) - return left; - else - return -1; -} - -/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches - at STR_IDX. */ - -static reg_errcode_t -match_ctx_add_subtop (mctx, node, str_idx) - re_match_context_t *mctx; - int node, str_idx; -{ -#ifdef DEBUG - assert (mctx->sub_tops != NULL); - assert (mctx->asub_tops > 0); -#endif - if (BE (mctx->nsub_tops == mctx->asub_tops, 0)) - { - int new_asub_tops = mctx->asub_tops * 2; - re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops, - re_sub_match_top_t *, - new_asub_tops); - if (BE (new_array == NULL, 0)) - return REG_ESPACE; - mctx->sub_tops = new_array; - mctx->asub_tops = new_asub_tops; - } - mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t)); - if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0)) - return REG_ESPACE; - mctx->sub_tops[mctx->nsub_tops]->node = node; - mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx; - return REG_NOERROR; -} - -/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches - at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ - -static re_sub_match_last_t * -match_ctx_add_sublast (subtop, node, str_idx) - re_sub_match_top_t *subtop; - int node, str_idx; -{ - re_sub_match_last_t *new_entry; - if (BE (subtop->nlasts == subtop->alasts, 0)) - { - int new_alasts = 2 * subtop->alasts + 1; - re_sub_match_last_t **new_array = re_realloc (subtop->lasts, - re_sub_match_last_t *, - new_alasts); - if (BE (new_array == NULL, 0)) - return NULL; - subtop->lasts = new_array; - subtop->alasts = new_alasts; - } - new_entry = calloc (1, sizeof (re_sub_match_last_t)); - if (BE (new_entry != NULL, 1)) - { - subtop->lasts[subtop->nlasts] = new_entry; - new_entry->node = node; - new_entry->str_idx = str_idx; - ++subtop->nlasts; - } - return new_entry; -} - -static void -sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx) - re_sift_context_t *sctx; - re_dfastate_t **sifted_sts, **limited_sts; - int last_node, last_str_idx; -{ - sctx->sifted_states = sifted_sts; - sctx->limited_states = limited_sts; - sctx->last_node = last_node; - sctx->last_str_idx = last_str_idx; - re_node_set_init_empty (&sctx->limits); -} diff --git a/gnu/lib/libstdc++/config.h b/gnu/lib/libstdc++/config.h index 3351419..d0460a7 100644 --- a/gnu/lib/libstdc++/config.h +++ b/gnu/lib/libstdc++/config.h @@ -212,13 +212,13 @@ #define HAVE_LOG10F 1 /* Define to 1 if you have the `log10l' function. */ -/* #undef HAVE_LOG10L */ +#define HAVE_LOG10L 1 /* Define to 1 if you have the `logf' function. */ #define HAVE_LOGF 1 /* Define to 1 if you have the `logl' function. */ -/* #undef HAVE_LOGL */ +#define HAVE_LOGL 1 /* Define to 1 if you have the <machine/endian.h> header file. */ #define HAVE_MACHINE_ENDIAN_H 1 diff --git a/gnu/lib/libsupc++/Version.map b/gnu/lib/libsupc++/Version.map index dbcf495..26fc124 100644 --- a/gnu/lib/libsupc++/Version.map +++ b/gnu/lib/libsupc++/Version.map @@ -147,6 +147,13 @@ GLIBCXX_3.4 { std::set_terminate*; std::set_unexpected*; + "std::unexpected()"; + "std::get_terminate()"; + "std::get_unexpected()"; + "std::uncaught_exception()"; + "std::terminate()"; + + std::bad_alloc; std::bad_cast; std::exception*; @@ -154,14 +161,20 @@ GLIBCXX_3.4 { "typeinfo for std::bad_alloc"; "typeinfo for std::bad_cast"; "typeinfo for std::exception"; + "typeinfo for std::type_info"; "typeinfo name for std::bad_alloc"; "typeinfo name for std::bad_cast"; "typeinfo name for std::exception"; + "typeinfo name for std::type_info"; "vtable for std::bad_alloc"; "vtable for std::bad_cast"; "vtable for std::exception"; + "vtable for std::type_info"; + + std::type_info::__*; + "std::type_info::~type_info()"; }; }; diff --git a/gnu/usr.bin/Makefile b/gnu/usr.bin/Makefile index 386c957..663107c 100644 --- a/gnu/usr.bin/Makefile +++ b/gnu/usr.bin/Makefile @@ -4,7 +4,6 @@ SUBDIR= ${_binutils} \ ${_cc} \ - ${_cvs} \ dialog \ diff \ diff3 \ @@ -13,7 +12,6 @@ SUBDIR= ${_binutils} \ ${_gperf} \ grep \ ${_groff} \ - patch \ ${_rcs} \ sdiff \ send-pr \ @@ -26,10 +24,6 @@ _groff= groff .endif .endif -.if ${MK_CVS} != "no" -_cvs= cvs -.endif - .if ${MK_GPL_DTC} != "no" _dtc= dtc .endif diff --git a/gnu/usr.bin/cc/f77/Makefile b/gnu/usr.bin/cc/f77/Makefile deleted file mode 100644 index 9160391..0000000 --- a/gnu/usr.bin/cc/f77/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -# $FreeBSD$ - -.include "../Makefile.inc" -.include "../Makefile.fe" - -.PATH: ${GCCDIR}/f ${GCCDIR} - -PROG= f77 -SRCS= gcc.c g77spec.c - -CFLAGS+= -DFORTRAN_INIT=\"-lg2c\" -DFORTRAN_INIT_PROFILE=\"-lg2c_p\" - -DPADD= ${LIBCC_INT} -LDADD= ${LIBCC_INT} - -CLEANFILES= f77.1 - -f77.1: g77.1 - cat ${.ALLSRC} > ${.TARGET} - -.include <bsd.prog.mk> diff --git a/gnu/usr.bin/cc/f771/Makefile b/gnu/usr.bin/cc/f771/Makefile deleted file mode 100644 index 8d09092..0000000 --- a/gnu/usr.bin/cc/f771/Makefile +++ /dev/null @@ -1,34 +0,0 @@ -# $FreeBSD$ - -.include "${.CURDIR}/../Makefile.inc" - -.PATH: ${GCCDIR}/f ${GCCDIR} - -PROG= f771 -SRCS= bad.c bit.c bld.c com.c data.c equiv.c expr.c global.c implic.c info.c \ - intrin.c lab.c lex.c malloc.c name.c parse.c src.c st.c sta.c \ - stb.c stc.c std.c ste.c storag.c stp.c str.c sts.c stt.c stu.c stv.c \ - stw.c symbol.c target.c top.c type.c where.c main.c -BINDIR= /usr/libexec -NO_MAN= - -CFLAGS+= -I${GCCDIR}/f -I. - -DPADD= ${LIBCC_INT} -LDADD= ${LIBCC_INT} - -#----------------------------------------------------------------------- -# str-* gunk - -.for i in 1t 2t fo io nq op ot -.ORDER: str-$i.h str-$i.j -str-$i.j str-$i.h: str-$i.fin - ${.OBJDIR}/../cc_tools/fini ${GCCDIR}/f/str-$i.fin str-$i.j str-$i.h - -FINIHDRS+= str-$i.j str-$i.h -.endfor - -SRCS+= ${FINIHDRS:M*.h} -CLEANFILES+= ${FINIHDRS} - -.include <bsd.prog.mk> diff --git a/gnu/usr.bin/cc/f77doc/Makefile b/gnu/usr.bin/cc/f77doc/Makefile deleted file mode 100644 index 5e0808d..0000000 --- a/gnu/usr.bin/cc/f77doc/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -# $FreeBSD$ - -.include "../Makefile.inc" - -.PATH: ${GCCDIR}/f - -INFO= g77 - -MAKEINFOFLAGS+= -I ${GCCDIR}/f -I ${GCCDIR}/doc/include -MAKEINFOFLAGS+= -D "srcdir src/contrib" - -g77.info: g77.texi intdoc.texi ffe.texi invoke.texi news.texi bugs.texi \ - ../doc/include/gpl.texi ../doc/include/fdl.texi ../doc/include/funding.texi - -.include <bsd.info.mk> diff --git a/gnu/usr.bin/cc/include/Makefile b/gnu/usr.bin/cc/include/Makefile index 357bac0..c48975e 100644 --- a/gnu/usr.bin/cc/include/Makefile +++ b/gnu/usr.bin/cc/include/Makefile @@ -6,10 +6,12 @@ INCSDIR=${INCLUDEDIR}/gcc/${GCCVER} -.PATH: ${GCCDIR}/config/${GCC_CPU} +.PATH: ${GCCDIR}/config/${GCC_CPU} ${.CURDIR}/../../../../contrib/llvm/tools/clang/lib/Headers .if ${TARGET_ARCH} == "i386" || ${TARGET_ARCH} == "amd64" -INCS= emmintrin.h mmintrin.h pmmintrin.h tmmintrin.h xmmintrin.h mm_malloc.h +INCS= ammintrin.h emmintrin.h mmintrin.h mm3dnow.h pmmintrin.h \ + tmmintrin.h xmmintrin.h mm_malloc.h +INCS+= wmmintrin.h __wmmintrin_aes.h __wmmintrin_pclmul.h .elif ${TARGET_ARCH} == "ia64" INCS= ia64intrin.h .elif ${TARGET_ARCH} == "arm" diff --git a/gnu/usr.bin/cc/include/__wmmintrin_aes.h b/gnu/usr.bin/cc/include/__wmmintrin_aes.h new file mode 100644 index 0000000..ff8a345 --- /dev/null +++ b/gnu/usr.bin/cc/include/__wmmintrin_aes.h @@ -0,0 +1,54 @@ +/*- + * Copyright 2013 John-Mark Gurney + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _WMMINTRIN_AES_H_ +#define _WMMINTRIN_AES_H_ + +#include <emmintrin.h> + +#define MAKE_AES(name) \ +static __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) \ +_mm_## name ##_si128(__m128i __V, __m128i __R) \ +{ \ + __m128i v = __V; \ + \ + __asm__ (#name " %2, %0": "=x" (v): "0" (v), "xm" (__R)); \ + \ + return v; \ +} + +MAKE_AES(aesimc) +MAKE_AES(aesenc) +MAKE_AES(aesenclast) +MAKE_AES(aesdec) +MAKE_AES(aesdeclast) + +#undef MAKE_AES + +#endif /* _WMMINTRIN_AES_H_ */ diff --git a/gnu/usr.bin/cc/include/__wmmintrin_pclmul.h b/gnu/usr.bin/cc/include/__wmmintrin_pclmul.h new file mode 100644 index 0000000..5bebd81 --- /dev/null +++ b/gnu/usr.bin/cc/include/__wmmintrin_pclmul.h @@ -0,0 +1,53 @@ +/*- + * Copyright 2013 John-Mark Gurney + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + * + */ + +#ifndef _WMMINTRIN_PCLMUL_H_ +#define _WMMINTRIN_PCLMUL_H_ + +#include <emmintrin.h> + +/* + * c selects which parts of a and b to multiple: + * 0x00: a[ 63: 0] * b[ 63: 0] + * 0x01: a[127:64] * b[ 63: 0] + * 0x10: a[ 63: 0] * b[127:64] + * 0x11: a[127:64] * b[127:64] + */ +#define _mm_clmulepi64_si128(a, b, c) \ +({ \ + __m128i _a = (a); \ + __m128i _b = (b); \ + \ + __asm__("pclmulqdq %3, %2, %0": "=x" (_a): "0" (_a), "xm" (_b), \ + "i" (c)); \ + \ + _a; \ +}) + +#endif /* _WMMINTRIN_PCLMUL_H_ */ diff --git a/gnu/usr.bin/cvs/Makefile b/gnu/usr.bin/cvs/Makefile deleted file mode 100644 index dc41a73..0000000 --- a/gnu/usr.bin/cvs/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# $FreeBSD$ - -SUBDIR = lib libdiff cvs contrib cvsbug doc - -.include <bsd.subdir.mk> diff --git a/gnu/usr.bin/cvs/Makefile.inc b/gnu/usr.bin/cvs/Makefile.inc deleted file mode 100644 index 4acfa0b..0000000 --- a/gnu/usr.bin/cvs/Makefile.inc +++ /dev/null @@ -1,17 +0,0 @@ -# $FreeBSD$ - -.if !defined(CVSDIR) - -CVSDIR= $(.CURDIR)/../../../../contrib/cvs - -LIBCVSDIR= ${.OBJDIR}/../lib -LIBCVS= ${LIBCVSDIR}/libcvs.a - -LIBDIFFDIR= ${.OBJDIR}/../libdiff -LIBDIFF= ${LIBDIFFDIR}/libdiff.a - -.if exists(${.CURDIR}/../../Makefile.inc) -.include "${.CURDIR}/../../Makefile.inc" -.endif - -.endif diff --git a/gnu/usr.bin/cvs/contrib/Makefile b/gnu/usr.bin/cvs/contrib/Makefile deleted file mode 100644 index 2ac3618..0000000 --- a/gnu/usr.bin/cvs/contrib/Makefile +++ /dev/null @@ -1,35 +0,0 @@ -# $FreeBSD$ - -.include "../Makefile.inc" - -.PATH: ${CVSDIR}/contrib -.PATH: ${CVSDIR}/man - -SCRIPTS= clmerge cln_hist commit_prep cvs2vendor cvs_acls cvscheck \ - log log_accum mfpipe rcs-to-cvs rcs2log rcslock sccs2rcs \ - easy-import - -FILES= README cvscheck.man cvshelp.man descend.man intro.doc - -EXAMPDIR= ${SHAREDIR}/examples/cvs -FILESDIR= ${EXAMPDIR}/contrib -SCRIPTSDIR= ${FILESDIR} -PERLPATH= /usr/bin/perl -CLEANFILES+= $(SCRIPTS) - -.SUFFIXES: .sh .pl .in - -# Prevent Makefile.in from overwriting Makefile through the suffix rules. -Makefile: - @: - -.sh: - cp -f ${.IMPSRC} ${.TARGET} - -.pl: - sed -e 's,xPERL_PATHx,$(PERLPATH),' ${.IMPSRC} > ${.TARGET} - -.in: - sed -e 's,@CSH@,/bin/csh,' -e 's,@PERL@,$(PERLPATH),' ${.IMPSRC} > ${.TARGET} - -.include <bsd.prog.mk> diff --git a/gnu/usr.bin/cvs/contrib/Makefile.depend b/gnu/usr.bin/cvs/contrib/Makefile.depend deleted file mode 100644 index 57b7e10..0000000 --- a/gnu/usr.bin/cvs/contrib/Makefile.depend +++ /dev/null @@ -1,12 +0,0 @@ -# Autogenerated - do NOT edit! - -DEP_RELDIR := ${_PARSEDIR:S,${SRCTOP}/,,} - -DIRDEPS = \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/gnu/usr.bin/cvs/contrib/easy-import.pl b/gnu/usr.bin/cvs/contrib/easy-import.pl deleted file mode 100644 index d266957..0000000 --- a/gnu/usr.bin/cvs/contrib/easy-import.pl +++ /dev/null @@ -1,403 +0,0 @@ -#! xPERL_PATHx -# -# Support for importing a source collection into CVS. -# Tries to prevent the user from the most common pitfalls (like creating -# new top-level repositories or second-level areas accidentally), and -# cares to do some of the `dirty' work like maintaining the modules -# database accordingly. -# -# Written by Jörg Wunsch, 95/03/07, and placed in the public domain. -# -# $FreeBSD$ - -require "complete.pl"; -require "getopts.pl"; - - -sub scan_opts -{ - local($status); - - $status = &Getopts("nv"); - - $dont_do_it = "-n" if $opt_n; - if($opt_v) { - print STDERR '$FreeBSD$' . "\n"; # 'emacs kludge - exit 0; - } - die "usage: $0 [-v] [-n] [moduledir]\n" . - " -n: don't do any commit, show only\n" . - " -v: show program version\n" - unless $status && $#ARGV <= 0; - - if($#ARGV == 0) { - $moduledir = $ARGV[0]; - shift; - } -} - -sub lsdir -{ - # find all subdirectories under @_ - # ignore all CVS entries, dot entries, and non-directories - - local($base) = @_; - local(@ls, @rv, $fname); - - opendir(DIR, $base) || die "Cannot find dir $base.\n"; - - @ls = readdir(DIR); - closedir(DIR); - - @rv = (); - - foreach $fname (@ls) { - next if $fname =~ /^CVS/ || $fname eq "Attic" - || $fname =~ /^\./ || ! -d "$base/$fname"; - @rv = (@rv, $fname); - } - - return sort(@rv); -} - - -sub contains -{ - # look if the first parameter is contained in the list following it - local($item, @list) = @_; - local($found, $i); - - $found = 0; - foreach $i (@list) { - return 1 if $i eq $item; - } - return 0; -} - - - -sub term_init -{ - # first, get some terminal attributes - - # try bold mode first - $so = `tput md`; $se = `tput me`; - - # if no bold mode available, use standout mode - if ($so eq "") { - $so = `tput so`; $se = `tput se`; - } - - # try if we can underscore - $us = `tput us`; $ue = `tput ue`; - # if we don't have it available, or same as bold/standout, disable it - if ($us eq "" || $us eq $so) { - $us = $ue = ""; - } - - # look how many columns we've got - if($ENV{'COLUMNS'} ne "") { - $columns = $ENV{'COLUMNS'}; - } elsif(-t STDIN) { # if we operate on a terminal... - local($word, $tmp); - - open(STTY, "stty -a|"); - $_ = <STTY>; # try getting the tty win structure value - close(STTY); - chop; - $columns = 0; - foreach $word (split) { - $columns = $tmp if $word eq "columns;"; # the number preceding - $tmp = $word; - } - } else { - $columns = 80; - } - # sanity - $columns = 80 unless $columns >= 5; -} - - -sub list -{ - # pretty-print a list - # imports: global variable $columns - local(@items) = @_; - local($longest,$i,$item,$cols,$width); - - # find the longest item - $longest = 0; - foreach $item (@items) { - $i = length($item); - $longest = $i if $longest < $i; - } - $width = $longest + 1; - $cols = int($columns / $width); - - $i = 0; - foreach $item (@items) { - print $item; - if(++$i == $cols) { - $i = 0; print "\n"; - } else { - print ' ' x ($width - length($item)); - } - } - print "\n" unless $i == 0; -} - -sub cvs_init -{ - # get the CVS repository(s) - - die "You need to have the \$CVSROOT variable set.\n" - unless $ENV{'CVSROOT'} ne ""; - - # get the list of available repositories - $cvsroot = $ENV{'CVSROOT'}; - $cvsroot = (split(/:/, $cvsroot, 2))[1] if $cvsroot =~ /:/; - @reps = &lsdir($cvsroot); -} - - -sub lsmodules -{ - # list all known CVS modules - local(%rv, $mname, $mpath, $_); - - %rv = (); - - open(CVS, "cvs co -c|"); - while($_ = <CVS>) { - chop; - ($mname,$mpath) = split; - next if $mname eq ""; - $rv{$mname} = $mpath; - } - close(CVS); - - return %rv; -} - - -sub checktag -{ - # check a given string for tag rules - local($s, $name) = @_; - local($regexp); - - if($name eq "vendor") { $regexp = '^[A-Z][A-Z0-9_]*$'; } - elsif($name eq "release") { $regexp = '^[a-z][a-z0-9_]*$'; } - else { - print STDERR "Internal error: unknown tag name $name\n"; - exit(2); - } - - if($s !~ /$regexp/) { - print "\a${us}Valid $name tags must match the regexp " . - "$regexp.${ue}\n"; - return 0; - } - if($s =~ /^RELENG/) { - print "\a${us}Tags must not start with the word \"RELENG\".${ue}\n"; - return 0; - } - - return 1; -} - - -&scan_opts; -&term_init; -&cvs_init; - -if(! $moduledir) { - @dirs = &lsdir("."); - print "${so}Import from which directory?${se}\n"; - @dirs = (@dirs, "."); - &list(@dirs); - $moduledir = &Complete("Which? [.]: ", @dirs); - $moduledir = "." unless $moduledir ne ""; -} - -chdir $moduledir || die "Cannot chdir to $moduledir\n"; - -print "${so}Available repositories:${se}\n"; -&list(@reps); - -# the following kludge prevents the Complete package from starting -# over with the string just selected; Complete should better provide -# some reinitialize method -$Complete'return = ""; $Complete'r = 0; - -$selected = - &Complete("Enter repository (<TAB>=complete, ^D=show): ", - @reps); - -die "\aYou cannot create new repositories with this script.\n" - unless &contains($selected, @reps); - -$rep = $selected; - -print "\n${so}Selected repository:${se} ${us}$rep${ue}\n"; - - -@areas = &lsdir("$cvsroot/$rep"); - -print "${so}Existent areas in this repository:${se}\n"; -&list(@areas); - -$Complete'return = ""; $Complete'r = 0; - -$selected = - &Complete("Enter area name (<TAB>=complete, ^D=show): ", - @areas); - -print "\a${us}Warning: this will create a new area.${ue}\n" - unless &contains($selected, @areas); - -$area = "$rep/$selected"; - -print "\n${so}[Working on:${se} ${us}$area${ue}${so}]${se}\n"; - -%cvsmods = &lsmodules(); - -for(;;) { - $| = 1; - print "${so}Gimme the module name:${se} "; - $| = 0; - $modname = <>; - chop $modname; - if ($modname eq "") { - print "\a${us}You cannot use an empty module name.${ue}\n"; - next; - } - last if !$cvsmods{$modname}; - print "\a${us}This module name does already exist; do you intend to\n" . - "perform a vendor-branch import to the existing sources?${ue}: "; - $rep = <>; - if ($rep =~ /\s*[yY]/) { - ($area,$modpath) = split(/\//,$cvsmods{$modname},2); - $branchimport = 1; - last; - } - print "${us}Choose another name.${ue}\n"; -} - - -if(!$branchimport) { - for(;;) { - $| = 1; - print "${so}Enter the module path:${se} $area/"; - $| = 0; - $modpath = <>; - chop $modpath; - if ($modpath eq "") { - print "\a${us}You cannot use an empty module path.${ue}\n"; - next; - } - last if ! -d "$cvsroot/$area/$modpath"; - print "\a${us}This module path does already exist; " . - "choose another one.${ue}\n"; - } - - - @newdirs = (); - $dir1 = "$cvsroot/$area"; - $dir2 = "$area"; - - @newdirs = (@newdirs, "$dir2") if ! -d $dir1; - - foreach $ele (split(/\//, $modpath)) { - $dir1 = "$dir1/$ele"; - $dir2 = "$dir2/$ele"; - @newdirs = (@newdirs, "$dir2") if ! -d $dir1; - } - - print "${so}You're going to create the following new directories:${se}\n"; - - &list(@newdirs); -} - -for(;;) { - $| = 1; - print "${so}Enter a \`vendor\' tag (e. g. the authors ID):${se} "; - $| = 0; - $vtag = <>; - chop $vtag; - last if &checktag($vtag, "vendor"); -} - -for(;;) { - $| = 1; - print "${so}Enter a \`release\' tag (e. g. the version #):${se} "; - $| = 0; - $rtag = <>; - chop $rtag; - last if &checktag($rtag, "release"); -} - - -$| = 1; -print "${so}This is your last chance to interrupt, " . - "hit <return> to go on:${se} "; -$| = 0; -<>; - -if (!$branchimport) { - $mod = ""; - foreach $tmp (sort(keys(%cvsmods))) { - if($tmp gt $modname) { - $mod = $tmp; - last; - } - } - if($mod eq "") { - # we are going to append our module - $cmd = "\$\na\n"; - } else { - # we can insert it - $cmd = "/^${mod}[ \t]/\ni\n"; - } - - print "${so}Checking out the modules database...${se}\n"; - system("cvs co modules") && die "${us}failed.\n${ue}"; - - print "${so}Inserting new module...${se}\n"; - open(ED, "|ed modules/modules") || die "${us}Cannot start ed${ue}\n"; - print(ED "${cmd}${modname} " . ' ' x (15 - length($modname)) . - "$area/${modpath}\n.\nw\nq\n"); - close(ED); - - print "${so}Commiting new modules database...${se}\n"; - system("cvs $dont_do_it commit -m \" " . - "${modname} --> $area/${modpath}\" modules") - && die "Commit failed\n"; - - # we always release "modules" to prevent duplicate - system("cvs -Q release -d modules"); -} - -print "${so}Importing source. Enter a commit message in the editor.${se}\n"; - -system("cvs $dont_do_it import $area/$modpath $vtag $rtag"); - -print "${so}You are done now. Go to a different directory, perform a${se}\n". - "${us}cvs co ${modname}${ue} ${so}command, and see if your new module" . - " builds ok.${se}\n"; - -print "\nPlease don't forget to edit the parent Makefile to add what you\n". - "just imported.\n"; - -if($dont_do_it) { -print <<END - - -${so}Since you did not allow to commit anything, you'll have${se} -${so}to remove the edited modules' database yourself.${se} -${so}To do this, perform a${se} -${us}cd ${moduledir}; cvs -Q release -d modules${ue} -${so}command.${se} -END -; -} diff --git a/gnu/usr.bin/cvs/cvs/Makefile b/gnu/usr.bin/cvs/cvs/Makefile deleted file mode 100644 index f2b1666..0000000 --- a/gnu/usr.bin/cvs/cvs/Makefile +++ /dev/null @@ -1,64 +0,0 @@ -# $FreeBSD$ - -.include <bsd.own.mk> -.include "${.CURDIR}/../Makefile.inc" - -.PATH: ${CVSDIR}/src -.PATH: ${CVSDIR}/lib -.PATH: ${CVSDIR}/man -.PATH: ${CVSDIR} - -PROG= cvs -MAN= cvs.1 cvs.5 - -SRCS= add.c admin.c annotate.c buffer.c \ - checkin.c checkout.c classify.c client.c \ - commit.c create_adm.c cvsrc.c diff.c edit.c entries.c error.c \ - expand_path.c fileattr.c filesubr.c find_names.c \ - hardlink.c hash.c history.c \ - ignore.c import.c lock.c log.c login.c logmsg.c main.c mkmodules.c \ - modules.c myndbm.c no_diff.c parseinfo.c patch.c prepend_args.c \ - rcs.c rcscmds.c \ - recurse.c release.c remove.c repos.c root.c run.c scramble.c \ - server.c stack.c status.c subr.c \ - tag.c update.c vers_ts.c version.c watch.c \ - wrapper.c zlib.c - -# gnu must be before lib to pick correct regex.h -CFLAGS+= -I${.CURDIR} -I../lib -DHAVE_CONFIG_H -I${CVSDIR}/src \ - -I${DESTDIR}/usr/include/gnu \ - -I${CVSDIR}/lib -I${CVSDIR}/diff -I. - -DPADD= ${LIBCVS} ${LIBDIFF} ${LIBGNUREGEX} ${LIBMD} ${LIBCRYPT} ${LIBZ} -LDADD= ${LIBCVS} ${LIBDIFF} -lgnuregex -lmd -lcrypt -lz - -.if ${MK_KERBEROS_SUPPORT} != "no" -CFLAGS+= -DHAVE_GSSAPI -DENCRYPTION -LDADD+= -lgssapi -lkrb5 -lhx509 -lasn1 -lcrypto -lroken -lcrypt -lcom_err -DPADD+= ${LIBGSSAPI} ${LIBKRB5} ${LIBHX509} ${LIBASN1} ${LIBCRYPTO} ${LIBROKEN} -DPADD+= ${LIBCRYPT} ${LIBCOM_ERR} -.endif - -# -# Regression test support -# -CLEANDIRS+=cvs-sanity -.ifmake regress -USERID!=id -u -regress: - mkdir -p ${.OBJDIR}/cvs-sanity/tmp ${.OBJDIR}/cvs-sanity/work -.if ${USERID} == "0" - chown -R nobody ${.OBJDIR}/cvs-sanity - (TESTDIR=`sh -c 'cd ${.OBJDIR}/cvs-sanity/tmp && /bin/pwd'`;\ - export TESTDIR;\ - cd ${.OBJDIR}/cvs-sanity/work;\ - su -m nobody -c "sh ${CVSDIR}/src/sanity.sh ${.OBJDIR}/cvs") -.else - (TESTDIR=`sh -c 'cd ${.OBJDIR}/cvs-sanity/tmp && /bin/pwd'`;\ - export TESTDIR;\ - cd ${.OBJDIR}/cvs-sanity/work;\ - sh ${CVSDIR}/src/sanity.sh ${.OBJDIR}/cvs) -.endif -.endif - -.include <bsd.prog.mk> diff --git a/gnu/usr.bin/cvs/cvs/Makefile.depend b/gnu/usr.bin/cvs/cvs/Makefile.depend deleted file mode 100644 index 177e6e0..0000000 --- a/gnu/usr.bin/cvs/cvs/Makefile.depend +++ /dev/null @@ -1,33 +0,0 @@ -# Autogenerated - do NOT edit! - -DEP_RELDIR := ${_PARSEDIR:S,${SRCTOP}/,,} - -DIRDEPS = \ - gnu/lib/libgcc \ - gnu/lib/libregex \ - gnu/usr.bin/cvs/lib \ - gnu/usr.bin/cvs/libdiff \ - include \ - include/arpa \ - include/gssapi \ - include/xlocale \ - kerberos5/lib/libasn1 \ - kerberos5/lib/libhx509 \ - kerberos5/lib/libkrb5 \ - kerberos5/lib/libroken \ - lib/${CSU_DIR} \ - lib/libc \ - lib/libcom_err \ - lib/libcompiler_rt \ - lib/libcrypt \ - lib/libgssapi \ - lib/libmd \ - lib/libz \ - secure/lib/libcrypto \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/gnu/usr.bin/cvs/cvs/prepend_args.c b/gnu/usr.bin/cvs/cvs/prepend_args.c deleted file mode 100644 index 12322ce..0000000 --- a/gnu/usr.bin/cvs/cvs/prepend_args.c +++ /dev/null @@ -1,86 +0,0 @@ -/* prepend_args.c - utilility programs for manpiulating argv[] - Copyright (C) 1999 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ - -/* $FreeBSD$ */ - - -#ifdef HAVE_CONFIG_H -# include <config.h> -#endif -#include "cvs.h" -#include "prepend_args.h" - - -/* Find the white-space-separated options specified by OPTIONS, and - using BUF to store copies of these options, set ARGV[0], ARGV[1], - etc. to the option copies. Return the number N of options found. - Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] - etc. Backslash can be used to escape whitespace (and backslashes). */ -static int -prepend_args (options, buf, argv) - char const *options; - char *buf; - char **argv; -{ - char const *o = options; - char *b = buf; - int n = 0; - - for (;;) - { - while (isspace ((unsigned char) *o)) - o++; - if (!*o) - return n; - if (argv) - argv[n] = b; - n++; - - do - if ((*b++ = *o++) == '\\' && *o) - b[-1] = *o++; - while (*o && ! isspace ((unsigned char) *o)); - - *b++ = '\0'; - } -} - -/* Prepend the whitespace-separated options in OPTIONS to the argument - vector of a main program with argument count *PARGC and argument - vector *PARGV. */ -void -prepend_default_options (options, pargc, pargv) - char const *options; - int *pargc; - char ***pargv; -{ - if (options) - { - char *buf = xmalloc (strlen (options) + 1); - int prepended = prepend_args (options, buf, (char **) NULL); - int argc = *pargc; - char * const *argv = *pargv; - char **pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp); - *pargc = prepended + argc; - *pargv = pp; - *pp++ = *argv++; - pp += prepend_args (options, buf, pp); - while ((*pp++ = *argv++)) - continue; - } -} diff --git a/gnu/usr.bin/cvs/cvs/prepend_args.h b/gnu/usr.bin/cvs/cvs/prepend_args.h deleted file mode 100644 index 6708442..0000000 --- a/gnu/usr.bin/cvs/cvs/prepend_args.h +++ /dev/null @@ -1,26 +0,0 @@ -/* prepend_args.h - utilility programs for manpiulating argv[] - Copyright (C) 1999 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA - 02111-1307, USA. */ - -/* $FreeBSD$ */ - -/* This code, taken from GNU Grep, originally used the "PARAM" macro, as the - current GNU coding standards requires. Older GNU code used the "PROTO" - macro, before the GNU coding standards replaced it. We use the older - form here to keep from having to include another file in cvs/src/main.c. */ - -void prepend_default_options PROTO ((char const *, int *, char ***)); diff --git a/gnu/usr.bin/cvs/cvsbug/Makefile b/gnu/usr.bin/cvs/cvsbug/Makefile deleted file mode 100644 index ad07f39..0000000 --- a/gnu/usr.bin/cvs/cvsbug/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -# $FreeBSD$ - -.include "${.CURDIR}/../Makefile.inc" - -.PATH: ${CVSDIR}/src -.PATH: ${CVSDIR}/man -.PATH: ${CVSDIR} - -SCRIPTS= cvsbug -MAN= cvsbug.8 - -CLEANFILES+= cvsbug - -cvsbug: cvsbug.in - version=`sed < ${CVSDIR}/configure \ - -e '/^[ ]*VERSION=/!d' -e 's/.*=["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}/\1/' -e q`; \ - sed -e "s,@VERSION@,$${version}-FreeBSD,g" \ - -e "s,@MKTEMP@,/usr/bin/mktemp,g" \ - -e "s,@PACKAGE_BUGREPORT@,bug-cvs@gnu.org,g" \ - -e "s,@SENDMAIL@,/usr/sbin/sendmail,g" \ - -e "s,@MKTEMP_FUNCTION@,," \ - -e "s,@MKTEMP_SH_FUNCTION@,," \ - ${.ALLSRC} > ${.TARGET} - -.include <bsd.prog.mk> diff --git a/gnu/usr.bin/cvs/cvsbug/Makefile.depend b/gnu/usr.bin/cvs/cvsbug/Makefile.depend deleted file mode 100644 index 57b7e10..0000000 --- a/gnu/usr.bin/cvs/cvsbug/Makefile.depend +++ /dev/null @@ -1,12 +0,0 @@ -# Autogenerated - do NOT edit! - -DEP_RELDIR := ${_PARSEDIR:S,${SRCTOP}/,,} - -DIRDEPS = \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/gnu/usr.bin/cvs/doc/Makefile b/gnu/usr.bin/cvs/doc/Makefile deleted file mode 100644 index ade0e8b..0000000 --- a/gnu/usr.bin/cvs/doc/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# $FreeBSD$ - -.include "${.CURDIR}/../Makefile.inc" - -SRCDIR= ${CVSDIR}/doc - -INFO= cvs cvsclient -INFOSECTION= "Programming & development tools." -INFOENTRY_cvs= "* CVS: (cvs). CVS Reference Manual." -INFOENTRY_cvsclient= "* CVS-CLIENT: (cvsclient). CVS client/server Reference Manual." - -.include <bsd.info.mk> diff --git a/gnu/usr.bin/cvs/lib/Makefile b/gnu/usr.bin/cvs/lib/Makefile deleted file mode 100644 index 54a727d..0000000 --- a/gnu/usr.bin/cvs/lib/Makefile +++ /dev/null @@ -1,37 +0,0 @@ -# $FreeBSD$ - -.include "${.CURDIR}/../Makefile.inc" - -.PATH: ${CVSDIR}/src -.PATH: ${CVSDIR}/lib -.PATH: ${CVSDIR}/man - -LIB= cvs -INTERNALLIB= - -# gnu must be before lib to pick correct regex.h -CFLAGS+= -I. -I${CVSDIR}/src -I${DESTDIR}/usr/include/gnu \ - -I${CVSDIR}/lib -CFLAGS+= -DHAVE_CONFIG_H -YFLAGS= -CLEANFILES+= config.h - -CVS_UMASK_DFLT?= 002 -CVS_ADMIN_GROUP?= cvsadmin -CVS_TMPDIR_DFLT?= /tmp - -SRCS= config.h argmatch.c getdate.y getline.c \ - getopt.c getopt1.c savecwd.c \ - sighandle.c stripslash.c \ - xgetwd.c yesno.c - -config.h: config.h.proto ${CVSDIR}/configure - version=`sed < ${CVSDIR}/configure \ - -e '/^[ ]*VERSION=/!d' -e 's/.*=["'\'']\{0,1\}\([^"'\'']*\)["'\'']\{0,1\}/\1/' -e q`; \ - sed -e "s,@VERSION@,$${version}-20080310-FreeBSD,g" \ - -e "s,@UMASK_DFLT@,${CVS_UMASK_DFLT},g" \ - -e "s,@TMPDIR_DFLT@,${CVS_TMPDIR_DFLT},g" \ - -e "s,@CVS_ADMIN_GROUP@,${CVS_ADMIN_GROUP},g" \ - ${.ALLSRC:M*config.h.proto} > ${.TARGET} - -.include <bsd.lib.mk> diff --git a/gnu/usr.bin/cvs/lib/Makefile.depend b/gnu/usr.bin/cvs/lib/Makefile.depend deleted file mode 100644 index bd8e113..0000000 --- a/gnu/usr.bin/cvs/lib/Makefile.depend +++ /dev/null @@ -1,36 +0,0 @@ -# Autogenerated - do NOT edit! - -DEP_RELDIR := ${_PARSEDIR:S,${SRCTOP}/,,} - -DIRDEPS = \ - include \ - include/xlocale \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -argmatch.o: config.h -argmatch.po: config.h -getdate.o: config.h -getdate.o: getdate.c -getdate.po: config.h -getdate.po: getdate.c -getline.o: config.h -getline.po: config.h -getopt.o: config.h -getopt.po: config.h -getopt1.o: config.h -getopt1.po: config.h -savecwd.o: config.h -savecwd.po: config.h -sighandle.o: config.h -sighandle.po: config.h -stripslash.o: config.h -stripslash.po: config.h -xgetwd.o: config.h -xgetwd.po: config.h -yesno.o: config.h -yesno.po: config.h -.endif diff --git a/gnu/usr.bin/cvs/lib/config.h.proto b/gnu/usr.bin/cvs/lib/config.h.proto deleted file mode 100644 index 30a4f00..0000000 --- a/gnu/usr.bin/cvs/lib/config.h.proto +++ /dev/null @@ -1,532 +0,0 @@ -/* $FreeBSD$ */ - -/* config.h. Generated from config.h.in by configure. */ -/* config.h.in. Generated from configure.in by autoheader. */ - -/* Enable AUTH_CLIENT_SUPPORT to enable pserver as a remote access method in - the CVS client (default) */ -#define AUTH_CLIENT_SUPPORT 1 - -/* Define if you want to use the password authenticated server. */ -#define AUTH_SERVER_SUPPORT 1 - -/* Define if you want CVS to be able to be a remote repository client. */ -#define CLIENT_SUPPORT 1 - -/* Define to 1 if the `closedir' function returns void instead of `int'. */ -/* #undef CLOSEDIR_VOID */ - -/* The CVS admin command is restricted to the members of the group - CVS_ADMIN_GROUP. If this group does not exist, all users are allowed to run - CVS admin. To disable the CVS admin command for all users, create an empty - CVS_ADMIN_GROUP by running configure with the --with-cvs-admin-group= - option. To disable access control for CVS admin, run configure with the - --without-cvs-admin-group option in order to comment out the define below. - */ -#define CVS_ADMIN_GROUP "@CVS_ADMIN_GROUP@" - -/* When committing a permanent change, CVS and RCS make a log entry of who - committed the change. If you are committing the change logged in as "root" - (not under "su" or other root-priv giving program), CVS/RCS cannot - determine who is actually making the change. As such, by default, CVS - prohibits changes committed by users logged in as "root". You can disable - checking by passing the "--enable-rootcommit" option to configure or by - commenting out the lines below. */ -#define CVS_BADROOT 1 - -/* The default editor to use, if one does not specify the "-e" option to cvs, - or does not have an EDITOR environment variable. If this is not set to an - absolute path to an executable, use the shell to find where the editor - actually is. This allows sites with /usr/bin/vi or /usr/ucb/vi to work - equally well (assuming that their PATH is reasonable). */ -#ifndef EDITOR_DFLT -#define EDITOR_DFLT "vi" -#endif - -/* Define to enable encryption support. */ -/* #undef ENCRYPTION */ - -/* Define if this executable will be running on case insensitive file systems. - In the client case, this means that it will request that the server pretend - to be case insensitive if it isn't already. */ -/* #undef FILENAMES_CASE_INSENSITIVE */ - -/* When committing or importing files, you must enter a log message. Normally, - you can do this either via the -m flag on the command line, the -F flag on - the command line, or an editor will be started for you. If you like to use - logging templates (the rcsinfo file within the $CVSROOT/CVSROOT directory), - you might want to force people to use the editor even if they specify a - message with -m or -F. Enabling FORCE_USE_EDITOR will cause the -m or -F - message to be appended to the temp file when the editor is started. */ -/* #undef FORCE_USE_EDITOR */ - -/* Define to an alternative value if GSS_C_NT_HOSTBASED_SERVICE isn't defined - in the gssapi.h header file. MIT Kerberos 1.2.1 requires this. Only - relevant when using GSSAPI. */ -/* #undef GSS_C_NT_HOSTBASED_SERVICE */ - -/* Define if you have the connect function. */ -#define HAVE_CONNECT 1 - -/* Define if you have the crypt function. */ -#define HAVE_CRYPT 1 - -/* Define to 1 if you have the <direct.h> header file. */ -/* #undef HAVE_DIRECT_H */ - -/* Define to 1 if you have the <dirent.h> header file, and it defines `DIR'. - */ -#define HAVE_DIRENT_H 1 - -/* Define to 1 if you have the `dup2' function. */ -#define HAVE_DUP2 1 - -/* Define to 1 if you have the <errno.h> header file. */ -#define HAVE_ERRNO_H 1 - -/* Define to 1 if you have the `fchdir' function. */ -#define HAVE_FCHDIR 1 - -/* Define to 1 if you have the `fchmod' function. */ -#define HAVE_FCHMOD 1 - -/* Define to 1 if you have the <fcntl.h> header file. */ -#define HAVE_FCNTL_H 1 - -/* Define to 1 if your system has a working POSIX `fnmatch' function. */ -#define HAVE_FNMATCH 1 - -/* Define to 1 if you have the <fnmatch.h> header file. */ -#define HAVE_FNMATCH_H 1 - -/* Define to 1 if you have the `fork' function. */ -#define HAVE_FORK 1 - -/* Define to 1 if you have the `fsync' function. */ -#define HAVE_FSYNC 1 - -/* Define to 1 if you have the `ftime' function. */ -/* #undef HAVE_FTIME */ - -/* Define to 1 if you have the `ftruncate' function. */ -#define HAVE_FTRUNCATE 1 - -/* Define to 1 if you have the `geteuid' function. */ -#define HAVE_GETEUID 1 - -/* Define to 1 if you have the `getgroups' function. */ -#define HAVE_GETGROUPS 1 - -/* Define to 1 if you have the `gethostname' function. */ -#define HAVE_GETHOSTNAME 1 - -/* Define to 1 if you have the `getopt' function. */ -#define HAVE_GETOPT 1 - -/* Define to 1 if you have the `getpagesize' function. */ -#define HAVE_GETPAGESIZE 1 - -/* Define to 1 if you have the `getpassphrase' function. */ -/* #undef HAVE_GETPASSPHRASE */ - -/* Define to 1 if you have the `getpassphrase' function. */ -/* #undef HAVE_GETPASSPHRASE */ - -/* Define if you have the getspnam function. */ -/* #undef HAVE_GETSPNAM */ - -/* Define to 1 if you have the `gettimeofday' function. */ -#define HAVE_GETTIMEOFDAY 1 - -/* Define if you have GSSAPI with Kerberos version 5 available. */ -/* #undef HAVE_GSSAPI */ - -/* Define to 1 if you have the <gssapi/gssapi_generic.h> header file. */ -/* #undef HAVE_GSSAPI_GSSAPI_GENERIC_H */ - -/* Define to 1 if you have the <gssapi/gssapi.h> header file. */ -/* #undef HAVE_GSSAPI_GSSAPI_H */ - -/* Define to 1 if you have the <gssapi.h> header file. */ -/* #undef HAVE_GSSAPI_H */ - -/* Define to 1 if you have the `initgroups' function. */ -#define HAVE_INITGROUPS 1 - -/* Define to 1 if you have the <inttypes.h> header file. */ -#define HAVE_INTTYPES_H 1 - -/* Define to 1 if you have the <io.h> header file. */ -/* #undef HAVE_IO_H */ - -/* Define if you have MIT Kerberos version 4 available. */ -/* #undef HAVE_KERBEROS */ - -/* Define to 1 if you have the <krb5.h> header file. */ -/* #undef HAVE_KRB5_H */ - -/* Define to 1 if you have the `krb_get_err_text' function. */ -/* #undef HAVE_KRB_GET_ERR_TEXT */ - -/* Define to 1 if you have the `krb' library (-lkrb). */ -/* #undef HAVE_LIBKRB */ - -/* Define to 1 if you have the `krb4' library (-lkrb4). */ -/* #undef HAVE_LIBKRB4 */ - -/* Define to 1 if you have the `nsl' library (-lnsl). */ -/* #undef HAVE_LIBNSL */ - -/* Define to 1 if you have the <limits.h> header file. */ -#define HAVE_LIMITS_H 1 - -/* Define to 1 if you have the `login' function. */ -/* #undef HAVE_LOGIN */ - -/* Define to 1 if you have the `logout' function. */ -/* #undef HAVE_LOGOUT */ - -/* Define to 1 if you support file names longer than 14 characters. */ -#define HAVE_LONG_FILE_NAMES 1 - -/* Define if you have memchr (always for CVS). */ -#define HAVE_MEMCHR 1 - -/* Define to 1 if you have the `memmove' function. */ -#define HAVE_MEMMOVE 1 - -/* Define to 1 if you have the <memory.h> header file. */ -#define HAVE_MEMORY_H 1 - -/* Define to 1 if you have the `mkdir' function. */ -#define HAVE_MKDIR 1 - -/* Define to 1 if you have the `mknod' function. */ -#define HAVE_MKNOD 1 - -/* Define to 1 if you have the `mkstemp' function. */ -#define HAVE_MKSTEMP 1 - -/* Define to 1 if you have the `mktemp' function. */ -#define HAVE_MKTEMP 1 - -/* Define to 1 if you have a working `mmap' system call. */ -#define HAVE_MMAP 1 - -/* Define to 1 if you have the `nanosleep' function. */ -#define HAVE_NANOSLEEP 1 - -/* Define to 1 if you have the <ndbm.h> header file. */ -#define HAVE_NDBM_H 1 - -/* Define to 1 if you have the <ndir.h> header file, and it defines `DIR'. */ -/* #undef HAVE_NDIR_H */ - -/* Define to 1 if you have the `putenv' function. */ -#define HAVE_PUTENV 1 - -/* Define to 1 if you have the `readlink' function. */ -#define HAVE_READLINK 1 - -/* Define to 1 if you have the `regcomp' function. */ -#define HAVE_REGCOMP 1 - -/* Define to 1 if you have the `regerror' function. */ -#define HAVE_REGERROR 1 - -/* Define to 1 if you have the `regexec' function. */ -#define HAVE_REGEXEC 1 - -/* Define to 1 if you have the `regfree' function. */ -#define HAVE_REGFREE 1 - -/* Define to 1 if you have the `rename' function. */ -#define HAVE_RENAME 1 - -/* Define to 1 if you have the `select' function. */ -/* #undef HAVE_SELECT */ - -/* Define if the diff library should use setmode for binary files. */ -/* #undef HAVE_SETMODE */ - -/* Define to 1 if you have the `sigaction' function. */ -#define HAVE_SIGACTION 1 - -/* Define to 1 if you have the `sigblock' function. */ -#define HAVE_SIGBLOCK 1 - -/* Define to 1 if you have the `sigprocmask' function. */ -#define HAVE_SIGPROCMASK 1 - -/* Define to 1 if you have the `sigsetmask' function. */ -#define HAVE_SIGSETMASK 1 - -/* Define to 1 if you have the `sigvec' function. */ -#define HAVE_SIGVEC 1 - -/* Define to 1 if you have the <stdint.h> header file. */ -#define HAVE_STDINT_H 1 - -/* Define to 1 if you have the <stdlib.h> header file. */ -#define HAVE_STDLIB_H 1 - -/* Define if you have strchr (always for CVS). */ -#define HAVE_STRCHR 1 - -/* Define to 1 if you have the `strerror' function. */ -#define HAVE_STRERROR 1 - -/* Define to 1 if you have the <strings.h> header file. */ -#define HAVE_STRINGS_H 1 - -/* Define to 1 if you have the <string.h> header file. */ -#define HAVE_STRING_H 1 - -/* Define to 1 if you have the `strstr' function. */ -#define HAVE_STRSTR 1 - -/* Define to 1 if you have the `strtoul' function. */ -#define HAVE_STRTOUL 1 - -/* Define to 1 if `st_blksize' is member of `struct stat'. */ -#define HAVE_STRUCT_STAT_ST_BLKSIZE 1 - -/* Define to 1 if `st_rdev' is member of `struct stat'. */ -#define HAVE_STRUCT_STAT_ST_RDEV 1 - -/* Define to 1 if you have the <syslog.h> header file. */ -#define HAVE_SYSLOG_H 1 - -/* Define to 1 if you have the <sys/bsdtypes.h> header file. */ -/* #undef HAVE_SYS_BSDTYPES_H */ - -/* Define to 1 if you have the <sys/dir.h> header file, and it defines `DIR'. - */ -/* #undef HAVE_SYS_DIR_H */ - -/* Define to 1 if you have the <sys/file.h> header file. */ -#define HAVE_SYS_FILE_H 1 - -/* Define to 1 if you have the <sys/ndir.h> header file, and it defines `DIR'. - */ -/* #undef HAVE_SYS_NDIR_H */ - -/* Define to 1 if you have the <sys/param.h> header file. */ -#define HAVE_SYS_PARAM_H 1 - -/* Define to 1 if you have the <sys/resource.h> header file. */ -#define HAVE_SYS_RESOURCE_H 1 - -/* Define to 1 if you have the <sys/select.h> header file. */ -#define HAVE_SYS_SELECT_H 1 - -/* Define to 1 if you have the <sys/stat.h> header file. */ -#define HAVE_SYS_STAT_H 1 - -/* Define to 1 if you have the <sys/timeb.h> header file. */ -/* #undef HAVE_SYS_TIMEB_H */ - -/* Define to 1 if you have the <sys/time.h> header file. */ -#define HAVE_SYS_TIME_H 1 - -/* Define to 1 if you have the <sys/types.h> header file. */ -#define HAVE_SYS_TYPES_H 1 - -/* Define to 1 if you have <sys/wait.h> that is POSIX.1 compatible. */ -#define HAVE_SYS_WAIT_H 1 - -/* Define to 1 if you have the `tempnam' function. */ -#define HAVE_TEMPNAM 1 - -/* Define to 1 if you have the `timezone' function. */ -#define HAVE_TIMEZONE 1 - -/* Define to 1 if you have the `tzset' function. */ -#define HAVE_TZSET 1 - -/* Define to 1 if you have the <unistd.h> header file. */ -#define HAVE_UNISTD_H 1 - -/* Define to 1 if you have the `usleep' function. */ -/* #undef HAVE_USLEEP */ - -/* Define to 1 if you have the <utime.h> header file. */ -#define HAVE_UTIME_H 1 - -/* Define to 1 if `utime(file, NULL)' sets file's timestamp to the present. */ -#define HAVE_UTIME_NULL 1 - -/* Define to 1 if you have the `valloc' function. */ -#define HAVE_VALLOC 1 - -/* Define to 1 if you have the `vfork' function. */ -#define HAVE_VFORK 1 - -/* Define to 1 if you have the <vfork.h> header file. */ -/* #undef HAVE_VFORK_H */ - -/* Define to 1 if you have the `vprintf' function. */ -#define HAVE_VPRINTF 1 - -/* Define to 1 if you have the `wait3' function. */ -#define HAVE_WAIT3 1 - -/* Define to 1 if you have the `waitpid' function. */ -#define HAVE_WAITPID 1 - -/* Define to 1 if `fork' works. */ -#define HAVE_WORKING_FORK 1 - -/* Define to 1 if `vfork' works. */ -#define HAVE_WORKING_VFORK 1 - -/* By default, CVS stores its modules and other such items in flat text files - (MY_NDBM enables this). Turning off MY_NDBM causes CVS to look for a - system-supplied ndbm database library and use it instead. That may speed - things up, but the default setting generally works fine too. */ -#define MY_NDBM 1 - -/* Define to 1 if your C compiler doesn't accept -c and -o together. */ -/* #undef NO_MINUS_C_MINUS_O */ - -/* Define to the address where bug reports for this package should be sent. */ -#define PACKAGE_BUGREPORT "bug-cvs@nongnu.org" - -/* Define to the full name of this package. */ -#define PACKAGE_NAME "Concurrent Versions System (CVS)" - -/* Define to the full name and version of this package. */ -#define PACKAGE_STRING "Concurrent Versions System (CVS) @VERSION@" - -/* Define to the one symbol short name of this package. */ -#define PACKAGE_TARNAME "cvs" - -/* Define to the version of this package. */ -#define PACKAGE_VERSION "@VERSION@" - -/* Path to the pr utility */ -#define PR_PROGRAM "/usr/bin/pr" - -/* Define to force lib/regex.c to use malloc instead of alloca. */ -#define REGEX_MALLOC 1 - -/* Define as the return type of signal handlers (`int' or `void'). */ -#define RETSIGTYPE void - -/* The default remote shell to use, if one does not specify the CVS_RSH - environment variable. */ -/* FreeBSD.org default is to use ssh. */ -#define RSH_DFLT "ssh" - -/* If you are working with a large remote repository and a 'cvs checkout' is - swamping your network and memory, define these to enable flow control. You - will end up with even less probability of a consistent checkout (see - Concurrency in cvs.texinfo), but CVS doesn't try to guarantee that anyway. - The master server process will monitor how far it is getting behind, if it - reaches the high water mark, it will signal the child process to stop - generating data when convenient (ie: no locks are held, currently at the - beginning of a new directory). Once the buffer has drained sufficiently to - reach the low water mark, it will be signalled to start again. */ -#define SERVER_FLOWCONTROL 1 - -/* The high water mark in bytes for server flow control. Required if - SERVER_FLOWCONTROL is defined, and useless otherwise. */ -#define SERVER_HI_WATER (2 * 1024 * 1024) - -/* The low water mark in bytes for server flow control. Required if - SERVER_FLOWCONTROL is defined, and useless otherwise. */ -#define SERVER_LO_WATER (1 * 1024 * 1024) - -/* Define if you want CVS to be able to serve repositories to remote clients. - */ -#define SERVER_SUPPORT 1 - -/* Define as the maximum value of type 'size_t', if the system doesn't define - it. */ -/* #undef SIZE_MAX */ - -/* The default remote shell to use, if one does not specify the CVS_SSH - environment variable. */ -#define SSH_DFLT "ssh" - -/* Define to 1 if the `S_IS*' macros in <sys/stat.h> do not work properly. */ -/* #undef STAT_MACROS_BROKEN */ - -/* Define to 1 if you have the ANSI C header files. */ -#define STDC_HEADERS 1 - -/* Define to 1 if you can safely include both <sys/time.h> and <time.h>. */ -#define TIME_WITH_SYS_TIME 1 - -/* Directory used for storing temporary files, if not overridden by - environment variables or the -T global option. There should be little need - to change this (-T is a better mechanism if you need to use a different - directory for temporary files). */ -#define TMPDIR_DFLT "@TMPDIR_DFLT@" - -/* The default umask to use when creating or otherwise setting file or - directory permissions in the repository. Must be a value in the range of 0 - through 0777. For example, a value of 002 allows group rwx access and world - rx access; a value of 007 allows group rwx access but no world access. This - value is overridden by the value of the CVSUMASK environment variable, - which is interpreted as an octal number. */ -#define UMASK_DFLT @UMASK_DFLT@ - -/* Define if setmode is required when writing binary data to stdout. */ -/* #undef USE_SETMODE_STDOUT */ - -/* Define if utime requires write access to the file (true on Windows, but not - Unix). */ -/* #undef UTIME_EXPECTS_WRITABLE */ - -/* Define to 1 if on AIX 3. - System headers sometimes define this. - We just want to avoid a redefinition error message. */ -#ifndef _ALL_SOURCE -/* # undef _ALL_SOURCE */ -#endif - -/* Define to 1 if on MINIX. */ -/* #undef _MINIX */ - -/* Define to 2 if the system does not provide POSIX.1 features except with - this defined. */ -/* #undef _POSIX_1_SOURCE */ - -/* Define to 1 if you need to in order for `stat' and other things to work. */ -/* #undef _POSIX_SOURCE */ - -/* Define to force lib/regex.c to define re_comp et al. */ -#define _REGEX_RE_COMP 1 - -/* Define to empty if `const' does not conform to ANSI C. */ -/* #undef const */ - -/* We want to always use the GNULIB version of getpass which we have in lib, - so define getpass to something that won't conflict with any existing system - declarations. */ -/* #define getpass cvs_getpass */ - -/* Define to `int' if <sys/types.h> doesn't define. */ -/* #undef gid_t */ - -/* Define to `__inline__' or `__inline' if that's what the C compiler - calls it, or to nothing if 'inline' is not supported under any name. */ -#ifndef __cplusplus -/* #undef inline */ -#endif - -/* Define to `int' if <sys/types.h> does not define. */ -/* #undef mode_t */ - -/* Define to `int' if <sys/types.h> does not define. */ -/* #undef pid_t */ - -/* Define to `unsigned int' if <sys/types.h> does not define. */ -/* #undef size_t */ - -/* Define to `int' if <sys/types.h> doesn't define. */ -/* #undef uid_t */ - -/* Define as `fork' if `vfork' does not work. */ -/* #undef vfork */ diff --git a/gnu/usr.bin/cvs/libdiff/Makefile b/gnu/usr.bin/cvs/libdiff/Makefile deleted file mode 100644 index 7fd67a4..0000000 --- a/gnu/usr.bin/cvs/libdiff/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# $FreeBSD$ - -.include "${.CURDIR}/../Makefile.inc" - -.PATH: ${CVSDIR}/diff - -LIB= diff -INTERNALLIB= - -# gnu must be before lib to pick correct regex.h -CFLAGS+= -I../lib -I${DESTDIR}/usr/include/gnu \ - -I${CVSDIR}/lib -DHAVE_CONFIG_H - -SRCS = diff.c diff3.c analyze.c cmpbuf.c cmpbuf.h io.c context.c ed.c \ - normal.c ifdef.c util.c dir.c version.c diff.h side.c - -.include <bsd.lib.mk> diff --git a/gnu/usr.bin/cvs/libdiff/Makefile.depend b/gnu/usr.bin/cvs/libdiff/Makefile.depend deleted file mode 100644 index f789459..0000000 --- a/gnu/usr.bin/cvs/libdiff/Makefile.depend +++ /dev/null @@ -1,16 +0,0 @@ -# Autogenerated - do NOT edit! - -DEP_RELDIR := ${_PARSEDIR:S,${SRCTOP}/,,} - -DIRDEPS = \ - gnu/lib/libregex \ - gnu/usr.bin/cvs/lib \ - include \ - include/xlocale \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/gnu/usr.bin/dialog/Makefile b/gnu/usr.bin/dialog/Makefile index e945b22..f9281c8 100644 --- a/gnu/usr.bin/dialog/Makefile +++ b/gnu/usr.bin/dialog/Makefile @@ -8,6 +8,6 @@ LDADD= -ldialog -lncursesw -lm CFLAGS+= -I${.CURDIR} -I${DIALOG} .PATH: ${DIALOG} -WARNS?= 3 +WARNS?= 6 .include <bsd.prog.mk> diff --git a/gnu/usr.bin/diff/Makefile b/gnu/usr.bin/diff/Makefile index a5ceb17..c5c66dc 100644 --- a/gnu/usr.bin/diff/Makefile +++ b/gnu/usr.bin/diff/Makefile @@ -17,10 +17,10 @@ CFLAGS+=-funsigned-char CFLAGS+=-DHAVE_CONFIG_H CFLAGS+=-DPR_PROGRAM=\"/usr/bin/pr\" -CFLAGS+=-I${DESTDIR}/usr/include/gnu CFLAGS+=-I${.CURDIR}/../../../contrib/diff CFLAGS+=-I${.CURDIR}/../../../contrib/diff/src CFLAGS+=-I${.CURDIR}/../../../contrib/diff/lib +CFLAGS+=-I${DESTDIR}/usr/include/gnu SUBDIR+=doc diff --git a/gnu/usr.bin/gdb/Makefile.inc b/gnu/usr.bin/gdb/Makefile.inc index c40e9b8..5e1d5cd 100644 --- a/gnu/usr.bin/gdb/Makefile.inc +++ b/gnu/usr.bin/gdb/Makefile.inc @@ -37,6 +37,7 @@ GDB_CROSS_DEBUGGER= ${CNTRB_GDB}/gdb/signals ${CNTRB_GDB}/gdb/tui ${TARGET_SUBDIR} CFLAGS+= -DHAVE_CONFIG_H -DRL_NO_COMPAT -DMI_OUT=1 -DTUI=1 +CFLAGS+= -DDEBUGDIR=\"${DEBUGDIR}\" CFLAGS+= -I. CFLAGS+= -I${TARGET_SUBDIR} CFLAGS+= -I${BMAKE_BU}/libbfd -I${BMAKE_BU}/libbfd/${TARGET_CPUARCH} diff --git a/gnu/usr.bin/gdb/arch/amd64/config.h b/gnu/usr.bin/gdb/arch/amd64/config.h index ac81c54..674f818 100644 --- a/gnu/usr.bin/gdb/arch/amd64/config.h +++ b/gnu/usr.bin/gdb/arch/amd64/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_i386_arch diff --git a/gnu/usr.bin/gdb/arch/arm/config.h b/gnu/usr.bin/gdb/arch/arm/config.h index e1b128c..b2481f8 100644 --- a/gnu/usr.bin/gdb/arch/arm/config.h +++ b/gnu/usr.bin/gdb/arch/arm/config.h @@ -451,9 +451,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_arm_arch diff --git a/gnu/usr.bin/gdb/arch/i386/config.h b/gnu/usr.bin/gdb/arch/i386/config.h index f21da4c..e849e0a 100644 --- a/gnu/usr.bin/gdb/arch/i386/config.h +++ b/gnu/usr.bin/gdb/arch/i386/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_i386_arch diff --git a/gnu/usr.bin/gdb/arch/ia64/config.h b/gnu/usr.bin/gdb/arch/ia64/config.h index 5faa96b..4cc29f9 100644 --- a/gnu/usr.bin/gdb/arch/ia64/config.h +++ b/gnu/usr.bin/gdb/arch/ia64/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_ia64_arch diff --git a/gnu/usr.bin/gdb/arch/mips/config.h b/gnu/usr.bin/gdb/arch/mips/config.h index 41a6731..2b375a6 100644 --- a/gnu/usr.bin/gdb/arch/mips/config.h +++ b/gnu/usr.bin/gdb/arch/mips/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_mips_arch diff --git a/gnu/usr.bin/gdb/arch/powerpc/config.h b/gnu/usr.bin/gdb/arch/powerpc/config.h index f169fad..37416a7 100644 --- a/gnu/usr.bin/gdb/arch/powerpc/config.h +++ b/gnu/usr.bin/gdb/arch/powerpc/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_rs6000_arch diff --git a/gnu/usr.bin/gdb/arch/powerpc64/config.h b/gnu/usr.bin/gdb/arch/powerpc64/config.h index d8b9b6d..58843fb 100644 --- a/gnu/usr.bin/gdb/arch/powerpc64/config.h +++ b/gnu/usr.bin/gdb/arch/powerpc64/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_rs6000_arch diff --git a/gnu/usr.bin/gdb/arch/sparc64/config.h b/gnu/usr.bin/gdb/arch/sparc64/config.h index 5527a79..974e426 100644 --- a/gnu/usr.bin/gdb/arch/sparc64/config.h +++ b/gnu/usr.bin/gdb/arch/sparc64/config.h @@ -439,9 +439,6 @@ /* Name of this package. */ #define PACKAGE "gdb" -/* Global directory for separate debug files. */ -#define DEBUGDIR "/usr/local/lib/debug" - /* Define to BFD's default architecture. */ #define DEFAULT_BFD_ARCH bfd_sparc_arch diff --git a/gnu/usr.bin/gdb/gdb/Makefile b/gnu/usr.bin/gdb/gdb/Makefile index ef9e135..15eb2eb 100644 --- a/gnu/usr.bin/gdb/gdb/Makefile +++ b/gnu/usr.bin/gdb/gdb/Makefile @@ -15,3 +15,4 @@ DPADD= ${GDBLIBS} ${BULIBS} ${LIBM} ${LIBREADLINE} ${LIBTERMCAP} ${LIBGNUREGEX} LDADD= ${GDBLIBS} ${BULIBS} -lm -lreadline -ltermcap -lgnuregex .include <bsd.prog.mk> +CFLAGS+= -DDEBUGDIR=\"${DEBUGDIR}\" diff --git a/gnu/usr.bin/gdb/kgdb/trgt_mips.c b/gnu/usr.bin/gdb/kgdb/trgt_mips.c index 6ad2274..aadf095 100644 --- a/gnu/usr.bin/gdb/kgdb/trgt_mips.c +++ b/gnu/usr.bin/gdb/kgdb/trgt_mips.c @@ -115,6 +115,16 @@ static int kgdb_trgt_frame_offset[] = { offsetof(struct trapframe, a1), offsetof(struct trapframe, a2), offsetof(struct trapframe, a3), +#if defined(__mips_n32) || defined(__mips_n64) + offsetof(struct trapframe, a4), + offsetof(struct trapframe, a5), + offsetof(struct trapframe, a6), + offsetof(struct trapframe, a7), + offsetof(struct trapframe, t0), + offsetof(struct trapframe, t1), + offsetof(struct trapframe, t2), + offsetof(struct trapframe, t3), +#else offsetof(struct trapframe, t0), offsetof(struct trapframe, t1), offsetof(struct trapframe, t2), @@ -123,6 +133,7 @@ static int kgdb_trgt_frame_offset[] = { offsetof(struct trapframe, t5), offsetof(struct trapframe, t6), offsetof(struct trapframe, t7), +#endif offsetof(struct trapframe, s0), offsetof(struct trapframe, s1), offsetof(struct trapframe, s2), diff --git a/gnu/usr.bin/gdb/libgdb/Makefile b/gnu/usr.bin/gdb/libgdb/Makefile index b7129a8..ed00525 100644 --- a/gnu/usr.bin/gdb/libgdb/Makefile +++ b/gnu/usr.bin/gdb/libgdb/Makefile @@ -26,7 +26,7 @@ SRCS= annotate.c arch-utils.c auxv.c ax-gdb.c ax-general.c \ elfread.c environ.c eval.c event-loop.c event-top.c exec.c \ expprint.c \ f-exp.y f-lang.c f-typeprint.c f-valprint.c findvar.c \ - ${_fork_child} frame-base.c frame-unwind-kluge.c frame.c \ + ${_fork_child} frame-base.c frame-unwind.c frame.c \ gdb-events.c gdbarch.c gdbtypes.c gnu-v2-abi.c gnu-v3-abi.c \ hpacc-abi.c \ inf-loop.c infcall.c infcmd.c inflow.c ${_infptrace} infrun.c \ @@ -67,13 +67,7 @@ _infptrace= infptrace.c _inftarg= inftarg.c .endif -GENSRCS= frame-unwind-kluge.c version.c - -frame-unwind-kluge.c: frame-unwind.diff - cat ${CNTRB_GDB}/gdb/frame-unwind.c > ${.TARGET} - patch ${.TARGET} ${.ALLSRC} - -CLEANFILES= frame-unwind-kluge.c.orig +GENSRCS= version.c version.c: echo '#include "version.h"' > ${.TARGET} diff --git a/gnu/usr.bin/gdb/libgdb/frame-unwind.diff b/gnu/usr.bin/gdb/libgdb/frame-unwind.diff deleted file mode 100644 index 8143c55..0000000 --- a/gnu/usr.bin/gdb/libgdb/frame-unwind.diff +++ /dev/null @@ -1,27 +0,0 @@ -$FreeBSD$ - -Index: frame-unwind.c -=================================================================== -RCS file: /home/ncvs/src/contrib/gdb/gdb/frame-unwind.c,v -retrieving revision 1.1.1.1 -diff -u -r1.1.1.1 frame-unwind.c ---- frame-unwind.c 20 Jun 2004 18:16:58 -0000 1.1.1.1 -+++ frame-unwind.c 10 Sep 2005 06:36:55 -0000 -@@ -27,6 +27,8 @@ - - static struct gdbarch_data *frame_unwind_data; - -+frame_unwind_sniffer_ftype *kgdb_sniffer_kluge; -+ - struct frame_unwind_table - { - frame_unwind_sniffer_ftype **sniffer; -@@ -49,6 +51,8 @@ - { - struct frame_unwind_table *table = XCALLOC (1, struct frame_unwind_table); - append_predicate (table, dummy_frame_sniffer); -+ if (kgdb_sniffer_kluge != NULL) -+ append_predicate (table, kgdb_sniffer_kluge); - return table; - } - diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c index 920d139..faf4f0b 100644 --- a/gnu/usr.bin/grep/dfa.c +++ b/gnu/usr.bin/grep/dfa.c @@ -334,9 +334,10 @@ static int hard_LC_COLLATE; /* Nonzero if LC_COLLATE is hard. */ #ifdef MBS_SUPPORT /* These variables are used only if (MB_CUR_MAX > 1). */ static mbstate_t mbs; /* Mbstate for mbrlen(). */ -static int cur_mb_len; /* Byte length of the current scanning - multibyte character. */ -static int cur_mb_index; /* Byte index of the current scanning multibyte +static ssize_t cur_mb_len; /* Byte length of the current scanning + multibyte character. Must also handle + negative result from mbrlen(). */ +static ssize_t cur_mb_index; /* Byte index of the current scanning multibyte character. singlebyte character : cur_mb_index = 0 @@ -369,7 +370,7 @@ static unsigned char const *buf_end; /* refference to end in dfaexec(). */ /* This function update cur_mb_len, and cur_mb_index. p points current lexptr, len is the remaining buffer length. */ static void -update_mb_len_index (unsigned char const *p, int len) +update_mb_len_index (unsigned char const *p, size_t len) { /* If last character is a part of a multibyte character, we update cur_mb_index. */ @@ -2463,7 +2464,7 @@ match_mb_charset (struct dfa *d, int s, position pos, int index) int match; /* Flag which represent that matching succeed. */ int match_len; /* Length of the character (or collating element) with which this operator match. */ - int op_len; /* Length of the operator. */ + size_t op_len; /* Length of the operator. */ char buffer[128]; wchar_t wcbuf[6]; diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c index 0cab4a1..61d1bf1 100644 --- a/gnu/usr.bin/grep/grep.c +++ b/gnu/usr.bin/grep/grep.c @@ -301,14 +301,16 @@ reset (int fd, char const *file, struct stats *stats) error (0, errno, "fstat"); return 0; } - if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) - return 0; + if (fd != STDIN_FILENO) { + if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) + return 0; #ifndef DJGPP - if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode))) + if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode) || S_ISSOCK(stats->stat.st_mode) || S_ISFIFO(stats->stat.st_mode))) #else - if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) + if (devices == SKIP_DEVICES && (S_ISCHR(stats->stat.st_mode) || S_ISBLK(stats->stat.st_mode))) #endif - return 0; + return 0; + } if ( BZflag || #if HAVE_LIBZ > 0 @@ -1350,9 +1352,9 @@ int main (int argc, char **argv) { char *keys; - size_t keycc, oldcc, keyalloc; + size_t cc, keycc, oldcc, keyalloc; int with_filenames; - int opt, cc, status; + int opt, status; int default_context; FILE *fp; extern char *optarg; diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c index 982d2c5..96ee181 100644 --- a/gnu/usr.bin/grep/search.c +++ b/gnu/usr.bin/grep/search.c @@ -112,7 +112,7 @@ static void kwsinit (void) { static char trans[NCHAR]; - int i; + size_t i; if (match_icase) for (i = 0; i < NCHAR; ++i) @@ -326,7 +326,8 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) { register char const *buflim, *beg, *end; char eol = eolbyte; - int backref, start, len; + int backref; + ptrdiff_t start, len; struct kwsmatch kwsm; size_t i, ret_val; static int use_dfa; diff --git a/gnu/usr.bin/groff/tmac/mdoc.local b/gnu/usr.bin/groff/tmac/mdoc.local index 343209e..b11f4b6 100644 --- a/gnu/usr.bin/groff/tmac/mdoc.local +++ b/gnu/usr.bin/groff/tmac/mdoc.local @@ -52,6 +52,7 @@ .ds doc-operating-system-FreeBSD-8.3 8.3 .ds doc-operating-system-FreeBSD-8.4 8.4 .ds doc-operating-system-FreeBSD-9.1 9.1 +.ds doc-operating-system-FreeBSD-9.2 9.2 .ds doc-operating-system-FreeBSD-10.0 10.0 . .\" Definitions not (yet) in doc-syms diff --git a/gnu/usr.bin/patch/EXTERN.h b/gnu/usr.bin/patch/EXTERN.h deleted file mode 100644 index 374c94f..0000000 --- a/gnu/usr.bin/patch/EXTERN.h +++ /dev/null @@ -1,21 +0,0 @@ -/* $FreeBSD$ - * - * $Log: EXTERN.h,v $ - * Revision 2.0 86/09/17 15:35:37 lwall - * Baseline for netwide release. - * - */ - -#ifdef EXT -#undef EXT -#endif -#define EXT extern - -#ifdef INIT -#undef INIT -#endif -#define INIT(x) - -#ifdef DOINIT -#undef DOINIT -#endif diff --git a/gnu/usr.bin/patch/INTERN.h b/gnu/usr.bin/patch/INTERN.h deleted file mode 100644 index c04e722..0000000 --- a/gnu/usr.bin/patch/INTERN.h +++ /dev/null @@ -1,19 +0,0 @@ -/* $FreeBSD$ - * - * $Log: INTERN.h,v $ - * Revision 2.0 86/09/17 15:35:58 lwall - * Baseline for netwide release. - * - */ - -#ifdef EXT -#undef EXT -#endif -#define EXT - -#ifdef INIT -#undef INIT -#endif -#define INIT(x) = x - -#define DOINIT diff --git a/gnu/usr.bin/patch/Makefile b/gnu/usr.bin/patch/Makefile deleted file mode 100644 index 76d28e8..0000000 --- a/gnu/usr.bin/patch/Makefile +++ /dev/null @@ -1,18 +0,0 @@ -# $FreeBSD$ - -.include <bsd.own.mk> - -.if ${MK_BSD_PATCH} == "yes" -PROG= gnupatch -CLEANFILES+= gnupatch.1 - -gnupatch.1: patch.1 - cp ${.ALLSRC} ${.TARGET} -.else -PROG= patch -.endif - -SRCS= backupfile.c inp.c patch.c pch.c util.c version.c -CFLAGS+=-DHAVE_CONFIG_H - -.include <bsd.prog.mk> diff --git a/gnu/usr.bin/patch/Makefile.depend b/gnu/usr.bin/patch/Makefile.depend deleted file mode 100644 index 639d45d..0000000 --- a/gnu/usr.bin/patch/Makefile.depend +++ /dev/null @@ -1,14 +0,0 @@ -# Autogenerated - do NOT edit! - -DEP_RELDIR := ${_PARSEDIR:S,${SRCTOP}/,,} - -DIRDEPS = \ - include \ - include/xlocale \ - - -.include <dirdeps.mk> - -.if ${DEP_RELDIR} == ${_DEP_RELDIR} -# local dependencies - needed for -jN in clean tree -.endif diff --git a/gnu/usr.bin/patch/backupfile.c b/gnu/usr.bin/patch/backupfile.c deleted file mode 100644 index 94a615d..0000000 --- a/gnu/usr.bin/patch/backupfile.c +++ /dev/null @@ -1,390 +0,0 @@ -/* backupfile.c -- make Emacs style backup file names - Copyright (C) 1990, 1991, 1992 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. - Some algorithms adapted from GNU Emacs. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "config.h" -#include <stdio.h> -#include <ctype.h> -#include <sys/types.h> -#include "backupfile.h" -#ifdef STDC_HEADERS -#include <string.h> -#include <stdlib.h> -#else -char *malloc (); -#endif - -#if defined (HAVE_UNISTD_H) -#include <unistd.h> -#endif - -#if defined(DIRENT) || defined(_POSIX_VERSION) -#include <dirent.h> -#define NLENGTH(direct) (strlen((direct)->d_name)) -#else /* not (DIRENT or _POSIX_VERSION) */ -#define dirent direct -#define NLENGTH(direct) ((direct)->d_namlen) -#ifdef SYSNDIR -#include <sys/ndir.h> -#endif /* SYSNDIR */ -#ifdef SYSDIR -#include <sys/dir.h> -#endif /* SYSDIR */ -#ifdef NDIR -#include <ndir.h> -#endif /* NDIR */ -#endif /* DIRENT or _POSIX_VERSION */ - -#ifndef isascii -#define ISDIGIT(c) (isdigit ((unsigned char) (c))) -#else -#define ISDIGIT(c) (isascii (c) && isdigit (c)) -#endif - -#if defined (_POSIX_VERSION) -/* POSIX does not require that the d_ino field be present, and some - systems do not provide it. */ -#define REAL_DIR_ENTRY(dp) 1 -#else -#define REAL_DIR_ENTRY(dp) ((dp)->d_ino != 0) -#endif - -/* Which type of backup file names are generated. */ -enum backup_type backup_type = none; - -/* The extension added to file names to produce a simple (as opposed - to numbered) backup file name. */ -char *simple_backup_suffix = "~"; - -int argmatch(char *_arg, const char **_optlist); -const char *basename(const char *_name); -char *dirname(const char *_path); -static char *concat(const char *_str1, const char *_str2); -char *find_backup_file_name(char *_file); -static char *make_version_name (char *_file, int _version); -static int max_backup_version(char *_file, char *_dir); -static int version_number(char *base, char *backup, int base_length); -void invalid_arg(const char *_kind, char *_value, int _problem); - -/* Return NAME with any leading path stripped off. */ - -const char * -basename(const char *name) -{ - const char *r = name, *p = name; - - while (*p) - if (*p++ == '/') - r = p; - return r; -} - -#ifndef NODIR -/* Return the name of the new backup file for file FILE, - allocated with malloc. Return 0 if out of memory. - FILE must not end with a '/' unless it is the root directory. - Do not call this function if backup_type == none. */ - -char * -find_backup_file_name(char *file) -{ - char *dir; - char *base_versions; - int highest_backup; - - if (backup_type == simple) - { - char *s = malloc (strlen (file) + strlen (simple_backup_suffix) + 1); - strcpy (s, file); - addext (s, simple_backup_suffix, '~'); - return s; - } - base_versions = concat (basename (file), ".~"); - if (base_versions == 0) - return 0; - dir = dirname (file); - if (dir == 0) - { - free (base_versions); - return 0; - } - highest_backup = max_backup_version (base_versions, dir); - free (base_versions); - free (dir); - if (backup_type == numbered_existing && highest_backup == 0) - return concat (file, simple_backup_suffix); - return make_version_name (file, highest_backup + 1); -} - -/* Return the number of the highest-numbered backup file for file - FILE in directory DIR. If there are no numbered backups - of FILE in DIR, or an error occurs reading DIR, return 0. - FILE should already have ".~" appended to it. */ - -static int -max_backup_version(char *file, char *dir) -{ - DIR *dirp; - struct dirent *dp; - int highest_version; - int this_version; - int file_name_length; - - dirp = opendir (dir); - if (!dirp) - return 0; - - highest_version = 0; - file_name_length = strlen (file); - - while ((dp = readdir (dirp)) != 0) - { - if (!REAL_DIR_ENTRY (dp) || NLENGTH (dp) <= file_name_length) - continue; - - this_version = version_number (file, dp->d_name, file_name_length); - if (this_version > highest_version) - highest_version = this_version; - } - closedir (dirp); - return highest_version; -} - -/* Return a string, allocated with malloc, containing - "FILE.~VERSION~". Return 0 if out of memory. */ - -static char * -make_version_name(char *file, int version) -{ - char *backup_name; - - backup_name = malloc (strlen (file) + 16); - if (backup_name == 0) - return 0; - sprintf (backup_name, "%s.~%d~", file, version); - return backup_name; -} - -/* If BACKUP is a numbered backup of BASE, return its version number; - otherwise return 0. BASE_LENGTH is the length of BASE. - BASE should already have ".~" appended to it. */ - -static int -version_number(char *base, char *backup, int base_length) -{ - int version; - char *p; - - version = 0; - if (!strncmp (base, backup, base_length) && ISDIGIT (backup[base_length])) - { - for (p = &backup[base_length]; ISDIGIT (*p); ++p) - version = version * 10 + *p - '0'; - if (p[0] != '~' || p[1]) - version = 0; - } - return version; -} - -/* Return the newly-allocated concatenation of STR1 and STR2. - If out of memory, return 0. */ - -static char * -concat(const char *str1, const char *str2) -{ - char *newstr; - int str1_length = strlen (str1); - - newstr = malloc (str1_length + strlen (str2) + 1); - if (newstr == 0) - return 0; - strcpy (newstr, str1); - strcpy (newstr + str1_length, str2); - return newstr; -} - -/* Return the leading directories part of PATH, - allocated with malloc. If out of memory, return 0. - Assumes that trailing slashes have already been - removed. */ - -char * -dirname(const char *path) -{ - char *newpath; - const char *slash; - int length; /* Length of result, not including NUL. */ - - slash = basename (path); - if (slash == path) - { - /* File is in the current directory. */ - path = "."; - length = 1; - } - else - { - /* Remove any trailing slashes from result. */ - while (*--slash == '/' && slash > path) - ; - - length = slash - path + 1; - } - newpath = malloc (length + 1); - if (newpath == 0) - return 0; - strncpy (newpath, path, length); - newpath[length] = 0; - return newpath; -} - -/* If ARG is an unambiguous match for an element of the - null-terminated array OPTLIST, return the index in OPTLIST - of the matched element, else -1 if it does not match any element - or -2 if it is ambiguous (is a prefix of more than one element). */ - -int -argmatch(char *arg, const char **optlist) -{ - int i; /* Temporary index in OPTLIST. */ - int arglen; /* Length of ARG. */ - int matchind = -1; /* Index of first nonexact match. */ - int ambiguous = 0; /* If nonzero, multiple nonexact match(es). */ - - arglen = strlen (arg); - - /* Test all elements for either exact match or abbreviated matches. */ - for (i = 0; optlist[i]; i++) - { - if (!strncmp (optlist[i], arg, arglen)) - { - if (strlen (optlist[i]) == arglen) - /* Exact match found. */ - return i; - else if (matchind == -1) - /* First nonexact match found. */ - matchind = i; - else - /* Second nonexact match found. */ - ambiguous = 1; - } - } - if (ambiguous) - return -2; - else - return matchind; -} - -/* Error reporting for argmatch. - KIND is a description of the type of entity that was being matched. - VALUE is the invalid value that was given. - PROBLEM is the return value from argmatch. */ - -void -invalid_arg(const char *kind, char *value, int problem) -{ - fprintf (stderr, "patch: "); - if (problem == -1) - fprintf (stderr, "invalid"); - else /* Assume -2. */ - fprintf (stderr, "ambiguous"); - fprintf (stderr, " %s `%s'\n", kind, value); -} - -static const char *backup_args[] = -{ - "never", "simple", "nil", "existing", "t", "numbered", 0 -}; - -static enum backup_type backup_types[] = -{ - simple, simple, numbered_existing, numbered_existing, numbered, numbered -}; - -/* Return the type of backup indicated by VERSION. - Unique abbreviations are accepted. */ - -enum backup_type -get_version(char *version) -{ - int i; - - if (version == 0 || *version == 0) - return numbered_existing; - i = argmatch (version, backup_args); - if (i >= 0) - return backup_types[i]; - invalid_arg ("version control type", version, i); - exit (1); -} -#endif /* NODIR */ - -/* Append to FILENAME the extension EXT, unless the result would be too long, - in which case just append the character E. */ - -void -addext(char *filename, char *ext, int e) -{ - char *s = (char *)(uintptr_t)(const void *)basename (filename); - int slen = strlen (s), extlen = strlen (ext); - long slen_max = -1; - -#if HAVE_PATHCONF && defined (_PC_NAME_MAX) -#ifndef _POSIX_NAME_MAX -#define _POSIX_NAME_MAX 14 -#endif - if (slen + extlen <= _POSIX_NAME_MAX) - /* The file name is so short there's no need to call pathconf. */ - slen_max = _POSIX_NAME_MAX; - else if (s == filename) - slen_max = pathconf (".", _PC_NAME_MAX); - else - { - char c = *s; - *s = 0; - slen_max = pathconf (filename, _PC_NAME_MAX); - *s = c; - } -#endif - if (slen_max == -1) { -#ifdef HAVE_LONG_FILE_NAMES - slen_max = 255; -#else - slen_max = 14; -#endif - } - if (slen + extlen <= slen_max) - strcpy (s + slen, ext); - else - { - if (slen_max <= slen) { - /* Try to preserve difference between .h .c etc. */ - if (slen == slen_max && s[slen - 2] == '.') - s[slen - 2] = s[slen - 1]; - - slen = slen_max - 1; - } - s[slen] = e; - s[slen + 1] = 0; - } -} diff --git a/gnu/usr.bin/patch/backupfile.h b/gnu/usr.bin/patch/backupfile.h deleted file mode 100644 index 7600714..0000000 --- a/gnu/usr.bin/patch/backupfile.h +++ /dev/null @@ -1,44 +0,0 @@ -/* backupfile.h -- declarations for making Emacs style backup file names - Copyright (C) 1990, 1991, 1992 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ - -/* - * $FreeBSD$ - */ - -/* When to make backup files. */ -enum backup_type -{ - /* Never make backups. */ - none, - - /* Make simple backups of every file. */ - simple, - - /* Make numbered backups of files that already have numbered backups, - and simple backups of the others. */ - numbered_existing, - - /* Make numbered backups of every file. */ - numbered -}; - -extern enum backup_type backup_type; -extern char *simple_backup_suffix; - -char *find_backup_file_name (char *_file); -enum backup_type get_version(char *_version); -void addext(char *_filename, char *_ext, int _e); diff --git a/gnu/usr.bin/patch/common.h b/gnu/usr.bin/patch/common.h deleted file mode 100644 index aa19105..0000000 --- a/gnu/usr.bin/patch/common.h +++ /dev/null @@ -1,195 +0,0 @@ -/* $FreeBSD$ - * - * $Log: common.h,v $ - * Revision 2.0.1.2 88/06/22 20:44:53 lwall - * patch12: sprintf was declared wrong - * - * Revision 2.0.1.1 88/06/03 15:01:56 lwall - * patch10: support for shorter extensions. - * - * Revision 2.0 86/09/17 15:36:39 lwall - * Baseline for netwide release. - * - */ - -#define DEBUGGING - -#define VOIDUSED 7 -#include "config.h" - -/* shut lint up about the following when return value ignored */ - -#define Signal (void)signal -#define Unlink (void)unlink -#define Lseek (void)lseek -#define Fseek (void)fseek -#define Fstat (void)fstat -#define Pclose (void)pclose -#define Close (void)close -#define Fclose (void)fclose -#define Fflush (void)fflush -#define Sprintf (void)sprintf -#define Snprintf (void)snprintf -#define Mktemp (void)mktemp -#define Strcpy (void)strcpy -#define Strcat (void)strcat -#define Strlcpy (void)strlcpy -#define Strncpy (void)strncpy -#define Strlcat (void)strlcat - -/* NeXT declares malloc and realloc incompatibly from us in some of - these files. Temporarily redefine them to prevent errors. */ -#define malloc system_malloc -#define realloc system_realloc -#include <stdio.h> -#include <assert.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <ctype.h> -#include <signal.h> -#undef malloc -#undef realloc - -/* constants */ - -/* AIX predefines these. */ -#ifdef TRUE -#undef TRUE -#endif -#ifdef FALSE -#undef FALSE -#endif -#define TRUE (1) -#define FALSE (0) - -#define MAXHUNKSIZE 200000 /* is this enough lines? */ -#define INITHUNKMAX 125 /* initial dynamic allocation size */ -#define INITLINELEN 4096 -#define BUFFERSIZE 4096 - -#define SCCSPREFIX "s." -#define GET "get %s" -#define GET_LOCKED "get -e %s" -#define SCCSDIFF "get -p %s | diff - %s >/dev/null" - -#define RCSSUFFIX ",v" -#define CHECKOUT "co %s" -#define CHECKOUT_LOCKED "co -l %s" -#define RCSDIFF "rcsdiff %s > /dev/null" - -/* handy definitions */ - -#define Null(t) ((t)0) -#define Nullch Null(char *) -#define Nullfp Null(FILE *) -#define Nulline Null(LINENUM) - -#define Ctl(ch) ((ch) & 037) - -#define strNE(s1,s2) (strcmp(s1, s2)) -#define strEQ(s1,s2) (!strcmp(s1, s2)) -#define strnNE(s1,s2,l) (strncmp(s1, s2, l)) -#define strnEQ(s1,s2,l) (!strncmp(s1, s2, l)) - -/* typedefs */ - -typedef char bool; -typedef long LINENUM; /* must be signed */ -typedef unsigned MEM; /* what to feed malloc */ - -/* globals */ - -EXT int Argc; /* guess */ -EXT char **Argv; -EXT int optind_last; /* for restarting plan_b */ - -EXT struct stat filestat; /* file statistics area */ -EXT int filemode INIT(0644); - -EXT char *buf; /* general purpose buffer */ -EXT size_t buf_size; /* size of the general purpose buffer */ -EXT FILE *ofp INIT(Nullfp); /* output file pointer */ -EXT FILE *rejfp INIT(Nullfp); /* reject file pointer */ - -EXT int myuid; /* cache getuid return value */ - -EXT bool using_plan_a INIT(TRUE); /* try to keep everything in memory */ -EXT bool out_of_mem INIT(FALSE); /* ran out of memory in plan a */ - -#define MAXFILEC 2 -EXT int filec INIT(0); /* how many file arguments? */ -EXT char *filearg[MAXFILEC]; -EXT bool ok_to_create_file INIT(FALSE); -EXT char *bestguess INIT(Nullch); /* guess at correct filename */ - -EXT char *outname INIT(Nullch); -EXT char rejname[128]; - -EXT char *origprae INIT(Nullch); - -EXT char *TMPOUTNAME; -EXT char *TMPINNAME; -EXT char *TMPREJNAME; -EXT char *TMPPATNAME; -EXT bool toutkeep INIT(FALSE); -EXT bool trejkeep INIT(FALSE); - -EXT LINENUM last_offset INIT(0); -#ifdef DEBUGGING -EXT int debug INIT(0); -#endif -EXT LINENUM maxfuzz INIT(2); -EXT bool force INIT(FALSE); -EXT bool batch INIT(FALSE); -EXT bool verbose INIT(TRUE); -EXT bool reverse INIT(FALSE); -EXT bool noreverse INIT(FALSE); -EXT bool skip_rest_of_patch INIT(FALSE); -EXT int strippath INIT(957); -EXT bool canonicalize INIT(FALSE); - -#define CONTEXT_DIFF 1 -#define NORMAL_DIFF 2 -#define ED_DIFF 3 -#define NEW_CONTEXT_DIFF 4 -#define UNI_DIFF 5 -EXT int diff_type INIT(0); - -EXT bool do_defines INIT(FALSE); /* patch using ifdef, ifndef, etc. */ -EXT char if_defined[128]; /* #ifdef xyzzy */ -EXT char not_defined[128]; /* #ifndef xyzzy */ -EXT char else_defined[] INIT("#else\n");/* #else */ -EXT char end_defined[128]; /* #endif xyzzy */ - -EXT char *revision INIT(Nullch); /* prerequisite revision, if any */ - -#include <errno.h> -#ifdef STDC_HEADERS -#include <stdlib.h> -#include <string.h> -#else -extern int errno; -FILE *popen(); -char *malloc(); -char *realloc(); -long atol(); -char *getenv(); -char *strcpy(); -char *strcat(); -#endif -char *mktemp(char *); -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#else -long lseek(); -#endif -#if defined(_POSIX_VERSION) || defined(HAVE_FCNTL_H) -#include <fcntl.h> -#endif - -#if !defined(S_ISDIR) && defined(S_IFDIR) -#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) -#endif -#if !defined(S_ISREG) && defined(S_IFREG) -#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) -#endif diff --git a/gnu/usr.bin/patch/config.h b/gnu/usr.bin/patch/config.h deleted file mode 100644 index 177a8c5..0000000 --- a/gnu/usr.bin/patch/config.h +++ /dev/null @@ -1,81 +0,0 @@ -/* config.h. Generated automatically by configure. */ -/* Portability variables. -*- C -*- */ - -/* Define if the system does not support the `const' keyword. */ -/* #undef const */ - -/* Define if the system supports file names longer than 14 characters. */ -#define HAVE_LONG_FILE_NAMES - -/* Define if the system has pathconf(). */ -/* #undef HAVE_PATHCONF */ - -/* Define if the system has strerror(). */ -#define HAVE_STRERROR 1 - -/* Define if the system has ANSI C header files and library functions. */ -#define STDC_HEADERS - -/* Define if the system uses strchr instead of index - and strrchr instead of rindex. */ -#define HAVE_STRING_H 1 - -#if defined(STDC_HEADERS) || defined(HAVE_STRING_H) -#define index strchr -#define rindex strrchr -#endif - -/* Define if the system has unistd.h. */ -#define HAVE_UNISTD_H 1 - -/* Define if the system has fcntl.h. */ -#define HAVE_FCNTL_H 1 - -/* Define as either int or void -- the type that signal handlers return. */ -#define RETSIGTYPE void - -#ifndef RETSIGTYPE -#define RETSIGTYPE void -#endif - -/* Which directory library header to use. */ -#define DIRENT 1 /* dirent.h */ -/* #undef SYSNDIR */ /* sys/ndir.h */ -/* #undef SYSDIR */ /* sys/dir.h */ -/* #undef NDIR */ /* ndir.h */ -/* #undef NODIR */ /* none -- don't make numbered backup files */ - -/* Define if the system lets you pass fewer arguments to a function - than the function actually accepts (in the absence of a prototype). - Defining it makes I/O calls slightly more efficient. - You need not bother defining it unless your C preprocessor chokes on - multi-line arguments to macros. */ -/* #undef CANVARARG */ - -/* Define Reg* as either `register' or nothing, depending on whether - the C compiler pays attention to this many register declarations. - The intent is that you don't have to order your register declarations - in the order of importance, so you can freely declare register variables - in sub-blocks of code and as function parameters. - Do not use Reg<n> more than once per routine. - - These don't really matter a lot, since most modern C compilers ignore - register declarations and often do a better job of allocating - registers than people do. */ - -#define Reg1 register -#define Reg2 register -#define Reg3 register -#define Reg4 register -#define Reg5 register -#define Reg6 register -#define Reg7 -#define Reg8 -#define Reg9 -#define Reg10 -#define Reg11 -#define Reg12 -#define Reg13 -#define Reg14 -#define Reg15 -#define Reg16 diff --git a/gnu/usr.bin/patch/inp.c b/gnu/usr.bin/patch/inp.c deleted file mode 100644 index 17f8adb..0000000 --- a/gnu/usr.bin/patch/inp.c +++ /dev/null @@ -1,397 +0,0 @@ -/* $FreeBSD$ - * - * $Log: inp.c,v $ - * Revision 2.0.1.1 88/06/03 15:06:13 lwall - * patch10: made a little smarter about sccs files - * - * Revision 2.0 86/09/17 15:37:02 lwall - * Baseline for netwide release. - * - */ - -#include "EXTERN.h" -#include "common.h" -#include "util.h" -#include "pch.h" -#include "INTERN.h" -#include "inp.h" - -/* Input-file-with-indexable-lines abstract type. */ - -static long i_size; /* Size of the input file */ -static char *i_womp; /* Plan a buffer for entire file */ -static char **i_ptr; /* Pointers to lines in i_womp */ - -static int tifd = -1; /* Plan b virtual string array */ -static char *tibuf[2]; /* Plan b buffers */ -static LINENUM tiline[2] = {-1, -1}; /* 1st line in each buffer */ -static LINENUM lines_per_buf; /* How many lines per buffer */ -static int tireclen; /* Length of records in tmp file */ - -/* - * New patch--prepare to edit another file. - */ -void -re_input(void) -{ - if (using_plan_a) { - i_size = 0; -#ifndef lint - if (i_ptr != Null(char**)) - free((char *)i_ptr); -#endif - if (i_womp != Nullch) - free(i_womp); - i_womp = Nullch; - i_ptr = Null(char **); - } else { - using_plan_a = TRUE; /* maybe the next one is smaller */ - Close(tifd); - tifd = -1; - free(tibuf[0]); - free(tibuf[1]); - tibuf[0] = tibuf[1] = Nullch; - tiline[0] = tiline[1] = -1; - tireclen = 0; - } -} - -/* - * Constuct the line index, somehow or other. - */ -void -scan_input(char *filename) -{ - if (!plan_a(filename)) - plan_b(filename); - if (verbose) { - say3("Patching file %s using Plan %s...\n", filename, - (using_plan_a ? "A" : "B") ); - } -} - -/* - * Try keeping everything in memory. - */ -bool -plan_a(char *filename) -{ - int ifd, statfailed; - Reg1 char *s; - Reg2 LINENUM iline; - char lbuf[INITLINELEN]; - int output_elsewhere = strcmp(filename, outname); - extern int check_patch; - - statfailed = stat(filename, &filestat); - if (statfailed && ok_to_create_file) { - if (verbose) - say2("(Creating file %s...)\n",filename); - if (check_patch) - return TRUE; - makedirs(filename, TRUE); - close(creat(filename, 0666)); - statfailed = stat(filename, &filestat); - } - if (statfailed && check_patch) { - fatal2("%s not found and in check_patch mode.", filename); - } - /* - * For nonexistent or read-only files, look for RCS or SCCS - * versions. - */ - if (statfailed || - (! output_elsewhere && - (/* No one can write to it. */ - (filestat.st_mode & 0222) == 0 || - /* I can't write to it. */ - ((filestat.st_mode & 0022) == 0 && filestat.st_uid != myuid)))) { - struct stat cstat; - char *cs = Nullch; - char *filebase; - int pathlen; - - filebase = basename(filename); - pathlen = filebase - filename; - - /* - * Put any leading path into `s'. - * Leave room in lbuf for the diff command. - */ - s = lbuf + 20; - strncpy(s, filename, pathlen); - -#define try(f,a1,a2) (Sprintf(s + pathlen, f, a1, a2), stat(s, &cstat) == 0) - if ((try("RCS/%s%s", filebase, RCSSUFFIX) || - try("RCS/%s%s", filebase, "") || - try("%s%s", filebase, RCSSUFFIX)) && - /* - * Check that RCS file is not working file. - * Some hosts don't report file name length errors. - */ - (statfailed || - ((filestat.st_dev ^ cstat.st_dev) | - (filestat.st_ino ^ cstat.st_ino)))) { - Sprintf(buf, output_elsewhere?CHECKOUT:CHECKOUT_LOCKED, filename); - Sprintf(lbuf, RCSDIFF, filename); - cs = "RCS"; - } else if (try("SCCS/%s%s", SCCSPREFIX, filebase) || - try("%s%s", SCCSPREFIX, filebase)) { - Sprintf(buf, output_elsewhere?GET:GET_LOCKED, s); - Sprintf(lbuf, SCCSDIFF, s, filename); - cs = "SCCS"; - } else if (statfailed) - fatal2("can't find %s\n", filename); - /* - * else we can't write to it but it's not under a version - * control system, so just proceed. - */ - if (cs) { - if (!statfailed) { - if ((filestat.st_mode & 0222) != 0) - /* The owner can write to it. */ - fatal3( -"file %s seems to be locked by somebody else under %s\n", - filename, cs); - /* - * It might be checked out unlocked. See if - * it's safe to check out the default version - * locked. - */ - if (verbose) - say3( -"Comparing file %s to default %s version...\n", - filename, cs); - if (system(lbuf)) - fatal3( -"can't check out file %s: differs from default %s version\n", - filename, cs); - } - if (verbose) - say3("Checking out file %s from %s...\n", filename, cs); - if (system(buf) || stat(filename, &filestat)) - fatal3("can't check out file %s from %s\n", - filename, cs); - } - } - filemode = filestat.st_mode; - if (!S_ISREG(filemode)) - fatal2("%s is not a normal file--can't patch\n", filename); - i_size = filestat.st_size; - if (out_of_mem) { - set_hunkmax(); /* make sure dynamic arrays are allocated */ - out_of_mem = FALSE; - return FALSE; /* force plan b because plan a bombed */ - } -#ifdef lint - i_womp = Nullch; -#else - /* - * Lint says this may alloc less than i_size, - * but that's okay, I think. - */ - i_womp = malloc((MEM)(i_size + 2)); -#endif - if (i_womp == Nullch) - return FALSE; - if ((ifd = open(filename, 0)) < 0) - pfatal2("can't open file %s", filename); -#ifndef lint - if (read(ifd, i_womp, (int)i_size) != i_size) { - /* - * This probably means i_size > 15 or 16 bits worth. At - * this point it doesn't matter if i_womp was undersized. - */ - Close(ifd); - free(i_womp); - return FALSE; - } -#endif - Close(ifd); - if (i_size && i_womp[i_size - 1] != '\n') - i_womp[i_size++] = '\n'; - i_womp[i_size] = '\0'; - - /* - * Count the lines in the buffer so we know how many pointers we - * need. - */ - iline = 0; - for (s = i_womp; *s; s++) { - if (*s == '\n') - iline++; - } -#ifdef lint - i_ptr = Null(char**); -#else - i_ptr = (char **)malloc((MEM)((iline + 2) * sizeof(char *))); -#endif - if (i_ptr == Null(char **)) { /* shucks, it was a near thing */ - free((char *)i_womp); - return FALSE; - } - - /* Now scan the buffer and build pointer array. */ - iline = 1; - i_ptr[iline] = i_womp; - for (s = i_womp; *s; s++) { - if (*s == '\n') { - /* These are NOT null terminated. */ - i_ptr[++iline] = s + 1; - } - } - input_lines = iline - 1; - - /* Now check for revision, if any. */ - if (revision != Nullch) { - if (!rev_in_string(i_womp)) { - if (force) { - if (verbose) - say2( -"Warning: this file doesn't appear to be the %s version--patching anyway.\n", - revision); - } else if (batch) { - fatal2( -"this file doesn't appear to be the %s version--aborting.\n", revision); - } else { - (void) ask2( -"This file doesn't appear to be the %s version--patch anyway? [n] ", - revision); - if (*buf != 'y') - fatal1("aborted\n"); - } - } else if (verbose) - say2("Good. This file appears to be the %s version.\n", - revision); - } - - return TRUE; /* Plan a will work. */ -} - -/* - * Keep (virtually) nothing in memory. - */ -void -plan_b(char *filename) -{ - Reg3 FILE *ifp; - Reg1 int i = 0; - Reg2 int maxlen = 1; - Reg4 bool found_revision = (revision == Nullch); - - using_plan_a = FALSE; - if ((ifp = fopen(filename, "r")) == Nullfp) - pfatal2("can't open file %s", filename); - if ((tifd = creat(TMPINNAME, 0666)) < 0) - pfatal2("can't open file %s", TMPINNAME); - while (fgets(buf, buf_size, ifp) != Nullch) { - if (revision != Nullch && !found_revision && rev_in_string(buf)) - found_revision = TRUE; - if ((i = strlen(buf)) > maxlen) - maxlen = i; /* Find longest line. */ - } - if (revision != Nullch) { - if (!found_revision) { - if (force) { - if (verbose) - say2( -"Warning: this file doesn't appear to be the %s version--patching anyway.\n", - revision); - } else if (batch) { - fatal2( -"this file doesn't appear to be the %s version--aborting.\n", revision); - } else { - (void) ask2( -"This file doesn't appear to be the %s version--patch anyway? [n] ", - revision); - if (*buf != 'y') - fatal1("aborted\n"); - } - } else if (verbose) - say2("Good. This file appears to be the %s version.\n", - revision); - } - Fseek(ifp, 0L, 0); /* Rewind file. */ - lines_per_buf = BUFFERSIZE / maxlen; - tireclen = maxlen; - tibuf[0] = malloc((MEM)(BUFFERSIZE + 1)); - tibuf[1] = malloc((MEM)(BUFFERSIZE + 1)); - if (tibuf[1] == Nullch) - fatal1("out of memory\n"); - for (i = 1; ; i++) { - if (! (i % lines_per_buf)) /* New block. */ - if (write(tifd, tibuf[0], BUFFERSIZE) < BUFFERSIZE) - pfatal1("can't write temp file"); - if (fgets(tibuf[0] + maxlen * (i%lines_per_buf), - maxlen + 1, ifp) == Nullch) { - input_lines = i - 1; - if (i % lines_per_buf) - if (write(tifd, tibuf[0], BUFFERSIZE) < - BUFFERSIZE) - pfatal1("can't write temp file"); - break; - } - } - Fclose(ifp); - Close(tifd); - if ((tifd = open(TMPINNAME, 0)) < 0) { - pfatal2("can't reopen file %s", TMPINNAME); - } -} - -/* - * Fetch a line from the input file, \n terminated, not necessarily \0. - */ -char * -ifetch(line,whichbuf) -Reg1 LINENUM line; -int whichbuf; /* ignored when file in memory */ -{ - if (line < 1 || line > input_lines) - return ""; - if (using_plan_a) - return i_ptr[line]; - else { - LINENUM offline = line % lines_per_buf; - LINENUM baseline = line - offline; - - if (tiline[0] == baseline) - whichbuf = 0; - else if (tiline[1] == baseline) - whichbuf = 1; - else { - tiline[whichbuf] = baseline; -#ifndef lint /* complains of long accuracy */ - Lseek(tifd, (long)baseline / lines_per_buf * BUFFERSIZE, 0); -#endif - if (read(tifd, tibuf[whichbuf], BUFFERSIZE) < 0) - pfatal2("error reading tmp file %s", TMPINNAME); - } - return tibuf[whichbuf] + (tireclen * offline); - } -} - -/* - * True if the string argument contains the revision number we want. - */ -bool -rev_in_string(char *string) -{ - Reg1 char *s; - Reg2 int patlen; - - if (revision == Nullch) - return TRUE; - patlen = strlen(revision); - if (strnEQ(string,revision,patlen) && - isspace((unsigned char)string[patlen])) - return TRUE; - for (s = string; *s; s++) { - if (isspace((unsigned char)*s) && - strnEQ(s + 1, revision, patlen) && - isspace((unsigned char)s[patlen + 1] )) { - return TRUE; - } - } - return FALSE; -} diff --git a/gnu/usr.bin/patch/inp.h b/gnu/usr.bin/patch/inp.h deleted file mode 100644 index f1f0386..0000000 --- a/gnu/usr.bin/patch/inp.h +++ /dev/null @@ -1,18 +0,0 @@ -/* $FreeBSD$ - * - * $Log: inp.h,v $ - * Revision 2.0 86/09/17 15:37:25 lwall - * Baseline for netwide release. - * - */ - -EXT LINENUM input_lines INIT(0); /* how long is input file in lines */ -EXT LINENUM last_frozen_line INIT(0); /* how many input lines have been */ - /* irretractibly output */ -void re_input(void); -void scan_input(char *_filename); -bool plan_a(char *_filename); -void plan_b(char *_filename); -bool rev_in_string(char *_string); -char *ifetch(LINENUM _line, int _whichbuf); - diff --git a/gnu/usr.bin/patch/patch.1 b/gnu/usr.bin/patch/patch.1 deleted file mode 100644 index 3cc7957..0000000 --- a/gnu/usr.bin/patch/patch.1 +++ /dev/null @@ -1,594 +0,0 @@ -.\" -*- nroff -*- -.rn '' }` -'\" $Header: patch.man,v 2.0.1.2 88/06/22 20:47:18 lwall Locked $ -'\" $FreeBSD$ -'\" -'\" $Log: patch.man,v $ -'\" Revision 2.0.1.2 88/06/22 20:47:18 lwall -'\" patch12: now avoids Bell System Logo -'\" -'\" Revision 2.0.1.1 88/06/03 15:12:51 lwall -'\" patch10: -B switch was contributed. -'\" -'\" Revision 2.0 86/09/17 15:39:09 lwall -'\" Baseline for netwide release. -'\" -'\" Revision 1.4 86/08/01 19:23:22 lwall -'\" Documented -v, -p, -F. -'\" Added notes to patch senders. -'\" -'\" Revision 1.3 85/03/26 15:11:06 lwall -'\" Frozen. -'\" -'\" Revision 1.2.1.4 85/03/12 16:14:27 lwall -'\" Documented -p. -'\" -'\" Revision 1.2.1.3 85/03/12 16:09:41 lwall -'\" Documented -D. -'\" -'\" Revision 1.2.1.2 84/12/05 11:06:55 lwall -'\" Added -l switch, and noted bistability bug. -'\" -'\" Revision 1.2.1.1 84/12/04 17:23:39 lwall -'\" Branch for sdcrdcf changes. -'\" -'\" Revision 1.2 84/12/04 17:22:02 lwall -'\" Baseline version. -'\" -.de Sh -.br -.ne 5 -.PP -\fB\\$1\fR -.PP -.. -.de Sp -.if t .sp .5v -.if n .sp -.. -'\" -'\" Set up \*(-- to give an unbreakable dash; -'\" string Tr holds user defined translation string. -'\" Bell System Logo is used as a dummy character. -'\" -.if !d Tr .ds Tr -.ie n \{\ -.tr \(*W-\*(Tr -.ds -- \(*W- -.if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch -.if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch -.ds L" "" -.ds R" "" -.ds L' ' -.ds R' ' -'br \} -.el \{\ -.ds -- \(em\| -.tr \*(Tr -.ds L" `` -.ds R" '' -.ds L' ` -.ds R' ' -'br\} -.TH PATCH 1 LOCAL -.SH NAME -patch - apply a diff file to an original -.SH SYNOPSIS -.B patch -[options] [origfile [patchfile]] [+ [options] [origfile]]... -.sp -but usually just -.sp -.B patch -<patchfile -.SH DESCRIPTION -.I Patch -will take a patch file containing any of the four forms of difference -listing produced by the -.I diff -program and apply those differences to an original file, producing a patched -version. -By default, the patched version is put in place of the original, with -the original file backed up to the same name with the -extension \*(L".orig\*(R" (\*(L"~\*(R" on systems that do not -support long file names), or as specified by the -\fB\-b\fP (\fB\-\-suffix\fP), -\fB\-B\fP (\fB\-\-prefix\fP), -or -\fB\-V\fP (\fB\-\-version\-control\fP) -options. -The extension used for making backup files may also be specified in the -.B SIMPLE_BACKUP_SUFFIX -environment variable, which is overridden by the above options. -.PP -If the backup file already exists, -.B patch -creates a new backup file name by changing the first lowercase letter -in the last component of the file's name into uppercase. If there are -no more lowercase letters in the name, it removes the first character -from the name. It repeats this process until it comes up with a -backup file that does not already exist. -.PP -You may also specify where you want the output to go with a -\fB\-o\fP (\fB\-\-output\fP) -option; if that file already exists, it is backed up first. -.PP -If -.I patchfile -is omitted, or is a hyphen, the patch will be read from standard input. -If a -\fB\-i\fP -argument is specified, the filename following it will be used, instead of -standard input. You may specify only one -\fB\-i\fP -directive. -.PP -Upon startup, patch will attempt to determine the type of the diff listing, -unless over-ruled by a -\fB\-c\fP (\fB\-\-context\fP), -\fB\-e\fP (\fB\-\-ed\fP), -\fB\-n\fP (\fB\-\-normal\fP), -or -\fB\-u\fP (\fB\-\-unified\fP) -option. -Context diffs (old-style, new-style, and unified) and -normal diffs are applied by the -.I patch -program itself, while -.I ed -diffs are simply fed to the -.I ed -editor via a pipe. -.PP -.I Patch -will try to skip any leading garbage, apply the diff, -and then skip any trailing garbage. -Thus you could feed an article or message containing a -diff listing to -.IR patch , -and it should work. -If the entire diff is indented by a consistent amount, -this will be taken into account. -.PP -With context diffs, and to a lesser extent with normal diffs, -.I patch -can detect when the line numbers mentioned in the patch are incorrect, -and will attempt to find the correct place to apply each hunk of the patch. -As a first guess, it takes the line number mentioned for the hunk, plus or -minus any offset used in applying the previous hunk. -If that is not the correct place, -.I patch -will scan both forwards and backwards for a set of lines matching the context -given in the hunk. -First -.I patch -looks for a place where all lines of the context match. -If no such place is found, and it's a context diff, and the maximum fuzz factor -is set to 1 or more, then another scan takes place ignoring the first and last -line of context. -If that fails, and the maximum fuzz factor is set to 2 or more, -the first two and last two lines of context are ignored, -and another scan is made. -(The default maximum fuzz factor is 2.) -If -.I patch -cannot find a place to install that hunk of the patch, it will put the -hunk out to a reject file, which normally is the name of the output file -plus \*(L".rej\*(R" (\*(L"#\*(R" on systems that do not support -long file names). -(Note that the rejected hunk will come out in context diff form whether the -input patch was a context diff or a normal diff. -If the input was a normal diff, many of the contexts will simply be null.) -The line numbers on the hunks in the reject file may be different than -in the patch file: they reflect the approximate location patch thinks the -failed hunks belong in the new file rather than the old one. -.PP -As each hunk is completed, you will be told whether the hunk succeeded or -failed, and which line (in the new file) -.I patch -thought the hunk should go on. -If this is different from the line number specified in the diff you will -be told the offset. -A single large offset MAY be an indication that a hunk was installed in the -wrong place. -You will also be told if a fuzz factor was used to make the match, in which -case you should also be slightly suspicious. -.PP -If no original file is specified on the command line, -.I patch -will try to figure out from the leading garbage what the name of the file -to edit is. -In the header of a context diff, the file name is found from lines beginning -with \*(L"***\*(R" or \*(L"---\*(R", with the shortest name of an existing -file winning. -Only context diffs have lines like that, but if there is an \*(L"Index:\*(R" -line in the leading garbage, -.I patch -will try to use the file name from that line. -The context diff header takes precedence over an Index line. -If no file name can be intuited from the leading garbage, you will be asked -for the name of the file to patch. -.PP -If the original file cannot be found or is read-only, but a suitable -SCCS or RCS file is handy, -.I patch -will attempt to get or check out the file. -.PP -Additionally, if the leading garbage contains a \*(L"Prereq: \*(R" line, -.I patch -will take the first word from the prerequisites line (normally a version -number) and check the input file to see if that word can be found. -If not, -.I patch -will ask for confirmation before proceeding. -.PP -The upshot of all this is that you should be able to say, while in a news -interface, the following: -.Sp - | patch -d /usr/src/local/blurfl -.Sp -and patch a file in the blurfl directory directly from the article containing -the patch. -.PP -If the patch file contains more than one patch, -.I patch -will try to apply each of them as if they came from separate patch files. -This means, among other things, that it is assumed that the name of the file -to patch must be determined for each diff listing, -and that the garbage before each diff listing will -be examined for interesting things such as file names and revision level, as -mentioned previously. -You can give options (and another original file name) for the second and -subsequent patches by separating the corresponding argument lists -by a \*(L'+\*(R'. -(The argument list for a second or subsequent patch may not specify a new -patch file, however.) -.PP -.I Patch -recognizes the following options: -.TP 5 -.B "\-b suff, \-\-suffix=suff" -causes -.B suff -to be interpreted as the backup extension, to be -used in place of \*(L".orig\*(R" or \*(L"~\*(R". -.TP 5 -.B "\-B pref, \-\-prefix=pref" -causes -.B pref -to be interpreted as a prefix to the backup file -name. If this argument is specified, any argument from -.B \-b -will be ignored. -.TP 5 -.B "\-c, \-\-context" -forces -.I patch -to interpret the patch file as a context diff. -.TP 5 -.B "\-C, \-\-check" -see what would happen, but don't do it. -.TP 5 -.B "\-d dir, \-\-directory=dir" -causes -.I patch -to interpret -.B dir -as a directory, and cd to it before doing -anything else. -.TP 5 -.B "\-D sym, \-\-ifdef=sym" -causes -.I patch -to use the "#ifdef...#endif" construct to mark changes. -.B sym -will be used as the differentiating symbol. -.TP 5 -.B "\-e, \-\-ed" -forces -.I patch -to interpret the patch file as an -.I ed -script. -.TP 5 -.B "\-E, \-\-remove\-empty\-files" -causes -.I patch -to remove output files that are empty after the patches have been applied. -.TP 5 -.B "\-f, \-\-force" -forces -.I patch -to assume that the user knows exactly what he or she is doing, and to not -ask any questions. It assumes the following: skip patches for which a -file to patch can't be found; patch files even though they have the -wrong version for the ``Prereq:'' line in the patch; and assume that -patches are not reversed even if they look like they are. -This option does not suppress commentary; use -.B \-s -for that. -.TP 5 -.B "\-t, \-\-batch" -similar to -.BR \-f , -in that it suppresses questions, but makes some different assumptions: -skip patches for which a file to patch can't be found (the same as \fB\-f\fP); -skip patches for which the file has the wrong version for the ``Prereq:'' line -in the patch; and assume that patches are reversed if they look like -they are. -.TP 5 -.B "\-F number, \-\-fuzz=number" -sets the maximum fuzz factor. -This option only applies to context diffs, and causes -.I patch -to ignore up to that many lines in looking for places to install a hunk. -Note that a larger fuzz factor increases the odds of a faulty patch. -The default fuzz factor is 2, and it may not be set to more than -the number of lines of context in the context diff, ordinarily 3. -.TP 5 -.B "\-i patchfile" -tells -.I patch -to apply \fBpatchfile\fP instead of stdin. -.TP 5 -.B "\-I, \-\-index-first" -forces -.I patch -to take ``Index:'' line precedence over context diff header. -The same effect have -.B PATCH_INDEX_FIRST -environment variable if present. -.TP 5 -.B "\-l, \-\-ignore\-whitespace" -causes the pattern matching to be done loosely, in case the tabs and -spaces have been munged in your input file. -Any sequence of whitespace in the pattern line will match any sequence -in the input file. -Normal characters must still match exactly. -Each line of the context must still match a line in the input file. -.TP 5 -.B "\-n, \-\-normal" -forces -.I patch -to interpret the patch file as a normal diff. -.TP 5 -.B "\-N, \-\-forward" -causes -.I patch -to ignore patches that it thinks are reversed or already applied. -See also -.B \-R . -.TP 5 -.B "\-o file, \-\-output=file" -causes -.B file -to be interpreted as the output file name. -.TP 5 -.B "\-p[number], \-\-strip[=number]" -sets the pathname strip count, -which controls how pathnames found in the patch file are treated, in case -you keep your files in a different directory than the person who sent -out the patch. -The strip count specifies how many slashes are to be stripped from -the front of the pathname. -(Any intervening directory names also go away.) -For example, supposing the file name in the patch file was -.sp - /u/howard/src/blurfl/blurfl.c -.sp -setting -.B \-p -or -.B \-p0 -gives the entire pathname unmodified, -.B \-p1 -gives -.sp - u/howard/src/blurfl/blurfl.c -.sp -without the leading slash, -.B \-p4 -gives -.sp - blurfl/blurfl.c -.sp -and not specifying -.B \-p -at all just gives you "blurfl.c", unless all of the directories in the -leading path (u/howard/src/blurfl) exist and that path is relative, -in which case you get the entire pathname unmodified. -Whatever you end up with is looked for either in the current directory, -or the directory specified by the -.B \-d -option. -.TP 5 -.B "\-r file, \-\-reject\-file=file" -causes -.B file -to be interpreted as the reject file name. -.TP 5 -.B "\-R, \-\-reverse" -tells -.I patch -that this patch was created with the old and new files swapped. -(Yes, I'm afraid that does happen occasionally, human nature being what it -is.) -.I Patch -will attempt to swap each hunk around before applying it. -Rejects will come out in the swapped format. -The -.B \-R -option will not work with -.I ed -diff scripts because there is too little -information to reconstruct the reverse operation. -.Sp -If the first hunk of a patch fails, -.I patch -will reverse the hunk to see if it can be applied that way. -If it can, you will be asked if you want to have the -.B \-R -option set. -If it can't, the patch will continue to be applied normally. -(Note: this method cannot detect a reversed patch if it is a normal diff -and if the first command is an append (i.e. it should have been a delete) -since appends always succeed, due to the fact that a null context will match -anywhere. -Luckily, most patches add or change lines rather than delete them, so most -reversed normal diffs will begin with a delete, which will fail, triggering -the heuristic.) -.TP 5 -.B "\-s, \-\-silent, \-\-quiet" -makes -.I patch -do its work silently, unless an error occurs. -.TP 5 -.B "\-S, \-\-skip" -causes -.I patch -to ignore this patch from the patch file, but continue on looking -for the next patch in the file. -Thus -.sp - patch -S + -S + <patchfile -.sp -will ignore the first and second of three patches. -.TP 5 -.B "\-u, \-\-unified" -forces -.I patch -to interpret the patch file as a unified context diff (a unidiff). -.TP 5 -.B "\-v, \-\-version" -causes -.I patch -to print out its revision header and patch level. -.TP 5 -.B "\-V method, \-\-version\-control=method" -causes -.B method -to be interpreted as a method for creating -backup file names. The type of backups made can also be given in the -.B VERSION_CONTROL -environment variable, which is overridden by this option. -The -.B -B -option overrides this option, causing the prefix to always be used for -making backup file names. -The value of the -.B VERSION_CONTROL -environment variable and the argument to the -.B -V -option are like the GNU -Emacs `version-control' variable; they also recognize synonyms that -are more descriptive. The valid values are (unique abbreviations are -accepted): -.RS -.TP -`t' or `numbered' -Always make numbered backups. -.TP -`nil' or `existing' -Make numbered backups of files that already -have them, simple backups of the others. -This is the default. -.TP -`never' or `simple' -Always make simple backups. -.RE -.TP 5 -.B "\-x number, \-\-debug=number" -sets internal debugging flags, and is of interest only to -.I patch -patchers. -.SH AUTHOR -Larry Wall <lwall@netlabs.com> -.br -with many other contributors. -.SH ENVIRONMENT -.TP -.B TMPDIR -Directory to put temporary files in; default is /tmp. -.TP -.B SIMPLE_BACKUP_SUFFIX -Extension to use for backup file names instead of \*(L".orig\*(R" or -\*(L"~\*(R". -.TP -.B VERSION_CONTROL -Selects when numbered backup files are made. -.SH FILES -$TMPDIR/patch* -.SH SEE ALSO -diff(1) -.SH NOTES FOR PATCH SENDERS -There are several things you should bear in mind if you are going to -be sending out patches. -First, you can save people a lot of grief by keeping a patchlevel.h file -which is patched to increment the patch level as the first diff in the -patch file you send out. -If you put a Prereq: line in with the patch, it won't let them apply -patches out of order without some warning. -Second, make sure you've specified the file names right, either in a -context diff header, or with an Index: line. -If you are patching something in a subdirectory, be sure to tell the patch -user to specify a -.B \-p -option as needed. -Third, you can create a file by sending out a diff that compares a -null file to the file you want to create. -This will only work if the file you want to create doesn't exist already in -the target directory. -Fourth, take care not to send out reversed patches, since it makes people wonder -whether they already applied the patch. -Fifth, while you may be able to get away with putting 582 diff listings into -one file, it is probably wiser to group related patches into separate files in -case something goes haywire. -.SH DIAGNOSTICS -Too many to list here, but generally indicative that -.I patch -couldn't parse your patch file. -.PP -The message \*(L"Hmm...\*(R" indicates that there is unprocessed text in -the patch file and that -.I patch -is attempting to intuit whether there is a patch in that text and, if so, -what kind of patch it is. -.PP -.I Patch -will exit with a non-zero status if any reject files were created. -When applying a set of patches in a loop it behooves you to check this -exit status so you don't apply a later patch to a partially patched file. -.SH CAVEATS -.I Patch -cannot tell if the line numbers are off in an -.I ed -script, and can only detect -bad line numbers in a normal diff when it finds a \*(L"change\*(R" or -a \*(L"delete\*(R" command. -A context diff using fuzz factor 3 may have the same problem. -Until a suitable interactive interface is added, you should probably do -a context diff in these cases to see if the changes made sense. -Of course, compiling without errors is a pretty good indication that the patch -worked, but not always. -.PP -.I Patch -usually produces the correct results, even when it has to do a lot of -guessing. -However, the results are guaranteed to be correct only when the patch is -applied to exactly the same version of the file that the patch was -generated from. -.SH BUGS -Could be smarter about partial matches, excessively \&deviant offsets and -swapped code, but that would take an extra pass. -.PP -If code has been duplicated (for instance with #ifdef OLDCODE ... #else ... -#endif), -.I patch -is incapable of patching both versions, and, if it works at all, will likely -patch the wrong one, and tell you that it succeeded to boot. -.PP -If you apply a patch you've already applied, -.I patch -will think it is a reversed patch, and offer to un-apply the patch. -This could be construed as a feature. -.rn }` '' diff --git a/gnu/usr.bin/patch/patch.c b/gnu/usr.bin/patch/patch.c deleted file mode 100644 index fbf7e70..0000000 --- a/gnu/usr.bin/patch/patch.c +++ /dev/null @@ -1,971 +0,0 @@ -char rcsid[] = - "$FreeBSD$"; - -/* patch - a program to apply diffs to original files - * - * Copyright 1986, Larry Wall - * - * This program may be copied as long as you don't try to make any - * money off of it, or pretend that you wrote it. - * - * $Log: patch.c,v $ - * Revision 2.0.2.0 90/05/01 22:17:50 davison - * patch12u: unidiff support added - * - * Revision 2.0.1.6 88/06/22 20:46:39 lwall - * patch12: rindex() wasn't declared - * - * Revision 2.0.1.5 88/06/03 15:09:37 lwall - * patch10: exit code improved. - * patch10: better support for non-flexfilenames. - * - * Revision 2.0.1.4 87/02/16 14:00:04 lwall - * Short replacement caused spurious "Out of sync" message. - * - * Revision 2.0.1.3 87/01/30 22:45:50 lwall - * Improved diagnostic on sync error. - * Moved do_ed_script() to pch.c. - * - * Revision 2.0.1.2 86/11/21 09:39:15 lwall - * Fuzz factor caused offset of installed lines. - * - * Revision 2.0.1.1 86/10/29 13:10:22 lwall - * Backwards search could terminate prematurely. - * - * Revision 2.0 86/09/17 15:37:32 lwall - * Baseline for netwide release. - * - * Revision 1.5 86/08/01 20:53:24 lwall - * Changed some %d's to %ld's. - * Linted. - * - * Revision 1.4 86/08/01 19:17:29 lwall - * Fixes for machines that can't vararg. - * Added fuzz factor. - * Generalized -p. - * General cleanup. - * - * 85/08/15 van%ucbmonet@berkeley - * Changes for 4.3bsd diff -c. - * - * Revision 1.3 85/03/26 15:07:43 lwall - * Frozen. - * - * Revision 1.2.1.9 85/03/12 17:03:35 lwall - * Changed pfp->_file to fileno(pfp). - * - * Revision 1.2.1.8 85/03/12 16:30:43 lwall - * Check i_ptr and i_womp to make sure they aren't null before freeing. - * Also allow ed output to be suppressed. - * - * Revision 1.2.1.7 85/03/12 15:56:13 lwall - * Added -p option from jromine@uci-750a. - * - * Revision 1.2.1.6 85/03/12 12:12:51 lwall - * Now checks for normalness of file to patch. - * - * Revision 1.2.1.5 85/03/12 11:52:12 lwall - * Added -D (#ifdef) option from joe@fluke. - * - * Revision 1.2.1.4 84/12/06 11:14:15 lwall - * Made smarter about SCCS subdirectories. - * - * Revision 1.2.1.3 84/12/05 11:18:43 lwall - * Added -l switch to do loose string comparison. - * - * Revision 1.2.1.2 84/12/04 09:47:13 lwall - * Failed hunk count not reset on multiple patch file. - * - * Revision 1.2.1.1 84/12/04 09:42:37 lwall - * Branch for sdcrdcf changes. - * - * Revision 1.2 84/11/29 13:29:51 lwall - * Linted. Identifiers uniqified. Fixed i_ptr malloc() bug. Fixed - * multiple calls to mktemp(). Will now work on machines that can only - * read 32767 chars. Added -R option for diffs with new and old swapped. - * Various cosmetic changes. - * - * Revision 1.1 84/11/09 17:03:58 lwall - * Initial revision - * - */ - -#include <paths.h> -#include "INTERN.h" -#include "common.h" -#include "EXTERN.h" -#include "version.h" -#include "util.h" -#include "pch.h" -#include "inp.h" -#include "backupfile.h" -#include "getopt.h" - -/* procedures */ - -void reinitialize_almost_everything(void); -void get_some_switches(void); -LINENUM locate_hunk(LINENUM _fuzz); -void abort_hunk(void); -void apply_hunk(LINENUM _where); -void init_output(char *_name); -void init_reject(char *_name); -void copy_till(LINENUM _lastline); -void spew_output(void); -void dump_line(LINENUM _line); -bool patch_match(LINENUM _base, LINENUM _offset, LINENUM _fuzz); -bool similar(char *_a, char *_b, int _len); -void my_exit(int _status); - - -/* TRUE if -E was specified on command line. */ -static int remove_empty_files = FALSE; - -/* TRUE if -R was specified on command line. */ -static int reverse_flag_specified = FALSE; - -/* TRUE if -C was specified on command line. */ -int check_patch = FALSE; - -/* TRUE if -I was specified on command line */ -/* or PATCH_INDEX_FIRST env. variable is set */ -int index_first; - -/* TRUE if -S was specified on command line. */ -int skip_flag_specified = FALSE; - -/* Apply a set of diffs as appropriate. */ - -int -main(argc,argv) -int argc; -char **argv; -{ - LINENUM where; - LINENUM newwhere; - LINENUM fuzz; - LINENUM mymaxfuzz; - int hunk = 0; - int failed = 0; - int failtotal = 0; - bool rev_okayed = 0; - int i; - - setbuf(stderr, serrbuf); - for (i = 0; i<MAXFILEC; i++) - filearg[i] = Nullch; - - buf_size = INITLINELEN; - buf = malloc((MEM)(buf_size)); - if (buf == Nullch) - fatal1("out of memory\n"); - - myuid = getuid(); - - index_first = getenv ("PATCH_INDEX_FIRST") != 0; - - /* Cons up the names of the temporary files. */ - { - /* Directory for temporary files. */ - char *tmpdir; - int tmpname_len; - - tmpdir = getenv ("TMPDIR"); - if (tmpdir == NULL) { - tmpdir = _PATH_TMP; - } - tmpname_len = strlen (tmpdir) + 20; - - TMPOUTNAME = (char *) malloc (tmpname_len); - strcpy (TMPOUTNAME, tmpdir); - strcat (TMPOUTNAME, "/patchoXXXXXX"); - Mktemp(TMPOUTNAME); - - TMPINNAME = (char *) malloc (tmpname_len); - strcpy (TMPINNAME, tmpdir); - strcat (TMPINNAME, "/patchiXXXXXX"); - Mktemp(TMPINNAME); - - TMPREJNAME = (char *) malloc (tmpname_len); - strcpy (TMPREJNAME, tmpdir); - strcat (TMPREJNAME, "/patchrXXXXXX"); - Mktemp(TMPREJNAME); - - TMPPATNAME = (char *) malloc (tmpname_len); - strcpy (TMPPATNAME, tmpdir); - strcat (TMPPATNAME, "/patchpXXXXXX"); - Mktemp(TMPPATNAME); - } - - { - char *v; - - v = getenv ("SIMPLE_BACKUP_SUFFIX"); - if (v) - simple_backup_suffix = v; - else - simple_backup_suffix = ".orig"; -#ifndef NODIR - v = getenv ("VERSION_CONTROL"); - backup_type = get_version (v); /* OK to pass NULL. */ -#endif - } - - /* parse switches */ - Argc = argc; - Argv = argv; - get_some_switches(); - - /* make sure we clean up /tmp in case of disaster */ - set_signals(0); - - for ( - open_patch_file(filearg[1]); - there_is_another_patch(); - reinitialize_almost_everything() - ) { /* for each patch in patch file */ - - if (outname == Nullch) - outname = savestr(filearg[0]); - - /* for ed script just up and do it and exit */ - if (diff_type == ED_DIFF) { - do_ed_script(); - continue; - } - - /* initialize the patched file */ - if (!skip_rest_of_patch) - init_output(TMPOUTNAME); - - /* initialize reject file */ - init_reject(TMPREJNAME); - - /* find out where all the lines are */ - if (!skip_rest_of_patch) - scan_input(filearg[0]); - - /* from here on, open no standard i/o files, because malloc */ - /* might misfire and we can't catch it easily */ - - /* apply each hunk of patch */ - hunk = 0; - failed = 0; - rev_okayed = FALSE; - out_of_mem = FALSE; - while (another_hunk()) { - hunk++; - fuzz = Nulline; - mymaxfuzz = pch_context(); - if (maxfuzz < mymaxfuzz) - mymaxfuzz = maxfuzz; - if (!skip_rest_of_patch) { - do { - where = locate_hunk(fuzz); - if (hunk == 1 && where == Nulline && !(force|rev_okayed)) { - /* dwim for reversed patch? */ - if (!pch_swap()) { - if (fuzz == Nulline) - say1( -"Not enough memory to try swapped hunk! Assuming unswapped.\n"); - continue; - } - reverse = !reverse; - where = locate_hunk(fuzz); /* try again */ - if (where == Nulline) { /* didn't find it swapped */ - if (!pch_swap()) /* put it back to normal */ - fatal1("lost hunk on alloc error!\n"); - reverse = !reverse; - } - else if (noreverse) { - if (!pch_swap()) /* put it back to normal */ - fatal1("lost hunk on alloc error!\n"); - reverse = !reverse; - say1( -"Ignoring previously applied (or reversed) patch.\n"); - skip_rest_of_patch = TRUE; - } - else if (batch) { - if (verbose) - say3( -"%seversed (or previously applied) patch detected! %s -R.", - reverse ? "R" : "Unr", - reverse ? "Assuming" : "Ignoring"); - } - else { - (void) ask3( -"%seversed (or previously applied) patch detected! %s -R? [y] ", - reverse ? "R" : "Unr", - reverse ? "Assume" : "Ignore"); - if (*buf == 'n') { - (void) ask1("Apply anyway? [n] "); - if (*buf == 'y') - rev_okayed = TRUE; - else - skip_rest_of_patch = TRUE; - where = Nulline; - reverse = !reverse; - if (!pch_swap()) /* put it back to normal */ - fatal1("lost hunk on alloc error!\n"); - } - } - } - } while (!skip_rest_of_patch && where == Nulline && - ++fuzz <= mymaxfuzz); - - if (skip_rest_of_patch) { /* just got decided */ - Fclose(ofp); - ofp = Nullfp; - } - } - - newwhere = pch_newfirst() + last_offset; - if (skip_rest_of_patch) { - abort_hunk(); - if (! skip_flag_specified) - failed++; - if (verbose) - say3("Hunk #%d ignored at %ld.\n", hunk, newwhere); - } - else if (where == Nulline) { - abort_hunk(); - failed++; - if (verbose) - say3("Hunk #%d failed at %ld.\n", hunk, newwhere); - } - else { - apply_hunk(where); - if (verbose) { - say3("Hunk #%d succeeded at %ld", hunk, newwhere); - if (fuzz) - say2(" with fuzz %ld", fuzz); - if (last_offset) - say3(" (offset %ld line%s)", - last_offset, last_offset==1L?"":"s"); - say1(".\n"); - } - } - } - - if (out_of_mem && using_plan_a) { - optind = optind_last; - say1("\n\nRan out of memory using Plan A--trying again...\n\n"); - if (ofp) - Fclose(ofp); - ofp = Nullfp; - if (rejfp) - Fclose(rejfp); - rejfp = Nullfp; - continue; - } - - assert(hunk); - - /* finish spewing out the new file */ - if (!skip_rest_of_patch) - spew_output(); - - /* and put the output where desired */ - ignore_signals(); - if (!skip_rest_of_patch) { - struct stat statbuf; - char *realout = outname; - - if (check_patch) { - ; - } else if (move_file(TMPOUTNAME, outname) < 0) { - toutkeep = TRUE; - realout = TMPOUTNAME; - chmod(TMPOUTNAME, filemode); - } - else - chmod(outname, filemode); - - if (remove_empty_files && stat(realout, &statbuf) == 0 - && statbuf.st_size == 0) { - if (verbose) - say2("Removing %s (empty after patching).\n", realout); - while (unlink(realout) >= 0) ; /* while is for Eunice. */ - } - } - Fclose(rejfp); - rejfp = Nullfp; - if (failed) { - failtotal += failed; - if (!*rejname) { - Strlcpy(rejname, outname, sizeof(rejname)); - addext(rejname, ".rej", '#'); - } - if (skip_rest_of_patch) { - say4("%d out of %d hunks ignored--saving rejects to %s\n", - failed, hunk, rejname); - } - else { - say4("%d out of %d hunks failed--saving rejects to %s\n", - failed, hunk, rejname); - } - if (check_patch) { - ; - } else if (move_file(TMPREJNAME, rejname) < 0) - trejkeep = TRUE; - } - set_signals(1); - } - my_exit(failtotal); - return (failtotal); -} - -/* Prepare to find the next patch to do in the patch file. */ - -void -reinitialize_almost_everything(void) -{ - re_patch(); - re_input(); - - input_lines = 0; - last_frozen_line = 0; - - filec = 0; - if (filearg[0] != Nullch && !out_of_mem) { - free(filearg[0]); - filearg[0] = Nullch; - } - - if (outname != Nullch) { - free(outname); - outname = Nullch; - } - - last_offset = 0; - - diff_type = 0; - - if (revision != Nullch) { - free(revision); - revision = Nullch; - } - - reverse = reverse_flag_specified; - skip_rest_of_patch = FALSE; - skip_flag_specified = FALSE; - - get_some_switches(); - - if (filec >= 2) - fatal1("you may not change to a different patch file\n"); -} - -static char *shortopts = "-b:B:cCd:D:eEfF:i:IlnNo:p::r:RsStuvV:x:"; -static struct option longopts[] = -{ - {"suffix", 1, NULL, 'b'}, - {"prefix", 1, NULL, 'B'}, - {"check", 0, NULL, 'C'}, - {"context", 0, NULL, 'c'}, - {"directory", 1, NULL, 'd'}, - {"ifdef", 1, NULL, 'D'}, - {"ed", 0, NULL, 'e'}, - {"remove-empty-files", 0, NULL, 'E'}, - {"force", 0, NULL, 'f'}, - {"fuzz", 1, NULL, 'F'}, - {"index-first", 0, NULL, 'I'}, - {"ignore-whitespace", 0, NULL, 'l'}, - {"normal", 0, NULL, 'n'}, - {"forward", 0, NULL, 'N'}, - {"output", 1, NULL, 'o'}, - {"strip", 2, NULL, 'p'}, - {"reject-file", 1, NULL, 'r'}, - {"reverse", 0, NULL, 'R'}, - {"quiet", 0, NULL, 's'}, - {"silent", 0, NULL, 's'}, - {"skip", 0, NULL, 'S'}, - {"batch", 0, NULL, 't'}, - {"unified", 0, NULL, 'u'}, - {"version", 0, NULL, 'v'}, - {"version-control", 1, NULL, 'V'}, - {"debug", 1, NULL, 'x'}, - {0, 0, 0, 0} -}; - -/* Process switches and filenames up to next '+' or end of list. */ - -void -get_some_switches(void) -{ - Reg1 int optc; - - rejname[0] = '\0'; - optind_last = optind; - if (optind == Argc) - return; - while ((optc = getopt_long (Argc, Argv, shortopts, longopts, (int *) 0)) - != -1) { - if (optc == 1) { - if (strEQ(optarg, "+")) - return; - if (filec == MAXFILEC) - fatal1("too many file arguments\n"); - filearg[filec++] = savestr(optarg); - } - else { - switch (optc) { - case 'b': - simple_backup_suffix = savestr(optarg); - break; - case 'B': - origprae = savestr(optarg); - break; - case 'c': - diff_type = CONTEXT_DIFF; - break; - case 'C': - check_patch = TRUE; - break; - case 'd': - if (chdir(optarg) < 0) - pfatal2("can't cd to %s", optarg); - break; - case 'D': - do_defines = TRUE; - if (!isalpha((unsigned char)*optarg) && '_' != *optarg) - fatal1("argument to -D is not an identifier\n"); - Snprintf(if_defined, sizeof(if_defined), "#ifdef %s\n", optarg); - Snprintf(not_defined, sizeof(not_defined), "#ifndef %s\n", optarg); - Snprintf(end_defined, sizeof(end_defined), "#endif /* %s */\n", optarg); - break; - case 'e': - diff_type = ED_DIFF; - break; - case 'E': - remove_empty_files = TRUE; - break; - case 'f': - force = TRUE; - break; - case 'F': - maxfuzz = atoi(optarg); - break; - case 'i': - filearg[1] = savestr(optarg); - break; - case 'I': - index_first = TRUE; - break; - case 'l': - canonicalize = TRUE; - break; - case 'n': - diff_type = NORMAL_DIFF; - break; - case 'N': - noreverse = TRUE; - break; - case 'o': - outname = savestr(optarg); - break; - case 'p': - if (optarg) - strippath = atoi(optarg); - else - strippath = 0; - break; - case 'r': - Strlcpy(rejname, optarg, sizeof(rejname)); - break; - case 'R': - reverse = TRUE; - reverse_flag_specified = TRUE; - break; - case 's': - verbose = FALSE; - break; - case 'S': - skip_rest_of_patch = TRUE; - skip_flag_specified = TRUE; - break; - case 't': - batch = TRUE; - break; - case 'u': - diff_type = UNI_DIFF; - break; - case 'v': - version(); - break; - case 'V': -#ifndef NODIR - backup_type = get_version (optarg); -#endif - break; -#ifdef DEBUGGING - case 'x': - debug = atoi(optarg); - break; -#endif - default: - fprintf(stderr, "\ -Usage: %s [options] [origfile [patchfile]] [+ [options] [origfile]]...\n", - Argv[0]); - fprintf(stderr, "\ -Options:\n\ - [-cCeEflnNRsStuv] [-b backup-ext] [-B backup-prefix] [-d directory]\n\ - [-D symbol] [-F max-fuzz] [-i patchfile] [-o out-file] [-p[strip-count]]\n\ - [-r rej-name] [-V {numbered,existing,simple}] [--check] [--context]\n\ - [--prefix=backup-prefix] [--suffix=backup-ext] [--ifdef=symbol]\n\ - [--directory=directory] [--ed] [--fuzz=max-fuzz] [--force] [--batch]\n\ - [--ignore-whitespace] [--forward] [--reverse] [--output=out-file]\n"); - fprintf(stderr, "\ - [--strip[=strip-count]] [--normal] [--reject-file=rej-name] [--skip]\n\ - [--remove-empty-files] [--quiet] [--silent] [--unified] [--version]\n\ - [--version-control={numbered,existing,simple}] [--index-first]\n"); - my_exit(1); - } - } - } - - /* Process any filename args given after "--". */ - for (; optind < Argc; ++optind) { - if (filec == MAXFILEC) - fatal1("too many file arguments\n"); - filearg[filec++] = savestr(Argv[optind]); - } -} - -/* - * Attempt to find the right place to apply this hunk of patch. - */ -LINENUM -locate_hunk(LINENUM fuzz) -{ - Reg1 LINENUM first_guess = pch_first() + last_offset; - Reg2 LINENUM offset; - LINENUM pat_lines = pch_ptrn_lines(); - Reg3 LINENUM max_pos_offset = input_lines - first_guess - - pat_lines + 1; - Reg4 LINENUM max_neg_offset = first_guess - last_frozen_line - 1 - + pch_context(); - - if (!pat_lines) /* null range matches always */ - return first_guess; - if (max_neg_offset >= first_guess) /* do not try lines < 0 */ - max_neg_offset = first_guess - 1; - if (first_guess <= input_lines && patch_match(first_guess, Nulline, fuzz)) - return first_guess; - for (offset = 1; ; offset++) { - Reg5 bool check_after = (offset <= max_pos_offset); - Reg6 bool check_before = (offset <= max_neg_offset); - - if (check_after && patch_match(first_guess, offset, fuzz)) { -#ifdef DEBUGGING - if (debug & 1) - say3("Offset changing from %ld to %ld\n", last_offset, offset); -#endif - last_offset = offset; - return first_guess+offset; - } - else if (check_before && patch_match(first_guess, -offset, fuzz)) { -#ifdef DEBUGGING - if (debug & 1) - say3("Offset changing from %ld to %ld\n", last_offset, -offset); -#endif - last_offset = -offset; - return first_guess-offset; - } - else if (!check_before && !check_after) - return Nulline; - } -} - -/* We did not find the pattern, dump out the hunk so they can handle it. */ - -void -abort_hunk(void) -{ - Reg1 LINENUM i; - Reg2 LINENUM pat_end = pch_end(); - /* add in last_offset to guess the same as the previous successful hunk */ - LINENUM oldfirst = pch_first() + last_offset; - LINENUM newfirst = pch_newfirst() + last_offset; - LINENUM oldlast = oldfirst + pch_ptrn_lines() - 1; - LINENUM newlast = newfirst + pch_repl_lines() - 1; - char *stars = (diff_type >= NEW_CONTEXT_DIFF ? " ****" : ""); - char *minuses = (diff_type >= NEW_CONTEXT_DIFF ? " ----" : " -----"); - - fprintf(rejfp, "***************\n"); - for (i=0; i<=pat_end; i++) { - switch (pch_char(i)) { - case '*': - if (oldlast < oldfirst) - fprintf(rejfp, "*** 0%s\n", stars); - else if (oldlast == oldfirst) - fprintf(rejfp, "*** %ld%s\n", oldfirst, stars); - else - fprintf(rejfp, "*** %ld,%ld%s\n", oldfirst, oldlast, stars); - break; - case '=': - if (newlast < newfirst) - fprintf(rejfp, "--- 0%s\n", minuses); - else if (newlast == newfirst) - fprintf(rejfp, "--- %ld%s\n", newfirst, minuses); - else - fprintf(rejfp, "--- %ld,%ld%s\n", newfirst, newlast, minuses); - break; - case '\n': - fprintf(rejfp, "%s", pfetch(i)); - break; - case ' ': case '-': case '+': case '!': - fprintf(rejfp, "%c %s", pch_char(i), pfetch(i)); - break; - default: - fatal1("fatal internal error in abort_hunk\n"); - } - } -} - -/* - * We found where to apply it (we hope), so do it. - */ -void -apply_hunk(LINENUM where) -{ - Reg1 LINENUM old = 1; - Reg2 LINENUM lastline = pch_ptrn_lines(); - Reg3 LINENUM new = lastline+1; -#define OUTSIDE 0 -#define IN_IFNDEF 1 -#define IN_IFDEF 2 -#define IN_ELSE 3 - Reg4 int def_state = OUTSIDE; - Reg5 bool R_do_defines = do_defines; - Reg6 LINENUM pat_end = pch_end(); - - where--; - while (pch_char(new) == '=' || pch_char(new) == '\n') - new++; - - while (old <= lastline) { - if (pch_char(old) == '-') { - copy_till(where + old - 1); - if (R_do_defines) { - if (def_state == OUTSIDE) { - fputs(not_defined, ofp); - def_state = IN_IFNDEF; - } - else if (def_state == IN_IFDEF) { - fputs(else_defined, ofp); - def_state = IN_ELSE; - } - fputs(pfetch(old), ofp); - } - last_frozen_line++; - old++; - } - else if (new > pat_end) { - break; - } - else if (pch_char(new) == '+') { - copy_till(where + old - 1); - if (R_do_defines) { - if (def_state == IN_IFNDEF) { - fputs(else_defined, ofp); - def_state = IN_ELSE; - } - else if (def_state == OUTSIDE) { - fputs(if_defined, ofp); - def_state = IN_IFDEF; - } - } - fputs(pfetch(new), ofp); - new++; - } - else if (pch_char(new) != pch_char(old)) { - say3("Out-of-sync patch, lines %ld,%ld--mangled text or line numbers, maybe?\n", - pch_hunk_beg() + old, - pch_hunk_beg() + new); -#ifdef DEBUGGING - say3("oldchar = '%c', newchar = '%c'\n", - pch_char(old), pch_char(new)); -#endif - my_exit(1); - } - else if (pch_char(new) == '!') { - copy_till(where + old - 1); - if (R_do_defines) { - fputs(not_defined, ofp); - def_state = IN_IFNDEF; - } - while (pch_char(old) == '!') { - if (R_do_defines) { - fputs(pfetch(old), ofp); - } - last_frozen_line++; - old++; - } - if (R_do_defines) { - fputs(else_defined, ofp); - def_state = IN_ELSE; - } - while (pch_char(new) == '!') { - fputs(pfetch(new), ofp); - new++; - } - } - else { - assert(pch_char(new) == ' '); - old++; - new++; - if (R_do_defines && def_state != OUTSIDE) { - fputs(end_defined, ofp); - def_state = OUTSIDE; - } - } - } - if (new <= pat_end && pch_char(new) == '+') { - copy_till(where + old - 1); - if (R_do_defines) { - if (def_state == OUTSIDE) { - fputs(if_defined, ofp); - def_state = IN_IFDEF; - } - else if (def_state == IN_IFNDEF) { - fputs(else_defined, ofp); - def_state = IN_ELSE; - } - } - while (new <= pat_end && pch_char(new) == '+') { - fputs(pfetch(new), ofp); - new++; - } - } - if (R_do_defines && def_state != OUTSIDE) { - fputs(end_defined, ofp); - } -} - -/* Open the new file. */ - -void -init_output(char *name) -{ - ofp = fopen(name, "w"); - if (ofp == Nullfp) - pfatal2("can't create %s", name); -} - -/* Open a file to put hunks we can't locate. */ - -void -init_reject(char *name) -{ - rejfp = fopen(name, "w"); - if (rejfp == Nullfp) - pfatal2("can't create %s", name); -} - -/* Copy input file to output, up to wherever hunk is to be applied. */ - -void -copy_till(LINENUM lastline) -{ - Reg2 LINENUM R_last_frozen_line = last_frozen_line; - - if (R_last_frozen_line > lastline) - fatal1("misordered hunks! output would be garbled\n"); - while (R_last_frozen_line < lastline) { - dump_line(++R_last_frozen_line); - } - last_frozen_line = R_last_frozen_line; -} - -/* Finish copying the input file to the output file. */ - -void -spew_output(void) -{ -#ifdef DEBUGGING - if (debug & 256) - say3("il=%ld lfl=%ld\n",input_lines,last_frozen_line); -#endif - if (input_lines) - copy_till(input_lines); /* dump remainder of file */ - Fclose(ofp); - ofp = Nullfp; -} - -/* Copy one line from input to output. */ - -void -dump_line(LINENUM line) -{ - Reg1 char *s; - Reg2 char R_newline = '\n'; - - /* Note: string is not null terminated. */ - for (s=ifetch(line, 0); putc(*s, ofp) != R_newline; s++) ; -} - -/* Does the patch pattern match at line base+offset? */ - -bool -patch_match(LINENUM base, LINENUM offset, LINENUM fuzz) -{ - Reg1 LINENUM pline = 1 + fuzz; - Reg2 LINENUM iline; - Reg3 LINENUM pat_lines = pch_ptrn_lines() - fuzz; - - for (iline=base+offset+fuzz; pline <= pat_lines; pline++,iline++) { - if (canonicalize) { - if (!similar(ifetch(iline, (offset >= 0)), - pfetch(pline), - pch_line_len(pline) )) - return FALSE; - } - else if (strnNE(ifetch(iline, (offset >= 0)), - pfetch(pline), - pch_line_len(pline) )) - return FALSE; - } - return TRUE; -} - -/* Do two lines match with canonicalized white space? */ - -bool -similar(char *a, char *b, int len) -{ - while (len) { - if (isspace((unsigned char)*b)) { /* whitespace (or \n) to match? */ - if (!isspace((unsigned char)*a)) /* no corresponding whitespace? */ - return FALSE; - while (len && isspace((unsigned char)*b) && *b != '\n') - b++,len--; /* skip pattern whitespace */ - while (isspace((unsigned char)*a) && *a != '\n') - a++; /* skip target whitespace */ - if (*a == '\n' || *b == '\n') - return (*a == *b); /* should end in sync */ - } - else if (*a++ != *b++) /* match non-whitespace chars */ - return FALSE; - else - len--; /* probably not necessary */ - } - return TRUE; /* actually, this is not reached */ - /* since there is always a \n */ -} - -/* Exit with cleanup. */ - -void -my_exit(int status) -{ - Unlink(TMPINNAME); - if (!toutkeep) { - Unlink(TMPOUTNAME); - } - if (!trejkeep) { - Unlink(TMPREJNAME); - } - Unlink(TMPPATNAME); - exit(status); -} diff --git a/gnu/usr.bin/patch/patchlevel.h b/gnu/usr.bin/patch/patchlevel.h deleted file mode 100644 index d5de3a9..0000000 --- a/gnu/usr.bin/patch/patchlevel.h +++ /dev/null @@ -1 +0,0 @@ -#define PATCH_VERSION "2.1" diff --git a/gnu/usr.bin/patch/pch.c b/gnu/usr.bin/patch/pch.c deleted file mode 100644 index dba4582..0000000 --- a/gnu/usr.bin/patch/pch.c +++ /dev/null @@ -1,1444 +0,0 @@ -/* $FreeBSD$ - * - * $Log: pch.c,v $ - * Revision 2.0.2.0 90/05/01 22:17:51 davison - * patch12u: unidiff support added - * - * Revision 2.0.1.7 88/06/03 15:13:28 lwall - * patch10: Can now find patches in shar scripts. - * patch10: Hunks that swapped and then swapped back could core dump. - * - * Revision 2.0.1.6 87/06/04 16:18:13 lwall - * pch_swap didn't swap p_bfake and p_efake. - * - * Revision 2.0.1.5 87/01/30 22:47:42 lwall - * Improved responses to mangled patches. - * - * Revision 2.0.1.4 87/01/05 16:59:53 lwall - * New-style context diffs caused double call to free(). - * - * Revision 2.0.1.3 86/11/14 10:08:33 lwall - * Fixed problem where a long pattern wouldn't grow the hunk. - * Also restored p_input_line when backtracking so error messages are right. - * - * Revision 2.0.1.2 86/11/03 17:49:52 lwall - * New-style delete triggers spurious assertion error. - * - * Revision 2.0.1.1 86/10/29 15:52:08 lwall - * Could falsely report new-style context diff. - * - * Revision 2.0 86/09/17 15:39:37 lwall - * Baseline for netwide release. - * - */ - -#include "EXTERN.h" -#include "common.h" -#include "util.h" -#include "INTERN.h" -#include "pch.h" - -/* Patch (diff listing) abstract type. */ - -static long p_filesize; /* size of the patch file */ -static LINENUM p_first; /* 1st line number */ -static LINENUM p_newfirst; /* 1st line number of replacement */ -static LINENUM p_ptrn_lines; /* # lines in pattern */ -static LINENUM p_repl_lines; /* # lines in replacement text */ -static LINENUM p_end = -1; /* last line in hunk */ -static LINENUM p_max; /* max allowed value of p_end */ -static LINENUM p_context = 3; /* # of context lines */ -static LINENUM p_input_line = 0; /* current line # from patch file */ -static char **p_line = Null(char**); /* the text of the hunk */ -static short *p_len = Null(short*); /* length of each line */ -static char *p_Char = Nullch; /* +, -, and ! */ -static int hunkmax = INITHUNKMAX; /* size of above arrays to begin with */ -static int p_indent; /* indent to patch */ -static LINENUM p_base; /* where to intuit this time */ -static LINENUM p_bline; /* line # of p_base */ -static LINENUM p_start; /* where intuit found a patch */ -static LINENUM p_sline; /* and the line number for it */ -static LINENUM p_hunk_beg; /* line number of current hunk */ -static LINENUM p_efake = -1; /* end of faked up lines--don't free */ -static LINENUM p_bfake = -1; /* beg of faked up lines */ - -/* - * Prepare to look for the next patch in the patch file. - */ -void -re_patch(void) -{ - p_first = Nulline; - p_newfirst = Nulline; - p_ptrn_lines = Nulline; - p_repl_lines = Nulline; - p_end = (LINENUM)-1; - p_max = Nulline; - p_indent = 0; -} - -/* - * Open the patch file at the beginning of time. - */ -void -open_patch_file(char *filename) -{ - if (filename == Nullch || !*filename || strEQ(filename, "-")) { - pfp = fopen(TMPPATNAME, "w"); - if (pfp == Nullfp) - pfatal2("can't create %s", TMPPATNAME); - while (fgets(buf, buf_size, stdin) != Nullch) - fputs(buf, pfp); - Fclose(pfp); - filename = TMPPATNAME; - } - pfp = fopen(filename, "r"); - if (pfp == Nullfp) - pfatal2("patch file %s not found", filename); - Fstat(fileno(pfp), &filestat); - p_filesize = filestat.st_size; - next_intuit_at(0L,1L); /* start at the beginning */ - set_hunkmax(); -} - -/* - * Make sure our dynamically realloced tables are malloced to begin with. - */ -void -set_hunkmax(void) -{ -#ifndef lint - if (p_line == Null(char**)) - p_line = (char**) malloc((MEM)hunkmax * sizeof(char *)); - if (p_len == Null(short*)) - p_len = (short*) malloc((MEM)hunkmax * sizeof(short)); -#endif - if (p_Char == Nullch) - p_Char = (char*) malloc((MEM)hunkmax * sizeof(char)); -} - -/* - * Enlarge the arrays containing the current hunk of patch. - */ -void -grow_hunkmax(void) -{ - hunkmax *= 2; - /* - * Note that on most systems, only the p_line array ever gets - * fresh memory since p_len can move into p_line's old space, - * and p_Char can move into p_len's old space. Not on PDP-11's - * however. But it doesn't matter. - */ - assert(p_line != Null(char**) && p_len != Null(short*) && - p_Char != Nullch); -#ifndef lint - p_line = (char**) realloc((char*)p_line, (MEM)hunkmax * sizeof(char *)); - p_len = (short*) realloc((char*)p_len, (MEM)hunkmax * sizeof(short)); - p_Char = (char*) realloc((char*)p_Char, (MEM)hunkmax * sizeof(char)); -#endif - if (p_line != Null(char**) && p_len != Null(short*) && p_Char != Nullch) - return; - if (!using_plan_a) - fatal1("out of memory\n"); - out_of_mem = TRUE; /* whatever is null will be allocated again */ - /* from within plan_a(), of all places */ -} - -/* - * True if the remainder of the patch file contains a diff of some sort. - */ -bool -there_is_another_patch(void) -{ - if (p_base != 0L && p_base >= p_filesize) { - if (verbose) - say1("done\n"); - return FALSE; - } - if (verbose) - say1("Hmm..."); - diff_type = intuit_diff_type(); - if (!diff_type) { - if (p_base != 0L) { - if (verbose) - say1(" Ignoring the trailing garbage.\ndone\n"); - } - else - say1(" I can't seem to find a patch in there anywhere.\n"); - return FALSE; - } - if (verbose) - say3(" %sooks like %s to me...\n", - (p_base == 0L ? "L" : "The next patch l"), - diff_type == UNI_DIFF ? "a unified diff" : - diff_type == CONTEXT_DIFF ? "a context diff" : - diff_type == NEW_CONTEXT_DIFF ? - "a new-style context diff" : - diff_type == NORMAL_DIFF ? "a normal diff" : - "an ed script" ); - if (p_indent && verbose) - say3("(Patch is indented %d space%s.)\n", - p_indent, p_indent==1?"":"s"); - skip_to(p_start,p_sline); - while (filearg[0] == Nullch) { - if (force || batch || skip_rest_of_patch) { - say1("No file to patch. Skipping...\n"); - filearg[0] = savestr(bestguess); - skip_rest_of_patch = TRUE; - return TRUE; - } - (void) ask1("File to patch: "); - if (*buf != '\n') { - if (bestguess) - free(bestguess); - bestguess = savestr(buf); - filearg[0] = fetchname(buf, 0, FALSE); - } - if (filearg[0] == Nullch) { - if (ask1("No file found--skip this patch? [n] ")) { - if (*buf != 'y') { - continue; - } - } - if (verbose) - say1("Skipping patch...\n"); - filearg[0] = fetchname(bestguess, 0, TRUE); - skip_rest_of_patch = TRUE; - return TRUE; - } - } - return TRUE; -} - -static char * -p4_savestr(char *str) -{ - char *t, *h; - - /* Leading whitespace. */ - while (isspace((unsigned char)*str)) - str++; - - /* Remove the file revision number. */ - for (t = str, h = NULL; *t != '\0' && !isspace((unsigned char)*t); t++) - if (*t == '#') - h = t; - if (h != NULL) - *h = '\0'; - - return savestr(str); -} - -/* - * Determine what kind of diff is in the remaining part of the patch file. - */ -int -intuit_diff_type(void) -{ - Reg4 long this_line = 0; - Reg5 long previous_line; - Reg6 long first_command_line = -1; - long fcl_line; - Reg7 bool last_line_was_command = FALSE; - Reg8 bool this_is_a_command = FALSE; - Reg9 bool stars_last_line = FALSE; - Reg10 bool stars_this_line = FALSE; - Reg3 int indent; - Reg1 char *s; - Reg2 char *t; - char *indtmp = Nullch; - char *oldtmp = Nullch; - char *newtmp = Nullch; - char *indname = Nullch; - char *oldname = Nullch; - char *newname = Nullch; - Reg11 int retval; - bool no_filearg = (filearg[0] == Nullch); - extern int index_first; - - ok_to_create_file = FALSE; - Fseek(pfp, p_base, 0); - p_input_line = p_bline - 1; - for (;;) { - previous_line = this_line; - last_line_was_command = this_is_a_command; - stars_last_line = stars_this_line; - this_line = ftell(pfp); - indent = 0; - p_input_line++; - if (pgets(FALSE) == 0) { - if (first_command_line >= 0L) { - /* nothing but deletes!? */ - p_start = first_command_line; - p_sline = fcl_line; - retval = ED_DIFF; - goto scan_exit; - } - else { - p_start = this_line; - p_sline = p_input_line; - retval = 0; - goto scan_exit; - } - } - for (s = buf; *s == ' ' || *s == '\t' || *s == 'X'; s++) { - if (*s == '\t') - indent += 8 - (indent % 8); - else - indent++; - } - for (t=s; isdigit((unsigned char)*t) || *t == ','; t++) - ; - this_is_a_command = (isdigit((unsigned char)*s) && - (*t == 'd' || *t == 'c' || *t == 'a') ); - if (first_command_line < 0L && this_is_a_command) { - first_command_line = this_line; - fcl_line = p_input_line; - p_indent = indent; /* assume this for now */ - } - if (!stars_last_line && strnEQ(s, "*** ", 4)) - oldtmp = savestr(s+4); - else if (strnEQ(s, "--- ", 4)) - newtmp = savestr(s+4); - else if (strnEQ(s, "+++ ", 4)) - oldtmp = savestr(s+4); /* pretend it is the old name */ - else if (strnEQ(s, "Index:", 6)) - indtmp = savestr(s+6); - else if (strnEQ(s, "Prereq:", 7)) { - for (t = s + 7; isspace((unsigned char)*t); t++) - ; - revision = savestr(t); - for (t = revision; *t && !isspace((unsigned char)*t); - t++) - ; - *t = '\0'; - if (!*revision) { - free(revision); - revision = Nullch; - } - } else if (strnEQ(s, "==== ", 5)) { - /* Perforce-style diffs. */ - if ((t = strstr(s + 5, " - ")) != NULL) - newtmp = p4_savestr(t + 3); - oldtmp = p4_savestr(s + 5); - } - if ((!diff_type || diff_type == ED_DIFF) && - first_command_line >= 0L && - strEQ(s, ".\n") ) { - p_indent = indent; - p_start = first_command_line; - p_sline = fcl_line; - retval = ED_DIFF; - goto scan_exit; - } - if ((!diff_type || diff_type == UNI_DIFF) && - strnEQ(s, "@@ -", 4)) { - if (!atol(s+3)) - ok_to_create_file = TRUE; - p_indent = indent; - p_start = this_line; - p_sline = p_input_line; - retval = UNI_DIFF; - goto scan_exit; - } - stars_this_line = strnEQ(s, "********", 8); - if ((!diff_type || diff_type == CONTEXT_DIFF) && - stars_last_line && - strnEQ(s, "*** ", 4)) { - if (!atol(s+4)) - ok_to_create_file = TRUE; - /* - * If this is a new context diff the character just - * before the newline is a '*'. - */ - while (*s != '\n') - s++; - p_indent = indent; - p_start = previous_line; - p_sline = p_input_line - 1; - retval = (*(s-1) == '*' ? - NEW_CONTEXT_DIFF : CONTEXT_DIFF); - goto scan_exit; - } - if ((!diff_type || diff_type == NORMAL_DIFF) && - last_line_was_command && - (strnEQ(s, "< ", 2) || strnEQ(s, "> ", 2)) ) { - p_start = previous_line; - p_sline = p_input_line - 1; - p_indent = indent; - retval = NORMAL_DIFF; - goto scan_exit; - } - } - scan_exit: - if (no_filearg) { - if (indtmp != Nullch) - indname = fetchname(indtmp, strippath, - ok_to_create_file); - if (oldtmp != Nullch) - oldname = fetchname(oldtmp, strippath, - ok_to_create_file); - if (newtmp != Nullch) - newname = fetchname(newtmp, strippath, - ok_to_create_file); - if (index_first && indname) - filearg[0] = savestr(indname); - else if (oldname && newname) { - if (strlen(oldname) < strlen(newname)) - filearg[0] = savestr(oldname); - else - filearg[0] = savestr(newname); - } else if (indname) - filearg[0] = savestr(indname); - else if (oldname) - filearg[0] = savestr(oldname); - else if (newname) - filearg[0] = savestr(newname); - } - if (bestguess) { - free(bestguess); - bestguess = Nullch; - } - if (filearg[0] != Nullch) - bestguess = savestr(filearg[0]); - else if (indtmp != Nullch) - bestguess = fetchname(indtmp, strippath, TRUE); - else { - if (oldtmp != Nullch) - oldname = fetchname(oldtmp, strippath, TRUE); - if (newtmp != Nullch) - newname = fetchname(newtmp, strippath, TRUE); - if (oldname && newname) { - if (strlen(oldname) < strlen(newname)) - bestguess = savestr(oldname); - else - bestguess = savestr(newname); - } - else if (oldname) - bestguess = savestr(oldname); - else if (newname) - bestguess = savestr(newname); - } - if (indtmp != Nullch) - free(indtmp); - if (oldtmp != Nullch) - free(oldtmp); - if (newtmp != Nullch) - free(newtmp); - if (indname != Nullch) - free(indname); - if (oldname != Nullch) - free(oldname); - if (newname != Nullch) - free(newname); - return retval; -} - -/* - * Remember where this patch ends so we know where to start up again. - */ -void -next_intuit_at(long file_pos, long file_line) -{ - p_base = file_pos; - p_bline = file_line; -} - -/* - * Basically a verbose fseek() to the actual diff listing. - */ -void -skip_to(long file_pos, long file_line) -{ - size_t len; - - assert(p_base <= file_pos); - if (verbose && p_base < file_pos) { - Fseek(pfp, p_base, 0); - say1("The text leading up to this was:\n--------------------------\n"); - while (ftell(pfp) < file_pos) { - len = pgets(FALSE); - assert(len != 0); - say2("|%s", buf); - } - say1("--------------------------\n"); - } - else - Fseek(pfp, file_pos, 0); - p_input_line = file_line - 1; -} - -/* - * Make this a function for better debugging. - */ -static void -malformed(void) -{ - fatal3("malformed patch at line %ld: %s", p_input_line, buf); - /* about as informative as "Syntax error" in C */ -} - -/* - * True if the line has been discarded (i.e. it is a line saying - * "\ No newline at end of file".) - */ -static bool -remove_special_line(void) -{ - int c; - - c = fgetc(pfp); - if (c == '\\') { - do { - c = fgetc(pfp); - } while (c != EOF && c != '\n'); - - return TRUE; - } - - if (c != EOF) - fseek(pfp, -1, SEEK_CUR); - - return FALSE; -} - -/* - * True if there is more of the current diff listing to process. - */ -bool -another_hunk(void) -{ - Reg1 char *s; - size_t len; - Reg2 int context = 0; - - while (p_end >= 0) { - if (p_end == p_efake) - p_end = p_bfake; /* don't free twice */ - else - free(p_line[p_end]); - p_end--; - } - assert(p_end == -1); - p_efake = -1; - - p_max = hunkmax; /* gets reduced when --- found */ - if (diff_type == CONTEXT_DIFF || diff_type == NEW_CONTEXT_DIFF) { - long line_beginning = ftell(pfp); - /* file pos of the current line */ - LINENUM repl_beginning = 0; /* index of --- line */ - Reg4 LINENUM fillcnt = 0; /* #lines of missing ptrn or repl */ - Reg5 LINENUM fillsrc; /* index of first line to copy */ - Reg6 LINENUM filldst; /* index of first missing line */ - bool ptrn_spaces_eaten = FALSE; /* ptrn was slightly misformed */ - Reg9 bool repl_could_be_missing = TRUE; - /* no + or ! lines in this hunk */ - bool repl_missing = FALSE; /* we are now backtracking */ - long repl_backtrack_position = 0; - /* file pos of first repl line */ - LINENUM repl_patch_line; /* input line number for same */ - Reg7 LINENUM ptrn_copiable = 0; - /* # of copiable lines in ptrn */ - - len = pgets(TRUE); - p_input_line++; - if (len == 0 || strnNE(buf, "********", 8)) { - next_intuit_at(line_beginning,p_input_line); - return FALSE; - } - p_context = 100; - p_hunk_beg = p_input_line + 1; - while (p_end < p_max) { - line_beginning = ftell(pfp); - len = pgets(TRUE); - p_input_line++; - if (len == 0) { - if (p_max - p_end < 4) - Strcpy(buf, " \n"); /* assume blank lines got chopped */ - else { - if (repl_beginning && repl_could_be_missing) { - repl_missing = TRUE; - goto hunk_done; - } - fatal1("unexpected end of file in patch\n"); - } - } - p_end++; - assert(p_end < hunkmax); - p_Char[p_end] = *buf; -#ifdef zilog - p_line[(short)p_end] = Nullch; -#else - p_line[p_end] = Nullch; -#endif - switch (*buf) { - case '*': - if (strnEQ(buf, "********", 8)) { - if (repl_beginning && repl_could_be_missing) { - repl_missing = TRUE; - goto hunk_done; - } - else - fatal2("unexpected end of hunk at line %ld\n", - p_input_line); - } - if (p_end != 0) { - if (repl_beginning && repl_could_be_missing) { - repl_missing = TRUE; - goto hunk_done; - } - fatal3("unexpected *** at line %ld: %s", p_input_line, buf); - } - context = 0; - p_line[p_end] = savestr(buf); - if (out_of_mem) { - p_end--; - return FALSE; - } - for (s=buf; *s && !isdigit((unsigned char)*s); s++) ; - if (!*s) - malformed (); - if (strnEQ(s,"0,0",3)) - strcpy(s,s+2); - p_first = (LINENUM) atol(s); - while (isdigit((unsigned char)*s)) s++; - if (*s == ',') { - for (; *s && !isdigit((unsigned char)*s); s++) ; - if (!*s) - malformed (); - p_ptrn_lines = ((LINENUM)atol(s)) - p_first + 1; - } - else if (p_first) - p_ptrn_lines = 1; - else { - p_ptrn_lines = 0; - p_first = 1; - } - p_max = p_ptrn_lines + 6; /* we need this much at least */ - while (p_max >= hunkmax) - grow_hunkmax(); - p_max = hunkmax; - break; - case '-': - if (buf[1] == '-') { - if (repl_beginning || - (p_end != p_ptrn_lines + 1 + (p_Char[p_end-1] == '\n'))) - { - if (p_end == 1) { - /* `old' lines were omitted - set up to fill */ - /* them in from 'new' context lines. */ - p_end = p_ptrn_lines + 1; - fillsrc = p_end + 1; - filldst = 1; - fillcnt = p_ptrn_lines; - } - else { - if (repl_beginning) { - if (repl_could_be_missing){ - repl_missing = TRUE; - goto hunk_done; - } - fatal3( -"duplicate \"---\" at line %ld--check line numbers at line %ld\n", - p_input_line, p_hunk_beg + repl_beginning); - } - else { - fatal4( -"%s \"---\" at line %ld--check line numbers at line %ld\n", - (p_end <= p_ptrn_lines - ? "Premature" - : "Overdue" ), - p_input_line, p_hunk_beg); - } - } - } - repl_beginning = p_end; - repl_backtrack_position = ftell(pfp); - repl_patch_line = p_input_line; - p_line[p_end] = savestr(buf); - if (out_of_mem) { - p_end--; - return FALSE; - } - p_Char[p_end] = '='; - for (s=buf; *s && !isdigit((unsigned char)*s); s++) ; - if (!*s) - malformed (); - p_newfirst = (LINENUM) atol(s); - while (isdigit((unsigned char)*s)) s++; - if (*s == ',') { - for (; *s && !isdigit((unsigned char)*s); s++) ; - if (!*s) - malformed (); - p_repl_lines = ((LINENUM)atol(s)) - p_newfirst + 1; - } - else if (p_newfirst) - p_repl_lines = 1; - else { - p_repl_lines = 0; - p_newfirst = 1; - } - p_max = p_repl_lines + p_end; - if (p_max > MAXHUNKSIZE) - fatal4("hunk too large (%ld lines) at line %ld: %s", - p_max, p_input_line, buf); - while (p_max >= hunkmax) - grow_hunkmax(); - if (p_repl_lines != ptrn_copiable - && (p_context != 0 || p_repl_lines != 1)) - repl_could_be_missing = FALSE; - break; - } - goto change_line; - case '+': case '!': - repl_could_be_missing = FALSE; - change_line: - if (buf[1] == '\n' && canonicalize) - strcpy(buf+1," \n"); - if (!isspace((unsigned char)buf[1]) && buf[1] != '>' && buf[1] != '<' && - repl_beginning && repl_could_be_missing) { - repl_missing = TRUE; - goto hunk_done; - } - if (context >= 0) { - if (context < p_context) - p_context = context; - context = -1000; - } - p_line[p_end] = savestr(buf+2); - if (out_of_mem) { - p_end--; - return FALSE; - } - if (p_end == p_ptrn_lines) - { - if (remove_special_line()) { - int len; - - len = strlen(p_line[p_end]) - 1; - (p_line[p_end])[len] = 0; - } - } - break; - case '\t': case '\n': /* assume the 2 spaces got eaten */ - if (repl_beginning && repl_could_be_missing && - (!ptrn_spaces_eaten || diff_type == NEW_CONTEXT_DIFF) ) { - repl_missing = TRUE; - goto hunk_done; - } - p_line[p_end] = savestr(buf); - if (out_of_mem) { - p_end--; - return FALSE; - } - if (p_end != p_ptrn_lines + 1) { - ptrn_spaces_eaten |= (repl_beginning != 0); - context++; - if (!repl_beginning) - ptrn_copiable++; - p_Char[p_end] = ' '; - } - break; - case ' ': - if (!isspace((unsigned char)buf[1]) && - repl_beginning && repl_could_be_missing) { - repl_missing = TRUE; - goto hunk_done; - } - context++; - if (!repl_beginning) - ptrn_copiable++; - p_line[p_end] = savestr(buf+2); - if (out_of_mem) { - p_end--; - return FALSE; - } - break; - default: - if (repl_beginning && repl_could_be_missing) { - repl_missing = TRUE; - goto hunk_done; - } - malformed (); - } - /* set up p_len for strncmp() so we don't have to */ - /* assume null termination */ - if (p_line[p_end]) - p_len[p_end] = strlen(p_line[p_end]); - else - p_len[p_end] = 0; - } - - hunk_done: - if (p_end >=0 && !repl_beginning) - fatal2("no --- found in patch at line %ld\n", pch_hunk_beg()); - - if (repl_missing) { - - /* reset state back to just after --- */ - p_input_line = repl_patch_line; - for (p_end--; p_end > repl_beginning; p_end--) - free(p_line[p_end]); - Fseek(pfp, repl_backtrack_position, 0); - - /* redundant 'new' context lines were omitted - set */ - /* up to fill them in from the old file context */ - if (!p_context && p_repl_lines == 1) { - p_repl_lines = 0; - p_max--; - } - fillsrc = 1; - filldst = repl_beginning+1; - fillcnt = p_repl_lines; - p_end = p_max; - } - else if (!p_context && fillcnt == 1) { - /* the first hunk was a null hunk with no context */ - /* and we were expecting one line -- fix it up. */ - while (filldst < p_end) { - p_line[filldst] = p_line[filldst+1]; - p_Char[filldst] = p_Char[filldst+1]; - p_len[filldst] = p_len[filldst+1]; - filldst++; - } -#if 0 - repl_beginning--; /* this doesn't need to be fixed */ -#endif - p_end--; - p_first++; /* do append rather than insert */ - fillcnt = 0; - p_ptrn_lines = 0; - } - - if (diff_type == CONTEXT_DIFF && - (fillcnt || (p_first > 1 && ptrn_copiable > 2*p_context)) ) { - if (verbose) - say4("%s\n%s\n%s\n", -"(Fascinating--this is really a new-style context diff but without", -"the telltale extra asterisks on the *** line that usually indicate", -"the new style...)"); - diff_type = NEW_CONTEXT_DIFF; - } - - /* if there were omitted context lines, fill them in now */ - if (fillcnt) { - p_bfake = filldst; /* remember where not to free() */ - p_efake = filldst + fillcnt - 1; - while (fillcnt-- > 0) { - while (fillsrc <= p_end && p_Char[fillsrc] != ' ') - fillsrc++; - if (fillsrc > p_end) - fatal2("replacement text or line numbers mangled in hunk at line %ld\n", - p_hunk_beg); - p_line[filldst] = p_line[fillsrc]; - p_Char[filldst] = p_Char[fillsrc]; - p_len[filldst] = p_len[fillsrc]; - fillsrc++; filldst++; - } - while (fillsrc <= p_end && fillsrc != repl_beginning && - p_Char[fillsrc] != ' ') - fillsrc++; -#ifdef DEBUGGING - if (debug & 64) - printf("fillsrc %ld, filldst %ld, rb %ld, e+1 %ld\n", - fillsrc,filldst,repl_beginning,p_end+1); -#endif - assert(fillsrc==p_end+1 || fillsrc==repl_beginning); - assert(filldst==p_end+1 || filldst==repl_beginning); - } - - if (p_line[p_end] != NULL) - { - if (remove_special_line()) { - p_len[p_end] -= 1; - (p_line[p_end])[p_len[p_end]] = 0; - } - } - } - else if (diff_type == UNI_DIFF) { - long line_beginning = ftell(pfp); - /* file pos of the current line */ - Reg4 LINENUM fillsrc; /* index of old lines */ - Reg5 LINENUM filldst; /* index of new lines */ - char ch; - - len = pgets(TRUE); - p_input_line++; - if (len == 0 || strnNE(buf, "@@ -", 4)) { - next_intuit_at(line_beginning,p_input_line); - return FALSE; - } - s = buf+4; - if (!*s) - malformed (); - p_first = (LINENUM) atol(s); - while (isdigit((unsigned char)*s)) s++; - if (*s == ',') { - p_ptrn_lines = (LINENUM) atol(++s); - while (isdigit((unsigned char)*s)) s++; - } else - p_ptrn_lines = 1; - if (*s == ' ') s++; - if (*s != '+' || !*++s) - malformed (); - p_newfirst = (LINENUM) atol(s); - while (isdigit((unsigned char)*s)) s++; - if (*s == ',') { - p_repl_lines = (LINENUM) atol(++s); - while (isdigit((unsigned char)*s)) s++; - } else - p_repl_lines = 1; - if (*s == ' ') s++; - if (*s != '@') - malformed (); - if (!p_ptrn_lines) - p_first++; /* do append rather than insert */ - p_max = p_ptrn_lines + p_repl_lines + 1; - while (p_max >= hunkmax) - grow_hunkmax(); - fillsrc = 1; - filldst = fillsrc + p_ptrn_lines; - p_end = filldst + p_repl_lines; - Sprintf(buf,"*** %ld,%ld ****\n",p_first,p_first + p_ptrn_lines - 1); - p_line[0] = savestr(buf); - if (out_of_mem) { - p_end = -1; - return FALSE; - } - p_Char[0] = '*'; - Sprintf(buf,"--- %ld,%ld ----\n",p_newfirst,p_newfirst+p_repl_lines-1); - p_line[filldst] = savestr(buf); - if (out_of_mem) { - p_end = 0; - return FALSE; - } - p_Char[filldst++] = '='; - p_context = 100; - context = 0; - p_hunk_beg = p_input_line + 1; - while (fillsrc <= p_ptrn_lines || filldst <= p_end) { - line_beginning = ftell(pfp); - len = pgets(TRUE); - p_input_line++; - if (len == 0) { - if (p_max - filldst < 3) - Strcpy(buf, " \n"); /* assume blank lines got chopped */ - else { - fatal1("unexpected end of file in patch\n"); - } - } - if (*buf == '\t' || *buf == '\n') { - ch = ' '; /* assume the space got eaten */ - s = savestr(buf); - } - else { - ch = *buf; - s = savestr(buf+1); - } - if (out_of_mem) { - while (--filldst > p_ptrn_lines) - free(p_line[filldst]); - p_end = fillsrc-1; - return FALSE; - } - switch (ch) { - case '-': - if (fillsrc > p_ptrn_lines) { - free(s); - p_end = filldst-1; - malformed (); - } - p_Char[fillsrc] = ch; - p_line[fillsrc] = s; - p_len[fillsrc++] = strlen(s); - if (fillsrc > p_ptrn_lines) { - if (remove_special_line()) { - p_len[fillsrc - 1] -= 1; - s[p_len[fillsrc - 1]] = 0; - } - } - break; - case '=': - ch = ' '; - /* FALLTHROUGH */ - case ' ': - if (fillsrc > p_ptrn_lines) { - free(s); - while (--filldst > p_ptrn_lines) - free(p_line[filldst]); - p_end = fillsrc-1; - malformed (); - } - context++; - p_Char[fillsrc] = ch; - p_line[fillsrc] = s; - p_len[fillsrc++] = strlen(s); - s = savestr(s); - if (out_of_mem) { - while (--filldst > p_ptrn_lines) - free(p_line[filldst]); - p_end = fillsrc-1; - return FALSE; - } - /* FALLTHROUGH */ - case '+': - if (filldst > p_end) { - free(s); - while (--filldst > p_ptrn_lines) - free(p_line[filldst]); - p_end = fillsrc-1; - malformed (); - } - p_Char[filldst] = ch; - p_line[filldst] = s; - p_len[filldst++] = strlen(s); - if (fillsrc > p_ptrn_lines) { - if (remove_special_line()) { - p_len[filldst - 1] -= 1; - s[p_len[filldst - 1]] = 0; - } - } - break; - default: - p_end = filldst; - malformed (); - } - if (ch != ' ' && context > 0) { - if (context < p_context) - p_context = context; - context = -1000; - } - }/* while */ - } - else { /* normal diff--fake it up */ - char hunk_type; - Reg3 int i; - LINENUM min, max; - long line_beginning = ftell(pfp); - - p_context = 0; - len = pgets(TRUE); - p_input_line++; - if (len == 0 || !isdigit((unsigned char)*buf)) { - next_intuit_at(line_beginning,p_input_line); - return FALSE; - } - p_first = (LINENUM)atol(buf); - for (s=buf; isdigit((unsigned char)*s); s++) ; - if (*s == ',') { - p_ptrn_lines = (LINENUM)atol(++s) - p_first + 1; - while (isdigit((unsigned char)*s)) s++; - } - else - p_ptrn_lines = (*s != 'a'); - hunk_type = *s; - if (hunk_type == 'a') - p_first++; /* do append rather than insert */ - min = (LINENUM)atol(++s); - for (; isdigit((unsigned char)*s); s++) ; - if (*s == ',') - max = (LINENUM)atol(++s); - else - max = min; - if (hunk_type == 'd') - min++; - p_end = p_ptrn_lines + 1 + max - min + 1; - if (p_end > MAXHUNKSIZE) - fatal4("hunk too large (%ld lines) at line %ld: %s", - p_end, p_input_line, buf); - while (p_end >= hunkmax) - grow_hunkmax(); - p_newfirst = min; - p_repl_lines = max - min + 1; - Sprintf(buf, "*** %ld,%ld\n", p_first, p_first + p_ptrn_lines - 1); - p_line[0] = savestr(buf); - if (out_of_mem) { - p_end = -1; - return FALSE; - } - p_Char[0] = '*'; - for (i=1; i<=p_ptrn_lines; i++) { - len = pgets(TRUE); - p_input_line++; - if (len == 0) - fatal2("unexpected end of file in patch at line %ld\n", - p_input_line); - if (*buf != '<') - fatal2("< expected at line %ld of patch\n", p_input_line); - p_line[i] = savestr(buf+2); - if (out_of_mem) { - p_end = i-1; - return FALSE; - } - p_len[i] = strlen(p_line[i]); - p_Char[i] = '-'; - } - - if (remove_special_line()) { - p_len[i-1] -= 1; - (p_line[i-1])[p_len[i-1]] = 0; - } - - if (hunk_type == 'c') { - len = pgets(TRUE); - p_input_line++; - if (len == 0) - fatal2("unexpected end of file in patch at line %ld\n", - p_input_line); - if (*buf != '-') - fatal2("--- expected at line %ld of patch\n", p_input_line); - } - Sprintf(buf, "--- %ld,%ld\n", min, max); - p_line[i] = savestr(buf); - if (out_of_mem) { - p_end = i-1; - return FALSE; - } - p_Char[i] = '='; - for (i++; i<=p_end; i++) { - len = pgets(TRUE); - p_input_line++; - if (len == 0) - fatal2("unexpected end of file in patch at line %ld\n", - p_input_line); - if (*buf != '>') - fatal2("> expected at line %ld of patch\n", p_input_line); - p_line[i] = savestr(buf+2); - if (out_of_mem) { - p_end = i-1; - return FALSE; - } - p_len[i] = strlen(p_line[i]); - p_Char[i] = '+'; - } - - if (remove_special_line()) { - p_len[i-1] -= 1; - (p_line[i-1])[p_len[i-1]] = 0; - } - } - if (reverse) /* backwards patch? */ - if (!pch_swap()) - say1("Not enough memory to swap next hunk!\n"); -#ifdef DEBUGGING - if (debug & 2) { - int i; - char special; - - for (i=0; i <= p_end; i++) { - if (i == p_ptrn_lines) - special = '^'; - else - special = ' '; - fprintf(stderr, "%3d %c %c %s", i, p_Char[i], special, p_line[i]); - Fflush(stderr); - } - } -#endif - if (p_end+1 < hunkmax) /* paranoia reigns supreme... */ - p_Char[p_end+1] = '^'; /* add a stopper for apply_hunk */ - return TRUE; -} - -/* - * Input a line from the patch file. - * Worry about indentation if do_indent is true. - * The line is read directly into the buf global variable which - * is resized if necessary in order to hold the complete line. - * Returns the number of characters read including the terminating - * '\n', if any. - */ -size_t -pgets(bool do_indent) -{ - char *line; - size_t len; - int indent = 0, skipped = 0; - - line = fgetln(pfp, &len); - if (line != Nullch) { - if (len + 1 > buf_size) { - while (len + 1 > buf_size) - buf_size *= 2; - free(buf); - buf = malloc(buf_size); - if (buf == Nullch) - fatal1("out of memory\n"); - } - if (do_indent == TRUE && p_indent) { - for (; - indent < p_indent && (*line == ' ' || *line == '\t' || *line == 'X'); - line++, skipped++) { - if (*line == '\t') - indent += 8 - (indent %7); - else - indent++; - } - } - Strncpy(buf, line, len - skipped); - buf[len - skipped] = '\0'; - } - return len; -} - -/* - * Reverse the old and new portions of the current hunk. - */ -bool -pch_swap(void) -{ - char **tp_line; /* the text of the hunk */ - short *tp_len; /* length of each line */ - char *tp_char; /* +, -, and ! */ - Reg1 LINENUM i; - Reg2 LINENUM n; - bool blankline = FALSE; - Reg3 char *s; - - i = p_first; - p_first = p_newfirst; - p_newfirst = i; - - /* make a scratch copy */ - - tp_line = p_line; - tp_len = p_len; - tp_char = p_Char; - p_line = Null(char**); /* force set_hunkmax to allocate again */ - p_len = Null(short*); - p_Char = Nullch; - set_hunkmax(); - if (p_line == Null(char**) || p_len == Null(short*) || p_Char == Nullch) { -#ifndef lint - if (p_line == Null(char**)) - free((char*)p_line); - p_line = tp_line; - if (p_len == Null(short*)) - free((char*)p_len); - p_len = tp_len; -#endif - if (p_Char == Nullch) - free((char*)p_Char); - p_Char = tp_char; - return FALSE; /* not enough memory to swap hunk! */ - } - - /* now turn the new into the old */ - - i = p_ptrn_lines + 1; - if (tp_char[i] == '\n') { /* account for possible blank line */ - blankline = TRUE; - i++; - } - if (p_efake >= 0) { /* fix non-freeable ptr range */ - if (p_efake <= i) - n = p_end - i + 1; - else - n = -i; - p_efake += n; - p_bfake += n; - } - for (n=0; i <= p_end; i++,n++) { - p_line[n] = tp_line[i]; - p_Char[n] = tp_char[i]; - if (p_Char[n] == '+') - p_Char[n] = '-'; - p_len[n] = tp_len[i]; - } - if (blankline) { - i = p_ptrn_lines + 1; - p_line[n] = tp_line[i]; - p_Char[n] = tp_char[i]; - p_len[n] = tp_len[i]; - n++; - } - assert(p_Char[0] == '='); - p_Char[0] = '*'; - for (s=p_line[0]; *s; s++) - if (*s == '-') - *s = '*'; - - /* now turn the old into the new */ - - assert(tp_char[0] == '*'); - tp_char[0] = '='; - for (s=tp_line[0]; *s; s++) - if (*s == '*') - *s = '-'; - for (i=0; n <= p_end; i++,n++) { - p_line[n] = tp_line[i]; - p_Char[n] = tp_char[i]; - if (p_Char[n] == '-') - p_Char[n] = '+'; - p_len[n] = tp_len[i]; - } - assert(i == p_ptrn_lines + 1); - i = p_ptrn_lines; - p_ptrn_lines = p_repl_lines; - p_repl_lines = i; -#ifndef lint - if (tp_line == Null(char**)) - free((char*)tp_line); - if (tp_len == Null(short*)) - free((char*)tp_len); -#endif - if (tp_char == Nullch) - free((char*)tp_char); - return TRUE; -} - -/* - * Return the specified line position in the old file of the old context. - */ -LINENUM -pch_first(void) -{ - return p_first; -} - -/* - * Return the number of lines of old context. - */ -LINENUM -pch_ptrn_lines(void) -{ - return p_ptrn_lines; -} - -/* - * Return the probable line position in the new file of the first line. - */ -LINENUM -pch_newfirst(void) -{ - return p_newfirst; -} - -/* - * Return the number of lines in the replacement text including context. - */ -LINENUM -pch_repl_lines(void) -{ - return p_repl_lines; -} - -/* - * Return the number of lines in the whole hunk. - */ -LINENUM -pch_end(void) -{ - return p_end; -} - -/* - * Return the number of context lines before the first changed line. - */ -LINENUM -pch_context(void) -{ - return p_context; -} - -/* - * Return the length of a particular patch line. - */ -short -pch_line_len(LINENUM line) -{ - return p_len[line]; -} - -/* - * Return the control character (+, -, *, !, etc) for a patch line. - */ -char -pch_char(LINENUM line) -{ - return p_Char[line]; -} - -/* - * Return a pointer to a particular patch line. - */ -char * -pfetch(LINENUM line) -{ - return p_line[line]; -} - -/* - * Return where in the patch file this hunk began, for error messages. - */ -LINENUM -pch_hunk_beg(void) -{ - return p_hunk_beg; -} - -/* - * Apply an ed script by feeding ed itself. - */ -void -do_ed_script(void) -{ - Reg1 char *t; - Reg2 long beginning_of_this_line; - Reg3 bool this_line_is_command = FALSE; - Reg4 FILE *pipefp; - - if (!skip_rest_of_patch) { - Unlink(TMPOUTNAME); - copy_file(filearg[0], TMPOUTNAME); - if (verbose) - Sprintf(buf, "/bin/ed %s", TMPOUTNAME); - else - Sprintf(buf, "/bin/ed - %s", TMPOUTNAME); - pipefp = popen(buf, "w"); - } - for (;;) { - beginning_of_this_line = ftell(pfp); - if (pgets(TRUE) == 0) { - next_intuit_at(beginning_of_this_line, p_input_line); - break; - } - p_input_line++; - for (t=buf; isdigit((unsigned char)*t) || *t == ','; t++) - ; - this_line_is_command = (isdigit((unsigned char)*buf) && - (*t == 'd' || *t == 'c' || *t == 'a') ); - if (this_line_is_command) { - if (!skip_rest_of_patch) - fputs(buf, pipefp); - if (*t != 'd') { - while (pgets(TRUE) != 0) { - p_input_line++; - if (!skip_rest_of_patch) - fputs(buf, pipefp); - if (strEQ(buf, ".\n")) - break; - } - } - } - else { - next_intuit_at(beginning_of_this_line,p_input_line); - break; - } - } - if (skip_rest_of_patch) - return; - fprintf(pipefp, "w\n"); - fprintf(pipefp, "q\n"); - Fflush(pipefp); - Pclose(pipefp); - ignore_signals(); - if (move_file(TMPOUTNAME, outname) < 0) { - toutkeep = TRUE; - chmod(TMPOUTNAME, filemode); - } - else - chmod(outname, filemode); - set_signals(1); -} diff --git a/gnu/usr.bin/patch/pch.h b/gnu/usr.bin/patch/pch.h deleted file mode 100644 index 0393dea..0000000 --- a/gnu/usr.bin/patch/pch.h +++ /dev/null @@ -1,36 +0,0 @@ -/* $FreeBSD$ - * - * $Log: pch.h,v $ - * Revision 2.0.1.1 87/01/30 22:47:16 lwall - * Added do_ed_script(). - * - * Revision 2.0 86/09/17 15:39:57 lwall - * Baseline for netwide release. - * - */ - -EXT FILE *pfp INIT(Nullfp); /* patch file pointer */ - -void re_patch(void); -void open_patch_file(char *_filename); -void set_hunkmax(void); -void grow_hunkmax(void); -bool there_is_another_patch(void); -int intuit_diff_type(void); -void next_intuit_at(long _file_pos, long _file_line); -void skip_to(long _file_pos, long _file_line); -bool another_hunk(void); -bool pch_swap(void); -char *pfetch(LINENUM _line); -short pch_line_len(LINENUM _line); -LINENUM pch_first(void); -LINENUM pch_ptrn_lines(void); -LINENUM pch_newfirst(void); -LINENUM pch_repl_lines(void); -LINENUM pch_end(void); -LINENUM pch_context(void); -LINENUM pch_hunk_beg(void); -char pch_char(LINENUM _line); -char *pfetch(LINENUM _line); -size_t pgets(bool _do_indent); -void do_ed_script(void); diff --git a/gnu/usr.bin/patch/util.c b/gnu/usr.bin/patch/util.c deleted file mode 100644 index 5a76a03..0000000 --- a/gnu/usr.bin/patch/util.c +++ /dev/null @@ -1,458 +0,0 @@ -/* $FreeBSD$ */ - -#include <paths.h> - -#include "EXTERN.h" -#include "common.h" -#include "INTERN.h" -#include "util.h" -#include "backupfile.h" - -void my_exit(int _status); /* in patch.c */ - -#ifndef HAVE_STRERROR -static char * -private_strerror (errnum) - int errnum; -{ - extern char *sys_errlist[]; - extern int sys_nerr; - - if (errnum > 0 && errnum <= sys_nerr) - return sys_errlist[errnum]; - return "Unknown system error"; -} -#define strerror private_strerror -#endif /* !HAVE_STRERROR */ - -/* - * Rename a file, copying it if necessary. - */ -int -move_file(char *from, char *to) -{ - char bakname[512]; - Reg1 char *s; - Reg2 int i; - Reg3 int fromfd; - - /* to stdout? */ - - if (strEQ(to, "-")) { -#ifdef DEBUGGING - if (debug & 4) - say2("Moving %s to stdout.\n", from); -#endif - fromfd = open(from, 0); - if (fromfd < 0) - pfatal2("internal error, can't reopen %s", from); - while ((i = read(fromfd, buf, buf_size)) > 0) - if (write(1, buf, i) != 1) - pfatal1("write failed"); - Close(fromfd); - return 0; - } - - if (origprae) { - Strcpy(bakname, origprae); - Strcat(bakname, to); - } else { -#ifndef NODIR - char *backupname = find_backup_file_name(to); - if (backupname == (char *) 0) - fatal1("out of memory\n"); - Strcpy(bakname, backupname); - free(backupname); -#else /* NODIR */ - Strcpy(bakname, to); - Strcat(bakname, simple_backup_suffix); -#endif /* NODIR */ - } - - if (stat(to, &filestat) == 0) { /* output file exists */ - dev_t to_device = filestat.st_dev; - ino_t to_inode = filestat.st_ino; - char *simplename = bakname; - - for (s = bakname; *s; s++) { - if (*s == '/') - simplename = s + 1; - } - /* - * Find a backup name that is not the same file. - * Change the first lowercase char into uppercase; - * if that isn't sufficient, chop off the first char - * and try again. - */ - while (stat(bakname, &filestat) == 0 && - to_device == filestat.st_dev && - to_inode == filestat.st_ino) { - /* Skip initial non-lowercase chars. */ - for (s=simplename; *s && !islower((unsigned char)*s); - s++) - ; - if (*s) - *s = toupper((unsigned char)*s); - else - Strcpy(simplename, simplename + 1); - } - while (unlink(bakname) >= 0) - ; /* while() is for benefit of Eunice */ -#ifdef DEBUGGING - if (debug & 4) - say3("Moving %s to %s.\n", to, bakname); -#endif - if (rename(to, bakname) < 0) { - say4("Can't backup %s, output is in %s: %s\n", to, from, - strerror(errno)); - return -1; - } - while (unlink(to) >= 0) - ; - } -#ifdef DEBUGGING - if (debug & 4) - say3("Moving %s to %s.\n", from, to); -#endif - if (rename(from, to) < 0) { /* different file system? */ - Reg4 int tofd; - - tofd = creat(to, 0666); - if (tofd < 0) { - say4("Can't create %s, output is in %s: %s\n", - to, from, strerror(errno)); - return -1; - } - fromfd = open(from, 0); - if (fromfd < 0) - pfatal2("internal error, can't reopen %s", from); - while ((i = read(fromfd, buf, buf_size)) > 0) - if (write(tofd, buf, i) != i) - pfatal1("write failed"); - Close(fromfd); - Close(tofd); - } - Unlink(from); - return 0; -} - -/* - * Copy a file. - */ -void -copy_file(char *from, char *to) -{ - Reg3 int tofd; - Reg2 int fromfd; - Reg1 int i; - - tofd = creat(to, 0666); - if (tofd < 0) - pfatal2("can't create %s", to); - fromfd = open(from, 0); - if (fromfd < 0) - pfatal2("internal error, can't reopen %s", from); - while ((i = read(fromfd, buf, buf_size)) > 0) - if (write(tofd, buf, i) != i) - pfatal2("write to %s failed", to); - Close(fromfd); - Close(tofd); -} - -/* - * Allocate a unique area for a string. - */ -char * -savestr(char *s) -{ - Reg3 char *rv; - Reg2 char *t; - - if (!s) - s = "Oops"; - t = s; - while (*t++) - ; - rv = malloc((MEM) (t - s)); - if (rv == Nullch) { - if (using_plan_a) - out_of_mem = TRUE; - else - fatal1("out of memory\n"); - } else { - t = rv; - while ((*t++ = *s++)); - } - return rv; -} - -#if defined(lint) && defined(CANVARARG) - -/*VARARGS ARGSUSED*/ -say(pat) char *pat; { ; } -/*VARARGS ARGSUSED*/ -fatal(pat) char *pat; { ; } -/*VARARGS ARGSUSED*/ -pfatal(pat) char *pat; { ; } -/*VARARGS ARGSUSED*/ -ask(pat) char *pat; { ; } - -#else - -/* - * Vanilla terminal output (buffered). - */ -void -say(pat,arg1,arg2,arg3) -char *pat; -long arg1,arg2,arg3; -{ - fprintf(stderr, pat, arg1, arg2, arg3); - Fflush(stderr); -} - -/* - * Terminal output, pun intended. - */ -void /* very void */ -fatal(pat,arg1,arg2,arg3) -char *pat; -long arg1,arg2,arg3; -{ - fprintf(stderr, "patch: **** "); - fprintf(stderr, pat, arg1, arg2, arg3); - my_exit(1); -} - -/* - * Say something from patch, something from the system, then silence... - */ -void /* very void */ -pfatal(pat,arg1,arg2,arg3) -char *pat; -long arg1,arg2,arg3; -{ - int errnum = errno; - - fprintf(stderr, "patch: **** "); - fprintf(stderr, pat, arg1, arg2, arg3); - fprintf(stderr, ": %s\n", strerror(errnum)); - my_exit(1); -} - -/* - * Get a response from the user, somehow or other. - */ -int -ask(pat,arg1,arg2,arg3) -char *pat; -long arg1,arg2,arg3; -{ - int ttyfd; - int r; - bool tty2 = isatty(2); - - Sprintf(buf, pat, arg1, arg2, arg3); - Fflush(stderr); - write(2, buf, strlen(buf)); - if (tty2) { /* might be redirected to a file */ - r = read(2, buf, buf_size); - } else if (isatty(1)) { /* this may be new file output */ - Fflush(stdout); - write(1, buf, strlen(buf)); - r = read(1, buf, buf_size); - } else if ((ttyfd = open(_PATH_TTY, 2)) >= 0 && isatty(ttyfd)) { - /* might be deleted or unwriteable */ - write(ttyfd, buf, strlen(buf)); - r = read(ttyfd, buf, buf_size); - Close(ttyfd); - } else if (isatty(0)) { /* this is probably patch input */ - Fflush(stdin); - write(0, buf, strlen(buf)); - r = read(0, buf, buf_size); - } else { /* no terminal at all--default it */ - buf[0] = '\n'; - buf[1] = 0; - say1(buf); - return 0; /* signal possible error */ - } - if (r <= 0) - buf[0] = 0; - else - buf[r] = '\0'; - if (!tty2) - say1(buf); - - if (r <= 0) - return 0; /* if there was an error, return it */ - else - return 1; -} -#endif /* lint */ - -/* - * How to handle certain events when not in a critical region. - */ -void -set_signals(int reset) -{ -#ifndef lint - static RETSIGTYPE (*hupval)(),(*intval)(); - - if (!reset) { - hupval = signal(SIGHUP, SIG_IGN); - if (hupval != SIG_IGN) - hupval = (RETSIGTYPE(*)())my_exit; - intval = signal(SIGINT, SIG_IGN); - if (intval != SIG_IGN) - intval = (RETSIGTYPE(*)())my_exit; - } - Signal(SIGHUP, hupval); - Signal(SIGINT, intval); -#endif -} - -/* - * How to handle certain events when in a critical region. - */ -void -ignore_signals(void) -{ -#ifndef lint - Signal(SIGHUP, SIG_IGN); - Signal(SIGINT, SIG_IGN); -#endif -} - -/* - * Make sure we'll have the directories to create a file. - * If `striplast' is TRUE, ignore the last element of `filename'. - */ -void -makedirs(filename,striplast) -Reg1 char *filename; -bool striplast; -{ - char tmpbuf[256]; - Reg2 char *s = tmpbuf; - char *dirv[20]; /* Point to the NULs between elements. */ - Reg3 int i; - Reg4 int dirvp = 0; /* Number of finished entries in dirv. */ - - /* - * Copy `filename' into `tmpbuf' with a NUL instead of a slash - * between the directories. - */ - while (*filename) { - if (*filename == '/') { - filename++; - dirv[dirvp++] = s; - *s++ = '\0'; - } else { - *s++ = *filename++; - } - } - *s = '\0'; - dirv[dirvp] = s; - if (striplast) - dirvp--; - if (dirvp < 0) - return; - - strcpy(buf, "mkdir"); - s = buf; - for (i = 0; i <= dirvp; i++) { - struct stat sbuf; - - if (stat(tmpbuf, &sbuf) && errno == ENOENT) { - while (*s) - s++; - *s++ = ' '; - strcpy(s, tmpbuf); - } - *dirv[i] = '/'; - } - if (s != buf) - system(buf); -} - -/* - * Make filenames more reasonable. - */ -char * -fetchname(char *at, int strip_leading, int assume_exists) -{ - char *fullname; - char *name; - Reg1 char *t; - char tmpbuf[200]; - int sleading = strip_leading; - - if (!at) - return Nullch; - while (isspace((unsigned char)*at)) - at++; -#ifdef DEBUGGING - if (debug & 128) - say4("fetchname %s %d %d\n",at,strip_leading,assume_exists); -#endif - if (strnEQ(at, _PATH_DEVNULL, sizeof _PATH_DEVNULL - 1)) - /* So files can be created by diffing against /dev/null. */ - return Nullch; - name = fullname = t = savestr(at); - - /* Strip off up to `sleading' leading slashes and null terminate. */ - for (; *t && !isspace((unsigned char)*t); t++) - if (*t == '/') - if (--sleading >= 0) - name = t + 1; - *t = '\0'; - - /* - * If no -p option was given (957 is the default value!), - * we were given a relative pathname, - * and the leading directories that we just stripped off all exist, - * put them back on. - */ - if (strip_leading == 957 && name != fullname && *fullname != '/') { - name[-1] = '\0'; - if (stat(fullname, &filestat) == 0 && - S_ISDIR(filestat.st_mode)) { - name[-1] = '/'; - name = fullname; - } - } - - name = savestr(name); - free(fullname); - - if (stat(name, &filestat) && !assume_exists) { - char *filebase = basename(name); - int pathlen = filebase - name; - - /* Put any leading path into `tmpbuf'. */ - strncpy(tmpbuf, name, pathlen); - -#define try(f, a1, a2) \ - (Sprintf(tmpbuf + pathlen, f, a1, a2), stat(tmpbuf, &filestat) == 0) - if (try("RCS/%s%s", filebase, RCSSUFFIX) || - try("RCS/%s%s", filebase, "") || - try( "%s%s", filebase, RCSSUFFIX) || - try("SCCS/%s%s", SCCSPREFIX, filebase) || - try( "%s%s", SCCSPREFIX, filebase)) - return name; - free(name); - name = Nullch; - } - - return name; -} - -char * -xmalloc(unsigned int size) -{ - register char *p = (char *) malloc (size); - if (!p) - fatal("out of memory"); - return p; -} diff --git a/gnu/usr.bin/patch/util.h b/gnu/usr.bin/patch/util.h deleted file mode 100644 index f31f2e2..0000000 --- a/gnu/usr.bin/patch/util.h +++ /dev/null @@ -1,88 +0,0 @@ -/* $FreeBSD$ - * - * $Log: util.h,v $ - * Revision 2.0 86/09/17 15:40:06 lwall - * Baseline for netwide release. - * - */ - -/* and for those machine that can't handle a variable argument list */ - -#ifdef CANVARARG - -#define say1 say -#define say2 say -#define say3 say -#define say4 say -#define ask1 ask -#define ask2 ask -#define ask3 ask -#define ask4 ask -#define fatal1 fatal -#define fatal2 fatal -#define fatal3 fatal -#define fatal4 fatal -#define pfatal1 pfatal -#define pfatal2 pfatal -#define pfatal3 pfatal -#define pfatal4 pfatal - -#else /* hope they allow multi-line macro actual arguments */ - -#ifdef lint - -#define say1(a) say(a, 0, 0, 0) -#define say2(a,b) say(a, (b)==(b), 0, 0) -#define say3(a,b,c) say(a, (b)==(b), (c)==(c), 0) -#define say4(a,b,c,d) say(a, (b)==(b), (c)==(c), (d)==(d)) -#define ask1(a) ask(a, 0, 0, 0) -#define ask2(a,b) ask(a, (b)==(b), 0, 0) -#define ask3(a,b,c) ask(a, (b)==(b), (c)==(c), 0) -#define ask4(a,b,c,d) ask(a, (b)==(b), (c)==(c), (d)==(d)) -#define fatal1(a) fatal(a, 0, 0, 0) -#define fatal2(a,b) fatal(a, (b)==(b), 0, 0) -#define fatal3(a,b,c) fatal(a, (b)==(b), (c)==(c), 0) -#define fatal4(a,b,c,d) fatal(a, (b)==(b), (c)==(c), (d)==(d)) -#define pfatal1(a) pfatal(a, 0, 0, 0) -#define pfatal2(a,b) pfatal(a, (b)==(b), 0, 0) -#define pfatal3(a,b,c) pfatal(a, (b)==(b), (c)==(c), 0) -#define pfatal4(a,b,c,d) pfatal(a, (b)==(b), (c)==(c), (d)==(d)) - -#else /* lint */ - /* if this doesn't work, try defining CANVARARG above */ -#define say1(a) say(a, Nullch, Nullch, Nullch) -#define say2(a,b) say(a, b, Nullch, Nullch) -#define say3(a,b,c) say(a, b, c, Nullch) -#define say4 say -#define ask1(a) ask(a, Nullch, Nullch, Nullch) -#define ask2(a,b) ask(a, b, Nullch, Nullch) -#define ask3(a,b,c) ask(a, b, c, Nullch) -#define ask4 ask -#define fatal1(a) fatal(a, Nullch, Nullch, Nullch) -#define fatal2(a,b) fatal(a, b, Nullch, Nullch) -#define fatal3(a,b,c) fatal(a, b, c, Nullch) -#define fatal4 fatal -#define pfatal1(a) pfatal(a, Nullch, Nullch, Nullch) -#define pfatal2(a,b) pfatal(a, b, Nullch, Nullch) -#define pfatal3(a,b,c) pfatal(a, b, c, Nullch) -#define pfatal4 pfatal - -#endif /* lint */ - -/* if neither of the above work, join all multi-line macro calls. */ -#endif - -EXT char serrbuf[BUFSIZ]; /* buffer for stderr */ - -char *fetchname(char *_at, int _strip_leading, int _assume_exists); -int move_file(char *_from, char *_to); -void copy_file(char *_from, char *_to); -void say(/*const char *pat, long _arg1, long _arg2, long _arg3*/); -void fatal(); -void pfatal(); -int ask(/*const char *pat, long _arg1, long _arg2, long _arg3*/); -char *savestr(char *_s); -void set_signals(int _reset); -void ignore_signals(void); -void makedirs(/*char *_filename, bool _striplast*/); -char *basename(); diff --git a/gnu/usr.bin/patch/version.c b/gnu/usr.bin/patch/version.c deleted file mode 100644 index 753dbfb..0000000 --- a/gnu/usr.bin/patch/version.c +++ /dev/null @@ -1,25 +0,0 @@ -/* $FreeBSD$ - * - * $Log: version.c,v $ - * Revision 2.0 86/09/17 15:40:11 lwall - * Baseline for netwide release. - * - */ - -#include "EXTERN.h" -#include "common.h" -#include "util.h" -#include "INTERN.h" -#include "patchlevel.h" -#include "version.h" - -void my_exit(int _status); /* in patch.c */ - -/* Print out the version number and die. */ - -void -version(void) -{ - fprintf(stderr, "Patch version %s\n", PATCH_VERSION); - my_exit(0); -} diff --git a/gnu/usr.bin/patch/version.h b/gnu/usr.bin/patch/version.h deleted file mode 100644 index faddf7c..0000000 --- a/gnu/usr.bin/patch/version.h +++ /dev/null @@ -1,9 +0,0 @@ -/* $FreeBSD$ - * - * $Log: version.h,v $ - * Revision 2.0 86/09/17 15:40:14 lwall - * Baseline for netwide release. - * - */ - -void version(void); |