summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/cvs/lib
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/cvs/lib')
-rw-r--r--gnu/usr.bin/cvs/lib/alloca.c4
-rw-r--r--gnu/usr.bin/cvs/lib/argmatch.c4
-rw-r--r--gnu/usr.bin/cvs/lib/dup2.c4
-rw-r--r--gnu/usr.bin/cvs/lib/error.c42
-rw-r--r--gnu/usr.bin/cvs/lib/fnmatch.c8
-rw-r--r--gnu/usr.bin/cvs/lib/ftruncate.c4
-rw-r--r--gnu/usr.bin/cvs/lib/getdate.y213
-rw-r--r--gnu/usr.bin/cvs/lib/getopt.c535
-rw-r--r--gnu/usr.bin/cvs/lib/getopt.h105
-rw-r--r--gnu/usr.bin/cvs/lib/getopt1.c101
-rw-r--r--gnu/usr.bin/cvs/lib/getwd.c4
-rw-r--r--gnu/usr.bin/cvs/lib/hash.c95
-rw-r--r--gnu/usr.bin/cvs/lib/hash.h35
-rw-r--r--gnu/usr.bin/cvs/lib/mkdir.c4
-rw-r--r--gnu/usr.bin/cvs/lib/myndbm.c7
-rw-r--r--gnu/usr.bin/cvs/lib/myndbm.h20
-rw-r--r--gnu/usr.bin/cvs/lib/regex.c2561
-rw-r--r--gnu/usr.bin/cvs/lib/regex.h337
-rw-r--r--gnu/usr.bin/cvs/lib/rename.c4
-rw-r--r--gnu/usr.bin/cvs/lib/sighandle.c36
-rw-r--r--gnu/usr.bin/cvs/lib/strdup.c4
-rw-r--r--gnu/usr.bin/cvs/lib/strippath.c20
-rw-r--r--gnu/usr.bin/cvs/lib/stripslash.c15
-rw-r--r--gnu/usr.bin/cvs/lib/subr.c271
-rw-r--r--gnu/usr.bin/cvs/lib/system.h175
-rw-r--r--gnu/usr.bin/cvs/lib/wait.h2
-rw-r--r--gnu/usr.bin/cvs/lib/yesno.c4
27 files changed, 2632 insertions, 1982 deletions
diff --git a/gnu/usr.bin/cvs/lib/alloca.c b/gnu/usr.bin/cvs/lib/alloca.c
index d2a54b3..b57659e 100644
--- a/gnu/usr.bin/cvs/lib/alloca.c
+++ b/gnu/usr.bin/cvs/lib/alloca.c
@@ -30,7 +30,7 @@
static char SCCSid[] = "@(#)alloca.c 1.1"; /* for the "what" utility */
#endif
-#ifdef emacs
+#if defined(emacs) || defined(HAVE_CONFIG_H)
#include "config.h"
#ifdef static
/* actually, only want this if static is defined as ""
@@ -43,7 +43,7 @@ lose
-- must know STACK_DIRECTION at compile-time
#endif /* STACK_DIRECTION undefined */
#endif /* static */
-#endif /* emacs */
+#endif /* emacs || HAVE_CONFIG_H*/
#if __STDC__
typedef void *pointer; /* generic pointer type */
diff --git a/gnu/usr.bin/cvs/lib/argmatch.c b/gnu/usr.bin/cvs/lib/argmatch.c
index 3f765fe..327a27d 100644
--- a/gnu/usr.bin/cvs/lib/argmatch.c
+++ b/gnu/usr.bin/cvs/lib/argmatch.c
@@ -17,6 +17,10 @@
/* Written by David MacKenzie <djm@ai.mit.edu> */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <stdio.h>
#ifdef STDC_HEADERS
#include <string.h>
diff --git a/gnu/usr.bin/cvs/lib/dup2.c b/gnu/usr.bin/cvs/lib/dup2.c
index 0bd3aca..1974383 100644
--- a/gnu/usr.bin/cvs/lib/dup2.c
+++ b/gnu/usr.bin/cvs/lib/dup2.c
@@ -4,6 +4,10 @@
last edit: 11-Feb-1987 D A Gwyn
*/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <errno.h>
#include <fcntl.h>
diff --git a/gnu/usr.bin/cvs/lib/error.c b/gnu/usr.bin/cvs/lib/error.c
index fadb1c5..6734c02 100644
--- a/gnu/usr.bin/cvs/lib/error.c
+++ b/gnu/usr.bin/cvs/lib/error.c
@@ -19,9 +19,13 @@
/* Brian Berliner added support for CVS */
#ifndef lint
-static char rcsid[] = "@(#)error.c 1.9 92/03/31";
+static char rcsid[] = "$CVSid: @(#)error.c 1.13 94/09/30 $";
#endif /* not lint */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <stdio.h>
/* turn on CVS support by default, since this is the CVS distribution */
@@ -35,7 +39,7 @@ void Lock_Cleanup();
#endif /* __STDC__ */
#endif /* CVS_SUPPORT */
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
#if __STDC__
#include <stdarg.h>
@@ -47,7 +51,7 @@ void Lock_Cleanup();
#else
-#ifndef DOPRNT_MISSING
+#ifdef HAVE_DOPRNT
#define va_alist args
#define va_dcl int args;
#else
@@ -57,7 +61,7 @@ void Lock_Cleanup();
#endif
-#ifdef STDC_HEADERS
+#if STDC_HEADERS
#include <stdlib.h>
#include <string.h>
#else
@@ -68,19 +72,7 @@ void exit ();
#endif /* __STDC__ */
#endif
-#ifdef STRERROR_MISSING
-static char *
-strerror (errnum)
- int errnum;
-{
- extern char *sys_errlist[];
- extern int sys_nerr;
-
- if (errnum > 0 && errnum < sys_nerr)
- return sys_errlist[errnum];
- return "Unknown system error";
-}
-#endif /* STRERROR_MISSING */
+extern char *strerror ();
/* Print the program name and error message MESSAGE, which is a printf-style
format string with optional args.
@@ -88,7 +80,7 @@ strerror (errnum)
Exit with status STATUS if it is nonzero. */
/* VARARGS */
void
-#if !defined (VPRINTF_MISSING) && __STDC__
+#if defined (HAVE_VPRINTF) && __STDC__
error (int status, int errnum, char *message, ...)
#else
error (status, errnum, message, va_alist)
@@ -102,7 +94,7 @@ error (status, errnum, message, va_alist)
#ifdef CVS_SUPPORT
extern char *command_name;
#endif
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
va_list args;
#endif
@@ -117,12 +109,12 @@ error (status, errnum, message, va_alist)
#else
fprintf (stderr, "%s: ", program_name);
#endif
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
VA_START (args, message);
vfprintf (stderr, message, args);
va_end (args);
#else
-#ifndef DOPRNT_MISSING
+#ifdef HAVE_DOPRNT
_doprnt (message, &args, stderr);
#else
fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8);
@@ -149,7 +141,7 @@ error (status, errnum, message, va_alist)
Exit with status STATUS if it is nonzero. */
/* VARARGS */
void
-#if !defined (VPRINTF_MISSING) && __STDC__
+#if defined (HAVE_VPRINTF) && __STDC__
fperror (FILE *fp, int status, int errnum, char *message, ...)
#else
fperror (fp, status, errnum, message, va_alist)
@@ -161,17 +153,17 @@ fperror (fp, status, errnum, message, va_alist)
#endif
{
extern char *program_name;
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
va_list args;
#endif
fprintf (fp, "%s: ", program_name);
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
VA_START (args, message);
vfprintf (fp, message, args);
va_end (args);
#else
-#ifndef DOPRNT_MISSING
+#ifdef HAVE_DOPRNT
_doprnt (message, &args, fp);
#else
fprintf (fp, message, a1, a2, a3, a4, a5, a6, a7, a8);
diff --git a/gnu/usr.bin/cvs/lib/fnmatch.c b/gnu/usr.bin/cvs/lib/fnmatch.c
index 50fa94c..2a05430 100644
--- a/gnu/usr.bin/cvs/lib/fnmatch.c
+++ b/gnu/usr.bin/cvs/lib/fnmatch.c
@@ -18,6 +18,10 @@ Cambridge, MA 02139, USA. */
/* Modified slightly by Brian Berliner <berliner@sun.com> for CVS use */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
/* IGNORE(@ */
/* #include <ansidecl.h> */
/* @) */
@@ -28,10 +32,6 @@ Cambridge, MA 02139, USA. */
extern int errno;
#endif
-#if !__STDC__
-#define const
-#endif
-
/* Match STRING against the filename pattern PATTERN, returning zero if
it matches, nonzero if not. */
int
diff --git a/gnu/usr.bin/cvs/lib/ftruncate.c b/gnu/usr.bin/cvs/lib/ftruncate.c
index 17d263d..13f20a3 100644
--- a/gnu/usr.bin/cvs/lib/ftruncate.c
+++ b/gnu/usr.bin/cvs/lib/ftruncate.c
@@ -1,6 +1,10 @@
/* ftruncate emulations that work on some System V's.
This file is in the public domain. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <sys/types.h>
#include <fcntl.h>
diff --git a/gnu/usr.bin/cvs/lib/getdate.y b/gnu/usr.bin/cvs/lib/getdate.y
index d010cb6..5769e9c 100644
--- a/gnu/usr.bin/cvs/lib/getdate.y
+++ b/gnu/usr.bin/cvs/lib/getdate.y
@@ -1,39 +1,110 @@
%{
-/* 1.8
-** @(#)getdate.y 1.8 92/03/03
-**
+/*
** Originally written by Steven M. Bellovin <smb@research.att.com> while
** at the University of North Carolina at Chapel Hill. Later tweaked by
** a couple of people on Usenet. Completely overhauled by Rich $alz
** <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990;
** send any email to Rich.
**
-** This grammar has eight shift/reduce conflicts.
+** This grammar has 10 shift/reduce conflicts.
**
** This code is in the public domain and has no copyright.
*/
/* SUPPRESS 287 on yaccpar_sccsid *//* Unused static variable */
/* SUPPRESS 288 on yyerrlab *//* Label unused */
-#include "system.h"
+#ifdef HAVE_CONFIG_H
+#if defined (emacs) || defined (CONFIG_BROKETS)
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+/* Since the code of getdate.y is not included in the Emacs executable
+ itself, there is no need to #define static in this file. Even if
+ the code were included in the Emacs executable, it probably
+ wouldn't do any harm to #undef it here; this will only cause
+ problems if we try to write to a static variable, which I don't
+ think this code needs to do. */
+#ifdef emacs
+#undef static
+#endif
+
+#include <stdio.h>
#include <ctype.h>
-#if (!defined (__STDC__) && defined (sparc)) || defined (__sparc__)
-#ifdef __GNUC__
-#undef alloca /* might get redefined below */
+/* The code at the top of get_date which figures out the offset of the
+ current time zone checks various CPP symbols to see if special
+ tricks are needed, but defaults to using the gettimeofday system call.
+ Include <sys/time.h> if that will be used. */
+
+#if defined(vms)
+
+#include <types.h>
+#include <time.h>
+
+#else
+
+#include <sys/types.h>
+
+#ifdef TIME_WITH_SYS_TIME
+#include <sys/time.h>
+#include <time.h>
+#else
+#ifdef HAVE_SYS_TIME_H
+#include <sys/time.h>
+#else
+#include <time.h>
+#endif
+#endif
+
+#ifdef timezone
+#undef timezone /* needed for sgi */
+#endif
+
+#if defined(HAVE_SYS_TIMEB_H)
+#include <sys/timeb.h>
+#else
+/*
+** We use the obsolete `struct timeb' as part of our interface!
+** Since the system doesn't have it, we define it here;
+** our callers must do likewise.
+*/
+struct timeb {
+ time_t time; /* Seconds since the epoch */
+ unsigned short millitm; /* Field not used */
+ short timezone; /* Minutes west of GMT */
+ short dstflag; /* Field not used */
+};
+#endif /* defined(HAVE_SYS_TIMEB_H) */
+
+#endif /* defined(vms) */
+
+#if defined (STDC_HEADERS) || defined (USG)
+#include <string.h>
#endif
+
+/* Some old versions of bison generate parsers that use bcopy.
+ That loses on systems that don't provide the function, so we have
+ to redefine it here. */
+#if !defined (HAVE_BCOPY) && defined (HAVE_MEMCPY) && !defined (bcopy)
+#define bcopy(from, to, len) memcpy ((to), (from), (len))
#endif
-extern struct tm *localtime();
+extern struct tm *gmtime();
+extern struct tm *localtime();
#define yyparse getdate_yyparse
#define yylex getdate_yylex
#define yyerror getdate_yyerror
#if !defined(lint) && !defined(SABER)
-static char RCS[] = "@(#)getdate.y 1.8 92/03/03";
+static char RCS[] = "$CVSid: @(#)getdate.y 1.11 94/09/21 $";
#endif /* !defined(lint) && !defined(SABER) */
+static int yylex ();
+static int yyerror ();
#define EPOCH 1970
#define HOUR(x) ((time_t)(x) * 60)
@@ -202,6 +273,18 @@ date : tUNUMBER '/' tUNUMBER {
yyDay = $3;
yyYear = $5;
}
+ | tUNUMBER tSNUMBER tSNUMBER {
+ /* ISO 8601 format. yyyy-mm-dd. */
+ yyYear = $1;
+ yyMonth = -$2;
+ yyDay = -$3;
+ }
+ | tUNUMBER tMONTH tSNUMBER {
+ /* e.g. 17-JUN-1992. */
+ yyDay = $1;
+ yyMonth = $2;
+ yyYear = -$3;
+ }
| tMONTH tUNUMBER {
yyMonth = $1;
yyDay = $2;
@@ -263,25 +346,24 @@ number : tUNUMBER {
yyYear = $1;
else {
if($1>10000) {
- time_t date_part;
-
- date_part= $1/10000;
yyHaveDate++;
- yyDay= (date_part)%100;
- yyMonth= (date_part/100)%100;
- yyYear = date_part/10000;
- }
- yyHaveTime++;
- if ($1 < 100) {
- yyHour = $1;
- yyMinutes = 0;
+ yyDay= ($1)%100;
+ yyMonth= ($1/100)%100;
+ yyYear = $1/10000;
}
else {
- yyHour = $1 / 100;
- yyMinutes = $1 % 100;
- }
- yySeconds = 0;
- yyMeridian = MER24;
+ yyHaveTime++;
+ if ($1 < 100) {
+ yyHour = $1;
+ yyMinutes = 0;
+ }
+ else {
+ yyHour = $1 / 100;
+ yyMinutes = $1 % 100;
+ }
+ yySeconds = 0;
+ yyMeridian = MER24;
+ }
}
}
;
@@ -297,7 +379,7 @@ o_merid : /* NULL */ {
%%
/* Month and day table. */
-static TABLE MonthDayTable[] = {
+static TABLE const MonthDayTable[] = {
{ "january", tMONTH, 1 },
{ "february", tMONTH, 2 },
{ "march", tMONTH, 3 },
@@ -326,7 +408,7 @@ static TABLE MonthDayTable[] = {
};
/* Time units table. */
-static TABLE UnitsTable[] = {
+static TABLE const UnitsTable[] = {
{ "year", tMONTH_UNIT, 12 },
{ "month", tMONTH_UNIT, 1 },
{ "fortnight", tMINUTE_UNIT, 14 * 24 * 60 },
@@ -341,7 +423,7 @@ static TABLE UnitsTable[] = {
};
/* Assorted relative-time words. */
-static TABLE OtherTable[] = {
+static TABLE const OtherTable[] = {
{ "tomorrow", tMINUTE_UNIT, 1 * 24 * 60 },
{ "yesterday", tMINUTE_UNIT, -1 * 24 * 60 },
{ "today", tMINUTE_UNIT, 0 },
@@ -367,7 +449,7 @@ static TABLE OtherTable[] = {
/* The timezone table. */
/* Some of these are commented out because a time_t can't store a float. */
-static TABLE TimezoneTable[] = {
+static TABLE const TimezoneTable[] = {
{ "gmt", tZONE, HOUR( 0) }, /* Greenwich Mean */
{ "ut", tZONE, HOUR( 0) }, /* Universal (Coordinated) */
{ "utc", tZONE, HOUR( 0) },
@@ -451,7 +533,7 @@ static TABLE TimezoneTable[] = {
};
/* Military timezone table. */
-static TABLE MilitaryTable[] = {
+static TABLE const MilitaryTable[] = {
{ "a", tZONE, HOUR( 1) },
{ "b", tZONE, HOUR( 2) },
{ "c", tZONE, HOUR( 3) },
@@ -484,7 +566,7 @@ static TABLE MilitaryTable[] = {
/* ARGSUSED */
-int
+static int
yyerror(s)
char *s;
{
@@ -514,6 +596,8 @@ ToSeconds(Hours, Minutes, Seconds, Meridian)
if (Hours < 1 || Hours > 12)
return -1;
return ((Hours + 12) * 60L + Minutes) * 60L + Seconds;
+ default:
+ abort ();
}
/* NOTREACHED */
}
@@ -530,7 +614,7 @@ Convert(Month, Day, Year, Hours, Minutes, Seconds, Meridian, DSTmode)
MERIDIAN Meridian;
DSTMODE DSTmode;
{
- static int DaysInMonth[12] = {
+ static int DaysInMonth[12] = {
31, 0, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
};
time_t tod;
@@ -624,7 +708,7 @@ LookupWord(buff)
{
register char *p;
register char *q;
- register TABLE *tp;
+ register const TABLE *tp;
int i;
int abbrev;
@@ -725,7 +809,7 @@ LookupWord(buff)
}
-int
+static int
yylex()
{
register char c;
@@ -777,48 +861,62 @@ yylex()
}
}
+#define TM_YEAR_ORIGIN 1900
+
+/* Yield A - B, measured in seconds. */
+static long
+difftm (a, b)
+ struct tm *a, *b;
+{
+ int ay = a->tm_year + (TM_YEAR_ORIGIN - 1);
+ int by = b->tm_year + (TM_YEAR_ORIGIN - 1);
+ int days = (
+ /* difference in day of year */
+ a->tm_yday - b->tm_yday
+ /* + intervening leap days */
+ + ((ay >> 2) - (by >> 2))
+ - (ay/100 - by/100)
+ + ((ay/100 >> 2) - (by/100 >> 2))
+ /* + difference in years * 365 */
+ + (long)(ay-by) * 365
+ );
+ return (60*(60*(24*days + (a->tm_hour - b->tm_hour))
+ + (a->tm_min - b->tm_min))
+ + (a->tm_sec - b->tm_sec));
+}
time_t
get_date(p, now)
char *p;
struct timeb *now;
{
- struct tm *tm;
+ struct tm *tm, gmt;
struct timeb ftz;
time_t Start;
time_t tod;
yyInput = p;
if (now == NULL) {
- now = &ftz;
-#if defined(FTIME_MISSING)
+ now = &ftz;
(void)time(&ftz.time);
- /* Set the timezone global. */
- tzset();
-#if defined(HAVE_TIMEZONE)
- tm = localtime(&ftz.time);
- ftz.timezone = tm->tm_gmtoff / 60;
-#else
-#if defined(timezone)
- ftz.tzone = (int) timezone / 60;
-#else
- ftz.timezone = (int) timezone / 60;
-#endif /* defined(timezone) */
-#endif /* defined(HAVE_TIMEZONE) */
-#else
- (void)ftime(&ftz);
-#endif /* defined(FTIME_MISSING) */
+
+ if (! (tm = gmtime (&ftz.time)))
+ return -1;
+ gmt = *tm; /* Make a copy, in case localtime modifies *tm. */
+
+ if (! (tm = localtime (&ftz.time)))
+ return -1;
+
+ ftz.timezone = difftm (&gmt, tm) / 60;
+ if(tm->tm_isdst)
+ ftz.timezone += 60;
}
tm = localtime(&now->time);
yyYear = tm->tm_year;
yyMonth = tm->tm_mon + 1;
yyDay = tm->tm_mday;
-#if defined(timezone)
- yyTimezone = now->tzone;
-#else
yyTimezone = now->timezone;
-#endif /* defined(timezone) */
yyDSTmode = DSTmaybe;
yyHour = 0;
yyMinutes = 0;
@@ -865,6 +963,7 @@ get_date(p, now)
#if defined(TEST)
/* ARGSUSED */
+int
main(ac, av)
int ac;
char *av[];
diff --git a/gnu/usr.bin/cvs/lib/getopt.c b/gnu/usr.bin/cvs/lib/getopt.c
index c322fc2..446a8e4 100644
--- a/gnu/usr.bin/cvs/lib/getopt.c
+++ b/gnu/usr.bin/cvs/lib/getopt.c
@@ -1,10 +1,15 @@
/* Getopt for GNU.
- Copyright (C) 1987-1992 Free Software Foundation, Inc.
+ NOTE: getopt is now part of the C library, so if you don't know what
+ "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu
+ before changing it!
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94
+ Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -13,73 +18,73 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
-#if !__STDC__
+/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>.
+ Ditto for AIX 3.2 and <stdlib.h>. */
+#ifndef _NO_PROTO
+#define _NO_PROTO
+#endif
+
+#ifdef HAVE_CONFIG_H
+#if defined (emacs) || defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
+#ifndef __STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
#define const
#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
+
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
+/* Don't include stdlib.h for non-GNU C libraries because some of them
+ contain conflicting prototypes for getopt. */
+#include <stdlib.h>
+#endif /* GNU C library. */
/* This version of `getopt' appears to the caller like standard Unix `getopt'
but it behaves differently for the user, since it allows the user
to intersperse the options with the other arguments.
- As `getopt' works, it permutes the elements of `argv' so that,
+ As `getopt' works, it permutes the elements of ARGV so that,
when it is done, all the options precede everything else. Thus
all application programs are extended to handle flexible argument order.
- Setting the environment variable _POSIX_OPTION_ORDER disables permutation.
+ Setting the environment variable POSIXLY_CORRECT disables permutation.
Then the behavior is completely standard.
GNU application programs can use a third alternative mode in which
they can distinguish the relative order of options and other arguments. */
#ifndef lint
-static char rcsid[] = "@(#)getopt.c 1.7 92/03/31";
-#endif
-
-#include <stdio.h>
-
-#if defined(STDC_HEADERS) || defined(__GNU_LIBRARY__)
-#include <stdlib.h>
-#else /* STDC_HEADERS or __GNU_LIBRARY__ */
-char *getenv ();
-char *malloc ();
-#endif /* STDC_HEADERS or __GNU_LIBRARY__ */
-
-/* AIX requires this to be the first thing in the file. */
-#ifdef __GNUC__
-#if !defined(bsdi) && !defined(__386BSD__)
-#define alloca __builtin_alloca
+static char rcsid[] = "$CVSid: @(#)getopt.c 1.10 94/09/21 $";
#endif
-#else /* not __GNUC__ */
-#ifdef sparc
-#include <alloca.h>
-#else
-#ifdef _AIX
- #pragma alloca
-#else
-char *alloca ();
-#endif
-#endif /* sparc */
-#endif /* not __GNUC__ */
-#if defined(USG) || defined(STDC_HEADERS) || defined(__GNU_LIBRARY__)
-#include <string.h>
-#ifndef bcopy
-#define bcopy(s, d, n) memcpy ((d), (s), (n))
-#endif
-#ifndef index
-#define index strchr
-#endif
-#else /* USG or STDC_HEADERS or __GNU_LIBRARY__ */
-#ifdef VMS
-#include <string.h>
-#else /* VMS */
-#include <strings.h>
-#endif /* VMS */
-/* Declaring bcopy causes errors on systems whose declarations are different.
- If the declaration is omitted, everything works fine. */
-#endif /* USG or STDC_HEADERS or __GNU_LIBRARY__ */
+#include "getopt.h"
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
@@ -87,7 +92,7 @@ char *alloca ();
Also, when `ordering' is RETURN_IN_ORDER,
each non-option ARGV-element is returned here. */
-char *optarg = 0;
+char *optarg = NULL;
/* Index in ARGV of the next element to be scanned.
This is used for communication to and from the caller
@@ -101,6 +106,7 @@ char *optarg = 0;
Otherwise, `optind' communicates from one call to the next
how much of ARGV has been scanned so far. */
+/* XXX 1003.2 says this must be 1 before any call. */
int optind = 0;
/* The next char to be scanned in the option-element
@@ -117,17 +123,23 @@ static char *nextchar;
int opterr = 1;
+/* Set to an option character which was unrecognized.
+ This must be initialized on some systems to avoid linking in the
+ system's own getopt implementation. */
+
+int optopt = '?';
+
/* Describe how to deal with options that follow non-option ARGV-elements.
If the caller did not specify anything,
the default is REQUIRE_ORDER if the environment variable
- _POSIX_OPTION_ORDER is defined, PERMUTE otherwise.
+ POSIXLY_CORRECT is defined, PERMUTE otherwise.
REQUIRE_ORDER means don't recognize them as options;
stop option processing when the first non-option is seen.
This is what Unix does.
This mode of operation is selected by either setting the environment
- variable POSIX_ME_HARDER, or using `+' as the first character
+ variable POSIXLY_CORRECT, or using `+' as the first character
of the list of option characters.
PERMUTE is the default. We permute the contents of ARGV as we scan,
@@ -151,28 +163,50 @@ static enum
REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER
} ordering;
-/* Describe the long-named options requested by the application.
- _GETOPT_LONG_OPTIONS is a vector of `struct option' terminated by an
- element containing a name which is zero.
- The field `has_arg' is 1 if the option takes an argument,
- 2 if it takes an optional argument. */
-
-struct option
-{
- char *name;
- int has_arg;
- int *flag;
- int val;
-};
+/* Value of POSIXLY_CORRECT environment variable. */
+static char *posixly_correct;
+
+#ifdef __GNU_LIBRARY__
+/* We want to avoid inclusion of string.h with non-GNU libraries
+ because there are many ways it can cause trouble.
+ On some systems, it contains special magic macros that don't work
+ in GCC. */
+#include <string.h>
+#define my_index strchr
+#else
-const struct option *_getopt_long_options;
+/* Avoid depending on library functions or files
+ whose names are inconsistent. */
-int _getopt_long_only = 0;
+char *getenv ();
-/* Index in _GETOPT_LONG_OPTIONS of the long-named option actually found.
- Only valid when a long-named option was found. */
+static char *
+my_index (str, chr)
+ const char *str;
+ int chr;
+{
+ while (*str)
+ {
+ if (*str == chr)
+ return (char *) str;
+ str++;
+ }
+ return 0;
+}
-int option_index;
+/* If using GCC, we can safely declare strlen this way.
+ If not using GCC, it is ok not to declare it. */
+#ifdef __GNUC__
+/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
+ That was relevant to code that was here before. */
+#ifndef __STDC__
+/* gcc with -traditional declares the built-in strlen to return int,
+ and has done so at least since version 2.4.5. -- rms. */
+extern int strlen (const char *);
+#endif /* not __STDC__ */
+#endif /* __GNUC__ */
+
+#endif /* not __GNU_LIBRARY__ */
/* Handle permutation of arguments. */
@@ -185,32 +219,104 @@ static int last_nonopt;
/* Exchange two adjacent subsequences of ARGV.
One subsequence is elements [first_nonopt,last_nonopt)
- which contains all the non-options that have been skipped so far.
+ which contains all the non-options that have been skipped so far.
The other is elements [last_nonopt,optind), which contains all
- the options processed since those non-options were skipped.
+ the options processed since those non-options were skipped.
`first_nonopt' and `last_nonopt' are relocated so that they describe
- the new indices of the non-options in ARGV after they are moved. */
+ the new indices of the non-options in ARGV after they are moved. */
static void
exchange (argv)
char **argv;
{
- int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *);
- char **temp = (char **) alloca (nonopts_size);
+ int bottom = first_nonopt;
+ int middle = last_nonopt;
+ int top = optind;
+ char *tem;
- /* Interchange the two blocks of data in ARGV. */
+ /* Exchange the shorter segment with the far end of the longer segment.
+ That puts the shorter segment into the right place.
+ It leaves the longer segment in the right place overall,
+ but it consists of two parts that need to be swapped next. */
+
+ while (top > middle && middle > bottom)
+ {
+ if (top - middle > middle - bottom)
+ {
+ /* Bottom segment is the short one. */
+ int len = middle - bottom;
+ register int i;
+
+ /* Swap it with the top part of the top segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[top - (middle - bottom) + i];
+ argv[top - (middle - bottom) + i] = tem;
+ }
+ /* Exclude the moved bottom segment from further swapping. */
+ top -= len;
+ }
+ else
+ {
+ /* Top segment is the short one. */
+ int len = top - middle;
+ register int i;
- bcopy (&argv[first_nonopt], temp, nonopts_size);
- bcopy (&argv[last_nonopt], &argv[first_nonopt],
- (optind - last_nonopt) * sizeof (char *));
- bcopy (temp, &argv[first_nonopt + optind - last_nonopt], nonopts_size);
+ /* Swap it with the bottom part of the bottom segment. */
+ for (i = 0; i < len; i++)
+ {
+ tem = argv[bottom + i];
+ argv[bottom + i] = argv[middle + i];
+ argv[middle + i] = tem;
+ }
+ /* Exclude the moved top segment from further swapping. */
+ bottom += len;
+ }
+ }
/* Update records for the slots the non-options now occupy. */
first_nonopt += (optind - last_nonopt);
last_nonopt = optind;
}
+
+/* Initialize the internal data when the first call is made. */
+
+static const char *
+_getopt_initialize (optstring)
+ const char *optstring;
+{
+ /* Start processing options with ARGV-element 1 (since ARGV-element 0
+ is the program name); the sequence of previously skipped
+ non-option ARGV-elements is empty. */
+
+ first_nonopt = last_nonopt = optind = 1;
+
+ nextchar = NULL;
+
+ posixly_correct = getenv ("POSIXLY_CORRECT");
+
+ /* Determine how to handle the ordering of options and nonoptions. */
+
+ if (optstring[0] == '-')
+ {
+ ordering = RETURN_IN_ORDER;
+ ++optstring;
+ }
+ else if (optstring[0] == '+')
+ {
+ ordering = REQUIRE_ORDER;
+ ++optstring;
+ }
+ else if (posixly_correct != NULL)
+ ordering = REQUIRE_ORDER;
+ else
+ ordering = PERMUTE;
+
+ return optstring;
+}
/* Scan elements of ARGV (whose length is ARGC) for option characters
given in OPTSTRING.
@@ -245,78 +351,67 @@ exchange (argv)
handling the non-option ARGV-elements.
See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above.
- Long-named options begin with `+' instead of `-'.
+ Long-named options begin with `--' instead of `-'.
Their names may be abbreviated as long as the abbreviation is unique
or is an exact match for some defined option. If they have an
argument, it follows the option name in the same ARGV-element, separated
from the option name by a `=', or else in the next ARGV-element.
When `getopt' finds a long-named option, it returns 0 if that option's
`flag' field is nonzero, the value of the option's `val' field
- otherwise. */
+ if the `flag' field is zero.
+
+ The elements of ARGV aren't really const, because we permute them.
+ But we pretend they're const in the prototype to be compatible
+ with other systems.
+
+ LONGOPTS is a vector of `struct option' terminated by an
+ element containing a name which is zero.
+
+ LONGIND returns the index in LONGOPTS of the long-named option found.
+ It is only valid when a long-named option has been found by the most
+ recent call.
+
+ If LONG_ONLY is nonzero, '-' as well as '--' can introduce
+ long-named options. */
int
-gnu_getopt (argc, argv, optstring)
+_getopt_internal (argc, argv, optstring, longopts, longind, long_only)
int argc;
- char **argv;
+ char *const *argv;
const char *optstring;
+ const struct option *longopts;
+ int *longind;
+ int long_only;
{
- optarg = 0;
-
- /* Initialize the internal data when the first call is made.
- Start processing options with ARGV-element 1 (since ARGV-element 0
- is the program name); the sequence of previously skipped
- non-option ARGV-elements is empty. */
+ optarg = NULL;
if (optind == 0)
- {
- first_nonopt = last_nonopt = optind = 1;
+ optstring = _getopt_initialize (optstring);
- nextchar = 0;
-
- /* Determine how to handle the ordering of options and nonoptions. */
-
- if (optstring[0] == '-')
- {
- ordering = RETURN_IN_ORDER;
- ++optstring;
- }
- else if (optstring[0] == '+')
- {
- ordering = REQUIRE_ORDER;
- ++optstring;
- }
- else if (getenv ("POSIX_ME_HARDER") != 0)
- ordering = REQUIRE_ORDER;
- else
- ordering = PERMUTE;
- }
-
- if (nextchar == 0 || *nextchar == 0)
+ if (nextchar == NULL || *nextchar == '\0')
{
+ /* Advance to the next ARGV-element. */
+
if (ordering == PERMUTE)
{
/* If we have just processed some options following some non-options,
exchange them so that the options come first. */
if (first_nonopt != last_nonopt && last_nonopt != optind)
- exchange (argv);
+ exchange ((char **) argv);
else if (last_nonopt != optind)
first_nonopt = optind;
- /* Now skip any additional non-options
+ /* Skip any additional non-options
and extend the range of non-options previously skipped. */
while (optind < argc
- && (argv[optind][0] != '-'
- || argv[optind][1] == 0)
- && (_getopt_long_options == 0
- || argv[optind][0] != '+'
- || argv[optind][1] == 0))
+ && (argv[optind][0] != '-' || argv[optind][1] == '\0'))
optind++;
last_nonopt = optind;
}
- /* Special ARGV-element `--' means premature end of options.
+ /* The special ARGV-element `--' means premature end of options.
Skip it like a null option,
then exchange with previous non-options as if it were an option,
then skip everything else like a non-option. */
@@ -326,7 +421,7 @@ gnu_getopt (argc, argv, optstring)
optind++;
if (first_nonopt != last_nonopt && last_nonopt != optind)
- exchange (argv);
+ exchange ((char **) argv);
else if (first_nonopt == last_nonopt)
first_nonopt = optind;
last_nonopt = argc;
@@ -349,9 +444,7 @@ gnu_getopt (argc, argv, optstring)
/* If we have come to a non-option and did not permute it,
either stop the scan or describe it to the caller and pass it by. */
- if ((argv[optind][0] != '-' || argv[optind][1] == 0)
- && (_getopt_long_options == 0
- || argv[optind][0] != '+' || argv[optind][1] == 0))
+ if ((argv[optind][0] != '-' || argv[optind][1] == '\0'))
{
if (ordering == REQUIRE_ORDER)
return EOF;
@@ -360,32 +453,48 @@ gnu_getopt (argc, argv, optstring)
}
/* We have found another option-ARGV-element.
- Start decoding its characters. */
+ Skip the initial punctuation. */
- nextchar = argv[optind] + 1;
+ nextchar = (argv[optind] + 1
+ + (longopts != NULL && argv[optind][1] == '-'));
}
- if (_getopt_long_options != 0
- && (argv[optind][0] == '+'
- || (_getopt_long_only && argv[optind][0] == '-'))
- )
+ /* Decode the current option-ARGV-element. */
+
+ /* Check whether the ARGV-element is a long option.
+
+ If long_only and the ARGV-element has the form "-f", where f is
+ a valid short option, don't consider it an abbreviated form of
+ a long option that starts with f. Otherwise there would be no
+ way to give the -f short option.
+
+ On the other hand, if there's a long option "fubar" and
+ the ARGV-element is "-fu", do consider that an abbreviation of
+ the long option, just like "--fu", and not "-f" with arg "u".
+
+ This distinction seems to be the most useful approach. */
+
+ if (longopts != NULL
+ && (argv[optind][1] == '-'
+ || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1])))))
{
+ char *nameend;
const struct option *p;
- char *s = nextchar;
+ const struct option *pfound = NULL;
int exact = 0;
int ambig = 0;
- const struct option *pfound = 0;
- int indfound = 0;
+ int indfound;
+ int option_index;
- while (*s && *s != '=')
- s++;
+ for (nameend = nextchar; *nameend && *nameend != '='; nameend++)
+ /* Do nothing. */ ;
- /* Test all options for either exact match or abbreviated matches. */
- for (p = _getopt_long_options, option_index = 0; p->name;
- p++, option_index++)
- if (!strncmp (p->name, nextchar, s - nextchar))
+ /* Test all long options for either exact match
+ or abbreviated matches. */
+ for (p = longopts, option_index = 0; p->name; p++, option_index++)
+ if (!strncmp (p->name, nextchar, nameend - nextchar))
{
- if (s - nextchar == strlen (p->name))
+ if (nameend - nextchar == strlen (p->name))
{
/* Exact match found. */
pfound = p;
@@ -393,39 +502,52 @@ gnu_getopt (argc, argv, optstring)
exact = 1;
break;
}
- else if (pfound == 0)
+ else if (pfound == NULL)
{
/* First nonexact match found. */
pfound = p;
indfound = option_index;
}
else
- /* Second nonexact match found. */
+ /* Second or later nonexact match found. */
ambig = 1;
}
if (ambig && !exact)
{
- fprintf (stderr, "%s: option `%s' is ambiguous\n",
- argv[0], argv[optind]);
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' is ambiguous\n",
+ argv[0], argv[optind]);
nextchar += strlen (nextchar);
optind++;
return '?';
}
- if (pfound != 0)
+ if (pfound != NULL)
{
option_index = indfound;
optind++;
- if (*s)
+ if (*nameend)
{
- if (pfound->has_arg > 0)
- optarg = s + 1;
+ /* Don't test has_arg with >, because some C compilers don't
+ allow it to be used on enums. */
+ if (pfound->has_arg)
+ optarg = nameend + 1;
else
{
- fprintf (stderr,
- "%s: option `%c%s' doesn't allow an argument\n",
- argv[0], argv[optind - 1][0], pfound->name);
+ if (opterr)
+ {
+ if (argv[optind - 1][1] == '-')
+ /* --option */
+ fprintf (stderr,
+ "%s: option `--%s' doesn't allow an argument\n",
+ argv[0], pfound->name);
+ else
+ /* -option (single-dash long option; only reachable when long_only) */
+ fprintf (stderr,
+ "%s: option `%c%s' doesn't allow an argument\n",
+ argv[0], argv[optind - 1][0], pfound->name);
+ }
nextchar += strlen (nextchar);
return '?';
}
@@ -436,13 +558,16 @@ gnu_getopt (argc, argv, optstring)
optarg = argv[optind++];
else
{
- fprintf (stderr, "%s: option `%s' requires an argument\n",
- argv[0], argv[optind - 1]);
+ if (opterr)
+ fprintf (stderr, "%s: option `%s' requires an argument\n",
+ argv[0], argv[optind - 1]);
nextchar += strlen (nextchar);
- return '?';
+ return optstring[0] == ':' ? ':' : '?';
}
}
nextchar += strlen (nextchar);
+ if (longind != NULL)
+ *longind = option_index;
if (pfound->flag)
{
*(pfound->flag) = pfound->val;
@@ -450,43 +575,52 @@ gnu_getopt (argc, argv, optstring)
}
return pfound->val;
}
- /* Can't find it as a long option. If this is getopt_long_only,
- and the option starts with '-' and is a valid short
- option, then interpret it as a short option. Otherwise it's
- an error. */
- if (_getopt_long_only == 0 || argv[optind][0] == '+' ||
- index (optstring, *nextchar) == 0)
+
+ /* Can't find it as a long option. If this is not getopt_long_only,
+ or the option starts with '--' or is not a valid short
+ option, then it's an error.
+ Otherwise interpret it as a short option. */
+ if (!long_only || argv[optind][1] == '-'
+ || my_index (optstring, *nextchar) == NULL)
{
- if (opterr != 0)
- fprintf (stderr, "%s: unrecognized option `%c%s'\n",
- argv[0], argv[optind][0], nextchar);
- nextchar += strlen (nextchar);
+ if (opterr)
+ {
+ if (argv[optind][1] == '-')
+ /* --option */
+ fprintf (stderr, "%s: unrecognized option `--%s'\n",
+ argv[0], nextchar);
+ else
+ /* -option (single-dash long option; only reachable when long_only) */
+ fprintf (stderr, "%s: unrecognized option `%c%s'\n",
+ argv[0], argv[optind][0], nextchar);
+ }
+ nextchar = (char *) "";
optind++;
return '?';
}
}
- /* Look at and handle the next option-character. */
+ /* Look at and handle the next short option-character. */
{
char c = *nextchar++;
- char *temp = index (optstring, c);
+ char *temp = my_index (optstring, c);
/* Increment `optind' when we start to process its last character. */
- if (*nextchar == 0)
- optind++;
+ if (*nextchar == '\0')
+ ++optind;
- if (temp == 0 || c == ':')
+ if (temp == NULL || c == ':')
{
- if (opterr != 0)
+ if (opterr)
{
- if (c < 040 || c >= 0177)
- fprintf (stderr, "%s: unrecognized option, character code 0%o\n",
- argv[0], c);
+ if (posixly_correct)
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c);
else
- fprintf (stderr, "%s: unrecognized option `-%c'\n",
- argv[0], c);
+ fprintf (stderr, "%s: invalid option -- %c\n", argv[0], c);
}
+ optopt = c;
return '?';
}
if (temp[1] == ':')
@@ -494,19 +628,19 @@ gnu_getopt (argc, argv, optstring)
if (temp[2] == ':')
{
/* This is an option that accepts an argument optionally. */
- if (*nextchar != 0)
+ if (*nextchar != '\0')
{
optarg = nextchar;
optind++;
}
else
- optarg = 0;
- nextchar = 0;
+ optarg = NULL;
+ nextchar = NULL;
}
else
{
/* This is an option that requires an argument. */
- if (*nextchar != 0)
+ if (*nextchar != '\0')
{
optarg = nextchar;
/* If we end this ARGV-element by taking the rest as an arg,
@@ -515,21 +649,42 @@ gnu_getopt (argc, argv, optstring)
}
else if (optind == argc)
{
- if (opterr != 0)
- fprintf (stderr, "%s: option `-%c' requires an argument\n",
- argv[0], c);
- c = '?';
+ if (opterr)
+ {
+ /* 1003.2 specifies the format of this message. */
+ fprintf (stderr, "%s: option requires an argument -- %c\n",
+ argv[0], c);
+ }
+ optopt = c;
+ if (optstring[0] == ':')
+ c = ':';
+ else
+ c = '?';
}
else
/* We already incremented `optind' once;
increment it again when taking next ARGV-elt as argument. */
optarg = argv[optind++];
- nextchar = 0;
+ nextchar = NULL;
}
}
return c;
}
}
+
+int
+getopt (argc, argv, optstring)
+ int argc;
+ char *const *argv;
+ const char *optstring;
+{
+ return _getopt_internal (argc, argv, optstring,
+ (const struct option *) 0,
+ (int *) 0,
+ 0);
+}
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
#ifdef TEST
@@ -548,7 +703,7 @@ main (argc, argv)
{
int this_option_optind = optind ? optind : 1;
- c = gnu_getopt (argc, argv, "abc:d:0123456789");
+ c = getopt (argc, argv, "abc:d:0123456789");
if (c == EOF)
break;
diff --git a/gnu/usr.bin/cvs/lib/getopt.h b/gnu/usr.bin/cvs/lib/getopt.h
index 5f902de..f644aa1 100644
--- a/gnu/usr.bin/cvs/lib/getopt.h
+++ b/gnu/usr.bin/cvs/lib/getopt.h
@@ -1,10 +1,10 @@
-/* declarations for getopt
- Copyright (C) 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+/* Declarations for getopt.
+ Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -13,9 +13,16 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
-/* @(#)getopt.h 1.6 92/03/31 */
+/* $CVSid: @(#)getopt.h 1.7 94/09/21 $ */
+
+#ifndef _GETOPT_H
+#define _GETOPT_H 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
/* For communication from `getopt' to the caller.
When `getopt' finds an option that takes an argument,
@@ -44,16 +51,21 @@ extern int optind;
extern int opterr;
+/* Set to an option character which was unrecognized. */
+
+extern int optopt;
+
/* Describe the long-named options requested by the application.
- _GETOPT_LONG_OPTIONS is a vector of `struct option' terminated by an
- element containing a name which is zero.
+ The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector
+ of `struct option' terminated by an element containing a name which is
+ zero.
The field `has_arg' is:
- 0 if the option does not take an argument,
- 1 if the option requires an argument,
- 2 if the option takes an optional argument.
+ no_argument (or 0) if the option does not take an argument,
+ required_argument (or 1) if the option requires an argument,
+ optional_argument (or 2) if the option takes an optional argument.
- If the field `flag' is nonzero, it points to a variable that is set
+ If the field `flag' is not NULL, it points to a variable that is set
to the value given in the field `val' when the option is found, but
left unchanged if the option is not found.
@@ -66,37 +78,54 @@ extern int opterr;
struct option
{
+#if __STDC__
+ const char *name;
+#else
char *name;
+#endif
+ /* has_arg can't be an enum because some compilers complain about
+ type mismatches in all the code that assumes it is an int. */
int has_arg;
int *flag;
int val;
};
-#if __STDC__
-extern const struct option *_getopt_long_options;
-#else
-extern struct option *_getopt_long_options;
-#endif
-
-/* If nonzero, '-' can introduce long-named options.
- Set by getopt_long_only. */
-
-extern int _getopt_long_only;
+/* Names for the values of the `has_arg' field of `struct option'. */
-/* The index in GETOPT_LONG_OPTIONS of the long-named option found.
- Only valid when a long-named option has been found by the most
- recent call to `getopt'. */
-
-extern int option_index;
+#define no_argument 0
+#define required_argument 1
+#define optional_argument 2
#if __STDC__
-int gnu_getopt (int argc, char **argv, const char *shortopts);
-int gnu_getopt_long (int argc, char **argv, const char *shortopts,
- const struct option *longopts, int *longind);
-int gnu_getopt_long_only (int argc, char **argv, const char *shortopts,
- const struct option *longopts, int *longind);
-#else
-int gnu_getopt ();
-int gnu_getopt_long ();
-int gnu_getopt_long_only ();
+#if defined(__GNU_LIBRARY__)
+/* Many other libraries have conflicting prototypes for getopt, with
+ differences in the consts, in stdlib.h. To avoid compilation
+ errors, only prototype getopt for the GNU C library. */
+extern int getopt (int argc, char *const *argv, const char *shortopts);
+#else /* not __GNU_LIBRARY__ */
+extern int getopt ();
+#endif /* not __GNU_LIBRARY__ */
+extern int getopt_long (int argc, char *const *argv, const char *shortopts,
+ const struct option *longopts, int *longind);
+extern int getopt_long_only (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind);
+
+/* Internal only. Users should not call this directly. */
+extern int _getopt_internal (int argc, char *const *argv,
+ const char *shortopts,
+ const struct option *longopts, int *longind,
+ int long_only);
+#else /* not __STDC__ */
+extern int getopt ();
+extern int getopt_long ();
+extern int getopt_long_only ();
+
+extern int _getopt_internal ();
+#endif /* not __STDC__ */
+
+#ifdef __cplusplus
+}
#endif
+
+#endif /* _GETOPT_H */
diff --git a/gnu/usr.bin/cvs/lib/getopt1.c b/gnu/usr.bin/cvs/lib/getopt1.c
index 8606462..f784b57 100644
--- a/gnu/usr.bin/cvs/lib/getopt1.c
+++ b/gnu/usr.bin/cvs/lib/getopt1.c
@@ -1,10 +1,11 @@
-/* Getopt for GNU.
- Copyright (C) 1987-1992 Free Software Foundation, Inc.
+/* getopt_long and getopt_long_only entry points for GNU getopt.
+ Copyright (C) 1987, 88, 89, 90, 91, 92, 1993
+ Free Software Foundation, Inc.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+ This program is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option) any
+ later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -13,67 +14,84 @@
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+ Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
+#ifdef HAVE_CONFIG_H
+#if defined (emacs) || defined (CONFIG_BROKETS)
+/* We use <config.h> instead of "config.h" so that a compilation
+ using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
+ (which it would do because it found this file in $srcdir). */
+#include <config.h>
+#else
+#include "config.h"
+#endif
+#endif
+
#include "getopt.h"
-#if !__STDC__
+#ifndef __STDC__
+/* This is a separate conditional since some stdc systems
+ reject `defined (const)'. */
+#ifndef const
#define const
#endif
+#endif
+
+#include <stdio.h>
+
+/* Comment out all this code if we are using the GNU C Library, and are not
+ actually compiling the library itself. This code is part of the GNU C
+ Library, but also included in many other GNU distributions. Compiling
+ and linking in this code is a waste when using the GNU C library
+ (especially if it is a shared library). Rather than having every GNU
+ program understand `configure --with-gnu-libc' and omit the object files,
+ it is simpler to just do this in the source for each such file. */
+
+#if defined (_LIBC) || !defined (__GNU_LIBRARY__)
+
-#if defined(STDC_HEADERS) || defined(__GNU_LIBRARY__)
+/* This needs to come after some library #include
+ to get __GNU_LIBRARY__ defined. */
+#ifdef __GNU_LIBRARY__
#include <stdlib.h>
-#else /* STDC_HEADERS or __GNU_LIBRARY__ */
+#else
char *getenv ();
-#endif /* STDC_HEADERS or __GNU_LIBRARY__ */
+#endif
-#if !defined (NULL)
+#ifndef NULL
#define NULL 0
#endif
int
-gnu_getopt_long (argc, argv, options, long_options, opt_index)
+getopt_long (argc, argv, options, long_options, opt_index)
int argc;
- char **argv;
+ char *const *argv;
const char *options;
const struct option *long_options;
int *opt_index;
{
- int val;
-
- /* For strict POSIX compatibility, we must turn off long options. */
- if (getenv ("POSIX_ME_HARDER") == 0)
- _getopt_long_options = long_options;
- val = gnu_getopt (argc, argv, options);
- if (val == 0 && opt_index != NULL)
- *opt_index = option_index;
- return val;
+ return _getopt_internal (argc, argv, options, long_options, opt_index, 0);
}
-/* Like getopt_long, but '-' as well as '+' can indicate a long option.
- If an option that starts with '-' doesn't match a long option,
+/* Like getopt_long, but '-' as well as '--' can indicate a long option.
+ If an option that starts with '-' (not '--') doesn't match a long option,
but does match a short option, it is parsed as a short option
- instead. */
+ instead. */
-int
-gnu_getopt_long_only (argc, argv, options, long_options, opt_index)
+int
+getopt_long_only (argc, argv, options, long_options, opt_index)
int argc;
- char **argv;
+ char *const *argv;
const char *options;
const struct option *long_options;
int *opt_index;
{
- int val;
-
- _getopt_long_options = long_options;
- _getopt_long_only = 1;
- val = gnu_getopt (argc, argv, options);
- if (val == 0 && opt_index != NULL)
- *opt_index = option_index;
- return val;
+ return _getopt_internal (argc, argv, options, long_options, opt_index, 1);
}
-
+
+#endif /* _LIBC or not __GNU_LIBRARY__. */
+
#ifdef TEST
#include <stdio.h>
@@ -89,7 +107,6 @@ main (argc, argv)
while (1)
{
int this_option_optind = optind ? optind : 1;
- char *name = '\0';
int option_index = 0;
static struct option long_options[] =
{
@@ -110,7 +127,7 @@ main (argc, argv)
switch (c)
{
case 0:
- printf ("option %s", (long_options[option_index]).name);
+ printf ("option %s", long_options[option_index].name);
if (optarg)
printf (" with arg %s", optarg);
printf ("\n");
@@ -144,6 +161,10 @@ main (argc, argv)
printf ("option c with value `%s'\n", optarg);
break;
+ case 'd':
+ printf ("option d with value `%s'\n", optarg);
+ break;
+
case '?':
break;
diff --git a/gnu/usr.bin/cvs/lib/getwd.c b/gnu/usr.bin/cvs/lib/getwd.c
index 854feaf..573a788 100644
--- a/gnu/usr.bin/cvs/lib/getwd.c
+++ b/gnu/usr.bin/cvs/lib/getwd.c
@@ -19,6 +19,10 @@
of getwd() which is much faster than getcwd(). As a result, we use the
system's getwd() if it is available */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include "system.h"
/* Get the current working directory into PATHNAME */
diff --git a/gnu/usr.bin/cvs/lib/hash.c b/gnu/usr.bin/cvs/lib/hash.c
index fb29497..afc554d 100644
--- a/gnu/usr.bin/cvs/lib/hash.c
+++ b/gnu/usr.bin/cvs/lib/hash.c
@@ -2,7 +2,7 @@
* Copyright (c) 1992, Brian Berliner and Jeff Polk
*
* You may distribute under the terms of the GNU General Public License as
- * specified in the README file that comes with the CVS 1.3 kit.
+ * specified in the README file that comes with the CVS 1.4 kit.
*
* Polk's hash list manager. So cool.
*/
@@ -10,31 +10,32 @@
#include "cvs.h"
#ifndef lint
-static char rcsid[] = "@(#)hash.c 1.14 92/03/31";
+static char rcsid[] = "$CVSid: @(#)hash.c 1.19 94/09/23 $";
+USE(rcsid)
#endif
/* global caches */
static List *listcache = NULL;
static Node *nodecache = NULL;
-#if __STDC__
-static void freenode_mem (Node * p);
-#else
-static void freenode_mem ();
-#endif /* __STDC__ */
+static void freenode_mem PROTO((Node * p));
/* hash function */
static int
hashp (key)
char *key;
{
- register char *p;
- register int n = 0;
+ unsigned int h = 0;
+ unsigned int g;
- for (p = key; *p; p++)
- n += *p;
+ while (*key != 0)
+ {
+ h = (h << 4) + *key++;
+ if ((g = h & 0xf0000000) != 0)
+ h = (h ^ (g >> 24)) ^ g;
+ }
- return (n % HASHSIZE);
+ return (h % HASHSIZE);
}
/*
@@ -60,7 +61,7 @@ getlist ()
{
/* make a new list from scratch */
list = (List *) xmalloc (sizeof (List));
- bzero ((char *) list, sizeof (List));
+ memset ((char *) list, 0, sizeof (List));
node = getnode ();
list->list = node;
node->type = HEADER;
@@ -130,7 +131,7 @@ getnode ()
}
/* always make it clean */
- bzero ((char *) p, sizeof (Node));
+ memset ((char *) p, 0, sizeof (Node));
p->type = UNKNOWN;
return (p);
@@ -247,7 +248,8 @@ addnode (list, p)
}
/*
- * look up an entry in hash list table
+ * look up an entry in hash list table and return a pointer to the
+ * node. Return NULL on error or not found.
*/
Node *
findnode (list, key)
@@ -273,9 +275,10 @@ findnode (list, key)
* walk a list with a specific proc
*/
int
-walklist (list, proc)
+walklist (list, proc, closure)
List *list;
int (*proc) ();
+ void *closure;
{
Node *head, *p;
int err = 0;
@@ -285,7 +288,7 @@ walklist (list, proc)
head = list->list;
for (p = head->next; p != head; p = p->next)
- err += proc (p);
+ err += proc (p, closure);
return (err);
}
@@ -336,3 +339,61 @@ sortlist (list, comp)
}
}
}
+
+/* Debugging functions. Quite useful to call from within gdb. */
+
+char *
+nodetypestring (type)
+ Ntype type;
+{
+ switch (type) {
+ case UNKNOWN: return("UNKNOWN");
+ case HEADER: return("HEADER");
+ case ENTRIES: return("ENTRIES");
+ case FILES: return("FILES");
+ case LIST: return("LIST");
+ case RCSNODE: return("RCSNODE");
+ case RCSVERS: return("RCSVERS");
+ case DIRS: return("DIRS");
+ case UPDATE: return("UPDATE");
+ case LOCK: return("LOCK");
+ case NDBMNODE: return("NDBMNODE");
+ }
+
+ return("<trash>");
+}
+
+int
+printnode (node, closure)
+ Node *node;
+ void *closure;
+{
+ if (node == NULL)
+ {
+ (void) printf("NULL node.\n");
+ return(0);
+ }
+
+ (void) printf("Node at 0x%p: type = %s, key = 0x%p = \"%s\", data = 0x%p, next = 0x%p, prev = 0x%p\n",
+ node, nodetypestring(node->type), node->key, node->key, node->data, node->next, node->prev);
+
+ return(0);
+}
+
+void
+printlist (list)
+ List *list;
+{
+ if (list == NULL)
+ {
+ (void) printf("NULL list.\n");
+ return;
+ }
+
+ (void) printf("List at 0x%p: list = 0x%p, HASHSIZE = %d, next = 0x%p\n",
+ list, list->list, HASHSIZE, list->next);
+
+ (void) walklist(list, printnode, NULL);
+
+ return;
+}
diff --git a/gnu/usr.bin/cvs/lib/hash.h b/gnu/usr.bin/cvs/lib/hash.h
index 54f227e..8e10e81 100644
--- a/gnu/usr.bin/cvs/lib/hash.h
+++ b/gnu/usr.bin/cvs/lib/hash.h
@@ -1,10 +1,10 @@
-/* @(#)hash.h 1.18 92/03/31 */
+/* $CVSid: @(#)hash.h 1.23 94/10/07 $ */
/*
* Copyright (c) 1992, Brian Berliner and Jeff Polk
*
* You may distribute under the terms of the GNU General Public License as
- * specified in the README file that comes with the CVS 1.3 kit.
+ * specified in the README file that comes with the CVS 1.4 kit.
*/
/*
@@ -51,27 +51,16 @@ struct entnode
char *options;
char *tag;
char *date;
+ char *conflict;
};
typedef struct entnode Entnode;
-#if __STDC__
-List *getlist (void);
-Node *findnode (List * list, char *key);
-Node *getnode (void);
-int addnode (List * list, Node * p);
-int walklist (List * list, int (*proc) ());
-void dellist (List ** listp);
-void delnode (Node * p);
-void freenode (Node * p);
-void sortlist (List * list, int (*comp) ());
-#else
-List *getlist ();
-Node *findnode ();
-Node *getnode ();
-int addnode ();
-int walklist ();
-void dellist ();
-void delnode ();
-void freenode ();
-void sortlist ();
-#endif /* __STDC__ */
+List *getlist PROTO((void));
+Node *findnode PROTO((List * list, char *key));
+Node *getnode PROTO((void));
+int addnode PROTO((List * list, Node * p));
+int walklist PROTO((List * list, int PROTO((*proc)) PROTO((Node *n, void *closure)), void *closure));
+void dellist PROTO((List ** listp));
+void delnode PROTO((Node * p));
+void freenode PROTO((Node * p));
+void sortlist PROTO((List * list, int PROTO((*comp))()));
diff --git a/gnu/usr.bin/cvs/lib/mkdir.c b/gnu/usr.bin/cvs/lib/mkdir.c
index b17cca2..a70c1d8 100644
--- a/gnu/usr.bin/cvs/lib/mkdir.c
+++ b/gnu/usr.bin/cvs/lib/mkdir.c
@@ -15,6 +15,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
diff --git a/gnu/usr.bin/cvs/lib/myndbm.c b/gnu/usr.bin/cvs/lib/myndbm.c
index 8069698..33ef49c 100644
--- a/gnu/usr.bin/cvs/lib/myndbm.c
+++ b/gnu/usr.bin/cvs/lib/myndbm.c
@@ -2,7 +2,7 @@
* Copyright (c) 1992, Brian Berliner
*
* You may distribute under the terms of the GNU General Public License as
- * specified in the README file that comes with the CVS 1.3 kit.
+ * specified in the README file that comes with the CVS 1.4 kit.
*
* A simple ndbm-emulator for CVS. It parses a text file of the format:
*
@@ -18,7 +18,8 @@
#ifdef MY_NDBM
#ifndef lint
-static char rcsid[] = "@(#)myndbm.c 1.5 92/03/31";
+static char rcsid[] = "$CVSid: @(#)myndbm.c 1.7 94/09/23 $";
+USE(rcsid)
#endif
static void mydbm_load_file ();
@@ -138,7 +139,7 @@ mydbm_load_file (fp, list)
for (cont = 0; fgets (line, sizeof (line), fp) != NULL;)
{
- if ((cp = rindex (line, '\n')) != NULL)
+ if ((cp = strrchr (line, '\n')) != NULL)
*cp = '\0'; /* strip the newline */
/*
diff --git a/gnu/usr.bin/cvs/lib/myndbm.h b/gnu/usr.bin/cvs/lib/myndbm.h
index d71acdf..3af31305 100644
--- a/gnu/usr.bin/cvs/lib/myndbm.h
+++ b/gnu/usr.bin/cvs/lib/myndbm.h
@@ -1,4 +1,4 @@
-/* @(#)myndbm.h 1.3 92/02/29 */
+/* $CVSid: @(#)myndbm.h 1.4 94/09/21 $ */
#ifdef MY_NDBM
@@ -27,18 +27,10 @@ typedef struct
#define dbm_firstkey mydbm_firstkey
#define dbm_nextkey mydbm_nextkey
-#if __STDC__
-DBM *mydbm_open (char *file, int flags, int mode);
-void mydbm_close (DBM * db);
-datum mydbm_fetch (DBM * db, datum key);
-datum mydbm_firstkey (DBM * db);
-datum mydbm_nextkey (DBM * db);
-#else
-DBM *mydbm_open ();
-void mydbm_close ();
-datum mydbm_fetch ();
-datum mydbm_firstkey ();
-datum mydbm_nextkey ();
-#endif /* __STDC__ */
+DBM *mydbm_open PROTO((char *file, int flags, int mode));
+void mydbm_close PROTO((DBM * db));
+datum mydbm_fetch PROTO((DBM * db, datum key));
+datum mydbm_firstkey PROTO((DBM * db));
+datum mydbm_nextkey PROTO((DBM * db));
#endif /* MY_NDBM */
diff --git a/gnu/usr.bin/cvs/lib/regex.c b/gnu/usr.bin/cvs/lib/regex.c
index 3bccfd3..8169880 100644
--- a/gnu/usr.bin/cvs/lib/regex.c
+++ b/gnu/usr.bin/cvs/lib/regex.c
@@ -1,8 +1,9 @@
/* Extended regular expression matching and search library,
- version 0.4.
- (Implements POSIX draft P10003.2/D11.2, except for multibyte characters.)
+ version 0.12.
+ (Implements POSIX draft P10003.2/D11.2, except for
+ internationalization features.)
- Copyright (C) 1985, 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -18,26 +19,24 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+/* AIX requires this to be the first thing in the file. */
#if defined (_AIX) && !defined (REGEX_MALLOC)
#pragma alloca
#endif
#define _GNU_SOURCE
-/* For interactive testing, compile with -Dtest. Then this becomes
- a self-contained program which reads a pattern, describes how it
- compiles, then reads a string and searches for it. If a command-line
- argument is present, it is taken to be the value for obscure_syntax (in
- decimal). The default is 0 (Emacs-style syntax).
-
- If DEBUG is defined, this prints many voluminous messages about what
- it is doing (if the variable `debug' is nonzero). */
+/* We need this for `regex.h', and perhaps for the Emacs include files. */
+#include <sys/types.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
/* The `emacs' switch turns on certain matching commands
that make sense only in Emacs. */
#ifdef emacs
-#include "config.h"
+
#include "lisp.h"
#include "buffer.h"
#include "syntax.h"
@@ -47,41 +46,31 @@
#else /* not emacs */
-/* POSIX.1 says that <unistd.h> might need <sys/types.h>. We also need
- it for regex.h. */
-#include <sys/types.h>
-
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-#if defined (USG) || defined (POSIX) || defined (STDC_HEADERS)
-#ifndef BSTRING
+/* We used to test for `BSTRING' here, but only GCC and Emacs define
+ `BSTRING', as far as I know, and neither of them use this code. */
+#if HAVE_STRING_H || STDC_HEADERS
#include <string.h>
-#ifndef bcopy
-#define bcopy(s,d,n) memcpy ((d), (s), (n))
-#endif
#ifndef bcmp
-#define bcmp(s1,s2,n) memcmp ((s1), (s2), (n))
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif
+#ifndef bcopy
+#define bcopy(s, d, n) memcpy ((d), (s), (n))
#endif
#ifndef bzero
-#define bzero(s,n) memset ((s), 0, (n))
+#define bzero(s, n) memset ((s), 0, (n))
+#endif
+#else
+#include <strings.h>
#endif
-#endif /* not BSTRING */
-#endif /* USG or POSIX or STDC_HEADERS */
#ifdef STDC_HEADERS
#include <stdlib.h>
-#else /* not STDC_HEADERS */
+#else
char *malloc ();
char *realloc ();
-#endif /* not STDC_HEADERS */
-
-/* If debugging, we use standard I/O. */
-#ifdef DEBUG
-#include <stdio.h>
#endif
+
/* Define the syntax stuff for \<, \>, etc. */
/* This must be nonzero for the wordchar and notwordchar pattern
@@ -97,7 +86,7 @@ extern char *re_syntax_table;
#else /* not SYNTAX_TABLE */
/* How many characters in the character set. */
-#define CHAR_SET_SIZE 256
+#define CHAR_SET_SIZE 256
static char re_syntax_table[CHAR_SET_SIZE];
@@ -131,33 +120,55 @@ init_syntax_once ()
#define SYNTAX(c) re_syntax_table[c]
#endif /* not emacs */
-
-
+
/* Get the interface, including the syntax bits. */
#include "regex.h"
-
-/* isalpha(3) etc. are used for the character classes. */
+/* isalpha etc. are used for the character classes. */
#include <ctype.h>
-#ifndef isgraph
-#define isgraph(c) (isprint (c) && !isspace (c))
+
+#ifndef isascii
+#define isascii(c) 1
#endif
-#ifndef isblank
-#define isblank(c) ((c) == ' ' || (c) == '\t')
+
+#ifdef isblank
+#define ISBLANK(c) (isascii (c) && isblank (c))
+#else
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+#ifdef isgraph
+#define ISGRAPH(c) (isascii (c) && isgraph (c))
+#else
+#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
#endif
+#define ISPRINT(c) (isascii (c) && isprint (c))
+#define ISDIGIT(c) (isascii (c) && isdigit (c))
+#define ISALNUM(c) (isascii (c) && isalnum (c))
+#define ISALPHA(c) (isascii (c) && isalpha (c))
+#define ISCNTRL(c) (isascii (c) && iscntrl (c))
+#define ISLOWER(c) (isascii (c) && islower (c))
+#define ISPUNCT(c) (isascii (c) && ispunct (c))
+#define ISSPACE(c) (isascii (c) && isspace (c))
+#define ISUPPER(c) (isascii (c) && isupper (c))
+#define ISXDIGIT(c) (isascii (c) && isxdigit (c))
+
#ifndef NULL
#define NULL 0
#endif
-#ifndef SIGN_EXTEND_CHAR
-#ifdef __CHAR_UNSIGNED__ /* for, e.g., IBM RT */
-#define SIGN_EXTEND_CHAR(c) (((c)^128) - 128) /* As in Harbison and Steele. */
-#else
-#define SIGN_EXTEND_CHAR /* As nothing. */
-#endif /* not CHAR_UNSIGNED */
-#endif /* not SIGN_EXTEND_CHAR */
-
+/* We remove any previous definition of `SIGN_EXTEND_CHAR',
+ since ours (we hope) works properly with all combinations of
+ machines, compilers, `char' and `unsigned char' argument types.
+ (Per Bothner suggested the basic approach.) */
+#undef SIGN_EXTEND_CHAR
+#if __STDC__
+#define SIGN_EXTEND_CHAR(c) ((signed char) (c))
+#else /* not __STDC__ */
+/* As in Harbison and Steele. */
+#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
+#endif
+
/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
use `alloca' instead of `malloc'. This is because using malloc in
re_search* or re_match* could cause memory leaks when C-g is used in
@@ -165,12 +176,13 @@ init_syntax_once ()
the other hand, malloc is more portable, and easier to debug.
Because we sometimes use alloca, some routines have to be macros,
- not functions---alloca-allocated space disappears at the end of the
+ not functions -- `alloca'-allocated space disappears at the end of the
function it is called in. */
+
#ifdef REGEX_MALLOC
#define REGEX_ALLOCATE malloc
-#define REGEX_REALLOCATE(source, size) (realloc (source, size))
+#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
#else /* not REGEX_MALLOC */
@@ -181,31 +193,38 @@ init_syntax_once ()
#ifdef __GNUC__
#define alloca __builtin_alloca
#else /* not __GNUC__ */
-#ifdef sparc
+#if HAVE_ALLOCA_H
#include <alloca.h>
-#else /* not __GNUC__ or sparc */
+#else /* not __GNUC__ or HAVE_ALLOCA_H */
+#ifndef _AIX /* Already did AIX, up at the top. */
char *alloca ();
-#endif /* not sparc */
-#endif /* not __GNUC__ */
+#endif /* not _AIX */
+#endif /* not HAVE_ALLOCA_H */
+#endif /* not __GNUC__ */
#endif /* not alloca */
-/* Still not REGEX_MALLOC. */
-
#define REGEX_ALLOCATE alloca
-/* Requires a `char *destination' declared. */
-#define REGEX_REALLOCATE(source, size) \
- (destination = (char *) alloca (size), \
- bcopy (source, destination, size), \
+/* Assumes a `char *destination' variable. */
+#define REGEX_REALLOCATE(source, osize, nsize) \
+ (destination = (char *) alloca (nsize), \
+ bcopy (source, destination, osize), \
destination)
#endif /* not REGEX_MALLOC */
+
+/* True if `size1' is nonzero and PTR is pointing anywhere inside
+ `string1' or just past its end. This works if PTR is NULL, which is
+ a good thing. */
+#define FIRST_STRING_P(ptr) \
+ (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+
/* (Re)Allocate N items of type T using malloc, or fail. */
-#define TALLOC(n, t) (t *) malloc ((n) * sizeof (t))
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
-
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
#define BYTEWIDTH 8 /* In bits. */
@@ -213,6 +232,10 @@ char *alloca ();
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
/* These are the command codes that appear in compiled regular
expressions. Some opcodes are followed by argument bytes. A
@@ -281,17 +304,17 @@ typedef enum
endbuf,
/* Followed by two byte relative address to which to jump. */
- no_pop_jump,
+ jump,
- /* Same as no_pop_jump, but marks the end of an alternative. */
- jump_past_next_alt,
+ /* Same as jump, but marks the end of an alternative. */
+ jump_past_alt,
/* Followed by two-byte relative address of place to resume at
in case of failure. */
on_failure_jump,
/* Like on_failure_jump, but pushes a placeholder instead of the
- current string position. */
+ current string position when executed. */
on_failure_keep_string_jump,
/* Throw away latest failure point and then jump to following
@@ -299,7 +322,7 @@ typedef enum
pop_failure_jump,
/* Change to pop_failure_jump if know won't have to backtrack to
- match; otherwise change to no_pop_jump. This is used to jump
+ match; otherwise change to jump. This is used to jump
back to the beginning of a repeat. If what follows this jump
clearly won't match what the repeat does, such that we can be
sure that there is no use backtracking out of repetitions
@@ -314,18 +337,21 @@ typedef enum
of jump when compiling an alternative. */
dummy_failure_jump,
- /* Used like on_failure_jump except has to succeed n times; The
- two-byte relative address following it is useless until then.
- The address is followed by two more bytes containing n. */
+ /* Push a dummy failure point and continue. Used at the end of
+ alternatives. */
+ push_dummy_failure,
+
+ /* Followed by two-byte relative address and two-byte number n.
+ After matching N times, jump to the address upon failure. */
succeed_n,
- /* Similar to no_pop_jump, but jump n times only; also the
- relative address following is in turn followed by yet two
- more bytes containing n. */
- no_pop_jump_n,
+ /* Followed by two-byte relative address, and two-byte number n.
+ Jump to the address N times, then fail. */
+ jump_n,
- /* Set the following relative location (two bytes) to the
- subsequent (two-byte) number. */
+ /* Set the following two-byte relative address to the
+ subsequent two-byte number. The address *includes* the two
+ bytes of number. */
set_number_at,
wordchar, /* Matches any word-constituent character. */
@@ -361,7 +387,6 @@ typedef enum
(destination)[1] = (number) >> 8; \
} while (0)
-
/* Same as STORE_NUMBER, except increment DESTINATION to
the byte after where the number is stored. Therefore, DESTINATION
must be an lvalue. */
@@ -372,28 +397,32 @@ typedef enum
(destination) += 2; \
} while (0)
-
/* Put into DESTINATION a number stored in two contiguous bytes starting
at SOURCE. */
#define EXTRACT_NUMBER(destination, source) \
do { \
(destination) = *(source) & 0377; \
- (destination) += SIGN_EXTEND_CHAR (*(const char *)((source) + 1)) << 8;\
+ (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
} while (0)
#ifdef DEBUG
-static int
-extract_number (source)
+static void
+extract_number (dest, source)
+ int *dest;
unsigned char *source;
{
- int answer = *source & 0377;
- answer += (SIGN_EXTEND_CHAR (*(char *)((source) + 1))) << 8;
-
- return answer;
+ int temp = SIGN_EXTEND_CHAR (*(source + 1));
+ *dest = *source & 0377;
+ *dest += temp << 8;
}
-#endif
+#ifndef EXTRACT_MACROS /* To debug the macros. */
+#undef EXTRACT_NUMBER
+#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
+#endif /* not EXTRACT_MACROS */
+
+#endif /* DEBUG */
/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
SOURCE must be an lvalue. */
@@ -410,32 +439,86 @@ extract_number_and_incr (destination, source)
int *destination;
unsigned char **source;
{
- *destination = extract_number (*source);
+ extract_number (destination, *source);
*source += 2;
}
-#endif
+#ifndef EXTRACT_MACROS
+#undef EXTRACT_NUMBER_AND_INCR
+#define EXTRACT_NUMBER_AND_INCR(dest, src) \
+ extract_number_and_incr (&dest, &src)
+#endif /* not EXTRACT_MACROS */
-/* Is true if there is a first string and if PTR is pointing anywhere
- inside it or just past the end. */
-
-#define IS_IN_FIRST_STRING(ptr) \
- (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
+#endif /* DEBUG */
+/* If DEBUG is defined, Regex prints many voluminous messages about what
+ it is doing (if the variable `debug' is nonzero). If linked with the
+ main program in `iregex.c', you can enter patterns and strings
+ interactively. And if linked with the main program in `main.c' and
+ the other test files, you can run the already-written tests. */
+
#ifdef DEBUG
+/* We use standard I/O for debugging. */
+#include <stdio.h>
+
+/* It is useful to test things that ``must'' be true when debugging. */
+#include <assert.h>
+
+static int debug = 0;
+
+#define DEBUG_STATEMENT(e) e
+#define DEBUG_PRINT1(x) if (debug) printf (x)
+#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
+#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
+#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
+ if (debug) print_partial_compiled_pattern (s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
+ if (debug) print_double_string (w, s1, sz1, s2, sz2)
+
+
extern void printchar ();
-/* Print a compiled pattern buffer in human-readable form, starting at
+/* Print the fastmap in human-readable form. */
+
+void
+print_fastmap (fastmap)
+ char *fastmap;
+{
+ unsigned was_a_range = 0;
+ unsigned i = 0;
+
+ while (i < (1 << BYTEWIDTH))
+ {
+ if (fastmap[i++])
+ {
+ was_a_range = 0;
+ printchar (i - 1);
+ while (i < (1 << BYTEWIDTH) && fastmap[i])
+ {
+ was_a_range = 1;
+ i++;
+ }
+ if (was_a_range)
+ {
+ printf ("-");
+ printchar (i - 1);
+ }
+ }
+ }
+ putchar ('\n');
+}
+
+
+/* Print a compiled pattern string in human-readable form, starting at
the START pointer into it and ending just before the pointer END. */
-static void
-partial_compiled_pattern_printer (pbufp, start, end)
- struct re_pattern_buffer *pbufp;
+void
+print_partial_compiled_pattern (start, end)
unsigned char *start;
unsigned char *end;
{
-
int mcnt, mcnt2;
unsigned char *p = start;
unsigned char *pend = end;
@@ -446,7 +529,7 @@ partial_compiled_pattern_printer (pbufp, start, end)
return;
}
- /* This loop loops over pattern commands. */
+ /* Loop over pattern commands. */
while (p < pend)
{
switch ((re_opcode_t) *p++)
@@ -489,12 +572,21 @@ partial_compiled_pattern_printer (pbufp, start, end)
{
register int c;
- printf ("/charset%s/", *(p - 1) == charset_not ? "_not" : "");
+ printf ("/charset%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
+
+ assert (p + *p < pend);
- for (c = 0; p < pend && c < *p * BYTEWIDTH; c++)
+ for (c = 0; c < *p; c++)
{
- if (p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
- printchar (c);
+ unsigned bit;
+ unsigned char map_byte = p[1 + c];
+
+ putchar ('/');
+
+ for (bit = 0; bit < BYTEWIDTH; bit++)
+ if (map_byte & (1 << bit))
+ printchar (c * BYTEWIDTH + bit);
}
p += 1 + *p;
break;
@@ -523,6 +615,10 @@ partial_compiled_pattern_printer (pbufp, start, end)
printf ("/dummy_failure_jump/0/%d", mcnt);
break;
+ case push_dummy_failure:
+ printf ("/push_dummy_failure");
+ break;
+
case maybe_pop_jump:
extract_number_and_incr (&mcnt, &p);
printf ("/maybe_pop_jump/0/%d", mcnt);
@@ -533,14 +629,14 @@ partial_compiled_pattern_printer (pbufp, start, end)
printf ("/pop_failure_jump/0/%d", mcnt);
break;
- case jump_past_next_alt:
+ case jump_past_alt:
extract_number_and_incr (&mcnt, &p);
- printf ("/jump_past_next_alt/0/%d", mcnt);
+ printf ("/jump_past_alt/0/%d", mcnt);
break;
- case no_pop_jump:
+ case jump:
extract_number_and_incr (&mcnt, &p);
- printf ("/no_pop_jump/0/%d", mcnt);
+ printf ("/jump/0/%d", mcnt);
break;
case succeed_n:
@@ -549,10 +645,10 @@ partial_compiled_pattern_printer (pbufp, start, end)
printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
break;
- case no_pop_jump_n:
+ case jump_n:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
- printf ("/no_pop_jump_n/0/%d/0/%d", mcnt, mcnt2);
+ printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
break;
case set_number_at:
@@ -589,36 +685,26 @@ partial_compiled_pattern_printer (pbufp, start, end)
printf ("/after_dot");
break;
- case wordchar:
- printf ("/wordchar-emacs");
- mcnt = (int) Sword;
- break;
-
case syntaxspec:
printf ("/syntaxspec");
mcnt = *p++;
printf ("/%d", mcnt);
break;
- case notwordchar:
- printf ("/notwordchar-emacs");
- mcnt = (int) Sword;
- break;
-
case notsyntaxspec:
printf ("/notsyntaxspec");
mcnt = *p++;
printf ("/%d", mcnt);
break;
-#else /* not emacs */
+#endif /* emacs */
+
case wordchar:
- printf ("/wordchar-notemacs");
+ printf ("/wordchar");
break;
case notwordchar:
- printf ("/notwordchar-notemacs");
+ printf ("/notwordchar");
break;
-#endif /* not emacs */
case begbuf:
printf ("/begbuf");
@@ -635,20 +721,39 @@ partial_compiled_pattern_printer (pbufp, start, end)
printf ("/\n");
}
-static void
-compiled_pattern_printer (pbufp)
- struct re_pattern_buffer *pbufp;
+
+void
+print_compiled_pattern (bufp)
+ struct re_pattern_buffer *bufp;
{
- partial_compiled_pattern_printer (pbufp, pbufp->buffer,
- pbufp->buffer + pbufp->used);
+ unsigned char *buffer = bufp->buffer;
+
+ print_partial_compiled_pattern (buffer, buffer + bufp->used);
+ printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
+
+ if (bufp->fastmap_accurate && bufp->fastmap)
+ {
+ printf ("fastmap: ");
+ print_fastmap (bufp->fastmap);
+ }
+
+ printf ("re_nsub: %d\t", bufp->re_nsub);
+ printf ("regs_alloc: %d\t", bufp->regs_allocated);
+ printf ("can_be_null: %d\t", bufp->can_be_null);
+ printf ("newline_anchor: %d\n", bufp->newline_anchor);
+ printf ("no_sub: %d\t", bufp->no_sub);
+ printf ("not_bol: %d\t", bufp->not_bol);
+ printf ("not_eol: %d\t", bufp->not_eol);
+ printf ("syntax: %d\n", bufp->syntax);
+ /* Perhaps we should print the translate table? */
}
-static void
-double_string_printer (where, string1, size1, string2, size2)
- unsigned char *where;
- unsigned char *string1;
- unsigned char *string2;
+void
+print_double_string (where, string1, size1, string2, size2)
+ const char *where;
+ const char *string1;
+ const char *string2;
int size1;
int size2;
{
@@ -658,7 +763,7 @@ double_string_printer (where, string1, size1, string2, size2)
printf ("(null)");
else
{
- if (IS_IN_FIRST_STRING (where))
+ if (FIRST_STRING_P (where))
{
for (this_char = where - string1; this_char < size1; this_char++)
printchar (string1[this_char]);
@@ -671,24 +776,6 @@ double_string_printer (where, string1, size1, string2, size2)
}
}
-#endif /* DEBUG */
-
-#ifdef DEBUG
-
-/* It is useful to test things that must to be true when debugging. */
-#include <assert.h>
-
-static int debug = 0;
-
-#define DEBUG_STATEMENT(e) e
-#define DEBUG_PRINT1(x) if (debug) printf (x)
-#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
-#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
-#define DEBUG_COMPILED_PATTERN_PRINTER(p, s, e) \
- if (debug) partial_compiled_pattern_printer (p, s, e)
-#define DEBUG_DOUBLE_STRING_PRINTER(w, s1, sz1, s2, sz2) \
- if (debug) double_string_printer (w, s1, sz1, s2, sz2)
-
#else /* not DEBUG */
#undef assert
@@ -698,19 +785,16 @@ static int debug = 0;
#define DEBUG_PRINT1(x)
#define DEBUG_PRINT2(x1, x2)
#define DEBUG_PRINT3(x1, x2, x3)
-#define DEBUG_COMPILED_PATTERN_PRINTER(p, s, e)
-#define DEBUG_DOUBLE_STRING_PRINTER(w, s1, sz1, s2, sz2)
+#define DEBUG_PRINT4(x1, x2, x3, x4)
+#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
+#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
#endif /* not DEBUG */
-
-typedef char boolean;
-#define false 0
-#define true 1
-/* Set by re_set_syntax to the current regexp syntax to recognize. Can
- also be assigned to more or less arbitrarily. Since we use this as a
- collection of bits, declaring it unsigned maximizes portability. */
-reg_syntax_t obscure_syntax = 0;
+/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
+ also be assigned to arbitrarily: each pattern buffer stores its own
+ syntax, so it can be changed between regex compilations. */
+reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
/* Specify the precise syntax of regexps for compilation. This provides
@@ -724,9 +808,9 @@ reg_syntax_t
re_set_syntax (syntax)
reg_syntax_t syntax;
{
- reg_syntax_t ret = obscure_syntax;
+ reg_syntax_t ret = re_syntax_options;
- obscure_syntax = syntax;
+ re_syntax_options = syntax;
return ret;
}
@@ -753,12 +837,13 @@ static const char *re_error_msg[] =
"Unmatched ) or \\)", /* REG_ERPAREN */
};
-/* Other subroutine declarations and macros for regex_compile. */
+/* Subroutine declarations and macros for regex_compile. */
-static void store_jump (), insert_jump (), store_jump_n (),
- insert_jump_n (), insert_op_2 ();
-
-static boolean at_endline_op_p (), group_in_compile_stack ();
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
/* Fetch the next character in the uncompiled pattern---translating it
if necessary. Also cast from a signed character in the constant
@@ -795,22 +880,19 @@ static boolean at_endline_op_p (), group_in_compile_stack ();
/* Make sure we have at least N more bytes of space in buffer. */
#define GET_BUFFER_SPACE(n) \
- { \
while (b - bufp->buffer + (n) > bufp->allocated) \
- EXTEND_BUFFER (); \
- }
+ EXTEND_BUFFER ()
/* Make sure we have one more byte of buffer space and then add C to it. */
-#define PAT_PUSH(c) \
+#define BUF_PUSH(c) \
do { \
GET_BUFFER_SPACE (1); \
*b++ = (unsigned char) (c); \
} while (0)
-/* Make sure we have two more bytes of buffer space and then add C1 and
- C2 to it. */
-#define PAT_PUSH_2(c1, c2) \
+/* Ensure we have two more bytes of buffer space and then append C1 and C2. */
+#define BUF_PUSH_2(c1, c2) \
do { \
GET_BUFFER_SPACE (2); \
*b++ = (unsigned char) (c1); \
@@ -818,9 +900,8 @@ static boolean at_endline_op_p (), group_in_compile_stack ();
} while (0)
-/* Make sure we have two more bytes of buffer space and then add C1, C2
- and C3 to it. */
-#define PAT_PUSH_3(c1, c2, c3) \
+/* As with BUF_PUSH_2, except for three bytes. */
+#define BUF_PUSH_3(c1, c2, c3) \
do { \
GET_BUFFER_SPACE (3); \
*b++ = (unsigned char) (c1); \
@@ -828,11 +909,31 @@ static boolean at_endline_op_p (), group_in_compile_stack ();
*b++ = (unsigned char) (c3); \
} while (0)
-/* This is not an arbitrary limit: the arguments to the opcodes which
- represent offsets into the pattern are two bytes long. So if 2^16
- bytes turns out to be too small, many things would have to change. */
+
+/* Store a jump with opcode OP at LOC to location TO. We store a
+ relative address offset by the three bytes the jump itself occupies. */
+#define STORE_JUMP(op, loc, to) \
+ store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump. */
+#define STORE_JUMP2(op, loc, to, arg) \
+ store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP(op, loc, to) \
+ insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
+#define INSERT_JUMP2(op, loc, to, arg) \
+ insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+ into the pattern are two bytes long. So if 2^16 bytes turns out to
+ be too small, many things would have to change. */
#define MAX_BUF_SIZE (1L << 16)
+
/* Extend the buffer by twice its current size via realloc and
reset the pointers that pointed into the old block to point to the
correct places in the new one. If extending the buffer results in it
@@ -866,14 +967,18 @@ static boolean at_endline_op_p (), group_in_compile_stack ();
/* Since we have one byte reserved for the register number argument to
{start,stop}_memory, the maximum number of groups we can report
things about is what fits in that byte. */
-typedef unsigned char regnum_t;
-#define MAX_REGNUM ((regnum_t) ((1 << BYTEWIDTH) - 1))
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers. We just
+ ignore the excess. */
+typedef unsigned regnum_t;
/* Macros for the compile stack. */
-/* This type needs to be able to hold values from 0 to MAX_BUF_SIZE - 1. */
-typedef short pattern_offset_t;
+/* Since offsets can go either forwards or backwards, this type needs to
+ be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
+typedef int pattern_offset_t;
typedef struct
{
@@ -903,7 +1008,9 @@ typedef struct
/* Set the bit for character C in a list. */
-#define SET_LIST_BIT(c) (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
+#define SET_LIST_BIT(c) \
+ (b[((unsigned char) (c)) / BYTEWIDTH] \
+ |= 1 << (((unsigned char) c) % BYTEWIDTH))
/* Get the next unsigned number in the uncompiled pattern. */
@@ -911,7 +1018,7 @@ typedef struct
{ if (p != pend) \
{ \
PATFETCH (c); \
- while (isdigit (c)) \
+ while (ISDIGIT (c)) \
{ \
if (num < 0) \
num = 0; \
@@ -923,28 +1030,6 @@ typedef struct
} \
}
-
-/* Read the endpoint of a range from the uncompiled pattern and set the
- corresponding bits in the compiled pattern. */
-
-#define DO_RANGE \
- { \
- char end; \
- char this_char = p[-2]; \
- \
- if (p == pend) \
- return REG_ERANGE; \
- PATFETCH (end); \
- if (syntax & RE_NO_EMPTY_RANGES && this_char > end) \
- return REG_ERANGE; \
- while (this_char <= end) \
- { \
- SET_LIST_BIT (TRANSLATE (this_char)); \
- this_char++; \
- } \
- }
-
-
#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
#define IS_CHAR_CLASS(string) \
@@ -954,10 +1039,9 @@ typedef struct
|| STREQ (string, "space") || STREQ (string, "print") \
|| STREQ (string, "punct") || STREQ (string, "graph") \
|| STREQ (string, "cntrl") || STREQ (string, "blank"))
-
-/* regex_compile compiles PATTERN (of length SIZE) according to SYNTAX.
- Returns one of error codes defined in regex.h, or zero for success.
+/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
+ Returns one of error codes defined in `regex.h', or zero for success.
Assumes the `allocated' (and perhaps `buffer') and `translate'
fields are set in BUFP on entry.
@@ -967,9 +1051,9 @@ typedef struct
`buffer' is the compiled pattern;
`syntax' is set to SYNTAX;
`used' is set to the length of the compiled pattern;
- `fastmap_accurate' is set to zero;
- `re_nsub' is set to the number of groups in PATTERN;
- `not_bol' and `not_eol' are set to zero.
+ `fastmap_accurate' is zero;
+ `re_nsub' is the number of subexpressions in PATTERN;
+ `not_bol' and `not_eol' are zero;
The `fastmap' and `newline_anchor' fields are neither
examined nor set. */
@@ -981,12 +1065,20 @@ regex_compile (pattern, size, syntax, bufp)
reg_syntax_t syntax;
struct re_pattern_buffer *bufp;
{
+ /* We fetch characters from PATTERN here. Even though PATTERN is
+ `char *' (i.e., signed), we declare these variables as unsigned, so
+ they can be reliably used as array indices. */
register unsigned char c, c1;
+
+ /* A random temporary spot in PATTERN. */
const char *p1;
/* Points to the end of the buffer, where we should append. */
register unsigned char *b;
+ /* Keeps track of unclosed groups. */
+ compile_stack_type compile_stack;
+
/* Points to the current (ending) position in the pattern. */
const char *p = pattern;
const char *pend = pattern + size;
@@ -1005,28 +1097,23 @@ regex_compile (pattern, size, syntax, bufp)
operand. Reset at the beginning of groups and alternatives. */
unsigned char *laststart = 0;
- /* Place in the uncompiled pattern (i.e., the {) to
- which to go back if the interval is invalid. */
- const char *beg_interval; /* The `{'. */
- const char *following_left_brace;
-
/* Address of beginning of regexp, or inside of last group. */
unsigned char *begalt;
-
+
+ /* Place in the uncompiled pattern (i.e., the {) to
+ which to go back if the interval is invalid. */
+ const char *beg_interval;
+
/* Address of the place where a forward jump should go to the end of
- the containing expression. Each alternative of an `or'---except the
- last---ends with a forward jump of this sort. */
+ the containing expression. Each alternative of an `or' -- except the
+ last -- ends with a forward jump of this sort. */
unsigned char *fixup_alt_jump = 0;
/* Counts open-groups as they are encountered. Remembered for the
matching close-group on the compile stack, so the same register
- number is put in the stop_memory as the start_memory. The type
- here is determined by MAX_REGNUM. */
+ number is put in the stop_memory as the start_memory. */
regnum_t regnum = 0;
- /* Keeps track of unclosed groups. */
- compile_stack_type compile_stack;
-
#ifdef DEBUG
DEBUG_PRINT1 ("\nCompiling pattern: ");
if (debug)
@@ -1035,8 +1122,7 @@ regex_compile (pattern, size, syntax, bufp)
for (debug_count = 0; debug_count < size; debug_count++)
printchar (pattern[debug_count]);
-
- DEBUG_PRINT1 ("\n");
+ putchar ('\n');
}
#endif /* DEBUG */
@@ -1069,9 +1155,9 @@ regex_compile (pattern, size, syntax, bufp)
if (bufp->allocated == 0)
{
if (bufp->buffer)
- { /* EXTEND_BUFFER loses when bufp->allocated is 0. This loses if
- buffer's address is bogus, but that is the user's
- responsibility. */
+ { /* If zero allocated, but buffer is non-null, try to realloc
+ enough space. This loses if buffer's address is bogus, but
+ that is the user's responsibility. */
RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
}
else
@@ -1092,33 +1178,21 @@ regex_compile (pattern, size, syntax, bufp)
switch (c)
{
- /* ^ matches the empty string at the beginning of a string (or
- possibly a line). If RE_CONTEXT_INDEP_ANCHORS is set, ^ is
- always an operator (and foo^bar is unmatchable). If that bit
- isn't set, it's an operator only at the beginning of the
- pattern or after an alternation or open-group operator, or,
- if RE_NEWLINE_ORDINARY is not set, after a newline (except it
- can be preceded by other operators that match the empty
- string); otherwise, it's a normal character. */
case '^':
{
- if ( /* If at start of (sub)pattern, it's an operator. */
- laststart == 0
+ if ( /* If at start of pattern, it's an operator. */
+ p == pattern + 1
/* If context independent, it's an operator. */
|| syntax & RE_CONTEXT_INDEP_ANCHORS
- /* If after a newline, might be an operator. (Since
- laststart is nonzero here, we know we have at
- least one byte before the ^.) */
- || (!(syntax & RE_NEWLINE_ORDINARY) && p[-2] == '\n'))
- PAT_PUSH (begline);
+ /* Otherwise, depends on what's come before. */
+ || at_begline_loc_p (pattern, p, syntax))
+ BUF_PUSH (begline);
else
goto normal_char;
}
break;
- /* $ matches the empty string following the end of the string (or
- possibly a line). It follows rules dual to those for ^. */
case '$':
{
if ( /* If at end of pattern, it's an operator. */
@@ -1126,8 +1200,8 @@ regex_compile (pattern, size, syntax, bufp)
/* If context independent, it's an operator. */
|| syntax & RE_CONTEXT_INDEP_ANCHORS
/* Otherwise, depends on what's next. */
- || at_endline_op_p (p, pend, syntax))
- PAT_PUSH (endline);
+ || at_endline_loc_p (p, pend, syntax))
+ BUF_PUSH (endline);
else
goto normal_char;
}
@@ -1219,7 +1293,7 @@ regex_compile (pattern, size, syntax, bufp)
through the loop. */
assert (p - 1 > pattern);
- /* Get the space for the jump. */
+ /* Allocate the space for the jump. */
GET_BUFFER_SPACE (3);
/* We know we are not at the first character of the pattern,
@@ -1228,15 +1302,16 @@ regex_compile (pattern, size, syntax, bufp)
the `*'. Do we have to do something analogous here
for null bytes, because of RE_DOT_NOT_NULL? */
if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
+ && zero_times_ok
&& p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
&& !(syntax & RE_DOT_NEWLINE))
{ /* We have .*\n. */
- store_jump (b, no_pop_jump, laststart);
+ STORE_JUMP (jump, b, laststart);
keep_string_p = true;
}
else
/* Anything else. */
- store_jump (b, maybe_pop_jump, laststart - 3);
+ STORE_JUMP (maybe_pop_jump, b, laststart - 3);
/* We've added more stuff to the buffer. */
b += 3;
@@ -1245,20 +1320,21 @@ regex_compile (pattern, size, syntax, bufp)
/* On failure, jump from laststart to b + 3, which will be the
end of the buffer after this jump is inserted. */
GET_BUFFER_SPACE (3);
- insert_jump (keep_string_p ? on_failure_keep_string_jump
+ INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
: on_failure_jump,
- laststart, b + 3, b);
+ laststart, b + 3);
pending_exact = 0;
b += 3;
if (!zero_times_ok)
{
/* At least one repetition is required, so insert a
- dummy_failure before the initial on_failure_jump
- instruction of the loop. This effects a skip over that
- instruction the first time we hit that loop. */
+ `dummy_failure_jump' before the initial
+ `on_failure_jump' instruction of the loop. This
+ effects a skip over that instruction the first time
+ we hit that loop. */
GET_BUFFER_SPACE (3);
- insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
+ INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
b += 3;
}
}
@@ -1267,25 +1343,25 @@ regex_compile (pattern, size, syntax, bufp)
case '.':
laststart = b;
- PAT_PUSH (anychar);
+ BUF_PUSH (anychar);
break;
case '[':
{
- boolean just_had_a_char_class = false;
+ boolean had_char_class = false;
if (p == pend) return REG_EBRACK;
- /* Ensure that we have enough space to push an entire
- charset: the opcode, the byte count, and the bitmap. */
- while (b - bufp->buffer + 2 + (1 << BYTEWIDTH) / BYTEWIDTH
- > bufp->allocated)
- EXTEND_BUFFER ();
+ /* Ensure that we have enough space to push a charset: the
+ opcode, the length count, and the bitset; 34 bytes in all. */
+ GET_BUFFER_SPACE (34);
laststart = b;
- PAT_PUSH (*p == '^' ? charset_not : charset);
+ /* We test `*p == '^' twice, instead of using an if
+ statement, so we only need one BUF_PUSH. */
+ BUF_PUSH (*p == '^' ? charset_not : charset);
if (*p == '^')
p++;
@@ -1293,7 +1369,7 @@ regex_compile (pattern, size, syntax, bufp)
p1 = p;
/* Push the number of bytes in the bitmap. */
- PAT_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
+ BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
/* Clear the whole map. */
bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
@@ -1328,7 +1404,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Look ahead to see if it's a range when the last thing
was a character class. */
- if (just_had_a_char_class && c == '-' && *p != ']')
+ if (had_char_class && c == '-' && *p != ']')
return REG_ERANGE;
/* Look ahead to see if it's a range when the last thing
@@ -1340,13 +1416,20 @@ regex_compile (pattern, size, syntax, bufp)
&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
&& *p != ']')
{
- DO_RANGE;
+ reg_errcode_t ret
+ = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
}
else if (p[0] == '-' && p[1] != ']')
{ /* This handles ranges made up of characters only. */
- PATFETCH (c1); /* The `-'. */
- DO_RANGE;
+ reg_errcode_t ret;
+
+ /* Move past the `-'. */
+ PATFETCH (c1);
+
+ ret = compile_range (&p, pend, translate, syntax, b);
+ if (ret != REG_NOERROR) return ret;
}
/* See if we're at the beginning of a possible character
@@ -1401,21 +1484,21 @@ regex_compile (pattern, size, syntax, bufp)
for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
{
- if ( (is_alnum && isalnum (ch))
- || (is_alpha && isalpha (ch))
- || (is_blank && isblank (ch))
- || (is_cntrl && iscntrl (ch))
- || (is_digit && isdigit (ch))
- || (is_graph && isgraph (ch))
- || (is_lower && islower (ch))
- || (is_print && isprint (ch))
- || (is_punct && ispunct (ch))
- || (is_space && isspace (ch))
- || (is_upper && isupper (ch))
- || (is_xdigit && isxdigit (ch)))
+ if ( (is_alnum && ISALNUM (ch))
+ || (is_alpha && ISALPHA (ch))
+ || (is_blank && ISBLANK (ch))
+ || (is_cntrl && ISCNTRL (ch))
+ || (is_digit && ISDIGIT (ch))
+ || (is_graph && ISGRAPH (ch))
+ || (is_lower && ISLOWER (ch))
+ || (is_print && ISPRINT (ch))
+ || (is_punct && ISPUNCT (ch))
+ || (is_space && ISSPACE (ch))
+ || (is_upper && ISUPPER (ch))
+ || (is_xdigit && ISXDIGIT (ch)))
SET_LIST_BIT (ch);
}
- just_had_a_char_class = true;
+ had_char_class = true;
}
else
{
@@ -1424,12 +1507,12 @@ regex_compile (pattern, size, syntax, bufp)
PATUNFETCH;
SET_LIST_BIT ('[');
SET_LIST_BIT (':');
- just_had_a_char_class = false;
+ had_char_class = false;
}
}
else
{
- just_had_a_char_class = false;
+ had_char_class = false;
SET_LIST_BIT (c);
}
}
@@ -1459,14 +1542,14 @@ regex_compile (pattern, size, syntax, bufp)
case '\n':
if (syntax & RE_NEWLINE_ALT)
- goto handle_bar;
+ goto handle_alt;
else
goto normal_char;
case '|':
if (syntax & RE_NO_BK_VBAR)
- goto handle_bar;
+ goto handle_alt;
else
goto normal_char;
@@ -1491,16 +1574,8 @@ regex_compile (pattern, size, syntax, bufp)
case '(':
if (syntax & RE_NO_BK_PARENS)
goto normal_backslash;
- handle_open:
- if (syntax & RE_NO_EMPTY_GROUPS)
- {
- p1 = p;
- if (!(syntax & RE_NO_BK_PARENS) && *p1 == '\\') p1++;
-
- /* If found an empty group... */
- if (*p1 == ')') return REG_BADPAT;
- }
+ handle_open:
bufp->re_nsub++;
regnum++;
@@ -1524,11 +1599,13 @@ regex_compile (pattern, size, syntax, bufp)
COMPILE_STACK_TOP.regnum = regnum;
/* We will eventually replace the 0 with the number of
- groups inner to this one. */
+ groups inner to this one. But do not push a
+ start_memory for groups beyond the last one we can
+ represent in the compiled pattern. */
if (regnum <= MAX_REGNUM)
{
COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
- PAT_PUSH_3 (start_memory, regnum, 0);
+ BUF_PUSH_3 (start_memory, regnum, 0);
}
compile_stack.avail++;
@@ -1536,6 +1613,10 @@ regex_compile (pattern, size, syntax, bufp)
fixup_alt_jump = 0;
laststart = 0;
begalt = b;
+ /* If we've reached MAX_REGNUM groups, then this open
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
break;
@@ -1550,10 +1631,18 @@ regex_compile (pattern, size, syntax, bufp)
handle_close:
if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
+ { /* Push a dummy failure point at the end of the
+ alternative for a possible future
+ `pop_failure_jump' to pop. See comments at
+ `push_dummy_failure' in `re_match_2'. */
+ BUF_PUSH (push_dummy_failure);
+
+ /* We allocated space for this jump when we assigned
+ to `fixup_alt_jump', in the `handle_alt' case below. */
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
+ }
/* See similar code for backslashed left paren above. */
-
if (COMPILE_STACK_EMPTY)
if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
goto normal_char;
@@ -1577,6 +1666,10 @@ regex_compile (pattern, size, syntax, bufp)
: 0;
laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
this_group_regnum = COMPILE_STACK_TOP.regnum;
+ /* If we've reached MAX_REGNUM groups, then this close
+ won't actually generate any code, so we'll have to
+ clear pending_exact explicitly. */
+ pending_exact = 0;
/* We're at the end of the group, so now we know how many
groups were inside this one. */
@@ -1586,7 +1679,7 @@ regex_compile (pattern, size, syntax, bufp)
= bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
*inner_group_loc = regnum - this_group_regnum;
- PAT_PUSH_3 (stop_memory, this_group_regnum,
+ BUF_PUSH_3 (stop_memory, this_group_regnum,
regnum - this_group_regnum);
}
}
@@ -1596,27 +1689,14 @@ regex_compile (pattern, size, syntax, bufp)
case '|': /* `\|'. */
if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
goto normal_backslash;
- handle_bar:
+ handle_alt:
if (syntax & RE_LIMITED_OPS)
goto normal_char;
- /* Disallow empty alternatives if RE_NO_EMPTY_ALTS is set.
- Caveat: can't detect if the vbar is followed by a
- trailing '$' yet, unless it's the last thing in a
- pattern; the routine for verifying endlines has to do
- the rest. */
- if ((syntax & RE_NO_EMPTY_ALTS)
- && (!laststart || p == pend
- || (*p == '$' && p + 1 == pend)
- || ((syntax & RE_NO_BK_PARENS)
- ? (p < pend && *p == ')')
- : (p + 1 < pend && p[0] == '\\' && p[1] == ')'))))
- return REG_BADPAT;
-
/* Insert before the previous alternative a jump which
jumps to this alternative if the former fails. */
GET_BUFFER_SPACE (3);
- insert_jump (on_failure_jump, begalt, b + 6, b);
+ INSERT_JUMP (on_failure_jump, begalt, b + 6);
pending_exact = 0;
b += 3;
@@ -1631,13 +1711,13 @@ regex_compile (pattern, size, syntax, bufp)
| v | v
a | b | c
- If we are at `b,' then fixup_alt_jump right now points to a
- three-byte space after `a.' We'll put in the jump, set
- fixup_alt_jump to right after `b,' and leave behind three
- bytes which we'll fill in when we get to after `c.' */
+ If we are at `b', then fixup_alt_jump right now points to a
+ three-byte space after `a'. We'll put in the jump, set
+ fixup_alt_jump to right after `b', and leave behind three
+ bytes which we'll fill in when we get to after `c'. */
if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
/* Mark and leave space for a jump after this alternative,
to be filled in later either by next alternative or
@@ -1662,14 +1742,12 @@ regex_compile (pattern, size, syntax, bufp)
handle_interval:
{
- /* If got here, then intervals must be allowed. */
+ /* If got here, then the syntax allows intervals. */
- /* For intervals, at least (most) this many matches must
- be made. */
+ /* At least (most) this many matches must be made. */
int lower_bound = -1, upper_bound = -1;
- beg_interval = p - 1; /* The `{'. */
- following_left_brace = NULL;
+ beg_interval = p - 1;
if (p == pend)
{
@@ -1686,8 +1764,8 @@ regex_compile (pattern, size, syntax, bufp)
GET_UNSIGNED_NUMBER (upper_bound);
if (upper_bound < 0) upper_bound = RE_DUP_MAX;
}
-
- if (upper_bound < 0)
+ else
+ /* Interval such as `{1}' => match exactly once. */
upper_bound = lower_bound;
if (lower_bound < 0 || upper_bound > RE_DUP_MAX
@@ -1727,70 +1805,82 @@ regex_compile (pattern, size, syntax, bufp)
goto unfetch_interval;
}
- /* If upper_bound is zero, don't want to succeed at all;
- jump from laststart to b + 3, which will be the end of
- the buffer after this jump is inserted. */
+ /* If the upper bound is zero, don't want to succeed at
+ all; jump from `laststart' to `b + 3', which will be
+ the end of the buffer after we insert the jump. */
if (upper_bound == 0)
{
GET_BUFFER_SPACE (3);
- insert_jump (no_pop_jump, laststart, b + 3, b);
+ INSERT_JUMP (jump, laststart, b + 3);
b += 3;
}
- /* Otherwise, after lower_bound number of succeeds, jump
- to after the no_pop_jump_n which will be inserted at
- the end of the buffer, and insert that
- no_pop_jump_n. */
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
else
- { /* Set to 5 if only one repetition is allowed and
- hence no no_pop_jump_n is inserted at the current
- end of the buffer. Otherwise, need 10 bytes total
- for the succeed_n and the no_pop_jump_n. */
- unsigned slots_needed = upper_bound == 1 ? 5 : 10;
-
- GET_BUFFER_SPACE (slots_needed);
- /* Initialize the succeed_n to n, even though it will
- be set by its attendant set_number_at, because
- re_compile_fastmap will need to know it. Jump to
- what the end of buffer will be after inserting
- this succeed_n and possibly appending a
- no_pop_jump_n. */
- insert_jump_n (succeed_n, laststart, b + slots_needed,
- b, lower_bound);
- b += 5; /* Just increment for the succeed_n here. */
-
-
- /* More than one repetition is allowed, so put in at
- the end of the buffer a backward jump from b to the
- succeed_n we put in above. By the time we've gotten
- to this jump when matching, we'll have matched once
- already, so jump back only upper_bound - 1 times. */
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned nbytes = 10 + (upper_bound > 1) * 10;
+
+ GET_BUFFER_SPACE (nbytes);
+
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + (upper_bound > 1) * 5,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+
if (upper_bound > 1)
- {
- store_jump_n (b, no_pop_jump_n, laststart,
- upper_bound - 1);
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + 5,
+ upper_bound - 1);
b += 5;
- /* When hit this when matching, reset the
- preceding no_pop_jump_n's n to upper_bound - 1. */
- PAT_PUSH (set_number_at);
-
- /* Only need to get space for the numbers. */
- GET_BUFFER_SPACE (4);
- STORE_NUMBER_AND_INCR (b, -5);
- STORE_NUMBER_AND_INCR (b, upper_bound - 1);
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
}
-
- /* When hit this when matching, set the succeed_n's n. */
- GET_BUFFER_SPACE (5);
- insert_op_2 (set_number_at, laststart, b, 5, lower_bound);
- b += 5;
}
pending_exact = 0;
beg_interval = NULL;
-
- if (following_left_brace)
- goto normal_char;
}
break;
@@ -1814,87 +1904,75 @@ regex_compile (pattern, size, syntax, bufp)
/* There is no way to specify the before_dot and after_dot
operators. rms says this is ok. --karl */
case '=':
- PAT_PUSH (at_dot);
+ BUF_PUSH (at_dot);
break;
case 's':
laststart = b;
PATFETCH (c);
- PAT_PUSH_2 (syntaxspec, syntax_spec_code[c]);
+ BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
break;
case 'S':
laststart = b;
PATFETCH (c);
- PAT_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
+ BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
break;
#endif /* emacs */
case 'w':
laststart = b;
- PAT_PUSH (wordchar);
+ BUF_PUSH (wordchar);
break;
case 'W':
laststart = b;
- PAT_PUSH (notwordchar);
+ BUF_PUSH (notwordchar);
break;
case '<':
- PAT_PUSH (wordbeg);
+ BUF_PUSH (wordbeg);
break;
case '>':
- PAT_PUSH (wordend);
+ BUF_PUSH (wordend);
break;
case 'b':
- PAT_PUSH (wordbound);
+ BUF_PUSH (wordbound);
break;
case 'B':
- PAT_PUSH (notwordbound);
+ BUF_PUSH (notwordbound);
break;
case '`':
- PAT_PUSH (begbuf);
+ BUF_PUSH (begbuf);
break;
case '\'':
- PAT_PUSH (endbuf);
+ BUF_PUSH (endbuf);
break;
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- case '8':
- case '9':
+ case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
if (syntax & RE_NO_BK_REFS)
goto normal_char;
c1 = c - '0';
if (c1 > regnum)
- {
- if (syntax & RE_NO_MISSING_BK_REF)
- return REG_ESUBREG;
- else
- goto normal_char;
- }
+ return REG_ESUBREG;
/* Can't back reference to a subexpression if inside of it. */
if (group_in_compile_stack (compile_stack, c1))
goto normal_char;
laststart = b;
- PAT_PUSH_2 (duplicate, c1);
+ BUF_PUSH_2 (duplicate, c1);
break;
@@ -1942,11 +2020,11 @@ regex_compile (pattern, size, syntax, bufp)
laststart = b;
- PAT_PUSH_2 (exactn, 0);
+ BUF_PUSH_2 (exactn, 0);
pending_exact = b - 1;
}
- PAT_PUSH (c);
+ BUF_PUSH (c);
(*pending_exact)++;
break;
} /* switch (c) */
@@ -1956,7 +2034,7 @@ regex_compile (pattern, size, syntax, bufp)
/* Through the pattern now. */
if (fixup_alt_jump)
- store_jump (fixup_alt_jump, jump_past_next_alt, b);
+ STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
if (!COMPILE_STACK_EMPTY)
return REG_EPAREN;
@@ -1965,169 +2043,125 @@ regex_compile (pattern, size, syntax, bufp)
/* We have succeeded; set the length of the buffer. */
bufp->used = b - bufp->buffer;
+
+#ifdef DEBUG
+ if (debug)
+ {
+ DEBUG_PRINT1 ("\nCompiled pattern: ");
+ print_compiled_pattern (bufp);
+ }
+#endif /* DEBUG */
+
return REG_NOERROR;
} /* regex_compile */
-/* Subroutines for regex_compile. */
+/* Subroutines for `regex_compile'. */
-/* Store a jump of the form <OPCODE> <relative address>.
- Store in the location FROM a jump operation to jump to relative
- address FROM - TO. OPCODE is the opcode to store. */
+/* Store OP at LOC followed by two-byte integer parameter ARG. */
static void
-store_jump (from, op, to)
- unsigned char *from, *to;
- re_opcode_t op;
+store_op1 (op, loc, arg)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg;
{
- from[0] = (unsigned char) op;
- STORE_NUMBER (from + 1, to - (from + 3));
+ *loc = (unsigned char) op;
+ STORE_NUMBER (loc + 1, arg);
}
-/* Open up space before char FROM, and insert there a jump to TO.
- CURRENT_END gives the end of the storage not in use, so we know
- how much data to copy up. OP is the opcode of the jump to insert.
-
- If you call this function, you must zero out pending_exact. */
+/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */
static void
-insert_jump (op, from, to, current_end)
- re_opcode_t op;
- unsigned char *from, *to, *current_end;
+store_op2 (op, loc, arg1, arg2)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg1, arg2;
{
- register unsigned char *pfrom = current_end; /* Copy from here... */
- register unsigned char *pto = current_end + 3; /* ...to here. */
-
- while (pfrom != from)
- *--pto = *--pfrom;
-
- store_jump (from, op, to);
+ *loc = (unsigned char) op;
+ STORE_NUMBER (loc + 1, arg1);
+ STORE_NUMBER (loc + 3, arg2);
}
-/* Store a jump of the form <opcode> <relative address> <n>.
-
- Store in the location FROM a jump operation to jump to relative
- address FROM - TO. OPCODE is the opcode to store, N is a number the
- jump uses, say, to decide how many times to jump.
-
- If you call this function, you must zero out pending_exact. */
+/* Copy the bytes from LOC to END to open up three bytes of space at LOC
+ for OP followed by two-byte integer parameter ARG. */
static void
-store_jump_n (from, op, to, n)
- unsigned char *from, *to;
- re_opcode_t op;
- unsigned n;
+insert_op1 (op, loc, arg, end)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg;
+ unsigned char *end;
{
- from[0] = (unsigned char) op;
- STORE_NUMBER (from + 1, to - (from + 3));
- STORE_NUMBER (from + 3, n);
-}
+ register unsigned char *pfrom = end;
+ register unsigned char *pto = end + 3;
+ while (pfrom != loc)
+ *--pto = *--pfrom;
+
+ store_op1 (op, loc, arg);
+}
-/* Similar to insert_jump, but handles a jump which needs an extra
- number to handle minimum and maximum cases. Open up space at
- location FROM, and insert there a jump to TO. CURRENT_END gives the
- end of the storage in use, so we know how much data to copy up. OP is
- the opcode of the jump to insert.
- If you call this function, you must zero out pending_exact. */
+/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */
static void
-insert_jump_n (op, from, to, current_end, n)
- re_opcode_t op;
- unsigned char *from, *to, *current_end;
- unsigned n;
+insert_op2 (op, loc, arg1, arg2, end)
+ re_opcode_t op;
+ unsigned char *loc;
+ int arg1, arg2;
+ unsigned char *end;
{
- register unsigned char *pfrom = current_end;
- register unsigned char *pto = current_end + 5;
+ register unsigned char *pfrom = end;
+ register unsigned char *pto = end + 5;
- while (pfrom != from)
+ while (pfrom != loc)
*--pto = *--pfrom;
- store_jump_n (from, op, to, n);
+ store_op2 (op, loc, arg1, arg2);
}
-/* Open up space at location THERE, and insert operation OP followed by
- NUM_1 and NUM_2. CURRENT_END gives the end of the storage in use, so
- we know how much data to copy up.
-
- If you call this function, you must zero out pending_exact. */
+/* P points to just after a ^ in PATTERN. Return true if that ^ comes
+ after an alternative or a begin-subexpression. We assume there is at
+ least one character before the ^. */
-static void
-insert_op_2 (op, there, current_end, num_1, num_2)
- re_opcode_t op;
- unsigned char *there, *current_end;
- int num_1, num_2;
+static boolean
+at_begline_loc_p (pattern, p, syntax)
+ const char *pattern, *p;
+ reg_syntax_t syntax;
{
- register unsigned char *pfrom = current_end;
- register unsigned char *pto = current_end + 5;
-
- while (pfrom != there)
- *--pto = *--pfrom;
+ const char *prev = p - 2;
+ boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
- there[0] = (unsigned char) op;
- STORE_NUMBER (there + 1, num_1);
- STORE_NUMBER (there + 3, num_2);
+ return
+ /* After a subexpression? */
+ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
+ /* After an alternative? */
+ || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
}
-/* Return true if the pattern position P is at a close-group or
- alternation operator, or if it is a newline and RE_NEWLINE_ORDINARY
- is not set in SYNTAX. Before checking, though, we skip past all
- operators that match the empty string.
-
- This is not quite the dual of what happens with ^. There, we can
- easily check if the (sub)pattern so far can match only the empty
- string, because we have seen the pattern, and `laststart' is set to
- exactly that. But we cannot easily look at the pattern yet to come
- to see if it matches the empty string; that would require us to compile
- the pattern, then go back and analyze the pattern after every
- endline. POSIX required this at one point (that $ be in a
- ``trailing'' position to be considered an anchor), so we implemented
- it, but it was slow and took lots of code, and we were never really
- convinced it worked in all cases. So now it's gone, and we live with
- the slight inconsistency between ^ and $. */
+/* The dual of at_begline_loc_p. This one is for $. We assume there is
+ at least one character after the $, i.e., `P < PEND'. */
static boolean
-at_endline_op_p (p, pend, syntax)
+at_endline_loc_p (p, pend, syntax)
const char *p, *pend;
int syntax;
{
- boolean context_indep = !!(syntax & RE_CONTEXT_INDEP_ANCHORS);
+ const char *next = p;
+ boolean next_backslash = *next == '\\';
+ const char *next_next = p + 1 < pend ? p + 1 : NULL;
- /* Skip past operators that match the empty string. (Except we don't
- handle empty groups.) */
- while (p < pend)
- {
- if (context_indep && (*p == '^' || *p == '$'))
- p++;
-
- /* All others start with \. */
- else if (*p == '\\' && p + 1 < pend
- && (p[1] == 'b' || p[1] == 'B'
- || p[1] == '<' || p[1] == '>'
- || p[1] == '`' || p[1] == '\''
-#ifdef emacs
- || p[1] == '='
-#endif
- ))
- p += 2;
-
- else /* Not an empty string operator. */
- break;
- }
-
- /* See what we're at now. */
- return p < pend
- && ((!(syntax & RE_NEWLINE_ORDINARY) && *p == '\n')
- || (syntax & RE_NO_BK_PARENS
- ? *p == ')'
- : *p == '\\' && p + 1 < pend && p[1] == ')')
- || (syntax & RE_NO_BK_VBAR
- ? *p == '|'
- : (*p == '\\' && p + 1 < pend && p[1] == '|')));
+ return
+ /* Before a subexpression? */
+ (syntax & RE_NO_BK_PARENS ? *next == ')'
+ : next_backslash && next_next && *next_next == ')')
+ /* Before an alternative? */
+ || (syntax & RE_NO_BK_VBAR ? *next == '|'
+ : next_backslash && next_next && *next_next == '|');
}
@@ -2149,6 +2183,63 @@ group_in_compile_stack (compile_stack, regnum)
return false;
}
+
+
+/* Read the ending character of a range (in a bracket expression) from the
+ uncompiled pattern *P_PTR (which ends at PEND). We assume the
+ starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
+ Then we set the translation of all bits between the starting and
+ ending characters (inclusive) in the compiled pattern B.
+
+ Return an error code.
+
+ We use these short variable names so we can use the same macros as
+ `regex_compile' itself. */
+
+static reg_errcode_t
+compile_range (p_ptr, pend, translate, syntax, b)
+ const char **p_ptr, *pend;
+ char *translate;
+ reg_syntax_t syntax;
+ unsigned char *b;
+{
+ unsigned this_char;
+
+ const char *p = *p_ptr;
+ int range_start, range_end;
+
+ if (p == pend)
+ return REG_ERANGE;
+
+ /* Even though the pattern is a signed `char *', we need to fetch
+ with unsigned char *'s; if the high bit of the pattern character
+ is set, the range endpoints will be negative if we fetch using a
+ signed char *.
+
+ We also want to fetch the endpoints without translating them; the
+ appropriate translation is done in the bit-setting loop below. */
+ range_start = ((unsigned char *) p)[-2];
+ range_end = ((unsigned char *) p)[0];
+
+ /* Have to increment the pointer into the pattern string, so the
+ caller isn't still at the ending character. */
+ (*p_ptr)++;
+
+ /* If the start is after the end, the range is empty. */
+ if (range_start > range_end)
+ return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
+
+ /* Here we see why `this_char' has to be larger than an `unsigned
+ char' -- the range is inclusive, so if `range_end' == 0xff
+ (assuming 8-bit characters), we would otherwise go into an infinite
+ loop, since all characters <= 0xff. */
+ for (this_char = range_start; this_char <= range_end; this_char++)
+ {
+ SET_LIST_BIT (TRANSLATE (this_char));
+ }
+
+ return REG_NOERROR;
+}
/* Failure stack declarations and macros; both re_compile_fastmap and
re_match_2 use a failure stack. These have to be macros because of
@@ -2157,7 +2248,7 @@ group_in_compile_stack (compile_stack, regnum)
/* Number of failure points for which to initially allocate space
when matching. If this number is exceeded, we allocate more
- space---so it is not a hard limit. */
+ space, so it is not a hard limit. */
#ifndef INIT_FAILURE_ALLOC
#define INIT_FAILURE_ALLOC 5
#endif
@@ -2168,75 +2259,76 @@ group_in_compile_stack (compile_stack, regnum)
change it ourselves. */
int re_max_failures = 2000;
-typedef const unsigned char *failure_stack_elt_t;
+typedef const unsigned char *fail_stack_elt_t;
typedef struct
{
- failure_stack_elt_t *stack;
+ fail_stack_elt_t *stack;
unsigned size;
unsigned avail; /* Offset of next open position. */
-} failure_stack_type;
+} fail_stack_type;
-#define FAILURE_STACK_EMPTY() (failure_stack.avail == 0)
-#define FAILURE_STACK_PTR_EMPTY() (failure_stack_ptr->avail == 0)
-#define FAILURE_STACK_FULL() (failure_stack.avail == failure_stack.size)
-#define FAILURE_STACK_TOP() (failure_stack.stack[failure_stack.avail])
+#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
+#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
+#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
+#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail])
-/* Initialize FAILURE_STACK. Return 1 if success, 0 if not. */
+/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */
-#define INIT_FAILURE_STACK(failure_stack) \
- ((failure_stack).stack = (failure_stack_elt_t *) \
- REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (failure_stack_elt_t)), \
- (failure_stack).stack == NULL \
- ? 0 \
- : ((failure_stack).size = INIT_FAILURE_ALLOC, \
- (failure_stack).avail = 0, \
- 1))
+#define INIT_FAIL_STACK() \
+ do { \
+ fail_stack.stack = (fail_stack_elt_t *) \
+ REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
+ \
+ if (fail_stack.stack == NULL) \
+ return -2; \
+ \
+ fail_stack.size = INIT_FAILURE_ALLOC; \
+ fail_stack.avail = 0; \
+ } while (0)
-/* Double the size of FAILURE_STACK, up to approximately
- `re_max_failures' items.
+/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
Return 1 if succeeds, and 0 if either ran out of memory
allocating space for it or it was already too large.
REGEX_REALLOCATE requires `destination' be declared. */
-#define DOUBLE_FAILURE_STACK(failure_stack) \
- ((failure_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
+#define DOUBLE_FAIL_STACK(fail_stack) \
+ ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
? 0 \
- : ((failure_stack).stack = (failure_stack_elt_t *) \
- REGEX_REALLOCATE ((failure_stack).stack, \
- ((failure_stack).size << 1) * sizeof (failure_stack_elt_t)), \
+ : ((fail_stack).stack = (fail_stack_elt_t *) \
+ REGEX_REALLOCATE ((fail_stack).stack, \
+ (fail_stack).size * sizeof (fail_stack_elt_t), \
+ ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
\
- (failure_stack).stack == NULL \
+ (fail_stack).stack == NULL \
? 0 \
- : ((failure_stack).size <<= 1, \
+ : ((fail_stack).size <<= 1, \
1)))
-/* Push PATTERN_OP on FAILURE_STACK.
+/* Push PATTERN_OP on FAIL_STACK.
Return 1 if was able to do so and 0 if ran out of memory allocating
space to do so. */
-#define PUSH_PATTERN_OP(pattern_op, failure_stack) \
- ((FAILURE_STACK_FULL () \
- && !DOUBLE_FAILURE_STACK (failure_stack)) \
+#define PUSH_PATTERN_OP(pattern_op, fail_stack) \
+ ((FAIL_STACK_FULL () \
+ && !DOUBLE_FAIL_STACK (fail_stack)) \
? 0 \
- : ((failure_stack).stack[(failure_stack).avail++] = pattern_op, \
+ : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \
1))
/* This pushes an item onto the failure stack. Must be a four-byte
- value. Assumes the variable `failure_stack'. Probably should only
+ value. Assumes the variable `fail_stack'. Probably should only
be called from within `PUSH_FAILURE_POINT'. */
#define PUSH_FAILURE_ITEM(item) \
- failure_stack.stack[failure_stack.avail++] = (failure_stack_elt_t) item
+ fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
-/* The complement operation. Assumes stack is nonempty, and pointed to
- `failure_stack_ptr'. */
-#define POP_FAILURE_ITEM() \
- failure_stack_ptr->stack[--failure_stack_ptr->avail]
+/* The complement operation. Assumes `fail_stack' is nonempty. */
+#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
/* Used to omit pushing failure point id's when we're not debugging. */
#ifdef DEBUG
@@ -2251,8 +2343,8 @@ typedef struct
/* Push the information about the state we will need
if we ever fail back to it.
- Requires variables failure_stack, regstart, regend, reg_info, and
- num_regs be declared. DOUBLE_FAILURE_STACK requires `destination' be
+ Requires variables fail_stack, regstart, regend, reg_info, and
+ num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be
declared.
Does `return FAILURE_CODE' if runs out of memory. */
@@ -2265,9 +2357,10 @@ typedef struct
int this_reg; \
\
DEBUG_STATEMENT (failure_id++); \
+ DEBUG_STATEMENT (nfailure_points_pushed++); \
DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
- DEBUG_PRINT2 (" Before push, next avail: %d\n", (failure_stack).avail);\
- DEBUG_PRINT2 (" size: %d\n", (failure_stack).size);\
+ DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
+ DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
\
DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
@@ -2275,11 +2368,11 @@ typedef struct
/* Ensure we have enough space allocated for what we will push. */ \
while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
{ \
- if (!DOUBLE_FAILURE_STACK (failure_stack)) \
+ if (!DOUBLE_FAIL_STACK (fail_stack)) \
return failure_code; \
\
DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
- (failure_stack).size); \
+ (fail_stack).size); \
DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
} \
\
@@ -2299,7 +2392,7 @@ typedef struct
PUSH_FAILURE_ITEM (regend[this_reg]); \
\
DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \
- DEBUG_PRINT2 (" match_nothing=%d", \
+ DEBUG_PRINT2 (" match_null=%d", \
REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
DEBUG_PRINT2 (" matched_something=%d", \
@@ -2310,18 +2403,18 @@ typedef struct
PUSH_FAILURE_ITEM (reg_info[this_reg].word); \
} \
\
- DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg); \
+ DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\
PUSH_FAILURE_ITEM (lowest_active_reg); \
\
DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\
PUSH_FAILURE_ITEM (highest_active_reg); \
\
DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
- DEBUG_COMPILED_PATTERN_PRINTER (bufp, pattern_place, pend); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
PUSH_FAILURE_ITEM (pattern_place); \
\
DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
- DEBUG_DOUBLE_STRING_PRINTER (string_place, string1, size1, string2, \
+ DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
size2); \
DEBUG_PRINT1 ("'\n"); \
PUSH_FAILURE_ITEM (string_place); \
@@ -2342,8 +2435,7 @@ typedef struct
#endif
/* We push at most this many items on the stack. */
-#define MAX_FAILURE_ITEMS \
- ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
+#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
/* We actually push this many items. */
#define NUM_FAILURE_ITEMS \
@@ -2351,8 +2443,77 @@ typedef struct
+ NUM_NONREG_ITEMS)
/* How many items can still be added to the stack without overflowing it. */
-#define REMAINING_AVAIL_SLOTS \
- ((failure_stack).size - (failure_stack).avail)
+#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
+
+
+/* Pops what PUSH_FAILURE_POINT pushes.
+
+ We restore into the parameters, all of which should be lvalues:
+ STR -- the saved data position.
+ PAT -- the saved pattern position.
+ LOW_REG, HIGH_REG -- the lowest and highest active registers.
+ REGSTART, REGEND -- arrays of string positions.
+ REG_INFO -- array of information about each subexpression.
+
+ Also assumes the variables `fail_stack' and (if debugging), `bufp',
+ `pend', `string1', `size1', `string2', and `size2'. */
+
+#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
+{ \
+ DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
+ int this_reg; \
+ const unsigned char *string_temp; \
+ \
+ assert (!FAIL_STACK_EMPTY ()); \
+ \
+ /* Remove failure points and point to how many regs pushed. */ \
+ DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
+ DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
+ DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
+ \
+ assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
+ \
+ DEBUG_POP (&failure_id); \
+ DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
+ \
+ /* If the saved string location is NULL, it came from an \
+ on_failure_keep_string_jump opcode, and we want to throw away the \
+ saved NULL, thus retaining our current position in the string. */ \
+ string_temp = POP_FAILURE_ITEM (); \
+ if (string_temp != NULL) \
+ str = (const char *) string_temp; \
+ \
+ DEBUG_PRINT2 (" Popping string 0x%x: `", str); \
+ DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
+ DEBUG_PRINT1 ("'\n"); \
+ \
+ pat = (unsigned char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
+ \
+ /* Restore register info. */ \
+ high_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \
+ \
+ low_reg = (unsigned) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
+ \
+ for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
+ { \
+ DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
+ \
+ reg_info[this_reg].word = POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
+ \
+ regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
+ \
+ regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \
+ DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
+ } \
+ \
+ DEBUG_STATEMENT (nfailure_points_popped++); \
+} /* POP_FAILURE_POINT */
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
@@ -2360,20 +2521,19 @@ typedef struct
is used by re_search to skip quickly over impossible starting points.
The caller must supply the address of a (1 << BYTEWIDTH)-byte data
- area as BUFP->fastmap. The other components of BUFP describe the
- pattern to be used.
+ area as BUFP->fastmap.
- We set the `can_be_null' and `fastmap_accurate' fields in the pattern
+ We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
+ the pattern buffer.
- Returns 0 if it can compile a fastmap. Returns -2 if there is an
- internal error. */
+ Returns 0 if we succeed, -2 if an internal error. */
int
re_compile_fastmap (bufp)
struct re_pattern_buffer *bufp;
{
int j, k;
- failure_stack_type failure_stack;
+ fail_stack_type fail_stack;
#ifndef REGEX_MALLOC
char *destination;
#endif
@@ -2386,31 +2546,57 @@ re_compile_fastmap (bufp)
const unsigned char *p = pattern;
register unsigned char *pend = pattern + size;
- INIT_FAILURE_STACK (failure_stack);
+ /* Assume that each path through the pattern can be null until
+ proven otherwise. We set this false at the bottom of the switch
+ statement, to which we get only if a particular path doesn't
+ match the empty string. */
+ boolean path_can_be_null = true;
- bzero (fastmap, 1 << BYTEWIDTH);
- bufp->fastmap_accurate = 1; /* It will be when we're done. */
+ /* We aren't doing a `succeed_n' to begin with. */
+ boolean succeed_n_p = false;
+
+ assert (fastmap != NULL && p != NULL);
+
+ INIT_FAIL_STACK ();
+ bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
+ bufp->fastmap_accurate = 1; /* It will be when we're done. */
bufp->can_be_null = 0;
- while (p)
+ while (p != pend || !FAIL_STACK_EMPTY ())
{
- boolean is_a_succeed_n = false;
-
if (p == pend)
- if (FAILURE_STACK_EMPTY ())
- {
- bufp->can_be_null = 1;
- break;
- }
- else
- p = failure_stack.stack[--failure_stack.avail];
+ {
+ bufp->can_be_null |= path_can_be_null;
+ /* Reset for next path. */
+ path_can_be_null = true;
+
+ p = fail_stack.stack[--fail_stack.avail];
+ }
+
+ /* We should never be about to go beyond the end of the pattern. */
+ assert (p < pend);
+
#ifdef SWITCH_ENUM_BUG
switch ((int) ((re_opcode_t) *p++))
#else
switch ((re_opcode_t) *p++)
#endif
{
+
+ /* I guess the idea here is to simply not bother with a fastmap
+ if a backreference is used, since it's too hard to figure out
+ the fastmap for the corresponding group. Setting
+ `can_be_null' stops `re_search_2' from using the fastmap, so
+ that is all we do. */
+ case duplicate:
+ bufp->can_be_null = 1;
+ return 0;
+
+
+ /* Following are the cases which match a character. These end
+ with `break'. */
+
case exactn:
fastmap[p[1]] = 1;
break;
@@ -2434,40 +2620,95 @@ re_compile_fastmap (bufp)
break;
+ case wordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case notwordchar:
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != Sword)
+ fastmap[j] = 1;
+ break;
+
+
+ case anychar:
+ /* `.' matches anything ... */
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ fastmap[j] = 1;
+
+ /* ... except perhaps newline. */
+ if (!(bufp->syntax & RE_DOT_NEWLINE))
+ fastmap['\n'] = 0;
+
+ /* Return if we have already set `can_be_null'; if we have,
+ then the fastmap is irrelevant. Something's wrong here. */
+ else if (bufp->can_be_null)
+ return 0;
+
+ /* Otherwise, have to check alternative paths. */
+ break;
+
+
+#ifdef emacs
+ case syntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) == (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ case notsyntaxspec:
+ k = *p++;
+ for (j = 0; j < (1 << BYTEWIDTH); j++)
+ if (SYNTAX (j) != (enum syntaxcode) k)
+ fastmap[j] = 1;
+ break;
+
+
+ /* All cases after this match the empty string. These end with
+ `continue'. */
+
+
+ case before_dot:
+ case at_dot:
+ case after_dot:
+ continue;
+#endif /* emacs */
+
+
case no_op:
case begline:
+ case endline:
case begbuf:
case endbuf:
case wordbound:
case notwordbound:
case wordbeg:
case wordend:
+ case push_dummy_failure:
continue;
- case endline:
- if (!bufp->can_be_null)
- bufp->can_be_null = 2;
- break;
-
-
- case no_pop_jump_n:
+ case jump_n:
case pop_failure_jump:
case maybe_pop_jump:
- case no_pop_jump:
- case jump_past_next_alt:
+ case jump:
+ case jump_past_alt:
case dummy_failure_jump:
EXTRACT_NUMBER_AND_INCR (j, p);
p += j;
if (j > 0)
continue;
- /* Jump backward reached implies we just went through
- the body of a loop and matched nothing. Opcode jumped to
- should be an on_failure_jump or succeed_n. Just treat it
- like an ordinary jump. For a * loop, it has pushed its
- failure point already; if so, discard that as redundant. */
-
+ /* Jump backward implies we just went through the body of a
+ loop and matched nothing. Opcode jumped to should be
+ `on_failure_jump' or `succeed_n'. Just treat it like an
+ ordinary jump. For a * loop, it has pushed its failure
+ point already; if so, discard that as redundant. */
if ((re_opcode_t) *p != on_failure_jump
&& (re_opcode_t) *p != succeed_n)
continue;
@@ -2477,14 +2718,15 @@ re_compile_fastmap (bufp)
p += j;
/* If what's on the stack is where we are now, pop it. */
- if (!FAILURE_STACK_EMPTY ()
- && failure_stack.stack[failure_stack.avail - 1] == p)
- failure_stack.avail--;
+ if (!FAIL_STACK_EMPTY ()
+ && fail_stack.stack[fail_stack.avail - 1] == p)
+ fail_stack.avail--;
continue;
case on_failure_jump:
+ case on_failure_keep_string_jump:
handle_on_failure_jump:
EXTRACT_NUMBER_AND_INCR (j, p);
@@ -2492,23 +2734,27 @@ re_compile_fastmap (bufp)
end of the pattern. We don't want to push such a point,
since when we restore it above, entering the switch will
increment `p' past the end of the pattern. We don't need
- to push such a point since there can't be any more
- possibilities for the fastmap beyond pend. */
+ to push such a point since we obviously won't find any more
+ fastmap entries beyond `pend'. Such a pattern can match
+ the null string, though. */
if (p + j < pend)
{
- if (!PUSH_PATTERN_OP (p + j, failure_stack))
+ if (!PUSH_PATTERN_OP (p + j, fail_stack))
return -2;
}
+ else
+ bufp->can_be_null = 1;
- if (is_a_succeed_n)
- EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ if (succeed_n_p)
+ {
+ EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
+ succeed_n_p = false;
+ }
continue;
case succeed_n:
- is_a_succeed_n = true;
-
/* Get to the number of times to succeed. */
p += 2;
@@ -2517,6 +2763,7 @@ re_compile_fastmap (bufp)
if (k == 0)
{
p -= 4;
+ succeed_n_p = true; /* Spaghetti code alert. */
goto handle_on_failure_jump;
}
continue;
@@ -2533,75 +2780,60 @@ re_compile_fastmap (bufp)
continue;
- /* I don't understand this case (any of it). --karl */
- case duplicate:
- bufp->can_be_null = 1;
- fastmap['\n'] = 1;
-
-
- case anychar:
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (j != '\n')
- fastmap[j] = 1;
- if (bufp->can_be_null)
- return 0;
-
- /* Don't return; check the alternative paths
- so we can set can_be_null if appropriate. */
- break;
-
-
- case wordchar:
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) == Sword)
- fastmap[j] = 1;
- break;
-
-
- case notwordchar:
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) != Sword)
- fastmap[j] = 1;
- break;
-
-
-#ifdef emacs
- case before_dot:
- case at_dot:
- case after_dot:
- continue;
-
-
- case syntaxspec:
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) == (enum syntaxcode) k)
- fastmap[j] = 1;
- break;
-
-
- case notsyntaxspec:
- k = *p++;
- for (j = 0; j < (1 << BYTEWIDTH); j++)
- if (SYNTAX (j) != (enum syntaxcode) k)
- fastmap[j] = 1;
- break;
-#endif /* not emacs */
-
- default:
- abort ();
+ default:
+ abort (); /* We have listed all the cases. */
} /* switch *p++ */
- /* Getting here means we have successfully found the possible starting
- characters of one path of the pattern. We need not follow this
- path any farther. Instead, look at the next alternative
- remembered in the stack, or quit. The test at the top of the
- loop does these things. */
+ /* Getting here means we have found the possible starting
+ characters for one path of the pattern -- and that the empty
+ string does not match. We need not follow this path further.
+ Instead, look at the next alternative (remembered on the
+ stack), or quit if no more. The test at the top of the loop
+ does these things. */
+ path_can_be_null = false;
p = pend;
} /* while p */
+ /* Set `can_be_null' for the last path (also the first path, if the
+ pattern is empty). */
+ bufp->can_be_null |= path_can_be_null;
return 0;
-} /* re_compile_fastmap */
+} /* re_compile_fastmap */
+
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
+ this memory for recording register information. STARTS and ENDS
+ must be allocated using the malloc library routine, and must each
+ be at least NUM_REGS * sizeof (regoff_t) bytes long.
+
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
+
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+
+void
+re_set_registers (bufp, regs, num_regs, starts, ends)
+    struct re_pattern_buffer *bufp;
+    struct re_registers *regs;
+    unsigned num_regs;
+    regoff_t *starts, *ends;
+{
+  if (num_regs)
+    {
+      /* Caller supplied the vectors.  Mark them REGS_REALLOCATE so that
+         a later match may enlarge them if the pattern needs more
+         registers than NUM_REGS.  */
+      bufp->regs_allocated = REGS_REALLOCATE;
+      regs->num_regs = num_regs;
+      regs->start = starts;
+      regs->end = ends;
+    }
+  else
+    {
+      /* No caller storage: the next match allocates its own vectors.  */
+      bufp->regs_allocated = REGS_UNALLOCATED;
+      regs->num_regs = 0;
+      /* `start' and `end' are `regoff_t *', so clear them with a
+         pointer-typed null rather than an integer-typed zero.  */
+      regs->start = regs->end = (regoff_t *) 0;
+    }
+}
/* Searching routines. */
@@ -2642,8 +2874,7 @@ re_search (bufp, string, size, startpos, range, regs)
stack overflow). */
int
-re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
- regs, stop)
+re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
struct re_pattern_buffer *bufp;
const char *string1, *string2;
int size1, size2;
@@ -2669,15 +2900,9 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
else if (endpos > total_size)
range = total_size - startpos;
- /* Update the fastmap now if not correct already. */
- if (fastmap && !bufp->fastmap_accurate)
- if (re_compile_fastmap (bufp) == -2)
- return -2;
-
/* If the search isn't to be a backwards one, don't waste time in a
- long search for a pattern that says it is anchored. */
- if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf
- && range > 0)
+ search for a pattern that must be anchored. */
+ if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
{
if (startpos > 0)
return -1;
@@ -2685,12 +2910,18 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
range = 1;
}
+ /* Update the fastmap now if not correct already. */
+ if (fastmap && !bufp->fastmap_accurate)
+ if (re_compile_fastmap (bufp) == -2)
+ return -2;
+
+ /* Loop through the string, looking for a place to start matching. */
for (;;)
{
/* If a fastmap is supplied, skip quickly over characters that
cannot be the start of a match. If the pattern can match the
- null string, however, we don't want to skip over characters; we
- want the first null string. */
+ null string, however, we don't need to skip characters; we want
+ the first null string. */
if (fastmap && startpos < total_size && !bufp->can_be_null)
{
if (range > 0) /* Searching forwards. */
@@ -2707,36 +2938,30 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range,
/* Written out as an if-else to avoid testing `translate'
inside the loop. */
if (translate)
- {
- while (range > lim
- && !fastmap[(unsigned char) translate[*d++]])
- range--;
- }
+ while (range > lim
+ && !fastmap[(unsigned char)
+ translate[(unsigned char) *d++]])
+ range--;
else
- {
- while (range > lim && !fastmap[(unsigned char) *d++])
- range--;
- }
+ while (range > lim && !fastmap[(unsigned char) *d++])
+ range--;
startpos += irange - range;
}
else /* Searching backwards. */
{
- register char c
- = (size1 == 0 || startpos >= size1
- ? string2[startpos - size1]
- : string1[startpos]);
-
- if (translate
- ? !fastmap[(unsigned char) translate[(unsigned char) c]]
- : !fastmap[(unsigned char) c])
+ register char c = (size1 == 0 || startpos >= size1
+ ? string2[startpos - size1]
+ : string1[startpos]);
+
+ if (!fastmap[(unsigned char) TRANSLATE (c)])
goto advance;
}
}
/* If can't match the null string, and that's all we have left, fail. */
- if (range >= 0 && startpos == total_size
- && fastmap && bufp->can_be_null == 0)
+ if (range >= 0 && startpos == total_size && fastmap
+ && !bufp->can_be_null)
return -1;
val = re_match_2 (bufp, string1, size1, string2, size2,
@@ -2770,8 +2995,6 @@ static int bcmp_translate ();
static boolean alt_match_null_string_p (),
common_op_match_null_string_p (),
group_match_null_string_p ();
-static void pop_failure_point ();
-
/* Structure for per-register (a.k.a. per-group) information.
This must not be longer than one word, because we push this value
@@ -2786,12 +3009,12 @@ static void pop_failure_point ();
failure stack. */
typedef union
{
- failure_stack_elt_t word;
+ fail_stack_elt_t word;
struct
{
/* This field is one if this group can match the empty string,
- zero if not. If not yet determined, `MATCH_NOTHING_UNSET_VALUE'. */
-#define MATCH_NOTHING_UNSET_VALUE 3
+ zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
+#define MATCH_NULL_UNSET_VALUE 3
unsigned match_null_string_p : 2;
unsigned is_active : 1;
unsigned matched_something : 1;
@@ -2805,12 +3028,9 @@ typedef union
#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
-/* Call this when have matched something; it sets `matched' flags for the
- registers corresponding to the group of which we currently are inside.
- Also records whether this group ever matched something. We only care
- about this information at `stop_memory', and then only about the
- previous time through the loop (if the group is starred or whatever).
- So it is ok to clear all the nonactive registers here. */
+/* Call this when have matched a real character; it sets `matched' flags
+ for the subexpressions which we are currently inside. Also records
+ that those subexprs have matched. */
#define SET_REGS_MATCHED() \
do \
{ \
@@ -2825,14 +3045,12 @@ typedef union
while (0)
-/* This converts a pointer into one or the other of the strings into an
- offset from the beginning of that string. */
-#define POINTER_TO_OFFSET(pointer) IS_IN_FIRST_STRING (pointer) \
- ? (pointer) - string1 \
- : (pointer) - string2 + size1
+/* This converts PTR, a pointer into one of the search strings `string1'
+ and `string2', into an offset from the beginning of that string. */
+#define POINTER_TO_OFFSET(ptr) \
+ (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
-/* Registers are set to a sentinel value when they haven't yet matched
- anything. */
+/* Registers are set to a sentinel when they haven't yet matched. */
#define REG_UNSET_VALUE ((char *) -1)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
@@ -2843,7 +3061,7 @@ typedef union
/* Call before fetching a character with *d. This switches over to
string2 if necessary. */
-#define PREFETCH \
+#define PREFETCH() \
while (d == dend) \
{ \
/* End of string2 => fail. */ \
@@ -2856,48 +3074,46 @@ typedef union
/* Test if at very beginning or at very end of the virtual concatenation
- of string1 and string2. If there is only one string, we've put it in
- string2. */
-#define AT_STRINGS_BEG (d == (size1 ? string1 : string2) || !size2)
-#define AT_STRINGS_END (d == end2)
+ of `string1' and `string2'. If only one string, it's `string2'. */
+#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
+#define AT_STRINGS_END(d) ((d) == end2)
/* Test if D points to a character which is word-constituent. We have
two special cases to check for: if past the end of string1, look at
the first character in string2; and if before the beginning of
- string2, look at the last character in string1.
-
- We assume there is a string1, so use this in conjunction with
- AT_STRINGS_BEG. */
-#define LETTER_P(d) \
- (SYNTAX ((d) == end1 ? *string2 : (d) == string2 - 1 ? *(end1 - 1) : *(d))\
+ string2, look at the last character in string1. */
+#define WORDCHAR_P(d) \
+ (SYNTAX ((d) == end1 ? *string2 \
+ : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
== Sword)
/* Test if the character before D and the one at D differ with respect
to being word-constituent. */
#define AT_WORD_BOUNDARY(d) \
- (AT_STRINGS_BEG || AT_STRINGS_END || LETTER_P (d - 1) != LETTER_P (d))
+ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
+ || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
/* Free everything we malloc. */
#ifdef REGEX_MALLOC
+#define FREE_VAR(var) if (var) free (var); var = NULL
#define FREE_VARIABLES() \
do { \
- free (failure_stack.stack); \
- free (regstart); \
- free (regend); \
- free (old_regstart); \
- free (old_regend); \
- free (reg_info); \
- free (best_regstart); \
- free (best_regend); \
- reg_info = NULL; \
- failure_stack.stack = NULL; \
- regstart = regend = old_regstart = old_regend \
- = best_regstart = best_regend = NULL; \
+ FREE_VAR (fail_stack.stack); \
+ FREE_VAR (regstart); \
+ FREE_VAR (regend); \
+ FREE_VAR (old_regstart); \
+ FREE_VAR (old_regend); \
+ FREE_VAR (best_regstart); \
+ FREE_VAR (best_regend); \
+ FREE_VAR (reg_info); \
+ FREE_VAR (reg_dummy); \
+ FREE_VAR (reg_info_dummy); \
} while (0)
#else /* not REGEX_MALLOC */
-#define FREE_VARIABLES() /* As nothing, since we use alloca. */
+/* Some MIPS systems (at least) want this to free alloca'd storage. */
+#define FREE_VARIABLES() alloca (0)
#endif /* not REGEX_MALLOC */
@@ -2914,12 +3130,11 @@ typedef union
/* Matching routines. */
#ifndef emacs /* Emacs never uses this. */
-
/* re_match is like re_match_2 except it takes only a single string. */
int
re_match (bufp, string, size, pos, regs)
- const struct re_pattern_buffer *bufp;
+ struct re_pattern_buffer *bufp;
const char *string;
int size, pos;
struct re_registers *regs;
@@ -2935,10 +3150,8 @@ re_match (bufp, string, size, pos, regs)
matching at STOP.
If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
- store offsets for the substring each group matched in REGS. (If
- BUFP->caller_allocated_regs is nonzero, we fill REGS->num_regs
- registers; if zero, we set REGS->num_regs to max (RE_NREGS,
- re_nsub+1) and allocate the space with malloc before filling.)
+ store offsets for the substring each group matched in REGS. See the
+ documentation for exactly how many groups we fill.
We return -1 if no match, -2 if an internal error (such as the
failure stack overflowing). Otherwise, we return the length of the
@@ -2946,7 +3159,7 @@ re_match (bufp, string, size, pos, regs)
int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
- const struct re_pattern_buffer *bufp;
+ struct re_pattern_buffer *bufp;
const char *string1, *string2;
int size1, size2;
int pos;
@@ -2974,23 +3187,24 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* We use this to map every character in the string. */
char *translate = bufp->translate;
- /* Failure point stack. Each place that can handle a failure further
- down the line pushes a failure point on this stack. It consists of
- restart, regend, and reg_info for all registers corresponding to the
- subexpressions we're currently inside, plus the number of such
- registers, and, finally, two char *'s. The first char * is where to
- resume scanning the pattern; the second one is where to resume
- scanning the strings. If the latter is zero, the failure point is a
- ``dummy''; if a failure happens and the failure point is a dummy, it
- gets discarded and the next next one is tried. */
- failure_stack_type failure_stack;
+ /* Failure point stack. Each place that can handle a failure further
+ down the line pushes a failure point on this stack. It consists of
+ restart, regend, and reg_info for all registers corresponding to
+ the subexpressions we're currently inside, plus the number of such
+ registers, and, finally, two char *'s. The first char * is where
+ to resume scanning the pattern; the second one is where to resume
+ scanning the strings. If the latter is zero, the failure point is
+ a ``dummy''; if a failure happens and the failure point is a dummy,
+ it gets discarded and the next one is tried. */
+ fail_stack_type fail_stack;
#ifdef DEBUG
static unsigned failure_id = 0;
+ unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
#endif
/* We fill all the registers internally, independent of what we
return, for use in backreferences. The number here includes
- register zero. */
+ an element for register zero. */
unsigned num_regs = bufp->re_nsub + 1;
/* The currently active registers. */
@@ -3004,20 +3218,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
matching and the regnum-th regend points to right after where we
stopped matching the regnum-th subexpression. (The zeroth register
keeps track of what the whole pattern matches.) */
- const char **regstart
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
- const char **regend
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
+ const char **regstart, **regend;
/* If a group that's operated upon by a repetition operator fails to
match anything, then the register for its start will need to be
restored because it will have been set to wherever in the string we
are when we last see its open-group operator. Similarly for a
register's end. */
- const char **old_regstart
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
- const char **old_regend
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
+ const char **old_regstart, **old_regend;
/* The is_active field of reg_info helps us keep track of which (possibly
nested) subexpressions we are currently in. The matched_something
@@ -3025,24 +3233,28 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
matched any of the pattern so far this time through the reg_num-th
subexpression. These two fields get reset each time through any
loop their register is in. */
- register_info_type *reg_info = (register_info_type *)
- REGEX_ALLOCATE (num_regs * sizeof (register_info_type));
+ register_info_type *reg_info;
/* The following record the register info as found in the above
variables when we find a match better than any we've seen before.
This happens as we backtrack through the failure points, which in
- turn happens only if we have not yet matched the entire string. */
- unsigned best_regs_set = 0;
- const char **best_regstart
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
- const char **best_regend
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
+ turn happens only if we have not yet matched the entire string. */
+ unsigned best_regs_set = false;
+ const char **best_regstart, **best_regend;
+
+ /* Logically, this is `best_regend[0]'. But we don't want to have to
+ allocate space for that if we're not allocating space for anything
+ else (see below). Also, we never need info about register 0 for
+ any of the other register vectors, and it seems rather a kludge to
+ treat `best_regend' differently than the rest. So we keep track of
+ the end of the best match so far in a separate variable. We
+ initialize this to NULL so that when we backtrack the first time
+ and need to test it, it's not garbage. */
+ const char *match_end = NULL;
/* Used when we pop values we don't care about. */
- const char **reg_dummy
- = (const char **) REGEX_ALLOCATE (num_regs * sizeof (char *));
- register_info_type *reg_info_dummy = (register_info_type *)
- REGEX_ALLOCATE (num_regs * sizeof (register_info_type));
+ const char **reg_dummy;
+ register_info_type *reg_info_dummy;
#ifdef DEBUG
/* Counts the total number of registers pushed. */
@@ -3051,15 +3263,42 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
- if (!INIT_FAILURE_STACK (failure_stack))
- return -2;
-
- if (!(regstart && regend && old_regstart && old_regend && reg_info
- && best_regstart && best_regend))
+ INIT_FAIL_STACK ();
+
+ /* Do not bother to initialize all the register variables if there are
+ no groups in the pattern, as it takes a fair amount of time. If
+ there are groups, we include space for register 0 (the whole
+ pattern), even though we never use it, since it simplifies the
+ array indexing. We should fix this. */
+ if (bufp->re_nsub)
{
- FREE_VARIABLES ();
- return -2;
+ regstart = REGEX_TALLOC (num_regs, const char *);
+ regend = REGEX_TALLOC (num_regs, const char *);
+ old_regstart = REGEX_TALLOC (num_regs, const char *);
+ old_regend = REGEX_TALLOC (num_regs, const char *);
+ best_regstart = REGEX_TALLOC (num_regs, const char *);
+ best_regend = REGEX_TALLOC (num_regs, const char *);
+ reg_info = REGEX_TALLOC (num_regs, register_info_type);
+ reg_dummy = REGEX_TALLOC (num_regs, const char *);
+ reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
+
+ if (!(regstart && regend && old_regstart && old_regend && reg_info
+ && best_regstart && best_regend && reg_dummy && reg_info_dummy))
+ {
+ FREE_VARIABLES ();
+ return -2;
+ }
}
+#ifdef REGEX_MALLOC
+ else
+ {
+ /* We must initialize all our variables to NULL, so that
+ `FREE_VARIABLES' doesn't try to free them. */
+ regstart = regend = old_regstart = old_regend = best_regstart
+ = best_regend = reg_dummy = NULL;
+ reg_info = reg_info_dummy = (register_info_type *) NULL;
+ }
+#endif /* REGEX_MALLOC */
/* The starting position is bogus. */
if (pos < 0 || pos > size1 + size2)
@@ -3068,26 +3307,22 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
return -1;
}
-
/* Initialize subexpression text positions to -1 to mark ones that no
- \( or ( and \) or ) has been seen for. Also set all registers to
- inactive and mark them as not having any inner groups, able to
- match the empty string, matched anything so far, or ever failed. */
- for (mcnt = 0; mcnt < num_regs; mcnt++)
+ start_memory/stop_memory has been seen for. Also initialize the
+ register information struct. */
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
{
regstart[mcnt] = regend[mcnt]
= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
- REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NOTHING_UNSET_VALUE;
+ REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
IS_ACTIVE (reg_info[mcnt]) = 0;
MATCHED_SOMETHING (reg_info[mcnt]) = 0;
EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
}
- IS_ACTIVE (reg_info[0]) = 1;
-
- /* We move string1 into string2 if the latter's empty---but not if
- string1 is null. */
+ /* We move `string1' into `string2' if the latter's empty -- but not if
+ `string1' is null. */
if (size2 == 0 && string1 != NULL)
{
string2 = string1;
@@ -3110,9 +3345,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
end_match_2 = string2 + stop - size1;
}
- /* `p' scans through the pattern as `d' scans through the data. `dend'
- is the end of the input string that `d' points within. `d' is
- advanced into the following input string whenever necessary, but
+ /* `p' scans through the pattern as `d' scans through the data.
+ `dend' is the end of the input string that `d' points within. `d'
+ is advanced into the following input string whenever necessary, but
this happens before fetching; therefore, at the beginning of the
loop, `d' can be pointing at the end of a string, but it cannot
equal `string2'. */
@@ -3128,9 +3363,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
DEBUG_PRINT1 ("The compiled pattern is: ");
- DEBUG_COMPILED_PATTERN_PRINTER (bufp, p, pend);
+ DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
DEBUG_PRINT1 ("The string to match is: `");
- DEBUG_DOUBLE_STRING_PRINTER (d, string1, size1, string2, size2);
+ DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
DEBUG_PRINT1 ("'\n");
/* This loops over pattern commands. It exits by returning from the
@@ -3142,26 +3377,28 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (p == pend)
{ /* End of pattern means we might have succeeded. */
- DEBUG_PRINT1 ("End of pattern: ");
- /* If not end of string, try backtracking. Otherwise done. */
+ DEBUG_PRINT1 ("end of pattern ... ");
+
+ /* If we haven't matched the entire string, and we want the
+ longest match, try backtracking. */
if (d != end_match_2)
{
DEBUG_PRINT1 ("backtracking.\n");
- if (!FAILURE_STACK_EMPTY ())
+ if (!FAIL_STACK_EMPTY ())
{ /* More failure points to try. */
-
- boolean in_same_string =
- IS_IN_FIRST_STRING (best_regend[0])
- == MATCHING_IN_FIRST_STRING;
+ boolean same_str_p = (FIRST_STRING_P (match_end)
+ == MATCHING_IN_FIRST_STRING);
/* If exceeds best match so far, save it. */
if (!best_regs_set
- || (in_same_string && d > best_regend[0])
- || (!in_same_string && !MATCHING_IN_FIRST_STRING))
+ || (same_str_p && d > match_end)
+ || (!same_str_p && !MATCHING_IN_FIRST_STRING))
{
- best_regs_set = 1;
- best_regend[0] = d; /* Never use regstart[0]. */
+ best_regs_set = true;
+ match_end = d;
+
+ DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
for (mcnt = 1; mcnt < num_regs; mcnt++)
{
@@ -3176,13 +3413,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
else if (best_regs_set)
{
restore_best_regs:
- /* Restore best match. */
- d = best_regend[0];
+ /* Restore best match. It may happen that `dend ==
+ end_match_1' while the restored d is in string2.
+ For example, the pattern `x.*y.*z' against the
+ strings `x-' and `y-z-', if the two strings are
+ not consecutive in memory. */
+ DEBUG_PRINT1 ("Restoring best registers.\n");
- if (d >= string1 && d <= end1)
- dend = end_match_1;
+ d = match_end;
+ dend = ((d >= string1 && d <= end1)
+ ? end_match_1 : end_match_2);
- for (mcnt = 0; mcnt < num_regs; mcnt++)
+ for (mcnt = 1; mcnt < num_regs; mcnt++)
{
regstart[mcnt] = best_regstart[mcnt];
regend[mcnt] = best_regend[mcnt];
@@ -3190,35 +3432,51 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
} /* d != end_match_2 */
- DEBUG_PRINT1 ("accepting match.\n");
+ DEBUG_PRINT1 ("Accepting match.\n");
/* If caller wants register contents data back, do it. */
if (regs && !bufp->no_sub)
{
- /* If they haven't allocated it, we'll do it. */
- if (!bufp->caller_allocated_regs)
- {
+ /* Have the register data arrays been allocated? */
+ if (bufp->regs_allocated == REGS_UNALLOCATED)
+ { /* No. So allocate them with malloc. We need one
+ extra element beyond `num_regs' for the `-1' marker
+ GNU code uses. */
regs->num_regs = MAX (RE_NREGS, num_regs + 1);
regs->start = TALLOC (regs->num_regs, regoff_t);
regs->end = TALLOC (regs->num_regs, regoff_t);
if (regs->start == NULL || regs->end == NULL)
return -2;
+ bufp->regs_allocated = REGS_REALLOCATE;
}
-
+ else if (bufp->regs_allocated == REGS_REALLOCATE)
+ { /* Yes. If we need more elements than were already
+ allocated, reallocate them. If we need fewer, just
+ leave it alone. */
+ if (regs->num_regs < num_regs + 1)
+ {
+ regs->num_regs = num_regs + 1;
+ RETALLOC (regs->start, regs->num_regs, regoff_t);
+ RETALLOC (regs->end, regs->num_regs, regoff_t);
+ if (regs->start == NULL || regs->end == NULL)
+ return -2;
+ }
+ }
+ else
+ assert (bufp->regs_allocated == REGS_FIXED);
+
/* Convert the pointer data in `regstart' and `regend' to
indices. Register zero has to be set differently,
since we haven't kept track of any info for it. */
if (regs->num_regs > 0)
{
regs->start[0] = pos;
- regs->end[0] = MATCHING_IN_FIRST_STRING
- ? d - string1
- : d - string2 + size1;
+ regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
+ : d - string2 + size1);
}
- /* Go through the first min (num_regs, regs->num_regs)
- registers, since that is all we initialized at the
- beginning. */
+ /* Go through the first `min (num_regs, regs->num_regs)'
+ registers, since that is all we initialized. */
for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
{
if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
@@ -3231,16 +3489,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
}
/* If the regs structure we return has more elements than
- it than were in the pattern, set the extra elements to
- -1. If we allocated the registers, this is the case,
- because we always allocate enough to have at least -1
- at the end. */
+ were in the pattern, set the extra elements to -1. If
+ we (re)allocated the registers, this is the case,
+ because we always allocate enough to have at least one
+ -1 at the end. */
for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
regs->start[mcnt] = regs->end[mcnt] = -1;
} /* regs && !bufp->no_sub */
FREE_VARIABLES ();
- DEBUG_PRINT2 ("%d registers pushed.\n", num_regs_pushed);
+ DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
+ nfailure_points_pushed, nfailure_points_popped,
+ nfailure_points_pushed - nfailure_points_popped);
+ DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
mcnt = d - pos - (MATCHING_IN_FIRST_STRING
? string1
@@ -3278,7 +3539,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
do
{
- PREFETCH;
+ PREFETCH ();
if (translate[(unsigned char) *d++] != (char) *p++)
goto fail;
}
@@ -3288,7 +3549,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
{
do
{
- PREFETCH;
+ PREFETCH ();
if (*d++ != (char) *p++) goto fail;
}
while (--mcnt);
@@ -3297,17 +3558,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
break;
- /* Match anything but possibly a newline or a null. */
+ /* Match any character except possibly a newline or a null. */
case anychar:
DEBUG_PRINT1 ("EXECUTING anychar.\n");
- PREFETCH;
+ PREFETCH ();
if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
|| (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
goto fail;
SET_REGS_MATCHED ();
+ DEBUG_PRINT2 (" Matched `%d'.\n", *d);
d++;
break;
@@ -3320,10 +3582,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
- PREFETCH;
+ PREFETCH ();
c = TRANSLATE (*d); /* The character to match. */
- if (c < (unsigned char) (*p * BYTEWIDTH)
+ /* Cast to `unsigned' instead of `unsigned char' in case the
+ bit list is a full 32 bytes long. */
+ if (c < (unsigned) (*p * BYTEWIDTH)
&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
not = !not;
@@ -3348,8 +3612,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Find out if this group can match the empty string. */
p1 = p; /* To send to group_match_null_string_p. */
- if (REG_MATCH_NULL_STRING_P (reg_info[*p])
- == MATCH_NOTHING_UNSET_VALUE)
+ if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
REG_MATCH_NULL_STRING_P (reg_info[*p])
= group_match_null_string_p (&p1, pend, reg_info);
@@ -3419,32 +3682,28 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
(a(b)c(d(e)f)g). When group 3 ends, after the f), the
new highest active register is 1. */
unsigned char r = *p - 1;
-
- /* This loop will always terminate, because register 0 is
- always active. */
- assert (IS_ACTIVE (reg_info[0]));
- while (!IS_ACTIVE (reg_info[r]))
+ while (r > 0 && !IS_ACTIVE (reg_info[r]))
r--;
/* If we end up at register zero, that means that we saved
- the registers as the result of an on_failure_jump, not
- a start_memory, and we jumped to past the innermost
- stop_memory. For example, in ((.)*). We save
+ the registers as the result of an `on_failure_jump', not
+ a `start_memory', and we jumped to past the innermost
+ `stop_memory'. For example, in ((.)*) we save
registers 1 and 2 as a result of the *, but when we pop
back to the second ), we are at the stop_memory 1.
Thus, nothing is active. */
- if (r != 0)
- highest_active_reg = r;
- else
+ if (r == 0)
{
lowest_active_reg = NO_LOWEST_ACTIVE_REG;
highest_active_reg = NO_HIGHEST_ACTIVE_REG;
}
+ else
+ highest_active_reg = r;
}
/* If just failed to match something this time around with a
group that's operated on by a repetition operator, try to
- force exit from the ``loop,'' and restore the register
+ force exit from the ``loop'', and restore the register
information for this group that we had before trying this
last match. */
if ((!MATCHED_SOMETHING (reg_info[*p])
@@ -3457,11 +3716,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
mcnt = 0;
switch ((re_opcode_t) *p1++)
{
- case no_pop_jump_n:
+ case jump_n:
is_a_jump_n = true;
case pop_failure_jump:
case maybe_pop_jump:
- case no_pop_jump:
+ case jump:
case dummy_failure_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
if (is_a_jump_n)
@@ -3540,8 +3799,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
set to the place to stop, otherwise, for now have to use
the end of the first string. */
- dend2 = ((IS_IN_FIRST_STRING (regstart[regno])
- == IS_IN_FIRST_STRING (regend[regno]))
+ dend2 = ((FIRST_STRING_P (regstart[regno])
+ == FIRST_STRING_P (regend[regno]))
? regend[regno] : end_match_1);
for (;;)
{
@@ -3560,7 +3819,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
if (d2 == dend2) break;
/* If necessary, advance to next segment in data. */
- PREFETCH;
+ PREFETCH ();
/* How many characters left in this segment to match. */
mcnt = dend - d;
@@ -3588,11 +3847,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case begline:
DEBUG_PRINT1 ("EXECUTING begline.\n");
- if (AT_STRINGS_BEG)
+ if (AT_STRINGS_BEG (d))
{
if (!bufp->not_bol) break;
}
- else if (d[-1] == '\n' && bufp->newline_anchor)
+ else if (d[-1] == '\n' && bufp->newline_anchor)
{
break;
}
@@ -3604,7 +3863,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case endline:
DEBUG_PRINT1 ("EXECUTING endline.\n");
- if (AT_STRINGS_END)
+ if (AT_STRINGS_END (d))
{
if (!bufp->not_eol) break;
}
@@ -3621,7 +3880,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Match at the very beginning of the data. */
case begbuf:
DEBUG_PRINT1 ("EXECUTING begbuf.\n");
- if (AT_STRINGS_BEG)
+ if (AT_STRINGS_BEG (d))
break;
goto fail;
@@ -3629,27 +3888,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Match at the very end of the data. */
case endbuf:
DEBUG_PRINT1 ("EXECUTING endbuf.\n");
- if (AT_STRINGS_END)
+ if (AT_STRINGS_END (d))
break;
goto fail;
/* on_failure_keep_string_jump is used to optimize `.*\n'. It
pushes NULL as the value for the string on the stack. Then
- pop_failure_point will keep the current value for the string,
- instead of restoring it. To see why, consider matching
- `foo\nbar' against `.*\n'. The .* matches the foo; then the
- . fails against the \n. But the next thing we want to do is
- match the \n against the \n; if we restored the string value,
- we would be back at the foo.
+ `POP_FAILURE_POINT' will keep the current value for the
+ string, instead of restoring it. To see why, consider
+ matching `foo\nbar' against `.*\n'. The .* matches the foo;
+ then the . fails against the \n. But the next thing we want
+ to do is match the \n against the \n; if we restored the
+ string value, we would be back at the foo.
Because this is used only in specific cases, we don't need to
- go through the hassle of checking all the things that
- on_failure_jump does, to make sure the right things get saved
- on the stack. Hence we don't share its code. The only
- reason to push anything on the stack at all is that otherwise
- we would have to change anychar's code to do something
- besides goto fail in this case; that seems worse than this. */
+ check all the things that `on_failure_jump' does, to make
+ sure the right things get saved on the stack. Hence we don't
+ share its code. The only reason to push anything on the
+ stack at all is that otherwise we would have to change
+ the code for `anychar' to do something besides goto fail in this
+ case; that seems worse than this. */
case on_failure_keep_string_jump:
DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
@@ -3683,7 +3942,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
the original * applied to a group), save the information
for that group and all inner ones, so that if we fail back
to this point, the group's information will be correct.
- For example, in \(a*\)*\1, we only need the preceding group,
+ For example, in \(a*\)*\1, we need the preceding group,
and in \(\(a*\)b*\)\2, we need the inner group. */
/* We can't use `p' to check ahead because we push
@@ -3713,8 +3972,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
break;
- /* A smart repeat ends with a maybe_pop_jump.
- We change it either to a pop_failure_jump or a no_pop_jump. */
+ /* A smart repeat ends with `maybe_pop_jump'.
+ We change it to either `pop_failure_jump' or `jump'. */
case maybe_pop_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
@@ -3726,7 +3985,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
is nothing that they would both match, i.e., that we
would have to backtrack because of (as in, e.g., `a*a')
then we can change to pop_failure_jump, because we'll
- never have to backtrack. */
+ never have to backtrack.
+
+ This is not true in the case of alternatives: in
+ `(a|ab)*' we do need to backtrack to the `ab' alternative
+ (e.g., if the string was `ab'). But instead of trying to
+ detect that here, the alternative has put on a dummy
+ failure point which is what we will end up popping. */
/* Skip over open/close-group commands. */
while (p2 + 2 < pend
@@ -3736,7 +4001,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* If we're at the end of the pattern, we can change. */
if (p2 == pend)
- p[-3] = (unsigned char) pop_failure_jump;
+ {
+ /* Consider what happens when matching ":\(.*\)"
+ against ":/". I don't really understand this code
+ yet. */
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1
+ (" End of pattern: change to `pop_failure_jump'.\n");
+ }
else if ((re_opcode_t) *p2 == exactn
|| (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
@@ -3745,11 +4017,16 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
= *p2 == (unsigned char) endline ? '\n' : p2[2];
p1 = p + mcnt;
- /* p1[0] ... p1[2] are the on_failure_jump corresponding
- to the maybe_finalize_jump of this case. Examine what
- follows it. */
+ /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
+ to the `maybe_pop_jump' of this case. Examine what
+ follows. */
if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
- p[-3] = (unsigned char) pop_failure_jump;
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
+ c, p1[5]);
+ }
+
else if ((re_opcode_t) p1[3] == charset
|| (re_opcode_t) p1[3] == charset_not)
{
@@ -3762,15 +4039,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* `not' is equal to 1 if c would match, which means
that we can't change to pop_failure_jump. */
if (!not)
- p[-3] = (unsigned char) pop_failure_jump;
+ {
+ p[-3] = (unsigned char) pop_failure_jump;
+ DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
+ }
}
}
}
p -= 2; /* Point at relative address again. */
if ((re_opcode_t) p[-1] != pop_failure_jump)
{
- p[-1] = (unsigned char) no_pop_jump;
- goto no_pop;
+ p[-1] = (unsigned char) jump;
+ DEBUG_PRINT1 (" Match => jump.\n");
+ goto unconditional_jump;
}
/* Note fall through. */
@@ -3784,31 +4065,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case pop_failure_jump:
{
/* We need to pass separate storage for the lowest and
- highest registers, even though we aren't interested.
- Otherwise, we will restore only one register from the
- stack, since lowest will equal highest in
- pop_failure_point (since they'll be the same memory
- location). */
- unsigned dummy_low, dummy_high;
- unsigned char *pdummy = NULL;
+ highest registers, even though we don't care about the
+ actual values. Otherwise, we will restore only one
+ register from the stack, since lowest will == highest in
+ `POP_FAILURE_POINT'. */
+ unsigned dummy_low_reg, dummy_high_reg;
+ unsigned char *pdummy;
+ const char *sdummy;
DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
- pop_failure_point (bufp, pend,
-#ifdef DEBUG
- string1, size1, string2, size2,
-#endif
- &failure_stack, &pdummy, &pdummy,
- &dummy_low, &dummy_high,
- &reg_dummy, &reg_dummy, &reg_info_dummy);
+ POP_FAILURE_POINT (sdummy, pdummy,
+ dummy_low_reg, dummy_high_reg,
+ reg_dummy, reg_dummy, reg_info_dummy);
}
/* Note fall through. */
- /* Jump without taking off any failure points. */
- case no_pop_jump:
- no_pop:
+ /* Unconditionally jump (without popping any failure points). */
+ case jump:
+ unconditional_jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
- DEBUG_PRINT2 ("EXECUTING no_pop_jump %d ", mcnt);
+ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
p += mcnt; /* Do the jump. */
DEBUG_PRINT2 ("(to 0x%x).\n", p);
break;
@@ -3816,9 +4093,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* We need this opcode so we can detect where alternatives end
in `group_match_null_string_p' et al. */
- case jump_past_next_alt:
- DEBUG_PRINT1 ("EXECUTING jump_past_next_alt.\n");
- goto no_pop;
+ case jump_past_alt:
+ DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
+ goto unconditional_jump;
/* Normally, the on_failure_jump pushes a failure point, which
@@ -3831,17 +4108,30 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* It doesn't matter what we push for the string here. What
the code at `fail' tests is the value for the pattern. */
PUSH_FAILURE_POINT (0, 0, -2);
- goto no_pop;
-
+ goto unconditional_jump;
+
+
+ /* At the end of an alternative, we need to push a dummy failure
+ point in case we are followed by a `pop_failure_jump', because
+ we don't want the failure point for the alternative to be
+ popped. For example, matching `(a|ab)*' against `aab'
+ requires that we match the `ab' alternative. */
+ case push_dummy_failure:
+ DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
+ /* See comments just above at `dummy_failure_jump' about the
+ two zeroes. */
+ PUSH_FAILURE_POINT (0, 0, -2);
+ break;
- /* Have to succeed matching what follows at least n times. Then
- just handle like an on_failure_jump. */
+ /* Have to succeed matching what follows at least n times.
+ After that, handle like `on_failure_jump'. */
case succeed_n:
EXTRACT_NUMBER (mcnt, p + 2);
DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
+ assert (mcnt >= 0);
/* Originally, this is how many times we HAVE to succeed. */
- if (mcnt)
+ if (mcnt > 0)
{
mcnt--;
p += 2;
@@ -3855,25 +4145,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
p[3] = (unsigned char) no_op;
goto on_failure;
}
-#ifdef DEBUG
- else
- {
- fprintf (stderr, "regex: negative n at succeed_n.\n");
- abort ();
- }
-#endif /* DEBUG */
break;
- case no_pop_jump_n:
+ case jump_n:
EXTRACT_NUMBER (mcnt, p + 2);
- DEBUG_PRINT2 ("EXECUTING no_pop_jump_n %d.\n", mcnt);
+ DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
/* Originally, this is how many times we CAN jump. */
if (mcnt)
{
mcnt--;
- STORE_NUMBER(p + 2, mcnt);
- goto no_pop;
+ STORE_NUMBER (p + 2, mcnt);
+ goto unconditional_jump;
}
/* If don't have to jump any more, skip over the rest of command. */
else
@@ -3887,6 +4170,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
EXTRACT_NUMBER_AND_INCR (mcnt, p);
p1 = p + mcnt;
EXTRACT_NUMBER_AND_INCR (mcnt, p);
+ DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
STORE_NUMBER (p1, mcnt);
break;
}
@@ -3905,14 +4189,14 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
case wordbeg:
DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
- if (LETTER_P (d) && (AT_STRINGS_BEG || !LETTER_P (d - 1)))
+ if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
break;
goto fail;
case wordend:
DEBUG_PRINT1 ("EXECUTING wordend.\n");
- if (!AT_STRINGS_BEG && LETTER_P (d - 1)
- && (!LETTER_P (d) || AT_STRINGS_END))
+ if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
+ && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
break;
goto fail;
@@ -3949,11 +4233,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
goto matchsyntax;
case wordchar:
- DEBUG_PRINT1 ("EXECUTING wordchar.\n");
+ DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
mcnt = (int) Sword;
matchsyntax:
- PREFETCH;
- if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
+ PREFETCH ();
+ if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
+ goto fail;
SET_REGS_MATCHED ();
break;
@@ -3963,29 +4248,32 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
goto matchnotsyntax;
case notwordchar:
- DEBUG_PRINT1 ("EXECUTING notwordchar.\n");
+ DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
mcnt = (int) Sword;
- matchnotsyntax: /* We goto here from notsyntaxspec. */
- PREFETCH;
- if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
+ matchnotsyntax:
+ PREFETCH ();
+ if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
+ goto fail;
SET_REGS_MATCHED ();
break;
#else /* not emacs */
case wordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
- PREFETCH;
- if (!LETTER_P (d))
+ PREFETCH ();
+ if (!WORDCHAR_P (d))
goto fail;
SET_REGS_MATCHED ();
+ d++;
break;
case notwordchar:
DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
- PREFETCH;
- if (LETTER_P (d))
+ PREFETCH ();
+ if (WORDCHAR_P (d))
goto fail;
SET_REGS_MATCHED ();
+ d++;
break;
#endif /* not emacs */
@@ -3997,16 +4285,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* We goto here if a matching operation fails. */
fail:
- if (!FAILURE_STACK_EMPTY ())
+ if (!FAIL_STACK_EMPTY ())
{ /* A restart point is known. Restore to that state. */
DEBUG_PRINT1 ("\nFAIL:\n");
- pop_failure_point (bufp, pend,
-#ifdef DEBUG
- string1, size1, string2, size2,
-#endif
- &failure_stack, &p, &d, &lowest_active_reg,
- &highest_active_reg, &regstart, &regend,
- &reg_info);
+ POP_FAILURE_POINT (d, p,
+ lowest_active_reg, highest_active_reg,
+ regstart, regend, reg_info);
/* If this failure point is a dummy, try the next one. */
if (!p)
@@ -4022,11 +4306,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
loop, need to pop this failure point and use the next one. */
switch ((re_opcode_t) *p)
{
- case no_pop_jump_n:
+ case jump_n:
is_a_jump_n = true;
case maybe_pop_jump:
case pop_failure_jump:
- case no_pop_jump:
+ case jump:
p1 = p + 1;
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
p1 += mcnt;
@@ -4059,89 +4343,6 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
/* Subroutine definitions for re_match_2. */
-/* Pops what PUSH_FAILURE_STACK pushes. */
-
-static void
-pop_failure_point (bufp, pattern_end,
-#ifdef DEBUG
- string1, size1, string2, size2,
-#endif
- failure_stack_ptr, pattern_place, string_place,
- lowest_active_reg, highest_active_reg,
- regstart, regend, reg_info)
- const struct re_pattern_buffer *bufp; /* These not modified. */
- unsigned char *pattern_end;
-#ifdef DEBUG
- unsigned char *string1, *string2;
- int size1, size2;
-#endif
- failure_stack_type *failure_stack_ptr; /* These get modified. */
- const unsigned char **pattern_place;
- const unsigned char **string_place;
- unsigned *lowest_active_reg, *highest_active_reg;
- const unsigned char ***regstart;
- const unsigned char ***regend;
- register_info_type **reg_info;
-{
-#ifdef DEBUG
- /* Type is really unsigned; it's declared this way just to avoid a
- compiler warning. */
- failure_stack_elt_t failure_id;
-#endif
- int this_reg;
- const unsigned char *string_temp;
-
- assert (!FAILURE_STACK_PTR_EMPTY ());
-
- /* Remove failure points and point to how many regs pushed. */
- DEBUG_PRINT1 ("pop_failure_point:\n");
- DEBUG_PRINT2 (" Before pop, next avail: %d\n", failure_stack_ptr->avail);
- DEBUG_PRINT2 (" size: %d\n", failure_stack_ptr->size);
-
- assert (failure_stack_ptr->avail >= NUM_NONREG_ITEMS);
-
- DEBUG_POP (&failure_id);
- DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id);
-
- /* If the saved string location is NULL, it came from an
- on_failure_keep_string_jump opcode, and we want to throw away the
- saved NULL, thus retaining our current position in the string. */
- string_temp = POP_FAILURE_ITEM ();
- if (string_temp != NULL)
- *string_place = string_temp;
-
- DEBUG_PRINT2 (" Popping string 0x%x: `", *string_place);
- DEBUG_DOUBLE_STRING_PRINTER (*string_place, string1, size1, string2, size2);
- DEBUG_PRINT1 ("'\n");
-
- *pattern_place = POP_FAILURE_ITEM ();
- DEBUG_PRINT2 (" Popping pattern 0x%x: ", *pattern_place);
- DEBUG_COMPILED_PATTERN_PRINTER (bufp, *pattern_place, pattern_end);
-
- /* Restore register info. */
- *highest_active_reg = (unsigned) POP_FAILURE_ITEM ();
- DEBUG_PRINT2 (" Popping high active reg: %d\n", *highest_active_reg);
-
- *lowest_active_reg = (unsigned) POP_FAILURE_ITEM ();
- DEBUG_PRINT2 (" Popping low active reg: %d\n", *lowest_active_reg);
-
- for (this_reg = *highest_active_reg; this_reg >= *lowest_active_reg;
- this_reg--)
- {
- DEBUG_PRINT2 (" Popping reg: %d\n", this_reg);
-
- (*reg_info)[this_reg].word = POP_FAILURE_ITEM ();
- DEBUG_PRINT2 (" info: 0x%x\n", (*reg_info)[this_reg]);
-
- (*regend)[this_reg] = POP_FAILURE_ITEM ();
- DEBUG_PRINT2 (" end: 0x%x\n", (*regend)[this_reg]);
-
- (*regstart)[this_reg] = POP_FAILURE_ITEM ();
- DEBUG_PRINT2 (" start: 0x%x\n", (*regstart)[this_reg]);
- }
-} /* pop_failure_point */
-
-
/* We are passed P pointing to a register number after a start_memory.
Return true if the pattern up to the corresponding stop_memory can
@@ -4181,12 +4382,12 @@ group_match_null_string_p (p, end, reg_info)
{
/* Go through the on_failure_jumps of the alternatives,
seeing if any of the alternatives cannot match nothing.
- The last alternative starts with only a no_pop_jump,
+ The last alternative starts with only a jump,
whereas the rest start with on_failure_jump and end
- with a no_pop_jump, e.g., here is the pattern for `a|b|c':
+ with a jump, e.g., here is the pattern for `a|b|c':
- /on_failure_jump/0/6/exactn/1/a/jump_past_next_alt/0/6
- /on_failure_jump/0/6/exactn/1/b/jump_past_next_alt/0/3
+ /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
+ /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
/exactn/1/c
So, we have to first go through the first (n-1)
@@ -4195,12 +4396,12 @@ group_match_null_string_p (p, end, reg_info)
/* Deal with the first (n-1) alternatives, which start
with an on_failure_jump (see above) that jumps to right
- past a jump_past_next_alt. */
+ past a jump_past_alt. */
- while ((re_opcode_t) p1[mcnt-3] == jump_past_next_alt)
+ while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
{
/* `mcnt' holds how many bytes long the alternative
- is, including the ending `jump_past_next_alt' and
+ is, including the ending `jump_past_alt' and
its number. */
if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
@@ -4208,7 +4409,7 @@ group_match_null_string_p (p, end, reg_info)
return false;
/* Move to right after this alternative, including the
- jump_past_next_alt. */
+ jump_past_alt. */
p1 += mcnt;
/* Break if it's the beginning of an n-th alternative
@@ -4220,7 +4421,7 @@ group_match_null_string_p (p, end, reg_info)
alternative that starts with an on_failure_jump. */
p1++;
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
- if ((re_opcode_t) p1[mcnt-3] != jump_past_next_alt)
+ if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
{
/* Get to the beginning of the n-th alternative. */
p1 -= 3;
@@ -4229,8 +4430,8 @@ group_match_null_string_p (p, end, reg_info)
}
/* Deal with the last alternative: go back and get number
- of the jump_past_next_alt just before it. `mcnt'
- contains how many bytes long the alternative is. */
+ of the `jump_past_alt' just before it. `mcnt' contains
+ the length of the alternative. */
EXTRACT_NUMBER (mcnt, p1 - 2);
if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
@@ -4328,13 +4529,13 @@ common_op_match_null_string_p (p, end, reg_info)
case start_memory:
reg_no = *p1;
+ assert (reg_no > 0 && reg_no <= MAX_REGNUM);
ret = group_match_null_string_p (&p1, end, reg_info);
/* Have to set this here in case we're checking a group which
contains a group and a back reference to it. */
- if (REG_MATCH_NULL_STRING_P (reg_info[reg_no])
- == MATCH_NOTHING_UNSET_VALUE)
+ if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
if (!ret)
@@ -4342,7 +4543,7 @@ common_op_match_null_string_p (p, end, reg_info)
break;
/* If this is an optimized succeed_n for zero times, make the jump. */
- case no_pop_jump:
+ case jump:
EXTRACT_NUMBER_AND_INCR (mcnt, p1);
if (mcnt >= 0)
p1 += mcnt;
@@ -4420,9 +4621,9 @@ re_compile_pattern (pattern, length, bufp)
{
reg_errcode_t ret;
- /* GNU code is written to assume RE_NREGS registers will be set
- (and extraneous ones will be filled with -1). */
- bufp->caller_allocated_regs = 0;
+ /* GNU code is written to assume at least RE_NREGS registers will be set
+ (and at least one extra will be -1). */
+ bufp->regs_allocated = REGS_UNALLOCATED;
/* And GNU code determines whether or not to get register information
by passing null for the REGS argument to re_match, etc., not by
@@ -4432,19 +4633,20 @@ re_compile_pattern (pattern, length, bufp)
/* Match anchors at newline. */
bufp->newline_anchor = 1;
- ret = regex_compile (pattern, length, obscure_syntax, bufp);
+ ret = regex_compile (pattern, length, re_syntax_options, bufp);
return re_error_msg[(int) ret];
}
/* Entry points compatible with 4.2 BSD regex library. We don't define
- them if this is an Emacs compilation. */
+ them if this is an Emacs or POSIX compilation. */
-#if !defined (emacs)
+#if !defined (emacs) && !defined (_POSIX_SOURCE)
+/* BSD has one and only one pattern buffer. */
static struct re_pattern_buffer re_comp_buf;
-const char *
+char *
re_comp (s)
const char *s;
{
@@ -4469,12 +4671,16 @@ re_comp (s)
return "Memory exhausted";
}
+ /* Since `re_exec' always passes NULL for the `regs' argument, we
+ don't need to initialize the pattern buffer fields which affect it. */
+
/* Match anchors at newlines. */
re_comp_buf.newline_anchor = 1;
- ret = regex_compile (s, strlen (s), obscure_syntax, &re_comp_buf);
+ ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
- return re_error_msg[(int) ret];
+ /* Yes, we're discarding `const' here. */
+ return (char *) re_error_msg[(int) ret];
}
@@ -4483,13 +4689,12 @@ re_exec (s)
const char *s;
{
const int len = strlen (s);
- return 0 <= re_search (&re_comp_buf, s, len, 0, len,
- (struct re_registers *) 0);
+ return
+ 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
}
-#endif /* not emacs */
+#endif /* not emacs and not _POSIX_SOURCE */
-/* Entry points compatible with POSIX regex library. Don't define these
- for Emacs. */
+/* POSIX.2 functions. Don't define these for Emacs. */
#ifndef emacs
@@ -4535,10 +4740,12 @@ regcomp (preg, pattern, cflags)
{
reg_errcode_t ret;
unsigned syntax
- = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
+ = (cflags & REG_EXTENDED) ?
+ RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
/* regex_compile will allocate the space for the compiled pattern. */
preg->buffer = 0;
+ preg->allocated = 0;
/* Don't bother to use a fastmap when searching. This simplifies the
REG_NEWLINE case: if we used a fastmap, we'd have to put all the
@@ -4556,7 +4763,7 @@ regcomp (preg, pattern, cflags)
/* Map uppercase characters to corresponding lowercase ones. */
for (i = 0; i < CHAR_SET_SIZE; i++)
- preg->translate[i] = isupper (i) ? tolower (i) : i;
+ preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
}
else
preg->translate = NULL;
@@ -4619,10 +4826,10 @@ regexec (preg, string, nmatch, pmatch, eflags)
private_preg.not_bol = !!(eflags & REG_NOTBOL);
private_preg.not_eol = !!(eflags & REG_NOTEOL);
- /* The user has told us how many registers to return information
- about, via `nmatch'. We have to pass that on to the matching
- routines. */
- private_preg.caller_allocated_regs = 1;
+ /* The user has told us exactly how many registers to return
+ information about, via `nmatch'. We have to pass that on to the
+ matching routines. */
+ private_preg.regs_allocated = REGS_FIXED;
if (want_reg_info)
{
@@ -4636,7 +4843,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
/* Perform the searching operation. */
ret = re_search (&private_preg, string, len,
/* start: */ 0, /* range: */ len,
- want_reg_info ? &regs : NULL);
+ want_reg_info ? &regs : (struct re_registers *) 0);
/* Copy the register information to the POSIX structure. */
if (want_reg_info)
@@ -4663,7 +4870,7 @@ regexec (preg, string, nmatch, pmatch, eflags)
/* Returns a message corresponding to an error code, ERRCODE, returned
- from either regcomp or regexec. */
+ from either regcomp or regexec. We don't use PREG here. */
size_t
regerror (errcode, preg, errbuf, errbuf_size)
@@ -4672,9 +4879,25 @@ regerror (errcode, preg, errbuf, errbuf_size)
char *errbuf;
size_t errbuf_size;
{
- const char *msg
- = re_error_msg[errcode] == NULL ? "Success" : re_error_msg[errcode];
- size_t msg_size = strlen (msg) + 1; /* Includes the null. */
+ const char *msg;
+ size_t msg_size;
+
+ if (errcode < 0
+ || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
+ /* Only error codes returned by the rest of the code should be passed
+ to this routine. If we are given anything else, or if other regex
+ code generates an invalid error code, then the program has a bug.
+ Dump core so we can fix it. */
+ abort ();
+
+ msg = re_error_msg[errcode];
+
+ /* POSIX doesn't require that we do anything in this case, but why
+ not be nice. */
+ if (! msg)
+ msg = "Success";
+
+ msg_size = strlen (msg) + 1; /* Includes the null. */
if (errbuf_size != 0)
{
@@ -4716,148 +4939,6 @@ regfree (preg)
#endif /* not emacs */
-#ifdef test
-
-#include <stdio.h>
-
-/* Indexed by a character, gives the upper case equivalent of the
- character. */
-
-char upcase[0400] =
- { 000, 001, 002, 003, 004, 005, 006, 007,
- 010, 011, 012, 013, 014, 015, 016, 017,
- 020, 021, 022, 023, 024, 025, 026, 027,
- 030, 031, 032, 033, 034, 035, 036, 037,
- 040, 041, 042, 043, 044, 045, 046, 047,
- 050, 051, 052, 053, 054, 055, 056, 057,
- 060, 061, 062, 063, 064, 065, 066, 067,
- 070, 071, 072, 073, 074, 075, 076, 077,
- 0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
- 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
- 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
- 0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
- 0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
- 0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
- 0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
- 0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
- 0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
- 0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
- 0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
- 0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
- 0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
- 0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
- 0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
- 0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
- 0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
- 0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
- 0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
- 0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
- 0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
- 0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
- 0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
- 0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
- };
-
-
-/* Use this to run interactive tests. */
-
-void
-main (argc, argv)
- int argc;
- char **argv;
-{
- char pat[500];
- struct re_pattern_buffer buf;
- int i;
- char c;
- char fastmap[(1 << BYTEWIDTH)];
-
- /* Allow a command argument to specify the style of syntax. */
- if (argc > 1)
- re_set_syntax (atoi (argv[1]));
-
- buf.allocated = 40;
- buf.buffer = (unsigned char *) malloc (buf.allocated);
- buf.fastmap = fastmap;
- buf.translate = upcase;
-
- for (;;)
- {
- printf ("Pattern = ");
- gets (pat);
-
- if (*pat)
- {
- void printchar ();
- re_compile_pattern (pat, strlen (pat), &buf);
-
- for (i = 0; i < buf.used; i++)
- printchar (buf.buffer[i]);
-
- putchar ('\n');
-
- printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
-
- re_compile_fastmap (&buf);
- printf ("Allowed by fastmap: ");
- for (i = 0; i < (1 << BYTEWIDTH); i++)
- if (fastmap[i]) printchar (i);
- putchar ('\n');
- }
-
- printf ("String = ");
- gets (pat); /* Now read the string to match against */
-
- i = re_match (&buf, pat, strlen (pat), 0, 0);
- printf ("Match value %d.\n\n", i);
- }
-}
-
-
-#if 0
-/* We have a fancier version now, compiled_pattern_printer. */
-print_buf (bufp)
- struct re_pattern_buffer *bufp;
-{
- int i;
-
- printf ("buf is :\n----------------\n");
- for (i = 0; i < bufp->used; i++)
- printchar (bufp->buffer[i]);
-
- printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
-
- printf ("Allowed by fastmap: ");
- for (i = 0; i < (1 << BYTEWIDTH); i++)
- if (bufp->fastmap[i])
- printchar (i);
- printf ("\nAllowed by translate: ");
- if (bufp->translate)
- for (i = 0; i < (1 << BYTEWIDTH); i++)
- if (bufp->translate[i])
- printchar (i);
- printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
- printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
-}
-#endif /* 0 */
-
-
-void
-printchar (c)
- char c;
-{
- if (c < 040 || c >= 0177)
- {
- putchar ('\\');
- putchar (((c >> 6) & 3) + '0');
- putchar (((c >> 3) & 7) + '0');
- putchar ((c & 7) + '0');
- }
- else
- putchar (c);
-}
-#endif /* test */
-
/*
Local variables:
make-backup-files: t
diff --git a/gnu/usr.bin/cvs/lib/regex.h b/gnu/usr.bin/cvs/lib/regex.h
index 211ad09..408dd21 100644
--- a/gnu/usr.bin/cvs/lib/regex.h
+++ b/gnu/usr.bin/cvs/lib/regex.h
@@ -1,7 +1,7 @@
/* Definitions for data structures and routines for the regular
- expression library, version REPLACE-WITH-VERSION.
+ expression library, version 0.12.
- Copyright (C) 1985, 1989, 1990, 1991, 1992 Free Software Foundation, Inc.
+ Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -20,7 +20,15 @@
#ifndef __REGEXP_LIBRARY_H__
#define __REGEXP_LIBRARY_H__
-/* POSIX says that <sys/types.h> must be included before <regex.h>. */
+/* POSIX says that <sys/types.h> must be included (by the caller) before
+ <regex.h>. */
+
+#ifdef VMS
+/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it
+ should be there. */
+#include <stddef.h>
+#endif
+
/* The following bits are used to determine the regexp syntax we
recognize. The set/not-set meanings are chosen so that Emacs syntax
@@ -45,17 +53,17 @@ typedef unsigned reg_syntax_t;
#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1)
/* If this bit is set, then ^ and $ are always anchors (outside bracket
- expressions).
+ expressions, of course).
If this bit is not set, then it depends:
^ is an anchor if it is at the beginning of a regular
expression or after an open-group or an alternation operator;
$ is an anchor if it is at the end of a regular expression, or
before a close-group or an alternation operator.
+
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
- POSIX now says that the behavior of * etc. in leading positions is
- undefined. We have already implemented a previous draft which
- made those constructs invalid, so we may as well not change the code
- back. */
+ POSIX draft 11.2 says that * etc. in leading positions is undefined.
+ We already implemented a previous draft which made those constructs
+ invalid, though, so we haven't changed the code back. */
#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1)
/* If this bit is set, then special characters are always special
@@ -67,16 +75,14 @@ typedef unsigned reg_syntax_t;
#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1)
/* If this bit is set, then *, +, ?, and { cannot be first in an re or
- immediately after an alternation or begin-group operator.
- Furthermore, alternation cannot be first or last in an re, or
- immediately follow another alternation or begin-group. */
+ immediately after an alternation or begin-group operator. */
#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1)
-/* If this bit is set, then . matches a newline.
+/* If this bit is set, then . matches newline.
If not set, then it doesn't. */
#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1)
-/* If this bit is set, then period doesn't match a null.
+/* If this bit is set, then . doesn't match NUL.
If not set, then it does. */
#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1)
@@ -89,7 +95,7 @@ typedef unsigned reg_syntax_t;
If not set, \{, \}, {, and } are literals. */
#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1)
-/* If this bit is set, +, ? and | aren't recognized as operators.
+/* If this bit is set, +, ? and | aren't recognized as operators.
If not set, they are. */
#define RE_LIMITED_OPS (RE_INTERVALS << 1)
@@ -97,104 +103,112 @@ typedef unsigned reg_syntax_t;
If not set, newline is literal. */
#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1)
-/* If this bit is set, newline in the pattern is an ordinary character.
- If not set, newline before ^ or after $ allows the ^ or $ to be an
- anchor. */
-#define RE_NEWLINE_ORDINARY (RE_NEWLINE_ALT << 1)
-
-/* If this bit is not set, then \{ and \} defines an interval,
- and { and } are literals.
- If set, then { and } defines an interval, and \{ and \} are literals. */
-#define RE_NO_BK_BRACES (RE_NEWLINE_ORDINARY << 1)
+/* If this bit is set, then `{...}' defines an interval, and \{ and \}
+ are literals.
+ If not set, then `\{...\}' defines an interval. */
+#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1)
/* If this bit is set, (...) defines a group, and \( and \) are literals.
If not set, \(...\) defines a group, and ( and ) are literals. */
#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1)
-/* If this bit is set, then back references (i.e., \<digit>) are not
- recognized.
- If not set, then they are. */
+/* If this bit is set, then \<digit> matches <digit>.
+ If not set, then \<digit> is a back-reference. */
#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1)
/* If this bit is set, then | is an alternation operator, and \| is literal.
If not set, then \| is an alternation operator, and | is literal. */
#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1)
-/* If this bit is set, then you can't have empty alternatives.
- If not set, then you can. */
-#define RE_NO_EMPTY_ALTS (RE_NO_BK_VBAR << 1)
-
-/* If this bit is set, then you can't have empty groups.
- If not set, then you can. */
-#define RE_NO_EMPTY_GROUPS (RE_NO_EMPTY_ALTS << 1)
-
-/* If this bit is set, then an ending range point has to collate higher
- than or equal to the starting range point.
- If not set, then when the ending range point collates higher than the
- starting range point, we consider such a range to be empty. */
-#define RE_NO_EMPTY_RANGES (RE_NO_EMPTY_GROUPS << 1)
+/* If this bit is set, then an ending range point collating lower
+ than the starting range point, as in [z-a], is invalid.
+ If not set, then when ending range point collates lower than the
+ starting range point, the range is ignored. */
+#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1)
-/* If this bit is set, then all back references must refer to a preceding
- subexpression.
- If not set, then a back reference to a nonexistent subexpression is
- treated as literal characters. */
-#define RE_NO_MISSING_BK_REF (RE_NO_EMPTY_RANGES << 1)
-
-/* If this bit is set, then Regex considers an unmatched close-group
- operator to be the ordinary character parenthesis.
- If not set, then an unmatched close-group operator is invalid. */
-#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_MISSING_BK_REF << 1)
+/* If this bit is set, then an unmatched ) is ordinary.
+ If not set, then an unmatched ) is invalid. */
+#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1)
/* This global variable defines the particular regexp syntax to use (for
some interfaces). When a regexp is compiled, the syntax used is
stored in the pattern buffer, so changing this does not affect
already-compiled regexps. */
-extern reg_syntax_t obscure_syntax;
-
-
-
+extern reg_syntax_t re_syntax_options;
+
/* Define combinations of the above bits for the standard possibilities.
- (The [[[ comments delimit what gets put into the Texinfo file.) */
+ (The [[[ comments delimit what gets put into the Texinfo file, so
+ don't delete them!) */
/* [[[begin syntaxes]]] */
#define RE_SYNTAX_EMACS 0
-#define RE_SYNTAX_POSIX_AWK \
- (RE_CONTEXT_INDEP_ANCHORS | RE_CONTEXT_INDEP_OPS | RE_NO_BK_PARENS \
- | RE_NO_BK_VBAR)
-
#define RE_SYNTAX_AWK \
- (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_SYNTAX_POSIX_AWK)
+ (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \
+ | RE_UNMATCHED_RIGHT_PAREN_ORD)
+
+#define RE_SYNTAX_POSIX_AWK \
+ (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS)
#define RE_SYNTAX_GREP \
- (RE_BK_PLUS_QM | RE_NEWLINE_ALT)
+ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \
+ | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \
+ | RE_NEWLINE_ALT)
#define RE_SYNTAX_EGREP \
- (RE_CONTEXT_INDEP_ANCHORS | RE_CONTEXT_INDEP_OPS \
- | RE_NEWLINE_ALT | RE_NO_BK_PARENS | RE_NO_BK_VBAR)
+ (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \
+ | RE_NEWLINE_ALT | RE_NO_BK_PARENS \
+ | RE_NO_BK_VBAR)
+
+#define RE_SYNTAX_POSIX_EGREP \
+ (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
+
+/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */
+#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC
+
+#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC
+
+/* Syntax bits common to both basic and extended POSIX regex syntax. */
+#define _RE_SYNTAX_POSIX_COMMON \
+ (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
+ | RE_INTERVALS | RE_NO_EMPTY_RANGES)
#define RE_SYNTAX_POSIX_BASIC \
- (RE_CHAR_CLASSES | RE_DOT_NEWLINE \
- | RE_DOT_NOT_NULL | RE_INTERVALS | RE_LIMITED_OPS \
- | RE_NEWLINE_ORDINARY | RE_NO_EMPTY_RANGES | RE_NO_MISSING_BK_REF)
+ (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM)
+
+/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes
+ RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this
+ isn't minimal, since other operators, such as \`, aren't disabled. */
+#define RE_SYNTAX_POSIX_MINIMAL_BASIC \
+ (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS)
#define RE_SYNTAX_POSIX_EXTENDED \
- (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \
- | RE_CONTEXT_INVALID_OPS | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \
- | RE_INTERVALS | RE_NEWLINE_ORDINARY | RE_NO_BK_BRACES \
- | RE_NO_BK_PARENS | RE_NO_BK_REFS | RE_NO_BK_VBAR \
- | RE_NO_EMPTY_ALTS | RE_NO_EMPTY_GROUPS | RE_NO_EMPTY_RANGES \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_VBAR \
| RE_UNMATCHED_RIGHT_PAREN_ORD)
-/* [[[end syntaxes]]] */
-
-
+/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS
+ replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */
+#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \
+ (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \
+ | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \
+ | RE_NO_BK_PARENS | RE_NO_BK_REFS \
+ | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD)
+/* [[[end syntaxes]]] */
-/* Maximum number of duplicates an interval can allow. */
+/* Maximum number of duplicates an interval can allow. Some systems
+ (erroneously) define this in other header files, but we want our
+ value, so remove any previous define. */
+#ifdef RE_DUP_MAX
#undef RE_DUP_MAX
-#define RE_DUP_MAX ((1 << 15) - 1)
+#endif
+#define RE_DUP_MAX ((1 << 15) - 1)
-/* POSIX `cflags' bits (i.e., information for regcomp). */
+/* POSIX `cflags' bits (i.e., information for `regcomp'). */
/* If this bit is set, then use extended regular expression syntax.
If not set, then use basic regular expression syntax. */
@@ -254,9 +268,6 @@ typedef enum
REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */
REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */
} reg_errcode_t;
-
-
-
/* This data structure represents a compiled pattern. Before calling
the pattern compiler, the fields `buffer', `allocated', `fastmap',
@@ -295,19 +306,28 @@ struct re_pattern_buffer
/* Number of subexpressions found by the compiler. */
size_t re_nsub;
- /* Set to 1 by re_compile_fastmap if this pattern can match the
- null string; 0 prevents the searcher from matching it with
- the null string. Set to 2 if it might match the null string
- either at the end of a search range or just before a
- character listed in the fastmap. */
- unsigned can_be_null : 2;
-
- /* Set to zero when regex_compile compiles a pattern; set to one
- by re_compile_fastmap when it updates the fastmap, if any. */
+ /* Zero if this pattern cannot match the empty string, one else.
+ Well, in truth it's used only in `re_search_2', to see
+ whether or not we should use the fastmap, so we don't set
+ this absolutely perfectly; see `re_compile_fastmap' (the
+ `duplicate' case). */
+ unsigned can_be_null : 1;
+
+ /* If REGS_UNALLOCATED, allocate space in the `regs' structure
+ for `max (RE_NREGS, re_nsub + 1)' groups.
+ If REGS_REALLOCATE, reallocate space if necessary.
+ If REGS_FIXED, use what's there. */
+#define REGS_UNALLOCATED 0
+#define REGS_REALLOCATE 1
+#define REGS_FIXED 2
+ unsigned regs_allocated : 2;
+
+ /* Set to zero when `regex_compile' compiles a pattern; set to one
+ by `re_compile_fastmap' if it updates the fastmap. */
unsigned fastmap_accurate : 1;
- /* If set, regexec reports only success or failure and does not
- return anything in pmatch. */
+ /* If set, `re_match_2' does not return information about
+ subexpressions. */
unsigned no_sub : 1;
/* If set, a beginning-of-line anchor doesn't match at the
@@ -320,10 +340,6 @@ struct re_pattern_buffer
/* If true, an anchor at a newline matches. */
unsigned newline_anchor : 1;
- /* If set, re_match_2 assumes a non-null REGS argument is
- initialized. If not set, REGS is initialized to the max of
- RE_NREGS and re_nsub + 1 registers. */
- unsigned caller_allocated_regs : 1;
/* [[[end pattern_buffer]]] */
};
@@ -333,12 +349,8 @@ typedef struct re_pattern_buffer regex_t;
/* search.c (search_buffer) in Emacs needs this one opcode value. It is
defined both in `regex.c' and here. */
#define RE_EXACTN_VALUE 1
-
-
-
-/* Type for byte offsets within the string. POSIX mandates us defining
- this. */
+/* Type for byte offsets within the string. POSIX mandates this. */
typedef int regoff_t;
@@ -352,8 +364,9 @@ struct re_registers
};
-/* If `caller_allocated_regs' is zero in the pattern buffer, re_match_2
- returns information about this many registers. */
+/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
+ `re_match_2' returns information about at least this many registers
+ the first time a `regs' structure is passed. */
#ifndef RE_NREGS
#define RE_NREGS 30
#endif
@@ -367,29 +380,41 @@ typedef struct
regoff_t rm_so; /* Byte offset from string's start to substring's start. */
regoff_t rm_eo; /* Byte offset from string's start to substring's end. */
} regmatch_t;
-
-
-
/* Declarations for routines. */
+/* To avoid duplicating every routine declaration -- once with a
+ prototype (if we are ANSI), and once without (if we aren't) -- we
+ use the following macro to declare argument types. This
+ unfortunately clutters up the declarations a bit, but I think it's
+ worth it. */
+
#if __STDC__
-/* Sets the current syntax to SYNTAX. You can also simply assign to the
- `obscure_syntax' variable. */
-extern reg_syntax_t re_set_syntax (reg_syntax_t syntax);
+#define _RE_ARGS(args) args
+
+#else /* not __STDC__ */
+
+#define _RE_ARGS(args) ()
+
+#endif /* not __STDC__ */
+
+/* Sets the current default syntax to SYNTAX, and return the old syntax.
+ You can also simply assign to the `re_syntax_options' variable. */
+extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax));
/* Compile the regular expression PATTERN, with length LENGTH
- and syntax given by the global `obscure_syntax', into the buffer
+ and syntax given by the global `re_syntax_options', into the buffer
BUFFER. Return NULL if successful, and an error string if not. */
-extern const char *re_compile_pattern (const char *pattern, int length,
- struct re_pattern_buffer *buffer);
+extern const char *re_compile_pattern
+ _RE_ARGS ((const char *pattern, int length,
+ struct re_pattern_buffer *buffer));
/* Compile a fastmap for the compiled pattern in BUFFER; used to
accelerate searches. Return 0 if successful and -2 if was an
internal error. */
-extern int re_compile_fastmap (struct re_pattern_buffer *buffer);
+extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer));
/* Search in the string STRING (with length LENGTH) for the pattern
@@ -397,78 +422,64 @@ extern int re_compile_fastmap (struct re_pattern_buffer *buffer);
characters. Return the starting position of the match, -1 for no
match, or -2 for an internal error. Also return register
information in REGS (if REGS and BUFFER->no_sub are nonzero). */
-extern int re_search (struct re_pattern_buffer *buffer,
- const char *string, int length,
- int start, int range,
- struct re_registers *regs);
+extern int re_search
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, int range, struct re_registers *regs));
/* Like `re_search', but search in the concatenation of STRING1 and
STRING2. Also, stop searching at index START + STOP. */
-extern int re_search_2 (struct re_pattern_buffer *buffer,
- const char *string1, int length1,
- const char *string2, int length2,
- int start, int range,
- struct re_registers *regs,
- int stop);
+extern int re_search_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, int range, struct re_registers *regs, int stop));
/* Like `re_search', but return how many characters in STRING the regexp
in BUFFER matched, starting at position START. */
-extern int re_match (const struct re_pattern_buffer *buffer,
- const char *string, int length,
- int start, struct re_registers *regs);
+extern int re_match
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string,
+ int length, int start, struct re_registers *regs));
/* Relates to `re_match' as `re_search_2' relates to `re_search'. */
-extern int re_match_2 (const struct re_pattern_buffer *buffer,
- const char *string1, int length1,
- const char *string2, int length2,
- int start,
- struct re_registers *regs,
- int stop);
+extern int re_match_2
+ _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1,
+ int length1, const char *string2, int length2,
+ int start, struct re_registers *regs, int stop));
-#ifndef __386BSD__
-/* 4.2 bsd compatibility. */
-#ifndef bsdi
-extern const char *re_comp (const char *);
-#endif
-extern int re_exec (const char *);
-#endif
+/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
+ ENDS. Subsequent matches using BUFFER and REGS will use this memory
+ for recording register information. STARTS and ENDS must be
+ allocated with malloc, and must each be at least `NUM_REGS * sizeof
+ (regoff_t)' bytes long.
-/* POSIX compatibility. */
-extern int regcomp (regex_t *preg, const char *pattern, int cflags);
-extern int regexec (const regex_t *preg, const char *string, size_t nmatch,
- regmatch_t pmatch[], int eflags);
-extern size_t regerror (int errcode, const regex_t *preg, char *errbuf,
- size_t errbuf_size);
-extern void regfree (regex_t *preg);
+ If NUM_REGS == 0, then subsequent matches should allocate their own
+ register data.
-#else /* not __STDC__ */
+ Unless this function is called, the first search or match using
+ PATTERN_BUFFER will allocate its own register data, without
+ freeing the old data. */
+extern void re_set_registers
+ _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs,
+ unsigned num_regs, regoff_t *starts, regoff_t *ends));
-/* Support old C compilers. */
-#define const
-
-extern reg_syntax_t re_set_syntax ();
-extern char *re_compile_pattern ();
-extern int re_search (), re_search_2 ();
-extern int re_match (), re_match_2 ();
-
-/* 4.2 BSD compatibility. */
-extern char *re_comp ();
-extern int re_exec ();
+/* 4.2 bsd compatibility. */
+extern char *re_comp _RE_ARGS ((const char *));
+extern int re_exec _RE_ARGS ((const char *));
/* POSIX compatibility. */
-extern int regcomp ();
-extern int regexec ();
-extern size_t regerror ();
-extern void regfree ();
+extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags));
+extern int regexec
+ _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch,
+ regmatch_t pmatch[], int eflags));
+extern size_t regerror
+ _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf,
+ size_t errbuf_size));
+extern void regfree _RE_ARGS ((regex_t *preg));
-#endif /* not __STDC__ */
#endif /* not __REGEXP_LIBRARY_H__ */
-
-
/*
Local variables:
diff --git a/gnu/usr.bin/cvs/lib/rename.c b/gnu/usr.bin/cvs/lib/rename.c
index 3e0b481..6c43cf6 100644
--- a/gnu/usr.bin/cvs/lib/rename.c
+++ b/gnu/usr.bin/cvs/lib/rename.c
@@ -15,6 +15,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>
diff --git a/gnu/usr.bin/cvs/lib/sighandle.c b/gnu/usr.bin/cvs/lib/sighandle.c
index 1b73b93..1db4177 100644
--- a/gnu/usr.bin/cvs/lib/sighandle.c
+++ b/gnu/usr.bin/cvs/lib/sighandle.c
@@ -30,10 +30,14 @@
* must not themselves make calls to the signal handling
* facilities.
*
- * @(#)sighandle.c 1.9 92/03/31
+ * $CVSid: @(#)sighandle.c 1.13 94/10/07 $
*
*************************************************************************/
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <sys/types.h>
#include <stdio.h>
#include <signal.h>
@@ -54,18 +58,14 @@ char *malloc();
#undef POSIX /* Minix 1.6 doesn't support POSIX.1 sigaction yet */
#endif
-#ifndef SIGTYPE
-#define SIGTYPE void
-#endif
-
/* Define the highest signal number (usually) */
#ifndef SIGMAX
-#define SIGMAX 32
+#define SIGMAX 64
#endif
/* Define linked list of signal handlers structure */
struct SIG_hlist {
- SIGTYPE (*handler)();
+ RETSIGTYPE (*handler)();
struct SIG_hlist *next;
};
@@ -84,7 +84,7 @@ static struct sigaction *SIG_defaults;
#ifdef BSD_SIGNALS
static struct sigvec *SIG_defaults;
#else
-static SIGTYPE (**SIG_defaults)();
+static RETSIGTYPE (**SIG_defaults)();
#endif
#endif
@@ -112,11 +112,7 @@ static int SIG_init()
#ifdef POSIX
(void) sigfillset(&sigset_test);
- for (i = 1; sigismember(&sigset_test, i) == 1; i++)
-#ifdef BROKEN_SIGISMEMBER
- if ( i >= NSIG )
- break
-#endif
+ for (i = 1; i < SIGMAX && sigismember(&sigset_test, i) == 1; i++)
;
if (i < SIGMAX)
i = SIGMAX;
@@ -133,8 +129,8 @@ static int SIG_init()
calloc(i, sizeof(struct sigvec));
#else
if (!SIG_defaults)
- SIG_defaults = (SIGTYPE (**)())
- calloc(i, sizeof(SIGTYPE (**)()));
+ SIG_defaults = (RETSIGTYPE (**)())
+ calloc(i, sizeof(RETSIGTYPE (**)()));
#endif
SIG_crSectMask = 0;
#endif
@@ -149,7 +145,7 @@ static int SIG_init()
* they were registered.
*/
-static SIGTYPE SIG_handle(sig)
+static RETSIGTYPE SIG_handle(sig)
int sig;
{
struct SIG_hlist *this;
@@ -175,7 +171,7 @@ int sig;
int SIG_register(sig,fn)
int sig;
-SIGTYPE (*fn)();
+RETSIGTYPE (*fn)();
{
int val;
struct SIG_hlist *this;
@@ -236,7 +232,7 @@ SIGTYPE (*fn)();
val = sigvec(sig, &vec, &SIG_defaults[sig]);
#else
if ((SIG_defaults[sig] = signal(sig, SIG_handle)) ==
- (SIGTYPE (*)()) -1)
+ (RETSIGTYPE (*)()) -1)
val = -1;
#endif
#endif
@@ -279,7 +275,7 @@ SIGTYPE (*fn)();
int SIG_deregister(sig,fn)
int sig;
-SIGTYPE (*fn)();
+RETSIGTYPE (*fn)();
{
int val;
struct SIG_hlist *this;
@@ -341,7 +337,7 @@ SIGTYPE (*fn)();
#ifdef BSD_SIGNALS
val = sigvec(sig, &SIG_defaults[sig], (struct sigvec *) NULL);
#else
- if (signal(sig, SIG_defaults[sig]) == (SIGTYPE (*)()) -1)
+ if (signal(sig, SIG_defaults[sig]) == (RETSIGTYPE (*)()) -1)
val = -1;
#endif
#endif
diff --git a/gnu/usr.bin/cvs/lib/strdup.c b/gnu/usr.bin/cvs/lib/strdup.c
index 4e5af07..46fc8a0 100644
--- a/gnu/usr.bin/cvs/lib/strdup.c
+++ b/gnu/usr.bin/cvs/lib/strdup.c
@@ -15,6 +15,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#ifdef STDC_HEADERS
#include <string.h>
#include <stdlib.h>
diff --git a/gnu/usr.bin/cvs/lib/strippath.c b/gnu/usr.bin/cvs/lib/strippath.c
index 3d606a8..39687f9 100644
--- a/gnu/usr.bin/cvs/lib/strippath.c
+++ b/gnu/usr.bin/cvs/lib/strippath.c
@@ -15,14 +15,20 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-#if defined(STDC_HEADERS) || defined(USG)
-#include <string.h>
-#ifndef index
-#define index strchr
+#ifdef HAVE_CONFIG_H
+#include "config.h"
#endif
-#else
+
+#if STDC_HEADERS || HAVE_STRING_H
+#include <string.h>
+/* An ANSI string.h and pre-ANSI memory.h might conflict. */
+#if !STDC_HEADERS && HAVE_MEMORY_H
+#include <memory.h>
+#endif /* not STDC_HEADERS and HAVE_MEMORY_H */
+#else /* not STDC_HEADERS and not HAVE_STRING_H */
#include <strings.h>
-#endif
+/* memory.h and strings.h conflict on some systems. */
+#endif /* not STDC_HEADERS and not HAVE_STRING_H */
#include <stdio.h>
@@ -43,7 +49,7 @@ strip_path (path)
int stripped = 0;
char *cp, *slash;
- for (cp = path; (slash = index(cp, '/')) != NULL; cp = slash)
+ for (cp = path; (slash = strchr(cp, '/')) != NULL; cp = slash)
{
*slash = '\0';
if ((!*cp && (cp != path || stripped)) ||
diff --git a/gnu/usr.bin/cvs/lib/stripslash.c b/gnu/usr.bin/cvs/lib/stripslash.c
index 0c26ac6..265950e 100644
--- a/gnu/usr.bin/cvs/lib/stripslash.c
+++ b/gnu/usr.bin/cvs/lib/stripslash.c
@@ -15,11 +15,20 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-#if defined(STDC_HEADERS) || defined(USG)
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if STDC_HEADERS || HAVE_STRING_H
#include <string.h>
-#else
+/* An ANSI string.h and pre-ANSI memory.h might conflict. */
+#if !STDC_HEADERS && HAVE_MEMORY_H
+#include <memory.h>
+#endif /* not STDC_HEADERS and HAVE_MEMORY_H */
+#else /* not STDC_HEADERS and not HAVE_STRING_H */
#include <strings.h>
-#endif
+/* memory.h and strings.h conflict on some systems. */
+#endif /* not STDC_HEADERS and not HAVE_STRING_H */
/* Remove trailing slashes from PATH. */
diff --git a/gnu/usr.bin/cvs/lib/subr.c b/gnu/usr.bin/cvs/lib/subr.c
index 2b728c2..d3d40b1 100644
--- a/gnu/usr.bin/cvs/lib/subr.c
+++ b/gnu/usr.bin/cvs/lib/subr.c
@@ -3,19 +3,24 @@
* Copyright (c) 1989-1992, Brian Berliner
*
* You may distribute under the terms of the GNU General Public License as
- * specified in the README file that comes with the CVS 1.3 kit.
+ * specified in the README file that comes with the CVS 1.4 kit.
*
* Various useful functions for the CVS support code.
*/
#include "cvs.h"
+#ifndef lint
+static char rcsid[] = "$CVSid: @(#)subr.c 1.64 94/10/07 $";
+USE(rcsid)
+#endif
+
#ifdef _MINIX
#undef POSIX /* Minix 1.6 doesn't support POSIX.1 sigaction yet */
#endif
-#ifndef VPRINTF_MISSING
-#if __STDC__
+#ifdef HAVE_VPRINTF
+#if defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__)
#include <stdarg.h>
#define VA_START(args, lastarg) va_start(args, lastarg)
#else
@@ -27,17 +32,24 @@
#define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8;
#endif
-#ifndef lint
-static char rcsid[] = "@(#)subr.c 1.52 92/03/31";
+/*
+ * I don't know of a convenient way to test this at configure time, or else
+ * I'd certainly do it there.
+ */
+#if defined(NeXT)
+#define LOSING_TMPNAM_FUNCTION
+#ifndef _POSIX_SOURCE
+/*
+ * NeXT doesn't define these without _POSIX_SOURCE,
+ * but that changes a lot of things.
+ */
+#define WEXITSTATUS(x) ((x).w_retcode)
+#define WTERMSIG(x) ((x).w_termsig)
+#endif
#endif
-#if __STDC__
-static void run_add_arg (char *s);
-static void run_init_prog (void);
-#else
-static void run_add_arg ();
-static void run_init_prog ();
-#endif /* __STDC__ */
+static void run_add_arg PROTO((char *s));
+static void run_init_prog PROTO((void));
extern char *getlogin ();
extern char *strtok ();
@@ -74,7 +86,7 @@ copy_file (from, to)
if (read (fdin, buf, (int) sb.st_size) != (int) sb.st_size)
error (1, errno, "cannot read file %s for copying", from);
if (write (fdout, buf, (int) sb.st_size) != (int) sb.st_size
-#ifndef FSYNC_MISSING
+#ifdef HAVE_FSYNC
|| fsync (fdout) == -1
#endif
)
@@ -88,11 +100,15 @@ copy_file (from, to)
error (1, errno, "cannot close %s", to);
/* now, set the times for the copied file to match those of the original */
+ memset ((char *) &t, 0, sizeof (t));
t.actime = sb.st_atime;
t.modtime = sb.st_mtime;
(void) utime (to, &t);
}
+/* FIXME-krp: these functions would benefit from caching the char * &
+ stat buf. */
+
/*
* Returns non-zero if the argument file is a directory, or is a symbolic
* link which points to a directory.
@@ -202,25 +218,8 @@ make_directory (name)
{
struct stat buf;
- if (stat (name, &buf) == 0)
- {
- if (S_ISDIR (buf.st_mode))
- {
- if (access (name, (R_OK | W_OK | X_OK)) == 0)
- {
- error (0, 0, "Directory %s already exists", name);
- return;
- }
- else
- {
- error (0, 0,
- "Directory %s already exists but is protected from you",
- name);
- }
- }
- else
+ if (stat (name, &buf) == 0 && (!S_ISDIR (buf.st_mode)))
error (0, 0, "%s already exists but is not a directory", name);
- }
if (!noexec && mkdir (name, 0777) < 0)
error (1, errno, "cannot make directory %s", name);
}
@@ -245,7 +244,7 @@ make_directories (name)
error (0, errno, "cannot make path to %s", name);
return;
}
- if ((cp = rindex (name, '/')) == NULL)
+ if ((cp = strrchr (name, '/')) == NULL)
return;
*cp = '\0';
make_directories (name);
@@ -260,14 +259,12 @@ make_directories (name)
*/
char *
xmalloc (bytes)
- int bytes;
+ size_t bytes;
{
char *cp;
- if (bytes <= 0)
- error (1, 0, "bad malloc size %d", bytes);
- if ((cp = malloc ((unsigned) bytes)) == NULL)
- error (1, 0, "malloc failed");
+ if ((cp = malloc (bytes)) == NULL)
+ error (1, 0, "can not allocate %lu bytes", (unsigned long) bytes);
return (cp);
}
@@ -279,17 +276,17 @@ xmalloc (bytes)
char *
xrealloc (ptr, bytes)
char *ptr;
- int bytes;
+ size_t bytes;
{
char *cp;
if (!ptr)
- return (xmalloc (bytes));
+ cp = malloc (bytes);
+ else
+ cp = realloc (ptr, bytes);
- if (bytes <= 0)
- error (1, 0, "bad realloc size %d", bytes);
- if ((cp = realloc (ptr, (unsigned) bytes)) == NULL)
- error (1, 0, "realloc failed");
+ if (cp == NULL)
+ error (1, 0, "can not reallocate %lu bytes", (unsigned long) bytes);
return (cp);
}
@@ -320,7 +317,7 @@ xchmod (fname, writable)
int writable;
{
struct stat sb;
- int mode, oumask;
+ mode_t mode, oumask;
if (stat (fname, &sb) < 0)
{
@@ -399,9 +396,12 @@ unlink_file (f)
* Compare "file1" to "file2". Return non-zero if they don't compare exactly.
*
* mallocs a buffer large enough to hold the entire file and does two reads to
- * load the buffer and calls bcmp to do the cmp. This is reasonable, since
+ * load the buffer and calls memcmp to do the cmp. This is reasonable, since
* source files are typically not too large.
*/
+
+/* richfix: this *could* exploit mmap. */
+
int
xcmp (file1, file2)
char *file1;
@@ -430,10 +430,10 @@ xcmp (file1, file2)
buf1 = xmalloc ((int) size);
buf2 = xmalloc ((int) size);
if (read (fd1, buf1, (int) size) != (int) size)
- error (1, errno, "cannot read file %s cor comparing", file1);
+ error (1, errno, "cannot read file %s for comparing", file1);
if (read (fd2, buf2, (int) size) != (int) size)
error (1, errno, "cannot read file %s for comparing", file2);
- ret = bcmp (buf1, buf2, (int) size);
+ ret = memcmp(buf1, buf2, (int) size);
free (buf1);
free (buf2);
}
@@ -512,10 +512,10 @@ getcaller ()
static char uidname[20];
struct passwd *pw;
char *name;
- int uid;
+ uid_t uid;
uid = getuid ();
- if (uid == 0)
+ if (uid == (uid_t) 0)
{
/* super-user; try getlogin() to distinguish */
if (((name = getenv("LOGNAME")) || (name = getenv("USER")) ||
@@ -524,7 +524,7 @@ getcaller ()
}
if ((pw = (struct passwd *) getpwuid (uid)) == NULL)
{
- (void) sprintf (uidname, "uid%d", uid);
+ (void) sprintf (uidname, "uid%d", (unsigned long) uid);
return (uidname);
}
return (pw->pw_name);
@@ -549,7 +549,7 @@ static int run_argc;
static int run_argc_allocated;
/* VARARGS */
-#if !defined (VPRINTF_MISSING) && __STDC__
+#if defined (HAVE_VPRINTF) && (defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__))
void
run_setup (char *fmt,...)
#else
@@ -560,7 +560,7 @@ run_setup (fmt, va_alist)
#endif
{
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
va_list args;
#endif
@@ -581,7 +581,7 @@ run_setup (fmt, va_alist)
run_argc = 0;
/* process the varargs into run_prog */
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
VA_START (args, fmt);
(void) vsprintf (run_prog, fmt, args);
va_end (args);
@@ -602,7 +602,7 @@ run_arg (s)
}
/* VARARGS */
-#if !defined (VPRINTF_MISSING) && __STDC__
+#if defined (HAVE_VPRINTF) && (defined (USE_PROTOTYPES) ? USE_PROTOTYPES : defined (__STDC__))
void
run_args (char *fmt,...)
#else
@@ -613,7 +613,7 @@ run_args (fmt, va_alist)
#endif
{
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
va_list args;
#endif
@@ -621,7 +621,7 @@ run_args (fmt, va_alist)
run_init_prog ();
/* process the varargs into run_prog */
-#ifndef VPRINTF_MISSING
+#ifdef HAVE_VPRINTF
VA_START (args, fmt);
(void) vsprintf (run_prog, fmt, args);
va_end (args);
@@ -668,7 +668,12 @@ run_exec (stin, stout, sterr, flags)
{
int shin, shout, sherr;
int mode_out, mode_err;
- int status = -1;
+#if defined(NeXT) && !defined(_POSIX_SOURCE)
+ union wait status;
+#else
+ int status;
+#endif
+ int rc = -1;
int rerrno = 0;
int pid, w;
@@ -682,7 +687,7 @@ run_exec (stin, stout, sterr, flags)
struct sigvec vec, ivec, qvec;
#else
- SIGTYPE (*istat) (), (*qstat) ();
+ RETSIGTYPE (*istat) (), (*qstat) ();
#endif
#endif
@@ -733,11 +738,15 @@ run_exec (stin, stout, sterr, flags)
}
}
+ /* Make sure we don't flush this twice, once in the subprocess. */
+ fflush (stdout);
+ fflush (stderr);
+
/* The output files, if any, are now created. Do the fork and dups */
-#ifdef VFORK_MISSING
- pid = fork ();
-#else
+#ifdef HAVE_VFORK
pid = vfork ();
+#else
+ pid = fork ();
#endif
if (pid == 0)
{
@@ -761,6 +770,7 @@ run_exec (stin, stout, sterr, flags)
/* dup'ing is done. try to run it now */
(void) execvp (run_argv[0], run_argv);
+ error (0, errno, "cannot exec %s", run_argv[0]);
_exit (127);
}
else if (pid == -1)
@@ -790,7 +800,7 @@ run_exec (stin, stout, sterr, flags)
#ifdef BSD_SIGNALS
if (flags & RUN_SIGIGNORE)
{
- bzero ((char *) &vec, sizeof (vec));
+ memset ((char *) &vec, 0, sizeof (vec));
vec.sv_handler = SIG_IGN;
(void) sigvec (SIGINT, &vec, &ivec);
(void) sigvec (SIGQUIT, &vec, &qvec);
@@ -816,19 +826,19 @@ run_exec (stin, stout, sterr, flags)
#endif
if (w == -1)
{
- status = -1;
+ rc = -1;
rerrno = errno;
}
else if (WIFEXITED (status))
- status = WEXITSTATUS (status);
+ rc = WEXITSTATUS (status);
else if (WIFSIGNALED (status))
{
if (WTERMSIG (status) == SIGPIPE)
error (1, 0, "broken pipe");
- status = 2;
+ rc = 2;
}
else
- status = 1;
+ rc = 1;
/* restore the signals */
#ifdef POSIX
@@ -868,7 +878,7 @@ run_exec (stin, stout, sterr, flags)
out0:
if (rerrno)
errno = rerrno;
- return (status);
+ return (rc);
}
void
@@ -910,3 +920,130 @@ get_date (date, now)
}
#endif
#endif
+
+/* Return an xstrdup'd copy of the greatest common ancestor of the dotted
+ revision numbers REV1 and REV2; aborts if either is NULL. If both
+ revisions exist, then rcs guarantees that the gca will exist. */
+
+char *
+gca (rev1, rev2)
+ char *rev1;
+ char *rev2;
+{
+ int dots;
+ char gca[PATH_MAX];
+ char *p[2];
+ int j[2];
+
+ if (rev1 == NULL || rev2 == NULL)
+ {
+ error (0, 0, "sanity failure in gca");
+ abort();
+ }
+
+ /* Walk both strings one dot-separated field at a time, appending the smaller value of each pair to gca, until the fields differ or a string ends. */
+ gca[0] = '\0';
+ p[0] = rev1;
+ p[1] = rev2;
+ do
+ {
+ int i;
+ char c[2];
+ char *s[2];
+
+ for (i = 0; i < 2; ++i)
+ {
+ /* find the next dot; c[i] merely remembers it, nothing is overwritten */
+ s[i] = strchr (p[i], '.');
+ if (s[i] != NULL) {
+ c[i] = *s[i];
+ }
+
+ /* parse the leading integer of this field; atoi stops at the dot by itself */
+ j[i] = atoi (p[i]);
+
+ /* write the remembered dot back (a no-op) and step past it... */
+ if (s[i] != NULL) {
+ *s[i] = c[i];
+ p[i] = s[i] + 1;
+ }
+ else
+ {
+ /* ...or record that this string has no further fields */
+ p[i] = NULL;
+ }
+
+ }
+
+ /* append the smaller of the two field values, followed by a dot. */
+ (void) sprintf (gca + strlen (gca), "%d.",
+ j[0] < j[1] ? j[0] : j[1]);
+
+ } while (j[0] == j[1]
+ && p[0] != NULL
+ && p[1] != NULL);
+
+ /* drop the trailing dot left by the last sprintf. */
+ gca[strlen(gca) - 1] = '\0';
+
+ /* Either the fields differed or one string ran out, so gca now holds
+ the common leading fields plus the smaller of the last pair compared. */
+
+ dots = numdots (gca);
+ if (dots == 0)
+ {
+ /* no dot in gca (assuming numdots counts dots): only the first field was copied, e.g. the trunk major numbers differ. */
+
+ char *q;
+ char *s;
+
+ s = (j[0] < j[1]) ? p[0] : p[1];
+
+ if (s == NULL)
+ {
+ /* the chosen revision had no field after the first; treat it as malformed input. */
+ error (0, 0, "bad revisions %s or %s", rev1, rev2);
+ abort();
+ }
+ else
+ {
+ /* copy the next field of the chosen revision onto gca as its minor number. */
+ q = gca + strlen (gca);
+
+ *q++ = '.';
+ for ( ; *s != '.' && *s != '\0'; )
+ *q++ = *s++;
+
+ *q = '\0';
+ }
+ }
+ else if ((dots & 1) == 0)
+ {
+ /* a nonzero, even number of dots means gca names a branch;
+ chop off the last field so that it names a revision. */
+
+ char *s;
+
+ s = strrchr(gca, '.');
+ *s = '\0';
+ }
+
+ return (xstrdup (gca));
+}
+
+#ifdef LOSING_TMPNAM_FUNCTION
+char *tmpnam(char *s) /* fallback tmpnam: fills S, or a static buffer when S is NULL, from "/tmp/cvsXXXXXX" via mktemp; returns the buffer used */
+{
+ static char value[L_tmpnam+1]; /* result storage for the S == NULL case; overwritten by each such call */
+
+ if (s){
+ strcpy(s,"/tmp/cvsXXXXXX");
+ mktemp(s);
+ return s;
+ }else{
+ strcpy(value,"/tmp/cvsXXXXXX");
+ mktemp(value); /* S is NULL on this path, so the template to fill in is VALUE, not S */
+ return value;
+ }
+}
+#endif
diff --git a/gnu/usr.bin/cvs/lib/system.h b/gnu/usr.bin/cvs/lib/system.h
index 6cfd68f..1f35065 100644
--- a/gnu/usr.bin/cvs/lib/system.h
+++ b/gnu/usr.bin/cvs/lib/system.h
@@ -15,7 +15,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-/* @(#)system.h 1.14 92/04/10 */
+/* $CVSid: @(#)system.h 1.18 94/09/25 $ */
#include <sys/types.h>
#include <sys/stat.h>
@@ -52,16 +52,13 @@
#if !defined(S_ISNWK) && defined(S_IFNWK) /* HP/UX */
#define S_ISNWK(m) (((m) & S_IFMT) == S_IFNWK)
#endif
-#if defined(MKFIFO_MISSING)
+#if !defined(HAVE_MKFIFO)
#define mkfifo(path, mode) (mknod ((path), (mode) | S_IFIFO, 0))
#endif
-#ifdef POSIX
+#if defined(POSIX) || defined(HAVE_UNISTD_H)
#include <unistd.h>
#include <limits.h>
-#ifndef PATH_MAX
-#define PATH_MAX pathconf ("/", _PC_PATH_MAX)
-#endif
#else
off_t lseek ();
#endif
@@ -72,7 +69,7 @@ off_t lseek ();
#include <time.h>
#endif
-#ifdef TIMEB_H_MISSING
+#ifndef HAVE_SYS_TIMEB_H
struct timeb {
time_t time; /* Seconds since the epoch */
unsigned short millitm; /* Field not used */
@@ -87,29 +84,66 @@ struct timeb {
#include <sys/timeb.h>
#endif
-#if defined(FTIME_MISSING) && !defined(HAVE_TIMEZONE)
+#if !defined(HAVE_FTIME) && !defined(HAVE_TIMEZONE)
#if !defined(timezone)
-extern char *timezone();
+extern long timezone;
#endif
#endif
-#ifndef POSIX
+
+/*
+** MAXPATHLEN and PATH_MAX
+**
+** On most systems MAXPATHLEN is defined in sys/param.h to be 1024. Of
+** those for which this is not true, most define PATH_MAX in limits.h
+** or in sys/limits.h, which usually gets included by limits.h. On the
+** few remaining systems where neither statement is true,
+** _POSIX_PATH_MAX is defined.
+**
+** So:
+** 1. If PATH_MAX is defined just use it.
+** 2. If MAXPATHLEN is defined but not PATH_MAX, then define
+** PATH_MAX in terms of MAXPATHLEN.
+** 3. If neither is defined, include limits.h and check for
+** PATH_MAX again.
+** 4. If PATH_MAX is still not defined but _POSIX_PATH_MAX is,
+** then define PATH_MAX in terms of _POSIX_PATH_MAX.
+** 5. And if even _POSIX_PATH_MAX doesn't exist just put in
+** a reasonable value.
+**
+** This works on:
+** Sun Sparc 10 SunOS 4.1.3 & Solaris 1.2
+** HP 9000/700 HP/UX 8.07 & HP/UX 9.01
+** Tektronix XD88/10 UTekV 3.2e
+** IBM RS6000 AIX 3.2
+** Dec Alpha OSF 1 ????
+** Intel 386 BSDI BSD/386
+** Apollo Domain 10.4
+** NEC SVR4
+*/
+
+/* On MOST systems this will get you MAXPATHLEN */
#include <sys/param.h>
-#endif
-#ifndef _POSIX_PATH_MAX
-#define _POSIX_PATH_MAX 255
-#endif
+#ifndef PATH_MAX
+# ifdef MAXPATHLEN
+# define PATH_MAX MAXPATHLEN
+# else
+# include <limits.h>
+# ifndef PATH_MAX
+# ifdef _POSIX_PATH_MAX
+# define PATH_MAX _POSIX_PATH_MAX
+# else
+# define PATH_MAX 1024
+# endif /* _POSIX_PATH_MAX */
+# endif /* PATH_MAX */
+# endif /* MAXPATHLEN */
+#endif /* PATH_MAX */
+
-#ifndef PATH_MAX
-#ifdef MAXPATHLEN
-#define PATH_MAX MAXPATHLEN
-#else
-#define PATH_MAX _POSIX_PATH_MAX
-#endif
-#endif
-#ifdef POSIX
+
+#ifdef HAVE_UTIME_H
#include <utime.h>
#else
#ifndef ALTOS
@@ -122,29 +156,33 @@ struct utimbuf
int utime ();
#endif
-#if defined(USG) || defined(STDC_HEADERS)
+#if STDC_HEADERS || HAVE_STRING_H
#include <string.h>
-#ifndef STDC_HEADERS
+/* An ANSI string.h and pre-ANSI memory.h might conflict. */
+#if !STDC_HEADERS && HAVE_MEMORY_H
#include <memory.h>
-#endif
+#endif /* not STDC_HEADERS and HAVE_MEMORY_H */
+
#ifndef index
#define index strchr
-#endif
+#endif /* index */
+
#ifndef rindex
#define rindex strrchr
-#endif
-#ifndef bcopy
-#define bcopy(from, to, len) memcpy ((to), (from), (len))
-#endif
-#ifndef bzero
-#define bzero(s, n) (void) memset ((s), 0, (n))
-#endif
+#endif /* rindex */
+
#ifndef bcmp
-#define bcmp(s1, s2, n) memcmp((s1), (s2), (n))
-#endif
-#else
+#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
+#endif /* bcmp */
+
+#ifndef bzero
+#define bzero(s, n) memset ((s), 0, (n))
+#endif /* bzero */
+
+#else /* not STDC_HEADERS and not HAVE_STRING_H */
#include <strings.h>
-#endif
+/* memory.h and strings.h conflict on some systems. */
+#endif /* not STDC_HEADERS and not HAVE_STRING_H */
#include <errno.h>
#ifdef STDC_HEADERS
@@ -157,27 +195,16 @@ char *calloc ();
extern int errno;
#endif
-#ifdef __GNUC__
-#ifdef bsdi
-#define alloca __builtin_alloca
-#endif
-#else
-#ifdef sparc
-#include <alloca.h>
+#if defined(USG) || defined(POSIX)
+char *getcwd ();
#else
-#ifndef _AIX
-/* AIX alloca decl has to be the first thing in the file, bletch! */
-char *alloca ();
-#endif
-#endif
+char *getwd ();
#endif
-#if defined(USG) || defined(POSIX)
+#ifdef HAVE_FCNTL_H
#include <fcntl.h>
-char *getcwd ();
#else
#include <sys/file.h>
-char *getwd ();
#endif
#ifndef SEEK_SET
@@ -185,6 +212,7 @@ char *getwd ();
#define SEEK_CUR 1
#define SEEK_END 2
#endif
+
#ifndef F_OK
#define F_OK 0
#define X_OK 1
@@ -192,23 +220,23 @@ char *getwd ();
#define R_OK 4
#endif
-#ifdef DIRENT
+/* unistd.h defines _POSIX_VERSION on POSIX.1 systems. */
+#if defined(DIRENT) || defined(_POSIX_VERSION)
#include <dirent.h>
-#ifdef direct
-#undef direct
-#endif
-#define direct dirent
-#else
-#ifdef SYSNDIR
+#define NLENGTH(dirent) (strlen((dirent)->d_name))
+#else /* not (DIRENT or _POSIX_VERSION) */
+#define dirent direct
+#define NLENGTH(dirent) ((dirent)->d_namlen)
+#ifdef HAVE_SYS_NDIR_H
#include <sys/ndir.h>
-#else
-#ifdef NDIR
-#include <ndir.h>
-#else /* must be BSD */
-#include <sys/dir.h>
-#endif
-#endif
#endif
+#ifdef HAVE_SYS_DIR_H
+#include <sys/dir.h>
+#endif
+#ifdef HAVE_NDIR_H
+#include <ndir.h>
+#endif
+#endif /* not (DIRENT or _POSIX_VERSION) */
/* Convert B 512-byte blocks to kilobytes if K is nonzero,
otherwise return it unchanged. */
@@ -218,6 +246,17 @@ char *getwd ();
#define lstat stat
#endif
-#ifndef SIGTYPE
-#define SIGTYPE void
+/*
+ * Some UNIX distributions don't include these in their stat.h. Defined here
+ * because "config.h" is always included last.
+ */
+#ifndef S_IWRITE
+#define S_IWRITE 0000200 /* write permission, owner */
#endif
+#ifndef S_IWGRP
+#define S_IWGRP 0000020 /* write permission, group */
+#endif
+#ifndef S_IWOTH
+#define S_IWOTH 0000002 /* write permission, other */
+#endif
+
diff --git a/gnu/usr.bin/cvs/lib/wait.h b/gnu/usr.bin/cvs/lib/wait.h
index 49cfb6d..2e47773 100644
--- a/gnu/usr.bin/cvs/lib/wait.h
+++ b/gnu/usr.bin/cvs/lib/wait.h
@@ -15,7 +15,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
-#ifdef POSIX
+#ifdef HAVE_SYS_WAIT_H
#include <sys/types.h> /* For pid_t. */
#include <sys/wait.h>
#else
diff --git a/gnu/usr.bin/cvs/lib/yesno.c b/gnu/usr.bin/cvs/lib/yesno.c
index a705da7..7014803 100644
--- a/gnu/usr.bin/cvs/lib/yesno.c
+++ b/gnu/usr.bin/cvs/lib/yesno.c
@@ -15,6 +15,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include <stdio.h>
/* Read one line from standard input
OpenPOWER on IntegriCloud