summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/grep/dfa.h
diff options
context:
space:
mode:
Diffstat (limited to 'gnu/usr.bin/grep/dfa.h')
-rw-r--r--gnu/usr.bin/grep/dfa.h108
1 files changed, 84 insertions, 24 deletions
diff --git a/gnu/usr.bin/grep/dfa.h b/gnu/usr.bin/grep/dfa.h
index 2a7fed2..4cdbe7a 100644
--- a/gnu/usr.bin/grep/dfa.h
+++ b/gnu/usr.bin/grep/dfa.h
@@ -24,18 +24,24 @@
In addition to clobbering modularity, we eat up valuable
name space. */
-# undef PARAMS
-#if __STDC__
+#ifdef __STDC__
# ifndef _PTR_T
# define _PTR_T
typedef void * ptr_t;
# endif
-# define PARAMS(x) x
#else
# ifndef _PTR_T
# define _PTR_T
typedef char * ptr_t;
# endif
+#endif
+
+#ifdef PARAMS
+# undef PARAMS
+#endif
+#if PROTOTYPES
+# define PARAMS(x) x
+#else
# define PARAMS(x) ()
#endif
@@ -138,6 +144,21 @@ typedef enum
RPAREN, /* RPAREN never appears in the parse tree. */
+ CRANGE, /* CRANGE never appears in the parse tree.
+ It stands for a character range that can
+ match a string of one or more characters.
+ For example, [a-z] can match "ch" in
+ a Spanish locale. */
+
+#ifdef MBS_SUPPORT
+ ANYCHAR, /* ANYCHAR is a terminal symbol that matches
+ any multibyte(or singlebyte) characters.
+ It is used only if MB_CUR_MAX > 1. */
+
+ MBCSET, /* MBCSET is similar to CSET, but for
+ multibyte characters. */
+#endif /* MBS_SUPPORT */
+
CSET /* CSET and (and any value greater) is a
terminal symbol that matches any of a
class of characters. */
@@ -225,6 +246,12 @@ typedef struct
char backref; /* True if this state matches a \<digit>. */
unsigned char constraint; /* Constraint for this state to accept. */
int first_end; /* Token value of the first END in elems. */
+#ifdef MBS_SUPPORT
+ position_set mbps; /* Positions which can match multibyte
+ characters. e.g. period.
+ These staff are used only if
+ MB_CUR_MAX > 1. */
+#endif
} dfa_state;
/* Element of a list of strings, at least one of which is known to
@@ -236,6 +263,26 @@ struct dfamust
struct dfamust *next;
};
+#ifdef MBS_SUPPORT
+/* A bracket operator.
+ e.g. [a-c], [[:alpha:]], etc. */
+struct mb_char_classes
+{
+ int invert;
+ wchar_t *chars; /* Normal characters. */
+ int nchars;
+ wctype_t *ch_classes; /* Character classes. */
+ int nch_classes;
+ wchar_t *range_sts; /* Range characters (start of the range). */
+ wchar_t *range_ends; /* Range characters (end of the range). */
+ int nranges;
+ char **equivs; /* Equivalent classes. */
+ int nequivs;
+ char **coll_elems;
+ int ncoll_elems; /* Collating elements. */
+};
+#endif
+
/* A compiled regular expression. */
struct dfa
{
@@ -254,6 +301,32 @@ struct dfa
int nleaves; /* Number of leaves on the parse tree. */
int nregexps; /* Count of parallel regexps being built
with dfaparse(). */
+#ifdef MBS_SUPPORT
+ /* These stuff are used only if MB_CUR_MAX > 1 or multibyte environments. */
+ int nmultibyte_prop;
+ int *multibyte_prop;
+ /* The value of multibyte_prop[i] is defined by following rule.
+ if tokens[i] < NOTCHAR
+ bit 1 : tokens[i] is a singlebyte character, or the last-byte of
+ a multibyte character.
+ bit 0 : tokens[i] is a singlebyte character, or the 1st-byte of
+ a multibyte character.
+ if tokens[i] = MBCSET
+ ("the index of mbcsets correspnd to this operator" << 2) + 3
+
+ e.g.
+ tokens
+ = 'single_byte_a', 'multi_byte_A', single_byte_b'
+ = 'sb_a', 'mb_A(1st byte)', 'mb_A(2nd byte)', 'mb_A(3rd byte)', 'sb_b'
+ multibyte_prop
+ = 3 , 1 , 0 , 2 , 3
+ */
+
+ /* Array of the bracket expressoin in the DFA. */
+ struct mb_char_classes *mbcsets;
+ int nmbcsets;
+ int mbcsets_alloc;
+#endif
/* Stuff owned by the state builder. */
dfa_state *states; /* States of the dfa. */
@@ -292,13 +365,6 @@ struct dfa
on a state that potentially could do so. */
int *success; /* Table of acceptance conditions used in
dfaexec and computed in build_state. */
- int *newlines; /* Transitions on newlines. The entry for a
- newline in any transition table is always
- -1 so we can count lines without wasting
- too many cycles. The transition for a
- newline is stored separately and handled
- as a special case. Newline is also used
- as a sentinel at the end of the buffer. */
struct dfamust *musts; /* List of strings, at least one of which
is known to appear in any r.e. matching
the dfa. */
@@ -325,26 +391,21 @@ struct dfa
/* dfasyntax() takes three arguments; the first sets the syntax bits described
earlier in this file, the second sets the case-folding flag, and the
third specifies the line terminator. */
-extern void dfasyntax PARAMS ((reg_syntax_t, int, int));
+extern void dfasyntax PARAMS ((reg_syntax_t, int, unsigned char));
/* Compile the given string of the given length into the given struct dfa.
Final argument is a flag specifying whether to build a searching or an
exact matcher. */
-extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int));
+extern void dfacomp PARAMS ((char const *, size_t, struct dfa *, int));
/* Execute the given struct dfa on the buffer of characters. The
- first char * points to the beginning, and the second points to the
- first character after the end of the buffer, which must be a writable
- place so a sentinel end-of-buffer marker can be stored there. The
- second-to-last argument is a flag telling whether to allow newlines to
- be part of a string matching the regexp. The next-to-last argument,
- if non-NULL, points to a place to increment every time we see a
- newline. The final argument, if non-NULL, points to a flag that will
+ last byte of the buffer must equal the end-of-line byte.
+ The final argument points to a flag that will
be set if further examination by a backtracking matcher is needed in
order to verify backreferencing; otherwise the flag will be cleared.
- Returns NULL if no match is found, or a pointer to the first
+ Returns (size_t) -1 if no match is found, or the offset of the first
character after the first & shortest matching string in the buffer. */
-extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *));
+extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *));
/* Free the storage held by the components of a struct dfa. */
extern void dfafree PARAMS ((struct dfa *));
@@ -355,7 +416,7 @@ extern void dfafree PARAMS ((struct dfa *));
extern void dfainit PARAMS ((struct dfa *));
/* Incrementally parse a string of given length into a struct dfa. */
-extern void dfaparse PARAMS ((char *, size_t, struct dfa *));
+extern void dfaparse PARAMS ((char const *, size_t, struct dfa *));
/* Analyze a parsed regexp; second argument tells whether to build a searching
or an exact matcher. */
@@ -369,6 +430,5 @@ extern void dfastate PARAMS ((int, struct dfa *, int []));
/* dfaerror() is called by the regexp routines whenever an error occurs. It
takes a single argument, a NUL-terminated string describing the error.
- The default dfaerror() prints the error message to stderr and exits.
- The user can provide a different dfafree() if so desired. */
+ The user must supply a dfaerror. */
extern void dfaerror PARAMS ((const char *));
OpenPOWER on IntegriCloud