diff options
author | jkh <jkh@FreeBSD.org> | 1994-11-13 05:57:35 +0000 |
---|---|---|
committer | jkh <jkh@FreeBSD.org> | 1994-11-13 05:57:35 +0000 |
commit | 57230199f60c8664d10c03261a029c50973107b6 (patch) | |
tree | c75da0abb532d90fc91e8ab78be5a2291605e4c3 /gnu | |
parent | 1a97c0026b58e59b05740277a5107772a04adc91 (diff) | |
download | FreeBSD-src-57230199f60c8664d10c03261a029c50973107b6.zip FreeBSD-src-57230199f60c8664d10c03261a029c50973107b6.tar.gz |
Michael Reifenberger's libg++ port
Submitted by: mr
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/lib/libg++/include/rx.h | 3255 | ||||
-rw-r--r-- | gnu/lib/libg++/libg++/except.c | 79 | ||||
-rw-r--r-- | gnu/lib/libg++/libio/ioassign.cc | 55 | ||||
-rw-r--r-- | gnu/lib/libg++/libio/iogetdelim.c | 102 |
4 files changed, 3491 insertions, 0 deletions
diff --git a/gnu/lib/libg++/include/rx.h b/gnu/lib/libg++/include/rx.h new file mode 100644 index 0000000..275f397 --- /dev/null +++ b/gnu/lib/libg++/include/rx.h @@ -0,0 +1,3255 @@ +#if !defined(RXH) || defined(RX_WANT_SE_DEFS) +#define RXH + +/* Copyright (C) 1992, 1993 Free Software Foundation, Inc. + +This file is part of the librx library. + +Librx is free software; you can redistribute it and/or modify it under +the terms of the GNU Library General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +Librx is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU Library General Public +License along with this software; see the file COPYING.LIB. If not, +write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA +02139, USA. */ +/* t. lord Wed Sep 23 18:20:57 1992 */ + + + + + + + + +#ifndef RX_WANT_SE_DEFS + +/* This page: Bitsets */ + +#ifndef RX_subset +typedef unsigned int RX_subset; +#define RX_subset_bits (32) +#define RX_subset_mask (RX_subset_bits - 1) +#endif + +typedef RX_subset * rx_Bitset; + +#ifdef __STDC__ +typedef void (*rx_bitset_iterator) (void *, int member_index); +#else +typedef void (*rx_bitset_iterator) (); +#endif + +#define rx_bitset_subset(N) ((N) / RX_subset_bits) +#define rx_bitset_subset_val(B,N) ((B)[rx_bitset_subset(N)]) +#define RX_bitset_access(B,N,OP) \ + ((B)[rx_bitset_subset(N)] OP rx_subset_singletons[(N) & RX_subset_mask]) +#define RX_bitset_member(B,N) RX_bitset_access(B, N, &) +#define RX_bitset_enjoin(B,N) RX_bitset_access(B, N, |=) +#define RX_bitset_remove(B,N) RX_bitset_access(B, N, &= ~) +#define RX_bitset_toggle(B,N) RX_bitset_access(B, N, ^= ) +#define rx_bitset_numb_subsets(N) (((N) + RX_subset_bits - 1) / RX_subset_bits) +#define rx_sizeof_bitset(N) (rx_bitset_numb_subsets(N) * sizeof(RX_subset)) + + + +/* This page: Splay trees. */ + +#ifdef __STDC__ +typedef int (*rx_sp_comparer) (void * a, void * b); +#else +typedef int (*rx_sp_comparer) (); +#endif + +struct rx_sp_node +{ + void * key; + void * data; + struct rx_sp_node * kids[2]; +}; + +#ifdef __STDC__ +typedef void (*rx_sp_key_data_freer) (struct rx_sp_node *); +#else +typedef void (*rx_sp_key_data_freer) (); +#endif + + +/* giant inflatable hash trees */ + +struct rx_hash_item +{ + struct rx_hash_item * next_same_hash; + struct rx_hash * table; + unsigned long hash; + void * data; + void * binding; +}; + +struct rx_hash +{ + struct rx_hash * parent; + int refs; + struct rx_hash * children[13]; + struct rx_hash_item * buckets [13]; + int bucket_size [13]; +}; + +struct rx_hash_rules; + +#ifdef __STDC__ +/* should return like == */ +typedef int (*rx_hash_eq)(void *, void *); +typedef struct rx_hash * (*rx_alloc_hash)(struct rx_hash_rules *); +typedef void (*rx_free_hash)(struct rx_hash *, + struct rx_hash_rules *); +typedef struct rx_hash_item * (*rx_alloc_hash_item)(struct rx_hash_rules *, + void *); +typedef void (*rx_free_hash_item)(struct rx_hash_item *, + struct rx_hash_rules *); +#else +typedef int (*rx_hash_eq)(); +typedef struct rx_hash * (*rx_alloc_hash)(); +typedef void (*rx_free_hash)(); +typedef struct rx_hash_item * (*rx_alloc_hash_item)(); +typedef void (*rx_free_hash_item)(); +#endif + +struct rx_hash_rules +{ + rx_hash_eq eq; + rx_alloc_hash hash_alloc; + rx_free_hash free_hash; + rx_alloc_hash_item hash_item_alloc; + rx_free_hash_item free_hash_item; +}; + + + +/* Matchers decide what to do by examining a series of these. + * Instruction types are described below. + */ +struct rx_inx +{ + void * inx; + void * data; + void * data_2; + void * fnord; +}; + +/* Struct RX holds a compiled regular expression - that is, an nfa ready to be + * converted on demand to a more efficient nfa. This is for the low level interface. + * The high-level interface incloses this in a `struct re_pattern_buffer'. + */ +struct rx_cache; +#ifdef __STDC__ +struct rx_se_list; +struct rx; +typedef int (*rx_se_list_order) (struct rx *, + struct rx_se_list *, struct rx_se_list *); +#else +typedef int (*rx_se_list_order) (); +#endif + +struct rx_superset; + +struct rx +{ + int rx_id; /* Every edition numbered and signed by eclose_nfa. */ + + struct rx_cache * cache; /* Where superstates come from */ + + /* Every regex defines the size of its own character set. */ + int local_cset_size; + + void * buffer; /* Malloced memory for the nfa. */ + unsigned long allocated; /* Size of that memory. */ + + /* How much buffer space to save for external uses. After compilation, + * this space will be available at (buffer + allocated - reserved) + */ + unsigned long reserved; + + /* --------- The remaining fields are for internal use only. --------- */ + /* --------- But! they should be initialized to 0. --------- */ + /* NODEC is the number of nodes in the NFA with non-epsilon + * orx transitions. + */ + int nodec; + + /* EPSNODEC is the number of nodes with only epsilon (orx) transitions. */ + int epsnodec; + + /* The sum of NODEC & EPSNODEC is the total number of states in the + * compiled NFA. + */ + + /* side_effect_progs temporarily holds a tree of side effect lists. */ + struct rx_hash se_list_memo; + + /* A memo for sets of states in the possible_future lists of an nfa: */ + struct rx_hash set_list_memo; + + /* The instruction table is indexed by the enum of instructions defined in + * rxrun.h. The values in the table are used to fill in the `inx' + * slot of instruction frames (see rxrun.h). + */ + void ** instruction_table; + struct rx_nfa_state *nfa_states; + struct rx_nfa_state *start; + + /* This orders the search through super-nfa paths. */ + rx_se_list_order se_list_cmp; + + struct rx_superset * start_set; +}; + +/* An RX NFA may contain epsilon edges labeled with side effects. + * These side effects represent match actions that can not normally be + * defined in a `pure' NFA; for example, recording the location at + * which a paren is crossed in a register structure. + * + * A matcher is supposed to find a particular path + * through the NFA (such as leftmost-longest), and then to execute the + * side effects along that path. Operationally, the space of paths is + * searched and side effects are carried out incrementally, and with + * backtracking. + * + * As the NFA is manipulated during matching sets of side effects. + * Simple lists are used to hold side effect lists. + */ + +typedef void * rx_side_effect; + +struct rx_se_list +{ + rx_side_effect car; + struct rx_se_list * cdr; +}; + + + +/* Struct rexp_node holds an expression tree that represents a regexp. + * In this expression tree, every node has a type, and some parameters + * appropriate to that type. + */ + +enum rexp_node_type +{ + r_cset, /* Match from a character set. `a' or `[a-z]'*/ + r_concat, /* Concat two regexps. `ab' */ + r_alternate, /* Choose one of two regexps. `a\|b' */ + r_opt, /* Optional regexp. `a?' */ + r_star, /* Repeated regexp. `a*' */ + r_2phase_star, /* hard to explain */ + r_side_effect, /* Matches the empty string, but + * implies that a side effect must + * take place. These nodes are used + * by the parser to implement parens, + * backreferences etc. + */ + + r_data /* R_DATA is soley for the convenience + * of parsers or other rexp + * transformers that want to + * (temporarily) introduce new node + * types in rexp structures. These + * must be eliminated + * by the time build_nfa is called. + */ +}; + +struct rexp_node +{ + enum rexp_node_type type; + union + { + rx_Bitset cset; + rx_side_effect side_effect; + struct + { + struct rexp_node *left; + struct rexp_node *right; + } pair; + void * data; + } params; +}; + + + +/* This defines the structure of the NFA into which rexps are compiled. */ + +struct rx_nfa_state +{ + int id; + struct rx_nfa_edge *edges; + struct rx_possible_future *futures; + unsigned int is_final:1; + unsigned int is_start:1; + unsigned int eclosure_needed:1; + struct rx_nfa_state *next; + unsigned int mark:1; +}; + +enum rx_nfa_etype +{ + ne_cset, + ne_epsilon, + ne_side_effect /* A special kind of epsilon. */ +}; + +struct rx_nfa_edge +{ + struct rx_nfa_edge *next; + enum rx_nfa_etype type; + struct rx_nfa_state *dest; + union + { + rx_Bitset cset; + rx_side_effect side_effect; + } params; +}; + +struct rx_nfa_state_set +{ + struct rx_nfa_state * car; + struct rx_nfa_state_set * cdr; +}; + +struct rx_possible_future +{ + struct rx_possible_future *next; + struct rx_se_list * effects; + struct rx_nfa_state_set * destset; +}; + + + +enum rx_opcode +{ + /* + * BACKTRACK_POINT is invoked when a transition results in more + * than one possible future. + * + * There is one occurence of this instruction per transition_class + * structure; that occurence is only ever executed if the + * transition_class contains a list of more than 1 edge. + */ + rx_backtrack_point = 0, /* data is (struct transition_class *) */ + + /* + * RX_DO_SIDE_EFFECTS evaluates the side effects of an epsilon path. + * There is one occurence of this instruction per rx_distinct_future. + * This instruction is skipped if a rx_distinct_future has no side effects. + */ + rx_do_side_effects = rx_backtrack_point + 1, + /* data is (struct rx_distinct_future *) */ + + /* + * RX_CACHE_MISS instructions are stored in rx_distinct_futures whose + * destination superstate has been reclaimed (or was never built). + * It recomputes the destination superstate. + * RX_CACHE_MISS is also stored in a superstate transition table before + * any of its edges have been built. + */ + rx_cache_miss = rx_do_side_effects + 1, + /* data is (struct rx_distinct_future *) */ + + /* + * RX_NEXT_CHAR is called to consume the next character and take the + * corresponding transition. This is the only instruction that uses + * the DATA field of the instruction frame instead of DATA_2. + * (see EXPLORE_FUTURE in regex.c). + */ + rx_next_char = rx_cache_miss + 1, /* data is (struct superstate *) */ + + /* RX_BACKTRACK indicates that a transition fails. + */ + rx_backtrack = rx_next_char + 1, /* no data */ + + /* + * RX_ERROR_INX is stored only in places that should never be executed. + */ + rx_error_inx = rx_backtrack + 1, /* Not supposed to occur. */ + + rx_num_instructions = rx_error_inx + 1 +}; + +/* An id_instruction_table holds the values stored in instruction + * frames. The table is indexed by the enums declared above. + */ +extern void * rx_id_instruction_table[rx_num_instructions]; + +#if 0 /* Already declared way above. */ +/* If the instruction is `rx_next_char' then data is valid. Otherwise it's 0 + * and data_2 is valid. + */ +struct rx_inx +{ + void * inx; + void * data; + void * data_2; +}; +#endif + + +#ifndef RX_TAIL_ARRAY +#define RX_TAIL_ARRAY 1 +#endif + +/* A superstate corresponds to a set of nfa states. Those sets are + * represented by STRUCT RX_SUPERSET. The constructors + * guarantee that only one (shared) structure is created for a given set. + */ +struct rx_superset +{ + int refs; + struct rx_nfa_state * car; /* May or may not be a valid addr. */ + int id; /* == car->id for the initial value of *car */ + struct rx_superset * cdr; /* May be NULL or a live or semifreed super*/ + + /* If the corresponding superstate exists: */ + struct rx_superstate * superstate; + + /* If this is a starting state (as built by re_search_2) + * this points to the `struct rx'. The memory for these objects + * is typed -- so even after they are freed it is safe to look + * at this field (to check, in fact, if this was freed.) + */ + struct rx * starts_for; + + struct rx_hash_item hash_item; +}; + +#define rx_protect_superset(RX,CON) (++(CON)->refs) + +/* Every character occurs in at most one super edge per super-state. + * But, that edge might have more than one option, indicating a point + * of non-determinism. + */ +struct rx_super_edge +{ + struct rx_super_edge *next; + struct rx_inx rx_backtrack_frame; + int cset_size; + rx_Bitset cset; + struct rx_distinct_future *options; +}; + +/* A superstate is a set of nfa states (RX_SUPERSET) along + * with a transition table. Superstates are built on demand and reclaimed + * without warning. To protect a superstate, use LOCK_SUPERSTATE. + * + * Joe Keane thought of calling these superstates and several people + * have commented on what a good name it is for what they do. + */ +struct rx_superstate +{ + int rx_id; + int locks; + struct rx_superstate * next_recyclable; + struct rx_superstate * prev_recyclable; + struct rx_distinct_future * transition_refs; + struct rx_superset * contents; + struct rx_super_edge * edges; + int is_semifree; + int trans_size; + struct rx_inx transitions[RX_TAIL_ARRAY]; /* cset sized */ +}; + +struct rx_distinct_future +{ + struct rx_distinct_future * next_same_super_edge[2]; + struct rx_distinct_future * next_same_dest; + struct rx_distinct_future * prev_same_dest; + struct rx_superstate * present; /* source state */ + struct rx_superstate * future; /* destination state */ + struct rx_super_edge * edge; + struct rx_inx future_frame; + struct rx_inx side_effects_frame; + struct rx_se_list * effects; +}; + +#define rx_lock_superstate(R,S) ((S)->locks++) +#define rx_unlock_superstate(R,S) (--(S)->locks) + + +/* This page destined for rx.h */ + +struct rx_blocklist +{ + struct rx_blocklist * next; + int bytes; +}; + +struct rx_freelist +{ + struct rx_freelist * next; +}; + +struct rx_cache; + +#ifdef __STDC__ +typedef void (*rx_morecore_fn)(struct rx_cache *); +#else +typedef void (*rx_morecore_fn)(); +#endif + +/* You use this to control the allocation of superstate data + * during matching. Most of it should be initialized to 0. + * + * A MORECORE function is necessary. It should allocate + * a new block of memory or return 0. + * A default that uses malloc is called `rx_morecore'. + * + * The number of SUPERSTATES_ALLOWED indirectly limits how much memory + * the system will try to allocate. The default is 128. Batch style + * applications that are very regexp intensive should use as high a number + * as possible without thrashing. + * + * The LOCAL_CSET_SIZE is the number of characters in a character set. + * It is therefore the number of entries in a superstate transition table. + * Generally, it should be 256. If your character set has 16 bits, + * it is better to translate your regexps into equivalent 8 bit patterns. + */ + +struct rx_cache +{ + struct rx_hash_rules superset_hash_rules; + + /* Objects are allocated by incrementing a pointer that + * scans across rx_blocklists. + */ + struct rx_blocklist * memory; + struct rx_blocklist * memory_pos; + int bytes_left; + char * memory_addr; + rx_morecore_fn morecore; + + /* Freelists. */ + struct rx_freelist * free_superstates; + struct rx_freelist * free_transition_classes; + struct rx_freelist * free_discernable_futures; + struct rx_freelist * free_supersets; + struct rx_freelist * free_hash; + + /* Two sets of superstates -- those that are semifreed, and those + * that are being used. + */ + struct rx_superstate * lru_superstate; + struct rx_superstate * semifree_superstate; + + struct rx_superset * empty_superset; + + int superstates; + int semifree_superstates; + int hits; + int misses; + int superstates_allowed; + + int local_cset_size; + void ** instruction_table; + + struct rx_hash superset_table; +}; + + + +/* The lowest-level search function supports arbitrarily fragmented + * strings and (optionally) suspendable/resumable searches. + * + * Callers have to provide a few hooks. + */ + +#ifndef __GNUC__ +#ifdef __STDC__ +#define __const__ const +#else +#define __const__ +#endif +#endif + +/* This holds a matcher position */ +struct rx_string_position +{ + __const__ unsigned char * pos; /* The current pos. */ + __const__ unsigned char * string; /* The current string burst. */ + __const__ unsigned char * end; /* First invalid position >= POS. */ + int offset; /* Integer address of the current burst. */ + int size; /* Current string's size. */ + int search_direction; /* 1 or -1 */ + int search_end; /* First position to not try. */ +}; + + +enum rx_get_burst_return +{ + rx_get_burst_continuation, + rx_get_burst_error, + rx_get_burst_ok, + rx_get_burst_no_more +}; + + +/* A call to get burst should make POS valid. It might be invalid + * if the STRING field doesn't point to a burst that actually + * contains POS. + * + * GET_BURST should take a clue from SEARCH_DIRECTION (1 or -1) as to + * whether or not to pad to the left. Padding to the right is always + * appropriate, but need not go past the point indicated by STOP. + * + * If a continuation is returned, then the reentering call to + * a search function will retry the get_burst. + */ + +#ifdef __STDC__ +typedef enum rx_get_burst_return + (*rx_get_burst_fn) (struct rx_string_position * pos, + void * app_closure, + int stop); + +#else +typedef enum rx_get_burst_return (*rx_get_burst_fn) (); +#endif + + +enum rx_back_check_return +{ + rx_back_check_continuation, + rx_back_check_error, + rx_back_check_pass, + rx_back_check_fail +}; + +/* Back_check should advance the position it is passed + * over rparen - lparen characters and return pass iff + * the characters starting at POS match those indexed + * by [LPAREN..RPAREN]. + * + * If a continuation is returned, then the reentering call to + * a search function will retry the back_check. + */ + +#ifdef __STDC__ +typedef enum rx_back_check_return + (*rx_back_check_fn) (struct rx_string_position * pos, + int lparen, + int rparen, + unsigned char * translate, + void * app_closure, + int stop); + +#else +typedef enum rx_back_check_return (*rx_back_check_fn) (); +#endif + + + + +/* A call to fetch_char should return the character at POS or POS + 1. + * Returning continuations here isn't supported. OFFSET is either 0 or 1 + * and indicates which characters is desired. + */ + +#ifdef __STDC__ +typedef int (*rx_fetch_char_fn) (struct rx_string_position * pos, + int offset, + void * app_closure, + int stop); +#else +typedef int (*rx_fetch_char_fn) (); +#endif + + +enum rx_search_return +{ + rx_search_continuation = -4, + rx_search_error = -3, + rx_search_soft_fail = -2, /* failed by running out of string */ + rx_search_fail = -1 /* failed only by reaching failure states */ + /* return values >= 0 indicate the position of a successful match */ +}; + + + + + + +/* regex.h + * + * The remaining declarations replace regex.h. + */ + +/* This is an array of error messages corresponding to the error codes. + */ +extern __const__ char *re_error_msg[]; + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* The regex.c support, as a client of rx, defines a set of possible + * side effects that can be added to the edge lables of nfa edges. + * Here is the list of sidef effects in use. + */ + +enum re_side_effects +{ +#define RX_WANT_SE_DEFS 1 +#undef RX_DEF_SE +#undef RX_DEF_CPLX_SE +#define RX_DEF_SE(IDEM, NAME, VALUE) NAME VALUE, +#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) NAME VALUE, +#include "rx.h" +#undef RX_DEF_SE +#undef RX_DEF_CPLX_SE +#undef RX_WANT_SE_DEFS + re_floogle_flap = 65533 +}; + +/* These hold paramaters for the kinds of side effects that are possible + * in the supported pattern languages. These include things like the + * numeric bounds of {} operators and the index of paren registers for + * subexpression measurement or backreferencing. + */ +struct re_se_params +{ + enum re_side_effects se; + int op1; + int op2; +}; + +typedef unsigned reg_syntax_t; + +struct re_pattern_buffer +{ + struct rx rx; + reg_syntax_t syntax; /* See below for syntax bit definitions. */ + + unsigned int no_sub:1; /* If set, don't return register offsets. */ + unsigned int not_bol:1; /* If set, the anchors ('^' and '$') don't */ + unsigned int not_eol:1; /* match at the ends of the string. */ + unsigned int newline_anchor:1;/* If true, an anchor at a newline matches.*/ + unsigned int least_subs:1; /* If set, and returning registers, return + * as few values as possible. Only + * backreferenced groups and group 0 (the whole + * match) will be returned. + */ + + /* If true, this says that the matcher should keep registers on its + * backtracking stack. For many patterns, we can easily determine that + * this isn't necessary. + */ + unsigned int match_regs_on_stack:1; + unsigned int search_regs_on_stack:1; + + /* is_anchored and begbuf_only are filled in by rx_compile. */ + unsigned int is_anchored:1; /* Anchorded by ^? */ + unsigned int begbuf_only:1; /* Anchored to char position 0? */ + + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + * for `max (RE_NREGS, re_nsub + 1)' groups. + * If REGS_REALLOCATE, reallocate space if necessary. + * If REGS_FIXED, use what's there. + */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned int regs_allocated:2; + + + /* Either a translate table to apply to all characters before + * comparing them, or zero for no translation. The translation + * is applied to a pattern when it is compiled and to a string + * when it is matched. + */ + unsigned char * translate; + + /* If this is a valid pointer, it tells rx not to store the extents of + * certain subexpressions (those corresponding to non-zero entries). + * Passing 0x1 is the same as passing an array of all ones. Passing 0x0 + * is the same as passing an array of all zeros. + * The array should contain as many entries as their are subexps in the + * regexp. + */ + char * syntax_parens; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + void * buffer; /* Malloced memory for the nfa. */ + unsigned long allocated; /* Size of that memory. */ + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + * the fastmap, if there is one, to skip over impossible + * starting points for matches. */ + char *fastmap; + + unsigned int fastmap_accurate:1; /* These three are internal. */ + unsigned int can_match_empty:1; + struct rx_nfa_state * start; /* The nfa starting state. */ + + /* This is the list of iterator bounds for {lo,hi} constructs. + * The memory pointed to is part of the rx->buffer. + */ + struct re_se_params *se_params; + + /* This is a bitset representation of the fastmap. + * This is a true fastmap that already takes the translate + * table into account. + */ + rx_Bitset fastset; +}; + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + +typedef struct re_pattern_buffer regex_t; + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0. The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. */ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal. */ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VAR | RE_NO_EMPTY_RANGES \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. */ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +#undef RE_DUP_MAX +#endif +#define RE_DUP_MAX ((1 << 15) - 1) + + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. + If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + * `re_match_2' returns information about at least this many registers + * the first time a `regs' structure is passed. + * + * Also, this is the greatest number of backreferenced subexpressions + * allowed in a pattern being matched without caller-supplied registers. + */ +#ifndef RE_NREGS +#define RE_NREGS 30 +#endif + +extern int rx_cache_bound; +extern char rx_version_string[]; + + + +#ifdef RX_WANT_RX_DEFS + +/* This is decls to the interesting subsystems and lower layers + * of rx. Everything which doesn't have a public counterpart in + * regex.c is declared here. + */ + + +#ifdef __STDC__ +typedef void (*rx_hash_freefn) (struct rx_hash_item * it); +#else /* ndef __STDC__ */ +typedef void (*rx_hash_freefn) (); +#endif /* ndef __STDC__ */ + + + + +#ifdef __STDC__ +RX_DECL int rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b); +RX_DECL int rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b); +RX_DECL int rx_bitset_empty (int size, rx_Bitset set); +RX_DECL void rx_bitset_null (int size, rx_Bitset b); +RX_DECL void rx_bitset_universe (int size, rx_Bitset b); +RX_DECL void rx_bitset_complement (int size, rx_Bitset b); +RX_DECL void rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b); +RX_DECL void rx_bitset_union (int size, rx_Bitset a, rx_Bitset b); +RX_DECL void rx_bitset_intersection (int size, + rx_Bitset a, rx_Bitset b); +RX_DECL void rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b); +RX_DECL void rx_bitset_revdifference (int size, + rx_Bitset a, rx_Bitset b); +RX_DECL void rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b); +RX_DECL unsigned long rx_bitset_hash (int size, rx_Bitset b); +RX_DECL struct rx_hash_item * rx_hash_find (struct rx_hash * table, + unsigned long hash, + void * value, + struct rx_hash_rules * rules); +RX_DECL struct rx_hash_item * rx_hash_store (struct rx_hash * table, + unsigned long hash, + void * value, + struct rx_hash_rules * rules); +RX_DECL void rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules); +RX_DECL void rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn, + struct rx_hash_rules * rules); +RX_DECL rx_Bitset rx_cset (struct rx *rx); +RX_DECL rx_Bitset rx_copy_cset (struct rx *rx, rx_Bitset a); +RX_DECL void rx_free_cset (struct rx * rx, rx_Bitset c); +RX_DECL struct rexp_node * rexp_node (struct rx *rx, + enum rexp_node_type type); +RX_DECL struct rexp_node * rx_mk_r_cset (struct rx * rx, + rx_Bitset b); +RX_DECL struct rexp_node * rx_mk_r_concat (struct rx * rx, + struct rexp_node * a, + struct rexp_node * b); +RX_DECL struct rexp_node * rx_mk_r_alternate (struct rx * rx, + struct rexp_node * a, + struct rexp_node * b); +RX_DECL struct rexp_node * rx_mk_r_opt (struct rx * rx, + struct rexp_node * a); +RX_DECL struct rexp_node * rx_mk_r_star (struct rx * rx, + struct rexp_node * a); +RX_DECL struct rexp_node * rx_mk_r_2phase_star (struct rx * rx, + struct rexp_node * a, + struct rexp_node * b); +RX_DECL struct rexp_node * rx_mk_r_side_effect (struct rx * rx, + rx_side_effect a); +RX_DECL struct rexp_node * rx_mk_r_data (struct rx * rx, + void * a); +RX_DECL void rx_free_rexp (struct rx * rx, struct rexp_node * node); +RX_DECL struct rexp_node * rx_copy_rexp (struct rx *rx, + struct rexp_node *node); +RX_DECL struct rx_nfa_state * rx_nfa_state (struct rx *rx); +RX_DECL void rx_free_nfa_state (struct rx_nfa_state * n); +RX_DECL struct rx_nfa_state * rx_id_to_nfa_state (struct rx * rx, + int id); +RX_DECL struct rx_nfa_edge * rx_nfa_edge (struct rx *rx, + enum rx_nfa_etype type, + struct rx_nfa_state *start, + struct rx_nfa_state *dest); +RX_DECL void rx_free_nfa_edge (struct rx_nfa_edge * e); +RX_DECL void rx_free_nfa (struct rx *rx); +RX_DECL int rx_build_nfa (struct rx *rx, + struct rexp_node *rexp, + struct rx_nfa_state **start, + struct rx_nfa_state **end); +RX_DECL void rx_name_nfa_states (struct rx *rx); +RX_DECL int rx_eclose_nfa (struct rx *rx); +RX_DECL void rx_delete_epsilon_transitions (struct rx *rx); +RX_DECL int rx_compactify_nfa (struct rx *rx, + void **mem, unsigned long *size); +RX_DECL void rx_release_superset (struct rx *rx, + struct rx_superset *set); +RX_DECL struct rx_superset * rx_superset_cons (struct rx * rx, + struct rx_nfa_state *car, struct rx_superset *cdr); +RX_DECL struct rx_superset * rx_superstate_eclosure_union + (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl); +RX_DECL struct rx_superstate * rx_superstate (struct rx *rx, + struct rx_superset *set); +RX_DECL struct rx_inx * rx_handle_cache_miss + (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data); +RX_DECL reg_errcode_t rx_compile (__const__ char *pattern, int size, + reg_syntax_t syntax, + struct re_pattern_buffer * rxb); +RX_DECL void rx_blow_up_fastmap (struct re_pattern_buffer * rxb); +#else /* STDC */ +RX_DECL int rx_bitset_is_equal (); +RX_DECL int rx_bitset_is_subset (); +RX_DECL int rx_bitset_empty (); +RX_DECL void rx_bitset_null (); +RX_DECL void rx_bitset_universe (); +RX_DECL void rx_bitset_complement (); +RX_DECL void rx_bitset_assign (); +RX_DECL void rx_bitset_union (); +RX_DECL void rx_bitset_intersection (); +RX_DECL void rx_bitset_difference (); +RX_DECL void rx_bitset_revdifference (); +RX_DECL void rx_bitset_xor (); +RX_DECL unsigned long rx_bitset_hash (); +RX_DECL struct rx_hash_item * rx_hash_find (); +RX_DECL struct rx_hash_item * rx_hash_store (); +RX_DECL void rx_hash_free (); +RX_DECL void rx_free_hash_table (); +RX_DECL rx_Bitset rx_cset (); +RX_DECL rx_Bitset rx_copy_cset (); +RX_DECL void rx_free_cset (); +RX_DECL struct rexp_node * rexp_node (); +RX_DECL struct rexp_node * rx_mk_r_cset (); +RX_DECL struct rexp_node * rx_mk_r_concat (); +RX_DECL struct rexp_node * rx_mk_r_alternate (); +RX_DECL struct rexp_node * rx_mk_r_opt (); +RX_DECL struct rexp_node * rx_mk_r_star (); +RX_DECL struct rexp_node * rx_mk_r_2phase_star (); +RX_DECL struct rexp_node * rx_mk_r_side_effect (); +RX_DECL struct rexp_node * rx_mk_r_data (); +RX_DECL void rx_free_rexp (); +RX_DECL struct rexp_node * rx_copy_rexp (); +RX_DECL struct rx_nfa_state * rx_nfa_state (); +RX_DECL void rx_free_nfa_state (); +RX_DECL struct rx_nfa_state * rx_id_to_nfa_state (); +RX_DECL struct rx_nfa_edge * rx_nfa_edge (); +RX_DECL void rx_free_nfa_edge (); +RX_DECL void rx_free_nfa (); +RX_DECL int rx_build_nfa (); +RX_DECL void rx_name_nfa_states (); +RX_DECL int rx_eclose_nfa (); +RX_DECL void rx_delete_epsilon_transitions (); +RX_DECL int rx_compactify_nfa (); +RX_DECL void rx_release_superset (); +RX_DECL struct rx_superset * rx_superset_cons (); +RX_DECL struct rx_superset * rx_superstate_eclosure_union (); +RX_DECL struct rx_superstate * rx_superstate (); +RX_DECL struct rx_inx * rx_handle_cache_miss (); +RX_DECL reg_errcode_t rx_compile (); +RX_DECL void rx_blow_up_fastmap (); +#endif /* STDC */ + + +#endif /* RX_WANT_RX_DEFS */ + + + +#ifdef __STDC__ +extern int re_search_2 (struct re_pattern_buffer *rxb, + __const__ char * string1, int size1, + __const__ char * string2, int size2, + int startpos, int range, + struct re_registers *regs, + int stop); +extern int re_search (struct re_pattern_buffer * rxb, __const__ char *string, + int size, int startpos, int range, + struct re_registers *regs); +extern int re_match_2 (struct re_pattern_buffer * rxb, + __const__ char * string1, int size1, + __const__ char * string2, int size2, + int pos, struct re_registers *regs, int stop); +extern int re_match (struct re_pattern_buffer * rxb, + __const__ char * string, + int size, int pos, + struct re_registers *regs); +extern reg_syntax_t re_set_syntax (reg_syntax_t syntax); +extern void re_set_registers (struct re_pattern_buffer *bufp, + struct re_registers *regs, + unsigned num_regs, + regoff_t * starts, regoff_t * ends); +extern __const__ char * re_compile_pattern (__const__ char *pattern, + int length, + struct re_pattern_buffer * rxb); +extern int re_compile_fastmap (struct re_pattern_buffer * rxb); +extern char * re_comp (__const__ char *s); +extern int re_exec (__const__ char *s); +extern int regcomp (regex_t * preg, __const__ char * pattern, int cflags); +extern int regexec (__const__ regex_t *preg, __const__ char *string, + size_t nmatch, regmatch_t pmatch[], + int eflags); +extern size_t regerror (int errcode, __const__ regex_t *preg, + char *errbuf, size_t errbuf_size); +extern void regfree (regex_t *preg); + +#else /* STDC */ +extern int re_search_2 (); +extern int re_search (); +extern int re_match_2 (); +extern int re_match (); +extern reg_syntax_t re_set_syntax (); +extern void re_set_registers (); +extern __const__ char * re_compile_pattern (); +extern int re_compile_fastmap (); +extern char * re_comp (); +extern int re_exec (); +extern int regcomp (); +extern int regexec (); +extern size_t regerror (); +extern void regfree (); + +#endif /* STDC */ + + + +#ifdef RX_WANT_RX_DEFS + +struct rx_counter_frame +{ + int tag; + int val; + struct rx_counter_frame * inherited_from; /* If this is a copy. */ + struct rx_counter_frame * cdr; +}; + +struct rx_backtrack_frame +{ + char * counter_stack_sp; + + /* A frame is used to save the matchers state when it crosses a + * backtracking point. The `stk_' fields correspond to variables + * in re_search_2 (just strip off thes `stk_'). They are documented + * tere. + */ + struct rx_superstate * stk_super; + unsigned int stk_c; + struct rx_string_position stk_test_pos; + int stk_last_l; + int stk_last_r; + int stk_test_ret; + + /* This is the list of options left to explore at the backtrack + * point for which this frame was created. + */ + struct rx_distinct_future * df; + struct rx_distinct_future * first_df; + +#ifdef RX_DEBUG + int stk_line_no; +#endif +}; + +struct rx_stack_chunk +{ + struct rx_stack_chunk * next_chunk; + int bytes_left; + char * sp; +}; + +enum rx_outer_entry +{ + rx_outer_start, + rx_outer_fastmap, + rx_outer_test, + rx_outer_restore_pos +}; + +enum rx_fastmap_return +{ + rx_fastmap_continuation, + rx_fastmap_error, + rx_fastmap_ok, + rx_fastmap_fail +}; + +enum rx_fastmap_entry +{ + rx_fastmap_start, + rx_fastmap_string_break +}; + +enum rx_test_return +{ + rx_test_continuation, + rx_test_error, + rx_test_fail, + rx_test_ok +}; + +enum rx_test_internal_return +{ + rx_test_internal_error, + rx_test_found_first, + rx_test_line_finished +}; + +enum rx_test_match_entry +{ + rx_test_start, + rx_test_cache_hit_loop, + rx_test_backreference_check, + rx_test_backtrack_return +}; + +struct rx_search_state +{ + /* Two groups of registers are kept. The group with the register state + * of the current test match, and the group that holds the state at the end + * of the best known match, if any. + * + * For some patterns, there may also be registers saved on the stack. + */ + unsigned num_regs; /* Includes an element for register zero. */ + regoff_t * lparen; /* scratch space for register returns */ + regoff_t * rparen; + regoff_t * best_lpspace; /* in case the user doesn't want these */ + regoff_t * best_rpspace; /* values, we still need space to store + * them. Normally, this memoryis unused + * and the space pointed to by REGS is + * used instead. + */ + + int last_l; /* Highest index of a valid lparen. */ + int last_r; /* It's dual. */ + + int * best_lparen; /* This contains the best known register */ + int * best_rparen; /* assignments. + * This may point to the same mem as + * best_lpspace, or it might point to memory + * passed by the caller. + */ + int best_last_l; /* best_last_l:best_lparen::last_l:lparen */ + int best_last_r; + + + unsigned char * translate; + + struct rx_string_position outer_pos; + + struct rx_superstate * start_super; + int nfa_choice; + int first_found; /* If true, return after finding any match. */ + int ret_val; + + /* For continuations... */ + enum rx_outer_entry outer_search_resume_pt; + struct re_pattern_buffer * saved_rxb; + int saved_startpos; + int saved_range; + int saved_stop; + int saved_total_size; + rx_get_burst_fn saved_get_burst; + rx_back_check_fn saved_back_check; + struct re_registers * saved_regs; + + /** + ** state for fastmap + **/ + char * fastmap; + int fastmap_chr; + int fastmap_val; + + /* for continuations in the fastmap procedure: */ + enum rx_fastmap_entry fastmap_resume_pt; + + /** + ** state for test_match + **/ + + /* The current superNFA position of the matcher. */ + struct rx_superstate * super; + + /* The matcher interprets a series of instruction frames. + * This is the `instruction counter' for the interpretation. + */ + struct rx_inx * ifr; + + /* We insert a ghost character in the string to prime + * the nfa. test_pos.pos, test_pos.str_half, and test_pos.end_half + * keep track of the test-match position and string-half. + */ + unsigned char c; + + /* Position within the string. */ + struct rx_string_position test_pos; + + struct rx_stack_chunk * counter_stack; + struct rx_stack_chunk * backtrack_stack; + int backtrack_frame_bytes; + int chunk_bytes; + struct rx_stack_chunk * free_chunks; + + /* To return from this function, set test_ret and + * `goto test_do_return'. + * + * Possible return values are: + * 1 --- end of string while the superNFA is still going + * 0 --- internal error (out of memory) + * -1 --- search completed by reaching the superNFA fail state + * -2 --- a match was found, maybe not the longest. + * + * When the search is complete (-1), best_last_r indicates whether + * a match was found. + * + * -2 is return only if search_state.first_found is non-zero. + * + * if search_state.first_found is non-zero, a return of -1 indicates no match, + * otherwise, best_last_r has to be checked. + */ + int test_ret; + + int could_have_continued; + +#ifdef RX_DEBUG + int backtrack_depth; + /* There is a search tree with every node as set of deterministic + * transitions in the super nfa. For every branch of a + * backtrack point is an edge in the tree. + * This counts up a pre-order of nodes in that tree. + * It's saved on the search stack and printed when debugging. + */ + int line_no; + int lines_found; +#endif + + + /* For continuations within the match tester */ + enum rx_test_match_entry test_match_resume_pt; + struct rx_inx * saved_next_tr_table; + struct rx_inx * saved_this_tr_table; + int saved_reg; + struct rx_backtrack_frame * saved_bf; + +}; + + +extern char rx_slowmap[]; +extern unsigned char rx_id_translation[]; + +static __inline__ void +init_fastmap (rxb, search_state) + struct re_pattern_buffer * rxb; + struct rx_search_state * search_state; +{ + search_state->fastmap = (rxb->fastmap + ? (char *)rxb->fastmap + : (char *)rx_slowmap); + /* Update the fastmap now if not correct already. + * When the regexp was compiled, the fastmap was computed + * and stored in a bitset. This expands the bitset into a + * character array containing 1s and 0s. + */ + if ((search_state->fastmap == rxb->fastmap) && !rxb->fastmap_accurate) + rx_blow_up_fastmap (rxb); + search_state->fastmap_chr = -1; + search_state->fastmap_val = 0; + search_state->fastmap_resume_pt = rx_fastmap_start; +} + +static __inline__ void +uninit_fastmap (rxb, search_state) + struct re_pattern_buffer * rxb; + struct rx_search_state * search_state; +{ + /* Unset the fastmap sentinel */ + if (search_state->fastmap_chr >= 0) + search_state->fastmap[search_state->fastmap_chr] + = search_state->fastmap_val; +} + +static __inline__ int +fastmap_search (rxb, stop, get_burst, app_closure, search_state) + struct re_pattern_buffer * rxb; + int stop; + rx_get_burst_fn get_burst; + void * app_closure; + struct rx_search_state * search_state; +{ + enum rx_fastmap_entry pc; + + if (0) + { + return_continuation: + search_state->fastmap_resume_pt = pc; + return rx_fastmap_continuation; + } + + pc = search_state->fastmap_resume_pt; + + switch (pc) + { + case rx_fastmap_start: + init_fastmap_sentinal: + /* For the sake of fast fastmapping, set a sentinal in the fastmap. + * This sentinal will trap the fastmap loop when it reaches the last + * valid character in a string half. + * + * This must be reset when the fastmap/search loop crosses a string + * boundry, and before returning to the caller. So sometimes, + * the fastmap loop is restarted with `continue', othertimes by + * `goto init_fastmap_sentinal'. + */ + if (search_state->outer_pos.size) + { + search_state->fastmap_chr = ((search_state->outer_pos.search_direction == 1) + ? *(search_state->outer_pos.end - 1) + : *search_state->outer_pos.string); + search_state->fastmap_val + = search_state->fastmap[search_state->fastmap_chr]; + search_state->fastmap[search_state->fastmap_chr] = 1; + } + else + { + search_state->fastmap_chr = -1; + search_state->fastmap_val = 0; + } + + if (search_state->outer_pos.pos >= search_state->outer_pos.end) + goto fastmap_hit_bound; + else + { + if (search_state->outer_pos.search_direction == 1) + { + if (search_state->fastmap_val) + { + for (;;) + { + while (!search_state->fastmap[*search_state->outer_pos.pos]) + ++search_state->outer_pos.pos; + return rx_fastmap_ok; + } + } + else + { + for (;;) + { + while (!search_state->fastmap[*search_state->outer_pos.pos]) + ++search_state->outer_pos.pos; + if (*search_state->outer_pos.pos != search_state->fastmap_chr) + return rx_fastmap_ok; + else + { + ++search_state->outer_pos.pos; + if (search_state->outer_pos.pos == search_state->outer_pos.end) + goto fastmap_hit_bound; + } + } + } + } + else + { + __const__ unsigned char * bound; + bound = search_state->outer_pos.string - 1; + if (search_state->fastmap_val) + { + for (;;) + { + while (!search_state->fastmap[*search_state->outer_pos.pos]) + --search_state->outer_pos.pos; + return rx_fastmap_ok; + } + } + else + { + for (;;) + { + while (!search_state->fastmap[*search_state->outer_pos.pos]) + --search_state->outer_pos.pos; + if ((*search_state->outer_pos.pos != search_state->fastmap_chr) || search_state->fastmap_val) + return rx_fastmap_ok; + else + { + --search_state->outer_pos.pos; + if (search_state->outer_pos.pos == bound) + goto fastmap_hit_bound; + } + } + } + } + } + + case rx_fastmap_string_break: + fastmap_hit_bound: + { + /* If we hit a bound, it may be time to fetch another burst + * of string, or it may be time to return a continuation to + * the caller, or it might be time to fail. + */ + + int burst_state; + burst_state = get_burst (&search_state->outer_pos, app_closure, stop); + switch (burst_state) + { + case rx_get_burst_continuation: + { + pc = rx_fastmap_string_break; + goto return_continuation; + } + case rx_get_burst_error: + return rx_fastmap_error; + case rx_get_burst_ok: + goto init_fastmap_sentinal; + case rx_get_burst_no_more: + /* ...not a string split, simply no more string. + * + * When searching backward, running out of string + * is reason to quit. + * + * When searching forward, we allow the possibility + * of an (empty) match after the last character in the + * virtual string. So, fall through to the matcher + */ + return ( (search_state->outer_pos.search_direction == 1) + ? rx_fastmap_ok + : rx_fastmap_fail); + } + } + } + +} + + + +#ifdef emacs +/* The `emacs' switch turns on certain matching commands + * that make sense only in Emacs. + */ +#include "config.h" +#include "lisp.h" +#include "buffer.h" +#include "syntax.h" +/* Emacs uses `NULL' as a predicate. */ +#undef NULL +#else /* not emacs */ +/* Setting RX_MEMDBUG is useful if you have dbmalloc. Maybe with similar + * packages too. + */ +#ifdef RX_MEMDBUG +#include <malloc.h> +#else /* not RX_RX_MEMDBUG */ + +/* We used to test for `BSTRING' here, but only GCC and Emacs define + * `BSTRING', as far as I know, and neither of them use this code. + */ +#if HAVE_STRING_H || STDC_HEADERS +#include <string.h> + +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif + +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif + +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif + +#else /* HAVE_STRING_H || STDC_HEADERS */ +#include <strings.h> +#endif /* not RX_MEMDBUG */ + +#ifdef STDC_HEADERS +#include <stdlib.h> +#else /* not STDC_HEADERS */ +char *malloc (); +char *realloc (); +#endif /* not STDC_HEADERS */ + +#endif /* not emacs */ + + + +/* Define the syntax basics for \<, \>, etc. + * This must be nonzero for the wordchar and notwordchar pattern + * commands in re_match_2. + */ +#ifndef Sword +#define Sword 1 +#endif + +/* How many characters in the character set. */ +#define CHAR_SET_SIZE (1 << CHARBITS) +#define SYNTAX(c) re_syntax_table[c] +RX_DECL char re_syntax_table[CHAR_SET_SIZE]; + +#endif /* not emacs */ + + +/* Test if at very beginning or at very end of the virtual concatenation + * of `string1' and `string2'. If only one string, it's `string2'. + */ + +#define AT_STRINGS_BEG() \ + ( -1 \ + == ((search_state.test_pos.pos - search_state.test_pos.string) \ + + search_state.test_pos.offset)) + +#define AT_STRINGS_END() \ + ( (total_size - 1) \ + == ((search_state.test_pos.pos - search_state.test_pos.string) \ + + search_state.test_pos.offset)) + + +/* Test if POS + 1 points to a character which is word-constituent. We have + * two special cases to check for: if past the end of string1, look at + * the first character in string2; and if before the beginning of + * string2, look at the last character in string1. + * + * Assumes `string1' exists, so use in conjunction with AT_STRINGS_BEG (). + */ +#define LETTER_P(POS,OFF) \ + ( SYNTAX (fetch_char(POS, OFF, app_closure, stop)) \ + == Sword) + +/* Test if the character at D and the one after D differ with respect + * to being word-constituent. + */ +#define AT_WORD_BOUNDARY(d) \ + (AT_STRINGS_BEG () || AT_STRINGS_END () || LETTER_P (d,0) != LETTER_P (d, 1)) + + +#ifdef RX_SUPPORT_CONTINUATIONS +#define RX_STACK_ALLOC(BYTES) malloc(BYTES) +#define RX_STACK_FREE(MEM) free(MEM) +#else +#define RX_STACK_ALLOC(BYTES) alloca(BYTES) +#define RX_STACK_FREE(MEM) \ + ((struct rx_stack_chunk *)MEM)->next_chunk = search_state.free_chunks; \ + search_state.free_chunks = ((struct rx_stack_chunk *)MEM); + +#endif + +#define PUSH(CHUNK_VAR,BYTES) \ + if (!CHUNK_VAR || (CHUNK_VAR->bytes_left < (BYTES))) \ + { \ + struct rx_stack_chunk * new_chunk; \ + if (search_state.free_chunks) \ + { \ + new_chunk = search_state.free_chunks; \ + search_state.free_chunks = search_state.free_chunks->next_chunk; \ + } \ + else \ + { \ + new_chunk = (struct rx_stack_chunk *)RX_STACK_ALLOC(search_state.chunk_bytes); \ + if (!new_chunk) \ + { \ + search_state.ret_val = 0; \ + goto test_do_return; \ + } \ + } \ + new_chunk->sp = (char *)new_chunk + sizeof (struct rx_stack_chunk); \ + new_chunk->bytes_left = (search_state.chunk_bytes \ + - (BYTES) \ + - sizeof (struct rx_stack_chunk)); \ + new_chunk->next_chunk = CHUNK_VAR; \ + CHUNK_VAR = new_chunk; \ + } \ + else \ + (CHUNK_VAR->sp += (BYTES)), (CHUNK_VAR->bytes_left -= (BYTES)) + +#define POP(CHUNK_VAR,BYTES) \ + if (CHUNK_VAR->sp == ((char *)CHUNK_VAR + sizeof(*CHUNK_VAR))) \ + { \ + struct rx_stack_chunk * new_chunk = CHUNK_VAR->next_chunk; \ + RX_STACK_FREE(CHUNK_VAR); \ + CHUNK_VAR = new_chunk; \ + } \ + else \ + (CHUNK_VAR->sp -= BYTES), (CHUNK_VAR->bytes_left += BYTES) + + + +#define SRCH_TRANSLATE(C) search_state.translate[(unsigned char) (C)] + + + + +#ifdef __STDC__ +RX_DECL __inline__ int +rx_search (struct re_pattern_buffer * rxb, + int startpos, + int range, + int stop, + int total_size, + rx_get_burst_fn get_burst, + rx_back_check_fn back_check, + rx_fetch_char_fn fetch_char, + void * app_closure, + struct re_registers * regs, + struct rx_search_state * resume_state, + struct rx_search_state * save_state) +#else +RX_DECL __inline__ int +rx_search (rxb, startpos, range, stop, total_size, + get_burst, back_check, fetch_char, + app_closure, regs, resume_state, save_state) + struct re_pattern_buffer * rxb; + int startpos; + int range; + int stop; + int total_size; + rx_get_burst_fn get_burst; + rx_back_check_fn back_check; + rx_fetch_char_fn fetch_char; + void * app_closure; + struct re_registers * regs; + struct rx_search_state * resume_state; + struct rx_search_state * save_state; +#endif +{ + int pc; + int test_state; + struct rx_search_state search_state; + + if (!resume_state) + pc = rx_outer_start; + else + { + search_state = *resume_state; + regs = search_state.saved_regs; + rxb = search_state.saved_rxb; + startpos = search_state.saved_startpos; + range = search_state.saved_range; + stop = search_state.saved_stop; + total_size = search_state.saved_total_size; + get_burst = search_state.saved_get_burst; + back_check = search_state.saved_back_check; + pc = search_state.outer_search_resume_pt; + if (0) + { + return_continuation: + if (save_state) + { + *save_state = search_state; + save_state->saved_regs = regs; + save_state->saved_rxb = rxb; + save_state->saved_startpos = startpos; + save_state->saved_range = range; + save_state->saved_stop = stop; + save_state->saved_total_size = total_size; + save_state->saved_get_burst = get_burst; + save_state->saved_back_check = back_check; + save_state->outer_search_resume_pt = pc; + } + return rx_search_continuation; + } + } + + switch (pc) + { + case rx_outer_start: + search_state.ret_val = rx_search_fail; + ( search_state.lparen + = search_state.rparen + = search_state.best_lpspace + = search_state.best_rpspace + = 0); + + /* figure the number of registers we may need for use in backreferences. + * the number here includes an element for register zero. + */ + search_state.num_regs = rxb->re_nsub + 1; + + + /* check for out-of-range startpos. */ + if ((startpos < 0) || (startpos > total_size)) + return rx_search_fail; + + /* fix up range if it might eventually take us outside the string. */ + { + int endpos; + endpos = startpos + range; + if (endpos < -1) + range = (-1 - startpos); + else if (endpos > total_size) + range = total_size - startpos; + } + + /* if the search isn't to be a backwards one, don't waste time in a + * long search for a pattern that says it is anchored. + */ + if (rxb->begbuf_only && (range > 0)) + { + if (startpos > 0) + return rx_search_fail; + else + range = 1; + } + + /* decide whether to use internal or user-provided reg buffers. */ + if (!regs || rxb->no_sub) + { + search_state.best_lpspace = + (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)); + search_state.best_rpspace = + (regoff_t *)REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)); + search_state.best_lparen = search_state.best_lpspace; + search_state.best_rparen = search_state.best_rpspace; + } + else + { + /* have the register data arrays been allocated? */ + if (rxb->regs_allocated == REGS_UNALLOCATED) + { /* no. so allocate them with malloc. we need one + extra element beyond `search_state.num_regs' for the `-1' marker + gnu code uses. */ + regs->num_regs = MAX (RE_NREGS, rxb->re_nsub + 1); + regs->start = ((regoff_t *) + malloc (regs->num_regs * sizeof ( regoff_t))); + regs->end = ((regoff_t *) + malloc (regs->num_regs * sizeof ( regoff_t))); + if (regs->start == 0 || regs->end == 0) + return rx_search_error; + rxb->regs_allocated = REGS_REALLOCATE; + } + else if (rxb->regs_allocated == REGS_REALLOCATE) + { /* yes. if we need more elements than were already + allocated, reallocate them. if we need fewer, just + leave it alone. */ + if (regs->num_regs < search_state.num_regs + 1) + { + regs->num_regs = search_state.num_regs + 1; + regs->start = ((regoff_t *) + realloc (regs->start, + regs->num_regs * sizeof (regoff_t))); + regs->end = ((regoff_t *) + realloc (regs->end, + regs->num_regs * sizeof ( regoff_t))); + if (regs->start == 0 || regs->end == 0) + return rx_search_error; + } + } + else if (rxb->regs_allocated != REGS_FIXED) + return rx_search_error; + + if (regs->num_regs < search_state.num_regs + 1) + { + search_state.best_lpspace = + ((regoff_t *) + REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t))); + search_state.best_rpspace = + ((regoff_t *) + REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t))); + search_state.best_lparen = search_state.best_lpspace; + search_state.best_rparen = search_state.best_rpspace; + } + else + { + search_state.best_lparen = regs->start; + search_state.best_rparen = regs->end; + } + } + + search_state.lparen = + (regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)); + search_state.rparen = + (regoff_t *) REGEX_ALLOCATE (search_state.num_regs * sizeof(regoff_t)); + + if (! ( search_state.best_rparen + && search_state.best_lparen + && search_state.lparen && search_state.rparen)) + return rx_search_error; + + search_state.best_last_l = search_state.best_last_r = -1; + + search_state.translate = (rxb->translate + ? rxb->translate + : rx_id_translation); + + + + /* + * two nfa's were compiled. + * `0' is complete. + * `1' faster but gets registers wrong and ends too soon. + */ + search_state.nfa_choice = (regs && !rxb->least_subs) ? '\0' : '\1'; + + /* we have the option to look for the best match or the first + * one we can find. if the user isn't asking for register information, + * we don't need to find the best match. + */ + search_state.first_found = !regs; + + if (range >= 0) + { + search_state.outer_pos.search_end = MIN (total_size, startpos + range) + 1; + search_state.outer_pos.search_direction = 1; + } + else + { + search_state.outer_pos.search_end = MAX(-1, startpos + range); + search_state.outer_pos.search_direction = -1; + } + + /* the vacuous search always turns up nothing. */ + if ((search_state.outer_pos.search_direction == 1) + ? (startpos > search_state.outer_pos.search_end) + : (startpos < search_state.outer_pos.search_end)) + return rx_search_fail; + + /* now we build the starting state of the supernfa. */ + { + struct rx_superset * start_contents; + struct rx_nfa_state_set * start_nfa_set; + + /* we presume here that the nfa start state has only one + * possible future with no side effects. + */ + start_nfa_set = rxb->start->futures->destset; + if ( rxb->rx.start_set + && (rxb->rx.start_set->starts_for == &rxb->rx)) + start_contents = rxb->rx.start_set; + else + { + start_contents = + rx_superstate_eclosure_union (&rxb->rx, + rx_superset_cons (&rxb->rx, 0, 0), + start_nfa_set); + + if (!start_contents) + return rx_search_fail; + + start_contents->starts_for = &rxb->rx; + rxb->rx.start_set = start_contents; + } + if ( start_contents->superstate + && (start_contents->superstate->rx_id == rxb->rx.rx_id)) + { + search_state.start_super = start_contents->superstate; + rx_lock_superstate (&rxb->rx, search_state.start_super); + } + else + { + rx_protect_superset (&rxb->rx, start_contents); + + search_state.start_super = rx_superstate (&rxb->rx, start_contents); + if (!search_state.start_super) + return rx_search_fail; + rx_lock_superstate (&rxb->rx, search_state.start_super); + rx_release_superset (&rxb->rx, start_contents); + } + } + + + + ( search_state.outer_pos.string + = search_state.outer_pos.end + = 0); + + search_state.outer_pos.offset = 0; + search_state.outer_pos.size = 0; + search_state.outer_pos.pos = (unsigned char *)startpos; + init_fastmap (rxb, &search_state); + + search_state.fastmap_resume_pt = rx_fastmap_start; + case rx_outer_fastmap: + /* do { */ + pseudo_do: + { + { + int fastmap_state; + fastmap_state = fastmap_search (rxb, stop, get_burst, app_closure, + &search_state); + switch (fastmap_state) + { + case rx_fastmap_continuation: + pc = rx_outer_fastmap; + goto return_continuation; + case rx_fastmap_fail: + goto finish; + case rx_fastmap_ok: + break; + } + } + + /* now the fastmap loop has brought us to a plausible + * starting point for a match. so, it's time to run the + * nfa and see if a match occured. + */ + startpos = ( search_state.outer_pos.pos + - search_state.outer_pos.string + + search_state.outer_pos.offset); + if (startpos == search_state.outer_pos.search_end) + goto finish; + } + + search_state.test_match_resume_pt = rx_test_start; + /* do interrupted for entry point... */ + case rx_outer_test: + /* ...do continued */ + { + goto test_match; + test_returns_to_search: + switch (test_state) + { + case rx_test_continuation: + pc = rx_outer_test; + goto return_continuation; + case rx_test_error: + search_state.ret_val = rx_search_error; + goto finish; + case rx_test_fail: + break; + case rx_test_ok: + goto finish; + } + search_state.outer_pos.pos += search_state.outer_pos.search_direction; + startpos += search_state.outer_pos.search_direction; + } + /* do interrupted for entry point... */ + case rx_outer_restore_pos: + { + int x; + x = get_burst (&search_state.outer_pos, app_closure, stop); + switch (x) + { + case rx_get_burst_continuation: + pc = rx_outer_restore_pos; + goto return_continuation; + case rx_get_burst_error: + search_state.ret_val = rx_search_error; + goto finish; + case rx_get_burst_no_more: + goto finish; + case rx_get_burst_ok: + break; + } + } /* } while (...see below...) */ + if ((search_state.outer_pos.search_direction == 1) + ? (startpos < search_state.outer_pos.search_end) + : (startpos > search_state.outer_pos.search_end)) + goto pseudo_do; + + + finish: + uninit_fastmap (rxb, &search_state); + if (search_state.start_super) + rx_unlock_superstate (&rxb->rx, search_state.start_super); + +#ifdef regex_malloc + if (search_state.lparen) free (search_state.lparen); + if (search_state.rparen) free (search_state.rparen); + if (search_state.best_lpspace) free (search_state.best_lpspace); + if (search_state.best_rpspace) free (search_state.best_rpspace); +#endif + return search_state.ret_val; + } + + + test_match: + { + enum rx_test_match_entry test_pc; + int inx; + test_pc = search_state.test_match_resume_pt; + if (test_pc == rx_test_start) + { +#ifdef RX_DEBUG + search_state.backtrack_depth = 0; +#endif + search_state.last_l = search_state.last_r = 0; + search_state.lparen[0] = startpos; + search_state.super = search_state.start_super; + search_state.c = search_state.nfa_choice; + search_state.test_pos.pos = search_state.outer_pos.pos - 1; + search_state.test_pos.string = search_state.outer_pos.string; + search_state.test_pos.end = search_state.outer_pos.end; + search_state.test_pos.offset = search_state.outer_pos.offset; + search_state.test_pos.size = search_state.outer_pos.size; + search_state.test_pos.search_direction = 1; + search_state.counter_stack = 0; + search_state.backtrack_stack = 0; + search_state.backtrack_frame_bytes = + (sizeof (struct rx_backtrack_frame) + + (rxb->match_regs_on_stack + ? sizeof (regoff_t) * (search_state.num_regs + 1) * 2 + : 0)); + search_state.chunk_bytes = search_state.backtrack_frame_bytes * 64; + search_state.free_chunks = 0; + search_state.test_ret = rx_test_line_finished; + search_state.could_have_continued = 0; + } + /* This is while (1)...except that the body of the loop is interrupted + * by some alternative entry points. + */ + pseudo_while_1: + switch (test_pc) + { + case rx_test_cache_hit_loop: + goto resume_continuation_1; + case rx_test_backreference_check: + goto resume_continuation_2; + case rx_test_backtrack_return: + goto resume_continuation_3; + case rx_test_start: +#ifdef RX_DEBUG + /* There is a search tree with every node as set of deterministic + * transitions in the super nfa. For every branch of a + * backtrack point is an edge in the tree. + * This counts up a pre-order of nodes in that tree. + * It's saved on the search stack and printed when debugging. + */ + search_state.line_no = 0; + search_state.lines_found = 0; +#endif + + top_of_cycle: + /* A superstate is basicly a transition table, indexed by + * characters from the string being tested, and containing + * RX_INX (`instruction frame') structures. + */ + search_state.ifr = &search_state.super->transitions [search_state.c]; + + recurse_test_match: + /* This is the point to which control is sent when the + * test matcher `recurses'. Before jumping here, some variables + * need to be saved on the stack and the next instruction frame + * has to be computed. + */ + + restart: + /* Some instructions don't advance the matcher, but just + * carry out some side effects and fetch a new instruction. + * To dispatch that new instruction, `goto restart'. + */ + + { + struct rx_inx * next_tr_table; + struct rx_inx * this_tr_table; + /* The fastest route through the loop is when the instruction + * is RX_NEXT_CHAR. This case is detected when SEARCH_STATE.IFR->DATA + * is non-zero. In that case, it points to the next + * superstate. + * + * This allows us to not bother fetching the bytecode. + */ + next_tr_table = (struct rx_inx *)search_state.ifr->data; + this_tr_table = search_state.super->transitions; + while (next_tr_table) + { +#ifdef RX_DEBUG + if (rx_debug_trace) + { + struct rx_superset * setp; + + fprintf (stderr, "%d %d>> re_next_char @ %d (%d)", + search_state.line_no, + search_state.backtrack_depth, + (search_state.test_pos.pos - search_state.test_pos.string + + search_state.test_pos.offset), search_state.c); + + search_state.super = + ((struct rx_superstate *) + ((char *)this_tr_table + - ((unsigned long) + ((struct rx_superstate *)0)->transitions))); + + setp = search_state.super->contents; + fprintf (stderr, " superstet (rx=%d, &=%x: ", + rxb->rx.rx_id, setp); + while (setp) + { + fprintf (stderr, "%d ", setp->id); + setp = setp->cdr; + } + fprintf (stderr, "\n"); + } +#endif + this_tr_table = next_tr_table; + ++search_state.test_pos.pos; + if (search_state.test_pos.pos == search_state.test_pos.end) + { + int burst_state; + try_burst_1: + burst_state = get_burst (&search_state.test_pos, + app_closure, stop); + switch (burst_state) + { + case rx_get_burst_continuation: + search_state.saved_this_tr_table = this_tr_table; + search_state.saved_next_tr_table = next_tr_table; + test_pc = rx_test_cache_hit_loop; + goto test_return_continuation; + + resume_continuation_1: + /* Continuation one jumps here to do its work: */ + search_state.saved_this_tr_table = this_tr_table; + search_state.saved_next_tr_table = next_tr_table; + goto try_burst_1; + + case rx_get_burst_ok: + /* get_burst succeeded...keep going */ + break; + + case rx_get_burst_no_more: + search_state.test_ret = rx_test_line_finished; + search_state.could_have_continued = 1; + goto test_do_return; + + case rx_get_burst_error: + /* An error... */ + search_state.test_ret = rx_test_internal_error; + goto test_do_return; + } + } + search_state.c = *search_state.test_pos.pos; + search_state.ifr = this_tr_table + search_state.c; + next_tr_table = (struct rx_inx *)search_state.ifr->data; + } /* Fast loop through cached transition tables */ + + /* Here when we ran out of cached next-char transitions. + * So, it will be necessary to do a more expensive + * dispatch on the current instruction. The superstate + * pointer is allowed to become invalid during next-char + * transitions -- now we must bring it up to date. + */ + search_state.super = + ((struct rx_superstate *) + ((char *)this_tr_table + - ((unsigned long) + ((struct rx_superstate *)0)->transitions))); + } + + /* We've encountered an instruction other than next-char. + * Dispatch that instruction: + */ + inx = (int)search_state.ifr->inx; +#ifdef RX_DEBUG + if (rx_debug_trace) + { + struct rx_superset * setp = search_state.super->contents; + + fprintf (stderr, "%d %d>> %s @ %d (%d)", search_state.line_no, + search_state.backtrack_depth, + inx_names[inx], + (search_state.test_pos.pos - search_state.test_pos.string + + (test_pos.half == 0 ? 0 : size1)), search_state.c); + + fprintf (stderr, " superstet (rx=%d, &=%x: ", + rxb->rx.rx_id, setp); + while (setp) + { + fprintf (stderr, "%d ", setp->id); + setp = setp->cdr; + } + fprintf (stderr, "\n"); + } +#endif + switch ((enum rx_opcode)inx) + { + case rx_do_side_effects: + + /* RX_DO_SIDE_EFFECTS occurs when we cross epsilon + * edges associated with parentheses, backreferencing, etc. + */ + { + struct rx_distinct_future * df = + (struct rx_distinct_future *)search_state.ifr->data_2; + struct rx_se_list * el = df->effects; + /* Side effects come in lists. This walks down + * a list, dispatching. + */ + while (el) + { + long effect; + effect = (long)el->car; + if (effect < 0) + { +#ifdef RX_DEBUG + if (rx_debug_trace) + { + struct rx_superset * setp = search_state.super->contents; + + fprintf (stderr, "....%d %d>> %s\n", search_state.line_no, + search_state.backtrack_depth, + efnames[-effect]); + } +#endif + switch ((enum re_side_effects) effect) + + { + case re_se_pushback: + search_state.ifr = &df->future_frame; + if (!search_state.ifr->data) + { + struct rx_superstate * sup; + sup = search_state.super; + rx_lock_superstate (rx, sup); + if (!rx_handle_cache_miss (&rxb->rx, + search_state.super, + search_state.c, + (search_state.ifr + ->data_2))) + { + rx_unlock_superstate (rx, sup); + search_state.test_ret = rx_test_internal_error; + goto test_do_return; + } + rx_unlock_superstate (rx, sup); + } + /* --search_state.test_pos.pos; */ + search_state.c = 't'; + search_state.super + = ((struct rx_superstate *) + ((char *)search_state.ifr->data + - (long)(((struct rx_superstate *)0) + ->transitions))); + goto top_of_cycle; + break; + case re_se_push0: + { + struct rx_counter_frame * old_cf + = (search_state.counter_stack + ? ((struct rx_counter_frame *) + search_state.counter_stack->sp) + : 0); + struct rx_counter_frame * cf; + PUSH (search_state.counter_stack, + sizeof (struct rx_counter_frame)); + cf = ((struct rx_counter_frame *) + search_state.counter_stack->sp); + cf->tag = re_se_iter; + cf->val = 0; + cf->inherited_from = 0; + cf->cdr = old_cf; + break; + } + case re_se_fail: + goto test_do_return; + case re_se_begbuf: + if (!AT_STRINGS_BEG ()) + goto test_do_return; + break; + case re_se_endbuf: + if (!AT_STRINGS_END ()) + goto test_do_return; + break; + case re_se_wordbeg: + if ( LETTER_P (&search_state.test_pos, 1) + && ( AT_STRINGS_BEG() + || !LETTER_P (&search_state.test_pos, 0))) + break; + else + goto test_do_return; + case re_se_wordend: + if ( !AT_STRINGS_BEG () + && LETTER_P (&search_state.test_pos, 0) + && (AT_STRINGS_END () + || !LETTER_P (&search_state.test_pos, 1))) + break; + else + goto test_do_return; + case re_se_wordbound: + if (AT_WORD_BOUNDARY (&search_state.test_pos)) + break; + else + goto test_do_return; + case re_se_notwordbound: + if (!AT_WORD_BOUNDARY (&search_state.test_pos)) + break; + else + goto test_do_return; + case re_se_hat: + if (AT_STRINGS_BEG ()) + { + if (rxb->not_bol) + goto test_do_return; + else + break; + } + else + { + char pos_c = *search_state.test_pos.pos; + if ( (SRCH_TRANSLATE (pos_c) + == SRCH_TRANSLATE('\n')) + && rxb->newline_anchor) + break; + else + goto test_do_return; + } + case re_se_dollar: + if (AT_STRINGS_END ()) + { + if (rxb->not_eol) + goto test_do_return; + else + break; + } + else + { + if ( ( SRCH_TRANSLATE (fetch_char + (&search_state.test_pos, 1, + app_closure, stop)) + == SRCH_TRANSLATE ('\n')) + && rxb->newline_anchor) + break; + else + goto test_do_return; + } + + case re_se_try: + /* This is the first side effect in every + * expression. + * + * FOR NO GOOD REASON...get rid of it... + */ + break; + + case re_se_pushpos: + { + int urhere = + ((int)(search_state.test_pos.pos + - search_state.test_pos.string) + + search_state.test_pos.offset); + struct rx_counter_frame * old_cf + = (search_state.counter_stack + ? ((struct rx_counter_frame *) + search_state.counter_stack->sp) + : 0); + struct rx_counter_frame * cf; + PUSH(search_state.counter_stack, + sizeof (struct rx_counter_frame)); + cf = ((struct rx_counter_frame *) + search_state.counter_stack->sp); + cf->tag = re_se_pushpos; + cf->val = urhere; + cf->inherited_from = 0; + cf->cdr = old_cf; + break; + } + + case re_se_chkpos: + { + int urhere = + ((int)(search_state.test_pos.pos + - search_state.test_pos.string) + + search_state.test_pos.offset); + struct rx_counter_frame * cf + = ((struct rx_counter_frame *) + search_state.counter_stack->sp); + if (cf->val == urhere) + goto test_do_return; + cf->val = urhere; + break; + } + break; + + case re_se_poppos: + POP(search_state.counter_stack, + sizeof (struct rx_counter_frame)); + break; + + + case re_se_at_dot: + case re_se_syntax: + case re_se_not_syntax: +#ifdef emacs + this release lacks emacs support; + (coming soon); +#endif + break; + case re_se_win: + case re_se_lparen: + case re_se_rparen: + case re_se_backref: + case re_se_iter: + case re_se_end_iter: + case re_se_tv: + case re_floogle_flap: + search_state.ret_val = 0; + goto test_do_return; + } + } + else + { +#ifdef RX_DEBUG + if (rx_debug_trace) + fprintf (stderr, "....%d %d>> %s %d %d\n", search_state.line_no, + search_state.backtrack_depth, + efnames2[rxb->se_params [effect].se], + rxb->se_params [effect].op1, + rxb->se_params [effect].op2); +#endif + switch (rxb->se_params [effect].se) + { + case re_se_win: + /* This side effect indicates that we've + * found a match, though not necessarily the + * best match. This is a fancy assignment to + * register 0 unless the caller didn't + * care about registers. In which case, + * this stops the match. + */ + { + int urhere = + ((int)(search_state.test_pos.pos + - search_state.test_pos.string) + + search_state.test_pos.offset); + + if ( (search_state.best_last_r < 0) + || (urhere + 1 > search_state.best_rparen[0])) + { + /* Record the best known and keep + * looking. + */ + int x; + for (x = 0; x <= search_state.last_l; ++x) + search_state.best_lparen[x] = search_state.lparen[x]; + search_state.best_last_l = search_state.last_l; + for (x = 0; x <= search_state.last_r; ++x) + search_state.best_rparen[x] = search_state.rparen[x]; + search_state.best_rparen[0] = urhere + 1; + search_state.best_last_r = search_state.last_r; + } + /* If we're not reporting the match-length + * or other register info, we need look no + * further. + */ + if (search_state.first_found) + { + search_state.test_ret = rx_test_found_first; + goto test_do_return; + } + } + break; + case re_se_lparen: + { + int urhere = + ((int)(search_state.test_pos.pos + - search_state.test_pos.string) + + search_state.test_pos.offset); + + int reg = rxb->se_params [effect].op1; +#if 0 + if (reg > search_state.last_l) +#endif + { + search_state.lparen[reg] = urhere + 1; + /* In addition to making this assignment, + * we now know that lower numbered regs + * that haven't already been assigned, + * won't be. We make sure they're + * filled with -1, so they can be + * recognized as unassigned. + */ + if (search_state.last_l < reg) + while (++search_state.last_l < reg) + search_state.lparen[search_state.last_l] = -1; + } + break; + } + + case re_se_rparen: + { + int urhere = + ((int)(search_state.test_pos.pos + - search_state.test_pos.string) + + search_state.test_pos.offset); + int reg = rxb->se_params [effect].op1; + search_state.rparen[reg] = urhere + 1; + if (search_state.last_r < reg) + { + while (++search_state.last_r < reg) + search_state.rparen[search_state.last_r] + = -1; + } + break; + } + + case re_se_backref: + { + int reg = rxb->se_params [effect].op1; + if ( reg > search_state.last_r + || search_state.rparen[reg] < 0) + goto test_do_return; + + { + int backref_status; + check_backreference: + backref_status + = back_check (&search_state.test_pos, + search_state.lparen[reg], + search_state.rparen[reg], + search_state.translate, + app_closure, + stop); + switch (backref_status) + { + case rx_back_check_continuation: + search_state.saved_reg = reg; + test_pc = rx_test_backreference_check; + goto test_return_continuation; + resume_continuation_2: + reg = search_state.saved_reg; + goto check_backreference; + case rx_back_check_fail: + /* Fail */ + goto test_do_return; + case rx_back_check_pass: + /* pass -- + * test_pos now advanced to last + * char matched by backref + */ + break; + } + } + break; + } + case re_se_iter: + { + struct rx_counter_frame * csp + = ((struct rx_counter_frame *) + search_state.counter_stack->sp); + if (csp->val == rxb->se_params[effect].op2) + goto test_do_return; + else + ++csp->val; + break; + } + case re_se_end_iter: + { + struct rx_counter_frame * csp + = ((struct rx_counter_frame *) + search_state.counter_stack->sp); + if (csp->val < rxb->se_params[effect].op1) + goto test_do_return; + else + { + struct rx_counter_frame * source = csp; + while (source->inherited_from) + source = source->inherited_from; + if (!source || !source->cdr) + { + POP(search_state.counter_stack, + sizeof(struct rx_counter_frame)); + } + else + { + source = source->cdr; + csp->val = source->val; + csp->tag = source->tag; + csp->cdr = 0; + csp->inherited_from = source; + } + } + break; + } + case re_se_tv: + /* is a noop */ + break; + case re_se_try: + case re_se_pushback: + case re_se_push0: + case re_se_pushpos: + case re_se_chkpos: + case re_se_poppos: + case re_se_at_dot: + case re_se_syntax: + case re_se_not_syntax: + case re_se_begbuf: + case re_se_hat: + case re_se_wordbeg: + case re_se_wordbound: + case re_se_notwordbound: + case re_se_wordend: + case re_se_endbuf: + case re_se_dollar: + case re_se_fail: + case re_floogle_flap: + search_state.ret_val = 0; + goto test_do_return; + } + } + el = el->cdr; + } + /* Now the side effects are done, + * so get the next instruction. + * and move on. + */ + search_state.ifr = &df->future_frame; + goto restart; + } + + case rx_backtrack_point: + { + /* A backtrack point indicates that we've reached a + * non-determinism in the superstate NFA. This is a + * loop that exhaustively searches the possibilities. + * + * A backtracking strategy is used. We keep track of what + * registers are valid so we can erase side effects. + * + * First, make sure there is some stack space to hold + * our state. + */ + + struct rx_backtrack_frame * bf; + + PUSH(search_state.backtrack_stack, + search_state.backtrack_frame_bytes); +#ifdef RX_DEBUG + ++search_state.backtrack_depth; +#endif + + bf = ((struct rx_backtrack_frame *) + search_state.backtrack_stack->sp); + { + bf->stk_super = search_state.super; + /* We prevent the current superstate from being + * deleted from the superstate cache. + */ + rx_lock_superstate (&rxb->rx, search_state.super); +#ifdef RX_DEBUG + bf->stk_search_state.line_no = search_state.line_no; +#endif + bf->stk_c = search_state.c; + bf->stk_test_pos = search_state.test_pos; + bf->stk_last_l = search_state.last_l; + bf->stk_last_r = search_state.last_r; + bf->df = ((struct rx_super_edge *) + search_state.ifr->data_2)->options; + bf->first_df = bf->df; + bf->counter_stack_sp = (search_state.counter_stack + ? search_state.counter_stack->sp + : 0); + bf->stk_test_ret = search_state.test_ret; + if (rxb->match_regs_on_stack) + { + int x; + regoff_t * stk = + (regoff_t *)((char *)bf + sizeof (*bf)); + for (x = 0; x <= search_state.last_l; ++x) + stk[x] = search_state.lparen[x]; + stk += x; + for (x = 0; x <= search_state.last_r; ++x) + stk[x] = search_state.rparen[x]; + } + } + + /* Here is a while loop whose body is mainly a function + * call and some code to handle a return from that + * function. + * + * From here on for the rest of `case backtrack_point' it + * is unsafe to assume that the search_state copies of + * variables saved on the backtracking stack are valid + * -- so read their values from the backtracking stack. + * + * This lets us use one generation fewer stack saves in + * the call-graph of a search. + */ + + while_non_det_options: +#ifdef RX_DEBUG + ++search_state.lines_found; + if (rx_debug_trace) + fprintf (stderr, "@@@ %d calls %d @@@\n", + search_state.line_no, search_state.lines_found); + + search_state.line_no = search_state.lines_found; +#endif + + if (bf->df->next_same_super_edge[0] == bf->first_df) + { + /* This is a tail-call optimization -- we don't recurse + * for the last of the possible futures. + */ + search_state.ifr = (bf->df->effects + ? &bf->df->side_effects_frame + : &bf->df->future_frame); + + rx_unlock_superstate (&rxb->rx, search_state.super); + POP(search_state.backtrack_stack, + search_state.backtrack_frame_bytes); +#ifdef RX_DEBUG + --search_state.backtrack_depth; +#endif + goto restart; + } + else + { + if (search_state.counter_stack) + { + struct rx_counter_frame * old_cf + = ((struct rx_counter_frame *)search_state.counter_stack->sp); + struct rx_counter_frame * cf; + PUSH(search_state.counter_stack, sizeof (struct rx_counter_frame)); + cf = ((struct rx_counter_frame *)search_state.counter_stack->sp); + cf->tag = old_cf->tag; + cf->val = old_cf->val; + cf->inherited_from = old_cf; + cf->cdr = 0; + } + /* `Call' this test-match block */ + search_state.ifr = (bf->df->effects + ? &bf->df->side_effects_frame + : &bf->df->future_frame); + goto recurse_test_match; + } + + /* Returns in this block are accomplished by + * goto test_do_return. There are two cases. + * If there is some search-stack left, + * then it is a return from a `recursive' call. + * If there is no search-stack left, then + * we should return to the fastmap/search loop. + */ + + test_do_return: + + if (!search_state.backtrack_stack) + { +#ifdef RX_DEBUG + if (rx_debug_trace) + fprintf (stderr, "!!! %d bails returning %d !!!\n", + search_state.line_no, search_state.test_ret); +#endif + + /* No more search-stack -- this test is done. */ + if (search_state.test_ret) + goto return_from_test_match; + else + goto error_in_testing_match; + } + + /* Returning from a recursive call to + * the test match block: + */ + + bf = ((struct rx_backtrack_frame *) + search_state.backtrack_stack->sp); +#ifdef RX_DEBUG + if (rx_debug_trace) + fprintf (stderr, "+++ %d returns %d (to %d)+++\n", + search_state.line_no, + search_state.test_ret, + bf->stk_search_state.line_no); +#endif + + while (search_state.counter_stack + && (!bf->counter_stack_sp + || (bf->counter_stack_sp + != search_state.counter_stack->sp))) + { + POP(search_state.counter_stack, + sizeof (struct rx_counter_frame)); + } + + if (search_state.test_ret == rx_test_error) + { + POP (search_state.backtrack_stack, + search_state.backtrack_frame_bytes); + goto test_do_return; + } + + /* If a non-longest match was found and that is good + * enough, return immediately. + */ + if ( (search_state.test_ret == rx_test_found_first) + && search_state.first_found) + { + rx_unlock_superstate (&rxb->rx, bf->stk_super); + POP (search_state.backtrack_stack, + search_state.backtrack_frame_bytes); + goto test_do_return; + } + + search_state.test_ret = bf->stk_test_ret; + search_state.last_l = bf->stk_last_l; + search_state.last_r = bf->stk_last_r; + bf->df = bf->df->next_same_super_edge[0]; + search_state.super = bf->stk_super; + search_state.c = bf->stk_c; +#ifdef RX_DEBUG + search_state.line_no = bf->stk_search_state.line_no; +#endif + + if (rxb->match_regs_on_stack) + { + int x; + regoff_t * stk = + (regoff_t *)((char *)bf + sizeof (*bf)); + for (x = 0; x <= search_state.last_l; ++x) + search_state.lparen[x] = stk[x]; + stk += x; + for (x = 0; x <= search_state.last_r; ++x) + search_state.rparen[x] = stk[x]; + } + + { + int x; + try_burst_2: + x = get_burst (&bf->stk_test_pos, app_closure, stop); + switch (x) + { + case rx_get_burst_continuation: + search_state.saved_bf = bf; + test_pc = rx_test_backtrack_return; + goto test_return_continuation; + resume_continuation_3: + bf = search_state.saved_bf; + goto try_burst_2; + case rx_get_burst_no_more: + /* Since we've been here before, it is some kind of + * error that we can't return. + */ + case rx_get_burst_error: + search_state.test_ret = rx_test_internal_error; + goto test_do_return; + case rx_get_burst_ok: + break; + } + } + search_state.test_pos = bf->stk_test_pos; + goto while_non_det_options; + } + + + case rx_cache_miss: + /* Because the superstate NFA is lazily constructed, + * and in fact may erode from underneath us, we sometimes + * have to construct the next instruction from the hard way. + * This invokes one step in the lazy-conversion. + */ + search_state.ifr = rx_handle_cache_miss (&rxb->rx, + search_state.super, + search_state.c, + search_state.ifr->data_2); + if (!search_state.ifr) + { + search_state.test_ret = rx_test_internal_error; + goto test_do_return; + } + goto restart; + + case rx_backtrack: + /* RX_BACKTRACK means that we've reached the empty + * superstate, indicating that match can't succeed + * from this point. + */ + goto test_do_return; + + case rx_next_char: + case rx_error_inx: + case rx_num_instructions: + search_state.ret_val = 0; + goto test_do_return; + } + goto pseudo_while_1; + } + + /* Healthy exits from the test-match loop do a + * `goto return_from_test_match' On the other hand, + * we might end up here. + */ + error_in_testing_match: + test_state = rx_test_error; + goto test_returns_to_search; + + /***** fastmap/search loop body + * considering the results testing for a match + */ + + return_from_test_match: + + if (search_state.best_last_l >= 0) + { + if (regs && (regs->start != search_state.best_lparen)) + { + bcopy (search_state.best_lparen, regs->start, + regs->num_regs * sizeof (int)); + bcopy (search_state.best_rparen, regs->end, + regs->num_regs * sizeof (int)); + } + if (regs && !rxb->no_sub) + { + int q; + int bound = (regs->num_regs > search_state.num_regs + ? regs->num_regs + : search_state.num_regs); + regoff_t * s = regs->start; + regoff_t * e = regs->end; + for (q = search_state.best_last_l + 1; q < bound; ++q) + s[q] = e[q] = -1; + } + search_state.ret_val = search_state.best_lparen[0]; + test_state = rx_test_ok; + goto test_returns_to_search; + } + else + { + test_state = rx_test_fail; + goto test_returns_to_search; + } + + test_return_continuation: + search_state.test_match_resume_pt = test_pc; + test_state = rx_test_continuation; + goto test_returns_to_search; + } +} + + + +#endif /* RX_WANT_RX_DEFS */ + + + +#else /* RX_WANT_SE_DEFS */ + /* Integers are used to represent side effects. + * + * Simple side effects are given negative integer names by these enums. + * + * Non-negative names are reserved for complex effects. + * + * Complex effects are those that take arguments. For example, + * a register assignment associated with a group is complex because + * it requires an argument to tell which group is being matched. + * + * The integer name of a complex effect is an index into rxb->se_params. + */ + + RX_DEF_SE(1, re_se_try, = -1) /* Epsilon from start state */ + + RX_DEF_SE(0, re_se_pushback, = re_se_try - 1) + RX_DEF_SE(0, re_se_push0, = re_se_pushback -1) + RX_DEF_SE(0, re_se_pushpos, = re_se_push0 - 1) + RX_DEF_SE(0, re_se_chkpos, = re_se_pushpos -1) + RX_DEF_SE(0, re_se_poppos, = re_se_chkpos - 1) + + RX_DEF_SE(1, re_se_at_dot, = re_se_poppos - 1) /* Emacs only */ + RX_DEF_SE(0, re_se_syntax, = re_se_at_dot - 1) /* Emacs only */ + RX_DEF_SE(0, re_se_not_syntax, = re_se_syntax - 1) /* Emacs only */ + + RX_DEF_SE(1, re_se_begbuf, = re_se_not_syntax - 1) /* match beginning of buffer */ + RX_DEF_SE(1, re_se_hat, = re_se_begbuf - 1) /* match beginning of line */ + + RX_DEF_SE(1, re_se_wordbeg, = re_se_hat - 1) + RX_DEF_SE(1, re_se_wordbound, = re_se_wordbeg - 1) + RX_DEF_SE(1, re_se_notwordbound, = re_se_wordbound - 1) + + RX_DEF_SE(1, re_se_wordend, = re_se_notwordbound - 1) + RX_DEF_SE(1, re_se_endbuf, = re_se_wordend - 1) + + /* This fails except at the end of a line. + * It deserves to go here since it is typicly one of the last steps + * in a match. + */ + RX_DEF_SE(1, re_se_dollar, = re_se_endbuf - 1) + + /* Simple effects: */ + RX_DEF_SE(1, re_se_fail, = re_se_dollar - 1) + + /* Complex effects. These are used in the 'se' field of + * a struct re_se_params. Indexes into the se array + * are stored as instructions on nfa edges. + */ + RX_DEF_CPLX_SE(1, re_se_win, = 0) + RX_DEF_CPLX_SE(1, re_se_lparen, = re_se_win + 1) + RX_DEF_CPLX_SE(1, re_se_rparen, = re_se_lparen + 1) + RX_DEF_CPLX_SE(0, re_se_backref, = re_se_rparen + 1) + RX_DEF_CPLX_SE(0, re_se_iter, = re_se_backref + 1) + RX_DEF_CPLX_SE(0, re_se_end_iter, = re_se_iter + 1) + RX_DEF_CPLX_SE(0, re_se_tv, = re_se_end_iter + 1) + +#endif + +#endif diff --git a/gnu/lib/libg++/libg++/except.c b/gnu/lib/libg++/libg++/except.c new file mode 100644 index 0000000..84adf22 --- /dev/null +++ b/gnu/lib/libg++/libg++/except.c @@ -0,0 +1,79 @@ +/* This is part of GNU C++ Library. +Copyright (C) 1994 Free Software Foundation + +This file is part of the GNU C++ Library. This library is free +software; you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) +any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +As a special exception, if you link this library with files +compiled with a GNU compiler to produce an executable, this does not cause +the resulting executable to be covered by the GNU General Public License. +This exception does not however invalidate any other reasons why +the executable file might be covered by the GNU General Public License. */ + +/* terminate(), unexpected(), set_terminate(), set_unexpected() as + well as the default terminate func and default unexpected func */ + +#if 0 +extern int printf(); +#endif + +typedef void (*vfp)(); + +void +__default_terminate() +{ + abort(); +} + +void +__default_unexpected() +{ + __default_terminate(); +} + +static vfp __terminate_func = __default_terminate; +static vfp __unexpected_func = __default_unexpected; + +vfp +set_terminate(func) +vfp func; +{ + vfp old = __terminate_func; + + __terminate_func = func; + return old; +} + +vfp +set_unexpected(func) +vfp func; +{ + vfp old = __unexpected_func; + + __unexpected_func = func; + return old; +} + +void +terminate() +{ + __terminate_func(); +} + +void +unexpected() +{ + __unexpected_func(); +} diff --git a/gnu/lib/libg++/libio/ioassign.cc b/gnu/lib/libg++/libio/ioassign.cc new file mode 100644 index 0000000..5441ae7 --- /dev/null +++ b/gnu/lib/libg++/libio/ioassign.cc @@ -0,0 +1,55 @@ +/* This is part of libio/iostream, providing -*- C++ -*- input/output. +Copyright (C) 1994 Free Software Foundation + +This file is part of the GNU IO Library. This library is free +software; you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) +any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +As a special exception, if you link this library with files +compiled with a GNU compiler to produce an executable, this does not cause +the resulting executable to be covered by the GNU General Public License. +This exception does not however invalidate any other reasons why +the executable file might be covered by the GNU General Public License. */ + +/* Written by Per Bothner (bothner@cygnus.com). */ + +#include <iostream.h> +#include "libioP.h" + +// These method are provided for backward compatibility reasons. +// It's generally poor style to use them. +// They are not supported by the ANSI/ISO working paper. + +_IO_istream_withassign& _IO_istream_withassign::operator=(istream& rhs) +{ + if (&rhs != (istream*)this) + { + if (!(_flags & (unsigned int)ios::dont_close)) delete rdbuf(); + init (rhs.rdbuf ()); + _flags |= ios::dont_close; + _gcount = 0; + } + return *this; +} + +_IO_ostream_withassign& _IO_ostream_withassign::operator=(ostream& rhs) +{ + if (&rhs != (ostream*)this) + { + if (!(_flags & (unsigned int)ios::dont_close)) delete rdbuf(); + init (rhs.rdbuf ()); + _flags |= ios::dont_close; + } + return *this; +} diff --git a/gnu/lib/libg++/libio/iogetdelim.c b/gnu/lib/libg++/libio/iogetdelim.c new file mode 100644 index 0000000..3dc5cd1 --- /dev/null +++ b/gnu/lib/libg++/libio/iogetdelim.c @@ -0,0 +1,102 @@ +/* +Copyright (C) 1994 Free Software Foundation + +This file is part of the GNU IO Library. This library is free +software; you can redistribute it and/or modify it under the +terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) +any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU CC; see the file COPYING. If not, write to +the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + +As a special exception, if you link this library with files +compiled with a GNU compiler to produce an executable, this does not cause +the resulting executable to be covered by the GNU General Public License. +This exception does not however invalidate any other reasons why +the executable file might be covered by the GNU General Public License. */ + +#ifdef __STDC__ +#include <stdlib.h> +#endif +#include "libioP.h" +#include <string.h> +#include <errno.h> + +/* Read up to (and including) a TERMINATOR from FP into *LINEPTR + (and null-terminate it). *LINEPTR is a pointer returned from malloc (or + NULL), pointing to *N characters of space. It is realloc'ed as + necessary. Returns the number of characters read (not including the + null terminator), or -1 on error or EOF. */ + +_IO_ssize_t +_IO_getdelim (lineptr, n, delimiter, fp) + char **lineptr; + _IO_size_t *n; + int delimiter; + _IO_FILE *fp; +{ + register _IO_ssize_t cur_len = 0; + _IO_ssize_t len; + + if (lineptr == NULL || n == NULL) + { +#ifdef EINVAL + errno = EINVAL; +#endif + return -1; + } + CHECK_FILE (fp, -1); + if (_IO_ferror (fp)) + return -1; + + if (*lineptr == NULL || *n == 0) + { + *n = 120; + *lineptr = (char *) malloc (*n); + if (*lineptr == NULL) + return -1; + } + + len = fp->_IO_read_end - fp->_IO_read_ptr; + if (len <= 0) + { + if (__underflow (fp) == EOF) + return -1; + len = fp->_IO_read_end - fp->_IO_read_ptr; + } + + for (;;) + { + _IO_size_t needed; + char *t; + t = (char *) memchr ((void *) fp->_IO_read_ptr, delimiter, len); + if (t != NULL) + len = (t - fp->_IO_read_ptr) + 1; + /* make enough space for len+1 (for final NUL) bytes. */ + needed = cur_len + len + 1; + if (needed > *n) + { + if (t == NULL && needed < 2 * *n) + needed = 2 * *n; /* Be generous. */ + *n = needed; + *lineptr = (char *) realloc (*lineptr, needed); + if (*lineptr == NULL) + return -1; + } + memcpy (*lineptr + cur_len, (void *) fp->_IO_read_ptr, len); + fp->_IO_read_ptr += len; + cur_len += len; + if (t != NULL || __underflow (fp) == EOF) + break; + len = fp->_IO_read_end - fp->_IO_read_ptr; + } + lineptr[cur_len] = '\0'; + return cur_len; +} |