diff options
Diffstat (limited to 'contrib/awk/re.c')
-rw-r--r-- | contrib/awk/re.c | 322 |
1 files changed, 0 insertions, 322 deletions
diff --git a/contrib/awk/re.c b/contrib/awk/re.c deleted file mode 100644 index 2ee9e6d..0000000 --- a/contrib/awk/re.c +++ /dev/null @@ -1,322 +0,0 @@ -/* - * re.c - compile regular expressions. - */ - -/* - * Copyright (C) 1991-2001 the Free Software Foundation, Inc. - * - * This file is part of GAWK, the GNU implementation of the - * AWK Programming Language. - * - * GAWK is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * GAWK is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA - */ - -#include "awk.h" - -static reg_syntax_t syn; - -/* make_regexp --- generate compiled regular expressions */ - -Regexp * -make_regexp(char *s, size_t len, int ignorecase, int dfa) -{ - Regexp *rp; - const char *rerr; - char *src = s; - char *temp; - char *end = s + len; - register char *dest; - register int c, c2; - - /* Handle escaped characters first. */ - - /* - * Build a copy of the string (in dest) with the - * escaped characters translated, and generate the regex - * from that. - */ - emalloc(dest, char *, len + 2, "make_regexp"); - temp = dest; - - while (src < end) { - if (*src == '\\') { - c = *++src; - switch (c) { - case 'a': - case 'b': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': - case 'x': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - c2 = parse_escape(&src); - if (c2 < 0) - cant_happen(); - /* - * Unix awk treats octal (and hex?) chars - * literally in re's, so escape regexp - * metacharacters. - */ - if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x') - && strchr("()|*+?.^$\\[]", c2) != NULL) - *dest++ = '\\'; - *dest++ = (char) c2; - break; - case '8': - case '9': /* a\9b not valid */ - *dest++ = c; - src++; - break; - case 'y': /* normally \b */ - /* gnu regex op */ - if (! do_traditional) { - *dest++ = '\\'; - *dest++ = 'b'; - src++; - break; - } - /* else, fall through */ - default: - *dest++ = '\\'; - *dest++ = (char) c; - src++; - break; - } /* switch */ - } else - *dest++ = *src++; /* not '\\' */ - } /* for */ - - *dest = '\0' ; /* Only necessary if we print dest ? */ - emalloc(rp, Regexp *, sizeof(*rp), "make_regexp"); - memset((char *) rp, 0, sizeof(*rp)); - rp->pat.allocated = 0; /* regex will allocate the buffer */ - emalloc(rp->pat.fastmap, char *, 256, "make_regexp"); - - if (ignorecase) - rp->pat.translate = casetable; - else - rp->pat.translate = NULL; - len = dest - temp; - if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL) - fatal("%s: /%s/", gettext(rerr), temp); - - /* gack. this must be done *after* re_compile_pattern */ - rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */ - if (dfa && ! ignorecase) { - dfacomp(temp, len, &(rp->dfareg), TRUE); - rp->dfa = TRUE; - } else - rp->dfa = FALSE; - - free(temp); - return rp; -} - -/* research --- do a regexp search. use dfa if possible */ - -int -research(Regexp *rp, register char *str, int start, - register size_t len, int need_start) -{ - char *ret = str; - int try_backref; - - /* - * Always do dfa search if can; if it fails, then even if - * need_start is true, we won't bother with the regex search. - */ - if (rp->dfa) { - char save; - int count = 0; - - /* - * dfa likes to stick a '\n' right after the matched - * text. So we just save and restore the character. - */ - save = str[start+len]; - ret = dfaexec(&(rp->dfareg), str+start, str+start+len, TRUE, - &count, &try_backref); - str[start+len] = save; - } - if (ret) { - if (need_start || rp->dfa == FALSE || try_backref) { - int res = re_search(&(rp->pat), str, start+len, - start, len, &(rp->regs)); - return res; - } else - return 1; - } else - return -1; -} - -/* refree --- free up the dynamic memory used by a compiled regexp */ - -void -refree(Regexp *rp) -{ - free(rp->pat.buffer); - free(rp->pat.fastmap); - if (rp->regs.start) - free(rp->regs.start); - if (rp->regs.end) - free(rp->regs.end); - if (rp->dfa) - dfafree(&(rp->dfareg)); - free(rp); -} - -/* dfaerror --- print an error message for the dfa routines */ - -void -dfaerror(const char *s) -{ - fatal("%s", s); -} - -/* re_update --- recompile a dynamic regexp */ - -Regexp * -re_update(NODE *t) -{ - NODE *t1; - - if ((t->re_flags & CASE) == IGNORECASE) { - if ((t->re_flags & CONST) != 0) - return t->re_reg; - t1 = force_string(tree_eval(t->re_exp)); - if (t->re_text != NULL) { - if (cmp_nodes(t->re_text, t1) == 0) { - free_temp(t1); - return t->re_reg; - } - unref(t->re_text); - } - t->re_text = dupnode(t1); - free_temp(t1); - } - if (t->re_reg != NULL) - refree(t->re_reg); - if (t->re_cnt > 0) - t->re_cnt++; - if (t->re_cnt > 10) - t->re_cnt = 0; - if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) { - t1 = force_string(tree_eval(t->re_exp)); - unref(t->re_text); - t->re_text = dupnode(t1); - free_temp(t1); - } - t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen, - IGNORECASE, t->re_cnt); - t->re_flags &= ~CASE; - t->re_flags |= IGNORECASE; - return t->re_reg; -} - -/* resetup --- choose what kind of regexps we match */ - -void -resetup() -{ - if (do_posix) - syn = RE_SYNTAX_POSIX_AWK; /* strict POSIX re's */ - else if (do_traditional) - syn = RE_SYNTAX_AWK; /* traditional Unix awk re's */ - else - syn = RE_SYNTAX_GNU_AWK; /* POSIX re's + GNU ops */ - - /* - * Interval expressions are off by default, since it's likely to - * break too many old programs to have them on. - */ - if (do_intervals) - syn |= RE_INTERVALS; - - (void) re_set_syntax(syn); - dfasyntax(syn, FALSE, '\n'); -} - -/* avoid_dfa --- FIXME: temporary kludge function until we have a new dfa.c */ - -int -avoid_dfa(NODE *re, char *str, size_t len) -{ - char *restr; - int relen; - int anchor, i; - char *end; - - if ((re->re_flags & CONST) != 0) { - restr = re->re_exp->stptr; - relen = re->re_exp->stlen; - } else { - restr = re->re_text->stptr; - relen = re->re_text->stlen; - } - - for (anchor = FALSE, i = 0; i < relen; i++) { - if (restr[i] == '^' || restr[i] == '$') { - anchor = TRUE; - break; - } - } - if (! anchor) - return FALSE; - - for (end = str + len; str < end; str++) - if (*str == '\n') - return TRUE; - - return FALSE; -} - -/* reisstring --- return TRUE if the RE match is a simple string match */ - -int -reisstring(char *text, size_t len, Regexp *re, char *buf) -{ - static char metas[] = ".*+(){}[]|?^$\\"; - int i; - int has_meta = FALSE; - int res; - char *matched; - - /* simple checking for has meta characters in re */ - for (i = 0; i < len; i++) { - if (strchr(metas, text[i]) != NULL) { - has_meta = TRUE; - break; - } - } - - /* make accessable to gdb */ - matched = &buf[RESTART(re, buf)]; - - if (has_meta) - return FALSE; /* give up early, can't be string match */ - - res = STREQN(text, matched, len); - - return res; -} |