summary refs log tree commit diff stats
path: root/contrib/awk/re.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/awk/re.c')
-rw-r--r-- contrib/awk/re.c 322
1 files changed, 0 insertions, 322 deletions
diff --git a/contrib/awk/re.c b/contrib/awk/re.c
deleted file mode 100644
index 2ee9e6d..0000000
--- a/contrib/awk/re.c
+++ /dev/null
@@ -1,322 +0,0 @@
-/*
- * re.c - compile regular expressions.
- */
-
-/*
- * Copyright (C) 1991-2001 the Free Software Foundation, Inc.
- *
- * This file is part of GAWK, the GNU implementation of the
- * AWK Programming Language.
- *
- * GAWK is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * GAWK is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-
-#include "awk.h"
-
-static reg_syntax_t syn; /* regex syntax flags; set by resetup() and handed to re_set_syntax() and dfasyntax() */
-
-/*
- * make_regexp --- generate compiled regular expressions
- *
- * s, len:     regexp source text and its length in bytes; the text is
- *             scanned up to s + len rather than to a terminating NUL.
- * ignorecase: when nonzero, casetable is installed as the pattern's
- *             translate table.
- * dfa:        when nonzero (and ignorecase is zero), a dfa matcher is
- *             compiled as well and rp->dfa is set to TRUE.
- *
- * Returns a freshly allocated Regexp; refree() releases it.
- * A pattern rejected by re_compile_pattern() is reported through fatal().
- */
-
-Regexp *
-make_regexp(char *s, size_t len, int ignorecase, int dfa)
-{
- Regexp *rp;
- const char *rerr;
- char *src = s;
- char *temp;
- char *end = s + len;
- register char *dest;
- register int c, c2;
-
- /* Handle escaped characters first. */
-
- /*
- * Build a copy of the string (in dest) with the
- * escaped characters translated, and generate the regex
- * from that.
- *
- * NOTE(review): the len + 2 size presumably leaves room for the
- * terminating NUL plus one extra output byte --- confirm.
- */
- emalloc(dest, char *, len + 2, "make_regexp");
- temp = dest; /* remember the start of the buffer; dest advances as we write */
-
- while (src < end) {
- if (*src == '\\') {
- c = *++src; /* the character following the backslash */
- switch (c) {
- case 'a':
- case 'b':
- case 'f':
- case 'n':
- case 'r':
- case 't':
- case 'v':
- case 'x':
- case '0':
- case '1':
- case '2':
- case '3':
- case '4':
- case '5':
- case '6':
- case '7':
- c2 = parse_escape(&src); /* presumably decodes the escape and advances src past it --- confirm */
- if (c2 < 0)
- cant_happen();
- /*
- * Unix awk treats octal (and hex?) chars
- * literally in re's, so escape regexp
- * metacharacters.
- */
- if (do_traditional && ! do_posix && (ISDIGIT(c) || c == 'x')
- && strchr("()|*+?.^$\\[]", c2) != NULL)
- *dest++ = '\\';
- *dest++ = (char) c2;
- break;
- case '8':
- case '9': /* a\9b not valid */
- *dest++ = c; /* the digit is copied without its backslash */
- src++;
- break;
- case 'y': /* normally \b */
- /* gnu regex op */
- if (! do_traditional) {
- *dest++ = '\\';
- *dest++ = 'b';
- src++;
- break;
- }
- /* else, fall through */
- default:
- *dest++ = '\\'; /* any other escape is passed through unchanged */
- *dest++ = (char) c;
- src++;
- break;
- } /* switch */
- } else
- *dest++ = *src++; /* not '\\' */
- } /* while */
-
- *dest = '\0' ; /* Only necessary if we print dest ? */
- emalloc(rp, Regexp *, sizeof(*rp), "make_regexp");
- memset((char *) rp, 0, sizeof(*rp));
- rp->pat.allocated = 0; /* regex will allocate the buffer */
- emalloc(rp->pat.fastmap, char *, 256, "make_regexp");
-
- if (ignorecase)
- rp->pat.translate = casetable;
- else
- rp->pat.translate = NULL;
- len = dest - temp; /* length of the translated pattern */
- if ((rerr = re_compile_pattern(temp, len, &(rp->pat))) != NULL)
- fatal("%s: /%s/", gettext(rerr), temp);
-
- /* gack. this must be done *after* re_compile_pattern */
- rp->pat.newline_anchor = FALSE; /* don't get \n in middle of string */
- if (dfa && ! ignorecase) { /* the dfa matcher is only built for case-sensitive patterns */
- dfacomp(temp, len, &(rp->dfareg), TRUE);
- rp->dfa = TRUE;
- } else
- rp->dfa = FALSE;
-
- free(temp); /* the translated copy is no longer referenced in this function */
- return rp;
-}
-
-/*
- * research --- search str[start .. start+len) for a match of rp
- *
- * If rp has a dfa matcher it is run first. A dfa miss is final and
- * yields -1, even when need_start is set. After a dfa hit, 1 is returned
- * straight away when the caller does not need the start of the match and
- * dfaexec() left try_backref zero. In every other case the value of
- * re_search() is returned.
- */
-
-int
-research(Regexp *rp, register char *str, int start,
-    register size_t len, int need_start)
-{
-    char *hit = str;
-    int try_backref;
-
-    if (rp->dfa) {
-        int count = 0;
-        /*
-         * dfaexec may drop a '\n' just past the text it is given,
-         * so remember that byte and put it back afterwards.
-         */
-        char saved = str[start+len];
-
-        hit = dfaexec(&(rp->dfareg), str+start, str+start+len, TRUE,
-                &count, &try_backref);
-        str[start+len] = saved;
-    }
-
-    if (hit == NULL)
-        return -1;
-
-    /*
-     * try_backref is only examined when rp->dfa is set, i.e. only
-     * after dfaexec() has had the chance to store into it.
-     */
-    if (! need_start && rp->dfa != FALSE && ! try_backref)
-        return 1;
-
-    return re_search(&(rp->pat), str, start+len,
-            start, len, &(rp->regs));
-}
-
-/*
- * refree --- release a compiled regexp together with the storage
- * hanging off it (pattern buffer, fastmap, register arrays and, when
- * present, the dfa matcher).
- */
-
-void
-refree(Regexp *rp)
-{
-    /* the dfa matcher exists only when rp->dfa is set */
-    if (rp->dfa)
-        dfafree(&(rp->dfareg));
-
-    /* the register arrays may never have been allocated */
-    if (rp->regs.end != NULL)
-        free(rp->regs.end);
-    if (rp->regs.start != NULL)
-        free(rp->regs.start);
-
-    free(rp->pat.fastmap);
-    free(rp->pat.buffer);
-    free(rp);
-}
-
-/*
- * dfaerror --- print an error message for the dfa routines
- *
- * s is the message text; it is handed to fatal() through a "%s" format
- * so that any '%' characters in it are printed literally.
- */
-
-void
-dfaerror(const char *s)
-{
- fatal("%s", s);
-}
-
-/*
- * re_update --- recompile a dynamic regexp
- *
- * Returns the compiled regexp for node t. The existing t->re_reg is
- * reused when the case setting recorded in t->re_flags equals IGNORECASE
- * and either the regexp is CONST or its freshly evaluated text compares
- * equal to the cached t->re_text. Otherwise the old t->re_reg is freed,
- * t->re_text is refreshed, and the text is compiled again with
- * make_regexp().
- */
-
-Regexp *
-re_update(NODE *t)
-{
- NODE *t1;
-
- if ((t->re_flags & CASE) == IGNORECASE) { /* case setting unchanged since the last compile */
- if ((t->re_flags & CONST) != 0) /* constant regexp: nothing to re-evaluate */
- return t->re_reg;
- t1 = force_string(tree_eval(t->re_exp)); /* current text of the regexp expression */
- if (t->re_text != NULL) {
- if (cmp_nodes(t->re_text, t1) == 0) { /* same text as last time: reuse */
- free_temp(t1);
- return t->re_reg;
- }
- unref(t->re_text);
- }
- t->re_text = dupnode(t1);
- free_temp(t1);
- }
- if (t->re_reg != NULL) /* discard the stale compiled regexp */
- refree(t->re_reg);
- if (t->re_cnt > 0) /* re_cnt is passed below as make_regexp()'s dfa argument */
- t->re_cnt++;
- if (t->re_cnt > 10) /* once reset to 0 it is not incremented again here */
- t->re_cnt = 0;
- if (t->re_text == NULL || (t->re_flags & CASE) != IGNORECASE) { /* no cached text, or the case setting changed */
- t1 = force_string(tree_eval(t->re_exp));
- unref(t->re_text);
- t->re_text = dupnode(t1);
- free_temp(t1);
- }
- t->re_reg = make_regexp(t->re_text->stptr, t->re_text->stlen,
- IGNORECASE, t->re_cnt);
- t->re_flags &= ~CASE; /* record the IGNORECASE value used for this compile */
- t->re_flags |= IGNORECASE;
- return t->re_reg;
-}
-
-/*
- * resetup --- pick the regexp syntax and install it in both the regex
- * and the dfa matchers.
- */
-
-void
-resetup()
-{
-    /*
-     * do_posix:       strict POSIX re's
-     * do_traditional: traditional Unix awk re's
-     * otherwise:      POSIX re's + GNU ops
-     */
-    syn = do_posix ? RE_SYNTAX_POSIX_AWK
-        : do_traditional ? RE_SYNTAX_AWK
-        : RE_SYNTAX_GNU_AWK;
-
-    /*
-     * Interval expressions stay off unless asked for, because turning
-     * them on is likely to break too many old programs.
-     */
-    if (do_intervals)
-        syn |= RE_INTERVALS;
-
-    (void) re_set_syntax(syn);
-    dfasyntax(syn, FALSE, '\n');
-}
-
-/*
- * avoid_dfa --- FIXME: temporary kludge function until we have a new dfa.c
- *
- * re:       regexp node; its text is taken from re->re_exp when the CONST
- *           flag is set and from re->re_text otherwise.
- * str, len: the subject text and its length in bytes.
- *
- * Returns TRUE when the regexp text contains a '^' or '$' anywhere and
- * the first len bytes of str contain a newline; FALSE otherwise.
- *
- * Lengths are kept in size_t so that a long string is never narrowed to
- * int, and the scans use memchr() so embedded NUL bytes are handled by
- * length rather than by termination.
- */
-
-int
-avoid_dfa(NODE *re, char *str, size_t len)
-{
-    const char *restr;
-    size_t relen;
-
-    if ((re->re_flags & CONST) != 0) {
-        restr = re->re_exp->stptr;
-        relen = re->re_exp->stlen;
-    } else {
-        restr = re->re_text->stptr;
-        relen = re->re_text->stlen;
-    }
-
-    /* no anchor character in the regexp text: nothing to avoid */
-    if (memchr(restr, '^', relen) == NULL
-        && memchr(restr, '$', relen) == NULL)
-        return FALSE;
-
-    /* an anchor is present, so a newline in the subject decides */
-    return memchr(str, '\n', len) != NULL ? TRUE : FALSE;
-}
-
-/*
- * reisstring --- return TRUE if the RE match is a simple string match
- *
- * text, len: the regexp text and its length in bytes.
- * re, buf:   the compiled regexp and the buffer it was matched against;
- *            RESTART(re, buf) gives the offset of the match within buf.
- *
- * Returns FALSE as soon as text is found to contain a regexp
- * metacharacter; otherwise returns the result of comparing the first
- * len bytes of text with the matched text via STREQN().
- */
-
-int
-reisstring(char *text, size_t len, Regexp *re, char *buf)
-{
-    static const char metas[] = ".*+(){}[]|?^$\\";
-    size_t i;    /* size_t to match len; avoids a signed/unsigned comparison */
-    char *matched;
-
-    /* make accessible to gdb */
-    matched = &buf[RESTART(re, buf)];
-
-    /*
-     * Simple check for metacharacters in the regexp text. strchr()
-     * also matches the terminating NUL of metas, so a NUL byte in
-     * text is treated as a metacharacter too.
-     */
-    for (i = 0; i < len; i++) {
-        if (strchr(metas, text[i]) != NULL)
-            return FALSE;    /* give up early, can't be string match */
-    }
-
-    return STREQN(text, matched, len);
-}
OpenPOWER on IntegriCloud