diff options
author | jraynard <jraynard@FreeBSD.org> | 1997-10-14 18:17:11 +0000 |
---|---|---|
committer | jraynard <jraynard@FreeBSD.org> | 1997-10-14 18:17:11 +0000 |
commit | a46c41193ff2573a4c910e19b570e9c253e714a1 (patch) | |
tree | d84200da2f7f2d8f1321c265bc6ddd7ce15633f8 /contrib/awk/main.c | |
download | FreeBSD-src-a46c41193ff2573a4c910e19b570e9c253e714a1.zip FreeBSD-src-a46c41193ff2573a4c910e19b570e9c253e714a1.tar.gz |
Virgin import of GNU awk 3.0.3
Diffstat (limited to 'contrib/awk/main.c')
-rw-r--r-- | contrib/awk/main.c | 735 |
1 files changed, 735 insertions, 0 deletions
diff --git a/contrib/awk/main.c b/contrib/awk/main.c new file mode 100644 index 0000000..92445de --- /dev/null +++ b/contrib/awk/main.c @@ -0,0 +1,735 @@ +/* + * main.c -- Expression tree constructors and main program for gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" +#include "getopt.h" +#include "patchlevel.h" + +static void usage P((int exitval, FILE *fp)); +static void copyleft P((void)); +static void cmdline_fs P((char *str)); +static void init_args P((int argc0, int argc, char *argv0, char **argv)); +static void init_vars P((void)); +static void pre_assign P((char *v)); +RETSIGTYPE catchsig P((int sig, int code)); +static void nostalgia P((void)); +static void version P((void)); + +/* These nodes store all the special variables AWK uses */ +NODE *ARGC_node, *ARGIND_node, *ARGV_node, *CONVFMT_node, *ENVIRON_node; +NODE *ERRNO_node, *FIELDWIDTHS_node, *FILENAME_node, *FNR_node, *FS_node; +NODE *IGNORECASE_node, *NF_node, *NR_node, *OFMT_node, *OFS_node; +NODE *ORS_node, *RLENGTH_node, *RSTART_node, *RS_node, *RT_node, *SUBSEP_node; + +long NF; +long NR; +long FNR; +int IGNORECASE; +char *OFS; +char *ORS; +char *OFMT; + +/* + * CONVFMT is a convenience pointer for the current number to string format. + * We must supply an initial value to avoid recursion problems of + * set_CONVFMT -> fmt_index -> r_force_string: gets NULL CONVFMT + * Fun, fun, fun, fun. + */ +char *CONVFMT = "%.6g"; + +int errcount = 0; /* error counter, used by yyerror() */ + +NODE *Nnull_string; /* The global null string */ + +/* The name the program was invoked under, for error messages */ +const char *myname; + +/* A block of AWK code to be run before running the program */ +NODE *begin_block = NULL; + +/* A block of AWK code to be run after the last input file */ +NODE *end_block = NULL; + +int exiting = FALSE; /* Was an "exit" statement executed? */ +int exit_val = 0; /* optional exit value */ + +#if defined(YYDEBUG) || defined(DEBUG) +extern int yydebug; +#endif + +struct src *srcfiles = NULL; /* source file name(s) */ +long numfiles = -1; /* how many source files */ + +int do_traditional = FALSE; /* no gnu extensions, add traditional weirdnesses */ +int do_posix = FALSE; /* turn off gnu and unix extensions */ +int do_lint = FALSE; /* provide warnings about questionable stuff */ +int do_lint_old = FALSE; /* warn about stuff not in V7 awk */ +int do_nostalgia = FALSE; /* provide a blast from the past */ +int do_intervals = FALSE; /* allow {...,...} in regexps */ + +int in_begin_rule = FALSE; /* we're in a BEGIN rule */ +int in_end_rule = FALSE; /* we're in a END rule */ + +int output_is_tty = FALSE; /* control flushing of output */ + +extern char *version_string; /* current version, for printing */ + +/* The parse tree is stored here. */ +NODE *expression_value; + +static struct option optab[] = { + { "compat", no_argument, & do_traditional, 1 }, + { "traditional", no_argument, & do_traditional, 1 }, + { "lint", no_argument, & do_lint, 1 }, + { "lint-old", no_argument, & do_lint_old, 1 }, + { "posix", no_argument, & do_posix, 1 }, + { "nostalgia", no_argument, & do_nostalgia, 1 }, + { "copyleft", no_argument, NULL, 'C' }, + { "copyright", no_argument, NULL, 'C' }, + { "field-separator", required_argument, NULL, 'F' }, + { "file", required_argument, NULL, 'f' }, + { "re-interval", no_argument, & do_intervals, 1 }, + { "source", required_argument, NULL, 's' }, + { "assign", required_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "usage", no_argument, NULL, 'u' }, + { "help", no_argument, NULL, 'u' }, +#ifdef DEBUG + { "parsedebug", no_argument, NULL, 'D' }, +#endif + { NULL, 0, NULL, '\0' } +}; + +/* main --- process args, parse program, run it, clean up */ + +int +main(argc, argv) +int argc; +char **argv; +{ + int c; + char *scan; + /* the + on the front tells GNU getopt not to rearrange argv */ + const char *optlist = "+F:f:v:W;m:"; + int stopped_early = FALSE; + int old_optind; + extern int optind; + extern int opterr; + extern char *optarg; + + setlocale(LC_CTYPE, ""); + setlocale(LC_COLLATE, ""); + + (void) signal(SIGFPE, (RETSIGTYPE (*) P((int))) catchsig); + (void) signal(SIGSEGV, (RETSIGTYPE (*) P((int))) catchsig); +#ifdef SIGBUS + (void) signal(SIGBUS, (RETSIGTYPE (*) P((int))) catchsig); +#endif + + myname = gawk_name(argv[0]); + argv[0] = (char *) myname; + os_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ + + /* remove sccs gunk */ + if (strncmp(version_string, "@(#)", 4) == 0) + version_string += 4; + + if (argc < 2) + usage(1, stderr); + + /* initialize the null string */ + Nnull_string = make_string("", 0); + Nnull_string->numbr = 0.0; + Nnull_string->type = Node_val; + Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER); + + /* + * Tell the regex routines how they should work. + * Do this before initializing variables, since + * they could want to do a regexp compile. + */ + resetup(); + + /* Set up the special variables */ + /* + * Note that this must be done BEFORE arg parsing else -F + * breaks horribly + */ + init_vars(); + + /* worst case */ + emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main"); + memset(srcfiles, '\0', argc * sizeof(struct src)); + + /* we do error messages ourselves on invalid options */ + opterr = FALSE; + + /* option processing. ready, set, go! */ + for (optopt = 0, old_optind = 1; + (c = getopt_long(argc, argv, optlist, optab, NULL)) != EOF; + optopt = 0, old_optind = optind) { + if (do_posix) + opterr = TRUE; + + switch (c) { + case 'F': + cmdline_fs(optarg); + break; + + case 'f': + /* + * a la MKS awk, allow multiple -f options. + * this makes function libraries real easy. + * most of the magic is in the scanner. + * + * The following is to allow for whitespace at the end + * of a #! /bin/gawk line in an executable file + */ + scan = optarg; + while (ISSPACE(*scan)) + scan++; + + ++numfiles; + srcfiles[numfiles].stype = SOURCEFILE; + if (*scan == '\0') + srcfiles[numfiles].val = argv[optind++]; + else + srcfiles[numfiles].val = optarg; + break; + + case 'v': + pre_assign(optarg); + break; + + case 'm': + /* + * Research awk extension. + * -mf nnn set # fields, gawk ignores + * -mr nnn set record length, ditto + */ + if (do_lint) + warning("-m[fr] option irrelevant in gawk"); + if (optarg[0] != 'r' && optarg[0] != 'f') + warning("-m option usage: `-m[fr] nnn'"); + if (optarg[1] == '\0') + optind++; + break; + + case 'W': /* gawk specific options - now in getopt_long */ + fprintf(stderr, "%s: option `-W %s' unrecognized, ignored\n", + argv[0], optarg); + break; + + /* These can only come from long form options */ + case 'C': + copyleft(); + break; + + case 's': + if (optarg[0] == '\0') + warning("empty argument to --source ignored"); + else { + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = optarg; + } + break; + + case 'u': + usage(0, stdout); /* per coding stds */ + break; + + case 'V': + version(); + break; + +#ifdef DEBUG + case 'D': + yydebug = 2; + break; +#endif + + case 0: + /* + * getopt_long found an option that sets a variable + * instead of returning a letter. Do nothing, just + * cycle around for the next one. + */ + break; + + case '?': + default: + /* + * New behavior. If not posix, an unrecognized + * option stops argument processing so that it can + * go into ARGV for the awk program to see. This + * makes use of ``#! /bin/gawk -f'' easier. + * + * However, it's never simple. If optopt is set, + * an option that requires an argument didn't get the + * argument. We care because if opterr is 0, then + * getopt_long won't print the error message for us. + */ + if (! do_posix + && (optopt == '\0' || strchr(optlist, optopt) == NULL)) { + /* + * can't just do optind--. In case of an + * option with >= 2 letters, getopt_long + * won't have incremented optind. + */ + optind = old_optind; + stopped_early = TRUE; + goto out; + } else if (optopt != '\0') + /* Use 1003.2 required message format */ + fprintf(stderr, + "%s: option requires an argument -- %c\n", + myname, optopt); + /* else + let getopt print error message for us */ + break; + } + } +out: + + if (do_nostalgia) + nostalgia(); + + /* check for POSIXLY_CORRECT environment variable */ + if (! do_posix && getenv("POSIXLY_CORRECT") != NULL) { + do_posix = TRUE; + if (do_lint) + warning( + "environment variable `POSIXLY_CORRECT' set: turning on --posix"); + } + + if (do_posix) { + if (do_traditional) /* both on command line */ + warning("--posix overrides --traditional"); + else + do_traditional = TRUE; + /* + * POSIX compliance also implies + * no GNU extensions either. + */ + } + + /* + * Tell the regex routines how they should work. + * Do this again, after argument processing, since do_posix + * and do_traditional are now paid attention to by resetup(). + */ + if (do_traditional || do_posix || do_intervals) { + resetup(); + + /* now handle RS and FS. have to be careful with FS */ + set_RS(); + if (using_fieldwidths()) { + set_FS(); + set_FIELDWIDTHS(); + } else + set_FS(); + } + +#ifdef DEBUG + setbuf(stdout, (char *) NULL); /* make debugging easier */ +#endif + if (isatty(fileno(stdout))) + output_is_tty = TRUE; + /* No -f or --source options, use next arg */ + if (numfiles == -1) { + if (optind > argc - 1 || stopped_early) /* no args left or no program */ + usage(1, stderr); + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = argv[optind]; + optind++; + } + + init_args(optind, argc, (char *) myname, argv); + (void) tokexpand(); + + /* Read in the program */ + if (yyparse() != 0 || errcount != 0) + exit(1); + /* recover any space from C based alloca */ +#ifdef C_ALLOCA + (void) alloca(0); +#endif + + /* Set up the field variables */ + init_fields(); + + if (do_lint && begin_block == NULL && expression_value == NULL + && end_block == NULL) + warning("no program"); + + if (begin_block != NULL) { + in_begin_rule = TRUE; + (void) interpret(begin_block); + } + in_begin_rule = FALSE; + if (! exiting && (expression_value != NULL || end_block != NULL)) + do_input(); + if (end_block != NULL) { + in_end_rule = TRUE; + (void) interpret(end_block); + } + in_end_rule = FALSE; + if (close_io() != 0 && exit_val == 0) + exit_val = 1; + exit(exit_val); /* more portable */ + return exit_val; /* to suppress warnings */ +} + +/* usage --- print usage information and exit */ + +static void +usage(exitval, fp) +int exitval; +FILE *fp; +{ + char *opt1 = " -f progfile [--]"; + char *regops = " [POSIX or GNU style options]"; + + fprintf(fp, "Usage: %s%s%s file ...\n\t%s%s [--] %cprogram%c file ...\n", + myname, regops, opt1, myname, regops, quote, quote); + + /* GNU long options info. Gack. */ + fputs("POSIX options:\t\tGNU long options:\n", fp); + fputs("\t-f progfile\t\t--file=progfile\n", fp); + fputs("\t-F fs\t\t\t--field-separator=fs\n", fp); + fputs("\t-v var=val\t\t--assign=var=val\n", fp); + fputs("\t-m[fr] val\n", fp); + fputs("\t-W compat\t\t--compat\n", fp); + fputs("\t-W copyleft\t\t--copyleft\n", fp); + fputs("\t-W copyright\t\t--copyright\n", fp); + fputs("\t-W help\t\t\t--help\n", fp); + fputs("\t-W lint\t\t\t--lint\n", fp); + fputs("\t-W lint-old\t\t--lint-old\n", fp); +#ifdef NOSTALGIA + fputs("\t-W nostalgia\t\t--nostalgia\n", fp); +#endif +#ifdef DEBUG + fputs("\t-W parsedebug\t\t--parsedebug\n", fp); +#endif + fputs("\t-W posix\t\t--posix\n", fp); + fputs("\t-W re-interval\t\t--re-interval\n", fp); + fputs("\t-W source=program-text\t--source=program-text\n", fp); + fputs("\t-W traditional\t\t--traditional\n", fp); + fputs("\t-W usage\t\t--usage\n", fp); + fputs("\t-W version\t\t--version\n", fp); + fputs("\nReport bugs to bug-gnu-utils@prep.ai.mit.edu,\n", fp); + fputs("with a Cc: to arnold@gnu.ai.mit.edu\n", fp); + exit(exitval); +} + +/* copyleft --- print out the short GNU copyright information */ + +static void +copyleft() +{ + static char blurb_part1[] = +"Copyright (C) 1989, 1991-1997 Free Software Foundation.\n\ +\n\ +This program is free software; you can redistribute it and/or modify\n\ +it under the terms of the GNU General Public License as published by\n\ +the Free Software Foundation; either version 2 of the License, or\n\ +(at your option) any later version.\n\ +\n"; + static char blurb_part2[] = +"This program is distributed in the hope that it will be useful,\n\ +but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ +GNU General Public License for more details.\n\ +\n"; + static char blurb_part3[] = +"You should have received a copy of the GNU General Public License\n\ +along with this program; if not, write to the Free Software\n\ +Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.\n"; + + /* multiple blurbs are needed for some brain dead compilers. */ + fputs(blurb_part1, stdout); + fputs(blurb_part2, stdout); + fputs(blurb_part3, stdout); + fflush(stdout); + exit(0); +} + +/* cmdline_fs --- set FS from the command line */ + +static void +cmdline_fs(str) +char *str; +{ + register NODE **tmp; + + tmp = get_lhs(FS_node, (Func_ptr *) 0); + unref(*tmp); + /* + * Only if in full compatibility mode check for the stupid special + * case so -F\t works as documented in awk book even though the shell + * hands us -Ft. Bleah! + * + * Thankfully, Posix didn't propogate this "feature". + */ + if (str[0] == 't' && str[1] == '\0') { + if (do_lint) + warning("-Ft does not set FS to tab in POSIX awk"); + if (do_traditional && ! do_posix) + str[0] = '\t'; + } + *tmp = make_str_node(str, strlen(str), SCAN); /* do process escapes */ + set_FS(); +} + +/* init_args --- set up ARGV from stuff on the command line */ + +static void +init_args(argc0, argc, argv0, argv) +int argc0, argc; +char *argv0; +char **argv; +{ + int i, j; + NODE **aptr; + + ARGV_node = install("ARGV", node(Nnull_string, Node_var_array, (NODE *) NULL)); + aptr = assoc_lookup(ARGV_node, tmp_number(0.0)); + *aptr = make_string(argv0, strlen(argv0)); + (*aptr)->flags |= MAYBE_NUM; + for (i = argc0, j = 1; i < argc; i++) { + aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j)); + *aptr = make_string(argv[i], strlen(argv[i])); + (*aptr)->flags |= MAYBE_NUM; + j++; + } + ARGC_node = install("ARGC", + node(make_number((AWKNUM) j), Node_var, (NODE *) NULL)); +} + +/* + * Set all the special variables to their initial values. + * Note that some of the variables that have set_FOO routines should + * *N*O*T* have those routines called upon initialization, and thus + * they have NULL entries in that field. This is notably true of FS + * and IGNORECASE. + */ +struct varinit { + NODE **spec; + const char *name; + NODETYPE type; + const char *strval; + AWKNUM numval; + Func_ptr assign; +}; +static struct varinit varinit[] = { +{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT }, +{&NF_node, "NF", Node_NF, NULL, -1, set_NF }, +{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, NULL }, +{&NR_node, "NR", Node_NR, NULL, 0, set_NR }, +{&FNR_node, "FNR", Node_FNR, NULL, 0, set_FNR }, +{&FS_node, "FS", Node_FS, " ", 0, NULL }, +{&RS_node, "RS", Node_RS, "\n", 0, set_RS }, +{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, NULL, 0, NULL }, +{&FILENAME_node, "FILENAME", Node_var, "", 0, NULL }, +{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS }, +{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS }, +{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT }, +{&RLENGTH_node, "RLENGTH", Node_var, NULL, 0, NULL }, +{&RSTART_node, "RSTART", Node_var, NULL, 0, NULL }, +{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, NULL }, +{&ARGIND_node, "ARGIND", Node_var, NULL, 0, NULL }, +{&ERRNO_node, "ERRNO", Node_var, NULL, 0, NULL }, +{&RT_node, "RT", Node_var, "", 0, NULL }, +{0, NULL, Node_illegal, NULL, 0, NULL }, +}; + +/* init_vars --- actually initialize everything in the symbol table */ + +static void +init_vars() +{ + register struct varinit *vp; + + for (vp = varinit; vp->name; vp++) { + *(vp->spec) = install((char *) vp->name, + node(vp->strval == NULL ? make_number(vp->numval) + : make_string((char *) vp->strval, + strlen(vp->strval)), + vp->type, (NODE *) NULL)); + (*(vp->spec))->flags |= SCALAR; + if (vp->assign) + (*(vp->assign))(); + } +} + +/* load_environ --- populate the ENVIRON array */ + +void +load_environ() +{ +#if ! (defined(MSDOS) && !defined(DJGPP)) && ! defined(OS2) && ! (defined(VMS) && defined(__DECC)) + extern char **environ; +#endif + register char *var, *val, *cp; + NODE **aptr; + register int i; + + ENVIRON_node = install("ENVIRON", + node(Nnull_string, Node_var, (NODE *) NULL)); + for (i = 0; environ[i] != NULL; i++) { + static char nullstr[] = ""; + + var = environ[i]; + val = strchr(var, '='); + if (val != NULL) + *val++ = '\0'; + else + val = nullstr; + aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen(var))); + *aptr = make_string(val, strlen(val)); + (*aptr)->flags |= (MAYBE_NUM|SCALAR); + + /* restore '=' so that system() gets a valid environment */ + if (val != nullstr) + *--val = '='; + } + /* + * Put AWKPATH into ENVIRON if it's not there. + * This allows querying it from outside gawk. + */ + if ((cp = getenv("AWKPATH")) == NULL) { + aptr = assoc_lookup(ENVIRON_node, tmp_string("AWKPATH", 7)); + *aptr = make_string(defpath, strlen(defpath)); + (*aptr)->flags |= SCALAR; + } +} + +/* arg_assign --- process a command-line assignment */ + +char * +arg_assign(arg) +char *arg; +{ + char *cp, *cp2; + int badvar; + Func_ptr after_assign = NULL; + NODE *var; + NODE *it; + NODE **lhs; + + cp = strchr(arg, '='); + if (cp != NULL) { + *cp++ = '\0'; + /* first check that the variable name has valid syntax */ + badvar = FALSE; + if (! isalpha(arg[0]) && arg[0] != '_') + badvar = TRUE; + else + for (cp2 = arg+1; *cp2; cp2++) + if (! isalnum(*cp2) && *cp2 != '_') { + badvar = TRUE; + break; + } + if (badvar) + fatal("illegal name `%s' in variable assignment", arg); + + /* + * Recent versions of nawk expand escapes inside assignments. + * This makes sense, so we do it too. + */ + it = make_str_node(cp, strlen(cp), SCAN); + it->flags |= (MAYBE_NUM|SCALAR); + var = variable(arg, FALSE, Node_var); + lhs = get_lhs(var, &after_assign); + unref(*lhs); + *lhs = it; + if (after_assign != NULL) + (*after_assign)(); + *--cp = '='; /* restore original text of ARGV */ + } + return cp; +} + +/* pre_assign --- handle -v, print a message and die if a problem */ + +static void +pre_assign(v) +char *v; +{ + if (arg_assign(v) == NULL) { + fprintf(stderr, + "%s: `%s' argument to `-v' not in `var=value' form\n", + myname, v); + usage(1, stderr); + } +} + +/* catchsig --- catch signals */ + +RETSIGTYPE +catchsig(sig, code) +int sig, code; +{ +#ifdef lint + code = 0; sig = code; code = sig; +#endif + if (sig == SIGFPE) { + fatal("floating point exception"); + } else if (sig == SIGSEGV +#ifdef SIGBUS + || sig == SIGBUS +#endif + ) { + set_loc(__FILE__, __LINE__); + msg("fatal error: internal error"); + /* fatal won't abort() if not compiled for debugging */ + abort(); + } else + cant_happen(); + /* NOTREACHED */ +} + +/* nostalgia --- print the famous error message and die */ + +static void +nostalgia() +{ + fprintf(stderr, "awk: bailing out near line 1\n"); + abort(); +} + +/* version --- print version message */ + +static void +version() +{ + printf("%s.%d\n", version_string, PATCHLEVEL); + /* + * Per GNU coding standards, print copyright info, + * then exit successfully, do nothing else. + */ + copyleft(); + exit(0); +} |