diff options
Diffstat (limited to 'gnu/usr.bin/awk/main.c')
-rw-r--r-- | gnu/usr.bin/awk/main.c | 731 |
1 files changed, 731 insertions, 0 deletions
diff --git a/gnu/usr.bin/awk/main.c b/gnu/usr.bin/awk/main.c new file mode 100644 index 0000000..77d0bf7 --- /dev/null +++ b/gnu/usr.bin/awk/main.c @@ -0,0 +1,731 @@ +/* + * main.c -- Expression tree constructors and main program for gawk. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "getopt.h" +#include "awk.h" +#include "patchlevel.h" + +static void usage P((int exitval)); +static void copyleft P((void)); +static void cmdline_fs P((char *str)); +static void init_args P((int argc0, int argc, char *argv0, char **argv)); +static void init_vars P((void)); +static void pre_assign P((char *v)); +SIGTYPE catchsig P((int sig, int code)); +static void gawk_option P((char *optstr)); +static void nostalgia P((void)); +static void version P((void)); +char *gawk_name P((char *filespec)); + +#ifdef MSDOS +extern int isatty P((int)); +#endif + +extern void resetup P((void)); + +/* These nodes store all the special variables AWK uses */ +NODE *FS_node, *NF_node, *RS_node, *NR_node; +NODE *FILENAME_node, *OFS_node, *ORS_node, *OFMT_node; +NODE *CONVFMT_node; +NODE *ERRNO_node; +NODE *FNR_node, *RLENGTH_node, *RSTART_node, *SUBSEP_node; +NODE *ENVIRON_node, *IGNORECASE_node; +NODE *ARGC_node, *ARGV_node, *ARGIND_node; +NODE *FIELDWIDTHS_node; + +int NF; +int NR; +int FNR; +int IGNORECASE; +char *RS; +char *OFS; +char *ORS; +char *OFMT; +char *CONVFMT; + +/* + * The parse tree and field nodes are stored here. Parse_end is a dummy item + * used to free up unneeded fields without freeing the program being run + */ +int errcount = 0; /* error counter, used by yyerror() */ + +/* The global null string */ +NODE *Nnull_string; + +/* The name the program was invoked under, for error messages */ +const char *myname; + +/* A block of AWK code to be run before running the program */ +NODE *begin_block = 0; + +/* A block of AWK code to be run after the last input file */ +NODE *end_block = 0; + +int exiting = 0; /* Was an "exit" statement executed? */ +int exit_val = 0; /* optional exit value */ + +#if defined(YYDEBUG) || defined(DEBUG) +extern int yydebug; +#endif + +struct src *srcfiles = NULL; /* source file name(s) */ +int numfiles = -1; /* how many source files */ + +int do_unix = 0; /* turn off gnu extensions */ +int do_posix = 0; /* turn off gnu and unix extensions */ +int do_lint = 0; /* provide warnings about questionable stuff */ +int do_nostalgia = 0; /* provide a blast from the past */ + +int in_begin_rule = 0; /* we're in a BEGIN rule */ +int in_end_rule = 0; /* we're in a END rule */ + +int output_is_tty = 0; /* control flushing of output */ + +extern char *version_string; /* current version, for printing */ + +NODE *expression_value; + +static struct option optab[] = { + { "compat", no_argument, & do_unix, 1 }, + { "lint", no_argument, & do_lint, 1 }, + { "posix", no_argument, & do_posix, 1 }, + { "nostalgia", no_argument, & do_nostalgia, 1 }, + { "copyleft", no_argument, NULL, 'C' }, + { "copyright", no_argument, NULL, 'C' }, + { "field-separator", required_argument, NULL, 'F' }, + { "file", required_argument, NULL, 'f' }, + { "assign", required_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "usage", no_argument, NULL, 'u' }, + { "help", no_argument, NULL, 'u' }, + { "source", required_argument, NULL, 's' }, +#ifdef DEBUG + { "parsedebug", no_argument, NULL, 'D' }, +#endif + { 0, 0, 0, 0 } +}; + +int +main(argc, argv) +int argc; +char **argv; +{ + int c; + char *scan; + extern int optind; + extern int opterr; + extern char *optarg; + int i; + + (void) signal(SIGFPE, (SIGTYPE (*) P((int))) catchsig); + (void) signal(SIGSEGV, (SIGTYPE (*) P((int))) catchsig); +#ifdef SIGBUS + (void) signal(SIGBUS, (SIGTYPE (*) P((int))) catchsig); +#endif + + myname = gawk_name(argv[0]); + argv[0] = (char *)myname; +#ifdef VMS + vms_arg_fixup(&argc, &argv); /* emulate redirection, expand wildcards */ +#endif + + /* remove sccs gunk */ + if (strncmp(version_string, "@(#)", 4) == 0) + version_string += 4; + + if (argc < 2) + usage(1); + + /* initialize the null string */ + Nnull_string = make_string("", 0); + Nnull_string->numbr = 0.0; + Nnull_string->type = Node_val; + Nnull_string->flags = (PERM|STR|STRING|NUM|NUMBER); + + /* Set up the special variables */ + + /* + * Note that this must be done BEFORE arg parsing else -F + * breaks horribly + */ + init_vars(); + + /* worst case */ + emalloc(srcfiles, struct src *, argc * sizeof(struct src), "main"); + memset(srcfiles, '\0', argc * sizeof(struct src)); + + /* Tell the regex routines how they should work. . . */ + resetup(); + + /* we do error messages ourselves on invalid options */ + opterr = 0; + + /* the + on the front tells GNU getopt not to rearrange argv */ + while ((c = getopt_long(argc, argv, "+F:f:v:W:", optab, NULL)) != EOF) { + if (do_posix) + opterr = 1; + switch (c) { + case 'F': + cmdline_fs(optarg); + break; + + case 'f': + /* + * a la MKS awk, allow multiple -f options. + * this makes function libraries real easy. + * most of the magic is in the scanner. + */ + /* The following is to allow for whitespace at the end + * of a #! /bin/gawk line in an executable file + */ + scan = optarg; + while (isspace(*scan)) + scan++; + ++numfiles; + srcfiles[numfiles].stype = SOURCEFILE; + if (*scan == '\0') + srcfiles[numfiles].val = argv[optind++]; + else + srcfiles[numfiles].val = optarg; + break; + + case 'v': + pre_assign(optarg); + break; + + case 'W': /* gawk specific options */ + gawk_option(optarg); + break; + + /* These can only come from long form options */ + case 'V': + version(); + break; + + case 'C': + copyleft(); + break; + + case 'u': + usage(0); + break; + + case 's': + if (strlen(optarg) == 0) + warning("empty argument to --source ignored"); + else { + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = optarg; + } + break; + +#ifdef DEBUG + case 'D': + yydebug = 2; + break; +#endif + + case '?': + default: + /* + * New behavior. If not posix, an unrecognized + * option stops argument processing so that it can + * go into ARGV for the awk program to see. This + * makes use of ``#! /bin/gawk -f'' easier. + */ + if (! do_posix) + goto out; + /* else + let getopt print error message for us */ + break; + } + } +out: + + if (do_nostalgia) + nostalgia(); + + /* POSIX compliance also implies no Unix extensions either */ + if (do_posix) + do_unix = 1; + +#ifdef DEBUG + setbuf(stdout, (char *) NULL); /* make debugging easier */ +#endif + if (isatty(fileno(stdout))) + output_is_tty = 1; + /* No -f or --source options, use next arg */ + if (numfiles == -1) { + if (optind > argc - 1) /* no args left */ + usage(1); + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = argv[optind]; + optind++; + } + init_args(optind, argc, (char *) myname, argv); + (void) tokexpand(); + + /* Read in the program */ + if (yyparse() || errcount) + exit(1); + + /* Set up the field variables */ + init_fields(); + + if (begin_block) { + in_begin_rule = 1; + (void) interpret(begin_block); + } + in_begin_rule = 0; + if (!exiting && (expression_value || end_block)) + do_input(); + if (end_block) { + in_end_rule = 1; + (void) interpret(end_block); + } + in_end_rule = 0; + if (close_io() != 0 && exit_val == 0) + exit_val = 1; + exit(exit_val); /* more portable */ + return exit_val; /* to suppress warnings */ +} + +/* usage --- print usage information and exit */ + +static void +usage(exitval) +int exitval; +{ + char *opt1 = " -f progfile [--]"; + char *opt2 = " [--] 'program'"; + char *regops = " [POSIX or GNU style options]"; + + version(); + fprintf(stderr, "usage: %s%s%s file ...\n %s%s%s file ...\n", + myname, regops, opt1, myname, regops, opt2); + + /* GNU long options info. Gack. */ + fputs("\nPOSIX options:\t\tGNU long options:\n", stderr); + fputs("\t-f progfile\t\t--file=progfile\n", stderr); + fputs("\t-F fs\t\t\t--field-separator=fs\n", stderr); + fputs("\t-v var=val\t\t--assign=var=val\n", stderr); + fputs("\t-W compat\t\t--compat\n", stderr); + fputs("\t-W copyleft\t\t--copyleft\n", stderr); + fputs("\t-W copyright\t\t--copyright\n", stderr); + fputs("\t-W help\t\t\t--help\n", stderr); + fputs("\t-W lint\t\t\t--lint\n", stderr); +#if 0 + fputs("\t-W nostalgia\t\t--nostalgia\n", stderr); +#endif +#ifdef DEBUG + fputs("\t-W parsedebug\t\t--parsedebug\n", stderr); +#endif + fputs("\t-W posix\t\t--posix\n", stderr); + fputs("\t-W source=program-text\t--source=program-text\n", stderr); + fputs("\t-W usage\t\t--usage\n", stderr); + fputs("\t-W version\t\t--version\n", stderr); + exit(exitval); +} + +static void +copyleft () +{ + static char blurb_part1[] = +"Copyright (C) 1989, 1991, 1992, Free Software Foundation.\n\ +\n\ +This program is free software; you can redistribute it and/or modify\n\ +it under the terms of the GNU General Public License as published by\n\ +the Free Software Foundation; either version 2 of the License, or\n\ +(at your option) any later version.\n\ +\n"; + static char blurb_part2[] = +"This program is distributed in the hope that it will be useful,\n\ +but WITHOUT ANY WARRANTY; without even the implied warranty of\n\ +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n\ +GNU General Public License for more details.\n\ +\n"; + static char blurb_part3[] = +"You should have received a copy of the GNU General Public License\n\ +along with this program; if not, write to the Free Software\n\ +Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.\n"; + + version(); + fputs(blurb_part1, stderr); + fputs(blurb_part2, stderr); + fputs(blurb_part3, stderr); + fflush(stderr); +} + +static void +cmdline_fs(str) +char *str; +{ + register NODE **tmp; + int len = strlen(str); + + tmp = get_lhs(FS_node, (Func_ptr *) 0); + unref(*tmp); + /* + * Only if in full compatibility mode check for the stupid special + * case so -F\t works as documented in awk even though the shell + * hands us -Ft. Bleah! + * + * Thankfully, Posix didn't propogate this "feature". + */ + if (str[0] == 't' && str[1] == '\0') { + if (do_lint) + warning("-Ft does not set FS to tab in POSIX awk"); + if (do_unix && ! do_posix) + str[0] = '\t'; + } + *tmp = make_str_node(str, len, SCAN); /* do process escapes */ + set_FS(); +} + +static void +init_args(argc0, argc, argv0, argv) +int argc0, argc; +char *argv0; +char **argv; +{ + int i, j; + NODE **aptr; + + ARGV_node = install("ARGV", node(Nnull_string, Node_var, (NODE *)NULL)); + aptr = assoc_lookup(ARGV_node, tmp_number(0.0)); + *aptr = make_string(argv0, strlen(argv0)); + (*aptr)->flags |= MAYBE_NUM; + for (i = argc0, j = 1; i < argc; i++) { + aptr = assoc_lookup(ARGV_node, tmp_number((AWKNUM) j)); + *aptr = make_string(argv[i], strlen(argv[i])); + (*aptr)->flags |= MAYBE_NUM; + j++; + } + ARGC_node = install("ARGC", + node(make_number((AWKNUM) j), Node_var, (NODE *) NULL)); +} + +/* + * Set all the special variables to their initial values. + */ +struct varinit { + NODE **spec; + char *name; + NODETYPE type; + char *strval; + AWKNUM numval; + Func_ptr assign; +}; +static struct varinit varinit[] = { +{&NF_node, "NF", Node_NF, 0, -1, set_NF }, +{&FIELDWIDTHS_node, "FIELDWIDTHS", Node_FIELDWIDTHS, "", 0, 0 }, +{&NR_node, "NR", Node_NR, 0, 0, set_NR }, +{&FNR_node, "FNR", Node_FNR, 0, 0, set_FNR }, +{&FS_node, "FS", Node_FS, " ", 0, 0 }, +{&RS_node, "RS", Node_RS, "\n", 0, set_RS }, +{&IGNORECASE_node, "IGNORECASE", Node_IGNORECASE, 0, 0, set_IGNORECASE }, +{&FILENAME_node, "FILENAME", Node_var, "-", 0, 0 }, +{&OFS_node, "OFS", Node_OFS, " ", 0, set_OFS }, +{&ORS_node, "ORS", Node_ORS, "\n", 0, set_ORS }, +{&OFMT_node, "OFMT", Node_OFMT, "%.6g", 0, set_OFMT }, +{&CONVFMT_node, "CONVFMT", Node_CONVFMT, "%.6g", 0, set_CONVFMT }, +{&RLENGTH_node, "RLENGTH", Node_var, 0, 0, 0 }, +{&RSTART_node, "RSTART", Node_var, 0, 0, 0 }, +{&SUBSEP_node, "SUBSEP", Node_var, "\034", 0, 0 }, +{&ARGIND_node, "ARGIND", Node_var, 0, 0, 0 }, +{&ERRNO_node, "ERRNO", Node_var, 0, 0, 0 }, +{0, 0, Node_illegal, 0, 0, 0 }, +}; + +static void +init_vars() +{ + register struct varinit *vp; + + for (vp = varinit; vp->name; vp++) { + *(vp->spec) = install(vp->name, + node(vp->strval == 0 ? make_number(vp->numval) + : make_string(vp->strval, strlen(vp->strval)), + vp->type, (NODE *) NULL)); + if (vp->assign) + (*(vp->assign))(); + } +} + +void +load_environ() +{ +#if !defined(MSDOS) && !(defined(VMS) && defined(__DECC)) + extern char **environ; +#endif + register char *var, *val; + NODE **aptr; + register int i; + + ENVIRON_node = install("ENVIRON", + node(Nnull_string, Node_var, (NODE *) NULL)); + for (i = 0; environ[i]; i++) { + static char nullstr[] = ""; + + var = environ[i]; + val = strchr(var, '='); + if (val) + *val++ = '\0'; + else + val = nullstr; + aptr = assoc_lookup(ENVIRON_node, tmp_string(var, strlen (var))); + *aptr = make_string(val, strlen (val)); + (*aptr)->flags |= MAYBE_NUM; + + /* restore '=' so that system() gets a valid environment */ + if (val != nullstr) + *--val = '='; + } +} + +/* Process a command-line assignment */ +char * +arg_assign(arg) +char *arg; +{ + char *cp; + Func_ptr after_assign = NULL; + NODE *var; + NODE *it; + NODE **lhs; + + cp = strchr(arg, '='); + if (cp != NULL) { + *cp++ = '\0'; + /* + * Recent versions of nawk expand escapes inside assignments. + * This makes sense, so we do it too. + */ + it = make_str_node(cp, strlen(cp), SCAN); + it->flags |= MAYBE_NUM; + var = variable(arg, 0); + lhs = get_lhs(var, &after_assign); + unref(*lhs); + *lhs = it; + if (after_assign) + (*after_assign)(); + *--cp = '='; /* restore original text of ARGV */ + } + return cp; +} + +static void +pre_assign(v) +char *v; +{ + if (!arg_assign(v)) { + fprintf (stderr, + "%s: '%s' argument to -v not in 'var=value' form\n", + myname, v); + usage(1); + } +} + +SIGTYPE +catchsig(sig, code) +int sig, code; +{ +#ifdef lint + code = 0; sig = code; code = sig; +#endif + if (sig == SIGFPE) { + fatal("floating point exception"); + } else if (sig == SIGSEGV +#ifdef SIGBUS + || sig == SIGBUS +#endif + ) { + msg("fatal error: internal error"); + /* fatal won't abort() if not compiled for debugging */ + abort(); + } else + cant_happen(); + /* NOTREACHED */ +} + +/* gawk_option --- do gawk specific things */ + +static void +gawk_option(optstr) +char *optstr; +{ + char *cp; + + for (cp = optstr; *cp; cp++) { + switch (*cp) { + case ' ': + case '\t': + case ',': + break; + case 'v': + case 'V': + /* print version */ + if (strncasecmp(cp, "version", 7) != 0) + goto unknown; + else + cp += 6; + version(); + break; + case 'c': + case 'C': + if (strncasecmp(cp, "copyright", 9) == 0) { + cp += 8; + copyleft(); + } else if (strncasecmp(cp, "copyleft", 8) == 0) { + cp += 7; + copyleft(); + } else if (strncasecmp(cp, "compat", 6) == 0) { + cp += 5; + do_unix = 1; + } else + goto unknown; + break; + case 'n': + case 'N': + /* + * Undocumented feature, + * inspired by nostalgia, and a T-shirt + */ + if (strncasecmp(cp, "nostalgia", 9) != 0) + goto unknown; + nostalgia(); + break; + case 'p': + case 'P': +#ifdef DEBUG + if (strncasecmp(cp, "parsedebug", 10) == 0) { + cp += 9; + yydebug = 2; + break; + } +#endif + if (strncasecmp(cp, "posix", 5) != 0) + goto unknown; + cp += 4; + do_posix = do_unix = 1; + break; + case 'l': + case 'L': + if (strncasecmp(cp, "lint", 4) != 0) + goto unknown; + cp += 3; + do_lint = 1; + break; + case 'H': + case 'h': + if (strncasecmp(cp, "help", 4) != 0) + goto unknown; + cp += 3; + usage(0); + break; + case 'U': + case 'u': + if (strncasecmp(cp, "usage", 5) != 0) + goto unknown; + cp += 4; + usage(0); + break; + case 's': + case 'S': + if (strncasecmp(cp, "source=", 7) != 0) + goto unknown; + cp += 7; + if (strlen(cp) == 0) + warning("empty argument to -Wsource ignored"); + else { + srcfiles[++numfiles].stype = CMDLINE; + srcfiles[numfiles].val = cp; + return; + } + break; + default: + unknown: + fprintf(stderr, "'%c' -- unknown option, ignored\n", + *cp); + break; + } + } +} + +/* nostalgia --- print the famous error message and die */ + +static void +nostalgia() +{ + fprintf(stderr, "awk: bailing out near line 1\n"); + abort(); +} + +/* version --- print version message */ + +static void +version() +{ + fprintf(stderr, "%s, patchlevel %d\n", version_string, PATCHLEVEL); +} + +/* static */ +char * +gawk_name(filespec) +char *filespec; +{ + char *p; + +#ifdef VMS /* "device:[root.][directory.subdir]GAWK.EXE;n" -> "GAWK" */ + char *q; + + p = strrchr(filespec, ']'); /* directory punctuation */ + q = strrchr(filespec, '>'); /* alternate <international> punct */ + + if (p == NULL || q > p) p = q; + p = strdup(p == NULL ? filespec : (p + 1)); + if ((q = strrchr(p, '.')) != NULL) *q = '\0'; /* strip .typ;vers */ + + return p; +#endif /*VMS*/ + +#if defined(MSDOS) || defined(atarist) + char *q; + + p = filespec; + + if (q = strrchr(p, '\\')) + p = q + 1; + if (q = strchr(p, '.')) + *q = '\0'; + strlwr(p); + + return (p == NULL ? filespec : p); +#endif /* MSDOS || atarist */ + + /* "path/name" -> "name" */ + p = strrchr(filespec, '/'); + return (p == NULL ? filespec : p + 1); +} |