diff options
Diffstat (limited to 'contrib/awk/io.c')
-rw-r--r-- | contrib/awk/io.c | 1941 |
1 files changed, 1941 insertions, 0 deletions
diff --git a/contrib/awk/io.c b/contrib/awk/io.c new file mode 100644 index 0000000..74d9a8d --- /dev/null +++ b/contrib/awk/io.c @@ -0,0 +1,1941 @@ +/* + * io.c --- routines for dealing with input and output and records + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991-1997 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Programming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +#include "awk.h" +#undef HAVE_MMAP /* for now, probably forever */ + +#ifdef HAVE_SYS_PARAM_H +#undef RE_DUP_MAX /* avoid spurious conflict w/regex.h */ +#include <sys/param.h> +#endif /* HAVE_SYS_PARAM_H */ + +#ifdef HAVE_SYS_WAIT_H +#include <sys/wait.h> +#endif /* HAVE_SYS_WAIT_H */ + +#ifdef HAVE_MMAP +#include <sys/mman.h> +#ifndef MAP_FAILED +#define MAP_FAILED ((caddr_t) -1) +#endif /* ! defined (MAP_FAILED) */ +#endif /* HAVE_MMAP */ + +#ifndef O_RDONLY +#include <fcntl.h> +#endif +#ifndef O_ACCMODE +#define O_ACCMODE (O_RDONLY|O_WRONLY|O_RDWR) +#endif + +#include <assert.h> + +#if ! defined(S_ISREG) && defined(S_IFREG) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif + +#if ! defined(S_ISDIR) && defined(S_IFDIR) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif + +#ifndef ENFILE +#define ENFILE EMFILE +#endif + +#ifdef atarist +#include <stddef.h> +#endif + +#if defined(MSDOS) || defined(OS2) || defined(WIN32) +#define PIPES_SIMULATED +#endif + +static IOBUF *nextfile P((int skipping)); +static int inrec P((IOBUF *iop)); +static int iop_close P((IOBUF *iop)); +struct redirect *redirect P((NODE *tree, int *errflg)); +static void close_one P((void)); +static int close_redir P((struct redirect *rp, int exitwarn)); +#ifndef PIPES_SIMULATED +static int wait_any P((int interesting)); +#endif +static IOBUF *gawk_popen P((char *cmd, struct redirect *rp)); +static IOBUF *iop_open P((const char *file, const char *how, IOBUF *buf)); +static IOBUF *iop_alloc P((int fd, const char *name, IOBUF *buf)); +static int gawk_pclose P((struct redirect *rp)); +static int do_pathopen P((const char *file)); +static int get_a_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); +#ifdef HAVE_MMAP +static int mmap_get_record P((char **out, IOBUF *iop, int rs, Regexp *RSre, int *errcode)); +#endif /* HAVE_MMAP */ +static int str2mode P((const char *mode)); +static void spec_setup P((IOBUF *iop, int len, int allocate)); +static int specfdopen P((IOBUF *iop, const char *name, const char *mode)); +static int pidopen P((IOBUF *iop, const char *name, const char *mode)); +static int useropen P((IOBUF *iop, const char *name, const char *mode)); + +#if defined (MSDOS) && !defined (__GO32__) +#include "popen.h" +#define popen(c, m) os_popen(c, m) +#define pclose(f) os_pclose(f) +#else +#if defined (OS2) /* OS/2, but not family mode */ +#if defined (_MSC_VER) +#define popen(c, m) _popen(c, m) +#define pclose(f) _pclose(f) +#endif +#else +extern FILE *popen(); +#endif +#endif + +static struct redirect *red_head = NULL; +static NODE *RS; +static Regexp *RS_regexp; + +int RS_is_null; + +extern int output_is_tty; +extern NODE *ARGC_node; +extern NODE *ARGV_node; +extern NODE *ARGIND_node; +extern NODE *ERRNO_node; +extern NODE **fields_arr; + +static jmp_buf filebuf; /* for do_nextfile() */ + +/* do_nextfile --- implement gawk "nextfile" extension */ + +void +do_nextfile() +{ + (void) nextfile(TRUE); + longjmp(filebuf, 1); +} + +/* nextfile --- move to the next input data file */ + +static IOBUF * +nextfile(skipping) +int skipping; +{ + static long i = 1; + static int files = 0; + NODE *arg; + static IOBUF *curfile = NULL; + static IOBUF mybuf; + const char *fname; + + if (skipping) { + if (curfile != NULL) + iop_close(curfile); + curfile = NULL; + return NULL; + } + if (curfile != NULL) { + if (curfile->cnt == EOF) { + (void) iop_close(curfile); + curfile = NULL; + } else + return curfile; + } + for (; i < (long) (ARGC_node->lnode->numbr); i++) { + arg = *assoc_lookup(ARGV_node, tmp_number((AWKNUM) i)); + if (arg->stlen == 0) + continue; + arg->stptr[arg->stlen] = '\0'; + if (! do_traditional) { + unref(ARGIND_node->var_value); + ARGIND_node->var_value = make_number((AWKNUM) i); + } + if (! arg_assign(arg->stptr)) { + files++; + fname = arg->stptr; + curfile = iop_open(fname, "r", &mybuf); + if (curfile == NULL) + goto give_up; + curfile->flag |= IOP_NOFREE_OBJ; + /* This is a kludge. */ + unref(FILENAME_node->var_value); + FILENAME_node->var_value = dupnode(arg); + FNR = 0; + i++; + break; + } + } + if (files == 0) { + files++; + /* no args. -- use stdin */ + /* FNR is init'ed to 0 */ + FILENAME_node->var_value = make_string("-", 1); + fname = "-"; + curfile = iop_open(fname, "r", &mybuf); + if (curfile == NULL) + goto give_up; + curfile->flag |= IOP_NOFREE_OBJ; + } + return curfile; + + give_up: + fatal("cannot open file `%s' for reading (%s)", + fname, strerror(errno)); + /* NOTREACHED */ + return 0; +} + +/* set_FNR --- update internal FNR from awk variable */ + +void +set_FNR() +{ + FNR = (long) FNR_node->var_value->numbr; +} + +/* set_NR --- update internal NR from awk variable */ + +void +set_NR() +{ + NR = (long) NR_node->var_value->numbr; +} + +/* inrec --- This reads in a record from the input file */ + +static int +inrec(iop) +IOBUF *iop; +{ + char *begin; + register int cnt; + int retval = 0; + + if ((cnt = iop->cnt) != EOF) + cnt = (*(iop->getrec)) + (&begin, iop, RS->stptr[0], RS_regexp, NULL); + if (cnt == EOF) { + cnt = 0; + retval = 1; + } else { + NR += 1; + FNR += 1; + set_record(begin, cnt, TRUE); + } + + return retval; +} + +/* iop_close --- close an open IOP */ + +static int +iop_close(iop) +IOBUF *iop; +{ + int ret; + + if (iop == NULL) + return 0; + errno = 0; + +#ifdef _CRAY + /* Work around bug in UNICOS popen */ + if (iop->fd < 3) + ret = 0; + else +#endif + /* save these for re-use; don't free the storage */ + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->off = iop->buf; + iop->end = iop->buf + strlen(iop->buf); + iop->cnt = 0; + iop->secsiz = 0; + return 0; + } + + /* Don't close standard files or else crufty code elsewhere will lose */ + if (iop->fd == fileno(stdin) + || iop->fd == fileno(stdout) + || iop->fd == fileno(stderr) + || (iop->flag & IOP_MMAPPED) != 0) + ret = 0; + else + ret = close(iop->fd); + + if (ret == -1) + warning("close of fd %d (`%s') failed (%s)", iop->fd, + iop->name, strerror(errno)); + if ((iop->flag & IOP_NO_FREE) == 0) { + /* + * Be careful -- $0 may still reference the buffer even though + * an explicit close is being done; in the future, maybe we + * can do this a bit better. + */ + if (iop->buf) { + if ((fields_arr[0]->stptr >= iop->buf) + && (fields_arr[0]->stptr < (iop->buf + iop->secsiz + iop->size))) { + NODE *t; + + t = make_string(fields_arr[0]->stptr, + fields_arr[0]->stlen); + unref(fields_arr[0]); + fields_arr[0] = t; + reset_record(); + } + if ((iop->flag & IOP_MMAPPED) == 0) + free(iop->buf); +#ifdef HAVE_MMAP + else + (void) munmap(iop->buf, iop->size); +#endif + } + if ((iop->flag & IOP_NOFREE_OBJ) == 0) + free((char *) iop); + } + return ret == -1 ? 1 : 0; +} + +/* do_input --- the main input processing loop */ + +void +do_input() +{ + IOBUF *iop; + extern int exiting; + + (void) setjmp(filebuf); /* for `nextfile' */ + + while ((iop = nextfile(FALSE)) != NULL) { + if (inrec(iop) == 0) + while (interpret(expression_value) && inrec(iop) == 0) + continue; +#ifdef C_ALLOCA + /* recover any space from C based alloca */ + (void) alloca(0); +#endif + if (exiting) + break; + } +} + +/* redirect --- Redirection for printf and print commands */ + +struct redirect * +redirect(tree, errflg) +NODE *tree; +int *errflg; +{ + register NODE *tmp; + register struct redirect *rp; + register char *str; + int tflag = 0; + int outflag = 0; + const char *direction = "to"; + const char *mode; + int fd; + const char *what = NULL; + + switch (tree->type) { + case Node_redirect_append: + tflag = RED_APPEND; + /* FALL THROUGH */ + case Node_redirect_output: + outflag = (RED_FILE|RED_WRITE); + tflag |= outflag; + if (tree->type == Node_redirect_output) + what = ">"; + else + what = ">>"; + break; + case Node_redirect_pipe: + tflag = (RED_PIPE|RED_WRITE); + what = "|"; + break; + case Node_redirect_pipein: + tflag = (RED_PIPE|RED_READ); + what = "|"; + break; + case Node_redirect_input: + tflag = (RED_FILE|RED_READ); + what = "<"; + break; + default: + fatal("invalid tree type %d in redirect()", tree->type); + break; + } + tmp = tree_eval(tree->subnode); + if (do_lint && (tmp->flags & STR) == 0) + warning("expression in `%s' redirection only has numeric value", + what); + tmp = force_string(tmp); + str = tmp->stptr; + + if (str == NULL || *str == '\0') + fatal("expression for `%s' redirection has null string value", + what); + + if (do_lint + && (STREQN(str, "0", tmp->stlen) || STREQN(str, "1", tmp->stlen))) + warning("filename `%s' for `%s' redirection may be result of logical expression", str, what); + for (rp = red_head; rp != NULL; rp = rp->next) + if (strlen(rp->value) == tmp->stlen + && STREQN(rp->value, str, tmp->stlen) + && ((rp->flag & ~(RED_NOBUF|RED_EOF)) == tflag + || (outflag != 0 + && (rp->flag & (RED_FILE|RED_WRITE)) == outflag))) + break; + if (rp == NULL) { + emalloc(rp, struct redirect *, sizeof(struct redirect), + "redirect"); + emalloc(str, char *, tmp->stlen+1, "redirect"); + memcpy(str, tmp->stptr, tmp->stlen); + str[tmp->stlen] = '\0'; + rp->value = str; + rp->flag = tflag; + rp->fp = NULL; + rp->iop = NULL; + rp->pid = 0; /* unlikely that we're worried about init */ + rp->status = 0; + /* maintain list in most-recently-used first order */ + if (red_head != NULL) + red_head->prev = rp; + rp->prev = NULL; + rp->next = red_head; + red_head = rp; + } else + str = rp->value; /* get \0 terminated string */ + while (rp->fp == NULL && rp->iop == NULL) { + if (rp->flag & RED_EOF) + /* + * encountered EOF on file or pipe -- must be cleared + * by explicit close() before reading more + */ + return rp; + mode = NULL; + errno = 0; + switch (tree->type) { + case Node_redirect_output: + mode = "w"; + if ((rp->flag & RED_USED) != 0) + mode = "a"; + break; + case Node_redirect_append: + mode = "a"; + break; + case Node_redirect_pipe: + /* synchronize output before new pipe */ + (void) flush_io(); + + if ((rp->fp = popen(str, "w")) == NULL) + fatal("can't open pipe (\"%s\") for output (%s)", + str, strerror(errno)); + rp->flag |= RED_NOBUF; + break; + case Node_redirect_pipein: + direction = "from"; + if (gawk_popen(str, rp) == NULL) + fatal("can't open pipe (\"%s\") for input (%s)", + str, strerror(errno)); + break; + case Node_redirect_input: + direction = "from"; + rp->iop = iop_open(str, "r", NULL); + break; + default: + cant_happen(); + } + if (mode != NULL) { + errno = 0; + fd = devopen(str, mode); + if (fd > INVALID_HANDLE) { + if (fd == fileno(stdin)) + rp->fp = stdin; + else if (fd == fileno(stdout)) + rp->fp = stdout; + else if (fd == fileno(stderr)) + rp->fp = stderr; + else { + rp->fp = fdopen(fd, (char *) mode); + /* don't leak file descriptors */ + if (rp->fp == NULL) + close(fd); + } + if (rp->fp != NULL && isatty(fd)) + rp->flag |= RED_NOBUF; + } + } + if (rp->fp == NULL && rp->iop == NULL) { + /* too many files open -- close one and try again */ + if (errno == EMFILE || errno == ENFILE) + close_one(); +#ifdef HAVE_MMAP + /* this works for solaris 2.5, not sunos */ + else if (errno == 0) /* HACK! */ + close_one(); +#endif + else { + /* + * Some other reason for failure. + * + * On redirection of input from a file, + * just return an error, so e.g. getline + * can return -1. For output to file, + * complain. The shell will complain on + * a bad command to a pipe. + */ + if (errflg != NULL) + *errflg = errno; + if (tree->type == Node_redirect_output + || tree->type == Node_redirect_append) + fatal("can't redirect %s `%s' (%s)", + direction, str, strerror(errno)); + else { + free_temp(tmp); + return NULL; + } + } + } + } + free_temp(tmp); + return rp; +} + +/* getredirect --- find the struct redirect for this file or pipe */ + +struct redirect * +getredirect(str, len) +char *str; +int len; +{ + struct redirect *rp; + + for (rp = red_head; rp != NULL; rp = rp->next) + if (strlen(rp->value) == len && STREQN(rp->value, str, len)) + return rp; + + return NULL; +} + +/* close_one --- temporarily close an open file to re-use the fd */ + +static void +close_one() +{ + register struct redirect *rp; + register struct redirect *rplast = NULL; + + /* go to end of list first, to pick up least recently used entry */ + for (rp = red_head; rp != NULL; rp = rp->next) + rplast = rp; + /* now work back up through the list */ + for (rp = rplast; rp != NULL; rp = rp->prev) + if (rp->fp != NULL && (rp->flag & RED_FILE) != 0) { + rp->flag |= RED_USED; + errno = 0; + if (/* do_lint && */ fclose(rp->fp) != 0) + warning("close of \"%s\" failed (%s).", + rp->value, strerror(errno)); + rp->fp = NULL; + break; + } + if (rp == NULL) + /* surely this is the only reason ??? */ + fatal("too many pipes or input files open"); +} + +/* do_close --- completely close an open file or pipe */ + +NODE * +do_close(tree) +NODE *tree; +{ + NODE *tmp; + register struct redirect *rp; + + tmp = force_string(tree_eval(tree->subnode)); + + /* icky special case: close(FILENAME) called. */ + if (tree->subnode == FILENAME_node + || (tmp->stlen == FILENAME_node->var_value->stlen + && STREQN(tmp->stptr, FILENAME_node->var_value->stptr, tmp->stlen))) { + (void) nextfile(TRUE); + free_temp(tmp); + return tmp_number((AWKNUM) 0.0); + } + + for (rp = red_head; rp != NULL; rp = rp->next) { + if (strlen(rp->value) == tmp->stlen + && STREQN(rp->value, tmp->stptr, tmp->stlen)) + break; + } + if (rp == NULL) { /* no match */ + if (do_lint) + warning("close: `%.*s' is not an open file or pipe", + tmp->stlen, tmp->stptr); + free_temp(tmp); + return tmp_number((AWKNUM) 0.0); + } + free_temp(tmp); + fflush(stdout); /* synchronize regular output */ + tmp = tmp_number((AWKNUM) close_redir(rp, FALSE)); + rp = NULL; + return tmp; +} + +/* close_redir --- close an open file or pipe */ + +static int +close_redir(rp, exitwarn) +register struct redirect *rp; +int exitwarn; +{ + int status = 0; + char *what; + + if (rp == NULL) + return 0; + if (rp->fp == stdout || rp->fp == stderr) + return 0; + errno = 0; + if ((rp->flag & (RED_PIPE|RED_WRITE)) == (RED_PIPE|RED_WRITE)) + status = pclose(rp->fp); + else if (rp->fp != NULL) + status = fclose(rp->fp); + else if (rp->iop != NULL) { + if ((rp->flag & RED_PIPE) != 0) + status = gawk_pclose(rp); + else { + status = iop_close(rp->iop); + rp->iop = NULL; + } + } + + what = ((rp->flag & RED_PIPE) != 0) ? "pipe" : "file"; + + if (exitwarn) + warning("no explicit close of %s `%s' provided", + what, rp->value); + + /* SVR4 awk checks and warns about status of close */ + if (status != 0) { + char *s = strerror(errno); + + /* + * Too many people have complained about this. + * As of 2.15.6, it is now under lint control. + */ + if (do_lint) + warning("failure status (%d) on %s close of \"%s\" (%s)", + status, what, rp->value, s); + + if (! do_traditional) { + /* set ERRNO too so that program can get at it */ + unref(ERRNO_node->var_value); + ERRNO_node->var_value = make_string(s, strlen(s)); + } + } + if (rp->next != NULL) + rp->next->prev = rp->prev; + if (rp->prev != NULL) + rp->prev->next = rp->next; + else + red_head = rp->next; + free(rp->value); + free((char *) rp); + return status; +} + +/* flush_io --- flush all open output files */ + +int +flush_io() +{ + register struct redirect *rp; + int status = 0; + + errno = 0; + if (fflush(stdout)) { + warning("error writing standard output (%s)", strerror(errno)); + status++; + } + if (fflush(stderr)) { + warning("error writing standard error (%s)", strerror(errno)); + status++; + } + for (rp = red_head; rp != NULL; rp = rp->next) + /* flush both files and pipes, what the heck */ + if ((rp->flag & RED_WRITE) && rp->fp != NULL) { + if (fflush(rp->fp)) { + warning("%s flush of \"%s\" failed (%s).", + (rp->flag & RED_PIPE) ? "pipe" : + "file", rp->value, strerror(errno)); + status++; + } + } + return status; +} + +/* close_io --- close all open files, called when exiting */ + +int +close_io() +{ + register struct redirect *rp; + register struct redirect *next; + int status = 0; + + errno = 0; + for (rp = red_head; rp != NULL; rp = next) { + next = rp->next; + /* + * close_redir() will print a message if needed + * if do_lint, warn about lack of explicit close + */ + if (close_redir(rp, do_lint)) + status++; + rp = NULL; + } + /* + * Some of the non-Unix os's have problems doing an fclose + * on stdout and stderr. Since we don't really need to close + * them, we just flush them, and do that across the board. + */ + if (fflush(stdout)) { + warning("error writing standard output (%s)", strerror(errno)); + status++; + } + if (fflush(stderr)) { + warning("error writing standard error (%s)", strerror(errno)); + status++; + } + return status; +} + +/* str2mode --- convert a string mode to an integer mode */ + +static int +str2mode(mode) +const char *mode; +{ + int ret; + + switch(mode[0]) { + case 'r': + ret = O_RDONLY; + break; + + case 'w': + ret = O_WRONLY|O_CREAT|O_TRUNC; + break; + + case 'a': + ret = O_WRONLY|O_APPEND|O_CREAT; + break; + + default: + ret = 0; /* lint */ + cant_happen(); + } + return ret; +} + +/* devopen --- handle /dev/std{in,out,err}, /dev/fd/N, regular files */ + +/* + * This separate version is still needed for output, since file and pipe + * output is done with stdio. iop_open() handles input with IOBUFs of + * more "special" files. Those files are not handled here since it makes + * no sense to use them for output. + */ + +int +devopen(name, mode) +const char *name, *mode; +{ + int openfd; + const char *cp; + char *ptr; + int flag = 0; + struct stat buf; + extern double strtod(); + + flag = str2mode(mode); + + if (STREQ(name, "-")) + openfd = fileno(stdin); + else + openfd = INVALID_HANDLE; + + if (do_traditional) + goto strictopen; + + if ((openfd = os_devopen(name, flag)) >= 0) + return openfd; + + if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) { + cp = name + 5; + + if (STREQ(cp, "stdin") && (flag & O_ACCMODE) == O_RDONLY) + openfd = fileno(stdin); + else if (STREQ(cp, "stdout") && (flag & O_ACCMODE) == O_WRONLY) + openfd = fileno(stdout); + else if (STREQ(cp, "stderr") && (flag & O_ACCMODE) == O_WRONLY) + openfd = fileno(stderr); + else if (STREQN(cp, "fd/", 3)) { + cp += 3; + openfd = (int) strtod(cp, &ptr); + if (openfd <= INVALID_HANDLE || ptr == cp) + openfd = INVALID_HANDLE; + } + } + +strictopen: + if (openfd == INVALID_HANDLE) + openfd = open(name, flag, 0666); + if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0) + if (S_ISDIR(buf.st_mode)) + fatal("file `%s' is a directory", name); + return openfd; +} + + +/* spec_setup --- setup an IOBUF for a special internal file */ + +static void +spec_setup(iop, len, allocate) +IOBUF *iop; +int len; +int allocate; +{ + char *cp; + + if (allocate) { + emalloc(cp, char *, len+2, "spec_setup"); + iop->buf = cp; + } else { + len = strlen(iop->buf); + iop->buf[len++] = '\n'; /* get_a_record clobbered it */ + iop->buf[len] = '\0'; /* just in case */ + } + iop->off = iop->buf; + iop->cnt = 0; + iop->secsiz = 0; + iop->size = len; + iop->end = iop->buf + len; + iop->fd = -1; + iop->flag = IOP_IS_INTERNAL; + iop->getrec = get_a_record; +} + +/* specfdopen --- open an fd special file */ + +static int +specfdopen(iop, name, mode) +IOBUF *iop; +const char *name, *mode; +{ + int fd; + IOBUF *tp; + + fd = devopen(name, mode); + if (fd == INVALID_HANDLE) + return INVALID_HANDLE; + tp = iop_alloc(fd, name, NULL); + if (tp == NULL) { + /* don't leak fd's */ + close(fd); + return INVALID_HANDLE; + } + *iop = *tp; + iop->flag |= IOP_NO_FREE; + free(tp); + return 0; +} + +#ifdef GETPGRP_VOID +#define getpgrp_arg() /* nothing */ +#else +#define getpgrp_arg() getpid() +#endif + +/* pidopen --- "open" /dev/pid, /dev/ppid, and /dev/pgrpid */ + +static int +pidopen(iop, name, mode) +IOBUF *iop; +const char *name, *mode; +{ + char tbuf[BUFSIZ]; + int i; + + if (name[6] == 'g') + sprintf(tbuf, "%d\n", getpgrp(getpgrp_arg())); + else if (name[6] == 'i') + sprintf(tbuf, "%d\n", getpid()); + else + sprintf(tbuf, "%d\n", getppid()); + i = strlen(tbuf); + spec_setup(iop, i, TRUE); + strcpy(iop->buf, tbuf); + return 0; +} + +/* useropen --- "open" /dev/user */ + +/* + * /dev/user creates a record as follows: + * $1 = getuid() + * $2 = geteuid() + * $3 = getgid() + * $4 = getegid() + * If multiple groups are supported, then $5 through $NF are the + * supplementary group set. + */ + +static int +useropen(iop, name, mode) +IOBUF *iop; +const char *name, *mode; +{ + char tbuf[BUFSIZ], *cp; + int i; +#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 + GETGROUPS_T groupset[NGROUPS_MAX]; + int ngroups; +#endif + + sprintf(tbuf, "%d %d %d %d", getuid(), geteuid(), getgid(), getegid()); + + cp = tbuf + strlen(tbuf); +#if defined(NGROUPS_MAX) && NGROUPS_MAX > 0 + ngroups = getgroups(NGROUPS_MAX, groupset); + if (ngroups == -1) + fatal("could not find groups: %s", strerror(errno)); + + for (i = 0; i < ngroups; i++) { + *cp++ = ' '; + sprintf(cp, "%d", (int) groupset[i]); + cp += strlen(cp); + } +#endif + *cp++ = '\n'; + *cp++ = '\0'; + + i = strlen(tbuf); + spec_setup(iop, i, TRUE); + strcpy(iop->buf, tbuf); + return 0; +} + +/* iop_open --- handle special and regular files for input */ + +static IOBUF * +iop_open(name, mode, iop) +const char *name, *mode; +IOBUF *iop; +{ + int openfd = INVALID_HANDLE; + int flag = 0; + struct stat buf; + static struct internal { + const char *name; + int compare; + int (*fp) P((IOBUF *, const char *, const char *)); + IOBUF iob; + } table[] = { + { "/dev/fd/", 8, specfdopen }, + { "/dev/stdin", 10, specfdopen }, + { "/dev/stdout", 11, specfdopen }, + { "/dev/stderr", 11, specfdopen }, + { "/dev/pid", 8, pidopen }, + { "/dev/ppid", 9, pidopen }, + { "/dev/pgrpid", 11, pidopen }, + { "/dev/user", 9, useropen }, + }; + int devcount = sizeof(table) / sizeof(table[0]); + + flag = str2mode(mode); + + /* + * FIXME: remove the stat call, and always process these files + * internally. + */ + if (STREQ(name, "-")) + openfd = fileno(stdin); + else if (do_traditional) + goto strictopen; + else if (STREQN(name, "/dev/", 5) && stat((char *) name, &buf) == -1) { + int i; + + for (i = 0; i < devcount; i++) { + if (STREQN(name, table[i].name, table[i].compare)) { + iop = & table[i].iob; + + if (iop->buf != NULL) { + spec_setup(iop, 0, FALSE); + return iop; + } else if ((*table[i].fp)(iop, name, mode) == 0) + return iop; + else { + warning("could not open %s, mode `%s'", + name, mode); + return NULL; + } + } + } + } + +strictopen: + if (openfd == INVALID_HANDLE) + openfd = open(name, flag, 0666); + if (openfd != INVALID_HANDLE && fstat(openfd, &buf) > 0) + if ((buf.st_mode & S_IFMT) == S_IFDIR) + fatal("file `%s' is a directory", name); + return iop_alloc(openfd, name, iop); +} + +#ifndef PIPES_SIMULATED /* real pipes */ + +/* wait_any --- wait for a child process, close associated pipe */ + +static int +wait_any(interesting) +int interesting; /* pid of interest, if any */ +{ + RETSIGTYPE (*hstat)(), (*istat)(), (*qstat)(); + int pid; + int status = 0; + struct redirect *redp; + extern int errno; + + hstat = signal(SIGHUP, SIG_IGN); + istat = signal(SIGINT, SIG_IGN); + qstat = signal(SIGQUIT, SIG_IGN); + for (;;) { +#ifdef HAVE_SYS_WAIT_H /* Posix compatible sys/wait.h */ + pid = wait(&status); +#else + pid = wait((union wait *)&status); +#endif /* NeXT */ + if (interesting && pid == interesting) { + break; + } else if (pid != -1) { + for (redp = red_head; redp != NULL; redp = redp->next) + if (pid == redp->pid) { + redp->pid = -1; + redp->status = status; + break; + } + } + if (pid == -1 && errno == ECHILD) + break; + } + signal(SIGHUP, hstat); + signal(SIGINT, istat); + signal(SIGQUIT, qstat); + return(status); +} + +/* gawk_popen --- open an IOBUF on a child process */ + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + int p[2]; + register int pid; + + /* + * used to wait for any children to synchronize input and output, + * but this could cause gawk to hang when it is started in a pipeline + * and thus has a child process feeding it input (shell dependant) + */ + /*(void) wait_any(0);*/ /* wait for outstanding processes */ + + if (pipe(p) < 0) + fatal("cannot open pipe \"%s\" (%s)", cmd, strerror(errno)); + if ((pid = fork()) == 0) { + if (close(1) == -1) + fatal("close of stdout in child failed (%s)", + strerror(errno)); + if (dup(p[1]) != 1) + fatal("dup of pipe failed (%s)", strerror(errno)); + if (close(p[0]) == -1 || close(p[1]) == -1) + fatal("close of pipe failed (%s)", strerror(errno)); + execl("/bin/sh", "sh", "-c", cmd, NULL); + _exit(127); + } + if (pid == -1) + fatal("cannot fork for \"%s\" (%s)", cmd, strerror(errno)); + rp->pid = pid; + if (close(p[1]) == -1) + fatal("close of pipe failed (%s)", strerror(errno)); + rp->iop = iop_alloc(p[0], cmd, NULL); + if (rp->iop == NULL) + (void) close(p[0]); + return (rp->iop); +} + +/* gawk_pclose --- close an open child pipe */ + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + (void) iop_close(rp->iop); + rp->iop = NULL; + + /* process previously found, return stored status */ + if (rp->pid == -1) + return (rp->status >> 8) & 0xFF; + rp->status = wait_any(rp->pid); + rp->pid = -1; + return (rp->status >> 8) & 0xFF; +} + +#else /* PIPES_SIMULATED */ + +/* + * use temporary file rather than pipe + * except if popen() provides real pipes too + */ + +#if defined(VMS) || defined(OS2) || defined (MSDOS) + +/* gawk_popen --- open an IOBUF on a child process */ + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + FILE *current; + + if ((current = popen(cmd, "r")) == NULL) + return NULL; + rp->iop = iop_alloc(fileno(current), cmd, NULL); + if (rp->iop == NULL) { + (void) fclose(current); + current = NULL; + } + rp->ifp = current; + return (rp->iop); +} + +/* gawk_pclose --- close an open child pipe */ + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + int rval, aval, fd = rp->iop->fd; + + rp->iop->fd = dup(fd); /* kludge to allow close() + pclose() */ + rval = iop_close(rp->iop); + rp->iop = NULL; + aval = pclose(rp->ifp); + rp->ifp = NULL; + return (rval < 0 ? rval : aval); +} +#else /* not (VMS || OS2 || MSDOS) */ + +static struct pipeinfo { + char *command; + char *name; +} pipes[_NFILE]; + +/* gawk_popen --- open an IOBUF on a child process */ + +static IOBUF * +gawk_popen(cmd, rp) +char *cmd; +struct redirect *rp; +{ + extern char *strdup P((const char *)); + int current; + char *name; + static char cmdbuf[256]; + + /* get a name to use */ + if ((name = tempnam(".", "pip")) == NULL) + return NULL; + sprintf(cmdbuf, "%s > %s", cmd, name); + system(cmdbuf); + if ((current = open(name, O_RDONLY)) == INVALID_HANDLE) + return NULL; + pipes[current].name = name; + pipes[current].command = strdup(cmd); + rp->iop = iop_alloc(current, name, NULL); + if (rp->iop == NULL) + (void) close(current); + return (rp->iop); +} + +/* gawk_pclose --- close an open child pipe */ + +static int +gawk_pclose(rp) +struct redirect *rp; +{ + int cur = rp->iop->fd; + int rval; + + rval = iop_close(rp->iop); + rp->iop = NULL; + + /* check for an open file */ + if (pipes[cur].name == NULL) + return -1; + unlink(pipes[cur].name); + free(pipes[cur].name); + pipes[cur].name = NULL; + free(pipes[cur].command); + return rval; +} +#endif /* not (VMS || OS2 || MSDOS) */ + +#endif /* PIPES_SIMULATED */ + +/* do_getline --- read in a line, into var and with redirection, as needed */ + +NODE * +do_getline(tree) +NODE *tree; +{ + struct redirect *rp = NULL; + IOBUF *iop; + int cnt = EOF; + char *s = NULL; + int errcode; + + while (cnt == EOF) { + if (tree->rnode == NULL) { /* no redirection */ + iop = nextfile(FALSE); + if (iop == NULL) /* end of input */ + return tmp_number((AWKNUM) 0.0); + } else { + int redir_error = 0; + + rp = redirect(tree->rnode, &redir_error); + if (rp == NULL && redir_error) { /* failed redirect */ + if (! do_traditional) { + s = strerror(redir_error); + + unref(ERRNO_node->var_value); + ERRNO_node->var_value = + make_string(s, strlen(s)); + } + return tmp_number((AWKNUM) -1.0); + } + iop = rp->iop; + if (iop == NULL) /* end of input */ + return tmp_number((AWKNUM) 0.0); + } + errcode = 0; + cnt = (*(iop->getrec))(&s, iop, RS->stptr[0], RS_regexp, &errcode); + if (errcode != 0) { + if (! do_traditional) { + s = strerror(errcode); + + unref(ERRNO_node->var_value); + ERRNO_node->var_value = make_string(s, strlen(s)); + } + return tmp_number((AWKNUM) -1.0); + } + if (cnt == EOF) { + if (rp != NULL) { + /* + * Don't do iop_close() here if we are + * reading from a pipe; otherwise + * gawk_pclose will not be called. + */ + if ((rp->flag & RED_PIPE) == 0) { + (void) iop_close(iop); + rp->iop = NULL; + } + rp->flag |= RED_EOF; /* sticky EOF */ + return tmp_number((AWKNUM) 0.0); + } else + continue; /* try another file */ + } + if (rp == NULL) { + NR++; + FNR++; + } + if (tree->lnode == NULL) /* no optional var. */ + set_record(s, cnt, TRUE); + else { /* assignment to variable */ + Func_ptr after_assign = NULL; + NODE **lhs; + + lhs = get_lhs(tree->lnode, &after_assign); + unref(*lhs); + *lhs = make_string(s, cnt); + (*lhs)->flags |= MAYBE_NUM; + /* we may have to regenerate $0 here! */ + if (after_assign != NULL) + (*after_assign)(); + } + } + return tmp_number((AWKNUM) 1.0); +} + +/* pathopen --- pathopen with default file extension handling */ + +int +pathopen(file) +const char *file; +{ + int fd = do_pathopen(file); + +#ifdef DEFAULT_FILETYPE + if (! do_traditional && fd <= INVALID_HANDLE) { + char *file_awk; + int save = errno; +#ifdef VMS + int vms_save = vaxc$errno; +#endif + + /* append ".awk" and try again */ + emalloc(file_awk, char *, strlen(file) + + sizeof(DEFAULT_FILETYPE) + 1, "pathopen"); + sprintf(file_awk, "%s%s", file, DEFAULT_FILETYPE); + fd = do_pathopen(file_awk); + free(file_awk); + if (fd <= INVALID_HANDLE) { + errno = save; +#ifdef VMS + vaxc$errno = vms_save; +#endif + } + } +#endif /*DEFAULT_FILETYPE*/ + + return fd; +} + +/* do_pathopen --- search $AWKPATH for source file */ + +static int +do_pathopen(file) +const char *file; +{ + static const char *savepath = NULL; + static int first = TRUE; + const char *awkpath; + char *cp, trypath[BUFSIZ]; + int fd; + + if (STREQ(file, "-")) + return (0); + + if (do_traditional) + return (devopen(file, "r")); + + if (first) { + first = FALSE; + if ((awkpath = getenv("AWKPATH")) != NULL && *awkpath) + savepath = awkpath; /* used for restarting */ + else + savepath = defpath; + } + awkpath = savepath; + + /* some kind of path name, no search */ + if (ispath(file)) + return (devopen(file, "r")); + + do { + trypath[0] = '\0'; + /* this should take into account limits on size of trypath */ + for (cp = trypath; *awkpath && *awkpath != envsep; ) + *cp++ = *awkpath++; + + if (cp != trypath) { /* nun-null element in path */ + /* add directory punctuation only if needed */ + if (! isdirpunct(*(cp-1))) + *cp++ = '/'; + /* append filename */ + strcpy(cp, file); + } else + strcpy(trypath, file); + if ((fd = devopen(trypath, "r")) > INVALID_HANDLE) + return (fd); + + /* no luck, keep going */ + if(*awkpath == envsep && awkpath[1] != '\0') + awkpath++; /* skip colon */ + } while (*awkpath != '\0'); + /* + * You might have one of the awk paths defined, WITHOUT the current + * working directory in it. Therefore try to open the file in the + * current directory. + */ + return (devopen(file, "r")); +} + +#ifdef TEST +int bufsize = 8192; + +void +fatal(s) +char *s; +{ + printf("%s\n", s); + exit(1); +} +#endif + +/* iop_alloc --- allocate an IOBUF structure for an open fd */ + +static IOBUF * +iop_alloc(fd, name, iop) +int fd; +const char *name; +IOBUF *iop; +{ + struct stat sbuf; + + if (fd == INVALID_HANDLE) + return NULL; + if (iop == NULL) + emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc"); + iop->flag = 0; + if (isatty(fd)) + iop->flag |= IOP_IS_TTY; + iop->size = optimal_bufsize(fd, & sbuf); + if (do_lint && S_ISREG(sbuf.st_mode) && sbuf.st_size == 0) + warning("data file `%s' is empty", name); + iop->secsiz = -2; + errno = 0; + iop->fd = fd; + iop->off = iop->buf = NULL; + iop->cnt = 0; + iop->name = name; + iop->getrec = get_a_record; +#ifdef HAVE_MMAP + if (S_ISREG(sbuf.st_mode) && sbuf.st_size > 0) { + register char *cp; + + iop->buf = iop->off = mmap((caddr_t) 0, sbuf.st_size, + PROT_READ|PROT_WRITE, MAP_PRIVATE, + fd, 0L); + /* cast is for buggy compilers (e.g. DEC OSF/1) */ + if (iop->buf == (caddr_t)MAP_FAILED) { + iop->buf = iop->off = NULL; + goto out; + } + + iop->flag |= IOP_MMAPPED; + iop->size = sbuf.st_size; + iop->secsiz = 0; + iop->end = iop->buf + iop->size; + iop->cnt = sbuf.st_size; + iop->getrec = mmap_get_record; + (void) close(fd); + iop->fd = INVALID_HANDLE; + +#if defined(HAVE_MADVISE) && defined(MADV_SEQUENTIAL) + madvise(iop->buf, iop->size, MADV_SEQUENTIAL); +#endif + /* + * The following is a really gross hack. + * We want to ensure that we have a copy of the input + * data that won't go away, on the off chance that someone + * will truncate the data file we've just mmap'ed. + * So, we go through and touch each page, forcing the + * system to give us a private copy. A page size of 512 + * guarantees this will work, even on the least common + * denominator system (like, oh say, a VAX). + */ + for (cp = iop->buf; cp < iop->end; cp += 512) + *cp = *cp; + } +out: +#endif /* HAVE_MMAP */ + return iop; +} + +/* These macros used by both record reading routines */ +#define set_RT_to_null() \ + (void)(! do_traditional && (unref(RT_node->var_value), \ + RT_node->var_value = Nnull_string)) + +#define set_RT(str, len) \ + (void)(! do_traditional && (unref(RT_node->var_value), \ + RT_node->var_value = make_string(str, len))) + +/* + * get_a_record: + * Get the next record. Uses a "split buffer" where the latter part is + * the normal read buffer and the head part is an "overflow" area that is used + * when a record spans the end of the normal buffer, in which case the first + * part of the record is copied into the overflow area just before the + * normal buffer. Thus, the eventual full record can be returned as a + * contiguous area of memory with a minimum of copying. The overflow area + * is expanded as needed, so that records are unlimited in length. + * We also mark both the end of the buffer and the end of the read() with + * a sentinel character (the current record separator) so that the inside + * loop can run as a single test. + * + * Note that since we know or can compute the end of the read and the end + * of the buffer, the sentinel character does not get in the way of regexp + * based searching, since we simply search up to that character, but not + * including it. + */ + +static int +get_a_record(out, iop, grRS, RSre, errcode) +char **out; /* pointer to pointer to data */ +IOBUF *iop; /* input IOP */ +register int grRS; /* first char in RS->stptr */ +Regexp *RSre; /* regexp for RS */ +int *errcode; /* pointer to error variable */ +{ + register char *bp = iop->off; + char *bufend; + char *start = iop->off; /* beginning of record */ + int rs; + static Regexp *RS_null_re = NULL; + Regexp *rsre = NULL; + int continuing = FALSE, continued = FALSE; /* used for re matching */ + int onecase; + + /* first time through */ + if (RS_null_re == NULL) { + RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE); + if (RS_null_re == NULL) + fatal("internal error: file `%s', line %d\n", + __FILE__, __LINE__); + } + + if (iop->cnt == EOF) { /* previous read hit EOF */ + *out = NULL; + set_RT_to_null(); + return EOF; + } + + if (grRS == FALSE) /* special case: RS == "" */ + rs = '\n'; + else + rs = (char) grRS; + + onecase = (IGNORECASE && isalpha(rs)); + if (onecase) + rs = casetable[rs]; + + /* set up sentinel */ + if (iop->buf) { + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; /* add sentinel to buffer */ + } else + bufend = NULL; + + for (;;) { /* break on end of record, read error or EOF */ +/* buffer mgmt, chunk #1 */ + /* + * Following code is entered on the first call of this routine + * for a new iop, or when we scan to the end of the buffer. + * In the latter case, we copy the current partial record to + * the space preceding the normal read buffer. If necessary, + * we expand this space. This is done so that we can return + * the record as a contiguous area of memory. + */ + if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) { + char *oldbuf = NULL; + char *oldsplit = iop->buf + iop->secsiz; + long len; /* record length so far */ + + len = bp - start; + if (len > iop->secsiz) { + /* expand secondary buffer */ + if (iop->secsiz == -2) + iop->secsiz = 256; + while (len > iop->secsiz) + iop->secsiz *= 2; + oldbuf = iop->buf; + emalloc(iop->buf, char *, + iop->size+iop->secsiz+2, "get_a_record"); + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; + } + if (len > 0) { + char *newsplit = iop->buf + iop->secsiz; + + if (start < oldsplit) { + memcpy(newsplit - len, start, + oldsplit - start); + memcpy(newsplit - (bp - oldsplit), + oldsplit, bp - oldsplit); + } else + memcpy(newsplit - len, start, len); + } + bp = iop->end = iop->off = iop->buf + iop->secsiz; + start = bp - len; + if (oldbuf != NULL) { + free(oldbuf); + oldbuf = NULL; + } + } +/* buffer mgmt, chunk #2 */ + /* + * Following code is entered whenever we have no more data to + * scan. In most cases this will read into the beginning of + * the main buffer, but in some cases (terminal, pipe etc.) + * we may be doing smallish reads into more advanced positions. + */ + if (bp >= iop->end) { + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->cnt = EOF; + break; + } + iop->cnt = read(iop->fd, iop->end, bufend - iop->end); + if (iop->cnt == -1) { + if (! do_traditional && errcode != NULL) { + *errcode = errno; + iop->cnt = EOF; + break; + } else + fatal("error reading input file `%s': %s", + iop->name, strerror(errno)); + } else if (iop->cnt == 0) { + /* + * hit EOF before matching RS, so end + * the record and set RT to "" + */ + iop->cnt = EOF; + /* see comments below about this test */ + if (! continuing) { + set_RT_to_null(); + break; + } + } + if (iop->cnt != EOF) { + iop->end += iop->cnt; + *iop->end = rs; /* reset the sentinel */ + } + } +/* buffers are now setup and filled with data */ +/* search for RS, #1, regexp based, or RS = "" */ + /* + * Attempt to simplify the code a bit. The case where + * RS = "" can also be described by a regexp, RS = "\n\n+". + * The buffer managment and searching code can thus now + * use a common case (the one for regexps) both when RS is + * a regexp, and when RS = "". This particularly benefits + * us for keeping track of how many newlines were matched + * in order to set RT. + */ + if (! do_traditional && RSre != NULL) /* regexp */ + rsre = RSre; + else if (grRS == FALSE) /* RS = "" */ + rsre = RS_null_re; + else + rsre = NULL; + + /* + * Look for regexp match of RS. Non-match conditions are: + * 1. No match at all + * 2. Match of a null string + * 3. Match ends at exact end of buffer + * Number 3 is subtle; we have to add more to the buffer + * in case the match would have extended further into the + * file, since regexp match by definition always matches the + * longest possible match. + * + * It is even more subtle than you might think. Suppose + * the re matches at exactly the end of file. We don't know + * that until we try to add more to the buffer. Thus, we + * set a flag to indicate, that if eof really does happen, + * don't break early. + */ + continuing = FALSE; + if (rsre != NULL) { + again: + /* cases 1 and 2 are simple, just keep going */ + if (research(rsre, start, 0, iop->end - start, TRUE) == -1 + || RESTART(rsre, start) == REEND(rsre, start)) { + bp = iop->end; + continue; + } + /* case 3, regex match at exact end */ + if (start + REEND(rsre, start) >= iop->end) { + if (iop->cnt != EOF) { + bp = iop->end; + continuing = continued = TRUE; + continue; + } + } + /* got a match! */ + /* + * Leading newlines at the beginning of the file + * should be ignored. Whew! + */ + if (grRS == FALSE && RESTART(rsre, start) == 0) { + start += REEND(rsre, start); + goto again; + } + bp = start + RESTART(rsre, start); + set_RT(bp, REEND(rsre, start) - RESTART(rsre, start)); + *bp = '\0'; + iop->off = start + REEND(rsre, start); + break; + } +/* search for RS, #2, RS = <single char> */ + if (onecase) { + while (casetable[(int) *bp++] != rs) + continue; + } else { + while (*bp++ != rs) + continue; + } + set_RT(bp - 1, 1); + + if (bp <= iop->end) + break; + else + bp--; + + if ((iop->flag & IOP_IS_INTERNAL) != 0) + iop->cnt = bp - start; + } + if (iop->cnt == EOF + && (((iop->flag & IOP_IS_INTERNAL) != 0) + || (start == bp && ! continued))) { + *out = NULL; + set_RT_to_null(); + return EOF; + } + + if (do_traditional || rsre == NULL) { + char *bstart; + + bstart = iop->off = bp; + bp--; + if (onecase ? casetable[(int) *bp] != rs : *bp != rs) { + bp++; + bstart = bp; + } + *bp = '\0'; + } else if (grRS == FALSE && iop->cnt == EOF) { + /* + * special case, delete trailing newlines, + * should never be more than one. + */ + while (bp[-1] == '\n') + bp--; + *bp = '\0'; + } + + *out = start; + return bp - start; +} + +#ifdef TEST +int +main(argc, argv) +int argc; +char *argv[]; +{ + IOBUF *iop; + char *out; + int cnt; + char rs[2]; + + rs[0] = '\0'; + if (argc > 1) + bufsize = atoi(argv[1]); + if (argc > 2) + rs[0] = *argv[2]; + iop = iop_alloc(0, "stdin", NULL); + while ((cnt = get_a_record(&out, iop, rs[0], NULL, NULL)) > 0) { + fwrite(out, 1, cnt, stdout); + fwrite(rs, 1, 1, stdout); + } + return 0; +} +#endif + +#ifdef HAVE_MMAP +/* mmap_get_record --- pull a record out of a memory-mapped file */ + +static int +mmap_get_record(out, iop, grRS, RSre, errcode) +char **out; /* pointer to pointer to data */ +IOBUF *iop; /* input IOP */ +register int grRS; /* first char in RS->stptr */ +Regexp *RSre; /* regexp for RS */ +int *errcode; /* pointer to error variable */ +{ + register char *bp = iop->off; + char *start = iop->off; /* beginning of record */ + int rs; + static Regexp *RS_null_re = NULL; + Regexp *rsre = NULL; + int onecase; + register char *end = iop->end; + int cnt; + + /* first time through */ + if (RS_null_re == NULL) { + RS_null_re = make_regexp("\n\n+", 3, TRUE, TRUE); + if (RS_null_re == NULL) + fatal("internal error: file `%s', line %d\n", + __FILE__, __LINE__); + } + + if (iop->off >= iop->end) { /* previous record was last */ + *out = NULL; + set_RT_to_null(); + iop->cnt = EOF; /* tested by higher level code */ + return EOF; + } + + if (grRS == FALSE) /* special case: RS == "" */ + rs = '\n'; + else + rs = (char) grRS; + + onecase = (IGNORECASE && isalpha(rs)); + if (onecase) + rs = casetable[rs]; + + /* if RS = "", skip leading newlines at the front of the file */ + if (grRS == FALSE && iop->off == iop->buf) { + for (bp = iop->off; *bp == '\n'; bp++) + continue; + + if (bp != iop->off) + iop->off = start = bp; + } + + /* + * Regexp based searching. Either RS = "" or RS = <regex> + * See comments in get_a_record. + */ + if (! do_traditional && RSre != NULL) /* regexp */ + rsre = RSre; + else if (grRS == FALSE) /* RS = "" */ + rsre = RS_null_re; + else + rsre = NULL; + + /* + * Look for regexp match of RS. Non-match conditions are: + * 1. No match at all + * 2. Match of a null string + * 3. Match ends at exact end of buffer + * + * #1 means that the record ends the file + * and there is no text that actually matched RS. + * + * #2: is probably like #1. + * + * #3 is simple; since we have the whole file mapped, it's + * the last record in the file. + */ + if (rsre != NULL) { + if (research(rsre, start, 0, iop->end - start, TRUE) == -1 + || RESTART(rsre, start) == REEND(rsre, start)) { + /* no matching text, we have the record */ + *out = start; + iop->off = iop->end; /* all done with the record */ + set_RT_to_null(); + /* special case, don't allow trailing newlines */ + if (grRS == FALSE && *(iop->end - 1) == '\n') + return iop->end - start - 1; + else + return iop->end - start; + + } + /* have a match */ + *out = start; + bp = start + RESTART(rsre, start); + set_RT(bp, REEND(rsre, start) - RESTART(rsre, start)); + *bp = '\0'; + iop->off = start + REEND(rsre, start); + return bp - start; + } + + /* + * RS = "?", i.e., one character based searching. + * + * Alas, we can't just plug the sentinel character in at + * the end of the mmapp'ed file ( *(iop->end) = rs; ). This + * works if we're lucky enough to have a file that does not + * take up all of its last disk block. But if we end up with + * file whose size is an even multiple of the disk block size, + * assigning past the end of it delivers a SIGBUS. So, we have to + * add the extra test in the while loop at the front that looks + * for going past the end of the mapped object. Sigh. + */ + /* search for RS, #2, RS = <single char> */ + if (onecase) { + while (bp < end && casetable[*bp++] != rs) + continue; + } else { + while (bp < end && *bp++ != rs) + continue; + } + cnt = (bp - start) - 1; + if (bp >= iop->end) { + /* at end, may have actually seen rs, or may not */ + if (*(bp-1) == rs) + set_RT(bp - 1, 1); /* real RS seen */ + else { + cnt++; + set_RT_to_null(); + } + } else + set_RT(bp - 1, 1); + + iop->off = bp; + *out = start; + return cnt; +} +#endif /* HAVE_MMAP */ + +/* set_RS --- update things as appropriate when RS is set */ + +void +set_RS() +{ + static NODE *save_rs = NULL; + + if (save_rs && cmp_nodes(RS_node->var_value, save_rs) == 0) + return; + unref(save_rs); + save_rs = dupnode(RS_node->var_value); + RS_is_null = FALSE; + RS = force_string(RS_node->var_value); + if (RS_regexp != NULL) { + refree(RS_regexp); + RS_regexp = NULL; + } + if (RS->stlen == 0) + RS_is_null = TRUE; + else if (RS->stlen > 1) + RS_regexp = make_regexp(RS->stptr, RS->stlen, IGNORECASE, TRUE); + + set_FS_if_not_FIELDWIDTHS(); +} |