diff options
Diffstat (limited to 'gnu/usr.bin/awk/iop.c')
-rw-r--r-- | gnu/usr.bin/awk/iop.c | 318 |
1 files changed, 318 insertions, 0 deletions
diff --git a/gnu/usr.bin/awk/iop.c b/gnu/usr.bin/awk/iop.c new file mode 100644 index 0000000..0d7af12 --- /dev/null +++ b/gnu/usr.bin/awk/iop.c @@ -0,0 +1,318 @@ +/* + * iop.c - do i/o related things. + */ + +/* + * Copyright (C) 1986, 1988, 1989, 1991, 1992 the Free Software Foundation, Inc. + * + * This file is part of GAWK, the GNU implementation of the + * AWK Progamming Language. + * + * GAWK is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * GAWK is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GAWK; see the file COPYING. If not, write to + * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include "awk.h" + +#ifndef atarist +#define INVALID_HANDLE (-1) +#else +#include <stddef.h> +#include <fcntl.h> +#define INVALID_HANDLE (__SMALLEST_VALID_HANDLE - 1) +#endif /* atarist */ + + +#ifdef TEST +int bufsize = 8192; + +void +fatal(s) +char *s; +{ + printf("%s\n", s); + exit(1); +} +#endif + +int +optimal_bufsize(fd) +int fd; +{ + struct stat stb; + +#ifdef VMS + /* + * These values correspond with the RMS multi-block count used by + * vms_open() in vms/vms_misc.c. + */ + if (isatty(fd) > 0) + return BUFSIZ; + else if (fstat(fd, &stb) < 0) + return 8*512; /* conservative in case of DECnet access */ + else + return 24*512; + +#else + /* + * System V doesn't have the file system block size in the + * stat structure. So we have to make some sort of reasonable + * guess. We use stdio's BUFSIZ, since that is what it was + * meant for in the first place. + */ +#ifdef BLKSIZE_MISSING +#define DEFBLKSIZE BUFSIZ +#else +#define DEFBLKSIZE (stb.st_blksize ? stb.st_blksize : BUFSIZ) +#endif + +#ifdef TEST + return bufsize; +#else +#ifndef atarist + if (isatty(fd)) +#else + /* + * On ST redirected stdin does not have a name attached + * (this could be hard to do to) and fstat would fail + */ + if (0 == fd || isatty(fd)) +#endif /*atarist */ + return BUFSIZ; +#ifndef BLKSIZE_MISSING + /* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */ + stb.st_blksize = 0; +#endif + if (fstat(fd, &stb) == -1) + fatal("can't stat fd %d (%s)", fd, strerror(errno)); + if (lseek(fd, (off_t)0, 0) == -1) + return DEFBLKSIZE; + return ((int) (stb.st_size < DEFBLKSIZE ? stb.st_size : DEFBLKSIZE)); +#endif /*! TEST */ +#endif /*! VMS */ +} + +IOBUF * +iop_alloc(fd) +int fd; +{ + IOBUF *iop; + + if (fd == INVALID_HANDLE) + return NULL; + emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc"); + iop->flag = 0; + if (isatty(fd)) + iop->flag |= IOP_IS_TTY; + iop->size = optimal_bufsize(fd); + iop->secsiz = -2; + errno = 0; + iop->fd = fd; + iop->off = iop->buf = NULL; + iop->cnt = 0; + return iop; +} + +/* + * Get the next record. Uses a "split buffer" where the latter part is + * the normal read buffer and the head part is an "overflow" area that is used + * when a record spans the end of the normal buffer, in which case the first + * part of the record is copied into the overflow area just before the + * normal buffer. Thus, the eventual full record can be returned as a + * contiguous area of memory with a minimum of copying. The overflow area + * is expanded as needed, so that records are unlimited in length. + * We also mark both the end of the buffer and the end of the read() with + * a sentinel character (the current record separator) so that the inside + * loop can run as a single test. + */ +int +get_a_record(out, iop, grRS, errcode) +char **out; +IOBUF *iop; +register int grRS; +int *errcode; +{ + register char *bp = iop->off; + char *bufend; + char *start = iop->off; /* beginning of record */ + int saw_newline; + char rs; + int eat_whitespace; + + if (iop->cnt == EOF) /* previous read hit EOF */ + return EOF; + + if (grRS == 0) { /* special case: grRS == "" */ + rs = '\n'; + eat_whitespace = 0; + saw_newline = 0; + } else + rs = (char) grRS; + + /* set up sentinel */ + if (iop->buf) { + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; + } else + bufend = NULL; + + for (;;) { /* break on end of record, read error or EOF */ + + /* Following code is entered on the first call of this routine + * for a new iop, or when we scan to the end of the buffer. + * In the latter case, we copy the current partial record to + * the space preceding the normal read buffer. If necessary, + * we expand this space. This is done so that we can return + * the record as a contiguous area of memory. + */ + if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) { + char *oldbuf = NULL; + char *oldsplit = iop->buf + iop->secsiz; + long len; /* record length so far */ + + if ((iop->flag & IOP_IS_INTERNAL) != 0) + cant_happen(); + + len = bp - start; + if (len > iop->secsiz) { + /* expand secondary buffer */ + if (iop->secsiz == -2) + iop->secsiz = 256; + while (len > iop->secsiz) + iop->secsiz *= 2; + oldbuf = iop->buf; + emalloc(iop->buf, char *, + iop->size+iop->secsiz+2, "get_a_record"); + bufend = iop->buf + iop->size + iop->secsiz; + *bufend = rs; + } + if (len > 0) { + char *newsplit = iop->buf + iop->secsiz; + + if (start < oldsplit) { + memcpy(newsplit - len, start, + oldsplit - start); + memcpy(newsplit - (bp - oldsplit), + oldsplit, bp - oldsplit); + } else + memcpy(newsplit - len, start, len); + } + bp = iop->end = iop->off = iop->buf + iop->secsiz; + start = bp - len; + if (oldbuf) { + free(oldbuf); + oldbuf = NULL; + } + } + /* Following code is entered whenever we have no more data to + * scan. In most cases this will read into the beginning of + * the main buffer, but in some cases (terminal, pipe etc.) + * we may be doing smallish reads into more advanced positions. + */ + if (bp >= iop->end) { + if ((iop->flag & IOP_IS_INTERNAL) != 0) { + iop->cnt = EOF; + break; + } + iop->cnt = read(iop->fd, iop->end, bufend - iop->end); + if (iop->cnt == -1) { + if (! do_unix && errcode != NULL) { + *errcode = errno; + iop->cnt = EOF; + break; + } else + fatal("error reading input: %s", + strerror(errno)); + } else if (iop->cnt == 0) { + iop->cnt = EOF; + break; + } + iop->end += iop->cnt; + *iop->end = rs; + } + if (grRS == 0) { + extern int default_FS; + + if (default_FS && (bp == start || eat_whitespace)) { + while (bp < iop->end && isspace(*bp)) + bp++; + if (bp == iop->end) { + eat_whitespace = 1; + continue; + } else + eat_whitespace = 0; + } + if (saw_newline && *bp == rs) { + bp++; + break; + } + saw_newline = 0; + } + + while (*bp++ != rs) + ; + + if (bp <= iop->end) { + if (grRS == 0) + saw_newline = 1; + else + break; + } else + bp--; + + if ((iop->flag & IOP_IS_INTERNAL) != 0) + iop->cnt = bp - start; + } + if (iop->cnt == EOF + && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) + return EOF; + + iop->off = bp; + bp--; + if (*bp != rs) + bp++; + *bp = '\0'; + if (grRS == 0) { + if (*--bp == rs) + *bp = '\0'; + else + bp++; + } + + *out = start; + return bp - start; +} + +#ifdef TEST +main(argc, argv) +int argc; +char *argv[]; +{ + IOBUF *iop; + char *out; + int cnt; + char rs[2]; + + rs[0] = 0; + if (argc > 1) + bufsize = atoi(argv[1]); + if (argc > 2) + rs[0] = *argv[2]; + iop = iop_alloc(0); + while ((cnt = get_a_record(&out, iop, rs[0], NULL)) > 0) { + fwrite(out, 1, cnt, stdout); + fwrite(rs, 1, 1, stdout); + } +} +#endif |