/*
 * iop.c - do i/o related things.
 */

/*
 * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "awk.h"

#ifndef atarist
#define INVALID_HANDLE (-1)
#else
/*
 * NOTE(review): the header names on the next two lines were missing from
 * the copy of this file under review (bare "#include" directives).
 * <stddef.h> and <fcntl.h> are presumed -- confirm against the atarist port.
 */
#include <stddef.h>
#include <fcntl.h>
#define INVALID_HANDLE  (__SMALLEST_VALID_HANDLE - 1)
#endif  /* atarist */

#ifdef TEST
/* buffer size handed out by optimal_bufsize() in the stand-alone test build */
int bufsize = 8192;

/*
 * fatal --- minimal stand-in used only by the stand-alone test build.
 *
 * Prints the message text as-is followed by a newline and exits with
 * status 1.  Callers in this file pass printf-style arguments after the
 * message; this stub does not format them.
 */
void
fatal(s)
char *s;
{
	printf("%s\n", s);
	exit(1);
}
#endif

/*
 * optimal_bufsize --- choose the size of the main read buffer for fd.
 *
 * VMS:    BUFSIZ for a terminal, 8*512 if fstat() fails, else 32*512.
 * TEST:   the global `bufsize'.
 * Others: BUFSIZ for a terminal (on atarist also for fd 0); fatal() if
 *         fstat() fails; DEFBLKSIZE if the descriptor cannot seek;
 *         otherwise the file size when it is non-zero and smaller than
 *         DEFBLKSIZE, else DEFBLKSIZE.
 */
int
optimal_bufsize(fd)
int fd;
{
	struct stat stb;

#ifdef VMS
	/*
	 * These values correspond with the RMS multi-block count used by
	 * vms_open() in vms/vms_misc.c.
	 */
	if (isatty(fd) > 0)
		return BUFSIZ;
	else if (fstat(fd, &stb) < 0)
		return 8*512;	/* conservative in case of DECnet access */
	else
		return 32*512;

#else
	/*
	 * System V doesn't have the file system block size in the
	 * stat structure. So we have to make some sort of reasonable
	 * guess. We use stdio's BUFSIZ, since that is what it was
	 * meant for in the first place.
	 */
#ifdef BLKSIZE_MISSING
#define DEFBLKSIZE	BUFSIZ
#else
#define DEFBLKSIZE	(stb.st_blksize ? stb.st_blksize : BUFSIZ)
#endif

#ifdef TEST
	return bufsize;
#else
#ifndef atarist
	if (isatty(fd))
#else
	/*
	 * On ST redirected stdin does not have a name attached
	 * (this could be hard to do to) and fstat would fail
	 */
	if (0 == fd || isatty(fd))
#endif  /*atarist */
		return BUFSIZ;
#ifndef BLKSIZE_MISSING
	/* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */
	stb.st_blksize = 0;
#endif
	if (fstat(fd, &stb) == -1)
		fatal("can't stat fd %d (%s)", fd, strerror(errno));
	/* a descriptor that cannot seek gets the default block size */
	if (lseek(fd, (off_t)0, 0) == -1)
		return DEFBLKSIZE;
	/*
	 * Use the file size only when it is non-zero.  A size of 0 must
	 * never be returned: get_a_record() would then call read() with a
	 * byte count of 0, receive 0 and treat that as end of file, so a
	 * file that reports st_size == 0 but still yields data could not
	 * be read at all.
	 */
	if (stb.st_size > 0 && stb.st_size < DEFBLKSIZE)	/* small file */
		return (int) stb.st_size;
	return DEFBLKSIZE;
#endif	/*! TEST */
#endif	/*! VMS */
}

/*
 * iop_alloc --- allocate and initialize an IOBUF for descriptor fd.
 *
 * Returns NULL when fd is INVALID_HANDLE.  Otherwise returns a new IOBUF
 * with no data buffer yet (buf == NULL); secsiz is set to -2 so that the
 * first call of get_a_record() sees "record length > secsiz" and
 * allocates the buffer.  errno is cleared before returning.
 */
IOBUF *
iop_alloc(fd)
int fd;
{
	IOBUF *iop;

	if (fd == INVALID_HANDLE)
		return NULL;
	emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");
	iop->flag = 0;
	if (isatty(fd))
		iop->flag |= IOP_IS_TTY;
	iop->size = optimal_bufsize(fd);
	iop->secsiz = -2;
	errno = 0;
	iop->fd = fd;
	/* no buffer yet; `end' is cleared too so it never holds garbage */
	iop->off = iop->buf = iop->end = NULL;
	iop->cnt = 0;
	return iop;
}

/*
 * Get the next record.  Uses a "split buffer" where the latter part is
 * the normal read buffer and the head part is an "overflow" area that is used
 * when a record spans the end of the normal buffer, in which case the first
 * part of the record is copied into the overflow area just before the
 * normal buffer.  Thus, the eventual full record can be returned as a
 * contiguous area of memory with a minimum of copying.  The overflow area
 * is expanded as needed, so that records are unlimited in length.
 * We also mark both the end of the buffer and the end of the read() with
 * a sentinel character (the current record separator) so that the inside
 * loop can run as a single test.
 *
 * Parameters:
 *	out	receives a pointer to the start of the record (NUL
 *		terminated in place), or NULL at end of input.
 *	iop	the input buffer state; updated in place.
 *	grRS	the record separator character; 0 selects the special
 *		RS == "" mode, in which '\n' is the separator character and
 *		a record ends at a newline immediately followed by another.
 *	errcode	if non-NULL and do_unix is not set, a read() error is
 *		stored here and reported as EOF; otherwise a read error
 *		is fatal.
 *
 * Returns the record length, or EOF when no record is available.
 */
int
get_a_record(out, iop, grRS, errcode)
char **out;
IOBUF *iop;
register int grRS;
int *errcode;
{
	register char *bp = iop->off;
	char *bufend;
	char *start = iop->off;			/* beginning of record */
	char rs;
	int saw_newline = 0, eat_whitespace = 0;	/* used iff grRS==0 */

	if (iop->cnt == EOF) {	/* previous read hit EOF */
		*out = NULL;
		return EOF;
	}

	if (grRS == 0) {	/* special case:  grRS == "" */
		rs = '\n';
	} else
		rs = (char) grRS;

	/* set up sentinel */
	if (iop->buf) {
		bufend = iop->buf + iop->size + iop->secsiz;
		*bufend = rs;
	} else
		bufend = NULL;

	for (;;) {	/* break on end of record, read error or EOF */

		/* Following code is entered on the first call of this routine
		 * for a new iop, or when we scan to the end of the buffer.
		 * In the latter case, we copy the current partial record to
		 * the space preceding the normal read buffer.  If necessary,
		 * we expand this space.  This is done so that we can return
		 * the record as a contiguous area of memory.
		 */
		if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
			char *oldbuf = NULL;
			char *oldsplit = iop->buf + iop->secsiz;
			long len;	/* record length so far */

			len = bp - start;
			if (len > iop->secsiz) {
				/* expand secondary buffer */
				if (iop->secsiz == -2)	/* fresh iop, see iop_alloc() */
					iop->secsiz = 256;
				while (len > iop->secsiz)
					iop->secsiz *= 2;
				oldbuf = iop->buf;
				/* +2: room for the sentinel stored at bufend */
				emalloc(iop->buf, char *,
				    iop->size+iop->secsiz+2, "get_a_record");
				bufend = iop->buf + iop->size + iop->secsiz;
				*bufend = rs;
			}
			if (len > 0) {
				char *newsplit = iop->buf + iop->secsiz;

				/*
				 * NOTE(review): when the buffer was not
				 * reallocated and the record began in the
				 * overflow area, source and destination of
				 * the first memcpy() below look like they can
				 * overlap -- confirm, and consider memmove().
				 */
				if (start < oldsplit) {
					/* record straddles the old split point */
					memcpy(newsplit - len, start,
							oldsplit - start);
					memcpy(newsplit - (bp - oldsplit),
							oldsplit, bp - oldsplit);
				} else
					memcpy(newsplit - len, start, len);
			}
			/* main buffer is now empty; partial record sits just before it */
			bp = iop->end = iop->off = iop->buf + iop->secsiz;
			start = bp - len;
			if (oldbuf) {
				free(oldbuf);
				oldbuf = NULL;
			}
		}
		/* Following code is entered whenever we have no more data to
		 * scan.  In most cases this will read into the beginning of
		 * the main buffer, but in some cases (terminal, pipe etc.)
		 * we may be doing smallish reads into more advanced positions.
		 */
		if (bp >= iop->end) {
			if ((iop->flag & IOP_IS_INTERNAL) != 0) {
				/* internal buffers are never refilled */
				iop->cnt = EOF;
				break;
			}
			iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
			if (iop->cnt == -1) {
				if (! do_unix && errcode != NULL) {
					*errcode = errno;
					iop->cnt = EOF;
					break;
				} else
					fatal("error reading input: %s",
						strerror(errno));
			} else if (iop->cnt == 0) {
				iop->cnt = EOF;
				break;
			}
			iop->end += iop->cnt;
			*iop->end = rs;		/* sentinel after the data just read */
		}

		if (grRS == 0) {
			extern int default_FS;

			/* with the default FS, skip leading blanks, tabs and newlines */
			if (default_FS && (bp == start || eat_whitespace)) {
				while (bp < iop->end
				  && (*bp == ' ' || *bp == '\t' || *bp == '\n'))
					bp++;
				if (bp == iop->end) {
					/* ran out of data while skipping; get more */
					eat_whitespace = 1;
					continue;
				} else
					eat_whitespace = 0;
			}

			/* second consecutive newline ends the record */
			if (saw_newline && *bp == rs) {
				bp++;
				break;
			}
			saw_newline = 0;
		}

		/* the sentinel guarantees this scan terminates */
		while (*bp++ != rs)
			;

		if (bp <= iop->end) {
			/* found a real separator, not the sentinel */
			if (grRS == 0)
				saw_newline = 1;
			else
				break;
		} else
			bp--;	/* stopped on the sentinel; step back onto it */

		if ((iop->flag & IOP_IS_INTERNAL) != 0)
			iop->cnt = bp - start;
	}
	if (iop->cnt == EOF
	  && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) {
		*out = NULL;
		return EOF;
	}

	iop->off = bp;
	/* drop the terminating separator, if there is one, and terminate */
	bp--;
	if (*bp != rs)
		bp++;
	*bp = '\0';
	if (grRS == 0) {
		/*
		 * There could be more newlines left, clean 'em out now.
		 * The bounds test comes first and excludes iop->end itself:
		 * that position holds the sentinel (== rs), and stepping
		 * over it would make the next call start scanning one byte
		 * past the place where the next read() stores its data.
		 */
		while (iop->off < iop->end && *(iop->off) == rs)
			(iop->off)++;

		if (*--bp == rs)
			*bp = '\0';
		else
			bp++;
	}

	*out = start;
	return bp - start;
}

#ifdef TEST
/*
 * Stand-alone test driver: copies records from standard input to
 * standard output.  argv[1], if given, sets the buffer size; the first
 * character of argv[2], if given, is the record separator (default 0,
 * i.e. the RS == "" mode).  Each record is written followed by the
 * separator byte.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	IOBUF *iop;
	char *out;
	int cnt;
	char rs[2];

	rs[0] = 0;
	if (argc > 1)
		bufsize = atoi(argv[1]);
	if (argc > 2)
		rs[0] = *argv[2];
	iop = iop_alloc(0);
	/* stop on EOF only: a return of 0 is a valid, empty record */
	while ((cnt = get_a_record(&out, iop, rs[0], NULL)) != EOF) {
		fwrite(out, 1, cnt, stdout);
		fwrite(rs, 1, 1, stdout);
	}
	return 0;
}
#endif