summaryrefslogtreecommitdiffstats
path: root/gnu/usr.bin/awk/iop.c
blob: 6b6a03b3a00a071096c557696f95d6cef0e5619d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/*
 * iop.c - do i/o related things.
 */

/*
 * Copyright (C) 1986, 1988, 1989, 1991, 1992, 1993 the Free Software Foundation, Inc.
 *
 * This file is part of GAWK, the GNU implementation of the
 * AWK Programming Language.
 *
 * GAWK is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * GAWK is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GAWK; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include "awk.h"

/*
 * INVALID_HANDLE is the descriptor value that iop_alloc() refuses to
 * wrap.  It is -1 unless atarist is defined, in which case it is one
 * less than __SMALLEST_VALID_HANDLE.
 */
#ifndef atarist
#define INVALID_HANDLE (-1)
#else
#include <stddef.h>
#include <fcntl.h>
#define INVALID_HANDLE  (__SMALLEST_VALID_HANDLE - 1)
#endif  /* atarist */


#ifdef TEST
/*
 * TEST builds only: the value optimal_bufsize() returns.  main() may
 * replace the default with the number given in argv[1].
 */
int bufsize = 8192;

/*
 * fatal --- TEST-only definition of the error routine.
 *
 * Writes the string s followed by a newline to standard output and
 * terminates the program with exit status 1.  It does not return.
 */
void
fatal(s)
char *s;
{
	fprintf(stdout, "%s\n", s);
	exit(1);
}
#endif

/*
 * optimal_bufsize --- pick a read-buffer size for the open descriptor fd.
 *
 * On VMS the result is BUFSIZ for a terminal, 8*512 when fstat() fails
 * and 32*512 otherwise.  On other systems a TEST build simply returns
 * the global bufsize; a normal build returns
 *	- BUFSIZ for a terminal (and, when atarist is defined, for
 *	  descriptor 0);
 *	- DEFBLKSIZE for a descriptor that cannot be seeked;
 *	- the file's own length for a seekable, non-empty file that is
 *	  shorter than DEFBLKSIZE;
 *	- DEFBLKSIZE in every other case.
 * DEFBLKSIZE is the st_blksize reported by fstat(), or BUFSIZ when that
 * field is missing or zero.  fatal() is called if fstat() fails.
 */
int
optimal_bufsize(fd)
int fd;
{
	struct stat stb;

#ifdef VMS
	/*
	 * These values correspond with the RMS multi-block count used by
	 * vms_open() in vms/vms_misc.c.
	 */
	if (isatty(fd) > 0)
		return BUFSIZ;
	else if (fstat(fd, &stb) < 0)
		return 8*512;	/* conservative in case of DECnet access */
	else
		return 32*512;

#else
	/*
	 * System V doesn't have the file system block size in the
	 * stat structure. So we have to make some sort of reasonable
	 * guess. We use stdio's BUFSIZ, since that is what it was
	 * meant for in the first place.
	 */
#ifdef BLKSIZE_MISSING
#define	DEFBLKSIZE	BUFSIZ
#else
#define DEFBLKSIZE	(stb.st_blksize ? stb.st_blksize : BUFSIZ)
#endif

#ifdef TEST
	return bufsize;
#else
#ifndef atarist
	if (isatty(fd))
#else
	/*
	 * On ST redirected stdin does not have a name attached
	 * (this could be hard to do too) and fstat would fail
	 */
	if (0 == fd || isatty(fd))
#endif  /*atarist */
		return BUFSIZ;
#ifndef BLKSIZE_MISSING
	/* VMS POSIX 1.0: st_blksize is never assigned a value, so zero it */
	stb.st_blksize = 0;
#endif
	if (fstat(fd, &stb) == -1)
		fatal("can't stat fd %d (%s)", fd, strerror(errno));
	/*
	 * Seekability probe only.  Whence 1 (SEEK_CUR) with offset 0 just
	 * reports the current position; whence 0 would rewind a
	 * descriptor that was inherited part-way through a file.
	 */
	if (lseek(fd, (off_t)0, 1) == -1)
		return DEFBLKSIZE;
	/*
	 * Shrink to the file size only when it is positive.  A size of
	 * zero would make get_a_record() ask read() for zero bytes and
	 * take the resulting 0 for end of file, even if the file (whose
	 * reported size may simply be unknown) has data to deliver.
	 */
	if (stb.st_size > 0 && stb.st_size < DEFBLKSIZE)
		return (int) stb.st_size;
	return DEFBLKSIZE;
#endif	/*! TEST */
#endif	/*! VMS */
}

/*
 * iop_alloc --- create a new IOBUF for the open descriptor fd.
 *
 * Returns NULL when fd is INVALID_HANDLE.  Otherwise returns a freshly
 * allocated IOBUF that has no data buffer yet (buf and off are NULL,
 * cnt is 0, secsiz is -2), whose size comes from optimal_bufsize(), and
 * whose flag has IOP_IS_TTY set when fd is a terminal.  errno is reset
 * to 0 before returning.
 */
IOBUF *
iop_alloc(fd)
int fd;
{
	IOBUF *iop;

	if (fd == INVALID_HANDLE)
		return NULL;

	emalloc(iop, IOBUF *, sizeof(IOBUF), "iop_alloc");

	/* fields that do not depend on what kind of descriptor this is */
	iop->fd = fd;
	iop->buf = NULL;
	iop->off = NULL;
	iop->cnt = 0;
	iop->secsiz = -2;	/* get_a_record() turns -2 into 256 on first use */

	/* fields that require asking the system about fd */
	iop->flag = isatty(fd) ? IOP_IS_TTY : 0;
	iop->size = optimal_bufsize(fd);

	/* the queries above may have left a value in errno; clear it */
	errno = 0;
	return iop;
}

/*
 * Get the next record.  Uses a "split buffer" where the latter part is
 * the normal read buffer and the head part is an "overflow" area that is used
 * when a record spans the end of the normal buffer, in which case the first
 * part of the record is copied into the overflow area just before the
 * normal buffer.  Thus, the eventual full record can be returned as a
 * contiguous area of memory with a minimum of copying.  The overflow area
 * is expanded as needed, so that records are unlimited in length.
 * We also mark both the end of the buffer and the end of the read() with
 * a sentinel character (the current record separator) so that the inside
 * loop can run as a single test.
 */
int
get_a_record(out, iop, grRS, errcode)
char **out;
IOBUF *iop;
register int grRS;
int *errcode;
{
	register char *bp = iop->off;
	char *bufend;
	char *start = iop->off;			/* beginning of record */
	char rs;
	int saw_newline = 0, eat_whitespace = 0;	/* used iff grRS==0 */

	if (iop->cnt == EOF) {	/* previous read hit EOF */
		*out = NULL;
		return EOF;
	}

	if (grRS == 0) {	/* special case:  grRS == "" */
		rs = '\n';
	} else
		rs = (char) grRS;

	/* set up sentinel */
	if (iop->buf) {
		bufend = iop->buf + iop->size + iop->secsiz;
		*bufend = rs;
	} else
		bufend = NULL;

	for (;;) {	/* break on end of record, read error or EOF */

		/* Following code is entered on the first call of this routine
		 * for a new iop, or when we scan to the end of the buffer.
		 * In the latter case, we copy the current partial record to
		 * the space preceding the normal read buffer.  If necessary,
		 * we expand this space.  This is done so that we can return
		 * the record as a contiguous area of memory.
		 */
		if ((iop->flag & IOP_IS_INTERNAL) == 0 && bp >= bufend) {
			char *oldbuf = NULL;
			char *oldsplit = iop->buf + iop->secsiz;
			long len;	/* record length so far */

			len = bp - start;
			if (len > iop->secsiz) {
				/* expand secondary buffer */
				if (iop->secsiz == -2)
					iop->secsiz = 256;
				while (len > iop->secsiz)
					iop->secsiz *= 2;
				oldbuf = iop->buf;
				emalloc(iop->buf, char *,
				    iop->size+iop->secsiz+2, "get_a_record");
				bufend = iop->buf + iop->size + iop->secsiz;
				*bufend = rs;
			}
			if (len > 0) {
				char *newsplit = iop->buf + iop->secsiz;

				if (start < oldsplit) {
					memcpy(newsplit - len, start,
							oldsplit - start);
					memcpy(newsplit - (bp - oldsplit),
							oldsplit, bp - oldsplit);
				} else
					memcpy(newsplit - len, start, len);
			}
			bp = iop->end = iop->off = iop->buf + iop->secsiz;
			start = bp - len;
			if (oldbuf) {
				free(oldbuf);
				oldbuf = NULL;
			}
		}
		/* Following code is entered whenever we have no more data to
		 * scan.  In most cases this will read into the beginning of
		 * the main buffer, but in some cases (terminal, pipe etc.)
		 * we may be doing smallish reads into more advanced positions.
		 */
		if (bp >= iop->end) {
			if ((iop->flag & IOP_IS_INTERNAL) != 0) {
				iop->cnt = EOF;
				break;
			}
			iop->cnt = read(iop->fd, iop->end, bufend - iop->end);
			if (iop->cnt == -1) {
				if (! do_unix && errcode != NULL) {
					*errcode = errno;
					iop->cnt = EOF;
					break;
				} else
					fatal("error reading input: %s",
						strerror(errno));
			} else if (iop->cnt == 0) {
				iop->cnt = EOF;
				break;
			}
			iop->end += iop->cnt;
			*iop->end = rs;
		}
		if (grRS == 0) {
			extern int default_FS;

			if (default_FS && (bp == start || eat_whitespace)) {
				while (bp < iop->end
				  	&& (*bp == ' ' || *bp == '\t' || *bp == '\n'))
					bp++;
				if (bp == iop->end) {
					eat_whitespace = 1;
					continue;
				} else
					eat_whitespace = 0;
			}
			if (saw_newline && *bp == rs) {
				bp++;
				break;
			}
			saw_newline = 0;
		}

		while (*bp++ != rs)
			;

		if (bp <= iop->end) {
			if (grRS == 0)
				saw_newline = 1;
			else
				break;
		} else
			bp--;

		if ((iop->flag & IOP_IS_INTERNAL) != 0)
			iop->cnt = bp - start;
	}
	if (iop->cnt == EOF
	    && (((iop->flag & IOP_IS_INTERNAL) != 0) || start == bp)) {
		*out = NULL;
		return EOF;
	}

	iop->off = bp;
	bp--;
	if (*bp != rs)
		bp++;
	*bp = '\0';
	if (grRS == 0) {
		/* there could be more newlines left, clean 'em out now */
		while (*(iop->off) == rs && iop->off <= iop->end)
			(iop->off)++;

		if (*--bp == rs)
			*bp = '\0';
		else
			bp++;
	}

	*out = start;
	return bp - start;
}

#ifdef TEST
/*
 * main --- TEST driver: copy records from descriptor 0 to standard output.
 *
 * argv[1], if given, replaces the default bufsize.  The first character
 * of argv[2], if given, is used as the record separator; otherwise the
 * separator is 0, which selects the RS == "" mode of get_a_record().
 * Every record is written out followed by one separator byte.
 */
int
main(argc, argv)
int argc;
char *argv[];
{
	IOBUF *iop;
	char *out;
	int cnt;
	char rs[2];

	rs[0] = 0;
	if (argc > 1)
		bufsize = atoi(argv[1]);
	if (argc > 2)
		rs[0] = *argv[2];
	iop = iop_alloc(0);
	/*
	 * An empty record has length 0 and is still a record, so only
	 * EOF may end the loop.
	 */
	while ((cnt = get_a_record(&out, iop, rs[0], NULL)) != EOF) {
		fwrite(out, 1, cnt, stdout);
		fwrite(rs, 1, 1, stdout);
	}
	return 0;
}
#endif
OpenPOWER on IntegriCloud