diff options
Diffstat (limited to 'contrib/sort')
-rw-r--r-- | contrib/sort/Makefile | 7 | ||||
-rw-r--r-- | contrib/sort/append.c | 205 | ||||
-rw-r--r-- | contrib/sort/extern.h | 69 | ||||
-rw-r--r-- | contrib/sort/fields.c | 339 | ||||
-rw-r--r-- | contrib/sort/files.c | 372 | ||||
-rw-r--r-- | contrib/sort/fsort.c | 358 | ||||
-rw-r--r-- | contrib/sort/fsort.h | 77 | ||||
-rw-r--r-- | contrib/sort/init.c | 355 | ||||
-rw-r--r-- | contrib/sort/msort.c | 390 | ||||
-rw-r--r-- | contrib/sort/pathnames.h | 41 | ||||
-rw-r--r-- | contrib/sort/sort.1 | 441 | ||||
-rw-r--r-- | contrib/sort/sort.c | 332 | ||||
-rw-r--r-- | contrib/sort/sort.h | 149 | ||||
-rw-r--r-- | contrib/sort/tmp.c | 84 |
14 files changed, 3219 insertions, 0 deletions
diff --git a/contrib/sort/Makefile b/contrib/sort/Makefile new file mode 100644 index 0000000..7c12a50 --- /dev/null +++ b/contrib/sort/Makefile @@ -0,0 +1,7 @@ +# $NetBSD: Makefile,v 1.4 2001/02/19 17:45:24 jdolecek Exp $ +# from: @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= sort +SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c + +.include <bsd.prog.mk> diff --git a/contrib/sort/append.c b/contrib/sort/append.c new file mode 100644 index 0000000..d02a96f --- /dev/null +++ b/contrib/sort/append.c @@ -0,0 +1,205 @@ +/* $NetBSD: append.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" + +#ifndef lint +__RCSID("$NetBSD: append.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $"); +__SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <stdlib.h> +#include <string.h> + +#define OUTPUT { \ + if ((n = cpos - ppos) > 1) { \ + for (; ppos < cpos; ++ppos) \ + *ppos -= odepth; \ + ppos -= n; \ + if (stable_sort) \ + sradixsort(ppos, n, wts1, REC_D); \ + else \ + radixsort(ppos, n, wts1, REC_D); \ + for (; ppos < cpos; ppos++) { \ + prec = (const RECHEADER *) (*ppos - sizeof(TRECHEADER));\ + put(prec, fp); \ + } \ + } else put(prec, fp); \ +} + +/* + * copy sorted lines to output; check for uniqueness + */ +void +append(keylist, nelem, depth, fp, put, ftbl) + const u_char **keylist; + int nelem; + int depth; + FILE *fp; + put_func_t put; + struct field *ftbl; +{ + u_char *wts, *wts1; + int n, odepth; + const u_char **cpos, **ppos, **lastkey; + const u_char *cend, *pend, *start; + const struct recheader *crec, *prec; + + if (*keylist == '\0' && UNIQUE) + return; + wts1 = wts = ftbl[0].weights; + if ((!UNIQUE) && SINGL_FLD) { + if ((ftbl[0].flags & F) && (ftbl[0].flags & R)) + wts1 = Rascii; + else if (ftbl[0].flags & F) + wts1 = ascii; + odepth = depth; + } + lastkey = keylist + nelem; + depth += sizeof(TRECHEADER); + if (SINGL_FLD && (UNIQUE || wts1 != wts)) { + ppos = keylist; + prec = (const RECHEADER *) (*ppos - depth); + if (UNIQUE) + put(prec, fp); + for (cpos = &keylist[1]; cpos < lastkey; cpos++) { + crec = (const RECHEADER *) (*cpos - depth); + if (crec->length == prec->length) { + /* + * Set pend and cend so that trailing NUL and + * record separator is ignored. + */ + pend = (const u_char *) &prec->data + prec->length - 2; + cend = (const u_char *) &crec->data + crec->length - 2; + for (start = *cpos; cend >= start; cend--) { + if (wts[*cend] != wts[*pend]) + break; + pend--; + } + if (pend + 1 != *ppos) { + if (!UNIQUE) { + OUTPUT; + } else + put(crec, fp); + ppos = cpos; + prec = crec; + } + } else { + if (!UNIQUE) { + OUTPUT; + } else + put(crec, fp); + ppos = cpos; + prec = crec; + } + } + if (!UNIQUE) { OUTPUT; } + } else if (UNIQUE) { + ppos = keylist; + prec = (const RECHEADER *) (*ppos - depth); + put(prec, fp); + for (cpos = &keylist[1]; cpos < lastkey; cpos++) { + crec = (const RECHEADER *) (*cpos - depth); + if (crec->offset == prec->offset) { + /* + * Set pend and cend so that trailing NUL and + * record separator is ignored. + */ + pend = (const u_char *) &prec->data + prec->offset - 2; + cend = (const u_char *) &crec->data + crec->offset - 2; + for (start = *cpos; cend >= start; cend--) { + if (wts[*cend] != wts[*pend]) + break; + pend--; + } + if (pend + 1 != *ppos) { + ppos = cpos; + prec = crec; + put(prec, fp); + } + } else { + ppos = cpos; + prec = crec; + put(prec, fp); + } + } + } else for (cpos = keylist; cpos < lastkey; cpos++) { + crec = (const RECHEADER *) (*cpos - depth); + put(crec, fp); + } +} + +/* + * output the already sorted eol bin. + */ +void +rd_append(binno, infl0, nfiles, outfp, buffer, bufend) + u_char *buffer; + int infl0; + int binno, nfiles; + FILE *outfp; + u_char *bufend; +{ + RECHEADER *rec; + + rec = (RECHEADER *) buffer; + if (!getnext(binno, infl0, NULL, nfiles, + (RECHEADER *) buffer, bufend, 0)) { + putline(rec, outfp); + while (getnext(binno, infl0, NULL, nfiles, (RECHEADER *) buffer, + bufend, 0) == 0) { + if (!UNIQUE) + putline(rec, outfp); + } + } +} + +/* + * append plain text--used after sorting the biggest bin. + */ +void +concat(a, b) + FILE *a, *b; +{ + int nread; + char buffer[4096]; + + rewind(b); + while ((nread = fread(buffer, 1, 4096, b)) > 0) + EWRITE(buffer, 1, nread, a); +} diff --git a/contrib/sort/extern.h b/contrib/sort/extern.h new file mode 100644 index 0000000..56fdb58 --- /dev/null +++ b/contrib/sort/extern.h @@ -0,0 +1,69 @@ +/* $NetBSD: extern.h,v 1.6 2001/02/19 20:50:17 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.1 (Berkeley) 6/6/93 + */ + +void append __P((const u_char **, int, int, FILE *, + void (*)(const RECHEADER *, FILE *), struct field *)); +void concat __P((FILE *, FILE *)); +length_t enterkey __P((RECHEADER *, DBT *, int, struct field *)); +void fixit __P((int *, char **)); +void fldreset __P((struct field *)); +FILE *ftmp __P((void)); +void fmerge __P((int, int, struct filelist *, int, + get_func_t, FILE *, put_func_t, struct field *)); +void fsort __P((int, int, int, struct filelist *, int, FILE *, + struct field *)); +int geteasy __P((int, int, struct filelist *, + int, RECHEADER *, u_char *, struct field *)); +int getnext __P((int, int, struct filelist *, + int, RECHEADER *, u_char *, struct field *)); +int makekey __P((int, int, struct filelist *, + int, RECHEADER *, u_char *, struct field *)); +int makeline __P((int, int, struct filelist *, + int, RECHEADER *, u_char *, struct field *)); +void merge __P((int, int, get_func_t, FILE *, put_func_t, struct field *)); +void num_init __P((void)); +void onepass __P((const u_char **, int, long, long *, u_char *, FILE *)); +int optval __P((int, int)); +void order __P((struct filelist *, get_func_t, struct field *)); +void putline __P((const RECHEADER *, FILE *)); +void putrec __P((const RECHEADER *, FILE *)); +void rd_append __P((int, int, int, FILE *, u_char *, u_char *)); +int setfield __P((const char *, struct field *, int)); +void settables __P((int)); diff --git a/contrib/sort/fields.c b/contrib/sort/fields.c new file mode 100644 index 0000000..12242f6 --- /dev/null +++ b/contrib/sort/fields.c @@ -0,0 +1,339 @@ +/* $NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Subroutines to generate sort keys. */ + +#include "sort.h" + +#ifndef lint +__RCSID("$NetBSD: fields.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $"); +__SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#define blancmange(ptr) { \ + if (BLANK & d_mask[*(ptr)]) \ + while (BLANK & d_mask[*(++(ptr))]); \ +} + +#define NEXTCOL(pos) { \ + if (!SEP_FLAG) \ + while (BLANK & l_d_mask[*(++pos)]); \ + while (!((FLD_D | REC_D_F) & l_d_mask[*++pos])); \ +} + +static u_char *enterfield __P((u_char *, u_char *, struct field *, int)); +static u_char *number __P((u_char *, u_char *, u_char *, u_char *, int)); + +extern struct coldesc clist[(ND+1)*2]; +extern int ncols; + +#define DECIMAL '.' +#define OFFSET 128 + +u_char TENS[10]; /* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */ +u_char NEGTENS[10]; /* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */ +u_char *OFF_TENS, *OFF_NTENS; /* TENS - '0', NEGTENS - '0' */ +u_char fnum[NBINS], rnum[NBINS]; + +/* + * constructs sort key with leading recheader, followed by the key, + * followed by the original line. + */ +length_t +enterkey(keybuf, line, size, fieldtable) + RECHEADER *keybuf; /* pointer to start of key */ + DBT *line; + int size; + struct field fieldtable[]; +{ + int i; + u_char *l_d_mask; + u_char *lineend, *pos; + u_char *endkey, *keypos; + struct coldesc *clpos; + int col = 1; + struct field *ftpos; + l_d_mask = d_mask; + pos = (u_char *) line->data - 1; + lineend = (u_char *) line->data + line->size-1; + /* don't include rec_delimiter */ + + for (i = 0; i < ncols; i++) { + clpos = clist + i; + for (; (col < clpos->num) && (pos < lineend); col++) { + NEXTCOL(pos); + } + if (pos >= lineend) + break; + clpos->start = SEP_FLAG ? pos + 1 : pos; + NEXTCOL(pos); + clpos->end = pos; + col++; + if (pos >= lineend) { + clpos->end = lineend; + i++; + break; + } + } + for (; i <= ncols; i++) + clist[i].start = clist[i].end = lineend; + if (clist[0].start < (u_char *) line->data) + clist[0].start++; + + keypos = keybuf->data; + endkey = (u_char *) keybuf + size - line->size; + for (ftpos = fieldtable + 1; ftpos->icol.num; ftpos++) + if ((keypos = enterfield(keypos, endkey, ftpos, + fieldtable->flags)) == NULL) + return (1); + + keybuf->offset = keypos - keybuf->data; + keybuf->length = keybuf->offset + line->size; + if (keybuf->length + sizeof(TRECHEADER) > size) { + /* line too long for buffer */ + return (1); + } + + /* + * Make [s]radixsort() only sort by relevant part of key if: + * 1. we want to choose unique items by relevant field[s] + * 2. we want stable sort and so the items should be sorted only by + * the relevant field[s] + */ + if (UNIQUE || (stable_sort && keybuf->offset < line->size)) + keypos[-1] = REC_D; + + memcpy(keybuf->data + keybuf->offset, line->data, line->size); + return (0); +} + +/* + * constructs a field (as defined by -k) within a key + */ +static u_char * +enterfield(tablepos, endkey, cur_fld, gflags) + struct field *cur_fld; + u_char *tablepos, *endkey; + int gflags; +{ + u_char *start, *end, *lineend, *mask, *lweight; + struct column icol, tcol; + u_int flags; + u_int Rflag; + + icol = cur_fld->icol; + tcol = cur_fld->tcol; + flags = cur_fld->flags; + start = icol.p->start; + lineend = clist[ncols].end; + if (flags & BI) + blancmange(start); + start += icol.indent; + start = min(start, lineend); + + if (!tcol.num) + end = lineend; + else { + if (tcol.indent) { + end = tcol.p->start; + if (flags & BT) + blancmange(end); + end += tcol.indent; + end = min(end, lineend); + } else + end = tcol.p->end; + } + + if (flags & N) { + Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0; + return number(tablepos, endkey, start, end, Rflag); + } + + mask = cur_fld->mask; + lweight = cur_fld->weights; + for (; start < end; start++) + if (mask[*start]) { + if (*start <= 1) { + if (tablepos+2 >= endkey) + return (NULL); + *tablepos++ = lweight[1]; + *tablepos++ = lweight[*start ? 2 : 1]; + } else { + if (tablepos+1 >= endkey) + return (NULL); + *tablepos++ = lweight[*start]; + } + } + *tablepos++ = lweight[0]; + return (tablepos == endkey ? NULL : tablepos); +} + +/* Uses the first bin to assign sign, expsign, 0, and the first + * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ). + * When sorting in forward order: + * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128; + * else use (0-99)->(2-102). + * If the exponent is >=61, use another byte for each additional 253 + * in the exponent. Cutoff is at 567. + * To avoid confusing the exponent and the mantissa, use a field delimiter + * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the + * only time a field delimiter can come in that position. + * Reverse order is done analagously. + */ + +static u_char * +number(pos, bufend, line, lineend, Rflag) + u_char *line, *pos, *bufend, *lineend; + int Rflag; +{ + int or_sign, parity = 0; + int expincr = 1, exponent = -1; + int bite, expsign = 1, sign = 1; + u_char lastvalue, *nonzero, *tline, *C_TENS; + u_char *nweights; + + if (Rflag) + nweights = rnum; + else + nweights = fnum; + if (pos > bufend - 8) + return (NULL); + /* + * or_sign sets the sort direction: + * (-r: +/-)(sign: +/-)(expsign: +/-) + */ + or_sign = sign ^ expsign ^ Rflag; + blancmange(line); + if (*line == '-') { /* set the sign */ + or_sign ^= 1; + sign = 0; + line++; + } + /* eat initial zeroes */ + for (; *line == '0' && line < lineend; line++) + ; + /* calculate exponents < 0 */ + if (*line == DECIMAL) { + exponent = 1; + while (*++line == '0' && line < lineend) + exponent++; + expincr = 0; + expsign = 0; + } + /* next character better be a digit */ + if (*line < '1' || *line > '9' || line >= lineend) { + *pos++ = nweights[127]; + return (pos); + } + if (expincr) { + for (tline = line-1; *++tline >= '0' && + *tline <= '9' && tline < lineend;) + exponent++; + } + if (exponent > 567) { + *pos++ = nweights[sign ? (expsign ? 254 : 128) + : (expsign ? 0 : 126)]; + warnx("exponent out of bounds"); + return (pos); + } + bite = min(exponent, 61); + *pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite) + : (expsign ? 64-bite : 64+bite)]; + if (bite >= 61) { + do { + exponent -= bite; + bite = min(exponent, 254); + *pos++ = nweights[or_sign ? 254-bite : bite]; + } while (bite == 254); + } + C_TENS = or_sign ? OFF_NTENS : OFF_TENS; + for (; line < lineend; line++) { + if (*line >= '0' && *line <= '9') { + if (parity) { + *pos++ = C_TENS[lastvalue] + (or_sign ? - *line + : *line); + if (pos == bufend) + return (NULL); + if (*line != '0' || lastvalue != '0') + nonzero = pos; + } else + lastvalue = *line; + parity ^= 1; + } else if(*line == DECIMAL) { + if(!expincr) /* a decimal already occurred once */ + break; + expincr = 0; + } else + break; + } + if (parity && lastvalue != '0') { + *pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' : + OFF_TENS[lastvalue] + '0'; + } else + pos = nonzero; + if (pos > bufend-1) + return (NULL); + *pos++ = or_sign ? nweights[254] : nweights[0]; + return (pos); +} + +/* This forces a gap around the record delimiter + * Thus fnum has vaues over (0,254) -> ((0,REC_D-1),(REC_D+1,255)); + * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0)) + */ +void +num_init() +{ + int i; + TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0'; + NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0'; + OFF_TENS = TENS - '0'; + OFF_NTENS = NEGTENS - '0'; + for (i = 1; i < 10; i++) { + TENS[i] = TENS[i - 1] + 10; + NEGTENS[i] = NEGTENS[i - 1] - 10; + } + for (i = 0; i < REC_D; i++) { + fnum[i] = i; + rnum[255 - i] = i; + } + for (i = REC_D; i <255; i++) { + fnum[i] = i + 1; + rnum[255 - i] = i - 1; + } +} diff --git a/contrib/sort/files.c b/contrib/sort/files.c new file mode 100644 index 0000000..7c7a58b --- /dev/null +++ b/contrib/sort/files.c @@ -0,0 +1,372 @@ +/* $NetBSD: files.c,v 1.17 2001/05/15 11:18:23 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" +#include "fsort.h" + +#ifndef lint +__RCSID("$NetBSD: files.c,v 1.17 2001/05/15 11:18:23 jdolecek Exp $"); +__SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <string.h> + +static int seq __P((FILE *, DBT *, DBT *)); + +/* + * this is the subroutine for file management for fsort(). + * It keeps the buffers for all temporary files. + */ +int +getnext(binno, infl0, filelist, nfiles, pos, end, dummy) + int binno, infl0; + struct filelist *filelist; + int nfiles; + RECHEADER *pos; + u_char *end; + struct field *dummy; +{ + int i; + u_char *hp; + static size_t nleft = 0; + static int cnt = 0, flag = -1; + static u_char maxb = 0; + static FILE *fp; + + if (nleft == 0) { + if (binno < 0) /* reset files. */ { + for (i = 0; i < nfiles; i++) { + rewind(fstack[infl0 + i].fp); + fstack[infl0 + i].max_o = 0; + } + flag = -1; + nleft = cnt = 0; + return (-1); + } + maxb = fstack[infl0].maxb; + for (; nleft == 0; cnt++) { + if (cnt >= nfiles) { + cnt = 0; + return (EOF); + } + fp = fstack[infl0 + cnt].fp; + fread(&nleft, sizeof(nleft), 1, fp); + if (binno < maxb) + fstack[infl0+cnt].max_o + += sizeof(nleft) + nleft; + else if (binno == maxb) { + if (binno != fstack[infl0].lastb) { + fseek(fp, fstack[infl0+ + cnt].max_o, SEEK_SET); + fread(&nleft, sizeof(nleft), 1, fp); + } + if (nleft == 0) + fclose(fp); + } else if (binno == maxb + 1) { /* skip a bin */ + fseek(fp, nleft, SEEK_CUR); + fread(&nleft, sizeof(nleft), 1, fp); + flag = cnt; + } + } + } + if ((u_char *) pos > end - sizeof(TRECHEADER)) + return (BUFFEND); + fread(pos, sizeof(TRECHEADER), 1, fp); + if (end - pos->data < pos->length) { + hp = ((u_char *)pos) + sizeof(TRECHEADER); + for (i = sizeof(TRECHEADER); i ; i--) + ungetc(*--hp, fp); + return (BUFFEND); + } + fread(pos->data, pos->length, 1, fp); + nleft -= pos->length + sizeof(TRECHEADER); + if (nleft == 0 && binno == fstack[infl0].maxb) + fclose(fp); + return (0); +} + +/* + * this is called when there is no special key. It's only called + * in the first fsort pass. + */ +int +makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) + int flno, top; + struct filelist *filelist; + int nfiles; + RECHEADER *recbuf; + u_char *bufend; + struct field *dummy2; +{ + static u_char *obufend; + static size_t osz; + char *pos; + static int filenum = 0, overflow = 0; + static FILE *fp = 0; + int c; + + pos = (char *) recbuf->data; + if (overflow) { + /* + * Buffer shortage is solved by either of two ways: + * o flush previous buffered data and start using the + * buffer from start (see fsort()) + * o realloc buffer and bump bufend + * + * The former is preferred, realloc is only done when + * there is exactly one item in buffer which does not fit. + */ + if (bufend == obufend) + memmove(pos, bufend - osz, osz); + + pos += osz; + overflow = 0; + } + for (;;) { + if (flno >= 0 && (fp = fstack[flno].fp) == NULL) + return (EOF); + else if (fp == NULL) { + if (filenum >= nfiles) + return (EOF); + if (!(fp = fopen(filelist->names[filenum], "r"))) + err(2, "%s", filelist->names[filenum]); + filenum++; + } + while ((pos < (char *)bufend) && ((c = getc(fp)) != EOF)) { + if ((*pos++ = c) == REC_D) { + recbuf->offset = 0; + recbuf->length = pos - (char *) recbuf->data; + return (0); + } + } + if (pos >= (char *)bufend) { + if (recbuf->data < bufend) { + overflow = 1; + obufend = bufend; + osz = (pos - (char *) recbuf->data); + } + return (BUFFEND); + } else if (c == EOF) { + if (recbuf->data != (u_char *) pos) { + *pos++ = REC_D; + recbuf->offset = 0; + recbuf->length = pos - (char *) recbuf->data; + return (0); + } + FCLOSE(fp); + fp = 0; + if (flno >= 0) + fstack[flno].fp = 0; + } else { + + warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); + + /* Consume the rest of line from input */ + while((c = getc(fp)) != REC_D && c != EOF) + ; + + recbuf->offset = 0; + recbuf->length = 0; + + return (BUFFEND); + } + } +} + +/* + * This generates keys. It's only called in the first fsort pass + */ +int +makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) + int flno, top; + struct filelist *filelist; + int nfiles; + RECHEADER *recbuf; + u_char *bufend; + struct field *ftbl; +{ + static int filenum = 0; + static FILE *dbdesc = 0; + static DBT dbkey[1], line[1]; + static int overflow = 0; + int c; + + if (overflow) { + overflow = enterkey(recbuf, line, bufend - (u_char *)recbuf, + ftbl); + if (overflow) + return (BUFFEND); + else + return (0); + } + + for (;;) { + if (flno >= 0) { + if (!(dbdesc = fstack[flno].fp)) + return (EOF); + } else if (!dbdesc) { + if (filenum >= nfiles) + return (EOF); + dbdesc = fopen(filelist->names[filenum], "r"); + if (!dbdesc) + err(2, "%s", filelist->names[filenum]); + filenum++; + } + if (!(c = seq(dbdesc, line, dbkey))) { + if ((signed)line->size > bufend - recbuf->data) { + overflow = 1; + } else { + overflow = enterkey(recbuf, line, + bufend - (u_char *) recbuf, ftbl); + } + if (overflow) + return (BUFFEND); + else + return (0); + } + if (c == EOF) { + FCLOSE(dbdesc); + dbdesc = 0; + if (flno >= 0) + fstack[flno].fp = 0; + } else { + ((char *) line->data)[60] = '\000'; + warnx("makekey: line too long: ignoring %.100s...", + (char *)line->data); + } + } +} + +/* + * get a key/line pair from fp + */ +static int +seq(fp, line, key) + FILE *fp; + DBT *key, *line; +{ + static char *buf, flag = 1; + char *end, *pos; + int c; + + if (flag) { + flag = 0; + buf = (char *) linebuf; + end = buf + linebuf_size; + line->data = buf; + } + pos = buf; + while ((c = getc(fp)) != EOF) { + if ((*pos++ = c) == REC_D) { + line->size = pos - buf; + return (0); + } + if (pos == end) { + linebuf_size *= 2; + linebuf = realloc(linebuf, linebuf_size); + if (!linebuf) + err(2, "realloc of linebuf to %lu bytes failed", + (unsigned long)linebuf_size); + + end = linebuf + linebuf_size; + pos = linebuf + (pos - buf); + line->data = buf = (char *)linebuf; + continue; + } + } + if (pos != buf) { + *pos++ = REC_D; + line->size = pos - buf; + return (0); + } else + return (EOF); +} + +/* + * write a key/line pair to a temporary file + */ +void +putrec(rec, fp) + const RECHEADER *rec; + FILE *fp; +{ + EWRITE(rec, 1, rec->length + sizeof(TRECHEADER), fp); +} + +/* + * write a line to output + */ +void +putline(rec, fp) + const RECHEADER *rec; + FILE *fp; +{ + EWRITE(rec->data+rec->offset, 1, rec->length - rec->offset, fp); +} + +/* + * get a record from a temporary file. (Used by merge sort.) + */ +int +geteasy(flno, top, filelist, nfiles, rec, end, dummy2) + int flno, top; + struct filelist *filelist; + int nfiles; + RECHEADER *rec; + u_char *end; + struct field *dummy2; +{ + int i; + FILE *fp; + + fp = fstack[flno].fp; + if ((u_char *) rec > end - sizeof(TRECHEADER)) + return (BUFFEND); + if (!fread(rec, 1, sizeof(TRECHEADER), fp)) { + fclose(fp); + fstack[flno].fp = 0; + return (EOF); + } + if (end - rec->data < rec->length) { + for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) + ungetc(*((char *) rec + i), fp); + return (BUFFEND); + } + fread(rec->data, rec->length, 1, fp); + return (0); +} diff --git a/contrib/sort/fsort.c b/contrib/sort/fsort.c new file mode 100644 index 0000000..0847a56 --- /dev/null +++ b/contrib/sort/fsort.c @@ -0,0 +1,358 @@ +/* $NetBSD: fsort.c,v 1.20 2001/05/15 11:49:25 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Read in the next bin. If it fits in one segment sort it; + * otherwise refine it by segment deeper by one character, + * and try again on smaller bins. Sort the final bin at this level + * of recursion to keep the head of fstack at 0. + * After PANIC passes, abort to merge sort. + */ +#include "sort.h" +#include "fsort.h" + +#ifndef lint +__RCSID("$NetBSD: fsort.c,v 1.20 2001/05/15 11:49:25 jdolecek Exp $"); +__SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <stdlib.h> +#include <string.h> + +static const u_char **keylist = 0; +u_char *buffer = 0, *linebuf = 0; +size_t bufsize = DEFBUFSIZE; +size_t linebuf_size; +struct tempfile fstack[MAXFCT]; +extern char *toutpath; +#define FSORTMAX 4 +int PANIC = FSORTMAX; + +#define MSTART (MAXFCT - MERGE_FNUM) +#define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) + +void +fsort(binno, depth, top, filelist, nfiles, outfp, ftbl) + int binno, depth, top; + struct filelist *filelist; + int nfiles; + FILE *outfp; + struct field *ftbl; +{ + const u_char **keypos; + u_char *bufend, *tmpbuf; + u_char *weights; + int ntfiles, mfct = 0, total, i, maxb, lastb, panic = 0; + int c, nelem, base; + long sizes [NBINS+1]; + get_func_t get; + struct recheader *crec; + struct field tfield[2]; + FILE *prevfp, *tailfp[FSORTMAX+1]; + + memset(tailfp, 0, sizeof(tailfp)); + prevfp = outfp; + memset(tfield, 0, sizeof(tfield)); + if (ftbl[0].flags & R) + tfield[0].weights = Rascii; + else + tfield[0].weights = ascii; + tfield[0].icol.num = 1; + weights = ftbl[0].weights; + if (!buffer) { + buffer = malloc(bufsize); + keylist = malloc(MAXNUM * sizeof(u_char *)); + memset(keylist, 0, MAXNUM * sizeof(u_char *)); + if (!SINGL_FLD) { + linebuf_size = DEFLLEN; + if ((linebuf = malloc(linebuf_size)) == NULL) + errx(2, "cannot allocate memory"); + } + } + bufend = buffer + bufsize; + if (binno >= 0) { + base = top + nfiles; + get = getnext; + } else { + base = 0; + if (SINGL_FLD) + get = makeline; + else + get = makekey; + } + for (;;) { + memset(sizes, 0, sizeof(sizes)); + c = ntfiles = 0; + if (binno == weights[REC_D] && + !(SINGL_FLD && ftbl[0].flags & F)) { /* pop */ + rd_append(weights[REC_D], top, + nfiles, prevfp, buffer, bufend); + break; + } else if (binno == weights[REC_D]) { + depth = 0; /* start over on flat weights */ + ftbl = tfield; + weights = ftbl[0].weights; + } + while (c != EOF) { + keypos = keylist; + nelem = 0; + crec = (RECHEADER *) buffer; + + do_read: + while((c = get(binno, top, filelist, nfiles, crec, + bufend, ftbl)) == 0) { + *keypos++ = crec->data + depth; + if (++nelem == MAXNUM) { + c = BUFFEND; + break; + } + crec =(RECHEADER *) ((char *) crec + + SALIGN(crec->length) + sizeof(TRECHEADER)); + } + + if (c == BUFFEND && nelem < MAXNUM + && bufsize < MAXBUFSIZE) { + const u_char **keyp; + u_char *oldb = buffer; + + /* buffer was too small for data, allocate + * bigger buffer */ + bufsize *= 2; + buffer = realloc(buffer, bufsize); + if (!buffer) { + err(2, "failed to realloc buffer to %ld bytes", + (unsigned long) bufsize); + } + bufend = buffer + bufsize; + + /* patch up keylist[] */ + for(keyp = &keypos[-1]; keyp >= keylist; keyp--) + *keyp = buffer + (*keyp - oldb); + + crec = (RECHEADER *) (buffer + ((u_char *)crec - oldb)); + goto do_read; + } + + if (c != BUFFEND && !ntfiles && !mfct) { + /* do not push */ + continue; + } + + /* push */ + if (panic >= PANIC) { + fstack[MSTART + mfct].fp = ftmp(); + if ((stable_sort) + ? sradixsort(keylist, nelem, + weights, REC_D) + : radixsort(keylist, nelem, + weights, REC_D) ) + err(2, NULL); + append(keylist, nelem, depth, + fstack[MSTART + mfct].fp, putrec, + ftbl); + mfct++; + /* reduce number of open files */ + if (mfct == MERGE_FNUM ||(c == EOF && ntfiles)) { + /* + * Only copy extra incomplete crec + * data if there are any. + */ + int nodata = (bufend >= (u_char *)crec + && bufend <= crec->data); + + if (!nodata) { + tmpbuf = malloc(bufend - + crec->data); + memmove(tmpbuf, crec->data, + bufend - crec->data); + } + + fstack[base + ntfiles].fp = ftmp(); + fmerge(0, MSTART, filelist, + mfct, geteasy, fstack[base].fp, + putrec, ftbl); + ntfiles++; + mfct = 0; + + if (!nodata) { + memmove(crec->data, tmpbuf, + bufend - crec->data); + free(tmpbuf); + } + } + } else { + fstack[base + ntfiles].fp= ftmp(); + onepass(keylist, depth, nelem, sizes, + weights, fstack[base + ntfiles].fp); + ntfiles++; + } + } + if (!ntfiles && !mfct) { /* everything in memory--pop */ + if (nelem > 1 + && ((stable_sort) + ? sradixsort(keylist, nelem, weights, REC_D) + : radixsort(keylist, nelem, weights, REC_D) )) + err(2, NULL); + if (nelem > 0) + append(keylist, nelem, depth, outfp, putline, ftbl); + break; /* pop */ + } + if (panic >= PANIC) { + if (!ntfiles) + fmerge(0, MSTART, filelist, mfct, geteasy, + outfp, putline, ftbl); + else + fmerge(0, base, filelist, ntfiles, geteasy, + outfp, putline, ftbl); + break; + + } + total = maxb = lastb = 0; /* find if one bin dominates */ + for (i = 0; i < NBINS; i++) + if (sizes[i]) { + if (sizes[i] > sizes[maxb]) + maxb = i; + lastb = i; + total += sizes[i]; + } + if (sizes[maxb] < max((total / 2) , BUFSIZE)) + maxb = lastb; /* otherwise pop after last bin */ + fstack[base].lastb = lastb; + fstack[base].maxb = maxb; + + /* start refining next level. */ + getnext(-1, base, NULL, ntfiles, crec, bufend, 0); /* rewind */ + for (i = 0; i < maxb; i++) { + if (!sizes[i]) /* bin empty; step ahead file offset */ + getnext(i, base, NULL,ntfiles, crec, bufend, 0); + else + fsort(i, depth+1, base, filelist, ntfiles, + outfp, ftbl); + } + + get = getnext; + + if (lastb != maxb) { + if (prevfp != outfp) + tailfp[panic] = prevfp; + prevfp = ftmp(); + for (i = maxb+1; i <= lastb; i++) + if (!sizes[i]) + getnext(i, base, NULL, ntfiles, crec, + bufend,0); + else + fsort(i, depth+1, base, filelist, + ntfiles, prevfp, ftbl); + } + + /* sort biggest (or last) bin at this level */ + depth++; + panic++; + binno = maxb; + top = base; + nfiles = ntfiles; /* so overwrite them */ + } + if (prevfp != outfp) { + concat(outfp, prevfp); + fclose(prevfp); + } + for (i = panic; i >= 0; --i) + if (tailfp[i]) { + concat(outfp, tailfp[i]); + fclose(tailfp[i]); + } + + /* If on top level, free our structures */ + if (depth == 0) { + free(keylist), keylist = NULL; + free(buffer), buffer = NULL; + } +} + +/* + * This is one pass of radix exchange, dumping the bins to disk. + */ +#define swap(a, b, t) t = a, a = b, b = t +void +onepass(a, depth, n, sizes, tr, fp) + const u_char **a; + int depth; + long n, sizes[]; + u_char *tr; + FILE *fp; +{ + size_t tsizes[NBINS+1]; + const u_char **bin[257], ***bp, ***bpmax, **top[256], ***tp; + static int histo[256]; + int *hp; + int c; + const u_char **an, *t, **aj; + const u_char **ak, *r; + + memset(tsizes, 0, sizeof(tsizes)); + depth += sizeof(TRECHEADER); + an = &a[n]; + for (ak = a; ak < an; ak++) { + histo[c = tr[**ak]]++; + tsizes[c] += ((const RECHEADER *) (*ak -= depth))->length; + } + + bin[0] = a; + bpmax = bin + 256; + tp = top, hp = histo; + for (bp = bin; bp < bpmax; bp++) { + *tp++ = *(bp+1) = *bp + (c = *hp); + *hp++ = 0; + if (c <= 1) + continue; + } + for (aj = a; aj < an; *aj = r, aj = bin[c+1]) + for (r = *aj; aj < (ak = --top[c = tr[r[depth]]]) ;) + swap(*ak, r, t); + + for (ak = a, c = 0; c < 256; c++) { + an = bin[c+1]; + n = an - ak; + tsizes[c] += n * sizeof(TRECHEADER); + /* tell getnext how many elements in this bin, this segment. */ + EWRITE(&tsizes[c], sizeof(size_t), 1, fp); + sizes[c] += tsizes[c]; + for (; ak < an; ++ak) + putrec((const RECHEADER *) *ak, fp); + } +} diff --git a/contrib/sort/fsort.h b/contrib/sort/fsort.h new file mode 100644 index 0000000..41a7557 --- /dev/null +++ b/contrib/sort/fsort.h @@ -0,0 +1,77 @@ +/* $NetBSD: fsort.h,v 1.9 2001/05/14 21:45:20 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fsort.h 8.1 (Berkeley) 6/6/93 + */ + +#define BUFSIZE (1<<20) +#define MAXNUM 131072 /* low guess at average record count */ +#define BUFFEND (EOF-2) +#define MAXFCT 1000 +#define DEFLLEN 65536 + +/* + * Default (initial) and maximum size of record buffer for fsort(). + * Note that no more than MAXNUM records are stored in the buffer, + * even if the buffer is not full yet. + */ +#define DEFBUFSIZE (1 << 20) /* 1MB */ +#define MAXBUFSIZE (8 << 20) /* 10 MB */ + +/* + * Number of files merge() can merge in one pass. + * This should be power of two so that it's possible to use this value + * for rouding. + */ +#define MERGE_FNUM 16 + +extern u_char *buffer, *linebuf; +extern size_t bufsize, linebuf_size; + +/* temp files in the stack have a file descriptor, a largest bin (maxb) + * which becomes the last non-empty bin (lastb) when the actual largest + * bin is smaller than max(half the total file, BUFSIZE) + * Max_o is the offset of maxb so it can be sought after the other bins + * are sorted. +*/ +struct tempfile { + FILE *fp; + u_char maxb; + u_char lastb; + int max_o; +}; +extern struct tempfile fstack[MAXFCT]; diff --git a/contrib/sort/init.c b/contrib/sort/init.c new file mode 100644 index 0000000..3f5c068 --- /dev/null +++ b/contrib/sort/init.c @@ -0,0 +1,355 @@ +/* $NetBSD: init.c,v 1.6 2001/12/31 18:45:04 thorpej Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" + +#ifndef lint +__RCSID("$NetBSD: init.c,v 1.6 2001/12/31 18:45:04 thorpej Exp $"); +__SCCSID("@(#)init.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <ctype.h> +#include <string.h> + +static void insertcol __P((struct field *)); +static const char *setcolumn __P((const char *, struct field *, int)); +int setfield __P((const char *, struct field *, int)); + +extern struct coldesc clist[(ND+1)*2]; +extern int ncols; +u_char gweights[NBINS]; + +/* + * masks of ignored characters. Alltable is 256 ones. + */ +static u_char alltable[NBINS], dtable[NBINS], itable[NBINS]; + +/* + * clist (list of columns which correspond to one or more icol or tcol) + * is in increasing order of columns. + * Fields are kept in increasing order of fields. + */ + +/* + * keep clist in order--inserts a column in a sorted array + */ +static void +insertcol(field) + struct field *field; +{ + int i; + for (i = 0; i < ncols; i++) + if (field->icol.num <= clist[i].num) + break; + if (field->icol.num != clist[i].num) { + memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->icol.num; + ncols++; + } + if (field->tcol.num && field->tcol.num != field->icol.num) { + for (i = 0; i < ncols; i++) + if (field->tcol.num <= clist[i].num) + break; + if (field->tcol.num != clist[i].num) { + memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i)); + clist[i].num = field->tcol.num; + ncols++; + } + } +} + +/* + * matches fields with the appropriate columns--n^2 but who cares? + */ +void +fldreset(fldtab) + struct field *fldtab; +{ + int i; + fldtab[0].tcol.p = clist+ncols-1; + for (++fldtab; fldtab->icol.num; ++fldtab) { + for (i = 0; fldtab->icol.num != clist[i].num; i++) + ; + fldtab->icol.p = clist + i; + if (!fldtab->tcol.num) + continue; + for (i = 0; fldtab->tcol.num != clist[i].num; i++) + ; + fldtab->tcol.p = clist + i; + } +} + +/* + * interprets a column in a -k field + */ +static const char * +setcolumn(pos, cur_fld, gflag) + const char *pos; + struct field *cur_fld; + int gflag; +{ + struct column *col; + int tmp; + col = cur_fld->icol.num ? (&(*cur_fld).tcol) : (&(*cur_fld).icol); + pos += sscanf(pos, "%d", &(col->num)); + while (isdigit(*pos)) + pos++; + if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol))) + errx(2, "field numbers must be positive"); + if (*pos == '.') { + if (!col->num) + errx(2, "cannot indent end of line"); + ++pos; + pos += sscanf(pos, "%d", &(col->indent)); + while (isdigit(*pos)) + pos++; + if (&cur_fld->icol == col) + col->indent--; + if (col->indent < 0) + errx(2, "illegal offset"); + } + if (optval(*pos, cur_fld->tcol.num)) + while ((tmp = optval(*pos, cur_fld->tcol.num))) { + cur_fld->flags |= tmp; + pos++; + } + if (cur_fld->icol.num == 0) + cur_fld->icol.num = 1; + return (pos); +} + +int +setfield(pos, cur_fld, gflag) + const char *pos; + struct field *cur_fld; + int gflag; +{ + static int nfields = 0; + int tmp; + + if (++nfields == ND) + errx(2, "too many sort keys. (Limit is %d)", ND-1); + + cur_fld->weights = ascii; + cur_fld->mask = alltable; + + pos = setcolumn(pos, cur_fld, gflag); + if (*pos == '\0') /* key extends to EOL. */ + cur_fld->tcol.num = 0; + else { + if (*pos != ',') + errx(2, "illegal field descriptor"); + setcolumn((++pos), cur_fld, gflag); + } + if (!cur_fld->flags) + cur_fld->flags = gflag; + tmp = cur_fld->flags; + + /* + * Assign appropriate mask table and weight table. + * If the global weights are reversed, the local field + * must be "re-reversed". + */ + if (((tmp & R) ^ (gflag & R)) && (tmp & F)) + cur_fld->weights = RFtable; + else if (tmp & F) + cur_fld->weights = Ftable; + else if ((tmp & R) ^ (gflag & R)) + cur_fld->weights = Rascii; + + if (tmp & I) + cur_fld->mask = itable; + else if (tmp & D) + cur_fld->mask = dtable; + + cur_fld->flags |= (gflag & (BI | BT)); + if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */ + cur_fld->flags &= ~BT; + + if (cur_fld->tcol.num && !(!(cur_fld->flags & BI) + && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num + && cur_fld->tcol.indent < cur_fld->icol.indent)) + errx(2, "fields out of order"); + insertcol(cur_fld); + return (cur_fld->tcol.num); +} + +int +optval(desc, tcolflag) + int desc, tcolflag; +{ + switch(desc) { + case 'b': + if (!tcolflag) + return (BI); + else + return (BT); + case 'd': return (D); + case 'f': return (F); + case 'i': return (I); + case 'n': return (N); + case 'r': return (R); + default: return (0); + } +} + +void +fixit(argc, argv) + int *argc; + char **argv; +{ + int i, j, v, w, x; + static char vbuf[ND*20], *vpos, *tpos; + vpos = vbuf; + + for (i = 1; i < *argc; i++) { + if (argv[i][0] == '+') { + tpos = argv[i]+1; + argv[i] = vpos; + vpos += sprintf(vpos, "-k"); + tpos += sscanf(tpos, "%d", &v); + while (isdigit(*tpos)) + tpos++; + vpos += sprintf(vpos, "%d", v+1); + if (*tpos == '.') { + ++tpos; + tpos += sscanf(tpos, "%d", &x); + vpos += sprintf(vpos, ".%d", x+1); + } + while (*tpos) + *vpos++ = *tpos++; + vpos += sprintf(vpos, ","); + if (argv[i+1] && + argv[i+1][0] == '-' && isdigit(argv[i+1][1])) { + tpos = argv[i+1] + 1; + tpos += sscanf(tpos, "%d", &w); + while (isdigit(*tpos)) + tpos++; + x = 0; + if (*tpos == '.') { + ++tpos; + tpos += sscanf(tpos, "%d", &x); + while (isdigit(*tpos)) + tpos++; + } + if (x) { + vpos += sprintf(vpos, "%d", w+1); + vpos += sprintf(vpos, ".%d", x); + } else + vpos += sprintf(vpos, "%d", w); + while (*tpos) + *vpos++ = *tpos++; + for (j= i+1; j < *argc; j++) + argv[j] = argv[j+1]; + *argc -= 1; + } + } + } +} + +/* + * ascii, Rascii, Ftable, and RFtable map + * REC_D -> REC_D; {not REC_D} -> {not REC_D}. + * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}. + * Note: when sorting in forward order, to encode character zero in a key, + * use \001\001; character 1 becomes \001\002. In this case, character 0 + * is reserved for the field delimiter. Analagously for -r (fld_d = 255). + * Note: this is only good for ASCII sorting. For different LC 's, + * all bets are off. See also num_init in number.c + */ +void +settables(gflags) + int gflags; +{ + u_char *wts; + int i, incr; + for (i=0; i < 256; i++) { + ascii[i] = i; + if (i > REC_D && i < 255 - REC_D+1) + Rascii[i] = 255 - i + 1; + else + Rascii[i] = 255 - i; + if (islower(i)) { + Ftable[i] = Ftable[toupper(i)]; + RFtable[i] = RFtable[toupper(i)]; + } else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) { + Ftable[i] = i + 1; + RFtable[i] = Rascii[i] - 1; + } else { + Ftable[i] = i; + RFtable[i] = Rascii[i]; + } + alltable[i] = 1; + + if (i == '\n' || isprint(i)) + itable[i] = 1; + else + itable[i] = 0; + + if (i == '\n' || i == '\t' || i == ' ' || isalnum(i)) + dtable[i] = 1; + else + dtable[i] = 0; + } + + Rascii[REC_D] = RFtable[REC_D] = REC_D; + if (isupper(REC_D)) + Ftable[tolower(REC_D)]++; + + if ((gflags & R) && !((gflags & F) && SINGL_FLD)) + wts = Rascii; + else if (!((gflags & F) && SINGL_FLD)) + wts = ascii; + else if (gflags & R) + wts = RFtable; + else + wts = Ftable; + + memmove(gweights, wts, sizeof(gweights)); + incr = (gflags & R) ? -1 : 1; + for (i = 0; i < REC_D; i++) + gweights[i] += incr; + gweights[REC_D] = ((gflags & R) ? 255 : 0); + if (SINGL_FLD && (gflags & F)) { + for (i = 0; i < REC_D; i++) { + ascii[i] += incr; + Rascii[i] += incr; + } + ascii[REC_D] = Rascii[REC_D] = gweights[REC_D]; + } +} diff --git a/contrib/sort/msort.c b/contrib/sort/msort.c new file mode 100644 index 0000000..8a2500c --- /dev/null +++ b/contrib/sort/msort.c @@ -0,0 +1,390 @@ +/* $NetBSD: msort.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "sort.h" +#include "fsort.h" + +#ifndef lint +__RCSID("$NetBSD: msort.c,v 1.10 2001/02/19 20:50:17 jdolecek Exp $"); +__SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +/* Subroutines using comparisons: merge sort and check order */ +#define DELETE (1) + +typedef struct mfile { + u_char *end; + short flno; + struct recheader rec[1]; +} MFILE; + +static u_char *wts, *wts1 = NULL; + +static int cmp __P((RECHEADER *, RECHEADER *)); +static int insert __P((struct mfile **, struct mfile **, int, int)); + +void +fmerge(binno, top, filelist, nfiles, get, outfp, fput, ftbl) + int binno, top; + struct filelist *filelist; + int nfiles; + get_func_t get; + FILE *outfp; + put_func_t fput; + struct field *ftbl; +{ + FILE *tout; + int i, j, last; + put_func_t put; + struct tempfile *l_fstack; + + wts = ftbl->weights; + if (!UNIQUE && SINGL_FLD && ftbl->flags & F) + wts1 = (ftbl->flags & R) ? Rascii : ascii; + + if (!buffer) { + buffer = malloc(bufsize); + if (!buffer) + err(2, "fmerge(): realloc"); + + if (!linebuf && !SINGL_FLD) { + linebuf_size = DEFLLEN; + linebuf = malloc(linebuf_size); + } + } + + if (binno >= 0) + l_fstack = fstack + top; + else + l_fstack = fstack; + + while (nfiles) { + put = putrec; + for (j = 0; j < nfiles; j += MERGE_FNUM) { + if (nfiles <= MERGE_FNUM) { + tout = outfp; + put = fput; + } + else + tout = ftmp(); + last = min(MERGE_FNUM, nfiles - j); + if (binno < 0) { + for (i = 0; i < last; i++) + if (!(l_fstack[i+MAXFCT-1-MERGE_FNUM].fp = + fopen(filelist->names[j+i], "r"))) + err(2, "%s", + filelist->names[j+i]); + merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl); + } else { + for (i = 0; i< last; i++) + rewind(l_fstack[i+j].fp); + merge(top+j, last, get, tout, put, ftbl); + } + if (nfiles > MERGE_FNUM) + l_fstack[j/MERGE_FNUM].fp = tout; + } + nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM; + if (nfiles == 1) + nfiles = 0; + if (binno < 0) { + binno = 0; + get = geteasy; + top = 0; + } + } +} + +void +merge(infl0, nfiles, get, outfp, put, ftbl) + int infl0, nfiles; + get_func_t get; + put_func_t put; + FILE *outfp; + struct field *ftbl; +{ + int c, i, j, nf = nfiles; + struct mfile *flist[MERGE_FNUM], *cfile; + size_t availsz = bufsize; + static void *bufs[MERGE_FNUM+1]; + static size_t bufs_sz[MERGE_FNUM+1]; + + /* + * We need nfiles + 1 buffers. One is 'buffer', the + * rest needs to be allocated. + */ + bufs[0] = buffer; + bufs_sz[0] = bufsize; + for(i=1; i < nfiles+1; i++) { + if (bufs[i]) + continue; + + bufs[i] = malloc(DEFLLEN); + if (!bufs[i]) + err(2, "merge(): realloc"); + bufs_sz[i] = DEFLLEN; + } + + for (i = j = 0; i < nfiles; i++) { + cfile = (struct mfile *) bufs[j]; + cfile->flno = infl0 + j; + cfile->end = (u_char *) bufs[j] + bufs_sz[j]; + for (c = 1; c == 1;) { + if (EOF == (c = get(cfile->flno, 0, NULL, nfiles, + cfile->rec, cfile->end, ftbl))) { + --i; + --nfiles; + break; + } + + if (c == BUFFEND) { + cfile = realloc(bufs[j], bufs_sz[j] *= 2); + bufs[j] = (void *) cfile; + + if (!cfile) + err(2, "merge(): realloc"); + + cfile->end = (u_char *)cfile + bufs_sz[j]; + + c = 1; + continue; + } + + if (i) + c = insert(flist, &cfile, i, !DELETE); + else + flist[0] = cfile; + } + j++; + } + + cfile = (struct mfile *) bufs[nf]; + cfile->flno = flist[0]->flno; + cfile->end = (u_char *) cfile + bufs_sz[nf]; + while (nfiles) { + for (c = 1; c == 1;) { + if (EOF == (c = get(cfile->flno, 0, NULL, nfiles, + cfile->rec, cfile->end, ftbl))) { + put(flist[0]->rec, outfp); + memmove(flist, flist + 1, + sizeof(MFILE *) * (--nfiles)); + cfile->flno = flist[0]->flno; + break; + } + if (c == BUFFEND) { + char *oldbuf = (char *) cfile; + availsz = (char *) cfile->end - oldbuf; + availsz *= 2; + cfile = realloc(oldbuf, availsz); + for(i=0; i < nf+1; i++) { + if (bufs[i] == oldbuf) { + bufs[i] = (char *)cfile; + bufs_sz[i] = availsz; + break; + } + } + + if (!cfile) + err(2, "merge: realloc"); + + cfile->end = (u_char *)cfile + availsz; + c = 1; + continue; + } + + if (!(c = insert(flist, &cfile, nfiles, DELETE))) + put(cfile->rec, outfp); + } + } + + if (bufs_sz[0] > bufsize) { + buffer = bufs[0]; + bufsize = bufs_sz[0]; + } +} + +/* + * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec; + * otherwise just inserts *rec in flist. + */ +static int +insert(flist, rec, ttop, delete) + struct mfile **flist, **rec; + int delete, ttop; /* delete = 0 or 1 */ +{ + struct mfile *tmprec = *rec; + int mid, top = ttop, bot = 0, cmpv = 1; + + for (mid = top/2; bot +1 != top; mid = (bot+top)/2) { + cmpv = cmp(tmprec->rec, flist[mid]->rec); + if (cmpv < 0) + top = mid; + else if (cmpv > 0) + bot = mid; + else { + if (UNIQUE) + break; + + if (stable_sort) { + /* + * Apply sort by fileno, to give priority + * to earlier specified files, hence providing + * more stable sort. + * If fileno is same, the new record should + * be put _after_ the previous entry. + */ + cmpv = tmprec->flno - flist[mid]->flno; + if (cmpv >= 0) + bot = mid; + else /* cmpv == 0 */ + bot = mid - 1; + } else { + /* non-stable sort */ + bot = mid - 1; + } + + break; + } + } + + if (delete) { + if (UNIQUE) { + if (!bot && cmpv) + cmpv = cmp(tmprec->rec, flist[0]->rec); + if (!cmpv) + return (1); + } + tmprec = flist[0]; + if (bot) + memmove(flist, flist+1, bot * sizeof(MFILE **)); + flist[bot] = *rec; + *rec = tmprec; + (*rec)->flno = flist[0]->flno; + return (0); + } else { + if (!bot && !(UNIQUE && !cmpv)) { + cmpv = cmp(tmprec->rec, flist[0]->rec); + if (cmpv < 0) + bot = -1; + } + if (UNIQUE && !cmpv) + return (1); + bot++; + memmove(flist + bot+1, flist + bot, + (ttop - bot) * sizeof(MFILE **)); + flist[bot] = *rec; + return (0); + } +} + +/* + * check order on one file + */ +void +order(filelist, get, ftbl) + struct filelist *filelist; + get_func_t get; + struct field *ftbl; +{ + u_char *crec_end, *prec_end, *trec_end; + int c; + RECHEADER *crec, *prec, *trec; + + if (!SINGL_FLD) + linebuf = malloc(DEFLLEN); + buffer = malloc(2 * (DEFLLEN + sizeof(TRECHEADER))); + crec = (RECHEADER *) buffer; + crec_end = buffer + DEFLLEN + sizeof(TRECHEADER); + prec = (RECHEADER *) (buffer + DEFLLEN + sizeof(TRECHEADER)); + prec_end = buffer + 2*(DEFLLEN + sizeof(TRECHEADER)); + wts = ftbl->weights; + if (SINGL_FLD && (ftbl->flags & F)) + wts1 = (ftbl->flags & R) ? Rascii : ascii; + else + wts1 = NULL; + if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl)) + while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) { + if (0 < (c = cmp(prec, crec))) { + crec->data[crec->length-1] = 0; + errx(1, "found disorder: %s", crec->data+crec->offset); + } + if (UNIQUE && !c) { + crec->data[crec->length-1] = 0; + errx(1, "found non-uniqueness: %s", + crec->data+crec->offset); + } + /* + * Swap pointers so that this record is on place pointed + * to by prec and new record is read to place pointed to by + * crec. + */ + trec = prec; + prec = crec; + crec = trec; + trec_end = prec_end; + prec_end = crec_end; + crec_end = trec_end; + } + exit(0); +} + +static int +cmp(rec1, rec2) + RECHEADER *rec1, *rec2; +{ + int r; + u_char *pos1, *pos2, *end; + u_char *cwts; + for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) { + pos1 = rec1->data; + pos2 = rec2->data; + if (!SINGL_FLD && (UNIQUE || stable_sort)) + end = pos1 + min(rec1->offset, rec2->offset); + else + end = pos1 + min(rec1->length, rec2->length); + + for (; pos1 < end; ) { + if ((r = cwts[*pos1++] - cwts[*pos2++])) + return (r); + } + } + return (0); +} diff --git a/contrib/sort/pathnames.h b/contrib/sort/pathnames.h new file mode 100644 index 0000000..4f08e63 --- /dev/null +++ b/contrib/sort/pathnames.h @@ -0,0 +1,41 @@ +/* $NetBSD: pathnames.h,v 1.3 2000/10/07 20:37:06 bjh21 Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)pathnames.h 8.1 (Berkeley) 6/6/93 + */ + +#define _PATH_STDIN "/dev/stdin" diff --git a/contrib/sort/sort.1 b/contrib/sort/sort.1 new file mode 100644 index 0000000..c4a782b --- /dev/null +++ b/contrib/sort/sort.1 @@ -0,0 +1,441 @@ +.\" $NetBSD: sort.1,v 1.18 2002/02/08 01:36:33 ross Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)sort.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd January 13, 2001 +.Dt SORT 1 +.Os +.Sh NAME +.Nm sort +.Nd sort or merge text files +.Sh SYNOPSIS +.Nm sort +.Op Fl cmubdfHinrsS +.Op Fl t Ar char +.Op Fl R Ar char +.Oo +.Fl k +.Ar field1 Ns Op Li \&, Ns Ar field2 +.Oc +.Op Fl T Ar dir +.Op Fl o Ar output +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility sorts text files by lines. +Comparisons are based on one or more sort keys extracted +from each line of input, and are performed lexicographically. +By default, if keys are not given, +.Nm +regards each input line as a single field. +.Pp +The following options are available: +.Bl -tag -width Fl +.It Fl c +Check that the single input file is sorted. +If the file is not sorted, +.Nm +produces the appropriate error messages and exits with code 1; otherwise, +.Nm +returns 0. +.Nm +.Fl c +produces no output. +.It Fl m +Merge only; the input files are assumed to be pre-sorted. +.It Fl o Ar output +The argument given is the name of an +.Ar output +file to be used instead of the standard output. +This file can be the same as one of the input files. +.It Fl T Ar dir +Use +.Ar dir +as the directory for temporary files. +The default is the value specified in the environment variable +.Ev TMPDIR or +.Pa /tmp +if +.Ev TMPDIR +is not defined. +.It Fl u +Unique: suppress all but one in each set of lines having equal keys. +If used with the +.Fl c +option, check that there are no lines with duplicate keys. +.El +.Pp +The following options override the default ordering rules. +When ordering options appear independent of key field +specifications, the requested field ordering rules are +applied globally to all sort keys. +When attached to a specific key (see +.Fl k ) , +the ordering options override +all global ordering options for that key. +.Bl -tag -width Fl +.It Fl d +Only blank space and alphanumeric characters +.\" according +.\" to the current setting of LC_CTYPE +are used +in making comparisons. +.It Fl f +Considers all lowercase characters that have uppercase +equivalents to be the same for purposes of comparison. +.It Fl i +Ignore all non-printable characters. +.It Fl n +An initial numeric string, consisting of optional blank space, optional +minus sign, and zero or more digits (including decimal point) +.\" with +.\" optional radix character and thousands +.\" separator +.\" (as defined in the current locale), +is sorted by arithmetic value. +(The +.Fl n +option no longer implies the +.Fl b +option.) +.It Fl r +Reverse the sense of comparisons. +.It Fl S +Don't use stable sort. +Default is to use stable sort. +.It Fl s +Use stable sort. +This is the default. +Provided for compatiblity with other +.Nm +implementations only. +.It Fl H +Use a merge sort instead of a radix sort. +This option should be used for files larger than 60Mb. +.El +.Pp +The treatment of field separators can be altered using these options: +.Bl -tag -width Fl +.It Fl b +Ignores leading blank space when determining the start +and end of a restricted sort key. +A +.Fl b +option specified before the first +.Fl k +option applies globally to all +.Fl k +options. +Otherwise, the +.Fl b +option can be attached independently to each +.Ar field +argument of the +.Fl k +option (see below). +Note that the +.Fl b +option has no effect unless key fields are specified. +.It Fl t Ar char +.Ar char +is used as the field separator character. +The initial +.Ar char +is not considered to be part of a field when determining +key offsets (see below). +Each occurrence of +.Ar char +is significant (for example, +.Dq Ar charchar +delimits an empty field). +If +.Fl t +is not specified, the default field separator is a sequence of +blank-space characters, and consecutive blank spaces do +.Em not +delimit an empty field; further, the initial blank space +.Em is +considered part of a field when determining key offsets. +.It Fl R Ar char +.Ar char +is used as the record separator character. +This should be used with discretion; +.Fl R Ar \*[Lt]alphanumeric\*[Gt] +usually produces undesirable results. +The default record separator is newline. +.It Xo +.Fl k +.Ar field1 Ns Op Li \&, Ns Ar field2 +.Xc +Designates the starting position, +.Ar field1 , +and optional ending position, +.Ar field2 , +of a key field. +The +.Fl k +option replaces the obsolescent options +.Cm \(pl Ns Ar pos1 +and +.Fl Ns Ar pos2 . +.El +.Pp +The following operands are available: +.Bl -tag -width Ar +.It Ar file +The pathname of a file to be sorted, merged, or checked. +If no +.Ar file +operands are specified, or if +a +.Ar file +operand is +.Fl , +the standard input is used. +.El +.Pp +A field is defined as a minimal sequence of characters followed by a +field separator or a newline character. +By default, the first +blank space of a sequence of blank spaces acts as the field separator. +All blank spaces in a sequence of blank spaces are considered +as part of the next field; for example, all blank spaces at +the beginning of a line are considered to be part of the +first field. +.Pp +Fields are specified +by the +.Fl k +.Ar field1 Ns Op \&, Ns Ar field2 +argument. +A missing +.Ar field2 +argument defaults to the end of a line. +.Pp +The arguments +.Ar field1 +and +.Ar field2 +have the form +.Ar m Ns Li \&. Ns Ar n +and can be followed by one or more of the letters +.Cm b , d , f , i , +.Cm n , +and +.Cm r , +which correspond to the options discussed above. +A +.Ar field1 +position specified by +.Ar m Ns Li \&. Ns Ar n +.Pq Ar m , n No \*[Gt] 0 +is interpreted as the +.Ar n Ns th +character in the +.Ar m Ns th +field. +A missing +.Li \&. Ns Ar n +in +.Ar field1 +means +.Ql \&.1 , +indicating the first character of the +.Ar m Ns th +field; if the +.Fl b +option is in effect, +.Ar n +is counted from the first non-blank character in the +.Ar m Ns th +field; +.Ar m Ns Li \&.1b +refers to the first non-blank character in the +.Ar m Ns th +field. +.Pp +A +.Ar field2 +position specified by +.Ar m Ns Li \&. Ns Ar n +is interpreted as +the +.Ar n Ns th +character (including separators) of the +.Ar m Ns th +field. +A missing +.Li \&. Ns Ar n +indicates the last character of the +.Ar m Ns th +field; +.Ar m += \&0 +designates the end of a line. +Thus the option +.Fl k +.Sm off +.Xo +.Ar v Li \&. Ar x Li \&, +.Ar w Li \&. Ar y +.Xc +.Sm on +is synonymous with the obsolescent option +.Sm off +.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1 +.Fl Ar w-\&1 Li \&. Ar y ; +.Sm on +when +.Ar y +is omitted, +.Fl k +.Sm off +.Ar v Li \&. Ar x Li \&, Ar w +.Sm on +is synonymous with +.Sm off +.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1 +.Fl Ar w+1 Li \&.0 . +.Sm on +The obsolescent +.Cm \(pl Ns Ar pos1 +.Fl Ns Ar pos2 +option is still supported, except for +.Fl Ns Ar w Ns Li \&.0b , +which has no +.Fl k +equivalent. +.Sh RETURN VALUES +Sort exits with one of the following values: +.Bl -tag -width flag -compact +.It 0 +Normal behavior. +.It 1 +On disorder (or non-uniqueness) with the +.Fl c +option +.It 2 +An error occurred. +.El +.Sh ENVIRONMENT +If the following environment variable exists, it is utilized by +.Nm "" . +.Bl -tag -width Ev +.It Ev TMPDIR +.Nm +uses the contents of the +.Ev TMPDIR +environment variable as the path in which to store +temporary files. +.El +.Sh FILES +.Bl -tag -width outputNUMBER+some -compact +.It Pa /tmp/sort.* +Default temporary files. +.It Pa Ar output Ns NUMBER +Temporary file which is used for output if +.Ar output +already exists. +Once sorting is finished, this file replaces +.Ar output +(via +.Xr link 2 +and +.Xr unlink 2 ) . +.El +.Sh SEE ALSO +.Xr comm 1 , +.Xr join 1 , +.Xr uniq 1 , +.Xr qsort 3 , +.Xr radixsort 3 +.Sh HISTORY +A +.Nm +command appeared in +.At v5 . +This +.Nm +implementation appeared in +.Bx 4.4 +and is used since +.Nx 1.6 . +.Sh BUGS +To sort files larger than 60Mb, use +.Nm +.Fl H ; +files larger than 704Mb must be sorted in smaller pieces, then merged. +.Sh NOTES +This +.Nm +has no limits on input line length (other than imposed by available +memory) or any restrictions on bytes allowed within lines. +.Pp +To protect data +.Nm +.Fl o +calls +.Xr link 2 +and +.Xr unlink 2 , +and thus fails on protected directories. +.Pp +Input files should be text files. +If file doesn't end with record separator (which is typically newline), the +.Nm +utility silently supplies one. +.Pp +The current +.Nm +uses lexicographic radix sorting, which requires +that sort keys be kept in memory (as opposed to previous versions which used quick +and merge sorts and did not.) +Thus performance depends highly on efficient choice of sort keys, and the +.Fl b +option and the +.Ar field2 +argument of the +.Fl k +option should be used whenever possible. +Similarly, +.Nm +.Fl k1f +is equivalent to +.Nm +.Fl f +and may take twice as long. diff --git a/contrib/sort/sort.c b/contrib/sort/sort.c new file mode 100644 index 0000000..a119b7d --- /dev/null +++ b/contrib/sort/sort.c @@ -0,0 +1,332 @@ +/* $NetBSD: sort.c,v 1.27 2001/05/14 21:52:21 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* Sort sorts a file using an optional user-defined key. + * Sort uses radix sort for internal sorting, and allows + * a choice of merge sort and radix sort for external sorting. + */ + +#include "sort.h" +#include "fsort.h" +#include "pathnames.h" + +#ifndef lint +__COPYRIGHT("@(#) Copyright (c) 1993\n\ + The Regents of the University of California. All rights reserved.\n"); +#endif /* not lint */ + +#ifndef lint +__RCSID("$NetBSD: sort.c,v 1.27 2001/05/14 21:52:21 jdolecek Exp $"); +__SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> + +#include <paths.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <locale.h> + +int REC_D = '\n'; +u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ +/* + * weight tables. Gweights is one of ascii, Rascii.. + * modified to weight rec_d = 0 (or 255) + */ +u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; +int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; +struct coldesc clist[(ND+1)*2]; +int ncols = 0; +extern struct coldesc clist[(ND+1)*2]; +extern int ncols; + +/* + * Default to stable sort. + */ +int stable_sort = 1; + +char toutpath[MAXPATHLEN]; + +const char *tmpdir; /* where temporary files should be put */ + +static void cleanup __P((void)); +static void onsignal __P((int)); +static void usage __P((const char *)); +static void many_files __P((void)); + +int main __P((int argc, char **argv)); + +int +main(argc, argv) + int argc; + char *argv[]; +{ + get_func_t get; + int ch, i, stdinflag = 0, tmp = 0; + char cflag = 0, mflag = 0; + char *outfile, *outpath = 0; + struct field fldtab[ND+2], *ftpos; + struct filelist filelist; + FILE *outfp = NULL; + + setlocale(LC_ALL, ""); + + memset(fldtab, 0, (ND+2)*sizeof(struct field)); + memset(d_mask, 0, NBINS); + d_mask[REC_D = '\n'] = REC_D_F; + SINGL_FLD = SEP_FLAG = 0; + d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; + ftpos = fldtab; + many_files(); + + fixit(&argc, argv); + if (!(tmpdir = getenv("TMPDIR"))) + tmpdir = _PATH_TMP; + + while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) { + switch (ch) { + case 'b': + fldtab->flags |= BI | BT; + break; + case 'c': + cflag = 1; + break; + case 'd': case 'f': case 'i': case 'n': case 'r': + tmp |= optval(ch, 0); + if ((tmp & R) && (tmp & F)) + fldtab->weights = RFtable; + else if (tmp & F) + fldtab->weights = Ftable; + else if (tmp & R) + fldtab->weights = Rascii; + fldtab->flags |= tmp; + break; + case 'H': + PANIC = 0; + break; + case 'k': + setfield(optarg, ++ftpos, fldtab->flags); + break; + case 'm': + mflag = 1; + break; + case 'o': + outpath = optarg; + break; + case 's': + /* for GNU sort compatibility (this is our default) */ + stable_sort = 1; + break; + case 'S': + stable_sort = 0; + break; + case 't': + if (SEP_FLAG) + usage("multiple field delimiters"); + SEP_FLAG = 1; + d_mask[' '] &= ~FLD_D; + d_mask['\t'] &= ~FLD_D; + d_mask[(u_char)*optarg] |= FLD_D; + if (d_mask[(u_char)*optarg] & REC_D_F) + errx(2, "record/field delimiter clash"); + break; + case 'R': + if (REC_D != '\n') + usage("multiple record delimiters"); + if ('\n' == (REC_D = *optarg)) + break; + d_mask['\n'] = d_mask[' ']; + d_mask[REC_D] = REC_D_F; + break; + case 'T': + /* -T tmpdir */ + tmpdir = optarg; + break; + case 'u': + UNIQUE = 1; + break; + case '?': + default: + usage(NULL); + } + } + if (cflag && argc > optind+1) + errx(2, "too many input files for -c option"); + if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { + outpath = argv[argc-1]; + argc -= 2; + } + if (mflag && argc - optind > (MAXFCT - (16+1))*16) + errx(2, "too many input files for -m option"); + for (i = optind; i < argc; i++) { + /* allow one occurrence of /dev/stdin */ + if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { + if (stdinflag) + warnx("ignoring extra \"%s\" in file list", + argv[i]); + else + stdinflag = 1; + + /* change to /dev/stdin if '-' */ + if (argv[i][0] == '-') + argv[i] = _PATH_STDIN; + + } else if ((ch = access(argv[i], R_OK))) + err(2, "%s", argv[i]); + } + if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { + SINGL_FLD = 1; + fldtab[0].icol.num = 1; + } else { + if (!fldtab[1].icol.num) { + fldtab[0].flags &= ~(BI|BT); + setfield("1", ++ftpos, fldtab->flags); + } + fldreset(fldtab); + fldtab[0].flags &= ~F; + } + settables(fldtab[0].flags); + num_init(); + fldtab->weights = gweights; + if (optind == argc) { + static const char * const names[] = { _PATH_STDIN, NULL }; + + filelist.names = names; + optind--; + } else + filelist.names = (const char * const *) &argv[optind]; + + if (SINGL_FLD) + get = makeline; + else + get = makekey; + + if (cflag) { + order(&filelist, get, fldtab); + /* NOT REACHED */ + } + if (!outpath) { + (void)snprintf(toutpath, + sizeof(toutpath), "%sstdout", _PATH_DEV); + outfile = outpath = toutpath; + outfp = stdout; + } else if (!(ch = access(outpath, 0)) && + strncmp(_PATH_DEV, outpath, 5)) { + static const struct sigaction act = + { onsignal, {{0}}, SA_RESTART | SA_RESETHAND }; + static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, + SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; + int outfd; + errno = 0; + if (access(outpath, W_OK)) + err(2, "%s", outpath); + (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", + outpath); + if ((outfd = mkstemp(toutpath)) == -1) + err(2, "Cannot create temporary file `%s'", toutpath); + if ((outfp = fdopen(outfd, "w")) == NULL) + err(2, "Cannot open temporary file `%s'", toutpath); + outfile = toutpath; + (void)atexit(cleanup); + for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ + sigaction(sigtable[i], &act, 0); + } else + outfile = outpath; + + if (outfp == NULL && (outfp = fopen(outfile, "w")) == NULL) + err(2, "output file %s", outfile); + + if (mflag) { + fmerge(-1, 0, &filelist, argc-optind, get, outfp, putline, + fldtab); + } else + fsort(-1, 0, 0, &filelist, argc-optind, outfp, fldtab); + + if (outfile != outpath) { + if (access(outfile, 0)) + err(2, "%s", outfile); + (void)unlink(outpath); + if (link(outfile, outpath)) + err(2, "cannot link %s: output left in %s", + outpath, outfile); + (void)unlink(outfile); + } + exit(0); +} + +static void +onsignal(sig) + int sig; +{ + cleanup(); +} + +static void +cleanup() +{ + if (toutpath[0]) + (void)unlink(toutpath); +} + +static void +usage(msg) + const char *msg; +{ + if (msg != NULL) + (void)fprintf(stderr, "sort: %s\n", msg); + (void)fprintf(stderr, "usage: [-o output] [-cmubdfinrsS] [-t char] "); + (void)fprintf(stderr, "[-R char] [-k keydef] ... [files]\n"); + exit(2); +} + +static void +many_files() +{ +#if 0 + struct rlimit rlp_many_files[1]; + + if (getrlimit(RLIMIT_NOFILE, rlp_many_files) == 0) { + rlp_many_files->rlim_cur = rlp_many_files->rlim_max; + setrlimit(RLIMIT_NOFILE, rlp_many_files); + } +#endif +} diff --git a/contrib/sort/sort.h b/contrib/sort/sort.h new file mode 100644 index 0000000..a8f2c4b --- /dev/null +++ b/contrib/sort/sort.h @@ -0,0 +1,149 @@ +/* $NetBSD: sort.h,v 1.12 2001/02/19 19:31:29 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)sort.h 8.1 (Berkeley) 6/6/93 + */ + +#include <sys/param.h> + +#include <db.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#define NBINS 256 +#define MAXMERGE 16 + +/* values for masks, weights, and other flags. */ +#define I 1 /* mask out non-printable characters */ +#define D 2 /* sort alphanumeric characters only */ +#define N 4 /* Field is a number */ +#define F 8 /* weight lower and upper case the same */ +#define R 16 /* Field is reversed with respect to the global weight */ +#define BI 32 /* ignore blanks in icol */ +#define BT 64 /* ignore blanks in tcol */ + +/* masks for delimiters: blanks, fields, and termination. */ +#define BLANK 1 /* ' ', '\t'; '\n' if -T is invoked */ +#define FLD_D 2 /* ' ', '\t' default; from -t otherwise */ +#define REC_D_F 4 /* '\n' default; from -T otherwise */ + +#define ND 10 /* limit on number of -k options. */ + +#define min(a, b) ((a) < (b) ? (a) : (b)) +#define max(a, b) ((a) > (b) ? (a) : (b)) + +#define FCLOSE(file) { \ + if (EOF == fclose(file)) \ + err(2, "%p", file); \ +} + +#define EWRITE(ptr, size, n, f) { \ + if (!fwrite(ptr, size, n, f)) \ + err(2, NULL); \ +} + +/* length of record is currently limited to maximum string length (size_t) */ +typedef size_t length_t; + +/* a record is a key/line pair starting at rec.data. It has a total length + * and an offset to the start of the line half of the pair. + */ +typedef struct recheader { + length_t length; + length_t offset; + u_char data[1]; +} RECHEADER; + +typedef struct trecheader { + length_t length; + length_t offset; +} TRECHEADER; + +/* This is the column as seen by struct field. It is used by enterfield. + * They are matched with corresponding coldescs during initialization. + */ +struct column { + struct coldesc *p; + int num; + int indent; +}; + +/* a coldesc has a number and pointers to the beginning and end of the + * corresponding column in the current line. This is determined in enterkey. + */ +typedef struct coldesc { + u_char *start; + u_char *end; + int num; +} COLDESC; + +/* A field has an initial and final column; an omitted final column + * implies the end of the line. Flags regulate omission of blanks and + * numerical sorts; mask determines which characters are ignored (from -i, -d); + * weights determines the sort weights of a character (from -f, -r). + */ +struct field { + struct column icol; + struct column tcol; + u_int flags; + u_char *mask; + u_char *weights; +}; + +struct filelist { + const char * const * names; +}; + +typedef int (*get_func_t) __P((int, int, struct filelist *, int, + RECHEADER *, u_char *, struct field *)); +typedef void (*put_func_t) __P((const struct recheader *, FILE *)); + +extern int PANIC; /* maximum depth of fsort before fmerge is called */ +extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; +extern u_char d_mask[NBINS]; +extern int SINGL_FLD, SEP_FLAG, UNIQUE; +extern int REC_D; +extern const char *tmpdir; +extern int stable_sort; +extern u_char gweights[NBINS]; + +#include "extern.h" diff --git a/contrib/sort/tmp.c b/contrib/sort/tmp.c new file mode 100644 index 0000000..d8476d6 --- /dev/null +++ b/contrib/sort/tmp.c @@ -0,0 +1,84 @@ +/* $NetBSD: tmp.c,v 1.8 2001/02/23 08:59:49 jdolecek Exp $ */ + +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <ctype.h> + +#ifndef lint +__RCSID("$NetBSD: tmp.c,v 1.8 2001/02/23 08:59:49 jdolecek Exp $"); +__SCCSID("@(#)tmp.c 8.1 (Berkeley) 6/6/93"); +#endif /* not lint */ + +#include <sys/param.h> + +#include <err.h> +#include <errno.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "sort.h" +#include "pathnames.h" + +#define _NAME_TMP "sort.XXXXXXXX" + +FILE * +ftmp() +{ + sigset_t set, oset; + FILE *fp; + int fd; + char pathb[MAXPATHLEN], *path; + + path = pathb; + (void)snprintf(path, sizeof(pathb), "%s%s%s", tmpdir, + (tmpdir[strlen(tmpdir)-1] != '/') ? "/" : "", _NAME_TMP); + + sigfillset(&set); + (void)sigprocmask(SIG_BLOCK, &set, &oset); + if ((fd = mkstemp(path)) < 0) + err(2, "ftmp: mkstemp(\"%s\")", path); + if (!(fp = fdopen(fd, "w+"))) + err(2, "ftmp: fdopen(\"%s\")", path); + (void)unlink(path); + + (void)sigprocmask(SIG_SETMASK, &oset, NULL); + return (fp); +} |