diff options
author | markm <markm@FreeBSD.org> | 2002-03-24 15:59:17 +0000 |
---|---|---|
committer | markm <markm@FreeBSD.org> | 2002-03-24 15:59:17 +0000 |
commit | 5fb54802b6b0448b20c3b023ef2e7d96923bfc7e (patch) | |
tree | e2d0e46de42b0413a51e3009da29b76e87ecb51b /usr.bin/sort | |
parent | 46388605d15bf0a6846de3e434c971c64a1a4d8c (diff) | |
download | FreeBSD-src-5fb54802b6b0448b20c3b023ef2e7d96923bfc7e.zip FreeBSD-src-5fb54802b6b0448b20c3b023ef2e7d96923bfc7e.tar.gz |
Make this compile on FreeBSD. Sort out VCS tags, fix warnings.
Diffstat (limited to 'usr.bin/sort')
-rw-r--r-- | usr.bin/sort/append.c | 5 | ||||
-rw-r--r-- | usr.bin/sort/extern.h | 3 | ||||
-rw-r--r-- | usr.bin/sort/fields.c | 7 | ||||
-rw-r--r-- | usr.bin/sort/files.c | 29 | ||||
-rw-r--r-- | usr.bin/sort/fsort.c | 5 | ||||
-rw-r--r-- | usr.bin/sort/init.c | 7 | ||||
-rw-r--r-- | usr.bin/sort/msort.c | 5 | ||||
-rw-r--r-- | usr.bin/sort/sort.1 | 443 | ||||
-rw-r--r-- | usr.bin/sort/sort.c | 14 | ||||
-rw-r--r-- | usr.bin/sort/tmp.c | 5 |
10 files changed, 502 insertions, 21 deletions
diff --git a/usr.bin/sort/append.c b/usr.bin/sort/append.c index d24b9fe..a486361 100644 --- a/usr.bin/sort/append.c +++ b/usr.bin/sort/append.c @@ -39,10 +39,15 @@ #include "sort.h" #ifndef lint +#if 0 __RCSID("$NetBSD: append.c,v 1.9 2001/01/18 20:59:43 jdolecek Exp $"); __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <stdlib.h> #include <string.h> diff --git a/usr.bin/sort/extern.h b/usr.bin/sort/extern.h index cdfb9fe..6af2e03 100644 --- a/usr.bin/sort/extern.h +++ b/usr.bin/sort/extern.h @@ -36,12 +36,13 @@ * SUCH DAMAGE. * * @(#)extern.h 8.1 (Berkeley) 6/6/93 + * $FreeBSD$ */ void append __P((const u_char **, int, int, FILE *, void (*)(const RECHEADER *, FILE *), struct field *)); void concat __P((FILE *, FILE *)); -length_t enterkey __P((RECHEADER *, DBT *, int, struct field *)); +length_t enterkey __P((RECHEADER *, DBT *, size_t, struct field *)); void fixit __P((int *, char **)); void fldreset __P((struct field *)); FILE *ftmp __P((void)); diff --git a/usr.bin/sort/fields.c b/usr.bin/sort/fields.c index 175b87f..73c54f0 100644 --- a/usr.bin/sort/fields.c +++ b/usr.bin/sort/fields.c @@ -41,10 +41,15 @@ #include "sort.h" #ifndef lint +#if 0 __RCSID("$NetBSD: fields.c,v 1.9 2001/02/19 19:52:27 jdolecek Exp $"); __SCCSID("@(#)fields.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #define blancmange(ptr) { \ if (BLANK & d_mask[*(ptr)]) \ while (BLANK & d_mask[*(++(ptr))]); \ @@ -78,7 +83,7 @@ length_t enterkey(keybuf, line, size, fieldtable) RECHEADER *keybuf; /* pointer to start of key */ DBT *line; - int size; + size_t size; struct field fieldtable[]; { int i; diff --git a/usr.bin/sort/files.c b/usr.bin/sort/files.c index f53d456..fbd6aa2 100644 --- a/usr.bin/sort/files.c +++ b/usr.bin/sort/files.c @@ -40,10 +40,15 @@ #include "fsort.h" #ifndef lint +#if 0 __RCSID("$NetBSD: files.c,v 1.16 2001/02/19 20:50:17 
jdolecek Exp $"); __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <string.h> static int seq __P((FILE *, DBT *, DBT *)); @@ -55,11 +60,11 @@ static int seq __P((FILE *, DBT *, DBT *)); int getnext(binno, infl0, filelist, nfiles, pos, end, dummy) int binno, infl0; - struct filelist *filelist; + struct filelist *filelist __unused; int nfiles; RECHEADER *pos; u_char *end; - struct field *dummy; + struct field *dummy __unused; { int i; u_char *hp; @@ -107,7 +112,7 @@ getnext(binno, infl0, filelist, nfiles, pos, end, dummy) if ((u_char *) pos > end - sizeof(TRECHEADER)) return (BUFFEND); fread(pos, sizeof(TRECHEADER), 1, fp); - if (end - pos->data < pos->length) { + if ((length_t)(end - pos->data) < pos->length) { hp = ((u_char *)pos) + sizeof(TRECHEADER); for (i = sizeof(TRECHEADER); i ; i--) ungetc(*--hp, fp); @@ -126,12 +131,12 @@ getnext(binno, infl0, filelist, nfiles, pos, end, dummy) */ int makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) - int flno, top; + int flno, top __unused; struct filelist *filelist; int nfiles; RECHEADER *recbuf; u_char *bufend; - struct field *dummy2; + struct field *dummy2 __unused; { static u_char *obufend; static size_t osz; @@ -213,7 +218,7 @@ makeline(flno, top, filelist, nfiles, recbuf, bufend, dummy2) */ int makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) - int flno, top; + int flno, top __unused; struct filelist *filelist; int nfiles; RECHEADER *recbuf; @@ -278,7 +283,7 @@ makekey(flno, top, filelist, nfiles, recbuf, bufend, ftbl) static int seq(fp, line, key) FILE *fp; - DBT *key, *line; + DBT *key __unused, *line; { static char *buf, flag = 1; char *end, *pos; @@ -344,12 +349,12 @@ putline(rec, fp) */ int geteasy(flno, top, filelist, nfiles, rec, end, dummy2) - int flno, top; - struct filelist *filelist; - int nfiles; + int flno, top __unused; + struct filelist *filelist __unused; + int nfiles __unused; RECHEADER *rec; 
u_char *end; - struct field *dummy2; + struct field *dummy2 __unused; { int i; FILE *fp; @@ -362,7 +367,7 @@ geteasy(flno, top, filelist, nfiles, rec, end, dummy2) fstack[flno].fp = 0; return (EOF); } - if (end - rec->data < rec->length) { + if ((length_t)(end - rec->data) < rec->length) { for (i = sizeof(TRECHEADER) - 1; i >= 0; i--) ungetc(*((char *) rec + i), fp); return (BUFFEND); diff --git a/usr.bin/sort/fsort.c b/usr.bin/sort/fsort.c index a38c79d..eb9302a 100644 --- a/usr.bin/sort/fsort.c +++ b/usr.bin/sort/fsort.c @@ -47,10 +47,15 @@ #include "fsort.h" #ifndef lint +#if 0 __RCSID("$NetBSD: fsort.c,v 1.19 2001/05/15 11:19:45 jdolecek Exp $"); __SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <stdlib.h> #include <string.h> diff --git a/usr.bin/sort/init.c b/usr.bin/sort/init.c index 8b965b1..9bcc1d2 100644 --- a/usr.bin/sort/init.c +++ b/usr.bin/sort/init.c @@ -39,10 +39,15 @@ #include "sort.h" #ifndef lint +#if 0 __RCSID("$NetBSD: init.c,v 1.5 2001/02/19 20:50:17 jdolecek Exp $"); __SCCSID("@(#)init.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <ctype.h> #include <string.h> @@ -121,7 +126,7 @@ static const char * setcolumn(pos, cur_fld, gflag) const char *pos; struct field *cur_fld; - int gflag; + int gflag __unused; { struct column *col; int tmp; diff --git a/usr.bin/sort/msort.c b/usr.bin/sort/msort.c index f66db0b..8a90944 100644 --- a/usr.bin/sort/msort.c +++ b/usr.bin/sort/msort.c @@ -40,10 +40,15 @@ #include "fsort.h" #ifndef lint +#if 0 __RCSID("$NetBSD: msort.c,v 1.9 2001/01/19 10:50:31 jdolecek Exp $"); __SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <stdlib.h> #include <string.h> #include <unistd.h> diff --git a/usr.bin/sort/sort.1 b/usr.bin/sort/sort.1 index 5873eb0..d6c6e6c 100644 --- a/usr.bin/sort/sort.1 
+++ b/usr.bin/sort/sort.1 @@ -1 +1,442 @@ -$FreeBSD$ +.\" $NetBSD: sort.1,v 1.17 2001/12/08 19:16:07 wiz Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)sort.1 8.1 (Berkeley) 6/6/93 +.\" $FreeBSD$ +.\" +.Dd January 13, 2001 +.Dt SORT 1 +.Os +.Sh NAME +.Nm sort +.Nd sort or merge text files +.Sh SYNOPSIS +.Nm sort +.Op Fl cmubdfHinrsS +.Op Fl t Ar char +.Op Fl R Ar char +.Oo +.Fl k +.Ar field1 Ns Op Li \&, Ns Ar field2 +.Oc +.Op Fl T Ar dir +.Op Fl o Ar output +.Op Ar +.Sh DESCRIPTION +The +.Nm +utility sorts text files by lines. +Comparisons are based on one or more sort keys extracted +from each line of input, and are performed lexicographically. +By default, if keys are not given, +.Nm +regards each input line as a single field. +.Pp +The following options are available: +.Bl -tag -width Fl +.It Fl c +Check that the single input file is sorted. +If the file is not sorted, +.Nm +produces the appropriate error messages and exits with code 1; otherwise, +.Nm +returns 0. +.Nm +.Fl c +produces no output. +.It Fl m +Merge only; the input files are assumed to be pre-sorted. +.It Fl o Ar output +The argument given is the name of an +.Ar output +file to be used instead of the standard output. +This file can be the same as one of the input files. +.It Fl T Ar dir +Use +.Ar dir +as the directory for temporary files. +The default is the value specified in the environment variable +.Ev TMPDIR or +.Pa /tmp +if +.Ev TMPDIR +is not defined. +.It Fl u +Unique: suppress all but one in each set of lines having equal keys. 
+If used with the +.Fl c +option, check that there are no lines with duplicate keys. +.El +.Pp +The following options override the default ordering rules. +When ordering options appear independent of key field +specifications, the requested field ordering rules are +applied globally to all sort keys. +When attached to a specific key (see +.Fl k ) , +the ordering options override +all global ordering options for that key. +.Bl -tag -width Fl +.It Fl d +Only blank space and alphanumeric characters +.\" according +.\" to the current setting of LC_CTYPE +are used +in making comparisons. +.It Fl f +Considers all lowercase characters that have uppercase +equivalents to be the same for purposes of comparison. +.It Fl i +Ignore all non-printable characters. +.It Fl n +An initial numeric string, consisting of optional blank space, optional +minus sign, and zero or more digits (including decimal point) +.\" with +.\" optional radix character and thousands +.\" separator +.\" (as defined in the current locale), +is sorted by arithmetic value. +(The +.Fl n +option no longer implies the +.Fl b +option.) +.It Fl r +Reverse the sense of comparisons. +.It Fl S +Don't use stable sort. +Default is to use stable sort. +.It Fl s +Use stable sort. +This is the default. +Provided for compatibility with other +.Nm +implementations only. +.It Fl H +Use a merge sort instead of a radix sort. +This option should be used for files larger than 60Mb. +.El +.Pp +The treatment of field separators can be altered using these options: +.Bl -tag -width Fl +.It Fl b +Ignores leading blank space when determining the start +and end of a restricted sort key. +A +.Fl b +option specified before the first +.Fl k +option applies globally to all +.Fl k +options. +Otherwise, the +.Fl b +option can be attached independently to each +.Ar field +argument of the +.Fl k +option (see below). +Note that the +.Fl b +option has no effect unless key fields are specified.
+.It Fl t Ar char +.Ar char +is used as the field separator character. +The initial +.Ar char +is not considered to be part of a field when determining +key offsets (see below). +Each occurrence of +.Ar char +is significant (for example, +.Dq Ar charchar +delimits an empty field). +If +.Fl t +is not specified, the default field separator is a sequence of +blank-space characters, and consecutive blank spaces do +.Em not +delimit an empty field; further, the initial blank space +.Em is +considered part of a field when determining key offsets. +.It Fl R Ar char +.Ar char +is used as the record separator character. +This should be used with discretion; +.Fl R Ar \*[Lt]alphanumeric\*[Gt] +usually produces undesirable results. +The default record separator is newline. +.It Xo +.Fl k +.Ar field1 Ns Op Li \&, Ns Ar field2 +.Xc +Designates the starting position, +.Ar field1 , +and optional ending position, +.Ar field2 , +of a key field. +The +.Fl k +option replaces the obsolescent options +.Cm \(pl Ns Ar pos1 +and +.Fl Ns Ar pos2 . +.El +.Pp +The following operands are available: +.Bl -tag -width Ar +.It Ar file +The pathname of a file to be sorted, merged, or checked. +If no +.Ar file +operands are specified, or if +a +.Ar file +operand is +.Fl , +the standard input is used. +.El +.Pp +A field is defined as a minimal sequence of characters followed by a +field separator or a newline character. +By default, the first +blank space of a sequence of blank spaces acts as the field separator. +All blank spaces in a sequence of blank spaces are considered +as part of the next field; for example, all blank spaces at +the beginning of a line are considered to be part of the +first field. +.Pp +Fields are specified +by the +.Fl k +.Ar field1 Ns Op \&, Ns Ar field2 +argument. +A missing +.Ar field2 +argument defaults to the end of a line. +.Pp +The arguments +.Ar field1 +and +.Ar field2 +have the form +.Ar m Ns Li \&. 
Ns Ar n +and can be followed by one or more of the letters +.Cm b , d , f , i , +.Cm n , +and +.Cm r , +which correspond to the options discussed above. +A +.Ar field1 +position specified by +.Ar m Ns Li \&. Ns Ar n +.Pq Ar m , n No \*[Gt] 0 +is interpreted as the +.Ar n Ns th +character in the +.Ar m Ns th +field. +A missing +.Li \&. Ns Ar n +in +.Ar field1 +means +.Ql \&.1 , +indicating the first character of the +.Ar m Ns th +field; if the +.Fl b +option is in effect, +.Ar n +is counted from the first non-blank character in the +.Ar m Ns th +field; +.Ar m Ns Li \&.1b +refers to the first non-blank character in the +.Ar m Ns th +field. +.Pp +A +.Ar field2 +position specified by +.Ar m Ns Li \&. Ns Ar n +is interpreted as +the +.Ar n Ns th +character (including separators) of the +.Ar m Ns th +field. +A missing +.Li \&. Ns Ar n +indicates the last character of the +.Ar m Ns th +field; +.Ar m += \&0 +designates the end of a line. +Thus the option +.Fl k +.Sm off +.Xo +.Ar v Li \&. Ar x Li \&, +.Ar w Li \&. Ar y +.Xc +.Sm on +is synonymous with the obsolescent option +.Sm off +.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1 +.Fl Ar w-\&1 Li \&. Ar y ; +.Sm on +when +.Ar y +is omitted, +.Fl k +.Sm off +.Ar v Li \&. Ar x Li \&, Ar w +.Sm on +is synonymous with +.Sm off +.Cm \(pl Ar v-\&1 Li \&. Ar x-\&1 +.Fl Ar w+1 Li \&.0 . +.Sm on +The obsolescent +.Cm \(pl Ns Ar pos1 +.Fl Ns Ar pos2 +option is still supported, except for +.Fl Ns Ar w Ns Li \&.0b , +which has no +.Fl k +equivalent. +.Sh RETURN VALUES +Sort exits with one of the following values: +.Bl -tag -width flag -compact +.It 0 +Normal behavior. +.It 1 +On disorder (or non-uniqueness) with the +.Fl c +option +.It 2 +An error occurred. +.El +.Sh ENVIRONMENT +If the following environment variable exists, it is utilized by +.Nm "" . +.Bl -tag -width Ev +.It Ev TMPDIR +.Nm +uses the contents of the +.Ev TMPDIR +environment variable as the path in which to store +temporary files. 
+.El +.Sh FILES +.Bl -tag -width outputNUMBER+some -compact +.It Pa /tmp/sort.* +Default temporary files. +.It Pa Ar output Ns NUMBER +Temporary file which is used for output if +.Ar output +already exists. +Once sorting is finished, this file replaces +.Ar output +(via +.Xr link 2 +and +.Xr unlink 2 ) . +.El +.Sh SEE ALSO +.Xr comm 1 , +.Xr join 1 , +.Xr uniq 1 , +.Xr qsort 3 , +.Xr radixsort 3 +.Sh HISTORY +A +.Nm +command appeared in +.At v5 . +This +.Nm +implementation appeared in +.Bx 4.4 +and is used since +.Nx 1.6 . +.Sh BUGS +To sort files larger than 60Mb, use +.Nm +.Fl H ; +files larger than 704Mb must be sorted in smaller pieces, then merged. +.Sh NOTES +This +.Nm +has no limits on input line length (other than imposed by available +memory) or any restrictions on bytes allowed within lines. +.Pp +To protect data +.Nm +.Fl o +calls +.Xr link 2 +and +.Xr unlink 2 , +and thus fails on protected directories. +.Pp +Input files should be text files. +If file doesn't end with record separator (which is typically newline), the +.Nm +utility silently supplies one. +.Pp +The current +.Nm +uses lexicographic radix sorting, which requires +that sort keys be kept in memory (as opposed to previous versions which used quick +and merge sorts and did not.) +Thus performance depends highly on efficient choice of sort keys, and the +.Fl b +option and the +.Ar field2 +argument of the +.Fl k +option should be used whenever possible. +Similarly, +.Nm +.Fl k1f +is equivalent to +.Nm +.Fl f +and may take twice as long. diff --git a/usr.bin/sort/sort.c b/usr.bin/sort/sort.c index 57e2357..4c10233 100644 --- a/usr.bin/sort/sort.c +++ b/usr.bin/sort/sort.c @@ -46,15 +46,19 @@ #include "pathnames.h" #ifndef lint -__COPYRIGHT("@(#) Copyright (c) 1993\n\ - The Regents of the University of California. All rights reserved.\n"); +__COPYRIGHT("@(#) Copyright (c) 1993\nThe Regents of the University of California. 
All rights reserved.\n"); #endif /* not lint */ #ifndef lint +#if 0 __RCSID("$NetBSD: sort.c,v 1.26 2001/04/30 00:25:09 ross Exp $"); __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <sys/types.h> #include <sys/time.h> #include <sys/resource.h> @@ -208,7 +212,7 @@ main(argc, argv) /* change to /dev/stdin if '-' */ if (argv[i][0] == '-') - argv[i] = _PATH_STDIN; + argv[i] = strdup(_PATH_STDIN); } else if ((ch = access(argv[i], R_OK))) err(2, "%s", argv[i]); @@ -252,7 +256,7 @@ main(argc, argv) } else if (!(ch = access(outpath, 0)) && strncmp(_PATH_DEV, outpath, 5)) { static const struct sigaction act = - { onsignal, {{0}}, SA_RESTART | SA_RESETHAND }; + { { onsignal }, SA_RESTART | SA_RESETHAND, { { 0 } } }; static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; int outfd; @@ -295,7 +299,7 @@ main(argc, argv) static void onsignal(sig) - int sig; + int sig __unused; { cleanup(); } diff --git a/usr.bin/sort/tmp.c b/usr.bin/sort/tmp.c index 55ca383..2900f47 100644 --- a/usr.bin/sort/tmp.c +++ b/usr.bin/sort/tmp.c @@ -39,10 +39,15 @@ #include <ctype.h> #ifndef lint +#if 0 __RCSID("$NetBSD: tmp.c,v 1.7 2001/02/19 15:45:45 jdolecek Exp $"); __SCCSID("@(#)tmp.c 8.1 (Berkeley) 6/6/93"); +#endif #endif /* not lint */ +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + #include <sys/param.h> #include <err.h> |