From f9ab90d9d6d02989a075d0f0074496d5b1045e4b Mon Sep 17 00:00:00 2001 From: rgrimes Date: Fri, 27 May 1994 12:33:43 +0000 Subject: BSD 4.4 Lite Usr.bin Sources --- usr.bin/uniq/Makefile | 5 + usr.bin/uniq/uniq.1 | 130 ++++++++++++++++++++++++ usr.bin/uniq/uniq.c | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 409 insertions(+) create mode 100644 usr.bin/uniq/Makefile create mode 100644 usr.bin/uniq/uniq.1 create mode 100644 usr.bin/uniq/uniq.c (limited to 'usr.bin/uniq') diff --git a/usr.bin/uniq/Makefile b/usr.bin/uniq/Makefile new file mode 100644 index 0000000..fa0db8b --- /dev/null +++ b/usr.bin/uniq/Makefile @@ -0,0 +1,5 @@ +# @(#)Makefile 8.1 (Berkeley) 6/6/93 + +PROG= uniq + +.include diff --git a/usr.bin/uniq/uniq.1 b/usr.bin/uniq/uniq.1 new file mode 100644 index 0000000..0898e0e --- /dev/null +++ b/usr.bin/uniq/uniq.1 @@ -0,0 +1,130 @@ +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)uniq.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd June 6, 1993 +.Dt UNIQ 1 +.Os +.Sh NAME +.Nm uniq +.Nd report or filter out repeated lines in a file +.Sh SYNOPSIS +.Nm uniq +.Op Fl c | Fl d | Fl u +.Op Fl f Ar fields +.Op Fl s Ar chars +.Oo +.Ar input_file +.Op Ar output_file +.Oc +.Sh DESCRIPTION +The +.Nm uniq +utility reads the standard input comparing adjacent lines, and writes +a copy of each unique input line to the standard output. +The second and succeeding copies of identical adjacent input lines are +not written. +Repeated lines in the input will not be detected if they are not adjacent, +so it may be necessary to sort the files first. +.Pp +The following options are available: +.Bl -tag -width Ds +.It Fl c +Precede each output line with the count of the number of times the line +occurred in the input, followed by a single space. +.It Fl d +Don't output lines that are not repeated in the input. +.It Fl f Ar fields +Ignore the first +.Ar fields +in each input line when doing comparisons. +A field is a string of non-blank characters separated from adjacent fields +by blanks. +Field numbers are one based, i.e. the first field is field one. +.It Fl s Ar chars +Ignore the first +.Ar chars +characters in each input line when doing comparisons. +If specified in conjunction with the +.Fl f +option, the first +.Ar chars +characters after the first +.Ar fields +fields will be ignored. +Character numbers are one based, i.e. the first character is character one. +.It Fl u +Don't output lines that are repeated in the input. +.\".It Fl Ns Ar n +.\"(Deprecated; replaced by +.\".Fl f ) . +.\"Ignore the first n +.\"fields on each input line when doing comparisons, +.\"where n is a number. +.\"A field is a string of non-blank +.\"characters separated from adjacent fields +.\"by blanks. +.\".It Cm \&\(pl Ns Ar n +.\"(Deprecated; replaced by +.\".Fl s ) . +.\"Ignore the first +.\".Ar m +.\"characters when doing comparisons, where +.\".Ar m +.\"is a +.\"number. +.El +.Pp +If additional arguments are specified on the command line, the first +such argument is used as the name of an input file, the second is used +as the name of an output file. +.Pp +The +.Nm uniq +utility exits 0 on success, and >0 if an error occurs. +.Sh COMPATIBILITY +The historic +.Cm \&\(pl Ns Ar number +and +.Fl Ns Ar number +options have been deprecated but are still supported in this implementation. +.Sh SEE ALSO +.Xr sort 1 +.Sh STANDARDS +The +.Nm uniq +utility is expected to be +.St -p1003.2 +compatible. diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c new file mode 100644 index 0000000..e84f82e --- /dev/null +++ b/usr.bin/uniq/uniq.c @@ -0,0 +1,274 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Case Larsen. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef lint +static char copyright[] = +"@(#) Copyright (c) 1989, 1993\n\ + The Regents of the University of California. All rights reserved.\n"; +#endif /* not lint */ + +#ifndef lint +static char sccsid[] = "@(#)uniq.c 8.1 (Berkeley) 6/6/93"; +#endif /* not lint */ + +#include +#include +#include +#include +#include + +#define MAXLINELEN (8 * 1024) + +int cflag, dflag, uflag; +int numchars, numfields, repeats; + +void err __P((const char *, ...)); +FILE *file __P((char *, char *)); +void show __P((FILE *, char *)); +char *skip __P((char *)); +void obsolete __P((char *[])); +void usage __P((void)); + +int +main (argc, argv) + int argc; + char *argv[]; +{ + register char *t1, *t2; + FILE *ifp, *ofp; + int ch; + char *prevline, *thisline, *p; + + obsolete(argv); + while ((ch = getopt(argc, argv, "-cdf:s:u")) != EOF) + switch (ch) { + case '-': + --optind; + goto done; + case 'c': + cflag = 1; + break; + case 'd': + dflag = 1; + break; + case 'f': + numfields = strtol(optarg, &p, 10); + if (numfields < 0 || *p) + err("illegal field skip value: %s", optarg); + break; + case 's': + numchars = strtol(optarg, &p, 10); + if (numchars < 0 || *p) + err("illegal character skip value: %s", optarg); + break; + case 'u': + uflag = 1; + break; + case '?': + default: + usage(); + } + +done: argc -= optind; + argv +=optind; + + /* If no flags are set, default is -d -u. */ + if (cflag) { + if (dflag || uflag) + usage(); + } else if (!dflag && !uflag) + dflag = uflag = 1; + + switch(argc) { + case 0: + ifp = stdin; + ofp = stdout; + break; + case 1: + ifp = file(argv[0], "r"); + ofp = stdout; + break; + case 2: + ifp = file(argv[0], "r"); + ofp = file(argv[1], "w"); + break; + default: + usage(); + } + + prevline = malloc(MAXLINELEN); + thisline = malloc(MAXLINELEN); + if (prevline == NULL || thisline == NULL) + err("%s", strerror(errno)); + + if (fgets(prevline, MAXLINELEN, ifp) == NULL) + exit(0); + + while (fgets(thisline, MAXLINELEN, ifp)) { + /* If requested get the chosen fields + character offsets. */ + if (numfields || numchars) { + t1 = skip(thisline); + t2 = skip(prevline); + } else { + t1 = thisline; + t2 = prevline; + } + + /* If different, print; set previous to new value. */ + if (strcmp(t1, t2)) { + show(ofp, prevline); + t1 = prevline; + prevline = thisline; + thisline = t1; + repeats = 0; + } else + ++repeats; + } + show(ofp, prevline); + exit(0); +} + +/* + * show -- + * Output a line depending on the flags and number of repetitions + * of the line. + */ +void +show(ofp, str) + FILE *ofp; + char *str; +{ + if (cflag) + (void)fprintf(ofp, "%4d %s", repeats + 1, str); + if (dflag && repeats || uflag && !repeats) + (void)fprintf(ofp, "%s", str); +} + +char * +skip(str) + register char *str; +{ + register int infield, nchars, nfields; + + for (nfields = numfields, infield = 0; nfields && *str; ++str) + if (isspace(*str)) { + if (infield) { + infield = 0; + --nfields; + } + } else if (!infield) + infield = 1; + for (nchars = numchars; nchars-- && *str; ++str); + return(str); +} + +FILE * +file(name, mode) + char *name, *mode; +{ + FILE *fp; + + if ((fp = fopen(name, mode)) == NULL) + err("%s: %s", name, strerror(errno)); + return(fp); +} + +void +obsolete(argv) + char *argv[]; +{ + int len; + char *ap, *p, *start; + + while (ap = *++argv) { + /* Return if "--" or not an option of any form. */ + if (ap[0] != '-') { + if (ap[0] != '+') + return; + } else if (ap[1] == '-') + return; + if (!isdigit(ap[1])) + continue; + /* + * Digit signifies an old-style option. Malloc space for dash, + * new option and argument. + */ + len = strlen(ap); + if ((start = p = malloc(len + 3)) == NULL) + err("%s", strerror(errno)); + *p++ = '-'; + *p++ = ap[0] == '+' ? 's' : 'f'; + (void)strcpy(p, ap + 1); + *argv = start; + } +} + +void +usage() +{ + (void)fprintf(stderr, + "usage: uniq [-c | -du] [-f fields] [-s chars] [input [output]]\n"); + exit(1); +} + +#if __STDC__ +#include +#else +#include +#endif + +void +#if __STDC__ +err(const char *fmt, ...) +#else +err(fmt, va_alist) + char *fmt; + va_dcl +#endif +{ + va_list ap; +#if __STDC__ + va_start(ap, fmt); +#else + va_start(ap); +#endif + (void)fprintf(stderr, "uniq: "); + (void)vfprintf(stderr, fmt, ap); + va_end(ap); + (void)fprintf(stderr, "\n"); + exit(1); + /* NOTREACHED */ +} -- cgit v1.1