From c3eeb08622b7845ad94522593f93674233225bf1 Mon Sep 17 00:00:00 2001 From: archie Date: Mon, 1 Feb 1999 21:16:45 +0000 Subject: Add new option '-p pattern' for splitting files based on matching lines in the file with a regular expression. Useful for e.g. 'cvs diff' output. Also compile cleanly with -Wall and fix a few style bugs. PR: bin/9405 --- usr.bin/split/Makefile | 1 + usr.bin/split/split.1 | 16 ++++++ usr.bin/split/split.c | 129 +++++++++++++++++++++++++++++-------------------- 3 files changed, 94 insertions(+), 52 deletions(-) (limited to 'usr.bin/split') diff --git a/usr.bin/split/Makefile b/usr.bin/split/Makefile index 93048f7..ffac658 100644 --- a/usr.bin/split/Makefile +++ b/usr.bin/split/Makefile @@ -1,5 +1,6 @@ # @(#)Makefile 8.1 (Berkeley) 6/6/93 PROG= split +COPTS+= -Wall .include <bsd.prog.mk> diff --git a/usr.bin/split/split.1 b/usr.bin/split/split.1 index 34f3c07..927250c 100644 --- a/usr.bin/split/split.1 +++ b/usr.bin/split/split.1 @@ -30,6 +30,7 @@ .\" SUCH DAMAGE. .\" .\" @(#)split.1 8.3 (Berkeley) 4/16/94 +.\" $Id$ .\" .Dd April 16, 1994 .Dt SPLIT 1 @@ -41,6 +42,7 @@ .Nm split .Op Fl b Ar byte_count[k|m] .Op Fl l Ar line_count +.Op Fl p Ar pattern .Op Ar file Op Ar name .Sh DESCRIPTION The @@ -70,6 +72,16 @@ megabyte pieces. Create smaller files .Ar n lines in length. +.It Fl p Ar pattern +The file is split whenever an input line matches +.Ar pattern , +which is interpreted as an extended regular expression. +The matching line will be the first line of the next output file. +This option is incompatible with the +.Fl b +and +.Fl l +options. .El .Pp If additional arguments are specified, the first is used as the name @@ -92,6 +104,10 @@ For historical reasons, if you specify can only create 676 separate files. The default naming convention allows 2028 separate files. +.Pp +The maximum line length for matching patterns is 65536. +.Sh SEE ALSO +.Xr re_format 7 . 
.Sh HISTORY A .Nm split diff --git a/usr.bin/split/split.c b/usr.bin/split/split.c index a975e41..84c7bca 100644 --- a/usr.bin/split/split.c +++ b/usr.bin/split/split.c @@ -44,6 +44,7 @@ static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; #endif /* not lint */ #include <sys/param.h> +#include <sys/types.h> #include <ctype.h> #include <err.h> @@ -52,6 +53,8 @@ static char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <regex.h> +#include <sysexits.h> #define DEFLINE 1000 /* Default num lines per file. */ @@ -61,6 +64,8 @@ int file_open; /* If a file open. */ int ifd = -1, ofd = -1; /* Input/output file descriptors. */ char bfr[MAXBSIZE]; /* I/O buffer. */ char fname[MAXPATHLEN]; /* File name prefix. */ +regex_t rgx; +int pflag; void newfile __P((void)); void split1 __P((void)); @@ -75,7 +80,7 @@ main(argc, argv) int ch; char *ep, *p; - while ((ch = getopt(argc, argv, "-0123456789b:l:")) != -1) + while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1) switch (ch) { case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': @@ -91,7 +96,8 @@ main(argc, argv) numlines = strtol(argv[optind] + 1, &ep, 10); if (numlines <= 0 || *ep) - errx(1, "%s: illegal line count", optarg); + errx(EX_USAGE, + "%s: illegal line count", optarg); } break; case '-': /* Undocumented: historic stdin flag. */ @@ -102,17 +108,24 @@ main(argc, argv) case 'b': /* Byte count. */ if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 || (*ep != '\0' && *ep != 'k' && *ep != 'm')) - errx(1, "%s: illegal byte count", optarg); + errx(EX_USAGE, + "%s: illegal byte count", optarg); if (*ep == 'k') bytecnt *= 1024; else if (*ep == 'm') bytecnt *= 1048576; break; + case 'p' : /* pattern matching. */ + if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) + errx(EX_USAGE, "%s: illegal regexp", optarg); + pflag = 1; + break; case 'l': /* Line count. 
*/ if (numlines != 0) usage(); if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) - errx(1, "%s: illegal line count", optarg); + errx(EX_USAGE, + "%s: illegal line count", optarg); break; default: usage(); @@ -123,7 +136,7 @@ main(argc, argv) if (*argv != NULL) if (ifd == -1) { /* Input file. */ if ((ifd = open(*argv, O_RDONLY, 0)) < 0) - err(1, "%s", *argv); + err(EX_NOINPUT, "%s", *argv); ++argv; } if (*argv != NULL) /* File name prefix. */ @@ -131,9 +144,12 @@ main(argc, argv) if (*argv != NULL) usage(); + if (pflag && (numlines != 0 || bytecnt != 0)) + usage(); + if (numlines == 0) numlines = DEFLINE; - else if (bytecnt) + else if (bytecnt != 0) usage(); if (ifd == -1) /* Stdin by default. */ @@ -144,6 +160,8 @@ main(argc, argv) exit (0); } split2(); + if (pflag) + regfree(&rgx); exit(0); } @@ -159,40 +177,38 @@ split1() char *C; for (bcnt = 0;;) - switch (len = read(ifd, bfr, MAXBSIZE)) { + switch ((len = read(ifd, bfr, MAXBSIZE))) { case 0: exit(0); case -1: - err(1, "read"); + err(EX_IOERR, "read"); /* NOTREACHED */ default: - if (!file_open) { + if (!file_open) newfile(); - file_open = 1; - } if (bcnt + len >= bytecnt) { dist = bytecnt - bcnt; if (write(ofd, bfr, dist) != dist) - err(1, "write"); + err(EX_IOERR, "write"); len -= dist; for (C = bfr + dist; len >= bytecnt; len -= bytecnt, C += bytecnt) { newfile(); if (write(ofd, C, (int)bytecnt) != bytecnt) - err(1, "write"); + err(EX_IOERR, "write"); } - if (len) { + if (len != 0) { newfile(); if (write(ofd, C, len) != len) - err(1, "write"); + err(EX_IOERR, "write"); } else file_open = 0; bcnt = len; } else { bcnt += len; if (write(ofd, bfr, len) != len) - err(1, "write"); + err(EX_IOERR, "write"); } } } @@ -204,40 +220,49 @@ split1() void split2() { - long lcnt; - int len, bcnt; - char *Ce, *Cs; + long lcnt = 0; + FILE *infp; - for (lcnt = 0;;) - switch (len = read(ifd, bfr, MAXBSIZE)) { - case 0: - exit(0); - case -1: - err(1, "read"); - /* NOTREACHED */ - default: - if (!file_open) { + /* Stick a 
stream on top of input file descriptor */ + if ((infp = fdopen(ifd, "r")) == NULL) + err(EX_NOINPUT, "fdopen"); + + /* Process input one line at a time */ + while (fgets(bfr, sizeof(bfr), infp) != NULL) { + const int len = strlen(bfr); + + /* If line is too long to deal with, just write it out */ + if (bfr[len - 1] != '\n') + goto writeit; + + /* Check if we need to start a new file */ + if (pflag) { + regmatch_t pmatch; + + pmatch.rm_so = 0; + pmatch.rm_eo = len - 1; + if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) newfile(); - file_open = 1; - } - for (Cs = Ce = bfr; len--; Ce++) - if (*Ce == '\n' && ++lcnt == numlines) { - bcnt = Ce - Cs + 1; - if (write(ofd, Cs, bcnt) != bcnt) - err(1, "write"); - lcnt = 0; - Cs = Ce + 1; - if (len) - newfile(); - else - file_open = 0; - } - if (Cs < Ce) { - bcnt = Ce - Cs; - if (write(ofd, Cs, bcnt) != bcnt) - err(1, "write"); - } + } else if (lcnt++ == numlines) { + newfile(); + lcnt = 1; } + +writeit: + /* Open output file if needed */ + if (!file_open) + newfile(); + + /* Write out line */ + if (write(ofd, bfr, len) != len) + err(EX_IOERR, "write"); + } + + /* EOF or error? */ + if (ferror(infp)) + err(EX_IOERR, "read"); + else + exit(0); } /* @@ -269,7 +294,7 @@ newfile() #define MAXFILES 676 if (fnum == MAXFILES) { if (!defname || fname[0] == 'z') - errx(1, "too many files"); + errx(EX_DATAERR, "too many files"); ++fname[0]; fnum = 0; } @@ -277,13 +302,13 @@ newfile() fpnt[1] = fnum % 26 + 'a'; ++fnum; if (!freopen(fname, "w", stdout)) - err(1, "%s", fname); + err(EX_IOERR, "%s", fname); + file_open = 1; } static void usage() { - (void)fprintf(stderr, -"usage: split [-b byte_count] [-l line_count] [file [prefix]]\n"); - exit(1); + errx(EX_USAGE, +"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]"); } -- cgit v1.1