diff options
-rw-r--r-- | usr.bin/cut/cut.1 | 13
-rw-r--r-- | usr.bin/cut/cut.c | 88
2 files changed, 97 insertions, 4 deletions
diff --git a/usr.bin/cut/cut.1 b/usr.bin/cut/cut.1 index 0f6420f8..3da80f7 100644 --- a/usr.bin/cut/cut.1 +++ b/usr.bin/cut/cut.1 @@ -115,6 +115,19 @@ Do not split multi-byte characters. Suppress lines with no field delimiter characters. Unless specified, lines with no delimiters are passed through unmodified. .El +.Sh ENVIRONMENT +The +.Ev LANG , +.Ev LC_ALL +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +if the +.Fl n +option is specified. +Their effect is described in +.Xr environ 7 . .Sh DIAGNOSTICS .Ex -std .Sh SEE ALSO diff --git a/usr.bin/cut/cut.c b/usr.bin/cut/cut.c index 461dc86..209dbb9 100644 --- a/usr.bin/cut/cut.c +++ b/usr.bin/cut/cut.c @@ -52,12 +52,15 @@ static const char rcsid[] = #include <string.h> #include <unistd.h> +int bflag; int cflag; char dchar; int dflag; int fflag; +int nflag; int sflag; +void b_n_cut(FILE *, const char *); void c_cut(FILE *, const char *); void f_cut(FILE *, const char *); void get_list(char *); @@ -79,11 +82,17 @@ main(argc, argv) fcn = NULL; dchar = '\t'; /* default delimiter is \t */ - /* Since we don't support multi-byte characters, the -c and -b - options are equivalent, and the -n option is meaningless. */ + /* + * Since we don't support multi-byte characters, the -c and -b + * options are equivalent. 
+ */ while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) switch(ch) { case 'b': + fcn = c_cut; + get_list(optarg); + bflag = 1; + break; case 'c': fcn = c_cut; get_list(optarg); @@ -102,6 +111,7 @@ main(argc, argv) sflag = 1; break; case 'n': + nflag = 1; break; case '?': default: @@ -111,11 +121,16 @@ main(argc, argv) argv += optind; if (fflag) { - if (cflag) + if (bflag || cflag || nflag) usage(); - } else if (!cflag || dflag || sflag) + } else if (!(bflag || cflag) || dflag || sflag) + usage(); + else if (!bflag && nflag) usage(); + if (nflag) + fcn = b_n_cut; + rval = 0; if (*argv) for (; *argv; ++argv) { @@ -217,6 +232,71 @@ needpos(size_t n) } } +/* + * Cut based on byte positions, taking care not to split multibyte characters. + * Although this function also handles the case where -n is not specified, + * c_cut() ought to be much faster. + */ +void +b_n_cut(fp, fname) + FILE *fp; + const char *fname; +{ + size_t col, i, lbuflen; + char *lbuf; + int canwrite, clen, warned; + + warned = 0; + while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { + for (col = 0; lbuflen > 0; col += clen) { + if ((clen = mblen(lbuf, lbuflen)) < 0) { + if (!warned) { + warn("%s", fname); + warned = 1; + } + clen = 1; + } + if (clen == 0 || *lbuf == '\n') + break; + if (col < maxval && !positions[1 + col]) { + /* + * Print the character if (1) after an initial + * segment of un-selected bytes, the rest of + * it is selected, and (2) the last byte is + * selected. + */ + i = col; + while (i < col + clen && i < maxval && + !positions[1 + i]) + i++; + canwrite = i < col + clen; + for (; i < col + clen && i < maxval; i++) + canwrite &= positions[1 + i]; + if (canwrite) + fwrite(lbuf, 1, clen, stdout); + } else { + /* + * Print the character if all of it has + * been selected. 
+ */ + canwrite = 1; + for (i = col; i < col + clen; i++) + if ((i >= maxval && !autostop) || + (i < maxval && !positions[1 + i])) { + canwrite = 0; + break; + } + if (canwrite) + fwrite(lbuf, 1, clen, stdout); + } + lbuf += clen; + lbuflen -= clen; + } + if (lbuflen > 0) + putchar('\n'); + } +} + void c_cut(fp, fname) FILE *fp; |