summary refs log tree commit diff stats
path: root/usr.bin/cut/cut.c
diff options
context:
space:
mode:
author: tjr <tjr@FreeBSD.org> 2002-06-08 07:27:21 +0000
committer: tjr <tjr@FreeBSD.org> 2002-06-08 07:27:21 +0000
commit: 38740250412b74e00ddeed31b89d7d2650d6d2de (patch)
tree: f985afc60a3790626a6a5264b386cc5f31c6125e /usr.bin/cut/cut.c
parent: 012b4f04b0091641c394bf191c366e74ed5d250d (diff)
download: FreeBSD-src-38740250412b74e00ddeed31b89d7d2650d6d2de.zip
download: FreeBSD-src-38740250412b74e00ddeed31b89d7d2650d6d2de.tar.gz
Don't split multibyte characters when the -n option is specified.
Diffstat (limited to 'usr.bin/cut/cut.c')
-rw-r--r-- usr.bin/cut/cut.c 88
1 files changed, 84 insertions, 4 deletions
diff --git a/usr.bin/cut/cut.c b/usr.bin/cut/cut.c
index 461dc86..209dbb9 100644
--- a/usr.bin/cut/cut.c
+++ b/usr.bin/cut/cut.c
@@ -52,12 +52,15 @@ static const char rcsid[] =
#include <string.h>
#include <unistd.h>
+int bflag;
int cflag;
char dchar;
int dflag;
int fflag;
+int nflag;
int sflag;
+void b_n_cut(FILE *, const char *);
void c_cut(FILE *, const char *);
void f_cut(FILE *, const char *);
void get_list(char *);
@@ -79,11 +82,17 @@ main(argc, argv)
fcn = NULL;
dchar = '\t'; /* default delimiter is \t */
- /* Since we don't support multi-byte characters, the -c and -b
- options are equivalent, and the -n option is meaningless. */
+ /*
+ * Since we don't support multi-byte characters, the -c and -b
+ * options are equivalent.
+ */
while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
switch(ch) {
case 'b':
+ fcn = c_cut;
+ get_list(optarg);
+ bflag = 1;
+ break;
case 'c':
fcn = c_cut;
get_list(optarg);
@@ -102,6 +111,7 @@ main(argc, argv)
sflag = 1;
break;
case 'n':
+ nflag = 1;
break;
case '?':
default:
@@ -111,11 +121,16 @@ main(argc, argv)
argv += optind;
if (fflag) {
- if (cflag)
+ if (bflag || cflag || nflag)
usage();
- } else if (!cflag || dflag || sflag)
+ } else if (!(bflag || cflag) || dflag || sflag)
+ usage();
+ else if (!bflag && nflag)
usage();
+ if (nflag)
+ fcn = b_n_cut;
+
rval = 0;
if (*argv)
for (; *argv; ++argv) {
@@ -217,6 +232,71 @@ needpos(size_t n)
}
}
+/*
+ * Cut based on byte positions, taking care not to split multibyte characters.
+ * Although this function also handles the case where -n is not specified,
+ * c_cut() ought to be much faster.
+ *
+ * Reads fp one line at a time with fgetln() and measures each character
+ * with mblen().  A character is only ever written to stdout whole, and
+ * only when the selection test in the loop body passes for the byte
+ * columns it occupies.  On the first mblen() failure one warning naming
+ * fname is issued through warn(); that byte and any later invalid bytes
+ * are then treated as one-byte characters and processing continues.
+ */
+void
+b_n_cut(fp, fname)
+ FILE *fp;
+ const char *fname;
+{
+ size_t col, i, lbuflen;
+ char *lbuf;
+ int canwrite, clen, warned;
+
+ warned = 0; /* becomes 1 after the first warning so it is not repeated */
+ while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
+ for (col = 0; lbuflen > 0; col += clen) { /* col: 0-based byte offset of the current character */
+ if ((clen = mblen(lbuf, lbuflen)) < 0) {
+ if (!warned) {
+ warn("%s", fname);
+ warned = 1;
+ }
+ clen = 1; /* treat the undecodable byte as a one-byte character */
+ }
+ if (clen == 0 || *lbuf == '\n') /* stop at a null character or at the newline */
+ break;
+ if (col < maxval && !positions[1 + col]) { /* first byte is below maxval and not selected */
+ /*
+ * Print the character if (1) after an initial
+ * segment of un-selected bytes, the rest of
+ * it is selected, and (2) the last byte is
+ * selected.
+ */
+ i = col;
+ while (i < col + clen && i < maxval &&
+ !positions[1 + i])
+ i++;
+ canwrite = i < col + clen; /* the scan stopped before the end of the character */
+ for (; i < col + clen && i < maxval; i++)
+ canwrite &= positions[1 + i];
+ if (canwrite)
+ fwrite(lbuf, 1, clen, stdout);
+ } else {
+ /*
+ * Print the character if all of it has
+ * been selected.
+ */
+ canwrite = 1;
+ for (i = col; i < col + clen; i++)
+ if ((i >= maxval && !autostop) || /* bytes at or past maxval pass only when autostop is set */
+ (i < maxval && !positions[1 + i])) {
+ canwrite = 0;
+ break;
+ }
+ if (canwrite)
+ fwrite(lbuf, 1, clen, stdout);
+ }
+ lbuf += clen; /* advance past the character just examined */
+ lbuflen -= clen;
+ }
+ if (lbuflen > 0) /* the loop left via break, so input remained on this line */
+ putchar('\n');
+ }
+}
+
void
c_cut(fp, fname)
FILE *fp;
OpenPOWER on IntegriCloud