diff options
author | tjr <tjr@FreeBSD.org> | 2004-06-27 14:55:07 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2004-06-27 14:55:07 +0000 |
commit | f3e5f1bddb9c2bab6e372c376e825b698044b2c3 (patch) | |
tree | a2f3ba4a2d9bbbfba72a5e43e5c3b66b1d8e52a4 /usr.bin/cut | |
parent | 9739f5e620632bbd9f7ea381b97cbfd20dca2fdc (diff) | |
download | FreeBSD-src-f3e5f1bddb9c2bab6e372c376e825b698044b2c3.zip FreeBSD-src-f3e5f1bddb9c2bab6e372c376e825b698044b2c3.tar.gz |
Implement the -c option correctly in locales with multibyte characters
instead of treating it as a synonym for -b.
Diffstat (limited to 'usr.bin/cut')
-rw-r--r-- | usr.bin/cut/cut.1 | 15 |
-rw-r--r-- | usr.bin/cut/cut.c | 92 |
2 files changed, 67 insertions, 40 deletions
diff --git a/usr.bin/cut/cut.1 b/usr.bin/cut/cut.1 index bc4adc6..2effded 100644 --- a/usr.bin/cut/cut.1 +++ b/usr.bin/cut/cut.1 @@ -35,7 +35,7 @@ .\" @(#)cut.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd June 6, 1993 +.Dd June 28, 2004 .Dt CUT 1 .Os .Sh NAME @@ -122,11 +122,9 @@ The .Ev LANG , LC_ALL and .Ev LC_CTYPE -environment variables affect the execution of +environment variables affect the execution of the .Nm -if the -.Fl n -option is specified. +utility. Their effect is described in .Xr environ 7 . .Sh EXAMPLES @@ -158,13 +156,6 @@ command appeared in System III .Ux . .Sh BUGS -The -.Fl c -option is a synonym for the -.Fl b -option, which causes incorrect behaviour in locales that support -multibyte characters. -.Pp When operating on fields .Fl ( f option is specified), diff --git a/usr.bin/cut/cut.c b/usr.bin/cut/cut.c index 0447db0..eeeb17e 100644 --- a/usr.bin/cut/cut.c +++ b/usr.bin/cut/cut.c @@ -61,9 +61,10 @@ int fflag; int nflag; int sflag; -void b_n_cut(FILE *, const char *); -void c_cut(FILE *, const char *); -void f_cut(FILE *, const char *); +int b_cut(FILE *, const char *); +int b_n_cut(FILE *, const char *); +int c_cut(FILE *, const char *); +int f_cut(FILE *, const char *); void get_list(char *); void needpos(size_t); static void usage(void); @@ -72,7 +73,7 @@ int main(int argc, char *argv[]) { FILE *fp; - void (*fcn)(FILE *, const char *); + int (*fcn)(FILE *, const char *); int ch, rval; setlocale(LC_ALL, ""); @@ -80,19 +81,13 @@ main(int argc, char *argv[]) fcn = NULL; dchar = '\t'; /* default delimiter is \t */ - /* - * Since we don't support multi-byte characters, the -c and -b - * options are equivalent. 
- */ while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) switch(ch) { case 'b': - fcn = c_cut; get_list(optarg); bflag = 1; break; case 'c': - fcn = c_cut; get_list(optarg); cflag = 1; break; @@ -102,7 +97,6 @@ main(int argc, char *argv[]) break; case 'f': get_list(optarg); - fcn = f_cut; fflag = 1; break; case 's': @@ -126,14 +120,18 @@ main(int argc, char *argv[]) else if (!bflag && nflag) usage(); - if (nflag) - fcn = b_n_cut; + if (fflag) + fcn = f_cut; + else if (cflag) + fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; + else if (bflag) + fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut; rval = 0; if (*argv) for (; *argv; ++argv) { if (strcmp(*argv, "-") == 0) - fcn(stdin, "stdin"); + rval |= fcn(stdin, "stdin"); else { if (!(fp = fopen(*argv, "r"))) { warn("%s", *argv); @@ -145,7 +143,7 @@ main(int argc, char *argv[]) } } else - fcn(stdin, "stdin"); + rval = fcn(stdin, "stdin"); exit(rval); } @@ -229,12 +227,41 @@ needpos(size_t n) } } +int +b_cut(FILE *fp, const char *fname) +{ + int ch, col; + char *pos; + + ch = 0; + for (;;) { + pos = positions + 1; + for (col = maxval; col; --col) { + if ((ch = getc(fp)) == EOF) + return (0); + if (ch == '\n') + break; + if (*pos++) + (void)putchar(ch); + } + if (ch != '\n') { + if (autostop) + while ((ch = getc(fp)) != EOF && ch != '\n') + (void)putchar(ch); + else + while ((ch = getc(fp)) != EOF && ch != '\n'); + } + (void)putchar('\n'); + } + return (0); +} + /* * Cut based on byte positions, taking care not to split multibyte characters. * Although this function also handles the case where -n is not specified, - * c_cut() ought to be much faster. + * b_cut() ought to be much faster. 
*/ -void +int b_n_cut(FILE *fp, const char *fname) { size_t col, i, lbuflen; @@ -293,37 +320,45 @@ b_n_cut(FILE *fp, const char *fname) if (lbuflen > 0) putchar('\n'); } + return (warned); } -void -c_cut(FILE *fp, const char *fname __unused) +int +c_cut(FILE *fp, const char *fname) { - int ch, col; + wint_t ch; + int col; char *pos; ch = 0; for (;;) { pos = positions + 1; for (col = maxval; col; --col) { - if ((ch = getc(fp)) == EOF) - return; + if ((ch = getwc(fp)) == WEOF) + goto out; if (ch == '\n') break; if (*pos++) - (void)putchar(ch); + (void)putwchar(ch); } if (ch != '\n') { if (autostop) - while ((ch = getc(fp)) != EOF && ch != '\n') - (void)putchar(ch); + while ((ch = getwc(fp)) != WEOF && ch != '\n') + (void)putwchar(ch); else - while ((ch = getc(fp)) != EOF && ch != '\n'); + while ((ch = getwc(fp)) != WEOF && ch != '\n'); } - (void)putchar('\n'); + (void)putwchar('\n'); + } +out: + if (ferror(fp)) { + warn("%s", fname); + return (1); } + return (0); } -void +int f_cut(FILE *fp, const char *fname __unused) { int ch, field, isdelim; @@ -386,6 +421,7 @@ f_cut(FILE *fp, const char *fname __unused) } if (mlbuf != NULL) free(mlbuf); + return (0); } static void |