diff options
author | tjr <tjr@FreeBSD.org> | 2004-04-09 11:17:29 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2004-04-09 11:17:29 +0000 |
commit | 9e9fb7be4dadb385bbc975d38ded49df37044bd9 (patch) | |
tree | 103a130a743c50a10ea55ca0759171858ddc1975 /usr.bin/wc | |
parent | d7b4a9c5cadba35af80634f64b2030d3e59d0ccb (diff) | |
download | FreeBSD-src-9e9fb7be4dadb385bbc975d38ded49df37044bd9.zip FreeBSD-src-9e9fb7be4dadb385bbc975d38ded49df37044bd9.tar.gz |
Improve robustness of multibyte character handling (-m option), and
simplify the read buffering now that we can feed partial multibyte
characters to mbrtowc().
Diffstat (limited to 'usr.bin/wc')
-rw-r--r-- | usr.bin/wc/wc.c | 42 |
1 file changed, 23 insertions, 19 deletions
```diff
diff --git a/usr.bin/wc/wc.c b/usr.bin/wc/wc.c
index 85a90c9..5afeb17 100644
--- a/usr.bin/wc/wc.c
+++ b/usr.bin/wc/wc.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <wchar.h>
 #include <wctype.h>
 
 uintmax_t tlinect, twordct, tcharct;
@@ -137,12 +138,13 @@ cnt(file)
 {
 	struct stat sb;
 	uintmax_t linect, wordct, charct;
-	ssize_t nread;
-	int clen, fd, len, warned;
+	int fd, len, warned;
+	size_t clen;
 	short gotsp;
 	u_char *p;
 	u_char buf[MAXBSIZE];
 	wchar_t wch;
+	mbstate_t mbs;
 
 	linect = wordct = charct = 0;
 	if (file == NULL) {
@@ -202,34 +204,33 @@ cnt(file)
 
 	/* Do it the hard way... */
 word:	gotsp = 1;
-	len = 0;
 	warned = 0;
-	while ((nread = read(fd, buf + len, MAXBSIZE - len)) != 0) {
-		if (nread == -1) {
+	memset(&mbs, 0, sizeof(mbs));
+	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
+		if (len == -1) {
 			warn("%s: read", file);
 			(void)close(fd);
 			return (1);
 		}
-		len += nread;
 		p = buf;
 		while (len > 0) {
 			if (!domulti || MB_CUR_MAX == 1) {
 				clen = 1;
 				wch = (unsigned char)*p;
-			} else if ((clen = mbtowc(&wch, p, len)) <= 0) {
-				if (len > MB_CUR_MAX) {
-					clen = 1;
-					wch = (unsigned char)*p;
-					if (!warned) {
-						errno = EILSEQ;
-						warn("%s", file);
-						warned = 1;
-					}
-				} else {
-					memmove(buf, p, len);
-					break;
+			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
+			    (size_t)-1) {
+				if (!warned) {
+					errno = EILSEQ;
+					warn("%s", file);
+					warned = 1;
 				}
-			}
+				memset(&mbs, 0, sizeof(mbs));
+				clen = 1;
+				wch = (unsigned char)*p;
+			} else if (clen == (size_t)-2)
+				break;
+			else if (clen == 0)
+				clen = 1;
 			charct++;
 			len -= clen;
 			p += clen;
@@ -243,6 +244,9 @@ word: gotsp = 1;
 			}
 		}
 	}
+	if (domulti && MB_CUR_MAX > 1)
+		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
+			warn("%s", file);
 	if (doline) {
 		tlinect += linect;
 		(void)printf(" %7ju", linect);
```