diff options
author | tjr <tjr@FreeBSD.org> | 2004-05-02 11:25:37 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2004-05-02 11:25:37 +0000 |
commit | de72f8ed4193914556f8e7266dc1f52706213d5b (patch) | |
tree | 33ababbec3452462cff8a64581a3fc9f628a2913 /bin/ls | |
parent | 0342c52a187a490376a081f56bf3b22a229b7cd1 (diff) | |
download | FreeBSD-src-de72f8ed4193914556f8e7266dc1f52706213d5b.zip FreeBSD-src-de72f8ed4193914556f8e7266dc1f52706213d5b.tar.gz |
Treat filenames as multibyte character strings (according to the current
LC_CTYPE setting) when determining which characters are printable.
This is an often-requested feature.

Use wcwidth() to determine the number of column positions a character
takes up, although there are still a few places left where we assume
1 byte = 1 column position, e.g. line-wrapping when handling the -m option.

The error handling here is somewhat more complicated than usual: we do
our best to show what we can of a filename in the presence of conversion
errors, instead of simply aborting.
Diffstat (limited to 'bin/ls')
-rw-r--r-- | bin/ls/extern.h | 1 |
-rw-r--r-- | bin/ls/ls.1 | 6 |
-rw-r--r-- | bin/ls/print.c | 3 |
-rw-r--r-- | bin/ls/util.c | 187 |
4 files changed, 132 insertions, 65 deletions
diff --git a/bin/ls/extern.h b/bin/ls/extern.h index 9482dad..52b6a85 100644 --- a/bin/ls/extern.h +++ b/bin/ls/extern.h @@ -45,6 +45,7 @@ int printname(const char *); void printscol(const DISPLAY *); void printstream(const DISPLAY *); void usage(void); +int prn_normal(const char *); size_t len_octal(const char *, int); int prn_octal(const char *); int prn_printable(const char *); diff --git a/bin/ls/ls.1 b/bin/ls/ls.1 index eb84484..8554f54 100644 --- a/bin/ls/ls.1 +++ b/bin/ls/ls.1 @@ -31,7 +31,7 @@ .\" @(#)ls.1 8.7 (Berkeley) 7/29/94 .\" $FreeBSD$ .\" -.Dd March 21, 2004 +.Dd May 2, 2004 .Dt LS 1 .Os .Sh NAME @@ -673,7 +673,3 @@ command appeared in .Sh BUGS To maintain backward compatibility, the relationships between the many options are quite complex. -.Pp -The -.Nm -utility does not recognize multibyte characters in filenames. diff --git a/bin/ls/print.c b/bin/ls/print.c index 1d6bec9..eb0258e 100644 --- a/bin/ls/print.c +++ b/bin/ls/print.c @@ -147,7 +147,7 @@ printname(const char *name) else if (f_nonprint) return prn_printable(name); else - return printf("%s", name); + return prn_normal(name); } void @@ -239,6 +239,7 @@ printstream(const DISPLAY *dp) for (p = dp->list, chcnt = 0; p; p = p->fts_link) { if (p->fts_number == NO_PRINT) continue; + /* XXX strlen does not take octal escapes into account. */ if (strlen(p->fts_name) + chcnt + (p->fts_link ? 
2 : 0) >= (unsigned)termwidth) { putchar('\n'); diff --git a/bin/ls/util.c b/bin/ls/util.c index ea305c1..a7b8208 100644 --- a/bin/ls/util.c +++ b/bin/ls/util.c @@ -44,25 +44,81 @@ __FBSDID("$FreeBSD$"); #include <ctype.h> #include <err.h> #include <fts.h> +#include <limits.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> +#include <wctype.h> #include "ls.h" #include "extern.h" int +prn_normal(const char *s) +{ + mbstate_t mbs; + wchar_t wc; + int i, n; + size_t clen; + + memset(&mbs, 0, sizeof(mbs)); + n = 0; + while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) { + if (clen == (size_t)-2) { + n += printf("%s", s); + break; + } + if (clen == (size_t)-1) { + memset(&mbs, 0, sizeof(mbs)); + putchar((unsigned char)*s); + s++; + n++; + continue; + } + for (i = 0; i < (int)clen; i++) + putchar((unsigned char)s[i]); + s += clen; + n += wcwidth(wc); + } + return (n); +} + +int prn_printable(const char *s) { - char c; - int n; + mbstate_t mbs; + wchar_t wc; + int i, n; + size_t clen; - for (n = 0; (c = *s) != '\0'; ++s, ++n) - if (isprint((unsigned char)c)) - putchar(c); - else + memset(&mbs, 0, sizeof(mbs)); + n = 0; + while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) { + if (clen == (size_t)-1) { + putchar('?'); + s++; + n++; + memset(&mbs, 0, sizeof(mbs)); + continue; + } + if (clen == (size_t)-2) { putchar('?'); - return n; + n++; + break; + } + if (!iswprint(wc)) { + putchar('?'); + s += clen; + n++; + continue; + } + for (i = 0; i < (int)clen; i++) + putchar((unsigned char)s[i]); + s += clen; + n += wcwidth(wc); + } + return (n); } /* @@ -81,70 +137,83 @@ prn_printable(const char *s) size_t len_octal(const char *s, int len) { - size_t r = 0; + mbstate_t mbs; + wchar_t wc; + size_t clen, r; - while (len--) - if (isprint((unsigned const char)*s++)) r++; else r += 4; - return r; + memset(&mbs, 0, sizeof(mbs)); + r = 0; + while (len != 0 && (clen = mbrtowc(&wc, s, len, &mbs)) != 0) { + if (clen == (size_t)-1) { + r += 4; + s++; 
+ len--; + memset(&mbs, 0, sizeof(mbs)); + continue; + } + if (clen == (size_t)-2) { + r += 4 * len; + break; + } + if (iswprint(wc)) + r++; + else + r += 4 * clen; + s += clen; + } + return (r); } int prn_octal(const char *s) { - unsigned char ch; - int len = 0; + static const char esc[] = "\\\\\"\"\aa\bb\ff\nn\rr\tt\vv"; + const char *p; + mbstate_t mbs; + wchar_t wc; + size_t clen; + unsigned char ch; + int goodchar, i, len, prtlen; - while ((ch = (unsigned char)*s++)) { - if (isprint(ch) && (ch != '\"') && (ch != '\\')) - putchar(ch), len++; - else if (f_octal_escape) { - putchar('\\'); - switch (ch) { - case '\\': - putchar('\\'); - break; - case '\"': - putchar('"'); - break; - case '\a': - putchar('a'); - break; - case '\b': - putchar('b'); - break; - case '\f': - putchar('f'); - break; - case '\n': - putchar('n'); - break; - case '\r': - putchar('r'); - break; - case '\t': - putchar('t'); - break; - case '\v': - putchar('v'); - break; - default: + memset(&mbs, 0, sizeof(mbs)); + len = 0; + while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) { + goodchar = clen != (size_t)-1 && clen != (size_t)-2; + if (goodchar && iswprint(wc) && wc != L'\"' && wc != L'\\') { + for (i = 0; i < (int)clen; i++) + putchar((unsigned char)s[i]); + len += wcwidth(wc); + } else if (goodchar && f_octal_escape && wc >= 0 && + wc <= (wchar_t)UCHAR_MAX && + (p = strchr(esc, (char)wc)) != NULL) { + putchar('\\'); + putchar(p[1]); + len += 2; + } else { + if (goodchar) + prtlen = clen; + else if (clen == (size_t)-1) + prtlen = 1; + else + prtlen = strlen(s); + for (i = 0; i < prtlen; i++) { + ch = (unsigned char)s[i]; + putchar('\\'); putchar('0' + (ch >> 6)); putchar('0' + ((ch >> 3) & 7)); putchar('0' + (ch & 7)); - len += 2; - break; - } - len += 2; - } - else { - putchar('\\'); - putchar('0' + (ch >> 6)); - putchar('0' + ((ch >> 3) & 7)); - putchar('0' + (ch & 7)); - len += 4; + len += 4; + } } + if (clen == (size_t)-2) + break; + if (clen == (size_t)-1) { + memset(&mbs, 0, 
sizeof(mbs)); + s++; + } else + s += clen; } - return len; + return (len); } void |