From e5cdb48ecf2feafeb68697f0ca0b6fc451ad4a1f Mon Sep 17 00:00:00 2001 From: tjr Date: Fri, 25 Jun 2004 01:48:43 +0000 Subject: Add support for multibyte characters in input files and delimiter strings (arguments to the -d option.) This involves backing out paste.c rev. 1.13 until we have a version of fgetln() that operates on wide character streams. --- usr.bin/paste/paste.1 | 6 +---- usr.bin/paste/paste.c | 73 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 31 deletions(-) (limited to 'usr.bin/paste') diff --git a/usr.bin/paste/paste.1 b/usr.bin/paste/paste.1 index f9a1dc3..dee2cd3 100644 --- a/usr.bin/paste/paste.1 +++ b/usr.bin/paste/paste.1 @@ -36,7 +36,7 @@ .\" @(#)paste.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd September 20, 2001 +.Dd June 25, 2004 .Dt PASTE 1 .Os .Sh NAME @@ -148,7 +148,3 @@ A .Nm command appeared in .At 32v . -.Sh BUGS -Multibyte character delimiters cannot be specified with the -.Fl d -option. diff --git a/usr.bin/paste/paste.c b/usr.bin/paste/paste.c index 52a6458..683b120 100644 --- a/usr.bin/paste/paste.c +++ b/usr.bin/paste/paste.c @@ -54,31 +54,49 @@ __FBSDID("$FreeBSD$"); #include <err.h> #include <errno.h> #include <limits.h> +#include <locale.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <wchar.h> -char *delim; +wchar_t *delim; int delimcnt; int parallel(char **); int sequential(char **); -int tr(char *); +int tr(wchar_t *); static void usage(void); -char tab[] = "\t"; +wchar_t tab[] = L"\t"; int main(int argc, char *argv[]) { int ch, rval, seq; + wchar_t *warg; + const char *arg; + size_t len; + + setlocale(LC_CTYPE, ""); seq = 0; while ((ch = getopt(argc, argv, "d:s")) != -1) switch(ch) { case 'd': - delimcnt = tr(delim = optarg); + arg = optarg; + len = mbsrtowcs(NULL, &arg, 0, NULL); + if (len == (size_t)-1) + err(1, "delimiters"); + warg = malloc((len + 1) * sizeof(*warg)); + if (warg == NULL) + err(1, NULL); + arg = optarg; + len = mbsrtowcs(warg, &arg, len + 1, NULL); + if (len == (size_t)-1) + err(1, "delimiters"); + delimcnt 
= tr(delim = warg); break; case 's': seq = 1; @@ -116,10 +134,11 @@ parallel(char **argv) { LIST *lp; int cnt; - char ch, *buf, *p; + wint_t ich; + wchar_t ch; + char *p; LIST *head, *tmp; int opencnt, output; - size_t len; for (cnt = 0, head = NULL; (p = *argv); ++argv, ++cnt) { if ((lp = malloc(sizeof(LIST))) == NULL) @@ -144,16 +163,16 @@ parallel(char **argv) if (!lp->fp) { if (output && lp->cnt && (ch = delim[(lp->cnt - 1) % delimcnt])) - putchar(ch); + putwchar(ch); continue; } - if ((buf = fgetln(lp->fp, &len)) == NULL) { + if ((ich = getwc(lp->fp)) == WEOF) { if (!--opencnt) break; lp->fp = NULL; if (output && lp->cnt && (ch = delim[(lp->cnt - 1) % delimcnt])) - putchar(ch); + putwchar(ch); continue; } /* @@ -164,15 +183,17 @@ parallel(char **argv) output = 1; for (cnt = 0; cnt < lp->cnt; ++cnt) if ((ch = delim[cnt % delimcnt])) - putchar(ch); + putwchar(ch); } else if ((ch = delim[(lp->cnt - 1) % delimcnt])) - putchar(ch); - if (buf[len - 1] == '\n') - len--; - fwrite(buf, 1, len, stdout); + putwchar(ch); + if (ich == '\n') + continue; + do { + putwchar(ich); + } while ((ich = getwc(lp->fp)) != WEOF && ich != '\n'); } if (output) - putchar('\n'); + putwchar('\n'); } return (0); @@ -183,8 +204,8 @@ sequential(char **argv) { FILE *fp; int cnt, failed, needdelim; - char *buf, *p; - size_t len; + wint_t ch; + char *p; failed = 0; for (; (p = *argv); ++argv) { @@ -196,21 +217,21 @@ sequential(char **argv) continue; } cnt = needdelim = 0; - while ((buf = fgetln(fp, &len)) != NULL) { + while ((ch = getwc(fp)) != WEOF) { if (needdelim) { needdelim = 0; if (delim[cnt] != '\0') - putchar(delim[cnt]); + putwchar(delim[cnt]); if (++cnt == delimcnt) cnt = 0; } - if (buf[len - 1] == '\n') - len--; - fwrite(buf, 1, len, stdout); - needdelim = 1; + if (ch != '\n') + putwchar(ch); + else + needdelim = 1; } if (needdelim) - putchar('\n'); + putwchar('\n'); if (fp != stdin) (void)fclose(fp); } @@ -219,10 +240,10 @@ sequential(char **argv) } int -tr(char *arg) +tr(wchar_t 
*arg) { int cnt; - char ch, *p; + wchar_t ch, *p; for (p = arg, cnt = 0; (ch = *p++); ++arg, ++cnt) if (ch == '\\') -- cgit v1.1