From cf2cc544859193ac5eb52a665cfc01a484641415 Mon Sep 17 00:00:00 2001 From: tjr Date: Fri, 2 Jul 2004 22:48:29 +0000 Subject: Add support for multibyte characters. --- usr.bin/comm/comm.1 | 19 +++++++++++------ usr.bin/comm/comm.c | 61 ++++++++++++++++++++++++++++++----------------------- 2 files changed, 47 insertions(+), 33 deletions(-) (limited to 'usr.bin') diff --git a/usr.bin/comm/comm.1 b/usr.bin/comm/comm.1 index 6866986..c054428 100644 --- a/usr.bin/comm/comm.1 +++ b/usr.bin/comm/comm.1 @@ -35,7 +35,7 @@ .\" From: @(#)comm.1 8.1 (Berkeley) 6/6/93 .\" $FreeBSD$ .\" -.Dd June 25, 2004 +.Dd July 3, 2004 .Os .Dt COMM 1 .Sh NAME @@ -84,6 +84,17 @@ The .Nm utility assumes that the files are lexically sorted; all characters participate in line comparisons. +.Sh ENVIRONMENT +The +.Ev LANG , +.Ev LC_ALL , +.Ev LC_COLLATE , +and +.Ev LC_CTYPE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . .Sh DIAGNOSTICS .Ex -std .Sh SEE ALSO @@ -101,9 +112,3 @@ A .Nm command appeared in .At v4 . -.Sh BUGS -The -.Nm -utility does not recognize multibyte characters in order to remain -compatible with the current implementation of -.Xr sort 1 . 
diff --git a/usr.bin/comm/comm.c b/usr.bin/comm/comm.c index 86f1fa8..0e7e029 100644 --- a/usr.bin/comm/comm.c +++ b/usr.bin/comm/comm.c @@ -49,7 +49,6 @@ static char sccsid[] = "From: @(#)comm.c 8.4 (Berkeley) 5/4/95"; #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <ctype.h> #include <err.h> #include <limits.h> #include <locale.h> @@ -57,14 +56,16 @@ __FBSDID("$FreeBSD$"); #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <wchar.h> +#include <wctype.h> #define MAXLINELEN (LINE_MAX + 1) -const char *tabs[] = { "", "\t", "\t\t" }; +const wchar_t *tabs[] = { L"", L"\t", L"\t\t" }; FILE *file(const char *); -void show(FILE *, const char *, char *); -int stricoll(const char *, const char *); +void show(FILE *, const char *, const wchar_t *, wchar_t *); +int wcsicoll(const wchar_t *, const wchar_t *); static void usage(void); int @@ -73,9 +74,9 @@ main(int argc, char *argv[]) int comp, file1done = 0, file2done = 0, read1, read2; int ch, flag1, flag2, flag3, iflag; FILE *fp1, *fp2; - const char *col1, *col2, *col3; - char line1[MAXLINELEN], line2[MAXLINELEN]; - const char **p; + const wchar_t *col1, *col2, *col3; + wchar_t line1[MAXLINELEN], line2[MAXLINELEN]; + const wchar_t **p; flag1 = flag2 = flag3 = 1; iflag = 0; @@ -121,33 +122,39 @@ main(int argc, char *argv[]) for (read1 = read2 = 1;;) { /* read next line, check for EOF */ - if (read1) - file1done = !fgets(line1, MAXLINELEN, fp1); - if (read2) - file2done = !fgets(line2, MAXLINELEN, fp2); + if (read1) { + file1done = !fgetws(line1, MAXLINELEN, fp1); + if (file1done && ferror(fp1)) + err(1, "%s", argv[0]); + } + if (read2) { + file2done = !fgetws(line2, MAXLINELEN, fp2); + if (file2done && ferror(fp2)) + err(1, "%s", argv[1]); + } /* if one file done, display the rest of the other file */ if (file1done) { if (!file2done && col2) - show(fp2, col2, line2); + show(fp2, argv[1], col2, line2); break; } if (file2done) { if (!file1done && col1) - show(fp1, col1, line1); + show(fp1, argv[0], col1, line1); break; } /* lines are the same */ if(iflag) - comp = stricoll(line1, line2); + comp = 
wcsicoll(line1, line2); else - comp = strcoll(line1, line2); + comp = wcscoll(line1, line2); if (!comp) { read1 = read2 = 1; if (col3) - (void)printf("%s%s", col3, line1); + (void)printf("%ls%ls", col3, line1); continue; } @@ -156,24 +163,26 @@ main(int argc, char *argv[]) read1 = 1; read2 = 0; if (col1) - (void)printf("%s%s", col1, line1); + (void)printf("%ls%ls", col1, line1); } else { read1 = 0; read2 = 1; if (col2) - (void)printf("%s%s", col2, line2); + (void)printf("%ls%ls", col2, line2); } } exit(0); } void -show(FILE *fp, const char *offset, char *buf) +show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf) { do { - (void)printf("%s%s", offset, buf); - } while (fgets(buf, MAXLINELEN, fp)); + (void)printf("%ls%ls", offset, buf); + } while (fgetws(buf, MAXLINELEN, fp)); + if (ferror(fp)) + err(1, "%s", fn); } FILE * @@ -197,15 +206,15 @@ usage(void) } int -stricoll(const char *s1, const char *s2) +wcsicoll(const wchar_t *s1, const wchar_t *s2) { - char *p, line1[MAXLINELEN], line2[MAXLINELEN]; + wchar_t *p, line1[MAXLINELEN], line2[MAXLINELEN]; for (p = line1; *s1; s1++) - *p++ = tolower((unsigned char)*s1); + *p++ = towlower(*s1); *p = '\0'; for (p = line2; *s2; s2++) - *p++ = tolower((unsigned char)*s2); + *p++ = towlower(*s2); *p = '\0'; - return strcoll(line1, line2); + return (wcscoll(line1, line2)); } -- cgit v1.1