summaryrefslogtreecommitdiffstats
path: root/usr.bin/comm
diff options
context:
space:
mode:
authorache <ache@FreeBSD.org>2010-03-08 22:27:46 +0000
committerache <ache@FreeBSD.org>2010-03-08 22:27:46 +0000
commita97c77aebb425b7cafa63e7684a83ea3217ad0a0 (patch)
tree3e3fce03fe079b2abacc98308fc11c1d42db6ad8 /usr.bin/comm
parent5ed5e2a613d2bcd756670bfb890e6501c7bc3157 (diff)
downloadFreeBSD-src-a97c77aebb425b7cafa63e7684a83ea3217ad0a0.zip
FreeBSD-src-a97c77aebb425b7cafa63e7684a83ea3217ad0a0.tar.gz
Rewrite input processing to not exit with error on the first EILSEQ found
in the input data but fallback to "binary comparison" instead. POSIX says: "The input files shall be text files", nothing more, so the text file with illegal sequence is valid input. BTW, GNU sort does not fails on EILSEQ too.
Diffstat (limited to 'usr.bin/comm')
-rw-r--r--usr.bin/comm/comm.c176
1 files changed, 68 insertions, 108 deletions
diff --git a/usr.bin/comm/comm.c b/usr.bin/comm/comm.c
index afda0e7..b910911 100644
--- a/usr.bin/comm/comm.c
+++ b/usr.bin/comm/comm.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <limits.h>
#include <locale.h>
#include <stdint.h>
+#define _WITH_GETLINE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -60,40 +61,31 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <wctype.h>
-#define INITLINELEN (LINE_MAX + 1)
-#define MAXLINELEN ((SIZE_MAX / sizeof(wchar_t)) / 2)
-
-const wchar_t *tabs[] = { L"", L"\t", L"\t\t" };
+int iflag;
+const char *tabs[] = { "", "\t", "\t\t" };
FILE *file(const char *);
-wchar_t *getline(wchar_t *, size_t *, FILE *);
-void show(FILE *, const char *, const wchar_t *, wchar_t *, size_t *);
-int wcsicoll(const wchar_t *, const wchar_t *);
+wchar_t *convert(const char *);
+void show(FILE *, const char *, const char *, char **, size_t *);
static void usage(void);
int
main(int argc, char *argv[])
{
int comp, read1, read2;
- int ch, flag1, flag2, flag3, iflag;
+ int ch, flag1, flag2, flag3;
FILE *fp1, *fp2;
- const wchar_t *col1, *col2, *col3;
+ const char *col1, *col2, *col3;
size_t line1len, line2len;
- wchar_t *line1, *line2;
- const wchar_t **p;
-
- flag1 = flag2 = flag3 = 1;
- iflag = 0;
-
- line1len = INITLINELEN;
- line2len = INITLINELEN;
- line1 = malloc(line1len * sizeof(*line1));
- line2 = malloc(line2len * sizeof(*line2));
- if (line1 == NULL || line2 == NULL)
- err(1, "malloc");
+ char *line1, *line2;
+ ssize_t n1, n2;
+ wchar_t *tline1, *tline2;
+ const char **p;
(void) setlocale(LC_ALL, "");
+ flag1 = flag2 = flag3 = 1;
+
while ((ch = getopt(argc, argv, "123i")) != -1)
switch(ch) {
case '1':
@@ -131,41 +123,57 @@ main(int argc, char *argv[])
if (flag3)
col3 = *p;
+ line1len = line2len = 0;
+ line1 = line2 = NULL;
+ n1 = n2 = -1;
+
for (read1 = read2 = 1;;) {
/* read next line, check for EOF */
if (read1) {
- line1 = getline(line1, &line1len, fp1);
- if (line1 == NULL && ferror(fp1))
+ n1 = getline(&line1, &line1len, fp1);
+ if (n1 < 0 && ferror(fp1))
err(1, "%s", argv[0]);
+ if (n1 > 0 && line1[n1 - 1] == '\n')
+ line1[n1 - 1] = '\0';
+
}
if (read2) {
- line2 = getline(line2, &line2len, fp2);
- if (line2 == NULL && ferror(fp2))
+ n2 = getline(&line2, &line2len, fp2);
+ if (n2 < 0 && ferror(fp2))
err(1, "%s", argv[1]);
+ if (n2 > 0 && line2[n2 - 1] == '\n')
+ line2[n2 - 1] = '\0';
}
/* if one file done, display the rest of the other file */
- if (line1 == NULL) {
- if (line2 != NULL && col2 != NULL)
- show(fp2, argv[1], col2, line2, &line2len);
+ if (n1 < 0) {
+ if (n2 >= 0 && col2 != NULL)
+ show(fp2, argv[1], col2, &line2, &line2len);
break;
}
- if (line2 == NULL) {
- if (line1 != NULL && col1 != NULL)
- show(fp1, argv[0], col1, line1, &line1len);
+ if (n2 < 0) {
+ if (n1 >= 0 && col1 != NULL)
+ show(fp1, argv[0], col1, &line1, &line1len);
break;
}
- /* lines are the same */
- if(iflag)
- comp = wcsicoll(line1, line2);
+ tline2 = NULL;
+ if ((tline1 = convert(line1)) != NULL)
+ tline2 = convert(line2);
+ if (tline1 == NULL || tline2 == NULL)
+ comp = strcmp(line1, line2);
else
- comp = wcscoll(line1, line2);
+ comp = wcscoll(tline1, tline2);
+ if (tline1 != NULL)
+ free(tline1);
+ if (tline2 != NULL)
+ free(tline2);
+ /* lines are the same */
if (!comp) {
read1 = read2 = 1;
if (col3 != NULL)
- (void)printf("%ls%ls\n", col3, line1);
+ (void)printf("%s%s\n", col3, line1);
continue;
}
@@ -174,49 +182,50 @@ main(int argc, char *argv[])
read1 = 1;
read2 = 0;
if (col1 != NULL)
- (void)printf("%ls%ls\n", col1, line1);
+ (void)printf("%s%s\n", col1, line1);
} else {
read1 = 0;
read2 = 1;
if (col2 != NULL)
- (void)printf("%ls%ls\n", col2, line2);
+ (void)printf("%s%s\n", col2, line2);
}
}
exit(0);
}
wchar_t *
-getline(wchar_t *buf, size_t *buflen, FILE *fp)
+convert(const char *str)
{
- size_t bufpos;
- wint_t ch;
+ size_t n;
+ wchar_t *buf, *p;
- bufpos = 0;
- while ((ch = getwc(fp)) != WEOF && ch != '\n') {
- if (bufpos + 1 >= *buflen) {
- *buflen = *buflen * 2;
- if (*buflen > MAXLINELEN)
- errx(1,
- "Maximum line buffer length (%zu) exceeded",
- MAXLINELEN);
- buf = reallocf(buf, *buflen * sizeof(*buf));
- if (buf == NULL)
- err(1, "reallocf");
- }
- buf[bufpos++] = ch;
+ if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1)
+ return (NULL);
+ if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL)
+ err(1, "malloc");
+ if (mbstowcs(buf, str, n + 1) != n)
+ errx(1, "internal mbstowcs() error");
+
+ if (iflag) {
+ for (p = buf; *p != L'\0'; p++)
+ *p = towlower(*p);
}
- buf[bufpos] = '\0';
- return (bufpos != 0 || ch == '\n' ? buf : NULL);
+ return (buf);
}
void
-show(FILE *fp, const char *fn, const wchar_t *offset, wchar_t *buf, size_t *buflen)
+show(FILE *fp, const char *fn, const char *offset, char **bufp, size_t *buflenp)
{
+ ssize_t n;
do {
- (void)printf("%ls%ls\n", offset, buf);
- } while ((buf = getline(buf, buflen, fp)) != NULL);
+ (void)printf("%s%s\n", offset, *bufp);
+ if ((n = getline(bufp, buflenp, fp)) < 0)
+ break;
+ if (n > 0 && (*bufp)[n - 1] == '\n')
+ (*bufp)[n - 1] = '\0';
+ } while (1);
if (ferror(fp))
err(1, "%s", fn);
}
@@ -240,52 +249,3 @@ usage(void)
(void)fprintf(stderr, "usage: comm [-123i] file1 file2\n");
exit(1);
}
-
-static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0;
-static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL;
-
-int
-wcsicoll(const wchar_t *s1, const wchar_t *s2)
-{
- wchar_t *p;
- size_t l1, l2;
- size_t new_l1_buflen, new_l2_buflen;
-
- l1 = wcslen(s1) + 1;
- l2 = wcslen(s2) + 1;
- new_l1_buflen = wcsicoll_l1_buflen;
- new_l2_buflen = wcsicoll_l2_buflen;
- while (new_l1_buflen < l1) {
- if (new_l1_buflen == 0)
- new_l1_buflen = INITLINELEN;
- else
- new_l1_buflen *= 2;
- }
- while (new_l2_buflen < l2) {
- if (new_l2_buflen == 0)
- new_l2_buflen = INITLINELEN;
- else
- new_l2_buflen *= 2;
- }
- if (new_l1_buflen > wcsicoll_l1_buflen) {
- wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf));
- if (wcsicoll_l1_buf == NULL)
- err(1, "reallocf");
- wcsicoll_l1_buflen = new_l1_buflen;
- }
- if (new_l2_buflen > wcsicoll_l2_buflen) {
- wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf));
- if (wcsicoll_l2_buf == NULL)
- err(1, "reallocf");
- wcsicoll_l2_buflen = new_l2_buflen;
- }
-
- for (p = wcsicoll_l1_buf; *s1; s1++)
- *p++ = towlower(*s1);
- *p = '\0';
- for (p = wcsicoll_l2_buf; *s2; s2++)
- *p++ = towlower(*s2);
- *p = '\0';
-
- return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf));
-}
OpenPOWER on IntegriCloud