diff options
Diffstat (limited to 'usr.bin/uniq/uniq.c')
-rw-r--r-- | usr.bin/uniq/uniq.c | 193 |
1 files changed, 84 insertions, 109 deletions
diff --git a/usr.bin/uniq/uniq.c b/usr.bin/uniq/uniq.c index 2b11fe4..605bd00 100644 --- a/usr.bin/uniq/uniq.c +++ b/usr.bin/uniq/uniq.c @@ -53,6 +53,7 @@ static const char rcsid[] = #include <limits.h> #include <locale.h> #include <stdint.h> +#define _WITH_GETLINE #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -60,31 +61,26 @@ static const char rcsid[] = #include <wchar.h> #include <wctype.h> -#define INITLINELEN (LINE_MAX + 1) -#define MAXLINELEN ((SIZE_MAX / sizeof(wchar_t)) / 2) - -int cflag, dflag, uflag; +int cflag, dflag, uflag, iflag; int numchars, numfields, repeats; FILE *file(const char *, const char *); -wchar_t *getline(wchar_t *, size_t *, FILE *); -void show(FILE *, wchar_t *); +wchar_t *convert(const char *); +int inlcmp(const char *, const char *); +void show(FILE *, const char *); wchar_t *skip(wchar_t *); void obsolete(char *[]); static void usage(void); -int wcsicoll(wchar_t *, wchar_t *); int main (int argc, char *argv[]) { - wchar_t *t1, *t2; + wchar_t *tprev, *tthis; FILE *ifp, *ofp; - int ch, b1; - size_t prevbuflen, thisbuflen; - wchar_t *prevline, *thisline; - char *p; + int ch, comp; + size_t prevbuflen, thisbuflen, b1; + char *prevline, *thisline, *p; const char *ifn; - int iflag = 0, comp; (void) setlocale(LC_ALL, ""); @@ -139,48 +135,48 @@ main (int argc, char *argv[]) if (argc > 1) ofp = file(argv[1], "w"); - prevbuflen = INITLINELEN; - thisbuflen = INITLINELEN; - prevline = malloc(prevbuflen * sizeof(*prevline)); - thisline = malloc(thisbuflen * sizeof(*thisline)); - if (prevline == NULL || thisline == NULL) - err(1, "malloc"); + prevbuflen = thisbuflen = 0; + prevline = thisline = NULL; - if ((prevline = getline(prevline, &prevbuflen, ifp)) == NULL) { + if (getline(&prevline, &prevbuflen, ifp) < 0) { if (ferror(ifp)) err(1, "%s", ifn); exit(0); } + tprev = convert(prevline); + if (!cflag && uflag && dflag) show(ofp, prevline); - while ((thisline = getline(thisline, &thisbuflen, ifp)) != NULL) { - /* If requested get the chosen fields + character offsets. */ - if (numfields || numchars) { - t1 = skip(thisline); - t2 = skip(prevline); - } else { - t1 = thisline; - t2 = prevline; - } + tthis = NULL; + while (getline(&thisline, &thisbuflen, ifp) >= 0) { + if (tthis != NULL) + free(tthis); + tthis = convert(thisline); - /* If different, print; set previous to new value. */ - if (iflag) - comp = wcsicoll(t1, t2); + if (tthis == NULL && tprev == NULL) + comp = inlcmp(thisline, prevline); + else if (tthis == NULL || tprev == NULL) + comp = 1; else - comp = wcscoll(t1, t2); + comp = wcscoll(tthis, tprev); if (comp) { + /* If different, print; set previous to new value. */ if (cflag || !dflag || !uflag) show(ofp, prevline); - t1 = prevline; + p = prevline; b1 = prevbuflen; prevline = thisline; prevbuflen = thisbuflen; + if (tprev != NULL) + free(tprev); + tprev = tthis; if (!cflag && uflag && dflag) show(ofp, prevline); - thisline = t1; + thisline = p; thisbuflen = b1; + tthis = NULL; repeats = 0; } else ++repeats; @@ -193,28 +189,55 @@ main (int argc, char *argv[]) } wchar_t * -getline(wchar_t *buf, size_t *buflen, FILE *fp) +convert(const char *str) { - size_t bufpos; - wint_t ch; - - bufpos = 0; - while ((ch = getwc(fp)) != WEOF && ch != '\n') { - if (bufpos + 1 >= *buflen) { - *buflen = *buflen * 2; - if (*buflen > MAXLINELEN) - errx(1, - "Maximum line buffer length (%zu) exceeded", - MAXLINELEN); - buf = reallocf(buf, *buflen * sizeof(*buf)); - if (buf == NULL) - err(1, "reallocf"); - } - buf[bufpos++] = ch; + size_t n; + wchar_t *buf, *ret, *p; + + if ((n = mbstowcs(NULL, str, 0)) == (size_t)-1) + return (NULL); + if (SIZE_MAX / sizeof(*buf) < n + 1) + errx(1, "conversion buffer length overflow"); + if ((buf = malloc((n + 1) * sizeof(*buf))) == NULL) + err(1, "malloc"); + if (mbstowcs(buf, str, n + 1) != n) + errx(1, "internal mbstowcs() error"); + /* The last line may not end with \n. */ + if (n > 0 && buf[n - 1] == L'\n') + buf[n - 1] = L'\0'; + + /* If requested get the chosen fields + character offsets. */ + if (numfields || numchars) { + if ((ret = wcsdup(skip(buf))) == NULL) + err(1, "wcsdup"); + free(buf); + } else + ret = buf; + + if (iflag) { + for (p = ret; *p != L'\0'; p++) + *p = towlower(*p); } - buf[bufpos] = '\0'; - return (bufpos != 0 || ch == '\n' ? buf : NULL); + return (ret); +} + +int +inlcmp(const char *s1, const char *s2) +{ + int c1, c2; + + while (*s1 == *s2++) + if (*s1++ == '\0') + return (0); + c1 = (unsigned char)*s1; + c2 = (unsigned char)*(s2 - 1); + /* The last line may not end with \n. */ + if (c1 == '\n') + c1 = '\0'; + if (c2 == '\n') + c2 = '\0'; + return (c1 - c2); } /* @@ -223,13 +246,13 @@ getline(wchar_t *buf, size_t *buflen, FILE *fp) * of the line. */ void -show(FILE *ofp, wchar_t *str) +show(FILE *ofp, const char *str) { if (cflag) - (void)fprintf(ofp, "%4d %ls\n", repeats + 1, str); + (void)fprintf(ofp, "%4d %s", repeats + 1, str); if ((dflag && repeats) || (uflag && !repeats)) - (void)fprintf(ofp, "%ls\n", str); + (void)fprintf(ofp, "%s", str); } wchar_t * @@ -237,13 +260,14 @@ skip(wchar_t *str) { int nchars, nfields; - for (nfields = 0; *str != '\0' && nfields++ != numfields; ) { + for (nfields = 0; *str != L'\0' && nfields++ != numfields; ) { while (iswblank(*str)) str++; - while (*str != '\0' && !iswblank(*str)) + while (*str != L'\0' && !iswblank(*str)) str++; } - for (nchars = numchars; nchars-- && *str; ++str); + for (nchars = numchars; nchars-- && *str != L'\0'; ++str) + ; return(str); } @@ -293,52 +317,3 @@ usage(void) "usage: uniq [-c | -d | -u] [-i] [-f fields] [-s chars] [input [output]]\n"); exit(1); } - -static size_t wcsicoll_l1_buflen = 0, wcsicoll_l2_buflen = 0; -static wchar_t *wcsicoll_l1_buf = NULL, *wcsicoll_l2_buf = NULL; - -int -wcsicoll(wchar_t *s1, wchar_t *s2) -{ - wchar_t *p; - size_t l1, l2; - size_t new_l1_buflen, new_l2_buflen; - - l1 = wcslen(s1) + 1; - l2 = wcslen(s2) + 1; - new_l1_buflen = wcsicoll_l1_buflen; - new_l2_buflen = wcsicoll_l2_buflen; - while (new_l1_buflen < l1) { - if (new_l1_buflen == 0) - new_l1_buflen = INITLINELEN; - else - new_l1_buflen *= 2; - } - while (new_l2_buflen < l2) { - if (new_l2_buflen == 0) - new_l2_buflen = INITLINELEN; - else - new_l2_buflen *= 2; - } - if (new_l1_buflen > wcsicoll_l1_buflen) { - wcsicoll_l1_buf = reallocf(wcsicoll_l1_buf, new_l1_buflen * sizeof(*wcsicoll_l1_buf)); - if (wcsicoll_l1_buf == NULL) - err(1, "reallocf"); - wcsicoll_l1_buflen = new_l1_buflen; - } - if (new_l2_buflen > wcsicoll_l2_buflen) { - wcsicoll_l2_buf = reallocf(wcsicoll_l2_buf, new_l2_buflen * sizeof(*wcsicoll_l2_buf)); - if (wcsicoll_l2_buf == NULL) - err(1, "reallocf"); - wcsicoll_l2_buflen = new_l2_buflen; - } - - for (p = wcsicoll_l1_buf; *s1; s1++) - *p++ = towlower(*s1); - *p = '\0'; - for (p = wcsicoll_l2_buf; *s2; s2++) - *p++ = towlower(*s2); - *p = '\0'; - - return (wcscoll(wcsicoll_l1_buf, wcsicoll_l2_buf)); -} |