diff options
author | ache <ache@FreeBSD.org> | 2003-08-03 02:23:39 +0000 |
---|---|---|
committer | ache <ache@FreeBSD.org> | 2003-08-03 02:23:39 +0000 |
commit | 0113a19ead9ecc4c8d3e911e1dd52f2474e068b0 (patch) | |
tree | 1f11173c6522259931f70066719199160a7dafe8 /usr.bin/tr | |
parent | ff880b993e6ccf0433a67419c42064845d6f008a (diff) | |
download | FreeBSD-src-0113a19ead9ecc4c8d3e911e1dd52f2474e068b0.zip FreeBSD-src-0113a19ead9ecc4c8d3e911e1dd52f2474e068b0.tar.gz |
This patch addresses two problems.
The 1st one is relatively minor: according to our own manpage, the upper and
lower classes must be sorted, but currently they are not.
The 2nd one is serious:
tr '[:lower:]' '[:upper:]'
(and vice versa) currently works only if the upper and lower classes
have exactly the same number of elements. When that is not true, as in
many ISO8859-x locales which have a larger number of lowercase letters,
tr may do nasty things.
See this page
http://www.opengroup.org/onlinepubs/007908799/xcu/tr.html
for a detailed description of the desired tr behaviour in such cases.
Diffstat (limited to 'usr.bin/tr')
-rw-r--r-- | usr.bin/tr/extern.h | 5 | ||||
-rw-r--r-- | usr.bin/tr/str.c | 14 | ||||
-rw-r--r-- | usr.bin/tr/tr.c | 71 |
3 files changed, 70 insertions, 20 deletions
diff --git a/usr.bin/tr/extern.h b/usr.bin/tr/extern.h index 356f025..a5ed577 100644 --- a/usr.bin/tr/extern.h +++ b/usr.bin/tr/extern.h @@ -40,7 +40,8 @@ typedef struct { enum { STRING1, STRING2 } which; - enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, SET } state; + enum { EOS, INFINITE, NORMAL, RANGE, SEQUENCE, + SET, SET_UPPER, SET_LOWER } state; int cnt; /* character count */ int lastch; /* last character */ int equiv[NCHARS]; /* equivalence set */ @@ -49,3 +50,5 @@ typedef struct { } STR; int next(STR *); +int charcoll(const void *, const void *); + diff --git a/usr.bin/tr/str.c b/usr.bin/tr/str.c index fee1824..f8a7137 100644 --- a/usr.bin/tr/str.c +++ b/usr.bin/tr/str.c @@ -106,6 +106,8 @@ next(s) } return (1); case SET: + case SET_UPPER: + case SET_LOWER: if ((s->lastch = s->set[s->cnt++]) == OOBCH) { s->state = NORMAL; return (next(s)); @@ -194,7 +196,7 @@ genclass(s) { int cnt, (*func)(int); CLASS *cp, tmp; - int *p; + int *p, n; tmp.name = s->str; if ((cp = (CLASS *)bsearch(&tmp, classes, sizeof(classes) / @@ -208,10 +210,18 @@ genclass(s) if ((func)(cnt)) *p++ = cnt; *p = OOBCH; + n = p - cp->set; s->cnt = 0; - s->state = SET; s->set = cp->set; + if (strcmp(s->str, "upper") == 0) + s->state = SET_UPPER; + else if (strcmp(s->str, "lower") == 0) { + s->state = SET_LOWER; + } else + s->state = SET; + if ((s->state == SET_LOWER || s->state == SET_UPPER) && n > 1) + mergesort(s->set, n, sizeof(*(s->set)), charcoll); } static int diff --git a/usr.bin/tr/tr.c b/usr.bin/tr/tr.c index 407b4f4..d6c9698 100644 --- a/usr.bin/tr/tr.c +++ b/usr.bin/tr/tr.c @@ -101,7 +101,6 @@ static int string1[NCHARS] = { STR s1 = { STRING1, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; STR s2 = { STRING2, NORMAL, 0, OOBCH, { 0, OOBCH }, NULL, NULL }; -static int charcoll(const void *, const void *); static void setup(int *, char *, STR *, int, int); static void usage(void); @@ -224,20 +223,55 @@ main(int argc, char **argv) if (!next(&s2)) errx(1, "empty string2"); - ch = 
s2.lastch; + /* + * For -s result will contain only those characters defined + * as the second characters in each of the toupper or tolower + * pairs. + */ + /* If string2 runs out of characters, use the last one specified. */ - if (sflag) - while (next(&s1)) { - string1[s1.lastch] = ch = s2.lastch; - string2[ch] = 1; - (void)next(&s2); + while (next(&s1)) { + again: + if (s1.state == SET_LOWER && + s2.state == SET_UPPER && + s1.cnt == 1 && s2.cnt == 1) { + do { + string1[s1.lastch] = ch = toupper(s1.lastch); + if (sflag && isupper(ch)) + string2[ch] = 1; + if (!next(&s1)) + goto endloop; + } while (s1.state == SET_LOWER && s1.cnt > 1); + /* skip upper set */ + do { + if (!next(&s2)) + break; + } while (s2.state == SET_UPPER && s2.cnt > 1); + goto again; + } else if (s1.state == SET_UPPER && + s2.state == SET_LOWER && + s1.cnt == 1 && s2.cnt == 1) { + do { + string1[s1.lastch] = ch = tolower(s1.lastch); + if (sflag && islower(ch)) + string2[ch] = 1; + if (!next(&s1)) + goto endloop; + } while (s1.state == SET_UPPER && s1.cnt > 1); + /* skip lower set */ + do { + if (!next(&s2)) + break; + } while (s2.state == SET_LOWER && s2.cnt > 1); + goto again; + } else { + string1[s1.lastch] = s2.lastch; + if (sflag) + string2[s2.lastch] = 1; } - else - while (next(&s1)) { - string1[s1.lastch] = ch = s2.lastch; - (void)next(&s2); - } - + (void)next(&s2); + } +endloop: if (cflag || Cflag) { s2.str = argv[1]; s2.state = NORMAL; @@ -294,15 +328,18 @@ setup(int *string, char *arg, STR *str, int cflag, int Cflag) string[cnt] = !string[cnt] && ISCHAR(cnt); } -static int +int charcoll(const void *a, const void *b) { - char sa[2], sb[2]; + static char sa[2], sb[2]; + int r; sa[0] = *(const int *)a; sb[0] = *(const int *)b; - sa[1] = sb[1] = '\0'; - return (strcoll(sa, sb)); + r = strcoll(sa, sb); + if (r == 0) + r = *(const int *)a - *(const int *)b; + return (r); } static void |