diff options
author | tjr <tjr@FreeBSD.org> | 2002-06-14 07:37:08 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2002-06-14 07:37:08 +0000 |
commit | 0c8a9db6f99a60d7dd69784a1c0e0f6d254fdcc3 (patch) | |
tree | d2045ef36addf678037810b6a9dfe3d0c268d899 /usr.bin/tr | |
parent | 6231c89ca356ca59780afac8c23b8b9333851cfc (diff) | |
download | FreeBSD-src-0c8a9db6f99a60d7dd69784a1c0e0f6d254fdcc3.zip FreeBSD-src-0c8a9db6f99a60d7dd69784a1c0e0f6d254fdcc3.tar.gz |
Implement support for equivalence classes ([=e=]) when the mapping is
one-to-one (SUSv3)
Diffstat (limited to 'usr.bin/tr')
-rw-r--r-- | usr.bin/tr/str.c | 29 | ||||
-rw-r--r-- | usr.bin/tr/tr.1 | 28 | ||||
-rw-r--r-- | usr.bin/tr/tr.c | 2 |
3 files changed, 45 insertions, 14 deletions
diff --git a/usr.bin/tr/str.c b/usr.bin/tr/str.c index 46bf340..90f7335 100644 --- a/usr.bin/tr/str.c +++ b/usr.bin/tr/str.c @@ -216,14 +216,13 @@ c_class(a, b) return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name)); } -/* - * English doesn't have any equivalence classes, so for now - * we just syntax check and grab the character. - */ static void genequiv(s) STR *s; { + int i, p, pri; + char src[2], dst[3]; + if (*s->str == '\\') { s->equiv[0] = backslash(s); if (*s->str != '=') @@ -233,6 +232,28 @@ genequiv(s) if (s->str[1] != '=') errx(1, "misplaced equivalence equals sign"); } + + /* + * Calculate the set of all characters in the same equivalence class + * as the specified character (they will have the same primary + * collation weights). + * XXX Knows too much about how strxfrm() is implemented. Assumes + * it fills the string with primary collation weight bytes. Only one- + * to-one mappings are supported. + */ + src[0] = s->equiv[0]; + src[1] = '\0'; + if (strxfrm(dst, src, sizeof(dst)) == 1) { + pri = (unsigned char)*dst; + for (p = 1, i = 1; i < NCHARS; i++) { + *src = i; + if (strxfrm(dst, src, sizeof(dst)) == 1 && pri && + pri == (unsigned char)*dst) + s->equiv[p++] = i; + } + s->equiv[p] = OOBCH; + } + s->str += 2; s->cnt = 0; s->state = SET; diff --git a/usr.bin/tr/tr.1 b/usr.bin/tr/tr.1 index 2875ccf..b1ac00c 100644 --- a/usr.bin/tr/tr.1 +++ b/usr.bin/tr/tr.1 @@ -200,15 +200,9 @@ in these classes, see .Xr ctype 3 and related manual pages. .It [=equiv=] -Represents all characters or collating (sorting) elements belonging to -the same equivalence class as -.Ar equiv . -If -there is a secondary ordering within the equivalence class, the characters -are ordered in ascending sequence. -Otherwise, they are ordered after their encoded values. -An example of an equivalence class might be ``c'' and ``ch'' in Spanish; -English has no equivalence classes. +Represents all characters belonging to the same equivalence class as +.Ar equiv , +ordered by their encoded values. .It [#*n] Represents .Ar n @@ -228,6 +222,17 @@ If has a leading zero, it is interpreted as an octal value, otherwise, it's interpreted as a decimal value. .El +.Sh ENVIRONMENT +The +.Ev LANG , +.Ev LC_ALL , +.Ev LC_CTYPE +and +.Ev LC_COLLATE +environment variables affect the execution of +.Nm +as described in +.Xr environ 7 . .Sh DIAGNOSTICS .Ex -std .Sh EXAMPLES @@ -245,6 +250,11 @@ Translate the contents of file1 to upper-case. Strip out non-printable characters from file1. .Pp .D1 Li "tr -cd \*q[:print:]\*q < file1" +.Pp +Remove diacritical marks from all accented variants of the letter +.Sq e : +.Pp +.Dl "tr \*q[=e=]\*q \*qe\*q" .Sh COMPATIBILITY System V has historically implemented character ranges using the syntax ``[c-c]'' instead of the ``c-c'' used by historic diff --git a/usr.bin/tr/tr.c b/usr.bin/tr/tr.c index 359ef05..c74e1d0 100644 --- a/usr.bin/tr/tr.c +++ b/usr.bin/tr/tr.c @@ -105,7 +105,7 @@ main(argc, argv) int ch, cnt, lastch, *p; int cflag, dflag, sflag, isstring2; - (void) setlocale(LC_CTYPE, ""); + (void)setlocale(LC_ALL, ""); cflag = dflag = sflag = 0; while ((ch = getopt(argc, argv, "cdsu")) != -1) |