summary | refs | log | tree | commit | diff | stats
path: root/usr.bin/tr
diff options
context:
space:
mode:
author: tjr <tjr@FreeBSD.org> 2002-06-14 07:37:08 +0000
committer: tjr <tjr@FreeBSD.org> 2002-06-14 07:37:08 +0000
commit: 0c8a9db6f99a60d7dd69784a1c0e0f6d254fdcc3 (patch)
tree: d2045ef36addf678037810b6a9dfe3d0c268d899 /usr.bin/tr
parent: 6231c89ca356ca59780afac8c23b8b9333851cfc (diff)
download: FreeBSD-src-0c8a9db6f99a60d7dd69784a1c0e0f6d254fdcc3.zip
          FreeBSD-src-0c8a9db6f99a60d7dd69784a1c0e0f6d254fdcc3.tar.gz
Implement support for equivalence classes ([=e=]) when the mapping is
one-to-one (SUSv3)
Diffstat (limited to 'usr.bin/tr')
-rw-r--r--  usr.bin/tr/str.c  29
-rw-r--r--  usr.bin/tr/tr.1   28
-rw-r--r--  usr.bin/tr/tr.c    2
3 files changed, 45 insertions, 14 deletions
diff --git a/usr.bin/tr/str.c b/usr.bin/tr/str.c
index 46bf340..90f7335 100644
--- a/usr.bin/tr/str.c
+++ b/usr.bin/tr/str.c
@@ -216,14 +216,13 @@ c_class(a, b)
return (strcmp(((const CLASS *)a)->name, ((const CLASS *)b)->name));
}
-/*
- * English doesn't have any equivalence classes, so for now
- * we just syntax check and grab the character.
- */
static void
genequiv(s)
STR *s;
{
+ int i, p, pri;
+ char src[2], dst[3];
+
if (*s->str == '\\') {
s->equiv[0] = backslash(s);
if (*s->str != '=')
@@ -233,6 +232,28 @@ genequiv(s)
if (s->str[1] != '=')
errx(1, "misplaced equivalence equals sign");
}
+
+ /*
+ * Calculate the set of all characters in the same equivalence class
+ * as the specified character (they will have the same primary
+ * collation weights).
+ * XXX Knows too much about how strxfrm() is implemented. Assumes
+ * it fills the string with primary collation weight bytes. Only one-
+ * to-one mappings are supported.
+ */
+ src[0] = s->equiv[0];
+ src[1] = '\0';
+ if (strxfrm(dst, src, sizeof(dst)) == 1) {
+ pri = (unsigned char)*dst;
+ for (p = 1, i = 1; i < NCHARS; i++) {
+ *src = i;
+ if (strxfrm(dst, src, sizeof(dst)) == 1 && pri &&
+ pri == (unsigned char)*dst)
+ s->equiv[p++] = i;
+ }
+ s->equiv[p] = OOBCH;
+ }
+
s->str += 2;
s->cnt = 0;
s->state = SET;
diff --git a/usr.bin/tr/tr.1 b/usr.bin/tr/tr.1
index 2875ccf..b1ac00c 100644
--- a/usr.bin/tr/tr.1
+++ b/usr.bin/tr/tr.1
@@ -200,15 +200,9 @@ in these classes, see
.Xr ctype 3
and related manual pages.
.It [=equiv=]
-Represents all characters or collating (sorting) elements belonging to
-the same equivalence class as
-.Ar equiv .
-If
-there is a secondary ordering within the equivalence class, the characters
-are ordered in ascending sequence.
-Otherwise, they are ordered after their encoded values.
-An example of an equivalence class might be ``c'' and ``ch'' in Spanish;
-English has no equivalence classes.
+Represents all characters belonging to the same equivalence class as
+.Ar equiv ,
+ordered by their encoded values.
.It [#*n]
Represents
.Ar n
@@ -228,6 +222,17 @@ If
has a leading zero, it is interpreted as an octal value, otherwise,
it's interpreted as a decimal value.
.El
+.Sh ENVIRONMENT
+The
+.Ev LANG ,
+.Ev LC_ALL ,
+.Ev LC_CTYPE
+and
+.Ev LC_COLLATE
+environment variables affect the execution of
+.Nm
+as described in
+.Xr environ 7 .
.Sh DIAGNOSTICS
.Ex -std
.Sh EXAMPLES
@@ -245,6 +250,11 @@ Translate the contents of file1 to upper-case.
Strip out non-printable characters from file1.
.Pp
.D1 Li "tr -cd \*q[:print:]\*q < file1"
+.Pp
+Remove diacritical marks from all accented variants of the letter
+.Sq e :
+.Pp
+.Dl "tr \*q[=e=]\*q \*qe\*q"
.Sh COMPATIBILITY
System V has historically implemented character ranges using the syntax
``[c-c]'' instead of the ``c-c'' used by historic
diff --git a/usr.bin/tr/tr.c b/usr.bin/tr/tr.c
index 359ef05..c74e1d0 100644
--- a/usr.bin/tr/tr.c
+++ b/usr.bin/tr/tr.c
@@ -105,7 +105,7 @@ main(argc, argv)
int ch, cnt, lastch, *p;
int cflag, dflag, sflag, isstring2;
- (void) setlocale(LC_CTYPE, "");
+ (void)setlocale(LC_ALL, "");
cflag = dflag = sflag = 0;
while ((ch = getopt(argc, argv, "cdsu")) != -1)
OpenPOWER on IntegriCloud