summaryrefslogtreecommitdiffstats
path: root/usr.bin/tr
diff options
context:
space:
mode:
authorache <ache@FreeBSD.org>2003-08-04 04:20:04 +0000
committerache <ache@FreeBSD.org>2003-08-04 04:20:04 +0000
commitfbdcc3c06068d7212d416b75593dbf2749e93323 (patch)
treea1979a9f5a4f787ab0588aa1fc2d94e8c11d2299 /usr.bin/tr
parentbf98881a21aa08a1e6457d1a17c2cf7841b780ab (diff)
downloadFreeBSD-src-fbdcc3c06068d7212d416b75593dbf2749e93323.zip
FreeBSD-src-fbdcc3c06068d7212d416b75593dbf2749e93323.tar.gz
POSIX requires complex processing of 'c-c' ranges: if one of the endpoints
is an octal sequence, the range is taken in byte-value order; for non-octal endpoints, the range is taken in sorted collation order. Implement it.
Diffstat (limited to 'usr.bin/tr')
-rw-r--r--usr.bin/tr/str.c57
-rw-r--r--usr.bin/tr/tr.16
2 files changed, 41 insertions, 22 deletions
diff --git a/usr.bin/tr/str.c b/usr.bin/tr/str.c
index 151a4bd..a307bb8 100644
--- a/usr.bin/tr/str.c
+++ b/usr.bin/tr/str.c
@@ -51,19 +51,19 @@ static const char sccsid[] = "@(#)str.c 8.2 (Berkeley) 4/28/95";
#include "extern.h"
-static int backslash(STR *);
+static int backslash(STR *, int *);
static int bracket(STR *);
static int c_class(const void *, const void *);
static void genclass(STR *);
static void genequiv(STR *);
-static int genrange(STR *);
+static int genrange(STR *, int);
static void genseq(STR *);
int
next(s)
STR *s;
{
- int ch;
+ int ch, is_octal;
switch (s->state) {
case EOS:
@@ -76,20 +76,21 @@ next(s)
s->state = EOS;
return (0);
case '\\':
- s->lastch = backslash(s);
+ s->lastch = backslash(s, &is_octal);
break;
case '[':
if (bracket(s))
return (next(s));
/* FALLTHROUGH */
default:
+ is_octal = 0;
++s->str;
s->lastch = ch;
break;
}
/* We can start a range at any time. */
- if (s->str[0] == '-' && genrange(s))
+ if (s->str[0] == '-' && genrange(s, is_octal))
return (next(s));
return (1);
case SEQUENCE:
@@ -233,7 +234,7 @@ genequiv(s)
char src[2], dst[3];
if (*s->str == '\\') {
- s->equiv[0] = backslash(s);
+ s->equiv[0] = backslash(s, NULL);
if (*s->str != '=')
errx(1, "misplaced equivalence equals sign");
s->str += 2;
@@ -271,32 +272,42 @@ genequiv(s)
}
static int
-genrange(s)
- STR *s;
+genrange(STR *s, int was_octal)
{
- int stopval;
+ int stopval, octal;
char *savestart;
int n, cnt, *p;
+ octal = 0;
savestart = s->str;
- stopval = *++s->str == '\\' ? backslash(s) : (u_char)*s->str++;
- if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
+ stopval = *++s->str == '\\' ? backslash(s, &octal) : (u_char)*s->str++;
+ if (!octal)
+ octal = was_octal;
+
+ if ((octal && stopval < s->lastch) ||
+ (!octal &&
+ charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0)) {
s->str = savestart;
return (0);
}
if ((s->set = p = malloc((NCHARS + 1) * sizeof(int))) == NULL)
err(1, "genrange() malloc");
bzero(p, (NCHARS + 1) * sizeof(int));
- for (cnt = 0; cnt < NCHARS; ++cnt)
- if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
- charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
+ if (octal) {
+ for (cnt = s->lastch; cnt <= stopval; cnt++)
*p++ = cnt;
+ } else {
+ for (cnt = 0; cnt < NCHARS; cnt++)
+ if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
+ charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
+ *p++ = cnt;
+ }
*p = OOBCH;
n = p - s->set;
s->cnt = 0;
s->state = SET;
- if (n > 1)
+ if (!octal && n > 1)
mergesort(s->set, n, sizeof(*(s->set)), charcoll);
return (1);
}
@@ -311,7 +322,7 @@ genseq(s)
errx(1, "sequences only valid in string2");
if (*s->str == '\\')
- s->lastch = backslash(s);
+ s->lastch = backslash(s, NULL);
else
s->lastch = *s->str++;
if (*s->str != '*')
@@ -319,7 +330,7 @@ genseq(s)
switch (*++s->str) {
case '\\':
- s->cnt = backslash(s);
+ s->cnt = backslash(s, NULL);
break;
case ']':
s->cnt = 0;
@@ -345,14 +356,15 @@ genseq(s)
* an escape code or a literal character.
*/
static int
-backslash(s)
- STR *s;
+backslash(STR *s, int *is_octal)
{
int ch, cnt, val;
+ if (is_octal != NULL)
+ *is_octal = 0;
for (cnt = val = 0;;) {
ch = (u_char)*++s->str;
- if (!isascii(ch) || !isdigit(ch))
+ if (!isdigit(ch))
break;
val = val * 8 + ch - '0';
if (++cnt == 3) {
@@ -360,8 +372,11 @@ backslash(s)
break;
}
}
- if (cnt)
+ if (cnt) {
+ if (is_octal != NULL)
+ *is_octal = 1;
return (val);
+ }
if (ch != '\0')
++s->str;
switch (ch) {
diff --git a/usr.bin/tr/tr.1 b/usr.bin/tr/tr.1
index 9188685..3f26441 100644
--- a/usr.bin/tr/tr.1
+++ b/usr.bin/tr/tr.1
@@ -162,9 +162,13 @@ values.
.Pp
A backslash followed by any other character maps to that character.
.It c-c
-Represents the range of characters between the range endpoints, inclusive,
+For non-octal range endpoints
+represents the range of characters between the range endpoints, inclusive,
in ascending order,
as defined by the collation sequence.
+If either or both of the range endpoints are octal sequences, it
+represents the range of specific coded values between the
+range endpoints, inclusive.
.It [:class:]
Represents all characters belonging to the defined character class.
Class names are:
OpenPOWER on IntegriCloud