diff options
author | tjr <tjr@FreeBSD.org> | 2004-07-14 10:06:22 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2004-07-14 10:06:22 +0000 |
commit | b7f5e217dda791d61c549a147e0e6ad6cd1b3f3d (patch) | |
tree | cebf0634774be08929212c168278981cd54ae195 /usr.bin/sed/process.c | |
parent | 084c37915e361d7646a6eefa02b04a5db5958496 (diff) | |
download | FreeBSD-src-b7f5e217dda791d61c549a147e0e6ad6cd1b3f3d.zip FreeBSD-src-b7f5e217dda791d61c549a147e0e6ad6cd1b3f3d.tar.gz |
Make the 'y' (translate) command aware of multibyte characters.
Diffstat (limited to 'usr.bin/sed/process.c')
-rw-r--r-- | usr.bin/sed/process.c | 62 |
1 files changed, 59 insertions, 3 deletions
diff --git a/usr.bin/sed/process.c b/usr.bin/sed/process.c
index 0d63994..1858b65 100644
--- a/usr.bin/sed/process.c
+++ b/usr.bin/sed/process.c
@@ -63,7 +63,7 @@ static const char sccsid[] = "@(#)process.c 8.6 (Berkeley) 4/20/94";
 #include "defs.h"
 #include "extern.h"
 
-static SPACE HS, PS, SS;
+static SPACE HS, PS, SS, YS;
 #define pd PS.deleted
 #define ps PS.space
 #define psl PS.len
@@ -71,6 +71,7 @@ static SPACE HS, PS, SS;
 #define hsl HS.len
 
 static __inline int applies(struct s_command *);
+static void do_tr(struct s_tr *);
 static void flush_appends(void);
 static void lputs(char *, size_t);
 static __inline int regexec_e(regex_t *, const char *, int, int, size_t);
@@ -97,6 +98,7 @@ process(void)
 	SPACE tspace;
 	size_t len, oldpsl = 0;
 	char *p;
+	char nc;
 
 	p = NULL;
 
@@ -247,8 +249,7 @@ redirect:
 			case 'y':
 				if (pd || psl == 0)
 					break;
-				for (p = ps, len = psl; len--; ++p)
-					*p = cp->u.y[(unsigned char)*p];
+				do_tr(cp->u.y);
 				break;
 			case ':':
 			case '}':
@@ -426,6 +427,61 @@ substitute(struct s_command *cp)
 }
 
 /*
+ * do_tr --
+ *	Perform translation ('y' command) in the pattern space.
+ */
+static void
+do_tr(struct s_tr *y)
+{
+	SPACE tmp;
+	char c, *p;
+	size_t clen, left;
+	int i;
+
+	if (MB_CUR_MAX == 1) {
+		/*
+		 * Single-byte encoding: perform in-place translation
+		 * of the pattern space.
+		 */
+		for (p = ps; p < &ps[psl]; p++)
+			*p = y->bytetab[(u_char)*p];
+	} else {
+		/*
+		 * Multi-byte encoding: perform translation into the
+		 * translation space, then swap the translation and
+		 * pattern spaces.
+		 */
+		/* Clean translation space. */
+		YS.len = 0;
+		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
+			if ((c = y->bytetab[(u_char)*p]) != '\0') {
+				cspace(&YS, &c, 1, APPEND);
+				clen = 1;
+				continue;
+			}
+			for (i = 0; i < y->nmultis; i++)
+				if (left >= y->multis[i].fromlen &&
+				    memcmp(p, y->multis[i].from,
+				    y->multis[i].fromlen) == 0)
+					break;
+			if (i < y->nmultis) {
+				cspace(&YS, y->multis[i].to,
+				    y->multis[i].tolen, APPEND);
+				clen = y->multis[i].fromlen;
+			} else {
+				cspace(&YS, p, 1, APPEND);
+				clen = 1;
+			}
+		}
+		/* Swap the translation space and the pattern space. */
+		tmp = PS;
+		PS = YS;
+		YS = tmp;
+		YS.space = YS.back;
+	}
+}
+
+/*
  * Flush append requests. Always called before reading a line,
  * therefore it also resets the substitution done (sdone) flag.
  */