From 8bbce85526c337676cfa1f717dac02d86742cf0a Mon Sep 17 00:00:00 2001 From: jilles Date: Sat, 7 May 2011 14:32:16 +0000 Subject: sh: Add UTF-8 support to ${#var}. If the current locale uses UTF-8, ${#var} counts codepoints (more precisely, bytes b with (b & 0xc0) != 0x80). --- bin/sh/expand.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'bin') diff --git a/bin/sh/expand.c b/bin/sh/expand.c index c343977..dcef74e 100644 --- a/bin/sh/expand.c +++ b/bin/sh/expand.c @@ -665,6 +665,7 @@ evalvar(char *p, int flag) int special; int startloc; int varlen; + int varlenb; int easy; int quotes = flag & (EXP_FULL | EXP_CASE | EXP_REDIR); @@ -712,8 +713,15 @@ again: /* jump here after setting a variable with ${var=text} */ if (special) { varvalue(var, varflags & VSQUOTE, subtype, flag); if (subtype == VSLENGTH) { - varlen = expdest - stackblock() - startloc; - STADJUST(-varlen, expdest); + varlenb = expdest - stackblock() - startloc; + varlen = varlenb; + if (localeisutf8) { + val = stackblock() + startloc; + for (;val != expdest; val++) + if ((*val & 0xC0) == 0x80) + varlen--; + } + STADJUST(-varlenb, expdest); } } else { char const *syntax = (varflags & VSQUOTE) ? DQSYNTAX @@ -721,7 +729,9 @@ again: /* jump here after setting a variable with ${var=text} */ if (subtype == VSLENGTH) { for (;*val; val++) - varlen++; + if (!localeisutf8 || + (*val & 0xC0) != 0x80) + varlen++; } else { if (quotes) -- cgit v1.1