diff options
Diffstat (limited to 'contrib/tcsh/tc.str.c')
-rw-r--r-- | contrib/tcsh/tc.str.c | 101 |
1 files changed, 81 insertions, 20 deletions
diff --git a/contrib/tcsh/tc.str.c b/contrib/tcsh/tc.str.c index 568a1ef..ca00721 100644 --- a/contrib/tcsh/tc.str.c +++ b/contrib/tcsh/tc.str.c @@ -1,4 +1,4 @@ -/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.30 2009/06/25 21:27:38 christos Exp $ */ +/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */ /* * tc.str.c: Short string package * This has been a lesson of how to write buggy code! @@ -33,9 +33,10 @@ */ #include "sh.h" +#include <assert.h> #include <limits.h> -RCSID("$tcsh: tc.str.c,v 3.30 2009/06/25 21:27:38 christos Exp $") +RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $") #define MALLOC_INCR 128 #ifdef WIDE_STRINGS @@ -46,7 +47,7 @@ RCSID("$tcsh: tc.str.c,v 3.30 2009/06/25 21:27:38 christos Exp $") #ifdef WIDE_STRINGS size_t -one_mbtowc(wchar_t *pwc, const char *s, size_t n) +one_mbtowc(Char *pwc, const char *s, size_t n) { int len; @@ -61,7 +62,7 @@ one_mbtowc(wchar_t *pwc, const char *s, size_t n) } size_t -one_wctomb(char *s, wchar_t wchar) +one_wctomb(char *s, Char wchar) { int len; @@ -69,7 +70,23 @@ one_wctomb(char *s, wchar_t wchar) s[0] = wchar & 0xFF; len = 1; } else { - len = wctomb(s, wchar); +#ifdef UTF16_STRINGS + if (wchar >= 0x10000) { + /* UTF-16 systems can't handle these values directly in calls to + wctomb. Convert value to UTF-16 surrogate and call wcstombs to + convert the "string" to the correct multibyte representation, + if any. */ + wchar_t ws[3]; + wchar -= 0x10000; + ws[0] = 0xd800 | (wchar >> 10); + ws[1] = 0xdc00 | (wchar & 0x3ff); + ws[2] = 0; + /* The return value of wcstombs excludes the trailing 0, so len is + the correct number of multibytes for the Unicode char. */ + len = wcstombs (s, ws, MB_CUR_MAX + 1); + } else +#endif + len = wctomb(s, (wchar_t) wchar); if (len == -1) s[0] = wchar; if (len <= 0) @@ -79,14 +96,55 @@ one_wctomb(char *s, wchar_t wchar) } int -rt_mbtowc(wchar_t *pwc, const char *s, size_t n) +rt_mbtowc(Char *pwc, const char *s, size_t n) { int ret; char back[MB_LEN_MAX]; + wchar_t tmp; +#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) +# if defined(AUTOSET_KANJI) + static mbstate_t mb_zero, mb; + /* + * Workaround the Shift-JIS endcoding that translates unshifted 7 bit ASCII! + */ + if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') && + !memcmp(&mb, &mb_zero, sizeof(mb))) + { + *pwc = *s; + return 1; + } +# else + mbstate_t mb; +# endif + + memset (&mb, 0, sizeof mb); + ret = mbrtowc(&tmp, s, n, &mb); +#else + ret = mbtowc(&tmp, s, n); +#endif + if (ret > 0) { + *pwc = tmp; +#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC) + if (tmp >= 0xd800 && tmp <= 0xdbff) { + /* UTF-16 surrogate pair. Fetch second half and compute + UTF-32 value. Dispense with the inverse test in this case. */ + size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb); + if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) + ret = -1; + else { + *pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000; + ret += n2; + } + } else +#endif + if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0) + ret = -1; - ret = mbtowc(pwc, s, n); - if (ret > 0 && (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)) + } else if (ret == -2) ret = -1; + else if (ret == 0) + *pwc = '\0'; + return ret; } #endif @@ -186,7 +244,7 @@ short2str(const Char *src) return (sdst); } -#ifndef WIDE_STRINGS +#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS) Char * s_strcpy(Char *dst, const Char *src) { @@ -334,18 +392,20 @@ int s_strcasecmp(const Char *str1, const Char *str2) { #ifdef WIDE_STRINGS - wchar_t l1 = 0, l2 = 0; - for (; *str1 && ((*str1 == *str2 && (l1 = l2 = 0) == 0) || - (l1 = towlower(*str1)) == (l2 = towlower(*str2))); str1++, str2++) - continue; - + wint_t l1 = 0, l2 = 0; + for (; *str1; str1++, str2++) + if (*str1 == *str2) + l1 = l2 = 0; + else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2))) + break; #else - unsigned char c1, c2, l1 = 0, l2 = 0; - for (; *str1 && ((*str1 == *str2 && (l1 = l2 = 0) == 0) || - ((c1 = (unsigned char)*str1) == *str1 && - (c2 = (unsigned char)*str2) == *str2 && - (l1 = tolower(c1)) == (l2 = tolower(c2)))); str1++, str2++) - continue; + unsigned char l1 = 0, l2 = 0; + for (; *str1; str1++, str2++) + if (*str1 == *str2) + l1 = l2 = 0; + else if ((l1 = tolower((unsigned char)*str1)) != + (l2 = tolower((unsigned char)*str2))) + break; #endif /* * The following case analysis is necessary so that characters which look @@ -568,6 +628,7 @@ STRBUF##_store1(struct STRBUF *buf, CHAR c) \ buf->size *= 2; \ buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s)); \ } \ + assert(buf->s); \ buf->s[buf->len] = c; \ } \ \ |