1 files changed, 81 insertions, 20 deletions
diff --git a/contrib/tcsh/tc.str.c b/contrib/tcsh/tc.str.c
index 568a1ef..ca00721 100644
--- a/contrib/tcsh/tc.str.c
+++ b/contrib/tcsh/tc.str.c
@@ -1,4 +1,4 @@
-/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.30 2009/06/25 21:27:38 christos Exp $ */
+/* $Header: /p/tcsh/cvsroot/tcsh/tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $ */
 /*
  * tc.str.c: Short string package
  * 	     This has been a lesson of how to write buggy code!
@@ -33,9 +33,10 @@
  */
 #include "sh.h"
 
+#include <assert.h>
 #include <limits.h>
 
-RCSID("$tcsh: tc.str.c,v 3.30 2009/06/25 21:27:38 christos Exp $")
+RCSID("$tcsh: tc.str.c,v 3.42 2012/01/10 21:34:31 christos Exp $")
 
 #define MALLOC_INCR	128
 #ifdef WIDE_STRINGS
@@ -46,7 +47,7 @@ RCSID("$tcsh: tc.str.c,v 3.30 2009/06/25 21:27:38 christos Exp $")
 
 #ifdef WIDE_STRINGS
 size_t
-one_mbtowc(wchar_t *pwc, const char *s, size_t n)
+one_mbtowc(Char *pwc, const char *s, size_t n)
 {
     int len;
 
@@ -61,7 +62,7 @@ one_mbtowc(wchar_t *pwc, const char *s, size_t n)
 }
 
 size_t
-one_wctomb(char *s, wchar_t wchar)
+one_wctomb(char *s, Char wchar)
 {
     int len;
 
@@ -69,7 +70,23 @@ one_wctomb(char *s, wchar_t wchar)
 	s[0] = wchar & 0xFF;
 	len = 1;
     } else {
-	len = wctomb(s, wchar);
+#ifdef UTF16_STRINGS
+	if (wchar >= 0x10000) {
+	    /* UTF-16 systems can't handle these values directly in calls to
+	       wctomb.  Convert the value to a UTF-16 surrogate pair and call
+	       wcstombs to convert the "string" to the correct multibyte
+	       representation, if any. */
+	    wchar_t ws[3];
+	    wchar -= 0x10000;
+	    ws[0] = 0xd800 | (wchar >> 10);
+	    ws[1] = 0xdc00 | (wchar & 0x3ff);
+	    ws[2] = 0;
+	    /* The return value of wcstombs excludes the trailing 0, so len is
+	       the correct number of bytes for the Unicode char. */
+	    len = wcstombs (s, ws, MB_CUR_MAX + 1);
+	} else
+#endif
+	len = wctomb(s, (wchar_t) wchar);
 	if (len == -1)
 	    s[0] = wchar;
 	if (len <= 0)
@@ -79,14 +96,55 @@ one_wctomb(char *s, wchar_t wchar)
 }
 
 int
-rt_mbtowc(wchar_t *pwc, const char *s, size_t n)
+rt_mbtowc(Char *pwc, const char *s, size_t n)
 {
     int ret;
     char back[MB_LEN_MAX];
+    wchar_t tmp;
+#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
+# if defined(AUTOSET_KANJI)
+    static mbstate_t mb_zero, mb;
+    /*
+     * Work around the Shift-JIS encoding that translates unshifted 7-bit ASCII!
+     */
+    if (!adrof(STRnokanji) && n && pwc && s && (*s == '\\' || *s == '~') &&
+	!memcmp(&mb, &mb_zero, sizeof(mb)))
+    {
+	*pwc = *s;
+	return 1;
+    }
+# else
+    mbstate_t mb;
+# endif
+
+    memset (&mb, 0, sizeof mb);
+    ret = mbrtowc(&tmp, s, n, &mb);
+#else
+    ret = mbtowc(&tmp, s, n);
+#endif
+    if (ret > 0) {
+	*pwc = tmp;
+#if defined(UTF16_STRINGS) && defined(HAVE_MBRTOWC)
+	if (tmp >= 0xd800 && tmp <= 0xdbff) {
+	    /* UTF-16 surrogate pair.  Fetch second half and compute
+	       UTF-32 value.  Dispense with the inverse test in this case. */
+	    size_t n2 = mbrtowc(&tmp, s + ret, n - ret, &mb);
+	    if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2)
+		ret = -1;
+	    else {
+		*pwc = (((*pwc & 0x3ff) << 10) | (tmp & 0x3ff)) + 0x10000;
+		ret += n2;
+	    }
+	} else
+#endif
+      	if (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0)
+	    ret = -1;
 
-    ret = mbtowc(pwc, s, n);
-    if (ret > 0 && (wctomb(back, *pwc) != ret || memcmp(s, back, ret) != 0))
+    } else if (ret == -2)
 	ret = -1;
+    else if (ret == 0)
+	*pwc = '\0';
+
     return ret;
 }
 #endif
@@ -186,7 +244,7 @@ short2str(const Char *src)
     return (sdst);
 }
 
-#ifndef WIDE_STRINGS
+#if !defined (WIDE_STRINGS) || defined (UTF16_STRINGS)
 Char   *
 s_strcpy(Char *dst, const Char *src)
 {
@@ -334,18 +392,20 @@ int
 s_strcasecmp(const Char *str1, const Char *str2)
 {
 #ifdef WIDE_STRINGS
-    wchar_t l1 = 0, l2 = 0;
-    for (; *str1 && ((*str1 == *str2 && (l1 = l2 = 0) == 0) || 
-	(l1 = towlower(*str1)) == (l2 = towlower(*str2))); str1++, str2++)
-	continue;
-    
+    wint_t l1 = 0, l2 = 0;
+    for (; *str1; str1++, str2++)
+	if (*str1 == *str2)
+	    l1 = l2 = 0;
+	else if ((l1 = towlower(*str1)) != (l2 = towlower(*str2)))
+	    break;
 #else
-    unsigned char c1, c2, l1 = 0, l2 = 0;
-    for (; *str1 && ((*str1 == *str2 && (l1 = l2 = 0) == 0) || 
-	((c1 = (unsigned char)*str1) == *str1 &&
-	 (c2 = (unsigned char)*str2) == *str2 &&
-	(l1 = tolower(c1)) == (l2 = tolower(c2)))); str1++, str2++)
-	continue;
+    unsigned char l1 = 0, l2 = 0;
+    for (; *str1; str1++, str2++)
+	if (*str1 == *str2)
+		l1 = l2 = 0;
+	else if ((l1 = tolower((unsigned char)*str1)) !=
+	    (l2 = tolower((unsigned char)*str2)))
+	    break;
 #endif
     /*
      * The following case analysis is necessary so that characters which look
@@ -568,6 +628,7 @@ STRBUF##_store1(struct STRBUF *buf, CHAR c)			\
 	    buf->size *= 2;					\
 	buf->s = xrealloc(buf->s, buf->size * sizeof(*buf->s));	\
     }								\
+    assert(buf->s);						\
     buf->s[buf->len] = c;					\
 }								\
 								\