summary refs log tree commit diff stats
path: root/lib/libc/locale
diff options
context:
space:
mode:
author tjr <tjr@FreeBSD.org> 2003-11-02 10:09:33 +0000
committer tjr <tjr@FreeBSD.org> 2003-11-02 10:09:33 +0000
commit 1c3a3f7e2621ff295c7e7b931e81655cef49d115 (patch)
tree 824f8d75e428ea4465ffd6a2314d3084178e9224 /lib/libc/locale
parent 69c81d4892abdd3dea4d40d0a43d101eeb956109 (diff)
download FreeBSD-src-1c3a3f7e2621ff295c7e7b931e81655cef49d115.zip
FreeBSD-src-1c3a3f7e2621ff295c7e7b931e81655cef49d115.tar.gz
Convert the Big5, EUC, MSKanji and UTF-8 encoding methods to implement
mbrtowc() and wcrtomb() directly. GB18030, GBK and UTF2 are left unconverted; GB18030 will be done eventually, but GBK and UTF2 may just be removed, as they are subsets of GB18030 and UTF-8 respectively.
Diffstat (limited to 'lib/libc/locale')
-rw-r--r-- lib/libc/locale/big5.c 103
-rw-r--r-- lib/libc/locale/euc.c 152
-rw-r--r-- lib/libc/locale/mskanji.c 92
-rw-r--r-- lib/libc/locale/utf8.c 140
4 files changed, 233 insertions, 254 deletions
diff --git a/lib/libc/locale/big5.c b/lib/libc/locale/big5.c
index 12cc312..7c0c981 100644
--- a/lib/libc/locale/big5.c
+++ b/lib/libc/locale/big5.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
@@ -40,80 +41,76 @@ static char sccsid[] = "@(#)big5.c 8.1 (Berkeley) 6/4/93";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rune.h>
+#include <sys/types.h>
+#include <runetype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
-#include <sys/types.h>
+#include <wchar.h>
+
+extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict);
-rune_t _BIG5_sgetrune(const char *, size_t, char const **);
-int _BIG5_sputrune(rune_t, char *, size_t, char **);
+int _BIG5_init(_RuneLocale *);
+size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
+ mbstate_t * __restrict);
+size_t _BIG5_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
int
-_BIG5_init(rl)
- _RuneLocale *rl;
+_BIG5_init(_RuneLocale *rl)
{
- rl->sgetrune = _BIG5_sgetrune;
- rl->sputrune = _BIG5_sputrune;
+
+ __mbrtowc = _BIG5_mbrtowc;
+ __wcrtomb = _BIG5_wcrtomb;
_CurrentRuneLocale = rl;
__mb_cur_max = 2;
return (0);
}
-static inline int
-_big5_check(c)
- u_int c;
+static __inline int
+_big5_check(u_int c)
{
+
c &= 0xff;
return ((c >= 0xa1 && c <= 0xfe) ? 2 : 1);
}
-rune_t
-_BIG5_sgetrune(string, n, result)
- const char *string;
- size_t n;
- char const **result;
+size_t
+_BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+ mbstate_t * __restrict ps __unused)
{
- rune_t rune = 0;
- int len;
+ wchar_t wc;
+ int i, len;
- if (n < 1 || (len = _big5_check(*string)) > n) {
- if (result)
- *result = string;
- return (_INVALID_RUNE);
- }
- while (--len >= 0)
- rune = (rune << 8) | ((u_int)(*string++) & 0xff);
- if (result)
- *result = string;
- return rune;
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (0);
+ if (n == 0 || (size_t)(len = _big5_check(*s)) > n)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
+ wc = 0;
+ i = len;
+ while (i-- > 0)
+ wc = (wc << 8) | (unsigned char)*s++;
+ if (pwc != NULL)
+ *pwc = wc;
+ return (wc == L'\0' ? 0 : len);
}
-int
-_BIG5_sputrune(c, string, n, result)
- rune_t c;
- char *string, **result;
- size_t n;
+size_t
+_BIG5_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps __unused)
{
- if (c & 0x8000) {
- if (n >= 2) {
- string[0] = (c >> 8) & 0xff;
- string[1] = c & 0xff;
- if (result)
- *result = string + 2;
- return (2);
- }
- }
- else {
- if (n >= 1) {
- *string = c & 0xff;
- if (result)
- *result = string + 1;
- return (1);
- }
+
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (1);
+ if (wc & 0x8000) {
+ *s++ = (wc >> 8) & 0xff;
+ *s = wc & 0xff;
+ return (2);
}
- if (result)
- *result = string;
- return (0);
-
+ *s = wc & 0xff;
+ return (1);
}
diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c
index 596d107a..355e7a5 100644
--- a/lib/libc/locale/euc.c
+++ b/lib/libc/locale/euc.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
@@ -43,32 +44,35 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <errno.h>
-#include <rune.h>
+#include <runetype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
-rune_t _EUC_sgetrune(const char *, size_t, char const **);
-int _EUC_sputrune(rune_t, char *, size_t, char **);
+extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict);
+
+int _EUC_init(_RuneLocale *);
+size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
+ mbstate_t * __restrict);
+size_t _EUC_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
typedef struct {
int count[4];
- rune_t bits[4];
- rune_t mask;
+ wchar_t bits[4];
+ wchar_t mask;
} _EucInfo;
int
-_EUC_init(rl)
- _RuneLocale *rl;
+_EUC_init(_RuneLocale *rl)
{
_EucInfo *ei;
int x, new__mb_cur_max;
char *v, *e;
- rl->sgetrune = _EUC_sgetrune;
- rl->sputrune = _EUC_sputrune;
-
if (rl->variable == NULL)
return (EFTYPE);
@@ -108,6 +112,8 @@ _EUC_init(rl)
rl->variable_len = sizeof(_EucInfo);
_CurrentRuneLocale = rl;
__mb_cur_max = new__mb_cur_max;
+ __mbrtowc = _EUC_mbrtowc;
+ __wcrtomb = _EUC_wcrtomb;
return (0);
}
@@ -118,105 +124,85 @@ _EUC_init(rl)
#define GR_BITS 0x80808080 /* XXX: to be fixed */
-static inline int
-_euc_set(c)
- u_int c;
+static __inline int
+_euc_set(u_int c)
{
c &= 0xff;
-
return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
}
-rune_t
-_EUC_sgetrune(string, n, result)
- const char *string;
- size_t n;
- char const **result;
-{
- rune_t rune = 0;
- int len, set;
- if (n < 1 || (len = CEI->count[set = _euc_set(*string)]) > n) {
- if (result)
- *result = string;
- return (_INVALID_RUNE);
- }
+size_t
+_EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+ mbstate_t * __restrict ps __unused)
+{
+ int len, remain, set;
+ wchar_t wc;
+
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (0);
+ if (n == 0 || (size_t)(len = CEI->count[set = _euc_set(*s)]) > n)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
+ wc = 0;
+ remain = len;
switch (set) {
case 3:
case 2:
- --len;
- ++string;
+ --remain;
+ ++s;
/* FALLTHROUGH */
case 1:
case 0:
- while (len-- > 0)
- rune = (rune << 8) | ((u_int)(*string++) & 0xff);
+ while (remain-- > 0)
+ wc = (wc << 8) | (unsigned char)*s++;
break;
}
- if (result)
- *result = string;
- return ((rune & ~CEI->mask) | CEI->bits[set]);
+ wc = (wc & ~CEI->mask) | CEI->bits[set];
+ if (pwc != NULL)
+ *pwc = wc;
+ return (wc == L'\0' ? 0 : len);
}
-int
-_EUC_sputrune(c, string, n, result)
- rune_t c;
- char *string, **result;
- size_t n;
+size_t
+_EUC_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps __unused)
{
- rune_t m = c & CEI->mask;
- rune_t nm = c & ~m;
+ wchar_t m, nm;
int i, len;
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (1);
+
+ m = wc & CEI->mask;
+ nm = wc & ~m;
+
if (m == CEI->bits[1]) {
CodeSet1:
/* Codeset 1: The first byte must have 0x80 in it. */
i = len = CEI->count[1];
- if (n >= len) {
- if (result)
- *result = string + len;
- while (i-- > 0)
- *string++ = (nm >> (i << 3)) | 0x80;
- } else
- if (result)
- *result = (char *) 0;
+ while (i-- > 0)
+ *s++ = (nm >> (i << 3)) | 0x80;
} else {
- if (m == CEI->bits[0]) {
+ if (m == CEI->bits[0])
i = len = CEI->count[0];
- if (n < len) {
- if (result)
- *result = NULL;
- return (len);
- }
+ else if (m == CEI->bits[2]) {
+ i = len = CEI->count[2];
+ *s++ = _SS2;
+ --i;
+ /* SS2 designates G2 into GR */
+ nm |= GR_BITS;
+ } else if (m == CEI->bits[3]) {
+ i = len = CEI->count[3];
+ *s++ = _SS3;
+ --i;
+ /* SS3 designates G3 into GR */
+ nm |= GR_BITS;
} else
- if (m == CEI->bits[2]) {
- i = len = CEI->count[2];
- if (n < len) {
- if (result)
- *result = NULL;
- return (len);
- }
- *string++ = _SS2;
- --i;
- /* SS2 designates G2 into GR */
- nm |= GR_BITS;
- } else
- if (m == CEI->bits[3]) {
- i = len = CEI->count[3];
- if (n < len) {
- if (result)
- *result = NULL;
- return (len);
- }
- *string++ = _SS3;
- --i;
- /* SS3 designates G3 into GR */
- nm |= GR_BITS;
- } else
- goto CodeSet1; /* Bletch */
+ goto CodeSet1; /* Bletch */
while (i-- > 0)
- *string++ = (nm >> (i << 3)) & 0xff;
- if (result)
- *result = string;
+ *s++ = (nm >> (i << 3)) & 0xff;
}
return (len);
}
diff --git a/lib/libc/locale/mskanji.c b/lib/libc/locale/mskanji.c
index 482e5b5..f4efcca 100644
--- a/lib/libc/locale/mskanji.c
+++ b/lib/libc/locale/mskanji.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved.
* ja_JP.SJIS locale table for BSD4.4/rune
* version 1.0
* (C) Sin'ichiro MIYATANI / Phase One, Inc
@@ -38,74 +39,71 @@ static char sccsid[] = "@(#)mskanji.c 1.0 (Phase One) 5/5/95";
__FBSDID("$FreeBSD$");
#include <sys/types.h>
-
-#include <rune.h>
+#include <runetype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
+#include <wchar.h>
+
+extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict);
-rune_t _MSKanji_sgetrune(const char *, size_t, char const **);
-int _MSKanji_sputrune(rune_t, char *, size_t, char **);
+int _MSKanji_init(_RuneLocale *);
+size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
+ mbstate_t * __restrict);
+size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
int
-_MSKanji_init(rl)
- _RuneLocale *rl;
+_MSKanji_init(_RuneLocale *rl)
{
- rl->sgetrune = _MSKanji_sgetrune;
- rl->sputrune = _MSKanji_sputrune;
+ __mbrtowc = _MSKanji_mbrtowc;
+ __wcrtomb = _MSKanji_wcrtomb;
_CurrentRuneLocale = rl;
__mb_cur_max = 2;
return (0);
}
-rune_t
-_MSKanji_sgetrune(string, n, result)
- const char *string;
- size_t n;
- char const **result;
+size_t
+_MSKanji_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+ mbstate_t * __restrict ps __unused)
{
- rune_t rune = 0;
-
- if (n < 1) {
- if (result != NULL)
- *result = string;
- return (_INVALID_RUNE);
- }
+ wchar_t wc;
+ int len;
- rune = *string++ & 0xff;
- if ((rune > 0x80 && rune < 0xa0) ||
- (rune >= 0xe0 && rune < 0xfd)) {
- if (n < 2) {
- rune = _INVALID_RUNE;
- --string;
- } else
- rune = (rune << 8) | (*string++ & 0xff);
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (0);
+ if (n == 0)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
+ len = 1;
+ wc = *s++ & 0xff;
+ if ((wc > 0x80 && wc < 0xa0) || (wc >= 0xe0 && wc < 0xfd)) {
+ if (n < 2)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
+ wc = (wc << 8) | (*s++ & 0xff);
+ len = 2;
}
- if (result != NULL)
- *result = string;
-
- return (rune);
+ if (pwc != NULL)
+ *pwc = wc;
+ return (wc == L'\0' ? 0 : len);
}
-int
-_MSKanji_sputrune(c, string, n, result)
- rune_t c;
- char *string, **result;
- size_t n;
+size_t
+_MSKanji_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps __unused)
{
int len, i;
- len = (c > 0x100) ? 2 : 1;
- if (n < len) {
- if (result != NULL)
- *result = NULL;
- } else {
- if (result != NULL)
- *result = string + len;
- for (i = len; i-- > 0; )
- *string++ = c >> (i << 3);
- }
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (1);
+ len = (wc > 0x100) ? 2 : 1;
+ for (i = len; i-- > 0; )
+ *s++ = wc >> (i << 3);
return (len);
}
diff --git a/lib/libc/locale/utf8.c b/lib/libc/locale/utf8.c
index c22d3d6..10f937b 100644
--- a/lib/libc/locale/utf8.c
+++ b/lib/libc/locale/utf8.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2002 Tim J. Robbins
+ * Copyright (c) 2002, 2003 Tim J. Robbins
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,37 +27,46 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <rune.h>
+#include <errno.h>
+#include <runetype.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
+#include <wchar.h>
-rune_t _UTF8_sgetrune(const char *, size_t, char const **);
-int _UTF8_sputrune(rune_t, char *, size_t, char **);
+extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict);
+
+size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
+ mbstate_t * __restrict);
+size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
int
_UTF8_init(_RuneLocale *rl)
{
- rl->sgetrune = _UTF8_sgetrune;
- rl->sputrune = _UTF8_sputrune;
+ __mbrtowc = _UTF8_mbrtowc;
+ __wcrtomb = _UTF8_wcrtomb;
_CurrentRuneLocale = rl;
__mb_cur_max = 6;
return (0);
}
-rune_t
-_UTF8_sgetrune(const char *string, size_t n, const char **result)
+size_t
+_UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+ mbstate_t * __restrict ps __unused)
{
- int ch, len, mask;
- rune_t lbound, wch;
+ int ch, i, len, mask;
+ wchar_t lbound, wch;
- if (n < 1) {
- if (result != NULL)
- *result = string;
- return (_INVALID_RUNE);
- }
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (0);
+ if (n == 0)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
/*
* Determine the number of octets that make up this character from
@@ -70,7 +79,7 @@ _UTF8_sgetrune(const char *string, size_t n, const char **result)
* character. This enforces a 1-to-1 mapping between character
* codes and their multibyte representations.
*/
- ch = (unsigned char)*string;
+ ch = (unsigned char)*s;
if ((ch & 0x80) == 0) {
mask = 0x7f;
len = 1;
@@ -99,106 +108,95 @@ _UTF8_sgetrune(const char *string, size_t n, const char **result)
/*
* Malformed input; input is not UTF-8.
*/
- if (result != NULL)
- *result = string + 1;
- return (_INVALID_RUNE);
+ errno = EILSEQ;
+ return ((size_t)-1);
}
- if (n < len) {
- /*
- * Truncated or partial input.
- */
- if (result != NULL)
- *result = string;
- return (_INVALID_RUNE);
- }
+ if (n < (size_t)len)
+ /* Incomplete multibyte sequence */
+ return ((size_t)-2);
/*
* Decode the octet sequence representing the character in chunks
* of 6 bits, most significant first.
*/
- wch = (unsigned char)*string++ & mask;
- while (--len != 0) {
- if ((*string & 0xc0) != 0x80) {
+ wch = (unsigned char)*s++ & mask;
+ i = len;
+ while (--i != 0) {
+ if ((*s & 0xc0) != 0x80) {
/*
* Malformed input; bad characters in the middle
* of a character.
*/
- wch = _INVALID_RUNE;
- if (result != NULL)
- *result = string + 1;
- return (_INVALID_RUNE);
+ errno = EILSEQ;
+ return ((size_t)-1);
}
wch <<= 6;
- wch |= *string++ & 0x3f;
+ wch |= *s++ & 0x3f;
}
- if (wch != _INVALID_RUNE && wch < lbound)
+ if (wch < lbound) {
/*
* Malformed input; redundant encoding.
*/
- wch = _INVALID_RUNE;
- if (result != NULL)
- *result = string;
- return (wch);
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+ if (pwc != NULL)
+ *pwc = wch;
+ return (wch == L'\0' ? 0 : i);
}
-int
-_UTF8_sputrune(rune_t c, char *string, size_t n, char **result)
+size_t
+_UTF8_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps __unused)
{
unsigned char lead;
int i, len;
+ if (s == NULL)
+ /* Reset to initial shift state (no-op) */
+ return (1);
+
/*
* Determine the number of octets needed to represent this character.
* We always output the shortest sequence possible. Also specify the
* first few bits of the first octet, which contains the information
* about the sequence length.
*/
- if ((c & ~0x7f) == 0) {
+ if ((wc & ~0x7f) == 0) {
lead = 0;
len = 1;
- } else if ((c & ~0x7ff) == 0) {
+ } else if ((wc & ~0x7ff) == 0) {
lead = 0xc0;
len = 2;
- } else if ((c & ~0xffff) == 0) {
+ } else if ((wc & ~0xffff) == 0) {
lead = 0xe0;
len = 3;
- } else if ((c & ~0x1fffff) == 0) {
+ } else if ((wc & ~0x1fffff) == 0) {
lead = 0xf0;
len = 4;
- } else if ((c & ~0x3ffffff) == 0) {
+ } else if ((wc & ~0x3ffffff) == 0) {
lead = 0xf8;
len = 5;
- } else if ((c & ~0x7fffffff) == 0) {
+ } else if ((wc & ~0x7fffffff) == 0) {
lead = 0xfc;
len = 6;
} else {
- /*
- * Wide character code is out of range.
- */
- if (result != NULL)
- *result = NULL;
- return (0);
+ errno = EILSEQ;
+ return ((size_t)-1);
}
- if (n < len) {
- if (result != NULL)
- *result = NULL;
- } else {
- /*
- * Output the octets representing the character in chunks
- * of 6 bits, least significant last. The first octet is
- * a special case because it contains the sequence length
- * information.
- */
- for (i = len - 1; i > 0; i--) {
- string[i] = (c & 0x3f) | 0x80;
- c >>= 6;
- }
- *string = (c & 0xff) | lead;
- if (result != NULL)
- *result = string + len;
+ /*
+ * Output the octets representing the character in chunks
+ * of 6 bits, least significant last. The first octet is
+ * a special case because it contains the sequence length
+ * information.
+ */
+ for (i = len - 1; i > 0; i--) {
+ s[i] = (wc & 0x3f) | 0x80;
+ wc >>= 6;
}
+ *s = (wc & 0xff) | lead;
return (len);
}
OpenPOWER on IntegriCloud