diff options
author | tjr <tjr@FreeBSD.org> | 2004-04-07 10:48:19 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2004-04-07 10:48:19 +0000 |
commit | 54a18fa1d63375b790f3bf3157f6b64b294e5d16 (patch) | |
tree | 44e2a3f83138ea23f4b4f68cbea18e4f60c8745e /lib | |
parent | a6980b04fc41a4c6dc314dc3aa00de7e7834ba7b (diff) | |
download | FreeBSD-src-54a18fa1d63375b790f3bf3157f6b64b294e5d16.zip FreeBSD-src-54a18fa1d63375b790f3bf3157f6b64b294e5d16.tar.gz |
Allow partial multibyte characters to accumulate in conversion state
objects passed to mbrtowc(), mbsrtowcs(), and mbrlen(), as required
by C99.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libc/locale/big5.c | 48 | ||||
-rw-r--r-- | lib/libc/locale/euc.c | 50 | ||||
-rw-r--r-- | lib/libc/locale/gb18030.c | 50 | ||||
-rw-r--r-- | lib/libc/locale/gb2312.c | 47 | ||||
-rw-r--r-- | lib/libc/locale/gbk.c | 48 | ||||
-rw-r--r-- | lib/libc/locale/mbsinit.c | 13 | ||||
-rw-r--r-- | lib/libc/locale/mskanji.c | 54 | ||||
-rw-r--r-- | lib/libc/locale/none.c | 16 | ||||
-rw-r--r-- | lib/libc/locale/setrunelocale.c | 7 | ||||
-rw-r--r-- | lib/libc/locale/table.c | 2 | ||||
-rw-r--r-- | lib/libc/locale/utf2.c | 51 | ||||
-rw-r--r-- | lib/libc/locale/utf8.c | 53 |
12 files changed, 350 insertions, 89 deletions
diff --git a/lib/libc/locale/big5.c b/lib/libc/locale/big5.c index 3b1dabe..ccf35f0 100644 --- a/lib/libc/locale/big5.c +++ b/lib/libc/locale/big5.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved. + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * @@ -38,34 +38,49 @@ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)big5.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> #include <runetype.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _BIG5_init(_RuneLocale *); size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +int _BIG5_mbsinit(const mbstate_t *); size_t _BIG5_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +typedef struct { + int count; + u_char bytes[2]; +} _BIG5State; + int _BIG5_init(_RuneLocale *rl) { __mbrtowc = _BIG5_mbrtowc; __wcrtomb = _BIG5_wcrtomb; + __mbsinit = _BIG5_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; return (0); } +int +_BIG5_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_BIG5State *)ps)->count == 0); +} + static __inline int _big5_check(u_int c) { @@ -76,14 +91,28 @@ _big5_check(u_int c) size_t _BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + mbstate_t * __restrict ps) { + _BIG5State *bs; wchar_t wc; - int i, len; + int i, len, ocount; + size_t ncopy; + + bs = (_BIG5State *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), 
sizeof(bs->bytes) - bs->count); + memcpy(bs->bytes + bs->count, s, ncopy); + ocount = bs->count; + bs->count += ncopy; + s = (char *)bs->bytes; + n = bs->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if (n == 0 || (size_t)(len = _big5_check(*s)) > n) /* Incomplete multibyte sequence */ return ((size_t)-2); @@ -93,7 +122,8 @@ _BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wc = (wc << 8) | (unsigned char)*s++; if (pwc != NULL) *pwc = wc; - return (wc == L'\0' ? 0 : len); + bs->count = 0; + return (wc == L'\0' ? 0 : len - ocount); } size_t diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c index 74ed52e..490b1c8 100644 --- a/lib/libc/locale/euc.c +++ b/lib/libc/locale/euc.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved. + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * @@ -38,23 +38,25 @@ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)euc.c 8.1 (Berkeley) 6/4/93"; #endif /* LIBC_SCCS and not lint */ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> - #include <errno.h> +#include <limits.h> #include <runetype.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _EUC_init(_RuneLocale *); size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +int _EUC_mbsinit(const mbstate_t *); size_t _EUC_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); typedef struct { @@ -63,6 +65,11 @@ typedef struct { wchar_t mask; } _EucInfo; +typedef struct { + int count; + u_char bytes[MB_LEN_MAX]; +} _EucState; + 
int _EUC_init(_RuneLocale *rl) { @@ -111,9 +118,17 @@ _EUC_init(_RuneLocale *rl) __mb_cur_max = new__mb_cur_max; __mbrtowc = _EUC_mbrtowc; __wcrtomb = _EUC_wcrtomb; + __mbsinit = _EUC_mbsinit; return (0); } +int +_EUC_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_EucState *)ps)->count == 0); +} + #define CEI ((_EucInfo *)(_CurrentRuneLocale->variable)) #define _SS2 0x008e @@ -130,14 +145,28 @@ _euc_set(u_int c) size_t _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + mbstate_t * __restrict ps) { - int len, remain, set; + _EucState *es; + int len, ocount, remain, set; wchar_t wc; + size_t ncopy; + + es = (_EucState *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(es->bytes) - es->count); + memcpy(es->bytes + es->count, s, ncopy); + ocount = es->count; + es->count += ncopy; + s = (char *)es->bytes; + n = es->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if (n == 0 || (size_t)(len = CEI->count[set = _euc_set(*s)]) > n) /* Incomplete multibyte sequence */ return ((size_t)-2); @@ -158,7 +187,8 @@ _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wc = (wc & ~CEI->mask) | CEI->bits[set]; if (pwc != NULL) *pwc = wc; - return (wc == L'\0' ? 0 : len); + es->count = 0; + return (wc == L'\0' ? 0 : len - ocount); } size_t diff --git a/lib/libc/locale/gb18030.c b/lib/libc/locale/gb18030.c index dca718f..817602b 100644 --- a/lib/libc/locale/gb18030.c +++ b/lib/libc/locale/gb18030.c @@ -29,22 +29,30 @@ * See gb18030(5) for details. 
*/ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); #include <errno.h> #include <runetype.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _GB18030_init(_RuneLocale *); -size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, +size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -size_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +int _GB18030_mbsinit(const mbstate_t *); +size_t _GB18030_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); + +typedef struct { + int count; + u_char bytes[4]; +} _GB18030State; int _GB18030_init(_RuneLocale *rl) @@ -52,22 +60,44 @@ _GB18030_init(_RuneLocale *rl) __mbrtowc = _GB18030_mbrtowc; __wcrtomb = _GB18030_wcrtomb; + __mbsinit = _GB18030_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 4; return (0); } +int +_GB18030_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_GB18030State *)ps)->count == 0); +} + size_t _GB18030_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, - size_t n, mbstate_t * __restrict ps __unused) + size_t n, mbstate_t * __restrict ps) { + _GB18030State *gs; wchar_t wch; - int ch, len; + int ch, len, ocount; + size_t ncopy; + + gs = (_GB18030State *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); + memcpy(gs->bytes + gs->count, s, ncopy); + ocount = gs->count; + gs->count += ncopy; + s = (char *)gs->bytes; + n = gs->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); @@ -116,7 +146,8 @@ _GB18030_mbrtowc(wchar_t * __restrict pwc, const 
char * __restrict s, if (pwc != NULL) *pwc = wch; - return (wch == L'\0' ? 0 : len); + gs->count = 0; + return (wch == L'\0' ? 0 : len - ocount); ilseq: errno = EILSEQ; return ((size_t)-1); @@ -132,7 +163,6 @@ _GB18030_wcrtomb(char * __restrict s, wchar_t wc, if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); - if ((wc & ~0x7fffffff) != 0) goto ilseq; if (wc & 0x7f000000) { diff --git a/lib/libc/locale/gb2312.c b/lib/libc/locale/gb2312.c index f1895ce..8509d04 100644 --- a/lib/libc/locale/gb2312.c +++ b/lib/libc/locale/gb2312.c @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2004 Tim J. Robbins. All rights reserved. * Copyright (c) 2003 David Xu <davidxu@freebsd.org> * All rights reserved. * @@ -24,23 +25,30 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> #include <runetype.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _GB2312_init(_RuneLocale *); size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +int _GB2312_mbsinit(const mbstate_t *); size_t _GB2312_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +typedef struct { + int count; + u_char bytes[2]; +} _GB2312State; + int _GB2312_init(_RuneLocale *rl) { @@ -48,10 +56,18 @@ _GB2312_init(_RuneLocale *rl) _CurrentRuneLocale = rl; __mbrtowc = _GB2312_mbrtowc; __wcrtomb = _GB2312_wcrtomb; + __mbsinit = _GB2312_mbsinit; __mb_cur_max = 2; return (0); } +int +_GB2312_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_GB2312State *)ps)->count == 0); +} + static __inline int _GB2312_check(const char *str, size_t n) { @@ -77,14 +93,28 @@ _GB2312_check(const char *str, size_t n) size_t _GB2312_mbrtowc(wchar_t * __restrict pwc, 
const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + mbstate_t * __restrict ps) { + _GB2312State *gs; wchar_t wc; - int i, len; + int i, len, ocount; + size_t ncopy; + + gs = (_GB2312State *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); + memcpy(gs->bytes + gs->count, s, ncopy); + ocount = gs->count; + gs->count += ncopy; + s = (char *)gs->bytes; + n = gs->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if ((len = _GB2312_check(s, n)) < 0) return ((size_t)len); wc = 0; @@ -93,7 +123,8 @@ _GB2312_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wc = (wc << 8) | (unsigned char)*s++; if (pwc != NULL) *pwc = wc; - return (wc == L'\0' ? 0 : len); + gs->count = 0; + return (wc == L'\0' ? 0 : len - ocount); } size_t diff --git a/lib/libc/locale/gbk.c b/lib/libc/locale/gbk.c index e5572bc..880884f 100644 --- a/lib/libc/locale/gbk.c +++ b/lib/libc/locale/gbk.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved. + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * @@ -35,34 +35,49 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> #include <runetype.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _GBK_init(_RuneLocale *); size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +int _GBK_mbsinit(const mbstate_t *); size_t _GBK_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +typedef struct { + int count; + u_char bytes[2]; +} _GBKState; + int _GBK_init(_RuneLocale *rl) { __mbrtowc = _GBK_mbrtowc; __wcrtomb = _GBK_wcrtomb; + __mbsinit = _GBK_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; return (0); } +int +_GBK_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_GBKState *)ps)->count == 0); +} + static __inline int _gbk_check(u_int c) { @@ -73,14 +88,28 @@ _gbk_check(u_int c) size_t _GBK_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + mbstate_t * __restrict ps) { + _GBKState *gs; wchar_t wc; - int i, len; + int i, len, ocount; + size_t ncopy; + + gs = (_GBKState *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(gs->bytes) - gs->count); + memcpy(gs->bytes + gs->count, s, ncopy); + ocount = gs->count; + gs->count += ncopy; + s = (char *)gs->bytes; + n = gs->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if (n == 0 || (size_t)(len = _gbk_check(*s)) > n) /* Incomplete multibyte sequence */ return ((size_t)-2); @@ -90,7 +119,8 @@ _GBK_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, wc = (wc << 8) | (unsigned char)*s++; if (pwc != NULL) *pwc = wc; - return (wc == L'\0' ? 
0 : len); + gs->count = 0; + return (wc == L'\0' ? 0 : len - ocount); } size_t diff --git a/lib/libc/locale/mbsinit.c b/lib/libc/locale/mbsinit.c index 923db2f..5470789 100644 --- a/lib/libc/locale/mbsinit.c +++ b/lib/libc/locale/mbsinit.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002 Tim J. Robbins. + * Copyright (c) 2002-2004 Tim J. Robbins. * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,14 +29,11 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> +extern int (*__mbsinit)(const mbstate_t *); + int -mbsinit(const mbstate_t *ps __unused) +mbsinit(const mbstate_t *ps) { - /* - * Stateful multibyte conversion is not supported; there are no - * states other than the initial state. - */ - - return (1); + return (__mbsinit(ps)); } diff --git a/lib/libc/locale/mskanji.c b/lib/libc/locale/mskanji.c index 653e3f8..43bae6c 100644 --- a/lib/libc/locale/mskanji.c +++ b/lib/libc/locale/mskanji.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved. + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. 
+ * * ja_JP.SJIS locale table for BSD4.4/rune * version 1.0 * (C) Sin'ichiro MIYATANI / Phase One, Inc @@ -35,22 +36,29 @@ #if defined(LIBC_SCCS) && !defined(lint) static char sccsid[] = "@(#)mskanji.c 1.0 (Phase One) 5/5/95"; #endif /* LIBC_SCCS and not lint */ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); -#include <sys/types.h> #include <runetype.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _MSKanji_init(_RuneLocale *); -size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, +size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +int _MSKanji_mbsinit(const mbstate_t *); +size_t _MSKanji_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); + +typedef struct { + int count; + u_char bytes[2]; +} _MSKanjiState; int _MSKanji_init(_RuneLocale *rl) @@ -58,21 +66,43 @@ _MSKanji_init(_RuneLocale *rl) __mbrtowc = _MSKanji_mbrtowc; __wcrtomb = _MSKanji_wcrtomb; + __mbsinit = _MSKanji_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; return (0); } +int +_MSKanji_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_MSKanjiState *)ps)->count == 0); +} + size_t _MSKanji_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + mbstate_t * __restrict ps) { + _MSKanjiState *ms; wchar_t wc; - int len; + int len, ocount; + size_t ncopy; + + ms = (_MSKanjiState *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(ms->bytes) - ms->count); + memcpy(ms->bytes + ms->count, s, ncopy); + ocount = ms->count; + ms->count += ncopy; + s = (char 
*)ms->bytes; + n = ms->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); @@ -87,7 +117,8 @@ _MSKanji_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, } if (pwc != NULL) *pwc = wc; - return (wc == L'\0' ? 0 : len); + ms->count = 0; + return (wc == L'\0' ? 0 : len - ocount); } size_t @@ -99,7 +130,6 @@ _MSKanji_wcrtomb(char * __restrict s, wchar_t wc, if (s == NULL) /* Reset to initial shift state (no-op) */ return (1); - len = (wc > 0x100) ? 2 : 1; for (i = len; i-- > 0; ) *s++ = wc >> (i << 3); diff --git a/lib/libc/locale/none.c b/lib/libc/locale/none.c index 3c4d8c7..a94d040 100644 --- a/lib/libc/locale/none.c +++ b/lib/libc/locale/none.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002, 2003 Tim J. Robbins. All rights reserved. + * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved. * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * @@ -51,11 +51,13 @@ __FBSDID("$FreeBSD$"); extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); int _none_init(_RuneLocale *); size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +int _none_mbsinit(const mbstate_t *); size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); int @@ -63,12 +65,24 @@ _none_init(_RuneLocale *rl) { __mbrtowc = _none_mbrtowc; + __mbsinit = _none_mbsinit; __wcrtomb = _none_wcrtomb; _CurrentRuneLocale = rl; __mb_cur_max = 1; return(0); } +int +_none_mbsinit(const mbstate_t *ps __unused) +{ + + /* + * Encoding is not state dependent - we are always in the + * initial state. 
+ */ + return (1); +} + size_t _none_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, mbstate_t * __restrict ps __unused) diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index 11378a0..0d11893 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -64,11 +64,13 @@ extern _RuneLocale *_Read_RuneMagi(FILE *); extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); extern rune_t __emulated_sgetrune(const char *, size_t, const char **); extern int __emulated_sputrune(rune_t, char *, size_t, char **); extern size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int _none_mbsinit(const mbstate_t *); extern size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); @@ -108,6 +110,7 @@ __setrunelocale(const char *encoding) const char * __restrict, size_t, mbstate_t * __restrict); static size_t (*Cached__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); + static int (*Cached__mbsinit)(const mbstate_t *); /* * The "C" and "POSIX" locale are always here. 
@@ -117,6 +120,7 @@ __setrunelocale(const char *encoding) __mb_cur_max = 1; __mbrtowc = _none_mbrtowc; __wcrtomb = _none_wcrtomb; + __mbsinit = _none_mbsinit; return (0); } @@ -129,6 +133,7 @@ __setrunelocale(const char *encoding) __mb_cur_max = Cached__mb_cur_max; __mbrtowc = Cached__mbrtowc; __wcrtomb = Cached__wcrtomb; + __mbsinit = Cached__mbsinit; return (0); } @@ -154,6 +159,7 @@ __setrunelocale(const char *encoding) __mbrtowc = NULL; __wcrtomb = NULL; + __mbsinit = NULL; rl->sputrune = __emulated_sputrune; rl->sgetrune = __emulated_sgetrune; if (strcmp(rl->encoding, "NONE") == 0) @@ -186,6 +192,7 @@ __setrunelocale(const char *encoding) CachedRuneLocale = _CurrentRuneLocale; Cached__mb_cur_max = __mb_cur_max; Cached__mbrtowc = __mbrtowc; + Cached__mbsinit = __mbsinit; Cached__wcrtomb = __wcrtomb; (void)strcpy(ctype_encoding, encoding); } else diff --git a/lib/libc/locale/table.c b/lib/libc/locale/table.c index f93fc1a..7680b9c 100644 --- a/lib/libc/locale/table.c +++ b/lib/libc/locale/table.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); extern size_t _none_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int _none_mbsinit(const mbstate_t *); extern size_t _none_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); extern rune_t __emulated_sgetrune(const char *, size_t, const char **); extern int __emulated_sputrune(rune_t, char *, size_t, char **); @@ -259,5 +260,6 @@ _RuneLocale *_CurrentRuneLocale = &_DefaultRuneLocale; int __mb_cur_max = 1; size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict) = _none_mbrtowc; +int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict) = _none_wcrtomb; diff --git a/lib/libc/locale/utf2.c b/lib/libc/locale/utf2.c index 39befd6..af7cc2f 100644 --- a/lib/libc/locale/utf2.c +++ b/lib/libc/locale/utf2.c @@ -36,23 +36,29 @@ /* UTF2 is obsolete and will be removed in 
FreeBSD 6 -- use UTF-8 instead. */ #define OBSOLETE_IN_6 -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); #include <errno.h> #include <runetype.h> -#include <stddef.h> -#include <stdio.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); -size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, +size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -size_t _UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +int _UTF2_mbsinit(const mbstate_t *); +size_t _UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); + +typedef struct { + int count; + u_char bytes[3]; +} _UTF2State; int _UTF2_init(_RuneLocale *rl) @@ -60,21 +66,44 @@ _UTF2_init(_RuneLocale *rl) __mbrtowc = _UTF2_mbrtowc; __wcrtomb = _UTF2_wcrtomb; + __mbsinit = _UTF2_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 3; return (0); } +int +_UTF2_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_UTF2State *)ps)->count == 0); +} + size_t _UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + mbstate_t * __restrict ps) { - int ch, i, len, mask; + _UTF2State *us; + int ch, i, len, mask, ocount; wchar_t wch; + size_t ncopy; + + us = (_UTF2State *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(us->bytes) - us->count); + memcpy(us->bytes + us->count, s, ncopy); + ocount = us->count; + us->count += ncopy; + s = (char *)us->bytes; + n = us->count; - if (s == NULL) - return (0); if (n == 0) return ((size_t)-2); @@ -108,7 +137,8 @@ _UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, } if (pwc != NULL) *pwc = wch; - 
return (wch == L'\0' ? 0 : len); + us->count = 0; + return (wch == L'\0' ? 0 : len - ocount); } size_t @@ -119,6 +149,7 @@ _UTF2_wcrtomb(char * __restrict s, wchar_t wc, int i, len; if (s == NULL) + /* Reset to initial conversion state. */ return (1); if ((wc & ~0x7f) == 0) { diff --git a/lib/libc/locale/utf8.c b/lib/libc/locale/utf8.c index e1cbdea..113cdaf 100644 --- a/lib/libc/locale/utf8.c +++ b/lib/libc/locale/utf8.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2002, 2003 Tim J. Robbins + * Copyright (c) 2002-2004 Tim J. Robbins * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -24,23 +24,29 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> +#include <sys/param.h> __FBSDID("$FreeBSD$"); #include <errno.h> #include <runetype.h> -#include <stddef.h> -#include <stdio.h> #include <stdlib.h> +#include <string.h> #include <wchar.h> extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); +extern int (*__mbsinit)(const mbstate_t *); extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict); -size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, +size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); -size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); +int _UTF8_mbsinit(const mbstate_t *); +size_t _UTF8_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); + +typedef struct { + int count; + u_char bytes[6]; +} _UTF8State; int _UTF8_init(_RuneLocale *rl) @@ -48,22 +54,44 @@ _UTF8_init(_RuneLocale *rl) __mbrtowc = _UTF8_mbrtowc; __wcrtomb = _UTF8_wcrtomb; + __mbsinit = _UTF8_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 6; return (0); } +int +_UTF8_mbsinit(const mbstate_t *ps) +{ + + return (ps == NULL || ((_UTF8State *)ps)->count == 0); +} + size_t _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps __unused) + 
mbstate_t * __restrict ps) { - int ch, i, len, mask; + _UTF8State *us; + int ch, i, len, mask, ocount; wchar_t lbound, wch; + size_t ncopy; + + us = (_UTF8State *)ps; + + if (s == NULL) { + s = ""; + n = 1; + pwc = NULL; + } + + ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(us->bytes) - us->count); + memcpy(us->bytes + us->count, s, ncopy); + ocount = us->count; + us->count += ncopy; + s = (char *)us->bytes; + n = us->count; - if (s == NULL) - /* Reset to initial shift state (no-op) */ - return (0); if (n == 0) /* Incomplete multibyte sequence */ return ((size_t)-2); @@ -143,7 +171,8 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, } if (pwc != NULL) *pwc = wch; - return (wch == L'\0' ? 0 : len); + us->count = 0; + return (wch == L'\0' ? 0 : len - ocount); } size_t |