diff options
author | ache <ache@FreeBSD.org> | 2007-10-13 16:28:22 +0000 |
---|---|---|
committer | ache <ache@FreeBSD.org> | 2007-10-13 16:28:22 +0000 |
commit | a5038f060de9f1cc50cf532f78541dfd901f10b8 (patch) | |
tree | 364de71872fe91708dda5fd7ffeb957967a6f749 /lib/libc | |
parent | 5b067f00c53dea274158508c1867eedfa02afea8 (diff) | |
download | FreeBSD-src-a5038f060de9f1cc50cf532f78541dfd901f10b8.zip FreeBSD-src-a5038f060de9f1cc50cf532f78541dfd901f10b8.tar.gz |
The problem is: currently our single byte ctype(3) functions are broken
for wide character locales in the argument range >= 0x80 - they may
return false positives.
Example 1: for UTF-8 locale we currently have:
iswspace(0xA0)==1 and isspace(0xA0)==1
(because iswspace() and isspace() are the same code)
but must have
iswspace(0xA0)==1 and isspace(0xA0)==0
(because neither this character nor any other in the range 0x80..0xff
exists as a single byte in the UTF-8 locale; it keeps only ASCII in the single byte
range because our internal wchar_t representation for UTF-8 is UCS-4).
Example 2: for all wide character locales isalpha(arg) when arg > 0xFF may
return false positives (must be 0).
(because iswalpha() and isalpha() are the same code)
This change addresses this issue by separating single byte and wide ctype
and also fixes iswascii() (currently iswascii() is broken for
arguments > 0xFF).
This change is 100% binary compatible with old binaries.
Reviewed by: i18n@
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/locale/Symbol.map | 5 | ||||
-rw-r--r-- | lib/libc/locale/big5.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/euc.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/gb18030.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/gb2312.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/gbk.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/isctype.c | 38 | ||||
-rw-r--r-- | lib/libc/locale/iswctype.c | 2 | ||||
-rw-r--r-- | lib/libc/locale/mskanji.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/none.c | 7 | ||||
-rw-r--r-- | lib/libc/locale/setrunelocale.c | 5 | ||||
-rw-r--r-- | lib/libc/locale/utf8.c | 3 |
12 files changed, 57 insertions, 21 deletions
diff --git a/lib/libc/locale/Symbol.map b/lib/libc/locale/Symbol.map index 12daba1..20d092b 100644 --- a/lib/libc/locale/Symbol.map +++ b/lib/libc/locale/Symbol.map @@ -60,12 +60,17 @@ FBSD_1.0 { nextwctype; nl_langinfo; __maskrune; + __sbmaskrune; __istype; + __sbistype; __isctype; __toupper; + __sbtoupper; __tolower; + __sbtolower; __wcwidth; __mb_cur_max; + __mb_sb_limit; rpmatch; ___runetype; setlocale; diff --git a/lib/libc/locale/big5.c b/lib/libc/locale/big5.c index 44b9957..19977d0 100644 --- a/lib/libc/locale/big5.c +++ b/lib/libc/locale/big5.c @@ -49,6 +49,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _BIG5_mbsinit(const mbstate_t *); @@ -68,6 +70,7 @@ _BIG5_init(_RuneLocale *rl) __mbsinit = _BIG5_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c index b3b35ed..188073e 100644 --- a/lib/libc/locale/euc.c +++ b/lib/libc/locale/euc.c @@ -49,6 +49,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _EUC_mbsinit(const mbstate_t *); @@ -116,6 +118,7 @@ _EUC_init(_RuneLocale *rl) __mbrtowc = _EUC_mbrtowc; __wcrtomb = _EUC_wcrtomb; __mbsinit = _EUC_mbsinit; + __mb_sb_limit = 256; return (0); } diff --git a/lib/libc/locale/gb18030.c b/lib/libc/locale/gb18030.c index 3e43179..1457d3e 100644 --- a/lib/libc/locale/gb18030.c +++ b/lib/libc/locale/gb18030.c @@ -39,6 +39,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB18030_mbsinit(const mbstate_t *); @@ -59,6 +61,7 @@ 
_GB18030_init(_RuneLocale *rl) __mbsinit = _GB18030_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 4; + __mb_sb_limit = 128; return (0); } diff --git a/lib/libc/locale/gb2312.c b/lib/libc/locale/gb2312.c index 232daba..74a7bdc 100644 --- a/lib/libc/locale/gb2312.c +++ b/lib/libc/locale/gb2312.c @@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GB2312_mbsinit(const mbstate_t *); @@ -55,6 +57,7 @@ _GB2312_init(_RuneLocale *rl) __wcrtomb = _GB2312_wcrtomb; __mbsinit = _GB2312_mbsinit; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } diff --git a/lib/libc/locale/gbk.c b/lib/libc/locale/gbk.c index 5288293..802f78e 100644 --- a/lib/libc/locale/gbk.c +++ b/lib/libc/locale/gbk.c @@ -42,6 +42,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _GBK_mbsinit(const mbstate_t *); @@ -61,6 +63,7 @@ _GBK_init(_RuneLocale *rl) __mbsinit = _GBK_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 128; return (0); } diff --git a/lib/libc/locale/isctype.c b/lib/libc/locale/isctype.c index 13ac6c2..be1b091 100644 --- a/lib/libc/locale/isctype.c +++ b/lib/libc/locale/isctype.c @@ -48,7 +48,7 @@ int digittoint(c) int c; { - return (__maskrune(c, 0xFF)); + return (__sbmaskrune(c, 0xFF)); } #undef isalnum @@ -56,7 +56,7 @@ int isalnum(c) int c; { - return (__istype(c, _CTYPE_A|_CTYPE_D)); + return (__sbistype(c, _CTYPE_A|_CTYPE_D)); } #undef isalpha @@ -64,7 +64,7 @@ int isalpha(c) int c; { - return (__istype(c, _CTYPE_A)); + return (__sbistype(c, _CTYPE_A)); } #undef isascii @@ -80,7 +80,7 @@ int isblank(c) int c; { - return (__istype(c, _CTYPE_B)); + return (__sbistype(c, _CTYPE_B)); } #undef iscntrl @@ -88,7 +88,7 @@ int 
iscntrl(c) int c; { - return (__istype(c, _CTYPE_C)); + return (__sbistype(c, _CTYPE_C)); } #undef isdigit @@ -104,7 +104,7 @@ int isgraph(c) int c; { - return (__istype(c, _CTYPE_G)); + return (__sbistype(c, _CTYPE_G)); } #undef ishexnumber @@ -112,7 +112,7 @@ int ishexnumber(c) int c; { - return (__istype(c, _CTYPE_X)); + return (__sbistype(c, _CTYPE_X)); } #undef isideogram @@ -120,7 +120,7 @@ int isideogram(c) int c; { - return (__istype(c, _CTYPE_I)); + return (__sbistype(c, _CTYPE_I)); } #undef islower @@ -128,7 +128,7 @@ int islower(c) int c; { - return (__istype(c, _CTYPE_L)); + return (__sbistype(c, _CTYPE_L)); } #undef isnumber @@ -136,7 +136,7 @@ int isnumber(c) int c; { - return (__istype(c, _CTYPE_D)); + return (__sbistype(c, _CTYPE_D)); } #undef isphonogram @@ -144,7 +144,7 @@ int isphonogram(c) int c; { - return (__istype(c, _CTYPE_Q)); + return (__sbistype(c, _CTYPE_Q)); } #undef isprint @@ -152,7 +152,7 @@ int isprint(c) int c; { - return (__istype(c, _CTYPE_R)); + return (__sbistype(c, _CTYPE_R)); } #undef ispunct @@ -160,7 +160,7 @@ int ispunct(c) int c; { - return (__istype(c, _CTYPE_P)); + return (__sbistype(c, _CTYPE_P)); } #undef isrune @@ -168,7 +168,7 @@ int isrune(c) int c; { - return (__istype(c, 0xFFFFFF00L)); + return (__sbistype(c, 0xFFFFFF00L)); } #undef isspace @@ -176,7 +176,7 @@ int isspace(c) int c; { - return (__istype(c, _CTYPE_S)); + return (__sbistype(c, _CTYPE_S)); } #undef isspecial @@ -184,7 +184,7 @@ int isspecial(c) int c; { - return (__istype(c, _CTYPE_T)); + return (__sbistype(c, _CTYPE_T)); } #undef isupper @@ -192,7 +192,7 @@ int isupper(c) int c; { - return (__istype(c, _CTYPE_U)); + return (__sbistype(c, _CTYPE_U)); } #undef isxdigit @@ -216,7 +216,7 @@ int tolower(c) int c; { - return (__tolower(c)); + return (__sbtolower(c)); } #undef toupper @@ -224,6 +224,6 @@ int toupper(c) int c; { - return (__toupper(c)); + return (__sbtoupper(c)); } diff --git a/lib/libc/locale/iswctype.c b/lib/libc/locale/iswctype.c index 
eaa1bf3..c2e0f53 100644 --- a/lib/libc/locale/iswctype.c +++ b/lib/libc/locale/iswctype.c @@ -61,7 +61,7 @@ int iswascii(wc) wint_t wc; { - return ((wc & ~0x7F) == 0); + return (wc < 0x80); } #undef iswblank diff --git a/lib/libc/locale/mskanji.c b/lib/libc/locale/mskanji.c index aba87e7..9ee91de 100644 --- a/lib/libc/locale/mskanji.c +++ b/lib/libc/locale/mskanji.c @@ -47,6 +47,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _MSKanji_mbsinit(const mbstate_t *); @@ -66,6 +68,7 @@ _MSKanji_init(_RuneLocale *rl) __mbsinit = _MSKanji_mbsinit; _CurrentRuneLocale = rl; __mb_cur_max = 2; + __mb_sb_limit = 256; return (0); } diff --git a/lib/libc/locale/none.c b/lib/libc/locale/none.c index 79981e5..22fcd20 100644 --- a/lib/libc/locale/none.c +++ b/lib/libc/locale/none.c @@ -58,6 +58,11 @@ static size_t _none_wcrtomb(char * __restrict, wchar_t, static size_t _none_wcsnrtombs(char * __restrict, const wchar_t ** __restrict, size_t, size_t, mbstate_t * __restrict); +/* setup defaults */ + +int __mb_cur_max = 1; +int __mb_sb_limit = 256; /* Expected to be <= _CACHED_RUNES */ + int _none_init(_RuneLocale *rl) { @@ -69,6 +74,7 @@ _none_init(_RuneLocale *rl) __wcsnrtombs = _none_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 1; + __mb_sb_limit = 256; return(0); } @@ -176,7 +182,6 @@ _none_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, /* setup defaults */ -int __mb_cur_max = 1; size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict) = _none_mbrtowc; int (*__mbsinit)(const mbstate_t *) = _none_mbsinit; diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index 9484709..e723ea6 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include "mblocal.h" 
#include "setlocale.h" +extern int __mb_sb_limit; + extern _RuneLocale *_Read_RuneMagi(FILE *); static int __setrunelocale(const char *); @@ -59,6 +61,7 @@ __setrunelocale(const char *encoding) static char ctype_encoding[ENCODING_LEN + 1]; static _RuneLocale *CachedRuneLocale; static int Cached__mb_cur_max; + static int Cached__mb_sb_limit; static size_t (*Cached__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static size_t (*Cached__wcrtomb)(char * __restrict, wchar_t, @@ -85,6 +88,7 @@ __setrunelocale(const char *encoding) strcmp(encoding, ctype_encoding) == 0) { _CurrentRuneLocale = CachedRuneLocale; __mb_cur_max = Cached__mb_cur_max; + __mb_sb_limit = Cached__mb_sb_limit; __mbrtowc = Cached__mbrtowc; __mbsinit = Cached__mbsinit; __mbsnrtowcs = Cached__mbsnrtowcs; @@ -147,6 +151,7 @@ __setrunelocale(const char *encoding) } CachedRuneLocale = _CurrentRuneLocale; Cached__mb_cur_max = __mb_cur_max; + Cached__mb_sb_limit = __mb_sb_limit; Cached__mbrtowc = __mbrtowc; Cached__mbsinit = __mbsinit; Cached__mbsnrtowcs = __mbsnrtowcs; diff --git a/lib/libc/locale/utf8.c b/lib/libc/locale/utf8.c index e467fc0..086a1e4 100644 --- a/lib/libc/locale/utf8.c +++ b/lib/libc/locale/utf8.c @@ -35,6 +35,8 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "mblocal.h" +extern int __mb_sb_limit; + static size_t _UTF8_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, mbstate_t * __restrict); static int _UTF8_mbsinit(const mbstate_t *); @@ -63,6 +65,7 @@ _UTF8_init(_RuneLocale *rl) __wcsnrtombs = _UTF8_wcsnrtombs; _CurrentRuneLocale = rl; __mb_cur_max = 6; + __mb_sb_limit = 128; return (0); } |