diff options
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/locale/Makefile.inc | 4 | ||||
-rw-r--r-- | lib/libc/locale/mbrune.3 | 1 | ||||
-rw-r--r-- | lib/libc/locale/multibyte.3 | 1 | ||||
-rw-r--r-- | lib/libc/locale/rune.3 | 1 | ||||
-rw-r--r-- | lib/libc/locale/setlocale.3 | 1 | ||||
-rw-r--r-- | lib/libc/locale/setrunelocale.c | 3 | ||||
-rw-r--r-- | lib/libc/locale/utf2.5 | 103 | ||||
-rw-r--r-- | lib/libc/locale/utf2.c | 184 | ||||
-rw-r--r-- | lib/libc/locale/utf8.5 | 22 |
9 files changed, 3 insertions, 317 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index cf02a3f..260d196 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -13,7 +13,7 @@ SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \ mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rune.c \ runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \ table.c \ - tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsnrtombs.c \ + tolower.c toupper.c utf8.c wcrtomb.c wcsnrtombs.c \ wcsrtombs.c wcsftime.c \ wcstof.c wcstod.c \ wcstoimax.c wcstol.c wcstold.c wcstoll.c \ @@ -36,7 +36,7 @@ MAN+= btowc.3 \ wcrtomb.3 \ wcsrtombs.3 wcstod.3 wcstol.3 wcstombs.3 wctomb.3 \ wctrans.3 wctype.3 wcwidth.3 -MAN+= big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf2.5 utf8.5 +MAN+= big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf8.5 MLINKS+=btowc.3 wctob.3 MLINKS+=isdigit.3 isnumber.3 diff --git a/lib/libc/locale/mbrune.3 b/lib/libc/locale/mbrune.3 index 6474ece..02cab23 100644 --- a/lib/libc/locale/mbrune.3 +++ b/lib/libc/locale/mbrune.3 @@ -159,7 +159,6 @@ does not appear in the string. .Xr rune 3 , .Xr setlocale 3 , .Xr euc 5 , -.Xr utf2 5 , .Xr utf8 5 .Sh HISTORY The diff --git a/lib/libc/locale/multibyte.3 b/lib/libc/locale/multibyte.3 index 6047113..02b6342 100644 --- a/lib/libc/locale/multibyte.3 +++ b/lib/libc/locale/multibyte.3 @@ -140,7 +140,6 @@ multibyte characters: .Xr gb2312 5 , .Xr gbk 5 , .Xr mskanji 5 , -.Xr utf2 5 , .Xr utf8 5 .Sh STANDARDS These functions conform to diff --git a/lib/libc/locale/rune.3 b/lib/libc/locale/rune.3 index b572da4..a2f306e 100644 --- a/lib/libc/locale/rune.3 +++ b/lib/libc/locale/rune.3 @@ -268,7 +268,6 @@ binary LC_CTYPE file for the locale .Xr mbrune 3 , .Xr setlocale 3 , .Xr euc 5 , -.Xr utf2 5 , .Xr utf8 5 .Sh HISTORY These functions first appeared in diff --git a/lib/libc/locale/setlocale.3 b/lib/libc/locale/setlocale.3 index 278e18d..ce0c66d 100644 --- a/lib/libc/locale/setlocale.3 +++ b/lib/libc/locale/setlocale.3 @@ -171,7 +171,6 @@ and the category .Xr strcoll 3 , .Xr strxfrm 3 , .Xr euc 5 , -.Xr utf2 5 , .Xr utf8 5 , .Xr environ 7 .Sh STANDARDS diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c index 15e069f..c2cc7f0 100644 --- a/lib/libc/locale/setrunelocale.c +++ b/lib/libc/locale/setrunelocale.c @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include "setlocale.h" extern int _none_init(_RuneLocale *); -extern int _UTF2_init(_RuneLocale *); extern int _UTF8_init(_RuneLocale *); extern int _EUC_init(_RuneLocale *); extern int _GB18030_init(_RuneLocale *); @@ -164,8 +163,6 @@ __setrunelocale(const char *encoding) rl->__sgetrune = __emulated_sgetrune; if (strcmp(rl->__encoding, "NONE") == 0) ret = _none_init(rl); - else if (strcmp(rl->__encoding, "UTF2") == 0) - ret = _UTF2_init(rl); else if (strcmp(rl->__encoding, "UTF-8") == 0) ret = _UTF8_init(rl); else if (strcmp(rl->__encoding, "EUC") == 0) diff --git a/lib/libc/locale/utf2.5 b/lib/libc/locale/utf2.5 deleted file mode 100644 index 833f626..0000000 --- a/lib/libc/locale/utf2.5 +++ /dev/null @@ -1,103 +0,0 @@ -.\" Copyright (c) 1993 -.\" The Regents of the University of California. All rights reserved. -.\" -.\" This code is derived from software contributed to Berkeley by -.\" Paul Borman at Krystal Technologies. -.\" -.\" Redistribution and use in source and binary forms, with or without -.\" modification, are permitted provided that the following conditions -.\" are met: -.\" 1. Redistributions of source code must retain the above copyright -.\" notice, this list of conditions and the following disclaimer. -.\" 2. Redistributions in binary form must reproduce the above copyright -.\" notice, this list of conditions and the following disclaimer in the -.\" documentation and/or other materials provided with the distribution. -.\" 3. All advertising materials mentioning features or use of this software -.\" must display the following acknowledgement: -.\" This product includes software developed by the University of -.\" California, Berkeley and its contributors. -.\" 4. Neither the name of the University nor the names of its contributors -.\" may be used to endorse or promote products derived from this software -.\" without specific prior written permission. -.\" -.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -.\" SUCH DAMAGE. -.\" -.\" @(#)utf2.4 8.1 (Berkeley) 6/4/93 -.\" $FreeBSD$ -.\" -.Dd August 21, 2004 -.Dt UTF2 5 -.Os -.Sh NAME -.Nm utf2 -.Nd "Universal character set Transformation Format encoding of wide characters" -.Sh SYNOPSIS -.Nm ENCODING -.Qq UTF2 -.Sh DESCRIPTION -.Bf Em -The UTF2 encoding is obsolete and may not be supported in future releases. -.Ef -UTF-8 should be used instead. -.Pp -The -.Nm UTF2 -encoding is based on a proposed X-Open multibyte -.Tn FSS-UCS-TF -(File System Safe Universal Character Set Transformation Format) -encoding as used in -.Tn "Plan 9" -from Bell Labs. -Although it is capable of representing more than 16 bits, -the current implementation is limited to 16 bits as defined by the -Unicode Standard. -.Pp -.Nm UTF2 -representation is backwards compatible with -.Tn ASCII , -so 0x00-0x7f refer to the -.Tn ASCII -character set. -The multibyte encodings of wide characters between -0x0080 and 0xffff -consist entirely of bytes whose high order bit is set. -The actual -encoding is represented by the following table: -.Bd -literal -[0x0000 - 0x007f] [00000000.0bbbbbbb] -> 0bbbbbbb -[0x0080 - 0x07ff] [00000bbb.bbbbbbbb] -> 110bbbbb, 10bbbbbb -[0x0800 - 0xffff] [bbbbbbbb.bbbbbbbb] -> 1110bbbb, 10bbbbbb, 10bbbbbb -.Ed -.Pp -If more than a single representation of a value exists (for example, -0x00; 0xC0 0x80; 0xE0 0x80 0x80) the shortest representation is always -used (but the longer ones will be correctly decoded). -.Pp -The final three encodings provided by X-Open: -.Bd -literal -[00000000.000bbbbb.bbbbbbbb.bbbbbbbb] -> - 11110bbb, 10bbbbbb, 10bbbbbb, 10bbbbbb - -[000000bb.bbbbbbbb.bbbbbbbb.bbbbbbbb] -> - 111110bb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb - -[0bbbbbbb.bbbbbbbb.bbbbbbbb.bbbbbbbb] -> - 1111110b, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb -.Ed -.Pp -which provides for the entire proposed ISO-10646 31 bit standard are currently -not implemented. -.Sh SEE ALSO -.Xr mklocale 1 , -.Xr setlocale 3 , -.Xr utf8 5 diff --git a/lib/libc/locale/utf2.c b/lib/libc/locale/utf2.c deleted file mode 100644 index cf0e3c8..0000000 --- a/lib/libc/locale/utf2.c +++ /dev/null @@ -1,184 +0,0 @@ -/*- - * Copyright (c) 2002-2004 Tim J. Robbins - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * UTF2 encoding. - * - * This is an obsolete subset of UTF-8, maintained for temporary - * compatibility with old applications. It is limited to 1-, 2- or - * 3-byte encodings, and allows redundantly-encoded characters. - * - * See utf2(5) for details. - */ - -/* UTF2 is obsolete and will be removed in FreeBSD 6 -- use UTF-8 instead. */ -#define OBSOLETE_IN_6 - -#include <sys/param.h> -__FBSDID("$FreeBSD$"); - -#include <errno.h> -#include <runetype.h> -#include <stdlib.h> -#include <string.h> -#include <wchar.h> -#include "mblocal.h" - -size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t, - mbstate_t * __restrict); -int _UTF2_mbsinit(const mbstate_t *); -size_t _UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict); - -typedef struct { - int count; - u_char bytes[3]; -} _UTF2State; - -int -_UTF2_init(_RuneLocale *rl) -{ - - __mbrtowc = _UTF2_mbrtowc; - __wcrtomb = _UTF2_wcrtomb; - __mbsinit = _UTF2_mbsinit; - _CurrentRuneLocale = rl; - __mb_cur_max = 3; - - return (0); -} - -int -_UTF2_mbsinit(const mbstate_t *ps) -{ - - return (ps == NULL || ((const _UTF2State *)ps)->count == 0); -} - -size_t -_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n, - mbstate_t * __restrict ps) -{ - _UTF2State *us; - int ch, i, len, mask, ocount; - wchar_t wch; - size_t ncopy; - - us = (_UTF2State *)ps; - - if (us->count < 0 || us->count > sizeof(us->bytes)) { - errno = EINVAL; - return ((size_t)-1); - } - - if (s == NULL) { - s = ""; - n = 1; - pwc = NULL; - } - - ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(us->bytes) - us->count); - memcpy(us->bytes + us->count, s, ncopy); - ocount = us->count; - us->count += ncopy; - s = (char *)us->bytes; - n = us->count; - - if (n == 0) - return ((size_t)-2); - - ch = (unsigned char)*s; - if ((ch & 0x80) == 0) { - mask = 0x7f; - len = 1; - } else if ((ch & 0xe0) == 0xc0) { - mask = 0x1f; - len = 2; - } else if ((ch & 0xf0) == 0xe0) { - mask = 0x0f; - len = 3; - } else { - errno = EILSEQ; - return ((size_t)-1); - } - - if (n < (size_t)len) - return ((size_t)-2); - - wch = (unsigned char)*s++ & mask; - i = len; - while (--i != 0) { - if ((*s & 0xc0) != 0x80) { - errno = EILSEQ; - return ((size_t)-1); - } - wch <<= 6; - wch |= *s++ & 0x3f; - } - if (pwc != NULL) - *pwc = wch; - us->count = 0; - return (wch == L'\0' ? 0 : len - ocount); -} - -size_t -_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps) -{ - _UTF2State *us; - unsigned char lead; - int i, len; - - us = (_UTF2State *)ps; - - if (us->count != 0) { - errno = EINVAL; - return ((size_t)-1); - } - - if (s == NULL) - /* Reset to initial conversion state. */ - return (1); - - if ((wc & ~0x7f) == 0) { - lead = 0; - len = 1; - } else if ((wc & ~0x7ff) == 0) { - lead = 0xc0; - len = 2; - } else if ((wc & ~0xffff) == 0) { - lead = 0xe0; - len = 3; - } else { - errno = EILSEQ; - return ((size_t)-1); - } - - for (i = len - 1; i > 0; i--) { - s[i] = (wc & 0x3f) | 0x80; - wc >>= 6; - } - *s = (wc & 0xff) | lead; - - return (len); -} diff --git a/lib/libc/locale/utf8.5 b/lib/libc/locale/utf8.5 index f142b63..01a8a51 100644 --- a/lib/libc/locale/utf8.5 +++ b/lib/libc/locale/utf8.5 @@ -78,28 +78,8 @@ If more than a single representation of a value exists (for example, used. Longer ones are detected as an error as they pose a potential security risk, and destroy the 1:1 character:octet sequence mapping. -.Sh COMPATIBILITY -The -.Nm -encoding supersedes the -.Xr utf2 5 -encoding. -The only differences between the two are that -.Nm -handles the full 31-bit character set of -.Tn ISO -10646 -whereas -.Xr utf2 5 -is limited to a 16-bit character set, -and that -.Xr utf2 5 -accepts redundant, -.No non- Ns Dq "shortest form" -representations of characters. .Sh SEE ALSO -.Xr euc 5 , -.Xr utf2 5 +.Xr euc 5 .Rs .%A "Rob Pike" .%A "Ken Thompson" |