summary | refs | log | tree | commit | diff | stats
path: root/lib
diff options
context:
space:
mode:
author tjr <tjr@FreeBSD.org> 2004-10-17 02:29:15 +0000
committer tjr <tjr@FreeBSD.org> 2004-10-17 02:29:15 +0000
commit e40fa30ce61655590cbd4e7c025b2731600ac9b6 (patch)
tree 6d6fb7d4a9ac47dbf0567825652b2c97a5f4156e /lib
parent 173f5d041b75ccb5f441455614c36ae702ae68d8 (diff)
download FreeBSD-src-e40fa30ce61655590cbd4e7c025b2731600ac9b6.zip
download FreeBSD-src-e40fa30ce61655590cbd4e7c025b2731600ac9b6.tar.gz
Remove support for the obsolete UTF2 encoding.
Diffstat (limited to 'lib')
-rw-r--r-- lib/libc/locale/Makefile.inc 4
-rw-r--r-- lib/libc/locale/mbrune.3 1
-rw-r--r-- lib/libc/locale/multibyte.3 1
-rw-r--r-- lib/libc/locale/rune.3 1
-rw-r--r-- lib/libc/locale/setlocale.3 1
-rw-r--r-- lib/libc/locale/setrunelocale.c 3
-rw-r--r-- lib/libc/locale/utf2.5 103
-rw-r--r-- lib/libc/locale/utf2.c 184
-rw-r--r-- lib/libc/locale/utf8.5 22
9 files changed, 3 insertions, 317 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc
index cf02a3f..260d196 100644
--- a/lib/libc/locale/Makefile.inc
+++ b/lib/libc/locale/Makefile.inc
@@ -13,7 +13,7 @@ SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \
mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rune.c \
runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \
table.c \
- tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsnrtombs.c \
+ tolower.c toupper.c utf8.c wcrtomb.c wcsnrtombs.c \
wcsrtombs.c wcsftime.c \
wcstof.c wcstod.c \
wcstoimax.c wcstol.c wcstold.c wcstoll.c \
@@ -36,7 +36,7 @@ MAN+= btowc.3 \
wcrtomb.3 \
wcsrtombs.3 wcstod.3 wcstol.3 wcstombs.3 wctomb.3 \
wctrans.3 wctype.3 wcwidth.3
-MAN+= big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf2.5 utf8.5
+MAN+= big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf8.5
MLINKS+=btowc.3 wctob.3
MLINKS+=isdigit.3 isnumber.3
diff --git a/lib/libc/locale/mbrune.3 b/lib/libc/locale/mbrune.3
index 6474ece..02cab23 100644
--- a/lib/libc/locale/mbrune.3
+++ b/lib/libc/locale/mbrune.3
@@ -159,7 +159,6 @@ does not appear in the string.
.Xr rune 3 ,
.Xr setlocale 3 ,
.Xr euc 5 ,
-.Xr utf2 5 ,
.Xr utf8 5
.Sh HISTORY
The
diff --git a/lib/libc/locale/multibyte.3 b/lib/libc/locale/multibyte.3
index 6047113..02b6342 100644
--- a/lib/libc/locale/multibyte.3
+++ b/lib/libc/locale/multibyte.3
@@ -140,7 +140,6 @@ multibyte characters:
.Xr gb2312 5 ,
.Xr gbk 5 ,
.Xr mskanji 5 ,
-.Xr utf2 5 ,
.Xr utf8 5
.Sh STANDARDS
These functions conform to
diff --git a/lib/libc/locale/rune.3 b/lib/libc/locale/rune.3
index b572da4..a2f306e 100644
--- a/lib/libc/locale/rune.3
+++ b/lib/libc/locale/rune.3
@@ -268,7 +268,6 @@ binary LC_CTYPE file for the locale
.Xr mbrune 3 ,
.Xr setlocale 3 ,
.Xr euc 5 ,
-.Xr utf2 5 ,
.Xr utf8 5
.Sh HISTORY
These functions first appeared in
diff --git a/lib/libc/locale/setlocale.3 b/lib/libc/locale/setlocale.3
index 278e18d..ce0c66d 100644
--- a/lib/libc/locale/setlocale.3
+++ b/lib/libc/locale/setlocale.3
@@ -171,7 +171,6 @@ and the category
.Xr strcoll 3 ,
.Xr strxfrm 3 ,
.Xr euc 5 ,
-.Xr utf2 5 ,
.Xr utf8 5 ,
.Xr environ 7
.Sh STANDARDS
diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c
index 15e069f..c2cc7f0 100644
--- a/lib/libc/locale/setrunelocale.c
+++ b/lib/libc/locale/setrunelocale.c
@@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include "setlocale.h"
extern int _none_init(_RuneLocale *);
-extern int _UTF2_init(_RuneLocale *);
extern int _UTF8_init(_RuneLocale *);
extern int _EUC_init(_RuneLocale *);
extern int _GB18030_init(_RuneLocale *);
@@ -164,8 +163,6 @@ __setrunelocale(const char *encoding)
rl->__sgetrune = __emulated_sgetrune;
if (strcmp(rl->__encoding, "NONE") == 0)
ret = _none_init(rl);
- else if (strcmp(rl->__encoding, "UTF2") == 0)
- ret = _UTF2_init(rl);
else if (strcmp(rl->__encoding, "UTF-8") == 0)
ret = _UTF8_init(rl);
else if (strcmp(rl->__encoding, "EUC") == 0)
diff --git a/lib/libc/locale/utf2.5 b/lib/libc/locale/utf2.5
deleted file mode 100644
index 833f626..0000000
--- a/lib/libc/locale/utf2.5
+++ /dev/null
@@ -1,103 +0,0 @@
-.\" Copyright (c) 1993
-.\" The Regents of the University of California. All rights reserved.
-.\"
-.\" This code is derived from software contributed to Berkeley by
-.\" Paul Borman at Krystal Technologies.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\" notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\" notice, this list of conditions and the following disclaimer in the
-.\" documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\" must display the following acknowledgement:
-.\" This product includes software developed by the University of
-.\" California, Berkeley and its contributors.
-.\" 4. Neither the name of the University nor the names of its contributors
-.\" may be used to endorse or promote products derived from this software
-.\" without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\" @(#)utf2.4 8.1 (Berkeley) 6/4/93
-.\" $FreeBSD$
-.\"
-.Dd August 21, 2004
-.Dt UTF2 5
-.Os
-.Sh NAME
-.Nm utf2
-.Nd "Universal character set Transformation Format encoding of wide characters"
-.Sh SYNOPSIS
-.Nm ENCODING
-.Qq UTF2
-.Sh DESCRIPTION
-.Bf Em
-The UTF2 encoding is obsolete and may not be supported in future releases.
-.Ef
-UTF-8 should be used instead.
-.Pp
-The
-.Nm UTF2
-encoding is based on a proposed X-Open multibyte
-.Tn FSS-UCS-TF
-(File System Safe Universal Character Set Transformation Format)
-encoding as used in
-.Tn "Plan 9"
-from Bell Labs.
-Although it is capable of representing more than 16 bits,
-the current implementation is limited to 16 bits as defined by the
-Unicode Standard.
-.Pp
-.Nm UTF2
-representation is backwards compatible with
-.Tn ASCII ,
-so 0x00-0x7f refer to the
-.Tn ASCII
-character set.
-The multibyte encodings of wide characters between
-0x0080 and 0xffff
-consist entirely of bytes whose high order bit is set.
-The actual
-encoding is represented by the following table:
-.Bd -literal
-[0x0000 - 0x007f] [00000000.0bbbbbbb] -> 0bbbbbbb
-[0x0080 - 0x07ff] [00000bbb.bbbbbbbb] -> 110bbbbb, 10bbbbbb
-[0x0800 - 0xffff] [bbbbbbbb.bbbbbbbb] -> 1110bbbb, 10bbbbbb, 10bbbbbb
-.Ed
-.Pp
-If more than a single representation of a value exists (for example,
-0x00; 0xC0 0x80; 0xE0 0x80 0x80) the shortest representation is always
-used (but the longer ones will be correctly decoded).
-.Pp
-The final three encodings provided by X-Open:
-.Bd -literal
-[00000000.000bbbbb.bbbbbbbb.bbbbbbbb] ->
- 11110bbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
-
-[000000bb.bbbbbbbb.bbbbbbbb.bbbbbbbb] ->
- 111110bb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
-
-[0bbbbbbb.bbbbbbbb.bbbbbbbb.bbbbbbbb] ->
- 1111110b, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
-.Ed
-.Pp
-which provides for the entire proposed ISO-10646 31 bit standard are currently
-not implemented.
-.Sh SEE ALSO
-.Xr mklocale 1 ,
-.Xr setlocale 3 ,
-.Xr utf8 5
diff --git a/lib/libc/locale/utf2.c b/lib/libc/locale/utf2.c
deleted file mode 100644
index cf0e3c8..0000000
--- a/lib/libc/locale/utf2.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*-
- * Copyright (c) 2002-2004 Tim J. Robbins
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * UTF2 encoding.
- *
- * This is an obsolete subset of UTF-8, maintained for temporary
- * compatibility with old applications. It is limited to 1-, 2- or
- * 3-byte encodings, and allows redundantly-encoded characters.
- *
- * See utf2(5) for details.
- */
-
-/* UTF2 is obsolete and will be removed in FreeBSD 6 -- use UTF-8 instead. */
-#define OBSOLETE_IN_6
-
-#include <sys/param.h>
-__FBSDID("$FreeBSD$");
-
-#include <errno.h>
-#include <runetype.h>
-#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-#include "mblocal.h"
-
-size_t _UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
- mbstate_t * __restrict);
-int _UTF2_mbsinit(const mbstate_t *);
-size_t _UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
-
-typedef struct {
- int count;
- u_char bytes[3];
-} _UTF2State;
-
-int
-_UTF2_init(_RuneLocale *rl)
-{
-
- __mbrtowc = _UTF2_mbrtowc;
- __wcrtomb = _UTF2_wcrtomb;
- __mbsinit = _UTF2_mbsinit;
- _CurrentRuneLocale = rl;
- __mb_cur_max = 3;
-
- return (0);
-}
-
-int
-_UTF2_mbsinit(const mbstate_t *ps)
-{
-
- return (ps == NULL || ((const _UTF2State *)ps)->count == 0);
-}
-
-size_t
-_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
- mbstate_t * __restrict ps)
-{
- _UTF2State *us;
- int ch, i, len, mask, ocount;
- wchar_t wch;
- size_t ncopy;
-
- us = (_UTF2State *)ps;
-
- if (us->count < 0 || us->count > sizeof(us->bytes)) {
- errno = EINVAL;
- return ((size_t)-1);
- }
-
- if (s == NULL) {
- s = "";
- n = 1;
- pwc = NULL;
- }
-
- ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(us->bytes) - us->count);
- memcpy(us->bytes + us->count, s, ncopy);
- ocount = us->count;
- us->count += ncopy;
- s = (char *)us->bytes;
- n = us->count;
-
- if (n == 0)
- return ((size_t)-2);
-
- ch = (unsigned char)*s;
- if ((ch & 0x80) == 0) {
- mask = 0x7f;
- len = 1;
- } else if ((ch & 0xe0) == 0xc0) {
- mask = 0x1f;
- len = 2;
- } else if ((ch & 0xf0) == 0xe0) {
- mask = 0x0f;
- len = 3;
- } else {
- errno = EILSEQ;
- return ((size_t)-1);
- }
-
- if (n < (size_t)len)
- return ((size_t)-2);
-
- wch = (unsigned char)*s++ & mask;
- i = len;
- while (--i != 0) {
- if ((*s & 0xc0) != 0x80) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- wch <<= 6;
- wch |= *s++ & 0x3f;
- }
- if (pwc != NULL)
- *pwc = wch;
- us->count = 0;
- return (wch == L'\0' ? 0 : len - ocount);
-}
-
-size_t
-_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
-{
- _UTF2State *us;
- unsigned char lead;
- int i, len;
-
- us = (_UTF2State *)ps;
-
- if (us->count != 0) {
- errno = EINVAL;
- return ((size_t)-1);
- }
-
- if (s == NULL)
- /* Reset to initial conversion state. */
- return (1);
-
- if ((wc & ~0x7f) == 0) {
- lead = 0;
- len = 1;
- } else if ((wc & ~0x7ff) == 0) {
- lead = 0xc0;
- len = 2;
- } else if ((wc & ~0xffff) == 0) {
- lead = 0xe0;
- len = 3;
- } else {
- errno = EILSEQ;
- return ((size_t)-1);
- }
-
- for (i = len - 1; i > 0; i--) {
- s[i] = (wc & 0x3f) | 0x80;
- wc >>= 6;
- }
- *s = (wc & 0xff) | lead;
-
- return (len);
-}
diff --git a/lib/libc/locale/utf8.5 b/lib/libc/locale/utf8.5
index f142b63..01a8a51 100644
--- a/lib/libc/locale/utf8.5
+++ b/lib/libc/locale/utf8.5
@@ -78,28 +78,8 @@ If more than a single representation of a value exists (for example,
used.
Longer ones are detected as an error as they pose a potential
security risk, and destroy the 1:1 character:octet sequence mapping.
-.Sh COMPATIBILITY
-The
-.Nm
-encoding supersedes the
-.Xr utf2 5
-encoding.
-The only differences between the two are that
-.Nm
-handles the full 31-bit character set of
-.Tn ISO
-10646
-whereas
-.Xr utf2 5
-is limited to a 16-bit character set,
-and that
-.Xr utf2 5
-accepts redundant,
-.No non- Ns Dq "shortest form"
-representations of characters.
.Sh SEE ALSO
-.Xr euc 5 ,
-.Xr utf2 5
+.Xr euc 5
.Rs
.%A "Rob Pike"
.%A "Ken Thompson"
OpenPOWER on IntegriCloud