Remove support for the obsolete UTF2 encoding.

author: tjr <tjr@FreeBSD.org> 2004-10-17 02:29:15 +0000
committer: tjr <tjr@FreeBSD.org> 2004-10-17 02:29:15 +0000
commit: e40fa30ce61655590cbd4e7c025b2731600ac9b6 (patch)
tree: 6d6fb7d4a9ac47dbf0567825652b2c97a5f4156e /lib
parent: 173f5d041b75ccb5f441455614c36ae702ae68d8 (diff)
download: FreeBSD-src-e40fa30ce61655590cbd4e7c025b2731600ac9b6.zip
FreeBSD-src-e40fa30ce61655590cbd4e7c025b2731600ac9b6.tar.gz
9 files changed, 3 insertions, 317 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc
index cf02a3f..260d196 100644
--- a/lib/libc/locale/Makefile.inc
+++ b/lib/libc/locale/Makefile.inc
@@ -13,7 +13,7 @@ SRCS+=	big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \
 	mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rune.c \
 	runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \
 	table.c \
-	tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsnrtombs.c \
+	tolower.c toupper.c utf8.c wcrtomb.c wcsnrtombs.c \
 	wcsrtombs.c wcsftime.c \
 	wcstof.c wcstod.c \
 	wcstoimax.c wcstol.c wcstold.c wcstoll.c \
@@ -36,7 +36,7 @@ MAN+=	btowc.3 \
 	wcrtomb.3 \
 	wcsrtombs.3 wcstod.3 wcstol.3 wcstombs.3 wctomb.3 \
 	wctrans.3 wctype.3 wcwidth.3
-MAN+=	big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf2.5 utf8.5
+MAN+=	big5.5 euc.5 gb18030.5 gb2312.5 gbk.5 mskanji.5 utf8.5
 
 MLINKS+=btowc.3 wctob.3
 MLINKS+=isdigit.3 isnumber.3
diff --git a/lib/libc/locale/mbrune.3 b/lib/libc/locale/mbrune.3
index 6474ece..02cab23 100644
--- a/lib/libc/locale/mbrune.3
+++ b/lib/libc/locale/mbrune.3
@@ -159,7 +159,6 @@ does not appear in the string.
 .Xr rune 3 ,
 .Xr setlocale 3 ,
 .Xr euc 5 ,
-.Xr utf2 5 ,
 .Xr utf8 5
 .Sh HISTORY
 The
diff --git a/lib/libc/locale/multibyte.3 b/lib/libc/locale/multibyte.3
index 6047113..02b6342 100644
--- a/lib/libc/locale/multibyte.3
+++ b/lib/libc/locale/multibyte.3
@@ -140,7 +140,6 @@ multibyte characters:
 .Xr gb2312 5 ,
 .Xr gbk 5 ,
 .Xr mskanji 5 ,
-.Xr utf2 5 ,
 .Xr utf8 5
 .Sh STANDARDS
 These functions conform to
diff --git a/lib/libc/locale/rune.3 b/lib/libc/locale/rune.3
index b572da4..a2f306e 100644
--- a/lib/libc/locale/rune.3
+++ b/lib/libc/locale/rune.3
@@ -268,7 +268,6 @@ binary LC_CTYPE file for the locale
 .Xr mbrune 3 ,
 .Xr setlocale 3 ,
 .Xr euc 5 ,
-.Xr utf2 5 ,
 .Xr utf8 5
 .Sh HISTORY
 These functions first appeared in
diff --git a/lib/libc/locale/setlocale.3 b/lib/libc/locale/setlocale.3
index 278e18d..ce0c66d 100644
--- a/lib/libc/locale/setlocale.3
+++ b/lib/libc/locale/setlocale.3
@@ -171,7 +171,6 @@ and the category
 .Xr strcoll 3 ,
 .Xr strxfrm 3 ,
 .Xr euc 5 ,
-.Xr utf2 5 ,
 .Xr utf8 5 ,
 .Xr environ 7
 .Sh STANDARDS
diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c
index 15e069f..c2cc7f0 100644
--- a/lib/libc/locale/setrunelocale.c
+++ b/lib/libc/locale/setrunelocale.c
@@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
 #include "setlocale.h"
 
 extern int		_none_init(_RuneLocale *);
-extern int		_UTF2_init(_RuneLocale *);
 extern int		_UTF8_init(_RuneLocale *);
 extern int		_EUC_init(_RuneLocale *);
 extern int		_GB18030_init(_RuneLocale *);
@@ -164,8 +163,6 @@ __setrunelocale(const char *encoding)
 	rl->__sgetrune = __emulated_sgetrune;
 	if (strcmp(rl->__encoding, "NONE") == 0)
 		ret = _none_init(rl);
-	else if (strcmp(rl->__encoding, "UTF2") == 0)
-		ret = _UTF2_init(rl);
 	else if (strcmp(rl->__encoding, "UTF-8") == 0)
 		ret = _UTF8_init(rl);
 	else if (strcmp(rl->__encoding, "EUC") == 0)
diff --git a/lib/libc/locale/utf2.5 b/lib/libc/locale/utf2.5
deleted file mode 100644
index 833f626..0000000
--- a/lib/libc/locale/utf2.5
+++ /dev/null
@@ -1,103 +0,0 @@
-.\" Copyright (c) 1993
-.\"	The Regents of the University of California.  All rights reserved.
-.\"
-.\" This code is derived from software contributed to Berkeley by
-.\" Paul Borman at Krystal Technologies.
-.\"
-.\" Redistribution and use in source and binary forms, with or without
-.\" modification, are permitted provided that the following conditions
-.\" are met:
-.\" 1. Redistributions of source code must retain the above copyright
-.\"    notice, this list of conditions and the following disclaimer.
-.\" 2. Redistributions in binary form must reproduce the above copyright
-.\"    notice, this list of conditions and the following disclaimer in the
-.\"    documentation and/or other materials provided with the distribution.
-.\" 3. All advertising materials mentioning features or use of this software
-.\"    must display the following acknowledgement:
-.\"	This product includes software developed by the University of
-.\"	California, Berkeley and its contributors.
-.\" 4. Neither the name of the University nor the names of its contributors
-.\"    may be used to endorse or promote products derived from this software
-.\"    without specific prior written permission.
-.\"
-.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
-.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
-.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-.\" SUCH DAMAGE.
-.\"
-.\"	@(#)utf2.4	8.1 (Berkeley) 6/4/93
-.\" $FreeBSD$
-.\"
-.Dd August 21, 2004
-.Dt UTF2 5
-.Os
-.Sh NAME
-.Nm utf2
-.Nd "Universal character set Transformation Format encoding of wide characters"
-.Sh SYNOPSIS
-.Nm ENCODING
-.Qq UTF2
-.Sh DESCRIPTION
-.Bf Em
-The UTF2 encoding is obsolete and may not be supported in future releases.
-.Ef
-UTF-8 should be used instead.
-.Pp
-The
-.Nm UTF2
-encoding is based on a proposed X-Open multibyte
-.Tn FSS-UCS-TF
-(File System Safe Universal Character Set Transformation Format)
-encoding as used in
-.Tn "Plan 9"
-from Bell Labs.
-Although it is capable of representing more than 16 bits,
-the current implementation is limited to 16 bits as defined by the
-Unicode Standard.
-.Pp
-.Nm UTF2
-representation is backwards compatible with
-.Tn ASCII ,
-so 0x00-0x7f refer to the
-.Tn ASCII
-character set.
-The multibyte encodings of wide characters between
-0x0080 and 0xffff
-consist entirely of bytes whose high order bit is set.
-The actual
-encoding is represented by the following table:
-.Bd -literal
-[0x0000 - 0x007f] [00000000.0bbbbbbb] -> 0bbbbbbb
-[0x0080 - 0x07ff] [00000bbb.bbbbbbbb] -> 110bbbbb, 10bbbbbb
-[0x0800 - 0xffff] [bbbbbbbb.bbbbbbbb] -> 1110bbbb, 10bbbbbb, 10bbbbbb
-.Ed
-.Pp
-If more than a single representation of a value exists (for example,
-0x00; 0xC0 0x80; 0xE0 0x80 0x80) the shortest representation is always
-used (but the longer ones will be correctly decoded).
-.Pp
-The final three encodings provided by X-Open:
-.Bd -literal
-[00000000.000bbbbb.bbbbbbbb.bbbbbbbb] ->
-	11110bbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
-
-[000000bb.bbbbbbbb.bbbbbbbb.bbbbbbbb] ->
-	111110bb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
-
-[0bbbbbbb.bbbbbbbb.bbbbbbbb.bbbbbbbb] ->
-	1111110b, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb, 10bbbbbb
-.Ed
-.Pp
-which provides for the entire proposed ISO-10646 31 bit standard are currently
-not implemented.
-.Sh SEE ALSO
-.Xr mklocale 1 ,
-.Xr setlocale 3 ,
-.Xr utf8 5
diff --git a/lib/libc/locale/utf2.c b/lib/libc/locale/utf2.c
deleted file mode 100644
index cf0e3c8..0000000
--- a/lib/libc/locale/utf2.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*-
- * Copyright (c) 2002-2004 Tim J. Robbins
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * UTF2 encoding.
- *
- * This is an obsolete subset of UTF-8, maintained for temporary
- * compatibility with old applications. It is limited to 1-, 2- or
- * 3-byte encodings, and allows redundantly-encoded characters.
- *
- * See utf2(5) for details.
- */
-
-/* UTF2 is obsolete and will be removed in FreeBSD 6 -- use UTF-8 instead. */
-#define	OBSOLETE_IN_6
-
-#include <sys/param.h>
-__FBSDID("$FreeBSD$");
-
-#include <errno.h>
-#include <runetype.h>
-#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-#include "mblocal.h"
-
-size_t	_UTF2_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
-	    mbstate_t * __restrict);
-int	_UTF2_mbsinit(const mbstate_t *);
-size_t	_UTF2_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
-
-typedef struct {
-	int	count;
-	u_char	bytes[3];
-} _UTF2State;
-
-int
-_UTF2_init(_RuneLocale *rl)
-{
-
-	__mbrtowc = _UTF2_mbrtowc;
-	__wcrtomb = _UTF2_wcrtomb;
-	__mbsinit = _UTF2_mbsinit;
-	_CurrentRuneLocale = rl;
-	__mb_cur_max = 3;
-
-	return (0);
-}
-
-int
-_UTF2_mbsinit(const mbstate_t *ps)
-{
-
-	return (ps == NULL || ((const _UTF2State *)ps)->count == 0);
-}
-
-size_t
-_UTF2_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
-    mbstate_t * __restrict ps)
-{
-	_UTF2State *us;
-	int ch, i, len, mask, ocount;
-	wchar_t wch;
-	size_t ncopy;
-
-	us = (_UTF2State *)ps;
-
-	if (us->count < 0 || us->count > sizeof(us->bytes)) {
-		errno = EINVAL;
-		return ((size_t)-1);
-	}
-
-	if (s == NULL) {
-		s = "";
-		n = 1;
-		pwc = NULL;
-	}
-
-	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(us->bytes) - us->count);
-	memcpy(us->bytes + us->count, s, ncopy);
-	ocount = us->count;
-	us->count += ncopy;
-	s = (char *)us->bytes;
-	n = us->count;
-
-	if (n == 0)
-		return ((size_t)-2);
-
-	ch = (unsigned char)*s;
-	if ((ch & 0x80) == 0) {
-		mask = 0x7f;
-		len = 1;
-	} else if ((ch & 0xe0) == 0xc0) {
-		mask = 0x1f;
-		len = 2;
-	} else if ((ch & 0xf0) == 0xe0) {
-		mask = 0x0f;
-		len = 3;
-	} else {
-		errno = EILSEQ;
-		return ((size_t)-1);
-	}
-
-	if (n < (size_t)len)
-		return ((size_t)-2);
-
-	wch = (unsigned char)*s++ & mask;
-	i = len;
-	while (--i != 0) {
-		if ((*s & 0xc0) != 0x80) {
-			errno = EILSEQ;
-			return ((size_t)-1);
-		}
-		wch <<= 6;
-		wch |= *s++ & 0x3f;
-	}
-	if (pwc != NULL)
-		*pwc = wch;
-	us->count = 0;
-	return (wch == L'\0' ? 0 : len - ocount);
-}
-
-size_t
-_UTF2_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
-{
-	_UTF2State *us;
-	unsigned char lead;
-	int i, len;
-
-	us = (_UTF2State *)ps;
-
-	if (us->count != 0) {
-		errno = EINVAL;
-		return ((size_t)-1);
-	}
-
-	if (s == NULL)
-		/* Reset to initial conversion state. */
-		return (1);
-
-	if ((wc & ~0x7f) == 0) {
-		lead = 0;
-		len = 1;
-	} else if ((wc & ~0x7ff) == 0) {
-		lead = 0xc0;
-		len = 2;
-	} else if ((wc & ~0xffff) == 0) {
-		lead = 0xe0;
-		len = 3;
-	} else {
-		errno = EILSEQ;
-		return ((size_t)-1);
-	}
-
-	for (i = len - 1; i > 0; i--) {
-		s[i] = (wc & 0x3f) | 0x80;
-		wc >>= 6;
-	}
-	*s = (wc & 0xff) | lead;
-
-	return (len);
-}
diff --git a/lib/libc/locale/utf8.5 b/lib/libc/locale/utf8.5
index f142b63..01a8a51 100644
--- a/lib/libc/locale/utf8.5
+++ b/lib/libc/locale/utf8.5
@@ -78,28 +78,8 @@ If more than a single representation of a value exists (for example,
 used.
 Longer ones are detected as an error as they pose a potential
 security risk, and destroy the 1:1 character:octet sequence mapping.
-.Sh COMPATIBILITY
-The
-.Nm
-encoding supersedes the
-.Xr utf2 5
-encoding.
-The only differences between the two are that
-.Nm
-handles the full 31-bit character set of
-.Tn ISO
-10646
-whereas
-.Xr utf2 5
-is limited to a 16-bit character set,
-and that
-.Xr utf2 5
-accepts redundant,
-.No non- Ns Dq "shortest form"
-representations of characters.
 .Sh SEE ALSO
-.Xr euc 5 ,
-.Xr utf2 5
+.Xr euc 5
 .Rs
 .%A "Rob Pike"
 .%A "Ken Thompson"
author	tjr <tjr@FreeBSD.org>	2004-10-17 02:29:15 +0000
committer	tjr <tjr@FreeBSD.org>	2004-10-17 02:29:15 +0000
commit	e40fa30ce61655590cbd4e7c025b2731600ac9b6 (patch)
tree	6d6fb7d4a9ac47dbf0567825652b2c97a5f4156e /lib
parent	173f5d041b75ccb5f441455614c36ae702ae68d8 (diff)
download	FreeBSD-src-e40fa30ce61655590cbd4e7c025b2731600ac9b6.zip FreeBSD-src-e40fa30ce61655590cbd4e7c025b2731600ac9b6.tar.gz