diff options
Diffstat (limited to 'lib/libc/locale/multibyte.3')
-rw-r--r-- | lib/libc/locale/multibyte.3 | 241 |
1 files changed, 241 insertions, 0 deletions
diff --git a/lib/libc/locale/multibyte.3 b/lib/libc/locale/multibyte.3 new file mode 100644 index 0000000..3ea10e5 --- /dev/null +++ b/lib/libc/locale/multibyte.3 @@ -0,0 +1,241 @@ +.\" Copyright (c) 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Donn Seeley of BSDI. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)multibyte.3 8.1 (Berkeley) 6/4/93 +.\" +.Dd "June 4, 1993" +.Dt MULTIBYTE 3 +.Os +.Sh NAME +.Nm mblen , +.Nm mbstowcs , +.Nm mbtowc , +.Nm wcstombs , +.Nm wctomb +.Nd multibyte character support for C +.Sh SYNOPSIS +.Fd #include <stdlib.h> +.Ft int +.Fn mblen "const char *mbchar" "int nbytes" +.Ft size_t +.Fn mbstowcs "wchar_t *wcstring" "const char *mbstring" "size_t nwchars" +.Ft int +.Fn mbtowc "wchar_t *wcharp" "const char *mbchar" "size_t nbytes" +.Ft size_t +.Fn wcstombs "char *mbstring" "const wchar_t *wcstring" "size_t nbytes" +.Ft int +.Fn wctomb "char *mbchar" "wchar_t wchar" +.Sh DESCRIPTION +The basic elements of some written natural languages such as Chinese +cannot be represented uniquely with single C +.Va char Ns s . +The C standard supports two different ways of dealing with +extended natural language encodings, +.Em wide +characters and +.Em multibyte +characters. +Wide characters are an internal representation +which allows each basic element to map +to a single object of type +.Va wchar_t . +Multibyte characters are used for input and output +and code each basic element as a sequence of C +.Va char Ns s . +Individual basic elements may map into one or more +.Pq up to Dv MB_CHAR_MAX +bytes in a multibyte character. +.Pp +The current locale +.Pq Xr setlocale 3 +governs the interpretation of wide and multibyte characters. +The locale category +.Dv LC_CTYPE +specifically controls this interpretation. +The +.Va wchar_t +type is wide enough to hold the largest value +in the wide character representations for all locales. +.Pp +Multibyte strings may contain +.Sq shift +indicators to switch to and from +particular modes within the given representation. +If explicit bytes are used to signal shifting, +these are not recognized as separate characters +but are lumped with a neighboring character. +There is always a distinguished +.Sq initial +shift state. +The +.Fn mbstowcs +and +.Fn wcstombs +functions assume that multibyte strings are interpreted +starting from the initial shift state. +The +.Fn mblen , +.Fn mbtowc +and +.Fn wctomb +functions maintain static shift state internally. +A call with a null +.Fa mbchar +pointer returns nonzero if the current locale requires shift states, +zero otherwise; +if shift states are required, the shift state is reset to the initial state. +The internal shift states are undefined after a call to +.Fn setlocale +with the +.Dv LC_CTYPE +or +.Dv LC_ALL +categories. +.Pp +For convenience in processing, +the wide character with value 0 +.Pq the null wide character +is recognized as the wide character string terminator, +and the character with value 0 +.Pq the null byte +is recognized as the multibyte character string terminator. +Null bytes are not permitted within multibyte characters. +.Pp +The +.Fn mblen +function computes the length in bytes +of a multibyte character +.Fa mbchar . +Up to +.Fa nbytes +bytes are examined. +.Pp +The +.Fn mbtowc +function converts a multibyte character +.Fa mbchar +into a wide character and stores the result +in the object pointed to by +.Fa wcharp. +Up to +.Fa nbytes +bytes are examined. +.Pp +The +.Fn wctomb +function converts a wide character +.Fa wchar +into a multibyte character and stores +the result in +.Fa mbchar . +The object pointed to by +.Fa mbchar +must be large enough to accommodate the multibyte character. +.Pp +The +.Fn mbstowcs +function converts a multibyte character string +.Fa mbstring +into a wide character string +.Fa wcstring . +No more than +.Fa nwchars +wide characters are stored. +A terminating null wide character is appended if there is room. +.Pp +The +.Fn wcstombs +function converts a wide character string +.Fa wcstring +into a multibyte character string +.Fa mbstring . +Up to +.Fa nbytes +bytes are stored in +.Fa mbstring . +Partial multibyte characters at the end of the string are not stored. +The multibyte character string is null terminated if there is room. +.Sh "RETURN VALUES +If multibyte characters are not supported in the current locale, +all of these functions will return \-1 if characters can be processed, +otherwise 0. +.Pp +If +.Fa mbchar +is +.Dv NULL , +the +.Fn mblen , +.Fn mbtowc +and +.Fn wctomb +functions return nonzero if shift states are supported, +zero otherwise. +If +.Fa mbchar +is valid, +then these functions return +the number of bytes processed in +.Fa mbchar , +or \-1 if no multibyte character +could be recognized or converted. +.Pp +The +.Fn mbstowcs +function returns the number of wide characters converted, +not counting any terminating null wide character. +The +.Fn wcstombs +function returns the number of bytes converted, +not counting any terminating null byte. +If any invalid multibyte characters are encountered, +both functions return \-1. +.Sh "SEE ALSO +.Xr euc 4 , +.Xr mbrune 3 , +.Xr rune 3 , +.Xr setlocale 3 , +.Xr utf2 4 +.Sh STANDARDS +The +.Fn mblen , +.Fn mbstowcs , +.Fn mbtowc , +.Fn wcstombs +and +.Fn wctomb +functions conform to +.St -ansiC . +.Sh BUGS +The current implementation does not support shift states. |