summaryrefslogtreecommitdiffstats
path: root/lib/libc/locale/multibyte.3
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/locale/multibyte.3')
-rw-r--r--lib/libc/locale/multibyte.3241
1 files changed, 241 insertions, 0 deletions
diff --git a/lib/libc/locale/multibyte.3 b/lib/libc/locale/multibyte.3
new file mode 100644
index 0000000..3ea10e5
--- /dev/null
+++ b/lib/libc/locale/multibyte.3
@@ -0,0 +1,241 @@
+.\" Copyright (c) 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Donn Seeley of BSDI.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)multibyte.3 8.1 (Berkeley) 6/4/93
+.\"
+.Dd "June 4, 1993"
+.Dt MULTIBYTE 3
+.Os
+.Sh NAME
+.Nm mblen ,
+.Nm mbstowcs ,
+.Nm mbtowc ,
+.Nm wcstombs ,
+.Nm wctomb
+.Nd multibyte character support for C
+.Sh SYNOPSIS
+.Fd #include <stdlib.h>
+.Ft int
+.Fn mblen "const char *mbchar" "int nbytes"
+.Ft size_t
+.Fn mbstowcs "wchar_t *wcstring" "const char *mbstring" "size_t nwchars"
+.Ft int
+.Fn mbtowc "wchar_t *wcharp" "const char *mbchar" "size_t nbytes"
+.Ft size_t
+.Fn wcstombs "char *mbstring" "const wchar_t *wcstring" "size_t nbytes"
+.Ft int
+.Fn wctomb "char *mbchar" "wchar_t wchar"
+.Sh DESCRIPTION
+The basic elements of some written natural languages such as Chinese
+cannot be represented uniquely with single C
+.Va char Ns s .
+The C standard supports two different ways of dealing with
+extended natural language encodings,
+.Em wide
+characters and
+.Em multibyte
+characters.
+Wide characters are an internal representation
+which allows each basic element to map
+to a single object of type
+.Va wchar_t .
+Multibyte characters are used for input and output
+and code each basic element as a sequence of C
+.Va char Ns s .
+Individual basic elements may map into one or more
+.Pq up to Dv MB_CHAR_MAX
+bytes in a multibyte character.
+.Pp
+The current locale
+.Pq Xr setlocale 3
+governs the interpretation of wide and multibyte characters.
+The locale category
+.Dv LC_CTYPE
+specifically controls this interpretation.
+The
+.Va wchar_t
+type is wide enough to hold the largest value
+in the wide character representations for all locales.
+.Pp
+Multibyte strings may contain
+.Sq shift
+indicators to switch to and from
+particular modes within the given representation.
+If explicit bytes are used to signal shifting,
+these are not recognized as separate characters
+but are lumped with a neighboring character.
+There is always a distinguished
+.Sq initial
+shift state.
+The
+.Fn mbstowcs
+and
+.Fn wcstombs
+functions assume that multibyte strings are interpreted
+starting from the initial shift state.
+The
+.Fn mblen ,
+.Fn mbtowc
+and
+.Fn wctomb
+functions maintain static shift state internally.
+A call with a null
+.Fa mbchar
+pointer returns nonzero if the current locale requires shift states,
+zero otherwise;
+if shift states are required, the shift state is reset to the initial state.
+The internal shift states are undefined after a call to
+.Fn setlocale
+with the
+.Dv LC_CTYPE
+or
+.Dv LC_ALL
+categories.
+.Pp
+For convenience in processing,
+the wide character with value 0
+.Pq the null wide character
+is recognized as the wide character string terminator,
+and the character with value 0
+.Pq the null byte
+is recognized as the multibyte character string terminator.
+Null bytes are not permitted within multibyte characters.
+.Pp
+The
+.Fn mblen
+function computes the length in bytes
+of a multibyte character
+.Fa mbchar .
+Up to
+.Fa nbytes
+bytes are examined.
+.Pp
+The
+.Fn mbtowc
+function converts a multibyte character
+.Fa mbchar
+into a wide character and stores the result
+in the object pointed to by
+.Fa wcharp.
+Up to
+.Fa nbytes
+bytes are examined.
+.Pp
+The
+.Fn wctomb
+function converts a wide character
+.Fa wchar
+into a multibyte character and stores
+the result in
+.Fa mbchar .
+The object pointed to by
+.Fa mbchar
+must be large enough to accommodate the multibyte character.
+.Pp
+The
+.Fn mbstowcs
+function converts a multibyte character string
+.Fa mbstring
+into a wide character string
+.Fa wcstring .
+No more than
+.Fa nwchars
+wide characters are stored.
+A terminating null wide character is appended if there is room.
+.Pp
+The
+.Fn wcstombs
+function converts a wide character string
+.Fa wcstring
+into a multibyte character string
+.Fa mbstring .
+Up to
+.Fa nbytes
+bytes are stored in
+.Fa mbstring .
+Partial multibyte characters at the end of the string are not stored.
+The multibyte character string is null terminated if there is room.
+.Sh "RETURN VALUES
+If multibyte characters are not supported in the current locale,
+all of these functions will return \-1 if characters can be processed,
+otherwise 0.
+.Pp
+If
+.Fa mbchar
+is
+.Dv NULL ,
+the
+.Fn mblen ,
+.Fn mbtowc
+and
+.Fn wctomb
+functions return nonzero if shift states are supported,
+zero otherwise.
+If
+.Fa mbchar
+is valid,
+then these functions return
+the number of bytes processed in
+.Fa mbchar ,
+or \-1 if no multibyte character
+could be recognized or converted.
+.Pp
+The
+.Fn mbstowcs
+function returns the number of wide characters converted,
+not counting any terminating null wide character.
+The
+.Fn wcstombs
+function returns the number of bytes converted,
+not counting any terminating null byte.
+If any invalid multibyte characters are encountered,
+both functions return \-1.
+.Sh "SEE ALSO
+.Xr euc 4 ,
+.Xr mbrune 3 ,
+.Xr rune 3 ,
+.Xr setlocale 3 ,
+.Xr utf2 4
+.Sh STANDARDS
+The
+.Fn mblen ,
+.Fn mbstowcs ,
+.Fn mbtowc ,
+.Fn wcstombs
+and
+.Fn wctomb
+functions conform to
+.St -ansiC .
+.Sh BUGS
+The current implementation does not support shift states.
OpenPOWER on IntegriCloud