From a87152b56061e8a50c4a1aa30087ab5414747caa Mon Sep 17 00:00:00 2001 From: tjr Date: Sun, 18 Aug 2002 06:30:10 +0000 Subject: Implement the ISO C90 Amd.1 restartable wide and multibyte character manipulation functions mbrlen(), mbrtowc(), mbsinit(), mbsrtowcs(), wcrtomb(), wcsrtombs(). --- lib/libc/locale/Makefile.inc | 14 +++-- lib/libc/locale/mbrlen.3 | 113 ++++++++++++++++++++++++++++++++++ lib/libc/locale/mbrlen.c | 37 +++++++++++ lib/libc/locale/mbrtowc.3 | 142 +++++++++++++++++++++++++++++++++++++++++++ lib/libc/locale/mbrtowc.c | 81 ++++++++++++++++++++++++ lib/libc/locale/mbsinit.3 | 63 +++++++++++++++++++ lib/libc/locale/mbsinit.c | 42 +++++++++++++ lib/libc/locale/mbsrtowcs.3 | 108 ++++++++++++++++++++++++++++++++ lib/libc/locale/mbsrtowcs.c | 74 ++++++++++++++++++++++ lib/libc/locale/wcrtomb.3 | 106 ++++++++++++++++++++++++++++++++ lib/libc/locale/wcrtomb.c | 46 ++++++++++++++ lib/libc/locale/wcsrtombs.3 | 108 ++++++++++++++++++++++++++++++++ lib/libc/locale/wcsrtombs.c | 90 +++++++++++++++++++++++++++ 13 files changed, 1020 insertions(+), 4 deletions(-) create mode 100644 lib/libc/locale/mbrlen.3 create mode 100644 lib/libc/locale/mbrlen.c create mode 100644 lib/libc/locale/mbrtowc.3 create mode 100644 lib/libc/locale/mbrtowc.c create mode 100644 lib/libc/locale/mbsinit.3 create mode 100644 lib/libc/locale/mbsinit.c create mode 100644 lib/libc/locale/mbsrtowcs.3 create mode 100644 lib/libc/locale/mbsrtowcs.c create mode 100644 lib/libc/locale/wcrtomb.3 create mode 100644 lib/libc/locale/wcrtomb.c create mode 100644 lib/libc/locale/wcsrtombs.3 create mode 100644 lib/libc/locale/wcsrtombs.c (limited to 'lib/libc') diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index dfa5431..96e24ab 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -6,18 +6,24 @@ SRCS+= ansi.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \ isctype.c iswctype.c \ - ldpart.c lmessages.c lmonetary.c lnumeric.c 
localeconv.c mbrune.c \ + ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mbrlen.c \ + mbrtowc.c mbrune.c mbsinit.c mbsrtowcs.c \ mskanji.c nl_langinfo.c nomacros.c none.c rune.c \ runetype.c setinvalidrune.c setlocale.c setrunelocale.c table.c \ - tolower.c toupper.c utf2.c wctob.c wctrans.c wctype.c wcwidth.c + tolower.c toupper.c utf2.c wcrtomb.c wcsrtombs.c wctob.c wctrans.c \ + wctype.c wcwidth.c .if ${LIB} == "c" MAN+= btowc.3 \ ctype.3 digittoint.3 isalnum.3 isalpha.3 isascii.3 isblank.3 iscntrl.3 \ isdigit.3 isgraph.3 islower.3 isprint.3 ispunct.3 isspace.3 \ - isupper.3 iswalnum.3 isxdigit.3 mbrune.3 multibyte.3 nl_langinfo.3 \ + isupper.3 iswalnum.3 isxdigit.3 mbrlen.3 mbrtowc.3 mbrune.3 mbsinit.3 \ + mbsrtowcs.3 multibyte.3 \ + nl_langinfo.3 \ rune.3 \ - setlocale.3 toascii.3 tolower.3 toupper.3 towlower.3 wctrans.3 wctype.3 + setlocale.3 toascii.3 tolower.3 toupper.3 towlower.3 wcsrtombs.3 \ + wcrtomb.3 \ + wctrans.3 wctype.3 MAN+= euc.4 utf2.4 MLINKS+=btowc.3 wctob.3 diff --git a/lib/libc/locale/mbrlen.3 b/lib/libc/locale/mbrlen.3 new file mode 100644 index 0000000..0fcc227 --- /dev/null +++ b/lib/libc/locale/mbrlen.3 @@ -0,0 +1,113 @@ +.\" Copyright (c) 2002 Tim J. Robbins +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.Dd August 16, 2002 +.Dt MBRLEN 3 +.Os +.Sh NAME +.Nm mbrlen +.Nd "get number of bytes in a character (restartable)" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn mbrlen "const char *restrict s" "size_t n" "mbstate_t *restrict ps" +.Sh DESCRIPTION +The +.Fn mbrlen +function determines the number of bytes constituting the +multibyte character sequence pointed to by +.Fa s . +.Pp +It is equivalent to: +.Pp +.Dl "mbrtowc(NULL, s, n, ps);" +.Pp +Except that when +.Fa ps +is a NULL pointer, +.Fn mbrlen +uses its own static, internal +.Ft mbstate_t +object to keep track of shift state. +.Sh RETURN VALUES +The +.Fn mbrlen +function returns: +.Bl -tag -width indent +.It 0 +The first +.Fa n +or fewer bytes of +.Fa s +represent the null wide character (L'\e0'). +.It >0 +The first +.Fa n +or fewer bytes of +.Fa s +represent a valid character, +.Fn mbrlen +returns the length (in bytes) of the multibyte sequence. +.It Xo +.No ( Ns +.Ft size_t Ns +.No ) Ns \&-2 +.Xc +The first +.Fa n +bytes of +.Fa s +are an incomplete multibyte sequence.
+.It Xo +.No ( Ns +.Ft size_t Ns +.No ) Ns \&-1 +.Xc +The byte sequence pointed to by +.Fa s +is an invalid multibyte sequence. +.El +.Sh ERRORS +The +.Fn mbrlen +function will fail if: +.Bl -tag -width Er +.\".It Bq Er EINVAL +.\"Invalid argument. +.It Bq Er EILSEQ +An invalid multibyte sequence was detected. +.El +.Sh SEE ALSO +.Xr mblen 3 , +.Xr mbrtowc 3 +.Sh STANDARDS +The +.Fn mbrlen +function conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation does not support shift states. diff --git a/lib/libc/locale/mbrlen.c b/lib/libc/locale/mbrlen.c new file mode 100644 index 0000000..139aff3 --- /dev/null +++ b/lib/libc/locale/mbrlen.c @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * mbrlen --
 *	Report how many bytes make up the next multibyte character in `s',
 *	inspecting at most `n' bytes.
 *
 *	This is mbrtowc() with the wide-character result discarded, so the
 *	return value is whatever mbrtowc() returns for the same input.
 *
 *	ISO C requires mbrlen() to fall back on a conversion state object of
 *	its own (not the one mbrtowc() uses internally) when `ps' is NULL.
 *	The caller's state, or our private one, is therefore always handed
 *	down rather than being replaced by NULL.
 */
size_t
mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps)
{
	/* Static storage is zero-filled, which is the initial state. */
	static mbstate_t mbs;

	if (ps == NULL)
		ps = &mbs;
	return (mbrtowc(NULL, s, n, ps));
}
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.Dd August 15, 2002 +.Dt MBRTOWC 3 +.Os +.Sh NAME +.Nm mbrtowc +.Nd "convert a character to a wide-character code (restartable)" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn mbrtowc "wchar_t *restrict pwc" "const char *restrict s" "size_t n" "mbstate_t *restrict ps" +.Sh DESCRIPTION +The +.Fn mbrtowc +function inspects at most +.Fa n +bytes pointed to by +.Fa s +and interprets them as a multibyte character sequence +according to the current setting of +.Ev LC_CTYPE . +If +.Fa pwc +is not +.Dv NULL , +the multibyte character which +.Fa s +represents is stored in the +.Ft wchar_t +it points to. +.Pp +If +.Fa s +is +.Dv NULL , +.Fn mbrtowc +behaves as if +.Fa pwc +was +.Dv NULL , +.Fa s +was an empty string ("") +and +.Fa n +was 1. +.Pp +The +.Ft mbstate_t +argument, +.Fa ps , +is used to keep track of the shift state. +If it is +.Dv NULL , +.Fn mbrtowc +uses an internal, static +.Ft mbstate_t +object. +.Sh RETURN VALUES +The +.Fn mbrtowc +function returns: +.Bl -tag -width indent +.It 0 +The first +.Fa n +or fewer bytes of +.Fa s +represent the null wide character (L'\e0'). +.It >0 +The first +.Fa n +or fewer bytes of +.Fa s +represent a valid character, +.Fn mbrtowc +returns the length (in bytes) of the multibyte sequence. +.It Xo +.No ( Ns +.Ft size_t Ns +.No ) Ns \&-2 +.Xc +The first +.Fa n +bytes of +.Fa s +are an incomplete multibyte sequence.
+.It Xo +.No ( Ns +.Ft size_t Ns +.No ) Ns \&-1 +.Xc +The byte sequence pointed to by +.Fa s +is an invalid multibyte sequence. +.El +.Sh ERRORS +The +.Fn mbrtowc +function will fail if: +.Bl -tag -width Er +.\".It Bq Er EINVAL +.\"Invalid argument. +.It Bq Er EILSEQ +An invalid multibyte sequence was detected. +.El +.Sh SEE ALSO +.Xr mbtowc 3 , +.Xr setlocale 3 , +.Xr wcrtomb 3 +.Sh STANDARDS +The +.Fn mbrtowc +function conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation does not support shift states. diff --git a/lib/libc/locale/mbrtowc.c b/lib/libc/locale/mbrtowc.c new file mode 100644 index 0000000..a1b960c --- /dev/null +++ b/lib/libc/locale/mbrtowc.c @@ -0,0 +1,81 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +size_t +mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n, + mbstate_t *__restrict ps __unused) +{ + const char *e; + rune_t r; + + if (s == NULL) { + pwc = NULL; + s = ""; + n = 1; + } + + if (*s == '\0') { + if (pwc != NULL) + *pwc = L'\0'; + return (0); + } + + if ((r = sgetrune(s, n, &e)) == _INVALID_RUNE) { + /* + * The design of sgetrune() doesn't give us any way to tell + * between incomplete and invalid multibyte sequences. + */ + + if (n >= (size_t)MB_CUR_MAX) { + /* + * If we have been supplied with at least MB_CUR_MAX + * bytes and still cannot find a valid character, the + * data must be invalid. + */ + errno = EILSEQ; + return ((size_t)-1); + } + + /* + * .. otherwise, it's an incomplete character or an invalid + * character we cannot detect yet. + */ + return ((size_t)-2); + } + + if (pwc != NULL) + *pwc = (wchar_t)r; + + return ((size_t)(e - s)); +} diff --git a/lib/libc/locale/mbsinit.3 b/lib/libc/locale/mbsinit.3 new file mode 100644 index 0000000..295bb68 --- /dev/null +++ b/lib/libc/locale/mbsinit.3 @@ -0,0 +1,63 @@ +.\" Copyright (c) 2002 Tim J. Robbins +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.Dd August 16, 2002 +.Dt MBSINIT 3 +.Os +.Sh NAME +.Nm mbsinit +.Nd "determine conversion object status" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft int +.Fn mbsinit "const mbstate_t *ps" +.Sh DESCRIPTION +The +.Fn mbsinit +function determines whether the +.Ft mbstate_t +object pointed to by +.Fa ps +describes an initial conversion state. +.Sh RETURN VALUES +The +.Fn mbsinit +function returns non-zero if +.Fa ps +is +.Dv NULL +or describes an initial conversion state, +otherwise it returns zero. +.Sh STANDARDS +The +.Fn mbsinit +function conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation does not support shift states; +.Fn mbsinit +always returns non-zero. 
/*
 * mbsinit --
 *	Tell whether `ps' describes an initial conversion state.
 *
 *	Stateful multibyte conversion is not supported, which leaves the
 *	initial state as the only one there is.  The answer is therefore
 *	non-zero for every argument, NULL included; `ps' is never examined.
 */
int
mbsinit(const mbstate_t *ps __unused)
{
	const int is_initial = 1;

	return (is_initial);
}
Robbins +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" $FreeBSD$ +.Dd August 16, 2002 +.Dt MBSRTOWCS 3 +.Os +.Sh NAME +.Nm mbsrtowcs +.Nd "convert a character string to a wide-character string (restartable)" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn mbsrtowcs "wchar_t *restrict dst" "const char **restrict src" "size_t len" "mbstate_t *restrict ps" +.Sh DESCRIPTION +The +.Fn mbsrtowcs +function converts a sequence of multibyte characters pointed to indirectly by +.Fa src +into a sequence of corresponding wide characters and stores at most +.Fa len +of them in the +.Ft wchar_t +array pointed to by +.Fa dst , +until it encounters a terminating null character ('\e0'). +.Pp +If +.Fa dst +is +.Dv NULL , +no characters are stored. +.Pp +If +.Fa dst +is not +.Dv NULL , +the pointer pointed to by +.Fa src +is updated to point to the character after the one that conversion stopped at. +If conversion stops because a null character is encountered, +.No * Ns Fa src +is set to +.Dv NULL . +.Pp +The +.Ft mbstate_t +argument, +.Fa ps , +is used to keep track of the shift state. +If it is +.Dv NULL , +.Fn mbsrtowcs +uses an internal, static +.Ft mbstate_t +object. +.Sh RETURN VALUES +The +.Fn mbsrtowcs +function returns the number of wide characters stored in +the array pointed to by +.Fa dst +if successful, otherwise it returns +.No ( Ns +.Ft size_t Ns +.No ) Ns -1 . +.Sh ERRORS +The +.Fn mbsrtowcs +function will fail if: +.Bl -tag -width Er +.It Bq Er EILSEQ +An invalid multibyte character sequence was encountered. +.El +.Sh SEE ALSO +.Xr mbrtowc 3 , +.Xr mbstowcs 3 , +.Xr wcsrtombs 3 +.Sh STANDARDS +The +.Fn mbsrtowcs +function conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation does not support shift states. diff --git a/lib/libc/locale/mbsrtowcs.c b/lib/libc/locale/mbsrtowcs.c new file mode 100644 index 0000000..25b6c17 --- /dev/null +++ b/lib/libc/locale/mbsrtowcs.c @@ -0,0 +1,74 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. 
/*
 * mbsrtowcs --
 *	Convert the multibyte string *src into wide characters, one
 *	mbrtowc() call per character.
 *
 *	dst == NULL: nothing is stored and *src is left alone; the result is
 *	the number of characters preceding the terminating null.
 *
 *	dst != NULL: at most `len' wide characters are stored.  *src becomes
 *	NULL if the terminating null was reached (it is stored but not
 *	counted), and otherwise points at the first unconverted byte.
 *
 *	A conversion failure yields (size_t)-1; mbrtowc() is relied upon to
 *	have set errno.  `ps' is not used.
 */
size_t
mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src, size_t len,
    mbstate_t *__restrict ps __unused)
{
	const char *cursor = *src;
	size_t converted;
	wchar_t scratch;
	int step;

	if (dst == NULL) {
		/* Counting pass: decode into a throw-away wide character. */
		for (converted = 0; ; converted++, cursor += step) {
			step = (int)mbrtowc(&scratch, cursor, MB_CUR_MAX, NULL);
			if (step < 0)
				return ((size_t)-1);
			if (step == 0)
				return (converted);
		}
		/*NOTREACHED*/
	}

	for (converted = 0; converted < len; converted++, cursor += step) {
		step = (int)mbrtowc(&dst[converted], cursor, MB_CUR_MAX, NULL);
		if (step < 0) {
			/* Leave *src at the sequence that failed. */
			*src = cursor;
			return ((size_t)-1);
		}
		if (step == 0) {
			/* Whole string consumed. */
			*src = NULL;
			return (converted);
		}
	}

	/* Output space ran out first; remember where to resume. */
	*src = cursor;
	return (converted);
}
+.\" +.\" $FreeBSD$ +.Dd August 15, 2002 +.Dt WCRTOMB 3 +.Os +.Sh NAME +.Nm wcrtomb +.Nd "convert a wide-character code to a character (restartable)" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn wcrtomb "char *restrict s" "wchar_t wc" "mbstate_t *restrict ps" +.Sh DESCRIPTION +The +.Fn wcrtomb +function stores a multibyte sequence representing the +wide character +.Fa wc , +including any necessary shift sequences, to the +character array +.Fa s , +storing a maximum of +.Dv MB_CUR_MAX +bytes. +.Pp +If +.Fa s +is +.Dv NULL , +.Fn wcrtomb +behaves as if +.Fa s +pointed to an internal buffer and +.Fa wc +was a null wide character (L'\e0'). +.Pp +The +.Ft mbstate_t +argument, +.Fa ps , +is used to keep track of the shift state. +If it is +.Dv NULL , +.Fn wcrtomb +uses an internal, static +.Ft mbstate_t +object. +.Sh RETURN VALUES +The +.Fn wcrtomb +function returns the length (in bytes) of the multibyte sequence +needed to represent +.Fa wc , +or +.No ( Ns +.Ft size_t Ns +.No ) Ns \&-1 +if +.Fa wc +is not a valid wide character code. +.Sh ERRORS +The +.Fn wcrtomb +function will fail if: +.Bl -tag -width Er +.\".It Bq Er EINVAL +.\"Invalid argument. +.It Bq Er EILSEQ +An invalid wide character code was specified. +.El +.Sh SEE ALSO +.Xr mbrtowc 3 , +.Xr setlocale 3 , +.Xr wctomb 3 +.Sh STANDARDS +The +.Fn wcrtomb +function conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation does not support shift states. diff --git a/lib/libc/locale/wcrtomb.c b/lib/libc/locale/wcrtomb.c new file mode 100644 index 0000000..8c8b2cd --- /dev/null +++ b/lib/libc/locale/wcrtomb.c @@ -0,0 +1,46 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer.
#include <limits.h>	/* MB_LEN_MAX, sizes the scratch buffer below */

/*
 * wcrtomb --
 *	Store the multibyte representation of `wc' at `s' and return its
 *	length in bytes.  sputrune() is given room for MB_CUR_MAX bytes.
 *
 *	A NULL `s' is handled the way ISO C specifies: as if the call had
 *	been wcrtomb(buf, L'\0', ps) with `buf' an internal buffer.  This
 *	keeps a null pointer out of sputrune() and out of the pointer
 *	subtraction that computes the result.
 *
 *	If sputrune() hands back a NULL end pointer the character is treated
 *	as unrepresentable: errno is set to EILSEQ and (size_t)-1 returned.
 *	`ps' is not used.
 */
size_t
wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps __unused)
{
	char buf[MB_LEN_MAX];
	char *e;

	if (s == NULL) {
		s = buf;
		wc = L'\0';
	}

	sputrune(wc, s, MB_CUR_MAX, &e);
	if (e == NULL) {
		errno = EILSEQ;
		return ((size_t)-1);
	}
	return ((size_t)(e - s));
}
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.Dd August 16, 2002 +.Dt WCSRTOMBS 3 +.Os +.Sh NAME +.Nm wcsrtombs +.Nd "convert a wide-character string to a character string (restartable)" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn wcsrtombs "char *restrict dst" "const wchar_t **restrict src" "size_t len" "mbstate_t *restrict ps" +.Sh DESCRIPTION +The +.Fn wcsrtombs +function converts a string of wide characters indirectly pointed to by +.Fa src +to a corresponding multibyte character string stored in the array +pointed to by +.Fa dst . +No more than +.Fa len +bytes are written to +.Fa dst . +.Pp +If +.Fa dst +is +.Dv NULL , +no characters are stored. +.Pp +If +.Fa dst +is not +.Dv NULL , +the pointer pointed to by +.Fa src +is updated to point to the character after the one that conversion stopped at. +If conversion stops because a null character is encountered, +.No * Ns Fa src +is set to +.Dv NULL . 
+.Pp +The +.Ft mbstate_t +argument, +.Fa ps , +is used to keep track of the shift state. +If it is +.Dv NULL , +.Fn wcsrtombs +uses an internal, static +.Ft mbstate_t +object. +.Sh RETURN VALUES +The +.Fn wcsrtombs +function returns the number of bytes stored in +the array pointed to by +.Fa dst +(not including any terminating null), if successful, otherwise it returns +.No ( Ns +.Ft size_t Ns +.No ) Ns -1 . +.Sh ERRORS +The +.Fn wcsrtombs +function will fail if: +.Bl -tag -width Er +.It Bq Er EILSEQ +An invalid wide character was encountered. +.El +.Sh SEE ALSO +.Xr mbsrtowcs 3 , +.Xr wcstombs 3 , +.Xr wcrtomb 3 +.Sh STANDARDS +The +.Fn wcsrtombs +function conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation does not support shift states. diff --git a/lib/libc/locale/wcsrtombs.c b/lib/libc/locale/wcsrtombs.c new file mode 100644 index 0000000..ccda84e --- /dev/null +++ b/lib/libc/locale/wcsrtombs.c @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
/*
 * wcsrtombs --
 *	Convert the wide-character string *src to multibyte form, one
 *	wcrtomb() call per character.
 *
 *	dst == NULL: nothing is kept and *src is left alone; the result is
 *	the number of bytes the string needs, terminating null excluded.
 *
 *	dst != NULL: at most `len' bytes are written.  *src becomes NULL if
 *	the terminating null was converted (it is written but not counted),
 *	and otherwise points at the first wide character not converted.
 *
 *	A conversion failure yields (size_t)-1; wcrtomb() is relied upon to
 *	have set errno.  `ps' is not used.
 */
size_t
wcsrtombs(char *__restrict dst, const wchar_t **__restrict src, size_t len,
    mbstate_t *__restrict ps __unused)
{
	char scratch[MB_LEN_MAX];
	const wchar_t *in = *src;
	size_t total = 0;
	char *target;
	int width;

	if (dst == NULL) {
		/* Measuring pass: every character goes to the scratch area. */
		for (;; total += width, in++) {
			width = (int)wcrtomb(scratch, *in, NULL);
			if (width < 0)
				return ((size_t)-1);
			if (*in == L'\0')
				return (total + width - 1);
		}
		/*NOTREACHED*/
	}

	for (; len > 0; in++, dst += width, len -= width, total += width) {
		/*
		 * Convert straight into the output while more than
		 * MB_CUR_MAX bytes remain; otherwise stage the bytes in the
		 * scratch area so a sequence that is too long never spills
		 * past the end of dst.
		 */
		target = (len > (size_t)MB_CUR_MAX) ? dst : scratch;
		width = (int)wcrtomb(target, *in, NULL);
		if (width < 0) {
			*src = in;
			return ((size_t)-1);
		}
		if (target == scratch) {
			if (width > (int)len)
				break;		/* Sequence will not fit. */
			memcpy(dst, scratch, width);
		}
		if (*in == L'\0') {
			*src = NULL;
			return (total + width - 1);
		}
	}

	/* Out of room; remember where to resume. */
	*src = in;
	return (total);
}