diff options
author | tjr <tjr@FreeBSD.org> | 2002-10-04 03:18:26 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2002-10-04 03:18:26 +0000 |
commit | f213f77cc2b5bfccabc6b1df6da904e9dcc88383 (patch) | |
tree | 0d173f4e040e30d9156e96c95bc7f828c498a795 /lib | |
parent | 2c7ac0ae064ce7ab8b30d7c30473f92d799a1485 (diff) | |
download | FreeBSD-src-f213f77cc2b5bfccabc6b1df6da904e9dcc88383.zip FreeBSD-src-f213f77cc2b5bfccabc6b1df6da904e9dcc88383.tar.gz |
Add placeholder implementations of wcscoll() and wcsxfrm() that provide
locale-sensitive collation only in single-byte locales and fall back to
plain binary comparison in locales with extended character sets.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/libc/string/Makefile.inc | 10 | ||||
-rw-r--r-- | lib/libc/string/wcscoll.3 | 112 | ||||
-rw-r--r-- | lib/libc/string/wcscoll.c | 97 | ||||
-rw-r--r-- | lib/libc/string/wcsxfrm.3 | 124 | ||||
-rw-r--r-- | lib/libc/string/wcsxfrm.c | 115 |
5 files changed, 454 insertions, 4 deletions
diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc index ce73ee1..63c7af0 100644 --- a/lib/libc/string/Makefile.inc +++ b/lib/libc/string/Makefile.inc @@ -12,9 +12,11 @@ MISRCS+=bcmp.c bcopy.c bzero.c ffs.c index.c memccpy.c memchr.c memcmp.c \ strlcat.c strlcpy.c strlen.c strmode.c strncat.c strncmp.c strncpy.c \ strcasestr.c strnstr.c \ strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \ - strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscpy.c wcscspn.c \ + strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscoll.c wcscpy.c \ + wcscspn.c \ wcslcat.c wcslcpy.c wcslen.c wcsncat.c wcsncmp.c wcsncpy.c wcspbrk.c \ - wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wmemchr.c wmemcmp.c \ + wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wcsxfrm.c wmemchr.c \ + wmemcmp.c \ wmemcpy.c wmemmove.c wmemset.c @@ -28,8 +30,8 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \ memcmp.3 memcpy.3 memmove.3 memset.3 rindex.3 strcasecmp.3 strcat.3 \ strchr.3 strcmp.3 strcoll.3 strcpy.3 strcspn.3 strdup.3 strerror.3 \ string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strrchr.3 strsep.3 \ - strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcstok.3 wcswidth.3 \ - wmemchr.3 + strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcscoll.3 wcstok.3 \ + wcswidth.3 wcsxfrm.3 wmemchr.3 MLINKS+=strcasecmp.3 strncasecmp.3 MLINKS+=strcat.3 strncat.3 diff --git a/lib/libc/string/wcscoll.3 b/lib/libc/string/wcscoll.3 new file mode 100644 index 0000000..fb6a36b --- /dev/null +++ b/lib/libc/string/wcscoll.3 @@ -0,0 +1,112 @@ +.\" Copyright (c) 1990, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Chris Torek and the American National Standards Committee X3, +.\" on Information Processing Systems. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" @(#)strcoll.3 8.1 (Berkeley) 6/4/93 +.\" FreeBSD: src/lib/libc/string/strcoll.3,v 1.11 2001/10/01 16:09:00 ru Exp +.\" $FreeBSD$ +.\" +.Dd October 4, 2002 +.Dt WCSCOLL 3 +.Os +.Sh NAME +.Nm wcscoll +.Nd compare wide strings according to current collation +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft int +.Fn wcscoll "const wchar_t *s1" "const wchar_t *s2" +.Sh DESCRIPTION +The +.Fn wcscoll +function compares the null-terminated strings +.Fa s1 +and +.Fa s2 +according to the current locale collation order. +In the +.Dq Li C +locale, +.Fn wcscoll +is equivalent to +.Fn wcscmp . +.Sh RETURN VALUES +The +.Fn wcscoll +function +returns an integer greater than, equal to, or less than 0, +if +.Fa s1 +is greater than, equal to, or less than +.Fa s2 . +.Pp +No return value is reserved to indicate errors; +callers should set +.Va errno +to 0 before calling +.Fn wcscoll . +If it is non-zero upon return from +.Fn wcscoll , +an error has occurred. +.Sh ERRORS +The +.Fn wcscoll +function will fail if: +.Bl -tag -width Er +.It Bq Er EILSEQ +An invalid wide character code was specified. +.It Bq Er ENOMEM +Cannot allocate enough memory for temporary buffers. +.El +.Sh SEE ALSO +.Xr setlocale 3 , +.Xr strcoll 3 , +.Xr wcscmp 3 , +.Xr wcsxfrm 3 +.Sh STANDARDS +The +.Fn wcscoll +function +conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation of +.Fn wcscoll +only works in single-byte +.Dv LC_CTYPE +locales, and falls back to using +.Fn wcscmp +in locales with extended character sets. diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c new file mode 100644 index 0000000..79dad7d --- /dev/null +++ b/lib/libc/string/wcscoll.c @@ -0,0 +1,97 @@ +/*- + * Copyright (c) 2002 Tim J. Robbins + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "collate.h" + +static char *__mbsdup(const wchar_t *); + +/* + * Placeholder implementation of wcscoll(). Attempts to use the single-byte + * collation ordering where possible, and falls back on wcscmp() in locales + * with extended character sets. + */ +int +wcscoll(const wchar_t *ws1, const wchar_t *ws2) +{ + char *mbs1, *mbs2; + int diff, sverrno; + + if (__collate_load_error || MB_CUR_MAX > 1) + /* + * Locale has no special collating order, could not be + * loaded, or has an extended character set; do a fast binary + * comparison. 
+ */ + return (wcscmp(ws1, ws2)); + + if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) { + /* + * Out of memory or illegal wide chars; fall back to wcscmp() + * but leave errno indicating the error. Callers that don't + * check for error will get a reasonable but often slightly + * incorrect result. + */ + sverrno = errno; + free(mbs1); + errno = sverrno; + return (wcscmp(ws1, ws2)); + } + + diff = strcoll(mbs1, mbs2); + sverrno = errno; + free(mbs1); + free(mbs2); + errno = sverrno; + + return (diff); +} + +static char * +__mbsdup(const wchar_t *ws) +{ + mbstate_t state; + const wchar_t *wcp; + size_t len; + char *mbs; + + memset(&state, 0, sizeof(state)); + wcp = ws; + if ((len = wcsrtombs(NULL, &wcp, 0, &state)) == (size_t)-1) + return (NULL); + if ((mbs = malloc(len + 1)) == NULL) + return (NULL); + memset(&state, 0, sizeof(state)); + wcsrtombs(mbs, &ws, len + 1, &state); + + return (mbs); +} diff --git a/lib/libc/string/wcsxfrm.3 b/lib/libc/string/wcsxfrm.3 new file mode 100644 index 0000000..55c03f8 --- /dev/null +++ b/lib/libc/string/wcsxfrm.3 @@ -0,0 +1,124 @@ +.\" Copyright (c) 1990, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" Chris Torek and the American National Standards Committee X3, +.\" on Information Processing Systems. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. 
All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)strxfrm.3 8.1 (Berkeley) 6/4/93 +.\" FreeBSD: src/lib/libc/string/strxfrm.3,v 1.16 2002/09/06 11:24:06 tjr Exp +.\" $FreeBSD$ +.\" +.Dd October 4, 2002 +.Dt WCSXFRM 3 +.Os +.Sh NAME +.Nm wcsxfrm +.Nd transform a wide string under locale +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wchar.h +.Ft size_t +.Fn wcsxfrm "wchar_t * restrict dst" "const wchar_t * restrict src" "size_t n" +.Sh DESCRIPTION +The +.Fn wcsxfrm +function transforms a null-terminated wide character string pointed to by +.Fa src +according to the current locale collation order +then copies the transformed string +into +.Fa dst . +No more than +.Fa n +wide characters are copied into +.Fa dst , +including the terminating null character added. 
+If +.Fa n +is set to 0 +(it helps to determine an actual size needed +for transformation), +.Fa dst +is permitted to be a NULL pointer. +.Pp +Comparing two strings using +.Fn wcscmp +after +.Fn wcsxfrm +is equivalent to comparing +two original strings with +.Fn wcscoll . +.Sh RETURN VALUES +Upon successful completion, +.Fn wcsxfrm +returns the length of the transformed string not including +the terminating null character. +If this value is +.Fa n +or more, the contents of +.Fa dst +are indeterminate. +.Sh SEE ALSO +.Xr setlocale 3 , +.Xr strxfrm 3 , +.Xr wcscoll 3 , +.Xr wcscmp 3 +.Sh STANDARDS +The +.Fn wcsxfrm +function +conforms to +.St -isoC-99 . +.Sh BUGS +The current implementation of +.Fn wcsxfrm +only works in single-byte +.Dv LC_CTYPE +locales, and falls back to using +.Fn wcsncpy +in locales with extended character sets. +.Pp +Comparing two strings using +.Fn wcscmp +after +.Fn wcsxfrm +is +.Em not +always equivalent to comparison with +.Fn wcscoll ; +.Fn wcsxfrm +only stores information about primary collation weights into +.Fa dst , +whereas +.Fn wcscoll +compares characters using both primary and secondary weights. diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c new file mode 100644 index 0000000..4be6e46 --- /dev/null +++ b/lib/libc/string/wcsxfrm.c @@ -0,0 +1,115 @@ +/*- + * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> + * at Electronni Visti IA, Kiev, Ukraine. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if 0 +__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp "); +#endif +__FBSDID("$FreeBSD$"); + +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include "collate.h" + +static char *__mbsdup(const wchar_t *); + +/* + * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of + * the logic used. 
+ */ +size_t +wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len) +{ + int prim, sec, l; + size_t slen; + char *mbsrc, *s, *ss; + + if (*src == L'\0') { + if (len != 0) + *dest = L'\0'; + return (0); + } + + if (__collate_load_error || MB_CUR_MAX > 1) { + slen = wcslen(src); + if (len > 0) { + if (slen < len) + wcscpy(dest, src); + else { + wcsncpy(dest, src, len - 1); + dest[len - 1] = L'\0'; + } + } + return (slen); + } + + mbsrc = __mbsdup(src); + slen = 0; + prim = sec = 0; + ss = s = __collate_substitute(mbsrc); + while (*s != '\0') { + while (*s != '\0' && prim == 0) { + __collate_lookup(s, &l, &prim, &sec); + s += l; + } + if (prim != 0) { + if (len > 1) { + *dest++ = (wchar_t)prim; + len--; + } + slen++; + prim = 0; + } + } + free(ss); + free(mbsrc); + if (len != 0) + *dest = L'\0'; + + return (slen); +} + +static char * +__mbsdup(const wchar_t *ws) +{ + mbstate_t state; + const wchar_t *wcp; + size_t len; + char *mbs; + + memset(&state, 0, sizeof(state)); + wcp = ws; + if ((len = wcsrtombs(NULL, &wcp, 0, &state)) == (size_t)-1) + return (NULL); + if ((mbs = malloc(len + 1)) == NULL) + return (NULL); + memset(&state, 0, sizeof(state)); + wcsrtombs(mbs, &ws, len + 1, &state); + + return (mbs); +} |