summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authortjr <tjr@FreeBSD.org>2002-10-04 03:18:26 +0000
committertjr <tjr@FreeBSD.org>2002-10-04 03:18:26 +0000
commitf213f77cc2b5bfccabc6b1df6da904e9dcc88383 (patch)
tree0d173f4e040e30d9156e96c95bc7f828c498a795
parent2c7ac0ae064ce7ab8b30d7c30473f92d799a1485 (diff)
downloadFreeBSD-src-f213f77cc2b5bfccabc6b1df6da904e9dcc88383.zip
FreeBSD-src-f213f77cc2b5bfccabc6b1df6da904e9dcc88383.tar.gz
Add a placeholder implementation of wcscoll() and wcsxfrm() which gives
locale-sensitive collation only in single-byte locales, and just does binary comparison for the others with extended character sets.
-rw-r--r--include/wchar.h2
-rw-r--r--lib/libc/string/Makefile.inc10
-rw-r--r--lib/libc/string/wcscoll.3112
-rw-r--r--lib/libc/string/wcscoll.c97
-rw-r--r--lib/libc/string/wcsxfrm.3124
-rw-r--r--lib/libc/string/wcsxfrm.c115
6 files changed, 456 insertions, 4 deletions
diff --git a/include/wchar.h b/include/wchar.h
index 6a85b4d..b62eb19 100644
--- a/include/wchar.h
+++ b/include/wchar.h
@@ -141,6 +141,7 @@ size_t wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
wchar_t *wcscat(wchar_t * __restrict, const wchar_t * __restrict);
wchar_t *wcschr(const wchar_t *, wchar_t);
int wcscmp(const wchar_t *, const wchar_t *);
+int wcscoll(const wchar_t *, const wchar_t *);
wchar_t *wcscpy(wchar_t * __restrict, const wchar_t * __restrict);
size_t wcscspn(const wchar_t *, const wchar_t *);
size_t wcsftime(wchar_t * __restrict, size_t, const wchar_t * __restrict,
@@ -156,6 +157,7 @@ size_t wcsrtombs(char * __restrict, const wchar_t ** __restrict, size_t,
mbstate_t * __restrict);
size_t wcsspn(const wchar_t *, const wchar_t *);
wchar_t *wcsstr(const wchar_t * __restrict, const wchar_t * __restrict);
+size_t wcsxfrm(wchar_t * __restrict, const wchar_t * __restrict, size_t);
int wctob(wint_t);
double wcstod(const wchar_t * __restrict, wchar_t ** __restrict);
wchar_t *wcstok(wchar_t * __restrict, const wchar_t * __restrict,
diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc
index ce73ee1..63c7af0 100644
--- a/lib/libc/string/Makefile.inc
+++ b/lib/libc/string/Makefile.inc
@@ -12,9 +12,11 @@ MISRCS+=bcmp.c bcopy.c bzero.c ffs.c index.c memccpy.c memchr.c memcmp.c \
strlcat.c strlcpy.c strlen.c strmode.c strncat.c strncmp.c strncpy.c \
strcasestr.c strnstr.c \
strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
- strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscpy.c wcscspn.c \
+ strxfrm.c swab.c wcscat.c wcschr.c wcscmp.c wcscoll.c wcscpy.c \
+ wcscspn.c \
wcslcat.c wcslcpy.c wcslen.c wcsncat.c wcsncmp.c wcsncpy.c wcspbrk.c \
- wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wmemchr.c wmemcmp.c \
+ wcsrchr.c wcsspn.c wcsstr.c wcstok.c wcswidth.c wcsxfrm.c wmemchr.c \
+ wmemcmp.c \
wmemcpy.c wmemmove.c wmemset.c
@@ -28,8 +30,8 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 memccpy.3 memchr.3 \
memcmp.3 memcpy.3 memmove.3 memset.3 rindex.3 strcasecmp.3 strcat.3 \
strchr.3 strcmp.3 strcoll.3 strcpy.3 strcspn.3 strdup.3 strerror.3 \
string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strrchr.3 strsep.3 \
- strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcstok.3 wcswidth.3 \
- wmemchr.3
+ strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 wcscoll.3 wcstok.3 \
+ wcswidth.3 wcsxfrm.3 wmemchr.3
MLINKS+=strcasecmp.3 strncasecmp.3
MLINKS+=strcat.3 strncat.3
diff --git a/lib/libc/string/wcscoll.3 b/lib/libc/string/wcscoll.3
new file mode 100644
index 0000000..fb6a36b
--- /dev/null
+++ b/lib/libc/string/wcscoll.3
@@ -0,0 +1,112 @@
+.\" Copyright (c) 1990, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Chris Torek and the American National Standards Committee X3,
+.\" on Information Processing Systems.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)strcoll.3 8.1 (Berkeley) 6/4/93
+.\" FreeBSD: src/lib/libc/string/strcoll.3,v 1.11 2001/10/01 16:09:00 ru Exp
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2002
+.Dt WCSCOLL 3
+.Os
+.Sh NAME
+.Nm wcscoll
+.Nd compare wide strings according to current collation
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In wchar.h
+.Ft int
+.Fn wcscoll "const wchar_t *s1" "const wchar_t *s2"
+.Sh DESCRIPTION
+The
+.Fn wcscoll
+function compares the null-terminated wide character strings
+.Fa s1
+and
+.Fa s2
+according to the current locale collation order.
+In the
+.Dq Li C
+locale,
+.Fn wcscoll
+is equivalent to
+.Fn wcscmp .
+.Sh RETURN VALUES
+The
+.Fn wcscoll
+function
+returns an integer greater than, equal to, or less than 0,
+if
+.Fa s1
+is greater than, equal to, or less than
+.Fa s2 .
+.Pp
+No return value is reserved to indicate errors;
+callers should set
+.Va errno
+to 0 before calling
+.Fn wcscoll .
+If it is non-zero upon return from
+.Fn wcscoll ,
+an error has occurred.
+.Sh ERRORS
+The
+.Fn wcscoll
+function will fail if:
+.Bl -tag -width Er
+.It Bq Er EILSEQ
+An invalid wide character code was specified.
+.It Bq Er ENOMEM
+Cannot allocate enough memory for temporary buffers.
+.El
+.Sh SEE ALSO
+.Xr setlocale 3 ,
+.Xr strcoll 3 ,
+.Xr wcscmp 3 ,
+.Xr wcsxfrm 3
+.Sh STANDARDS
+The
+.Fn wcscoll
+function
+conforms to
+.St -isoC-99 .
+.Sh BUGS
+The current implementation of
+.Fn wcscoll
+only works in single-byte
+.Dv LC_CTYPE
+locales, and falls back to using
+.Fn wcscmp
+in locales with extended character sets.
diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c
new file mode 100644
index 0000000..79dad7d
--- /dev/null
+++ b/lib/libc/string/wcscoll.c
@@ -0,0 +1,97 @@
+/*-
+ * Copyright (c) 2002 Tim J. Robbins
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include "collate.h"
+
+static char *__mbsdup(const wchar_t *);
+
+/*
+ * wcscoll() placeholder.
+ *
+ * When a collation table is loaded and the locale's character set is
+ * single-byte, both wide strings are converted to multibyte form and handed
+ * to strcoll().  In every other situation the result is that of a plain
+ * wcscmp().
+ *
+ * No return value signals an error; if the conversion to multibyte form
+ * fails, errno is left as set by the failing call and the wcscmp() result is
+ * returned instead.
+ */
+int
+wcscoll(const wchar_t *ws1, const wchar_t *ws2)
+{
+	char *left, *right;
+	int result, saved_errno;
+
+	/*
+	 * No collation order available (none defined or it failed to load),
+	 * or an extended character set is in use: binary comparison only.
+	 */
+	if (__collate_load_error || MB_CUR_MAX > 1)
+		return (wcscmp(ws1, ws2));
+
+	/* The second string is only converted once the first one succeeded. */
+	left = __mbsdup(ws1);
+	right = (left != NULL) ? __mbsdup(ws2) : NULL;
+
+	if (left == NULL || right == NULL) {
+		/*
+		 * Conversion failed (allocation failure or a wide character
+		 * with no multibyte form).  Return the wcscmp() ordering so
+		 * that callers ignoring errno still get a usable answer, and
+		 * keep errno intact across free() so that careful callers can
+		 * detect the failure.
+		 */
+		saved_errno = errno;
+		free(left);
+		errno = saved_errno;
+		return (wcscmp(ws1, ws2));
+	}
+
+	result = strcoll(left, right);
+
+	/* Preserve whatever errno strcoll() left behind across the frees. */
+	saved_errno = errno;
+	free(left);
+	free(right);
+	errno = saved_errno;
+
+	return (result);
+}
+
+/*
+ * Return a freshly malloc()ed multibyte copy of the wide string ws, converted
+ * with wcsrtombs() starting from a zeroed conversion state.  Returns NULL if
+ * the string cannot be converted or memory cannot be allocated.  The caller
+ * owns the returned buffer and must free() it.
+ */
+static char *
+__mbsdup(const wchar_t *ws)
+{
+	mbstate_t st;
+	const wchar_t *cursor;
+	size_t nbytes;
+	char *out;
+
+	/* Pass 1: a NULL destination only measures the converted length. */
+	cursor = ws;
+	memset(&st, 0, sizeof(st));
+	nbytes = wcsrtombs(NULL, &cursor, 0, &st);
+	if (nbytes == (size_t)-1)
+		return (NULL);
+
+	/* One extra byte for the terminating null. */
+	out = malloc(nbytes + 1);
+	if (out == NULL)
+		return (NULL);
+
+	/* Pass 2: convert for real, again from a zeroed state. */
+	memset(&st, 0, sizeof(st));
+	wcsrtombs(out, &ws, nbytes + 1, &st);
+
+	return (out);
+}
diff --git a/lib/libc/string/wcsxfrm.3 b/lib/libc/string/wcsxfrm.3
new file mode 100644
index 0000000..55c03f8
--- /dev/null
+++ b/lib/libc/string/wcsxfrm.3
@@ -0,0 +1,124 @@
+.\" Copyright (c) 1990, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" This code is derived from software contributed to Berkeley by
+.\" Chris Torek and the American National Standards Committee X3,
+.\" on Information Processing Systems.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)strxfrm.3 8.1 (Berkeley) 6/4/93
+.\" FreeBSD: src/lib/libc/string/strxfrm.3,v 1.16 2002/09/06 11:24:06 tjr Exp
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2002
+.Dt WCSXFRM 3
+.Os
+.Sh NAME
+.Nm wcsxfrm
+.Nd transform a wide string under locale
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In wchar.h
+.Ft size_t
+.Fn wcsxfrm "wchar_t * restrict dst" "const wchar_t * restrict src" "size_t n"
+.Sh DESCRIPTION
+The
+.Fn wcsxfrm
+function transforms a null-terminated wide character string pointed to by
+.Fa src
+according to the current locale collation order
+then copies the transformed string
+into
+.Fa dst .
+No more than
+.Fa n
+wide characters are copied into
+.Fa dst ,
+including the terminating null character added.
+If
+.Fa n
+is 0
+(which is useful for determining the size needed
+for the transformed string),
+.Fa dst
+is permitted to be a NULL pointer.
+.Pp
+Comparing two strings using
+.Fn wcscmp
+after
+.Fn wcsxfrm
+is equivalent to comparing
+two original strings with
+.Fn wcscoll .
+.Sh RETURN VALUES
+Upon successful completion,
+.Fn wcsxfrm
+returns the length of the transformed string not including
+the terminating null character.
+If this value is
+.Fa n
+or more, the contents of
+.Fa dst
+are indeterminate.
+.Sh SEE ALSO
+.Xr setlocale 3 ,
+.Xr strxfrm 3 ,
+.Xr wcscoll 3 ,
+.Xr wcscmp 3
+.Sh STANDARDS
+The
+.Fn wcsxfrm
+function
+conforms to
+.St -isoC-99 .
+.Sh BUGS
+The current implementation of
+.Fn wcsxfrm
+only works in single-byte
+.Dv LC_CTYPE
+locales, and falls back to using
+.Fn wcsncpy
+in locales with extended character sets.
+.Pp
+Comparing two strings using
+.Fn wcscmp
+after
+.Fn wcsxfrm
+is
+.Em not
+always equivalent to comparison with
+.Fn wcscoll ;
+.Fn wcsxfrm
+only stores information about primary collation weights into
+.Fa dst ,
+whereas
+.Fn wcscoll
+compares characters using both primary and secondary weights.
diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c
new file mode 100644
index 0000000..4be6e46
--- /dev/null
+++ b/lib/libc/string/wcsxfrm.c
@@ -0,0 +1,115 @@
+/*-
+ * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
+ * at Electronni Visti IA, Kiev, Ukraine.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if 0
+__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp ");
+#endif
+__FBSDID("$FreeBSD$");
+
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include "collate.h"
+
+static char *__mbsdup(const wchar_t *);
+
+/*
+ * Placeholder wcsxfrm() implementation.  See wcscoll.c for a description of
+ * the logic used.
+ *
+ * At most len wide characters, terminating null included, are stored in
+ * dest; nothing is written when len is 0.  The return value is the length of
+ * the complete transformed string (terminating null not counted), which may
+ * be len or more when the output was truncated.
+ *
+ * If the source string cannot be converted to multibyte form (out of memory
+ * or an unconvertible wide character), the string is copied untransformed,
+ * exactly as in locales without a usable collation order, and errno is left
+ * as set by the failing call.  This mirrors the fallback in wcscoll().
+ */
+size_t
+wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len)
+{
+	int prim, sec, l;
+	size_t slen;
+	char *mbsrc, *s, *ss;
+
+	/* An empty string transforms to an empty string. */
+	if (*src == L'\0') {
+		if (len != 0)
+			*dest = L'\0';
+		return (0);
+	}
+
+	/*
+	 * Binary fallback: no collation order available, an extended
+	 * character set, or the multibyte conversion failed.  The conversion
+	 * is only attempted when the first two tests are false, and its
+	 * result must be checked here because a NULL mbsrc would otherwise be
+	 * handed to __collate_substitute() and dereferenced below.
+	 */
+	if (__collate_load_error || MB_CUR_MAX > 1 ||
+	    (mbsrc = __mbsdup(src)) == NULL) {
+		slen = wcslen(src);
+		if (len > 0) {
+			if (slen < len)
+				wcscpy(dest, src);
+			else {
+				/* Truncate, always leaving room for the null. */
+				wcsncpy(dest, src, len - 1);
+				dest[len - 1] = L'\0';
+			}
+		}
+		return (slen);
+	}
+
+	slen = 0;
+	prim = sec = 0;
+	ss = s = __collate_substitute(mbsrc);
+	while (*s != '\0') {
+		/*
+		 * Advance until an element with a non-zero primary weight is
+		 * found or the string ends; l is the step __collate_lookup()
+		 * reports for the element it examined.
+		 */
+		while (*s != '\0' && prim == 0) {
+			__collate_lookup(s, &l, &prim, &sec);
+			s += l;
+		}
+		if (prim != 0) {
+			/*
+			 * Store the weight only while more than one slot is
+			 * left (the last one is reserved for the null), but
+			 * keep counting so the full length can be returned.
+			 */
+			if (len > 1) {
+				*dest++ = (wchar_t)prim;
+				len--;
+			}
+			slen++;
+			prim = 0;
+		}
+	}
+	free(ss);
+	free(mbsrc);
+	if (len != 0)
+		*dest = L'\0';
+
+	return (slen);
+}
+
+/*
+ * Return a freshly malloc()ed multibyte copy of the wide string ws, converted
+ * with wcsrtombs() starting from a zeroed conversion state.  Returns NULL if
+ * the string cannot be converted or memory cannot be allocated.  The caller
+ * owns the returned buffer and must free() it.
+ */
+static char *
+__mbsdup(const wchar_t *ws)
+{
+	mbstate_t st;
+	const wchar_t *cursor;
+	size_t nbytes;
+	char *out;
+
+	/* Pass 1: a NULL destination only measures the converted length. */
+	cursor = ws;
+	memset(&st, 0, sizeof(st));
+	nbytes = wcsrtombs(NULL, &cursor, 0, &st);
+	if (nbytes == (size_t)-1)
+		return (NULL);
+
+	/* One extra byte for the terminating null. */
+	out = malloc(nbytes + 1);
+	if (out == NULL)
+		return (NULL);
+
+	/* Pass 2: convert for real, again from a zeroed state. */
+	memset(&st, 0, sizeof(st));
+	wcsrtombs(out, &ws, nbytes + 1, &st);
+
+	return (out);
+}
OpenPOWER on IntegriCloud