summaryrefslogtreecommitdiffstats
path: root/lib/libc/string
diff options
context:
space:
mode:
authorngie <ngie@FreeBSD.org>2015-11-16 02:00:12 +0000
committerngie <ngie@FreeBSD.org>2015-11-16 02:00:12 +0000
commitfd9367ff35d3ef67f52f8612ba6df3c487b90f4d (patch)
tree68c676723f5f9cd1d66be903cfdce6b37dcc3719 /lib/libc/string
parent3d588fbc9bc229fdd93d6aaf6340583c2bcaca9f (diff)
parentb708fe19431dfeacfe97dbc276190934bd53b483 (diff)
downloadFreeBSD-src-fd9367ff35d3ef67f52f8612ba6df3c487b90f4d.zip
FreeBSD-src-fd9367ff35d3ef67f52f8612ba6df3c487b90f4d.tar.gz
MFhead @ r290899
Diffstat (limited to 'lib/libc/string')
-rw-r--r--lib/libc/string/strcoll.c108
-rw-r--r--lib/libc/string/strxfrm.c64
-rw-r--r--lib/libc/string/wcscoll.c213
-rw-r--r--lib/libc/string/wcsxfrm.c84
4 files changed, 283 insertions, 186 deletions
diff --git a/lib/libc/string/strcoll.c b/lib/libc/string/strcoll.c
index a918fca..7675e0a 100644
--- a/lib/libc/string/strcoll.c
+++ b/lib/libc/string/strcoll.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -35,63 +36,78 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
+#include <wchar.h>
#include "collate.h"
-#include <stdio.h>
+/*
+ * In order to properly handle multibyte locales, its easiest to just
+ * convert to wide characters and then use wcscoll. However if an
+ * error occurs, we gracefully fall back to simple strcmp. Caller
+ * should check errno.
+ */
int
strcoll_l(const char *s, const char *s2, locale_t locale)
{
- int len, len2, prim, prim2, sec, sec2, ret, ret2;
- const char *t, *t2;
- char *tt, *tt2;
+ int ret;
+ wchar_t *t1 = NULL, *t2 = NULL;
+ wchar_t *w1 = NULL, *w2 = NULL;
+ const char *cs1, *cs2;
+ mbstate_t mbs1;
+ mbstate_t mbs2;
+ size_t sz1, sz2;
+
+ memset(&mbs1, 0, sizeof (mbstate_t));
+ memset(&mbs2, 0, sizeof (mbstate_t));
+
+ /*
+ * The mbsrtowcs_l function can set the src pointer to null upon
+ * failure, so it should act on a copy to avoid:
+ * - sending null pointer to strcmp
+ * - having strcoll/strcoll_l change *s or *s2 to null
+ */
+ cs1 = s;
+ cs2 = s2;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
if (table->__collate_load_error)
- return strcmp(s, s2);
-
- len = len2 = 1;
- ret = ret2 = 0;
- if (table->__collate_substitute_nontrivial) {
- t = tt = __collate_substitute(table, s);
- t2 = tt2 = __collate_substitute(table, s2);
- } else {
- tt = tt2 = NULL;
- t = s;
- t2 = s2;
- }
- while(*t && *t2) {
- prim = prim2 = 0;
- while(*t && !prim) {
- __collate_lookup(table, t, &len, &prim, &sec);
- t += len;
- }
- while(*t2 && !prim2) {
- __collate_lookup(table, t2, &len2, &prim2, &sec2);
- t2 += len2;
- }
- if(!prim || !prim2)
- break;
- if(prim != prim2) {
- ret = prim - prim2;
- goto end;
- }
- if(!ret2)
- ret2 = sec - sec2;
- }
- if(!*t && *t2)
- ret = -(int)((u_char)*t2);
- else if(*t && !*t2)
- ret = (u_char)*t;
- else if(!*t && !*t2)
- ret = ret2;
- end:
- free(tt);
- free(tt2);
-
- return ret;
+ goto error;
+
+ sz1 = strlen(s) + 1;
+ sz2 = strlen(s2) + 1;
+
+ /*
+ * Simple assumption: conversion to wide format is strictly
+ * reducing, i.e. a single byte (or multibyte character)
+ * cannot result in multiple wide characters.
+ */
+ if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
+ goto error;
+ w1 = t1;
+ if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
+ goto error;
+ w2 = t2;
+
+ if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1)
+ goto error;
+
+ if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1)
+ goto error;
+
+ ret = wcscoll_l(w1, w2, locale);
+ free(t1);
+ free(t2);
+
+ return (ret);
+
+error:
+ free(t1);
+ free(t2);
+ return (strcmp(s, s2));
}
int
diff --git a/lib/libc/string/strxfrm.c b/lib/libc/string/strxfrm.c
index b758b0c..06ae639 100644
--- a/lib/libc/string/strxfrm.c
+++ b/lib/libc/string/strxfrm.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -35,6 +36,8 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
+#include <wchar.h>
#include "collate.h"
size_t
@@ -48,9 +51,10 @@ strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
size_t
strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale)
{
- int prim, sec, l;
size_t slen;
- char *s, *ss;
+ size_t xlen;
+ wchar_t *wcs = NULL;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
@@ -58,32 +62,42 @@ strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, local
if (!*src) {
if (len > 0)
*dest = '\0';
- return 0;
+ return (0);
}
+ /*
+ * The conversion from multibyte to wide character strings is
+ * strictly reducing (one byte of an mbs cannot expand to more
+ * than one wide character.)
+ */
+ slen = strlen(src);
+
if (table->__collate_load_error)
- return strlcpy(dest, src, len);
-
- slen = 0;
- prim = sec = 0;
- ss = s = __collate_substitute(table, src);
- while (*s) {
- while (*s && !prim) {
- __collate_lookup(table, s, &l, &prim, &sec);
- s += l;
- }
- if (prim) {
- if (len > 1) {
- *dest++ = (char)prim;
- len--;
- }
- slen++;
- prim = 0;
- }
+ goto error;
+
+ if ((wcs = malloc((slen + 1) * sizeof (wchar_t))) == NULL)
+ goto error;
+
+ if (mbstowcs_l(wcs, src, slen + 1, locale) == (size_t)-1)
+ goto error;
+
+ if ((xlen = _collate_sxfrm(table, wcs, dest, len)) == (size_t)-1)
+ goto error;
+
+ free(wcs);
+
+ if (len > xlen) {
+ dest[xlen] = 0;
+ } else if (len) {
+ dest[len-1] = 0;
}
- free(ss);
- if (len > 0)
- *dest = '\0';
- return slen;
+ return (xlen);
+
+error:
+ /* errno should be set to ENOMEM if malloc failed */
+ free(wcs);
+ strlcpy(dest, src, len);
+
+ return (slen);
}
diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c
index 3c51015..ee0e72a 100644
--- a/lib/libc/string/wcscoll.c
+++ b/lib/libc/string/wcscoll.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002 Tim J. Robbins
* All rights reserved.
*
@@ -38,50 +39,181 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include "collate.h"
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder implementation of wcscoll(). Attempts to use the single-byte
- * collation ordering where possible, and falls back on wcscmp() in locales
- * with extended character sets.
- */
int
wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
{
- char *mbs1, *mbs2;
- int diff, sverrno;
+ int len1, len2, pri1, pri2, ret;
+ wchar_t *tr1 = NULL, *tr2 = NULL;
+ int direc, pass;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
- if (table->__collate_load_error || MB_CUR_MAX > 1)
+ if (table->__collate_load_error)
/*
- * Locale has no special collating order, could not be
- * loaded, or has an extended character set; do a fast binary
- * comparison.
+ * Locale has no special collating order or could not be
+ * loaded, do a fast binary comparison.
*/
return (wcscmp(ws1, ws2));
- if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) {
- /*
- * Out of memory or illegal wide chars; fall back to wcscmp()
- * but leave errno indicating the error. Callers that don't
- * check for error will get a reasonable but often slightly
- * incorrect result.
- */
- sverrno = errno;
- free(mbs1);
- errno = sverrno;
- return (wcscmp(ws1, ws2));
+ ret = 0;
+
+ /*
+ * Once upon a time we had code to try to optimize this, but
+ * it turns out that you really can't make many assumptions
+ * safely. You absolutely have to run this pass by pass,
+ * because some passes will be ignored for a given character,
+ * while others will not. Simpler locales will benefit from
+ * having fewer passes, and most comparisions should resolve
+ * during the primary pass anyway.
+ *
+ * Note that we do one final extra pass at the end to pick
+ * up UNDEFINED elements. There is special handling for them.
+ */
+ for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+ const int32_t *st1 = NULL;
+ const int32_t *st2 = NULL;
+ const wchar_t *w1 = ws1;
+ const wchar_t *w2 = ws2;
+ int check1, check2;
+
+ /* special pass for UNDEFINED */
+ if (pass == table->info->directive_count) {
+ direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+ } else {
+ direc = table->info->directive[pass];
+ }
+
+ if (direc & DIRECTIVE_BACKWARD) {
+ wchar_t *bp, *fp, c;
+ if ((tr1 = wcsdup(w1)) == NULL)
+ goto fail;
+ bp = tr1;
+ fp = tr1 + wcslen(tr1) - 1;
+ while (bp < fp) {
+ c = *bp;
+ *bp++ = *fp;
+ *fp-- = c;
+ }
+ if ((tr2 = wcsdup(w2)) == NULL)
+ goto fail;
+ bp = tr2;
+ fp = tr2 + wcslen(tr2) - 1;
+ while (bp < fp) {
+ c = *bp;
+ *bp++ = *fp;
+ *fp-- = c;
+ }
+ w1 = tr1;
+ w2 = tr2;
+ }
+
+ if (direc & DIRECTIVE_POSITION) {
+ while (*w1 && *w2) {
+ pri1 = pri2 = 0;
+ check1 = check2 = 1;
+ while ((pri1 == pri2) && (check1 || check2)) {
+ if (check1) {
+ _collate_lookup(table, w1, &len1,
+ &pri1, pass, &st1);
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ if (!pri1) {
+ pri1 = COLLATE_MAX_PRIORITY;
+ st1 = NULL;
+ }
+ check1 = (st1 != NULL);
+ }
+ if (check2) {
+ _collate_lookup(table, w2, &len2,
+ &pri2, pass, &st2);
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ if (!pri2) {
+ pri2 = COLLATE_MAX_PRIORITY;
+ st2 = NULL;
+ }
+ check2 = (st2 != NULL);
+ }
+ }
+ if (pri1 != pri2) {
+ ret = pri1 - pri2;
+ goto end;
+ }
+ w1 += len1;
+ w2 += len2;
+ }
+ } else {
+ while (*w1 && *w2) {
+ pri1 = pri2 = 0;
+ check1 = check2 = 1;
+ while ((pri1 == pri2) && (check1 || check2)) {
+ while (check1 && *w1) {
+ _collate_lookup(table, w1,
+ &len1, &pri1, pass, &st1);
+ if (pri1 > 0)
+ break;
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ st1 = NULL;
+ w1 += 1;
+ }
+ check1 = (st1 != NULL);
+ while (check2 && *w2) {
+ _collate_lookup(table, w2,
+ &len2, &pri2, pass, &st2);
+ if (pri2 > 0)
+ break;
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ st2 = NULL;
+ w2 += 1;
+ }
+ check2 = (st2 != NULL);
+ if (!pri1 || !pri2)
+ break;
+ }
+ if (!pri1 || !pri2)
+ break;
+ if (pri1 != pri2) {
+ ret = pri1 - pri2;
+ goto end;
+ }
+ w1 += len1;
+ w2 += len2;
+ }
+ }
+ if (!*w1) {
+ if (*w2) {
+ ret = -(int)*w2;
+ goto end;
+ }
+ } else {
+ ret = *w1;
+ goto end;
+ }
}
+ ret = 0;
- diff = strcoll_l(mbs1, mbs2, locale);
- sverrno = errno;
- free(mbs1);
- free(mbs2);
- errno = sverrno;
+end:
+ free(tr1);
+ free(tr2);
- return (diff);
+ return (ret);
+
+fail:
+ ret = wcscmp(ws1, ws2);
+ goto end;
}
int
@@ -89,24 +221,3 @@ wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{
return wcscoll_l(ws1, ws2, __get_locale());
}
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
- static const mbstate_t initial;
- mbstate_t st;
- const wchar_t *wcp;
- size_t len;
- char *mbs;
-
- wcp = ws;
- st = initial;
- if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
- return (NULL);
- if ((mbs = malloc(len + 1)) == NULL)
- return (NULL);
- st = initial;
- wcsrtombs(mbs, &ws, len + 1, &st);
-
- return (mbs);
-}
diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c
index cea667e..3d6c960 100644
--- a/lib/libc/string/wcsxfrm.c
+++ b/lib/libc/string/wcsxfrm.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -31,9 +32,6 @@
*/
#include <sys/cdefs.h>
-#if 0
-__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp ");
-#endif
__FBSDID("$FreeBSD$");
#include <stdlib.h>
@@ -41,18 +39,10 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include "collate.h"
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of
- * the logic used.
- */
size_t
wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, locale_t locale)
{
- int prim, sec, l;
size_t slen;
- char *mbsrc, *s, *ss;
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
@@ -63,67 +53,33 @@ wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len,
return (0);
}
- if (table->__collate_load_error || MB_CUR_MAX > 1) {
- slen = wcslen(src);
- if (len > 0) {
- if (slen < len)
- wcscpy(dest, src);
- else {
- wcsncpy(dest, src, len - 1);
- dest[len - 1] = L'\0';
- }
- }
- return (slen);
+ if ((table->__collate_load_error) ||
+ ((slen = _collate_wxfrm(table, src, dest, len)) == (size_t)-1)) {
+ goto error;
}
- mbsrc = __mbsdup(src);
- slen = 0;
- prim = sec = 0;
- ss = s = __collate_substitute(table, mbsrc);
- while (*s != '\0') {
- while (*s != '\0' && prim == 0) {
- __collate_lookup(table, s, &l, &prim, &sec);
- s += l;
- }
- if (prim != 0) {
- if (len > 1) {
- *dest++ = (wchar_t)prim;
- len--;
- }
- slen++;
- prim = 0;
- }
+ /* Add null termination at the correct location. */
+ if (len > slen) {
+ dest[slen] = 0;
+ } else if (len) {
+ dest[len-1] = 0;
}
- free(ss);
- free(mbsrc);
- if (len != 0)
- *dest = L'\0';
return (slen);
+
+error:
+ slen = wcslen(src);
+ if (slen < len)
+ (void) wcscpy(dest, src);
+ else {
+ (void) wcsncpy(dest, src, len - 1);
+ dest[len - 1] = L'\0';
+ }
+ return (slen);
}
+
size_t
wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len)
{
return wcsxfrm_l(dest, src, len, __get_locale());
}
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
- static const mbstate_t initial;
- mbstate_t st;
- const wchar_t *wcp;
- size_t len;
- char *mbs;
-
- wcp = ws;
- st = initial;
- if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
- return (NULL);
- if ((mbs = malloc(len + 1)) == NULL)
- return (NULL);
- st = initial;
- wcsrtombs(mbs, &ws, len + 1, &st);
-
- return (mbs);
-}
OpenPOWER on IntegriCloud