diff options
author | ngie <ngie@FreeBSD.org> | 2015-11-16 02:00:12 +0000 |
---|---|---|
committer | ngie <ngie@FreeBSD.org> | 2015-11-16 02:00:12 +0000 |
commit | fd9367ff35d3ef67f52f8612ba6df3c487b90f4d (patch) | |
tree | 68c676723f5f9cd1d66be903cfdce6b37dcc3719 /lib/libc/string | |
parent | 3d588fbc9bc229fdd93d6aaf6340583c2bcaca9f (diff) | |
parent | b708fe19431dfeacfe97dbc276190934bd53b483 (diff) | |
download | FreeBSD-src-fd9367ff35d3ef67f52f8612ba6df3c487b90f4d.zip FreeBSD-src-fd9367ff35d3ef67f52f8612ba6df3c487b90f4d.tar.gz |
MFhead @ r290899
Diffstat (limited to 'lib/libc/string')
-rw-r--r-- | lib/libc/string/strcoll.c | 108 | ||||
-rw-r--r-- | lib/libc/string/strxfrm.c | 64 | ||||
-rw-r--r-- | lib/libc/string/wcscoll.c | 213 | ||||
-rw-r--r-- | lib/libc/string/wcsxfrm.c | 84 |
4 files changed, 283 insertions, 186 deletions
diff --git a/lib/libc/string/strcoll.c b/lib/libc/string/strcoll.c index a918fca..7675e0a 100644 --- a/lib/libc/string/strcoll.c +++ b/lib/libc/string/strcoll.c @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. @@ -35,63 +36,78 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> #include <string.h> +#include <errno.h> +#include <wchar.h> #include "collate.h" -#include <stdio.h> +/* + * In order to properly handle multibyte locales, its easiest to just + * convert to wide characters and then use wcscoll. However if an + * error occurs, we gracefully fall back to simple strcmp. Caller + * should check errno. + */ int strcoll_l(const char *s, const char *s2, locale_t locale) { - int len, len2, prim, prim2, sec, sec2, ret, ret2; - const char *t, *t2; - char *tt, *tt2; + int ret; + wchar_t *t1 = NULL, *t2 = NULL; + wchar_t *w1 = NULL, *w2 = NULL; + const char *cs1, *cs2; + mbstate_t mbs1; + mbstate_t mbs2; + size_t sz1, sz2; + + memset(&mbs1, 0, sizeof (mbstate_t)); + memset(&mbs2, 0, sizeof (mbstate_t)); + + /* + * The mbsrtowcs_l function can set the src pointer to null upon + * failure, so it should act on a copy to avoid: + * - sending null pointer to strcmp + * - having strcoll/strcoll_l change *s or *s2 to null + */ + cs1 = s; + cs2 = s2; + FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if (table->__collate_load_error) - return strcmp(s, s2); - - len = len2 = 1; - ret = ret2 = 0; - if (table->__collate_substitute_nontrivial) { - t = tt = __collate_substitute(table, s); - t2 = tt2 = __collate_substitute(table, s2); - } else { - tt = tt2 = NULL; - t = s; - t2 = s2; - } - while(*t && *t2) { - prim = prim2 = 0; - while(*t && !prim) { - __collate_lookup(table, t, &len, &prim, &sec); - t += len; - } - while(*t2 && !prim2) { - __collate_lookup(table, t2, &len2, &prim2, &sec2); - t2 += len2; - } - if(!prim || !prim2) - break; - if(prim != prim2) { - ret = prim - prim2; - goto end; - } - if(!ret2) - ret2 = sec - sec2; - } - if(!*t && *t2) - ret = -(int)((u_char)*t2); - else if(*t && !*t2) - ret = (u_char)*t; - else if(!*t && !*t2) - ret = ret2; - end: - free(tt); - free(tt2); - - return ret; + goto error; + + sz1 = strlen(s) + 1; + sz2 = strlen(s2) + 1; + + /* + * Simple assumption: conversion to wide format is strictly + * reducing, i.e. a single byte (or multibyte character) + * cannot result in multiple wide characters. + */ + if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL) + goto error; + w1 = t1; + if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL) + goto error; + w2 = t2; + + if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1) + goto error; + + if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1) + goto error; + + ret = wcscoll_l(w1, w2, locale); + free(t1); + free(t2); + + return (ret); + +error: + free(t1); + free(t2); + return (strcmp(s, s2)); } int diff --git a/lib/libc/string/strxfrm.c b/lib/libc/string/strxfrm.c index b758b0c..06ae639 100644 --- a/lib/libc/string/strxfrm.c +++ b/lib/libc/string/strxfrm.c @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. @@ -35,6 +36,8 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> #include <string.h> +#include <errno.h> +#include <wchar.h> #include "collate.h" size_t @@ -48,9 +51,10 @@ strxfrm(char * __restrict dest, const char * __restrict src, size_t len) size_t strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale) { - int prim, sec, l; size_t slen; - char *s, *ss; + size_t xlen; + wchar_t *wcs = NULL; + FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; @@ -58,32 +62,42 @@ strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, local if (!*src) { if (len > 0) *dest = '\0'; - return 0; + return (0); } + /* + * The conversion from multibyte to wide character strings is + * strictly reducing (one byte of an mbs cannot expand to more + * than one wide character.) + */ + slen = strlen(src); + if (table->__collate_load_error) - return strlcpy(dest, src, len); - - slen = 0; - prim = sec = 0; - ss = s = __collate_substitute(table, src); - while (*s) { - while (*s && !prim) { - __collate_lookup(table, s, &l, &prim, &sec); - s += l; - } - if (prim) { - if (len > 1) { - *dest++ = (char)prim; - len--; - } - slen++; - prim = 0; - } + goto error; + + if ((wcs = malloc((slen + 1) * sizeof (wchar_t))) == NULL) + goto error; + + if (mbstowcs_l(wcs, src, slen + 1, locale) == (size_t)-1) + goto error; + + if ((xlen = _collate_sxfrm(table, wcs, dest, len)) == (size_t)-1) + goto error; + + free(wcs); + + if (len > xlen) { + dest[xlen] = 0; + } else if (len) { + dest[len-1] = 0; } - free(ss); - if (len > 0) - *dest = '\0'; - return slen; + return (xlen); + +error: + /* errno should be set to ENOMEM if malloc failed */ + free(wcs); + strlcpy(dest, src, len); + + return (slen); } diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c index 3c51015..ee0e72a 100644 --- a/lib/libc/string/wcscoll.c +++ b/lib/libc/string/wcscoll.c @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2002 Tim J. Robbins * All rights reserved. * @@ -38,50 +39,181 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "collate.h" -static char *__mbsdup(const wchar_t *); - -/* - * Placeholder implementation of wcscoll(). Attempts to use the single-byte - * collation ordering where possible, and falls back on wcscmp() in locales - * with extended character sets. - */ int wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale) { - char *mbs1, *mbs2; - int diff, sverrno; + int len1, len2, pri1, pri2, ret; + wchar_t *tr1 = NULL, *tr2 = NULL; + int direc, pass; + FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; - if (table->__collate_load_error || MB_CUR_MAX > 1) + if (table->__collate_load_error) /* - * Locale has no special collating order, could not be - * loaded, or has an extended character set; do a fast binary - * comparison. + * Locale has no special collating order or could not be + * loaded, do a fast binary comparison. */ return (wcscmp(ws1, ws2)); - if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) { - /* - * Out of memory or illegal wide chars; fall back to wcscmp() - * but leave errno indicating the error. Callers that don't - * check for error will get a reasonable but often slightly - * incorrect result. - */ - sverrno = errno; - free(mbs1); - errno = sverrno; - return (wcscmp(ws1, ws2)); + ret = 0; + + /* + * Once upon a time we had code to try to optimize this, but + * it turns out that you really can't make many assumptions + * safely. You absolutely have to run this pass by pass, + * because some passes will be ignored for a given character, + * while others will not. Simpler locales will benefit from + * having fewer passes, and most comparisions should resolve + * during the primary pass anyway. + * + * Note that we do one final extra pass at the end to pick + * up UNDEFINED elements. There is special handling for them. + */ + for (pass = 0; pass <= table->info->directive_count; pass++) { + + const int32_t *st1 = NULL; + const int32_t *st2 = NULL; + const wchar_t *w1 = ws1; + const wchar_t *w2 = ws2; + int check1, check2; + + /* special pass for UNDEFINED */ + if (pass == table->info->directive_count) { + direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED; + } else { + direc = table->info->directive[pass]; + } + + if (direc & DIRECTIVE_BACKWARD) { + wchar_t *bp, *fp, c; + if ((tr1 = wcsdup(w1)) == NULL) + goto fail; + bp = tr1; + fp = tr1 + wcslen(tr1) - 1; + while (bp < fp) { + c = *bp; + *bp++ = *fp; + *fp-- = c; + } + if ((tr2 = wcsdup(w2)) == NULL) + goto fail; + bp = tr2; + fp = tr2 + wcslen(tr2) - 1; + while (bp < fp) { + c = *bp; + *bp++ = *fp; + *fp-- = c; + } + w1 = tr1; + w2 = tr2; + } + + if (direc & DIRECTIVE_POSITION) { + while (*w1 && *w2) { + pri1 = pri2 = 0; + check1 = check2 = 1; + while ((pri1 == pri2) && (check1 || check2)) { + if (check1) { + _collate_lookup(table, w1, &len1, + &pri1, pass, &st1); + if (pri1 < 0) { + errno = EINVAL; + goto fail; + } + if (!pri1) { + pri1 = COLLATE_MAX_PRIORITY; + st1 = NULL; + } + check1 = (st1 != NULL); + } + if (check2) { + _collate_lookup(table, w2, &len2, + &pri2, pass, &st2); + if (pri2 < 0) { + errno = EINVAL; + goto fail; + } + if (!pri2) { + pri2 = COLLATE_MAX_PRIORITY; + st2 = NULL; + } + check2 = (st2 != NULL); + } + } + if (pri1 != pri2) { + ret = pri1 - pri2; + goto end; + } + w1 += len1; + w2 += len2; + } + } else { + while (*w1 && *w2) { + pri1 = pri2 = 0; + check1 = check2 = 1; + while ((pri1 == pri2) && (check1 || check2)) { + while (check1 && *w1) { + _collate_lookup(table, w1, + &len1, &pri1, pass, &st1); + if (pri1 > 0) + break; + if (pri1 < 0) { + errno = EINVAL; + goto fail; + } + st1 = NULL; + w1 += 1; + } + check1 = (st1 != NULL); + while (check2 && *w2) { + _collate_lookup(table, w2, + &len2, &pri2, pass, &st2); + if (pri2 > 0) + break; + if (pri2 < 0) { + errno = EINVAL; + goto fail; + } + st2 = NULL; + w2 += 1; + } + check2 = (st2 != NULL); + if (!pri1 || !pri2) + break; + } + if (!pri1 || !pri2) + break; + if (pri1 != pri2) { + ret = pri1 - pri2; + goto end; + } + w1 += len1; + w2 += len2; + } + } + if (!*w1) { + if (*w2) { + ret = -(int)*w2; + goto end; + } + } else { + ret = *w1; + goto end; + } } + ret = 0; - diff = strcoll_l(mbs1, mbs2, locale); - sverrno = errno; - free(mbs1); - free(mbs2); - errno = sverrno; +end: + free(tr1); + free(tr2); - return (diff); + return (ret); + +fail: + ret = wcscmp(ws1, ws2); + goto end; } int @@ -89,24 +221,3 @@ wcscoll(const wchar_t *ws1, const wchar_t *ws2) { return wcscoll_l(ws1, ws2, __get_locale()); } - -static char * -__mbsdup(const wchar_t *ws) -{ - static const mbstate_t initial; - mbstate_t st; - const wchar_t *wcp; - size_t len; - char *mbs; - - wcp = ws; - st = initial; - if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1) - return (NULL); - if ((mbs = malloc(len + 1)) == NULL) - return (NULL); - st = initial; - wcsrtombs(mbs, &ws, len + 1, &st); - - return (mbs); -} diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c index cea667e..3d6c960 100644 --- a/lib/libc/string/wcsxfrm.c +++ b/lib/libc/string/wcsxfrm.c @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. @@ -31,9 +32,6 @@ */ #include <sys/cdefs.h> -#if 0 -__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp "); -#endif __FBSDID("$FreeBSD$"); #include <stdlib.h> @@ -41,18 +39,10 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "collate.h" -static char *__mbsdup(const wchar_t *); - -/* - * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of - * the logic used. - */ size_t wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, locale_t locale) { - int prim, sec, l; size_t slen; - char *mbsrc, *s, *ss; FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; @@ -63,67 +53,33 @@ wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, return (0); } - if (table->__collate_load_error || MB_CUR_MAX > 1) { - slen = wcslen(src); - if (len > 0) { - if (slen < len) - wcscpy(dest, src); - else { - wcsncpy(dest, src, len - 1); - dest[len - 1] = L'\0'; - } - } - return (slen); + if ((table->__collate_load_error) || + ((slen = _collate_wxfrm(table, src, dest, len)) == (size_t)-1)) { + goto error; } - mbsrc = __mbsdup(src); - slen = 0; - prim = sec = 0; - ss = s = __collate_substitute(table, mbsrc); - while (*s != '\0') { - while (*s != '\0' && prim == 0) { - __collate_lookup(table, s, &l, &prim, &sec); - s += l; - } - if (prim != 0) { - if (len > 1) { - *dest++ = (wchar_t)prim; - len--; - } - slen++; - prim = 0; - } + /* Add null termination at the correct location. */ + if (len > slen) { + dest[slen] = 0; + } else if (len) { + dest[len-1] = 0; } - free(ss); - free(mbsrc); - if (len != 0) - *dest = L'\0'; return (slen); + +error: + slen = wcslen(src); + if (slen < len) + (void) wcscpy(dest, src); + else { + (void) wcsncpy(dest, src, len - 1); + dest[len - 1] = L'\0'; + } + return (slen); } + size_t wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len) { return wcsxfrm_l(dest, src, len, __get_locale()); } - -static char * -__mbsdup(const wchar_t *ws) -{ - static const mbstate_t initial; - mbstate_t st; - const wchar_t *wcp; - size_t len; - char *mbs; - - wcp = ws; - st = initial; - if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1) - return (NULL); - if ((mbs = malloc(len + 1)) == NULL) - return (NULL); - st = initial; - wcsrtombs(mbs, &ws, len + 1, &st); - - return (mbs); -} |