diff options
author | bapt <bapt@FreeBSD.org> | 2015-08-07 23:41:26 +0000 |
---|---|---|
committer | bapt <bapt@FreeBSD.org> | 2015-08-07 23:41:26 +0000 |
commit | 11a5726cda4d7b191129ed4220970bf8e00a8db6 (patch) | |
tree | 063f1101225b65ad20c1a8be3c96300b958d5a20 /lib/libc/string | |
parent | cbf6cdcbbb6c0212ee9911bbbe9e87053b54ab9f (diff) | |
download | FreeBSD-src-11a5726cda4d7b191129ed4220970bf8e00a8db6.zip FreeBSD-src-11a5726cda4d7b191129ed4220970bf8e00a8db6.tar.gz |
The collate functions within libc have been using versions 1 and 1.2 of the
packed LC_COLLATE binary formats. These were generated with the colldef
tool, but the new LC_COLLATE files are going to be generated by the new
localedef tool using CLDR POSIX files as input. The BSD-flavored
version of localedef identifies the format as "BSD 1.0". Any
LC_COLLATE file with a different version will simply not be loaded, and
all LC* categories will get set to "C" (aka "POSIX") locale.
This work is based off of Nexenta's contribution to Illumos.
The integration with xlocale is John Marino's work for Dragonfly.
The following commits will enable the localedef tool, disable the colldef
tool, add the generated colldef directory, and finally remove colldef from
base.
The only differences from Dragonfly are:
- a few fixes to build with clang
- identification of the flavor as "BSD 1.0" instead of "Dragonfly 4.4"
Obtained from: Dragonfly
Diffstat (limited to 'lib/libc/string')
-rw-r--r-- | lib/libc/string/strcoll.c | 112 | ||||
-rw-r--r-- | lib/libc/string/strxfrm.c | 66 | ||||
-rw-r--r-- | lib/libc/string/wcsxfrm.c | 84 |
3 files changed, 127 insertions, 135 deletions
diff --git a/lib/libc/string/strcoll.c b/lib/libc/string/strcoll.c index a918fca..5bad40c 100644 --- a/lib/libc/string/strcoll.c +++ b/lib/libc/string/strcoll.c @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. @@ -35,63 +36,82 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> #include <string.h> +#include <errno.h> +#include <wchar.h> #include "collate.h" -#include <stdio.h> +/* + * In order to properly handle multibyte locales, its easiet to just + * convert to wide characters and then use wcscoll. However if an + * error occurs, we gracefully fall back to simple strcmp. Caller + * should check errno. + */ int strcoll_l(const char *s, const char *s2, locale_t locale) { - int len, len2, prim, prim2, sec, sec2, ret, ret2; - const char *t, *t2; - char *tt, *tt2; + int ret; + wchar_t *t1 = NULL, *t2 = NULL; + wchar_t *w1 = NULL, *w2 = NULL; + const char *cs1, *cs2; + mbstate_t mbs1; + mbstate_t mbs2; + size_t sz1, sz2; + + memset(&mbs1, 0, sizeof (mbstate_t)); + memset(&mbs2, 0, sizeof (mbstate_t)); + + /* + * The mbsrtowcs_l function can set the src pointer to null upon + * failure, so it should act on a copy to avoid: + * - sending null pointer to strcmp + * - having strcoll/strcoll_l change *s or *s2 to null + */ + cs1 = s; + cs2 = s2; + FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; if (table->__collate_load_error) - return strcmp(s, s2); - - len = len2 = 1; - ret = ret2 = 0; - if (table->__collate_substitute_nontrivial) { - t = tt = __collate_substitute(table, s); - t2 = tt2 = __collate_substitute(table, s2); - } else { - tt = tt2 = NULL; - t = s; - t2 = s2; - } - while(*t && *t2) { - prim = prim2 = 0; - while(*t && !prim) { - __collate_lookup(table, t, &len, &prim, &sec); - t += len; - } - while(*t2 && !prim2) { - __collate_lookup(table, t2, &len2, 
&prim2, &sec2); - t2 += len2; - } - if(!prim || !prim2) - break; - if(prim != prim2) { - ret = prim - prim2; - goto end; - } - if(!ret2) - ret2 = sec - sec2; - } - if(!*t && *t2) - ret = -(int)((u_char)*t2); - else if(*t && !*t2) - ret = (u_char)*t; - else if(!*t && !*t2) - ret = ret2; - end: - free(tt); - free(tt2); - - return ret; + goto error; + + sz1 = strlen(s) + 1; + sz2 = strlen(s2) + 1; + + /* + * Simple assumption: conversion to wide format is strictly + * reducing, i.e. a single byte (or multibyte character) + * cannot result in multiple wide characters. + */ + if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL) + goto error; + w1 = t1; + if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL) + goto error; + w2 = t2; + + if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1) + goto error; + + if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1) + goto error; + + ret = wcscoll_l(w1, w2, locale); + if (t1) + free(t1); + if (t2) + free(t2); + + return (ret); + +error: + if (t1) + free(t1); + if (t2) + free(t2); + return (strcmp(s, s2)); } int diff --git a/lib/libc/string/strxfrm.c b/lib/libc/string/strxfrm.c index b758b0c..8b25b0e 100644 --- a/lib/libc/string/strxfrm.c +++ b/lib/libc/string/strxfrm.c @@ -1,4 +1,5 @@ /*- + * Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. 
@@ -35,6 +36,8 @@ __FBSDID("$FreeBSD$"); #include <stdlib.h> #include <string.h> +#include <errno.h> +#include <wchar.h> #include "collate.h" size_t @@ -48,9 +51,10 @@ strxfrm(char * __restrict dest, const char * __restrict src, size_t len) size_t strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale) { - int prim, sec, l; size_t slen; - char *s, *ss; + size_t xlen; + wchar_t *wcs = NULL; + FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; @@ -58,32 +62,44 @@ strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, local if (!*src) { if (len > 0) *dest = '\0'; - return 0; + return (0); } + /* + * The conversion from multibyte to wide character strings is + * strictly reducing (one byte of an mbs cannot expand to more + * than one wide character.) + */ + slen = strlen(src); + if (table->__collate_load_error) - return strlcpy(dest, src, len); - - slen = 0; - prim = sec = 0; - ss = s = __collate_substitute(table, src); - while (*s) { - while (*s && !prim) { - __collate_lookup(table, s, &l, &prim, &sec); - s += l; - } - if (prim) { - if (len > 1) { - *dest++ = (char)prim; - len--; - } - slen++; - prim = 0; - } + goto error; + + if ((wcs = malloc((slen + 1) * sizeof (wchar_t))) == NULL) + goto error; + + if (mbstowcs_l(wcs, src, slen + 1, locale) == (size_t)-1) + goto error; + + if ((xlen = _collate_sxfrm(table, wcs, dest, len)) == (size_t)-1) + goto error; + + if (wcs) + free(wcs); + + if (len > xlen) { + dest[xlen] = 0; + } else if (len) { + dest[len-1] = 0; } - free(ss); - if (len > 0) - *dest = '\0'; - return slen; + return (xlen); + +error: + /* errno should be set to ENOMEM if malloc failed */ + if (wcs) + free(wcs); + (void) strlcpy(dest, src, len); + + return (slen); } diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c index cea667e..3d6c960 100644 --- a/lib/libc/string/wcsxfrm.c +++ b/lib/libc/string/wcsxfrm.c @@ -1,4 +1,5 @@ /*- + 
* Copyright 2010 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua> * at Electronni Visti IA, Kiev, Ukraine. * All rights reserved. @@ -31,9 +32,6 @@ */ #include <sys/cdefs.h> -#if 0 -__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp "); -#endif __FBSDID("$FreeBSD$"); #include <stdlib.h> @@ -41,18 +39,10 @@ __FBSDID("$FreeBSD$"); #include <wchar.h> #include "collate.h" -static char *__mbsdup(const wchar_t *); - -/* - * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of - * the logic used. - */ size_t wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, locale_t locale) { - int prim, sec, l; size_t slen; - char *mbsrc, *s, *ss; FIX_LOCALE(locale); struct xlocale_collate *table = (struct xlocale_collate*)locale->components[XLC_COLLATE]; @@ -63,67 +53,33 @@ wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, return (0); } - if (table->__collate_load_error || MB_CUR_MAX > 1) { - slen = wcslen(src); - if (len > 0) { - if (slen < len) - wcscpy(dest, src); - else { - wcsncpy(dest, src, len - 1); - dest[len - 1] = L'\0'; - } - } - return (slen); + if ((table->__collate_load_error) || + ((slen = _collate_wxfrm(table, src, dest, len)) == (size_t)-1)) { + goto error; } - mbsrc = __mbsdup(src); - slen = 0; - prim = sec = 0; - ss = s = __collate_substitute(table, mbsrc); - while (*s != '\0') { - while (*s != '\0' && prim == 0) { - __collate_lookup(table, s, &l, &prim, &sec); - s += l; - } - if (prim != 0) { - if (len > 1) { - *dest++ = (wchar_t)prim; - len--; - } - slen++; - prim = 0; - } + /* Add null termination at the correct location. 
*/ + if (len > slen) { + dest[slen] = 0; + } else if (len) { + dest[len-1] = 0; } - free(ss); - free(mbsrc); - if (len != 0) - *dest = L'\0'; return (slen); + +error: + slen = wcslen(src); + if (slen < len) + (void) wcscpy(dest, src); + else { + (void) wcsncpy(dest, src, len - 1); + dest[len - 1] = L'\0'; + } + return (slen); } + size_t wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len) { return wcsxfrm_l(dest, src, len, __get_locale()); } - -static char * -__mbsdup(const wchar_t *ws) -{ - static const mbstate_t initial; - mbstate_t st; - const wchar_t *wcp; - size_t len; - char *mbs; - - wcp = ws; - st = initial; - if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1) - return (NULL); - if ((mbs = malloc(len + 1)) == NULL) - return (NULL); - st = initial; - wcsrtombs(mbs, &ws, len + 1, &st); - - return (mbs); -} |