summaryrefslogtreecommitdiffstats
path: root/lib/libc/string/strcoll.c
diff options
context:
space:
mode:
author: bapt <bapt@FreeBSD.org> 2015-08-07 23:41:26 +0000
committer: bapt <bapt@FreeBSD.org> 2015-08-07 23:41:26 +0000
commit: 11a5726cda4d7b191129ed4220970bf8e00a8db6 (patch)
tree: 063f1101225b65ad20c1a8be3c96300b958d5a20 /lib/libc/string/strcoll.c
parent: cbf6cdcbbb6c0212ee9911bbbe9e87053b54ab9f (diff)
downloadFreeBSD-src-11a5726cda4d7b191129ed4220970bf8e00a8db6.zip
FreeBSD-src-11a5726cda4d7b191129ed4220970bf8e00a8db6.tar.gz
The collate functions within libc have been using versions 1 and 1.2 of the
packed LC_COLLATE binary formats. These were generated with the colldef tool,
but the new LC_COLLATE files are going to be generated by the new localedef
tool using CLDR POSIX files as input. The BSD-flavored version of localedef
identifies the format as "BSD 1.0". Any LC_COLLATE file with a different
version will simply not be loaded, and all LC* categories will be set to the
"C" (aka "POSIX") locale.

This work is based on Nexenta's contribution to Illumos. The integration with
xlocale is John Marino's work for Dragonfly.

The following commits will enable the localedef tool, disable the colldef
tool, add the generated colldef directory, and finally remove colldef from
base.

The only differences from Dragonfly are:
- a few fixes to build with clang
- identification of the flavor as "BSD 1.0" instead of "Dragonfly 4.4"

Obtained from: Dragonfly
Diffstat (limited to 'lib/libc/string/strcoll.c')
-rw-r--r-- lib/libc/string/strcoll.c 112
1 files changed, 66 insertions, 46 deletions
diff --git a/lib/libc/string/strcoll.c b/lib/libc/string/strcoll.c
index a918fca..5bad40c 100644
--- a/lib/libc/string/strcoll.c
+++ b/lib/libc/string/strcoll.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -35,63 +36,82 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
+#include <wchar.h>
#include "collate.h"
-#include <stdio.h>
+/*
+ * In order to properly handle multibyte locales, it's easiest to just
+ * convert to wide characters and then use wcscoll. However, if an
+ * error occurs, we gracefully fall back to simple strcmp. Caller
+ * should check errno.
+ */
int
strcoll_l(const char *s, const char *s2, locale_t locale)
{
- int len, len2, prim, prim2, sec, sec2, ret, ret2;
- const char *t, *t2;
- char *tt, *tt2;
+ int ret;
+ wchar_t *t1 = NULL, *t2 = NULL;
+ wchar_t *w1 = NULL, *w2 = NULL;
+ const char *cs1, *cs2;
+ mbstate_t mbs1;
+ mbstate_t mbs2;
+ size_t sz1, sz2;
+
+ memset(&mbs1, 0, sizeof (mbstate_t));
+ memset(&mbs2, 0, sizeof (mbstate_t));
+
+ /*
+ * The mbsrtowcs_l function can set the src pointer to null upon
+ * failure, so it should act on a copy to avoid:
+ * - sending null pointer to strcmp
+ * - having strcoll/strcoll_l change *s or *s2 to null
+ */
+ cs1 = s;
+ cs2 = s2;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
if (table->__collate_load_error)
- return strcmp(s, s2);
-
- len = len2 = 1;
- ret = ret2 = 0;
- if (table->__collate_substitute_nontrivial) {
- t = tt = __collate_substitute(table, s);
- t2 = tt2 = __collate_substitute(table, s2);
- } else {
- tt = tt2 = NULL;
- t = s;
- t2 = s2;
- }
- while(*t && *t2) {
- prim = prim2 = 0;
- while(*t && !prim) {
- __collate_lookup(table, t, &len, &prim, &sec);
- t += len;
- }
- while(*t2 && !prim2) {
- __collate_lookup(table, t2, &len2, &prim2, &sec2);
- t2 += len2;
- }
- if(!prim || !prim2)
- break;
- if(prim != prim2) {
- ret = prim - prim2;
- goto end;
- }
- if(!ret2)
- ret2 = sec - sec2;
- }
- if(!*t && *t2)
- ret = -(int)((u_char)*t2);
- else if(*t && !*t2)
- ret = (u_char)*t;
- else if(!*t && !*t2)
- ret = ret2;
- end:
- free(tt);
- free(tt2);
-
- return ret;
+ goto error;
+
+ sz1 = strlen(s) + 1;
+ sz2 = strlen(s2) + 1;
+
+ /*
+ * Simple assumption: conversion to wide format is strictly
+ * reducing, i.e. a single byte (or multibyte character)
+ * cannot result in multiple wide characters.
+ */
+ if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
+ goto error;
+ w1 = t1;
+ if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
+ goto error;
+ w2 = t2;
+
+ if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1)
+ goto error;
+
+ if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1)
+ goto error;
+
+ ret = wcscoll_l(w1, w2, locale);
+ if (t1)
+ free(t1);
+ if (t2)
+ free(t2);
+
+ return (ret);
+
+error:
+ if (t1)
+ free(t1);
+ if (t2)
+ free(t2);
+ return (strcmp(s, s2));
}
int
OpenPOWER on IntegriCloud