summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorbapt <bapt@FreeBSD.org>2017-05-01 12:42:06 +0000
committerbapt <bapt@FreeBSD.org>2017-05-01 12:42:06 +0000
commit9b70ea0f74fd0ede657b303ffafba3b48ae4ca20 (patch)
tree6f5cca6df00d34b18e5a4ee8539ffc92cbd5cf6b /lib
parentbb6c5f5ffdce55e694bc675e29fb9f83d6f5078c (diff)
downloadFreeBSD-src-9b70ea0f74fd0ede657b303ffafba3b48ae4ca20.zip
FreeBSD-src-9b70ea0f74fd0ede657b303ffafba3b48ae4ca20.tar.gz
MFC r317034:
Fix strcoll_l disagreeing with strxfrm by reworking the forward order case in wcscoll_l(). Illumos fixed this while grabbing back our patches: https://www.illumos.org/rb/r/402/ This does not 100% fix what postgresql folks reported as there is still a remaining issue: https://www.illumos.org/issues/7962, it improves the situation The initial issue was reported in postgresql mailing lists: https://www.postgresql.org/message-id/flat/111D0E27-A8F3-4A84-A4E0-B0FB703863DF@s24.com#111D0E27-A8F3-4A84-A4E0-B0FB703863DF@s24.com Submitted by: Yuri Pankov <yuri.pankov@nexenta.com> Obtained from: Illumos
Diffstat (limited to 'lib')
-rw-r--r--lib/libc/string/wcscoll.c120
-rw-r--r--lib/libc/tests/string/wcscoll_test.c93
2 files changed, 155 insertions, 58 deletions
diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c
index a3e959d..dac3dfd 100644
--- a/lib/libc/string/wcscoll.c
+++ b/lib/libc/string/wcscoll.c
@@ -1,5 +1,5 @@
/*-
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
* Copyright (c) 2002 Tim J. Robbins
* All rights reserved.
*
@@ -42,22 +42,22 @@ __FBSDID("$FreeBSD$");
int
wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
{
- int len1, len2, pri1, pri2, ret;
+ int len1, len2, pri1, pri2;
wchar_t *tr1 = NULL, *tr2 = NULL;
int direc, pass;
+ int ret = wcscmp(ws1, ws2);
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
- if (table->__collate_load_error)
- /*
- * Locale has no special collating order or could not be
- * loaded, do a fast binary comparison.
- */
- return (wcscmp(ws1, ws2));
+ if (table->__collate_load_error || ret == 0)
+ return (ret);
- ret = 0;
+ if (*ws1 == 0 && *ws2 != 0)
+ return (-1);
+ if (*ws1 != 0 && *ws2 == 0)
+ return (1);
/*
* Once upon a time we had code to try to optimize this, but
@@ -77,19 +77,19 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
const int32_t *st2 = NULL;
const wchar_t *w1 = ws1;
const wchar_t *w2 = ws2;
- int check1, check2;
/* special pass for UNDEFINED */
if (pass == table->info->directive_count) {
- direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+ direc = DIRECTIVE_FORWARD;
} else {
direc = table->info->directive[pass];
}
if (direc & DIRECTIVE_BACKWARD) {
wchar_t *bp, *fp, c;
+ free(tr1);
if ((tr1 = wcsdup(w1)) == NULL)
- goto fail;
+ goto end;
bp = tr1;
fp = tr1 + wcslen(tr1) - 1;
while (bp < fp) {
@@ -97,8 +97,9 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
*bp++ = *fp;
*fp-- = c;
}
+ free(tr2);
if ((tr2 = wcsdup(w2)) == NULL)
- goto fail;
+ goto end;
bp = tr2;
fp = tr2 + wcslen(tr2) - 1;
while (bp < fp) {
@@ -111,6 +112,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
}
if (direc & DIRECTIVE_POSITION) {
+ int check1, check2;
while (*w1 && *w2) {
pri1 = pri2 = 0;
check1 = check2 = 1;
@@ -120,7 +122,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
&pri1, pass, &st1);
if (pri1 < 0) {
errno = EINVAL;
- goto fail;
+ goto end;
}
if (!pri1) {
pri1 = COLLATE_MAX_PRIORITY;
@@ -133,7 +135,7 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
&pri2, pass, &st2);
if (pri2 < 0) {
errno = EINVAL;
- goto fail;
+ goto end;
}
if (!pri2) {
pri2 = COLLATE_MAX_PRIORITY;
@@ -149,58 +151,64 @@ wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
w1 += len1;
w2 += len2;
}
+ if (!*w1) {
+ if (*w2) {
+ ret = -(int)*w2;
+ goto end;
+ }
+ } else {
+ ret = *w1;
+ goto end;
+ }
} else {
- while (*w1 && *w2) {
- pri1 = pri2 = 0;
- check1 = check2 = 1;
- while ((pri1 == pri2) && (check1 || check2)) {
- while (check1 && *w1) {
- _collate_lookup(table, w1,
- &len1, &pri1, pass, &st1);
- if (pri1 > 0)
- break;
- if (pri1 < 0) {
- errno = EINVAL;
- goto fail;
- }
- st1 = NULL;
- w1 += 1;
+ int vpri1 = 0, vpri2 = 0;
+ while (*w1 || *w2 || st1 || st2) {
+ pri1 = 1;
+ while (*w1 || st1) {
+ _collate_lookup(table, w1, &len1, &pri1,
+ pass, &st1);
+ w1 += len1;
+ if (pri1 > 0) {
+ vpri1++;
+ break;
}
- check1 = (st1 != NULL);
- while (check2 && *w2) {
- _collate_lookup(table, w2,
- &len2, &pri2, pass, &st2);
- if (pri2 > 0)
- break;
- if (pri2 < 0) {
- errno = EINVAL;
- goto fail;
- }
- st2 = NULL;
- w2 += 1;
+
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto end;
}
- check2 = (st2 != NULL);
- if (!pri1 || !pri2)
+ st1 = NULL;
+ }
+ pri2 = 1;
+ while (*w2 || st2) {
+ _collate_lookup(table, w2, &len2, &pri2,
+ pass, &st2);
+ w2 += len2;
+ if (pri2 > 0) {
+ vpri2++;
break;
+ }
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto end;
+ }
+ st2 = NULL;
}
- if (!pri1 || !pri2)
+ if ((!pri1 || !pri2) && (vpri1 == vpri2))
break;
if (pri1 != pri2) {
ret = pri1 - pri2;
goto end;
}
- w1 += len1;
- w2 += len2;
}
- }
- if (!*w1) {
- if (*w2) {
- ret = -(int)*w2;
+ if (vpri1 && !vpri2) {
+ ret = 1;
+ goto end;
+ }
+ if (!vpri1 && vpri2) {
+ ret = -1;
goto end;
}
- } else {
- ret = *w1;
- goto end;
}
}
ret = 0;
@@ -210,10 +218,6 @@ end:
free(tr2);
return (ret);
-
-fail:
- ret = wcscmp(ws1, ws2);
- goto end;
}
int
diff --git a/lib/libc/tests/string/wcscoll_test.c b/lib/libc/tests/string/wcscoll_test.c
index 2f2472b..022ad8e 100644
--- a/lib/libc/tests/string/wcscoll_test.c
+++ b/lib/libc/tests/string/wcscoll_test.c
@@ -1,5 +1,7 @@
/*-
* Copyright (c) 2016 Baptiste Daroussin <bapt@FreeBSD.org>
+ * Copyright 2016 Tom Lane <tgl@sss.pgh.pa.us>
+ * Copyright 2017 Nexenta Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,6 +32,8 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include <locale.h>
#include <stdlib.h>
+#include <time.h>
+#include <errno.h>
#include <atf-c.h>
@@ -55,9 +59,98 @@ ATF_TC_BODY(russian_collation, tc)
"Bad collation, expected: '%ls' got '%ls'", res, c);
}
+#define NSTRINGS 2000
+#define MAXSTRLEN 20
+#define MAXXFRMLEN (MAXSTRLEN * 20)
+
+typedef struct {
+ char sval[MAXSTRLEN];
+ char xval[MAXXFRMLEN];
+} cstr;
+
+ATF_TC_WITHOUT_HEAD(strcoll_vs_strxfrm);
+ATF_TC_BODY(strcoll_vs_strxfrm, tc)
+{
+ cstr data[NSTRINGS];
+ char *curloc;
+ int i, j;
+
+ curloc = setlocale(LC_ALL, "en_US.UTF-8");
+ ATF_CHECK_MSG(curloc != NULL, "Fail to set locale");
+
+ /* Ensure new random() values on every run */
+ srandom((unsigned int) time(NULL));
+
+ /* Generate random UTF8 strings of length less than MAXSTRLEN bytes */
+ for (i = 0; i < NSTRINGS; i++) {
+ char *p;
+ int len;
+
+again:
+ p = data[i].sval;
+ len = 1 + (random() % (MAXSTRLEN - 1));
+ while (len > 0) {
+ int c;
+ /*
+ * Generate random printable char in ISO8859-1 range.
+ * Bias towards producing a lot of spaces.
+ */
+
+ if ((random() % 16) < 3) {
+ c = ' ';
+ } else {
+ do {
+ c = random() & 0xFF;
+ } while (!((c >= ' ' && c <= 127) ||
+ (c >= 0xA0 && c <= 0xFF)));
+ }
+
+ if (c <= 127) {
+ *p++ = c;
+ len--;
+ } else {
+ if (len < 2)
+ break;
+ /* Poor man's utf8-ification */
+ *p++ = 0xC0 + (c >> 6);
+ len--;
+ *p++ = 0x80 + (c & 0x3F);
+ len--;
+ }
+ }
+ *p = '\0';
+ /* strxfrm() each string as we produce it */
+ errno = 0;
+ ATF_CHECK_MSG(strxfrm(data[i].xval, data[i].sval,
+ MAXXFRMLEN) < MAXXFRMLEN, "strxfrm() result for %d-length "
+ " string exceeded %d bytes", (int)strlen(data[i].sval),
+ MAXXFRMLEN);
+
+ /*
+ * Amend strxfrm() failing on certain characters to be fixed and
+ * test later
+ */
+ if (errno != 0)
+ goto again;
+ }
+
+ for (i = 0; i < NSTRINGS; i++) {
+ for (j = 0; j < NSTRINGS; j++) {
+ int sr = strcoll(data[i].sval, data[j].sval);
+ int sx = strcmp(data[i].xval, data[j].xval);
+
+ ATF_CHECK_MSG(!((sr * sx < 0) ||
+ (sr * sx == 0 && sr + sx != 0)),
+ "%s: diff for \"%s\" and \"%s\"",
+ curloc, data[i].sval, data[j].sval);
+ }
+ }
+}
+
ATF_TP_ADD_TCS(tp)
{
ATF_TP_ADD_TC(tp, russian_collation);
+ ATF_TP_ADD_TC(tp, strcoll_vs_strxfrm);
return (atf_no_error());
}
OpenPOWER on IntegriCloud