summaryrefslogtreecommitdiffstats
path: root/lib/libc
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc')
-rw-r--r--lib/libc/locale/Makefile.inc2
-rw-r--r--lib/libc/locale/ascii.c192
-rw-r--r--lib/libc/locale/big5.c32
-rw-r--r--lib/libc/locale/collate.c766
-rw-r--r--lib/libc/locale/collate.h107
-rw-r--r--lib/libc/locale/collcmp.c9
-rw-r--r--lib/libc/locale/euc.c434
-rw-r--r--lib/libc/locale/gb18030.c28
-rw-r--r--lib/libc/locale/gb2312.c31
-rw-r--r--lib/libc/locale/gbk.c28
-rw-r--r--lib/libc/locale/mblocal.h22
-rw-r--r--lib/libc/locale/mbsnrtowcs.c10
-rw-r--r--lib/libc/locale/mskanji.c42
-rw-r--r--lib/libc/locale/none.c12
-rw-r--r--lib/libc/locale/rune.c107
-rw-r--r--lib/libc/locale/setrunelocale.c60
-rw-r--r--lib/libc/locale/utf8.c4
-rw-r--r--lib/libc/locale/wcsnrtombs.c12
-rw-r--r--lib/libc/string/strcoll.c108
-rw-r--r--lib/libc/string/strxfrm.c64
-rw-r--r--lib/libc/string/wcscoll.c194
-rw-r--r--lib/libc/string/wcsxfrm.c84
22 files changed, 1459 insertions, 889 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc
index d28355b..08bc6e4 100644
--- a/lib/libc/locale/Makefile.inc
+++ b/lib/libc/locale/Makefile.inc
@@ -4,7 +4,7 @@
# locale sources
.PATH: ${LIBC_SRCTOP}/${LIBC_ARCH}/locale ${LIBC_SRCTOP}/locale
-SRCS+= ascii.c big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
+SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c \
gb18030.c gb2312.c gbk.c ctype.c isctype.c iswctype.c \
ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \
mbrlen.c \
diff --git a/lib/libc/locale/ascii.c b/lib/libc/locale/ascii.c
deleted file mode 100644
index 784814d..0000000
--- a/lib/libc/locale/ascii.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/*-
- * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
- * Copyright (c) 1993
- * The Regents of the University of California. All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * Paul Borman at Krystal Technologies.
- *
- * Copyright (c) 2011 The FreeBSD Foundation
- * All rights reserved.
- * Portions of this software were developed by David Chisnall
- * under sponsorship from the FreeBSD Foundation.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <errno.h>
-#include <limits.h>
-#include <runetype.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <wchar.h>
-#include "mblocal.h"
-
-static size_t _ascii_mbrtowc(wchar_t * __restrict, const char * __restrict,
- size_t, mbstate_t * __restrict);
-static int _ascii_mbsinit(const mbstate_t *);
-static size_t _ascii_mbsnrtowcs(wchar_t * __restrict dst,
- const char ** __restrict src, size_t nms, size_t len,
- mbstate_t * __restrict ps __unused);
-static size_t _ascii_wcrtomb(char * __restrict, wchar_t,
- mbstate_t * __restrict);
-static size_t _ascii_wcsnrtombs(char * __restrict, const wchar_t ** __restrict,
- size_t, size_t, mbstate_t * __restrict);
-
-int
-_ascii_init(struct xlocale_ctype *l,_RuneLocale *rl)
-{
-
- l->__mbrtowc = _ascii_mbrtowc;
- l->__mbsinit = _ascii_mbsinit;
- l->__mbsnrtowcs = _ascii_mbsnrtowcs;
- l->__wcrtomb = _ascii_wcrtomb;
- l->__wcsnrtombs = _ascii_wcsnrtombs;
- l->runes = rl;
- l->__mb_cur_max = 1;
- l->__mb_sb_limit = 128;
- return(0);
-}
-
-static int
-_ascii_mbsinit(const mbstate_t *ps __unused)
-{
-
- /*
- * Encoding is not state dependent - we are always in the
- * initial state.
- */
- return (1);
-}
-
-static size_t
-_ascii_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
- mbstate_t * __restrict ps __unused)
-{
-
- if (s == NULL)
- /* Reset to initial shift state (no-op) */
- return (0);
- if (n == 0)
- /* Incomplete multibyte sequence */
- return ((size_t)-2);
- if (*s & 0x80) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- if (pwc != NULL)
- *pwc = (unsigned char)*s;
- return (*s == '\0' ? 0 : 1);
-}
-
-static size_t
-_ascii_wcrtomb(char * __restrict s, wchar_t wc,
- mbstate_t * __restrict ps __unused)
-{
-
- if (s == NULL)
- /* Reset to initial shift state (no-op) */
- return (1);
- if (wc < 0 || wc > 127) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- *s = (unsigned char)wc;
- return (1);
-}
-
-static size_t
-_ascii_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
- size_t nms, size_t len, mbstate_t * __restrict ps __unused)
-{
- const char *s;
- size_t nchr;
-
- if (dst == NULL) {
- for (s = *src; nms > 0 && *s != '\0'; s++, nms--) {
- if (*s & 0x80) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- }
- return (s - *src);
- }
-
- s = *src;
- nchr = 0;
- while (len-- > 0 && nms-- > 0) {
- if (*s & 0x80) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- if ((*dst++ = (unsigned char)*s++) == L'\0') {
- *src = NULL;
- return (nchr);
- }
- nchr++;
- }
- *src = s;
- return (nchr);
-}
-
-static size_t
-_ascii_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
- size_t nwc, size_t len, mbstate_t * __restrict ps __unused)
-{
- const wchar_t *s;
- size_t nchr;
-
- if (dst == NULL) {
- for (s = *src; nwc > 0 && *s != L'\0'; s++, nwc--) {
- if (*s < 0 || *s > 127) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- }
- return (s - *src);
- }
-
- s = *src;
- nchr = 0;
- while (len-- > 0 && nwc-- > 0) {
- if (*s < 0 || *s > 127) {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- if ((*dst++ = *s++) == '\0') {
- *src = NULL;
- return (nchr);
- }
- nchr++;
- }
- *src = s;
- return (nchr);
-}
-
diff --git a/lib/libc/locale/big5.c b/lib/libc/locale/big5.c
index 4b37265..c1f94d3 100644
--- a/lib/libc/locale/big5.c
+++ b/lib/libc/locale/big5.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
@@ -19,11 +21,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -61,6 +59,12 @@ static size_t _BIG5_mbrtowc(wchar_t * __restrict, const char * __restrict,
static int _BIG5_mbsinit(const mbstate_t *);
static size_t _BIG5_wcrtomb(char * __restrict, wchar_t,
mbstate_t * __restrict);
+static size_t _BIG5_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _BIG5_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
typedef struct {
wchar_t ch;
@@ -72,6 +76,8 @@ _BIG5_init(struct xlocale_ctype *l, _RuneLocale *rl)
l->__mbrtowc = _BIG5_mbrtowc;
l->__wcrtomb = _BIG5_wcrtomb;
+ l->__mbsnrtowcs = _BIG5_mbsnrtowcs;
+ l->__wcsnrtombs = _BIG5_wcsnrtombs;
l->__mbsinit = _BIG5_mbsinit;
l->runes = rl;
l->__mb_cur_max = 2;
@@ -147,7 +153,7 @@ _BIG5_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
wc = (wc << 8) | (*s++ & 0xff);
if (pwc != NULL)
*pwc = wc;
- return (2);
+ return (2);
} else {
if (pwc != NULL)
*pwc = wc;
@@ -178,3 +184,17 @@ _BIG5_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
*s = wc & 0xff;
return (1);
}
+
+static size_t
+_BIG5_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
+ size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _BIG5_mbrtowc));
+}
+
+static size_t
+_BIG5_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+ size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _BIG5_wcrtomb));
+}
diff --git a/lib/libc/locale/collate.c b/lib/libc/locale/collate.c
index 56513f4..0f0f92b 100644
--- a/lib/libc/locale/collate.c
+++ b/lib/libc/locale/collate.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -28,66 +30,54 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * Adapted to xlocale by John Marino <draco@marino.st>
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
-#include <arpa/inet.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h>
#include <errno.h>
#include <unistd.h>
-#include <sysexits.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
#include "un-namespace.h"
#include "collate.h"
#include "setlocale.h"
#include "ldpart.h"
-
#include "libc_private.h"
-/*
- * To avoid modifying the original (single-threaded) code too much, we'll just
- * define the old globals as fields inside the table.
- *
- * We also modify the collation table test functions to search the thread-local
- * table first and the global table second.
- */
-#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
-#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
-#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
-#define __collate_chain_pri_table (table->__collate_chain_pri_table)
-int __collate_load_error;
-
-
struct xlocale_collate __xlocale_global_collate = {
- {{0}, "C"}, 1, 0
+ {{0}, "C"}, 1, 0, 0, 0
};
- struct xlocale_collate __xlocale_C_collate = {
- {{0}, "C"}, 1, 0
+struct xlocale_collate __xlocale_C_collate = {
+ {{0}, "C"}, 1, 0, 0, 0
};
-void __collate_err(int ex, const char *f) __dead2;
-
-int
+static int
__collate_load_tables_l(const char *encoding, struct xlocale_collate *table);
static void
destruct_collate(void *t)
{
struct xlocale_collate *table = t;
- if (__collate_chain_pri_table) {
- free(__collate_chain_pri_table);
+ if (table->map && (table->maplen > 0)) {
+ (void) munmap(table->map, table->maplen);
}
free(t);
}
void *
-__collate_load(const char *encoding, locale_t unused)
+__collate_load(const char *encoding, __unused locale_t unused)
{
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
return &__xlocale_C_collate;
@@ -109,19 +99,20 @@ __collate_load(const char *encoding, locale_t unused)
int
__collate_load_tables(const char *encoding)
{
- int ret = __collate_load_tables_l(encoding, &__xlocale_global_collate);
- __collate_load_error = __xlocale_global_collate.__collate_load_error;
- return ret;
+
+ return (__collate_load_tables_l(encoding, &__xlocale_global_collate));
}
int
__collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
{
- FILE *fp;
- int i, saverr, chains;
- uint32_t u32;
- char strbuf[STR_LEN], buf[PATH_MAX];
- void *TMP_substitute_table, *TMP_char_pri_table, *TMP_chain_pri_table;
+ int i, chains, z;
+ char *buf;
+ char *TMP;
+ char *map;
+ collate_info_t *info;
+ struct stat sbuf;
+ int fd;
/* 'encoding' must be already checked. */
if (strcmp(encoding, "C") == 0 || strcmp(encoding, "POSIX") == 0) {
@@ -129,217 +120,586 @@ __collate_load_tables_l(const char *encoding, struct xlocale_collate *table)
return (_LDP_CACHE);
}
- /* 'PathLocale' must be already set & checked. */
- /* Range checking not needed, encoding has fixed size */
- (void)strcpy(buf, _PathLocale);
- (void)strcat(buf, "/");
- (void)strcat(buf, encoding);
- (void)strcat(buf, "/LC_COLLATE");
- if ((fp = fopen(buf, "re")) == NULL)
+ asprintf(&buf, "%s/%s/LC_COLLATE", _PathLocale, encoding);
+ if (buf == NULL)
return (_LDP_ERROR);
- if (fread(strbuf, sizeof(strbuf), 1, fp) != 1) {
- saverr = errno;
- (void)fclose(fp);
- errno = saverr;
+ if ((fd = _open(buf, O_RDONLY)) < 0) {
+ free(buf);
return (_LDP_ERROR);
}
- chains = -1;
- if (strcmp(strbuf, COLLATE_VERSION) == 0)
- chains = 0;
- else if (strcmp(strbuf, COLLATE_VERSION1_2) == 0)
- chains = 1;
- if (chains < 0) {
- (void)fclose(fp);
- errno = EFTYPE;
+ free(buf);
+ if (_fstat(fd, &sbuf) < 0) {
+ (void) _close(fd);
return (_LDP_ERROR);
}
- if (chains) {
- if (fread(&u32, sizeof(u32), 1, fp) != 1) {
- saverr = errno;
- (void)fclose(fp);
- errno = saverr;
- return (_LDP_ERROR);
- }
- if ((chains = (int)ntohl(u32)) < 1) {
- (void)fclose(fp);
- errno = EFTYPE;
- return (_LDP_ERROR);
- }
- } else
- chains = TABLE_SIZE;
-
- if ((TMP_substitute_table =
- malloc(sizeof(__collate_substitute_table))) == NULL) {
- saverr = errno;
- (void)fclose(fp);
- errno = saverr;
+ if (sbuf.st_size < (COLLATE_STR_LEN + sizeof (info))) {
+ (void) _close(fd);
+ errno = EINVAL;
return (_LDP_ERROR);
}
- if ((TMP_char_pri_table =
- malloc(sizeof(__collate_char_pri_table))) == NULL) {
- saverr = errno;
- free(TMP_substitute_table);
- (void)fclose(fp);
- errno = saverr;
+ map = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ (void) _close(fd);
+ if ((TMP = map) == NULL) {
return (_LDP_ERROR);
}
- if ((TMP_chain_pri_table =
- malloc(sizeof(*__collate_chain_pri_table) * chains)) == NULL) {
- saverr = errno;
- free(TMP_substitute_table);
- free(TMP_char_pri_table);
- (void)fclose(fp);
- errno = saverr;
+
+ if (strncmp(TMP, COLLATE_VERSION, COLLATE_STR_LEN) != 0) {
+ (void) munmap(map, sbuf.st_size);
+ errno = EINVAL;
return (_LDP_ERROR);
}
+ TMP += COLLATE_STR_LEN;
-#define FREAD(a, b, c, d) \
-{ \
- if (fread(a, b, c, d) != c) { \
- saverr = errno; \
- free(TMP_substitute_table); \
- free(TMP_char_pri_table); \
- free(TMP_chain_pri_table); \
- (void)fclose(d); \
- errno = saverr; \
- return (_LDP_ERROR); \
- } \
-}
+ info = (void *)TMP;
+ TMP += sizeof (*info);
- FREAD(TMP_substitute_table, sizeof(__collate_substitute_table), 1, fp);
- FREAD(TMP_char_pri_table, sizeof(__collate_char_pri_table), 1, fp);
- FREAD(TMP_chain_pri_table,
- sizeof(*__collate_chain_pri_table), chains, fp);
- (void)fclose(fp);
-
- if (__collate_substitute_table_ptr != NULL)
- free(__collate_substitute_table_ptr);
- __collate_substitute_table_ptr = TMP_substitute_table;
- if (__collate_char_pri_table_ptr != NULL)
- free(__collate_char_pri_table_ptr);
- __collate_char_pri_table_ptr = TMP_char_pri_table;
- for (i = 0; i < UCHAR_MAX + 1; i++) {
- __collate_char_pri_table[i].prim =
- ntohl(__collate_char_pri_table[i].prim);
- __collate_char_pri_table[i].sec =
- ntohl(__collate_char_pri_table[i].sec);
+ if ((info->directive_count < 1) ||
+ (info->directive_count >= COLL_WEIGHTS_MAX) ||
+ ((chains = info->chain_count) < 0)) {
+ (void) munmap(map, sbuf.st_size);
+ errno = EINVAL;
+ return (_LDP_ERROR);
}
- if (__collate_chain_pri_table != NULL)
- free(__collate_chain_pri_table);
- __collate_chain_pri_table = TMP_chain_pri_table;
- for (i = 0; i < chains; i++) {
- __collate_chain_pri_table[i].prim =
- ntohl(__collate_chain_pri_table[i].prim);
- __collate_chain_pri_table[i].sec =
- ntohl(__collate_chain_pri_table[i].sec);
+
+ i = (sizeof (collate_char_t) * (UCHAR_MAX + 1)) +
+ (sizeof (collate_chain_t) * chains) +
+ (sizeof (collate_large_t) * info->large_count);
+ for (z = 0; z < (info->directive_count); z++) {
+ i += sizeof (collate_subst_t) * info->subst_count[z];
}
- __collate_substitute_nontrivial = 0;
- for (i = 0; i < UCHAR_MAX + 1; i++) {
- if (__collate_substitute_table[i][0] != i ||
- __collate_substitute_table[i][1] != 0) {
- __collate_substitute_nontrivial = 1;
- break;
+ if (i != (sbuf.st_size - (TMP - map))) {
+ (void) munmap(map, sbuf.st_size);
+ errno = EINVAL;
+ return (_LDP_ERROR);
+ }
+
+ table->char_pri_table = (void *)TMP;
+ TMP += sizeof (collate_char_t) * (UCHAR_MAX + 1);
+
+ for (z = 0; z < info->directive_count; z++) {
+ if (info->subst_count[z] > 0) {
+ table->subst_table[z] = (void *)TMP;
+ TMP += info->subst_count[z] * sizeof (collate_subst_t);
+ } else {
+ table->subst_table[z] = NULL;
}
}
+
+ if (chains > 0) {
+ table->chain_pri_table = (void *)TMP;
+ TMP += chains * sizeof (collate_chain_t);
+ } else
+ table->chain_pri_table = NULL;
+ if (info->large_count > 0)
+ table->large_pri_table = (void *)TMP;
+ else
+ table->large_pri_table = NULL;
+
+ table->info = info;
table->__collate_load_error = 0;
return (_LDP_LOADED);
}
-u_char *
-__collate_substitute(struct xlocale_collate *table, const u_char *s)
+/*
+ * Note: for performance reasons, we have expanded bsearch here. This avoids
+ * function call overhead with each comparison.
+ */
+
+static int32_t *
+substsearch(struct xlocale_collate *table, const wchar_t key, int pass)
+{
+ collate_subst_t *p;
+ int n = table->info->subst_count[pass];
+
+ if (n == 0)
+ return (NULL);
+
+ if (pass >= table->info->directive_count)
+ return (NULL);
+
+ if (!(key & COLLATE_SUBST_PRIORITY))
+ return (NULL);
+
+ p = table->subst_table[pass] + (key & ~COLLATE_SUBST_PRIORITY);
+ return (p->pri);
+}
+
+static collate_chain_t *
+chainsearch(struct xlocale_collate *table, const wchar_t *key, int *len)
{
- int dest_len, len, nlen;
- int delta = strlen(s);
- u_char *dest_str = NULL;
-
- if (s == NULL || *s == '\0')
- return (__collate_strdup(""));
- delta += delta / 8;
- dest_str = malloc(dest_len = delta);
- if (dest_str == NULL)
- __collate_err(EX_OSERR, __func__);
- len = 0;
- while (*s) {
- nlen = len + strlen(__collate_substitute_table[*s]);
- if (dest_len <= nlen) {
- dest_str = reallocf(dest_str, dest_len = nlen + delta);
- if (dest_str == NULL)
- __collate_err(EX_OSERR, __func__);
+ int low;
+ int high;
+ int next, compar, l;
+ collate_chain_t *p;
+ collate_chain_t *tab;
+
+ if (table->info->chain_count == 0)
+ return (NULL);
+
+ low = 0;
+ high = table->info->chain_count - 1;
+ tab = table->chain_pri_table;
+
+ while (low <= high) {
+ next = (low + high) / 2;
+ p = tab + next;
+ compar = *key - *p->str;
+ if (compar == 0) {
+ l = wcsnlen(p->str, COLLATE_STR_LEN);
+ compar = wcsncmp(key, p->str, l);
+ if (compar == 0) {
+ *len = l;
+ return (p);
+ }
}
- (void)strcpy(dest_str + len, __collate_substitute_table[*s++]);
- len = nlen;
+ if (compar > 0)
+ low = next + 1;
+ else
+ high = next - 1;
+ }
+ return (NULL);
+}
+
+static collate_large_t *
+largesearch(struct xlocale_collate *table, const wchar_t key)
+{
+ int low = 0;
+ int high = table->info->large_count - 1;
+ int next, compar;
+ collate_large_t *p;
+ collate_large_t *tab = table->large_pri_table;
+
+ if (table->info->large_count == 0)
+ return (NULL);
+
+ while (low <= high) {
+ next = (low + high) / 2;
+ p = tab + next;
+ compar = key - p->val;
+ if (compar == 0)
+ return (p);
+ if (compar > 0)
+ low = next + 1;
+ else
+ high = next - 1;
}
- return (dest_str);
+ return (NULL);
}
void
-__collate_lookup(struct xlocale_collate *table, const u_char *t, int *len, int *prim, int *sec)
+_collate_lookup(struct xlocale_collate *table, const wchar_t *t, int *len,
+ int *pri, int which, const int **state)
{
- struct __collate_st_chain_pri *p2;
+ collate_chain_t *p2;
+ collate_large_t *match;
+ int p, l;
+ const int *sptr;
+
+ /*
+ * If this is the "last" pass for the UNDEFINED, then
+ * we just return the priority itself.
+ */
+ if (which >= table->info->directive_count) {
+ *pri = *t;
+ *len = 1;
+ *state = NULL;
+ return;
+ }
+ /*
+ * If we have remaining substitution data from a previous
+ * call, consume it first.
+ */
+ if ((sptr = *state) != NULL) {
+ *pri = *sptr;
+ sptr++;
+ *state = *sptr ? sptr : NULL;
+ *len = 0;
+ return;
+ }
+
+ /* No active substitutions */
*len = 1;
- *prim = *sec = 0;
- for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++) {
- if (*t == p2->str[0] &&
- strncmp(t, p2->str, strlen(p2->str)) == 0) {
- *len = strlen(p2->str);
- *prim = p2->prim;
- *sec = p2->sec;
- return;
+
+ /*
+ * Check for composites such as diphthongs that collate as a
+ * single element (aka chains or collating-elements).
+ */
+ if (((p2 = chainsearch(table, t, &l)) != NULL) &&
+ ((p = p2->pri[which]) >= 0)) {
+
+ *len = l;
+ *pri = p;
+
+ } else if (*t <= UCHAR_MAX) {
+
+ /*
+ * Character is a small (8-bit) character.
+ * We just look these up directly for speed.
+ */
+ *pri = table->char_pri_table[*t].pri[which];
+
+ } else if ((table->info->large_count > 0) &&
+ ((match = largesearch(table, *t)) != NULL)) {
+
+ /*
+ * Character was found in the extended table.
+ */
+ *pri = match->pri.pri[which];
+
+ } else {
+ /*
+ * Character lacks a specific definition.
+ */
+ if (table->info->directive[which] & DIRECTIVE_UNDEFINED) {
+ /* Mask off sign bit to prevent ordering confusion. */
+ *pri = (*t & COLLATE_MAX_PRIORITY);
+ } else {
+ *pri = table->info->undef_pri[which];
+ }
+ /* No substitutions for undefined characters! */
+ return;
+ }
+
+ /*
+ * Try substituting (expanding) the character. We are
+ * currently doing this *after* the chain compression. I
+ * think it should not matter, but this way might be slightly
+ * faster.
+ *
+ * We do this after the priority search, as this will help us
+ * to identify a single key value. In order for this to work,
+ * its important that the priority assigned to a given element
+ * to be substituted be unique for that level. The localedef
+ * code ensures this for us.
+ */
+ if ((sptr = substsearch(table, *pri, which)) != NULL) {
+ if ((*pri = *sptr) != 0) {
+ sptr++;
+ *state = *sptr ? sptr : NULL;
}
}
- *prim = __collate_char_pri_table[*t].prim;
- *sec = __collate_char_pri_table[*t].sec;
+
}
-u_char *
-__collate_strdup(u_char *s)
+/*
+ * This is the meaty part of wcsxfrm & strxfrm. Note that it does
+ * NOT NULL terminate. That is left to the caller.
+ */
+size_t
+_collate_wxfrm(struct xlocale_collate *table, const wchar_t *src, wchar_t *xf,
+ size_t room)
{
- u_char *t = strdup(s);
+ int pri;
+ int len;
+ const wchar_t *t;
+ wchar_t *tr = NULL;
+ int direc;
+ int pass;
+ const int32_t *state;
+ size_t want = 0;
+ size_t need = 0;
+
+ for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+ state = NULL;
+
+ if (pass != 0) {
+ /* insert level separator from the previous pass */
+ if (room) {
+ *xf++ = 1;
+ room--;
+ }
+ want++;
+ }
+
+ /* special pass for undefined */
+ if (pass == table->info->directive_count) {
+ direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+ } else {
+ direc = table->info->directive[pass];
+ }
+
+ t = src;
+
+ if (direc & DIRECTIVE_BACKWARD) {
+ wchar_t *bp, *fp, c;
+ if (tr)
+ free(tr);
+ if ((tr = wcsdup(t)) == NULL) {
+ errno = ENOMEM;
+ goto fail;
+ }
+ bp = tr;
+ fp = tr + wcslen(tr) - 1;
+ while (bp < fp) {
+ c = *bp;
+ *bp++ = *fp;
+ *fp-- = c;
+ }
+ t = (const wchar_t *)tr;
+ }
- if (t == NULL)
- __collate_err(EX_OSERR, __func__);
- return (t);
+ if (direc & DIRECTIVE_POSITION) {
+ while (*t || state) {
+ _collate_lookup(table, t, &len, &pri, pass, &state);
+ t += len;
+ if (pri <= 0) {
+ if (pri < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ pri = COLLATE_MAX_PRIORITY;
+ }
+ if (room) {
+ *xf++ = pri;
+ room--;
+ }
+ want++;
+ need = want;
+ }
+ } else {
+ while (*t || state) {
+ _collate_lookup(table, t, &len, &pri, pass, &state);
+ t += len;
+ if (pri <= 0) {
+ if (pri < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ continue;
+ }
+ if (room) {
+ *xf++ = pri;
+ room--;
+ }
+ want++;
+ need = want;
+ }
+ }
+ }
+ if (tr)
+ free(tr);
+ return (need);
+
+fail:
+ if (tr)
+ free(tr);
+ return ((size_t)(-1));
}
-void
-__collate_err(int ex, const char *f)
+/*
+ * In the non-POSIX case, we transform each character into a string of
+ * characters representing the character's priority. Since char is usually
+ * signed, we are limited by 7 bits per byte. To avoid zero, we need to add
+ * XFRM_OFFSET, so we can't use a full 7 bits. For simplicity, we choose 6
+ * bits per byte.
+ *
+ * It turns out that we sometimes have real priorities that are
+ * 31-bits wide. (But: be careful using priorities where the high
+ * order bit is set -- i.e. the priority is negative. The sort order
+ * may be surprising!)
+ *
+ * TODO: This would be a good area to optimize somewhat. It turns out
+ * that real priorities *except for the last UNDEFINED pass* are generally
+ * very small. We need the localedef code to precalculate the max
+ * priority for us, and ideally also give us a mask, and then we could
+ * severely limit what we expand to.
+ */
+#define XFRM_BYTES 6
+#define XFRM_OFFSET ('0') /* make all printable characters */
+#define XFRM_SHIFT 6
+#define XFRM_MASK ((1 << XFRM_SHIFT) - 1)
+#define XFRM_SEP ('.') /* chosen to be less than XFRM_OFFSET */
+
+static int
+xfrm(struct xlocale_collate *table, unsigned char *p, int pri, int pass)
{
- const char *s;
- int serrno = errno;
-
- s = _getprogname();
- _write(STDERR_FILENO, s, strlen(s));
- _write(STDERR_FILENO, ": ", 2);
- s = f;
- _write(STDERR_FILENO, s, strlen(s));
- _write(STDERR_FILENO, ": ", 2);
- s = strerror(serrno);
- _write(STDERR_FILENO, s, strlen(s));
- _write(STDERR_FILENO, "\n", 1);
- exit(ex);
+ /* we use unsigned to ensure zero fill on right shift */
+ uint32_t val = (uint32_t)table->info->pri_count[pass];
+ int nc = 0;
+
+ while (val) {
+ *p = (pri & XFRM_MASK) + XFRM_OFFSET;
+ pri >>= XFRM_SHIFT;
+ val >>= XFRM_SHIFT;
+ p++;
+ nc++;
+ }
+ return (nc);
}
-#ifdef COLLATE_DEBUG
-void
-__collate_print_tables()
+size_t
+_collate_sxfrm(struct xlocale_collate *table, const wchar_t *src, char *xf,
+ size_t room)
+{
+ int pri;
+ int len;
+ const wchar_t *t;
+ wchar_t *tr = NULL;
+ int direc;
+ int pass;
+ const int32_t *state;
+ size_t want = 0;
+ size_t need = 0;
+ int b;
+ uint8_t buf[XFRM_BYTES];
+
+ for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+ state = NULL;
+
+ if (pass != 0) {
+ /* insert level separator from the previous pass */
+ if (room) {
+ *xf++ = XFRM_SEP;
+ room--;
+ }
+ want++;
+ }
+
+ /* special pass for undefined */
+ if (pass == table->info->directive_count) {
+ direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+ } else {
+ direc = table->info->directive[pass];
+ }
+
+ t = src;
+
+ if (direc & DIRECTIVE_BACKWARD) {
+ wchar_t *bp, *fp, c;
+ if (tr)
+ free(tr);
+ if ((tr = wcsdup(t)) == NULL) {
+ errno = ENOMEM;
+ goto fail;
+ }
+ bp = tr;
+ fp = tr + wcslen(tr) - 1;
+ while (bp < fp) {
+ c = *bp;
+ *bp++ = *fp;
+ *fp-- = c;
+ }
+ t = (const wchar_t *)tr;
+ }
+
+ if (direc & DIRECTIVE_POSITION) {
+ while (*t || state) {
+
+ _collate_lookup(table, t, &len, &pri, pass, &state);
+ t += len;
+ if (pri <= 0) {
+ if (pri < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ pri = COLLATE_MAX_PRIORITY;
+ }
+
+ b = xfrm(table, buf, pri, pass);
+ want += b;
+ if (room) {
+ while (b) {
+ b--;
+ if (room) {
+ *xf++ = buf[b];
+ room--;
+ }
+ }
+ }
+ need = want;
+ }
+ } else {
+ while (*t || state) {
+ _collate_lookup(table, t, &len, &pri, pass, &state);
+ t += len;
+ if (pri <= 0) {
+ if (pri < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ continue;
+ }
+
+ b = xfrm(table, buf, pri, pass);
+ want += b;
+ if (room) {
+
+ while (b) {
+ b--;
+ if (room) {
+ *xf++ = buf[b];
+ room--;
+ }
+ }
+ }
+ need = want;
+ }
+ }
+ }
+ if (tr)
+ free(tr);
+ return (need);
+
+fail:
+ if (tr)
+ free(tr);
+ return ((size_t)(-1));
+}
+
+/*
+ * __collate_equiv_value returns the primary collation value for the given
+ * collating symbol specified by str and len. Zero or negative is returned
+ * if the collating symbol was not found. This function is used by bracket
+ * code in the TRE regex library.
+ */
+int
+__collate_equiv_value(locale_t locale, const wchar_t *str, size_t len)
{
- int i;
- struct __collate_st_chain_pri *p2;
-
- printf("Substitute table:\n");
- for (i = 0; i < UCHAR_MAX + 1; i++)
- if (i != *__collate_substitute_table[i])
- printf("\t'%c' --> \"%s\"\n", i,
- __collate_substitute_table[i]);
- printf("Chain priority table:\n");
- for (p2 = __collate_chain_pri_table; p2->str[0] != '\0'; p2++)
- printf("\t\"%s\" : %d %d\n", p2->str, p2->prim, p2->sec);
- printf("Char priority table:\n");
- for (i = 0; i < UCHAR_MAX + 1; i++)
- printf("\t'%c' : %d %d\n", i, __collate_char_pri_table[i].prim,
- __collate_char_pri_table[i].sec);
+ int32_t e;
+
+ if (len < 1 || len >= COLLATE_STR_LEN)
+ return (-1);
+
+ FIX_LOCALE(locale);
+ struct xlocale_collate *table =
+ (struct xlocale_collate*)locale->components[XLC_COLLATE];
+
+ if (table->__collate_load_error)
+ return ((len == 1 && *str <= UCHAR_MAX) ? *str : -1);
+
+ if (len == 1) {
+ e = -1;
+ if (*str <= UCHAR_MAX)
+ e = table->char_pri_table[*str].pri[0];
+ else if (table->info->large_count > 0) {
+ collate_large_t *match_large;
+ match_large = largesearch(table, *str);
+ if (match_large)
+ e = match_large->pri.pri[0];
+ }
+ if (e == 0)
+ return (1);
+ return (e > 0 ? e : 0);
+ }
+ if (table->info->chain_count > 0) {
+ wchar_t name[COLLATE_STR_LEN];
+ collate_chain_t *match_chain;
+ int clen;
+
+ wcsncpy (name, str, len);
+ name[len] = 0;
+ match_chain = chainsearch(table, name, &clen);
+ if (match_chain) {
+ e = match_chain->pri[0];
+ if (e == 0)
+ return (1);
+ return (e < 0 ? -e : e);
+ }
+ }
+ return (0);
}
-#endif
diff --git a/lib/libc/locale/collate.h b/lib/libc/locale/collate.h
index ad034d4..9812589 100644
--- a/lib/libc/locale/collate.h
+++ b/lib/libc/locale/collate.h
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -40,42 +41,98 @@
#include <limits.h>
#include "xlocale_private.h"
-#define STR_LEN 10
-#define TABLE_SIZE 100
-#define COLLATE_VERSION "1.0\n"
-#define COLLATE_VERSION1_2 "1.2\n"
+/*
+ * Work around buildworld bootstrapping from older systems whose limits.h
+ * sets COLL_WEIGHTS_MAX to 0.
+ */
+#if COLL_WEIGHTS_MAX == 0
+#undef COLL_WEIGHTS_MAX
+#define COLL_WEIGHTS_MAX 10
+#endif
-struct __collate_st_char_pri {
- int prim, sec;
-};
-struct __collate_st_chain_pri {
- u_char str[STR_LEN];
- int prim, sec;
-};
+#define COLLATE_STR_LEN 24 /* should be 64-bit multiple */
+#define COLLATE_VERSION "BSD 1.0\n"
+
+#define COLLATE_MAX_PRIORITY (0x7fffffff) /* max signed value */
+#define COLLATE_SUBST_PRIORITY (0x40000000) /* bit indicates subst table */
-#define __collate_substitute_table (*__collate_substitute_table_ptr)
-#define __collate_char_pri_table (*__collate_char_pri_table_ptr)
+#define DIRECTIVE_UNDEF 0x00
+#define DIRECTIVE_FORWARD 0x01
+#define DIRECTIVE_BACKWARD 0x02
+#define DIRECTIVE_POSITION 0x04
+#define DIRECTIVE_UNDEFINED 0x08 /* special last weight for UNDEFINED */
+
+#define DIRECTIVE_DIRECTION_MASK (DIRECTIVE_FORWARD | DIRECTIVE_BACKWARD)
+
+/*
+ * The collate file format is as follows:
+ *
+ * char version[COLLATE_STR_LEN]; // must be COLLATE_VERSION
+ * collate_info_t info; // see below, includes padding
+ * collate_char_t char_data[256]; // 8 bit char values
+ * collate_subst_t subst[*]; // 0 or more substitutions
+ * collate_chain_t chains[*]; // 0 or more chains
+ * collate_large_t large[*]; // extended char priorities
+ *
+ * Note that all structures must be 32-bit aligned, as each structure
+ * contains 32-bit member fields. The entire file is mmap'd, so it's
+ * critical that alignment be observed. It is not generally safe to
+ * use any 64-bit values in the structures.
+ */
+
+typedef struct collate_info {
+ uint8_t directive_count;
+ uint8_t directive[COLL_WEIGHTS_MAX];
+ int32_t pri_count[COLL_WEIGHTS_MAX];
+ int32_t flags;
+ int32_t chain_count;
+ int32_t large_count;
+ int32_t subst_count[COLL_WEIGHTS_MAX];
+ int32_t undef_pri[COLL_WEIGHTS_MAX];
+} collate_info_t;
+
+typedef struct collate_char {
+ int32_t pri[COLL_WEIGHTS_MAX];
+} collate_char_t;
+
+typedef struct collate_chain {
+ wchar_t str[COLLATE_STR_LEN];
+ int32_t pri[COLL_WEIGHTS_MAX];
+} collate_chain_t;
+
+typedef struct collate_large {
+ int32_t val;
+ collate_char_t pri;
+} collate_large_t;
+
+typedef struct collate_subst {
+ int32_t key;
+ int32_t pri[COLLATE_STR_LEN];
+} collate_subst_t;
struct xlocale_collate {
struct xlocale_component header;
int __collate_load_error;
- int __collate_substitute_nontrivial;
+ char * map;
+ size_t maplen;
- u_char (*__collate_substitute_table_ptr)[UCHAR_MAX + 1][STR_LEN];
- struct __collate_st_char_pri (*__collate_char_pri_table_ptr)[UCHAR_MAX + 1];
- struct __collate_st_chain_pri *__collate_chain_pri_table;
+ collate_info_t *info;
+ collate_char_t *char_pri_table;
+ collate_large_t *large_pri_table;
+ collate_chain_t *chain_pri_table;
+ collate_subst_t *subst_table[COLL_WEIGHTS_MAX];
};
-
__BEGIN_DECLS
-u_char *__collate_strdup(u_char *);
-u_char *__collate_substitute(struct xlocale_collate *, const u_char *);
int __collate_load_tables(const char *);
-void __collate_lookup(struct xlocale_collate *, const u_char *, int *, int *, int *);
-int __collate_range_cmp(struct xlocale_collate *, int, int);
-#ifdef COLLATE_DEBUG
-void __collate_print_tables(void);
-#endif
+int __collate_equiv_value(locale_t, const wchar_t *, size_t);
+void _collate_lookup(struct xlocale_collate *,const wchar_t *, int *, int *,
+ int, const int **);
+int __collate_range_cmp(struct xlocale_collate *, wchar_t, wchar_t);
+size_t _collate_wxfrm(struct xlocale_collate *, const wchar_t *, wchar_t *,
+ size_t);
+size_t _collate_sxfrm(struct xlocale_collate *, const wchar_t *, char *,
+ size_t);
__END_DECLS
#endif /* !_COLLATE_H_ */
diff --git a/lib/libc/locale/collcmp.c b/lib/libc/locale/collcmp.c
index aa17afd..102fbfb 100644
--- a/lib/libc/locale/collcmp.c
+++ b/lib/libc/locale/collcmp.c
@@ -33,6 +33,7 @@
__FBSDID("$FreeBSD$");
#include <string.h>
+#include <wchar.h>
#include <xlocale.h>
#include "collate.h"
@@ -40,13 +41,15 @@ __FBSDID("$FreeBSD$");
* Compare two characters using collate
*/
-int __collate_range_cmp(struct xlocale_collate *table, int c1, int c2)
+int __collate_range_cmp(struct xlocale_collate *table, wchar_t c1, wchar_t c2)
{
- static char s1[2], s2[2];
+ wchar_t s1[2], s2[2];
s1[0] = c1;
+ s1[1] = 0;
s2[0] = c2;
+ s2[1] = 0;
struct _xlocale l = {{0}};
l.components[XLC_COLLATE] = (struct xlocale_component *)table;
- return (strcoll_l(s1, s2, &l));
+ return (wcscoll_l(s1, s2, &l));
}
diff --git a/lib/libc/locale/euc.c b/lib/libc/locale/euc.c
index 26ad413..0e5f1bf 100644
--- a/lib/libc/locale/euc.c
+++ b/lib/libc/locale/euc.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
@@ -19,11 +21,7 @@
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
+ * 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
@@ -56,17 +54,56 @@ __FBSDID("$FreeBSD$");
extern int __mb_sb_limit;
-static size_t _EUC_mbrtowc(wchar_t * __restrict, const char * __restrict,
+static size_t _EUC_mbrtowc_impl(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
+static size_t _EUC_wcrtomb_impl(char * __restrict, wchar_t,
+ mbstate_t * __restrict, uint8_t, uint8_t, uint8_t, uint8_t);
+
+static size_t _EUC_CN_mbrtowc(wchar_t * __restrict, const char * __restrict,
size_t, mbstate_t * __restrict);
-static int _EUC_mbsinit(const mbstate_t *);
-static size_t _EUC_wcrtomb(char * __restrict, wchar_t,
+static size_t _EUC_JP_mbrtowc(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+static size_t _EUC_KR_mbrtowc(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+static size_t _EUC_TW_mbrtowc(wchar_t * __restrict, const char * __restrict,
+ size_t, mbstate_t * __restrict);
+
+static size_t _EUC_CN_wcrtomb(char * __restrict, wchar_t,
+ mbstate_t * __restrict);
+static size_t _EUC_JP_wcrtomb(char * __restrict, wchar_t,
+ mbstate_t * __restrict);
+static size_t _EUC_KR_wcrtomb(char * __restrict, wchar_t,
+ mbstate_t * __restrict);
+static size_t _EUC_TW_wcrtomb(char * __restrict, wchar_t,
mbstate_t * __restrict);
-typedef struct {
- int count[4];
- wchar_t bits[4];
- wchar_t mask;
-} _EucInfo;
+static size_t _EUC_CN_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _EUC_JP_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _EUC_KR_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _EUC_TW_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+
+static size_t _EUC_CN_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _EUC_JP_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _EUC_KR_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _EUC_TW_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+
+static int _EUC_mbsinit(const mbstate_t *);
typedef struct {
wchar_t ch;
@@ -74,94 +111,218 @@ typedef struct {
int want;
} _EucState;
+static int
+_EUC_mbsinit(const mbstate_t *ps)
+{
+
+ return (ps == NULL || ((const _EucState *)ps)->want == 0);
+}
+
+/*
+ * EUC-CN uses CS0, CS1 and CS2 (4 bytes).
+ */
int
-_EUC_init(struct xlocale_ctype *l, _RuneLocale *rl)
+_EUC_CN_init(struct xlocale_ctype *l, _RuneLocale *rl)
{
- _EucInfo *ei;
- int x, new__mb_cur_max;
- char *v, *e;
+ l->__mbrtowc = _EUC_CN_mbrtowc;
+ l->__wcrtomb = _EUC_CN_wcrtomb;
+ l->__mbsnrtowcs = _EUC_CN_mbsnrtowcs;
+ l->__wcsnrtombs = _EUC_CN_wcsnrtombs;
+ l->__mbsinit = _EUC_mbsinit;
- if (rl->__variable == NULL)
- return (EFTYPE);
+ l->runes = rl;
+ l->__mb_cur_max = 4;
+ l->__mb_sb_limit = 256;
+ return (0);
+}
- v = (char *)rl->__variable;
+static size_t
+_EUC_CN_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+ size_t n, mbstate_t * __restrict ps)
+{
+ return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
+}
- while (*v == ' ' || *v == '\t')
- ++v;
+static size_t
+_EUC_CN_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src,
+ size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_CN_mbrtowc));
+}
- if ((ei = malloc(sizeof(_EucInfo))) == NULL)
- return (errno == 0 ? ENOMEM : errno);
+static size_t
+_EUC_CN_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps)
+{
+ return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
+}
- new__mb_cur_max = 0;
- for (x = 0; x < 4; ++x) {
- ei->count[x] = (int)strtol(v, &e, 0);
- if (v == e || !(v = e)) {
- free(ei);
- return (EFTYPE);
- }
- if (new__mb_cur_max < ei->count[x])
- new__mb_cur_max = ei->count[x];
- while (*v == ' ' || *v == '\t')
- ++v;
- ei->bits[x] = (int)strtol(v, &e, 0);
- if (v == e || !(v = e)) {
- free(ei);
- return (EFTYPE);
- }
- while (*v == ' ' || *v == '\t')
- ++v;
- }
- ei->mask = (int)strtol(v, &e, 0);
- if (v == e || !(v = e)) {
- free(ei);
- return (EFTYPE);
- }
- rl->__variable = ei;
- rl->__variable_len = sizeof(_EucInfo);
- l->runes = rl;
- l->__mb_cur_max = new__mb_cur_max;
- l->__mbrtowc = _EUC_mbrtowc;
- l->__wcrtomb = _EUC_wcrtomb;
+static size_t
+_EUC_CN_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+ size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_CN_wcrtomb));
+}
+
+/*
+ * EUC-KR uses only CS0 and CS1.
+ */
+int
+_EUC_KR_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+ l->__mbrtowc = _EUC_KR_mbrtowc;
+ l->__wcrtomb = _EUC_KR_wcrtomb;
+ l->__mbsnrtowcs = _EUC_KR_mbsnrtowcs;
+ l->__wcsnrtombs = _EUC_KR_wcsnrtombs;
l->__mbsinit = _EUC_mbsinit;
- l->__mb_sb_limit = 256;
+
+ l->runes = rl;
+ l->__mb_cur_max = 2;
+ l->__mb_sb_limit = 128;
return (0);
}
-static int
-_EUC_mbsinit(const mbstate_t *ps)
+static size_t
+_EUC_KR_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+ size_t n, mbstate_t * __restrict ps)
{
+ return (_EUC_mbrtowc_impl(pwc, s, n, ps, 0, 0, 0, 0));
+}
- return (ps == NULL || ((const _EucState *)ps)->want == 0);
+static size_t
+_EUC_KR_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src,
+ size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_KR_mbrtowc));
}
-#define CEI ((_EucInfo *)(_CurrentRuneLocale->__variable))
+static size_t
+_EUC_KR_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps)
+{
+ return (_EUC_wcrtomb_impl(s, wc, ps, 0, 0, 0, 0));
+}
-#define _SS2 0x008e
-#define _SS3 0x008f
+static size_t
+_EUC_KR_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+ size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_KR_wcrtomb));
+}
-#define GR_BITS 0x80808080 /* XXX: to be fixed */
+/*
+ * EUC-JP uses CS0, CS1, CS2, and CS3.
+ */
+int
+_EUC_JP_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+ l->__mbrtowc = _EUC_JP_mbrtowc;
+ l->__wcrtomb = _EUC_JP_wcrtomb;
+ l->__mbsnrtowcs = _EUC_JP_mbsnrtowcs;
+ l->__wcsnrtombs = _EUC_JP_wcsnrtombs;
+ l->__mbsinit = _EUC_mbsinit;
-static __inline int
-_euc_set(u_int c)
+ l->runes = rl;
+ l->__mb_cur_max = 3;
+ l->__mb_sb_limit = 196;
+ return (0);
+}
+
+static size_t
+_EUC_JP_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+ size_t n, mbstate_t * __restrict ps)
{
+ return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 2, SS3, 3));
+}
- c &= 0xff;
- return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
+static size_t
+_EUC_JP_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src,
+ size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_JP_mbrtowc));
}
static size_t
-_EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
+_EUC_JP_wcrtomb(char * __restrict s, wchar_t wc,
mbstate_t * __restrict ps)
{
+ return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 2, SS3, 3));
+}
+
+static size_t
+_EUC_JP_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+ size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_JP_wcrtomb));
+}
+
+/*
+ * EUC-TW uses CS0, CS1, and CS2.
+ */
+int
+_EUC_TW_init(struct xlocale_ctype *l, _RuneLocale *rl)
+{
+ l->__mbrtowc = _EUC_TW_mbrtowc;
+ l->__wcrtomb = _EUC_TW_wcrtomb;
+ l->__mbsnrtowcs = _EUC_TW_mbsnrtowcs;
+ l->__wcsnrtombs = _EUC_TW_wcsnrtombs;
+ l->__mbsinit = _EUC_mbsinit;
+
+ l->runes = rl;
+ l->__mb_cur_max = 4;
+ l->__mb_sb_limit = 256;
+ return (0);
+}
+
+static size_t
+_EUC_TW_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s,
+ size_t n, mbstate_t * __restrict ps)
+{
+ return (_EUC_mbrtowc_impl(pwc, s, n, ps, SS2, 4, 0, 0));
+}
+
+static size_t
+_EUC_TW_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src,
+ size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _EUC_TW_mbrtowc));
+}
+
+static size_t
+_EUC_TW_wcrtomb(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps)
+{
+ return (_EUC_wcrtomb_impl(s, wc, ps, SS2, 4, 0, 0));
+}
+
+static size_t
+_EUC_TW_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+ size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _EUC_TW_wcrtomb));
+}
+
+/*
+ * Common EUC code.
+ */
+
+static size_t
+_EUC_mbrtowc_impl(wchar_t * __restrict pwc, const char * __restrict s,
+ size_t n, mbstate_t * __restrict ps,
+ uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
+{
_EucState *es;
- int i, set, want;
+ int i, want;
wchar_t wc;
- const char *os;
+ unsigned char ch;
es = (_EucState *)ps;
- if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 ||
- es->set > 3) {
+ if (es->want < 0 || es->want > MB_CUR_MAX) {
errno = EINVAL;
return ((size_t)-1);
}
@@ -176,58 +337,59 @@ _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
/* Incomplete multibyte sequence */
return ((size_t)-2);
- os = s;
-
if (es->want == 0) {
- want = CEI->count[set = _euc_set(*s)];
- if (set == 2 || set == 3) {
- --want;
- if (--n == 0) {
- /* Incomplete multibyte sequence */
- es->set = set;
- es->want = want;
- es->ch = 0;
- return ((size_t)-2);
- }
- ++s;
- if (*s == '\0') {
- errno = EILSEQ;
- return ((size_t)-1);
- }
+ /* Fast path for plain ASCII (CS0) */
+ if (((ch = (unsigned char)*s) & 0x80) == 0) {
+ if (pwc != NULL)
+ *pwc = ch;
+ return (ch != '\0' ? 1 : 0);
}
- wc = (unsigned char)*s++;
+
+ if (ch >= 0xa1) {
+ /* CS1 */
+ want = 2;
+ } else if (ch == cs2) {
+ want = cs2width;
+ } else if (ch == cs3) {
+ want = cs3width;
+ } else {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+
+
+ es->want = want;
+ es->ch = 0;
} else {
- set = es->set;
want = es->want;
wc = es->ch;
}
- for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
- if (*s == '\0') {
- errno = EILSEQ;
- return ((size_t)-1);
- }
- wc = (wc << 8) | (unsigned char)*s++;
+
+ for (i = 0; i < MIN(want, n); i++) {
+ wc <<= 8;
+ wc |= *s;
+ s++;
}
if (i < want) {
/* Incomplete multibyte sequence */
- es->set = set;
es->want = want - i;
es->ch = wc;
return ((size_t)-2);
}
- wc = (wc & ~CEI->mask) | CEI->bits[set];
if (pwc != NULL)
*pwc = wc;
es->want = 0;
- return (wc == L'\0' ? 0 : s - os);
+ return (wc == L'\0' ? 0 : want);
}
static size_t
-_EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
+_EUC_wcrtomb_impl(char * __restrict s, wchar_t wc,
+ mbstate_t * __restrict ps,
+ uint8_t cs2, uint8_t cs2width, uint8_t cs3, uint8_t cs3width)
{
_EucState *es;
- wchar_t m, nm;
int i, len;
+ wchar_t nm;
es = (_EucState *)ps;
@@ -240,34 +402,52 @@ _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
/* Reset to initial shift state (no-op) */
return (1);
- m = wc & CEI->mask;
- nm = wc & ~m;
+ if ((wc & ~0x7f) == 0) {
+ /* Fast path for plain ASCII (CS0) */
+ *s = (char)wc;
+ return (1);
+ }
- if (m == CEI->bits[1]) {
-CodeSet1:
- /* Codeset 1: The first byte must have 0x80 in it. */
- i = len = CEI->count[1];
- while (i-- > 0)
- *s++ = (nm >> (i << 3)) | 0x80;
+ /* Determine the "length" */
+ if ((unsigned)wc > 0xffffff) {
+ len = 4;
+ } else if ((unsigned)wc > 0xffff) {
+ len = 3;
+ } else if ((unsigned)wc > 0xff) {
+ len = 2;
} else {
- if (m == CEI->bits[0])
- i = len = CEI->count[0];
- else if (m == CEI->bits[2]) {
- i = len = CEI->count[2];
- *s++ = _SS2;
- --i;
- /* SS2 designates G2 into GR */
- nm |= GR_BITS;
- } else if (m == CEI->bits[3]) {
- i = len = CEI->count[3];
- *s++ = _SS3;
- --i;
- /* SS3 designates G3 into GR */
- nm |= GR_BITS;
- } else
- goto CodeSet1; /* Bletch */
- while (i-- > 0)
- *s++ = (nm >> (i << 3)) & 0xff;
+ len = 1;
+ }
+
+ if (len > MB_CUR_MAX) {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+
+ /* This first check excludes CS1, which is implicitly valid. */
+ if ((wc < 0xa100) || (wc > 0xffff)) {
+ /* Check for valid CS2 or CS3 */
+ nm = (wc >> ((len - 1) * 8));
+ if (nm == cs2) {
+ if (len != cs2width) {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+ } else if (nm == cs3) {
+ if (len != cs3width) {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+ } else {
+ errno = EILSEQ;
+ return ((size_t)-1);
+ }
+ }
+
+ /* Stash the bytes, least significant last */
+ for (i = len - 1; i >= 0; i--) {
+ s[i] = (wc & 0xff);
+ wc >>= 8;
}
return (len);
}
diff --git a/lib/libc/locale/gb18030.c b/lib/libc/locale/gb18030.c
index 9214385..d87b6f7 100644
--- a/lib/libc/locale/gb18030.c
+++ b/lib/libc/locale/gb18030.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins
* All rights reserved.
*
@@ -28,6 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
+
/*
* PRC National Standard GB 18030-2000 encoding of Chinese text.
*
@@ -49,6 +52,13 @@ static size_t _GB18030_mbrtowc(wchar_t * __restrict, const char * __restrict,
static int _GB18030_mbsinit(const mbstate_t *);
static size_t _GB18030_wcrtomb(char * __restrict, wchar_t,
mbstate_t * __restrict);
+static size_t _GB18030_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _GB18030_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+
typedef struct {
int count;
@@ -62,6 +72,8 @@ _GB18030_init(struct xlocale_ctype *l, _RuneLocale *rl)
l->__mbrtowc = _GB18030_mbrtowc;
l->__wcrtomb = _GB18030_wcrtomb;
l->__mbsinit = _GB18030_mbsinit;
+ l->__mbsnrtowcs = _GB18030_mbsnrtowcs;
+ l->__wcsnrtombs = _GB18030_wcsnrtombs;
l->runes = rl;
l->__mb_cur_max = 4;
l->__mb_sb_limit = 128;
@@ -222,3 +234,19 @@ ilseq:
errno = EILSEQ;
return ((size_t)-1);
}
+
+static size_t
+_GB18030_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src, size_t nms, size_t len,
+ mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB18030_mbrtowc));
+}
+
+static size_t
+_GB18030_wcsnrtombs(char * __restrict dst,
+ const wchar_t ** __restrict src, size_t nwc, size_t len,
+ mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB18030_wcrtomb));
+}
diff --git a/lib/libc/locale/gb2312.c b/lib/libc/locale/gb2312.c
index 5fbc07d..032a3e2 100644
--- a/lib/libc/locale/gb2312.c
+++ b/lib/libc/locale/gb2312.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2004 Tim J. Robbins. All rights reserved.
* Copyright (c) 2003 David Xu <davidxu@freebsd.org>
* All rights reserved.
@@ -45,6 +47,13 @@ static size_t _GB2312_mbrtowc(wchar_t * __restrict, const char * __restrict,
static int _GB2312_mbsinit(const mbstate_t *);
static size_t _GB2312_wcrtomb(char * __restrict, wchar_t,
mbstate_t * __restrict);
+static size_t _GB2312_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _GB2312_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+
typedef struct {
int count;
@@ -59,6 +68,8 @@ _GB2312_init(struct xlocale_ctype *l, _RuneLocale *rl)
l->__mbrtowc = _GB2312_mbrtowc;
l->__wcrtomb = _GB2312_wcrtomb;
l->__mbsinit = _GB2312_mbsinit;
+ l->__mbsnrtowcs = _GB2312_mbsnrtowcs;
+ l->__wcsnrtombs = _GB2312_wcsnrtombs;
l->__mb_cur_max = 2;
l->__mb_sb_limit = 128;
return (0);
@@ -71,7 +82,7 @@ _GB2312_mbsinit(const mbstate_t *ps)
return (ps == NULL || ((const _GB2312State *)ps)->count == 0);
}
-static __inline int
+static int
_GB2312_check(const char *str, size_t n)
{
const u_char *s = (const u_char *)str;
@@ -90,7 +101,7 @@ _GB2312_check(const char *str, size_t n)
} else if (s[0] & 0x80) {
/* Invalid multibyte sequence */
return (-1);
- }
+ }
return (1);
}
@@ -158,3 +169,19 @@ _GB2312_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
*s = wc & 0xff;
return (1);
}
+
+static size_t
+_GB2312_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src, size_t nms, size_t len,
+ mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GB2312_mbrtowc));
+}
+
+static size_t
+_GB2312_wcsnrtombs(char * __restrict dst,
+ const wchar_t ** __restrict src, size_t nwc, size_t len,
+ mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GB2312_wcrtomb));
+}
diff --git a/lib/libc/locale/gbk.c b/lib/libc/locale/gbk.c
index 43269c7..c3d16f6 100644
--- a/lib/libc/locale/gbk.c
+++ b/lib/libc/locale/gbk.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
@@ -54,6 +56,12 @@ static size_t _GBK_mbrtowc(wchar_t * __restrict, const char * __restrict,
static int _GBK_mbsinit(const mbstate_t *);
static size_t _GBK_wcrtomb(char * __restrict, wchar_t,
mbstate_t * __restrict);
+static size_t _GBK_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _GBK_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
typedef struct {
wchar_t ch;
@@ -66,6 +74,8 @@ _GBK_init(struct xlocale_ctype *l, _RuneLocale *rl)
l->__mbrtowc = _GBK_mbrtowc;
l->__wcrtomb = _GBK_wcrtomb;
l->__mbsinit = _GBK_mbsinit;
+ l->__mbsnrtowcs = _GBK_mbsnrtowcs;
+ l->__wcsnrtombs = _GBK_wcsnrtombs;
l->runes = rl;
l->__mb_cur_max = 2;
l->__mb_sb_limit = 128;
@@ -79,7 +89,7 @@ _GBK_mbsinit(const mbstate_t *ps)
return (ps == NULL || ((const _GBKState *)ps)->ch == 0);
}
-static __inline int
+static int
_gbk_check(u_int c)
{
@@ -140,7 +150,7 @@ _GBK_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
wc = (wc << 8) | (*s++ & 0xff);
if (pwc != NULL)
*pwc = wc;
- return (2);
+ return (2);
} else {
if (pwc != NULL)
*pwc = wc;
@@ -171,3 +181,17 @@ _GBK_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
*s = wc & 0xff;
return (1);
}
+
+static size_t
+_GBK_mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
+ size_t nms, size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _GBK_mbrtowc));
+}
+
+static size_t
+_GBK_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
+ size_t nwc, size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _GBK_wcrtomb));
+}
diff --git a/lib/libc/locale/mblocal.h b/lib/libc/locale/mblocal.h
index d172764..d86fd55 100644
--- a/lib/libc/locale/mblocal.h
+++ b/lib/libc/locale/mblocal.h
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2004 Tim J. Robbins.
* All rights reserved.
*
@@ -37,6 +39,8 @@
#include <runetype.h>
#include "xlocale_private.h"
+#define SS2 0x008e
+#define SS3 0x008f
/*
* Conversion function pointers for current encoding.
@@ -62,18 +66,24 @@ extern struct xlocale_ctype __xlocale_global_ctype;
* Rune initialization function prototypes.
*/
int _none_init(struct xlocale_ctype *, _RuneLocale *);
-int _ascii_init(struct xlocale_ctype *, _RuneLocale *);
int _UTF8_init(struct xlocale_ctype *, _RuneLocale *);
-int _EUC_init(struct xlocale_ctype *, _RuneLocale *);
+int _EUC_CN_init(struct xlocale_ctype *, _RuneLocale *);
+int _EUC_JP_init(struct xlocale_ctype *, _RuneLocale *);
+int _EUC_KR_init(struct xlocale_ctype *, _RuneLocale *);
+int _EUC_TW_init(struct xlocale_ctype *, _RuneLocale *);
int _GB18030_init(struct xlocale_ctype *, _RuneLocale *);
int _GB2312_init(struct xlocale_ctype *, _RuneLocale *);
int _GBK_init(struct xlocale_ctype *, _RuneLocale *);
int _BIG5_init(struct xlocale_ctype *, _RuneLocale *);
int _MSKanji_init(struct xlocale_ctype *, _RuneLocale *);
-extern size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict,
- size_t, size_t, mbstate_t * __restrict);
-extern size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict,
- size_t, size_t, mbstate_t * __restrict);
+typedef size_t (*mbrtowc_pfn_t)(wchar_t * __restrict,
+ const char * __restrict, size_t, mbstate_t * __restrict);
+typedef size_t (*wcrtomb_pfn_t)(char * __restrict, wchar_t,
+ mbstate_t * __restrict);
+size_t __mbsnrtowcs_std(wchar_t * __restrict, const char ** __restrict,
+ size_t, size_t, mbstate_t * __restrict, mbrtowc_pfn_t);
+size_t __wcsnrtombs_std(char * __restrict, const wchar_t ** __restrict,
+ size_t, size_t, mbstate_t * __restrict, wcrtomb_pfn_t);
#endif /* _MBLOCAL_H_ */
diff --git a/lib/libc/locale/mbsnrtowcs.c b/lib/libc/locale/mbsnrtowcs.c
index 15b48dd7..9e6baad 100644
--- a/lib/libc/locale/mbsnrtowcs.c
+++ b/lib/libc/locale/mbsnrtowcs.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins.
*
* Copyright (c) 2011 The FreeBSD Foundation
@@ -56,20 +58,20 @@ mbsnrtowcs(wchar_t * __restrict dst, const char ** __restrict src,
size_t
__mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src,
- size_t nms, size_t len, mbstate_t * __restrict ps)
+ size_t nms, size_t len, mbstate_t * __restrict ps,
+ mbrtowc_pfn_t pmbrtowc)
{
const char *s;
size_t nchr;
wchar_t wc;
size_t nb;
- struct xlocale_ctype *ct = XLOCALE_CTYPE(__get_locale());
s = *src;
nchr = 0;
if (dst == NULL) {
for (;;) {
- if ((nb = ct->__mbrtowc(&wc, s, nms, ps)) == (size_t)-1)
+ if ((nb = pmbrtowc(&wc, s, nms, ps)) == (size_t)-1)
/* Invalid sequence - mbrtowc() sets errno. */
return ((size_t)-1);
else if (nb == 0 || nb == (size_t)-2)
@@ -82,7 +84,7 @@ __mbsnrtowcs_std(wchar_t * __restrict dst, const char ** __restrict src,
}
while (len-- > 0) {
- if ((nb = ct->__mbrtowc(dst, s, nms, ps)) == (size_t)-1) {
+ if ((nb = pmbrtowc(dst, s, nms, ps)) == (size_t)-1) {
*src = s;
return ((size_t)-1);
} else if (nb == (size_t)-2) {
diff --git a/lib/libc/locale/mskanji.c b/lib/libc/locale/mskanji.c
index 9fdd080..dc2d0e1 100644
--- a/lib/libc/locale/mskanji.c
+++ b/lib/libc/locale/mskanji.c
@@ -1,4 +1,6 @@
/*
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
*
* ja_JP.SJIS locale table for BSD4.4/rune
@@ -28,14 +30,14 @@
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -59,6 +61,12 @@ static size_t _MSKanji_mbrtowc(wchar_t * __restrict, const char * __restrict,
static int _MSKanji_mbsinit(const mbstate_t *);
static size_t _MSKanji_wcrtomb(char * __restrict, wchar_t,
mbstate_t * __restrict);
+static size_t _MSKanji_mbsnrtowcs(wchar_t * __restrict,
+ const char ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
+static size_t _MSKanji_wcsnrtombs(char * __restrict,
+ const wchar_t ** __restrict, size_t, size_t,
+ mbstate_t * __restrict);
typedef struct {
wchar_t ch;
@@ -70,6 +78,8 @@ _MSKanji_init(struct xlocale_ctype *l, _RuneLocale *rl)
l->__mbrtowc = _MSKanji_mbrtowc;
l->__wcrtomb = _MSKanji_wcrtomb;
+ l->__mbsnrtowcs = _MSKanji_mbsnrtowcs;
+ l->__wcsnrtombs = _MSKanji_wcsnrtombs;
l->__mbsinit = _MSKanji_mbsinit;
l->runes = rl;
l->__mb_cur_max = 2;
@@ -163,3 +173,19 @@ _MSKanji_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
*s++ = wc >> (i << 3);
return (len);
}
+
+static size_t
+_MSKanji_mbsnrtowcs(wchar_t * __restrict dst,
+ const char ** __restrict src, size_t nms,
+ size_t len, mbstate_t * __restrict ps)
+{
+ return (__mbsnrtowcs_std(dst, src, nms, len, ps, _MSKanji_mbrtowc));
+}
+
+static size_t
+_MSKanji_wcsnrtombs(char * __restrict dst,
+ const wchar_t ** __restrict src, size_t nwc,
+ size_t len, mbstate_t * __restrict ps)
+{
+ return (__wcsnrtombs_std(dst, src, nwc, len, ps, _MSKanji_wcrtomb));
+}
diff --git a/lib/libc/locale/none.c b/lib/libc/locale/none.c
index cacfd73..c4695fd 100644
--- a/lib/libc/locale/none.c
+++ b/lib/libc/locale/none.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
@@ -187,16 +189,6 @@ _none_wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src,
/* setup defaults */
-size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict, size_t,
- mbstate_t * __restrict) = _none_mbrtowc;
-int (*__mbsinit)(const mbstate_t *) = _none_mbsinit;
-size_t (*__mbsnrtowcs)(wchar_t * __restrict, const char ** __restrict,
- size_t, size_t, mbstate_t * __restrict) = _none_mbsnrtowcs;
-size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict) =
- _none_wcrtomb;
-size_t (*__wcsnrtombs)(char * __restrict, const wchar_t ** __restrict,
- size_t, size_t, mbstate_t * __restrict) = _none_wcsnrtombs;
-
struct xlocale_ctype __xlocale_global_ctype = {
{{0}, "C"},
(_RuneLocale*)&_DefaultRuneLocale,
diff --git a/lib/libc/locale/rune.c b/lib/libc/locale/rune.c
index f72ba74..00ef19b 100644
--- a/lib/libc/locale/rune.c
+++ b/lib/libc/locale/rune.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2014 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1993
* The Regents of the University of California. All rights reserved.
*
@@ -45,14 +47,15 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
#include "un-namespace.h"
#include "runefile.h"
-_RuneLocale *_Read_RuneMagi(FILE *);
-
_RuneLocale *
-_Read_RuneMagi(FILE *fp)
+_Read_RuneMagi(const char *fname)
{
char *fdata, *data;
void *lastp;
@@ -67,119 +70,77 @@ _Read_RuneMagi(FILE *fp)
_FileRuneEntry *maplower_ext_ranges;
_FileRuneEntry *mapupper_ext_ranges;
int runetype_ext_len = 0;
+ int fd;
- if (_fstat(fileno(fp), &sb) < 0)
+	if ((fd = _open(fname, O_RDONLY | O_CLOEXEC)) < 0) {
+		errno = EINVAL;	/* O_CLOEXEC above: same as old fopen "re" */
return (NULL);
+ }
- if ((size_t)sb.st_size < sizeof(_FileRuneLocale)) {
- errno = EFTYPE;
+ if (_fstat(fd, &sb) < 0) {
+ (void) _close(fd);
+ errno = EINVAL;
return (NULL);
}
- if ((fdata = malloc(sb.st_size)) == NULL)
- return (NULL);
-
- errno = 0;
- rewind(fp); /* Someone might have read the magic number once already */
- if (errno) {
- saverr = errno;
- free(fdata);
- errno = saverr;
+ if ((size_t)sb.st_size < sizeof (_FileRuneLocale)) {
+ (void) _close(fd);
+ errno = EINVAL;
return (NULL);
}
- if (fread(fdata, sb.st_size, 1, fp) != 1) {
- saverr = errno;
- free(fdata);
- errno = saverr;
+
+	fdata = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+	(void) _close(fd);	/* the mapping outlives the descriptor */
+	if (fdata == MAP_FAILED) {	/* not NULL: see mmap(2) */
+		errno = EINVAL;
return (NULL);
}
- frl = (_FileRuneLocale *)fdata;
+ frl = (_FileRuneLocale *)(void *)fdata;
lastp = fdata + sb.st_size;
variable = frl + 1;
- if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof(frl->magic))) {
- free(fdata);
- errno = EFTYPE;
- return (NULL);
- }
-
- frl->variable_len = ntohl(frl->variable_len);
- frl->runetype_ext_nranges = ntohl(frl->runetype_ext_nranges);
- frl->maplower_ext_nranges = ntohl(frl->maplower_ext_nranges);
- frl->mapupper_ext_nranges = ntohl(frl->mapupper_ext_nranges);
-
- for (x = 0; x < _CACHED_RUNES; ++x) {
- frl->runetype[x] = ntohl(frl->runetype[x]);
- frl->maplower[x] = ntohl(frl->maplower[x]);
- frl->mapupper[x] = ntohl(frl->mapupper[x]);
+ if (memcmp(frl->magic, _FILE_RUNE_MAGIC_1, sizeof (frl->magic))) {
+ goto invalid;
}
runetype_ext_ranges = (_FileRuneEntry *)variable;
variable = runetype_ext_ranges + frl->runetype_ext_nranges;
if (variable > lastp) {
- free(fdata);
- errno = EFTYPE;
- return (NULL);
+ goto invalid;
}
maplower_ext_ranges = (_FileRuneEntry *)variable;
variable = maplower_ext_ranges + frl->maplower_ext_nranges;
if (variable > lastp) {
- free(fdata);
- errno = EFTYPE;
- return (NULL);
+ goto invalid;
}
mapupper_ext_ranges = (_FileRuneEntry *)variable;
variable = mapupper_ext_ranges + frl->mapupper_ext_nranges;
if (variable > lastp) {
- free(fdata);
- errno = EFTYPE;
- return (NULL);
+ goto invalid;
}
frr = runetype_ext_ranges;
for (x = 0; x < frl->runetype_ext_nranges; ++x) {
uint32_t *types;
- frr[x].min = ntohl(frr[x].min);
- frr[x].max = ntohl(frr[x].max);
- frr[x].map = ntohl(frr[x].map);
if (frr[x].map == 0) {
int len = frr[x].max - frr[x].min + 1;
types = variable;
variable = types + len;
runetype_ext_len += len;
if (variable > lastp) {
- free(fdata);
- errno = EFTYPE;
- return (NULL);
+ goto invalid;
}
- while (len-- > 0)
- types[len] = ntohl(types[len]);
}
}
- frr = maplower_ext_ranges;
- for (x = 0; x < frl->maplower_ext_nranges; ++x) {
- frr[x].min = ntohl(frr[x].min);
- frr[x].max = ntohl(frr[x].max);
- frr[x].map = ntohl(frr[x].map);
- }
-
- frr = mapupper_ext_ranges;
- for (x = 0; x < frl->mapupper_ext_nranges; ++x) {
- frr[x].min = ntohl(frr[x].min);
- frr[x].max = ntohl(frr[x].max);
- frr[x].map = ntohl(frr[x].map);
- }
if ((char *)variable + frl->variable_len > (char *)lastp) {
- free(fdata);
- errno = EFTYPE;
- return (NULL);
+ goto invalid;
}
/*
@@ -192,7 +153,7 @@ _Read_RuneMagi(FILE *fp)
frl->variable_len);
if (data == NULL) {
saverr = errno;
- free(fdata);
+ munmap(fdata, sb.st_size);
errno = saverr;
return (NULL);
}
@@ -202,7 +163,6 @@ _Read_RuneMagi(FILE *fp)
memcpy(rl->__magic, _RUNE_MAGIC_1, sizeof(rl->__magic));
memcpy(rl->__encoding, frl->encoding, sizeof(rl->__encoding));
- rl->__invalid_rune = 0;
rl->__variable_len = frl->variable_len;
rl->__runetype_ext.__nranges = frl->runetype_ext_nranges;
@@ -265,7 +225,7 @@ _Read_RuneMagi(FILE *fp)
}
memcpy(rl->__variable, variable, rl->__variable_len);
- free(fdata);
+ munmap(fdata, sb.st_size);
/*
* Go out and zero pointers that should be zero.
@@ -283,4 +243,9 @@ _Read_RuneMagi(FILE *fp)
rl->__mapupper_ext.__ranges = NULL;
return (rl);
+
+invalid:
+ munmap(fdata, sb.st_size);
+ errno = EINVAL;
+ return (NULL);
}
diff --git a/lib/libc/locale/setrunelocale.c b/lib/libc/locale/setrunelocale.c
index 0a0943f..67c632e 100644
--- a/lib/libc/locale/setrunelocale.c
+++ b/lib/libc/locale/setrunelocale.c
@@ -63,23 +63,15 @@ _Thread_local const _RuneLocale *_ThreadRuneLocale;
extern int __mb_sb_limit;
-extern _RuneLocale *_Read_RuneMagi(FILE *);
+extern _RuneLocale *_Read_RuneMagi(const char *);
static int __setrunelocale(struct xlocale_ctype *l, const char *);
-#define __collate_substitute_nontrivial (table->__collate_substitute_nontrivial)
-#define __collate_substitute_table_ptr (table->__collate_substitute_table_ptr)
-#define __collate_char_pri_table_ptr (table->__collate_char_pri_table_ptr)
-#define __collate_chain_pri_table (table->__collate_chain_pri_table)
-
-
static void
destruct_ctype(void *v)
{
struct xlocale_ctype *l = v;
- if (strcmp(l->runes->__encoding, "EUC") == 0)
- free(l->runes->__variable);
if (&_DefaultRuneLocale != l->runes)
free(l->runes);
free(l);
@@ -95,12 +87,7 @@ __getCurrentRuneLocale(void)
static void
free_runes(_RuneLocale *rl)
{
-
- /* FIXME: The "EUC" check here is a hideous abstraction violation. */
if ((rl != &_DefaultRuneLocale) && (rl)) {
- if (strcmp(rl->__encoding, "EUC") == 0) {
- free(rl->__variable);
- }
free(rl);
}
}
@@ -108,10 +95,9 @@ free_runes(_RuneLocale *rl)
static int
__setrunelocale(struct xlocale_ctype *l, const char *encoding)
{
- FILE *fp;
- char name[PATH_MAX];
_RuneLocale *rl;
- int saverr, ret;
+ int ret;
+ char *path;
struct xlocale_ctype saved = *l;
/*
@@ -124,37 +110,37 @@ __setrunelocale(struct xlocale_ctype *l, const char *encoding)
}
/* Range checking not needed, encoding length already checked before */
- (void) strcpy(name, _PathLocale);
- (void) strcat(name, "/");
- (void) strcat(name, encoding);
- (void) strcat(name, "/LC_CTYPE");
-
- if ((fp = fopen(name, "re")) == NULL)
- return (errno == 0 ? ENOENT : errno);
-
- if ((rl = _Read_RuneMagi(fp)) == NULL) {
- saverr = (errno == 0 ? EFTYPE : errno);
- (void)fclose(fp);
- return (saverr);
+	/* asprintf() returns -1 on failure; report it instead of success. */
+	if (asprintf(&path, "%s/%s/LC_CTYPE", _PathLocale, encoding) == -1)
+		return (ENOMEM);
+
+ if ((rl = _Read_RuneMagi(path)) == NULL) {
+ free(path);
+ errno = EINVAL;
+ return (errno);
}
- (void)fclose(fp);
+ free(path);
l->__mbrtowc = NULL;
l->__mbsinit = NULL;
- l->__mbsnrtowcs = __mbsnrtowcs_std;
+ l->__mbsnrtowcs = NULL;
l->__wcrtomb = NULL;
- l->__wcsnrtombs = __wcsnrtombs_std;
+ l->__wcsnrtombs = NULL;
rl->__sputrune = NULL;
rl->__sgetrune = NULL;
if (strcmp(rl->__encoding, "NONE") == 0)
ret = _none_init(l, rl);
- else if (strcmp(rl->__encoding, "ASCII") == 0)
- ret = _ascii_init(l, rl);
else if (strcmp(rl->__encoding, "UTF-8") == 0)
ret = _UTF8_init(l, rl);
- else if (strcmp(rl->__encoding, "EUC") == 0)
- ret = _EUC_init(l, rl);
+ else if (strcmp(rl->__encoding, "EUC-CN") == 0)
+ ret = _EUC_CN_init(l, rl);
+ else if (strcmp(rl->__encoding, "EUC-JP") == 0)
+ ret = _EUC_JP_init(l, rl);
+ else if (strcmp(rl->__encoding, "EUC-KR") == 0)
+ ret = _EUC_KR_init(l, rl);
+ else if (strcmp(rl->__encoding, "EUC-TW") == 0)
+ ret = _EUC_TW_init(l, rl);
else if (strcmp(rl->__encoding, "GB18030") == 0)
ret = _GB18030_init(l, rl);
else if (strcmp(rl->__encoding, "GB2312") == 0)
@@ -211,7 +197,7 @@ __set_thread_rune_locale(locale_t loc)
#endif
void *
-__ctype_load(const char *locale, locale_t unused)
+__ctype_load(const char *locale, locale_t unused __unused)
{
struct xlocale_ctype *l = calloc(sizeof(struct xlocale_ctype), 1);
diff --git a/lib/libc/locale/utf8.c b/lib/libc/locale/utf8.c
index 8ccfdb1..e096b87 100644
--- a/lib/libc/locale/utf8.c
+++ b/lib/libc/locale/utf8.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins
* All rights reserved.
@@ -70,7 +71,7 @@ _UTF8_init(struct xlocale_ctype *l, _RuneLocale *rl)
l->__mbsnrtowcs = _UTF8_mbsnrtowcs;
l->__wcsnrtombs = _UTF8_wcsnrtombs;
l->runes = rl;
- l->__mb_cur_max = 6;
+ l->__mb_cur_max = 4;
/*
* UCS-4 encoding used as the internal representation, so
* slots 0x0080-0x00FF are occuped and must be excluded
@@ -165,6 +166,7 @@ _UTF8_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
wch = (unsigned char)*s++ & mask;
else
wch = us->ch;
+
for (i = (us->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
if ((*s & 0xc0) != 0x80) {
/*
diff --git a/lib/libc/locale/wcsnrtombs.c b/lib/libc/locale/wcsnrtombs.c
index 2f3bf1e..ce77ebb 100644
--- a/lib/libc/locale/wcsnrtombs.c
+++ b/lib/libc/locale/wcsnrtombs.c
@@ -1,4 +1,6 @@
/*-
+ * Copyright 2013 Garrett D'Amore <garrett@damore.org>
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002-2004 Tim J. Robbins.
* All rights reserved.
*
@@ -57,21 +59,21 @@ wcsnrtombs(char * __restrict dst, const wchar_t ** __restrict src, size_t nwc,
size_t
__wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src,
- size_t nwc, size_t len, mbstate_t * __restrict ps)
+ size_t nwc, size_t len, mbstate_t * __restrict ps,
+ wcrtomb_pfn_t pwcrtomb)
{
mbstate_t mbsbak;
char buf[MB_LEN_MAX];
const wchar_t *s;
size_t nbytes;
size_t nb;
- struct xlocale_ctype *l = XLOCALE_CTYPE(__get_locale());
s = *src;
nbytes = 0;
if (dst == NULL) {
while (nwc-- > 0) {
- if ((nb = l->__wcrtomb(buf, *s, ps)) == (size_t)-1)
+ if ((nb = pwcrtomb(buf, *s, ps)) == (size_t)-1)
/* Invalid character - wcrtomb() sets errno. */
return ((size_t)-1);
else if (*s == L'\0')
@@ -85,7 +87,7 @@ __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src,
while (len > 0 && nwc-- > 0) {
if (len > (size_t)MB_CUR_MAX) {
/* Enough space to translate in-place. */
- if ((nb = l->__wcrtomb(dst, *s, ps)) == (size_t)-1) {
+ if ((nb = pwcrtomb(dst, *s, ps)) == (size_t)-1) {
*src = s;
return ((size_t)-1);
}
@@ -98,7 +100,7 @@ __wcsnrtombs_std(char * __restrict dst, const wchar_t ** __restrict src,
* character is too long for the buffer.
*/
mbsbak = *ps;
- if ((nb = l->__wcrtomb(buf, *s, ps)) == (size_t)-1) {
+ if ((nb = pwcrtomb(buf, *s, ps)) == (size_t)-1) {
*src = s;
return ((size_t)-1);
}
diff --git a/lib/libc/string/strcoll.c b/lib/libc/string/strcoll.c
index a918fca..7675e0a 100644
--- a/lib/libc/string/strcoll.c
+++ b/lib/libc/string/strcoll.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -35,63 +36,78 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
+#include <wchar.h>
#include "collate.h"
-#include <stdio.h>
+/*
+ * In order to properly handle multibyte locales, it's easiest to just
+ * convert to wide characters and then use wcscoll. However if an
+ * error occurs, we gracefully fall back to simple strcmp. Caller
+ * should check errno.
+ */
int
strcoll_l(const char *s, const char *s2, locale_t locale)
{
- int len, len2, prim, prim2, sec, sec2, ret, ret2;
- const char *t, *t2;
- char *tt, *tt2;
+ int ret;
+ wchar_t *t1 = NULL, *t2 = NULL;
+ wchar_t *w1 = NULL, *w2 = NULL;
+ const char *cs1, *cs2;
+ mbstate_t mbs1;
+ mbstate_t mbs2;
+ size_t sz1, sz2;
+
+ memset(&mbs1, 0, sizeof (mbstate_t));
+ memset(&mbs2, 0, sizeof (mbstate_t));
+
+ /*
+ * The mbsrtowcs_l function can set the src pointer to null upon
+ * failure, so it should act on a copy to avoid:
+ * - sending null pointer to strcmp
+ * - having strcoll/strcoll_l change *s or *s2 to null
+ */
+ cs1 = s;
+ cs2 = s2;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
if (table->__collate_load_error)
- return strcmp(s, s2);
-
- len = len2 = 1;
- ret = ret2 = 0;
- if (table->__collate_substitute_nontrivial) {
- t = tt = __collate_substitute(table, s);
- t2 = tt2 = __collate_substitute(table, s2);
- } else {
- tt = tt2 = NULL;
- t = s;
- t2 = s2;
- }
- while(*t && *t2) {
- prim = prim2 = 0;
- while(*t && !prim) {
- __collate_lookup(table, t, &len, &prim, &sec);
- t += len;
- }
- while(*t2 && !prim2) {
- __collate_lookup(table, t2, &len2, &prim2, &sec2);
- t2 += len2;
- }
- if(!prim || !prim2)
- break;
- if(prim != prim2) {
- ret = prim - prim2;
- goto end;
- }
- if(!ret2)
- ret2 = sec - sec2;
- }
- if(!*t && *t2)
- ret = -(int)((u_char)*t2);
- else if(*t && !*t2)
- ret = (u_char)*t;
- else if(!*t && !*t2)
- ret = ret2;
- end:
- free(tt);
- free(tt2);
-
- return ret;
+ goto error;
+
+ sz1 = strlen(s) + 1;
+ sz2 = strlen(s2) + 1;
+
+ /*
+ * Simple assumption: conversion to wide format is strictly
+ * reducing, i.e. a single byte (or multibyte character)
+ * cannot result in multiple wide characters.
+ */
+ if ((t1 = malloc(sz1 * sizeof (wchar_t))) == NULL)
+ goto error;
+ w1 = t1;
+ if ((t2 = malloc(sz2 * sizeof (wchar_t))) == NULL)
+ goto error;
+ w2 = t2;
+
+ if ((mbsrtowcs_l(w1, &cs1, sz1, &mbs1, locale)) == (size_t)-1)
+ goto error;
+
+ if ((mbsrtowcs_l(w2, &cs2, sz2, &mbs2, locale)) == (size_t)-1)
+ goto error;
+
+ ret = wcscoll_l(w1, w2, locale);
+ free(t1);
+ free(t2);
+
+ return (ret);
+
+error:
+ free(t1);
+ free(t2);
+ return (strcmp(s, s2));
}
int
diff --git a/lib/libc/string/strxfrm.c b/lib/libc/string/strxfrm.c
index b758b0c..06ae639 100644
--- a/lib/libc/string/strxfrm.c
+++ b/lib/libc/string/strxfrm.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -35,6 +36,8 @@ __FBSDID("$FreeBSD$");
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
+#include <wchar.h>
#include "collate.h"
size_t
@@ -48,9 +51,10 @@ strxfrm(char * __restrict dest, const char * __restrict src, size_t len)
size_t
strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, locale_t locale)
{
- int prim, sec, l;
size_t slen;
- char *s, *ss;
+ size_t xlen;
+ wchar_t *wcs = NULL;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
@@ -58,32 +62,42 @@ strxfrm_l(char * __restrict dest, const char * __restrict src, size_t len, local
if (!*src) {
if (len > 0)
*dest = '\0';
- return 0;
+ return (0);
}
+ /*
+ * The conversion from multibyte to wide character strings is
+ * strictly reducing (one byte of an mbs cannot expand to more
+ * than one wide character.)
+ */
+ slen = strlen(src);
+
if (table->__collate_load_error)
- return strlcpy(dest, src, len);
-
- slen = 0;
- prim = sec = 0;
- ss = s = __collate_substitute(table, src);
- while (*s) {
- while (*s && !prim) {
- __collate_lookup(table, s, &l, &prim, &sec);
- s += l;
- }
- if (prim) {
- if (len > 1) {
- *dest++ = (char)prim;
- len--;
- }
- slen++;
- prim = 0;
- }
+ goto error;
+
+ if ((wcs = malloc((slen + 1) * sizeof (wchar_t))) == NULL)
+ goto error;
+
+ if (mbstowcs_l(wcs, src, slen + 1, locale) == (size_t)-1)
+ goto error;
+
+ if ((xlen = _collate_sxfrm(table, wcs, dest, len)) == (size_t)-1)
+ goto error;
+
+ free(wcs);
+
+ if (len > xlen) {
+ dest[xlen] = 0;
+ } else if (len) {
+ dest[len-1] = 0;
}
- free(ss);
- if (len > 0)
- *dest = '\0';
- return slen;
+ return (xlen);
+
+error:
+ /* errno should be set to ENOMEM if malloc failed */
+ free(wcs);
+ strlcpy(dest, src, len);
+
+ return (slen);
}
diff --git a/lib/libc/string/wcscoll.c b/lib/libc/string/wcscoll.c
index 3c51015..acb1277 100644
--- a/lib/libc/string/wcscoll.c
+++ b/lib/libc/string/wcscoll.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2002 Tim J. Robbins
* All rights reserved.
*
@@ -38,50 +39,162 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include "collate.h"
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder implementation of wcscoll(). Attempts to use the single-byte
- * collation ordering where possible, and falls back on wcscmp() in locales
- * with extended character sets.
- */
int
wcscoll_l(const wchar_t *ws1, const wchar_t *ws2, locale_t locale)
{
- char *mbs1, *mbs2;
- int diff, sverrno;
+ int len1, len2, pri1, pri2, ret;
+ wchar_t *tr1 = NULL, *tr2 = NULL;
+ int direc, pass;
+
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
- if (table->__collate_load_error || MB_CUR_MAX > 1)
+ if (table->__collate_load_error)
/*
- * Locale has no special collating order, could not be
- * loaded, or has an extended character set; do a fast binary
- * comparison.
+ * Locale has no special collating order or could not be
+ * loaded, do a fast binary comparison.
*/
return (wcscmp(ws1, ws2));
- if ((mbs1 = __mbsdup(ws1)) == NULL || (mbs2 = __mbsdup(ws2)) == NULL) {
- /*
- * Out of memory or illegal wide chars; fall back to wcscmp()
- * but leave errno indicating the error. Callers that don't
- * check for error will get a reasonable but often slightly
- * incorrect result.
- */
- sverrno = errno;
- free(mbs1);
- errno = sverrno;
- return (wcscmp(ws1, ws2));
+ ret = 0;
+
+ /*
+ * Once upon a time we had code to try to optimize this, but
+ * it turns out that you really can't make many assumptions
+ * safely. You absolutely have to run this pass by pass,
+ * because some passes will be ignored for a given character,
+ * while others will not. Simpler locales will benefit from
+ * having fewer passes, and most comparisons should resolve
+ * during the primary pass anyway.
+ *
+ * Note that we do one final extra pass at the end to pick
+ * up UNDEFINED elements. There is special handling for them.
+ */
+ for (pass = 0; pass <= table->info->directive_count; pass++) {
+
+ const int32_t *st1 = NULL;
+ const int32_t *st2 = NULL;
+ const wchar_t *w1 = ws1;
+ const wchar_t *w2 = ws2;
+
+ /* special pass for UNDEFINED */
+ if (pass == table->info->directive_count) {
+ direc = DIRECTIVE_FORWARD | DIRECTIVE_UNDEFINED;
+ } else {
+ direc = table->info->directive[pass];
+ }
+
+		if (direc & DIRECTIVE_BACKWARD) {
+			wchar_t *bp, *fp, c;
+			/* Release the copy left by an earlier backward pass. */
+			free(tr1);
+			if ((tr1 = wcsdup(w1)) == NULL)
+				goto fail;
+			/* Reverse in place; fp starts one past the last char. */
+			for (bp = tr1, fp = bp + wcslen(bp); fp - bp > 1; bp++) {
+				c = *bp;
+				*bp = *--fp;
+				*fp = c;
+			}
+			free(tr2);
+			if ((tr2 = wcsdup(w2)) == NULL)
+				goto fail;
+			for (bp = tr2, fp = bp + wcslen(bp); fp - bp > 1; bp++) {
+				c = *bp;
+				*bp = *--fp;
+				*fp = c;
+			}
+			w1 = tr1;
+			w2 = tr2;
+		}
+
+ if (direc & DIRECTIVE_POSITION) {
+ while ((*w1 || st1) && (*w2 || st2)) {
+ pri1 = pri2 = 0;
+ _collate_lookup(table, w1, &len1, &pri1, pass,
+ &st1);
+ if (pri1 <= 0) {
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ pri1 = COLLATE_MAX_PRIORITY;
+ }
+ _collate_lookup(table, w2, &len2, &pri2, pass,
+ &st2);
+ if (pri2 <= 0) {
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ pri2 = COLLATE_MAX_PRIORITY;
+ }
+ if (pri1 != pri2) {
+ ret = pri1 - pri2;
+ goto end;
+ }
+ w1 += len1;
+ w2 += len2;
+ }
+ } else {
+ while ((*w1 || st1) && (*w2 || st2)) {
+ pri1 = pri2 = 0;
+ while (*w1) {
+ _collate_lookup(table, w1, &len1,
+ &pri1, pass, &st1);
+ if (pri1 > 0)
+ break;
+ if (pri1 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ w1 += len1;
+ }
+ while (*w2) {
+ _collate_lookup(table, w2, &len2,
+ &pri2, pass, &st2);
+ if (pri2 > 0)
+ break;
+ if (pri2 < 0) {
+ errno = EINVAL;
+ goto fail;
+ }
+ w2 += len2;
+ }
+ if (!pri1 || !pri2)
+ break;
+ if (pri1 != pri2) {
+ ret = pri1 - pri2;
+ goto end;
+ }
+ w1 += len1;
+ w2 += len2;
+ }
+ }
+ if (!*w1) {
+ if (*w2) {
+ ret = -(int)*w2;
+ goto end;
+ }
+ } else {
+ ret = *w1;
+ goto end;
+ }
}
+ ret = 0;
- diff = strcoll_l(mbs1, mbs2, locale);
- sverrno = errno;
- free(mbs1);
- free(mbs2);
- errno = sverrno;
+end:
+ if (tr1)
+ free(tr1);
+ if (tr2)
+ free(tr2);
- return (diff);
+ return (ret);
+
+fail:
+ ret = wcscmp(ws1, ws2);
+ goto end;
}
int
@@ -89,24 +202,3 @@ wcscoll(const wchar_t *ws1, const wchar_t *ws2)
{
return wcscoll_l(ws1, ws2, __get_locale());
}
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
- static const mbstate_t initial;
- mbstate_t st;
- const wchar_t *wcp;
- size_t len;
- char *mbs;
-
- wcp = ws;
- st = initial;
- if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
- return (NULL);
- if ((mbs = malloc(len + 1)) == NULL)
- return (NULL);
- st = initial;
- wcsrtombs(mbs, &ws, len + 1, &st);
-
- return (mbs);
-}
diff --git a/lib/libc/string/wcsxfrm.c b/lib/libc/string/wcsxfrm.c
index cea667e..3d6c960 100644
--- a/lib/libc/string/wcsxfrm.c
+++ b/lib/libc/string/wcsxfrm.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 1995 Alex Tatmanjants <alex@elvisti.kiev.ua>
* at Electronni Visti IA, Kiev, Ukraine.
* All rights reserved.
@@ -31,9 +32,6 @@
*/
#include <sys/cdefs.h>
-#if 0
-__FBSDID("FreeBSD: src/lib/libc/string/strxfrm.c,v 1.15 2002/09/06 11:24:06 tjr Exp ");
-#endif
__FBSDID("$FreeBSD$");
#include <stdlib.h>
@@ -41,18 +39,10 @@ __FBSDID("$FreeBSD$");
#include <wchar.h>
#include "collate.h"
-static char *__mbsdup(const wchar_t *);
-
-/*
- * Placeholder wcsxfrm() implementation. See wcscoll.c for a description of
- * the logic used.
- */
size_t
wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len, locale_t locale)
{
- int prim, sec, l;
size_t slen;
- char *mbsrc, *s, *ss;
FIX_LOCALE(locale);
struct xlocale_collate *table =
(struct xlocale_collate*)locale->components[XLC_COLLATE];
@@ -63,67 +53,33 @@ wcsxfrm_l(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len,
return (0);
}
- if (table->__collate_load_error || MB_CUR_MAX > 1) {
- slen = wcslen(src);
- if (len > 0) {
- if (slen < len)
- wcscpy(dest, src);
- else {
- wcsncpy(dest, src, len - 1);
- dest[len - 1] = L'\0';
- }
- }
- return (slen);
+ if ((table->__collate_load_error) ||
+ ((slen = _collate_wxfrm(table, src, dest, len)) == (size_t)-1)) {
+ goto error;
}
- mbsrc = __mbsdup(src);
- slen = 0;
- prim = sec = 0;
- ss = s = __collate_substitute(table, mbsrc);
- while (*s != '\0') {
- while (*s != '\0' && prim == 0) {
- __collate_lookup(table, s, &l, &prim, &sec);
- s += l;
- }
- if (prim != 0) {
- if (len > 1) {
- *dest++ = (wchar_t)prim;
- len--;
- }
- slen++;
- prim = 0;
- }
+ /* Add null termination at the correct location. */
+ if (len > slen) {
+ dest[slen] = 0;
+ } else if (len) {
+ dest[len-1] = 0;
}
- free(ss);
- free(mbsrc);
- if (len != 0)
- *dest = L'\0';
return (slen);
+
+error:
+	slen = wcslen(src);	/* fallback: bounded verbatim copy of src */
+	if (slen < len)
+		(void) wcscpy(dest, src);
+	else if (len > 0) {	/* len == 0: write nothing, len - 1 would wrap */
+		(void) wcsncpy(dest, src, len - 1);
+		dest[len - 1] = L'\0';
+	}
+	return (slen);
}
+
size_t
wcsxfrm(wchar_t * __restrict dest, const wchar_t * __restrict src, size_t len)
{
return wcsxfrm_l(dest, src, len, __get_locale());
}
-
-static char *
-__mbsdup(const wchar_t *ws)
-{
- static const mbstate_t initial;
- mbstate_t st;
- const wchar_t *wcp;
- size_t len;
- char *mbs;
-
- wcp = ws;
- st = initial;
- if ((len = wcsrtombs(NULL, &wcp, 0, &st)) == (size_t)-1)
- return (NULL);
- if ((mbs = malloc(len + 1)) == NULL)
- return (NULL);
- st = initial;
- wcsrtombs(mbs, &ws, len + 1, &st);
-
- return (mbs);
-}
OpenPOWER on IntegriCloud