summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: tjr <tjr@FreeBSD.org> 2004-07-29 03:48:52 +0000
committer: tjr <tjr@FreeBSD.org> 2004-07-29 03:48:52 +0000
commit: 989082ba5990c7f8edd9f7f0df8215b27e0741b8 (patch)
tree: 548264100c6eeb06ec05edce3c9dbc32aa2b539b
parent: d220006f8da92db1f09e5aa9e858657e7a622f39 (diff)
download: FreeBSD-src-989082ba5990c7f8edd9f7f0df8215b27e0741b8.zip
download: FreeBSD-src-989082ba5990c7f8edd9f7f0df8215b27e0741b8.tar.gz
Add support for multibyte characters.
-rw-r--r-- lib/libc/gen/glob.c 114
1 files changed, 84 insertions, 30 deletions
diff --git a/lib/libc/gen/glob.c b/lib/libc/gen/glob.c
index 17f67f8..f68623b 100644
--- a/lib/libc/gen/glob.c
+++ b/lib/libc/gen/glob.c
@@ -65,6 +65,16 @@ __FBSDID("$FreeBSD$");
* Number of matches in the current invocation of glob.
*/
+/*
+ * Some notes on multibyte character support:
+ * 1. Patterns with illegal byte sequences match nothing - even if
+ * GLOB_NOCHECK is specified.
+ * 2. Illegal byte sequences in filenames are handled by treating them as
+ * single-byte characters with a value of the first byte of the sequence
+ * cast to wchar_t.
+ * 3. State-dependent encodings are not currently supported.
+ */
+
#include <sys/param.h>
#include <sys/stat.h>
@@ -72,11 +82,14 @@ __FBSDID("$FreeBSD$");
#include <dirent.h>
#include <errno.h>
#include <glob.h>
+#include <limits.h>
#include <pwd.h>
+#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <wchar.h>
#include "collate.h"
@@ -100,26 +113,26 @@ __FBSDID("$FreeBSD$");
#ifndef DEBUG
-#define M_QUOTE 0x8000
-#define M_PROTECT 0x4000
-#define M_MASK 0xffff
-#define M_ASCII 0x00ff
+#define M_QUOTE 0x8000000000ULL
+#define M_PROTECT 0x4000000000ULL
+#define M_MASK 0xffffffffffULL
+#define M_CHAR 0x00ffffffffULL
-typedef u_short Char;
+typedef uint_fast64_t Char;
#else
#define M_QUOTE 0x80
#define M_PROTECT 0x40
#define M_MASK 0xff
-#define M_ASCII 0x7f
+#define M_CHAR 0x7f
typedef char Char;
#endif
-#define CHAR(c) ((Char)((c)&M_ASCII))
+#define CHAR(c) ((Char)((c)&M_CHAR))
#define META(c) ((Char)((c)|M_QUOTE))
#define M_ALL META('*')
#define M_END META(']')
@@ -134,7 +147,7 @@ static int compare(const void *, const void *);
static int g_Ctoc(const Char *, char *, u_int);
static int g_lstat(Char *, struct stat *, glob_t *);
static DIR *g_opendir(Char *, glob_t *);
-static Char *g_strchr(Char *, int);
+static Char *g_strchr(Char *, wchar_t);
#ifdef notdef
static Char *g_strcat(Char *, const Char *);
#endif
@@ -160,8 +173,11 @@ glob(pattern, flags, errfunc, pglob)
glob_t *pglob;
{
const u_char *patnext;
- int c, limit;
- Char *bufnext, *bufend, patbuf[MAXPATHLEN];
+ int limit;
+ Char *bufnext, *bufend, patbuf[MAXPATHLEN], prot;
+ mbstate_t mbs;
+ wchar_t wc;
+ size_t clen;
patnext = (u_char *) pattern;
if (!(flags & GLOB_APPEND)) {
@@ -182,21 +198,37 @@ glob(pattern, flags, errfunc, pglob)
bufnext = patbuf;
bufend = bufnext + MAXPATHLEN - 1;
- if (flags & GLOB_NOESCAPE)
- while (bufnext < bufend && (c = *patnext++) != EOS)
- *bufnext++ = c;
- else {
+ if (flags & GLOB_NOESCAPE) {
+ memset(&mbs, 0, sizeof(mbs));
+ while (bufend - bufnext >= MB_CUR_MAX) {
+ clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ return (GLOB_NOMATCH);
+ else if (clen == 0)
+ break;
+ *bufnext++ = wc;
+ patnext += clen;
+ }
+ } else {
/* Protect the quoted characters. */
- while (bufnext < bufend && (c = *patnext++) != EOS)
- if (c == QUOTE) {
- if ((c = *patnext++) == EOS) {
- c = QUOTE;
- --patnext;
+ memset(&mbs, 0, sizeof(mbs));
+ while (bufend - bufnext >= MB_CUR_MAX) {
+ if (*patnext == QUOTE) {
+ if (*++patnext == EOS) {
+ *bufnext++ = QUOTE | M_PROTECT;
+ continue;
}
- *bufnext++ = c | M_PROTECT;
- }
- else
- *bufnext++ = c;
+ prot = M_PROTECT;
+ } else
+ prot = 0;
+ clen = mbrtowc(&wc, patnext, MB_LEN_MAX, &mbs);
+ if (clen == (size_t)-1 || clen == (size_t)-2)
+ return (GLOB_NOMATCH);
+ else if (clen == 0)
+ break;
+ *bufnext++ = wc | prot;
+ patnext += clen;
+ }
}
*bufnext = EOS;
@@ -636,14 +668,27 @@ glob3(pathbuf, pathend, pathend_last, pattern, restpattern, pglob, limit)
while ((dp = (*readdirfunc)(dirp))) {
u_char *sc;
Char *dc;
+ wchar_t wc;
+ size_t clen;
+ mbstate_t mbs;
/* Initial DOT must be matched literally. */
if (dp->d_name[0] == DOT && *pattern != DOT)
continue;
+ memset(&mbs, 0, sizeof(mbs));
dc = pathend;
sc = (u_char *) dp->d_name;
- while (dc < pathend_last && (*dc++ = *sc++) != EOS)
- ;
+ while (dc < pathend_last) {
+ clen = mbrtowc(&wc, sc, MB_LEN_MAX, &mbs);
+ if (clen == (size_t)-1 || clen == (size_t)-2) {
+ wc = *sc;
+ clen = 1;
+ memset(&mbs, 0, sizeof(mbs));
+ }
+ if ((*dc++ = wc) == EOS)
+ break;
+ sc += clen;
+ }
if (!match(pathend, pattern, restpattern)) {
*pathend = EOS;
continue;
@@ -715,7 +760,7 @@ globextend(path, pglob, limit)
for (p = path; *p++;)
continue;
- len = (size_t)(p - path);
+ len = MB_CUR_MAX * (size_t)(p - path); /* XXX overallocation */
if ((copy = malloc(len)) != NULL) {
if (g_Ctoc(path, copy, len)) {
free(copy);
@@ -857,7 +902,7 @@ g_stat(fn, sb, pglob)
static Char *
g_strchr(str, ch)
Char *str;
- int ch;
+ wchar_t ch;
{
do {
if (*str == ch)
@@ -872,10 +917,19 @@ g_Ctoc(str, buf, len)
char *buf;
u_int len;
{
-
- while (len--) {
- if ((*buf++ = *str++) == '\0')
+ mbstate_t mbs;
+ size_t clen;
+
+ memset(&mbs, 0, sizeof(mbs));
+ while (len >= MB_CUR_MAX) {
+ clen = wcrtomb(buf, *str, &mbs);
+ if (clen == (size_t)-1)
+ return (1);
+ if (*str == L'\0')
return (0);
+ str++;
+ buf += clen;
+ len -= clen;
}
return (1);
}
OpenPOWER on IntegriCloud