diff options
Diffstat (limited to 'lib/libc/regex/regexec.c')
-rw-r--r-- | lib/libc/regex/regexec.c | 67 |
1 files changed, 64 insertions, 3 deletions
diff --git a/lib/libc/regex/regexec.c b/lib/libc/regex/regexec.c index c13c72d..c596bdd 100644 --- a/lib/libc/regex/regexec.c +++ b/lib/libc/regex/regexec.c @@ -46,9 +46,9 @@ __FBSDID("$FreeBSD$"); /* * the outer shell of regexec() * - * This file includes engine.c *twice*, after muchos fiddling with the + * This file includes engine.c three times, after muchos fiddling with the * macros that code uses. This lets the same code operate on two different - * representations for state sets. + * representations for state sets and characters. */ #include <sys/types.h> #include <stdio.h> @@ -57,12 +57,53 @@ __FBSDID("$FreeBSD$"); #include <limits.h> #include <ctype.h> #include <regex.h> +#include <wchar.h> +#include <wctype.h> #include "utils.h" #include "regex2.h" static int nope __unused = 0; /* for use in asserts; shuts lint up */ +static __inline size_t +xmbrtowc(wi, s, n, mbs, dummy) +wint_t *wi; +const char *s; +size_t n; +mbstate_t *mbs; +wint_t dummy; +{ + size_t nr; + wchar_t wc; + + nr = mbrtowc(&wc, s, n, mbs); + if (wi != NULL) + *wi = wc; + if (nr == 0) + return (1); + else if (nr == (size_t)-1 || nr == (size_t)-2) { + memset(mbs, 0, sizeof(*mbs)); + if (wi != NULL) + *wi = dummy; + return (1); + } else + return (nr); +} + +static __inline size_t +xmbrtowc_dummy(wi, s, n, mbs, dummy) +wint_t *wi; +const char *s; +size_t n __unused; +mbstate_t *mbs __unused; +wint_t dummy __unused; +{ + + if (wi != NULL) + *wi = (unsigned char)*s; + return (1); +} + /* macros for manipulating states, small version */ #define states long #define states1 states /* for later use in regexec() decision */ @@ -85,6 +126,9 @@ static int nope __unused = 0; /* for use in asserts; shuts lint up */ #define FWD(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) << (n)) #define BACK(dst, src, n) ((dst) |= ((unsigned long)(src)&(here)) >> (n)) #define ISSETBACK(v, n) (((v) & ((unsigned long)here >> (n))) != 0) +/* no multibyte support */ +#define XMBRTOWC xmbrtowc_dummy +#define ZAPSTATE(mbs) ((void)(mbs)) /* function names */ #define SNAMES /* engine.c looks after details */ @@ -110,6 +154,8 @@ static int nope __unused = 0; /* for use in asserts; shuts lint up */ #undef BACK #undef ISSETBACK #undef SNAMES +#undef XMBRTOWC +#undef ZAPSTATE /* macros for manipulating states, large version */ #define states char * @@ -134,11 +180,24 @@ static int nope __unused = 0; /* for use in asserts; shuts lint up */ #define FWD(dst, src, n) ((dst)[here+(n)] |= (src)[here]) #define BACK(dst, src, n) ((dst)[here-(n)] |= (src)[here]) #define ISSETBACK(v, n) ((v)[here - (n)]) +/* no multibyte support */ +#define XMBRTOWC xmbrtowc_dummy +#define ZAPSTATE(mbs) ((void)(mbs)) /* function names */ #define LNAMES /* flag */ #include "engine.c" +/* multibyte character & large states version */ +#undef LNAMES +#undef XMBRTOWC +#undef ZAPSTATE +#define XMBRTOWC xmbrtowc +#define ZAPSTATE(mbs) memset((mbs), 0, sizeof(*(mbs))) +#define MNAMES + +#include "engine.c" + /* - regexec - interface for matching = extern int regexec(const regex_t *, const char *, size_t, \ @@ -176,7 +235,9 @@ int eflags; return(REG_BADPAT); eflags = GOODFLAGS(eflags); - if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) + if (MB_CUR_MAX > 1) + return(mmatcher(g, (char *)string, nmatch, pmatch, eflags)); + else if (g->nstates <= CHAR_BIT*sizeof(states1) && !(eflags®_LARGE)) return(smatcher(g, (char *)string, nmatch, pmatch, eflags)); else return(lmatcher(g, (char *)string, nmatch, pmatch, eflags)); |