diff options
author | tjr <tjr@FreeBSD.org> | 2004-07-08 06:43:37 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2004-07-08 06:43:37 +0000 |
commit | 3a9d81b253cf3999ea62ce29e53a7c1e58396894 (patch) | |
tree | 6b88325e31aceec129e4a7871658983a62d5240a /lib/libc | |
parent | 9cdb603498b8b9bb1d5120526a885118481467b5 (diff) | |
download | FreeBSD-src-3a9d81b253cf3999ea62ce29e53a7c1e58396894.zip FreeBSD-src-3a9d81b253cf3999ea62ce29e53a7c1e58396894.tar.gz |
Add a function to iterate over all characters in a particular character
class. This is necessary in order to implement tr(1) efficiently in
multibyte locales, since the brute force method of finding all characters
in a class is infeasible with a 32-bit (or wider) wchar_t.
Diffstat (limited to 'lib/libc')
-rw-r--r-- | lib/libc/locale/Makefile.inc | 4 | ||||
-rw-r--r-- | lib/libc/locale/nextwctype.3 | 58 | ||||
-rw-r--r-- | lib/libc/locale/nextwctype.c | 90 |
3 files changed, 150 insertions, 2 deletions
diff --git a/lib/libc/locale/Makefile.inc b/lib/libc/locale/Makefile.inc index a3b85b3..1e8a363 100644 --- a/lib/libc/locale/Makefile.inc +++ b/lib/libc/locale/Makefile.inc @@ -9,7 +9,7 @@ SRCS+= big5.c btowc.c collate.c collcmp.c euc.c fix_grouping.c frune.c \ ldpart.c lmessages.c lmonetary.c lnumeric.c localeconv.c mblen.c \ mbrlen.c \ mbrtowc.c mbrune.c mbsinit.c mbsrtowcs.c mbtowc.c mbstowcs.c \ - mskanji.c nl_langinfo.c nomacros.c none.c rune.c \ + mskanji.c nextwctype.c nl_langinfo.c nomacros.c none.c rune.c \ runetype.c setinvalidrune.c setlocale.c setrunelocale.c srune.c \ table.c \ tolower.c toupper.c utf2.c utf8.c wcrtomb.c wcsrtombs.c wcsftime.c \ @@ -27,7 +27,7 @@ MAN+= btowc.3 \ mbrtowc.3 \ mbrune.3 mbsinit.3 \ mbsrtowcs.3 mbstowcs.3 mbtowc.3 multibyte.3 \ - nl_langinfo.3 \ + nextwctype.3 nl_langinfo.3 \ rune.3 \ setlocale.3 toascii.3 tolower.3 toupper.3 towlower.3 towupper.3 \ wcsftime.3 \ diff --git a/lib/libc/locale/nextwctype.3 b/lib/libc/locale/nextwctype.3 new file mode 100644 index 0000000..2d9acd1 --- /dev/null +++ b/lib/libc/locale/nextwctype.3 @@ -0,0 +1,58 @@ +.\" +.\" Copyright (c) 2004 Tim J. Robbins +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd July 8, 2004 +.Dt NEXTWCTYPE 3 +.Os +.Sh NAME +.Nm nextwctype +.Nd "iterate through character classes" +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In wctype.h +.Ft wint_t +.Fo nextwctype +.Fa "wint_t ch" "wctype_t wct" +.Fc +.Sh DESCRIPTION +The +.Fn nextwctype +function determines the next character after +.Fa ch +that is a member of character class +.Fa wct . +If +.Fa ch +is \-1, the search begins at the first member of +.Fa wct . +.Sh RETURN VALUES +The +.Fn nextwctype +functions returns the next character, or \-1 if there are no more. +.Sh SEE ALSO +.Xr wctype 3 diff --git a/lib/libc/locale/nextwctype.c b/lib/libc/locale/nextwctype.c new file mode 100644 index 0000000..9363b0a --- /dev/null +++ b/lib/libc/locale/nextwctype.c @@ -0,0 +1,90 @@ +/*- + * Copyright (c) 2004 Tim J. Robbins. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <runetype.h> +#include <wchar.h> +#include <wctype.h> + +wint_t +nextwctype(wint_t wc, wctype_t wct) +{ + size_t lim; + _RuneRange *rr = &_CurrentRuneLocale->__runetype_ext; + _RuneEntry *base, *re; + int noinc; + + noinc = 0; + if (wc < _CACHED_RUNES) { + wc++; + while (wc < _CACHED_RUNES) { + if (_CurrentRuneLocale->__runetype[wc] & wct) + return (wc); + wc++; + } + wc--; + } + if (rr->__ranges != NULL && wc < rr->__ranges[0].__min) { + wc = rr->__ranges[0].__min; + noinc = 1; + } + + /* Binary search -- see bsearch.c for explanation. */ + base = rr->__ranges; + for (lim = rr->__nranges; lim != 0; lim >>= 1) { + re = base + (lim >> 1); + if (re->__min <= wc && wc <= re->__max) + goto found; + else if (wc > re->__max) { + base = re + 1; + lim--; + } + } + return (-1); +found: + if (!noinc) + wc++; + if (re->__min <= wc && wc <= re->__max) { + if (re->__types != NULL) { + for (; wc <= re->__max; wc++) + if (re->__types[wc - re->__min] & wct) + return (wc); + } else if (re->__map & wct) + return (wc); + } + while (++re < rr->__ranges + rr->__nranges) { + wc = re->__min; + if (re->__types != NULL) { + for (; wc <= re->__max; wc++) + if (re->__types[wc - re->__min] & wct) + return (wc); + } else if (re->__map & wct) + return (wc); + } + return (-1); +} |