diff options
author | tjr <tjr@FreeBSD.org> | 2006-02-19 04:27:39 +0000 |
---|---|---|
committer | tjr <tjr@FreeBSD.org> | 2006-02-19 04:27:39 +0000 |
commit | 0162e627f837bf4242c3e13a2157d8a44e4a0f61 (patch) | |
tree | c1e00162b9211522a420826410361a43e2350dfa /gnu | |
parent | 5fea1f45f3bf34a683c61a474a671be6317e7b92 (diff) | |
download | FreeBSD-src-0162e627f837bf4242c3e13a2157d8a44e4a0f61.zip FreeBSD-src-0162e627f837bf4242c3e13a2157d8a44e4a0f61.tar.gz |
Correctly locate the character preceding the matched string in -w
mode when in non-UTF-8 multibyte locales (e.g. EUC, GB2312, etc.).
PR: 91909
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/usr.bin/grep/search.c | 40 |
1 files changed, 35 insertions, 5 deletions
diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c index a195156..982d2c5 100644 --- a/gnu/usr.bin/grep/search.c +++ b/gnu/usr.bin/grep/search.c @@ -524,11 +524,16 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) if (mb_cur_max > 1) { const char *s; - int mr; + size_t mr; wchar_t pwc; + /* Locate the start of the multibyte character + before the match position (== beg + start). */ if (using_utf8) { + /* UTF-8 is a special case: scan backwards + until we find a 7-bit character or a + lead byte. */ s = beg + start - 1; while (s > buf && (unsigned char) *s >= 0x80 @@ -536,15 +541,40 @@ EGexecute (char const *buf, size_t size, size_t *match_size, int exact) --s; } else - s = last_char; - mr = mbtowc (&pwc, s, beg + start - s); - if (mr <= 0) + { + /* Scan forwards to find the start of the + last complete character before the + match position. */ + size_t bytes_left = start - 1; + s = beg; + while (bytes_left > 0) + { + mr = mbrlen (s, bytes_left, &mbs); + if (mr == (size_t) -1 || mr == 0) + { + memset (&mbs, '\0', sizeof (mbs)); + s++; + bytes_left--; + continue; + } + if (mr == (size_t) -2) + { + memset (&mbs, '\0', sizeof (mbs)); + break; + } + s += mr; + bytes_left -= mr; + } + } + mr = mbrtowc (&pwc, s, beg + start - s, &mbs); + if (mr == (size_t) -2 || mr == (size_t) -1 || + mr == 0) { memset (&mbs, '\0', sizeof (mbstate_t)); lword_match = 1; } else if (!(iswalnum (pwc) || pwc == L'_') - && mr == (int) (beg + start - s)) + && mr == beg + start - s) lword_match = 1; } else |