From e4b49c84982ca77225117a127483a0802637e7e5 Mon Sep 17 00:00:00 2001 From: tjr Date: Sat, 14 May 2005 03:02:22 +0000 Subject: Remove mb-caching hack. Obtained from: Fedora (Tim Waugh) --- gnu/usr.bin/grep/dfa.c | 83 +++++++++++----------------------------- gnu/usr.bin/grep/dfa.h | 5 +-- gnu/usr.bin/grep/grep.c | 63 +++++++++++------------------- gnu/usr.bin/grep/grep.h | 5 +-- gnu/usr.bin/grep/mbcache.h | 11 ------ gnu/usr.bin/grep/search.c | 95 +++++++++++++--------------------------------- 6 files changed, 74 insertions(+), 188 deletions(-) delete mode 100644 gnu/usr.bin/grep/mbcache.h diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c index e57aee3..84c671e 100644 --- a/gnu/usr.bin/grep/dfa.c +++ b/gnu/usr.bin/grep/dfa.c @@ -2747,8 +2747,7 @@ transit_state (struct dfa *d, int s, unsigned char const **pp) match needs to be verified by a backtracking matcher. Otherwise we store a 0 in *backref. */ size_t -dfaexec (struct dfa *d, char const *begin, size_t size, int *backref, - struct mb_cache *mb_cache) +dfaexec (struct dfa *d, char const *begin, size_t size, int *backref) { register int s; /* Current state. */ register unsigned char const *p; /* Current input character. */ @@ -2780,79 +2779,43 @@ dfaexec (struct dfa *d, char const *begin, size_t size, int *backref, #ifdef MBS_SUPPORT if (MB_CUR_MAX > 1) { + int remain_bytes, i; buf_begin = begin; buf_end = end; - if (mb_cache && mb_cache->mblen_buf && mb_cache->wcs_buf && - begin > mb_cache->orig_buf && - (begin + size) <= (mb_cache->orig_buf + mb_cache->len)) - { - /* The cache can help us. */ - MALLOC(mblen_buf, unsigned char, size + 2); - MALLOC(inputwcs, wchar_t, size + 2); - memcpy (mblen_buf, - mb_cache->mblen_buf + (begin - mb_cache->orig_buf), - (size + 2) * sizeof (unsigned char)); - memcpy (inputwcs, - mb_cache->wcs_buf + (begin - mb_cache->orig_buf), - (size + 2) * sizeof (wchar_t)); - mblen_buf[size + 1] = 0; - inputwcs[size + 1] = 0; - } - else + + /* initialize mblen_buf, and inputwcs. */ + MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2); + MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2); + memset(&mbs, 0, sizeof(mbstate_t)); + remain_bytes = 0; + for (i = 0; i < end - (unsigned char const *)begin + 1; i++) { - int remain_bytes, i; - - /* initialize mblen_buf, and inputwcs. */ - MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2); - MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2); - memset(&mbs, 0, sizeof(mbstate_t)); - remain_bytes = 0; - for (i = 0; i < end - (unsigned char const *)begin + 1; i++) + if (remain_bytes == 0) { - if (remain_bytes == 0) + remain_bytes + = mbrtowc(inputwcs + i, begin + i, + end - (unsigned char const *)begin - i + 1, &mbs); + if (remain_bytes <= 1) { - remain_bytes - = mbrtowc(inputwcs + i, begin + i, - end - (unsigned char const *)begin - i + 1, &mbs); - if (remain_bytes <= 1) - { - remain_bytes = 0; - inputwcs[i] = (wchar_t)begin[i]; - mblen_buf[i] = 0; - } - else - { - mblen_buf[i] = remain_bytes; - remain_bytes--; - } + remain_bytes = 0; + inputwcs[i] = (wchar_t)begin[i]; + mblen_buf[i] = 0; } else { mblen_buf[i] = remain_bytes; - inputwcs[i] = 0; remain_bytes--; } } - mblen_buf[i] = 0; - inputwcs[i] = 0; /* sentinel */ - - if (mb_cache) + else { - /* Populate the cache. */ - mb_cache->len = size; - mb_cache->orig_buf = begin; - if (mb_cache->mblen_buf) - free (mb_cache->mblen_buf); - if (mb_cache->wcs_buf) - free (mb_cache->wcs_buf); - MALLOC(mb_cache->mblen_buf, unsigned char, size + 2); - MALLOC(mb_cache->wcs_buf, wchar_t, size + 2); - memcpy (mb_cache->mblen_buf, mblen_buf, - (size + 2) * sizeof (unsigned char)); - memcpy (mb_cache->wcs_buf, inputwcs, - (size + 2) * sizeof (wchar_t)); + mblen_buf[i] = remain_bytes; + inputwcs[i] = 0; + remain_bytes--; } } + mblen_buf[i] = 0; + inputwcs[i] = 0; /* sentinel */ } #endif /* MBS_SUPPORT */ diff --git a/gnu/usr.bin/grep/dfa.h b/gnu/usr.bin/grep/dfa.h index 014f1b3..4cdbe7a 100644 --- a/gnu/usr.bin/grep/dfa.h +++ b/gnu/usr.bin/grep/dfa.h @@ -24,8 +24,6 @@ In addition to clobbering modularity, we eat up valuable name space. */ -#include "mbcache.h" - #ifdef __STDC__ # ifndef _PTR_T # define _PTR_T @@ -407,8 +405,7 @@ extern void dfacomp PARAMS ((char const *, size_t, struct dfa *, int)); order to verify backreferencing; otherwise the flag will be cleared. Returns (size_t) -1 if no match is found, or the offset of the first character after the first & shortest matching string in the buffer. */ -extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *, - struct mb_cache *)); +extern size_t dfaexec PARAMS ((struct dfa *, char const *, size_t, int *)); /* Free the storage held by the components of a struct dfa. */ extern void dfafree PARAMS ((struct dfa *)); diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c index aa1fac6..1cd8b34 100644 --- a/gnu/usr.bin/grep/grep.c +++ b/gnu/usr.bin/grep/grep.c @@ -198,8 +198,7 @@ static inline int undossify_input PARAMS ((register char *, size_t)); /* Functions we'll use to search. */ static void (*compile) PARAMS ((char const *, size_t)); -static size_t (*execute) PARAMS ((char const *, size_t, struct mb_cache *, - size_t *, int)); +static size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); /* Like error, but suppress the diagnostic if requested. */ static void @@ -584,7 +583,7 @@ print_offset_sep (uintmax_t pos, char sep) } static void -prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache) +prline (char const *beg, char const *lim, int sep) { if (out_file) printf ("%s%c", filename, sep & filename_mask); @@ -607,8 +606,7 @@ prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache) { size_t match_size; size_t match_offset; - while ((match_offset = (*execute) (beg, lim - beg, mb_cache, - &match_size, 1)) + while ((match_offset = (*execute) (beg, lim - beg, &match_size, 1)) != (size_t) -1) { char const *b = beg + match_offset; @@ -642,8 +640,7 @@ prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache) int i; for (i = 0; i < lim - beg; i++) ibeg[i] = tolower (beg[i]); - while ((match_offset = (*execute) (ibeg, ilim-ibeg, mb_cache, - &match_size, 1)) + while ((match_offset = (*execute) (ibeg, ilim-ibeg, &match_size, 1)) != (size_t) -1) { char const *b = beg + match_offset; @@ -661,8 +658,7 @@ prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache) lastout = lim; return; } - while (lim-beg && (match_offset = (*execute) (beg, lim - beg, mb_cache, - &match_size, 1)) + while (lim-beg && (match_offset = (*execute) (beg, lim - beg, &match_size, 1)) != (size_t) -1) { char const *b = beg + match_offset; @@ -690,7 +686,7 @@ prline (char const *beg, char const *lim, int sep, struct mb_cache *mb_cache) /* Print pending lines of trailing context prior to LIM. Trailing context ends at the next matching line when OUTLEFT is 0. */ static void -prpending (char const *lim, struct mb_cache *mb_cache) +prpending (char const *lim) { if (!lastout) lastout = bufbeg; @@ -700,10 +696,9 @@ prpending (char const *lim, struct mb_cache *mb_cache) size_t match_size; --pending; if (outleft - || (((*execute) (lastout, nl - lastout, mb_cache, - &match_size, 0) == (size_t) -1) + || (((*execute) (lastout, nl - lastout, &match_size, 0) == (size_t) -1) == !out_invert)) - prline (lastout, nl + 1, '-', mb_cache); + prline (lastout, nl + 1, '-'); else pending = 0; } @@ -712,8 +707,7 @@ prpending (char const *lim, struct mb_cache *mb_cache) /* Print the lines between BEG and LIM. Deal with context crap. If NLINESP is non-null, store a count of lines between BEG and LIM. */ static void -prtext (char const *beg, char const *lim, int *nlinesp, - struct mb_cache *mb_cache) +prtext (char const *beg, char const *lim, int *nlinesp) { static int used; /* avoid printing "--" before any output */ char const *bp, *p; @@ -721,7 +715,7 @@ prtext (char const *beg, char const *lim, int *nlinesp, int i, n; if (!out_quiet && pending > 0) - prpending (beg, mb_cache); + prpending (beg); p = beg; @@ -745,7 +739,7 @@ prtext (char const *beg, char const *lim, int *nlinesp, { char const *nl = memchr (p, eol, beg - p); nl++; - prline (p, nl, '-', mb_cache); + prline (p, nl, '-'); p = nl; } } @@ -758,7 +752,7 @@ prtext (char const *beg, char const *lim, int *nlinesp, char const *nl = memchr (p, eol, lim - p); nl++; if (!out_quiet) - prline (p, nl, ':', mb_cache); + prline (p, nl, ':'); p = nl; } *nlinesp = n; @@ -768,7 +762,7 @@ prtext (char const *beg, char const *lim, int *nlinesp, } else if (!out_quiet) - prline (beg, lim, ':', mb_cache); + prline (beg, lim, ':'); pending = out_quiet ? 0 : out_after; used = 1; @@ -778,7 +772,7 @@ prtext (char const *beg, char const *lim, int *nlinesp, between matching lines if OUT_INVERT is true). Return a count of lines printed. */ static int -grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache) +grepbuf (char const *beg, char const *lim) { int nlines, n; register char const *p; @@ -787,8 +781,7 @@ grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache) nlines = 0; p = beg; - while ((match_offset = (*execute) (p, lim - p, mb_cache, - &match_size, 0)) != (size_t) -1) + while ((match_offset = (*execute) (p, lim - p, &match_size, 0)) != (size_t) -1) { char const *b = p + match_offset; char const *endp = b + match_size; @@ -797,7 +790,7 @@ grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache) break; if (!out_invert) { - prtext (b, endp, (int *) 0, mb_cache); + prtext (b, endp, (int *) 0); nlines++; outleft--; if (!outleft || done_on_match) @@ -810,7 +803,7 @@ grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache) } else if (p < b) { - prtext (p, b, &n, mb_cache); + prtext (p, b, &n); nlines += n; outleft -= n; if (!outleft) @@ -820,7 +813,7 @@ grepbuf (char const *beg, char const *lim, struct mb_cache *mb_cache) } if (out_invert && p < lim) { - prtext (p, lim, &n, mb_cache); + prtext (p, lim, &n); nlines += n; outleft -= n; } @@ -840,9 +833,7 @@ grep (int fd, char const *file, struct stats *stats) char *beg; char *lim; char eol = eolbyte; - struct mb_cache mb_cache; - memset (&mb_cache, 0, sizeof (mb_cache)); if (!reset (fd, file, stats)) return 0; @@ -917,9 +908,9 @@ grep (int fd, char const *file, struct stats *stats) if (beg < lim) { if (outleft) - nlines += grepbuf (beg, lim, &mb_cache); + nlines += grepbuf (beg, lim); if (pending) - prpending (lim, &mb_cache); + prpending (lim); if((!outleft && !pending) || (nlines && done_on_match && !out_invert)) goto finish_grep; } @@ -947,11 +938,6 @@ grep (int fd, char const *file, struct stats *stats) totalcc = add_count (totalcc, buflim - bufbeg - save); if (out_line) nlscan (beg); - if (mb_cache.wcs_buf) - free (mb_cache.wcs_buf); - if (mb_cache.mblen_buf) - free (mb_cache.mblen_buf); - memset (&mb_cache, 0, sizeof (mb_cache)); if (! fillbuf (save, stats)) { if (! is_EISDIR (errno, file)) @@ -963,9 +949,9 @@ grep (int fd, char const *file, struct stats *stats) { *buflim++ = eol; if (outleft) - nlines += grepbuf (bufbeg + save - residue, buflim, &mb_cache); + nlines += grepbuf (bufbeg + save - residue, buflim); if (pending) - prpending (buflim, &mb_cache); + prpending (buflim); } finish_grep: @@ -973,11 +959,6 @@ grep (int fd, char const *file, struct stats *stats) out_quiet -= not_text; if ((not_text & ~out_quiet) && nlines != 0) printf (_("Binary file %s matches\n"), filename); - - if (mb_cache.wcs_buf) - free (mb_cache.wcs_buf); - if (mb_cache.mblen_buf) - free (mb_cache.mblen_buf); return nlines; } diff --git a/gnu/usr.bin/grep/grep.h b/gnu/usr.bin/grep/grep.h index 8a10330..f4937e7 100644 --- a/gnu/usr.bin/grep/grep.h +++ b/gnu/usr.bin/grep/grep.h @@ -22,8 +22,6 @@ # define __attribute__(x) #endif -#include "mbcache.h" - /* Grep.c expects the matchers vector to be terminated by an entry with a NULL compile, and to contain at least an entry named "default". */ @@ -32,8 +30,7 @@ extern struct matcher { char name[8]; void (*compile) PARAMS ((char const *, size_t)); - size_t (*execute) PARAMS ((char const *, size_t, struct mb_cache *, - size_t *, int)); + size_t (*execute) PARAMS ((char const *, size_t, size_t *, int)); } const matchers[]; /* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */ diff --git a/gnu/usr.bin/grep/mbcache.h b/gnu/usr.bin/grep/mbcache.h deleted file mode 100644 index 3b76e25..0000000 --- a/gnu/usr.bin/grep/mbcache.h +++ /dev/null @@ -1,11 +0,0 @@ -/* $FreeBSD$ */ -#ifndef MB_CACHE_DEFINED -#define MB_CACHE_DEFINED -struct mb_cache -{ - size_t len; - const char *orig_buf; /* not the only reference; do not free */ - wchar_t *wcs_buf; - unsigned char *mblen_buf; -}; -#endif diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c index 1ea4217..b2514a9 100644 --- a/gnu/usr.bin/grep/search.c +++ b/gnu/usr.bin/grep/search.c @@ -73,22 +73,17 @@ static kwset_t kwset; static int kwset_exact_matches; #if defined(MBS_SUPPORT) -static char* check_multibyte_string PARAMS ((char const *buf, size_t size, - struct mb_cache *, - char const *orig_buf)); +static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); #endif static void kwsinit PARAMS ((void)); static void kwsmusts PARAMS ((void)); static void Gcompile PARAMS ((char const *, size_t)); static void Ecompile PARAMS ((char const *, size_t)); -static size_t EGexecute PARAMS ((char const *, size_t, struct mb_cache *, - size_t *, int )); +static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int )); static void Fcompile PARAMS ((char const *, size_t)); -static size_t Fexecute PARAMS ((char const *, size_t, struct mb_cache *, - size_t *, int)); +static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int)); static void Pcompile PARAMS ((char const *, size_t )); -static size_t Pexecute PARAMS ((char const *, size_t, struct mb_cache *, - size_t *, int)); +static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); void dfaerror (char const *mesg) @@ -154,66 +149,35 @@ kwsmusts (void) are not singlebyte character nor the first byte of a multibyte character. Caller must free the array. */ static char* -check_multibyte_string(char const *buf, size_t size, struct mb_cache *mb_cache, - char const *orig_buf) +check_multibyte_string(char const *buf, size_t size) { char *mb_properties = xmalloc(size); mbstate_t cur_state; wchar_t wc; int i; memset(&cur_state, 0, sizeof(mbstate_t)); - - if (mb_cache && mb_cache->mblen_buf && - orig_buf > mb_cache->orig_buf && - (orig_buf + size) <= (mb_cache->orig_buf + mb_cache->len)) - { - /* The cache can help us. */ - memcpy (mb_properties, - mb_cache->mblen_buf + (orig_buf - mb_cache->orig_buf), - size); - - } - else + memset(mb_properties, 0, sizeof(char)*size); + for (i = 0; i < size ;) { - memset(mb_properties, 0, sizeof(char)*size); - for (i = 0; i < size ;) - { - size_t mbclen; - mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); + size_t mbclen; + mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); - if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) - { - /* An invalid sequence, or a truncated multibyte character. - We treat it as a singlebyte character. */ - mbclen = 1; - } - else if (match_icase) - { - if (iswupper((wint_t)wc)) - { - wc = towlower((wint_t)wc); - wcrtomb(buf + i, wc, &cur_state); - } - } - mb_properties[i] = mbclen; - i += mbclen; + if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) + { + /* An invalid sequence, or a truncated multibyte character. + We treat it as a singlebyte character. */ + mbclen = 1; } - - /* Now populate the cache. */ - if (mb_cache) + else if (match_icase) { - if (mb_cache->wcs_buf) + if (iswupper((wint_t)wc)) { - free (mb_cache->wcs_buf); - mb_cache->wcs_buf = NULL; + wc = towlower((wint_t)wc); + wcrtomb(buf + i, wc, &cur_state); } - if (mb_cache->mblen_buf) - free (mb_cache->mblen_buf); - mb_cache->len = size; - mb_cache->orig_buf = orig_buf; - mb_cache->mblen_buf = xmalloc (size); - memcpy (mb_cache->mblen_buf, mb_properties, size); } + mb_properties[i] = mbclen; + i += mbclen; } return mb_properties; @@ -380,11 +344,9 @@ Ecompile (char const *pattern, size_t size) } static size_t -EGexecute (char const *buf, size_t size, struct mb_cache *mb_cache, - size_t *match_size, int exact) +EGexecute (char const *buf, size_t size, size_t *match_size, int exact) { register char const *buflim, *beg, *end; - char const *orig_buf = buf; char eol = eolbyte; int backref, start, len; struct kwsmatch kwsm; @@ -400,7 +362,7 @@ EGexecute (char const *buf, size_t size, struct mb_cache *mb_cache, buf = case_buf; } if (kwset) - mb_properties = check_multibyte_string(buf, size, mb_cache, orig_buf); + mb_properties = check_multibyte_string(buf, size); } #endif /* MBS_SUPPORT */ @@ -429,13 +391,13 @@ EGexecute (char const *buf, size_t size, struct mb_cache *mb_cache, --beg; if (kwsm.index < kwset_exact_matches) goto success_in_beg_and_end; - if (dfaexec (&dfa, beg, end - beg, &backref, mb_cache) == (size_t) -1) + if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) continue; } else { /* No good fixed strings; start with DFA. */ - size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref, mb_cache); + size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); if (offset == (size_t) -1) break; /* Narrow down to the line we've found. */ @@ -563,11 +525,9 @@ Fcompile (char const *pattern, size_t size) } static size_t -Fexecute (char const *buf, size_t size, struct mb_cache *mb_cache, - size_t *match_size, int exact) +Fexecute (char const *buf, size_t size, size_t *match_size, int exact) { register char const *beg, *try, *end; - char const *orig_buf = buf; register size_t len; char eol = eolbyte; struct kwsmatch kwsmatch; @@ -582,7 +542,7 @@ Fexecute (char const *buf, size_t size, struct mb_cache *mb_cache, memcpy(case_buf, buf, size); buf = case_buf; } - mb_properties = check_multibyte_string(buf, size, mb_cache, orig_buf); + mb_properties = check_multibyte_string(buf, size); } #endif /* MBS_SUPPORT */ @@ -747,8 +707,7 @@ Pcompile (char const *pattern, size_t size) } static size_t -Pexecute (char const *buf, size_t size, struct mb_cache *mb_cache, - size_t *match_size, int exact) +Pexecute (char const *buf, size_t size, size_t *match_size, int exact) { #if !HAVE_LIBPCRE abort (); -- cgit v1.1