diff options
author | wosch <wosch@FreeBSD.org> | 1996-08-14 00:22:31 +0000 |
---|---|---|
committer | wosch <wosch@FreeBSD.org> | 1996-08-14 00:22:31 +0000 |
commit | 9c0ad6a2b3f4178ace4292cdb66f55997eddf45c (patch) | |
tree | 5ee2c3ccbe51dc57abaaffc3dd772e05c9efab8c /usr.bin/locate/code | |
parent | 521551a94c26abb72ce3538eb54e226c481f5dad (diff) | |
download | FreeBSD-src-9c0ad6a2b3f4178ace4292cdb66f55997eddf45c.zip FreeBSD-src-9c0ad6a2b3f4178ace4292cdb66f55997eddf45c.tar.gz |
bigram
Bigram does not remove the newline at the end of each filename. This
particularly breaks the bigram algorithm, and /var/db/locate.database
grows by 15%.
Bigram does not check for characters outside 32-127.
The bigram output is silly and needs ~1/2 of the CPU time of
database rebuilding.
old:
locate.bigram < $filelist | sort | uniq -c | sort -nr
^^^^^^^^^^^^^^
this can easily be done by bigram itself
new:
bigram < $filelist | sort -nr
code
Code does not check for char 31.
Use a lookup array instead of a function. 3x faster.
updatedb
rewritten
sync with bigram changes
read config file /etc/locate.rc if it exists
submitted by: guido@gvr.win.tue.nl (Guido van Rooij)
concatdb - concatenate locate databases
mklocatedb - build locate database
Diffstat (limited to 'usr.bin/locate/code')
-rw-r--r-- | usr.bin/locate/code/Makefile | 5 | ||||
-rw-r--r-- | usr.bin/locate/code/locate.code.c | 58 |
2 files changed, 45 insertions, 18 deletions
diff --git a/usr.bin/locate/code/Makefile b/usr.bin/locate/code/Makefile index 743e968..a7d8e80 100644 --- a/usr.bin/locate/code/Makefile +++ b/usr.bin/locate/code/Makefile @@ -1,8 +1,9 @@ # @(#)Makefile 8.1 (Berkeley) 6/6/93 PROG= locate.code -CFLAGS+=-I${.CURDIR}/../locate +CFLAGS+=-I${.CURDIR}/../locate NOMAN= noman -BINDIR= /usr/libexec +BINDIR= ${LIBEXECDIR} +.include "../Makefile.inc" .include <bsd.prog.mk> diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c index a7506ec..60be32a 100644 --- a/usr.bin/locate/code/locate.code.c +++ b/usr.bin/locate/code/locate.code.c @@ -89,25 +89,38 @@ static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93"; #define BGBUFSIZE (NBG * 2) /* size of bigram buffer */ -char buf1[MAXPATHLEN + 1] = " "; -char buf2[MAXPATHLEN + 1]; +u_char buf1[MAXPATHLEN] = " "; +u_char buf2[MAXPATHLEN]; char bigrams[BGBUFSIZE + 1] = { 0 }; +#define LOOKUP 1 +#ifdef LOOKUP +#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)]) +typedef u_char bg_t; +bg_t big[UCHAR_MAX][UCHAR_MAX]; + +#else +#define BGINDEX(x) bgindex(x) +typedef int bg_t; +#endif + int bgindex __P((char *)); void usage __P((void)); +extern int optind; +extern int optopt; int main(argc, argv) int argc; char *argv[]; { - register char *cp, *oldpath, *path; + register u_char *cp, *oldpath, *path; int ch, code, count, diffcount, oldcount; FILE *fp; + register int i, j; while ((ch = getopt(argc, argv, "")) != EOF) switch(ch) { - case '?': default: usage(); } @@ -126,27 +139,38 @@ main(argc, argv) err(1, "stdout"); (void)fclose(fp); +#ifdef LOOKUP + /* init lookup table */ + for (i = 0; i < UCHAR_MAX; i++) + for (j = 0; j < UCHAR_MAX; j++) + big[i][j] = (bg_t)-1; + + for (cp = bigrams, i = 0; *cp != NULL; i += 2, cp += 2) + big[(int)*cp][(int)*(cp + 1)] = (bg_t)i; +#endif + oldpath = buf1; path = buf2; oldcount = 0; - while (fgets(path, sizeof(buf2) - 1, stdin) != NULL) { - /* Truncate newline. 
*/ - cp = path + strlen(path) - 1; - if (cp > path && *cp == '\n') - *cp = '\0'; + while (fgets(path, sizeof(buf2), stdin) != NULL) { + + /* skip empty lines */ + if (*path == '\n') + continue; /* Squelch characters that would botch the decoding. */ for (cp = path; *cp != NULL; cp++) { - if ((u_char)*cp >= PARITY) - *cp &= PARITY-1; - if (*cp <= SWITCH) + /* chop newline */ + if (*cp == '\n') + *cp = NULL; + /* range */ + else if (*cp < ASCII_MIN || *cp > ASCII_MAX) *cp = '?'; } /* Skip longest common prefix. */ - for (cp = path; *cp == *oldpath; cp++, oldpath++) - if (*oldpath == NULL) - break; + for (cp = path; *cp == *oldpath && *cp; cp++, oldpath++); + count = cp - path; diffcount = count - oldcount + OFFSET; oldcount = count; @@ -164,7 +188,7 @@ main(argc, argv) err(1, "stdout"); break; } - if ((code = bgindex(cp)) < 0) { + if ((code = BGINDEX(cp)) == (bg_t)-1) { if (putchar(*cp++) == EOF || putchar(*cp++) == EOF) err(1, "stdout"); @@ -189,6 +213,7 @@ main(argc, argv) exit(0); } +#ifndef LOOKUP int bgindex(bg) /* Return location of bg in bigrams or -1. */ char *bg; @@ -202,6 +227,7 @@ bgindex(bg) /* Return location of bg in bigrams or -1. */ break; return (*p == NULL ? -1 : --p - bigrams); } +#endif /* !LOOKUP */ void usage() |