summaryrefslogtreecommitdiffstats
path: root/usr.bin/locate/bigram
diff options
context:
space:
mode:
author wosch <wosch@FreeBSD.org> 1996-08-14 00:22:31 +0000
committer wosch <wosch@FreeBSD.org> 1996-08-14 00:22:31 +0000
commit 9c0ad6a2b3f4178ace4292cdb66f55997eddf45c (patch)
tree 5ee2c3ccbe51dc57abaaffc3dd772e05c9efab8c /usr.bin/locate/bigram
parent 521551a94c26abb72ce3538eb54e226c481f5dad (diff)
downloadFreeBSD-src-9c0ad6a2b3f4178ace4292cdb66f55997eddf45c.zip
FreeBSD-src-9c0ad6a2b3f4178ace4292cdb66f55997eddf45c.tar.gz
bigram
Bigram does not remove the newline at the end of a filename. This particularly breaks the bigram algorithm and makes /var/db/locate.database grow by 15%. Bigram does not check for characters outside 32-127. The bigram output is silly and needs ~1/2 of the CPU time of a database rebuild. old: locate.bigram < $filelist | sort | uniq -c | sort -nr (the "sort | uniq -c" stage can easily be done by bigram itself) new: bigram < $filelist | sort -nr. code: Code does not check for char 31. Use a lookup array instead of a function; 3 x faster. updatedb: rewritten; synced with the bigram changes; reads the config file /etc/locate.rc if it exists; submitted by: guido@gvr.win.tue.nl (Guido van Rooij). concatdb - concatenate locate databases. mklocatedb - build locate database.
Diffstat (limited to 'usr.bin/locate/bigram')
-rw-r--r-- usr.bin/locate/bigram/Makefile 4
-rw-r--r-- usr.bin/locate/bigram/locate.bigram.c 55
2 files changed, 47 insertions, 12 deletions
diff --git a/usr.bin/locate/bigram/Makefile b/usr.bin/locate/bigram/Makefile
index d7d4348..fbba14d 100644
--- a/usr.bin/locate/bigram/Makefile
+++ b/usr.bin/locate/bigram/Makefile
@@ -2,6 +2,8 @@
PROG= locate.bigram
NOMAN= noman
-BINDIR= /usr/libexec
+BINDIR= ${LIBEXECDIR}
+CFLAGS+= -I${.CURDIR}/../locate
+.include "../Makefile.inc"
.include <bsd.prog.mk>
diff --git a/usr.bin/locate/bigram/locate.bigram.c b/usr.bin/locate/bigram/locate.bigram.c
index 149e437..dc95399 100644
--- a/usr.bin/locate/bigram/locate.bigram.c
+++ b/usr.bin/locate/bigram/locate.bigram.c
@@ -53,32 +53,65 @@ static char sccsid[] = "@(#)locate.bigram.c 8.1 (Berkeley) 6/6/93";
#include <stdio.h>
#include <sys/param.h> /* for MAXPATHLEN */
+#include <string.h> /* memchr */
+#include "locate.h"
-char buf1[MAXPATHLEN] = " ";
-char buf2[MAXPATHLEN];
+u_char buf1[MAXPATHLEN] = " ";
+u_char buf2[MAXPATHLEN];
+unsigned int bigram[UCHAR_MAX][UCHAR_MAX];
-main ( )
+
+void main ( )
{
- register char *cp;
- register char *oldpath = buf1, *path = buf2;
+ register u_char *cp;
+ register u_char *oldpath = buf1, *path = buf2;
+ register int i, j;
+
+ /* init bigram buffer */
+ for (i = 0; i < UCHAR_MAX; i++)
+ for (j = 0; j < UCHAR_MAX; j++)
+ bigram[i][j] = 0;
while ( fgets ( path, sizeof(buf2), stdin ) != NULL ) {
+ /* skip empty lines */
+ if (*path == '\n')
+ continue;
+
+ /* Squelch characters that would botch the decoding. */
+ for (cp = path; *cp != NULL; cp++) {
+ /* chop newline */
+ if (*cp == '\n')
+ *cp = NULL;
+ /* range */
+ else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
+ *cp = '?';
+ }
+
+
/* skip longest common prefix */
- for ( cp = path; *cp == *oldpath; cp++, oldpath++ )
- if ( *oldpath == NULL )
- break;
+ for (cp = path; *cp == *oldpath && *cp; cp++, oldpath++);
+
/*
* output post-residue bigrams only
*/
+
+ /* check later for boundary */
while ( *cp != NULL && *(cp + 1) != NULL ) {
- putchar ( *cp++ );
- putchar ( *cp++ );
- putchar ( '\n' );
+ bigram[*cp][*(cp+1)]++;
+ cp += 2;
}
+
if ( path == buf1 ) /* swap pointers */
path = buf2, oldpath = buf1;
else
path = buf1, oldpath = buf2;
}
+
+ /* output, boundary check */
+ for (i = ASCII_MIN; i <= ASCII_MAX; i++)
+ for (j = ASCII_MIN; j <= ASCII_MAX; j++)
+ if (bigram[i][j] != 0)
+ fprintf(stdout, "%4d %c%c\n",
+ bigram[i][j], i, j);
}
OpenPOWER on IntegriCloud