summaryrefslogtreecommitdiffstats
path: root/usr.bin/locate
diff options
context:
space:
mode:
authorwosch <wosch@FreeBSD.org>1996-08-31 23:14:54 +0000
committerwosch <wosch@FreeBSD.org>1996-08-31 23:14:54 +0000
commit5ec9b01393dfe32d3219602980663d0530d7a8ca (patch)
treebeabaab6d0d1608e110a218fe4bcd65489bb947d /usr.bin/locate
parent2ae01aec12a5da8cdffd15e0318368c77ef72359 (diff)
downloadFreeBSD-src-5ec9b01393dfe32d3219602980663d0530d7a8ca.zip
FreeBSD-src-5ec9b01393dfe32d3219602980663d0530d7a8ca.tar.gz
optimized search algorithm
faster IO due mmap(2) [-m | -s] better error check for damaged databases support for databases in network byte order (SunOS/sparc) optional case insensitve search [-i] optional multiple databases optional multiple pattern new enviroment variable LOCATE_PATH for database(s) [-S] print some statistic about the database [-l number] limit output to number file names [-c] suppress normal output; instead print a count of matching file names
Diffstat (limited to 'usr.bin/locate')
-rw-r--r--usr.bin/locate/locate/Makefile5
-rw-r--r--usr.bin/locate/locate/fastfind.c280
-rw-r--r--usr.bin/locate/locate/locate.1134
-rw-r--r--usr.bin/locate/locate/locate.c392
-rw-r--r--usr.bin/locate/locate/util.c267
5 files changed, 957 insertions, 121 deletions
diff --git a/usr.bin/locate/locate/Makefile b/usr.bin/locate/locate/Makefile
index e4ebc2f..644ad58 100644
--- a/usr.bin/locate/locate/Makefile
+++ b/usr.bin/locate/locate/Makefile
@@ -1,12 +1,15 @@
# @(#)Makefile 8.1 (Berkeley) 6/6/93
-# $Id: Makefile,v 1.3 1996/04/25 15:54:22 wosch Exp wosch $
+# $Id: Makefile,v 1.1 1996/08/29 22:39:41 wosch Exp wosch $
PROG= locate
+SRCS= util.c locate.c
+CFLAGS+= -I. -DMMAP -O2 # -DDEBUG
MAN1= locate.1
MAN8= locate.updatedb.8
SCRIPTS= updatedb mklocatedb concatdb
MLINKS+= locate.updatedb.8 updatedb.8
+
beforeinstall:
.for script in ${SCRIPTS}
${INSTALL} -c -o ${BINOWN} -g ${BINGRP} -m ${BINMODE} \
diff --git a/usr.bin/locate/locate/fastfind.c b/usr.bin/locate/locate/fastfind.c
new file mode 100644
index 0000000..e191704
--- /dev/null
+++ b/usr.bin/locate/locate/fastfind.c
@@ -0,0 +1,280 @@
+/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * James A. Woods.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: fastfind.c,v 1.2 1996/08/29 22:39:41 wosch Exp wosch $
+ */
+
+
+#ifndef _LOCATE_STATISTIC_
+#define _LOCATE_STATISTIC_
+
+void
+statistic (fp, path_fcodes)
+ FILE *fp; /* open database */
+ char *path_fcodes; /* for error message */
+{
+ register int lines, chars, size, big;
+ register u_char *p, *s;
+ register int c;
+ int count;
+ u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+
+ for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
+ p[c] = check_bigram_char(getc(fp));
+ s[c] = check_bigram_char(getc(fp));
+ }
+
+ lines = chars = big = 0;
+ size = NBG + NBG;
+
+ for (c = getc(fp), count = 0; c != EOF; size++) {
+ if (c == SWITCH) {
+ count += getwf(fp) - OFFSET;
+ size += sizeof(int);
+ } else
+ count += c - OFFSET;
+
+ for (p = path + count; (c = getc(fp)) > SWITCH; size++)
+ if (c < PARITY)
+ p++;
+ else {
+ big++;
+ p += 2;
+ }
+
+ p++;
+ lines++;
+ chars += (p - path);
+ }
+
+ (void)printf("\nDatabase: %s\n", path_fcodes);
+ (void)printf("Compression: Front: %2.2f%%, ",
+ (float)(100 * (size + big)) / chars);
+ (void)printf("Bigram: %2.2f%%, ", (float)(100 * (size - big)) / size);
+ (void)printf("Total: %2.2f%%\n", (float)(100 * size) / chars);
+ (void)printf("Filenames: %d, ", lines);
+ (void)printf("Chars: %d\n", chars);
+ (void)printf("Database size: %d, ", size);
+ (void)printf("Bigram chars: %d\n", big);
+
+}
+#endif /* _LOCATE_STATISTIC_ */
+
+
+void
+#ifdef FF_MMAP
+
+
+#ifdef FF_ICASE
+fastfind_mmap_icase
+#else
+fastfind_mmap
+#endif
+(pathpart, paddr, len, database)
+ char *pathpart; /* search string */
+ caddr_t paddr; /* mmap pointer */
+ int len; /* length of database */
+ char *database; /* for error message */
+
+
+#else /* MMAP */
+
+
+#ifdef FF_ICASE
+fastfind_icase
+#else /* !FF_ICASE */
+fastfind
+#endif /* FF_ICASE */
+
+(fp, pathpart, database)
+ FILE *fp; /* open database */
+ char *pathpart; /* search string */
+ char *database; /* for error message */
+
+
+#endif /* MMAP */
+
+{
+ register u_char *p, *s, *patend, *q, *foundchar;
+ register int c, cc;
+ int count, found, globflag;
+ u_char *cutoff;
+ u_char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
+
+#ifdef FF_ICASE
+ /* use a lookup table for case insensitive search */
+ u_char table[UCHAR_MAX];
+
+ tolower_word(pathpart);
+#endif
+
+ /* init bigram table */
+#ifdef FF_MMAP
+ if (len < (2*NBG)) {
+ (void)fprintf(stderr, "database to small: %s\n", database);
+ exit(1);
+ }
+
+ for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
+ p[c] = check_bigram_char(*paddr++);
+ s[c] = check_bigram_char(*paddr++);
+ }
+#else
+ for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
+ p[c] = check_bigram_char(getc(fp));
+ s[c] = check_bigram_char(getc(fp));
+ }
+#endif
+
+ /* find optimal (last) char for searching */
+ p = pathpart;
+ globflag = index(p, '*') || index(p, '?') || index(p, '[');
+ patend = patprep(p);
+ cc = *patend;
+
+#ifdef FF_ICASE
+ /* set patend char to true */
+ table[TOLOWER(*patend)] = 1;
+ table[toupper(*patend)] = 1;
+#endif
+
+
+ /* main loop */
+ found = count = 0;
+ foundchar = 0;
+
+#ifdef FF_MMAP
+ for (c = (u_char)*paddr++; len-- > 0; ) {
+#else
+ for (c = getc(fp); c != EOF; ) {
+#endif
+
+ /* go forward or backward */
+ if (c == SWITCH) { /* big step, an integer */
+#ifdef FF_MMAP
+ count += getwm(paddr) - OFFSET;
+ len -= INTSIZE; paddr += INTSIZE;
+#else
+ count += getwf(fp) - OFFSET;
+#endif
+ } else { /* slow step, =< 14 chars */
+ count += c - OFFSET;
+ }
+
+ /* overlay old path */
+ p = path + count;
+ foundchar = p - 1;
+#ifdef FF_MMAP
+ for (; (c = (u_char)*paddr++) > SWITCH; len--)
+#else
+ for (; (c = getc(fp)) > SWITCH; )
+#endif
+
+ if (c < PARITY) {
+#ifdef FF_ICASE
+ if (table[c])
+#else
+ if (c == cc)
+#endif
+ foundchar = p;
+ *p++ = c;
+ }
+ else {
+ /* bigrams are parity-marked */
+ TO7BIT(c);
+
+#ifndef FF_ICASE
+ if (bigram1[c] == cc ||
+ bigram2[c] == cc)
+#else
+
+ if (table[bigram1[c]] ||
+ table[bigram2[c]])
+#endif
+ foundchar = p + 1;
+
+ *p++ = bigram1[c];
+ *p++ = bigram2[c];
+ }
+
+
+ if (found) { /* previous line matched */
+ cutoff = path;
+ *p-- = '\0';
+ foundchar = p;
+ } else if (foundchar >= path + count) { /* a char matched */
+ *p-- = '\0';
+ cutoff = path + count;
+ } else /* nothing to do */
+ continue;
+
+ found = 0;
+ for (s = foundchar; s >= cutoff; s--) {
+ if (*s == cc
+#ifdef FF_ICASE
+ || TOLOWER(*s) == cc
+#endif
+ ) { /* fast first char check */
+ for (p = patend - 1, q = s - 1; *p != '\0';
+ p--, q--)
+ if (*q != *p
+#ifdef FF_ICASE
+ && TOLOWER(*q) != *p
+#endif
+ )
+ break;
+ if (*p == '\0') { /* fast match success */
+ found = 1;
+ if (!globflag || !fnmatch(pathpart, path, 0)) {
+ if (f_silent)
+ counter++;
+ else if (f_limit) {
+ counter++;
+ if (f_limit >= counter)
+ (void)puts(path);
+ else {
+ (void)fprintf(stderr, "[show only %d lines]\n", counter - 1);
+ exit(0);
+ }
+ } else
+ (void)puts(path);
+ }
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/usr.bin/locate/locate/locate.1 b/usr.bin/locate/locate/locate.1
index f2ddad8..1c19977 100644
--- a/usr.bin/locate/locate/locate.1
+++ b/usr.bin/locate/locate/locate.1
@@ -1,3 +1,4 @@
+.\" Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
.\" Copyright (c) 1990, 1993
.\" The Regents of the University of California. All rights reserved.
.\"
@@ -30,21 +31,26 @@
.\" SUCH DAMAGE.
.\"
.\" @(#)locate.1 8.1 (Berkeley) 6/6/93
+.\" $Id$
.\"
.Dd June 6, 1993
.Dt LOCATE 1
.Os BSD 4.4
.Sh NAME
.Nm locate
-.Nd find files
+.Nd find filenames quickly
.Sh SYNOPSIS
-.Ar locate
-pattern
+.Nm
+.Op Fl Scims
+.Op Fl l Ar limit
+.Op Fl d Ar database
+pattern ...
.Sh DESCRIPTION
.Nm Locate
searches a database for all pathnames which match the specified
.Ar pattern .
-The database is recomputed periodically, and contains the pathnames
+The database is recomputed periodically (usually weekly or daily),
+and contains the pathnames
of all files which are publicly accessible.
.Pp
Shell globbing and quoting characters (``*'', ``?'', ``\e'', ``[''
@@ -59,12 +65,95 @@ including slashes (``/'').
.Pp
As a special case, a pattern containing no globbing characters (``foo'')
is matched as though it were ``*foo*''.
+
+The following options are available:
+.Bl -tag -width 10n indent
+.It Fl S
+Print some statistic about the database and exit.
+.It Fl c
+Suppress normal output; instead print a count of matching file names.
+.It Fl d Ar database
+Search in
+.Ar database
+instead the default file name database.
+Multiple
+.Fl d
+options are allowed. Each additional
+.Fl d
+option adds the specified database to the list
+of databases to be searched.
+
+.Ar database
+may be a colon-separated list of databases. A single colon is a reference
+to the default database.
+
+$ locate -d $HOME/lib/mydb: foo
+
+will first search string ``foo'' in
+.Pa $HOME/lib/mydb
+and then in
+.Pa /var/db/locate.database .
+
+$ locate -d $HOME/lib/mydb::/cdrom/locate.database foo
+
+will first search string ``foo'' in
+.Pa $HOME/lib/mydb
+and then in
+.Pa /var/db/locate.database
+and then in
+.Pa /cdrom/locate.database .
+
+
+``$ locate -d db1 -d db2 -d db3 pattern'' is the same as
+
+``$ locate -d db1:db2:db3 pattern'' or
+
+``$ locate -d db1:db2 -d db3 pattern''.
+
+If
+.Ar -
+is given as the database name, standard input will be read instead.
+For example, you can compress your database
+and use:
+
+$ zcat database.gz | locate -d - pattern
+
+This might be useful on machines with a fast CPU and little RAM and slow
+I/O. Note: you can only use
+.Ar one
+pattern for stdin.
+.It Fl i
+Ignore case distinctions in both the pattern and the database.
+.It Fl l Ar number
+Limit output to
+.Ar number
+of file names and exit.
+.It Fl m
+Use
+.Xr mmap 2
+instead of the
+.Xr stdio 3
+library. This is the default behavior. Usually faster in most cases.
+.It Fl s
+Use the
+.Xr stdio 3
+library instead of
+.Xr mmap 2 .
.Sh FILES
.Bl -tag -width /usr/libexec/locate.updatedb -compact
.It Pa /var/db/locate.database
-The actual database
+locate database
.It Pa /usr/libexec/locate.updatedb
Script to update the locate database
+.It Pa /etc/weekly
+Script that usually starts the database rebuild
+.El
+.Sh ENVIRONMENT
+.Bl -tag -width LOCATE_PATH -compact
+.It Pa LOCATE_PATH
+path to the locate database if set and not empty, ignored if the
+.Fl d
+option was specified.
.El
.Sh SEE ALSO
.Xr find 1 ,
@@ -79,17 +168,48 @@ Script to update the locate database
.%P pp. 8-10
.Re
.Sh BUGS
-.Nm Locate
+.Nm
may fail to list some files that are present, or may
to list files that have been removed from the system. This is because
locate only reports files that are present in the database, which is
typically only regenerated once a week by the
-.Nm /etc/weekly
+.Pa /etc/weekly
script. Use
.Xr find 1
to locate files that are of a more transitory nature.
+
+.Nm
+database was built by user
+.Dq nobody .
+.Xr find 1
+skip directories,
+which are not readable for user
+.Dq nobody ,
+group
+.Dq nobody ,
+or
+world. E.g. if your HOME directory ist not world-readable, all your
+files are
+.Ar not
+in the database.
+
+The
+.Nm
+database is not byte order independ. It is not possible
+to share the databases between machines with different byte order.
+The current
+.Nm
+implementation understand databases in host byte order or
+network byte order. So you can read on a FreeBSD/i386 machine
+(little endian)
+a locate database which was built on SunOS/sparc machine
+(big endian, net).
+
.Sh HISTORY
The
.Nm locate
command appears in
.Bx 4.4 .
+Many new features were
+added in
+.Fx 2.2 .
diff --git a/usr.bin/locate/locate/locate.c b/usr.bin/locate/locate/locate.c
index 102c5e9..0935283 100644
--- a/usr.bin/locate/locate/locate.c
+++ b/usr.bin/locate/locate/locate.c
@@ -1,6 +1,7 @@
/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
+ * The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* James A. Woods.
@@ -15,8 +16,8 @@
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
@@ -32,16 +33,19 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * $Id: locate.c,v 1.3 1996/08/29 22:39:41 wosch Exp wosch $
*/
#ifndef lint
static char copyright[] =
-"@(#) Copyright (c) 1989, 1993\n\
- The Regents of the University of California. All rights reserved.\n";
+"@(#) Copyright (c) 1995-1996 Wolfram Schneider, Berlin.\n\
+@(#) Copyright (c) 1989, 1993\n\
+ The Regents of the University of California. All rights reserved.\n";
#endif /* not lint */
#ifndef lint
-static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
+static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
#endif /* not lint */
/*
@@ -54,10 +58,10 @@ static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
*
* The codes are:
*
- * 0-28 likeliest differential counts + offset to make nonnegative
- * 30 switch code for out-of-range count to follow in next word
- * 128-255 bigram codes (128 most common, as determined by 'updatedb')
- * 32-127 single character (printable) ascii residue (ie, literal)
+ * 0-28 likeliest differential counts + offset to make nonnegative
+ * 30 switch code for out-of-range count to follow in next word
+ * 128-255 bigram codes (128 most common, as determined by 'updatedb')
+ * 32-127 single character (printable) ascii residue (ie, literal)
*
* A novel two-tiered string search technique is employed:
*
@@ -72,129 +76,291 @@ static char sccsid[] = "@(#)locate.c 8.1 (Berkeley) 6/6/93";
*/
#include <sys/param.h>
-
#include <fnmatch.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
+#include <stdlib.h>
+#include <ctype.h>
+#ifdef MMAP
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <sys/mman.h>
+# include <fcntl.h>
+#endif
+#include <err.h>
+
+#ifdef sun
+#include <netinet/in.h> /* SunOS byteorder(3) htohl(3) */
+#ifndef __P
+#define __P(x) x
+#endif
+#endif
#include "locate.h"
#include "pathnames.h"
-FILE *fp;
+#ifdef DEBUG
+# include <sys/time.h>
+# include <sys/types.h>
+# include <sys/resource.h>
+#endif
+
+char *path_fcodes; /* locate database */
+int f_mmap; /* use mmap */
+int f_icase; /* ignore case */
+int f_stdin; /* read database from stdin */
+int f_statistic; /* print statistic */
+int f_silent; /* suppress output, show only count of matches */
+int f_limit; /* limit number of output lines, 0 == infinite */
+u_int counter; /* counter for matches [-c] */
+
+
+void usage __P((void));
+void statistic __P((FILE *, char *));
+void fastfind __P((FILE *, char *, char *));
+void fastfind_icase __P((FILE *, char *, char *));
+void fastfind_mmap __P((char *, caddr_t, int, char *));
+void fastfind_mmap_icase __P((char *, caddr_t, int, char *));
+void search_mmap __P((char *, char **));
+void search_fopen __P((char *, char **));
+unsigned long cputime __P((void));
+
+extern char **colon __P((char **, char*, char*));
+extern void print_matches __P((u_int));
+extern int getwm __P((caddr_t));
+extern int getwf __P((FILE *));
+extern u_char *tolower_word __P((u_char *));
+extern int check_bigram_char __P((int));
+extern char *patprep __P((char *));
+
+extern char *optarg;
+extern int optind;
-void fastfind __P((char *pathpart));
-char *patprep __P((char *name));
int
main(argc, argv)
- int argc;
- char *argv[];
+ int argc;
+ char **argv;
{
- if (argc != 2) {
- (void)fprintf(stderr, "usage: locate pattern\n");
- exit(1);
- }
- if (!(fp = fopen(_PATH_FCODES, "r"))) {
- (void)fprintf(stderr, "locate: no database file %s.\n",
- _PATH_FCODES);
- exit(1);
- }
- while (*++argv)
- fastfind(*argv);
- exit(0);
+ register int ch;
+ char **dbv = NULL;
+#ifdef MMAP
+ f_mmap = 1; /* mmap is default */
+#endif
+
+ while ((ch = getopt(argc, argv, "Scd:il:ms")) != EOF)
+ switch(ch) {
+ case 'S': /* statistic lines */
+ f_statistic = 1;
+ break;
+ case 'l': /* limit number of output lines, 0 == infinite */
+ f_limit = atoi(optarg);
+ break;
+ case 'd': /* database */
+ dbv = colon(dbv, optarg, _PATH_FCODES);
+ break;
+ case 'i': /* ignore case */
+ f_icase = 1;
+ break;
+ case 'm': /* mmap */
+#ifdef MMAP
+ f_mmap = 1;
+#else
+ (void)fprintf(stderr, "mmap(2) not implemented\n");
+#endif
+ break;
+ case 's': /* stdio lib */
+ f_mmap = 0;
+ break;
+ case 'c': /* suppress output, show only count of matches */
+ f_silent = 1;
+ break;
+ default:
+ usage();
+ }
+ argv += optind;
+ argc -= optind;
+
+ /* to few arguments */
+ if (argc < 1 && !(f_statistic))
+ usage();
+
+ /* no (valid) database as argument */
+ if (dbv == NULL || *dbv == NULL) {
+ /* try to read database from enviroment */
+ if ((path_fcodes = getenv("LOCATE_PATH")) == NULL ||
+ *path_fcodes == '\0')
+ /* use default database */
+ dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES);
+ else /* $LOCATE_PATH */
+ dbv = colon(dbv, path_fcodes, _PATH_FCODES);
+ }
+
+ if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */
+ for (ch = 0; ch <= UCHAR_MAX; ch++)
+ myctype[ch] = tolower(ch);
+
+ /* foreach database ... */
+ while((path_fcodes = *dbv) != NULL) {
+ dbv++;
+
+ if (!strcmp(path_fcodes, "-"))
+ f_stdin = 1;
+ else
+ f_stdin = 0;
+
+#ifndef MMAP
+ f_mmap = 0; /* be paranoid */
+#endif
+ if (!f_mmap || f_stdin || f_statistic)
+ search_fopen(path_fcodes, argv);
+ else
+ search_mmap(path_fcodes, argv);
+ }
+
+ if (f_silent)
+ print_matches(counter);
+ exit(0);
}
+
void
-fastfind(pathpart)
- char *pathpart;
+search_fopen(db, s)
+ char *db; /* database */
+ char **s; /* search strings */
{
- register char *p, *s;
- register int c;
- int count, found, globflag;
- char *cutoff, *patend, *q;
- char bigram1[NBG], bigram2[NBG], path[MAXPATHLEN];
-
- for (c = 0, p = bigram1, s = bigram2; c < NBG; c++)
- p[c] = getc(fp), s[c] = getc(fp);
-
- p = pathpart;
- globflag = index(p, '*') || index(p, '?') || index(p, '[');
- patend = patprep(p);
-
- found = 0;
- for (c = getc(fp), count = 0; c != EOF;) {
- count += ((c == SWITCH) ? getw(fp) : c) - OFFSET;
- /* overlay old path */
- for (p = path + count; (c = getc(fp)) > SWITCH;)
- if (c < PARITY)
- *p++ = c;
- else { /* bigrams are parity-marked */
- c &= PARITY - 1;
- *p++ = bigram1[c], *p++ = bigram2[c];
- }
- *p-- = NULL;
- cutoff = (found ? path : path + count);
- for (found = 0, s = p; s >= cutoff; s--)
- if (*s == *patend) { /* fast first char check */
- for (p = patend - 1, q = s - 1; *p != NULL;
- p--, q--)
- if (*q != *p)
- break;
- if (*p == NULL) { /* fast match success */
- found = 1;
- if (!globflag ||
- !fnmatch(pathpart, path, 0))
- (void)printf("%s\n", path);
- break;
- }
- }
+ FILE *fp;
+#ifdef DEBUG
+ long t0;
+#endif
+
+ /* can only read stdin once */
+ if (f_stdin) {
+ fp = stdin;
+ if (*(s+1) != NULL) {
+ (void)fprintf(stderr,
+ "read database from stdin, use only");
+ (void)fprintf(stderr, " `%s' as pattern\n", *s);
+ *(s+1) = NULL;
+ }
+ }
+ else if ((fp = fopen(path_fcodes, "r")) == NULL)
+ err(1, "`%s'", path_fcodes);
+
+ /* count only chars or lines */
+ if (f_statistic) {
+ statistic(fp, path_fcodes);
+ (void)fclose(fp);
+ return;
}
-}
-/*
- * extract last glob-free subpattern in name for fast pre-match; prepend
- * '\0' for backwards match; return end of new pattern
- */
-static char globfree[100];
+ /* foreach search string ... */
+ while(*s != NULL) {
+#ifdef DEBUG
+ t0 = cputime();
+#endif
+ if (!f_stdin &&
+ fseek(fp, (long)0, SEEK_SET) == -1)
+ err(1, "fseek to begin of ``%s''\n", path_fcodes);
+
+ if (f_icase)
+ fastfind_icase(fp, *s, path_fcodes);
+ else
+ fastfind(fp, *s, path_fcodes);
+#ifdef DEBUG
+ (void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
+#endif
+ s++;
+ }
+ (void)fclose(fp);
+}
-char *
-patprep(name)
- char *name;
+#ifdef MMAP
+void
+search_mmap(db, s)
+ char *db; /* database */
+ char **s; /* search strings */
{
- register char *endmark, *p, *subp;
-
- subp = globfree;
- *subp++ = '\0';
- p = name + strlen(name) - 1;
- /* skip trailing metacharacters (and [] ranges) */
- for (; p >= name; p--)
- if (index("*?", *p) == 0)
- break;
- if (p < name)
- p = name;
- if (*p == ']')
- for (p--; p >= name; p--)
- if (*p == '[') {
- p--;
- break;
- }
- if (p < name)
- p = name;
- /*
- * if pattern has only metacharacters, check every path (force '/'
- * search)
- */
- if ((p == name) && index("?*[]", *p) != 0)
- *subp++ = '/';
- else {
- for (endmark = p; p >= name; p--)
- if (index("]*?", *p) != 0)
- break;
- for (++p;
- (p <= endmark) && subp < (globfree + sizeof(globfree));)
- *subp++ = *p++;
+ struct stat sb;
+ int fd;
+ caddr_t p;
+ off_t len;
+#ifdef DEBUG
+ long t0;
+#endif
+ if ((fd = open(path_fcodes, O_RDONLY)) == -1 ||
+ fstat(fd, &sb) == -1)
+ err(1, "`%s'", path_fcodes);
+ len = sb.st_size;
+
+ if ((p = mmap((caddr_t)0, (size_t)len,
+ PROT_READ, MAP_SHARED,
+ fd, (off_t)0)) == (caddr_t)-1)
+ err(1, "mmap ``%s''", path_fcodes);
+
+ /* foreach search string ... */
+ while (*s != NULL) {
+#ifdef DEBUG
+ t0 = cputime();
+#endif
+ if (f_icase)
+ fastfind_mmap_icase(*s, p, (int)len, path_fcodes);
+ else
+ fastfind_mmap(*s, p, (int)len, path_fcodes);
+#ifdef DEBUG
+ (void)fprintf(stderr, "fastfind %ld ms\n", cputime () - t0);
+#endif
+ s++;
}
- *subp = '\0';
- return(--subp);
+
+ if (munmap(p, (size_t)len) == -1)
+ warn("munmap %s\n", path_fcodes);
+
+ (void)close(fd);
+}
+#endif /* MMAP */
+
+#ifdef DEBUG
+unsigned long
+cputime ()
+{
+ struct rusage rus;
+
+ getrusage(0, &rus);
+ return(rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000);
+}
+#endif /* DEBUG */
+
+void
+usage ()
+{
+ (void)fprintf(stderr, "usage: locate [-Scims] [-l limit] ");
+ (void)fprintf(stderr, "[-d database] pattern ...\n\n");
+ (void)fprintf(stderr, "default database: `%s' or $LOCATE_PATH\n",
+ _PATH_FCODES);
+ exit(1);
}
+
+
+/* load fastfind functions */
+
+/* statistic */
+/* fastfind_mmap, fastfind_mmap_icase */
+#ifdef MMAP
+#undef FF_MMAP
+#undef FF_ICASE
+
+#define FF_MMAP
+#include <fastfind.c>
+#define FF_ICASE
+#include <fastfind.c>
+#endif /* MMAP */
+
+/* fopen */
+/* fastfind, fastfind_icase */
+#undef FF_MMAP
+#undef FF_ICASE
+#include <fastfind.c>
+#define FF_ICASE
+#include <fastfind.c>
diff --git a/usr.bin/locate/locate/util.c b/usr.bin/locate/locate/util.c
new file mode 100644
index 0000000..313939b
--- /dev/null
+++ b/usr.bin/locate/locate/util.c
@@ -0,0 +1,267 @@
+/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * James A. Woods.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: util.c,v 1.2 1996/08/29 22:39:41 wosch Exp wosch $
+ */
+
+
+#include <stdlib.h>
+#include <string.h>
+#include <err.h>
+#include <sys/param.h>
+#include <stdio.h>
+
+#include "locate.h"
+
+char **colon __P((char **, char*, char*));
+char *patprep __P((char *));
+void print_matches __P((u_int));
+u_char *tolower_word __P((u_char *));
+int getwm __P((caddr_t));
+int getwf __P((FILE *));
+int check_bigram_char __P((int));
+
+/*
+ * Validate bigram chars. If the test failed the database is corrupt
+ * or the database is obviously not a locate database.
+ */
+int
+check_bigram_char(ch)
+ int ch;
+{
+ /* legal bigram: 0, ASCII_MIN ... ASCII_MAX */
+ if (ch == 0 ||
+ (ch >= ASCII_MIN && ch <= ASCII_MAX))
+ return(ch);
+
+ (void)fprintf(stderr, "locate database header corrupt, bigram ");
+ (void)fprintf(stderr, "char outside 0, %d-%d: %d\n",
+ ASCII_MIN, ASCII_MAX, ch);
+ exit(1);
+}
+
+/* split a colon separated string into a char vector
+ *
+ * "bla:foo" -> {"foo", "bla"}
+ * "bla:" -> {"foo", dot}
+ * "bla" -> {"bla"}
+ * "" -> do nothing
+ *
+ */
+char **
+colon(dbv, path, dot)
+ char **dbv;
+ char *path;
+ char *dot; /* default for single ':' */
+{
+ int vlen, slen;
+ char *c, *ch, *p;
+ char **pv;
+
+ if (dbv == NULL) {
+ if ((dbv = malloc(sizeof(char **))) == NULL)
+ err(1, "malloc");
+ *dbv = NULL;
+ }
+
+ /* empty string */
+ if (*path == '\0') {
+ (void)fprintf(stderr, "empty database name, ignored\n");
+ return(dbv);
+ }
+
+ /* length of string vector */
+ for(vlen = 0, pv = dbv; *pv != NULL; pv++, vlen++);
+
+ for (ch = c = path; ; ch++) {
+ if (*ch == ':' ||
+ (!*ch && !(*(ch - 1) == ':' && ch == 1+ path))) {
+ /* single colon -> dot */
+ if (ch == c)
+ p = dot;
+ else {
+ /* a string */
+ slen = ch - c;
+ if ((p = malloc(sizeof(char) * (slen + 1)))
+ == NULL)
+ err(1, "malloc");
+ bcopy(c, p, slen);
+ *(p + slen) = '\0';
+ }
+ /* increase dbv with element p */
+ if ((dbv = realloc(dbv, sizeof(char **) * (vlen + 2)))
+ == NULL)
+ err(1, "realloc");
+ *(dbv + vlen) = p;
+ *(dbv + ++vlen) = NULL;
+ c = ch + 1;
+ }
+ if (*ch == '\0')
+ break;
+ }
+ return (dbv);
+}
+
+void
+print_matches(counter)
+ u_int counter;
+{
+ (void)printf("%d\n", counter);
+}
+
+
+/*
+ * extract last glob-free subpattern in name for fast pre-match; prepend
+ * '\0' for backwards match; return end of new pattern
+ */
+static char globfree[100];
+
+char *
+patprep(name)
+ char *name;
+{
+ register char *endmark, *p, *subp;
+
+ subp = globfree;
+ *subp++ = '\0';
+ p = name + strlen(name) - 1;
+ /* skip trailing metacharacters (and [] ranges) */
+ for (; p >= name; p--)
+ if (index("*?", *p) == 0)
+ break;
+ if (p < name)
+ p = name;
+ if (*p == ']')
+ for (p--; p >= name; p--)
+ if (*p == '[') {
+ p--;
+ break;
+ }
+ if (p < name)
+ p = name;
+ /*
+ * if pattern has only metacharacters, check every path (force '/'
+ * search)
+ */
+ if ((p == name) && index("?*[]", *p) != 0)
+ *subp++ = '/';
+ else {
+ for (endmark = p; p >= name; p--)
+ if (index("]*?", *p) != 0)
+ break;
+ for (++p;
+ (p <= endmark) && subp < (globfree + sizeof(globfree));)
+ *subp++ = *p++;
+ }
+ *subp = '\0';
+ return(--subp);
+}
+
+/* tolower word */
+u_char *
+tolower_word(word)
+ u_char *word;
+{
+ register u_char *p;
+
+ for(p = word; *p != '\0'; p++)
+ *p = TOLOWER(*p);
+
+ return(word);
+}
+
+
+/*
+ * Read integer from mmap pointer.
+ * Essential a simple ``return *(int *)p'' but avoid sigbus
+ * for integer alignment (SunOS 4.x, 5.x).
+ *
+ * Convert network byte order to host byte order if neccessary.
+ * So we can read on FreeBSD/i386 (little endian) a locate database
+ * which was built on SunOS/sparc (big endian).
+ */
+
+int
+getwm(p)
+ caddr_t p;
+{
+ static char buf[INTSIZE];
+ register int i;
+
+ for (i = 0; i < INTSIZE; i++)
+ buf[i] = *p++;
+
+ i = *(int *)buf;
+
+ if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+ i = ntohl(i);
+ if (i > MAXPATHLEN || i < -(MAXPATHLEN)) {
+ (void)fprintf(stderr,
+ "integer out of +-MAXPATHLEN (%d): %d\n",
+ MAXPATHLEN, i);
+ exit(1);
+ }
+ }
+ return(i);
+}
+
+/*
+ * Read integer from stream.
+ *
+ * Convert network byte order to host byte order if neccessary.
+ * So we can read on FreeBSD/i386 (little endian) a locate database
+ * which was built on SunOS/sparc (big endian).
+ */
+
+int
+getwf(fp)
+ FILE *fp;
+{
+ register int word;
+
+ word = getw(fp);
+
+ if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+ word = ntohl(word);
+ if (word > MAXPATHLEN || word < -(MAXPATHLEN)) {
+ (void)fprintf(stderr,
+ "integer out of +-MAXPATHLEN (%d): %d\n",
+ MAXPATHLEN, word);
+ exit(1);
+ }
+ }
+ return(word);
+}
OpenPOWER on IntegriCloud