summaryrefslogtreecommitdiffstats
path: root/usr.bin/locate/code
diff options
context:
space:
mode:
authorwosch <wosch@FreeBSD.org>1996-10-13 01:44:43 +0000
committerwosch <wosch@FreeBSD.org>1996-10-13 01:44:43 +0000
commit1edf24275a071892cdbff4ba07879dea75987a51 (patch)
treeba20d97a5160d5236052b18ca5c02a3b4d24bd94 /usr.bin/locate/code
parent154b44fb0a407fda8021c14d48dc08625f97bdbc (diff)
downloadFreeBSD-src-1edf24275a071892cdbff4ba07879dea75987a51.zip
FreeBSD-src-1edf24275a071892cdbff4ba07879dea75987a51.tar.gz
8-Bit character support.
Old locate(1) programs still work with the new database format; they print some garbage for 8-bit characters, but don't dump core (maybe except for char 30). 7-bit puritans should not notice any difference. Same speed, and the same database size if the database contains only ASCII characters. Reviewed by: ache
Diffstat (limited to 'usr.bin/locate/code')
-rw-r--r--usr.bin/locate/code/locate.code.c82
1 files changed, 56 insertions, 26 deletions
diff --git a/usr.bin/locate/code/locate.code.c b/usr.bin/locate/code/locate.code.c
index 469b978..67228da 100644
--- a/usr.bin/locate/code/locate.code.c
+++ b/usr.bin/locate/code/locate.code.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 1995 Wolfram Schneider <wosch@FreeBSD.org>. Berlin.
* Copyright (c) 1989, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -33,7 +34,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: locate.code.c,v 1.4 1996/08/22 18:46:13 wosch Exp $
+ * $Id: locate.code.c,v 1.5 1996/08/31 14:51:18 wosch Exp $
*/
#ifndef lint
@@ -72,13 +73,22 @@ static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";
*
* 0-28 likeliest differential counts + offset to make nonnegative
* 30 switch code for out-of-range count to follow in next word
+ * 31 an 8-bit char follows
* 128-255 bigram codes (128 most common, as determined by 'updatedb')
* 32-127 single character (printable) ascii residue (ie, literal)
*
- * SEE ALSO: updatedb.csh, bigram.c
+ * The locate database stores any character except newline ('\n')
+ * and NUL ('\0'). The 8-bit character support doesn't waste extra
+ * space unless file names contain characters less than 32
+ * or greater than 127.
+ *
+ *
+ * SEE ALSO: updatedb.sh, ../bigram/locate.bigram.c
*
* AUTHOR: James A. Woods, Informatics General Corp.,
* NASA Ames Research Center, 10/82
+ * 8-bit file name characters:
+ * Wolfram Schneider, Berlin September 1996
*/
#include <sys/param.h>
@@ -93,14 +103,14 @@ static char sccsid[] = "@(#)locate.code.c 8.1 (Berkeley) 6/6/93";
u_char buf1[MAXPATHLEN] = " ";
u_char buf2[MAXPATHLEN];
-char bigrams[BGBUFSIZE + 1] = { 0 };
+u_char bigrams[BGBUFSIZE + 1] = { 0 };
#define LOOKUP 1 /* use a lookup array instead a function, 3x faster */
#ifdef LOOKUP
-#define BGINDEX(x) (big[(u_int)*x][(u_int)*(x+1)])
-typedef u_char bg_t;
-bg_t big[UCHAR_MAX][UCHAR_MAX];
+#define BGINDEX(x) (big[(u_char)*x][(u_char)*(x + 1)])
+typedef short bg_t;
+bg_t big[UCHAR_MAX + 1][UCHAR_MAX + 1];
#else
#define BGINDEX(x) bgindex(x)
typedef int bg_t;
@@ -145,12 +155,13 @@ main(argc, argv)
#ifdef LOOKUP
/* init lookup table */
- for (i = 0; i < UCHAR_MAX; i++)
- for (j = 0; j < UCHAR_MAX; j++)
+ for (i = 0; i < UCHAR_MAX + 1; i++)
+ for (j = 0; j < UCHAR_MAX + 1; j++)
big[i][j] = (bg_t)-1;
for (cp = bigrams, i = 0; *cp != '\0'; i += 2, cp += 2)
- big[(int)*cp][(int)*(cp + 1)] = (bg_t)i;
+ big[(u_char)*cp][(u_char)*(cp + 1)] = (bg_t)i;
+
#endif /* LOOKUP */
oldpath = buf1;
@@ -159,22 +170,21 @@ main(argc, argv)
while (fgets(path, sizeof(buf2), stdin) != NULL) {
- /* skip empty lines */
+ /* skip empty lines */
if (*path == '\n')
continue;
- /* Squelch characters that would botch the decoding. */
+ /* remove newline */
for (cp = path; *cp != '\0'; cp++) {
/* chop newline */
if (*cp == '\n')
*cp = '\0';
- /* range */
- else if (*cp < ASCII_MIN || *cp > ASCII_MAX)
- *cp = '?';
}
/* Skip longest common prefix. */
- for (cp = path; *cp == *oldpath && *cp != '\0'; cp++, oldpath++);
+ for (cp = path; *cp == *oldpath; cp++, oldpath++)
+ if (*cp == '\0')
+ break;
count = cp - path;
diffcount = count - oldcount + OFFSET;
@@ -188,22 +198,42 @@ main(argc, argv)
err(1, "stdout");
while (*cp != '\0') {
- if (*(cp + 1) == '\0') {
- if (putchar(*cp) == EOF)
- err(1, "stdout");
- break;
- }
- if ((code = BGINDEX(cp)) == (bg_t)-1) {
- if (putchar(*cp++) == EOF ||
- putchar(*cp++) == EOF)
- err(1, "stdout");
- } else {
- /* Found, so mark byte with parity bit. */
+ /* print *two* characters */
+
+ if ((code = BGINDEX(cp)) != (bg_t)-1) {
+ /*
+ * print *one* as bigram
+ * Found, so mark byte with
+ * parity bit.
+ */
if (putchar((code / 2) | PARITY) == EOF)
err(1, "stdout");
cp += 2;
}
+
+ else {
+ for (i = 0; i < 2; i++) {
+ if (*cp == '\0')
+ break;
+
+ /* print umlauts in file names */
+ if (*cp < ASCII_MIN ||
+ *cp > ASCII_MAX) {
+ if (putchar(UMLAUT) == EOF ||
+ putchar(*cp++) == EOF)
+ err(1, "stdout");
+ }
+
+ else {
+ /* normal character */
+ if(putchar(*cp++) == EOF)
+ err(1, "stdout");
+ }
+ }
+
+ }
}
+
if (path == buf1) { /* swap pointers */
path = buf2;
oldpath = buf1;
OpenPOWER on IntegriCloud