summaryrefslogtreecommitdiffstats
path: root/usr.bin/file
diff options
context:
space:
mode:
authorjoerg <joerg@FreeBSD.org>1996-12-11 14:09:12 +0000
committerjoerg <joerg@FreeBSD.org>1996-12-11 14:09:12 +0000
commit2f3249e30505ccc6b0eb2fd1b892f3e2813453ce (patch)
tree97aba5034f2e97275813d181d18b4b2128f57e57 /usr.bin/file
parentee57f46ce0a7ad65af6b01f68b94eb2b9d315d79 (diff)
downloadFreeBSD-src-2f3249e30505ccc6b0eb2fd1b892f3e2813453ce.zip
FreeBSD-src-2f3249e30505ccc6b0eb2fd1b892f3e2813453ce.tar.gz
Add another matching algorithm to do heuristics for international
language text files. Should finally close PR # bin/1925: file does not consider cyrillic text..., though I've never got any response from the originator about my suggestion. While I was at it, also move out the `magic' file to /usr/share/misc, there's nothing that magic about this file to justify its life under /etc.
Diffstat (limited to 'usr.bin/file')
-rw-r--r--usr.bin/file/Makefile8
-rw-r--r--usr.bin/file/file.c6
-rw-r--r--usr.bin/file/file.h3
-rw-r--r--usr.bin/file/international.c72
4 files changed, 83 insertions, 6 deletions
diff --git a/usr.bin/file/Makefile b/usr.bin/file/Makefile
index efac409..d1ff65d 100644
--- a/usr.bin/file/Makefile
+++ b/usr.bin/file/Makefile
@@ -1,6 +1,6 @@
# Makefile for file(1) cmd.
# Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE.
-# @(#)$Id: Makefile,v 1.4 1995/07/25 00:36:03 bde Exp $
+# @(#)$Id: Makefile,v 1.5 1996/08/17 22:27:08 wosch Exp $
#
# This software is not subject to any license of the American Telephone
# and Telegraph Company or of the Regents of the University of California.
@@ -23,7 +23,7 @@
# 4. This notice may not be removed or altered.
#
# Hacked and dismembered for bmake (Geoff Rehmet).
-MAGIC= /etc/magic
+MAGIC= /usr/share/misc/magic
MAGICOWN= bin
MAGICGRP= bin
MAGICMODE= 444
@@ -33,7 +33,7 @@ CFLAGS+= -DMAGIC='"$(MAGIC)"'
PROG= file
SRCS= file.c apprentice.c fsmagic.c softmagic.c ascmagic.c \
- compress.c is_tar.c print.c
+ compress.c is_tar.c print.c international.c
MAN1= file.1
MAN5= magic.5
@@ -51,7 +51,7 @@ magic: $(MAGFILES)
cat $(MAGFILES) > $(.TARGET)
# called from /usr/src/etc/Makefile
-etc-magic:
+beforeinstall:
${INSTALL} -c -o $(MAGICOWN) -g $(MAGICGRP) -m $(MAGICMODE) magic \
$(DESTDIR)$(MAGIC)
diff --git a/usr.bin/file/file.c b/usr.bin/file/file.c
index 5814255..5400083 100644
--- a/usr.bin/file/file.c
+++ b/usr.bin/file/file.c
@@ -26,7 +26,7 @@
*/
#ifndef lint
static char *moduleid =
- "@(#)$Id: file.c,v 1.2 1995/05/30 06:30:01 rgrimes Exp $";
+ "@(#)$Id: file.c,v 1.3 1996/01/23 12:40:11 mpp Exp $";
#endif /* lint */
#include <stdio.h>
@@ -343,6 +343,10 @@ int nb, zflag;
if (ascmagic(buf, nb))
return 'a';
+ /* see if it's international language text */
+ if (internatmagic(buf, nb))
+ return 'i';
+
/* abandon hope, all ye who remain here */
ckfputs("data", stdout);
return '\0';
diff --git a/usr.bin/file/file.h b/usr.bin/file/file.h
index 5c9888d..1ed6772 100644
--- a/usr.bin/file/file.h
+++ b/usr.bin/file/file.h
@@ -1,6 +1,6 @@
/*
* file.h - definitions for file(1) program
- * @(#)$Id: file.h,v 1.2 1995/05/30 06:30:02 rgrimes Exp $
+ * @(#)$Id: file.h,v 1.3 1996/01/23 12:40:13 mpp Exp $
*
* Copyright (c) Ian F. Darwin, 1987.
* Written by Ian F. Darwin.
@@ -87,6 +87,7 @@ extern void error __P((const char *, ...));
extern void ckfputs __P((const char *, FILE *));
struct stat;
extern int fsmagic __P((const char *, struct stat *));
+extern int internatmagic __P((unsigned char *, int));
extern int is_compress __P((const unsigned char *, int *));
extern int is_tar __P((unsigned char *, int));
extern void magwarn __P((const char *, ...));
diff --git a/usr.bin/file/international.c b/usr.bin/file/international.c
new file mode 100644
index 0000000..59a508a
--- /dev/null
+++ b/usr.bin/file/international.c
@@ -0,0 +1,72 @@
+#include "file.h"
+
+#include <string.h>
+
+#define F 0
+#define T 1
+
+/*
+ * Lookup table, indexed by byte value, of the characters that look
+ * "reasonable" in international language texts (T = acceptable,
+ * F = not acceptable).  That's almost all characters :), except a
+ * few in the control range of ASCII (all the known international
+ * character sets share the bottom half with ASCII).
+ *
+ * As the rows below show, the only control codes accepted are
+ * 0x08-0x0d (BS, HT, LF, VT, FF, CR) and 0x1b (ESC); 0x7f (DEL) is
+ * rejected, and every value from 0x80 upwards is accepted.
+ */
+static char maybe_internat[256] = {
+ F, F, F, F, F, F, F, F, T, T, T, T, T, T, F, F, /* 0x0X */
+ F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x8X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x9X */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xaX */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xbX */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xcX */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xdX */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0xeX */
+ T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T /* 0xfX */
+};
+
+/* Maximal length of a line we consider "reasonable". */
+#define MAXLINELEN 300
+
+/*
+ * Heuristic check for "international language text".
+ *
+ * buf    - bytes to examine
+ * nbytes - size of buf; the final byte is never examined
+ *          (NOTE(review): presumably a terminator appended by the
+ *          caller -- confirm against file.c)
+ *
+ * The data qualifies when every examined byte is marked acceptable
+ * in maybe_internat[] and no line is longer than MAXLINELEN bytes.
+ * A trailing fragment without a newline is tolerated as long as it
+ * is itself no longer than MAXLINELEN.
+ *
+ * On success the description is written to stdout via ckfputs() and
+ * 1 is returned; otherwise nothing is printed and 0 is returned.
+ */
+int
+internatmagic(buf, nbytes)
+	unsigned char *buf;
+	int nbytes;
+{
+	unsigned char *line, *nl;
+	int left, k;
+
+	/* Everything but the last byte of the buffer is inspected. */
+	left = nbytes - 1;
+
+	/* Pass 1: reject the buffer on the first unacceptable byte. */
+	for (k = 0; k < left; k++) {
+		if (maybe_internat[buf[k]] == 0)
+			return 0;
+	}
+
+	/*
+	 * Pass 2: walk the buffer one line at a time; `left' is the
+	 * number of bytes not yet consumed, starting at `line'.
+	 */
+	line = buf;
+	while (left > 0) {
+		nl = memchr(line, '\n', left);
+		if (nl == NULL) {
+			/*
+			 * No further newline: acceptable only if the
+			 * unterminated tail is of reasonable length.
+			 */
+			if (left > MAXLINELEN)
+				return 0;
+			break;
+		}
+		if (nl - line > MAXLINELEN)
+			return 0;
+		/* Step over this line and its newline. */
+		left -= (nl - line + 1);
+		line = nl + 1;
+	}
+
+	ckfputs("International language text", stdout);
+	return 1;
+}
OpenPOWER on IntegriCloud