summaryrefslogtreecommitdiffstats
path: root/contrib/file/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/file/file.c')
-rw-r--r--contrib/file/file.c422
1 files changed, 190 insertions, 232 deletions
diff --git a/contrib/file/file.c b/contrib/file/file.c
index 2ccd8ca..269ae86 100644
--- a/contrib/file/file.c
+++ b/contrib/file/file.c
@@ -1,35 +1,49 @@
/*
+ * Copyright (c) Ian F. Darwin 1986-1995.
+ * Software written by Ian F. Darwin and others;
+ * maintained 1995-present by Christos Zoulas and others.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice immediately at the beginning of the file, without modification,
+ * this list of conditions, and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by Ian F. Darwin and others.
+ * 4. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+/*
* file - find type of a file or files - main program.
- *
- * Copyright (c) Ian F. Darwin, 1987.
- * Written by Ian F. Darwin.
- *
- * This software is not subject to any license of the American Telephone
- * and Telegraph Company or of the Regents of the University of California.
- *
- * Permission is granted to anyone to use this software for any purpose on
- * any computer system, and to alter it and redistribute it freely, subject
- * to the following restrictions:
- *
- * 1. The author is not responsible for the consequences of use of this
- * software, no matter how awful, even if they arise from flaws in it.
- *
- * 2. The origin of this software must not be misrepresented, either by
- * explicit claim or by omission. Since few users ever read sources,
- * credits must appear in the documentation.
- *
- * 3. Altered versions must be plainly marked as such, and must not be
- * misrepresented as being the original software. Since few users
- * ever read sources, credits must appear in the documentation.
- *
- * 4. This notice may not be removed or altered.
*/
#include "file.h"
+#include "magic.h"
+
+#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
+#include <sys/types.h>
#include <sys/param.h> /* for MAXPATHLEN */
+#include <sys/stat.h>
#include <fcntl.h> /* for open() */
#ifdef RESTORE_TIME
# if (__COHERENT__ >= 0x420)
@@ -48,9 +62,14 @@
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif
#ifdef HAVE_GETOPT_H
#include <getopt.h> /* for long options (is this portable?)*/
+#else
+#undef HAVE_GETOPT_LONG
#endif
#include <netinet/in.h> /* for byte swapping */
@@ -58,80 +77,65 @@
#include "patchlevel.h"
#ifndef lint
-FILE_RCSID("@(#)$Id: file.c,v 1.69 2003/02/27 20:47:46 christos Exp $")
+FILE_RCSID("@(#)$Id: file.c,v 1.93 2004/04/07 14:23:55 christos Exp $")
#endif /* lint */
#ifdef S_IFLNK
-# define USAGE "Usage: %s [-bciknsvzL] [-f namefile] [-m magicfiles] file...\n"
+#define SYMLINKFLAG "L"
#else
-# define USAGE "Usage: %s [-bciknsvz] [-f namefile] [-m magicfiles] file...\n"
+#define SYMLINKFLAG ""
#endif
-#ifdef __EMX__
-static char *apptypeName = NULL;
-int os2_apptype (const char *fn, char *buf, int nb);
-#endif /* __EMX__ */
-
-#ifndef MAGIC
-# define MAGIC "/etc/magic"
-#endif
+# define USAGE "Usage: %s [-bcik" SYMLINKFLAG "nNsvz] [-f namefile] [-F separator] [-m magicfiles] file...\n %s -C -m magicfiles\n"
#ifndef MAXPATHLEN
#define MAXPATHLEN 512
#endif
-int /* Global command-line options */
- debug = 0, /* debugging */
- lflag = 0, /* follow Symlinks (BSD only) */
+private int /* Global command-line options */
bflag = 0, /* brief output format */
- zflag = 0, /* follow (uncompress) compressed files */
- sflag = 0, /* read block special files */
- iflag = 0,
nopad = 0, /* Don't pad output */
- nobuffer = 0, /* Do not buffer stdout */
- kflag = 0; /* Keep going after the first match */
-
-int /* Misc globals */
- nmagic = 0; /* number of valid magic[]s */
+ nobuffer = 0; /* Do not buffer stdout */
-struct magic *magic; /* array of magic entries */
+private const char *magicfile = 0; /* where the magic is */
+private const char *default_magicfile = MAGIC;
+private char *separator = ":"; /* Default field separator */
-const char *magicfile = 0; /* where the magic is */
-const char *default_magicfile = MAGIC;
+private char *progname; /* used throughout */
-char separator = ':'; /* Default field separator */
+private struct magic_set *magic;
-char *progname; /* used throughout */
-int lineno; /* line number in the magic file */
-
-
-static void unwrap(char *fn);
-static void usage(void);
+private void unwrap(char *);
+private void usage(void);
#ifdef HAVE_GETOPT_LONG
-static void help(void);
+private void help(void);
#endif
#if 0
-static int byteconv4(int, int, int);
-static short byteconv2(int, int, int);
+private int byteconv4(int, int, int);
+private short byteconv2(int, int, int);
#endif
int main(int, char *[]);
+private void process(const char *, int);
+private void load(const char *, int);
+
/*
* main - parse arguments and handle options
*/
int
-main(int argc, char **argv)
+main(int argc, char *argv[])
{
int c;
- int action = 0, didsomefiles = 0, errflg = 0, ret = 0, app = 0;
- char *mime, *home, *usermagic;
+ int action = 0, didsomefiles = 0, errflg = 0;
+ int flags = 0;
+ char *home, *usermagic;
struct stat sb;
-#define OPTSTRING "bcdf:F:ikm:nNsvzCL"
+#define OPTSTRING "bcCdf:F:ikLm:nNprsvz"
#ifdef HAVE_GETOPT_LONG
int longindex;
- static struct option long_options[] =
+ private struct option long_options[] =
{
{"version", 0, 0, 'v'},
{"help", 0, 0, 0},
@@ -146,7 +150,11 @@ main(int argc, char **argv)
{"dereference", 0, 0, 'L'},
#endif
{"magic-file", 1, 0, 'm'},
+#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
+ {"preserve-date", 0, 0, 'p'},
+#endif
{"uncompress", 0, 0, 'z'},
+ {"raw", 0, 0, 'r'},
{"no-buffer", 0, 0, 'n'},
{"no-pad", 0, 0, 'N'},
{"special-files", 0, 0, 's'},
@@ -201,37 +209,29 @@ main(int argc, char **argv)
++bflag;
break;
case 'c':
- action = CHECK;
+ action = FILE_CHECK;
break;
case 'C':
- action = COMPILE;
+ action = FILE_COMPILE;
break;
case 'd':
- ++debug;
+ flags |= MAGIC_DEBUG|MAGIC_CHECK;
break;
case 'f':
- if (!app) {
- ret = apprentice(magicfile, action);
- if (action)
- exit(ret);
- app = 1;
- }
+ if(action)
+ usage();
+ load(magicfile, flags);
unwrap(optarg);
++didsomefiles;
break;
case 'F':
- separator = *optarg;
+ separator = optarg;
break;
case 'i':
- iflag++;
- if ((mime = malloc(strlen(magicfile) + 6)) != NULL) {
- (void)strcpy(mime, magicfile);
- (void)strcat(mime, ".mime");
- magicfile = mime;
- }
+ flags |= MAGIC_MIME;
break;
case 'k':
- kflag = 1;
+ flags |= MAGIC_CONTINUE;
break;
case 'm':
magicfile = optarg;
@@ -242,21 +242,29 @@ main(int argc, char **argv)
case 'N':
++nopad;
break;
+#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
+ case 'p':
+ flags |= MAGIC_PRESERVE_ATIME;
+ break;
+#endif
+ case 'r':
+ flags |= MAGIC_RAW;
+ break;
case 's':
- sflag++;
+ flags |= MAGIC_DEVICES;
break;
case 'v':
- (void) fprintf(stdout, "%s-%d.%d\n", progname,
+ (void) fprintf(stdout, "%s-%d.%.2d\n", progname,
FILE_VERSION_MAJOR, patchlevel);
(void) fprintf(stdout, "magic file from %s\n",
magicfile);
return 1;
case 'z':
- zflag++;
+ flags |= MAGIC_COMPRESS;
break;
#ifdef S_IFLNK
case 'L':
- ++lflag;
+ flags |= MAGIC_SYMLINK;
break;
#endif
case '?':
@@ -269,11 +277,26 @@ main(int argc, char **argv)
usage();
}
- if (!app) {
- ret = apprentice(magicfile, action);
- if (action)
- exit(ret);
- app = 1;
+ switch(action) {
+ case FILE_CHECK:
+ case FILE_COMPILE:
+ magic = magic_open(flags|MAGIC_CHECK);
+ if (magic == NULL) {
+ (void)fprintf(stderr, "%s: %s\n", progname,
+ strerror(errno));
+ return 1;
+ }
+ c = action == FILE_CHECK ? magic_check(magic, magicfile) :
+ magic_compile(magic, magicfile);
+ if (c == -1) {
+ (void)fprintf(stderr, "%s: %s\n", progname,
+ magic_error(magic));
+ return -1;
+ }
+ return 0;
+ default:
+ load(magicfile, flags);
+ break;
}
if (optind == argc) {
@@ -284,7 +307,7 @@ main(int argc, char **argv)
else {
int i, wid, nw;
for (wid = 0, i = optind; i < argc; i++) {
- nw = strlen(argv[i]);
+ nw = file_mbswidth(argv[i]);
if (nw > wid)
wid = nw;
}
@@ -296,10 +319,27 @@ main(int argc, char **argv)
}
+private void
+load(const char *m, int flags)
+{
+ if (magic)
+ return;
+ magic = magic_open(flags);
+ if (magic == NULL) {
+ (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
+ exit(1);
+ }
+ if (magic_load(magic, magicfile) == -1) {
+ (void)fprintf(stderr, "%s: %s\n",
+ progname, magic_error(magic));
+ exit(1);
+ }
+}
+
/*
* unwrap -- read a file of filenames, do each one.
*/
-static void
+private void
unwrap(char *fn)
{
char buf[MAXPATHLEN];
@@ -311,12 +351,13 @@ unwrap(char *fn)
wid = 1;
} else {
if ((f = fopen(fn, "r")) == NULL) {
- error("Cannot open `%s' (%s).\n", fn, strerror(errno));
- /*NOTREACHED*/
+ (void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
+ progname, fn, strerror(errno));
+ exit(1);
}
while (fgets(buf, MAXPATHLEN, f) != NULL) {
- cwid = strlen(buf) - 1;
+ cwid = file_mbswidth(buf) - 1;
if (cwid > wid)
wid = cwid;
}
@@ -325,7 +366,7 @@ unwrap(char *fn)
}
while (fgets(buf, MAXPATHLEN, f) != NULL) {
- buf[strlen(buf)-1] = '\0';
+ buf[file_mbswidth(buf)-1] = '\0';
process(buf, wid);
if(nobuffer)
(void) fflush(stdout);
@@ -334,6 +375,23 @@ unwrap(char *fn)
(void) fclose(f);
}
+private void
+process(const char *inname, int wid)
+{
+ const char *type;
+ int std_in = strcmp(inname, "-") == 0;
+
+ if (wid > 0 && !bflag)
+ (void) printf("%s%s%*s ", std_in ? "/dev/stdin" : inname,
+ separator, (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
+
+ type = magic_file(magic, std_in ? NULL : inname);
+ if (type == NULL)
+ printf("ERROR: %s\n", magic_error(magic));
+ else
+ printf("%s\n", type);
+}
+
#if 0
/*
@@ -343,7 +401,7 @@ unwrap(char *fn)
* same whether to perform byte swapping
* big_endian whether we are a big endian host
*/
-static int
+private int
byteconv4(int from, int same, int big_endian)
{
if (same)
@@ -370,7 +428,7 @@ byteconv4(int from, int same, int big_endian)
* byteconv2
* Same as byteconv4, but for shorts
*/
-static short
+private short
byteconv2(int from, int same, int big_endian)
{
if (same)
@@ -392,148 +450,44 @@ byteconv2(int from, int same, int big_endian)
}
#endif
-/*
- * process - process input file
- */
-void
-process(const char *inname, int wid)
+size_t
+file_mbswidth(const char *s)
{
- int fd = 0;
- static const char stdname[] = "standard input";
- unsigned char buf[HOWMANY+1]; /* one extra for terminating '\0' */
- struct stat sb;
- int nbytes = 0; /* number of bytes read from a datafile */
- char match = '\0';
-
- if (strcmp("-", inname) == 0) {
- if (fstat(0, &sb)<0) {
- error("cannot fstat `%s' (%s).\n", stdname,
- strerror(errno));
- /*NOTREACHED*/
- }
- inname = stdname;
- }
-
- if (wid > 0 && !bflag)
- (void) printf("%s%c%*s", inname, separator,
- (int) (nopad ? 0 : 1 + (wid - strlen(inname))), "");
-
- if (inname != stdname) {
- /*
- * first try judging the file based on its filesystem status
- */
- if (fsmagic(inname, &sb) != 0) {
- putchar('\n');
- return;
+#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
+ size_t bytesconsumed, old_n, n, width = 0;
+ mbstate_t state;
+ wchar_t nextchar;
+ (void)memset(&state, 0, sizeof(mbstate_t));
+ old_n = n = strlen(s);
+
+ while (n > 0) {
+ bytesconsumed = mbrtowc(&nextchar, s, n, &state);
+ if (bytesconsumed == (size_t)(-1) ||
+ bytesconsumed == (size_t)(-2)) {
+ /* Something went wrong, return something reasonable */
+ return old_n;
}
-
- if ((fd = open(inname, O_RDONLY)) < 0) {
- /* We can't open it, but we were able to stat it. */
- if (sb.st_mode & 0002) ckfputs("writable, ", stdout);
- if (sb.st_mode & 0111) ckfputs("executable, ", stdout);
- ckfprintf(stdout, "can't read `%s' (%s).\n",
- inname, strerror(errno));
- return;
- }
- }
-
-
- /*
- * try looking at the first HOWMANY bytes
- */
- if ((nbytes = read(fd, (char *)buf, HOWMANY)) == -1) {
- error("read failed (%s).\n", strerror(errno));
- /*NOTREACHED*/
+ if (s[0] == '\n') {
+ /*
+ * do what strlen() would do, so that caller
+ * is always right
+ */
+ width++;
+ } else
+ width += wcwidth(nextchar);
+
+ s += bytesconsumed, n -= bytesconsumed;
}
-
- if (nbytes == 0)
- ckfputs(iflag ? "application/x-empty" : "empty", stdout);
- else {
- buf[nbytes++] = '\0'; /* null-terminate it */
- match = tryit(inname, buf, nbytes, zflag);
- }
-
-#ifdef BUILTIN_ELF
- if (match == 's' && nbytes > 5) {
- /*
- * We matched something in the file, so this *might*
- * be an ELF file, and the file is at least 5 bytes long,
- * so if it's an ELF file it has at least one byte
- * past the ELF magic number - try extracting information
- * from the ELF headers that can't easily be extracted
- * with rules in the magic file.
- */
- tryelf(fd, buf, nbytes);
- }
-#endif
-
- if (inname != stdname) {
-#ifdef RESTORE_TIME
- /*
- * Try to restore access, modification times if read it.
- * This is really *bad* because it will modify the status
- * time of the file... And of course this will affect
- * backup programs
- */
-# ifdef USE_UTIMES
- struct timeval utsbuf[2];
- utsbuf[0].tv_sec = sb.st_atime;
- utsbuf[1].tv_sec = sb.st_mtime;
-
- (void) utimes(inname, utsbuf); /* don't care if loses */
-# else
- struct utimbuf utbuf;
-
- utbuf.actime = sb.st_atime;
- utbuf.modtime = sb.st_mtime;
- (void) utime(inname, &utbuf); /* don't care if loses */
-# endif
-#endif
- (void) close(fd);
- }
- (void) putchar('\n');
-}
-
-
-int
-tryit(const char *fn, unsigned char *buf, int nb, int zfl)
-{
-
- /*
- * The main work is done here!
- * We have the file name and/or the data buffer to be identified.
- */
-
-#ifdef __EMX__
- /*
- * Ok, here's the right place to add a call to some os-specific
- * routine, e.g.
- */
- if (os2_apptype(fn, buf, nb) == 1)
- return 'o';
+ return width;
+#else
+ return strlen(s);
#endif
- /* try compression stuff */
- if (zfl && zmagic(fn, buf, nb))
- return 'z';
-
- /* try tests in /etc/magic (or surrogate magic file) */
- if (softmagic(buf, nb))
- return 's';
-
- /* try known keywords, check whether it is ASCII */
- if (ascmagic(buf, nb))
- return 'a';
-
- /* abandon hope, all ye who remain here */
- ckfputs(iflag ? "application/octet-stream" : "data", stdout);
- return '\0';
}
-static void
+private void
usage(void)
{
- (void)fprintf(stderr, USAGE, progname);
- (void)fprintf(stderr, "Usage: %s -C [-m magic]\n", progname);
+ (void)fprintf(stderr, USAGE, progname, progname);
#ifdef HAVE_GETOPT_LONG
(void)fputs("Try `file --help' for more information.\n", stderr);
#endif
@@ -541,7 +495,7 @@ usage(void)
}
#ifdef HAVE_GETOPT_LONG
-static void
+private void
help(void)
{
puts(
@@ -556,10 +510,14 @@ help(void)
" conjunction with -m to debug a new magic file\n"
" before installing it\n"
" -f, --files-from FILE read the filenames to be examined from FILE\n"
+" -F, --separator string use string as separator instead of `:'\n"
" -i, --mime output mime type strings\n"
" -k, --keep-going don't stop at the first match\n"
" -L, --dereference causes symlinks to be followed\n"
" -n, --no-buffer do not buffer output\n"
+" -N, --no-pad do not pad output\n"
+" -p, --preserve-date preserve access times on files\n"
+" -r, --raw don't translate unprintable chars to \\ooo\n"
" -s, --special-files treat special (block/char devices) files as\n"
" ordinary ones\n"
" --help display this help and exit\n"
OpenPOWER on IntegriCloud