summaryrefslogtreecommitdiffstats
path: root/contrib/file/apprentice.c
diff options
context:
space:
mode:
authorobrien <obrien@FreeBSD.org>2012-04-19 03:20:13 +0000
committerobrien <obrien@FreeBSD.org>2012-04-19 03:20:13 +0000
commit248502d7f08de3125e3e3cc808446294565ba5df (patch)
treec7640efe490a887a5460d7a88d62aca990489d21 /contrib/file/apprentice.c
parent86e3fa59a2a400ec95b0c84576fb00815631d92f (diff)
parent15f98df7891f1853090ecb6c4a9cc734e671ef6b (diff)
downloadFreeBSD-src-248502d7f08de3125e3e3cc808446294565ba5df.zip
FreeBSD-src-248502d7f08de3125e3e3cc808446294565ba5df.tar.gz
Update file(1) to version 5.11.
Diffstat (limited to 'contrib/file/apprentice.c')
-rw-r--r--contrib/file/apprentice.c366
1 files changed, 280 insertions, 86 deletions
diff --git a/contrib/file/apprentice.c b/contrib/file/apprentice.c
index 3827e5c..a9d4d17 100644
--- a/contrib/file/apprentice.c
+++ b/contrib/file/apprentice.c
@@ -32,11 +32,10 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: apprentice.c,v 1.151 2009/03/18 15:19:23 christos Exp $")
+FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $")
#endif /* lint */
#include "magic.h"
-#include "patchlevel.h"
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
@@ -72,10 +71,6 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.151 2009/03/18 15:19:23 christos Exp $")
#define MAP_FILE 0
#endif
-#ifndef MAXPATHLEN
-#define MAXPATHLEN 1024
-#endif
-
struct magic_entry {
struct magic *mp;
uint32_t cont_count;
@@ -97,6 +92,9 @@ private void eatsize(const char **);
private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
private size_t apprentice_magic_strength(const struct magic *);
private int apprentice_sort(const void *, const void *);
+#ifndef COMPILE_ONLY
+private void apprentice_list(struct mlist *, int );
+#endif
private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
const char *, int);
#ifndef COMPILE_ONLY
@@ -322,6 +320,12 @@ apprentice_1(struct magic_set *ms, const char *fn, int action,
ml->next = mlist;
mlist->prev = ml;
+ if (action == FILE_LIST) {
+ printf("Binary patterns:\n");
+ apprentice_list(mlist, BINTEST);
+ printf("Text patterns:\n");
+ apprentice_list(mlist, TEXTTEST);
+ }
#endif /* COMPILE_ONLY */
return 0;
}
@@ -361,12 +365,10 @@ file_apprentice(struct magic_set *ms, const char *fn, int action)
int file_err, errs = -1;
struct mlist *mlist;
- init_file_tables();
+ if ((fn = magic_getpath(fn, action)) == NULL)
+ return NULL;
- if (fn == NULL)
- fn = getenv("MAGIC");
- if (fn == NULL)
- fn = MAGIC;
+ init_file_tables();
if ((mfn = strdup(fn)) == NULL) {
file_oomem(ms, strlen(fn));
@@ -558,6 +560,45 @@ apprentice_sort(const void *a, const void *b)
return 1;
}
+/*
+ * Print every top-level magic entry whose flag word contains all of the
+ * bits in `mode', walking the entries in the order they are stored in the
+ * list.  For each selected entry one line is written to stdout giving the
+ * strength of the top-level entry followed by the first description and
+ * MIME type found at or below it.
+ *
+ * mlist: list head; traversal starts at mlist->next and stops on return
+ *        to the head.
+ * mode:  bit mask (the visible caller passes BINTEST or TEXTTEST).
+ */
+#ifndef COMPILE_ONLY
+private void
+apprentice_list(struct mlist *mlist, int mode)
+{
+	struct mlist *node;
+
+	for (node = mlist->next; node != mlist; node = node->next) {
+		struct magic *entries = node->magic;
+		uint32_t idx;
+
+		for (idx = 0; idx < node->nmagic; idx++) {
+			struct magic *top = &entries[idx];
+
+			if ((top->flag & mode) == mode) {
+				/*
+				 * Descend through the continuation entries
+				 * for as long as the current one carries
+				 * neither a description nor a MIME type.
+				 */
+				while (idx + 1 < node->nmagic &&
+				    entries[idx + 1].cont_level != 0 &&
+				    entries[idx].desc[0] == '\0' &&
+				    entries[idx].mimetype[0] == '\0')
+					idx++;
+
+				printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
+				    apprentice_magic_strength(top),
+				    entries[idx].desc,
+				    entries[idx].mimetype);
+			} else {
+				/*
+				 * Not selected: step over its sub-tests so
+				 * the outer loop resumes at the next
+				 * top-level test.
+				 */
+				while (idx + 1 < node->nmagic &&
+				    entries[idx + 1].cont_level != 0)
+					idx++;
+			}
+		}
+	}
+}
+#endif /* COMPILE_ONLY */
+
private void
set_test_type(struct magic *mstart, struct magic *m)
{
@@ -593,20 +634,36 @@ set_test_type(struct magic *mstart, struct magic *m)
case FILE_DOUBLE:
case FILE_BEDOUBLE:
case FILE_LEDOUBLE:
+ mstart->flag |= BINTEST;
+ break;
case FILE_STRING:
case FILE_PSTRING:
case FILE_BESTRING16:
case FILE_LESTRING16:
- /* binary test, set flag */
- mstart->flag |= BINTEST;
+ /* Allow text overrides */
+ if (mstart->str_flags & STRING_TEXTTEST)
+ mstart->flag |= TEXTTEST;
+ else
+ mstart->flag |= BINTEST;
break;
case FILE_REGEX:
case FILE_SEARCH:
#ifndef COMPILE_ONLY
+ /* Check for override */
+ if (mstart->str_flags & STRING_BINTEST)
+ mstart->flag |= BINTEST;
+ if (mstart->str_flags & STRING_TEXTTEST)
+ mstart->flag |= TEXTTEST;
+
+ if (mstart->flag & (TEXTTEST|BINTEST))
+ break;
+
/* binary test if pattern is not text */
if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
NULL) <= 0)
mstart->flag |= BINTEST;
+ else
+ mstart->flag |= TEXTTEST;
#endif
break;
case FILE_DEFAULT:
@@ -627,34 +684,38 @@ private void
load_1(struct magic_set *ms, int action, const char *fn, int *errs,
struct magic_entry **marray, uint32_t *marraycount)
{
- char line[BUFSIZ];
- size_t lineno = 0;
+ size_t lineno = 0, llen = 0;
+ char *line = NULL;
+ ssize_t len;
+
FILE *f = fopen(ms->file = fn, "r");
if (f == NULL) {
if (errno != ENOENT)
file_error(ms, errno, "cannot read magic file `%s'",
fn);
(*errs)++;
- } else {
- /* read and parse this file */
- for (ms->line = 1; fgets(line, sizeof(line), f) != NULL; ms->line++) {
- size_t len;
- len = strlen(line);
- if (len == 0) /* null line, garbage, etc */
- continue;
- if (line[len - 1] == '\n') {
- lineno++;
- line[len - 1] = '\0'; /* delete newline */
- }
- if (line[0] == '\0') /* empty, do not parse */
- continue;
- if (line[0] == '#') /* comment, do not parse */
- continue;
- if (line[0] == '!' && line[1] == ':') {
+ return;
+ }
+
+ /* read and parse this file */
+ for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
+ ms->line++) {
+ if (len == 0) /* null line, garbage, etc */
+ continue;
+ if (line[len - 1] == '\n') {
+ lineno++;
+ line[len - 1] = '\0'; /* delete newline */
+ }
+ switch (line[0]) {
+ case '\0': /* empty, do not parse */
+ case '#': /* comment, do not parse */
+ continue;
+ case '!':
+ if (line[1] == ':') {
size_t i;
for (i = 0; bang[i].name != NULL; i++) {
- if (len - 2 > bang[i].len &&
+ if ((size_t)(len - 2) > bang[i].len &&
memcmp(bang[i].name, line + 2,
bang[i].len) == 0)
break;
@@ -680,13 +741,16 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
}
continue;
}
+ /*FALLTHROUGH*/
+ default:
if (parse(ms, marray, marraycount, line, lineno,
action) != 0)
(*errs)++;
+ break;
}
-
- (void)fclose(f);
}
+ free(line);
+ (void)fclose(f);
}
/*
@@ -694,14 +758,20 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs,
* const char *fn: name of magic file or directory
*/
+/*
+ * Comparison callback for arrays of `char *': each argument points at an
+ * array element, and the pointed-to strings are ordered with strcmp().
+ */
private int
+cmpstrp(const void *p1, const void *p2)
+{
+	char *const *lhs = (char *const *)p1;
+	char *const *rhs = (char *const *)p2;
+
+	return strcmp(*lhs, *rhs);
+}
+
+private int
apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
const char *fn, int action)
{
int errs = 0;
struct magic_entry *marray;
uint32_t marraycount, i, mentrycount = 0, starttest;
- size_t slen;
- char subfn[MAXPATHLEN];
+ size_t slen, files = 0, maxfiles = 0;
+ char **filearr = NULL, *mfn;
struct stat st;
DIR *dir;
struct dirent *d;
@@ -721,23 +791,47 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
(void)fprintf(stderr, "%s\n", usg_hdr);
/* load directory or file */
- /* FIXME: Read file names and sort them to prevent
- non-determinism. See Debian bug #488562. */
if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
dir = opendir(fn);
- if (dir) {
- while ((d = readdir(dir)) != NULL) {
- snprintf(subfn, sizeof(subfn), "%s/%s",
- fn, d->d_name);
- if (stat(subfn, &st) == 0 &&
- S_ISREG(st.st_mode)) {
- load_1(ms, action, subfn, &errs,
- &marray, &marraycount);
+ if (!dir) {
+ errs++;
+ goto out;
+ }
+ while ((d = readdir(dir)) != NULL) {
+ if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
+ file_oomem(ms,
+ strlen(fn) + strlen(d->d_name) + 2);
+ errs++;
+ closedir(dir);
+ goto out;
+ }
+ if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
+ free(mfn);
+ continue;
+ }
+ if (files >= maxfiles) {
+ size_t mlen;
+ maxfiles = (maxfiles + 1) * 2;
+ mlen = maxfiles * sizeof(*filearr);
+ if ((filearr = CAST(char **,
+ realloc(filearr, mlen))) == NULL) {
+ file_oomem(ms, mlen);
+ free(mfn);
+ closedir(dir);
+ errs++;
+ goto out;
}
}
- closedir(dir);
- } else
- errs++;
+ filearr[files++] = mfn;
+ }
+ closedir(dir);
+ qsort(filearr, files, sizeof(*filearr), cmpstrp);
+ for (i = 0; i < files; i++) {
+ load_1(ms, action, filearr[i], &errs, &marray,
+ &marraycount);
+ free(filearr[i]);
+ }
+ free(filearr);
} else
load_1(ms, action, fn, &errs, &marray, &marraycount);
if (errs)
@@ -789,7 +883,8 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
if (marray[i].mp->cont_level == 0)
break;
if (i != marraycount) {
- ms->line = marray[i].mp->lineno; /* XXX - Ugh! */
+ /* XXX - Ugh! */
+ ms->line = marray[i].mp->lineno;
file_magwarn(ms,
"level 0 \"default\" did not sort last");
}
@@ -905,6 +1000,11 @@ string_modifier_check(struct magic_set *ms, struct magic *m)
if ((ms->flags & MAGIC_CHECK) == 0)
return 0;
+ if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
+ file_magwarn(ms,
+ "'/BHhLl' modifiers are only allowed for pascal strings\n");
+ return -1;
+ }
switch (m->type) {
case FILE_BESTRING16:
case FILE_LESTRING16:
@@ -933,14 +1033,14 @@ string_modifier_check(struct magic_set *ms, struct magic *m)
}
break;
case FILE_REGEX:
- if ((m->str_flags & STRING_COMPACT_BLANK) != 0) {
+ if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
file_magwarn(ms, "'/%c' not allowed on regex\n",
- CHAR_COMPACT_BLANK);
+ CHAR_COMPACT_WHITESPACE);
return -1;
}
- if ((m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) != 0) {
+ if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
file_magwarn(ms, "'/%c' not allowed on regex\n",
- CHAR_COMPACT_OPTIONAL_BLANK);
+ CHAR_COMPACT_OPTIONAL_WHITESPACE);
return -1;
}
break;
@@ -1097,7 +1197,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
return -1;
}
me->mp = m = nm;
- me->max_count = cnt;
+ me->max_count = CAST(uint32_t, cnt);
}
m = &me->mp[me->cont_count++];
(void)memset(m, 0, sizeof(*m));
@@ -1133,7 +1233,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
m->cont_level = 0;
me->cont_count = 1;
}
- m->lineno = lineno;
+ m->lineno = CAST(uint32_t, lineno);
if (*l == '&') { /* m->cont_level == 0 checked below. */
++l; /* step over */
@@ -1281,8 +1381,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
++l;
}
m->str_range = 0;
- m->str_flags = 0;
- m->num_mask = 0;
+ m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
if ((op = get_op(*l)) != -1) {
if (!IS_STRING(m->type)) {
uint64_t val;
@@ -1306,18 +1405,20 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
file_magwarn(ms,
"multiple ranges");
have_range = 1;
- m->str_range = strtoul(l, &t, 0);
+ m->str_range = CAST(uint32_t,
+ strtoul(l, &t, 0));
if (m->str_range == 0)
file_magwarn(ms,
"zero range");
l = t - 1;
break;
- case CHAR_COMPACT_BLANK:
- m->str_flags |= STRING_COMPACT_BLANK;
+ case CHAR_COMPACT_WHITESPACE:
+ m->str_flags |=
+ STRING_COMPACT_WHITESPACE;
break;
- case CHAR_COMPACT_OPTIONAL_BLANK:
+ case CHAR_COMPACT_OPTIONAL_WHITESPACE:
m->str_flags |=
- STRING_COMPACT_OPTIONAL_BLANK;
+ STRING_COMPACT_OPTIONAL_WHITESPACE;
break;
case CHAR_IGNORE_LOWERCASE:
m->str_flags |= STRING_IGNORE_LOWERCASE;
@@ -1328,11 +1429,48 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp,
case CHAR_REGEX_OFFSET_START:
m->str_flags |= REGEX_OFFSET_START;
break;
+ case CHAR_BINTEST:
+ m->str_flags |= STRING_BINTEST;
+ break;
+ case CHAR_TEXTTEST:
+ m->str_flags |= STRING_TEXTTEST;
+ break;
+ case CHAR_PSTRING_1_LE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
+ break;
+ case CHAR_PSTRING_2_BE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
+ break;
+ case CHAR_PSTRING_2_LE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
+ break;
+ case CHAR_PSTRING_4_BE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
+ break;
+ case CHAR_PSTRING_4_LE:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
+ break;
+ case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
+ if (m->type != FILE_PSTRING)
+ goto bad;
+ m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
+ break;
default:
+ bad:
if (ms->flags & MAGIC_CHECK)
file_magwarn(ms,
- "string extension `%c' invalid",
- *l);
+ "string extension `%c' "
+ "invalid", *l);
return -1;
}
/* allow multiple '/' for readability */
@@ -1499,7 +1637,8 @@ out:
}
/*
- * Parse an Apple CREATOR/TYPE annotation from magic file and put it into magic[index - 1]
+ * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
+ * magic[index - 1]
*/
private int
parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
@@ -1509,19 +1648,21 @@ parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
if (m->apple[0] != '\0') {
- file_magwarn(ms, "Current entry already has a APPLE type `%.8s',"
- " new type `%s'", m->mimetype, l);
+ file_magwarn(ms, "Current entry already has a APPLE type "
+ "`%.8s', new type `%s'", m->mimetype, l);
return -1;
}
EATAB;
- for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
- || strchr("-+/.", *l)) && i < sizeof(m->apple); m->apple[i++] = *l++)
+ for (i = 0; *l && ((isascii((unsigned char)*l) &&
+ isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
+ i < sizeof(m->apple); m->apple[i++] = *l++)
continue;
if (i == sizeof(m->apple) && *l) {
+ /* We don't need to NUL terminate here, printing handles it */
if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "APPLE type `%s' truncated %zu",
- line, i);
+ file_magwarn(ms, "APPLE type `%s' truncated %"
+ SIZE_T_FORMAT "u", line, i);
}
if (i > 0)
@@ -1548,14 +1689,15 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
}
EATAB;
- for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l))
- || strchr("-+/.", *l)) && i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
+ for (i = 0; *l && ((isascii((unsigned char)*l) &&
+ isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
+ i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
continue;
if (i == sizeof(m->mimetype)) {
- m->desc[sizeof(m->mimetype) - 1] = '\0';
+ m->mimetype[sizeof(m->mimetype) - 1] = '\0';
if (ms->flags & MAGIC_CHECK)
- file_magwarn(ms, "MIME type `%s' truncated %zu",
- m->mimetype, i);
+ file_magwarn(ms, "MIME type `%s' truncated %"
+ SIZE_T_FORMAT "u", m->mimetype, i);
} else
m->mimetype[i] = '\0';
@@ -1844,8 +1986,10 @@ getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
if (isprint((unsigned char)c)) {
/* Allow escaping of
* ``relations'' */
- if (strchr("<>&^=!", c)
- == NULL) {
+ if (strchr("<>&^=!", c) == NULL
+ && (m->type != FILE_REGEX ||
+ strchr("[]().*?^$|{}", c)
+ == NULL)) {
file_magwarn(ms, "no "
"need to escape "
"`%c'", c);
@@ -1953,9 +2097,9 @@ getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
}
out:
*p = '\0';
- m->vallen = p - origp;
+ m->vallen = CAST(unsigned char, (p - origp));
if (m->type == FILE_PSTRING)
- m->vallen++;
+ m->vallen += (unsigned char)file_pstring_length_size(m);
return s;
}
@@ -1985,14 +2129,15 @@ file_showstr(FILE *fp, const char *s, size_t len)
char c;
for (;;) {
- c = *s++;
if (len == ~0U) {
+ c = *s++;
if (c == '\0')
break;
}
else {
if (len-- == 0)
break;
+ c = *s++;
}
if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
(void) fputc(c, fp);
@@ -2128,8 +2273,8 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
else
version = ptr[1];
if (version != VERSIONNO) {
- file_error(ms, 0, "File %d.%d supports only version %d magic "
- "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel,
+ file_error(ms, 0, "File %s supports only version %d magic "
+ "files. `%s' is version %d", VERSION,
VERSIONNO, dbname, version);
goto error1;
}
@@ -2171,7 +2316,7 @@ private int
apprentice_compile(struct magic_set *ms, struct magic **magicp,
uint32_t *nmagicp, const char *fn)
{
- int fd;
+ int fd = -1;
char *dbname;
int rv = -1;
@@ -2202,7 +2347,8 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp,
goto out;
}
- (void)close(fd);
+ if (fd != -1)
+ (void)close(fd);
rv = 0;
out:
free(dbname);
@@ -2347,3 +2493,51 @@ bs1(struct magic *m)
}
}
#endif /* COMPILE_ONLY */
+
+/*
+ * Return the size in bytes (1, 2 or 4) of the length field selected by
+ * the PSTRING_LEN bits of m->str_flags.  Any other value of those bits
+ * aborts the program.
+ */
+protected size_t
+file_pstring_length_size(const struct magic *m)
+{
+	size_t width;
+
+	switch (m->str_flags & PSTRING_LEN) {
+	case PSTRING_1_LE:
+		width = 1;
+		break;
+	case PSTRING_2_LE:
+	case PSTRING_2_BE:
+		width = 2;
+		break;
+	case PSTRING_4_LE:
+	case PSTRING_4_BE:
+		width = 4;
+		break;
+	default:
+		abort();	/* Impossible */
+		width = 1;
+		break;
+	}
+	return width;
+}
+/*
+ * Decode the length field stored at `s' according to the PSTRING_LEN bits
+ * of m->str_flags: one byte, or two/four bytes in little- or big-endian
+ * order.  If PSTRING_LENGTH_INCLUDES_ITSELF is set, the size of the length
+ * field is subtracted from the decoded value.  Any other value of the
+ * PSTRING_LEN bits aborts the program.
+ *
+ * m: magic entry supplying str_flags.
+ * s: start of the length field; must have at least as many readable
+ *    bytes as the selected field size.
+ *
+ * Returns the decoded length.
+ */
+protected size_t
+file_pstring_get_length(const struct magic *m, const char *s)
+{
+	/*
+	 * Read the bytes as unsigned: plain char may be signed, in which
+	 * case any byte >= 0x80 would sign-extend and corrupt the result.
+	 * Each byte is widened to uint32_t before shifting so that the
+	 * shift by 24 cannot overflow a signed int.
+	 */
+	const unsigned char *b = (const unsigned char *)s;
+	size_t len = 0;
+
+	switch (m->str_flags & PSTRING_LEN) {
+	case PSTRING_1_LE:
+		len = b[0];
+		break;
+	case PSTRING_2_LE:
+		len = ((uint32_t)b[1] << 8) | (uint32_t)b[0];
+		break;
+	case PSTRING_2_BE:
+		len = ((uint32_t)b[0] << 8) | (uint32_t)b[1];
+		break;
+	case PSTRING_4_LE:
+		len = ((uint32_t)b[3] << 24) | ((uint32_t)b[2] << 16) |
+		    ((uint32_t)b[1] << 8) | (uint32_t)b[0];
+		break;
+	case PSTRING_4_BE:
+		len = ((uint32_t)b[0] << 24) | ((uint32_t)b[1] << 16) |
+		    ((uint32_t)b[2] << 8) | (uint32_t)b[3];
+		break;
+	default:
+		abort();	/* Impossible */
+	}
+
+	/*
+	 * NOTE(review): if the stored value is smaller than the length
+	 * field itself this subtraction wraps around; presumably callers
+	 * bound the result against the buffer size -- confirm.
+	 */
+	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
+		len -= file_pstring_length_size(m);
+
+	return len;
+}
OpenPOWER on IntegriCloud