diff options
author | obrien <obrien@FreeBSD.org> | 2012-04-19 03:20:13 +0000 |
---|---|---|
committer | obrien <obrien@FreeBSD.org> | 2012-04-19 03:20:13 +0000 |
commit | 248502d7f08de3125e3e3cc808446294565ba5df (patch) | |
tree | c7640efe490a887a5460d7a88d62aca990489d21 /contrib/file/apprentice.c | |
parent | 86e3fa59a2a400ec95b0c84576fb00815631d92f (diff) | |
parent | 15f98df7891f1853090ecb6c4a9cc734e671ef6b (diff) | |
download | FreeBSD-src-248502d7f08de3125e3e3cc808446294565ba5df.zip FreeBSD-src-248502d7f08de3125e3e3cc808446294565ba5df.tar.gz |
Update file(1) to version 5.11.
Diffstat (limited to 'contrib/file/apprentice.c')
-rw-r--r-- | contrib/file/apprentice.c | 366 |
1 files changed, 280 insertions, 86 deletions
diff --git a/contrib/file/apprentice.c b/contrib/file/apprentice.c index 3827e5c..a9d4d17 100644 --- a/contrib/file/apprentice.c +++ b/contrib/file/apprentice.c @@ -32,11 +32,10 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: apprentice.c,v 1.151 2009/03/18 15:19:23 christos Exp $") +FILE_RCSID("@(#)$File: apprentice.c,v 1.173 2011/12/08 12:38:24 rrt Exp $") #endif /* lint */ #include "magic.h" -#include "patchlevel.h" #include <stdlib.h> #ifdef HAVE_UNISTD_H #include <unistd.h> @@ -72,10 +71,6 @@ FILE_RCSID("@(#)$File: apprentice.c,v 1.151 2009/03/18 15:19:23 christos Exp $") #define MAP_FILE 0 #endif -#ifndef MAXPATHLEN -#define MAXPATHLEN 1024 -#endif - struct magic_entry { struct magic *mp; uint32_t cont_count; @@ -97,6 +92,9 @@ private void eatsize(const char **); private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); private size_t apprentice_magic_strength(const struct magic *); private int apprentice_sort(const void *, const void *); +#ifndef COMPILE_ONLY +private void apprentice_list(struct mlist *, int ); +#endif private int apprentice_load(struct magic_set *, struct magic **, uint32_t *, const char *, int); #ifndef COMPILE_ONLY @@ -322,6 +320,12 @@ apprentice_1(struct magic_set *ms, const char *fn, int action, ml->next = mlist; mlist->prev = ml; + if (action == FILE_LIST) { + printf("Binary patterns:\n"); + apprentice_list(mlist, BINTEST); + printf("Text patterns:\n"); + apprentice_list(mlist, TEXTTEST); + } #endif /* COMPILE_ONLY */ return 0; } @@ -361,12 +365,10 @@ file_apprentice(struct magic_set *ms, const char *fn, int action) int file_err, errs = -1; struct mlist *mlist; - init_file_tables(); + if ((fn = magic_getpath(fn, action)) == NULL) + return NULL; - if (fn == NULL) - fn = getenv("MAGIC"); - if (fn == NULL) - fn = MAGIC; + init_file_tables(); if ((mfn = strdup(fn)) == NULL) { file_oomem(ms, strlen(fn)); @@ -558,6 +560,45 @@ apprentice_sort(const void *a, const void *b) return 1; } +/* + * Shows sorted patterns list in the order which is used for the matching + */ +#ifndef COMPILE_ONLY +private void +apprentice_list(struct mlist *mlist, int mode) +{ + uint32_t magindex = 0; + struct mlist *ml; + for (ml = mlist->next; ml != mlist; ml = ml->next) { + for (magindex = 0; magindex < ml->nmagic; magindex++) { + struct magic *m = &ml->magic[magindex]; + if ((m->flag & mode) != mode) { + /* Skip sub-tests */ + while (magindex + 1 < ml->nmagic && + ml->magic[magindex + 1].cont_level != 0) + ++magindex; + continue; /* Skip to next top-level test*/ + } + + /* + * Try to iterate over the tree until we find item with + * description/mimetype. + */ + while (magindex + 1 < ml->nmagic && + ml->magic[magindex + 1].cont_level != 0 && + *ml->magic[magindex].desc == '\0' && + *ml->magic[magindex].mimetype == '\0') + magindex++; + + printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n", + apprentice_magic_strength(m), + ml->magic[magindex].desc, + ml->magic[magindex].mimetype); + } + } +} +#endif /* COMPILE_ONLY */ + private void set_test_type(struct magic *mstart, struct magic *m) { @@ -593,20 +634,36 @@ set_test_type(struct magic *mstart, struct magic *m) case FILE_DOUBLE: case FILE_BEDOUBLE: case FILE_LEDOUBLE: + mstart->flag |= BINTEST; + break; case FILE_STRING: case FILE_PSTRING: case FILE_BESTRING16: case FILE_LESTRING16: - /* binary test, set flag */ - mstart->flag |= BINTEST; + /* Allow text overrides */ + if (mstart->str_flags & STRING_TEXTTEST) + mstart->flag |= TEXTTEST; + else + mstart->flag |= BINTEST; break; case FILE_REGEX: case FILE_SEARCH: #ifndef COMPILE_ONLY + /* Check for override */ + if (mstart->str_flags & STRING_BINTEST) + mstart->flag |= BINTEST; + if (mstart->str_flags & STRING_TEXTTEST) + mstart->flag |= TEXTTEST; + + if (mstart->flag & (TEXTTEST|BINTEST)) + break; + /* binary test if pattern is not text */ if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL, NULL) <= 0) mstart->flag |= BINTEST; + else + mstart->flag |= TEXTTEST; #endif break; case FILE_DEFAULT: @@ -627,34 +684,38 @@ private void load_1(struct magic_set *ms, int action, const char *fn, int *errs, struct magic_entry **marray, uint32_t *marraycount) { - char line[BUFSIZ]; - size_t lineno = 0; + size_t lineno = 0, llen = 0; + char *line = NULL; + ssize_t len; + FILE *f = fopen(ms->file = fn, "r"); if (f == NULL) { if (errno != ENOENT) file_error(ms, errno, "cannot read magic file `%s'", fn); (*errs)++; - } else { - /* read and parse this file */ - for (ms->line = 1; fgets(line, sizeof(line), f) != NULL; ms->line++) { - size_t len; - len = strlen(line); - if (len == 0) /* null line, garbage, etc */ - continue; - if (line[len - 1] == '\n') { - lineno++; - line[len - 1] = '\0'; /* delete newline */ - } - if (line[0] == '\0') /* empty, do not parse */ - continue; - if (line[0] == '#') /* comment, do not parse */ - continue; - if (line[0] == '!' && line[1] == ':') { + return; + } + + /* read and parse this file */ + for (ms->line = 1; (len = getline(&line, &llen, f)) != -1; + ms->line++) { + if (len == 0) /* null line, garbage, etc */ + continue; + if (line[len - 1] == '\n') { + lineno++; + line[len - 1] = '\0'; /* delete newline */ + } + switch (line[0]) { + case '\0': /* empty, do not parse */ + case '#': /* comment, do not parse */ + continue; + case '!': + if (line[1] == ':') { size_t i; for (i = 0; bang[i].name != NULL; i++) { - if (len - 2 > bang[i].len && + if ((size_t)(len - 2) > bang[i].len && memcmp(bang[i].name, line + 2, bang[i].len) == 0) break; @@ -680,13 +741,16 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs, } continue; } + /*FALLTHROUGH*/ + default: if (parse(ms, marray, marraycount, line, lineno, action) != 0) (*errs)++; + break; } - - (void)fclose(f); } + free(line); + (void)fclose(f); } /* @@ -694,14 +758,20 @@ load_1(struct magic_set *ms, int action, const char *fn, int *errs, * const char *fn: name of magic file or directory */ private int +cmpstrp(const void *p1, const void *p2) +{ + return strcmp(*(char *const *)p1, *(char *const *)p2); +} + +private int apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, const char *fn, int action) { int errs = 0; struct magic_entry *marray; uint32_t marraycount, i, mentrycount = 0, starttest; - size_t slen; - char subfn[MAXPATHLEN]; + size_t slen, files = 0, maxfiles = 0; + char **filearr = NULL, *mfn; struct stat st; DIR *dir; struct dirent *d; @@ -721,23 +791,47 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, (void)fprintf(stderr, "%s\n", usg_hdr); /* load directory or file */ - /* FIXME: Read file names and sort them to prevent - non-determinism. See Debian bug #488562. */ if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) { dir = opendir(fn); - if (dir) { - while ((d = readdir(dir)) != NULL) { - snprintf(subfn, sizeof(subfn), "%s/%s", - fn, d->d_name); - if (stat(subfn, &st) == 0 && - S_ISREG(st.st_mode)) { - load_1(ms, action, subfn, &errs, - &marray, &marraycount); + if (!dir) { + errs++; + goto out; + } + while ((d = readdir(dir)) != NULL) { + if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) { + file_oomem(ms, + strlen(fn) + strlen(d->d_name) + 2); + errs++; + closedir(dir); + goto out; + } + if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) { + free(mfn); + continue; + } + if (files >= maxfiles) { + size_t mlen; + maxfiles = (maxfiles + 1) * 2; + mlen = maxfiles * sizeof(*filearr); + if ((filearr = CAST(char **, + realloc(filearr, mlen))) == NULL) { + file_oomem(ms, mlen); + free(mfn); + closedir(dir); + errs++; + goto out; } } - closedir(dir); - } else - errs++; + filearr[files++] = mfn; + } + closedir(dir); + qsort(filearr, files, sizeof(*filearr), cmpstrp); + for (i = 0; i < files; i++) { + load_1(ms, action, filearr[i], &errs, &marray, + &marraycount); + free(filearr[i]); + } + free(filearr); } else load_1(ms, action, fn, &errs, &marray, &marraycount); if (errs) @@ -789,7 +883,8 @@ apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, if (marray[i].mp->cont_level == 0) break; if (i != marraycount) { - ms->line = marray[i].mp->lineno; /* XXX - Ugh! */ + /* XXX - Ugh! */ + ms->line = marray[i].mp->lineno; file_magwarn(ms, "level 0 \"default\" did not sort last"); } @@ -905,6 +1000,11 @@ string_modifier_check(struct magic_set *ms, struct magic *m) if ((ms->flags & MAGIC_CHECK) == 0) return 0; + if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) { + file_magwarn(ms, + "'/BHhLl' modifiers are only allowed for pascal strings\n"); + return -1; + } switch (m->type) { case FILE_BESTRING16: case FILE_LESTRING16: @@ -933,14 +1033,14 @@ string_modifier_check(struct magic_set *ms, struct magic *m) } break; case FILE_REGEX: - if ((m->str_flags & STRING_COMPACT_BLANK) != 0) { + if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) { file_magwarn(ms, "'/%c' not allowed on regex\n", - CHAR_COMPACT_BLANK); + CHAR_COMPACT_WHITESPACE); return -1; } - if ((m->str_flags & STRING_COMPACT_OPTIONAL_BLANK) != 0) { + if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) { file_magwarn(ms, "'/%c' not allowed on regex\n", - CHAR_COMPACT_OPTIONAL_BLANK); + CHAR_COMPACT_OPTIONAL_WHITESPACE); return -1; } break; @@ -1097,7 +1197,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, return -1; } me->mp = m = nm; - me->max_count = cnt; + me->max_count = CAST(uint32_t, cnt); } m = &me->mp[me->cont_count++]; (void)memset(m, 0, sizeof(*m)); @@ -1133,7 +1233,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, m->cont_level = 0; me->cont_count = 1; } - m->lineno = lineno; + m->lineno = CAST(uint32_t, lineno); if (*l == '&') { /* m->cont_level == 0 checked below. */ ++l; /* step over */ @@ -1281,8 +1381,7 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, ++l; } m->str_range = 0; - m->str_flags = 0; - m->num_mask = 0; + m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0; if ((op = get_op(*l)) != -1) { if (!IS_STRING(m->type)) { uint64_t val; @@ -1306,18 +1405,20 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, file_magwarn(ms, "multiple ranges"); have_range = 1; - m->str_range = strtoul(l, &t, 0); + m->str_range = CAST(uint32_t, + strtoul(l, &t, 0)); if (m->str_range == 0) file_magwarn(ms, "zero range"); l = t - 1; break; - case CHAR_COMPACT_BLANK: - m->str_flags |= STRING_COMPACT_BLANK; + case CHAR_COMPACT_WHITESPACE: + m->str_flags |= + STRING_COMPACT_WHITESPACE; break; - case CHAR_COMPACT_OPTIONAL_BLANK: + case CHAR_COMPACT_OPTIONAL_WHITESPACE: m->str_flags |= - STRING_COMPACT_OPTIONAL_BLANK; + STRING_COMPACT_OPTIONAL_WHITESPACE; break; case CHAR_IGNORE_LOWERCASE: m->str_flags |= STRING_IGNORE_LOWERCASE; @@ -1328,11 +1429,48 @@ parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, case CHAR_REGEX_OFFSET_START: m->str_flags |= REGEX_OFFSET_START; break; + case CHAR_BINTEST: + m->str_flags |= STRING_BINTEST; + break; + case CHAR_TEXTTEST: + m->str_flags |= STRING_TEXTTEST; + break; + case CHAR_PSTRING_1_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE; + break; + case CHAR_PSTRING_2_BE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE; + break; + case CHAR_PSTRING_2_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE; + break; + case CHAR_PSTRING_4_BE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE; + break; + case CHAR_PSTRING_4_LE: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE; + break; + case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF: + if (m->type != FILE_PSTRING) + goto bad; + m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF; + break; default: + bad: if (ms->flags & MAGIC_CHECK) file_magwarn(ms, - "string extension `%c' invalid", - *l); + "string extension `%c' " + "invalid", *l); return -1; } /* allow multiple '/' for readability */ @@ -1499,7 +1637,8 @@ out: } /* - * Parse an Apple CREATOR/TYPE annotation from magic file and put it into magic[index - 1] + * Parse an Apple CREATOR/TYPE annotation from magic file and put it into + * magic[index - 1] */ private int parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) @@ -1509,19 +1648,21 @@ parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line) struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1]; if (m->apple[0] != '\0') { - file_magwarn(ms, "Current entry already has a APPLE type `%.8s'," - " new type `%s'", m->mimetype, l); + file_magwarn(ms, "Current entry already has a APPLE type " + "`%.8s', new type `%s'", m->mimetype, l); return -1; } EATAB; - for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l)) - || strchr("-+/.", *l)) && i < sizeof(m->apple); m->apple[i++] = *l++) + for (i = 0; *l && ((isascii((unsigned char)*l) && + isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && + i < sizeof(m->apple); m->apple[i++] = *l++) continue; if (i == sizeof(m->apple) && *l) { + /* We don't need to NUL terminate here, printing handles it */ if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "APPLE type `%s' truncated %zu", - line, i); + file_magwarn(ms, "APPLE type `%s' truncated %" + SIZE_T_FORMAT "u", line, i); } if (i > 0) @@ -1548,14 +1689,15 @@ parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line) } EATAB; - for (i = 0; *l && ((isascii((unsigned char)*l) && isalnum((unsigned char)*l)) - || strchr("-+/.", *l)) && i < sizeof(m->mimetype); m->mimetype[i++] = *l++) + for (i = 0; *l && ((isascii((unsigned char)*l) && + isalnum((unsigned char)*l)) || strchr("-+/.", *l)) && + i < sizeof(m->mimetype); m->mimetype[i++] = *l++) continue; if (i == sizeof(m->mimetype)) { - m->desc[sizeof(m->mimetype) - 1] = '\0'; + m->mimetype[sizeof(m->mimetype) - 1] = '\0'; if (ms->flags & MAGIC_CHECK) - file_magwarn(ms, "MIME type `%s' truncated %zu", - m->mimetype, i); + file_magwarn(ms, "MIME type `%s' truncated %" + SIZE_T_FORMAT "u", m->mimetype, i); } else m->mimetype[i] = '\0'; @@ -1844,8 +1986,10 @@ getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) if (isprint((unsigned char)c)) { /* Allow escaping of * ``relations'' */ - if (strchr("<>&^=!", c) - == NULL) { + if (strchr("<>&^=!", c) == NULL + && (m->type != FILE_REGEX || + strchr("[]().*?^$|{}", c) + == NULL)) { file_magwarn(ms, "no " "need to escape " "`%c'", c); @@ -1953,9 +2097,9 @@ getstr(struct magic_set *ms, struct magic *m, const char *s, int warn) } out: *p = '\0'; - m->vallen = p - origp; + m->vallen = CAST(unsigned char, (p - origp)); if (m->type == FILE_PSTRING) - m->vallen++; + m->vallen += (unsigned char)file_pstring_length_size(m); return s; } @@ -1985,14 +2129,15 @@ file_showstr(FILE *fp, const char *s, size_t len) char c; for (;;) { - c = *s++; if (len == ~0U) { + c = *s++; if (c == '\0') break; } else { if (len-- == 0) break; + c = *s++; } if (c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */ (void) fputc(c, fp); @@ -2128,8 +2273,8 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, else version = ptr[1]; if (version != VERSIONNO) { - file_error(ms, 0, "File %d.%d supports only version %d magic " - "files. `%s' is version %d", FILE_VERSION_MAJOR, patchlevel, + file_error(ms, 0, "File %s supports only version %d magic " + "files. `%s' is version %d", VERSION, VERSIONNO, dbname, version); goto error1; } @@ -2171,7 +2316,7 @@ private int apprentice_compile(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, const char *fn) { - int fd; + int fd = -1; char *dbname; int rv = -1; @@ -2202,7 +2347,8 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp, goto out; } - (void)close(fd); + if (fd != -1) + (void)close(fd); rv = 0; out: free(dbname); @@ -2347,3 +2493,51 @@ bs1(struct magic *m) } } #endif /* COMPILE_ONLY */ + +protected size_t +file_pstring_length_size(const struct magic *m) +{ + switch (m->str_flags & PSTRING_LEN) { + case PSTRING_1_LE: + return 1; + case PSTRING_2_LE: + case PSTRING_2_BE: + return 2; + case PSTRING_4_LE: + case PSTRING_4_BE: + return 4; + default: + abort(); /* Impossible */ + return 1; + } +} +protected size_t +file_pstring_get_length(const struct magic *m, const char *s) +{ + size_t len = 0; + + switch (m->str_flags & PSTRING_LEN) { + case PSTRING_1_LE: + len = *s; + break; + case PSTRING_2_LE: + len = (s[1] << 8) | s[0]; + break; + case PSTRING_2_BE: + len = (s[0] << 8) | s[1]; + break; + case PSTRING_4_LE: + len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0]; + break; + case PSTRING_4_BE: + len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3]; + break; + default: + abort(); /* Impossible */ + } + + if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) + len -= file_pstring_length_size(m); + + return len; +} |