diff options
author | obrien <obrien@FreeBSD.org> | 2012-04-19 03:20:13 +0000 |
---|---|---|
committer | obrien <obrien@FreeBSD.org> | 2012-04-19 03:20:13 +0000 |
commit | 248502d7f08de3125e3e3cc808446294565ba5df (patch) | |
tree | c7640efe490a887a5460d7a88d62aca990489d21 /contrib/file/ascmagic.c | |
parent | 86e3fa59a2a400ec95b0c84576fb00815631d92f (diff) | |
parent | 15f98df7891f1853090ecb6c4a9cc734e671ef6b (diff) | |
download | FreeBSD-src-248502d7f08de3125e3e3cc808446294565ba5df.zip FreeBSD-src-248502d7f08de3125e3e3cc808446294565ba5df.tar.gz |
Update file(1) to version 5.11.
Diffstat (limited to 'contrib/file/ascmagic.c')
-rw-r--r-- | contrib/file/ascmagic.c | 136 |
1 files changed, 53 insertions, 83 deletions
diff --git a/contrib/file/ascmagic.c b/contrib/file/ascmagic.c index 9236fb4..5a1caac 100644 --- a/contrib/file/ascmagic.c +++ b/contrib/file/ascmagic.c @@ -26,8 +26,7 @@ * SUCH DAMAGE. */ /* - * ASCII magic -- file types that we know based on keywords - * that can appear anywhere in the file. + * ASCII magic -- try to detect text encoding. * * Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000, * to handle character codes other than ASCII on a unified basis. @@ -36,7 +35,7 @@ #include "file.h" #ifndef lint -FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $") +FILE_RCSID("@(#)$File: ascmagic.c,v 1.84 2011/12/08 12:38:24 rrt Exp $") #endif /* lint */ #include "magic.h" @@ -47,13 +46,11 @@ FILE_RCSID("@(#)$File: ascmagic.c,v 1.75 2009/02/03 20:27:51 christos Exp $") #ifdef HAVE_UNISTD_H #include <unistd.h> #endif -#include "names.h" #define MAXLINELEN 300 /* longest sane line length */ #define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \ || (x) == 0x85 || (x) == '\f') -private int ascmatch(const unsigned char *, const unichar *, size_t); private unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t); private size_t trim_nuls(const unsigned char *, size_t); @@ -71,7 +68,8 @@ trim_nuls(const unsigned char *buf, size_t nbytes) } protected int -file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) +file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes, + int text) { unichar *ubuf = NULL; size_t ulen; @@ -88,17 +86,13 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) /* If file doesn't look like any sort of text, give up. */ if (file_encoding(ms, buf, nbytes, &ubuf, &ulen, &code, &code_mime, - &type) == 0) { + &type) == 0) rv = 0; - goto done; - } - - rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, - type); + else + rv = file_ascmagic_with_encoding(ms, buf, nbytes, ubuf, ulen, code, + type, text); - done: - if (ubuf) - free(ubuf); + free(ubuf); return rv; } @@ -106,11 +100,10 @@ file_ascmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes) protected int file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t ulen, const char *code, - const char *type) + const char *type, int text) { unsigned char *utf8_buf = NULL, *utf8_end; size_t mlen, i; - const struct names *p; int rv = -1; int mime = ms->flags & MAGIC_MIME; @@ -125,6 +118,7 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, int n_lf = 0; int n_cr = 0; int n_nel = 0; + int executable = 0; size_t last_line_end = (size_t)-1; int has_long_lines = 0; @@ -140,56 +134,23 @@ file_ascmagic_with_encoding(struct magic_set *ms, const unsigned char *buf, goto done; } - /* Convert ubuf to UTF-8 and try text soft magic */ - /* malloc size is a conservative overestimate; could be - improved, or at least realloced after conversion. */ - mlen = ulen * 6; - if ((utf8_buf = CAST(unsigned char *, malloc(mlen))) == NULL) { - file_oomem(ms, mlen); - goto done; - } - if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL) - goto done; - if ((rv = file_softmagic(ms, utf8_buf, (size_t)(utf8_end - utf8_buf), - TEXTTEST)) != 0) - goto done; - else - rv = -1; - - /* look for tokens from names.h - this is expensive! */ - if ((ms->flags & MAGIC_NO_CHECK_TOKENS) != 0) - goto subtype_identified; - - i = 0; - while (i < ulen) { - size_t end; - - /* skip past any leading space */ - while (i < ulen && ISSPC(ubuf[i])) - i++; - if (i >= ulen) - break; - - /* find the next whitespace */ - for (end = i + 1; end < nbytes; end++) - if (ISSPC(ubuf[end])) - break; - - /* compare the word thus isolated against the token list */ - for (p = names; p < names + NNAMES; p++) { - if (ascmatch((const unsigned char *)p->name, ubuf + i, - end - i)) { - subtype = types[p->type].human; - subtype_mime = types[p->type].mime; - goto subtype_identified; - } + if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) { + /* Convert ubuf to UTF-8 and try text soft magic */ + /* malloc size is a conservative overestimate; could be + improved, or at least realloced after conversion. */ + mlen = ulen * 6; + if ((utf8_buf = CAST(unsigned char *, malloc(mlen))) == NULL) { + file_oomem(ms, mlen); + goto done; } - - i = end; + if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) + == NULL) + goto done; + if ((rv = file_softmagic(ms, utf8_buf, + (size_t)(utf8_end - utf8_buf), TEXTTEST, text)) == 0) + rv = -1; } -subtype_identified: - /* Now try to discover other details about the file. */ for (i = 0; i < ulen; i++) { if (ubuf[i] == '\n') { @@ -232,7 +193,7 @@ subtype_identified: goto done; } if (mime) { - if ((mime & MAGIC_MIME_TYPE) != 0) { + if (!file_printedlen(ms) && (mime & MAGIC_MIME_TYPE) != 0) { if (subtype_mime) { if (file_printf(ms, "%s", subtype_mime) == -1) goto done; @@ -242,6 +203,28 @@ subtype_identified: } } } else { + if (file_printedlen(ms)) { + switch (file_replace(ms, " text$", ", ")) { + case 0: + switch (file_replace(ms, " text executable$", + ", ")) { + case 0: + if (file_printf(ms, ", ") == -1) + goto done; + case -1: + goto done; + default: + executable = 1; + break; + } + break; + case -1: + goto done; + default: + break; + } + } + if (file_printf(ms, "%s", code) == -1) goto done; @@ -253,6 +236,10 @@ subtype_identified: if (file_printf(ms, " %s", type) == -1) goto done; + if (executable) + if (file_printf(ms, " executable") == -1) + goto done; + if (has_long_lines) if (file_printf(ms, ", with very long lines") == -1) goto done; @@ -309,28 +296,11 @@ subtype_identified: } rv = 1; done: - if (utf8_buf) - free(utf8_buf); + free(utf8_buf); return rv; } -private int -ascmatch(const unsigned char *s, const unichar *us, size_t ulen) -{ - size_t i; - - for (i = 0; i < ulen; i++) { - if (s[i] != us[i]) - return 0; - } - - if (s[i]) - return 0; - else - return 1; -} - /* * Encode Unicode string as UTF-8, returning pointer to character * after end of string, or NULL if an invalid character is found. |