diff options
author | obrien <obrien@FreeBSD.org> | 2011-10-06 06:01:12 +0000 |
---|---|---|
committer | obrien <obrien@FreeBSD.org> | 2011-10-06 06:01:12 +0000 |
commit | 75c49f9dd6a0ff710f7c791a485899c7a07af444 (patch) | |
tree | d25590ff6bfc3386fbca9494d26b8761e3d33410 /encoding.c | |
parent | 862d9405b857dba35f8f2eb6d0623b9a552d4353 (diff) | |
download | FreeBSD-src-75c49f9dd6a0ff710f7c791a485899c7a07af444.zip FreeBSD-src-75c49f9dd6a0ff710f7c791a485899c7a07af444.tar.gz |
Virgin import of Christos Zoulas's FILE 5.09.
Diffstat (limited to 'encoding.c')
-rw-r--r-- | encoding.c | 19 |
1 files changed, 18 insertions, 1 deletions
```diff
@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.5 2010/07/21 16:47:17 christos Exp $")
 #endif /* lint */
 
 #include "magic.h"
@@ -52,6 +52,12 @@ private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
 
+#ifdef DEBUG_ENCODING
+#define DPRINTF(a) printf a
+#else
+#define DPRINTF(a)
+#endif
+
 /*
  * Try to determine whether text is in some character code we can
  * identify. Each of these tests, if it succeeds, will leave
@@ -78,12 +84,16 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 
 	*type = "text";
 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+		DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
 		*code = "ASCII";
 		*code_mime = "us-ascii";
 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
 		*code = "UTF-8 Unicode (with BOM)";
 		*code_mime = "utf-8";
 	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+		DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
+		*code = "UTF-8 Unicode (with BOM)";
 		*code = "UTF-8 Unicode";
 		*code_mime = "utf-8";
 	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
@@ -94,22 +104,29 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 			*code = "Big-endian UTF-16 Unicode";
 			*code_mime = "utf-16be";
 		}
+		DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
 	} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+		DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen));
 		*code = "ISO-8859";
 		*code_mime = "iso-8859-1";
 	} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+		DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen));
 		*code = "Non-ISO extended-ASCII";
 		*code_mime = "unknown-8bit";
 	} else {
 		from_ebcdic(buf, nbytes, nbuf);
 
 		if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+			DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen));
 			*code = "EBCDIC";
 			*code_mime = "ebcdic";
 		} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+			DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n",
+			    *ulen));
 			*code = "International EBCDIC";
 			*code_mime = "ebcdic";
 		} else { /* Doesn't look like text at all */
+			DPRINTF(("binary\n"));
 			rv = 0;
 			*type = "binary";
 		}
```