summaryrefslogtreecommitdiffstats
path: root/encoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'encoding.c')
-rw-r--r--encoding.c19
1 files changed, 18 insertions, 1 deletions
diff --git a/encoding.c b/encoding.c
index 4e94f9b..097673a 100644
--- a/encoding.c
+++ b/encoding.c
@@ -35,7 +35,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.3 2009/02/03 20:27:51 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.5 2010/07/21 16:47:17 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -52,6 +52,12 @@ private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
+#ifdef DEBUG_ENCODING
+#define DPRINTF(a) printf a
+#else
+#define DPRINTF(a)
+#endif
+
/*
* Try to determine whether text is in some character code we can
* identify. Each of these tests, if it succeeds, will leave
@@ -78,12 +84,16 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
*type = "text";
if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
*code = "ASCII";
*code_mime = "us-ascii";
} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
+ DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
*code = "UTF-8 Unicode (with BOM)";
*code_mime = "utf-8";
} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
+ DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
+ *code = "UTF-8 Unicode (with BOM)";
*code = "UTF-8 Unicode";
*code_mime = "utf-8";
} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
@@ -94,22 +104,29 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
*code = "Big-endian UTF-16 Unicode";
*code_mime = "utf-16be";
}
+ DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen));
*code = "ISO-8859";
*code_mime = "iso-8859-1";
} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen));
*code = "Non-ISO extended-ASCII";
*code_mime = "unknown-8bit";
} else {
from_ebcdic(buf, nbytes, nbuf);
if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen));
*code = "EBCDIC";
*code_mime = "ebcdic";
} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
+ DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n",
+ *ulen));
*code = "International EBCDIC";
*code_mime = "ebcdic";
} else { /* Doesn't look like text at all */
+ DPRINTF(("binary\n"));
rv = 0;
*type = "binary";
}
OpenPOWER on IntegriCloud