summaryrefslogtreecommitdiffstats
path: root/contrib/file/src/encoding.c
diff options
context:
space:
mode:
authordelphij <delphij@FreeBSD.org>2015-06-24 19:58:14 +0000
committerdelphij <delphij@FreeBSD.org>2015-06-24 19:58:14 +0000
commit80cac6ebcb3bd864730df83cf322cfa03207c067 (patch)
treee17bc37b90bc59b4b56ce77a4119bc361c3331b2 /contrib/file/src/encoding.c
parent5d11dcc72032e3027520c3aa2ffb5905115760e7 (diff)
downloadFreeBSD-src-80cac6ebcb3bd864730df83cf322cfa03207c067.zip
FreeBSD-src-80cac6ebcb3bd864730df83cf322cfa03207c067.tar.gz
MFC r284237,284277:
file 5.23. Relnotes: yes
Diffstat (limited to 'contrib/file/src/encoding.c')
-rw-r--r--contrib/file/src/encoding.c38
1 files changed, 32 insertions, 6 deletions
diff --git a/contrib/file/src/encoding.c b/contrib/file/src/encoding.c
index c1b23cc..3c116cd 100644
--- a/contrib/file/src/encoding.c
+++ b/contrib/file/src/encoding.c
@@ -35,7 +35,7 @@
#include "file.h"
#ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.13 2015/06/04 19:16:28 christos Exp $")
#endif /* lint */
#include "magic.h"
@@ -47,6 +47,7 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")
private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
size_t *);
+private int looks_utf7(const unsigned char *, size_t, unichar *, size_t *);
private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
@@ -88,9 +89,15 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
}
if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
- DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
- *code = "ASCII";
- *code_mime = "us-ascii";
+ if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) {
+ DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen));
+ *code = "UTF-7 Unicode";
+ *code_mime = "utf-7";
+ } else {
+ DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
+ *code = "ASCII";
+ *code_mime = "us-ascii";
+ }
} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
*code = "UTF-8 Unicode (with BOM)";
@@ -199,8 +206,8 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
private char text_chars[256] = {
- /* BEL BS HT LF FF CR */
- F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
+ /* BEL BS HT LF VT FF CR */
+ F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */
/* ESC */
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
@@ -372,6 +379,25 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
}
private int
+looks_utf7(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+ if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v')
+ switch (buf[3]) {
+ case '8':
+ case '9':
+ case '+':
+ case '/':
+ if (ubuf)
+ *ulen = 0;
+ return 1;
+ default:
+ return -1;
+ }
+ else
+ return -1;
+}
+
+private int
looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
size_t *ulen)
{
OpenPOWER on IntegriCloud