diff options
author | delphij <delphij@FreeBSD.org> | 2015-06-24 19:58:14 +0000 |
---|---|---|
committer | delphij <delphij@FreeBSD.org> | 2015-06-24 19:58:14 +0000 |
commit | 80cac6ebcb3bd864730df83cf322cfa03207c067 (patch) | |
tree | e17bc37b90bc59b4b56ce77a4119bc361c3331b2 /contrib/file/src/encoding.c | |
parent | 5d11dcc72032e3027520c3aa2ffb5905115760e7 (diff) | |
download | FreeBSD-src-80cac6ebcb3bd864730df83cf322cfa03207c067.zip FreeBSD-src-80cac6ebcb3bd864730df83cf322cfa03207c067.tar.gz |
MFC r284237,284277:
file 5.23.
Relnotes: yes
Diffstat (limited to 'contrib/file/src/encoding.c')
-rw-r--r-- | contrib/file/src/encoding.c | 38 |
1 file changed, 32 insertions, 6 deletions
```diff
diff --git a/contrib/file/src/encoding.c b/contrib/file/src/encoding.c
index c1b23cc..3c116cd 100644
--- a/contrib/file/src/encoding.c
+++ b/contrib/file/src/encoding.c
@@ -35,7 +35,7 @@
 #include "file.h"
 
 #ifndef lint
-FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")
+FILE_RCSID("@(#)$File: encoding.c,v 1.13 2015/06/04 19:16:28 christos Exp $")
 #endif /* lint */
 
 #include "magic.h"
@@ -47,6 +47,7 @@ FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")
 
 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, size_t *);
+private int looks_utf7(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
@@ -88,9 +89,15 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 	}
 
 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
-		DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
-		*code = "ASCII";
-		*code_mime = "us-ascii";
+		if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) {
+			DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen));
+			*code = "UTF-7 Unicode";
+			*code_mime = "utf-7";
+		} else {
+			DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
+			*code = "ASCII";
+			*code_mime = "us-ascii";
+		}
 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
 		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
 		*code = "UTF-8 Unicode (with BOM)";
@@ -199,8 +206,8 @@ file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, uni
 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
 
 private char text_chars[256] = {
-	/* BEL BS HT LF FF CR */
-	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
+	/* BEL BS HT LF VT FF CR */
+	F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */
 	/* ESC */
 	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
@@ -372,6 +379,25 @@ looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 }
 
 private int
+looks_utf7(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
+{
+	if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v')
+		switch (buf[3]) {
+		case '8':
+		case '9':
+		case '+':
+		case '/':
+			if (ubuf)
+				*ulen = 0;
+			return 1;
+		default:
+			return -1;
+		}
+	else
+		return -1;
+}
+
+private int
 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
     size_t *ulen)
 {
```