diff options
Diffstat (limited to 'contrib/file/apprentice.c')
-rw-r--r-- | contrib/file/apprentice.c | 307 |
1 files changed, 243 insertions, 64 deletions
diff --git a/contrib/file/apprentice.c b/contrib/file/apprentice.c index 22e2d84..4432a56 100644 --- a/contrib/file/apprentice.c +++ b/contrib/file/apprentice.c @@ -45,7 +45,7 @@ #endif #ifndef lint -FILE_RCSID("@(#)$Id: apprentice.c,v 1.82 2004/11/24 18:56:04 christos Exp $") +FILE_RCSID("@(#)$Id: apprentice.c,v 1.87 2006/03/02 22:08:57 christos Exp $") #endif /* lint */ #define EATAB {while (isascii((unsigned char) *l) && \ @@ -74,15 +74,28 @@ FILE_RCSID("@(#)$Id: apprentice.c,v 1.82 2004/11/24 18:56:04 christos Exp $") #define MAXPATHLEN 1024 #endif -#define IS_STRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \ +#define IS_PLAINSTRING(t) ((t) == FILE_STRING || (t) == FILE_PSTRING || \ (t) == FILE_BESTRING16 || (t) == FILE_LESTRING16) + +#define IS_STRING(t) (IS_PLAINSTRING(t) || (t) == FILE_REGEX || \ + (t) == FILE_SEARCH) + +struct magic_entry { + struct magic *mp; + uint32_t cont_count; + uint32_t max_count; +}; -private int getvalue(struct magic_set *ms, struct magic *, char **); +private int getvalue(struct magic_set *ms, struct magic *, const char **); private int hextoint(int); -private char *getstr(struct magic_set *, char *, char *, int, int *); -private int parse(struct magic_set *, struct magic **, uint32_t *, char *, int); -private void eatsize(char **); +private const char *getstr(struct magic_set *, const char *, char *, int, + int *); +private int parse(struct magic_set *, struct magic_entry **, uint32_t *, + const char *, int); +private void eatsize(const char **); private int apprentice_1(struct magic_set *, const char *, int, struct mlist *); +private size_t apprentice_magic_strength(const struct magic *); +private int apprentice_sort(const void *, const void *); private int apprentice_file(struct magic_set *, struct magic **, uint32_t *, const char *, int); private void byteswap(struct magic *, uint32_t); @@ -99,6 +112,7 @@ private int check_format(struct magic_set *, struct magic *); private size_t maxmagic = 0; private size_t magicsize = sizeof(struct magic); + #ifdef COMPILE_ONLY int main(int, char *[]); @@ -161,6 +175,7 @@ apprentice_1(struct magic_set *ms, const char *fn, int action, free(magic); return rv; } + #ifndef COMPILE_ONLY if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) { if (ms->flags & MAGIC_CHECK) @@ -283,6 +298,64 @@ file_apprentice(struct magic_set *ms, const char *fn, int action) return mlist; } +private size_t +apprentice_magic_strength(const struct magic *m) +{ + switch (m->type) { + case FILE_BYTE: + return 1; + + case FILE_SHORT: + case FILE_LESHORT: + case FILE_BESHORT: + return 2; + + case FILE_LONG: + case FILE_LELONG: + case FILE_BELONG: + case FILE_MELONG: + return 4; + + case FILE_PSTRING: + case FILE_STRING: + case FILE_REGEX: + case FILE_BESTRING16: + case FILE_LESTRING16: + case FILE_SEARCH: + return m->vallen; + + case FILE_DATE: + case FILE_LEDATE: + case FILE_BEDATE: + case FILE_MEDATE: + return 4; + + case FILE_LDATE: + case FILE_LELDATE: + case FILE_BELDATE: + case FILE_MELDATE: + return 8; + + default: + return 0; + } +} + +private int +apprentice_sort(const void *a, const void *b) +{ + const struct magic_entry *ma = a; + const struct magic_entry *mb = b; + size_t sa = apprentice_magic_strength(ma->mp); + size_t sb = apprentice_magic_strength(mb->mp); + if (sa == sb) + return 0; + else if (sa > sb) + return -1; + else + return 1; +} + /* * parse from a file * const char *fn: name of magic file @@ -296,6 +369,8 @@ apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, FILE *f; char line[BUFSIZ+1]; int errs = 0; + struct magic_entry *marray; + uint32_t marraycount, i, mentrycount; f = fopen(ms->file = fn, "r"); if (f == NULL) { @@ -306,12 +381,12 @@ apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, } maxmagic = MAXMAGIS; - *magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic)); - if (*magicp == NULL) { + if ((marray = malloc(maxmagic * sizeof(*marray))) == NULL) { (void)fclose(f); file_oomem(ms); return -1; } + marraycount = 0; /* print silly verbose header for USG compat. */ if (action == FILE_CHECK) @@ -320,23 +395,53 @@ apprentice_file(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, /* parse it */ for (ms->line = 1; fgets(line, BUFSIZ, f) != NULL; ms->line++) { size_t len; - if (line[0]=='#') /* comment, do not parse */ + if (line[0] == '#') /* comment, do not parse */ continue; len = strlen(line); if (len < 2) /* null line, garbage, etc */ continue; - line[len - 1] = '\0'; /* delete newline */ - if (parse(ms, magicp, nmagicp, line, action) != 0) - errs = 1; + if (line[len - 1] == '\n') + line[len - 1] = '\0'; /* delete newline */ + if (parse(ms, &marray, &marraycount, line, action) != 0) + errs++; } (void)fclose(f); + if (errs) + goto out; + +#ifndef NOORDER + qsort(marray, marraycount, sizeof(*marray), apprentice_sort); +#endif + + for (i = 0, mentrycount = 0; i < marraycount; i++) + mentrycount += marray[i].cont_count; + + if ((*magicp = malloc(sizeof(**magicp) * mentrycount)) == NULL) { + file_oomem(ms); + errs++; + goto out; + } + + mentrycount = 0; + for (i = 0; i < marraycount; i++) { + (void)memcpy(*magicp + mentrycount, marray[i].mp, + marray[i].cont_count * sizeof(**magicp)); + mentrycount += marray[i].cont_count; + } +out: + for (i = 0; i < marraycount; i++) + free(marray[i].mp); + free(marray); if (errs) { - free(*magicp); *magicp = NULL; *nmagicp = 0; + return errs; + } else { + *nmagicp = mentrycount; + return 0; } - return errs; + } /* @@ -363,20 +468,23 @@ file_signextend(struct magic_set *ms, struct magic *m, uint32_t v) case FILE_DATE: case FILE_BEDATE: case FILE_LEDATE: + case FILE_MEDATE: case FILE_LDATE: case FILE_BELDATE: case FILE_LELDATE: + case FILE_MELDATE: case FILE_LONG: case FILE_BELONG: case FILE_LELONG: + case FILE_MELONG: v = (int32_t) v; break; case FILE_STRING: case FILE_PSTRING: case FILE_BESTRING16: case FILE_LESTRING16: - break; case FILE_REGEX: + case FILE_SEARCH: break; default: if (ms->flags & MAGIC_CHECK) @@ -391,41 +499,85 @@ file_signextend(struct magic_set *ms, struct magic *m, uint32_t v) * parse one line from magic file, put into magic[index++] if valid */ private int -parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, - int action) +parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, + const char *line, int action) { int i = 0; + struct magic_entry *me; struct magic *m; + const char *l = line; char *t; private const char *fops = FILE_OPS; uint32_t val; + uint32_t cont_level, cont_count; -#define ALLOC_INCR 200 - if (*nmagicp + 1 >= maxmagic){ - maxmagic += ALLOC_INCR; - if ((m = (struct magic *) realloc(*magicp, - sizeof(struct magic) * maxmagic)) == NULL) { - file_oomem(ms); - if (*magicp) - free(*magicp); - return -1; - } - *magicp = m; - memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic) - * ALLOC_INCR); - } - m = &(*magicp)[*nmagicp]; - m->flag = 0; - m->cont_level = 0; + cont_level = 0; while (*l == '>') { ++l; /* step over */ - m->cont_level++; + cont_level++; + } + +#define ALLOC_CHUNK (size_t)10 +#define ALLOC_INCR (size_t)200 + + if (cont_level != 0) { + if (*nmentryp == 0) { + file_error(ms, 0, "No current entry for continuation"); + return -1; + } + me = &(*mentryp)[*nmentryp - 1]; + if (me->cont_count == me->max_count) { + struct magic *nm; + size_t cnt = me->max_count + ALLOC_CHUNK; + if ((nm = realloc(me->mp, sizeof(*nm) * cnt)) == NULL) { + file_oomem(ms); + return -1; + } + me->mp = m = nm; + me->max_count = cnt; + } + m = &me->mp[me->cont_count++]; + memset(m, 0, sizeof(*m)); + m->cont_level = cont_level; + } else { + if (*nmentryp == maxmagic) { + struct magic_entry *mp; + + maxmagic += ALLOC_INCR; + if ((mp = realloc(*mentryp, sizeof(*mp) * maxmagic)) == + NULL) { + file_oomem(ms); + return -1; + } + (void)memset(&mp[*nmentryp], 0, sizeof(*mp) * + ALLOC_INCR); + *mentryp = mp; + } + me = &(*mentryp)[*nmentryp]; + if (me->mp == NULL) { + if ((m = malloc(sizeof(*m) * ALLOC_CHUNK)) == NULL) { + file_oomem(ms); + return -1; + } + me->mp = m; + me->max_count = ALLOC_CHUNK; + } else + m = me->mp; + memset(m, 0, sizeof(*m)); + m->cont_level = 0; + me->cont_count = 1; } + if (m->cont_level != 0 && *l == '&') { + ++l; /* step over */ + m->flag |= OFFADD; + } if (m->cont_level != 0 && *l == '(') { ++l; /* step over */ m->flag |= INDIR; + if (m->flag & OFFADD) + m->flag = (m->flag & ~OFFADD) | INDIROFFADD; } if (m->cont_level != 0 && *l == '&') { ++l; /* step over */ @@ -454,6 +606,9 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, case 'L': m->in_type = FILE_BELONG; break; + case 'm': + m->in_type = FILE_MELONG; + break; case 'h': case 's': m->in_type = FILE_LESHORT; @@ -478,7 +633,7 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, l++; } if (*l == '~') { - m->in_op = FILE_OPINVERSE; + m->in_op |= FILE_OPINVERSE; l++; } switch (*l) { @@ -515,15 +670,19 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, l++; break; } - if (isdigit((unsigned char)*l)) - m->in_offset = (uint32_t)strtoul(l, &t, 0); - else - t = l; - if (*t++ != ')') + if (*l == '(') { + m->in_op |= FILE_OPINDIRECT; + l++; + } + if (isdigit((unsigned char)*l) || *l == '-') { + m->in_offset = (int32_t)strtol(l, &t, 0); + l = t; + } + if (*l++ != ')' || + ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "missing ')' in indirect offset"); - l = t; } @@ -541,14 +700,18 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, #define NBEDATE 6 #define NLESHORT 7 #define NLELONG 6 +#define NMELONG 6 #define NLEDATE 6 +#define NMEDATE 6 #define NPSTRING 7 #define NLDATE 5 #define NBELDATE 7 #define NLELDATE 7 +#define NMELDATE 7 #define NREGEX 5 #define NBESTRING16 10 #define NLESTRING16 10 +#define NSEARCH 6 if (*l == 'u') { ++l; @@ -589,9 +752,15 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } else if (strncmp(l, "lelong", NLELONG)==0) { m->type = FILE_LELONG; l += NLELONG; + } else if (strncmp(l, "melong", NMELONG)==0) { + m->type = FILE_MELONG; + l += NMELONG; } else if (strncmp(l, "ledate", NLEDATE)==0) { m->type = FILE_LEDATE; l += NLEDATE; + } else if (strncmp(l, "medate", NMEDATE)==0) { + m->type = FILE_MEDATE; + l += NMEDATE; } else if (strncmp(l, "pstring", NPSTRING)==0) { m->type = FILE_PSTRING; l += NPSTRING; @@ -604,6 +773,9 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } else if (strncmp(l, "leldate", NLELDATE)==0) { m->type = FILE_LELDATE; l += NLELDATE; + } else if (strncmp(l, "meldate", NMELDATE)==0) { + m->type = FILE_MELDATE; + l += NMELDATE; } else if (strncmp(l, "regex", NREGEX)==0) { m->type = FILE_REGEX; l += NREGEX; @@ -613,6 +785,9 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } else if (strncmp(l, "lestring16", NLESTRING16)==0) { m->type = FILE_LESTRING16; l += NLESTRING16; + } else if (strncmp(l, "search", NSEARCH)==0) { + m->type = FILE_SEARCH; + l += NSEARCH; } else { if (ms->flags & MAGIC_CHECK) file_magwarn(ms, "type `%s' invalid", l); @@ -622,15 +797,16 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */ if (*l == '~') { if (!IS_STRING(m->type)) - m->mask_op = FILE_OPINVERSE; + m->mask_op |= FILE_OPINVERSE; ++l; } if ((t = strchr(fops, *l)) != NULL) { uint32_t op = (uint32_t)(t - fops); - if (op != FILE_OPDIVIDE || !IS_STRING(m->type)) { + if (op != FILE_OPDIVIDE || !IS_PLAINSTRING(m->type)) { ++l; m->mask_op |= op; - val = (uint32_t)strtoul(l, &l, 0); + val = (uint32_t)strtoul(l, &t, 0); + l = t; m->mask = file_signextend(ms, m, val); eatsize(&l); } else { @@ -655,6 +831,7 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, return -1; } } + ++l; } } /* @@ -678,15 +855,12 @@ parse(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, char *l, } break; case '!': - if (!IS_STRING(m->type)) { - m->reln = *l; - ++l; - break; - } - /*FALLTHROUGH*/ + m->reln = *l; + ++l; + break; default: - if (*l == 'x' && isascii((unsigned char)l[1]) && - isspace((unsigned char)l[1])) { + if (*l == 'x' && ((isascii((unsigned char)l[1]) && + isspace((unsigned char)l[1])) || !l[1])) { m->reln = *l; ++l; goto GetDesc; /* Bill The Cat */ @@ -730,7 +904,8 @@ GetDesc: file_mdump(m); } #endif - ++(*nmagicp); /* make room for next */ + if (m->cont_level == 0) + ++(*nmentryp); /* make room for next */ return 0; } @@ -791,7 +966,7 @@ check_format(struct magic_set *ms, struct magic *m) * just after the number read. Return 0 for success, non-zero for failure. */ private int -getvalue(struct magic_set *ms, struct magic *m, char **p) +getvalue(struct magic_set *ms, struct magic *m, const char **p) { int slen; @@ -801,6 +976,7 @@ getvalue(struct magic_set *ms, struct magic *m, char **p) case FILE_STRING: case FILE_PSTRING: case FILE_REGEX: + case FILE_SEARCH: *p = getstr(ms, *p, m->value.s, sizeof(m->value.s), &slen); if (*p == NULL) { if (ms->flags & MAGIC_CHECK) @@ -812,8 +988,10 @@ getvalue(struct magic_set *ms, struct magic *m, char **p) return 0; default: if (m->reln != 'x') { + char *ep; m->value.l = file_signextend(ms, m, - (uint32_t)strtoul(*p, p, 0)); + (uint32_t)strtoul(*p, &ep, 0)); + *p = ep; eatsize(p); } return 0; @@ -826,10 +1004,11 @@ getvalue(struct magic_set *ms, struct magic *m, char **p) * Copy the converted version to "p", returning its length in *slen. * Return updated scan pointer as function result. */ -private char * -getstr(struct magic_set *ms, char *s, char *p, int plen, int *slen) +private const char * +getstr(struct magic_set *ms, const char *s, char *p, int plen, int *slen) { - char *origs = s, *origp = p; + const char *origs = s; + char *origp = p; char *pmax = p + plen - 1; int c; int val; @@ -1001,9 +1180,9 @@ file_showstr(FILE *fp, const char *s, size_t len) * eatsize(): Eat the size spec from a number [eg. 10UL] */ private void -eatsize(char **p) +eatsize(const char **p) { - char *l = *p; + const char *l = *p; if (LOWCASE(*l) == 'u') l++; @@ -1042,7 +1221,7 @@ apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp, if (dbname == NULL) return -1; - if ((fd = open(dbname, O_RDONLY)) == -1) + if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1) return -1; if (fstat(fd, &st) == -1) { @@ -1132,7 +1311,7 @@ apprentice_compile(struct magic_set *ms, struct magic **magicp, if (dbname == NULL) return -1; - if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) { + if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) { file_error(ms, errno, "cannot open `%s'", dbname); return -1; } @@ -1225,7 +1404,7 @@ bs1(struct magic *m) m->cont_level = swap2(m->cont_level); m->offset = swap4((uint32_t)m->offset); m->in_offset = swap4((uint32_t)m->in_offset); - if (IS_STRING(m->type)) + if (!IS_STRING(m->type)) m->value.l = swap4(m->value.l); m->mask = swap4(m->mask); } |