summaryrefslogtreecommitdiffstats
path: root/contrib/file/apprentice.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/file/apprentice.c')
-rw-r--r--contrib/file/apprentice.c672
1 files changed, 672 insertions, 0 deletions
diff --git a/contrib/file/apprentice.c b/contrib/file/apprentice.c
new file mode 100644
index 0000000..1c4664b
--- /dev/null
+++ b/contrib/file/apprentice.c
@@ -0,0 +1,672 @@
+/*
+ * apprentice - make one pass through /etc/magic, learning its secrets.
+ *
+ * Copyright (c) Ian F. Darwin, 1987.
+ * Written by Ian F. Darwin.
+ *
+ * This software is not subject to any license of the American Telephone
+ * and Telegraph Company or of the Regents of the University of California.
+ *
+ * Permission is granted to anyone to use this software for any purpose on
+ * any computer system, and to alter it and redistribute it freely, subject
+ * to the following restrictions:
+ *
+ * 1. The author is not responsible for the consequences of use of this
+ * software, no matter how awful, even if they arise from flaws in it.
+ *
+ * 2. The origin of this software must not be misrepresented, either by
+ * explicit claim or by omission. Since few users ever read sources,
+ * credits must appear in the documentation.
+ *
+ * 3. Altered versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software. Since few users
+ * ever read sources, credits must appear in the documentation.
+ *
+ * 4. This notice may not be removed or altered.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include "file.h"
+
+#ifndef lint
+FILE_RCSID("@(#)$Id: apprentice.c,v 1.33 2000/08/05 17:36:47 christos Exp $")
+#endif /* lint */
+
+#define EATAB {while (isascii((unsigned char) *l) && \
+ isspace((unsigned char) *l)) ++l;}
+#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
+ tolower((unsigned char) (l)) : (l))
+
+
+#ifdef __EMX__
+ char PATHSEP=';';
+#else
+ char PATHSEP=':';
+#endif
+
+
+static int getvalue __P((struct magic *, char **));
+static int hextoint __P((int));
+static char *getstr __P((char *, char *, int, int *));
+static int parse __P((char *, int *, int));
+static void eatsize __P((char **));
+
+static int maxmagic = 0;
+
+static int apprentice_1 __P((const char *, int));
+
+int
+apprentice(fn, check)
+ const char *fn; /* list of magic files */
+ int check; /* non-zero? checking-only run. */
+{
+ char *p, *mfn;
+ int file_err, errs = -1;
+
+ maxmagic = MAXMAGIS;
+ magic = (struct magic *) calloc(sizeof(struct magic), maxmagic);
+ mfn = malloc(strlen(fn)+1);
+ if (magic == NULL || mfn == NULL) {
+ (void) fprintf(stderr, "%s: Out of memory.\n", progname);
+ if (check)
+ return -1;
+ else
+ exit(1);
+ }
+ fn = strcpy(mfn, fn);
+
+ while (fn) {
+ p = strchr(fn, PATHSEP);
+ if (p)
+ *p++ = '\0';
+ file_err = apprentice_1(fn, check);
+ if (file_err > errs)
+ errs = file_err;
+ fn = p;
+ }
+ if (errs == -1)
+ (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
+ progname);
+ if (!check && errs)
+ exit(1);
+
+ free(mfn);
+ return errs;
+}
+
+static int
+apprentice_1(fn, check)
+ const char *fn; /* name of magic file */
+ int check; /* non-zero? checking-only run. */
+{
+ static const char hdr[] =
+ "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
+ FILE *f;
+ char line[BUFSIZ+1];
+ int errs = 0;
+
+ f = fopen(fn, "r");
+ if (f==NULL) {
+ if (errno != ENOENT)
+ (void) fprintf(stderr,
+ "%s: can't read magic file %s (%s)\n",
+ progname, fn, strerror(errno));
+ return -1;
+ }
+
+ /* parse it */
+ if (check) /* print silly verbose header for USG compat. */
+ (void) printf("%s\n", hdr);
+
+ for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
+ if (line[0]=='#') /* comment, do not parse */
+ continue;
+ if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
+ continue;
+ line[strlen(line)-1] = '\0'; /* delete newline */
+ if (parse(line, &nmagic, check) != 0)
+ errs = 1;
+ }
+
+ (void) fclose(f);
+ return errs;
+}
+
+/*
+ * extend the sign bit if the comparison is to be signed
+ */
+uint32
+signextend(m, v)
+ struct magic *m;
+ uint32 v;
+{
+ if (!(m->flag & UNSIGNED))
+ switch(m->type) {
+ /*
+ * Do not remove the casts below. They are
+ * vital. When later compared with the data,
+ * the sign extension must have happened.
+ */
+ case BYTE:
+ v = (char) v;
+ break;
+ case SHORT:
+ case BESHORT:
+ case LESHORT:
+ v = (short) v;
+ break;
+ case DATE:
+ case BEDATE:
+ case LEDATE:
+ case LONG:
+ case BELONG:
+ case LELONG:
+ v = (int32) v;
+ break;
+ case STRING:
+ break;
+ default:
+ magwarn("can't happen: m->type=%d\n",
+ m->type);
+ return -1;
+ }
+ return v;
+}
+
+/*
+ * parse one line from magic file, put into magic[index++] if valid
+ */
+static int
+parse(l, ndx, check)
+ char *l;
+ int *ndx, check;
+{
+ int i = 0, nd = *ndx;
+ struct magic *m;
+ char *t, *s;
+
+#define ALLOC_INCR 200
+ if (nd+1 >= maxmagic){
+ maxmagic += ALLOC_INCR;
+ if ((m = (struct magic *) realloc(magic, sizeof(struct magic) *
+ maxmagic)) == NULL) {
+ (void) fprintf(stderr, "%s: Out of memory.\n",
+ progname);
+ if (magic)
+ free(magic);
+ if (check)
+ return -1;
+ else
+ exit(1);
+ }
+ magic = m;
+ memset(&magic[*ndx], 0, sizeof(struct magic) * ALLOC_INCR);
+ }
+ m = &magic[*ndx];
+ m->flag = 0;
+ m->cont_level = 0;
+
+ while (*l == '>') {
+ ++l; /* step over */
+ m->cont_level++;
+ }
+
+ if (m->cont_level != 0 && *l == '(') {
+ ++l; /* step over */
+ m->flag |= INDIR;
+ }
+ if (m->cont_level != 0 && *l == '&') {
+ ++l; /* step over */
+ m->flag |= ADD;
+ }
+
+ /* get offset, then skip over it */
+ m->offset = (int) strtoul(l,&t,0);
+ if (l == t)
+ magwarn("offset %s invalid", l);
+ l = t;
+
+ if (m->flag & INDIR) {
+ m->in.type = LONG;
+ m->in.offset = 0;
+ /*
+ * read [.lbs][+-]nnnnn)
+ */
+ if (*l == '.') {
+ l++;
+ switch (*l) {
+ case 'l':
+ m->in.type = LELONG;
+ break;
+ case 'L':
+ m->in.type = BELONG;
+ break;
+ case 'h':
+ case 's':
+ m->in.type = LESHORT;
+ break;
+ case 'H':
+ case 'S':
+ m->in.type = BESHORT;
+ break;
+ case 'c':
+ case 'b':
+ case 'C':
+ case 'B':
+ m->in.type = BYTE;
+ break;
+ default:
+ magwarn("indirect offset type %c invalid", *l);
+ break;
+ }
+ l++;
+ }
+ s = l;
+ if (*l == '+' || *l == '-') l++;
+ if (isdigit((unsigned char)*l)) {
+ m->in.offset = strtoul(l, &t, 0);
+ if (*s == '-') m->in.offset = - m->in.offset;
+ }
+ else
+ t = l;
+ if (*t++ != ')')
+ magwarn("missing ')' in indirect offset");
+ l = t;
+ }
+
+
+ while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
+ ++l;
+ EATAB;
+
+#define NBYTE 4
+#define NSHORT 5
+#define NLONG 4
+#define NSTRING 6
+#define NDATE 4
+#define NBESHORT 7
+#define NBELONG 6
+#define NBEDATE 6
+#define NLESHORT 7
+#define NLELONG 6
+#define NLEDATE 6
+
+ if (*l == 'u') {
+ ++l;
+ m->flag |= UNSIGNED;
+ }
+
+ /* get type, skip it */
+ if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
+ m->type = BYTE;
+ l += NBYTE;
+ } else if (strncmp(l, "byte", NBYTE)==0) {
+ m->type = BYTE;
+ l += NBYTE;
+ } else if (strncmp(l, "short", NSHORT)==0) {
+ m->type = SHORT;
+ l += NSHORT;
+ } else if (strncmp(l, "long", NLONG)==0) {
+ m->type = LONG;
+ l += NLONG;
+ } else if (strncmp(l, "string", NSTRING)==0) {
+ m->type = STRING;
+ l += NSTRING;
+ } else if (strncmp(l, "date", NDATE)==0) {
+ m->type = DATE;
+ l += NDATE;
+ } else if (strncmp(l, "beshort", NBESHORT)==0) {
+ m->type = BESHORT;
+ l += NBESHORT;
+ } else if (strncmp(l, "belong", NBELONG)==0) {
+ m->type = BELONG;
+ l += NBELONG;
+ } else if (strncmp(l, "bedate", NBEDATE)==0) {
+ m->type = BEDATE;
+ l += NBEDATE;
+ } else if (strncmp(l, "leshort", NLESHORT)==0) {
+ m->type = LESHORT;
+ l += NLESHORT;
+ } else if (strncmp(l, "lelong", NLELONG)==0) {
+ m->type = LELONG;
+ l += NLELONG;
+ } else if (strncmp(l, "ledate", NLEDATE)==0) {
+ m->type = LEDATE;
+ l += NLEDATE;
+ } else {
+ magwarn("type %s invalid", l);
+ return -1;
+ }
+ /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
+ if (*l == '&') {
+ ++l;
+ m->mask = signextend(m, strtoul(l, &l, 0));
+ eatsize(&l);
+ } else if (STRING == m->type) {
+ m->mask = 0L;
+ if (*l == '/') {
+ while (!isspace(*++l)) {
+ switch (*l) {
+ case CHAR_IGNORE_LOWERCASE:
+ m->mask |= STRING_IGNORE_LOWERCASE;
+ break;
+ case CHAR_COMPACT_BLANK:
+ m->mask |= STRING_COMPACT_BLANK;
+ break;
+ case CHAR_COMPACT_OPTIONAL_BLANK:
+ m->mask |=
+ STRING_COMPACT_OPTIONAL_BLANK;
+ break;
+ default:
+ magwarn("string extension %c invalid",
+ *l);
+ return -1;
+ }
+ }
+ }
+ } else
+ m->mask = ~0L;
+ EATAB;
+
+ switch (*l) {
+ case '>':
+ case '<':
+ /* Old-style anding: "0 byte &0x80 dynamically linked" */
+ case '&':
+ case '^':
+ case '=':
+ m->reln = *l;
+ ++l;
+ if (*l == '=') {
+ /* HP compat: ignore &= etc. */
+ ++l;
+ }
+ break;
+ case '!':
+ if (m->type != STRING) {
+ m->reln = *l;
+ ++l;
+ break;
+ }
+ /* FALL THROUGH */
+ default:
+ if (*l == 'x' && isascii((unsigned char)l[1]) &&
+ isspace((unsigned char)l[1])) {
+ m->reln = *l;
+ ++l;
+ goto GetDesc; /* Bill The Cat */
+ }
+ m->reln = '=';
+ break;
+ }
+ EATAB;
+
+ if (getvalue(m, &l))
+ return -1;
+ /*
+ * TODO finish this macro and start using it!
+ * #define offsetcheck {if (offset > HOWMANY-1)
+ * magwarn("offset too big"); }
+ */
+
+ /*
+ * now get last part - the description
+ */
+GetDesc:
+ EATAB;
+ if (l[0] == '\b') {
+ ++l;
+ m->nospflag = 1;
+ } else if ((l[0] == '\\') && (l[1] == 'b')) {
+ ++l;
+ ++l;
+ m->nospflag = 1;
+ } else
+ m->nospflag = 0;
+ while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
+ /* NULLBODY */;
+
+ if (check) {
+ mdump(m);
+ }
+ ++(*ndx); /* make room for next */
+ return 0;
+}
+
+/*
+ * Read a numeric value from a pointer, into the value union of a magic
+ * pointer, according to the magic type. Update the string pointer to point
+ * just after the number read. Return 0 for success, non-zero for failure.
+ */
+static int
+getvalue(m, p)
+ struct magic *m;
+ char **p;
+{
+ int slen;
+
+ if (m->type == STRING) {
+ *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
+ m->vallen = slen;
+ } else
+ if (m->reln != 'x') {
+ m->value.l = signextend(m, strtoul(*p, p, 0));
+ eatsize(p);
+ }
+ return 0;
+}
+
+/*
+ * Convert a string containing C character escapes. Stop at an unescaped
+ * space or tab.
+ * Copy the converted version to "p", returning its length in *slen.
+ * Return updated scan pointer as function result.
+ */
+static char *
+getstr(s, p, plen, slen)
+ char *s;
+ char *p;
+ int plen, *slen;
+{
+ char *origs = s, *origp = p;
+ char *pmax = p + plen - 1;
+ int c;
+ int val;
+
+ while ((c = *s++) != '\0') {
+ if (isspace((unsigned char) c))
+ break;
+ if (p >= pmax) {
+ fprintf(stderr, "String too long: %s\n", origs);
+ break;
+ }
+ if(c == '\\') {
+ switch(c = *s++) {
+
+ case '\0':
+ goto out;
+
+ default:
+ *p++ = (char) c;
+ break;
+
+ case 'n':
+ *p++ = '\n';
+ break;
+
+ case 'r':
+ *p++ = '\r';
+ break;
+
+ case 'b':
+ *p++ = '\b';
+ break;
+
+ case 't':
+ *p++ = '\t';
+ break;
+
+ case 'f':
+ *p++ = '\f';
+ break;
+
+ case 'v':
+ *p++ = '\v';
+ break;
+
+ /* \ and up to 3 octal digits */
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ val = c - '0';
+ c = *s++; /* try for 2 */
+ if(c >= '0' && c <= '7') {
+ val = (val<<3) | (c - '0');
+ c = *s++; /* try for 3 */
+ if(c >= '0' && c <= '7')
+ val = (val<<3) | (c-'0');
+ else
+ --s;
+ }
+ else
+ --s;
+ *p++ = (char)val;
+ break;
+
+ /* \x and up to 2 hex digits */
+ case 'x':
+ val = 'x'; /* Default if no digits */
+ c = hextoint(*s++); /* Get next char */
+ if (c >= 0) {
+ val = c;
+ c = hextoint(*s++);
+ if (c >= 0)
+ val = (val << 4) + c;
+ else
+ --s;
+ } else
+ --s;
+ *p++ = (char)val;
+ break;
+ }
+ } else
+ *p++ = (char)c;
+ }
+out:
+ *p = '\0';
+ *slen = p - origp;
+ return s;
+}
+
+
+/* Single hex char to int; -1 if not a hex char. */
+static int
+hextoint(c)
+ int c;
+{
+ if (!isascii((unsigned char) c))
+ return -1;
+ if (isdigit((unsigned char) c))
+ return c - '0';
+ if ((c >= 'a')&&(c <= 'f'))
+ return c + 10 - 'a';
+ if (( c>= 'A')&&(c <= 'F'))
+ return c + 10 - 'A';
+ return -1;
+}
+
+
+/*
+ * Print a string containing C character escapes.
+ */
+void
+showstr(fp, s, len)
+ FILE *fp;
+ const char *s;
+ int len;
+{
+ char c;
+
+ for (;;) {
+ c = *s++;
+ if (len == -1) {
+ if (c == '\0')
+ break;
+ }
+ else {
+ if (len-- == 0)
+ break;
+ }
+ if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
+ (void) fputc(c, fp);
+ else {
+ (void) fputc('\\', fp);
+ switch (c) {
+
+ case '\n':
+ (void) fputc('n', fp);
+ break;
+
+ case '\r':
+ (void) fputc('r', fp);
+ break;
+
+ case '\b':
+ (void) fputc('b', fp);
+ break;
+
+ case '\t':
+ (void) fputc('t', fp);
+ break;
+
+ case '\f':
+ (void) fputc('f', fp);
+ break;
+
+ case '\v':
+ (void) fputc('v', fp);
+ break;
+
+ default:
+ (void) fprintf(fp, "%.3o", c & 0377);
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * eatsize(): Eat the size spec from a number [eg. 10UL]
+ */
+static void
+eatsize(p)
+ char **p;
+{
+ char *l = *p;
+
+ if (LOWCASE(*l) == 'u')
+ l++;
+
+ switch (LOWCASE(*l)) {
+ case 'l': /* long */
+ case 's': /* short */
+ case 'h': /* short */
+ case 'b': /* char/byte */
+ case 'c': /* char/byte */
+ l++;
+ /*FALLTHROUGH*/
+ default:
+ break;
+ }
+
+ *p = l;
+}
OpenPOWER on IntegriCloud