2 * apprentice - make one pass through /etc/magic, learning its secrets.
4 * Copyright (c) Ian F. Darwin, 1987.
5 * Written by Ian F. Darwin.
7 * This software is not subject to any license of the American Telephone
8 * and Telegraph Company or of the Regents of the University of California.
10 * Permission is granted to anyone to use this software for any purpose on
11 * any computer system, and to alter it and redistribute it freely, subject
12 * to the following restrictions:
14 * 1. The author is not responsible for the consequences of use of this
15 * software, no matter how awful, even if they arise from flaws in it.
17 * 2. The origin of this software must not be misrepresented, either by
18 * explicit claim or by omission. Since few users ever read sources,
19 * credits must appear in the documentation.
21 * 3. Altered versions must be plainly marked as such, and must not be
22 * misrepresented as being the original software. Since few users
23 * ever read sources, credits must appear in the documentation.
25 * 4. This notice may not be removed or altered.
41 FILE_RCSID("@(#)$Id: apprentice.c,v 1.34 2001/03/11 20:29:16 christos Exp $")
/*
 * EATAB: advance the scan pointer past ASCII whitespace.  The macro takes
 * no argument; it reads and increments a variable named l that must be in
 * scope at the point of use.
 */
44 #define EATAB {while (isascii((unsigned char) *l) && \
45 isspace((unsigned char) *l)) ++l;}
/*
 * LOWCASE(l): the lower-case form of l when l is an upper-case letter,
 * otherwise l unchanged.  The argument is expanded more than once, so it
 * must be free of side effects.
 */
46 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
47 tolower((unsigned char) (l)) : (l))
/*
 * Prototypes for the file-local (static) helpers, each wrapped in the
 * __P() macro.
 * NOTE(review): the declarations of apprentice_file, apprentice_map and
 * apprentice_compile continue on lines that are not visible here.
 */
57 static int getvalue __P((struct magic *, char **));
58 static int hextoint __P((int));
59 static char *getstr __P((char *, char *, int, int *));
60 static int parse __P((struct magic **, uint32 *, char *, int));
61 static void eatsize __P((char **));
62 static int apprentice_1 __P((const char *, int));
63 static int apprentice_file __P((struct magic **, uint32 *,
66 static void byteswap __P((struct magic *, uint32));
67 static void bs1 __P((struct magic *));
68 static uint16 swap2 __P((uint16));
69 static uint32 swap4 __P((uint32));
70 static char * mkdbname __P((const char *));
71 static int apprentice_map __P((struct magic **, uint32 *,
73 static int apprentice_compile __P((struct magic **, uint32 *,
/*
 * Capacity of the magic entry array, counted in entries.  parse() compares
 * the entry count against it and raises it by ALLOC_INCR before calling
 * realloc(); apprentice_file() passes it to calloc() for the first
 * allocation.
 */
77 static int maxmagic = 0;
/*
 * apprentice_1 - load one magic file, fn, according to action.
 * With COMPILE the text file is parsed by apprentice_file() and the result
 * of apprentice_compile() is returned.  Otherwise apprentice_map() is
 * tried, apprentice_file() is available as the text-file parser, and a
 * list node is allocated and linked into mlist.
 * NOTE(review): several lines of the body are not visible here, including
 * the ones that decide when apprentice_file() runs on the non-COMPILE path.
 */
86 apprentice_1(fn, action)
90 struct magic *magic = NULL;
/* COMPILE: parse the text magic file, then write the compiled form. */
96 if (action == COMPILE) {
97 rv = apprentice_file(&magic, &nmagic, fn, action);
99 return apprentice_compile(&magic, &nmagic, fn, action);
/*
 * A nonzero result from apprentice_map() produces the notice below that
 * the regular (text) magic file is being used.
 */
103 if ((rv = apprentice_map(&magic, &nmagic, fn, action)) != 0)
104 (void)fprintf(stderr, "%s: Using regular magic file `%s'\n",
109 rv = apprentice_file(&magic, &nmagic, fn, action);
/* Allocate the list node for this file. */
114 if ((ml = malloc(sizeof(*ml))) == NULL) {
115 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
/*
 * NOTE(review): ml has already been allocated when this test runs.  What
 * the branch does is not visible here; confirm that it does not leak ml,
 * and that a failed malloc() above cannot reach the linking code below.
 */
120 if (magic == NULL || nmagic == 0)
/* Link ml in after the current last node, mlist.prev. */
126 mlist.prev->next = ml;
127 ml->prev = mlist.prev;
/*
 * apprentice - load the magic files named in fn.
 * fn is a list of magic files (see the parameter comment).  Entries are
 * located with strchr(fn, PATHSEP) and handed to apprentice_1() together
 * with action.
 * NOTE(review): the loop structure and the return statements are not
 * visible here.
 */
136 apprentice(fn, action)
137 const char *fn; /* list of magic files */
141 int file_err, errs = -1;
/* Start from an empty list: the head node points at itself both ways. */
143 mlist.next = mlist.prev = &mlist;
/*
 * Work on a heap copy of the list; after the strcpy() below, fn refers to
 * the copy.
 * NOTE(review): presumably the copy exists so that separators can be
 * overwritten in place - the write itself is not visible here.
 */
144 mfn = malloc(strlen(fn)+1);
146 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
152 fn = strcpy(mfn, fn);
/* Find the next separator, then load the entry that fn points at. */
155 p = strchr(fn, PATHSEP);
158 file_err = apprentice_1(fn, action);
164 (void) fprintf(stderr, "%s: couldn't find any magic files!\n",
166 if (action == CHECK && errs)
/*
 * apprentice_file - read the text magic file fn.
 * Allocates the entry array at *magicp, then reads the file with fgets()
 * and passes each line that is neither a comment nor (nearly) empty to
 * parse(), which fills *magicp and counts entries in *nmagicp.
 * With action == CHECK a column header is printed first.
 * NOTE(review): the fopen() call, the cleanup and the return statements
 * are not visible here.
 */
177 apprentice_file(magicp, nmagicp, fn, action)
178 struct magic **magicp;
180 const char *fn; /* name of magic file */
183 static const char hdr[] =
184 "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
192 (void) fprintf(stderr,
193 "%s: can't read magic file %s (%s)\n",
194 progname, fn, strerror(errno));
/*
 * Initial zero-filled allocation of maxmagic entries.
 * NOTE(review): the arguments are in (size, count) order, the reverse of
 * the calloc(count, size) prototype; the product is the same.  maxmagic is
 * initialised to 0 at file scope, so it must be assigned before this call
 * - that assignment is not visible here.
 */
199 *magicp = (struct magic *) calloc(sizeof(struct magic), maxmagic);
200 if (*magicp == NULL) {
201 (void) fprintf(stderr, "%s: Out of memory.\n", progname);
207 if (action == CHECK) /* print silly verbose header for USG compat. */
208 (void) printf("%s\n", hdr);
/* One iteration per input line; lineno is 1-based. */
210 for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
211 if (line[0]=='#') /* comment, do not parse */
213 if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
/*
 * NOTE(review): the last character is dropped unconditionally.  fgets()
 * only stores a newline when one fits in the buffer, so an over-long line
 * or a final line without a newline loses a real character here.
 */
215 line[strlen(line)-1] = '\0'; /* delete newline */
216 if (parse(magicp, nmagicp, line, action) != 0)
230 * extend the sign bit if the comparison is to be signed
237 if (!(m->flag & UNSIGNED))
240 * Do not remove the casts below. They are
241 * vital. When later compared with the data,
242 * the sign extension must have happened.
263 magwarn("can't happen: m->type=%d\n",
271 * parse one line from magic file, put into magic[index++] if valid
/*
 * parse - parse one magic-file line, l, into the next free entry of the
 * array at *magicp and, at the end, increment the entry count *nmagicp.
 * The visible steps are: grow the array if needed, read the offset
 * (including the indirect form), match the type keyword, read an optional
 * mask or string modifiers, and copy the description.
 * The caller, apprentice_file(), treats a nonzero return as failure.
 * NOTE(review): only part of the body is visible here; the comments below
 * describe the visible statements only.
 */
274 parse(magicp, nmagicp, l, action)
275 struct magic **magicp;
284 #define ALLOC_INCR 200
/*
 * Grow the array by ALLOC_INCR entries once the next slot would reach the
 * current capacity.  The realloc() result is received in m rather than in
 * *magicp, so the old block is not lost if the call fails.
 */
285 if (*nmagicp + 1 >= maxmagic){
286 maxmagic += ALLOC_INCR;
287 if ((m = (struct magic *) realloc(*magicp,
288 sizeof(struct magic) * maxmagic)) == NULL) {
289 (void) fprintf(stderr, "%s: Out of memory.\n",
299 memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
/* m is the entry being filled in for this line. */
302 m = &(*magicp)[*nmagicp];
/*
 * An opening parenthesis or an ampersand in front of the offset is only
 * recognised when m->cont_level is nonzero.
 */
311 if (m->cont_level != 0 && *l == '(') {
315 if (m->cont_level != 0 && *l == '&') {
320 /* get offset, then skip over it */
/* Base 0 lets strtoul() accept decimal, octal and hexadecimal input. */
321 m->offset = (int) strtoul(l,&t,0);
323 magwarn("offset %s invalid", l);
326 if (m->flag & INDIR) {
330 * read [.lbs][+-]nnnnn)
343 m->in_type = LESHORT;
347 m->in_type = BESHORT;
356 magwarn("indirect offset type %c invalid", *l);
/*
 * Optional sign, then the numeric part of the indirect offset.
 * NOTE(review): the negation below tests *s; presumably s was left
 * pointing at the sign character - its assignment is not visible here.
 */
362 if (*l == '+' || *l == '-') l++;
363 if (isdigit((unsigned char)*l)) {
364 m->in_offset = strtoul(l, &t, 0);
365 if (*s == '-') m->in_offset = - m->in_offset;
370 magwarn("missing ')' in indirect offset");
375 while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
396 /* get type, skip it */
/*
 * Type keywords are matched by prefix with strncmp(); the length constants
 * (NBYTE, NSHORT, ...) are defined elsewhere.  Both the char and the byte
 * keyword are compared using NBYTE.
 */
397 if (strncmp(l, "char", NBYTE)==0) { /* HP/UX compat */
400 } else if (strncmp(l, "byte", NBYTE)==0) {
403 } else if (strncmp(l, "short", NSHORT)==0) {
406 } else if (strncmp(l, "long", NLONG)==0) {
409 } else if (strncmp(l, "string", NSTRING)==0) {
412 } else if (strncmp(l, "date", NDATE)==0) {
415 } else if (strncmp(l, "beshort", NBESHORT)==0) {
418 } else if (strncmp(l, "belong", NBELONG)==0) {
421 } else if (strncmp(l, "bedate", NBEDATE)==0) {
424 } else if (strncmp(l, "leshort", NLESHORT)==0) {
427 } else if (strncmp(l, "lelong", NLELONG)==0) {
430 } else if (strncmp(l, "ledate", NLEDATE)==0) {
434 magwarn("type %s invalid", l);
437 /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
/* The mask is parsed in place: strtoul() advances l past the number. */
440 m->mask = signextend(m, strtoul(l, &l, 0));
442 } else if (STRING == m->type) {
/*
 * For strings the mask field collects modifier flags, one per character.
 * NOTE(review): unlike the other ctype calls in this file, this isspace()
 * receives a plain char without the unsigned char cast, and the loop
 * condition itself does not test for the terminating NUL.
 */
445 while (!isspace(*++l)) {
447 case CHAR_IGNORE_LOWERCASE:
448 m->mask |= STRING_IGNORE_LOWERCASE;
450 case CHAR_COMPACT_BLANK:
451 m->mask |= STRING_COMPACT_BLANK;
453 case CHAR_COMPACT_OPTIONAL_BLANK:
455 STRING_COMPACT_OPTIONAL_BLANK;
458 magwarn("string extension %c invalid",
471 /* Old-style anding: "0 byte &0x80 dynamically linked" */
478 /* HP compat: ignore &= etc. */
483 if (m->type != STRING) {
/* An x followed by ASCII whitespace is tested for here. */
490 if (*l == 'x' && isascii((unsigned char)l[1]) &&
491 isspace((unsigned char)l[1])) {
494 goto GetDesc; /* Bill The Cat */
504 * TODO finish this macro and start using it!
505 * #define offsetcheck {if (offset > HOWMANY-1)
506 * magwarn("offset too big"); }
510 * now get last part - the description
517 } else if ((l[0] == '\\') && (l[1] == 'b')) {
/*
 * Copy the description, terminator included, into m->desc.
 * NOTE(review): the copy also stops as soon as i reaches MAXDESC, and in
 * that case no terminator has been stored.  If desc holds exactly MAXDESC
 * bytes (TODO confirm) an over-long description is left unterminated.
 */
523 while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
526 if (action == CHECK) {
529 ++(*nmagicp); /* make room for next */
534 * Read a numeric value from a pointer, into the value union of a magic
535 * pointer, according to the magic type. Update the string pointer to point
536 * just after the number read. Return 0 for success, non-zero for failure.
545 if (m->type == STRING) {
546 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
549 if (m->reln != 'x') {
550 m->value.l = signextend(m, strtoul(*p, p, 0));
557 * Convert a string containing C character escapes. Stop at an unescaped
559 * Copy the converted version to "p", returning its length in *slen.
560 * Return updated scan pointer as function result.
/*
 * getstr - decode the C-style escapes in s into the buffer p.
 * s:    input text, scanned one character at a time
 * p:    output buffer of plen bytes
 * slen: receives the length of the converted string
 * The function result is the updated scan pointer.
 * NOTE(review): most of the escape switch is not visible here; only the
 * octal and hexadecimal cases are annotated.
 */
563 getstr(s, p, plen, slen)
/* origs keeps the start of the input for the error message below. */
568 char *origs = s, *origp = p;
/* pmax is the address of the last byte of the output buffer. */
569 char *pmax = p + plen - 1;
573 while ((c = *s++) != '\0') {
574 if (isspace((unsigned char) c))
577 fprintf(stderr, "String too long: %s\n", origs);
614 /* \ and up to 3 octal digits */
/* Each further octal digit shifts val left by 3 bits and is OR-ed in. */
624 c = *s++; /* try for 2 */
625 if(c >= '0' && c <= '7') {
626 val = (val<<3) | (c - '0');
627 c = *s++; /* try for 3 */
628 if(c >= '0' && c <= '7')
629 val = (val<<3) | (c-'0');
638 /* \x and up to 2 hex digits */
/* hextoint() yields the digit value; each digit adds 4 bits to val. */
640 val = 'x'; /* Default if no digits */
641 c = hextoint(*s++); /* Get next char */
646 val = (val << 4) + c;
664 /* Single hex char to int; -1 if not a hex char. */
669 if (!isascii((unsigned char) c))
671 if (isdigit((unsigned char) c))
673 if ((c >= 'a')&&(c <= 'f'))
675 if (( c>= 'A')&&(c <= 'F'))
682 * Print a string containing C character escapes.
702 if(c >= 040 && c <= 0176) /* TODO isprint && !iscntrl */
705 (void) fputc('\\', fp);
709 (void) fputc('n', fp);
713 (void) fputc('r', fp);
717 (void) fputc('b', fp);
721 (void) fputc('t', fp);
725 (void) fputc('f', fp);
729 (void) fputc('v', fp);
733 (void) fprintf(fp, "%.3o", c & 0377);
741 * eatsize(): Eat the size spec from a number [eg. 10UL]
749 if (LOWCASE(*l) == 'u')
752 switch (LOWCASE(*l)) {
754 case 's': /* short */
755 case 'h': /* short */
756 case 'b': /* char/byte */
757 case 'c': /* char/byte */
769 * handle an mmap'ed file.
/*
 * apprentice_map - map the compiled magic database that belongs to fn.
 * The database name comes from mkdbname(fn).  The file is opened read-only,
 * sized with fstat(), mapped, and its first two 32-bit words are checked
 * against MAGICNO and VERSIONNO (byte-swapped where necessary).  On
 * success *magicp points at the mapping and *nmagicp holds the number of
 * entries.
 * NOTE(review): the return statements and any close(fd) are not visible
 * here; confirm the descriptor is released on every path.
 */
772 apprentice_map(magicp, nmagicp, fn, action)
773 struct magic **magicp;
/* NOTE(review): no NULL check on dbname is visible before it is used. */
783 char *dbname = mkdbname(fn);
/* No message is printed for a failed open in the visible lines. */
785 if ((fd = open(dbname, O_RDONLY)) == -1)
788 if (fstat(fd, &st) == -1) {
789 (void)fprintf(stderr, "%s: Cannot stat `%s' (%s)\n",
790 progname, dbname, strerror(errno));
/*
 * The mapping is private and writable, so the in-place byteswap() further
 * down changes only this process's copy and not the file itself.
 */
794 if ((*magicp = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
795 MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
796 (void)fprintf(stderr, "%s: Cannot map `%s' (%s)\n",
797 progname, dbname, strerror(errno));
/* Word 0 must equal MAGICNO either as stored or after swap4(). */
801 ptr = (uint32 *) *magicp;
802 if (*ptr != MAGICNO) {
803 if (swap4(*ptr) != MAGICNO) {
804 (void)fprintf(stderr, "%s: Bad magic in `%s'\n",
/* Word 1 is the format version. */
812 version = swap4(ptr[1]);
/*
 * NOTE(review): the comparison uses VERSIONNO but the message prints
 * VERSION through a %d conversion; confirm VERSION is the intended symbol
 * and that it is an int.
 */
815 if (version != VERSIONNO) {
816 (void)fprintf(stderr,
817 "%s: version mismatch (%d != %d) in `%s'\n",
818 progname, version, VERSION, dbname);
/*
 * The first struct-sized slot of the file is the header, hence the - 1.
 * NOTE(review): the division is carried out in an unsigned type, so a file
 * shorter than one struct magic makes the subtraction wrap to a huge
 * count.  No minimum-size check is visible here.
 */
821 *nmagicp = (st.st_size / sizeof(struct magic)) - 1;
824 byteswap(*magicp, *nmagicp);
/* Drop the mapping again; presumably this is the error path. */
831 (void)munmap(*magicp, (size_t)st.st_size);
840 * write the compiled magic file.
/*
 * apprentice_compile - write the parsed entries to the compiled database.
 * The output name comes from mkdbname(fn).  The file is created or
 * truncated with mode 0644, the header array ar is written at offset 0,
 * and the *nmagicp entries of *magicp are written starting at offset
 * sizeof(struct magic).  Every failing step reports the name and
 * strerror(errno) on stderr.
 * NOTE(review): the return statements and any close(fd) are not visible
 * here.
 */
843 apprentice_compile(magicp, nmagicp, fn, action)
844 struct magic **magicp;
/* NOTE(review): no NULL check on dbname is visible before it is used. */
850 char *dbname = mkdbname(fn);
/*
 * Header words written at the start of the file; the initialiser is not
 * visible here - presumably the magic number and version that
 * apprentice_map() checks.
 */
851 static const uint32 ar[] = {
855 if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
856 (void)fprintf(stderr, "%s: Cannot open `%s' (%s)\n",
857 progname, dbname, strerror(errno));
861 if (write(fd, ar, sizeof(ar)) != sizeof(ar)) {
862 (void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
863 progname, dbname, strerror(errno));
/*
 * Skip the rest of the first struct-sized slot so that the entries start
 * at offset sizeof(struct magic); apprentice_map() subtracts one slot from
 * its entry count to match.
 */
867 if (lseek(fd, sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
868 (void)fprintf(stderr, "%s: error seeking `%s' (%s)\n",
869 progname, dbname, strerror(errno));
/* A short write is treated as an error, the same as a failed one. */
873 if (write(fd, *magicp, sizeof(struct magic) * *nmagicp)
874 != sizeof(struct magic) * *nmagicp) {
875 (void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
876 progname, dbname, strerror(errno));
891 static const char ext[] = ".mgc";
892 static char *buf = NULL;
893 size_t len = strlen(fn) + sizeof(ext) + 1;
897 buf = realloc(buf, len);
898 (void)strcpy(buf, fn);
899 (void)strcat(buf, ext);
904 * Byteswap an mmap'ed file if needed
/*
 * byteswap - visit each of the nmagic entries of the array magic.
 * NOTE(review): the loop body is not visible here; presumably it applies
 * the single-entry swap routine to each element - confirm.
 */
907 byteswap(magic, nmagic)
/* One iteration per entry, index 0 up to nmagic - 1. */
912 for (i = 0; i < nmagic; i++)
924 uint8 *s = (uint8 *) &sv;
925 uint8 *d = (uint8 *) &rv;
939 uint8 *s = (uint8 *) &sv;
940 uint8 *d = (uint8 *) &rv;
949 * byteswap a single magic entry
955 m->cont_level = swap2(m->cont_level);
956 m->offset = swap4(m->offset);
957 m->in_offset = swap4(m->in_offset);
958 if (m->type != STRING)
959 m->value.l = swap4(m->value.l);
960 m->mask = swap4(m->mask);