2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * softmagic - interpret variable magic from MAGIC
42 FILE_RCSID("@(#)$Id: softmagic.c,v 1.78 2006/03/12 22:09:33 christos Exp $")
45 private int match(struct magic_set *, struct magic *, uint32_t,
46 const unsigned char *, size_t);
47 private int mget(struct magic_set *, union VALUETYPE *, const unsigned char *,
48 struct magic *, size_t, unsigned int);
49 private int mcheck(struct magic_set *, union VALUETYPE *, struct magic *);
50 private int32_t mprint(struct magic_set *, union VALUETYPE *, struct magic *);
51 private void mdebug(uint32_t, const char *, size_t);
52 private int mcopy(struct magic_set *, union VALUETYPE *, int, int,
53 const unsigned char *, size_t, size_t);
54 private int mconvert(struct magic_set *, union VALUETYPE *, struct magic *);
55 private int check_mem(struct magic_set *, unsigned int);
58 * softmagic - lookup one file in database
59 * (already read from MAGIC by apprentice.c).
60 * Passed a buffer of bytes from the file to be typed and the buffer's length.
62 /*ARGSUSED1*/ /* nbytes is passed through to match() */
64 file_softmagic(struct magic_set *ms, const unsigned char *buf, size_t nbytes)
67 for (ml = ms->mlist->next; ml != ms->mlist; ml = ml->next)
68 if (match(ms, ml->magic, ml->nmagic, buf, nbytes))
75 * Go through the whole list, stopping if you find a match. Process all
76 * the continuations of that match before returning.
78 * We support multi-level continuations:
80 * At any time when processing a successful top-level match, there is a
81 * current continuation level; it represents the level of the last
82 * successfully matched continuation.
84 * Continuations above that level are skipped as, if we see one, it
85 * means that the continuation that controls them - i.e., the
86 * lower-level continuation preceding them - failed to match.
88 * Continuations below that level are processed as, if we see one,
89 * it means we've finished processing or skipping higher-level
90 * continuations under the control of a successful or unsuccessful
91 * lower-level continuation, and are now seeing the next lower-level
92 * continuation and should process it. The current continuation
93 * level reverts to the level of the one we're seeing.
95 * Continuations at the current level are processed as, if we see
96 * one, there's no lower-level continuation that may have failed.
98 * If a continuation matches, we bump the current continuation level
99 * so that higher-level continuations are processed.
102 match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
103 const unsigned char *s, size_t nbytes)
105 uint32_t magindex = 0;
106 unsigned int cont_level = 0;
107 int need_separator = 0;
110 int returnval = 0; /* if a match is found it is set to 1*/
111 int firstline = 1; /* a flag to print X\n X\n- X */
113 if (check_mem(ms, cont_level) == -1)
116 for (magindex = 0; magindex < nmagic; magindex++) {
117 /* if main entry matches, print it... */
118 int flush = !mget(ms, &p, s, &magic[magindex], nbytes,
121 if (magic[magindex].reln == '!') flush = 0;
123 switch (mcheck(ms, &p, &magic[magindex])) {
135 * main entry didn't match,
136 * flush its continuations
138 while (magindex < nmagic - 1 &&
139 magic[magindex + 1].cont_level != 0)
144 if (!firstline) { /* we found another match */
145 /* put a newline and '-' to do some simple formatting*/
146 if (file_printf(ms, "\n- ") == -1)
150 if ((ms->c.off[cont_level] = mprint(ms, &p, &magic[magindex]))
154 * If we printed something, we'll need to print
155 * a blank before we print something else.
157 if (magic[magindex].desc[0])
159 /* and any continuations that match */
160 if (check_mem(ms, ++cont_level) == -1)
163 while (magic[magindex+1].cont_level != 0 &&
164 ++magindex < nmagic) {
165 if (cont_level < magic[magindex].cont_level)
167 if (cont_level > magic[magindex].cont_level) {
169 * We're at the end of the level
170 * "cont_level" continuations.
172 cont_level = magic[magindex].cont_level;
174 oldoff = magic[magindex].offset;
175 if (magic[magindex].flag & OFFADD) {
176 magic[magindex].offset +=
177 ms->c.off[cont_level - 1];
180 flush = !mget(ms, &p, s, &magic[magindex], nbytes,
182 if (flush && magic[magindex].reln != '!')
185 switch (flush ? 1 : mcheck(ms, &p, &magic[magindex])) {
192 * This continuation matched.
193 * Print its message, with
194 * a blank before it if
195 * the previous item printed
196 * and this item isn't empty.
198 /* space if previous printed */
200 && (magic[magindex].nospflag == 0)
201 && (magic[magindex].desc[0] != '\0')) {
202 if (file_printf(ms, " ") == -1)
206 if ((ms->c.off[cont_level] = mprint(ms, &p,
207 &magic[magindex])) == -1)
209 if (magic[magindex].desc[0])
213 * If we see any continuations
217 if (check_mem(ms, ++cont_level) == -1)
221 magic[magindex].offset = oldoff;
225 if ((ms->flags & MAGIC_CONTINUE) == 0) {
226 return 1; /* don't keep searching */
229 return returnval; /* This is hit if -k is set or there is no match */
233 check_mem(struct magic_set *ms, unsigned int level)
237 if (level < ms->c.len)
240 len = (ms->c.len += 20) * sizeof(*ms->c.off);
241 ms->c.off = (ms->c.off == NULL) ? malloc(len) : realloc(ms->c.off, len);
242 if (ms->c.off != NULL)
249 mprint(struct magic_set *ms, union VALUETYPE *p, struct magic *m)
257 v = file_signextend(ms, m, (size_t)p->b);
258 if (file_printf(ms, m->desc, (unsigned char) v) == -1)
260 t = m->offset + sizeof(char);
266 v = file_signextend(ms, m, (size_t)p->h);
267 if (file_printf(ms, m->desc, (unsigned short) v) == -1)
269 t = m->offset + sizeof(short);
276 v = file_signextend(ms, m, p->l);
277 if (file_printf(ms, m->desc, (uint32_t) v) == -1)
279 t = m->offset + sizeof(int32_t);
284 case FILE_BESTRING16:
285 case FILE_LESTRING16:
286 if (m->reln == '=' || m->reln == '!') {
287 if (file_printf(ms, m->desc, m->value.s) == -1)
289 t = m->offset + m->vallen;
292 if (*m->value.s == '\0') {
293 char *cp = strchr(p->s,'\n');
297 if (file_printf(ms, m->desc, p->s) == -1)
299 t = m->offset + strlen(p->s);
307 if (file_printf(ms, m->desc, file_fmttime(p->l, 1)) == -1)
309 t = m->offset + sizeof(time_t);
316 if (file_printf(ms, m->desc, file_fmttime(p->l, 0)) == -1)
318 t = m->offset + sizeof(time_t);
321 if (file_printf(ms, m->desc, p->s) == -1)
323 t = m->offset + strlen(p->s);
326 if (file_printf(ms, m->desc, m->value.s) == -1)
328 t = m->offset + m->vallen;
332 file_error(ms, 0, "invalid m->type (%d) in mprint()", m->type);
339 * Convert the byte order of the data we are looking at.
340 * While we're here, let's apply the mask operation
341 * (unless you have a better idea)
344 mconvert(struct magic_set *ms, union VALUETYPE *p, struct magic *m)
349 switch (m->mask_op & 0x7F) {
365 case FILE_OPMULTIPLY:
375 if (m->mask_op & FILE_OPINVERSE)
380 switch (m->mask_op & 0x7F) {
396 case FILE_OPMULTIPLY:
406 if (m->mask_op & FILE_OPINVERSE)
413 switch (m->mask_op & 0x7F) {
429 case FILE_OPMULTIPLY:
439 if (m->mask_op & FILE_OPINVERSE)
443 case FILE_BESTRING16:
444 case FILE_LESTRING16:
448 /* Null terminate and eat *trailing* return */
449 p->s[sizeof(p->s) - 1] = '\0';
451 if (len-- && p->s[len] == '\n')
457 char *ptr1 = p->s, *ptr2 = ptr1 + 1;
459 if (len >= sizeof(p->s))
460 len = sizeof(p->s) - 1;
465 if (len-- && p->s[len] == '\n')
470 p->h = (short)((p->hs[0]<<8)|(p->hs[1]));
472 switch (m->mask_op&0x7F) {
488 case FILE_OPMULTIPLY:
498 if (m->mask_op & FILE_OPINVERSE)
505 ((p->hl[0]<<24)|(p->hl[1]<<16)|(p->hl[2]<<8)|(p->hl[3]));
507 switch (m->mask_op&0x7F) {
523 case FILE_OPMULTIPLY:
533 if (m->mask_op & FILE_OPINVERSE)
537 p->h = (short)((p->hs[1]<<8)|(p->hs[0]));
539 switch (m->mask_op&0x7F) {
555 case FILE_OPMULTIPLY:
565 if (m->mask_op & FILE_OPINVERSE)
572 ((p->hl[3]<<24)|(p->hl[2]<<16)|(p->hl[1]<<8)|(p->hl[0]));
574 switch (m->mask_op&0x7F) {
590 case FILE_OPMULTIPLY:
600 if (m->mask_op & FILE_OPINVERSE)
607 ((p->hl[1]<<24)|(p->hl[0]<<16)|(p->hl[3]<<8)|(p->hl[2]));
609 switch (m->mask_op&0x7F) {
625 case FILE_OPMULTIPLY:
635 if (m->mask_op & FILE_OPINVERSE)
642 file_error(ms, 0, "invalid type %d in mconvert()", m->type);
649 mdebug(uint32_t offset, const char *str, size_t len)
651 (void) fprintf(stderr, "mget @%d: ", offset);
652 file_showstr(stderr, str, len);
653 (void) fputc('\n', stderr);
654 (void) fputc('\n', stderr);
658 mcopy(struct magic_set *ms, union VALUETYPE *p, int type, int indir,
659 const unsigned char *s, size_t offset, size_t nbytes)
661 if (type == FILE_REGEX && indir == 0) {
663 * offset is interpreted as last line to search,
664 * (starting at 1), not as bytes-from start-of-file
666 char *b, *c, *last = NULL;
668 p->search.buflen = 0;
669 p->search.buf = NULL;
672 if ((p->search.buf = strdup((const char *)s)) == NULL) {
676 for (b = p->search.buf; offset &&
677 ((b = strchr(c = b, '\n')) || (b = strchr(c, '\r')));
680 if (b[0] == '\r' && b[1] == '\n') b++;
684 p->search.buflen = last - p->search.buf;
688 if (indir == 0 && (type == FILE_BESTRING16 || type == FILE_LESTRING16))
690 const unsigned char *src = s + offset;
691 const unsigned char *esrc = s + nbytes;
692 char *dst = p->s, *edst = &p->s[sizeof(p->s) - 1];
694 if (type == FILE_BESTRING16)
697 for (;src < esrc; src++, dst++) {
709 if (offset >= nbytes) {
710 (void)memset(p, '\0', sizeof(*p));
713 if (nbytes - offset < sizeof(*p))
714 nbytes = nbytes - offset;
718 (void)memcpy(p, s + offset, nbytes);
721 * the usefulness of padding with zeroes eludes me, it
722 * might even cause problems
724 if (nbytes < sizeof(*p))
725 (void)memset(((char *)(void *)p) + nbytes, '\0',
726 sizeof(*p) - nbytes);
731 mget(struct magic_set *ms, union VALUETYPE *p, const unsigned char *s,
732 struct magic *m, size_t nbytes, unsigned int cont_level)
734 uint32_t offset = m->offset;
736 if (mcopy(ms, p, m->type, m->flag & INDIR, s, offset, nbytes) == -1)
739 if ((ms->flags & MAGIC_DEBUG) != 0) {
740 mdebug(offset, (char *)(void *)p, sizeof(union VALUETYPE));
744 if (m->flag & INDIR) {
745 int off = m->in_offset;
746 if (m->in_op & FILE_OPINDIRECT) {
747 const union VALUETYPE *q =
748 ((const void *)(s + offset + off));
749 switch (m->in_type) {
757 off = (short)((q->hs[0]<<8)|(q->hs[1]));
760 off = (short)((q->hs[1]<<8)|(q->hs[0]));
766 off = (int32_t)((q->hl[0]<<24)|(q->hl[1]<<16)|
767 (q->hl[2]<<8)|(q->hl[3]));
770 off = (int32_t)((q->hl[3]<<24)|(q->hl[2]<<16)|
771 (q->hl[1]<<8)|(q->hl[0]));
774 off = (int32_t)((q->hl[1]<<24)|(q->hl[0]<<16)|
775 (q->hl[3]<<8)|(q->hl[2]));
779 switch (m->in_type) {
781 if (nbytes < (offset + 1)) return 0;
783 switch (m->in_op & 0x3F) {
799 case FILE_OPMULTIPLY:
811 if (m->in_op & FILE_OPINVERSE)
815 if (nbytes < (offset + 2))
818 switch (m->in_op & 0x7F) {
820 offset = (short)((p->hs[0]<<8)|
825 offset = (short)((p->hs[0]<<8)|
830 offset = (short)((p->hs[0]<<8)|
835 offset = (short)((p->hs[0]<<8)|
840 offset = (short)((p->hs[0]<<8)|
844 case FILE_OPMULTIPLY:
845 offset = (short)((p->hs[0]<<8)|
850 offset = (short)((p->hs[0]<<8)|
855 offset = (short)((p->hs[0]<<8)|
861 offset = (short)((p->hs[0]<<8)|
863 if (m->in_op & FILE_OPINVERSE)
867 if (nbytes < (offset + 2))
870 switch (m->in_op & 0x7F) {
872 offset = (short)((p->hs[1]<<8)|
877 offset = (short)((p->hs[1]<<8)|
882 offset = (short)((p->hs[1]<<8)|
887 offset = (short)((p->hs[1]<<8)|
892 offset = (short)((p->hs[1]<<8)|
896 case FILE_OPMULTIPLY:
897 offset = (short)((p->hs[1]<<8)|
902 offset = (short)((p->hs[1]<<8)|
907 offset = (short)((p->hs[1]<<8)|
913 offset = (short)((p->hs[1]<<8)|
915 if (m->in_op & FILE_OPINVERSE)
919 if (nbytes < (offset + 2))
922 switch (m->in_op & 0x7F) {
938 case FILE_OPMULTIPLY:
951 if (m->in_op & FILE_OPINVERSE)
955 if (nbytes < (offset + 4))
958 switch (m->in_op & 0x7F) {
960 offset = (int32_t)((p->hl[0]<<24)|
967 offset = (int32_t)((p->hl[0]<<24)|
974 offset = (int32_t)((p->hl[0]<<24)|
981 offset = (int32_t)((p->hl[0]<<24)|
988 offset = (int32_t)((p->hl[0]<<24)|
994 case FILE_OPMULTIPLY:
995 offset = (int32_t)((p->hl[0]<<24)|
1002 offset = (int32_t)((p->hl[0]<<24)|
1009 offset = (int32_t)((p->hl[0]<<24)|
1017 offset = (int32_t)((p->hl[0]<<24)|
1021 if (m->in_op & FILE_OPINVERSE)
1025 if (nbytes < (offset + 4))
1028 switch (m->in_op & 0x7F) {
1030 offset = (int32_t)((p->hl[3]<<24)|
1037 offset = (int32_t)((p->hl[3]<<24)|
1044 offset = (int32_t)((p->hl[3]<<24)|
1051 offset = (int32_t)((p->hl[3]<<24)|
1058 offset = (int32_t)((p->hl[3]<<24)|
1064 case FILE_OPMULTIPLY:
1065 offset = (int32_t)((p->hl[3]<<24)|
1072 offset = (int32_t)((p->hl[3]<<24)|
1079 offset = (int32_t)((p->hl[3]<<24)|
1087 offset = (int32_t)((p->hl[3]<<24)|
1091 if (m->in_op & FILE_OPINVERSE)
1095 if (nbytes < (offset + 4))
1098 switch (m->in_op & 0x7F) {
1100 offset = (int32_t)((p->hl[1]<<24)|
1107 offset = (int32_t)((p->hl[1]<<24)|
1114 offset = (int32_t)((p->hl[1]<<24)|
1121 offset = (int32_t)((p->hl[1]<<24)|
1128 offset = (int32_t)((p->hl[1]<<24)|
1134 case FILE_OPMULTIPLY:
1135 offset = (int32_t)((p->hl[1]<<24)|
1142 offset = (int32_t)((p->hl[1]<<24)|
1149 offset = (int32_t)((p->hl[1]<<24)|
1157 offset = (int32_t)((p->hl[1]<<24)|
1161 if (m->in_op & FILE_OPINVERSE)
1165 if (nbytes < (offset + 4))
1168 switch (m->in_op & 0x7F) {
1170 offset = p->l & off;
1173 offset = p->l | off;
1176 offset = p->l ^ off;
1179 offset = p->l + off;
1182 offset = p->l - off;
1184 case FILE_OPMULTIPLY:
1185 offset = p->l * off;
1188 offset = p->l / off;
1191 offset = p->l % off;
1193 /* case TOOMANYSWITCHBLOCKS:
1194 * ugh = p->eye % m->strain;
1197 * off = p->tab & m->in_gest;
1203 if (m->in_op & FILE_OPINVERSE)
1208 if (m->flag & INDIROFFADD) offset += ms->c.off[cont_level-1];
1209 if (mcopy(ms, p, m->type, 0, s, offset, nbytes) == -1)
1213 if ((ms->flags & MAGIC_DEBUG) != 0) {
1214 mdebug(offset, (char *)(void *)p,
1215 sizeof(union VALUETYPE));
1220 /* Verify we have enough data to match magic type */
1223 if (nbytes < (offset + 1)) /* should alway be true */
1230 if (nbytes < (offset + 2))
1246 if (nbytes < (offset + 4))
1253 if (nbytes < (offset + m->vallen))
1259 if (m->type == FILE_SEARCH) {
1260 size_t mlen = m->mask + m->vallen;
1261 size_t flen = nbytes - offset;
1264 p->search.buflen = mlen;
1265 p->search.buf = malloc(mlen + 1);
1266 if (p->search.buf == NULL) {
1267 file_error(ms, errno, "Cannot allocate search buffer");
1270 (void)memcpy(p->search.buf, s + offset, mlen);
1271 p->search.buf[mlen] = '\0';
1273 if (!mconvert(ms, p, m))
1279 mcheck(struct magic_set *ms, union VALUETYPE *p, struct magic *m)
1281 uint32_t l = m->value.l;
1285 if ( (m->value.s[0] == 'x') && (m->value.s[1] == '\0') ) {
1317 case FILE_BESTRING16:
1318 case FILE_LESTRING16:
1322 * What we want here is:
1323 * v = strncmp(m->value.s, p->s, m->vallen);
1324 * but ignoring any nulls. bcmp doesn't give -/+/0
1325 * and isn't universally available anyway.
1327 unsigned char *a = (unsigned char*)m->value.s;
1328 unsigned char *b = (unsigned char*)p->s;
1329 int len = m->vallen;
1332 if (0L == m->mask) { /* normal string: do it fast */
1334 if ((v = *b++ - *a++) != '\0')
1336 } else { /* combine the others */
1337 while (--len >= 0) {
1338 if ((m->mask & STRING_IGNORE_LOWERCASE) &&
1340 if ((v = tolower(*b++) - *a++) != '\0')
1342 } else if ((m->mask & STRING_COMPACT_BLANK) &&
1345 if (isspace(*b++)) {
1352 } else if (isspace(*a) &&
1353 (m->mask & STRING_COMPACT_OPTIONAL_BLANK)) {
1358 if ((v = *b++ - *a++) != '\0')
1371 if (p->search.buf == NULL)
1374 rc = regcomp(&rx, m->value.s,
1375 REG_EXTENDED|REG_NOSUB|REG_NEWLINE|
1376 ((m->mask & STRING_IGNORE_LOWERCASE) ? REG_ICASE : 0));
1378 free(p->search.buf);
1379 p->search.buf = NULL;
1380 regerror(rc, &rx, errmsg, sizeof(errmsg));
1381 file_error(ms, 0, "regex error %d, (%s)", rc, errmsg);
1384 rc = regexec(&rx, p->search.buf, 0, 0, 0);
1386 free(p->search.buf);
1387 p->search.buf = NULL;
1394 * search for a string in a certain range
1396 unsigned char *a = (unsigned char*)m->value.s;
1397 unsigned char *b = (unsigned char*)p->search.buf;
1398 size_t len, slen = m->vallen;
1400 if (slen > sizeof(m->value.s))
1401 slen = sizeof(m->value.s);
1407 while (++range <= m->mask) {
1408 while (len-- > 0 && (v = *b++ - *a++) == 0)
1411 m->offset += range - 1;
1414 if (range + slen >= p->search.buflen)
1417 a = (unsigned char*)m->value.s;
1418 b = (unsigned char*)p->search.buf + range;
1420 free(p->search.buf);
1421 p->search.buf = NULL;
1425 file_error(ms, 0, "invalid type %d in mcheck()", m->type);
1429 if (m->type != FILE_STRING && m->type != FILE_PSTRING)
1430 v = file_signextend(ms, m, v);
1434 if ((ms->flags & MAGIC_DEBUG) != 0)
1435 (void) fprintf(stderr, "%u == *any* = 1\n", v);
1441 if ((ms->flags & MAGIC_DEBUG) != 0)
1442 (void) fprintf(stderr, "%u != %u = %d\n",
1448 if ((ms->flags & MAGIC_DEBUG) != 0)
1449 (void) fprintf(stderr, "%u == %u = %d\n",
1454 if (m->flag & UNSIGNED) {
1456 if ((ms->flags & MAGIC_DEBUG) != 0)
1457 (void) fprintf(stderr, "%u > %u = %d\n",
1461 matched = (int32_t) v > (int32_t) l;
1462 if ((ms->flags & MAGIC_DEBUG) != 0)
1463 (void) fprintf(stderr, "%d > %d = %d\n",
1469 if (m->flag & UNSIGNED) {
1471 if ((ms->flags & MAGIC_DEBUG) != 0)
1472 (void) fprintf(stderr, "%u < %u = %d\n",
1476 matched = (int32_t) v < (int32_t) l;
1477 if ((ms->flags & MAGIC_DEBUG) != 0)
1478 (void) fprintf(stderr, "%d < %d = %d\n",
1484 matched = (v & l) == l;
1485 if ((ms->flags & MAGIC_DEBUG) != 0)
1486 (void) fprintf(stderr, "((%x & %x) == %x) = %d\n",
1491 matched = (v & l) != l;
1492 if ((ms->flags & MAGIC_DEBUG) != 0)
1493 (void) fprintf(stderr, "((%x & %x) != %x) = %d\n",
1499 file_error(ms, 0, "cannot happen: invalid relation `%c'",