2 * Copyright (c) Christos Zoulas 2003.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 FILE_RCSID("@(#)$File: funcs.c,v 1.122 2021/06/30 10:08:48 christos Exp $")
40 #include <unistd.h> /* for pipe2() */
42 #if defined(HAVE_WCHAR_H)
45 #if defined(HAVE_WCTYPE_H)
51 #define SIZE_MAX ((size_t)~0)
/*
 * Copy str into buf with strlcpy(), using width as the size bound.
 * NOTE(review): blen is not referenced on the lines shown here;
 * presumably it limits width to the capacity of buf -- confirm.
 */
55 file_copystr(char *buf, size_t blen, size_t width, const char *str)
59 strlcpy(buf, str, width);
/*
 * Operates on the magic set ms.
 * NOTE(review): the body is not part of this listing; judging by the
 * name it resets the output buffer kept in ms->o -- confirm before
 * relying on that.
 */
64 file_clearbuf(struct magic_set *ms)
/*
 * Read a run of decimal digits from a format string and complain, via
 * msg, when the resulting number is too large.
 *
 * msg, mlen: buffer (and its size) that receives the diagnostic text.
 * what:      name of the field; only used inside the diagnostic.
 * pp:        cursor into the format string.  NOTE(review): presumably
 *            read on entry and written back after the digits -- the
 *            lines doing so are not shown here, confirm.
 *
 * file_checkfmt() treats a zero result as failure.
 */
72 file_checkfield(char *msg, size_t mlen, const char *what, const char **pp)
/*
 * NOTE(review): the value is accumulated with no upper bound inside the
 * loop.  It is printed with %d below, so if fw is a signed int a long
 * digit run can overflow it -- confirm and consider stopping early.
 */
77 while (*p && isdigit((unsigned char)*p))
78 fw = fw * 10 + (*p++ - '0');
85 snprintf(msg, mlen, "field %s too large: %d", what, fw);
/*
 * Vet a printf-style format string before file_vprintf() hands it to
 * vasprintf().
 *
 * Walks fmt character by character.  For a conversion it skips flag
 * characters, can report "* not allowed in format", validates the
 * numeric width and precision with file_checkfield(), and requires the
 * conversion character to be alphabetic.
 *
 * msg, mlen: buffer (and its size) that receives a description of the
 *            problem found.
 * fmt:       format string to check.
 *
 * file_vprintf() treats a non-zero result as "bad format".
 */
91 file_checkfmt(char *msg, size_t mlen, const char *fmt)
93 for (const char *p = fmt; *p; p++) {
/*
 * NOTE(review): strchr() also matches the terminating NUL of the string
 * it searches, so the test below stays true when *p is '\0' (for
 * example a format that ends right after '%' or after a flag).  Unless
 * the loop body guards against that, the scan can step past the end of
 * fmt -- confirm and consider adding a `*p != '\0'` check.
 */
98 // Skip uninteresting.
99 while (strchr("#0.'+- ", *p) != NULL)
103 snprintf(msg, mlen, "* not allowed in format");
107 if (!file_checkfield(msg, mlen, "width", &p))
112 if (!file_checkfield(msg, mlen, "precision", &p))
/* Whatever follows flags, width and precision must be a letter. */
116 if (!isalpha((unsigned char)*p)) {
118 snprintf(msg, mlen, "bad format char: %c", *p);
126 * Like printf, only we append to a buffer.
/*
 * Format fmt/ap and append the result to the output kept in ms->o.
 *
 * ms:  magic set whose o.buf / o.blen describe the accumulated output.
 * fmt: printf-style format; it is vetted by file_checkfmt() first.
 * ap:  arguments for fmt.
 *
 * Callers in this file compare the result of file_printf(), which
 * forwards to this function, against -1 to detect failure.
 */
129 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
/* An error has already been recorded on this magic set. */
135 if (ms->event_flags & EVENT_HAD_ERR)
/* Reject formats that file_checkfmt() flags; tbuf holds its reason. */
138 if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) {
140 file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf);
144 len = vasprintf(&buf, fmt, ap);
/*
 * Limits enforced here: one formatted piece may not exceed 1024 bytes
 * and piece plus existing output may not exceed 1024 * 1024 bytes.
 * NOTE(review): this message uses %zu while other messages in the file
 * use SIZE_T_FORMAT "u" for size_t values -- confirm which is intended.
 */
145 if (len < 0 || (size_t)len > 1024 || len + ms->o.blen > 1024 * 1024) {
146 size_t blen = ms->o.blen;
149 file_error(ms, 0, "Output buffer space exceeded %d+%zu", len,
/* Earlier output exists: build "<old><new>" in a fresh string. */
154 if (ms->o.buf != NULL) {
155 len = asprintf(&newstr, "%s%s", ms->o.buf, buf);
167 file_error(ms, errno, "vasprintf failed");
/*
 * Variadic front end for file_vprintf(): the formatted text is appended
 * to the output of ms and the result of file_vprintf() is kept in rv.
 * NOTE(review): the va_start/va_end and return lines are not shown in
 * this listing; presumably rv is returned unchanged -- confirm.
 */
172 file_printf(struct magic_set *ms, const char *fmt, ...)
178 rv = file_vprintf(ms, fmt, ap);
184 * error - print best error message possible
/*
 * Common worker behind file_error() and file_magerror(): writes an
 * error message into the output of ms and marks ms as failed.
 *
 * ms:    magic set that receives the message.
 * error: errno-style code; its strerror() text can be appended.
 * f, va: printf-style format and arguments (argument 3 is checked as a
 *        printf format by the attribute below).
 * The callers also pass a line number as the last argument
 * (0 from file_error(), ms->line from file_magerror()).
 * NOTE(review): the conditions guarding the "line N:" prefix and the
 * strerror() suffix are not shown in this listing -- confirm.
 */
187 __attribute__((__format__(__printf__, 3, 0)))
189 file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
192 /* Only the first error is ok */
193 if (ms->event_flags & EVENT_HAD_ERR)
197 (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
/* Separate the message from whatever is already in the buffer. */
199 if (ms->o.buf && *ms->o.buf)
200 (void)file_printf(ms, " ");
201 (void)file_vprintf(ms, f, va);
203 (void)file_printf(ms, " (%s)", strerror(error));
/* Set last: file_vprintf() refuses to print once this flag is set. */
204 ms->event_flags |= EVENT_HAD_ERR;
/*
 * Report an error on ms without a magic-file line number: forwards to
 * file_error_core() with a line number of 0.
 *
 * error: errno-style code passed through to file_error_core().
 * f:     printf-style format followed by its arguments.
 */
210 file_error(struct magic_set *ms, int error, const char *f, ...)
214 file_error_core(ms, error, f, va, 0);
219 * Print an error with magic line number.
/*
 * Report an error on ms tagged with the current magic line: forwards to
 * file_error_core() with an error code of 0 and ms->line as the line
 * number.
 *
 * f: printf-style format followed by its arguments.
 */
223 file_magerror(struct magic_set *ms, const char *f, ...)
227 file_error_core(ms, 0, f, va, ms->line);
/*
 * Record an allocation failure on ms through file_error(), passing the
 * current errno and a "cannot allocate ... bytes" message.
 * NOTE(review): the argument list continues on a line not shown here;
 * presumably len supplies the byte count -- confirm.
 */
232 file_oomem(struct magic_set *ms, size_t len)
234 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes",
/*
 * Record a seek failure on ms: calls file_error() with the current
 * errno and the fixed text "error seeking".
 */
239 file_badseek(struct magic_set *ms)
241 file_error(ms, errno, "error seeking");
/*
 * Record a read failure on ms: calls file_error() with the current
 * errno and the fixed text "error reading".
 */
245 file_badread(struct magic_set *ms)
247 file_error(ms, errno, "error reading");
/*
 * Text placed between consecutive results: a newline followed by "- ".
 * Used by file_separator() and trim_separator().
 */
251 #define FILE_SEPARATOR "\n- "
/*
 * Append FILE_SEPARATOR to the output of ms and return the result of
 * file_printf() (checkdone() compares it against -1).
 */
254 file_separator(struct magic_set *ms)
256 return file_printf(ms, FILE_SEPARATOR);
/*
 * Look at the tail of the output of ms and compare it with
 * FILE_SEPARATOR.
 * NOTE(review): the statement executed when the tail matches is not
 * shown in this listing; presumably the separator is cut off -- confirm.
 */
260 trim_separator(struct magic_set *ms)
/* Nothing has been printed yet. */
264 if (ms->o.buf == NULL)
267 l = strlen(ms->o.buf);
/*
 * NOTE(review): sizeof counts the terminating NUL, whereas the
 * subtraction below uses sizeof - 1.  A buffer that consists of the
 * separator alone therefore takes this short-length branch -- confirm
 * that this is intended.
 */
268 if (l < sizeof(FILE_SEPARATOR))
/* l becomes the offset at which a trailing separator would start. */
271 l -= sizeof(FILE_SEPARATOR) - 1;
272 if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0)
/*
 * Helper used by file_buffer() after a detection stage; file_buffer()
 * tests the result as a boolean.
 *
 * Checks whether MAGIC_CONTINUE is clear in ms->flags and otherwise
 * tries to append a separator with file_separator().
 * NOTE(review): the return statements and the use of *rv are not shown
 * in this listing; presumably *rv is set when file_separator() fails
 * -- confirm.
 */
279 checkdone(struct magic_set *ms, int *rv)
281 if ((ms->flags & MAGIC_CONTINUE) == 0)
283 if (file_separator(ms) == -1)
/*
 * Print the fallback answer that matches the output mode selected in
 * ms->flags.
 *
 * ms: magic set that receives the text.
 * nb: number of bytes examined; only tested for zero / non-zero here.
 *
 * NOTE(review): the return statements are not shown in this listing;
 * file_buffer() stores the result in rv -- confirm the exact values.
 */
289 file_default(struct magic_set *ms, size_t nb)
/* MIME mode: empty input is "x-empty", anything else "octet-stream". */
291 if (ms->flags & MAGIC_MIME) {
292 if ((ms->flags & MAGIC_MIME_TYPE) &&
293 file_printf(ms, "application/%s",
294 nb ? "octet-stream" : "x-empty") == -1)
/* Apple mode: fixed placeholder string. */
298 if (ms->flags & MAGIC_APPLE) {
299 if (file_printf(ms, "UNKNUNKN") == -1)
/* Extension mode: fixed placeholder string. */
303 if (ms->flags & MAGIC_EXTENSION) {
304 if (file_printf(ms, "???") == -1)
312 * The magic detection functions return:
/*
 * Run the detection stages over one buffer and write the description
 * into the output of ms.
 *
 * ms:     magic set; ms->flags selects which stages run and the output
 *         mode, and MAGIC_DEBUG traces each stage on stderr.
 * fd, st: descriptor and stat data handed to buffer_init(); the ELF
 *         stage is skipped when fd is -1.
 * inname: file name, used by the os2 apptype and compression stages.
 * buf/nb: data to examine and its length.
 *
 * Stages visible here, in order: encoding, os2 apptype, compression,
 * tar, JSON, CSV, CDF, ELF, soft magic, text, then the defaults.  Each
 * stage can be switched off with its MAGIC_NO_CHECK_* flag.
 * NOTE(review): the tests on m after each stage and the jump targets
 * are not shown in this listing.
 */
319 file_buffer(struct magic_set *ms, int fd, struct stat *st,
320 const char *inname __attribute__ ((__unused__)),
321 const void *buf, size_t nb)
323 int m = 0, rv = 0, looks_text = 0;
324 const char *code = NULL;
325 const char *code_mime = "binary";
326 const char *def = "data";
327 const char *ftype = NULL;
/* Wrap the raw data in b and remember the file mode from its stat. */
331 buffer_init(&b, fd, st, buf, nb);
332 ms->mode = b.st.st_mode;
337 } else if (nb == 1) {
338 def = "very short file (no magic)";
/* Encoding detection fills in code, code_mime and ftype. */
342 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
343 looks_text = file_encoding(ms, &b, NULL, 0,
344 &code, &code_mime, &ftype);
/* OS/2 application type; needs a file name. */
348 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
349 m = file_os2_apptype(ms, inname, &b);
350 if ((ms->flags & MAGIC_DEBUG) != 0)
351 (void)fprintf(stderr, "[try os2_apptype %d]\n", m);
363 /* try compression stuff */
364 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) {
365 m = file_zmagic(ms, &b, inname);
366 if ((ms->flags & MAGIC_DEBUG) != 0)
367 (void)fprintf(stderr, "[try zmagic %d]\n", m);
373 /* Check if we have a tar file */
374 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) {
375 m = file_is_tar(ms, &b);
376 if ((ms->flags & MAGIC_DEBUG) != 0)
377 (void)fprintf(stderr, "[try tar %d]\n", m);
379 if (checkdone(ms, &rv))
384 /* Check if we have a JSON file */
385 if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) {
386 m = file_is_json(ms, &b);
387 if ((ms->flags & MAGIC_DEBUG) != 0)
388 (void)fprintf(stderr, "[try json %d]\n", m);
390 if (checkdone(ms, &rv))
395 /* Check if we have a CSV file */
396 if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) {
397 m = file_is_csv(ms, &b, looks_text);
398 if ((ms->flags & MAGIC_DEBUG) != 0)
399 (void)fprintf(stderr, "[try csv %d]\n", m);
401 if (checkdone(ms, &rv))
406 /* Check if we have a CDF file */
407 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) {
408 m = file_trycdf(ms, &b);
409 if ((ms->flags & MAGIC_DEBUG) != 0)
410 (void)fprintf(stderr, "[try cdf %d]\n", m);
412 if (checkdone(ms, &rv))
/*
 * ELF stage: needs more than 5 bytes and a real descriptor.  The ELF
 * text is produced between file_push_buffer() and file_pop_buffer()
 * and kept in rbuf; the soft magic stage below prints rbuf when it
 * reports a match.
 */
417 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) {
420 * We matched something in the file, so this
421 * *might* be an ELF file, and the file is at
422 * least 5 bytes long, so if it's an ELF file
423 * it has at least one byte past the ELF magic
424 * number - try extracting information from the
425 * ELF headers that cannot easily be extracted
426 * with rules in the magic file. We we don't
427 * print the information yet.
429 if ((pb = file_push_buffer(ms)) == NULL)
432 rv = file_tryelf(ms, &b);
433 rbuf = file_pop_buffer(ms, pb);
/*
 * NOTE(review): the ELF result is stored in rv above, yet the trace
 * below prints m, which this stage does not assign -- it looks like
 * the previous stage's value is reported; confirm.
 */
438 if ((ms->flags & MAGIC_DEBUG) != 0)
439 (void)fprintf(stderr, "[try elf %d]\n", m);
443 /* try soft magic tests */
444 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) {
445 m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text);
446 if ((ms->flags & MAGIC_DEBUG) != 0)
447 (void)fprintf(stderr, "[try softmagic %d]\n", m);
448 if (m == 1 && rbuf) {
449 if (file_printf(ms, "%s", rbuf) == -1)
453 if (checkdone(ms, &rv))
458 /* try text properties */
459 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
461 m = file_ascmagic(ms, &b, looks_text);
462 if ((ms->flags & MAGIC_DEBUG) != 0)
463 (void)fprintf(stderr, "[try ascmagic %d]\n", m);
/* Fallbacks: the mode-specific default, then the plain def text. */
473 rv = file_default(ms, nb);
475 if (file_printf(ms, "%s", def) == -1)
/* MIME encoding output; "; charset=" only when a type is printed too. */
480 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
481 if (ms->flags & MAGIC_MIME_TYPE)
482 if (file_printf(ms, "; charset=") == -1)
484 if (file_printf(ms, "%s", code_mime) == -1)
/*
 * Prepare ms for a new query.
 *
 * checkloaded: when non-zero, report "no magic files loaded" if
 *              ms->mlist[0] is NULL.
 *
 * Clears EVENT_HAD_ERR so that printing works again.
 * NOTE(review): the lines between the check and the flag reset, and the
 * return values, are not shown in this listing.
 */
500 file_reset(struct magic_set *ms, int checkloaded)
502 if (checkloaded && ms->mlist[0] == NULL) {
503 file_error(ms, 0, "no magic files loaded");
511 ms->event_flags &= ~EVENT_HAD_ERR;
/*
 * Write the byte at *(o) to the output cursor n as a backslash followed
 * by three octal digits (bits 7-6, 5-3 and 2-0), advancing n by four.
 * Both arguments are evaluated several times, so pass plain pointer
 * variables only.
 * NOTE(review): the closing lines of the macro are not shown in this
 * listing; presumably o is advanced there as well -- confirm.
 */
516 #define OCTALIFY(n, o) \
518 (void)(*(n)++ = '\\', \
519 *(n)++ = ((CAST(uint32_t, *(o)) >> 6) & 3) + '0', \
520 *(n)++ = ((CAST(uint32_t, *(o)) >> 3) & 7) + '0', \
521 *(n)++ = ((CAST(uint32_t, *(o)) >> 0) & 7) + '0', \
/*
 * Return the accumulated output of ms as a C string, with a printable
 * copy built in ms->o.pbuf.
 *
 * Early checks: EVENT_HAD_ERR, MAGIC_RAW and an absent ms->o.buf.
 * Otherwise a buffer large enough for every byte to become a
 * four-character octal escape is allocated, and the text is copied
 * either character by character via mbrtowc()/iswprint() (when the
 * wide-character functions are available) or byte by byte via isprint().
 * NOTE(review): the return statements and the escaping calls are not
 * shown in this listing; presumably non-printable input goes through
 * OCTALIFY -- confirm.
 */
524 protected const char *
525 file_getbuffer(struct magic_set *ms)
527 char *pbuf, *op, *np;
530 if (ms->event_flags & EVENT_HAD_ERR)
533 if (ms->flags & MAGIC_RAW)
536 if (ms->o.buf == NULL)
539 /* * 4 is for octal representation, + 1 is for NUL */
540 len = strlen(ms->o.buf);
/* Guard the len * 4 + 1 size computation against wrap-around. */
541 if (len > (SIZE_MAX - 1) / 4) {
/* realloc() result goes to a temporary, so o.pbuf survives failure. */
546 if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) {
547 file_oomem(ms, psize);
552 #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
557 size_t bytesconsumed;
559 (void)memset(&state, 0, sizeof(mbstate_t));
566 bytesconsumed = mbrtowc(&nextchar, op,
567 CAST(size_t, eop - op), &state);
/* (size_t)-1: invalid sequence, (size_t)-2: incomplete sequence. */
568 if (bytesconsumed == CAST(size_t, -1) ||
569 bytesconsumed == CAST(size_t, -2)) {
/* Printable wide character: copy its bytes through unchanged. */
574 if (iswprint(nextchar)) {
575 (void)memcpy(np, op, bytesconsumed);
579 while (bytesconsumed-- > 0)
585 /* Parsing succeeded as a multi-byte sequence */
/* Byte-wise path over the same buffers. */
591 for (np = ms->o.pbuf, op = ms->o.buf; *op;) {
592 if (isprint(CAST(unsigned char, *op))) {
/*
 * Make sure ms->c.li has an entry for level and reset that entry.
 *
 * ms:    magic set owning the level_info array (ms->c.li, ms->c.len).
 * level: index that must be valid afterwards.
 *
 * When level is beyond the current length, the array is grown to
 * 20 + level entries.
 * NOTE(review): the return statements are not shown in this listing.
 */
603 file_check_mem(struct magic_set *ms, unsigned int level)
607 if (level >= ms->c.len) {
/*
 * NOTE(review): ms->c.len is updated before the allocation is known to
 * have succeeded, and the realloc() result is stored straight into
 * ms->c.li, so on failure the old array is no longer reachable --
 * confirm and consider a temporary pointer.
 */
608 len = (ms->c.len = 20 + level) * sizeof(*ms->c.li);
609 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ?
611 realloc(ms->c.li, len));
612 if (ms->c.li == NULL) {
/* Start the level with a clean match state. */
617 ms->c.li[level].got_match = 0;
618 #ifdef ENABLE_CONDITIONALS
619 ms->c.li[level].last_match = 0;
620 ms->c.li[level].last_cond = COND_NONE;
621 #endif /* ENABLE_CONDITIONALS */
/*
 * Read-only query on ms (the parameter is const).
 * NOTE(review): the body is not part of this listing; judging by the
 * name it reports the length of the text printed so far, presumably
 * ms->o.blen -- confirm.
 */
626 file_printedlen(const struct magic_set *ms)
/*
 * Replace matches of the extended regular expression pat in the output
 * of ms with rep.
 *
 * ms:  magic set whose o.buf is rewritten.
 * pat: pattern, compiled with REG_EXTENDED.
 * rep: replacement text; it is passed as a %s argument, so it is not
 *      interpreted as a format string.
 *
 * NOTE(review): the loop bookkeeping, cleanup and return values are not
 * shown in this listing.
 */
632 file_replace(struct magic_set *ms, const char *pat, const char *rep)
637 rc = file_regcomp(&rx, pat, REG_EXTENDED);
639 file_regerror(&rx, rc, ms);
643 while (file_regexec(&rx, ms->o.buf, 1, &rm, 0) == 0) {
/*
 * Cut the buffer at the start of the match, then append the
 * replacement followed by whatever came after the match.
 */
644 ms->o.buf[rm.rm_so] = '\0';
645 if (file_printf(ms, "%s%s", rep,
646 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1)
/*
 * Compile pat into rx after switching LC_CTYPE to the "C" locale, and
 * remember the previous locale in rx so it can be restored later.
 *
 * rx:    regex wrapper; receives the compiled pattern, the saved locale
 *        and the regcomp() result (rx->rc).
 * pat:   pattern text.
 * flags: passed through to regcomp().
 *
 * Returns the regcomp() result.
 * Two locale strategies are visible: newlocale()/uselocale(), and
 * setlocale() with a strdup()ed copy of the old name.
 * NOTE(review): the preprocessor lines selecting between them are not
 * shown in this listing.
 * NOTE(review): every failure check is an assert(), which compiles to
 * nothing when NDEBUG is defined -- confirm that is acceptable.
 */
658 file_regcomp(file_regex_t *rx, const char *pat, int flags)
661 rx->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
662 assert(rx->c_lc_ctype != NULL);
663 rx->old_lc_ctype = uselocale(rx->c_lc_ctype);
664 assert(rx->old_lc_ctype != NULL);
666 rx->old_lc_ctype = setlocale(LC_CTYPE, NULL);
667 assert(rx->old_lc_ctype != NULL);
668 rx->old_lc_ctype = strdup(rx->old_lc_ctype);
669 assert(rx->old_lc_ctype != NULL);
670 (void)setlocale(LC_CTYPE, "C");
674 return rx->rc = regcomp(&rx->rx, pat, flags);
/*
 * Run the compiled pattern in rx against str.
 *
 * nmatch, pmatch: size of, and storage for, the match offsets.
 * eflags:         passed through to regexec().
 *
 * Returns the regexec() result.  pmatch is zeroed first so callers
 * never read uninitialised offsets.
 */
678 file_regexec(file_regex_t *rx, const char *str, size_t nmatch,
679 regmatch_t* pmatch, int eflags)
682 /* XXX: force initialization because glibc does not always do this */
684 memset(pmatch, 0, nmatch * sizeof(*pmatch));
685 return regexec(&rx->rx, str, nmatch, pmatch, eflags);
/*
 * Undo the locale switch made by file_regcomp() and release the locale
 * data saved in rx.
 * Mirrors the two strategies of file_regcomp(): uselocale()/freelocale()
 * or setlocale()/free().
 * NOTE(review): the call releasing the compiled pattern and the
 * preprocessor lines are not shown in this listing.
 */
689 file_regfree(file_regex_t *rx)
694 (void)uselocale(rx->old_lc_ctype);
695 freelocale(rx->c_lc_ctype);
697 (void)setlocale(LC_CTYPE, rx->old_lc_ctype);
698 free(rx->old_lc_ctype);
/*
 * Turn a regex error code into a magic error on ms.
 *
 * rx: regex wrapper; supplies the compiled state and the pattern text.
 * rc: error code to describe.
 * ms: magic set that receives the message through file_magerror().
 */
703 file_regerror(file_regex_t *rx, int rc, struct magic_set *ms)
707 (void)regerror(rc, &rx->rx, errmsg, sizeof(errmsg));
708 file_magerror(ms, "regex error %d for `%s', (%s)", rc, rx->pat,
/*
 * Save the current output state of ms in a freshly allocated
 * file_pushbuf_t and return it.  file_buffer() treats a NULL result as
 * failure.
 *
 * The saved fields visible here are ms->o.blen and ms->offset.
 * NOTE(review): the handling of ms->o.buf and the reset of the live
 * state are not shown in this listing.
 */
712 protected file_pushbuf_t *
713 file_push_buffer(struct magic_set *ms)
717 if (ms->event_flags & EVENT_HAD_ERR)
720 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL)
724 pb->blen = ms->o.blen;
725 pb->offset = ms->offset;
/*
 * Counterpart of file_push_buffer(): restore ms->o.blen and ms->offset
 * from pb.  file_buffer() keeps the result as rbuf and prints it with
 * "%s".
 * NOTE(review): the error branch, the hand-over of the buffers and the
 * release of pb are not shown in this listing.
 */
735 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
739 if (ms->event_flags & EVENT_HAD_ERR) {
748 ms->o.blen = pb->blen;
749 ms->offset = pb->offset;
756 * convert string to ascii printable format.
/*
 * Copy str into buf, escaping bytes that are not printable as octal
 * digits.
 *
 * ms:          magic set; with MAGIC_RAW set every byte counts as
 *              printable.
 * buf, bufsiz: destination and its size; the last byte is kept in
 *              reserve (eptr is buf + bufsiz - 1).
 * str, slen:   source and the maximum number of bytes to read; a NUL
 *              byte ends the copy early.
 *
 * NOTE(review): eptr is computed as buf + bufsiz - 1 with no visible
 * check for bufsiz == 0 -- confirm callers never pass 0.
 */
759 file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
760 const char *str, size_t slen)
762 char *ptr, *eptr = buf + bufsiz - 1;
763 const unsigned char *s = RCAST(const unsigned char *, str);
764 const unsigned char *es = s + slen;
766 for (ptr = buf; ptr < eptr && s < es && *s; s++) {
767 if ((ms->flags & MAGIC_RAW) != 0 || isprint(*s)) {
/* Three octal digits for the byte: bits 7-6, 5-3 and 2-0. */
774 *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0';
775 *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0';
776 *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0';
/*
 * Parse a textual GUID of the form 8-4-4-4-12 hex digits from s into
 * the storage at guid, which is accessed as a struct guid.
 *
 * Returns 0 when all eleven fields of the format were converted and -1
 * otherwise.
 */
790 file_parse_guid(const char *s, uint64_t *guid)
792 struct guid *g = CAST(struct guid *, CAST(void *, guid));
794 "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
795 &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1],
796 &g->data4[2], &g->data4[3], &g->data4[4], &g->data4[5],
797 &g->data4[6], &g->data4[7]) == 11 ? 0 : -1;
/*
 * Format the GUID stored at guid (accessed as a struct guid) into str
 * as upper-case hex in the 8-4-4-4-12 layout.
 *
 * str, len: destination buffer and its size, as for snprintf().
 *
 * Returns the snprintf() result.
 */
801 file_print_guid(char *str, size_t len, const uint64_t *guid)
803 const struct guid *g = CAST(const struct guid *,
804 CAST(const void *, guid));
806 return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-"
807 "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX",
808 g->data1, g->data2, g->data3, g->data4[0], g->data4[1],
809 g->data4[2], g->data4[3], g->data4[4], g->data4[5],
810 g->data4[6], g->data4[7]);
/*
 * Create a pipe whose two descriptors are marked close-on-exec.
 *
 * fds: receives the two descriptors.
 *
 * Two variants are visible: pipe2() with O_CLOEXEC, and a fallback that
 * sets FD_CLOEXEC on each end with fcntl().
 * NOTE(review): the preprocessor lines selecting the variant and the
 * pipe() call of the fallback are not shown in this listing.  In the
 * fallback the fcntl() results are deliberately discarded.
 */
814 file_pipe_closexec(int *fds)
817 return pipe2(fds, O_CLOEXEC);
821 (void)fcntl(fds[0], F_SETFD, FD_CLOEXEC);
822 (void)fcntl(fds[1], F_SETFD, FD_CLOEXEC);
/*
 * Clear the descriptor flags of fd (and with them FD_CLOEXEC) by
 * setting them to 0.  Returns the fcntl() result.
 */
828 file_clear_closexec(int fd) {
829 return fcntl(fd, F_SETFD, 0);
833 file_strtrim(char *str)
837 while (isspace(CAST(unsigned char, *str)))
843 while (isspace(CAST(unsigned char, *last)))