2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * file_zmagic() - returns 0 if not recognized, uncompresses and prints
31 * information if recognized
32 * uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
33 * into *newch using method; the length of the result is stored in *n
38 FILE_RCSID("@(#)$File: compress.c,v 1.107 2018/04/28 18:48:22 christos Exp $")
53 typedef void (*sig_t)(int);
54 # endif /* HAVE_SIG_T */
56 #if !defined(__MINGW32__) && !defined(WIN32)
57 #include <sys/ioctl.h>
59 #ifdef HAVE_SYS_WAIT_H
62 #if defined(HAVE_SYS_TIME_H)
65 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
66 #define BUILTIN_DECOMPRESS
/*
 * Debug tracing: opens /dev/tty for reading and writing, stores the
 * descriptor in tty and sends the printf-style arguments there with
 * dprintf().  The do { } while (0) wrapper makes the expansion a single
 * statement.  The declaration of tty and any cleanup are on lines that are
 * not part of this excerpt.
 */
71 #define DPRINTF(...) do { \
73 tty = open("/dev/tty", O_RDWR); \
76 dprintf(tty, __VA_ARGS__); \
77 } while (/*CONSTCOND*/0)
84 * The following Python code is not normally used, because ZLIBSUPPORT is only
85 * defined when we have a built-in zlib, and the built-in zlib handles that case.
86 * That is not true on Android, where we have zlib.h but not -lz.
/*
 * External fallback for zlib streams: zlibcode is a Python one-liner that
 * reads all of stdin, inflates it with zlib.decompress() and writes the
 * result to stdout; zlib_args is the NULL-terminated argv that runs it
 * through "python -c".
 */
88 static const char zlibcode[] =
89 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
91 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
/*
 * Matcher for the zlib entry of compr[]: inspects the first two bytes of
 * buf.  The return statements are not part of this excerpt, so the exact
 * result convention must be read from the full source.
 */
94 zlibcmp(const unsigned char *buf)
/* s aliases the bytes of x so that the byte order of the host can be probed. */
97 unsigned char *s = CAST(unsigned char *, CAST(void *, &x));
/*
 * The low nibble of the first byte must be 8 and its top bit must be clear
 * (in the zlib header these are the compression method and the upper bit of
 * the window-size field; see RFC 1950).
 */
99 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
/*
 * Combine the two header bytes into x, choosing the byte order from the
 * probe above (presumably x was initialised to 1 -- confirm in full source).
 */
101 if (s[0] != 1) /* endianness test */
102 x = buf[0] | (buf[1] << 8);
104 x = buf[1] | (buf[0] << 8);
/*
 * Command lines of the external decompressors.  Each array is an argv whose
 * element 0 is the program name; where the initializer is visible it ends
 * with NULL.  The arrays are referenced from the compr[] table and handed
 * to execvp() by uncompressbuf().
 */
/* Option strings shared between the argv arrays below. */
111 #define gzip_flags "-cd"
112 #define lrzip_flags "-do"
113 #define lzip_flags gzip_flags
115 static const char *gzip_args[] = {
116 "gzip", gzip_flags, NULL
118 static const char *uncompress_args[] = {
119 "uncompress", "-c", NULL
121 static const char *bzip2_args[] = {
124 static const char *lzip_args[] = {
125 "lzip", lzip_flags, NULL
127 static const char *xz_args[] = {
130 static const char *lrzip_args[] = {
131 "lrzip", lrzip_flags, NULL
133 static const char *lz4_args[] = {
136 static const char *zstd_args[] = {
/*
 * Table of recognised compressed formats.  Each entry gives the magic bytes,
 * the number of bytes to compare (maglen) and the argv of the program that
 * decompresses the format.  file_zmagic() walks the table in order and
 * compares the start of the buffer with memcmp(); an entry whose maglen is 0
 * instead stores a matcher function in the magic slot, which is called with
 * the buffer.  Position in the table matters: methodname() special-cases
 * index 2 when BUILTIN_DECOMPRESS is defined.
 */
140 private const struct {
145 { "\037\235", 2, gzip_args }, /* compressed */
146 /* Uncompress can get stuck; so use gzip first if we have it
147 * Idea from Damien Clark, thanks! */
148 { "\037\235", 2, uncompress_args }, /* compressed */
149 { "\037\213", 2, gzip_args }, /* gzipped */
150 { "\037\236", 2, gzip_args }, /* frozen */
151 { "\037\240", 2, gzip_args }, /* SCO LZH */
152 /* the standard pack utilities do not accept standard input */
153 { "\037\036", 2, gzip_args }, /* packed */
154 { "PK\3\4", 4, gzip_args }, /* pkzipped, */
155 /* ...only first file examined */
156 { "BZh", 3, bzip2_args }, /* bzip2-ed */
157 { "LZIP", 4, lzip_args }, /* lzip-ed */
158 { "\3757zXZ\0", 6, xz_args }, /* XZ Utils */
159 { "LRZI", 4, lrzip_args }, /* LRZIP */
160 { "\004\"M\030",4, lz4_args }, /* LZ4 */
161 { "\x28\xB5\x2F\xFD", 4, zstd_args }, /* zstd */
/* Function-matched entry: maglen 0, magic slot holds a pointer to zlibcmp. */
163 { RCAST(const void *, zlibcmp), 0, zlib_args }, /* zlib */
/* Forward declarations of helpers defined later in this file. */
171 private ssize_t swrite(int, const void *, size_t);
/* Number of entries in the compr[] table. */
173 private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
174 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
175 unsigned char **, size_t *);
/* In-process decompressors, declared under BUILTIN_DECOMPRESS. */
176 #ifdef BUILTIN_DECOMPRESS
177 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
179 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
/*
 * makeerror() takes a printf-style format as its third argument; the
 * attribute lets the compiler check the variadic arguments against it.
 */
182 static int makeerror(unsigned char **, size_t *, const char *, ...)
183 __attribute__((__format__(__printf__, 3, 4)));
184 private const char *methodname(size_t);
/*
 * Report a decompression failure through file_printf().
 *
 * ms  - magic set; its MAGIC_MIME flag bits are read into mime
 * i   - index of the method in compr[], converted to text by methodname()
 * buf - NUL-terminated error text
 *
 * Two output forms are visible: "ERROR:[<method>: <text>]" and an
 * "application/x-decompression-error-..." string.  The statement that
 * chooses between them is not part of this excerpt (presumably the second
 * form is used when mime is non-zero).  The return value is whatever
 * file_printf() returns.
 */
187 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
190 int mime = ms->flags & MAGIC_MIME;
193 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
/*
 * Walks the message one byte at a time up to the terminating NUL; the loop
 * body that adjusts the characters is not shown here.
 */
195 for (p = buf; *p; p++)
199 return file_printf(ms, "application/x-decompression-error-%s-%s",
/*
 * Try every entry of compr[] against the start of buffer b; on a match,
 * decompress the data with uncompressbuf() and classify the result with
 * file_buffer().
 *
 * ms   - magic set (flags are consulted and temporarily modified)
 * b    - buffer holding the possibly compressed data (fbuf / flen)
 * name - passed on to file_buffer() for the decompressed data
 *
 * rv starts at 0; the statements that assign other values and the final
 * return are on lines that are not part of this excerpt.
 */
204 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
206 unsigned char *newbuf = NULL;
210 int urv, prv, rv = 0;
211 int mime = ms->flags & MAGIC_MIME;
213 const unsigned char *buf = b->fbuf;
214 size_t nbytes = b->flen;
/*
 * The work below is only relevant when MAGIC_COMPRESS is set in the flags
 * (the statement taken when it is clear is not shown).
 */
219 if ((ms->flags & MAGIC_COMPRESS) == 0)
/* Ignore SIGPIPE for the duration; the previous handler is restored below. */
223 osigpipe = signal(SIGPIPE, SIG_IGN);
225 for (i = 0; i < ncompr; i++) {
/* Guard for entries whose magic is longer than the available data. */
227 if (nbytes < compr[i].maglen)
/*
 * maglen == 0 marks an entry whose magic slot holds a matcher function;
 * all other entries are compared byte-wise with memcmp().
 */
230 if (compr[i].maglen == 0)
231 zm = (RCAST(int (*)(const unsigned char *),
232 CCAST(void *, compr[i].magic)))(buf);
235 zm = memcmp(buf, compr[i].magic, compr[i].maglen) == 0;
240 urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
241 DPRINTF("uncompressbuf = %d, %s, %zu\n", urv, (char *)newbuf,
/*
 * MAGIC_COMPRESS is cleared here and set again near the end of the
 * function, presumably so that the nested file_buffer() calls do not
 * attempt decompression themselves -- confirm against the full source.
 */
246 ms->flags &= ~MAGIC_COMPRESS;
/*
 * newbuf is either reported as an error text or classified as decompressed
 * data; the statement selecting between the two is not shown.
 */
248 prv = format_decompression_error(ms, i, newbuf);
250 prv = file_buffer(ms, -1, name, newbuf, nsz);
254 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
256 if (mime != MAGIC_MIME && mime != 0)
259 mime ? " compressed-encoding=" : " (")) == -1)
/*
 * The description of the compressed container itself is generated into a
 * pushed output buffer by running file_buffer() on the original bytes, then
 * popped and appended with file_printf().
 */
261 if ((pb = file_push_buffer(ms)) == NULL)
264 * XXX: If file_buffer fails here, we overwrite
265 * the compressed text. FIXME.
267 if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
269 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
270 if (file_printf(ms, "%s", rbuf) == -1) {
/* The closing parenthesis is only printed in the non-MIME form. */
276 if (!mime && file_printf(ms, ")") == -1)
290 DPRINTF("rv = %d\n", rv);
/* Undo the temporary changes made above: signal handler and flag bit. */
293 (void)signal(SIGPIPE, osigpipe);
296 ms->flags |= MAGIC_COMPRESS;
297 DPRINTF("Zmagic returns %d\n", rv);
302 * `safe' write for sockets and pipes.
/*
 * Write n bytes from buf to descriptor fd using write().  After a write the
 * buffer pointer is advanced by the number of bytes written, so short
 * writes are evidently anticipated.  The loop around the call, the handling
 * of the individual write() results and the return value are on lines that
 * are not part of this excerpt.
 */
305 swrite(int fd, const void *buf, size_t n)
311 switch (rv = write(fd, buf, n)) {
318 buf = CAST(const char *, buf) + rv;
327 * `safe' read for sockets and pipes.
/*
 * Read up to n bytes from fd into buf.
 *
 * canbepipe - when non-zero, FIONREAD is queried first and select() is used
 *             to wait for data before reading.
 *
 * Several branches (what happens for standard input, on timeout and for the
 * individual read() results) and the return value are on lines that are not
 * part of this excerpt.
 */
330 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
338 if (fd == STDIN_FILENO)
/* Entered when the pending-byte query fails or reports zero bytes. */
342 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
345 for (cnt = 0;; cnt++) {
/* Each select() call waits at most 100 milliseconds. */
347 struct timeval tout = {0, 100 * 1000};
354 * Avoid soft deadlock: do not read if there
355 * is nothing to read from sockets and pipes.
357 selrv = select(fd + 1, &check, NULL, NULL, &tout);
/* Interrupted or transiently failing select() calls are treated specially. */
359 if (errno == EINTR || errno == EAGAIN)
/* A timeout once cnt has reached 5 is treated specially. */
361 } else if (selrv == 0 && cnt >= 5) {
367 (void)ioctl(fd, FIONREAD, &t);
/* Fewer bytes pending than requested (presumably n is reduced to t). */
370 if (t > 0 && (size_t)t < n) {
378 switch ((rv = read(fd, buf, n))) {
/* Advance past the bytes just read. */
387 buf = CAST(char *, CCAST(void *, buf)) + rv;
/*
 * Copy the data arriving on fd into a temporary file and make fd refer to
 * that file.
 *
 * ms       - magic set used for error reporting via file_error()
 * fd       - descriptor to drain; replaced with the temporary file by dup2()
 * startbuf - bytes already consumed from fd; nbytes of them are written to
 *            the temporary file first
 *
 * The remaining parameters and the return value are on lines that are not
 * part of this excerpt.
 */
395 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
402 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
/*
 * NOTE(review): this path builds the name with mktemp() and then opens it
 * with O_EXCL|O_CREAT and mode 0600.  Any preprocessor condition selecting
 * this path over an alternative is not visible here.
 */
405 char *ptr = mktemp(buf);
406 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
421 file_error(ms, errno,
422 "cannot create temporary file for pipe copy");
/* First the bytes that were already read, then everything left on fd. */
426 if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
429 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
430 if (swrite(tfd, buf, (size_t)r) != r)
436 file_error(ms, errno, "error copying from pipe to temp file");
441 file_error(ms, errno, "error while writing to temp file");
446 * We duplicate the file descriptor, because fclose on a
447 * tmpfile will delete the file, but any open descriptors
448 * can still access the phantom inode.
450 if ((fd = dup2(tfd, fd)) == -1) {
451 file_error(ms, errno, "could not dup descriptor for temp file");
/* Rewind so that the next reader starts at the beginning of the copy. */
455 if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
462 #ifdef BUILTIN_DECOMPRESS
/* Bits of the gzip header flag byte (see RFC 1952). */
464 #define FHCRC (1 << 1)
465 #define FEXTRA (1 << 2)
466 #define FNAME (1 << 3)
467 #define FCOMMENT (1 << 4)
/*
 * Decompress a gzip member in-process.
 *
 * old       - start of the gzip data; byte 3 is the flag byte
 * newch     - receives the output (or error text) buffer
 * bytes_max - output limit passed on to uncompresszlib()
 * n         - in: number of bytes at old; updated by the callee
 *
 * The function steps data_start over the 10-byte fixed header and the
 * optional fields, then hands the data to uncompresszlib() with zlib == 0.
 * If the header runs past the available bytes the result is the
 * "File too short" error from makeerror().
 */
471 uncompressgzipped(const unsigned char *old, unsigned char **newch,
472 size_t bytes_max, size_t *n)
474 unsigned char flg = old[3];
475 size_t data_start = 10;
/* Two length bytes (low byte first) followed by that many bytes are skipped. */
478 if (data_start + 1 >= *n)
480 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
/* Scan a NUL-terminated field without leaving the buffer. */
483 while(data_start < *n && old[data_start])
487 if (flg & FCOMMENT) {
488 while(data_start < *n && old[data_start])
495 if (data_start >= *n)
/*
 * old and *n are presumably advanced past the header on lines not shown
 * before this call -- confirm against the full source.
 */
500 return uncompresszlib(old, newch, bytes_max, n, 0);
502 return makeerror(newch, n, "File too short");
/*
 * Inflate a deflate stream in-process with zlib.
 *
 * old       - compressed input
 * newch     - receives a malloc()ed buffer of bytes_max + 1 bytes
 * bytes_max - maximum number of output bytes
 * n         - in: number of input bytes; out: number of output bytes, or the
 *             length of the error text on failure
 * zlib      - non-zero: stream has a zlib header (inflateInit);
 *             zero: inflateInit2() with windowBits -15, which in zlib
 *             selects raw deflate data without a header
 *
 * The return statements other than the allocation failure are not part of
 * this excerpt.
 */
506 uncompresszlib(const unsigned char *old, unsigned char **newch,
507 size_t bytes_max, size_t *n, int zlib)
/* One extra byte is allocated beyond bytes_max (see the NUL note below). */
512 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
513 return makeerror(newch, n, "No buffer, %s", strerror(errno));
515 z.next_in = CCAST(Bytef *, old);
516 z.avail_in = CAST(uint32_t, *n);
518 z.avail_out = CAST(unsigned int, bytes_max);
523 /* LINTED bug in header macro */
524 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
/* Both Z_OK and Z_STREAM_END count as success for the single inflate() call. */
528 rc = inflate(&z, Z_SYNC_FLUSH);
529 if (rc != Z_OK && rc != Z_STREAM_END)
532 *n = (size_t)z.total_out;
537 /* let's keep the nul-terminate tradition */
/* Failure path: the output buffer is reused for the zlib error message. */
542 strlcpy((char *)*newch, z.msg ? z.msg : zError(rc), bytes_max);
543 *n = strlen((char *)*newch);
/*
 * Build an error message from a printf-style format with vasprintf() and
 * hand the newly allocated string back through *buf.  What is stored in
 * *len, the handling of a vasprintf() failure and the return value are on
 * lines that are not part of this excerpt.
 */
549 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
556 rv = vasprintf(&msg, fmt, ap);
563 *buf = (unsigned char *)msg;
/*
 * closefd() takes a descriptor pair fd and an index i into it.  Its body is
 * not part of this excerpt (presumably it closes fd[i] -- confirm).
 */
569 closefd(int *fd, size_t i)
/*
 * NOTE(review): the loop below visits both slots of a pair and appears to
 * belong to a separate helper whose header is not shown; closep() is called
 * from uncompressbuf() with a pair as its argument.
 */
581 for (i = 0; i < 2; i++)
/*
 * Install one end of the pipe pair fd as standard descriptor i using dup2():
 * fd[0] (the read end of a pipe) when i is STDIN_FILENO, fd[1] (the write
 * end) otherwise.  A dup2() failure is traced with DPRINTF; what follows the
 * trace is not part of this excerpt.
 */
586 copydesc(int i, int *fd)
588 int j = fd[i == STDIN_FILENO ? 0 : 1];
591 if (dup2(j, i) == -1) {
592 DPRINTF("dup(%d, %d) failed (%s)\n", j, i, strerror(errno));
/*
 * Feed the n bytes at old to the write end of the stdin pipe
 * (fdp[STDIN_FILENO][1]).  Judging by the trace messages and the
 * "default: parent" label, the write is performed in a forked helper
 * process while the caller waits for it; the switch and fork lines
 * themselves are not part of this excerpt.
 */
599 writechild(int fdp[3][2], const void *old, size_t n)
603 closefd(fdp[STDIN_FILENO], 0);
605 * fork again, to avoid blocking because both
610 closefd(fdp[STDOUT_FILENO], 0);
/* Anything other than a complete write is traced as a failure. */
611 if (swrite(fdp[STDIN_FILENO][1], old, n) != (ssize_t)n) {
612 DPRINTF("Write failed (%s)\n", strerror(errno));
619 DPRINTF("Fork failed (%s)\n", strerror(errno));
623 default: /* parent */
624 if (wait(&status) == -1) {
625 DPRINTF("Wait failed (%s)\n", strerror(errno));
628 DPRINTF("Grandchild wait return %#x\n", status);
/* Slot 1 of the stdin pair is released last. */
630 closefd(fdp[STDIN_FILENO], 1);
/*
 * Tidy an error message in place in ubuf (n is its length).  uncompressbuf()
 * stores the result back into its byte count, so the function presumably
 * returns the new length -- the return statement is not shown.
 *
 * Visible steps: skip leading whitespace, look for the first newline and the
 * first ';' (presumably cutting the text there), locate the last ':' and
 * skip whitespace after it, move the remaining text to the front of ubuf,
 * and upper-case the first character.
 */
634 filter_error(unsigned char *ubuf, ssize_t n)
641 while (isspace((unsigned char)*buf))
643 DPRINTF("Filter error[[[%s]]]\n", buf);
644 if ((p = strchr((char *)buf, '\n')) != NULL)
646 if ((p = strchr((char *)buf, ';')) != NULL)
648 if ((p = strrchr((char *)buf, ':')) != NULL) {
650 while (isspace((unsigned char)*p))
/* n + 1 bytes are moved so that the terminating NUL travels with the text. */
653 memmove(ubuf, p, CAST(size_t, n + 1));
655 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
657 *ubuf = toupper(*ubuf);
/*
 * Return a printable name for entry method of compr[].  By default this is
 * the program name, argv[0] of the entry.  When BUILTIN_DECOMPRESS is
 * defined, index 2 and the function-matched entries (maglen == 0) are
 * special-cased; the name returned for them is on a line not shown here.
 */
662 methodname(size_t method)
664 #ifdef BUILTIN_DECOMPRESS
665 /* FIXME: This doesn't cope with bzip2 */
666 if (method == 2 || compr[method].maglen == 0)
669 return compr[method].argv[0];
/*
 * Decompress data with method compr[method].
 *
 * fd        - descriptor to use as the decompressor input, or -1 to feed
 *             the bytes at old through a pipe instead
 * bytes_max - maximum number of output bytes to collect
 * method    - index into compr[]
 * old       - compressed bytes (*n of them)
 * newch     - receives a malloc()ed buffer with the output or an error text
 * n         - in: input length; the callee functions store a length here
 *
 * With BUILTIN_DECOMPRESS, some methods are handled in-process by
 * uncompressgzipped() and uncompresszlib().  Otherwise the external program
 * from compr[method].argv is run with pipes for its standard descriptors.
 * Its stdout is read first, and its stderr is used when stdout gives
 * nothing.  Failures are turned into messages by makeerror().  The final
 * return statement is not part of this excerpt.
 */
673 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
674 unsigned char **newch, size_t* n)
681 #ifdef BUILTIN_DECOMPRESS
682 /* FIXME: This doesn't cope with bzip2 */
684 return uncompressgzipped(old, newch, bytes_max, n);
685 if (compr[method].maglen == 0)
686 return uncompresszlib(old, newch, bytes_max, n, 1);
/* Flush stdio buffers before the process is forked. */
688 (void)fflush(stdout);
689 (void)fflush(stderr);
/* Mark every pipe slot as unused before any pipe is created. */
691 for (i = 0; i < __arraycount(fdp); i++)
692 fdp[i][0] = fdp[i][1] = -1;
/* A stdin pipe is only created when no input descriptor was supplied. */
694 if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
695 pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
696 closep(fdp[STDIN_FILENO]);
697 closep(fdp[STDOUT_FILENO]);
698 return makeerror(newch, n, "Cannot create pipe, %s",
/* A supplied descriptor takes the stdin read slot and is rewound to offset 0. */
704 fdp[STDIN_FILENO][0] = fd;
705 (void) lseek(fd, (off_t)0, SEEK_SET);
/* Wire the three pairs onto descriptors 0, 1 and 2. */
708 for (i = 0; i < __arraycount(fdp); i++)
709 copydesc(CAST(int, i), fdp[i]);
/*
 * execvp() only returns on failure; in that case the message below is
 * written to STDERR_FILENO.
 */
711 (void)execvp(compr[method].argv[0],
712 (char *const *)(intptr_t)compr[method].argv);
713 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
714 compr[method].argv[0], strerror(errno));
718 return makeerror(newch, n, "Cannot fork, %s",
721 default: /* parent */
722 for (i = 1; i < __arraycount(fdp); i++)
725 /* Write the buffer data to the child, if we don't have fd */
727 writechild(fdp, old, *n);
/* One byte more than bytes_max is allocated (see the NUL note below). */
729 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
730 if (*newch == NULL) {
731 rv = makeerror(newch, n, "No buffer, %s",
/* Preferred source: whatever the decompressor wrote to its stdout. */
736 if ((r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0)) > 0)
/*
 * NOTE(review): errno is only meaningful when the read failed (r == -1);
 * the condition in the trace below looks inverted -- confirm.
 */
738 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
739 r != -1 ? strerror(errno) : "no data");
/* Fallback: read the decompressor stderr and tidy the text. */
743 (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
745 r = filter_error(*newch, r);
750 rv = makeerror(newch, n, "Read failed, %s",
753 rv = makeerror(newch, n, "No data");
758 /* NUL terminate, as every buffer is handled here. */
761 closefd(fdp[STDIN_FILENO], 1);
762 closefd(fdp[STDOUT_FILENO], 0);
763 closefd(fdp[STDERR_FILENO], 0);
/* Collect the child and trace how it terminated. */
764 if (wait(&status) == -1) {
766 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
767 DPRINTF("Child wait return %#x\n", status);
768 } else if (!WIFEXITED(status)) {
769 DPRINTF("Child not exited (%#x)\n", status);
770 } else if (WEXITSTATUS(status) != 0) {
771 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
774 closefd(fdp[STDIN_FILENO], 0);
775 DPRINTF("Returning %p n=%zu rv=%d\n", *newch, *n, rv);