2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * zmagic() - returns 0 if not recognized, uncompresses and prints
31 * information if recognized
32 * uncompress(method, old, n, newch) - uncompress old into new,
33 * using method, return sizeof new
38 FILE_RCSID("@(#)$File: compress.c,v 1.127 2020/05/31 00:11:06 christos Exp $")
52 typedef void (*sig_t)(int);
53 #endif /* HAVE_SIG_T */
54 #if !defined(__MINGW32__) && !defined(WIN32) && !defined(__MINGW64__)
55 #include <sys/ioctl.h>
57 #ifdef HAVE_SYS_WAIT_H
60 #if defined(HAVE_SYS_TIME_H)
64 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
65 #define BUILTIN_DECOMPRESS
69 #if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
74 #if defined(HAVE_XZLIB_H) && defined(XZLIBSUPPORT)
/*
 * Debug trace helper. The visible body opens /dev/tty read-write and
 * dprintf()s the caller-supplied format and arguments to that descriptor;
 * the do/while(0) wrapper makes the expansion behave as one statement.
 */
81 #define DPRINTF(...) do { \
83 tty = open("/dev/tty", O_RDWR); \
86 dprintf(tty, __VA_ARGS__); \
87 } while (/*CONSTCOND*/0)
94 * The following python code is not really used because ZLIBSUPPORT is only
95 * defined if we have a built-in zlib, and the built-in zlib handles that.
96 * That is not true for android where we have zlib.h and not -lz.
/*
 * Python one-liner that reads all of stdin, zlib-decompresses it and
 * writes the result to stdout.
 */
98 static const char zlibcode[] =
99 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
/* NULL-terminated argv that runs the one-liner above through python -c. */
101 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
/*
 * zlibcmp: check the first two bytes of buf for a zlib stream header
 * (the CMF/FLG pair of RFC 1950). Only buf[0] and buf[1] are read.
 * NOTE(review): the return statements are not visible here; file_zmagic
 * stores the result in zm, so presumably nonzero means match - confirm.
 */
104 zlibcmp(const unsigned char *buf)
/* x starts as a host byte-order probe and is later reused for the header word */
106 unsigned short x = 1;
107 unsigned char *s = CAST(unsigned char *, CAST(void *, &x));
/*
 * RFC 1950: the compression method (low nibble) is 8 for deflate, and
 * CINFO values above 7 are not allowed, so the top bit must be clear.
 */
109 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
/* s[0] is the lowest-addressed byte of x; it is 1 only on a little-endian host */
111 if (s[0] != 1) /* endianness test */
112 x = buf[0] | (buf[1] << 8);
114 x = buf[1] | (buf[0] << 8);
/*
 * lzmacmp: byte-pattern check used by the lzma entry of the compr table.
 * Reads buf[0..2] and buf[12]; the table registers it with maglen -13 and
 * file_zmagic compares the available byte count with abs(maglen) first.
 * NOTE(review): the return statements are not visible here; presumably
 * nonzero means match - confirm.
 */
122 lzmacmp(const unsigned char *buf)
/* first byte must be 0x5d and the next two must be zero */
124 if (buf[0] != 0x5d || buf[1] || buf[2])
/* byte 12 must be either 0x00 or 0xff */
126 if (buf[12] && buf[12] != 0xff)
/* Option strings handed to the external decompressor programs. */
131 #define gzip_flags "-cd"
132 #define lrzip_flags "-do"
/* lzip takes the same options as gzip */
133 #define lzip_flags gzip_flags
/*
 * NULL-terminated argv vectors for the external decompressors. Element 0
 * is the program name: uncompressbuf passes argv[0] and argv to execvp,
 * and methodname returns argv[0].
 */
135 static const char *gzip_args[] = {
136 "gzip", gzip_flags, NULL
138 static const char *uncompress_args[] = {
139 "uncompress", "-c", NULL
141 static const char *bzip2_args[] = {
144 static const char *lzip_args[] = {
145 "lzip", lzip_flags, NULL
147 static const char *xz_args[] = {
150 static const char *lrzip_args[] = {
151 "lrzip", lrzip_flags, NULL
153 static const char *lz4_args[] = {
156 static const char *zstd_args[] = {
161 #define do_bzlib NULL
/*
 * Table of recognized compression formats. Each visible entry holds either
 * a literal magic string or a matcher function, then a length (maglen),
 * the argv of the external decompressor, and a last slot that is do_zlib,
 * do_bzlib or NULL. file_zmagic treats a negative maglen as: call the
 * matcher function; otherwise it memcmp()s maglen bytes of the magic.
 * NOTE(review): field declarations other than func are not visible here.
 */
163 private const struct {
/* matcher used instead of a literal magic string (entries with negative maglen) */
166 int (*func)(const unsigned char *);
/*
 * NOTE(review): the value 2 matches the entry commented as 2, gzipped,
 * not the one commented as 3, frozen - the name looks stale; confirm.
 */
172 #define METH_FROZEN 2
177 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */
178 /* Uncompress can get stuck; so use gzip first if we have it
179 * Idea from Damien Clark, thanks! */
180 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */
181 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */
182 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */
183 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */
184 /* the standard pack utilities do not accept standard input */
185 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */
186 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkzipped */
187 /* ...only first file examined */
188 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
189 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */
190 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */
191 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */
192 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */
193 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
/* negative maglen: at least 13 bytes are needed and lzmacmp decides */
194 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */
/* negative maglen: at least 2 bytes are needed and zlibcmp decides */
196 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */
/* Forward declarations for helpers defined further down in this file. */
204 private ssize_t swrite(int, const void *, size_t);
/* number of entries in the compr table; bounds the loop in file_zmagic */
206 private size_t ncompr = __arraycount(compr);
207 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
208 unsigned char **, size_t *);
/* in-process decompressors, only declared when BUILTIN_DECOMPRESS is defined */
209 #ifdef BUILTIN_DECOMPRESS
210 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
212 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
216 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
220 private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
/* printf-style: argument 3 is the format, checked against arguments from 4 on */
224 static int makeerror(unsigned char **, size_t *, const char *, ...)
225 __attribute__((__format__(__printf__, 3, 4)));
226 private const char *methodname(size_t);
/*
 * format_decompression_error: report a decompression failure through
 * file_printf.
 *   ms  - magic set; its MAGIC_MIME flag bits are read into mime
 *   i   - index into compr, turned into a name with methodname()
 *   buf - NUL-terminated error text (it is walked up to the zero byte)
 * Returns the result of the file_printf call that was made.
 * NOTE(review): the condition choosing between the two output forms is
 * not visible here; presumably it is the mime value - confirm.
 */
229 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
232 int mime = ms->flags & MAGIC_MIME;
/* human-readable form: method name followed by the error text */
235 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
/* walk every character of the error text (loop body not visible here) */
237 for (p = buf; *p; p++)
/* MIME-style form built from two string arguments */
241 return file_printf(ms, "application/x-decompression-error-%s-%s",
/*
 * file_zmagic: when MAGIC_COMPRESS is set in ms->flags, try each compr
 * entry against the start of the buffer; on a match, decompress with
 * uncompressbuf() and run file_buffer() on the result.
 *   ms   - magic set; its flags are read and temporarily modified
 *   b    - input buffer (fbuf and flen are used)
 *   name - passed through to file_buffer() for the decompressed data
 * NOTE(review): the return statements are not visible here; the file
 * header says zmagic returns 0 if not recognized - confirm other values.
 */
246 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
248 unsigned char *newbuf = NULL;
252 int urv, prv, rv = 0;
253 int mime = ms->flags & MAGIC_MIME;
255 const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
256 size_t nbytes = b->flen;
258 struct sigaction sig_act;
/* nothing to do unless compressed-file handling was requested */
260 if ((ms->flags & MAGIC_COMPRESS) == 0)
263 for (i = 0; i < ncompr; i++) {
/* maglen may be negative, hence abs(); too-short input cannot match */
265 if (nbytes < CAST(size_t, abs(compr[i].maglen)))
/* negative maglen: the entry carries a matcher function, not magic bytes */
267 if (compr[i].maglen < 0) {
268 zm = (*compr[i].u.func)(buf);
270 zm = memcmp(buf, compr[i].u.magic,
271 CAST(size_t, compr[i].maglen)) == 0;
277 /* Prevent SIGPIPE death if child dies unexpectedly */
279 //We can use sig_act for both new and old, but
280 struct sigaction new_act;
281 memset(&new_act, 0, sizeof(new_act));
282 new_act.sa_handler = SIG_IGN;
/* sa_saved records whether the old disposition was captured in sig_act */
283 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
/* newbuf receives the output (or an error text) and nsz its length */
287 urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
288 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
289 (char *)newbuf, nsz);
/* cleared while the decompressed data is examined; set again near the end */
293 ms->flags &= ~MAGIC_COMPRESS;
295 prv = format_decompression_error(ms, i, newbuf);
297 prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
301 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
303 if (mime != MAGIC_MIME && mime != 0)
306 mime ? " compressed-encoding=" : " (")) == -1)
/*
 * Describe the original (still compressed) bytes into a pushed output
 * buffer, then pop that text and append it with file_printf.
 * NOTE(review): push/pop semantics are presumed from the names - confirm.
 */
308 if ((pb = file_push_buffer(ms)) == NULL)
311 * XXX: If file_buffer fails here, we overwrite
312 * the compressed text. FIXME.
314 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
315 if (file_pop_buffer(ms, pb) != NULL)
319 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
320 if (file_printf(ms, "%s", rbuf) == -1) {
/* the non-MIME form opened a parenthesis above and closes it here */
326 if (!mime && file_printf(ms, ")") == -1)
340 DPRINTF("rv = %d\n", rv);
/* restore the previous SIGPIPE disposition unless it already was SIG_IGN */
342 if (sa_saved && sig_act.sa_handler != SIG_IGN)
343 (void)sigaction(SIGPIPE, &sig_act, NULL);
346 ms->flags |= MAGIC_COMPRESS;
347 DPRINTF("Zmagic returns %d\n", rv);
/*
 * swrite: write n bytes from buf to descriptor fd using write(2). The
 * visible code switches on the result of write() and advances buf by the
 * number of bytes written, so short writes are continued rather than
 * treated as final. Callers in this file (file_pipe2file, writechild)
 * compare the return value with n to detect failure.
 * NOTE(review): the loop and the error cases are not visible - confirm.
 */
352 * `safe' write for sockets and pipes.
355 swrite(int fd, const void *buf, size_t n)
361 switch (rv = write(fd, buf, n)) {
368 buf = CAST(const char *, buf) + rv;
377 * `safe' read for sockets and pipes.
/*
 * sread: read up to n bytes from fd into buf with read(2).
 *   canbepipe - nonzero when fd may be a pipe; it gates the FIONREAD
 *               probe below
 * NOTE(review): return statements are not visible; callers test the
 * result with > 0, so presumably it is a byte count or -1 - confirm.
 */
380 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
388 if (fd == STDIN_FILENO)
/* FIONREAD stores the number of readable bytes in t; failure or zero enters this branch */
392 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
/* cnt counts the passes through the wait loop */
395 for (cnt = 0;; cnt++) {
/* tv_sec 0 and tv_usec 100000: a 100 ms timeout for each select() call */
397 struct timeval tout = {0, 100 * 1000};
404 * Avoid soft deadlock: do not read if there
405 * is nothing to read from sockets and pipes.
407 selrv = select(fd + 1, &check, NULL, NULL, &tout);
/* interrupted or temporarily unavailable (action not visible; presumably retry) */
409 if (errno == EINTR || errno == EAGAIN)
/* select timed out and cnt has reached 5 */
411 } else if (selrv == 0 && cnt >= 5) {
417 (void)ioctl(fd, FIONREAD, &t);
/* some bytes are pending, but fewer than the n requested */
420 if (t > 0 && CAST(size_t, t) < n) {
428 switch ((rv = read(fd, buf, n))) {
/* advance past the bytes just read */
437 buf = CAST(char *, CCAST(void *, buf)) + rv;
/*
 * file_pipe2file: copy the bytes already read (startbuf, nbytes) plus
 * everything still readable from fd into a temporary file, then dup2()
 * the temporary file onto fd and seek it back to offset 0, so that fd
 * refers to the temporary file from then on.
 * Failures are reported through file_error() with errno.
 * NOTE(review): return statements are not visible here - confirm.
 */
445 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
/* buf first holds the temp-file name template */
452 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
/*
 * NOTE(review): mktemp followed by open with O_EXCL|O_CREAT; presumably
 * the fallback for platforms without mkstemp - confirm.
 */
455 char *ptr = mktemp(buf);
456 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
464 mode_t ou = umask(0);
473 file_error(ms, errno,
474 "cannot create temporary file for pipe copy");
/* first the bytes the caller had already consumed from the pipe */
478 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
/* then drain the pipe, reusing buf as the copy buffer */
481 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
482 if (swrite(tfd, buf, CAST(size_t, r)) != r)
488 file_error(ms, errno, "error copying from pipe to temp file");
493 file_error(ms, errno, "error while writing to temp file");
498 * We duplicate the file descriptor, because fclose on a
499 * tmpfile will delete the file, but any open descriptors
500 * can still access the phantom inode.
502 if ((fd = dup2(tfd, fd)) == -1) {
503 file_error(ms, errno, "could not dup descriptor for temp file");
/* rewind so later reads start at the beginning of the copied data */
507 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
514 #ifdef BUILTIN_DECOMPRESS
/* Bit masks for the FLG byte of a gzip member header (RFC 1952). */
516 #define FHCRC (1 << 1)
517 #define FEXTRA (1 << 2)
518 #define FNAME (1 << 3)
519 #define FCOMMENT (1 << 4)
/*
 * uncompressgzipped: step over the gzip member header and hand the data
 * to uncompresszlib() with zlib = 0.
 *   old       - input bytes, starting at the gzip header
 *   newch     - receives the output buffer (set by the callee)
 *   bytes_max - output size limit, passed through
 *   n         - length of old on entry; passed on for the result length
 * Returns the uncompresszlib() result, or the makeerror() result for
 * File too short.
 * NOTE(review): the statements between the visible lines are missing
 * from this view; the comments below cover only what is shown.
 */
523 uncompressgzipped(const unsigned char *old, unsigned char **newch,
524 size_t bytes_max, size_t *n)
/* byte 3 of the header is FLG */
526 unsigned char flg = old[3];
/* the fixed part of a gzip header is 10 bytes (RFC 1952) */
527 size_t data_start = 10;
/* both length bytes of the extra field must lie inside the input */
530 if (data_start + 1 >= *n)
/* skip the two length bytes plus the little-endian length they encode */
532 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
/* scan a zero-terminated header string without running past the input */
535 while(data_start < *n && old[data_start])
539 if (flg & FCOMMENT) {
540 while(data_start < *n && old[data_start])
/* the header consumed all of the available input */
547 if (data_start >= *n)
552 return uncompresszlib(old, newch, bytes_max, n, 0);
554 return makeerror(newch, n, "File too short");
/*
 * uncompresszlib: inflate the input with zlib into a freshly malloc()ed
 * buffer.
 *   old       - compressed input
 *   newch     - receives the malloc()ed buffer (output or error text)
 *   bytes_max - capacity offered to inflate; one extra byte is allocated
 *   n         - input length on entry; set to the output length, or to
 *               the error-text length on failure
 *   zlib      - nonzero: inflateInit(); zero: inflateInit2() with -15
 * NOTE(review): return statements are not visible here - confirm.
 */
558 uncompresszlib(const unsigned char *old, unsigned char **newch,
559 size_t bytes_max, size_t *n, int zlib)
/* bytes_max + 1 leaves room for the terminator mentioned further down */
564 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
565 return makeerror(newch, n, "No buffer, %s", strerror(errno));
567 z.next_in = CCAST(Bytef *, old);
568 z.avail_in = CAST(uint32_t, *n);
570 z.avail_out = CAST(unsigned int, bytes_max);
575 /* LINTED bug in header macro */
/* zlib manual: negative windowBits selects raw deflate without a zlib header */
576 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
580 rc = inflate(&z, Z_SYNC_FLUSH);
/* anything other than progress or end of stream is treated as failure */
581 if (rc != Z_OK && rc != Z_STREAM_END)
584 *n = CAST(size_t, z.total_out);
589 /* let's keep the nul-terminate tradition */
/* failure path: the buffer is reused for the zlib message or zError text */
594 strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
595 *n = strlen(RCAST(char *, *newch));
/*
 * uncompressbzlib: decompress bzip2 data with libbz2 into a freshly
 * malloc()ed buffer.
 *   old       - compressed input
 *   newch     - receives the malloc()ed buffer (output or error text)
 *   bytes_max - capacity offered to the decoder; one extra byte is allocated
 *   n         - input length on entry; set to the output length, or to
 *               the error-text length on failure
 * NOTE(review): return statements are not visible here - confirm.
 */
602 uncompressbzlib(const unsigned char *old, unsigned char **newch,
603 size_t bytes_max, size_t *n)
608 memset(&bz, 0, sizeof(bz));
609 rc = BZ2_bzDecompressInit(&bz, 0, 0);
/*
 * NOTE(review): this early return comes after BZ2_bzDecompressInit and no
 * BZ2_bzDecompressEnd is visible on the path - possible leak of decoder
 * state when malloc fails; confirm.
 */
613 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
614 return makeerror(newch, n, "No buffer, %s", strerror(errno));
616 bz.next_in = CCAST(char *, RCAST(const char *, old));
617 bz.avail_in = CAST(uint32_t, *n);
618 bz.next_out = RCAST(char *, *newch);
619 bz.avail_out = CAST(unsigned int, bytes_max);
621 rc = BZ2_bzDecompress(&bz);
/* anything other than progress or end of stream is treated as failure */
622 if (rc != BZ_OK && rc != BZ_STREAM_END)
625 /* Assume bytes_max is within 32bit */
626 /* assert(bz.total_out_hi32 == 0); */
/* only the low 32 bits of the output count are used */
627 *n = CAST(size_t, bz.total_out_lo32);
628 rc = BZ2_bzDecompressEnd(&bz);
632 /* let's keep the nul-terminate tradition */
/* failure path: the buffer is reused for a short error text */
637 snprintf(RCAST(char *, *newch), bytes_max, "bunzip error %d", rc);
638 *n = strlen(RCAST(char *, *newch));
/*
 * uncompressxzlib: decompress xz or lzma data with liblzma into a freshly
 * malloc()ed buffer.
 *   old       - compressed input
 *   newch     - receives the malloc()ed buffer (output or error text)
 *   bytes_max - capacity offered to the decoder; one extra byte is allocated
 *   n         - input length on entry; set to the output length, or to
 *               the error-text length on failure
 * NOTE(review): return statements are not visible here - confirm.
 */
645 uncompressxzlib(const unsigned char *old, unsigned char **newch,
646 size_t bytes_max, size_t *n)
651 memset(&xz, 0, sizeof(xz));
/* auto decoder with the memory limit argument set to UINT64_MAX and no flags */
652 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
/*
 * NOTE(review): this early return comes after lzma_auto_decoder and no
 * lzma_end is visible on the path - possible leak of decoder state when
 * malloc fails; confirm.
 */
656 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
657 return makeerror(newch, n, "No buffer, %s", strerror(errno));
659 xz.next_in = CCAST(const uint8_t *, old);
660 xz.avail_in = CAST(uint32_t, *n);
661 xz.next_out = RCAST(uint8_t *, *newch);
662 xz.avail_out = CAST(unsigned int, bytes_max);
664 rc = lzma_code(&xz, LZMA_RUN);
/* anything other than progress or end of stream is treated as failure */
665 if (rc != LZMA_OK && rc != LZMA_STREAM_END)
668 *n = CAST(size_t, xz.total_out);
672 /* let's keep the nul-terminate tradition */
/* failure path: the buffer is reused for a short error text */
677 snprintf(RCAST(char *, *newch), bytes_max, "unxz error %d", rc);
678 *n = strlen(RCAST(char *, *newch));
/*
 * makeerror: build a printf-style error message and hand it back through
 * buf. The message is formatted with vasprintf() and the resulting string
 * pointer is stored in *buf; callers in this file return its result
 * directly.
 * NOTE(review): how *len and the return value are set is not visible
 * here - confirm.
 */
685 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
692 rv = vasprintf(&msg, fmt, ap);
699 *buf = RCAST(unsigned char *, msg);
/*
 * closefd: takes a descriptor array fd and an index i. uncompressbuf calls
 * it on its pipe pairs with i = 0 or i = 1 (the read and write ends that
 * pipe() fills in).
 * NOTE(review): the body is not visible here; presumably it closes fd[i]
 * and marks it as closed - confirm.
 */
705 closefd(int *fd, size_t i)
/*
 * NOTE(review): loop over both ends of a pipe pair; presumably part of
 * the closep helper that uncompressbuf calls - confirm.
 */
717 for (i = 0; i < 2; i++)
/*
 * copydesc: make descriptor number i refer to the same open file as fd
 * by calling dup2(fd, i). Returns 0 when no dup was needed; callers in
 * uncompressbuf close the source descriptor when the result is nonzero.
 * NOTE(review): the condition for the no-dup case is not visible here;
 * presumably fd already equals i - confirm.
 */
722 copydesc(int i, int fd)
725 return 0; /* "no dup was necessary" */
726 if (dup2(fd, i) == -1) {
727 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
/*
 * writechild: write the n bytes at old to fd with swrite(), after a fork
 * whose purpose (per the comment fragment below) is to avoid blocking.
 * uncompressbuf stores the return value in writepid and later waitpid()s
 * on it.
 * NOTE(review): the fork call and return statements are not visible;
 * presumably the pid of the writer process is returned - confirm.
 */
734 writechild(int fd, const void *old, size_t n)
739 * fork again, to avoid blocking because both
744 DPRINTF("Fork failed (%s)\n", strerror(errno));
/* a short or failed write is only reported through the debug trace here */
749 if (swrite(fd, old, n) != CAST(ssize_t, n)) {
750 DPRINTF("Write failed (%s)\n", strerror(errno));
/*
 * filter_error: tidy, in place, the error text held in ubuf (uncompressbuf
 * passes what it read from the stderr pipe of the decompressor).
 *   ubuf - NUL-terminated text, rewritten in place
 *   n    - length associated with the text; n + 1 bytes are moved
 * uncompressbuf assigns the result back to its byte count.
 * NOTE(review): the actions taken at each search hit and the return
 * statement are not visible here; presumably the text is cut at the first
 * newline and semicolon, reduced to what follows the last colon, and the
 * new length is returned - confirm.
 */
760 filter_error(unsigned char *ubuf, ssize_t n)
766 buf = RCAST(char *, ubuf);
/* skip leading whitespace */
767 while (isspace(CAST(unsigned char, *buf)))
769 DPRINTF("Filter error[[[%s]]]\n", buf);
/* locate the first newline */
770 if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
/* locate the first semicolon */
772 if ((p = strchr(CAST(char *, buf), ';')) != NULL)
/* locate the last colon, then skip whitespace from p */
774 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
776 while (isspace(CAST(unsigned char, *p)))
/* memmove because source and destination lie in the same buffer */
779 memmove(ubuf, p, CAST(size_t, n + 1));
781 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
/* upper-case the first character of the message */
783 *ubuf = toupper(*ubuf);
/*
 * methodname: name to show for compr entry number method (used by
 * format_decompression_error). The visible return yields the external
 * program name, compr[method].argv[0].
 * NOTE(review): the cases under BUILTIN_DECOMPRESS are not visible here.
 */
788 methodname(size_t method)
791 #ifdef BUILTIN_DECOMPRESS
806 return compr[method].argv[0];
/*
 * uncompressbuf: decompress the data for compr[method].
 * With BUILTIN_DECOMPRESS some methods are handled in-process (the four
 * calls below). Otherwise the external program compr[method].argv is run
 * with pipes attached to its stdout and stderr, and to its stdin when no
 * input descriptor is available.
 *   fd        - input descriptor, or -1 to feed old through a pipe
 *   bytes_max - maximum number of bytes read back from the child
 *   method    - index into compr
 *   old       - input bytes (length *n) used when fd is -1
 *   newch     - receives a malloc()ed buffer with output or error text
 *   n         - length of old on entry; makeerror() gets it for results
 * NOTE(review): return statements and the conditions selecting each
 * built-in decompressor are not visible here - confirm.
 */
811 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
812 unsigned char **newch, size_t* n)
822 #ifdef BUILTIN_DECOMPRESS
824 return uncompressgzipped(old, newch, bytes_max, n);
/* last argument 1: uncompresszlib then uses inflateInit() */
826 return uncompresszlib(old, newch, bytes_max, n, 1);
830 return uncompressbzlib(old, newch, bytes_max, n);
835 return uncompressxzlib(old, newch, bytes_max, n);
/* flush our own stdio buffers before the pipes and child are set up */
841 (void)fflush(stdout);
842 (void)fflush(stderr);
/* start with every pipe end marked as -1 */
844 for (i = 0; i < __arraycount(fdp); i++)
845 fdp[i][0] = fdp[i][1] = -1;
/*
 * A stdin pipe is created only when there is no input descriptor; the
 * stdout and stderr pipes are always created.
 */
847 if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
848 pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
849 closep(fdp[STDIN_FILENO]);
850 closep(fdp[STDOUT_FILENO]);
851 return makeerror(newch, n, "Cannot create pipe, %s",
855 /* For processes with large mapped virtual sizes, vfork
856 * may be _much_ faster (10-100 times) than fork.
860 return makeerror(newch, n, "Cannot vfork, %s",
865 /* Note: we are after vfork, do not modify memory
866 * in a way which confuses parent. In particular,
867 * do not modify fdp[i][j].
870 (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
871 if (copydesc(STDIN_FILENO, fd))
874 if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
875 (void) close(fdp[STDIN_FILENO][0]);
876 if (fdp[STDIN_FILENO][1] > 2)
877 (void) close(fdp[STDIN_FILENO][1]);
879 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
880 if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
881 (void) close(fdp[STDOUT_FILENO][1]);
882 if (fdp[STDOUT_FILENO][0] > 2)
883 (void) close(fdp[STDOUT_FILENO][0]);
885 if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
886 (void) close(fdp[STDERR_FILENO][1]);
887 if (fdp[STDERR_FILENO][0] > 2)
888 (void) close(fdp[STDERR_FILENO][0]);
890 (void)execvp(compr[method].argv[0],
891 RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
892 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
893 compr[method].argv[0], strerror(errno));
894 _exit(1); /* _exit(), not exit(), because of vfork */
897 /* Close write sides of child stdout/err pipes */
898 for (i = 1; i < __arraycount(fdp); i++)
900 /* Write the buffer data to child stdin, if we don't have fd */
902 closefd(fdp[STDIN_FILENO], 0);
/* writepid is waited on after the decompressor has been reaped */
903 writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
904 closefd(fdp[STDIN_FILENO], 1);
/* one extra byte so the result can be NUL terminated further down */
907 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
908 if (*newch == NULL) {
909 rv = makeerror(newch, n, "No buffer, %s",
914 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
/*
 * NOTE(review): the test below looks inverted - errno is meaningful when
 * r == -1, yet strerror(errno) is chosen when r != -1; debug output only,
 * confirm.
 */
916 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
917 r != -1 ? strerror(errno) : "no data");
/* the same buffer is reused for the stderr text of the child, tidied by filter_error */
921 (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
923 r = filter_error(*newch, r);
928 rv = makeerror(newch, n, "Read failed, %s",
931 rv = makeerror(newch, n, "No data");
936 /* NUL terminate, as every buffer is handled here. */
939 closefd(fdp[STDIN_FILENO], 1);
940 closefd(fdp[STDOUT_FILENO], 0);
941 closefd(fdp[STDERR_FILENO], 0);
/* reap the decompressor and inspect how it ended */
943 w = waitpid(pid, &status, 0);
947 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
948 DPRINTF("Child wait return %#x\n", status);
949 } else if (!WIFEXITED(status)) {
950 DPRINTF("Child not exited (%#x)\n", status);
951 } else if (WEXITSTATUS(status) != 0) {
952 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
955 /* _After_ we know decompressor has exited, our input writer
956 * definitely will exit now (at worst, writing fails in it,
957 * since output fd is closed now on the reading side).
959 w = waitpid(writepid, &status, 0);
964 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
965 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);