2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice immediately at the beginning of the file, without modification,
11 * this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * file_zmagic() - returns 0 if not recognized, uncompresses and prints
31 * information if recognized
32 * uncompressbuf(fd, bytes_max, method, old, newch, n) - uncompress old
33 * into *newch using method, leaving the size of the result in *n
38 FILE_RCSID("@(#)$File: compress.c,v 1.121 2019/05/07 02:27:11 christos Exp $")
52 typedef void (*sig_t)(int);
53 #endif /* HAVE_SIG_T */
54 #if !defined(__MINGW32__) && !defined(WIN32)
55 #include <sys/ioctl.h>
57 #ifdef HAVE_SYS_WAIT_H
60 #if defined(HAVE_SYS_TIME_H)
64 #if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
65 #define BUILTIN_DECOMPRESS
69 #if defined(HAVE_BZLIB_H)
/*
 * DPRINTF: debug trace helper.  The visible part of the macro opens
 * /dev/tty read-write and sends the printf-style arguments there with
 * dprintf(); the do { ... } while (0) wrapper makes the expansion
 * behave as one statement.
 */
76 #define DPRINTF(...) do { \
78 tty = open("/dev/tty", O_RDWR); \
81 dprintf(tty, __VA_ARGS__); \
82 } while (/*CONSTCOND*/0)
89 * The following python code is not really used because ZLIBSUPPORT is only
90 * defined if we have a built-in zlib, and the built-in zlib handles that.
91 * That is not true for android where we have zlib.h and not -lz.
/*
 * Fallback for zlib streams: a Python one-liner that reads stdin,
 * zlib-decompresses it and writes the result to stdout.  zlib_args is
 * the NULL-terminated argv used by the zlib row of the compr table.
 */
93 static const char zlibcode[] =
94 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";
96 static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
/*
 * zlibcmp: matcher used in place of a fixed signature for bare zlib
 * streams.  It is stored in the magic member of the zlib row of compr,
 * and file_zmagic() calls it on the input when a row has maglen == 0.
 * Only part of the body is visible in this listing; the final test on x
 * and the return statements are not shown.
 */
99 zlibcmp(const unsigned char *buf)
101 unsigned short x = 1;
102 unsigned char *s = CAST(unsigned char *, CAST(void *, &x));
/*
 * Low nibble of the first byte must be 8 and its top bit must be clear
 * (in RFC 1950 terms: compression method 8, window-size field below 8).
 */
104 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
/*
 * s points at the first byte of x (== 1) in memory, so s[0] reveals the
 * host byte order; the two assignments below assemble buf[0] and buf[1]
 * into x in opposite orders.
 */
106 if (s[0] != 1) /* endianness test */
107 x = buf[0] | (buf[1] << 8);
109 x = buf[1] | (buf[0] << 8);
/*
 * Command lines for the external decompressors.  Each *_args array is an
 * argv vector: program name first and, in the rows visible here, a
 * terminating NULL.  The compr table refers to these arrays and
 * uncompressbuf() hands the selected one to execvp().
 */
116 #define gzip_flags "-cd"
117 #define lrzip_flags "-do"
/* lzip is given the same flags as gzip */
118 #define lzip_flags gzip_flags
120 static const char *gzip_args[] = {
121 "gzip", gzip_flags, NULL
123 static const char *uncompress_args[] = {
124 "uncompress", "-c", NULL
126 static const char *bzip2_args[] = {
129 static const char *lzip_args[] = {
130 "lzip", lzip_flags, NULL
132 static const char *xz_args[] = {
135 static const char *lrzip_args[] = {
136 "lrzip", lrzip_flags, NULL
138 static const char *lz4_args[] = {
141 static const char *zstd_args[] = {
/*
 * With this definition the bzip2 row below has no built-in handler.
 * NOTE(review): presumably this is the branch taken when bzlib support
 * is not compiled in -- the enclosing #if is not visible here; confirm.
 */
146 #define do_bzlib NULL
/*
 * compr: table of recognised compression formats, scanned in order by
 * file_zmagic().  Judging by the rows and by how the table is used
 * elsewhere in this file, each row holds: the signature bytes (magic),
 * their length (maglen), the argv of the external decompressor (argv),
 * and a fourth member that is NULL, do_zlib or do_bzlib.  The member
 * declarations themselves are not shown in this listing.
 *
 * A maglen of 0 marks a row whose magic member is really a matcher
 * function (see the zlib row at the end).
 *
 * methodname() tests for row index 2 explicitly, so the order of the
 * rows matters.
 */
148 private const struct {
154 { "\037\235", 2, gzip_args, NULL }, /* compressed */
155 /* Uncompress can get stuck; so use gzip first if we have it
156 * Idea from Damien Clark, thanks! */
157 { "\037\235", 2, uncompress_args, NULL }, /* compressed */
158 { "\037\213", 2, gzip_args, do_zlib }, /* gzipped */
159 { "\037\236", 2, gzip_args, NULL }, /* frozen */
160 { "\037\240", 2, gzip_args, NULL }, /* SCO LZH */
161 /* the standard pack utilities do not accept standard input */
162 { "\037\036", 2, gzip_args, NULL }, /* packed */
163 { "PK\3\4", 4, gzip_args, NULL }, /* pkzipped, */
164 /* ...only first file examined */
165 { "BZh", 3, bzip2_args, do_bzlib }, /* bzip2-ed */
166 { "LZIP", 4, lzip_args, NULL }, /* lzip-ed */
167 { "\3757zXZ\0", 6, xz_args, NULL }, /* XZ Utils */
168 { "LRZI", 4, lrzip_args, NULL }, /* LRZIP */
169 { "\004\"M\030",4, lz4_args, NULL }, /* LZ4 */
170 { "\x28\xB5\x2F\xFD", 4, zstd_args, NULL }, /* zstd */
172 { RCAST(const void *, zlibcmp), 0, zlib_args, NULL }, /* zlib */
/* Forward declarations of helpers that are local to this file. */
180 private ssize_t swrite(int, const void *, size_t);
/* number of rows in compr; file_zmagic() uses it as its loop bound */
182 private size_t ncompr = __arraycount(compr);
183 private int uncompressbuf(int, size_t, size_t, const unsigned char *,
184 unsigned char **, size_t *);
185 #ifdef BUILTIN_DECOMPRESS
186 private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
188 private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
192 private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
/*
 * The format attribute asks the compiler to check calls like printf():
 * argument 3 is the format string, variadic arguments start at 4.
 */
196 static int makeerror(unsigned char **, size_t *, const char *, ...)
197 __attribute__((__format__(__printf__, 3, 4)));
198 private const char *methodname(size_t);
/*
 * format_decompression_error: report a failed decompression through
 * file_printf() and return its result.
 *
 * ms  - magic set; its MAGIC_MIME flag bits are read into mime.
 * i   - row index in compr, turned into a name by methodname().
 * buf - NUL-terminated message text (printed with %s and scanned up to
 *       its terminating NUL).
 *
 * Two output forms are visible: "ERROR:[method: message]" and an
 * "application/x-decompression-error-..." string.  The test choosing
 * between them is not shown; presumably it depends on mime.
 */
201 format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
204 int mime = ms->flags & MAGIC_MIME;
207 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
/* walk the message one character at a time (loop body not shown) */
209 for (p = buf; *p; p++)
213 return file_printf(ms, "application/x-decompression-error-%s-%s",
/*
 * file_zmagic: look for a known compression signature at the start of
 * the buffer and, if one matches, decompress the data and identify the
 * result.
 *
 * ms   - magic set; its flags and bytes_max are consulted here.
 * b    - input buffer; b->fbuf and b->flen give the bytes and their count.
 * name - handed on to file_buffer() for the decompressed data.
 *
 * The result is accumulated in rv, which starts at 0.  Several branch
 * bodies are not shown in this listing, so the notes below describe the
 * visible statements only.
 */
218 file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
220 unsigned char *newbuf = NULL;
224 int urv, prv, rv = 0;
225 int mime = ms->flags & MAGIC_MIME;
227 const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
228 size_t nbytes = b->flen;
230 struct sigaction sig_act;
/* decompression is opt-in through the MAGIC_COMPRESS flag */
232 if ((ms->flags & MAGIC_COMPRESS) == 0)
235 for (i = 0; i < ncompr; i++) {
/* the input is shorter than this signature */
237 if (nbytes < compr[i].maglen)
/* maglen 0: the magic member is a matcher function, so call it on buf */
240 if (compr[i].maglen == 0)
241 zm = (RCAST(int (*)(const unsigned char *),
242 CCAST(void *, compr[i].magic)))(buf);
/* fixed signature: compare the leading maglen bytes */
245 zm = memcmp(buf, compr[i].magic, compr[i].maglen) == 0;
250 /* Prevent SIGPIPE death if child dies unexpectedly */
252 //We can use sig_act for both new and old, but a separate new_act keeps the saved action distinct
253 struct sigaction new_act;
254 memset(&new_act, 0, sizeof(new_act));
255 new_act.sa_handler = SIG_IGN;
/* the previous SIGPIPE action is saved in sig_act for the restore below */
256 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
/*
 * newbuf and nsz receive the outcome: later lines pass newbuf either to
 * format_decompression_error() as a message or to file_buffer() as data.
 */
260 urv = uncompressbuf(fd, ms->bytes_max, i, buf, &newbuf, &nsz);
261 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
262 (char *)newbuf, nsz);
/* cleared while the result is examined; set again near the end */
266 ms->flags &= ~MAGIC_COMPRESS;
268 prv = format_decompression_error(ms, i, newbuf);
/* run the normal identification on the decompressed bytes */
270 prv = file_buffer(ms, -1, NULL, name, newbuf, nsz);
274 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
276 if (mime != MAGIC_MIME && mime != 0)
279 mime ? " compressed-encoding=" : " (")) == -1)
/*
 * The original (still compressed) bytes are identified between a
 * file_push_buffer()/file_pop_buffer() pair, and the text that comes
 * back in rbuf is then appended with file_printf().
 */
281 if ((pb = file_push_buffer(ms)) == NULL)
284 * XXX: If file_buffer fails here, we overwrite
285 * the compressed text. FIXME.
287 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) {
288 if (file_pop_buffer(ms, pb) != NULL)
292 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
293 if (file_printf(ms, "%s", rbuf) == -1) {
/* closes the " (" opened above in the non-MIME case */
299 if (!mime && file_printf(ms, ")") == -1)
313 DPRINTF("rv = %d\n", rv);
/* put the saved SIGPIPE action back, unless it already was SIG_IGN */
315 if (sa_saved && sig_act.sa_handler != SIG_IGN)
316 (void)sigaction(SIGPIPE, &sig_act, NULL);
319 ms->flags |= MAGIC_COMPRESS;
320 DPRINTF("Zmagic returns %d\n", rv);
325 * `safe' write for sockets and pipes.
/*
 * swrite: write n bytes from buf to fd.  Callers in this file compare
 * the result with n and treat any other value as a failed write.
 */
328 swrite(int fd, const void *buf, size_t n)
/* dispatch on the result of a single write() (case labels not shown) */
334 switch (rv = write(fd, buf, n)) {
/* advance past the bytes that were written */
341 buf = CAST(const char *, buf) + rv;
350 * `safe' read for sockets and pipes.
/*
 * sread: read up to n bytes from fd into buf.
 *
 * canbepipe - when non-zero, FIONREAD is queried first and, if that
 *             fails or reports nothing pending, the code goes on to
 *             poll fd with select() before reading.
 *
 * Much of the body is not shown in this listing; the notes below cover
 * the visible statements only.
 */
353 sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
361 if (fd == STDIN_FILENO)
365 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
368 for (cnt = 0;; cnt++) {
/* per-attempt timeout; presumably 0 s + 100000 us, i.e. 100 ms */
370 struct timeval tout = {0, 100 * 1000};
377 * Avoid soft deadlock: do not read if there
378 * is nothing to read from sockets and pipes.
380 selrv = select(fd + 1, &check, NULL, NULL, &tout);
382 if (errno == EINTR || errno == EAGAIN)
/* select() timed out and cnt has reached 5 (it counts from 0) */
384 } else if (selrv == 0 && cnt >= 5) {
/* ask again how many bytes are pending */
390 (void)ioctl(fd, FIONREAD, &t);
/* fewer bytes are pending than were requested */
393 if (t > 0 && CAST(size_t, t) < n) {
401 switch ((rv = read(fd, buf, n))) {
/* advance past the bytes just read */
410 buf = CAST(char *, CCAST(void *, buf)) + rv;
/*
 * file_pipe2file: copy everything readable from fd into a temporary
 * file, then make fd itself refer to that file, rewound to offset 0.
 *
 * ms       - used for error reporting through file_error().
 * fd       - source descriptor; dup2() later redirects this descriptor
 *            number to the temporary file.
 * startbuf - written to the file first (nbytes of it), before the rest
 *            of fd is copied with sread()/swrite().
 */
418 file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
425 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
/*
 * NOTE(review): mktemp() only produces a name; it is the O_EXCL on the
 * open() below that rejects a file created under that name in between.
 * mkstemp() closes that window -- the lines not shown here may already
 * select it conditionally; confirm.
 */
428 char *ptr = mktemp(buf);
429 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
437 mode_t ou = umask(0);
446 file_error(ms, errno,
447 "cannot create temporary file for pipe copy");
451 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
/* buf, which held the name template above, doubles as the copy buffer */
454 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
455 if (swrite(tfd, buf, CAST(size_t, r)) != r)
461 file_error(ms, errno, "error copying from pipe to temp file");
466 file_error(ms, errno, "error while writing to temp file");
471 * We duplicate the file descriptor, because fclose on a
472 * tmpfile will delete the file, but any open descriptors
473 * can still access the phantom inode.
475 if ((fd = dup2(tfd, fd)) == -1) {
476 file_error(ms, errno, "could not dup descriptor for temp file");
/* rewind so the copied data can be read from the start */
480 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
487 #ifdef BUILTIN_DECOMPRESS
/* bits of the gzip header FLG byte; the positions match RFC 1952 */
489 #define FHCRC (1 << 1)
490 #define FEXTRA (1 << 2)
491 #define FNAME (1 << 3)
492 #define FCOMMENT (1 << 4)
/*
 * uncompressgzipped: step over the gzip member header found in old
 * (*n bytes long) and hand the data to uncompresszlib() with zlib == 0,
 * i.e. as a raw deflate stream.  A "File too short" error from
 * makeerror() is returned when the header does not fit; the jumps to
 * that return are not shown in this listing.
 */
496 uncompressgzipped(const unsigned char *old, unsigned char **newch,
497 size_t bytes_max, size_t *n)
/* FLG is byte 3 of the header */
499 unsigned char flg = old[3];
/* the fixed part of a gzip header is 10 bytes long (RFC 1952) */
500 size_t data_start = 10;
/* both bytes of the length field must lie inside the input */
503 if (data_start + 1 >= *n)
/* optional extra field: 2-byte little-endian length, then that many bytes */
505 data_start += 2 + old[data_start] + old[data_start + 1] * 256;
/* step over a string up to its NUL, staying inside the input */
508 while(data_start < *n && old[data_start])
512 if (flg & FCOMMENT) {
/* same scan for the comment string */
513 while(data_start < *n && old[data_start])
/* the header used up all of the input */
520 if (data_start >= *n)
525 return uncompresszlib(old, newch, bytes_max, n, 0);
527 return makeerror(newch, n, "File too short");
/*
 * uncompresszlib: inflate the *n bytes at old into a newly allocated
 * buffer of bytes_max + 1 bytes, returned through *newch.
 *
 * zlib - non-zero: the stream starts with a zlib header (inflateInit);
 *        zero: raw deflate data without a header (inflateInit2 with
 *        negative window bits).
 *
 * On success *n becomes the number of bytes produced.  On the path that
 * ends this listing the buffer carries an error message instead and *n
 * its length.  The return statements are not shown here.
 */
531 uncompresszlib(const unsigned char *old, unsigned char **newch,
532 size_t bytes_max, size_t *n, int zlib)
/* one spare byte, presumably for the terminating NUL mentioned below */
537 if ((*newch = CAST(unsigned char *, malloc(bytes_max + 1))) == NULL)
538 return makeerror(newch, n, "No buffer, %s", strerror(errno));
540 z.next_in = CCAST(Bytef *, old);
541 z.avail_in = CAST(uint32_t, *n);
/* output is capped at bytes_max */
543 z.avail_out = CAST(unsigned int, bytes_max);
548 /* LINTED bug in header macro */
549 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
553 rc = inflate(&z, Z_SYNC_FLUSH);
/* anything other than progress or end-of-stream counts as a failure */
554 if (rc != Z_OK && rc != Z_STREAM_END)
557 *n = CAST(size_t, z.total_out);
562 /* let's keep the nul-terminate tradition */
/* evidently the error path: the output buffer is reused for the message */
567 strlcpy(RCAST(char *, *newch), z.msg ? z.msg : zError(rc), bytes_max);
568 *n = strlen(RCAST(char *, *newch));
/*
 * makeerror: build an error message from the printf-style fmt and its
 * arguments with vasprintf() and hand the allocated string back through
 * *buf.  Callers in this file use its return value as their own status.
 * The handling of *len and of a vasprintf() failure is not visible in
 * this listing.
 */
574 makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
581 rv = vasprintf(&msg, fmt, ap);
588 *buf = RCAST(unsigned char *, msg);
/*
 * closefd: acts on element i of the descriptor array fd.  Callers pass
 * the two-element array of one pipe together with index 0 or 1.  The
 * body is not shown in this listing.
 */
594 closefd(int *fd, size_t i)
/*
 * NOTE(review): this loop over both elements of a pair belongs to a
 * separate helper whose signature is not visible here -- presumably
 * closep(), which uncompressbuf() calls; confirm.
 */
606 for (i = 0; i < 2; i++)
/*
 * copydesc: make descriptor number i refer to the same file as fd by
 * means of dup2().  Returns 0 when no duplication was needed; callers
 * take a non-zero result to mean that fd was duplicated and close the
 * original afterwards.
 */
611 copydesc(int i, int fd)
614 return 0; /* "no dup was necessary" */
615 if (dup2(fd, i) == -1) {
616 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
/*
 * writechild: send the n bytes at old to fd from a separately forked
 * process, so that the caller is not the one blocked in the write.  The
 * caller keeps the return value as a pid and later passes it to
 * waitpid().
 */
623 writechild(int fd, const void *old, size_t n)
628 * fork again, to avoid blocking because both
633 DPRINTF("Fork failed (%s)\n", strerror(errno));
/* anything but a complete write is logged as a failure */
638 if (swrite(fd, old, n) != CAST(ssize_t, n)) {
639 DPRINTF("Write failed (%s)\n", strerror(errno));
/*
 * filter_error: tidy up, in place, the error text a decompressor wrote
 * to stderr.  The caller passes the buffer and the number of bytes read
 * and stores the return value back into its byte count.
 *
 * Visible steps: skip leading white space, look for the first newline
 * and the first ';' (what is done at those positions is not shown --
 * presumably the text is cut there), locate the last ':' and skip the
 * white space near it, move the remaining text to the start of ubuf and
 * upper-case its first character.
 */
649 filter_error(unsigned char *ubuf, ssize_t n)
655 buf = RCAST(char *, ubuf);
656 while (isspace(CAST(unsigned char, *buf)))
658 DPRINTF("Filter error[[[%s]]]\n", buf);
659 if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
661 if ((p = strchr(CAST(char *, buf), ';')) != NULL)
/* strrchr: the LAST colon, so earlier colon-separated prefixes are passed over */
663 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
665 while (isspace(CAST(unsigned char, *p)))
/* n + 1 bytes are moved; presumably the extra byte is the terminating NUL */
668 memmove(ubuf, p, CAST(size_t, n + 1));
670 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
672 *ubuf = toupper(*ubuf);
/*
 * methodname: name to report for row number method of compr.  The
 * visible fallback is the program name, compr[method].argv[0].  Under
 * BUILTIN_DECOMPRESS, row 2 and rows with maglen == 0 are singled out
 * first; the value returned for them is not shown in this listing.
 */
677 methodname(size_t method)
679 #ifdef BUILTIN_DECOMPRESS
680 /* FIXME: This doesn't cope with bzip2 */
/*
 * NOTE(review): 2 is a hard-wired table index -- in the table as listed
 * it is the gzip row that carries do_zlib.  Keep it in sync if rows are
 * ever reordered.
 */
681 if (method == 2 || compr[method].maglen == 0)
684 return compr[method].argv[0];
/*
 * uncompressbuf: decompress the input using row number method of compr.
 *
 * fd        - when not -1 it is rewound and becomes the stdin of the
 *             child; with -1 a pipe is created instead and the bytes at
 *             old are fed to the child through writechild().
 * bytes_max - limit on the amount of output read back; bytes_max + 1
 *             bytes are allocated for *newch.
 * method    - index into compr.
 * old, *n   - the input bytes and their count.
 * newch, n  - receive the result buffer and its size; on failure
 *             makeerror() is called with them to store a message.
 *
 * With BUILTIN_DECOMPRESS some rows are handled by uncompressgzipped()
 * or uncompresszlib() without starting a program.  Otherwise the
 * program named by compr[method].argv is executed with pipes on its
 * stdout and stderr; its stdout is read into *newch, and its stderr
 * text, passed through filter_error(), is read in another branch
 * (apparently when stdout yielded nothing).
 *
 * Many lines of the body are not shown in this listing; the notes below
 * describe the visible statements only.
 */
688 uncompressbuf(int fd, size_t bytes_max, size_t method, const unsigned char *old,
689 unsigned char **newch, size_t* n)
698 #ifdef BUILTIN_DECOMPRESS
699 /* FIXME: This doesn't cope with bzip2 */
701 return uncompressgzipped(old, newch, bytes_max, n);
/* maglen 0 marks the zlib row: inflate with a zlib header expected */
702 if (compr[method].maglen == 0)
703 return uncompresszlib(old, newch, bytes_max, n, 1);
/* flush our own buffered output before a child process is created */
705 (void)fflush(stdout);
706 (void)fflush(stderr);
/* mark every pipe end as unused */
708 for (i = 0; i < __arraycount(fdp); i++)
709 fdp[i][0] = fdp[i][1] = -1;
/* a stdin pipe is only needed when there is no descriptor to hand over */
711 if ((fd == -1 && pipe(fdp[STDIN_FILENO]) == -1) ||
712 pipe(fdp[STDOUT_FILENO]) == -1 || pipe(fdp[STDERR_FILENO]) == -1) {
713 closep(fdp[STDIN_FILENO]);
714 closep(fdp[STDOUT_FILENO]);
715 return makeerror(newch, n, "Cannot create pipe, %s",
719 /* For processes with large mapped virtual sizes, vfork
720 * may be _much_ faster (10-100 times) than fork.
724 return makeerror(newch, n, "Cannot vfork, %s",
729 /* Note: we are after vfork, do not modify memory
730 * in a way which confuses parent. In particular,
731 * do not modify fdp[i][j].
// child side: attach stdin, stdout and stderr, then exec the decompressor
734 (void) lseek(fd, CAST(off_t, 0), SEEK_SET);
735 if (copydesc(STDIN_FILENO, fd))
738 if (copydesc(STDIN_FILENO, fdp[STDIN_FILENO][0]))
739 (void) close(fdp[STDIN_FILENO][0]);
740 if (fdp[STDIN_FILENO][1] > 2)
741 (void) close(fdp[STDIN_FILENO][1]);
743 ///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
744 if (copydesc(STDOUT_FILENO, fdp[STDOUT_FILENO][1]))
745 (void) close(fdp[STDOUT_FILENO][1]);
746 if (fdp[STDOUT_FILENO][0] > 2)
747 (void) close(fdp[STDOUT_FILENO][0]);
749 if (copydesc(STDERR_FILENO, fdp[STDERR_FILENO][1]))
750 (void) close(fdp[STDERR_FILENO][1]);
751 if (fdp[STDERR_FILENO][0] > 2)
752 (void) close(fdp[STDERR_FILENO][0]);
754 (void)execvp(compr[method].argv[0],
755 RCAST(char *const *, RCAST(intptr_t, compr[method].argv)));
// only reached when execvp() itself failed
756 dprintf(STDERR_FILENO, "exec `%s' failed, %s",
757 compr[method].argv[0], strerror(errno));
758 _exit(1); /* _exit(), not exit(), because of vfork */
761 /* Close write sides of child stdout/err pipes */
762 for (i = 1; i < __arraycount(fdp); i++)
764 /* Write the buffer data to child stdin, if we don't have fd */
/*
 * The read end is closed here, a forked writer then feeds the data into
 * the write end, and after that the write end is closed here as well.
 */
766 closefd(fdp[STDIN_FILENO], 0);
767 writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
768 closefd(fdp[STDIN_FILENO], 1);
/* one spare byte for the terminating NUL mentioned further down */
771 *newch = CAST(unsigned char *, malloc(bytes_max + 1));
772 if (*newch == NULL) {
773 rv = makeerror(newch, n, "No buffer, %s",
778 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
/*
 * NOTE(review): the ternary below looks inverted -- errno is meaningful
 * when r == -1, yet strerror(errno) is chosen when r != -1.  It only
 * affects the debug trace; confirm and swap the operands if so.
 */
780 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
781 r != -1 ? strerror(errno) : "no data");
/* the stderr text of the child is read into the same buffer and tidied */
785 (r = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0)
787 r = filter_error(*newch, r);
792 rv = makeerror(newch, n, "Read failed, %s",
795 rv = makeerror(newch, n, "No data");
800 /* NUL terminate, as every buffer is handled here. */
803 closefd(fdp[STDIN_FILENO], 1);
804 closefd(fdp[STDOUT_FILENO], 0);
805 closefd(fdp[STDERR_FILENO], 0);
/* reap the decompressor and inspect how it ended */
807 w = waitpid(pid, &status, 0);
811 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
812 DPRINTF("Child wait return %#x\n", status);
813 } else if (!WIFEXITED(status)) {
814 DPRINTF("Child not exited (%#x)\n", status);
815 } else if (WEXITSTATUS(status) != 0) {
816 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
819 /* _After_ we know decompressor has exited, our input writer
820 * definitely will exit now (at worst, writing fails in it,
821 * since output fd is closed now on the reading side).
823 w = waitpid(writepid, &status, 0);
828 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
829 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);