2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2009, 2010 Joerg Sonnenberger <joerg@NetBSD.org>
5 * Copyright (c) 2007-2008 Dag-Erling Smørgrav
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * This file would be much shorter if we didn't care about command-line
33 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
34 * parts of libarchive in order to gain more detailed control of its
35 * behaviour for the purpose of implementing the -n, -o, -L and -a
39 #include <sys/queue.h>
53 #include <archive_entry.h>
55 /* command-line options */
56 static int a_opt; /* convert EOL */
57 static int C_opt; /* match case-insensitively */
58 static int c_opt; /* extract to stdout */
59 static const char *d_arg; /* directory prepended to extracted paths by pathcat(); may be NULL */
60 static int f_opt; /* update existing files only */
61 static int j_opt; /* junk directories */
62 static int L_opt; /* lowercase names */
63 static int n_opt; /* never overwrite */
64 static int o_opt; /* always overwrite */
65 static int p_opt; /* extract to stdout, quiet */
66 static int q_opt; /* quiet */
67 static int t_opt; /* test */
68 static int u_opt; /* update */
69 static int v_opt; /* verbose/list; the value 2 selects the long listing format */
70 static const char *y_str = ""; /* 4 digit year; also inserted into the listing header/footer formats */
71 static int Z1_opt; /* zipinfo mode list files only */
74 static int unzip_debug; /* gates debug(); when set, info() prints even with -q */
77 static int zipinfo_mode; /* zipinfo-style operation; main() rejects it unless Z1_opt is also set */
82 /* convenience macro: any libarchive result other than ARCHIVE_OK is reported through errorx(), using archive_error_string(a) -- so a variable named a must be in scope where the macro is used */
83 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
87 if (acret != ARCHIVE_OK) \
88 errorx("%s", archive_error_string(a)); \
92 * Indicates that last info() did not end with EOL. This helps error() et
93 * al. avoid printing an error message on the same line as an incomplete
94 * informational message.
98 /* fatal error message + errno */
/*
 * fmt and the arguments after it are printf-style.  The text written to
 * stderr is "unzip: <message>: <strerror(errno)>".
 */
100 error(const char *fmt, ...)
/* finish a partial info() line on stdout before writing to stderr */
105 fprintf(stdout, "\n");
107 fprintf(stderr, "unzip: ");
109 vfprintf(stderr, fmt, ap);
/* NOTE(review): errno is read only after the stdio calls above, which may themselves change it -- confirm */
111 fprintf(stderr, ": %s\n", strerror(errno));
115 /* fatal error message, no errno */
/*
 * fmt and the arguments after it are printf-style.  The text written to
 * stderr is "unzip: <message>" followed by a newline.
 */
117 errorx(const char *fmt, ...)
/* finish a partial info() line on stdout before writing to stderr */
122 fprintf(stdout, "\n");
124 fprintf(stderr, "unzip: ");
126 vfprintf(stderr, fmt, ap);
128 fprintf(stderr, "\n");
132 /* non-fatal error message + errno */
/*
 * fmt and the arguments after it are printf-style.  The text written to
 * stderr is "unzip: <message>: <strerror(errno)>".
 */
134 warning(const char *fmt, ...)
/* finish a partial info() line on stdout before writing to stderr */
139 fprintf(stdout, "\n");
141 fprintf(stderr, "unzip: ");
143 vfprintf(stderr, fmt, ap);
/* NOTE(review): errno is read only after the stdio calls above, which may themselves change it -- confirm */
145 fprintf(stderr, ": %s\n", strerror(errno));
148 /* non-fatal error message, no errno */
/*
 * fmt and the arguments after it are printf-style.  The text written to
 * stderr is "unzip: <message>" followed by a newline.
 */
150 warningx(const char *fmt, ...)
/* finish a partial info() line on stdout before writing to stderr */
155 fprintf(stdout, "\n");
157 fprintf(stderr, "unzip: ");
159 vfprintf(stderr, fmt, ap);
161 fprintf(stderr, "\n");
164 /* informational message (if not -q) */
/* fmt and the arguments after it are printf-style; output goes to stdout. */
166 info(const char *fmt, ...)
/* -q applies only while debugging is off */
170 if (q_opt && !unzip_debug)
173 vfprintf(stdout, fmt, ap);
/* record whether this message left the output line unterminated */
180 noeol = fmt[strlen(fmt) - 1] != '\n';
183 /* debug message (if unzip_debug) */
/* fmt and the arguments after it are printf-style; output goes to stderr. */
185 debug(const char *fmt, ...)
192 vfprintf(stderr, fmt, ap);
/* as in info(): record whether this message left the line unterminated */
199 noeol = fmt[strlen(fmt) - 1] != '\n';
202 /* duplicate a path name, possibly converting to lower case */
/* The copy is allocated with malloc(), sized len + 1 for the terminating NUL. */
204 pathdup(const char *path)
/* trailing '/' characters are left out of the copy */
210 while (len && path[len - 1] == '/')
212 if ((str = malloc(len + 1)) == NULL) {
/* lower-casing copy, one byte at a time */
217 for (i = 0; i < len; ++i)
218 str[i] = tolower((unsigned char)path[i]); /* cast keeps tolower() defined for bytes >= 0x80 */
/* verbatim copy */
220 memcpy(str, path, len);
227 /* concatenate two path names */
/*
 * The result is allocated with malloc().  A NULL prefix contributes
 * nothing (prelen is 0), so the result is then a plain copy of path;
 * otherwise it is prefix, a '/', and path.
 */
229 pathcat(const char *prefix, const char *path)
/* + 1: room for prefix's NUL, which is later overwritten with '/' */
234 prelen = prefix ? strlen(prefix) + 1 : 0;
/* + 1: path's terminating NUL */
235 len = strlen(path) + 1;
236 if ((str = malloc(prelen + len)) == NULL) {
241 memcpy(str, prefix, prelen); /* includes zero */
242 str[prelen - 1] = '/'; /* splat zero */
244 memcpy(str + prelen, path, len); /* includes zero */
250 * Pattern lists for include / exclude processing
253 STAILQ_ENTRY(pattern) link; /* tail-queue linkage from <sys/queue.h> */
257 STAILQ_HEAD(pattern_list, pattern);
258 static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include); /* filled by main() from arguments after the zipfile */
259 static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude); /* filled by getopts() from an option argument */
262 * Add an entry to a pattern list
/* The pattern text is copied, so the caller's string need not outlive the call. */
265 add_pattern(struct pattern_list *list, const char *pattern)
267 struct pattern *entry;
270 debug("adding pattern '%s'\n", pattern);
271 len = strlen(pattern);
/* a single allocation holds the list node and its copy of the pattern */
272 if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
276 memcpy(entry->pattern, pattern, len + 1); /* len + 1 copies the NUL as well */
/* appended at the tail, so patterns keep the order in which they were added */
277 STAILQ_INSERT_TAIL(list, entry, link);
281 * Match a string against a list of patterns
/* Patterns are shell-style globs as understood by fnmatch(). */
284 match_pattern(struct pattern_list *list, const char *str)
286 struct pattern *entry;
288 STAILQ_FOREACH(entry, list, link) {
/* fnmatch() returns 0 on a match; C_opt (-C) requests case-insensitive matching */
289 if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0)
296 * Verify that a given pathname is in the include list and not in the
300 accept_pathname(const char *pathname)
/* an empty include list places no restriction on the name */
303 if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
/* an empty exclude list rules nothing out */
305 if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
311 * Create the specified directory with the specified mode, taking certain
312 * precautions on the way.
315 make_dir(const char *path, int mode)
319 if (lstat(path, &sb) == 0) { /* lstat(): a symlink at path is examined itself, not followed */
320 if (S_ISDIR(sb.st_mode)) /* path is already a directory */
323 * Normally, we should either ask the user about removing
324 * the non-directory of the same name as a directory we
325 * wish to create, or respect the -n or -o command-line
326 * options. However, this may lead to a later failure or
327 * even compromise (if this non-directory happens to be a
328 * symlink to somewhere unsafe), so we don't.
332 * Don't check unlink() result; failure will cause mkdir()
333 * to fail later, which we will catch.
337 if (mkdir(path, mode) != 0 && errno != EEXIST) /* EEXIST is tolerated; any other failure is reported via error() */
338 error("mkdir('%s')", path);
342 * Ensure that all directories leading up to (but not including) the
343 * specified path exist.
345 * XXX inefficient + modifies the path in-place
348 make_parent(char *path)
/* the last '/' separates the parent directories from the final component */
353 sep = strrchr(path, '/');
/* no '/' at all, or the only one is the leading '/' */
354 if (sep == NULL || sep == path)
357 if (lstat(path, &sb) == 0) {
358 if (S_ISDIR(sb.st_mode)) {
/* visit each '/' from left to right */
369 for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) {
370 /* root in case of absolute d_arg */
374 make_dir(path, 0755); /* 0755 = rwxr-xr-x */
381 * Extract a directory.
384 extract_dir(struct archive *a, struct archive_entry *e, const char *path)
388 mode = archive_entry_mode(e) & 0777; /* keep only the permission bits */
393 * Some zipfiles contain directories with weird permissions such
394 * as 0644 or 0444. This can cause strange issues such as being
395 * unable to extract files into the directory we just created, or
396 * the user being unable to remove the directory later without
397 * first manually changing its permissions. Therefore, we whack
398 * the permissions into shape, assuming that the user wants full
399 * access and that anyone who gets read access also gets execute
408 info(" creating: %s/\n", path);
409 make_dir(path, mode);
410 ac(archive_read_data_skip(a)); /* a result other than ARCHIVE_OK is reported through errorx() by ac() */
413 static unsigned char buffer[8192]; /* shared read buffer for archive_read_data() in extract2fd() and test() */
414 static char spinner[] = { '|', '/', '-', '\\' }; /* progress characters cycled through by extract2fd() */
/*
 * Ask what to do about an existing file named *path.  The reply is read
 * from stdin; for a rename, the new name is read into *path.
 */
417 handle_existing_file(char **path)
425 "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
427 if (fgets(buf, sizeof(buf), stdin) == NULL) {
429 printf("NULL\n(EOF or read error, "
430 "treating as \"[N]one\"...)\n");
449 printf("New name: ");
/* getdelim() stores the line in *path, (re)allocating the buffer as needed */
454 len = getdelim(path, &alen, '\n', stdin);
/* strip the trailing newline */
/* NOTE(review): getdelim() returns -1 on EOF or error; len is used as an index here without a check -- confirm */
455 if ((*path)[len - 1] == '\n')
456 (*path)[len - 1] = '\0';
465 * Detect binary files by a combination of character white list and
466 * black list. NUL bytes and other control codes without use in text files
467 * result directly in switching the file to binary mode. Otherwise, at least
468 * one white-listed byte has to be found.
470 * Black-listed: 0..6, 14..25, 28..31
471 * 0xf3ffc07f = 11110011111111111100000001111111b
472 * White-listed: 9..10, 13, >= 32
473 * 0x00002600 = 00000000000000000010011000000000b
475 * See the proginfo/txtvsbin.txt in the zip sources for a detailed discussion.
477 #define BYTE_IS_BINARY(x) ((x) < 32 && (0xf3ffc07fU & (1U << (x)))) /* (x) < 32 is tested first, so the shift count stays below 32; x is evaluated more than once */
478 #define BYTE_IS_TEXT(x) ((x) >= 32 || (0x00002600U & (1U << (x)))) /* for x >= 32 the || short-circuits before the shift; x is evaluated more than once */
/* Classify the len bytes at buf using BYTE_IS_BINARY() and BYTE_IS_TEXT(). */
481 check_binary(const unsigned char *buf, size_t len)
/* rv starts at 1 and every byte of the buffer is examined in turn */
484 for (rv = 1; len--; ++buf) {
/* black-listed byte */
485 if (BYTE_IS_BINARY(*buf))
/* white-listed byte */
487 if (BYTE_IS_TEXT(*buf))
495 * Extract to a file descriptor
/* pathname is used only in messages; the data is written to fd. */
498 extract2fd(struct archive *a, char *pathname, int fd)
502 unsigned char *p, *q, *end;
508 /* loop over file contents and write to fd */
509 for (int n = 0; ; n++) {
/* progress spinner: not when writing to stdout, only on a tty, every 4th pass */
510 if (fd != STDOUT_FILENO)
511 if (tty && (n % 4) == 0)
512 info(" %c\b\b", spinner[(n / 4) % sizeof spinner]); /* \b\b moves the cursor back over what was printed */
514 len = archive_read_data(a, buffer, sizeof buffer);
519 /* left over CR from previous buffer */
/* the held-back CR is written unless this buffer starts with the LF that completes a CR LF pair */
521 if (len == 0 || buffer[0] != '\n')
522 if (write(fd, "\r", 1) != 1)
523 error("write('%s')", pathname);
533 * Detect whether this is a text file. The correct way to
534 * do this is to check the least significant bit of the
535 * "internal file attributes" field of the corresponding
536 * file header in the central directory, but libarchive
537 * does not provide access to this field, so we have to
538 * guess by looking for non-ASCII characters in the
539 * buffer. Hopefully we won't guess wrong. If we do
540 * guess wrong, we print a warning message later.
/* the guess is made once, from the first buffer only (n == 0) */
542 if (a_opt && n == 0) {
543 if (check_binary(buffer, len))
/* no conversion requested, or not treated as text: write the buffer unchanged */
548 if (!a_opt || !text) {
549 if (write(fd, buffer, len) != len) /* a short write is treated as an error */
550 error("write('%s')", pathname);
554 /* hard case: convert \r\n to \n (sigh...) */
555 for (p = buffer; p < end; p = q + 1) {
556 for (q = p; q < end; q++) {
/* a black-listed byte in data that is being converted: warn, guarded by the warn flag */
557 if (!warn && BYTE_IS_BINARY(*q)) {
558 warningx("%s may be corrupted due"
559 " to weak text file detection"
560 " heuristic", pathname);
/* write the run [p, q) */
572 if (write(fd, p, q - p) != q - p)
573 error("write('%s')", pathname);
581 * Extract a regular file.
/* path is passed by address because handle_existing_file() may replace the string it points to. */
584 extract_file(struct archive *a, struct archive_entry *e, char **path)
587 struct timespec mtime;
589 struct timespec ts[2];
591 const char *linkname;
593 mode = archive_entry_mode(e) & 0777; /* keep only the permission bits */
596 mtime.tv_sec = archive_entry_mtime(e);
597 mtime.tv_nsec = archive_entry_mtime_nsec(e);
599 /* look for existing file of same name */
601 if (lstat(*path, &sb) == 0) { /* lstat(): an existing symlink is examined itself, not followed */
602 if (u_opt || f_opt) {
603 /* check if up-to-date: an existing regular file or symlink whose mtime is not older than the entry's */
604 if ((S_ISREG(sb.st_mode) || S_ISLNK(sb.st_mode)) &&
605 (sb.st_mtim.tv_sec > mtime.tv_sec ||
606 (sb.st_mtim.tv_sec == mtime.tv_sec &&
607 sb.st_mtim.tv_nsec >= mtime.tv_nsec)))
614 /* do not overwrite */
617 check = handle_existing_file(path);
621 return; /* do not overwrite */
629 ts[0].tv_nsec = UTIME_NOW; /* ts[0] is the access time: set it to the current time */
632 /* process symlinks */
633 linkname = archive_entry_symlink(e);
634 if (linkname != NULL) {
635 if (symlink(linkname, *path) != 0)
636 error("symlink('%s')", *path);
637 info(" extracting: %s -> %s\n", *path, linkname);
/* failures to set mode or times on the link are only warnings */
638 if (lchmod(*path, mode) != 0)
639 warning("Cannot set mode for '%s'", *path);
640 /* set access and modification time */
641 if (utimensat(AT_FDCWD, *path, ts, AT_SYMLINK_NOFOLLOW) != 0) /* AT_SYMLINK_NOFOLLOW: stamp the link itself, not its target */
642 warning("utimensat('%s')", *path);
/* create the file if needed and truncate any existing contents */
646 if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
647 error("open('%s')", *path);
649 info(" extracting: %s", *path); /* no trailing newline, so info() records the line as unterminated */
651 text = extract2fd(a, *path, fd);
659 /* set access and modification time */
660 if (futimens(fd, ts) != 0)
661 error("futimens('%s')", *path);
663 error("close('%s')", *path);
667 * Extract a zipfile entry: first perform some sanity checks to ensure
668 * that it is a directory, a regular file or a symbolic link and that the
669 * path is not absolute and does not try to break out of the current
670 * directory; then call either extract_dir() or extract_file() as appropriate.
672 * This is complicated a bit by the various ways in which we need to
673 * manipulate the path name. Case conversion (if requested by the -L
674 * option) happens first, but the include / exclude patterns are applied
675 * to the full converted path name, before the directory part of the path
676 * is removed in accordance with the -j option. Sanity checks are
677 * intentionally done earlier than they need to be, so the user will get a
678 * warning about insecure paths even for files or directories which
679 * wouldn't be extracted anyway.
682 extract(struct archive *a, struct archive_entry *e)
684 char *pathname, *realpathname;
688 pathname = pathdup(archive_entry_pathname(e));
689 filetype = archive_entry_filetype(e);
/*
 * Reject absolute paths and paths with a leading or embedded "../".
 * NOTE(review): a name that is exactly ".." or that ends in "/.." matches
 * none of the three tests below (pathdup() leaves trailing slashes out of
 * the copy) -- confirm that this is acceptable.
 */
692 if (pathname[0] == '/' ||
693 strncmp(pathname, "../", 3) == 0 ||
694 strstr(pathname, "/../") != NULL) {
695 warningx("skipping insecure entry '%s'", pathname);
696 ac(archive_read_data_skip(a));
701 /* I don't think this can happen in a zipfile.. */
702 if (!S_ISDIR(filetype) && !S_ISREG(filetype) && !S_ISLNK(filetype)) {
703 warningx("skipping non-regular entry '%s'", pathname);
704 ac(archive_read_data_skip(a));
709 /* skip directories in -j case */
710 if (S_ISDIR(filetype) && j_opt) {
711 ac(archive_read_data_skip(a));
716 /* apply include / exclude patterns */
717 if (!accept_pathname(pathname)) {
718 ac(archive_read_data_skip(a));
723 /* apply -j and -d */
725 for (p = q = pathname; *p; ++p)
/* d_arg may be NULL; pathcat() then returns a plain copy of its second argument */
728 realpathname = pathcat(d_arg, q);
730 realpathname = pathcat(d_arg, pathname);
733 /* ensure that parent directory exists */
734 make_parent(realpathname);
736 if (S_ISDIR(filetype))
737 extract_dir(a, e, realpathname);
/* passed by address: extract_file() may end up replacing the string */
739 extract_file(a, e, &realpathname);
/*
 * Write the data of one entry to standard output; unzip() calls this
 * when p_opt or c_opt is set.  Entries of an unsupported type,
 * directories, and names rejected by the include / exclude patterns are
 * skipped.
 */
746 extract_stdout(struct archive *a, struct archive_entry *e)
751 pathname = pathdup(archive_entry_pathname(e));
752 filetype = archive_entry_filetype(e);
754 /* I don't think this can happen in a zipfile.. */
755 if (!S_ISDIR(filetype) && !S_ISREG(filetype) && !S_ISLNK(filetype)) {
756 warningx("skipping non-regular entry '%s'", pathname);
757 ac(archive_read_data_skip(a));
762 /* skip directories (unconditionally here, unlike the -j test in extract()) */
763 if (S_ISDIR(filetype)) {
764 ac(archive_read_data_skip(a));
769 /* apply include / exclude patterns */
770 if (!accept_pathname(pathname)) {
771 ac(archive_read_data_skip(a));
777 info("x %s\n", pathname);
/* the value returned by extract2fd() is deliberately ignored */
779 (void)extract2fd(a, pathname, STDOUT_FILENO);
785 * Print one listing line for an entry to stdout.
788 list(struct archive *a, struct archive_entry *e)
794 mtime = archive_entry_mtime(e);
795 tm = localtime(&mtime); /* NOTE(review): localtime() can return NULL; confirm that a NULL tm cannot reach strftime() */
/* NOTE(review): %G is the ISO 8601 week-based year, which can differ from the calendar year (%Y) around New Year -- confirm */
797 strftime(buf, sizeof(buf), "%m-%d-%G %R", tm);
/* two-digit form; the same caveat applies to %g versus %y */
799 strftime(buf, sizeof(buf), "%m-%d-%g %R", tm);
/* short form: size, date/time, name */
803 printf(" %8ju %s %s\n",
804 (uintmax_t)archive_entry_size(e),
805 buf, archive_entry_pathname(e));
806 } else if (v_opt == 2) {
/* long form: "Stored" and "0%" are fixed text, and both size columns print archive_entry_size() */
807 printf("%8ju Stored %7ju 0%% %s %08x %s\n",
808 (uintmax_t)archive_entry_size(e),
809 (uintmax_t)archive_entry_size(e),
812 archive_entry_pathname(e));
/* name only */
816 printf("%s\n",archive_entry_pathname(e));
/* listing never reads the entry's data */
818 ac(archive_read_data_skip(a));
822 * Extract to memory to check CRC
/* The value returned is added to error_count by unzip(). */
825 test(struct archive *a, struct archive_entry *e)
831 if (S_ISDIR(archive_entry_filetype(e)))
834 info(" testing: %s\t", archive_entry_pathname(e)); /* ends with a tab, not a newline: the result follows on the same line */
/* read the whole entry through the shared buffer */
835 while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
/* libarchive's description of what went wrong */
838 info(" %s\n", archive_error_string(a));
844 /* shouldn't be necessary, but it doesn't hurt */
845 ac(archive_read_data_skip(a));
851 * Main loop: open the zipfile, iterate over its contents and decide what
852 * to do with each entry.
/* fn is the archive to open; main() passes NULL when the zipfile argument is "-". */
855 unzip(const char *fn)
858 struct archive_entry *e;
860 uintmax_t total_size, file_count, error_count;
862 if ((a = archive_read_new()) == NULL)
863 error("archive_read_new failed");
865 ac(archive_read_support_format_zip(a)); /* only the zip format is enabled */
866 ac(archive_read_open_filename(a, fn, 8192)); /* 8192 is the block size handed to libarchive */
869 if (!p_opt && !q_opt)
870 printf("Archive: %s\n", fn); /* NOTE(review): fn may be NULL here (the "-" case) -- confirm that %s with NULL is acceptable */
/* listing headers; y_str is spliced in to adjust the width of the date column */
872 printf(" Length %sDate Time Name\n", y_str);
873 printf(" -------- %s---- ---- ----\n", y_str);
874 } else if (v_opt == 2) {
875 printf(" Length Method Size Ratio %sDate Time CRC-32 Name\n", y_str);
876 printf("-------- ------ ------- ----- %s---- ---- ------ ----\n", y_str);
884 ret = archive_read_next_header(a, &e);
885 if (ret == ARCHIVE_EOF) /* no more entries */
890 error_count += test(a, e);
893 else if (p_opt || c_opt)
894 extract_stdout(a, e);
902 total_size += archive_entry_size(e); /* running total for the listing footer */
/* listing footers: totals, with "file" pluralised unless the count is exactly 1 */
908 printf(" -------- %s-------\n", y_str);
909 printf(" %8ju %s%ju file%s\n",
910 total_size, y_str, file_count, file_count != 1 ? "s" : "");
911 } else if (v_opt == 2) {
912 printf("-------- ------- --- %s-------\n", y_str);
913 printf("%8ju %7ju 0%% %s%ju file%s\n",
914 total_size, total_size, y_str, file_count,
915 file_count != 1 ? "s" : "");
919 ac(archive_read_close(a));
920 (void)archive_read_free(a); /* result deliberately ignored */
923 if (error_count > 0) {
924 errorx("%ju checksum error(s) found.", error_count);
927 printf("No errors detected in compressed data of %s.\n",
937 fprintf(stderr, "Usage: unzip [-aCcfjLlnopqtuvyZ1] [-d dir] [-x pattern] "
/* main() uses the value returned here as an index into the argv it passed in. */
943 getopts(int argc, char *argv[])
/* restart getopt() scanning, since main() calls getopts() twice; optreset is a BSD extension */
947 optreset = optind = 1;
/* d and x take an argument; 'Z' and '1' are two separate option letters as far as getopt() is concerned */
948 while ((opt = getopt(argc, argv, "aCcd:fjLlnopqtuvx:yZ1")) != -1)
/* the option argument becomes an exclude pattern */
1001 add_pattern(&exclude, optarg);
1017 main(int argc, char *argv[])
1019 const char *zipfile;
/* is stdout a terminal?  (extract2fd() shows its spinner only when tty is set) */
1022 if (isatty(STDOUT_FILENO))
/* only the presence of UNZIP_DEBUG in the environment matters, not its value */
1025 if (getenv("UNZIP_DEBUG") != NULL)
/* echo the command line through debug(): space-separated, newline after the last argument */
1027 for (int i = 0; i < argc; ++i)
1028 debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n');
1031 * Info-ZIP's unzip(1) expects certain options to come before the
1032 * zipfile name, and others to come after - though it does not
1033 * enforce this. For simplicity, we accept *all* options both
1034 * before and after the zipfile name.
1036 nopts = getopts(argc, argv);
1039 * When more of the zipinfo mode options are implemented, this
1040 * will need to change.
1042 if (zipinfo_mode && !Z1_opt) {
1043 printf("Zipinfo mode needs additional options\n");
/* the first argument after the leading options is the zipfile */
1049 zipfile = argv[nopts++];
1051 if (strcmp(zipfile, "-") == 0)
1052 zipfile = NULL; /* STDIN */
/* arguments up to the next one that starts with '-' are include patterns */
1054 while (nopts < argc && *argv[nopts] != '-')
1055 add_pattern(&include, argv[nopts++]);
1057 nopts--; /* fake argv[0] */
/* second option pass over the remaining arguments; its result is relative to argv + nopts */
1058 nopts += getopts(argc - nopts, argv + nopts);
/* more than one of the three given (assuming each flag is 0 or 1) */
1060 if (n_opt + o_opt + u_opt > 1)
1061 errorx("-n, -o and -u are contradictory");