2 * Copyright (c) 2009 Joerg Sonnenberger <joerg@NetBSD.org>
3 * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer
11 * in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * This file would be much shorter if we didn't care about command-line
31 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
32 * parts of libarchive in order to gain more detailed control of its
33 * behaviour for the purpose of implementing the -n, -o, -L and -a
37 #include <sys/queue.h>
51 #include <archive_entry.h>
/*
 * File-scope state shared by the whole program: one variable per option
 * letter, plus the debug switch at the end of the group.
 */
53 /* command-line options */
54 static int a_opt; /* convert EOL */
55 static int C_opt; /* match case-insensitively */
56 static int c_opt; /* extract to stdout */
57 static const char *d_arg; /* directory */
58 static int f_opt; /* update existing files only */
59 static int j_opt; /* junk directories */
60 static int L_opt; /* lowercase names */
61 static int n_opt; /* never overwrite */
62 static int o_opt; /* always overwrite */
63 static int p_opt; /* extract to stdout, quiet */
64 static int q_opt; /* quiet */
65 static int t_opt; /* test */
66 static int u_opt; /* update */
/* v_opt is compared against 2 elsewhere to select the wide listing layout */
67 static int v_opt; /* verbose/list */
69 /* time when unzip started */
/* debugging switch; while it is set, info() keeps printing even under -q */
73 static int unzip_debug;
/*
 * Wrapper for libarchive calls: any result other than ARCHIVE_OK is
 * reported through errorx() using archive_error_string(a).  The expansion
 * refers to a variable named `a`, so one must be in scope at every use.
 */
78 /* convenience macro */
79 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
83 if (acret != ARCHIVE_OK) \
84 errorx("%s", archive_error_string(a)); \
88 * Indicates that last info() did not end with EOL. This helps error() et
89 * al. avoid printing an error message on the same line as an incomplete
90 * informational message.
94 /* fatal error message + errno */
/*
 * Writes "unzip: ", the printf-style message built from fmt and its
 * arguments, then ": " and strerror(errno) plus a newline, all on stderr.
 * The newline sent to stdout beforehand finishes an informational line
 * that was left without an EOL (the test of that state is not visible in
 * this excerpt).
 */
96 error(const char *fmt, ...)
101 fprintf(stdout, "\n");
103 fprintf(stderr, "unzip: ");
105 vfprintf(stderr, fmt, ap);
107 fprintf(stderr, ": %s\n", strerror(errno));
111 /* fatal error message, no errno */
/*
 * Same output shape as error() but without the strerror(errno) suffix:
 * "unzip: ", the formatted message and a newline go to stderr.  A newline
 * is written to stdout first to close a pending partial info() line (the
 * guarding condition is not visible in this excerpt).
 */
113 errorx(const char *fmt, ...)
118 fprintf(stdout, "\n");
120 fprintf(stderr, "unzip: ");
122 vfprintf(stderr, fmt, ap);
124 fprintf(stderr, "\n");
129 /* non-fatal error message + errno */
/*
 * Emits "unzip: ", the formatted message, ": " and strerror(errno) on
 * stderr.  The stdout newline ahead of it terminates an unfinished
 * informational line (condition not visible in this excerpt).
 */
131 warning(const char *fmt, ...)
136 fprintf(stdout, "\n");
138 fprintf(stderr, "unzip: ");
140 vfprintf(stderr, fmt, ap);
142 fprintf(stderr, ": %s\n", strerror(errno));
146 /* non-fatal error message, no errno */
/*
 * Emits "unzip: ", the formatted message and a newline on stderr; errno is
 * not consulted.  The stdout newline ahead of it terminates an unfinished
 * informational line (condition not visible in this excerpt).
 */
148 warningx(const char *fmt, ...)
153 fprintf(stdout, "\n");
155 fprintf(stderr, "unzip: ");
157 vfprintf(stderr, fmt, ap);
159 fprintf(stderr, "\n");
162 /* informational message (if not -q) */
/*
 * Formats fmt and its arguments onto stdout.  The quiet test also looks at
 * unzip_debug, so -q only takes effect while debugging is off.  Afterwards
 * noeol records whether fmt ends in something other than '\n'.
 * NOTE(review): fmt[strlen(fmt) - 1] assumes fmt is never the empty string.
 */
164 info(const char *fmt, ...)
168 if (q_opt && !unzip_debug)
171 vfprintf(stdout, fmt, ap);
178 noeol = fmt[strlen(fmt) - 1] != '\n';
181 /* debug message (if unzip_debug) */
/*
 * Formats fmt and its arguments onto stderr and then updates noeol the
 * same way info() does: it becomes true when fmt does not end in '\n'.
 * NOTE(review): fmt[strlen(fmt) - 1] assumes fmt is never the empty string.
 */
183 debug(const char *fmt, ...)
190 vfprintf(stderr, fmt, ap);
197 noeol = fmt[strlen(fmt) - 1] != '\n';
200 /* duplicate a path name, possibly converting to lower case */
/*
 * The copy is sized after a loop that looks at trailing '/' characters,
 * and len + 1 bytes are allocated for it.  The bytes are then either
 * passed through tolower() one at a time or copied with memcpy();
 * presumably the -L flag (L_opt) chooses between the two, but that
 * condition is not visible in this excerpt.
 */
202 pathdup(const char *path)
208 while (len && path[len - 1] == '/')
210 if ((str = malloc(len + 1)) == NULL) {
215 for (i = 0; i < len; ++i)
/* cast keeps tolower() away from negative char values */
216 str[i] = tolower((unsigned char)path[i]);
218 memcpy(str, path, len);
225 /* concatenate two path names */
/*
 * prelen is strlen(prefix) + 1, or 0 when prefix is NULL; len is
 * strlen(path) + 1.  One buffer of prelen + len bytes receives the prefix,
 * whose terminating NUL is overwritten with '/', followed by path together
 * with its own terminator.
 */
227 pathcat(const char *prefix, const char *path)
232 prelen = prefix ? strlen(prefix) + 1 : 0;
233 len = strlen(path) + 1;
234 if ((str = malloc(prelen + len)) == NULL) {
239 memcpy(str, prefix, prelen); /* includes zero */
240 str[prelen - 1] = '/'; /* splat zero */
242 memcpy(str + prelen, path, len); /* includes zero */
248 * Pattern lists for include / exclude processing
251 STAILQ_ENTRY(pattern) link;
255 STAILQ_HEAD(pattern_list, pattern);
/*
 * Both lists are statically initialised as empty tail queues.
 * accept_pathname() checks STAILQ_EMPTY() before consulting either one.
 */
256 static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include);
257 static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude);
260 * Add an entry to a pattern list
/*
 * list:    the queue to extend (the include or exclude list).
 * pattern: NUL-terminated pattern text; it is copied, so the caller keeps
 *          ownership of its own string.
 *
 * A single allocation holds the list node followed by the pattern bytes
 * and their terminator; the node is appended at the tail of the list.
 */
263 add_pattern(struct pattern_list *list, const char *pattern)
265 struct pattern *entry;
268 debug("adding pattern '%s'\n", pattern);
269 len = strlen(pattern);
270 if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
/* len + 1 so the terminating NUL is copied as well */
274 memcpy(entry->pattern, pattern, len + 1);
275 STAILQ_INSERT_TAIL(list, entry, link);
279 * Match a string against a list of patterns
/*
 * Walks the list in insertion order and tests str against each pattern
 * with fnmatch(); a result of 0 from fnmatch() means the pattern matched.
 * FNM_CASEFOLD is passed when the -C flag (C_opt) is set.
 */
282 match_pattern(struct pattern_list *list, const char *str)
284 struct pattern *entry;
286 STAILQ_FOREACH(entry, list, link) {
287 if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0)
294 * Verify that a given pathname is in the include list and not in the
/*
 * The first test fires when the include list is non-empty and pathname
 * matches none of its patterns; the second fires when the exclude list is
 * non-empty and pathname matches one of its patterns.  Presumably each of
 * them rejects the pathname; the return statements are not visible here.
 */
298 accept_pathname(const char *pathname)
301 if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
303 if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
309 * Create the specified directory with the specified mode, taking certain
310 * precautions on the way.
/*
 * path: directory to create.
 * mode: permission bits handed to mkdir().
 *
 * lstat() is used, so a symbolic link at path is examined itself rather
 * than followed.  An existing directory is detected by the S_ISDIR test;
 * the comments below describe what is done with any other existing object.
 */
313 make_dir(const char *path, int mode)
317 if (lstat(path, &sb) == 0) {
318 if (S_ISDIR(sb.st_mode))
321 * Normally, we should either ask the user about removing
322 * the non-directory of the same name as a directory we
323 * wish to create, or respect the -n or -o command-line
324 * options. However, this may lead to a later failure or
325 * even compromise (if this non-directory happens to be a
326 * symlink to somewhere unsafe), so we don't.
330 * Don't check unlink() result; failure will cause mkdir()
331 * to fail later, which we will catch.
/* EEXIST is tolerated; every other mkdir() failure goes to error() */
335 if (mkdir(path, mode) != 0 && errno != EEXIST)
336 error("mkdir('%s')", path);
340 * Ensure that all directories leading up to (but not including) the
341 * specified path exist.
343 * XXX inefficient + modifies the file in-place
/*
 * path is not const: per the XXX note above, the string is modified in
 * place while the function runs (the writes are not visible in this
 * excerpt).
 *
 * strrchr() finds the last '/'.  A path with no '/' at all, or whose only
 * '/' is its first character, takes the early branch.  Otherwise lstat()
 * and S_ISDIR are consulted, and the final loop visits each '/' from left
 * to right, calling make_dir() with mode 0755.
 */
346 make_parent(char *path)
351 sep = strrchr(path, '/');
352 if (sep == NULL || sep == path)
355 if (lstat(path, &sb) == 0) {
356 if (S_ISDIR(sb.st_mode)) {
367 for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) {
368 /* root in case of absolute d_arg */
372 make_dir(path, 0755);
379 * Extract a directory.
/*
 * a:    archive handle, used to skip whatever data the entry carries.
 * e:    entry header; only its mode is read in the visible lines.
 * path: directory to create on disk.
 *
 * The entry's mode is reduced to its low nine permission bits, a "d" line
 * is reported through info(), and the directory is created by make_dir().
 */
382 extract_dir(struct archive *a, struct archive_entry *e, const char *path)
386 mode = archive_entry_mode(e) & 0777;
391 * Some zipfiles contain directories with weird permissions such
392 * as 0644 or 0444. This can cause strange issues such as being
393 * unable to extract files into the directory we just created, or
394 * the user being unable to remove the directory later without
395 * first manually changing its permissions. Therefore, we whack
396 * the permissions into shape, assuming that the user wants full
397 * access and that anyone who gets read access also gets execute
406 info("d %s\n", path);
407 make_dir(path, mode);
408 ac(archive_read_data_skip(a));
/*
 * buffer is the single scratch area that extract_file(), extract_stdout()
 * and test() hand to archive_read_data().  spinner holds the four frames
 * of the progress indicator that extract_file() prints when `tty` is set.
 */
411 static unsigned char buffer[8192];
412 static char spinner[] = { '|', '/', '-', '\\' };
/*
 * Asks on the terminal what to do about a file that already exists.
 *
 * path: in/out.  For the rename answer the string behind *path is replaced
 *       by a line read from stdin with getdelim(), with its trailing
 *       newline removed.
 *
 * A failed fgets() is reported to the user and, per the message printed,
 * treated like the "[N]one" answer.
 */
415 handle_existing_file(char **path)
423 "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
425 if (fgets(buf, sizeof(buf), stdin) == 0) {
427 printf("NULL\n(EOF or read error, "
428 "treating as \"[N]one\"...)\n");
447 printf("New name: ");
/*
 * NOTE(review): getdelim() returns -1 on EOF or error, and the result is
 * used as an index below without a visible check -- confirm that the
 * surrounding code cannot reach (*path)[len - 1] with len <= 0.
 */
452 len = getdelim(path, &alen, '\n', stdin);
453 if ((*path)[len - 1] == '\n')
454 (*path)[len - 1] = '\0';
463 * Extract a regular file.
/*
 * a:    archive handle the entry's data is read from.
 * e:    entry header; its mode (low nine bits) and mtime are used.
 * path: in/out destination name; handle_existing_file() may replace it.
 *
 * An existing object at *path is examined with lstat() first: with -u or
 * -f a regular file whose mtime is not older than the entry's counts as
 * up to date, and the interactive prompt is another of the branches.
 * The data is then streamed through the shared buffer into a freshly
 * opened file, with optional CR/LF conversion for -a, and finally the
 * file's timestamps are set from the entry.
 */
466 extract_file(struct archive *a, struct archive_entry *e, char **path)
471 struct timeval tv[2];
472 int cr, fd, text, warn, check;
474 unsigned char *p, *q, *end;
476 mode = archive_entry_mode(e) & 0777;
479 mtime = archive_entry_mtime(e);
481 /* look for existing file of same name */
483 if (lstat(*path, &sb) == 0) {
484 if (u_opt || f_opt) {
485 /* check if up-to-date */
486 if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime)
493 /* do not overwrite */
496 check = handle_existing_file(path);
500 return; /* do not overwrite */
/* create or truncate the destination with the entry's permission bits */
507 if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
508 error("open('%s')", *path);
510 /* loop over file contents and write to disk */
511 info(" extracting: %s", *path);
/* n counts buffers read; it drives the spinner and marks the first pass */
515 for (int n = 0; ; n++) {
/* on a tty, advance the spinner by one frame every fourth buffer */
516 if (tty && (n % 4) == 0)
517 info(" %c\b\b", spinner[(n / 4) % sizeof spinner]);
519 len = archive_read_data(a, buffer, sizeof buffer);
524 /* left over CR from previous buffer */
/* the held-back CR is written unless this buffer starts with '\n' */
526 if (len == 0 || buffer[0] != '\n')
527 if (write(fd, "\r", 1) != 1)
528 error("write('%s')", *path);
538 * Detect whether this is a text file. The correct way to
539 * do this is to check the least significant bit of the
540 * "internal file attributes" field of the corresponding
541 * file header in the central directory, but libarchive
542 * does not read the central directory, so we have to
543 * guess by looking for non-ASCII characters in the
544 * buffer. Hopefully we won't guess wrong. If we do
545 * guess wrong, we print a warning message later.
/* the text/binary guess is made once, from the first buffer only */
547 if (a_opt && n == 0) {
548 for (p = buffer; p < end; ++p) {
549 if (!isascii((unsigned char)*p)) {
/* simple case: no conversion requested or data judged binary */
557 if (!a_opt || !text) {
558 if (write(fd, buffer, len) != len)
559 error("write('%s')", *path);
563 /* hard case: convert \r\n to \n (sigh...) */
564 for (p = buffer; p < end; p = q + 1) {
565 for (q = p; q < end; q++) {
/* warn is tested first so the message is issued at most once per file */
566 if (!warn && !isascii(*q)) {
567 warningx("%s may be corrupted due"
568 " to weak text file detection"
569 " heuristic", *path);
/* write the run [p, q) gathered by the inner loop */
581 if (write(fd, p, q - p) != q - p)
582 error("write('%s')", *path);
591 /* set access and modification time */
594 tv[1].tv_sec = mtime;
/*
 * NOTE(review): the call is futimes() but the message below says
 * "utimes" -- confirm whether the label is intentional.
 */
596 if (futimes(fd, tv) != 0)
597 error("utimes('%s')", *path);
599 error("close('%s')", *path);
603 * Extract a zipfile entry: first perform some sanity checks to ensure
604 * that it is either a directory or a regular file and that the path is
605 * not absolute and does not try to break out of the current directory;
606 * then call either extract_dir() or extract_file() as appropriate.
608 * This is complicated a bit by the various ways in which we need to
609 * manipulate the path name. Case conversion (if requested by the -L
610 * option) happens first, but the include / exclude patterns are applied
611 * to the full converted path name, before the directory part of the path
612 * is removed in accordance with the -j option. Sanity checks are
613 * intentionally done earlier than they need to be, so the user will get a
614 * warning about insecure paths even for files or directories which
615 * wouldn't be extracted anyway.
/*
 * a: archive handle; the entry's data is skipped through it whenever the
 *    entry is rejected.
 * e: header of the entry to extract; its pathname and file type are read.
 */
618 extract(struct archive *a, struct archive_entry *e)
620 char *pathname, *realpathname;
624 pathname = pathdup(archive_entry_pathname(e));
625 filetype = archive_entry_filetype(e);
/*
 * pathdup() hands back the name without its trailing slashes, so an entry
 * called "../" arrives here as ".." and "dir/../" as "dir/..".  Neither
 * contains "../" at the start or "/../" inside, so a final ".." component
 * has to be tested for explicitly.
 */
628 if (pathname[0] == '/' ||
strcmp(pathname, "..") == 0 ||
629 strncmp(pathname, "../", 3) == 0 ||
630 strstr(pathname, "/../") != NULL ||
(strlen(pathname) >= 3 &&
strcmp(pathname + strlen(pathname) - 3, "/..") == 0)) {
631 warningx("skipping insecure entry '%s'", pathname);
632 ac(archive_read_data_skip(a));
637 /* I don't think this can happen in a zipfile.. */
638 if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
639 warningx("skipping non-regular entry '%s'", pathname);
640 ac(archive_read_data_skip(a));
645 /* skip directories in -j case */
646 if (S_ISDIR(filetype) && j_opt) {
647 ac(archive_read_data_skip(a));
652 /* apply include / exclude patterns */
653 if (!accept_pathname(pathname)) {
654 ac(archive_read_data_skip(a));
659 /* apply -j and -d */
661 for (p = q = pathname; *p; ++p)
664 realpathname = pathcat(d_arg, q);
666 realpathname = pathcat(d_arg, pathname);
669 /* ensure that parent directory exists */
670 make_parent(realpathname);
672 if (S_ISDIR(filetype))
673 extract_dir(a, e, realpathname);
/* passed by address because extract_file() may substitute a new name */
675 extract_file(a, e, &realpathname);
/*
 * Write one entry's data to standard output (the path taken by unzip()
 * when -p or -c is in effect).
 *
 * a: archive handle the data is read from, or skipped through.
 * e: header of the current entry; its pathname and file type are read.
 *
 * Entries that are neither directories nor regular files are skipped with
 * a warning; directories and entries rejected by accept_pathname() are
 * skipped silently.  With -a, the first buffer decides whether the entry
 * is treated as text, in which case \r\n is converted to \n.  Every byte
 * of entry data, including a carriage return held over from the previous
 * buffer, is written to stdout.
 */
682 extract_stdout(struct archive *a, struct archive_entry *e)
688 unsigned char *p, *q, *end;
690 pathname = pathdup(archive_entry_pathname(e));
691 filetype = archive_entry_filetype(e);
693 /* I don't think this can happen in a zipfile.. */
694 if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
695 warningx("skipping non-regular entry '%s'", pathname);
696 ac(archive_read_data_skip(a));
701 /* directories are always skipped here, with or without -j */
702 if (S_ISDIR(filetype)) {
703 ac(archive_read_data_skip(a));
708 /* apply include / exclude patterns */
709 if (!accept_pathname(pathname)) {
710 ac(archive_read_data_skip(a));
716 info("x %s\n", pathname);
/* n counts buffers read; n == 0 marks the first one */
721 for (int n = 0; ; n++) {
722 len = archive_read_data(a, buffer, sizeof buffer);
727 /* left over CR from previous buffer */
729 if (len == 0 || buffer[0] != '\n') {
/* the held-back CR is entry data, so it goes to stdout like the rest */
730 if (fwrite("\r", 1, 1, stdout) != 1)
731 error("write('%s')", pathname);
742 * Detect whether this is a text file. The correct way to
743 * do this is to check the least significant bit of the
744 * "internal file attributes" field of the corresponding
745 * file header in the central directory, but libarchive
746 * does not read the central directory, so we have to
747 * guess by looking for non-ASCII characters in the
748 * buffer. Hopefully we won't guess wrong. If we do
749 * guess wrong, we print a warning message later.
751 if (a_opt && n == 0) {
752 for (p = buffer; p < end; ++p) {
753 if (!isascii((unsigned char)*p)) {
/* simple case: no conversion requested or data judged binary */
761 if (!a_opt || !text) {
762 if (fwrite(buffer, 1, len, stdout) != (size_t)len)
763 error("write('%s')", pathname);
767 /* hard case: convert \r\n to \n (sigh...) */
768 for (p = buffer; p < end; p = q + 1) {
769 for (q = p; q < end; q++) {
/* warn is tested first so the message is issued at most once per entry */
770 if (!warn && !isascii(*q)) {
771 warningx("%s may be corrupted due"
772 " to weak text file detection"
773 " heuristic", pathname);
/* write the run [p, q) gathered by the inner loop */
785 if (fwrite(p, 1, q - p, stdout) != (size_t)(q - p))
786 error("write('%s')", pathname);
794 * Print the name of an entry to stdout.
/*
 * a: archive handle; the entry's data is skipped once the line is printed.
 * e: entry header supplying size, modification time and pathname.
 *
 * Two layouts exist: a short one (size, date/time, name) and, when v_opt
 * is 2, a wide one that repeats the size in the "Stored" column.
 */
797 list(struct archive *a, struct archive_entry *e)
802 mtime = archive_entry_mtime(e);
/*
 * %y is the two-digit calendar year.  The ISO 8601 week-based year (%g)
 * differs from it for some days around 1 January, which would show the
 * wrong year for files modified on those days.
 */
803 strftime(buf, sizeof(buf), "%m-%d-%y %R", localtime(&mtime));
806 printf(" %8ju %s %s\n",
807 (uintmax_t)archive_entry_size(e),
808 buf, archive_entry_pathname(e));
809 } else if (v_opt == 2) {
810 printf("%8ju Stored %7ju 0%% %s %08x %s\n",
811 (uintmax_t)archive_entry_size(e),
812 (uintmax_t)archive_entry_size(e),
815 archive_entry_pathname(e));
817 ac(archive_read_data_skip(a));
821 * Extract to memory to check CRC
/*
 * a: archive handle the entry's data is read from.
 * e: header of the entry being tested.
 *
 * Directory entries take the early branch at the S_ISDIR test.  For other
 * entries the data is pulled through the shared buffer until
 * archive_read_data() stops returning a positive count.  The visible
 * failure path prints archive_error_string(a) through info().  unzip()
 * adds this function's return value to its error count.
 */
824 test(struct archive *a, struct archive_entry *e)
830 if (S_ISDIR(archive_entry_filetype(e)))
833 info(" testing: %s\t", archive_entry_pathname(e));
834 while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
837 info(" %s\n", archive_error_string(a));
843 /* shouldn't be necessary, but it doesn't hurt */
844 ac(archive_read_data_skip(a));
851 * Main loop: open the zipfile, iterate over its contents and decide what
852 * to do with each entry.
/*
 * fn: name of the zipfile; it is opened read-only and handed to libarchive
 *     by file descriptor.
 *
 * Depending on the options, each entry is tested, written to stdout,
 * listed or extracted; listing modes print a header before and a summary
 * after the entries.  total_size, file_count and error_count are
 * uintmax_t, so every format string that prints them uses %ju.
 */
855 unzip(const char *fn)
858 struct archive_entry *e;
860 uintmax_t total_size, file_count, error_count;
862 if ((fd = open(fn, O_RDONLY)) < 0)
865 if ((a = archive_read_new()) == NULL)
866 error("archive_read_new failed");
868 ac(archive_read_support_format_zip(a));
869 ac(archive_read_open_fd(a, fd, 8192));
871 if (!p_opt && !q_opt)
872 printf("Archive: %s\n", fn);
874 printf(" Length Date Time Name\n");
875 printf(" -------- ---- ---- ----\n");
876 } else if (v_opt == 2) {
877 printf(" Length Method Size Ratio Date Time CRC-32 Name\n");
878 printf("-------- ------ ------- ----- ---- ---- ------ ----\n");
885 ret = archive_read_next_header(a, &e);
886 if (ret == ARCHIVE_EOF)
/* test() reports per-entry failures through its return value */
890 error_count += test(a, e);
893 else if (p_opt || c_opt)
894 extract_stdout(a, e);
898 total_size += archive_entry_size(e);
903 printf(" -------- -------\n");
904 printf(" %8ju %ju file%s\n",
905 total_size, file_count, file_count != 1 ? "s" : "");
906 } else if (v_opt == 2) {
907 printf("-------- ------- --- -------\n");
908 printf("%8ju %7ju 0%% %ju file%s\n",
909 total_size, total_size, file_count,
910 file_count != 1 ? "s" : "");
913 ac(archive_read_close(a));
914 (void)archive_read_finish(a);
920 if (error_count > 0) {
/* error_count is uintmax_t: %ju, not %d, is the matching conversion */
921 errorx("%ju checksum error(s) found.", error_count);
924 printf("No errors detected in compressed data of %s.\n",
934 fprintf(stderr, "usage: unzip [-aCcfjLlnopqtuv] [-d dir] [-x pattern] zipfile\n");
/*
 * Parses one run of command-line options with getopt().
 *
 * argc, argv: argument vector to scan; main() calls this once on the full
 *             vector and once more on the tail that follows the zipfile
 *             name and include patterns.
 *
 * optreset and optind are reset first so that getopt() starts afresh on
 * every call.  In the option string, -d and -x take an argument.  main()
 * uses the return value as an index into the vector it passed in.
 */
939 getopts(int argc, char *argv[])
943 optreset = optind = 1;
944 while ((opt = getopt(argc, argv, "aCcd:fjLlnopqtuvx:")) != -1)
994 add_pattern(&exclude, optarg);
/*
 * Program entry point.  The visible steps are: check whether stdout is a
 * terminal, check the UNZIP_DEBUG environment variable, echo the argument
 * vector through debug(), parse the leading options, take the zipfile
 * name, collect include patterns, parse any trailing options, and reject
 * contradictory overwrite flags.
 */
1004 main(int argc, char *argv[])
1006 const char *zipfile;
1009 if (isatty(STDOUT_FILENO))
1012 if (getenv("UNZIP_DEBUG") != NULL)
/* arguments are separated by spaces, and the last one ends the line */
1014 for (int i = 0; i < argc; ++i)
1015 debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n');
1018 * Info-ZIP's unzip(1) expects certain options to come before the
1019 * zipfile name, and others to come after - though it does not
1020 * enforce this. For simplicity, we accept *all* options both
1021 * before and after the zipfile name.
1023 nopts = getopts(argc, argv);
/* the first argument after the leading options names the archive */
1027 zipfile = argv[nopts++];
/* everything up to the next argument starting with '-' is an include pattern */
1029 while (nopts < argc && *argv[nopts] != '-')
1030 add_pattern(&include, argv[nopts++]);
1032 nopts--; /* fake argv[0] */
1033 nopts += getopts(argc - nopts, argv + nopts);
/* at most one of -n, -o and -u may be given */
1035 if (n_opt + o_opt + u_opt > 1)
1036 errorx("-n, -o and -u are contradictory");