2 * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * This file would be much shorter if we didn't care about command-line
30 * compatibility with Info-ZIP's UnZip, which requires us to duplicate
31 * parts of libarchive in order to gain more detailed control of its
32 * behaviour for the purpose of implementing the -n, -o, -L and -a
36 #include <sys/queue.h>
50 #include <archive_entry.h>
52 /* command-line options */
53 static int a_opt; /* convert EOL */
54 static const char *d_arg; /* directory */
55 static int j_opt; /* junk directories */
56 static int L_opt; /* lowercase names */
57 static int l_opt; /* list */
58 static int n_opt; /* never overwrite */
59 static int o_opt; /* always overwrite */
60 static int q_opt; /* quiet */
61 static int t_opt; /* test */
62 static int u_opt; /* update */
64 /* time when unzip started */
68 static int unzip_debug;
73 /* error flag for -t */
74 static int test_failed;
76 /* convenience macro */
77 /* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
81 if (acret != ARCHIVE_OK) \
82 errorx("%s", archive_error_string(a)); \
86 * Indicates that the last info() did not end with EOL. This helps error() et
87 * al. avoid printing an error message on the same line as an incomplete
88 * informational message.
92 /* fatal error message + errno */
94 error(const char *fmt, ...)
99 fprintf(stdout, "\n");
101 fprintf(stderr, "unzip: ");
103 vfprintf(stderr, fmt, ap);
105 fprintf(stderr, ": %s\n", strerror(errno));
109 /* fatal error message, no errno */
111 errorx(const char *fmt, ...)
116 fprintf(stdout, "\n");
118 fprintf(stderr, "unzip: ");
120 vfprintf(stderr, fmt, ap);
122 fprintf(stderr, "\n");
127 /* non-fatal error message + errno */
129 warning(const char *fmt, ...)
134 fprintf(stdout, "\n");
136 fprintf(stderr, "unzip: ");
138 vfprintf(stderr, fmt, ap);
140 fprintf(stderr, ": %s\n", strerror(errno));
144 /* non-fatal error message, no errno */
146 warningx(const char *fmt, ...)
151 fprintf(stdout, "\n");
153 fprintf(stderr, "unzip: ");
155 vfprintf(stderr, fmt, ap);
157 fprintf(stderr, "\n");
160 /* informational message (if not -q) */
162 info(const char *fmt, ...)
167 if (q_opt && !unzip_debug)
170 vfprintf(stdout, fmt, ap);
174 for (i = 0; fmt[i] != '\0'; ++i)
176 noeol = !(i && fmt[i - 1] == '\n');
179 /* debug message (if unzip_debug) */
181 debug(const char *fmt, ...)
189 vfprintf(stderr, fmt, ap);
193 for (i = 0; fmt[i] != '\0'; ++i)
195 noeol = !(i && fmt[i - 1] == '\n');
198 /* duplicate a path name, possibly converting to lower case */
200 pathdup(const char *path)
206 while (len && path[len - 1] == '/')
208 if ((str = malloc(len + 1)) == NULL) {
212 for (int i = 0; i < len; ++i)
213 str[i] = L_opt ? tolower(path[i]) : path[i];
219 /* concatenate two path names */
221 pathcat(const char *prefix, const char *path)
226 prelen = prefix ? strlen(prefix) + 1 : 0;
227 len = strlen(path) + 1;
228 if ((str = malloc(prelen + len)) == NULL) {
233 memcpy(str, prefix, prelen); /* includes zero */
234 str[prelen - 1] = '/'; /* splat zero */
236 memcpy(str + prelen, path, len); /* includes zero */
242 * Pattern lists for include / exclude processing
245 STAILQ_ENTRY(pattern) link;
249 STAILQ_HEAD(pattern_list, pattern);
250 static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include);
251 static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude);
254 * Add an entry to a pattern list
257 add_pattern(struct pattern_list *list, const char *pattern)
259 struct pattern *entry;
262 debug("adding pattern '%s'\n", pattern);
263 len = strlen(pattern);
264 if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
268 memset(&entry->link, 0, sizeof entry->link);
269 memcpy(entry->pattern, pattern, len + 1);
270 STAILQ_INSERT_TAIL(list, entry, link);
274 * Match a string against a list of patterns
277 match_pattern(struct pattern_list *list, const char *str)
279 struct pattern *entry;
281 STAILQ_FOREACH(entry, list, link) {
282 if (fnmatch(entry->pattern, str, 0) == 0)
289 * Verify that a given pathname is in the include list and not in the
293 accept_pathname(const char *pathname)
296 if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
298 if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
304 * Create the specified directory with the specified mode, taking certain
305 * precautions on the way.
308 make_dir(const char *path, int mode)
312 if (lstat(path, &sb) == 0) {
313 if (S_ISDIR(sb.st_mode))
316 * Normally, we should either ask the user about removing
317 * the non-directory of the same name as a directory we
318 * wish to create, or respect the -n or -o command-line
319 * options. However, this may lead to a later failure or
320 * even compromise (if this non-directory happens to be a
321 * symlink to somewhere unsafe), so we don't.
325 * Don't check unlink() result; failure will cause mkdir()
326 * to fail later, which we will catch.
330 if (mkdir(path, mode) != 0 && errno != EEXIST)
331 error("mkdir('%s')", path);
335 * Ensure that all directories leading up to (but not including) the
336 * specified path exist.
338 * XXX inefficient + modifies the path in-place
341 make_parent(char *path)
346 sep = strrchr(path, '/');
347 if (sep == NULL || sep == path)
350 if (lstat(path, &sb) == 0) {
351 if (S_ISDIR(sb.st_mode)) {
362 for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) {
363 /* root in case of absolute d_arg */
367 make_dir(path, 0755);
374 * Extract a directory.
377 extract_dir(struct archive *a, struct archive_entry *e, const char *path)
381 mode = archive_entry_filetype(e) & 0777;
386 * Some zipfiles contain directories with weird permissions such
387 * as 0644 or 0444. This can cause strange issues such as being
388 * unable to extract files into the directory we just created, or
389 * the user being unable to remove the directory later without
390 * first manually changing its permissions. Therefore, we whack
391 * the permissions into shape, assuming that the user wants full
392 * access and that anyone who gets read access also gets execute
401 info("d %s\n", path);
402 make_dir(path, mode);
403 ac(archive_read_data_skip(a));
406 static unsigned char buffer[8192];
407 static char spinner[] = { '|', '/', '-', '\\' };
410 * Extract a regular file.
413 extract_file(struct archive *a, struct archive_entry *e, const char *path)
418 struct timeval tv[2];
419 int cr, fd, text, warn;
421 unsigned char *p, *q, *end;
423 mode = archive_entry_filetype(e) & 0777;
426 mtime = archive_entry_mtime(e);
428 /* look for existing file of same name */
429 if (lstat(path, &sb) == 0) {
431 /* check if up-to-date */
432 if (S_ISREG(sb.st_mode) && sb.st_mtime > mtime)
439 /* do not overwrite */
443 errorx("not implemented");
447 if ((fd = open(path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
448 error("open('%s')", path);
450 /* loop over file contents and write to disk */
455 for (int n = 0; ; n++) {
456 if (tty && (n % 4) == 0)
457 info(" %c\b\b", spinner[(n / 4) % sizeof spinner]);
459 len = archive_read_data(a, buffer, sizeof buffer);
464 /* left over CR from previous buffer */
466 if (len == 0 || buffer[0] != '\n')
467 if (write(fd, "\r", 1) != 1)
468 error("write('%s')", path);
478 * Detect whether this is a text file. The correct way to
479 * do this is to check the least significant bit of the
480 * "internal file attributes" field of the corresponding
481 * file header in the central directory, but libarchive
482 * does not read the central directory, so we have to
483 * guess by looking for non-ASCII characters in the
484 * buffer. Hopefully we won't guess wrong. If we do
485 * guess wrong, we print a warning message later.
487 if (a_opt && n == 0) {
488 for (p = buffer; p < end; ++p) {
489 if (!isascii((unsigned char)*p)) {
497 if (!a_opt || !text) {
498 if (write(fd, buffer, len) != len)
499 error("write('%s')", path);
503 /* hard case: convert \r\n to \n (sigh...) */
504 for (p = buffer; p < end; p = q + 1) {
505 for (q = p; q < end; q++) {
506 if (!warn && !isascii(*q)) {
507 warningx("%s may be corrupted due"
508 " to weak text file detection"
521 if (write(fd, p, q - p) != q - p)
522 error("write('%s')", path);
531 /* set access and modification time */
534 tv[1].tv_sec = mtime;
536 if (futimes(fd, tv) != 0)
537 error("utimes('%s')", path);
539 error("close('%s')", path);
543 * Extract a zipfile entry: first perform some sanity checks to ensure
544 * that it is either a directory or a regular file and that the path is
545 * not absolute and does not try to break out of the current directory;
546 * then call either extract_dir() or extract_file() as appropriate.
548 * This is complicated a bit by the various ways in which we need to
549 * manipulate the path name. Case conversion (if requested by the -L
550 * option) happens first, but the include / exclude patterns are applied
551 * to the full converted path name, before the directory part of the path
552 * is removed in accordance with the -j option. Sanity checks are
553 * intentionally done earlier than they need to be, so the user will get a
554 * warning about insecure paths even for files or directories which
555 * wouldn't be extracted anyway.
558 extract(struct archive *a, struct archive_entry *e)
560 char *pathname, *realpathname;
564 pathname = pathdup(archive_entry_pathname(e));
565 filetype = archive_entry_filetype(e);
568 if (pathname[0] == '/' ||
569 strncmp(pathname, "../", 3) == 0 ||
570 strstr(pathname, "/../") != NULL) {
571 warningx("skipping insecure entry '%s'", pathname);
572 ac(archive_read_data_skip(a));
577 /* I don't think this can happen in a zipfile.. */
578 if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
579 warningx("skipping non-regular entry '%s'", pathname);
580 ac(archive_read_data_skip(a));
585 /* skip directories in -j case */
586 if (S_ISDIR(filetype) && j_opt) {
587 ac(archive_read_data_skip(a));
592 /* apply include / exclude patterns */
593 if (!accept_pathname(pathname)) {
594 ac(archive_read_data_skip(a));
599 /* apply -j and -d */
601 for (p = q = pathname; *p; ++p)
604 realpathname = pathcat(d_arg, q);
606 realpathname = pathcat(d_arg, pathname);
609 /* ensure that parent directory exists */
610 make_parent(realpathname);
612 if (S_ISDIR(filetype))
613 extract_dir(a, e, realpathname);
615 extract_file(a, e, realpathname);
622 * Print the name of an entry to stdout.
625 list(struct archive *a, struct archive_entry *e)
628 printf("%s\n", archive_entry_pathname(e));
629 ac(archive_read_data_skip(a));
633 * Extract to memory to check CRC
636 test(struct archive *a, struct archive_entry *e)
640 if (S_ISDIR(archive_entry_filetype(e)))
643 info("%s ", archive_entry_pathname(e));
644 while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
647 info("%s\n", archive_error_string(a));
653 /* shouldn't be necessary, but it doesn't hurt */
654 ac(archive_read_data_skip(a));
659 * Main loop: open the zipfile, iterate over its contents and decide what
660 * to do with each entry.
663 unzip(const char *fn)
666 struct archive_entry *e;
669 if ((fd = open(fn, O_RDONLY)) < 0)
672 a = archive_read_new();
673 ac(archive_read_support_format_zip(a));
674 ac(archive_read_open_fd(a, fd, 8192));
677 ret = archive_read_next_header(a, &e);
678 if (ret == ARCHIVE_EOF)
689 ac(archive_read_close(a));
690 (void)archive_read_finish(a);
694 if (t_opt && test_failed)
695 errorx("%d checksum error(s) found.", test_failed);
702 fprintf(stderr, "usage: unzip [-ajLlnoqtu] [-d dir] zipfile\n");
707 getopts(int argc, char *argv[])
711 optreset = optind = 1;
712 while ((opt = getopt(argc, argv, "ad:jLlnoqtux:")) != -1)
745 add_pattern(&exclude, optarg);
755 main(int argc, char *argv[])
760 if (isatty(STDOUT_FILENO))
763 if (getenv("UNZIP_DEBUG") != NULL)
765 for (int i = 0; i < argc; ++i)
766 debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n');
769 * Info-ZIP's unzip(1) expects certain options to come before the
770 * zipfile name, and others to come after - though it does not
771 * enforce this. For simplicity, we accept *all* options both
772 * before and after the zipfile name.
774 nopts = getopts(argc, argv);
778 zipfile = argv[nopts++];
780 while (nopts < argc && *argv[nopts] != '-')
781 add_pattern(&include, argv[nopts++]);
783 nopts--; /* fake argv[0] */
784 nopts += getopts(argc - nopts, argv + nopts);
786 if (n_opt + o_opt + u_opt > 1)
787 errorx("-n, -o and -u are contradictory");