2 * Copyright (c) 2003-2007 Tim Kientzle
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "bsdtar_platform.h"
27 __FBSDID("$FreeBSD$");
29 #ifdef HAVE_SYS_STAT_H
32 #ifdef HAVE_SYS_TYPES_H
33 #include <sys/types.h> /* Linux doesn't define mode_t, etc. in sys/stat.h. */
58 /* If we don't have wctype, we need to hack up some version of iswprint(). */
59 #define iswprint isprint
64 #include "passphrase.h"
66 static size_t bsdtar_expand_char(char *, size_t, char);
67 static const char *strip_components(const char *path, int elements);
69 #if defined(_WIN32) && !defined(__CYGWIN__)
73 /* TODO: Hack up a version of mbtowc for platforms with no wide
74 * character support at all. I think the following might suffice,
75 * but it needs careful testing.
77 * #define mbtowc(wcp, p, n) ((*wcp = *p), 1)
82 * Print a string, taking care with any non-printable characters.
84 * Note that we use a stack-allocated buffer to receive the formatted
85 * string if we can. This is partly performance (avoiding a call to
86 * malloc()), partly out of expedience (we have to call vsnprintf()
87 * before malloc() anyway to find out how big a buffer we need; we may
88 * as well point that first call at a small local buffer in case it
89 * works), but mostly for safety (so we can use this to print messages
90 * about out-of-memory conditions).
/*
 * safe_fprintf(): printf-style output to `f` with unprintable
 * characters escaped.
 *
 * The arguments are first formatted with vsnprintf() into a 256-byte
 * stack buffer.  When that buffer is too small, or vsnprintf() returns
 * a negative length, the loop below switches to a malloc()ed buffer
 * and formats again; if malloc() fails, the truncated text already in
 * the stack buffer is used instead.
 *
 * The formatted text is then walked one multibyte character at a
 * time.  Characters that mbtowc() converts and iswprint() accepts
 * (other than backslash) are copied through; everything else goes
 * through bsdtar_expand_char().  After the first mbtowc() failure the
 * rest of the input is expanded byte by byte.  Output is staged in
 * outbuff and written to `f` with fprintf().
 */
94 safe_fprintf(FILE *f, const char *fmt, ...)
96 char fmtbuff_stack[256]; /* Place to format the printf() string. */
97 char outbuff[256]; /* Buffer for outgoing characters. */
98 char *fmtbuff_heap; /* If fmtbuff_stack is too small, we use malloc */
99 char *fmtbuff; /* Pointer to fmtbuff_stack or fmtbuff_heap. */
108 /* Use a stack-allocated buffer if we can, for speed and safety. */
110 fmtbuff_length = sizeof(fmtbuff_stack);
111 fmtbuff = fmtbuff_stack;
113 /* Try formatting into the stack buffer. */
115 length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
118 /* If the result was too large, allocate a buffer on the heap. */
119 while (length < 0 || length >= fmtbuff_length) {
/* A non-negative length is the size vsnprintf() asked for, so use it
 * directly; a negative length gives no size, so the buffer is grown
 * in steps instead. */
120 if (length >= fmtbuff_length)
121 fmtbuff_length = length+1;
122 else if (fmtbuff_length < 8192)
124 else if (fmtbuff_length < 1000000)
125 fmtbuff_length += fmtbuff_length / 4;
127 length = fmtbuff_length;
128 fmtbuff_heap[length-1] = '\0';
132 fmtbuff_heap = malloc(fmtbuff_length);
134 /* Reformat the result into the heap buffer if we can. */
135 if (fmtbuff_heap != NULL) {
136 fmtbuff = fmtbuff_heap;
138 length = vsnprintf(fmtbuff, fmtbuff_length, fmt, ap);
141 /* Leave fmtbuff pointing to the truncated
142 * string in fmtbuff_stack. */
143 fmtbuff = fmtbuff_stack;
144 length = sizeof(fmtbuff_stack) - 1;
149 /* Note: mbrtowc() has a cleaner API, but mbtowc() seems a bit
150 * more portable, so we use that here instead. */
151 if (mbtowc(NULL, NULL, 1) == -1) { /* Reset the shift state. */
152 /* mbtowc() should never fail in practice, but
153 * handle the theoretical error anyway. */
158 /* Write data, expanding unprintable characters. */
164 /* Convert to wide char, test if the wide
165 * char is printable in the current locale. */
166 if (try_wc && (n = mbtowc(&wc, p, length)) != -1) {
168 if (iswprint(wc) && wc != L'\\') {
169 /* Printable, copy the bytes through. */
173 /* Not printable, format the bytes. */
175 i += (unsigned)bsdtar_expand_char(
179 /* After any conversion failure, don't bother
180 * trying to convert the rest. */
181 i += (unsigned)bsdtar_expand_char(outbuff, i, *p++);
185 /* If our output buffer is full, dump it and keep going. */
/* NOTE(review): the 128-byte margin presumably leaves room for the
 * expansion of the next multibyte character -- confirm. */
186 if (i > (sizeof(outbuff) - 128)) {
188 fprintf(f, "%s", outbuff);
193 fprintf(f, "%s", outbuff);
195 /* If we allocated a heap-based formatting buffer, free it now. */
200 * Render an arbitrary sequence of bytes into printable ASCII characters.
/*
 * bsdtar_expand_char(): write a printable rendering of the byte `c`
 * into `buff`, starting at index `offset`.
 *
 * Bytes that isprint() accepts, other than backslash, take the first
 * branch.  The remaining bytes are rendered as escapes: the letters
 * a, b, f, n, r, t, v for the matching control characters, a
 * backslash for backslash, and three octal digits for anything else.
 *
 * safe_fprintf() adds the return value to its output index, so the
 * result is presumably the number of characters stored (the return
 * statement is not visible in this view).  There is no buffer-size
 * parameter: the caller has to guarantee enough room at `offset`.
 */
203 bsdtar_expand_char(char *buff, size_t offset, char c)
207 if (isprint((unsigned char)c) && c != '\\')
212 case '\a': buff[i++] = 'a'; break;
213 case '\b': buff[i++] = 'b'; break;
214 case '\f': buff[i++] = 'f'; break;
215 case '\n': buff[i++] = 'n'; break;
217 /* On some platforms, \n and \r are the same. */
218 case '\r': buff[i++] = 'r'; break;
220 case '\t': buff[i++] = 't'; break;
221 case '\v': buff[i++] = 'v'; break;
222 case '\\': buff[i++] = '\\'; break;
/* The 0xFF mask keeps the value within 0..0377, so exactly three
 * octal digits plus the terminating NUL are written at buff + i. */
224 sprintf(buff + i, "%03o", 0xFF & (int)c);
/*
 * yes(): print a printf-style question on stderr, followed by
 * " (y/N)? ", and read the reply.
 *
 * The reply is read with read() from descriptor 2, not from stdin.
 * NOTE(review): presumably because stdin may be carrying the archive
 * -- confirm.  A failed read reports "Keyboard read failed".  The
 * reply is then scanned from its start, testing each character with
 * isspace(); the statements that classify the answer and produce the
 * return value are not visible in this view.
 */
233 yes(const char *fmt, ...)
241 vfprintf(stderr, fmt, ap);
243 fprintf(stderr, " (y/N)? ");
/* One byte of buff is held back from the read size (presumably for a
 * terminating NUL; the statement that stores it is not visible). */
246 l = read(2, buff, sizeof(buff) - 1);
248 fprintf(stderr, "Keyboard read failed\n");
255 for (p = buff; *p != '\0'; p++) {
256 if (isspace((unsigned char)*p))
272 * The logic here for -C <dir> attempts to avoid
273 * chdir() as long as possible. For example:
274 * "-C /foo -C /bar file" needs chdir("/bar") but not chdir("/foo")
275 * "-C /foo -C bar file" needs chdir("/foo/bar")
276 * "-C /foo -C bar /file1" does not need chdir()
277 * "-C /foo -C bar /file1 file2" needs chdir("/foo/bar") before file2
279 * The only correct way to handle this is to record a "pending" chdir
280 * request and combine multiple requests intelligently until we
281 * need to process a non-absolute file. set_chdir() adds the new dir
282 * to the pending list; do_chdir() actually executes any pending chdir.
284 * This way, programs that build tar command lines don't have to worry
285 * about -C with non-existent directories; such requests will only
286 * fail if the directory must be accessed.
290 set_chdir(struct bsdtar *bsdtar, const char *newdir)
292 #if defined(_WIN32) && !defined(__CYGWIN__)
293 if (newdir[0] == '/' || newdir[0] == '\\' ||
294 /* Detect this type, for example, "C:\" or "C:/" */
295 (((newdir[0] >= 'a' && newdir[0] <= 'z') ||
296 (newdir[0] >= 'A' && newdir[0] <= 'Z')) &&
297 newdir[1] == ':' && (newdir[2] == '/' || newdir[2] == '\\'))) {
299 if (newdir[0] == '/') {
301 /* The -C /foo -C /bar case; dump first one. */
302 free(bsdtar->pending_chdir);
303 bsdtar->pending_chdir = NULL;
305 if (bsdtar->pending_chdir == NULL)
306 /* Easy case: no previously-saved dir. */
307 bsdtar->pending_chdir = strdup(newdir);
309 /* The -C /foo -C bar case; concatenate */
310 char *old_pending = bsdtar->pending_chdir;
311 size_t old_len = strlen(old_pending);
312 bsdtar->pending_chdir = malloc(old_len + strlen(newdir) + 2);
313 if (old_pending[old_len - 1] == '/')
314 old_pending[old_len - 1] = '\0';
315 if (bsdtar->pending_chdir != NULL)
316 sprintf(bsdtar->pending_chdir, "%s/%s",
317 old_pending, newdir);
320 if (bsdtar->pending_chdir == NULL)
321 lafe_errc(1, errno, "No memory");
/*
 * do_chdir(): carry out the chdir() recorded in bsdtar->pending_chdir,
 * if there is one.
 *
 * A chdir() failure is reported through lafe_errc(1, 0, ...).  The
 * pending string is then freed and the field reset to NULL.
 *
 * NOTE(review): this error call passes 0 where the other lafe_errc()
 * calls in this file pass errno, and its message ends in "\n" --
 * confirm both are intended.
 */
325 do_chdir(struct bsdtar *bsdtar)
327 if (bsdtar->pending_chdir == NULL)
330 if (chdir(bsdtar->pending_chdir) != 0) {
331 lafe_errc(1, 0, "could not chdir to '%s'\n",
332 bsdtar->pending_chdir);
334 free(bsdtar->pending_chdir);
335 bsdtar->pending_chdir = NULL;
/*
 * strip_components(): skip leading path elements of `p` for the
 * --strip-components option.
 *
 * `elements` is the number of leading elements to skip.  Per the
 * comments below, a path with too few elements is skipped, and any
 * separator characters left at the front are skipped as well.  On
 * Windows (not Cygwin) backslash is accepted as a separator too.
 *
 * edit_pathname() checks the result against NULL for hardlink names;
 * the return statements themselves are not visible in this view.
 */
339 strip_components(const char *p, int elements)
341 /* Skip as many elements as necessary. */
342 while (elements > 0) {
345 #if defined(_WIN32) && !defined(__CYGWIN__)
346 case '\\': /* Support \ path sep on Windows ONLY. */
351 /* Path is too short, skip it. */
356 /* Skip any / characters. This handles short paths that have
357 * additional / termination. This also handles the case where
358 * the logic above stops in the middle of a duplicate //
359 * sequence (which would otherwise get converted to an
364 #if defined(_WIN32) && !defined(__CYGWIN__)
365 case '\\': /* Support \ path sep on Windows ONLY. */
/*
 * warn_strip_leading_char(): note that a leading character is being
 * removed from member names.
 *
 * The warned_lead_slash flag is tested first and set afterwards, so
 * the message is not repeated while the flag stays set.  `c`
 * presumably points at the character reported through the '%c'
 * conversion (NOTE(review): the call that consumes the format string
 * is not visible in this view -- confirm).
 */
378 warn_strip_leading_char(struct bsdtar *bsdtar, const char *c)
380 if (!bsdtar->warned_lead_slash) {
382 "Removing leading '%c' from member names",
384 bsdtar->warned_lead_slash = 1;
/*
 * warn_strip_drive_letter(): note that a leading drive letter is
 * being removed from member names.
 *
 * This tests and sets the same warned_lead_slash flag as
 * warn_strip_leading_char(), so once either warning has set the flag
 * the other one is suppressed as well.
 * NOTE(review): confirm that sharing one flag is intended.
 */
389 warn_strip_drive_letter(struct bsdtar *bsdtar)
391 if (!bsdtar->warned_lead_slash) {
393 "Removing leading drive letter from "
395 bsdtar->warned_lead_slash = 1;
400 * Convert absolute path to non-absolute path by skipping leading
401 * absolute path prefixes.
/*
 * strip_absolute_path(): advance `p` past prefixes that would make a
 * member name absolute.
 *
 * Handled here:
 *   - the Windows API prefixes "//./", "//?/" and "//?/UNC/", with
 *     either slash direction and UNC in either letter case;
 *   - a leading ASCII letter treated as a Windows drive letter (the
 *     rest of that test is not visible in this view);
 *   - leading "/..", "/." and "/" sequences, again with either slash
 *     direction.
 * Prefix and drive-letter removal call warn_strip_drive_letter();
 * slash removal calls warn_strip_leading_char().
 *
 * edit_pathname() uses the result as the new name pointer; the return
 * statement itself is not visible in this view.
 */
404 strip_absolute_path(struct bsdtar *bsdtar, const char *p)
408 /* Remove leading "//./" or "//?/" or "//?/UNC/"
409 * (absolute path prefixes used by Windows API) */
410 if ((p[0] == '/' || p[0] == '\\') &&
411 (p[1] == '/' || p[1] == '\\') &&
412 (p[2] == '.' || p[2] == '?') &&
413 (p[3] == '/' || p[3] == '\\'))
416 (p[4] == 'U' || p[4] == 'u') &&
417 (p[5] == 'N' || p[5] == 'n') &&
418 (p[6] == 'C' || p[6] == 'c') &&
419 (p[7] == '/' || p[7] == '\\'))
423 warn_strip_drive_letter(bsdtar);
426 /* Remove multiple leading slashes and Windows drive letters. */
429 if (((p[0] >= 'a' && p[0] <= 'z') ||
430 (p[0] >= 'A' && p[0] <= 'Z')) &&
433 warn_strip_drive_letter(bsdtar);
436 /* Remove leading "/../", "/./", "//", etc. */
437 while (p[0] == '/' || p[0] == '\\') {
440 (p[3] == '/' || p[3] == '\\')) {
441 p += 3; /* Remove "/..", leave "/" for next pass. */
442 } else if (p[1] == '.' &&
443 (p[2] == '/' || p[2] == '\\')) {
444 p += 2; /* Remove "/.", leave "/" for next pass. */
446 p += 1; /* Remove "/". */
447 warn_strip_leading_char(bsdtar, rp);
455 * Handle --strip-components and any future path-rewriting options.
456 * Returns non-zero if the pathname should not be extracted.
458 * Note: The rewrites are applied uniformly to pathnames and hardlink
459 * names but not to symlink bodies. This is deliberate: Symlink
460 * bodies are not necessarily filenames. Even when they are, they
461 * need to be interpreted relative to the directory containing them,
462 * so simple rewrites like this are rarely appropriate.
464 * TODO: Support pax-style regex path rewrites.
/*
 * edit_pathname(): apply the path-rewriting options to one entry.
 *
 * Steps, in the order they appear below:
 *   1. When regex support is compiled in, apply_substitution() is run
 *      on the pathname, the hardlink target and the symlink body, and
 *      each rewritten string is copied back into the entry.  The
 *      "Invalid substitution, skipping entry" warning belongs to this
 *      step.
 *   2. With --strip-components, strip_components() is applied to the
 *      pathname and to the hardlink target.
 *   3. Unless OPTFLAG_ABSOLUTE_PATHS is set, strip_absolute_path() is
 *      applied to the pathname and to the hardlink target.
 *   4. Redundant leading '/' characters are collapsed (NOTE(review):
 *      which branch this belongs to is not visible in this view).
 *   5. A pathname or hardlink name that changed is copied back into
 *      the entry.
 *
 * Symlink bodies only take part in step 1.  The return statements are
 * not visible in this view.
 */
467 edit_pathname(struct bsdtar *bsdtar, struct archive_entry *entry)
469 const char *name = archive_entry_pathname(entry);
470 const char *original_name = name;
471 const char *hardlinkname = archive_entry_hardlink(entry);
472 const char *original_hardlinkname = hardlinkname;
473 #if defined(HAVE_REGEX_H) || defined(HAVE_PCREPOSIX_H)
477 /* Apply user-specified substitution to pathname. */
478 r = apply_substitution(bsdtar, name, &subst_name, 0, 0);
480 lafe_warnc(0, "Invalid substitution, skipping entry");
484 archive_entry_copy_pathname(entry, subst_name);
485 if (*subst_name == '\0') {
/* Re-read the name from the entry and treat it as the new baseline
 * for the "did it change" test at the end. */
490 name = archive_entry_pathname(entry);
491 original_name = name;
494 /* Apply user-specified substitution to hardlink target. */
495 if (hardlinkname != NULL) {
496 r = apply_substitution(bsdtar, hardlinkname, &subst_name, 0, 1);
498 lafe_warnc(0, "Invalid substitution, skipping entry");
502 archive_entry_copy_hardlink(entry, subst_name);
505 hardlinkname = archive_entry_hardlink(entry);
506 original_hardlinkname = hardlinkname;
509 /* Apply user-specified substitution to symlink body. */
510 if (archive_entry_symlink(entry) != NULL) {
511 r = apply_substitution(bsdtar, archive_entry_symlink(entry), &subst_name, 1, 0);
513 lafe_warnc(0, "Invalid substitution, skipping entry");
517 archive_entry_copy_symlink(entry, subst_name);
523 /* Strip leading dir names as per --strip-components option. */
524 if (bsdtar->strip_components > 0) {
525 name = strip_components(name, bsdtar->strip_components);
529 if (hardlinkname != NULL) {
530 hardlinkname = strip_components(hardlinkname,
531 bsdtar->strip_components);
532 if (hardlinkname == NULL)
537 if ((bsdtar->flags & OPTFLAG_ABSOLUTE_PATHS) == 0) {
538 /* By default, don't write or restore absolute pathnames. */
539 name = strip_absolute_path(bsdtar, name);
543 if (hardlinkname != NULL) {
544 hardlinkname = strip_absolute_path(bsdtar, hardlinkname);
545 if (*hardlinkname == '\0')
549 /* Strip redundant leading '/' characters. */
550 while (name[0] == '/' && name[1] == '/')
554 /* Replace name in archive_entry. */
/* Pointer comparison: the helpers above hand back a different pointer
 * whenever they skipped part of the string. */
555 if (name != original_name) {
556 archive_entry_copy_pathname(entry, name);
558 if (hardlinkname != original_hardlinkname) {
559 archive_entry_copy_hardlink(entry, hardlinkname);
565 * It would be nice to just use printf() for formatting large numbers,
566 * but the compatibility problems are quite a headache. Hence the
567 * following simple utility function.
/*
 * tar_i64toa(): format a signed 64-bit integer as a decimal string.
 *
 * Returns a pointer into a static buffer, so the result is only valid
 * until the next call and the function is not reentrant.
 *
 * The magnitude is computed in unsigned arithmetic.  Negating
 * INT64_MIN as a signed value overflows, which is undefined behavior;
 * 0 - (uint64_t)n0 is well defined and yields 2^63 for that input.
 */
const char *
tar_i64toa(int64_t n0)
{
	/* At most 19 digits, a sign and the terminating NUL. */
	static char buff[24];
	uint64_t n = n0 < 0 ? (uint64_t)0 - (uint64_t)n0 : (uint64_t)n0;
	char *p = buff + sizeof(buff);

	/* Build the string backwards from the end of the buffer. */
	*--p = '\0';
	do {
		*--p = (char)('0' + (int)(n % 10));
		n /= 10;
	} while (n != 0);
	if (n0 < 0)
		*--p = '-';
	return (p);
}
586 * Like strcmp(), but try to be a little more aware of the fact that
587 * we're comparing two paths. Right now, it just handles leading
588 * "./" and trailing '/' specially, so that "a/b/" == "./a/b"
590 * TODO: Make this better, so that "./a//b/./c/" == "a/b/c"
591 * TODO: After this works, push it down into libarchive.
592 * TODO: Publish the path normalization routines in libarchive so
593 * that bsdtar can normalize paths and use fast strcmp() instead
596 * Note: This is currently only used within write.c, so should
597 * not handle \ path separators.
/*
 * pathcmp(): strcmp()-like comparison that tolerates small spelling
 * differences between equivalent paths.
 *
 * A leading "./" is recognized on either argument when something
 * follows it, and a single trailing '/' on one side is accepted as
 * matching the end of the other side.  When the paths really differ
 * the result is the difference of the first mismatching bytes, taken
 * as unsigned char.
 */
601 pathcmp(const char *a, const char *b)
603 /* Skip leading './' */
604 if (a[0] == '.' && a[1] == '/' && a[2] != '\0')
606 if (b[0] == '.' && b[1] == '/' && b[2] != '\0')
608 /* Find the first difference, or return (0) if none. */
616 * If one ends in '/' and the other one doesn't,
619 if (a[0] == '/' && a[1] == '\0' && b[0] == '\0')
621 if (a[0] == '\0' && b[0] == '/' && b[1] == '\0')
623 /* They're really different, return the correct sign. */
624 return (*(const unsigned char *)a - *(const unsigned char *)b);
/* Size in bytes of the passphrase buffer kept in bsdtar->ppbuff. */
627 #define PPBUFF_SIZE 1024
/*
 * passphrase_callback(): obtain a passphrase from the user.
 *
 * `_client_data` is the struct bsdtar; `a` is unused.  The buffer
 * bsdtar->ppbuff is allocated on first use (PPBUFF_SIZE bytes) and
 * reused while it stays non-NULL; allocation failure goes to
 * lafe_errc().  The return value is whatever lafe_readpassphrase()
 * returns for the prompt "Enter passphrase:".
 */
629 passphrase_callback(struct archive *a, void *_client_data)
631 struct bsdtar *bsdtar = (struct bsdtar *)_client_data;
632 (void)a; /* UNUSED */
634 if (bsdtar->ppbuff == NULL) {
635 bsdtar->ppbuff = malloc(PPBUFF_SIZE);
636 if (bsdtar->ppbuff == NULL)
637 lafe_errc(1, errno, "Out of memory");
639 return lafe_readpassphrase("Enter passphrase:",
640 bsdtar->ppbuff, PPBUFF_SIZE);
644 passphrase_free(char *ppbuff)
646 if (ppbuff != NULL) {
647 memset(ppbuff, 0, PPBUFF_SIZE);
653 * Display information about the current file.
655 * The format here roughly duplicates the output of 'ls -l'.
656 * This is based on SUSv2, where 'tar tv' is documented as
657 * listing additional information in an "unspecified format,"
658 * and 'pax -l' is documented as using the same format as 'ls -l'.
/*
 * list_item_verbose(): write one verbose listing line for `entry` to
 * `out`.
 *
 * Fields, in output order: mode string and link count; user name, or
 * the uid when no name is present; group name, or the gid; device
 * major,minor for character and block devices, otherwise the file
 * size; the modification time; the pathname; and the link target for
 * hard links (" link to ") or symbolic links (" -> ").
 *
 * Column widths live in bsdtar->u_width and bsdtar->gs_width and are
 * compared against each value as it is printed so they can grow.  The
 * pathname and link targets go through safe_fprintf().
 */
661 list_item_verbose(struct bsdtar *bsdtar, FILE *out, struct archive_entry *entry)
671 * We avoid collecting the entire list in memory at once by
672 * listing things as we see them. However, that also means we can't
673 * just pre-compute the field widths. Instead, we start with guesses
674 * and just widen them as necessary. These numbers are completely
677 if (!bsdtar->u_width) {
679 bsdtar->gs_width = 13;
683 fprintf(out, "%s %d ",
684 archive_entry_strmode(entry),
685 archive_entry_nlink(entry));
687 /* Use uname if it's present, else uid. */
688 p = archive_entry_uname(entry);
689 if ((p == NULL) || (*p == '\0')) {
691 (unsigned long)archive_entry_uid(entry));
695 if (w > bsdtar->u_width)
697 fprintf(out, "%-*s ", (int)bsdtar->u_width, p);
699 /* Use gname if it's present, else gid. */
700 p = archive_entry_gname(entry);
701 if (p != NULL && p[0] != '\0') {
702 fprintf(out, "%s", p);
706 (unsigned long)archive_entry_gid(entry));
708 fprintf(out, "%s", tmp);
712 * Print device number or file size, right-aligned so as to make
713 * total width of group and devnum/filesize fields be gs_width.
714 * If gs_width is too small, grow it.
716 if (archive_entry_filetype(entry) == AE_IFCHR
717 || archive_entry_filetype(entry) == AE_IFBLK) {
718 sprintf(tmp, "%lu,%lu",
719 (unsigned long)archive_entry_rdevmajor(entry),
720 (unsigned long)archive_entry_rdevminor(entry));
722 strcpy(tmp, tar_i64toa(archive_entry_size(entry)));
724 if (w + strlen(tmp) >= bsdtar->gs_width)
725 bsdtar->gs_width = w+strlen(tmp)+1;
726 fprintf(out, "%*s", (int)(bsdtar->gs_width - w), tmp);
728 /* Format the time using 'ls -l' conventions. */
729 tim = archive_entry_mtime(entry);
/* NOTE(review): HALF_YEAR expands without enclosing parentheses.  The
 * two uses below (now - HALF_YEAR, now + HALF_YEAR) still group as
 * intended because * and / bind tighter than + and -, but other
 * contexts would not be safe. */
730 #define HALF_YEAR (time_t)365 * 86400 / 2
731 #if defined(_WIN32) && !defined(__CYGWIN__)
732 #define DAY_FMT "%d" /* Windows' strftime function does not support %e format. */
734 #define DAY_FMT "%e" /* Day number without leading zeros */
/* Timestamps more than HALF_YEAR away from `now` get a format with the
 * year; the other format shows hours and minutes instead. */
736 if (tim < now - HALF_YEAR || tim > now + HALF_YEAR)
737 fmt = bsdtar->day_first ? DAY_FMT " %b %Y" : "%b " DAY_FMT " %Y";
739 fmt = bsdtar->day_first ? DAY_FMT " %b %H:%M" : "%b " DAY_FMT " %H:%M";
/* NOTE(review): the localtime() result is handed to strftime()
 * unchecked; localtime() may return NULL for a time it cannot
 * convert -- confirm whether that case needs handling. */
740 strftime(tmp, sizeof(tmp), fmt, localtime(&tim));
741 fprintf(out, " %s ", tmp);
742 safe_fprintf(out, "%s", archive_entry_pathname(entry));
744 /* Extra information for links. */
745 if (archive_entry_hardlink(entry)) /* Hard link */
746 safe_fprintf(out, " link to %s",
747 archive_entry_hardlink(entry));
748 else if (archive_entry_symlink(entry)) /* Symbolic link */
749 safe_fprintf(out, " -> %s", archive_entry_symlink(entry));