1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief File opening, unlinking, and closing
6 // Author: Lasse Collin
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
21 static bool warn_fchown;
24 #if defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
25 # include <sys/time.h>
26 #elif defined(HAVE__FUTIME)
27 # include <sys/utime.h>
28 #elif defined(HAVE_UTIME)
33 # ifdef HAVE_SYS_CAPSICUM_H
34 # include <sys/capsicum.h>
36 # include <sys/capability.h>
40 #include "tuklib_open_stdxxx.h"
50 // Using this macro to silence a warning from gcc -Wlogical-op.
51 #if EAGAIN == EWOULDBLOCK
52 # define IS_EAGAIN_OR_EWOULDBLOCK(e) ((e) == EAGAIN)
54 # define IS_EAGAIN_OR_EWOULDBLOCK(e) \
55 ((e) == EAGAIN || (e) == EWOULDBLOCK)
60 IO_WAIT_MORE, // Reading or writing is possible.
61 IO_WAIT_ERROR, // Error or user_abort
62 IO_WAIT_TIMEOUT, // poll() timed out
66 /// If true, try to create sparse files when decompressing.
67 static bool try_sparse = true;
70 /// True if the conditions for sandboxing (described in main()) have been met.
71 static bool sandbox_allowed = false;
74 #ifndef TUKLIB_DOSLIKE
75 /// File status flags of standard input. This is used by io_open_src()
76 /// and io_close_src().
77 static int stdin_flags;
78 static bool restore_stdin_flags = false;
80 /// Original file status flags of standard output. This is used by
81 /// io_open_dest() and io_close_dest() to save and restore the flags.
82 static int stdout_flags;
83 static bool restore_stdout_flags = false;
85 /// Self-pipe used together with the user_abort variable to avoid
86 /// race conditions with signal handling.
87 static int user_abort_pipe[2];
91 static bool io_write_buf(file_pair *pair, const uint8_t *buf, size_t size);
97 // Make sure that stdin, stdout, and stderr are connected to
98 // a valid file descriptor. Exit immediately with exit code ERROR
99 // if we cannot make the file descriptors valid. Maybe we should
100 // print an error message, but our stderr could be screwed anyway.
101 tuklib_open_stdxxx(E_ERROR);
103 #ifndef TUKLIB_DOSLIKE
104 // If fchown() fails setting the owner, we warn about it only if
106 warn_fchown = geteuid() == 0;
108 // Create a pipe for the self-pipe trick.
109 if (pipe(user_abort_pipe))
110 message_fatal(_("Error creating a pipe: %s"),
113 // Make both ends of the pipe non-blocking.
114 for (unsigned i = 0; i < 2; ++i) {
115 int flags = fcntl(user_abort_pipe[i], F_GETFL);
116 if (flags == -1 || fcntl(user_abort_pipe[i], F_SETFL,
117 flags | O_NONBLOCK) == -1)
118 message_fatal(_("Error creating a pipe: %s"),
124 // Avoid doing useless things when statting files.
125 // This isn't important but doesn't hurt.
126 _djstat_flags = _STAT_EXEC_EXT | _STAT_EXEC_MAGIC | _STAT_DIRSIZE;
133 #ifndef TUKLIB_DOSLIKE
135 io_write_to_user_abort_pipe(void)
137 // If the write() fails, it's probably due to the pipe being full.
138 // Failing in that case is fine. If the reason is something else,
139 // there's not much we can do since this is called in a signal
140 // handler. So ignore the errors and try to avoid warnings with
141 // GCC and glibc when _FORTIFY_SOURCE=2 is used.
143 const int ret = write(user_abort_pipe[1], &b, 1);
158 #ifdef ENABLE_SANDBOX
160 io_allow_sandbox(void)
162 sandbox_allowed = true;
167 /// Enables operating-system-specific sandbox if it is possible.
168 /// src_fd is the file descriptor of the input file.
170 io_sandbox_enter(int src_fd)
172 if (!sandbox_allowed) {
173 // This message is more often annoying than useful so
174 // it's commented out. It can be useful when developing
175 // the sandboxing code.
176 //message(V_DEBUG, _("Sandbox is disabled due "
177 // "to incompatible command line arguments"));
181 const char dummy_str[] = "x";
183 // Try to ensure that both libc and xz locale files have been
184 // loaded when NLS is enabled.
185 snprintf(NULL, 0, "%s%s", _(dummy_str), strerror(EINVAL));
187 // Try to ensure that iconv data files needed for handling multibyte
188 // characters have been loaded. This is needed at least with glibc.
189 tuklib_mbstr_width(dummy_str, NULL);
192 // Capsicum needs FreeBSD 10.0 or later.
195 if (cap_rights_limit(src_fd, cap_rights_init(&rights,
196 CAP_EVENT, CAP_FCNTL, CAP_LOOKUP, CAP_READ, CAP_SEEK)))
199 if (cap_rights_limit(STDOUT_FILENO, cap_rights_init(&rights,
200 CAP_EVENT, CAP_FCNTL, CAP_FSTAT, CAP_LOOKUP,
201 CAP_WRITE, CAP_SEEK)))
204 if (cap_rights_limit(user_abort_pipe[0], cap_rights_init(&rights,
208 if (cap_rights_limit(user_abort_pipe[1], cap_rights_init(&rights,
216 # error ENABLE_SANDBOX is defined but no sandboxing method was found.
219 // This message is annoying in xz -lvv.
220 //message(V_DEBUG, _("Sandbox was successfully enabled"));
224 message(V_DEBUG, _("Failed to enable the sandbox"));
226 #endif // ENABLE_SANDBOX
229 #ifndef TUKLIB_DOSLIKE
230 /// \brief Waits for input or output to become available or for a signal
232 /// This uses the self-pipe trick to avoid a race condition that can occur
233 /// if a signal is caught after user_abort has been checked but before e.g.
234 /// read() has been called. In that situation read() could block unless
235 /// non-blocking I/O is used. With non-blocking I/O something like select()
236 /// or poll() is needed to avoid a busy-wait loop, and the same race condition
237 /// pops up again. There are pselect() (POSIX-1.2001) and ppoll() (not in
238 /// POSIX) but neither is portable enough in 2013. The self-pipe trick is
239 /// old and very portable.
241 io_wait(file_pair *pair, int timeout, bool is_reading)
243 struct pollfd pfd[2];
246 pfd[0].fd = pair->src_fd;
247 pfd[0].events = POLLIN;
249 pfd[0].fd = pair->dest_fd;
250 pfd[0].events = POLLOUT;
253 pfd[1].fd = user_abort_pipe[0];
254 pfd[1].events = POLLIN;
257 const int ret = poll(pfd, 2, timeout);
260 return IO_WAIT_ERROR;
263 if (errno == EINTR || errno == EAGAIN)
266 message_error(_("%s: poll() failed: %s"),
267 is_reading ? pair->src_name
270 return IO_WAIT_ERROR;
274 return IO_WAIT_TIMEOUT;
276 if (pfd[0].revents != 0)
283 /// \brief Unlink a file
285 /// This tries to verify that the file being unlinked really is the file that
286 /// we want to unlink by verifying device and inode numbers. There's still
287 /// a small unavoidable race, but this is much better than nothing (the file
288 /// could have been moved/replaced even hours earlier).
290 io_unlink(const char *name, const struct stat *known_st)
292 #if defined(TUKLIB_DOSLIKE)
293 // On DOS-like systems, st_ino is meaningless, so don't bother
294 // testing it. Just silence a compiler warning.
299 // If --force was used, use stat() instead of lstat(). This way
300 // (de)compressing symlinks works correctly. However, it also means
301 // that xz cannot detect if a regular file foo is renamed to bar
302 // and then a symlink foo -> bar is created. Because of stat()
303 // instead of lstat(), xz will think that foo hasn't been replaced
304 // with another file. Thus, xz will remove foo even though it no
305 // longer is the same file that xz used when it started compressing.
306 // Probably it's not too bad though, so this doesn't need a more
308 const int stat_ret = opt_force
309 ? stat(name, &new_st) : lstat(name, &new_st);
313 // st_ino is an array, and we don't want to
314 // compare st_dev at all.
315 || memcmp(&new_st.st_ino, &known_st->st_ino,
316 sizeof(new_st.st_ino)) != 0
318 // Typical POSIX-like system
319 || new_st.st_dev != known_st->st_dev
320 || new_st.st_ino != known_st->st_ino
323 // TRANSLATORS: When compression or decompression finishes,
324 // and xz is going to remove the source file, xz first checks
325 // if the source file still exists, and if it does, does its
326 // device and inode numbers match what xz saw when it opened
327 // the source file. If these checks fail, this message is
328 // shown, %s being the filename, and the file is not deleted.
329 // The check for device and inode numbers is there, because
330 // it is possible that the user has put a new file in place
331 // of the original file, and in that case it obviously
332 // shouldn't be removed.
333 message_error(_("%s: File seems to have been moved, "
334 "not removing"), name);
337 // There's a race condition between lstat() and unlink()
338 // but at least we have tried to avoid removing the wrong file.
340 message_error(_("%s: Cannot remove: %s"),
341 name, strerror(errno));
347 /// \brief Copies owner/group and permissions
349 /// \todo ACL and EA support
352 io_copy_attrs(const file_pair *pair)
354 // Skip chown and chmod on Windows.
355 #ifndef TUKLIB_DOSLIKE
356 // This function is more tricky than you may think at first.
357 // Blindly copying permissions may permit users to access the
358 // destination file who didn't have permission to access the
361 // Try changing the owner of the file. If we aren't root or the owner
362 // isn't already us, fchown() probably doesn't succeed. We warn
363 // about failing fchown() only if we are root.
364 if (fchown(pair->dest_fd, pair->src_st.st_uid, (gid_t)(-1))
366 message_warning(_("%s: Cannot set the file owner: %s"),
367 pair->dest_name, strerror(errno));
371 if (fchown(pair->dest_fd, (uid_t)(-1), pair->src_st.st_gid)) {
372 message_warning(_("%s: Cannot set the file group: %s"),
373 pair->dest_name, strerror(errno));
374 // We can still safely copy some additional permissions:
375 // `group' must be at least as strict as `other' and
378 // NOTE: After this, the owner of the source file may
379 // get additional permissions. This shouldn't be too bad,
380 // because the owner would have had permission to chmod
381 // the original file anyway.
382 mode = ((pair->src_st.st_mode & 0070) >> 3)
383 & (pair->src_st.st_mode & 0007);
384 mode = (pair->src_st.st_mode & 0700) | (mode << 3) | mode;
386 // Drop the setuid, setgid, and sticky bits.
387 mode = pair->src_st.st_mode & 0777;
390 if (fchmod(pair->dest_fd, mode))
391 message_warning(_("%s: Cannot set the file permissions: %s"),
392 pair->dest_name, strerror(errno));
395 // Copy the timestamps. We have several possible ways to do this, of
396 // which some are better in both security and precision.
398 // First, get the nanosecond part of the timestamps. As of writing,
399 // it's not standardized by POSIX, and there are several names for
400 // the same thing in struct stat.
404 # if defined(HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC)
406 atime_nsec = pair->src_st.st_atim.tv_nsec;
407 mtime_nsec = pair->src_st.st_mtim.tv_nsec;
409 # elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
411 atime_nsec = pair->src_st.st_atimespec.tv_nsec;
412 mtime_nsec = pair->src_st.st_mtimespec.tv_nsec;
414 # elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
415 // GNU and BSD without extensions
416 atime_nsec = pair->src_st.st_atimensec;
417 mtime_nsec = pair->src_st.st_mtimensec;
419 # elif defined(HAVE_STRUCT_STAT_ST_UATIME)
421 atime_nsec = pair->src_st.st_uatime * 1000;
422 mtime_nsec = pair->src_st.st_umtime * 1000;
424 # elif defined(HAVE_STRUCT_STAT_ST_ATIM_ST__TIM_TV_NSEC)
426 atime_nsec = pair->src_st.st_atim.st__tim.tv_nsec;
427 mtime_nsec = pair->src_st.st_mtim.st__tim.tv_nsec;
435 // Construct a structure to hold the timestamps and call appropriate
436 // function to set the timestamps.
437 #if defined(HAVE_FUTIMENS)
438 // Use nanosecond precision.
439 struct timespec tv[2];
440 tv[0].tv_sec = pair->src_st.st_atime;
441 tv[0].tv_nsec = atime_nsec;
442 tv[1].tv_sec = pair->src_st.st_mtime;
443 tv[1].tv_nsec = mtime_nsec;
445 (void)futimens(pair->dest_fd, tv);
447 #elif defined(HAVE_FUTIMES) || defined(HAVE_FUTIMESAT) || defined(HAVE_UTIMES)
448 // Use microsecond precision.
449 struct timeval tv[2];
450 tv[0].tv_sec = pair->src_st.st_atime;
451 tv[0].tv_usec = atime_nsec / 1000;
452 tv[1].tv_sec = pair->src_st.st_mtime;
453 tv[1].tv_usec = mtime_nsec / 1000;
455 # if defined(HAVE_FUTIMES)
456 (void)futimes(pair->dest_fd, tv);
457 # elif defined(HAVE_FUTIMESAT)
458 (void)futimesat(pair->dest_fd, NULL, tv);
460 // Argh, no function to use a file descriptor to set the timestamp.
461 (void)utimes(pair->dest_name, tv);
464 #elif defined(HAVE__FUTIME)
465 // Use one-second precision with Windows-specific _futime().
466 // We could use utime() too except that for some reason the
467 // timestamp will get reset at close(). With _futime() it works.
468 // This struct cannot be const as _futime() takes a non-const pointer.
469 struct _utimbuf buf = {
470 .actime = pair->src_st.st_atime,
471 .modtime = pair->src_st.st_mtime,
478 (void)_futime(pair->dest_fd, &buf);
480 #elif defined(HAVE_UTIME)
481 // Use one-second precision. utime() doesn't support using file
482 // descriptor either. Some systems have broken utime() prototype
483 // so don't make this const.
484 struct utimbuf buf = {
485 .actime = pair->src_st.st_atime,
486 .modtime = pair->src_st.st_mtime,
493 (void)utime(pair->dest_name, &buf);
500 /// Opens the source file. Returns false on success, true on error.
502 io_open_src_real(file_pair *pair)
504 // There's nothing to open when reading from stdin.
505 if (pair->src_name == stdin_filename) {
506 pair->src_fd = STDIN_FILENO;
507 #ifdef TUKLIB_DOSLIKE
508 setmode(STDIN_FILENO, O_BINARY);
510 // Try to set stdin to non-blocking mode. It won't work
511 // e.g. on OpenBSD if stdin is e.g. /dev/null. In such
512 // case we proceed as if stdin were non-blocking anyway
513 // (in case of /dev/null it will be in practice). The
514 // same applies to stdout in io_open_dest_real().
515 stdin_flags = fcntl(STDIN_FILENO, F_GETFL);
516 if (stdin_flags == -1) {
517 message_error(_("Error getting the file status flags "
518 "from standard input: %s"),
523 if ((stdin_flags & O_NONBLOCK) == 0
524 && fcntl(STDIN_FILENO, F_SETFL,
525 stdin_flags | O_NONBLOCK) != -1)
526 restore_stdin_flags = true;
528 #ifdef HAVE_POSIX_FADVISE
529 // It will fail if stdin is a pipe and that's fine.
530 (void)posix_fadvise(STDIN_FILENO, 0, 0,
531 opt_mode == MODE_LIST
533 : POSIX_FADV_SEQUENTIAL);
538 // Symlinks are not followed unless writing to stdout or --force
540 const bool follow_symlinks = opt_stdout || opt_force;
542 // We accept only regular files if we are writing the output
543 // to disk too. bzip2 allows overriding this with --force but
544 // gzip and xz don't.
545 const bool reg_files_only = !opt_stdout;
548 int flags = O_RDONLY | O_BINARY | O_NOCTTY;
550 #ifndef TUKLIB_DOSLIKE
551 // Use non-blocking I/O:
552 // - It prevents blocking when opening FIFOs and some other
553 // special files, which is good if we want to accept only
555 // - It can help avoid some race conditions with signal handling.
559 #if defined(O_NOFOLLOW)
560 if (!follow_symlinks)
562 #elif !defined(TUKLIB_DOSLIKE)
563 // Some POSIX-like systems lack O_NOFOLLOW (it's not required
564 // by POSIX). Check for symlinks with a separate lstat() on
566 if (!follow_symlinks) {
568 if (lstat(pair->src_name, &st)) {
569 message_error("%s: %s", pair->src_name,
573 } else if (S_ISLNK(st.st_mode)) {
574 message_warning(_("%s: Is a symbolic link, "
575 "skipping"), pair->src_name);
581 (void)follow_symlinks;
584 // Try to open the file. Signals have been blocked so EINTR shouldn't
586 pair->src_fd = open(pair->src_name, flags);
588 if (pair->src_fd == -1) {
589 // Signals (that have a signal handler) have been blocked.
590 assert(errno != EINTR);
593 // Give an understandable error message if the reason
594 // for failing was that the file was a symbolic link.
596 // Note that at least Linux, OpenBSD, Solaris, and Darwin
597 // use ELOOP to indicate that O_NOFOLLOW was the reason
598 // that open() failed. Because there may be
599 // directories in the pathname, ELOOP may occur also
600 // because of a symlink loop in the directory part.
601 // So ELOOP doesn't tell us what actually went wrong,
602 // and this stupidity went into POSIX-1.2008 too.
604 // FreeBSD associates EMLINK with O_NOFOLLOW and
605 // Tru64 uses ENOTSUP. We use these directly here
606 // and skip the lstat() call and the associated race.
607 // I want to hear if there are other kernels that
608 // fail with something else than ELOOP with O_NOFOLLOW.
609 bool was_symlink = false;
611 # if defined(__FreeBSD__) || defined(__DragonFly__)
615 # elif defined(__digital__) && defined(__unix__)
616 if (errno == ENOTSUP)
619 # elif defined(__NetBSD__)
624 if (errno == ELOOP && !follow_symlinks) {
625 const int saved_errno = errno;
627 if (lstat(pair->src_name, &st) == 0
628 && S_ISLNK(st.st_mode))
636 message_warning(_("%s: Is a symbolic link, "
637 "skipping"), pair->src_name);
640 // Something else than O_NOFOLLOW failing
641 // (assuming that the race conditions didn't
643 message_error("%s: %s", pair->src_name,
649 // Stat the source file. We need the result also when we copy
650 // the permissions, and when unlinking.
652 // NOTE: Use stat() instead of fstat() with DJGPP, because
653 // then we have a better chance to get st_ino value that can
654 // be used in io_open_dest_real() to prevent overwriting the
657 if (stat(pair->src_name, &pair->src_st))
660 if (fstat(pair->src_fd, &pair->src_st))
664 if (S_ISDIR(pair->src_st.st_mode)) {
665 message_warning(_("%s: Is a directory, skipping"),
670 if (reg_files_only && !S_ISREG(pair->src_st.st_mode)) {
671 message_warning(_("%s: Not a regular file, skipping"),
676 #ifndef TUKLIB_DOSLIKE
677 if (reg_files_only && !opt_force) {
678 if (pair->src_st.st_mode & (S_ISUID | S_ISGID)) {
679 // gzip rejects setuid and setgid files even
680 // when --force was used. bzip2 doesn't check
681 // for them, but calls fchown() after fchmod(),
682 // and many systems automatically drop setuid
683 // and setgid bits there.
685 // We accept setuid and setgid files if
686 // --force was used. We drop these bits
687 // explicitly in io_copy_attrs().
688 message_warning(_("%s: File has setuid or "
689 "setgid bit set, skipping"),
694 if (pair->src_st.st_mode & S_ISVTX) {
695 message_warning(_("%s: File has sticky bit "
701 if (pair->src_st.st_nlink > 1) {
702 message_warning(_("%s: Input file has more "
703 "than one hard link, "
704 "skipping"), pair->src_name);
709 // If it is something else than a regular file, wait until
710 // there is input available. This way reading from FIFOs
711 // will work when open() is used with O_NONBLOCK.
712 if (!S_ISREG(pair->src_st.st_mode)) {
714 const io_wait_ret ret = io_wait(pair, -1, true);
717 if (ret != IO_WAIT_MORE)
722 #ifdef HAVE_POSIX_FADVISE
723 // It will fail with some special files like FIFOs but that is fine.
724 (void)posix_fadvise(pair->src_fd, 0, 0,
725 opt_mode == MODE_LIST
727 : POSIX_FADV_SEQUENTIAL);
733 message_error("%s: %s", pair->src_name, strerror(errno));
735 (void)close(pair->src_fd);
741 io_open_src(const char *src_name)
743 if (is_empty_filename(src_name))
746 // Since we have only one file open at a time, we can use
747 // a statically allocated structure.
748 static file_pair pair;
751 .src_name = src_name,
756 .src_has_seen_input = false,
757 .flush_needed = false,
758 .dest_try_sparse = false,
759 .dest_pending_sparse = 0,
762 // Block the signals, for which we have a custom signal handler, so
763 // that we don't need to worry about EINTR.
765 const bool error = io_open_src_real(&pair);
768 #ifdef ENABLE_SANDBOX
770 io_sandbox_enter(pair.src_fd);
773 return error ? NULL : &pair;
777 /// \brief Closes source file of the file_pair structure
779 /// \param pair File whose src_fd should be closed
780 /// \param success If true, the file will be removed from the disk if
781 /// closing succeeds and --keep hasn't been used.
783 io_close_src(file_pair *pair, bool success)
785 #ifndef TUKLIB_DOSLIKE
786 if (restore_stdin_flags) {
787 assert(pair->src_fd == STDIN_FILENO);
789 restore_stdin_flags = false;
791 if (fcntl(STDIN_FILENO, F_SETFL, stdin_flags) == -1)
792 message_error(_("Error restoring the status flags "
793 "to standard input: %s"),
798 if (pair->src_fd != STDIN_FILENO && pair->src_fd != -1) {
799 // Close the file before possibly unlinking it. On DOS-like
800 // systems this is always required since unlinking will fail
801 // if the file is open. On POSIX systems it usually works
802 // to unlink open files, but in some cases it doesn't and
803 // one gets EBUSY in errno.
805 // xz 5.2.2 and older unlinked the file before closing it
806 // (except on DOS-like systems). The old code didn't handle
807 // EBUSY and could fail e.g. on some CIFS shares. The
808 // advantage of unlinking before closing is negligible
809 // (avoids a race between close() and stat()/lstat() and
810 // unlink()), so let's keep this simple.
811 (void)close(pair->src_fd);
813 if (success && !opt_keep_original)
814 io_unlink(pair->src_name, &pair->src_st);
822 io_open_dest_real(file_pair *pair)
824 if (opt_stdout || pair->src_fd == STDIN_FILENO) {
825 // We don't modify or free() this.
826 pair->dest_name = (char *)"(stdout)";
827 pair->dest_fd = STDOUT_FILENO;
828 #ifdef TUKLIB_DOSLIKE
829 setmode(STDOUT_FILENO, O_BINARY);
831 // Try to set O_NONBLOCK if it isn't already set.
832 // If it fails, we assume that stdout is non-blocking
833 // in practice. See the comments in io_open_src_real()
834 // for similar situation with stdin.
836 // NOTE: O_APPEND may be unset later in this function
837 // and it relies on stdout_flags being set here.
838 stdout_flags = fcntl(STDOUT_FILENO, F_GETFL);
839 if (stdout_flags == -1) {
840 message_error(_("Error getting the file status flags "
841 "from standard output: %s"),
846 if ((stdout_flags & O_NONBLOCK) == 0
847 && fcntl(STDOUT_FILENO, F_SETFL,
848 stdout_flags | O_NONBLOCK) != -1)
849 restore_stdout_flags = true;
852 pair->dest_name = suffix_get_dest_name(pair->src_name);
853 if (pair->dest_name == NULL)
858 if (stat(pair->dest_name, &st) == 0) {
859 // Check that it isn't a special file like "prn".
860 if (st.st_dev == -1) {
861 message_error("%s: Refusing to write to "
862 "a DOS special file",
864 free(pair->dest_name);
868 // Check that we aren't overwriting the source file.
869 if (st.st_dev == pair->src_st.st_dev
870 && st.st_ino == pair->src_st.st_ino) {
871 message_error("%s: Output file is the same "
874 free(pair->dest_name);
880 // If --force was used, unlink the target file first.
881 if (opt_force && unlink(pair->dest_name) && errno != ENOENT) {
882 message_error(_("%s: Cannot remove: %s"),
883 pair->dest_name, strerror(errno));
884 free(pair->dest_name);
889 int flags = O_WRONLY | O_BINARY | O_NOCTTY
891 #ifndef TUKLIB_DOSLIKE
894 const mode_t mode = S_IRUSR | S_IWUSR;
895 pair->dest_fd = open(pair->dest_name, flags, mode);
897 if (pair->dest_fd == -1) {
898 message_error("%s: %s", pair->dest_name,
900 free(pair->dest_name);
905 #ifndef TUKLIB_DOSLIKE
906 // dest_st isn't used on DOS-like systems except as a dummy
907 // argument to io_unlink(), so don't fstat() on such systems.
908 if (fstat(pair->dest_fd, &pair->dest_st)) {
909 // If fstat() really fails, we have a safe fallback here.
911 pair->dest_st.st_ino[0] = 0;
912 pair->dest_st.st_ino[1] = 0;
913 pair->dest_st.st_ino[2] = 0;
915 pair->dest_st.st_dev = 0;
916 pair->dest_st.st_ino = 0;
918 } else if (try_sparse && opt_mode == MODE_DECOMPRESS) {
919 // When writing to standard output, we need to be extra
921 // - It may be connected to something else than
923 // - We aren't necessarily writing to a new empty file
924 // or to the end of an existing file.
925 // - O_APPEND may be active.
927 // TODO: I'm keeping this disabled for DOS-like systems
928 // for now. FAT doesn't support sparse files, but NTFS
929 // does, so maybe this should be enabled on Windows after
931 if (pair->dest_fd == STDOUT_FILENO) {
932 if (!S_ISREG(pair->dest_st.st_mode))
935 if (stdout_flags & O_APPEND) {
936 // Creating a sparse file is not possible
937 // when O_APPEND is active (it's used by
938 // shell's >> redirection). As I understand
939 // it, it is safe to temporarily disable
940 // O_APPEND in xz, because if someone
941 // happened to write to the same file at the
942 // same time, results would be bad anyway
943 // (users shouldn't assume that xz uses any
944 // specific block size when writing data).
946 // The write position may be something else
947 // than the end of the file, so we must fix
948 // it to start writing at the end of the file
949 // to imitate O_APPEND.
950 if (lseek(STDOUT_FILENO, 0, SEEK_END) == -1)
953 // Construct the new file status flags.
954 // If O_NONBLOCK was set earlier in this
955 // function, it must be kept here too.
956 int flags = stdout_flags & ~O_APPEND;
957 if (restore_stdout_flags)
960 // If this fcntl() fails, we continue but won't
961 // try to create sparse output. The original
962 // flags will still be restored if needed (to
963 // unset O_NONBLOCK) when the file is finished.
964 if (fcntl(STDOUT_FILENO, F_SETFL, flags) == -1)
967 // Disabling O_APPEND succeeded. Mark
968 // that the flags should be restored
969 // in io_close_dest(). (This may have already
970 // been set when enabling O_NONBLOCK.)
971 restore_stdout_flags = true;
973 } else if (lseek(STDOUT_FILENO, 0, SEEK_CUR)
974 != pair->dest_st.st_size) {
975 // Writing won't start exactly at the end
976 // of the file. We cannot use sparse output,
977 // because it would probably corrupt the file.
982 pair->dest_try_sparse = true;
991 io_open_dest(file_pair *pair)
994 const bool ret = io_open_dest_real(pair);
1000 /// \brief Closes destination file of the file_pair structure
1002 /// \param pair File whose dest_fd should be closed
1003 /// \param success If false, the file will be removed from the disk.
1005 /// \return Zero if closing succeeds. On error, -1 is returned and
1006 /// error message printed.
1008 io_close_dest(file_pair *pair, bool success)
1010 #ifndef TUKLIB_DOSLIKE
1011 // If io_open_dest() set O_NONBLOCK or disabled O_APPEND, restore the flags here.
1012 if (restore_stdout_flags) {
1013 assert(pair->dest_fd == STDOUT_FILENO);
1015 restore_stdout_flags = false;
1017 if (fcntl(STDOUT_FILENO, F_SETFL, stdout_flags) == -1) {
1018 message_error(_("Error restoring the O_APPEND flag "
1019 "to standard output: %s"),
1026 if (pair->dest_fd == -1 || pair->dest_fd == STDOUT_FILENO)
1029 if (close(pair->dest_fd)) {
1030 message_error(_("%s: Closing the file failed: %s"),
1031 pair->dest_name, strerror(errno));
1033 // Closing destination file failed, so we cannot trust its
1034 // contents. Get rid of junk:
1035 io_unlink(pair->dest_name, &pair->dest_st);
1036 free(pair->dest_name);
1040 // If the operation using this file wasn't successful, we get rid
1041 // of the junk file.
1043 io_unlink(pair->dest_name, &pair->dest_st);
1045 free(pair->dest_name);
1052 io_close(file_pair *pair, bool success)
1054 // Take care of sparseness at the end of the output file.
1055 if (success && pair->dest_try_sparse
1056 && pair->dest_pending_sparse > 0) {
1057 // Seek forward one byte less than the size of the pending
1058 // hole, then write one zero-byte. This way the file grows
1059 // to its correct size. An alternative would be to use
1060 // ftruncate() but that isn't portable enough (e.g. it
1061 // doesn't work with FAT on Linux; FAT isn't that important
1062 // since it doesn't support sparse files anyway, but we don't
1063 // want to create corrupt files on it).
1064 if (lseek(pair->dest_fd, pair->dest_pending_sparse - 1,
1066 message_error(_("%s: Seeking failed when trying "
1067 "to create a sparse file: %s"),
1068 pair->dest_name, strerror(errno));
1071 const uint8_t zero[1] = { '\0' };
1072 if (io_write_buf(pair, zero, 1))
1079 // Copy the file attributes. We need to skip this if destination
1080 // file isn't open or it is standard output.
1081 if (success && pair->dest_fd != -1 && pair->dest_fd != STDOUT_FILENO)
1082 io_copy_attrs(pair);
1084 // Close the destination first. If it fails, we must not remove
1086 if (io_close_dest(pair, success))
1089 // Close the source file, and unlink it if the operation using this
1090 // file pair was successful and we haven't requested to keep the
1092 io_close_src(pair, success);
1101 io_fix_src_pos(file_pair *pair, size_t rewind_size)
1103 assert(rewind_size <= IO_BUFFER_SIZE);
1105 if (rewind_size > 0) {
1106 // This doesn't need to work on unseekable file descriptors,
1107 // so just ignore possible errors.
1108 (void)lseek(pair->src_fd, -(off_t)(rewind_size), SEEK_CUR);
1116 io_read(file_pair *pair, io_buf *buf, size_t size)
1118 // We use small buffers here.
1119 assert(size < SSIZE_MAX);
1123 while (pos < size) {
1124 const ssize_t amount = read(
1125 pair->src_fd, buf->u8 + pos, size - pos);
1128 pair->src_eof = true;
1133 if (errno == EINTR) {
1140 #ifndef TUKLIB_DOSLIKE
1141 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1142 // Disable the flush-timeout if no input has
1143 // been seen since the previous flush and thus
1144 // there would be nothing to flush after the
1145 // timeout expires (avoids busy waiting).
1146 const int timeout = pair->src_has_seen_input
1147 ? mytime_get_flush_timeout()
1150 switch (io_wait(pair, timeout, true)) {
1157 case IO_WAIT_TIMEOUT:
1158 pair->flush_needed = true;
1167 message_error(_("%s: Read error: %s"),
1168 pair->src_name, strerror(errno));
1173 pos += (size_t)(amount);
1175 if (!pair->src_has_seen_input) {
1176 pair->src_has_seen_input = true;
1177 mytime_set_flush_time();
1186 io_pread(file_pair *pair, io_buf *buf, size_t size, off_t pos)
1188 // Using lseek() and read() is more portable than pread() and
1189 // for us it is as good as real pread().
1190 if (lseek(pair->src_fd, pos, SEEK_SET) != pos) {
1191 message_error(_("%s: Error seeking the file: %s"),
1192 pair->src_name, strerror(errno));
1196 const size_t amount = io_read(pair, buf, size);
1197 if (amount == SIZE_MAX)
1200 if (amount != size) {
1201 message_error(_("%s: Unexpected end of file"),
1211 is_sparse(const io_buf *buf)
1213 assert(IO_BUFFER_SIZE % sizeof(uint64_t) == 0);
1215 for (size_t i = 0; i < ARRAY_SIZE(buf->u64); ++i)
1216 if (buf->u64[i] != 0)
1224 io_write_buf(file_pair *pair, const uint8_t *buf, size_t size)
1226 assert(size < SSIZE_MAX);
1229 const ssize_t amount = write(pair->dest_fd, buf, size);
1231 if (errno == EINTR) {
1238 #ifndef TUKLIB_DOSLIKE
1239 if (IS_EAGAIN_OR_EWOULDBLOCK(errno)) {
1240 if (io_wait(pair, -1, false) == IO_WAIT_MORE)
1247 // Handle broken pipe specially. gzip and bzip2
1248 // don't print anything on SIGPIPE. In addition,
1249 // gzip --quiet uses exit status 2 (warning) on
1250 // broken pipe instead of whatever raise(SIGPIPE)
1251 // would make it return. It is there to hide "Broken
1252 // pipe" message on some old shells (probably old
1255 // We don't do anything special with --quiet, which
1256 // is what bzip2 does too. If we get SIGPIPE, we
1257 // will handle it like other signals by setting
1258 // user_abort, and get EPIPE here.
1260 message_error(_("%s: Write error: %s"),
1261 pair->dest_name, strerror(errno));
1266 buf += (size_t)(amount);
1267 size -= (size_t)(amount);
1275 io_write(file_pair *pair, const io_buf *buf, size_t size)
1277 assert(size <= IO_BUFFER_SIZE);
1279 if (pair->dest_try_sparse) {
1280 // Check if the block is sparse (contains only zeros). If it
1281 // is sparse, we just store the amount and return. We will take
1282 // care of actually skipping over the hole when we hit the
1283 // next data block or close the file.
1285 // Since io_close() requires that dest_pending_sparse > 0
1286 // if the file ends with sparse block, we must also return
1287 // if size == 0 to avoid doing the lseek().
1288 if (size == IO_BUFFER_SIZE) {
1289 // Even if the block was sparse, treat it as non-sparse
1290 // if the pending sparse amount is large compared to
1291 // the size of off_t. In practice this only matters
1292 // on 32-bit systems where off_t isn't always 64 bits.
1293 const off_t pending_max
1294 = (off_t)(1) << (sizeof(off_t) * CHAR_BIT - 2);
1295 if (is_sparse(buf) && pair->dest_pending_sparse
1297 pair->dest_pending_sparse += (off_t)(size);
1300 } else if (size == 0) {
1304 // This is not a sparse block. If we have a pending hole,
1306 if (pair->dest_pending_sparse > 0) {
1307 if (lseek(pair->dest_fd, pair->dest_pending_sparse,
1309 message_error(_("%s: Seeking failed when "
1310 "trying to create a sparse "
1311 "file: %s"), pair->dest_name,
1316 pair->dest_pending_sparse = 0;
1320 return io_write_buf(pair, buf->u8, size);