lib/libarchive/archive_read_open_filename.c

   1 /*-
   2  * Copyright (c) 2003-2007 Tim Kientzle
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24  */
  25
  26 #include "archive_platform.h"
  27 __FBSDID("$FreeBSD$");
  28
  29 #ifdef HAVE_SYS_STAT_H
  30 #include <sys/stat.h>
  31 #endif
  32 #ifdef HAVE_ERRNO_H
  33 #include <errno.h>
  34 #endif
  35 #ifdef HAVE_FCNTL_H
  36 #include <fcntl.h>
  37 #endif
  38 #ifdef HAVE_STDLIB_H
  39 #include <stdlib.h>
  40 #endif
  41 #ifdef HAVE_STRING_H
  42 #include <string.h>
  43 #endif
  44 #ifdef HAVE_UNISTD_H
  45 #include <unistd.h>
  46 #endif
  47
  48 #include "archive.h"
  49
  50 #ifndef O_BINARY
  51 #define O_BINARY 0
  52 #endif
  53
  54 struct read_file_data {
  55         int      fd;
  56         size_t   block_size;
  57         void    *buffer;
  58         mode_t   st_mode;  /* Mode bits for opened file. */
  59         char     can_skip; /* This file supports skipping. */
  60         char     filename[1]; /* Must be last! */
  61 };
  62
  63 static int      file_close(struct archive *, void *);
  64 static ssize_t  file_read(struct archive *, void *, const void **buff);
  65 #if ARCHIVE_API_VERSION < 2
  66 static ssize_t  file_skip(struct archive *, void *, size_t request);
  67 #else
  68 static off_t    file_skip(struct archive *, void *, off_t request);
  69 #endif
  70
  71 int
  72 archive_read_open_file(struct archive *a, const char *filename,
  73     size_t block_size)
  74 {
  75         return (archive_read_open_filename(a, filename, block_size));
  76 }
  77
  78 int
  79 archive_read_open_filename(struct archive *a, const char *filename,
  80     size_t block_size)
  81 {
  82         struct stat st;
  83         struct read_file_data *mine;
  84         void *b;
  85         int fd;
  86
  87         archive_clear_error(a);
  88         if (filename == NULL || filename[0] == '\0') {
  89                 /* We used to invoke archive_read_open_fd(a,0,block_size)
  90                  * here, but that doesn't (and shouldn't) handle the
  91                  * end-of-file flush when reading stdout from a pipe.
  92                  * Basically, read_open_fd() is intended for folks who
  93                  * are willing to handle such details themselves.  This
  94                  * API is intended to be a little smarter for folks who
  95                  * want easy handling of the common case.
  96                  */
  97                 filename = ""; /* Normalize NULL to "" */
  98                 fd = 0;
  99         } else {
 100                 fd = open(filename, O_RDONLY | O_BINARY);
 101                 if (fd < 0) {
 102                         archive_set_error(a, errno,
 103                             "Failed to open '%s'", filename);
 104                         return (ARCHIVE_FATAL);
 105                 }
 106         }
 107         if (fstat(fd, &st) != 0) {
 108                 archive_set_error(a, errno, "Can't stat '%s'", filename);
 109                 return (ARCHIVE_FATAL);
 110         }
 111
 112         mine = (struct read_file_data *)calloc(1,
 113             sizeof(*mine) + strlen(filename));
 114         b = malloc(block_size);
 115         if (mine == NULL || b == NULL) {
 116                 archive_set_error(a, ENOMEM, "No memory");
 117                 free(mine);
 118                 free(b);
 119                 return (ARCHIVE_FATAL);
 120         }
 121         strcpy(mine->filename, filename);
 122         mine->block_size = block_size;
 123         mine->buffer = b;
 124         mine->fd = fd;
 125         /* Remember mode so close can decide whether to flush. */
 126         mine->st_mode = st.st_mode;
 127         /* If we're reading a file from disk, ensure that we don't
 128            overwrite it with an extracted file. */
 129         if (S_ISREG(st.st_mode)) {
 130                 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
 131                 /*
 132                  * Enabling skip here is a performance optimization
 133                  * for anything that supports lseek().  On FreeBSD
 134                  * (and probably many other systems), only regular
 135                  * files and raw disk devices support lseek() (on
 136                  * other input types, lseek() returns success but
 137                  * doesn't actually change the file pointer, which
 138                  * just completely screws up the position-tracking
 139                  * logic).  In addition, I've yet to find a portable
 140                  * way to determine if a device is a raw disk device.
 141                  * So I don't see a way to do much better than to only
 142                  * enable this optimization for regular files.
 143                  */
 144                 mine->can_skip = 1;
 145         }
 146         return (archive_read_open2(a, mine,
 147                 NULL, file_read, file_skip, file_close));
 148 }
 149
 150 static ssize_t
 151 file_read(struct archive *a, void *client_data, const void **buff)
 152 {
 153         struct read_file_data *mine = (struct read_file_data *)client_data;
 154         ssize_t bytes_read;
 155
 156         *buff = mine->buffer;
 157         bytes_read = read(mine->fd, mine->buffer, mine->block_size);
 158         if (bytes_read < 0) {
 159                 if (mine->filename[0] == '\0')
 160                         archive_set_error(a, errno, "Error reading stdin");
 161                 else
 162                         archive_set_error(a, errno, "Error reading '%s'",
 163                             mine->filename);
 164         }
 165         return (bytes_read);
 166 }
 167
 168 #if ARCHIVE_API_VERSION < 2
 169 static ssize_t
 170 file_skip(struct archive *a, void *client_data, size_t request)
 171 #else
 172 static off_t
 173 file_skip(struct archive *a, void *client_data, off_t request)
 174 #endif
 175 {
 176         struct read_file_data *mine = (struct read_file_data *)client_data;
 177         off_t old_offset, new_offset;
 178
 179         if (!mine->can_skip) /* We can't skip, so ... */
 180                 return (0); /* ... skip zero bytes. */
 181
 182         /* Reduce request to the next smallest multiple of block_size */
 183         request = (request / mine->block_size) * mine->block_size;
 184         if (request == 0)
 185                 return (0);
 186
 187         /*
 188          * Hurray for lazy evaluation: if the first lseek fails, the second
 189          * one will not be executed.
 190          */
 191         if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
 192             ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0))
 193         {
 194                 /* If skip failed once, it will probably fail again. */
 195                 mine->can_skip = 0;
 196
 197                 if (errno == ESPIPE)
 198                 {
 199                         /*
 200                          * Failure to lseek() can be caused by the file
 201                          * descriptor pointing to a pipe, socket or FIFO.
 202                          * Return 0 here, so the compression layer will use
 203                          * read()s instead to advance the file descriptor.
 204                          * It's slower of course, but works as well.
 205                          */
 206                         return (0);
 207                 }
 208                 /*
 209                  * There's been an error other than ESPIPE. This is most
 210                  * likely caused by a programmer error (too large request)
 211                  * or a corrupted archive file.
 212                  */
 213                 if (mine->filename[0] == '\0')
 214                         /*
 215                          * Should never get here, since lseek() on stdin ought
 216                          * to return an ESPIPE error.
 217                          */
 218                         archive_set_error(a, errno, "Error seeking in stdin");
 219                 else
 220                         archive_set_error(a, errno, "Error seeking in '%s'",
 221                             mine->filename);
 222                 return (-1);
 223         }
 224         return (new_offset - old_offset);
 225 }
 226
 227 static int
 228 file_close(struct archive *a, void *client_data)
 229 {
 230         struct read_file_data *mine = (struct read_file_data *)client_data;
 231
 232         (void)a; /* UNUSED */
 233
 234         /* Only flush and close if open succeeded. */
 235         if (mine->fd >= 0) {
 236                 /*
 237                  * Sometimes, we should flush the input before closing.
 238                  *   Regular files: faster to just close without flush.
 239                  *   Devices: must not flush (user might need to
 240                  *      read the "next" item on a non-rewind device).
 241                  *   Pipes and sockets:  must flush (otherwise, the
 242                  *      program feeding the pipe or socket may complain).
 243                  * Here, I flush everything except for regular files and
 244                  * device nodes.
 245                  */
 246                 if (!S_ISREG(mine->st_mode)
 247                     && !S_ISCHR(mine->st_mode)
 248                     && !S_ISBLK(mine->st_mode)) {
 249                         ssize_t bytesRead;
 250                         do {
 251                                 bytesRead = read(mine->fd, mine->buffer,
 252                                     mine->block_size);
 253                         } while (bytesRead > 0);
 254                 }
 255                 /* If a named file was opened, then it needs to be closed. */
 256                 if (mine->filename[0] != '\0')
 257                         close(mine->fd);
 258         }
 259         free(mine->buffer);
 260         free(mine);
 261         return (ARCHIVE_OK);
 262 }