lib/libarchive/archive_read.c

   1 /*-
   2  * Copyright (c) 2003-2007 Tim Kientzle
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
  15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
  18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  24  */
  25
  26 /*
  27  * This file contains the "essential" portions of the read API, that
  28  * is, stuff that will probably always be used by any client that
  29  * actually needs to read an archive.  Optional pieces have been, as
  30  * far as possible, separated out into separate files to avoid
  31  * needlessly bloating statically-linked clients.
  32  */
  33
  34 #include "archive_platform.h"
  35 __FBSDID("$FreeBSD$");
  36
  37 #ifdef HAVE_ERRNO_H
  38 #include <errno.h>
  39 #endif
  40 #include <stdio.h>
  41 #ifdef HAVE_STDLIB_H
  42 #include <stdlib.h>
  43 #endif
  44 #ifdef HAVE_STRING_H
  45 #include <string.h>
  46 #endif
  47 #ifdef HAVE_UNISTD_H
  48 #include <unistd.h>
  49 #endif
  50
  51 #include "archive.h"
  52 #include "archive_entry.h"
  53 #include "archive_private.h"
  54 #include "archive_read_private.h"
  55
  56 #define minimum(a, b) (a < b ? a : b)
  57
  58 static int      build_stream(struct archive_read *);
  59 static int      choose_format(struct archive_read *);
  60 static struct archive_vtable *archive_read_vtable(void);
  61 static int      _archive_read_close(struct archive *);
  62 static int      _archive_read_finish(struct archive *);
  63
  64 static struct archive_vtable *
  65 archive_read_vtable(void)
  66 {
  67         static struct archive_vtable av;
  68         static int inited = 0;
  69
  70         if (!inited) {
  71                 av.archive_finish = _archive_read_finish;
  72                 av.archive_close = _archive_read_close;
  73         }
  74         return (&av);
  75 }
  76
  77 /*
  78  * Allocate, initialize and return a struct archive object.
  79  */
  80 struct archive *
  81 archive_read_new(void)
  82 {
  83         struct archive_read *a;
  84
  85         a = (struct archive_read *)malloc(sizeof(*a));
  86         if (a == NULL)
  87                 return (NULL);
  88         memset(a, 0, sizeof(*a));
  89         a->archive.magic = ARCHIVE_READ_MAGIC;
  90
  91         a->archive.state = ARCHIVE_STATE_NEW;
  92         a->entry = archive_entry_new();
  93         a->archive.vtable = archive_read_vtable();
  94
  95         return (&a->archive);
  96 }
  97
  98 /*
  99  * Record the do-not-extract-to file. This belongs in archive_read_extract.c.
 100  */
 101 void
 102 archive_read_extract_set_skip_file(struct archive *_a, dev_t d, ino_t i)
 103 {
 104         struct archive_read *a = (struct archive_read *)_a;
 105         __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY,
 106             "archive_read_extract_set_skip_file");
 107         a->skip_file_dev = d;
 108         a->skip_file_ino = i;
 109 }
 110
 111 /*
 112  * Set read options for the format.
 113  */
 114 int
 115 archive_read_set_format_options(struct archive *_a, const char *s)
 116 {
 117         struct archive_read *a;
 118         struct archive_format_descriptor *format;
 119         char key[64], val[64];
 120         char *valp;
 121         size_t i;
 122         int len, r;
 123
 124         if (s == NULL || *s == '\0')
 125                 return (ARCHIVE_OK);
 126         a = (struct archive_read *)_a;
 127         __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
 128             ARCHIVE_STATE_NEW, "archive_read_set_format_options");
 129         len = 0;
 130         for (i = 0; i < sizeof(a->formats)/sizeof(a->formats[0]); i++) {
 131                 format = &a->formats[i];
 132                 if (format == NULL || format->options == NULL ||
 133                     format->name == NULL)
 134                         /* This format does not support option. */
 135                         continue;
 136
 137                 while ((len = __archive_parse_options(s, format->name,
 138                     sizeof(key), key, sizeof(val), val)) > 0) {
 139                         valp = val[0] == '\0' ? NULL : val;
 140                         a->format = format;
 141                         r = format->options(a, key, valp);
 142                         a->format = NULL;
 143                         if (r == ARCHIVE_FATAL)
 144                                 return (r);
 145                         s += len;
 146                 }
 147         }
 148         if (len < 0) {
 149                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 150                     "Illegal format options.");
 151                 return (ARCHIVE_WARN);
 152         }
 153         return (ARCHIVE_OK);
 154 }
 155
 156 /*
 157  * Set read options for the filter.
 158  */
 159 int
 160 archive_read_set_filter_options(struct archive *_a, const char *s)
 161 {
 162         struct archive_read *a;
 163         struct archive_read_filter *filter;
 164         struct archive_read_filter_bidder *bidder;
 165         char key[64], val[64];
 166         int len, r;
 167
 168         if (s == NULL || *s == '\0')
 169                 return (ARCHIVE_OK);
 170         a = (struct archive_read *)_a;
 171         __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
 172             ARCHIVE_STATE_NEW, "archive_read_set_filter_options");
 173         filter = a->filter;
 174         len = 0;
 175         for (filter = a->filter; filter != NULL; filter = filter->upstream) {
 176                 bidder = filter->bidder;
 177                 if (bidder == NULL)
 178                         continue;
 179                 if (bidder->options == NULL)
 180                         /* This bidder does not support option */
 181                         continue;
 182                 while ((len = __archive_parse_options(s, filter->name,
 183                     sizeof(key), key, sizeof(val), val)) > 0) {
 184                         if (val[0] == '\0')
 185                                 r = bidder->options(bidder, key, NULL);
 186                         else
 187                                 r = bidder->options(bidder, key, val);
 188                         if (r == ARCHIVE_FATAL)
 189                                 return (r);
 190                         s += len;
 191                 }
 192         }
 193         if (len < 0) {
 194                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
 195                     "Illegal format options.");
 196                 return (ARCHIVE_WARN);
 197         }
 198         return (ARCHIVE_OK);
 199 }
 200
 201 /*
 202  * Set read options for the format and the filter.
 203  */
 204 int
 205 archive_read_set_options(struct archive *_a, const char *s)
 206 {
 207         int r;
 208
 209         r = archive_read_set_format_options(_a, s);
 210         if (r != ARCHIVE_OK)
 211                 return (r);
 212         r = archive_read_set_filter_options(_a, s);
 213         if (r != ARCHIVE_OK)
 214                 return (r);
 215         return (ARCHIVE_OK);
 216 }
 217
 218 /*
 219  * Open the archive
 220  */
 221 int
 222 archive_read_open(struct archive *a, void *client_data,
 223     archive_open_callback *client_opener, archive_read_callback *client_reader,
 224     archive_close_callback *client_closer)
 225 {
 226         /* Old archive_read_open() is just a thin shell around
 227          * archive_read_open2. */
 228         return archive_read_open2(a, client_data, client_opener,
 229             client_reader, NULL, client_closer);
 230 }
 231
 232 static ssize_t
 233 client_read_proxy(struct archive_read_filter *self, const void **buff)
 234 {
 235         ssize_t r;
 236         r = (self->archive->client.reader)(&self->archive->archive,
 237             self->data, buff);
 238         self->archive->archive.raw_position += r;
 239         return (r);
 240 }
 241
 242 static int64_t
 243 client_skip_proxy(struct archive_read_filter *self, int64_t request)
 244 {
 245         int64_t r;
 246         if (self->archive->client.skipper == NULL)
 247                 return (0);
 248         r = (self->archive->client.skipper)(&self->archive->archive,
 249             self->data, request);
 250         self->archive->archive.raw_position += r;
 251         return (r);
 252 }
 253
 254 static int
 255 client_close_proxy(struct archive_read_filter *self)
 256 {
 257         int r = ARCHIVE_OK;
 258
 259         if (self->archive->client.closer != NULL)
 260                 r = (self->archive->client.closer)((struct archive *)self->archive,
 261                     self->data);
 262         self->data = NULL;
 263         return (r);
 264 }
 265
 266
 267 int
 268 archive_read_open2(struct archive *_a, void *client_data,
 269     archive_open_callback *client_opener,
 270     archive_read_callback *client_reader,
 271     archive_skip_callback *client_skipper,
 272     archive_close_callback *client_closer)
 273 {
 274         struct archive_read *a = (struct archive_read *)_a;
 275         struct archive_read_filter *filter;
 276         int e;
 277
 278         __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
 279             "archive_read_open");
 280
 281         if (client_reader == NULL)
 282                 __archive_errx(1,
 283                     "No reader function provided to archive_read_open");
 284
 285         /* Open data source. */
 286         if (client_opener != NULL) {
 287                 e =(client_opener)(&a->archive, client_data);
 288                 if (e != 0) {
 289                         /* If the open failed, call the closer to clean up. */
 290                         if (client_closer)
 291                                 (client_closer)(&a->archive, client_data);
 292                         return (e);
 293                 }
 294         }
 295
 296         /* Save the client functions and mock up the initial source. */
 297         a->client.reader = client_reader;
 298         a->client.skipper = client_skipper;
 299         a->client.closer = client_closer;
 300
 301         filter = calloc(1, sizeof(*filter));
 302         if (filter == NULL)
 303                 return (ARCHIVE_FATAL);
 304         filter->bidder = NULL;
 305         filter->upstream = NULL;
 306         filter->archive = a;
 307         filter->data = client_data;
 308         filter->read = client_read_proxy;
 309         filter->skip = client_skip_proxy;
 310         filter->close = client_close_proxy;
 311         filter->name = "none";
 312         filter->code = ARCHIVE_COMPRESSION_NONE;
 313         a->filter = filter;
 314
 315         /* Build out the input pipeline. */
 316         e = build_stream(a);
 317         if (e == ARCHIVE_OK)
 318                 a->archive.state = ARCHIVE_STATE_HEADER;
 319
 320         return (e);
 321 }
 322
 323 /*
 324  * Allow each registered stream transform to bid on whether
 325  * it wants to handle this stream.  Repeat until we've finished
 326  * building the pipeline.
 327  */
 328 static int
 329 build_stream(struct archive_read *a)
 330 {
 331         int number_bidders, i, bid, best_bid;
 332         struct archive_read_filter_bidder *bidder, *best_bidder;
 333         struct archive_read_filter *filter;
 334         int r;
 335
 336         for (;;) {
 337                 number_bidders = sizeof(a->bidders) / sizeof(a->bidders[0]);
 338
 339                 best_bid = 0;
 340                 best_bidder = NULL;
 341
 342                 bidder = a->bidders;
 343                 for (i = 0; i < number_bidders; i++, bidder++) {
 344                         if (bidder->bid != NULL) {
 345                                 bid = (bidder->bid)(bidder, a->filter);
 346                                 if (bid > best_bid) {
 347                                         best_bid = bid;
 348                                         best_bidder = bidder;
 349                                 }
 350                         }
 351                 }
 352
 353                 /* If no bidder, we're done. */
 354                 if (best_bidder == NULL) {
 355                         a->archive.compression_name = a->filter->name;
 356                         a->archive.compression_code = a->filter->code;
 357                         return (ARCHIVE_OK);
 358                 }
 359
 360                 filter
 361                     = (struct archive_read_filter *)calloc(1, sizeof(*filter));
 362                 if (filter == NULL)
 363                         return (ARCHIVE_FATAL);
 364                 filter->bidder = best_bidder;
 365                 filter->archive = a;
 366                 filter->upstream = a->filter;
 367                 r = (best_bidder->init)(filter);
 368                 if (r != ARCHIVE_OK) {
 369                         free(filter);
 370                         return (r);
 371                 }
 372                 a->filter = filter;
 373         }
 374 }
 375
 376 /*
 377  * Read header of next entry.
 378  */
 379 int
 380 archive_read_next_header2(struct archive *_a, struct archive_entry *entry)
 381 {
 382         struct archive_read *a = (struct archive_read *)_a;
 383         int slot, ret;
 384
 385         __archive_check_magic(_a, ARCHIVE_READ_MAGIC,
 386             ARCHIVE_STATE_HEADER | ARCHIVE_STATE_DATA,
 387             "archive_read_next_header");
 388
 389         ++_a->file_count;
 390         archive_entry_clear(entry);
 391         archive_clear_error(&a->archive);
 392
 393         /*
 394          * If no format has yet been chosen, choose one.
 395          */
 396         if (a->format == NULL) {
 397                 slot = choose_format(a);
 398                 if (slot < 0) {
 399                         a->archive.state = ARCHIVE_STATE_FATAL;
 400                         return (ARCHIVE_FATAL);
 401                 }
 402                 a->format = &(a->formats[slot]);
 403         }
 404
 405         /*
 406          * If client didn't consume entire data, skip any remainder
 407          * (This is especially important for GNU incremental directories.)
 408          */
 409         if (a->archive.state == ARCHIVE_STATE_DATA) {
 410                 ret = archive_read_data_skip(&a->archive);
 411                 if (ret == ARCHIVE_EOF) {
 412                         archive_set_error(&a->archive, EIO, "Premature end-of-file.");
 413                         a->archive.state = ARCHIVE_STATE_FATAL;
 414                         return (ARCHIVE_FATAL);
 415                 }
 416                 if (ret != ARCHIVE_OK)
 417                         return (ret);
 418         }
 419
 420         /* Record start-of-header. */
 421         a->header_position = a->archive.file_position;
 422
 423         ret = (a->format->read_header)(a, entry);
 424
 425         /*
 426          * EOF and FATAL are persistent at this layer.  By
 427          * modifying the state, we guarantee that future calls to
 428          * read a header or read data will fail.
 429          */
 430         switch (ret) {
 431         case ARCHIVE_EOF:
 432                 a->archive.state = ARCHIVE_STATE_EOF;
 433                 break;
 434         case ARCHIVE_OK:
 435                 a->archive.state = ARCHIVE_STATE_DATA;
 436                 break;
 437         case ARCHIVE_WARN:
 438                 a->archive.state = ARCHIVE_STATE_DATA;
 439                 break;
 440         case ARCHIVE_RETRY:
 441                 break;
 442         case ARCHIVE_FATAL:
 443                 a->archive.state = ARCHIVE_STATE_FATAL;
 444                 break;
 445         }
 446
 447         a->read_data_output_offset = 0;
 448         a->read_data_remaining = 0;
 449         return (ret);
 450 }
 451
 452 int
 453 archive_read_next_header(struct archive *_a, struct archive_entry **entryp)
 454 {
 455         int ret;
 456         struct archive_read *a = (struct archive_read *)_a;
 457         *entryp = NULL;
 458         ret = archive_read_next_header2(_a, a->entry);
 459         *entryp = a->entry;
 460         return ret;
 461 }
 462
 463 /*
 464  * Allow each registered format to bid on whether it wants to handle
 465  * the next entry.  Return index of winning bidder.
 466  */
 467 static int
 468 choose_format(struct archive_read *a)
 469 {
 470         int slots;
 471         int i;
 472         int bid, best_bid;
 473         int best_bid_slot;
 474
 475         slots = sizeof(a->formats) / sizeof(a->formats[0]);
 476         best_bid = -1;
 477         best_bid_slot = -1;
 478
 479         /* Set up a->format and a->pformat_data for convenience of bidders. */
 480         a->format = &(a->formats[0]);
 481         for (i = 0; i < slots; i++, a->format++) {
 482                 if (a->format->bid) {
 483                         bid = (a->format->bid)(a);
 484                         if (bid == ARCHIVE_FATAL)
 485                                 return (ARCHIVE_FATAL);
 486                         if ((bid > best_bid) || (best_bid_slot < 0)) {
 487                                 best_bid = bid;
 488                                 best_bid_slot = i;
 489                         }
 490                 }
 491         }
 492
 493         /*
 494          * There were no bidders; this is a serious programmer error
 495          * and demands a quick and definitive abort.
 496          */
 497         if (best_bid_slot < 0)
 498                 __archive_errx(1, "No formats were registered; you must "
 499                     "invoke at least one archive_read_support_format_XXX "
 500                     "function in order to successfully read an archive.");
 501
 502         /*
 503          * There were bidders, but no non-zero bids; this means we
 504          * can't support this stream.
 505          */
 506         if (best_bid < 1) {
 507                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
 508                     "Unrecognized archive format");
 509                 return (ARCHIVE_FATAL);
 510         }
 511
 512         return (best_bid_slot);
 513 }
 514
 515 /*
 516  * Return the file offset (within the uncompressed data stream) where
 517  * the last header started.
 518  */
 519 int64_t
 520 archive_read_header_position(struct archive *_a)
 521 {
 522         struct archive_read *a = (struct archive_read *)_a;
 523         __archive_check_magic(_a, ARCHIVE_READ_MAGIC,
 524             ARCHIVE_STATE_ANY, "archive_read_header_position");
 525         return (a->header_position);
 526 }
 527
 528 /*
 529  * Read data from an archive entry, using a read(2)-style interface.
 530  * This is a convenience routine that just calls
 531  * archive_read_data_block and copies the results into the client
 532  * buffer, filling any gaps with zero bytes.  Clients using this
 533  * API can be completely ignorant of sparse-file issues; sparse files
 534  * will simply be padded with nulls.
 535  *
 536  * DO NOT intermingle calls to this function and archive_read_data_block
 537  * to read a single entry body.
 538  */
 539 ssize_t
 540 archive_read_data(struct archive *_a, void *buff, size_t s)
 541 {
 542         struct archive_read *a = (struct archive_read *)_a;
 543         char    *dest;
 544         const void *read_buf;
 545         size_t   bytes_read;
 546         size_t   len;
 547         int      r;
 548
 549         bytes_read = 0;
 550         dest = (char *)buff;
 551
 552         while (s > 0) {
 553                 if (a->read_data_remaining == 0) {
 554                         read_buf = a->read_data_block;
 555                         r = archive_read_data_block(&a->archive, &read_buf,
 556                             &a->read_data_remaining, &a->read_data_offset);
 557                         a->read_data_block = read_buf;
 558                         if (r == ARCHIVE_EOF)
 559                                 return (bytes_read);
 560                         /*
 561                          * Error codes are all negative, so the status
 562                          * return here cannot be confused with a valid
 563                          * byte count.  (ARCHIVE_OK is zero.)
 564                          */
 565                         if (r < ARCHIVE_OK)
 566                                 return (r);
 567                 }
 568
 569                 if (a->read_data_offset < a->read_data_output_offset) {
 570                         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
 571                             "Encountered out-of-order sparse blocks");
 572                         return (ARCHIVE_RETRY);
 573                 }
 574
 575                 /* Compute the amount of zero padding needed. */
 576                 if (a->read_data_output_offset + (off_t)s <
 577                     a->read_data_offset) {
 578                         len = s;
 579                 } else if (a->read_data_output_offset <
 580                     a->read_data_offset) {
 581                         len = a->read_data_offset -
 582                             a->read_data_output_offset;
 583                 } else
 584                         len = 0;
 585
 586                 /* Add zeroes. */
 587                 memset(dest, 0, len);
 588                 s -= len;
 589                 a->read_data_output_offset += len;
 590                 dest += len;
 591                 bytes_read += len;
 592
 593                 /* Copy data if there is any space left. */
 594                 if (s > 0) {
 595                         len = a->read_data_remaining;
 596                         if (len > s)
 597                                 len = s;
 598                         memcpy(dest, a->read_data_block, len);
 599                         s -= len;
 600                         a->read_data_block += len;
 601                         a->read_data_remaining -= len;
 602                         a->read_data_output_offset += len;
 603                         a->read_data_offset += len;
 604                         dest += len;
 605                         bytes_read += len;
 606                 }
 607         }
 608         return (bytes_read);
 609 }
 610
 611 #if ARCHIVE_API_VERSION < 3
 612 /*
 613  * Obsolete function provided for compatibility only.  Note that the API
 614  * of this function doesn't allow the caller to detect if the remaining
 615  * data from the archive entry is shorter than the buffer provided, or
 616  * even if an error occurred while reading data.
 617  */
 618 int
 619 archive_read_data_into_buffer(struct archive *a, void *d, ssize_t len)
 620 {
 621
 622         archive_read_data(a, d, len);
 623         return (ARCHIVE_OK);
 624 }
 625 #endif
 626
 627 /*
 628  * Skip over all remaining data in this entry.
 629  */
 630 int
 631 archive_read_data_skip(struct archive *_a)
 632 {
 633         struct archive_read *a = (struct archive_read *)_a;
 634         int r;
 635         const void *buff;
 636         size_t size;
 637         off_t offset;
 638
 639         __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
 640             "archive_read_data_skip");
 641
 642         if (a->format->read_data_skip != NULL)
 643                 r = (a->format->read_data_skip)(a);
 644         else {
 645                 while ((r = archive_read_data_block(&a->archive,
 646                             &buff, &size, &offset))
 647                     == ARCHIVE_OK)
 648                         ;
 649         }
 650
 651         if (r == ARCHIVE_EOF)
 652                 r = ARCHIVE_OK;
 653
 654         a->archive.state = ARCHIVE_STATE_HEADER;
 655         return (r);
 656 }
 657
 658 /*
 659  * Read the next block of entry data from the archive.
 660  * This is a zero-copy interface; the client receives a pointer,
 661  * size, and file offset of the next available block of data.
 662  *
 663  * Returns ARCHIVE_OK if the operation is successful, ARCHIVE_EOF if
 664  * the end of entry is encountered.
 665  */
 666 int
 667 archive_read_data_block(struct archive *_a,
 668     const void **buff, size_t *size, off_t *offset)
 669 {
 670         struct archive_read *a = (struct archive_read *)_a;
 671         __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_DATA,
 672             "archive_read_data_block");
 673
 674         if (a->format->read_data == NULL) {
 675                 archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
 676                     "Internal error: "
 677                     "No format_read_data_block function registered");
 678                 return (ARCHIVE_FATAL);
 679         }
 680
 681         return (a->format->read_data)(a, buff, size, offset);
 682 }
 683
 684 /*
 685  * Close the file and release most resources.
 686  *
 687  * Be careful: client might just call read_new and then read_finish.
 688  * Don't assume we actually read anything or performed any non-trivial
 689  * initialization.
 690  */
 691 static int
 692 _archive_read_close(struct archive *_a)
 693 {
 694         struct archive_read *a = (struct archive_read *)_a;
 695         int r = ARCHIVE_OK, r1 = ARCHIVE_OK;
 696         size_t i, n;
 697
 698         __archive_check_magic(&a->archive, ARCHIVE_READ_MAGIC,
 699             ARCHIVE_STATE_ANY, "archive_read_close");
 700         archive_clear_error(&a->archive);
 701         a->archive.state = ARCHIVE_STATE_CLOSED;
 702
 703
 704         /* Call cleanup functions registered by optional components. */
 705         if (a->cleanup_archive_extract != NULL)
 706                 r = (a->cleanup_archive_extract)(a);
 707
 708         /* TODO: Clean up the formatters. */
 709
 710         /* Clean up the filter pipeline. */
 711         while (a->filter != NULL) {
 712                 struct archive_read_filter *t = a->filter->upstream;
 713                 if (a->filter->close != NULL) {
 714                         r1 = (a->filter->close)(a->filter);
 715                         if (r1 < r)
 716                                 r = r1;
 717                 }
 718                 free(a->filter->buffer);
 719                 free(a->filter);
 720                 a->filter = t;
 721         }
 722
 723         /* Release the bidder objects. */
 724         n = sizeof(a->bidders)/sizeof(a->bidders[0]);
 725         for (i = 0; i < n; i++) {
 726                 if (a->bidders[i].free != NULL) {
 727                         r1 = (a->bidders[i].free)(&a->bidders[i]);
 728                         if (r1 < r)
 729                                 r = r1;
 730                 }
 731         }
 732
 733         return (r);
 734 }
 735
 736 /*
 737  * Release memory and other resources.
 738  */
 739 int
 740 _archive_read_finish(struct archive *_a)
 741 {
 742         struct archive_read *a = (struct archive_read *)_a;
 743         int i;
 744         int slots;
 745         int r = ARCHIVE_OK;
 746
 747         __archive_check_magic(_a, ARCHIVE_READ_MAGIC, ARCHIVE_STATE_ANY,
 748             "archive_read_finish");
 749         if (a->archive.state != ARCHIVE_STATE_CLOSED)
 750                 r = archive_read_close(&a->archive);
 751
 752         /* Cleanup format-specific data. */
 753         slots = sizeof(a->formats) / sizeof(a->formats[0]);
 754         for (i = 0; i < slots; i++) {
 755                 a->format = &(a->formats[i]);
 756                 if (a->formats[i].cleanup)
 757                         (a->formats[i].cleanup)(a);
 758         }
 759
 760         archive_string_free(&a->archive.error_string);
 761         if (a->entry)
 762                 archive_entry_free(a->entry);
 763         a->archive.magic = 0;
 764         free(a);
 765 #if ARCHIVE_API_VERSION > 1
 766         return (r);
 767 #endif
 768 }
 769
 770 /*
 771  * Used internally by read format handlers to register their bid and
 772  * initialization functions.
 773  */
 774 int
 775 __archive_read_register_format(struct archive_read *a,
 776     void *format_data,
 777     const char *name,
 778     int (*bid)(struct archive_read *),
 779     int (*options)(struct archive_read *, const char *, const char *),
 780     int (*read_header)(struct archive_read *, struct archive_entry *),
 781     int (*read_data)(struct archive_read *, const void **, size_t *, off_t *),
 782     int (*read_data_skip)(struct archive_read *),
 783     int (*cleanup)(struct archive_read *))
 784 {
 785         int i, number_slots;
 786
 787         __archive_check_magic(&a->archive,
 788             ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
 789             "__archive_read_register_format");
 790
 791         number_slots = sizeof(a->formats) / sizeof(a->formats[0]);
 792
 793         for (i = 0; i < number_slots; i++) {
 794                 if (a->formats[i].bid == bid)
 795                         return (ARCHIVE_WARN); /* We've already installed */
 796                 if (a->formats[i].bid == NULL) {
 797                         a->formats[i].bid = bid;
 798                         a->formats[i].options = options;
 799                         a->formats[i].read_header = read_header;
 800                         a->formats[i].read_data = read_data;
 801                         a->formats[i].read_data_skip = read_data_skip;
 802                         a->formats[i].cleanup = cleanup;
 803                         a->formats[i].data = format_data;
 804                         a->formats[i].name = name;
 805                         return (ARCHIVE_OK);
 806                 }
 807         }
 808
 809         __archive_errx(1, "Not enough slots for format registration");
 810         return (ARCHIVE_FATAL); /* Never actually called. */
 811 }
 812
 813 /*
 814  * Used internally by decompression routines to register their bid and
 815  * initialization functions.
 816  */
 817 struct archive_read_filter_bidder *
 818 __archive_read_get_bidder(struct archive_read *a)
 819 {
 820         int i, number_slots;
 821
 822         __archive_check_magic(&a->archive,
 823             ARCHIVE_READ_MAGIC, ARCHIVE_STATE_NEW,
 824             "__archive_read_get_bidder");
 825
 826         number_slots = sizeof(a->bidders) / sizeof(a->bidders[0]);
 827
 828         for (i = 0; i < number_slots; i++) {
 829                 if (a->bidders[i].bid == NULL) {
 830                         memset(a->bidders + i, 0, sizeof(a->bidders[0]));
 831                         return (a->bidders + i);
 832                 }
 833         }
 834
 835         __archive_errx(1, "Not enough slots for compression registration");
 836         return (NULL); /* Never actually executed. */
 837 }
 838
 839 /*
 840  * The next three functions comprise the peek/consume internal I/O
 841  * system used by archive format readers.  This system allows fairly
 842  * flexible read-ahead and allows the I/O code to operate in a
 843  * zero-copy manner most of the time.
 844  *
 845  * In the ideal case, filters generate blocks of data
 846  * and __archive_read_ahead() just returns pointers directly into
 847  * those blocks.  Then __archive_read_consume() just bumps those
 848  * pointers.  Only if your request would span blocks does the I/O
 849  * layer use a copy buffer to provide you with a contiguous block of
 850  * data.  The __archive_read_skip() is an optimization; it scans ahead
 851  * very quickly (it usually translates into a seek() operation if
 852  * you're reading uncompressed disk files).
 853  *
 854  * A couple of useful idioms:
 855  *  * "I just want some data."  Ask for 1 byte and pay attention to
 856  *    the "number of bytes available" from __archive_read_ahead().
 857  *    You can consume more than you asked for; you just can't consume
 858  *    more than is available.  If you consume everything that's
 859  *    immediately available, the next read_ahead() call will pull
 860  *    the next block.
 861  *  * "I want to output a large block of data."  As above, ask for 1 byte,
 862  *    emit all that's available (up to whatever limit you have), then
 863  *    repeat until you're done.
 864  *  * "I want to peek ahead by a large amount."  Ask for 4k or so, then
 865  *    double and repeat until you get an error or have enough.  Note
 866  *    that the I/O layer will likely end up expanding its copy buffer
 867  *    to fit your request, so use this technique cautiously.  This
 868  *    technique is used, for example, by some of the format tasting
 869  *    code that has uncertain look-ahead needs.
 870  *
 871  * TODO: Someday, provide a more generic __archive_read_seek() for
 872  * those cases where it's useful.  This is tricky because there are lots
 873  * of cases where seek() is not available (reading gzip data from a
 874  * network socket, for instance), so there needs to be a good way to
 875  * communicate whether seek() is available and users of that interface
 876  * need to use non-seeking strategies whenever seek() is not available.
 877  */
 878
/*
 * Looks ahead in the input stream:
 *  * If the 'avail' pointer is provided, it receives the number of bytes
 *    available in the current buffer, which may be much larger than
 *    requested.
 *  * When end-of-file is first hit before the request can be met, *avail
 *    is set to the number of bytes still buffered (fewer than requested);
 *    on later unsatisfiable calls it is set to zero.
 *  * On error, *avail is set to the error code.
 *  * Returns a pointer to the data if the request can be met, and
 *    NULL if it cannot.
 *
 * Note: If you just want "some data", ask for 1 byte and pay attention
 * to *avail, which will have the actual amount available.  If you
 * know exactly how many bytes you need, just ask for that and treat
 * a NULL return as an error.
 *
 * Important:  This does NOT move the file pointer.  See
 * __archive_read_consume() below.
 */

 896
 897 /*
 898  * This is tricky.  We need to provide our clients with pointers to
 899  * contiguous blocks of memory but we want to avoid copying whenever
 900  * possible.
 901  *
 902  * Mostly, this code returns pointers directly into the block of data
 903  * provided by the client_read routine.  It can do this unless the
 904  * request would split across blocks.  In that case, we have to copy
 905  * into an internal buffer to combine reads.
 906  */
 907 const void *
 908 __archive_read_ahead(struct archive_read *a, size_t min, ssize_t *avail)
 909 {
 910         return (__archive_read_filter_ahead(a->filter, min, avail));
 911 }
 912
/*
 * Peek at the output of a single filter without consuming anything.
 *
 * Returns a pointer to at least 'min' contiguous bytes, or NULL if the
 * request cannot be met.  When 'avail' is non-NULL it receives:
 *  * on success, the number of contiguous bytes at the returned pointer;
 *  * ARCHIVE_FATAL if the filter was already marked fatal, if the read
 *    callback returns a negative value, or if the copy buffer cannot
 *    be enlarged (the last two also mark the filter fatal);
 *  * the number of bytes left in the copy buffer when the read callback
 *    first reports end-of-file, and zero on later calls that still
 *    cannot be satisfied.
 */
const void *
__archive_read_filter_ahead(struct archive_read_filter *filter,
    size_t min, ssize_t *avail)
{
        ssize_t bytes_read;
        size_t tocopy;

        /* A filter that has failed once stays failed. */
        if (filter->fatal) {
                if (avail)
                        *avail = ARCHIVE_FATAL;
                return (NULL);
        }

        /*
         * Keep pulling more data until we can satisfy the request.
         * Each pass either returns or makes progress by fetching a
         * new client block or by moving bytes into the copy buffer.
         */
        for (;;) {

                /*
                 * If we can satisfy from the copy buffer (and the
                 * copy buffer isn't empty), we're done.  In particular,
                 * note that min == 0 is a perfectly well-defined
                 * request.
                 */
                if (filter->avail >= min && filter->avail > 0) {
                        if (avail != NULL)
                                *avail = filter->avail;
                        return (filter->next);
                }

                /*
                 * We can satisfy directly from client buffer if everything
                 * currently in the copy buffer is still in the client buffer.
                 */
                if (filter->client_total >= filter->client_avail + filter->avail
                    && filter->client_avail + filter->avail >= min) {
                        /* "Roll back" to client buffer. */
                        filter->client_avail += filter->avail;
                        filter->client_next -= filter->avail;
                        /* Copy buffer is now empty. */
                        filter->avail = 0;
                        filter->next = filter->buffer;
                        /* Return data from client buffer. */
                        if (avail != NULL)
                                *avail = filter->client_avail;
                        return (filter->client_next);
                }

                /*
                 * Move data forward in copy buffer if necessary, i.e.
                 * when 'min' bytes starting at 'next' would run past
                 * the end of the buffer.
                 */
                if (filter->next > filter->buffer &&
                    filter->next + min > filter->buffer + filter->buffer_size) {
                        if (filter->avail > 0)
                                memmove(filter->buffer, filter->next, filter->avail);
                        filter->next = filter->buffer;
                }

                /* If we've used up the client data, get more. */
                if (filter->client_avail <= 0) {
                        /* End-of-file was already seen; nothing more to fetch. */
                        if (filter->end_of_file) {
                                if (avail != NULL)
                                        *avail = 0;
                                return (NULL);
                        }
                        bytes_read = (filter->read)(filter,
                            &filter->client_buff);
                        if (bytes_read < 0) {           /* Read error. */
                                filter->client_total = filter->client_avail = 0;
                                filter->client_next = filter->client_buff = NULL;
                                filter->fatal = 1;
                                if (avail != NULL)
                                        *avail = ARCHIVE_FATAL;
                                return (NULL);
                        }
                        if (bytes_read == 0) {  /* Premature end-of-file. */
                                filter->client_total = filter->client_avail = 0;
                                filter->client_next = filter->client_buff = NULL;
                                filter->end_of_file = 1;
                                /* Return whatever we do have. */
                                if (avail != NULL)
                                        *avail = filter->avail;
                                return (NULL);
                        }
                        /* A fresh client block: record it and retry from the top. */
                        filter->position += bytes_read;
                        filter->client_total = bytes_read;
                        filter->client_avail = filter->client_total;
                        filter->client_next = filter->client_buff;
                }
                else
                {
                        /*
                         * We can't satisfy the request from the copy
                         * buffer or the existing client data, so we
                         * need to copy more client data over to the
                         * copy buffer.
                         */

                        /* Ensure the buffer is big enough. */
                        if (min > filter->buffer_size) {
                                size_t s, t;
                                char *p;

                                /*
                                 * Double the buffer; watch for overflow.
                                 * An empty buffer (size 0) is sized to
                                 * exactly 'min' instead of being doubled.
                                 */
                                s = t = filter->buffer_size;
                                if (s == 0)
                                        s = min;
                                while (s < min) {
                                        t *= 2;
                                        if (t <= s) { /* Integer overflow! */
                                                archive_set_error(
                                                        &filter->archive->archive,
                                                        ENOMEM,
                                                    "Unable to allocate copy buffer");
                                                filter->fatal = 1;
                                                if (avail != NULL)
                                                        *avail = ARCHIVE_FATAL;
                                                return (NULL);
                                        }
                                        s = t;
                                }
                                /* Now s >= min, so allocate a new buffer. */
                                p = (char *)malloc(s);
                                if (p == NULL) {
                                        archive_set_error(
                                                &filter->archive->archive,
                                                ENOMEM,
                                            "Unable to allocate copy buffer");
                                        filter->fatal = 1;
                                        if (avail != NULL)
                                                *avail = ARCHIVE_FATAL;
                                        return (NULL);
                                }
                                /* Move data into newly-enlarged buffer. */
                                if (filter->avail > 0)
                                        memmove(p, filter->next, filter->avail);
                                /* The old buffer is released only after its data was copied. */
                                free(filter->buffer);
                                filter->next = filter->buffer = p;
                                filter->buffer_size = s;
                        }

                        /* We can add client data to copy buffer. */
                        /* First estimate: copy to fill rest of buffer. */
                        tocopy = (filter->buffer + filter->buffer_size)
                            - (filter->next + filter->avail);
                        /* Don't waste time buffering more than we need to. */
                        if (tocopy + filter->avail > min)
                                tocopy = min - filter->avail;
                        /* Don't copy more than is available. */
                        if (tocopy > filter->client_avail)
                                tocopy = filter->client_avail;

                        /* Append right after the bytes already in the copy buffer. */
                        memcpy(filter->next + filter->avail, filter->client_next,
                            tocopy);
                        /* Remove this data from client buffer. */
                        filter->client_next += tocopy;
                        filter->client_avail -= tocopy;
                        /* add it to copy buffer. */
                        filter->avail += tocopy;
                }
        }
}
1073
1074 /*
1075  * Move the file pointer forward.  This should be called after
1076  * __archive_read_ahead() returns data to you.  Don't try to move
1077  * ahead by more than the amount of data available according to
1078  * __archive_read_ahead().
1079  */
1080 /*
1081  * Mark the appropriate data as used.  Note that the request here will
1082  * often be much smaller than the size of the previous read_ahead
1083  * request.
1084  */
1085 ssize_t
1086 __archive_read_consume(struct archive_read *a, size_t request)
1087 {
1088         ssize_t r;
1089         r = __archive_read_filter_consume(a->filter, request);
1090         a->archive.file_position += r;
1091         return (r);
1092 }
1093
1094 ssize_t
1095 __archive_read_filter_consume(struct archive_read_filter * filter,
1096     size_t request)
1097 {
1098         if (filter->avail > 0) {
1099                 /* Read came from copy buffer. */
1100                 filter->next += request;
1101                 filter->avail -= request;
1102         } else {
1103                 /* Read came from client buffer. */
1104                 filter->client_next += request;
1105                 filter->client_avail -= request;
1106         }
1107         return (request);
1108 }
1109
1110 /*
1111  * Move the file pointer ahead by an arbitrary amount.  If you're
1112  * reading uncompressed data from a disk file, this will actually
1113  * translate into a seek() operation.  Even in cases where seek()
1114  * isn't feasible, this at least pushes the read-and-discard loop
1115  * down closer to the data source.
1116  */
1117 int64_t
1118 __archive_read_skip(struct archive_read *a, int64_t request)
1119 {
1120         int64_t skipped = __archive_read_skip_lenient(a, request);
1121         if (skipped == request)
1122                 return (skipped);
1123         /* We hit EOF before we satisfied the skip request. */
1124         archive_set_error(&a->archive,
1125             ARCHIVE_ERRNO_MISC,
1126             "Truncated input file (needed %jd bytes, only %jd available)",
1127             (intmax_t)request, (intmax_t)skipped);
1128         return (ARCHIVE_FATAL);
1129 }
1130
1131 int64_t
1132 __archive_read_skip_lenient(struct archive_read *a, int64_t request)
1133 {
1134         int64_t skipped = __archive_read_filter_skip(a->filter, request);
1135         if (skipped > 0)
1136                 a->archive.file_position += skipped;
1137         return (skipped);
1138 }
1139
1140 int64_t
1141 __archive_read_filter_skip(struct archive_read_filter *filter, int64_t request)
1142 {
1143         off_t bytes_skipped, total_bytes_skipped = 0;
1144         size_t min;
1145
1146         if (filter->fatal)
1147                 return (-1);
1148         /*
1149          * If there is data in the buffers already, use that first.
1150          */
1151         if (filter->avail > 0) {
1152                 min = minimum(request, (off_t)filter->avail);
1153                 bytes_skipped = __archive_read_filter_consume(filter, min);
1154                 request -= bytes_skipped;
1155                 total_bytes_skipped += bytes_skipped;
1156         }
1157         if (filter->client_avail > 0) {
1158                 min = minimum(request, (off_t)filter->client_avail);
1159                 bytes_skipped = __archive_read_filter_consume(filter, min);
1160                 request -= bytes_skipped;
1161                 total_bytes_skipped += bytes_skipped;
1162         }
1163         if (request == 0)
1164                 return (total_bytes_skipped);
1165         /*
1166          * If a client_skipper was provided, try that first.
1167          */
1168 #if ARCHIVE_API_VERSION < 2
1169         if ((filter->skip != NULL) && (request < SSIZE_MAX)) {
1170 #else
1171         if (filter->skip != NULL) {
1172 #endif
1173                 bytes_skipped = (filter->skip)(filter, request);
1174                 if (bytes_skipped < 0) {        /* error */
1175                         filter->client_total = filter->client_avail = 0;
1176                         filter->client_next = filter->client_buff = NULL;
1177                         filter->fatal = 1;
1178                         return (bytes_skipped);
1179                 }
1180                 total_bytes_skipped += bytes_skipped;
1181                 request -= bytes_skipped;
1182                 filter->client_next = filter->client_buff;
1183                 filter->client_avail = filter->client_total = 0;
1184         }
1185         /*
1186          * Note that client_skipper will usually not satisfy the
1187          * full request (due to low-level blocking concerns),
1188          * so even if client_skipper is provided, we may still
1189          * have to use ordinary reads to finish out the request.
1190          */
1191         while (request > 0) {
1192                 const void* dummy_buffer;
1193                 ssize_t bytes_read;
1194                 dummy_buffer = __archive_read_filter_ahead(filter,
1195                     1, &bytes_read);
1196                 if (bytes_read < 0)
1197                         return (bytes_read);
1198                 if (bytes_read == 0) {
1199                         return (total_bytes_skipped);
1200                 }
1201                 min = (size_t)(minimum(bytes_read, request));
1202                 bytes_read = __archive_read_filter_consume(filter, min);
1203                 total_bytes_skipped += bytes_read;
1204                 request -= bytes_read;
1205         }
1206         return (total_bytes_skipped);
1207 }