2 * Copyright (c) 2004 Tim Kientzle
3 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
28 __FBSDID("$FreeBSD$");
41 #include "archive_endian.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_rb.h"
46 #include "archive_read_private.h"
49 #include "archive_crc32.h"
53 struct archive_rb_node node;
54 int64_t local_header_offset;
55 int64_t compressed_size;
56 int64_t uncompressed_size;
59 struct archive_entry *entry;
71 /* Structural information about the archive. */
72 int64_t central_directory_offset;
73 size_t central_directory_size;
74 size_t central_directory_entries;
75 char have_central_directory;
78 /* List of entries (seekable Zip only) */
79 size_t entries_remaining;
80 struct zip_entry *zip_entries;
81 struct zip_entry *entry;
82 struct archive_rb_tree tree;
86 /* entry_bytes_remaining is the number of bytes we expect. */
87 int64_t entry_bytes_remaining;
89 /* These count the number of bytes actually read for the entry. */
90 int64_t entry_compressed_bytes_read;
91 int64_t entry_uncompressed_bytes_read;
93 /* Running CRC32 of the decompressed data */
94 unsigned long entry_crc32;
96 /* Flags to mark progress of decompression. */
100 ssize_t filename_length;
101 ssize_t extra_length;
103 unsigned char *uncompressed_buffer;
104 size_t uncompressed_buffer_size;
110 struct archive_string extra;
111 struct archive_string_conv *sconv;
112 struct archive_string_conv *sconv_default;
113 struct archive_string_conv *sconv_utf8;
114 int init_default_conversion;
115 char format_name[64];
118 #define ZIP_LENGTH_AT_END 8
119 #define ZIP_ENCRYPTED (1<<0)
120 #define ZIP_STRONG_ENCRYPTED (1<<6)
121 #define ZIP_UTF8_NAME (1<<11)
123 static int archive_read_format_zip_streamable_bid(struct archive_read *, int);
124 static int archive_read_format_zip_seekable_bid(struct archive_read *, int);
125 static int archive_read_format_zip_options(struct archive_read *,
126 const char *, const char *);
127 static int archive_read_format_zip_cleanup(struct archive_read *);
128 static int archive_read_format_zip_read_data(struct archive_read *,
129 const void **, size_t *, int64_t *);
130 static int archive_read_format_zip_read_data_skip(struct archive_read *a);
131 static int archive_read_format_zip_seekable_read_header(struct archive_read *,
132 struct archive_entry *);
133 static int archive_read_format_zip_streamable_read_header(struct archive_read *,
134 struct archive_entry *);
136 static int zip_read_data_deflate(struct archive_read *a, const void **buff,
137 size_t *size, int64_t *offset);
139 static int zip_read_data_none(struct archive_read *a, const void **buff,
140 size_t *size, int64_t *offset);
141 static int zip_read_local_file_header(struct archive_read *a,
142 struct archive_entry *entry, struct zip *);
143 static time_t zip_time(const char *);
144 static const char *compression_name(int compression);
145 static void process_extra(const char *, size_t, struct zip_entry *);
147 int archive_read_support_format_zip_streamable(struct archive *);
148 int archive_read_support_format_zip_seekable(struct archive *);
/*
 * Register the streaming Zip reader on a read handle.
 *
 * Allocates a struct zip, zero-fills it, and registers the streamable
 * bidder and header reader together with the shared options,
 * read_data, read_data_skip and cleanup callbacks.  If the allocation
 * cannot be satisfied, ENOMEM is recorded on the archive and
 * ARCHIVE_FATAL is returned.
 *
 * NOTE(review): the function name passed to archive_check_magic() is
 * "archive_read_support_format_zip", whereas the seekable variant
 * passes its own full name -- confirm whether this is intentional.
 */
151 archive_read_support_format_zip_streamable(struct archive *_a)
153 struct archive_read *a = (struct archive_read *)_a;
157 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
158 ARCHIVE_STATE_NEW, "archive_read_support_format_zip");
160 zip = (struct zip *)malloc(sizeof(*zip));
162 archive_set_error(&a->archive, ENOMEM,
163 "Can't allocate zip data");
164 return (ARCHIVE_FATAL);
166 memset(zip, 0, sizeof(*zip));
168 r = __archive_read_register_format(a,
171 archive_read_format_zip_streamable_bid,
172 archive_read_format_zip_options,
173 archive_read_format_zip_streamable_read_header,
174 archive_read_format_zip_read_data,
175 archive_read_format_zip_read_data_skip,
176 archive_read_format_zip_cleanup);
/*
 * Register the seekable (central-directory based) Zip reader on a
 * read handle.
 *
 * Allocates a struct zip, zero-fills it, and registers the seekable
 * bidder and header reader together with the shared options,
 * read_data, read_data_skip and cleanup callbacks.  If the allocation
 * cannot be satisfied, ENOMEM is recorded on the archive and
 * ARCHIVE_FATAL is returned.
 */
184 archive_read_support_format_zip_seekable(struct archive *_a)
186 struct archive_read *a = (struct archive_read *)_a;
190 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
191 ARCHIVE_STATE_NEW, "archive_read_support_format_zip_seekable");
193 zip = (struct zip *)malloc(sizeof(*zip));
195 archive_set_error(&a->archive, ENOMEM,
196 "Can't allocate zip data");
197 return (ARCHIVE_FATAL);
199 memset(zip, 0, sizeof(*zip));
201 r = __archive_read_register_format(a,
204 archive_read_format_zip_seekable_bid,
205 archive_read_format_zip_options,
206 archive_read_format_zip_seekable_read_header,
207 archive_read_format_zip_read_data,
208 archive_read_format_zip_read_data_skip,
209 archive_read_format_zip_cleanup);
/*
 * Enable Zip support on a read handle by registering the streamable
 * reader first and then the seekable reader; the value returned at the
 * end is the result of the seekable registration.
 */
217 archive_read_support_format_zip(struct archive *a)
220 r = archive_read_support_format_zip_streamable(a);
223 return (archive_read_support_format_zip_seekable(a));
227 * TODO: This is a performance sink because it forces the read core to
228 * drop buffered data from the start of file, which will then have to
229 * be re-read again if this bidder loses.
231 * We work around this a little by passing in the best bid so far so
232 * that later bidders can do nothing if they know they'll never
233 * outbid. But we can certainly do better...
/*
 * Bidder for the seekable Zip reader.
 *
 * Seeks to 22 bytes before the end of the input and reads 22 bytes
 * there, expecting the end-of-central-directory record: the signature
 * "PK\005\006" followed by four zero bytes.  From that record it
 * stores the entry count (16-bit little-endian at offset 10), the
 * central directory size (32-bit at offset 12) and the central
 * directory offset (32-bit at offset 16) in the zip state, then checks
 * that the entry count equals the 16-bit value at offset 8 and that
 * offset + size does not exceed the value returned by the seek.
 *
 * NOTE(review): "filesize" holds the result of seeking to -22 from the
 * end; presumably that is the position of the record rather than the
 * full file length -- confirm against __archive_read_seek().
 */
236 archive_read_format_zip_seekable_bid(struct archive_read *a, int best_bid)
238 struct zip *zip = (struct zip *)a->format->data;
242 /* If someone has already bid more than 32, then avoid
243 trashing the look-ahead buffers with a seek. */
247 filesize = __archive_read_seek(a, -22, SEEK_END);
248 /* If we can't seek, then we can't bid. */
252 /* TODO: More robust search for end of central directory record. */
253 if ((p = __archive_read_ahead(a, 22, NULL)) == NULL)
255 /* First four bytes are signature for end of central directory
256 record. Four zero bytes ensure this isn't a multi-volume
257 Zip file (which we don't yet support). */
258 if (memcmp(p, "PK\005\006\000\000\000\000", 8) != 0)
261 /* Since we've already done the hard work of finding the
262 end of central directory record, let's save the important
264 zip->central_directory_entries = archive_le16dec(p + 10);
265 zip->central_directory_size = archive_le32dec(p + 12);
266 zip->central_directory_offset = archive_le32dec(p + 16);
268 /* Just one volume, so central dir must all be on this volume. */
269 if (zip->central_directory_entries != archive_le16dec(p + 8))
271 /* Central directory can't extend beyond end of this file. */
272 if (zip->central_directory_offset + (int64_t)zip->central_directory_size > filesize)
275 /* This is just a tiny bit higher than the maximum returned by
276 the streaming Zip bidder. This ensures that the more accurate
277 seeking Zip parser wins whenever seek is available. */
282 cmp_node(const struct archive_rb_node *n1, const struct archive_rb_node *n2)
284 const struct zip_entry *e1 = (const struct zip_entry *)n1;
285 const struct zip_entry *e2 = (const struct zip_entry *)n2;
287 return ((int)(e2->local_header_offset - e1->local_header_offset));
/*
 * Key comparison callback for the red-black tree of Zip entries.
 * As the comment below states, it is not expected to be called, so
 * both arguments are deliberately ignored.
 */
291 cmp_key(const struct archive_rb_node *n, const void *key)
293 /* This function won't be called */
294 (void)n; /* UNUSED */
295 (void)key; /* UNUSED */
300 slurp_central_directory(struct archive_read *a, struct zip *zip)
303 static const struct archive_rb_tree_ops rb_ops = {
307 __archive_read_seek(a, zip->central_directory_offset, SEEK_SET);
308 zip->offset = zip->central_directory_offset;
309 __archive_rb_tree_init(&zip->tree, &rb_ops);
311 zip->zip_entries = calloc(zip->central_directory_entries,
312 sizeof(struct zip_entry));
313 for (i = 0; i < zip->central_directory_entries; ++i) {
314 struct zip_entry *zip_entry = &zip->zip_entries[i];
315 size_t filename_length, extra_length, comment_length;
316 uint32_t external_attributes;
319 if ((p = __archive_read_ahead(a, 46, NULL)) == NULL)
320 return ARCHIVE_FATAL;
321 if (memcmp(p, "PK\001\002", 4) != 0) {
322 archive_set_error(&a->archive,
323 -1, "Invalid central directory signature");
324 return ARCHIVE_FATAL;
326 zip->have_central_directory = 1;
327 /* version = p[4]; */
328 zip_entry->system = p[5];
329 /* version_required = archive_le16dec(p + 6); */
330 zip_entry->flags = archive_le16dec(p + 8);
331 zip_entry->compression = (char)archive_le16dec(p + 10);
332 zip_entry->mtime = zip_time(p + 12);
333 zip_entry->crc32 = archive_le32dec(p + 16);
334 zip_entry->compressed_size = archive_le32dec(p + 20);
335 zip_entry->uncompressed_size = archive_le32dec(p + 24);
336 filename_length = archive_le16dec(p + 28);
337 extra_length = archive_le16dec(p + 30);
338 comment_length = archive_le16dec(p + 32);
339 /* disk_start = archive_le16dec(p + 34); */ /* Better be zero. */
340 /* internal_attributes = archive_le16dec(p + 36); */ /* text bit */
341 external_attributes = archive_le32dec(p + 38);
342 zip_entry->local_header_offset = archive_le32dec(p + 42);
344 /* If we can't guess the mode, leave it zero here;
345 when we read the local file header we might get
348 if (zip_entry->system == 3) {
349 zip_entry->mode = external_attributes >> 16;
351 /* Register an entry to RB tree to sort it by file offset. */
352 __archive_rb_tree_insert_node(&zip->tree, &zip_entry->node);
354 /* We don't read the filename until we get to the
355 local file header. Reading it here would speed up
356 table-of-contents operations (removing the need to
357 find and read local file header to get the
358 filename) at the cost of requiring a lot of extra
360 /* We don't read the extra block here. We assume it
361 will be duplicated at the local file header. */
362 __archive_read_consume(a,
363 46 + filename_length + extra_length + comment_length);
/*
 * Consume `bytes` bytes of input through __archive_read_consume() on
 * behalf of the Zip reader.
 *
 * NOTE(review): the zip state is fetched here; presumably the result
 * is used to keep the reader's own position bookkeeping in step with
 * the stream -- confirm against the full function body.
 */
370 zip_read_consume(struct archive_read *a, int64_t bytes)
372 struct zip *zip = (struct zip *)a->format->data;
375 skip = __archive_read_consume(a, bytes);
/*
 * Header reader for the seekable Zip reader.
 *
 * On the first call (zip->zip_entries is still NULL) the central
 * directory is loaded with slurp_central_directory(), the remaining
 * entry count is initialised from central_directory_entries, and the
 * first entry is taken from ARCHIVE_RB_TREE_MIN().  On later calls the
 * next entry is obtained by iterating the tree from the current entry.
 * The input is repositioned to the entry's local header offset when
 * zip->offset differs from it, and the local file header is then
 * parsed with zip_read_local_file_header().
 *
 * For symbolic links (AE_IFLNK) the entry size is used as the length
 * of the link target: the entry size is reset to 0, that many bytes
 * are read ahead, and the target is stored on the entry through a
 * string conversion.  A short read yields ARCHIVE_FATAL ("Truncated
 * Zip file"), as does running out of memory while copying the target.
 */
382 archive_read_format_zip_seekable_read_header(struct archive_read *a,
383 struct archive_entry *entry)
385 struct zip *zip = (struct zip *)a->format->data;
386 int r, ret = ARCHIVE_OK;
388 a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
389 if (a->archive.archive_format_name == NULL)
390 a->archive.archive_format_name = "ZIP";
392 if (zip->zip_entries == NULL) {
393 r = slurp_central_directory(a, zip);
394 zip->entries_remaining = zip->central_directory_entries;
397 /* Get first entry whose local header offset is lower than
398 * other entries in the archive file. */
400 (struct zip_entry *)ARCHIVE_RB_TREE_MIN(&zip->tree);
401 } else if (zip->entry != NULL) {
402 /* Get next entry in local header offset order. */
403 zip->entry = (struct zip_entry *)__archive_rb_tree_iterate(
404 &zip->tree, &zip->entry->node, ARCHIVE_RB_DIR_RIGHT);
407 if (zip->entries_remaining <= 0 || zip->entry == NULL)
409 --zip->entries_remaining;
/* NOTE(review): the result of __archive_read_seek() below is not checked. */
411 if (zip->offset != zip->entry->local_header_offset) {
412 __archive_read_seek(a, zip->entry->local_header_offset,
414 zip->offset = zip->entry->local_header_offset;
417 r = zip_read_local_file_header(a, entry, zip);
420 if ((zip->entry->mode & AE_IFMT) == AE_IFLNK) {
422 struct archive_string_conv *sconv;
423 size_t linkname_length = (size_t)archive_entry_size(entry);
425 archive_entry_set_size(entry, 0);
426 p = __archive_read_ahead(a, linkname_length, NULL);
428 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
429 "Truncated Zip file");
430 return ARCHIVE_FATAL;
434 if (sconv == NULL && (zip->entry->flags & ZIP_UTF8_NAME))
435 sconv = zip->sconv_utf8;
437 sconv = zip->sconv_default;
438 if (archive_entry_copy_symlink_l(entry, p, linkname_length,
440 if (errno != ENOMEM && sconv == zip->sconv_utf8 &&
441 (zip->entry->flags & ZIP_UTF8_NAME))
442 archive_entry_copy_symlink_l(entry, p,
443 linkname_length, NULL);
444 if (errno == ENOMEM) {
445 archive_set_error(&a->archive, ENOMEM,
446 "Can't allocate memory for Symlink");
447 return (ARCHIVE_FATAL);
450 * Since there is no character-set regulation for
451 * symlink name, do not report the conversion error
452 * in an automatic conversion.
454 if (sconv != zip->sconv_utf8 ||
455 (zip->entry->flags & ZIP_UTF8_NAME) == 0) {
456 archive_set_error(&a->archive,
457 ARCHIVE_ERRNO_FILE_FORMAT,
458 "Symlink cannot be converted "
459 "from %s to current locale.",
460 archive_string_conversion_charset_name(
/*
 * Bidder for the streaming Zip reader.
 *
 * Looks at the first four bytes of the input and recognises "PK"
 * followed by one of the two-byte markers tested below (\001\002,
 * \003\004, \005\006, \007\010 or the characters "00").  The best_bid
 * argument is not used.
 *
 * NOTE(review): the bid comment below speaks of four options for the
 * second 16-bit field while the condition tests five -- confirm which
 * is intended.
 */
470 archive_read_format_zip_streamable_bid(struct archive_read *a, int best_bid)
474 (void)best_bid; /* UNUSED */
476 if ((p = __archive_read_ahead(a, 4, NULL)) == NULL)
480 * Bid of 30 here is: 16 bits for "PK",
481 * next 16-bit field has four options (-2 bits).
484 if (p[0] == 'P' && p[1] == 'K') {
485 if ((p[2] == '\001' && p[3] == '\002')
486 || (p[2] == '\003' && p[3] == '\004')
487 || (p[2] == '\005' && p[3] == '\006')
488 || (p[2] == '\007' && p[3] == '\010')
489 || (p[2] == '0' && p[3] == '0'))
493 /* TODO: It's worth looking ahead a little bit for a valid
494 * PK signature. In particular, that would make it possible
495 * to read some UUEncoded SFX files or SFX files coming from
496 * a network socket. */
/*
 * Option handler for the Zip readers.
 *
 * Recognised keys:
 *   "compat-2x"  - sets zip->init_default_conversion to 1 when a value
 *                  is supplied and to 0 otherwise.
 *   "hdrcharset" - requires a non-empty character-set name; creates a
 *                  string converter from that charset and stores it in
 *                  zip->sconv.  When the name is exactly "UTF-8" the
 *                  same converter is also stored in zip->sconv_utf8.
 *
 * Any other key falls through to the final ARCHIVE_WARN return, which
 * (per the comment there) tells the options supervisor that this
 * handler did not use the option.
 */
502 archive_read_format_zip_options(struct archive_read *a,
503 const char *key, const char *val)
506 int ret = ARCHIVE_FAILED;
508 zip = (struct zip *)(a->format->data);
509 if (strcmp(key, "compat-2x") == 0) {
510 /* Handle filenames as libarchive 2.x */
511 zip->init_default_conversion = (val != NULL) ? 1 : 0;
513 } else if (strcmp(key, "hdrcharset") == 0) {
514 if (val == NULL || val[0] == 0)
515 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
516 "zip: hdrcharset option needs a character-set name");
518 zip->sconv = archive_string_conversion_from_charset(
519 &a->archive, val, 0);
520 if (zip->sconv != NULL) {
521 if (strcmp(val, "UTF-8") == 0)
522 zip->sconv_utf8 = zip->sconv;
530 /* Note: The "warn" return is just to inform the options
531 * supervisor that we didn't handle it. It will generate
532 * a suitable error if no one used this option. */
533 return (ARCHIVE_WARN);
/*
 * Header reader for the streaming Zip reader.
 *
 * Uses a single zip_entry, allocated on first use and cleared on every
 * call.  After consuming any bytes still recorded in zip->unconsumed,
 * it scans forward through the input for a "PK" marker:
 *   - "PK\001\002" (start of central directory) or "PK\005\006" (end
 *     of central directory) ends the archive with ARCHIVE_EOF;
 *   - "PK\003\004" (local file header) consumes the bytes skipped so
 *     far and hands over to zip_read_local_file_header().
 * Returns ARCHIVE_FATAL when the entry cannot be allocated.
 */
537 archive_read_format_zip_streamable_read_header(struct archive_read *a,
538 struct archive_entry *entry)
542 a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
543 if (a->archive.archive_format_name == NULL)
544 a->archive.archive_format_name = "ZIP";
546 zip = (struct zip *)(a->format->data);
548 /* Make sure we have a zip_entry structure to use. */
549 if (zip->zip_entries == NULL) {
550 zip->zip_entries = malloc(sizeof(struct zip_entry));
551 if (zip->zip_entries == NULL) {
552 archive_set_error(&a->archive, ENOMEM, "Out of memory");
553 return ARCHIVE_FATAL;
556 zip->entry = zip->zip_entries;
557 memset(zip->entry, 0, sizeof(struct zip_entry));
559 /* Search ahead for the next local file header. */
560 zip_read_consume(a, zip->unconsumed);
567 p = __archive_read_ahead(a, 4, &bytes);
569 return (ARCHIVE_FATAL);
572 while (p + 4 <= end) {
573 if (p[0] == 'P' && p[1] == 'K') {
574 if (p[2] == '\001' && p[3] == '\002')
575 /* Beginning of central directory. */
576 return (ARCHIVE_EOF);
578 if (p[2] == '\003' && p[3] == '\004') {
579 /* Regular file entry. */
580 zip_read_consume(a, skipped);
581 return zip_read_local_file_header(a, entry, zip);
584 if (p[2] == '\005' && p[3] == '\006')
585 /* End of central directory. */
586 return (ARCHIVE_EOF);
591 zip_read_consume(a, skipped);
596 * Assumes file pointer is at beginning of local file header.
/*
 * Parse the local file header at the current read position and fill in
 * both zip->entry and the caller's archive_entry.
 *
 * Steps visible below:
 *  - reset the per-entry decompression, byte-count and CRC state;
 *  - read the 30-byte fixed header, verify the "PK\003\004" signature
 *    and decode the flags, compression method, mtime, CRC32, sizes and
 *    the filename/extra lengths;
 *  - when a central directory was read, clear ZIP_LENGTH_AT_END and
 *    report non-zero local values that disagree with the central
 *    directory; otherwise adopt the local CRC32 and sizes;
 *  - copy the pathname through the UTF-8 converter when ZIP_UTF8_NAME
 *    is set, else through zip->sconv when one is set, else through the
 *    default converter;
 *  - if no mode is known yet, guess directory versus regular file from
 *    a trailing '/' in the pathname;
 *  - pass the extra field to process_extra();
 *  - copy mode, uid, gid and the timestamps into the entry, set its
 *    size unless ZIP_LENGTH_AT_END is set, and initialise
 *    entry_bytes_remaining from the compressed size.
 *
 * Returns ARCHIVE_FATAL on a truncated header, a bad signature, a
 * missing UTF-8 converter, or an allocation failure while copying the
 * pathname.
 */
599 zip_read_local_file_header(struct archive_read *a, struct archive_entry *entry,
606 size_t len, filename_length, extra_length;
607 struct archive_string_conv *sconv;
608 struct zip_entry *zip_entry = zip->entry;
609 uint32_t local_crc32;
610 int64_t compressed_size, uncompressed_size;
611 int ret = ARCHIVE_OK;
614 zip->decompress_init = 0;
615 zip->end_of_entry = 0;
616 zip->entry_uncompressed_bytes_read = 0;
617 zip->entry_compressed_bytes_read = 0;
618 zip->entry_crc32 = crc32(0, NULL, 0);
620 /* Setup default conversion. */
621 if (zip->sconv == NULL && !zip->init_default_conversion) {
623 archive_string_default_conversion_for_read(&(a->archive));
624 zip->init_default_conversion = 1;
627 if ((p = __archive_read_ahead(a, 30, NULL)) == NULL) {
628 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
629 "Truncated ZIP file header");
630 return (ARCHIVE_FATAL);
633 if (memcmp(p, "PK\003\004", 4) != 0) {
634 archive_set_error(&a->archive, -1, "Damaged Zip archive");
635 return ARCHIVE_FATAL;
638 zip_entry->system = p[5];
639 zip_entry->flags = archive_le16dec(p + 6);
640 zip_entry->compression = (char)archive_le16dec(p + 8);
641 zip_entry->mtime = zip_time(p + 10);
642 local_crc32 = archive_le32dec(p + 14);
643 compressed_size = archive_le32dec(p + 18);
644 uncompressed_size = archive_le32dec(p + 22);
645 filename_length = archive_le16dec(p + 26);
646 extra_length = archive_le16dec(p + 28);
648 zip_read_consume(a, 30);
650 if (zip->have_central_directory) {
651 /* If we read the central dir entry, we must have size information
652 as well, so ignore the length-at-end flag. */
653 zip_entry->flags &= ~ZIP_LENGTH_AT_END;
654 /* If we have values from both the local file header
655 and the central directory, warn about mismatches
656 which might indicate a damaged file. But some
657 writers always put zero in the local header; don't
658 bother warning about that. */
659 if (local_crc32 != 0 && local_crc32 != zip_entry->crc32) {
660 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
661 "Inconsistent CRC32 values");
664 if (compressed_size != 0
665 && compressed_size != zip_entry->compressed_size) {
666 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
667 "Inconsistent compressed size");
670 if (uncompressed_size != 0
671 && uncompressed_size != zip_entry->uncompressed_size) {
672 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
673 "Inconsistent uncompressed size");
677 /* If we don't have the CD info, use whatever we do have. */
678 zip_entry->crc32 = local_crc32;
679 zip_entry->compressed_size = compressed_size;
680 zip_entry->uncompressed_size = uncompressed_size;
683 /* Read the filename. */
684 if ((h = __archive_read_ahead(a, filename_length, NULL)) == NULL) {
685 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
686 "Truncated ZIP file header");
687 return (ARCHIVE_FATAL);
689 if (zip_entry->flags & ZIP_UTF8_NAME) {
690 /* The filename is stored to be UTF-8. */
691 if (zip->sconv_utf8 == NULL) {
693 archive_string_conversion_from_charset(
694 &a->archive, "UTF-8", 1);
695 if (zip->sconv_utf8 == NULL)
696 return (ARCHIVE_FATAL);
698 sconv = zip->sconv_utf8;
699 } else if (zip->sconv != NULL)
702 sconv = zip->sconv_default;
704 if (archive_entry_copy_pathname_l(entry,
705 h, filename_length, sconv) != 0) {
706 if (errno == ENOMEM) {
707 archive_set_error(&a->archive, ENOMEM,
708 "Can't allocate memory for Pathname");
709 return (ARCHIVE_FATAL);
711 archive_set_error(&a->archive,
712 ARCHIVE_ERRNO_FILE_FORMAT,
713 "Pathname cannot be converted "
714 "from %s to current locale.",
715 archive_string_conversion_charset_name(sconv));
718 zip_read_consume(a, filename_length);
720 if (zip_entry->mode == 0) {
721 /* Especially in streaming mode, we can end up
722 here without having seen any mode information.
723 Guess from the filename. */
724 wp = archive_entry_pathname_w(entry);
727 if (len > 0 && wp[len - 1] == L'/')
728 zip_entry->mode = AE_IFDIR | 0777;
730 zip_entry->mode = AE_IFREG | 0666;
732 cp = archive_entry_pathname(entry);
733 len = (cp != NULL)?strlen(cp):0;
734 if (len > 0 && cp[len - 1] == '/')
735 zip_entry->mode = AE_IFDIR | 0777;
737 zip_entry->mode = AE_IFREG | 0666;
741 /* Read the extra data. */
742 if ((h = __archive_read_ahead(a, extra_length, NULL)) == NULL) {
743 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
744 "Truncated ZIP file header");
745 return (ARCHIVE_FATAL);
747 process_extra(h, extra_length, zip_entry);
748 zip_read_consume(a, extra_length);
750 /* Populate some additional entry fields: */
751 archive_entry_set_mode(entry, zip_entry->mode);
752 archive_entry_set_uid(entry, zip_entry->uid);
753 archive_entry_set_gid(entry, zip_entry->gid);
754 archive_entry_set_mtime(entry, zip_entry->mtime, 0);
755 archive_entry_set_ctime(entry, zip_entry->ctime, 0);
756 archive_entry_set_atime(entry, zip_entry->atime, 0);
757 /* Set the size only if it's meaningful. */
758 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END))
759 archive_entry_set_size(entry, zip_entry->uncompressed_size);
761 zip->entry_bytes_remaining = zip_entry->compressed_size;
763 /* If there's no body, force read_data() to return EOF immediately. */
764 if (0 == (zip_entry->flags & ZIP_LENGTH_AT_END)
765 && zip->entry_bytes_remaining < 1)
766 zip->end_of_entry = 1;
/* NOTE(review): sprintf() writes into the 64-byte format_name without a
   bound; snprintf() with sizeof(zip->format_name) would be safer. */
768 /* Set up a more descriptive format name. */
769 sprintf(zip->format_name, "ZIP %d.%d (%s)",
770 version / 10, version % 10,
771 compression_name(zip->entry->compression));
772 a->archive.archive_format_name = zip->format_name;
/*
 * Map a Zip compression method number to a human-readable name by
 * indexing the static compression_names[] table.  The range check
 * keeps negative or too-large method numbers from indexing outside
 * the table.
 */
778 compression_name(int compression)
780 static const char *compression_names[] = {
792 if (0 <= compression && compression <
793 (int)(sizeof(compression_names)/sizeof(compression_names[0])))
794 return compression_names[compression];
799 /* Convert an MSDOS-style date/time into Unix-style time. */
/*
 * p points at four bytes: a little-endian 16-bit time field followed
 * by a little-endian 16-bit date field.
 *   time: bits 11-15 hour, bits 5-10 minute, bits 0-4 seconds / 2
 *   date: bits 9-15 years since 1980, bits 5-8 month (1-12),
 *         bits 0-4 day of month
 * The fields are unpacked into a zeroed struct tm (tm_year counts from
 * 1900, tm_mon from 0).
 */
801 zip_time(const char *p)
806 msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]);
807 msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]);
809 memset(&ts, 0, sizeof(ts));
810 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
811 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
812 ts.tm_mday = msDate & 0x1f; /* Day of month. */
813 ts.tm_hour = (msTime >> 11) & 0x1f;
814 ts.tm_min = (msTime >> 5) & 0x3f;
815 ts.tm_sec = (msTime << 1) & 0x3e;
/*
 * Return the next block of decompressed data for the current entry.
 *
 * *offset is set to the number of uncompressed bytes already returned
 * for this entry.  Returns ARCHIVE_EOF once end_of_entry has been
 * reached or when the entry is not a regular file, and ARCHIVE_FAILED
 * for encrypted entries and for unsupported compression methods.
 * Stored (0) and deflated (8) entries are dispatched to
 * zip_read_data_none() and zip_read_data_deflate() after any bytes
 * recorded in zip->unconsumed have been consumed.
 *
 * The running CRC32 is updated with each returned block.  At the end
 * of the entry the compressed size, the low 32 bits of the
 * uncompressed size, and the CRC32 are compared with the values
 * recorded for the entry; a mismatch sets an error message and
 * returns ARCHIVE_WARN.
 */
821 archive_read_format_zip_read_data(struct archive_read *a,
822 const void **buff, size_t *size, int64_t *offset)
825 struct zip *zip = (struct zip *)(a->format->data);
827 *offset = zip->entry_uncompressed_bytes_read;
831 /* If we hit end-of-entry last time, return ARCHIVE_EOF. */
832 if (zip->end_of_entry)
833 return (ARCHIVE_EOF);
835 /* Return EOF immediately if this is a non-regular file. */
836 if (AE_IFREG != (zip->entry->mode & AE_IFMT))
837 return (ARCHIVE_EOF);
839 if (zip->entry->flags & (ZIP_ENCRYPTED | ZIP_STRONG_ENCRYPTED)) {
840 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
841 "Encrypted file is unsupported");
842 return (ARCHIVE_FAILED);
845 zip_read_consume(a, zip->unconsumed);
848 switch(zip->entry->compression) {
849 case 0: /* No compression. */
850 r = zip_read_data_none(a, buff, size, offset);
853 case 8: /* Deflate compression. */
854 r = zip_read_data_deflate(a, buff, size, offset);
857 default: /* Unsupported compression. */
858 /* Return a warning. */
859 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
860 "Unsupported ZIP compression method (%s)",
861 compression_name(zip->entry->compression));
862 /* We can't decompress this entry, but we will
863 * be able to skip() it and try the next entry. */
864 return (ARCHIVE_FAILED);
869 /* Update checksum */
871 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size);
872 /* If we hit the end, swallow any end-of-data marker. */
873 if (zip->end_of_entry) {
874 /* Check file size, CRC against these values. */
875 if (zip->entry->compressed_size != zip->entry_compressed_bytes_read) {
876 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
877 "ZIP compressed data is wrong size (read %jd, expected %jd)",
878 (intmax_t)zip->entry_compressed_bytes_read,
879 (intmax_t)zip->entry->compressed_size);
880 return (ARCHIVE_WARN);
882 /* Size field only stores the lower 32 bits of the actual
884 if ((zip->entry->uncompressed_size & UINT32_MAX)
885 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) {
886 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
887 "ZIP uncompressed data is wrong size (read %jd, expected %jd)",
888 (intmax_t)zip->entry_uncompressed_bytes_read,
889 (intmax_t)zip->entry->uncompressed_size);
890 return (ARCHIVE_WARN);
892 /* Check computed CRC against header */
893 if (zip->entry->crc32 != zip->entry_crc32) {
894 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
895 "ZIP bad CRC: 0x%lx should be 0x%lx",
896 (unsigned long)zip->entry_crc32,
897 (unsigned long)zip->entry->crc32);
898 return (ARCHIVE_WARN);
906 * Read "uncompressed" data. There are three cases:
907 * 1) We know the size of the data. This is always true for the
908 * seeking reader (we've examined the Central Directory already).
909 * 2) ZIP_LENGTH_AT_END was set, but only the CRC was deferred.
910 * Info-ZIP seems to do this; we know the size but have to grab
911 * the CRC from the data descriptor afterwards.
912 * 3) We're streaming and ZIP_LENGTH_AT_END was specified and
913 * we have no size information. In this case, we can do pretty
914 * well by watching for the data descriptor record. The data
915 * descriptor is 16 bytes and includes a computed CRC that should
916 * provide a strong check.
918 * TODO: Technically, the PK\007\010 signature is optional.
919 * In the original spec, the data descriptor contained CRC
920 * and size fields but had no leading signature. In practice,
921 * newer writers seem to provide the signature pretty consistently,
922 * but we might need to do something more complex here if
923 * we want to handle older archives that lack that signature.
925 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
926 * zip->end_of_entry if it consumes all of the data.
/*
 * Body reader for entries stored without compression.
 *
 * With ZIP_LENGTH_AT_END set, at least 16 bytes are read ahead.  If
 * they form a "PK\007\010" data descriptor whose CRC32 and sizes equal
 * the running totals, the entry's CRC32 and sizes are taken from it,
 * end_of_entry is set and the 16 descriptor bytes are recorded in
 * zip->unconsumed.  Otherwise the buffer is scanned for the next
 * possible signature and only the bytes before it are accounted for.
 *
 * Without ZIP_LENGTH_AT_END, end_of_entry is set once
 * entry_bytes_remaining reaches zero; otherwise whatever is available
 * is read ahead, capped at entry_bytes_remaining.
 *
 * The byte count is subtracted from entry_bytes_remaining and added to
 * both the compressed and uncompressed read counters and to
 * zip->unconsumed.  A short read reports "Truncated ZIP file data" and
 * returns ARCHIVE_FATAL.
 */
929 zip_read_data_none(struct archive_read *a, const void **_buff,
930 size_t *size, int64_t *offset)
936 (void)offset; /* UNUSED */
938 zip = (struct zip *)(a->format->data);
940 if (zip->entry->flags & ZIP_LENGTH_AT_END) {
943 /* Grab at least 16 bytes. */
944 buff = __archive_read_ahead(a, 16, &bytes_avail);
945 if (bytes_avail < 16) {
946 /* Zip archives have end-of-archive markers
947 that are longer than this, so a failure to get at
948 least 16 bytes really does indicate a truncated
950 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
951 "Truncated ZIP file data");
952 return (ARCHIVE_FATAL);
954 /* Check for a complete PK\007\010 signature. */
956 if (p[0] == 'P' && p[1] == 'K'
957 && p[2] == '\007' && p[3] == '\010'
958 && archive_le32dec(p + 4) == zip->entry_crc32
959 && archive_le32dec(p + 8) == zip->entry_compressed_bytes_read
960 && archive_le32dec(p + 12) == zip->entry_uncompressed_bytes_read) {
961 zip->entry->crc32 = archive_le32dec(p + 4);
962 zip->entry->compressed_size = archive_le32dec(p + 8);
963 zip->entry->uncompressed_size = archive_le32dec(p + 12);
964 zip->end_of_entry = 1;
965 zip->unconsumed = 16;
968 /* If not at EOF, ensure we consume at least one byte. */
971 /* Scan forward until we see where a PK\007\010 signature might be. */
972 /* Return bytes up until that point. On the next call, the code
973 above will verify the data descriptor. */
974 while (p < buff + bytes_avail - 4) {
975 if (p[3] == 'P') { p += 3; }
976 else if (p[3] == 'K') { p += 2; }
977 else if (p[3] == '\007') { p += 1; }
978 else if (p[3] == '\010' && p[2] == '\007'
979 && p[1] == 'K' && p[0] == 'P') {
983 bytes_avail = p - buff;
985 if (zip->entry_bytes_remaining == 0) {
986 zip->end_of_entry = 1;
989 /* Grab a bunch of bytes. */
990 buff = __archive_read_ahead(a, 1, &bytes_avail);
991 if (bytes_avail <= 0) {
992 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
993 "Truncated ZIP file data");
994 return (ARCHIVE_FATAL);
996 if (bytes_avail > zip->entry_bytes_remaining)
997 bytes_avail = (ssize_t)zip->entry_bytes_remaining;
1000 zip->entry_bytes_remaining -= bytes_avail;
1001 zip->entry_uncompressed_bytes_read += bytes_avail;
1002 zip->entry_compressed_bytes_read += bytes_avail;
1003 zip->unconsumed += bytes_avail;
1005 return (ARCHIVE_OK);
/*
 * Body reader for deflate-compressed entries.
 *
 * Allocates a 256 KiB output buffer on first use and, at the start of
 * each entry, either resets the existing zlib stream or initialises a
 * new one with window bits -15.  Each call inflates whatever input is
 * currently available (capped at entry_bytes_remaining unless
 * ZIP_LENGTH_AT_END is set) into the output buffer, consumes exactly
 * the number of input bytes zlib reports in stream.total_in, updates
 * the compressed and uncompressed byte counters, and returns the
 * output through *buff and *size.
 *
 * When the entry ends and ZIP_LENGTH_AT_END is set, the next 16 bytes
 * are read ahead; if they start with the "PK\007\010" marker, the
 * entry's CRC32 and sizes are taken from them and the 16 bytes are
 * recorded in zip->unconsumed.
 *
 * Returns ARCHIVE_OK on success and ARCHIVE_FATAL on allocation
 * failure, stream initialisation failure, truncated input, or a
 * decompression error.
 */
1010 zip_read_data_deflate(struct archive_read *a, const void **buff,
1011 size_t *size, int64_t *offset)
1014 ssize_t bytes_avail;
1015 const void *compressed_buff;
1018 (void)offset; /* UNUSED */
1020 zip = (struct zip *)(a->format->data);
1022 /* If the buffer hasn't been allocated, allocate it now. */
1023 if (zip->uncompressed_buffer == NULL) {
1024 zip->uncompressed_buffer_size = 256 * 1024;
1025 zip->uncompressed_buffer
1026 = (unsigned char *)malloc(zip->uncompressed_buffer_size);
1027 if (zip->uncompressed_buffer == NULL) {
1028 archive_set_error(&a->archive, ENOMEM,
1029 "No memory for ZIP decompression");
1030 return (ARCHIVE_FATAL);
1034 /* If we haven't yet read any data, initialize the decompressor. */
1035 if (!zip->decompress_init) {
1036 if (zip->stream_valid)
1037 r = inflateReset(&zip->stream);
1039 r = inflateInit2(&zip->stream,
1040 -15 /* Don't check for zlib header */);
1042 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1043 "Can't initialize ZIP decompression.");
1044 return (ARCHIVE_FATAL);
1046 /* Stream structure has been set up. */
1047 zip->stream_valid = 1;
1048 /* We've initialized decompression for this stream. */
1049 zip->decompress_init = 1;
1053 * Note: '1' here is a performance optimization.
1054 * Recall that the decompression layer returns a count of
1055 * available bytes; asking for more than that forces the
1056 * decompressor to combine reads by copying data.
1058 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail);
1059 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)
1060 && bytes_avail > zip->entry_bytes_remaining) {
1061 bytes_avail = (ssize_t)zip->entry_bytes_remaining;
1063 if (bytes_avail <= 0) {
1064 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1065 "Truncated ZIP file body");
1066 return (ARCHIVE_FATAL);
1070 * A bug in zlib.h: stream.next_in should be marked 'const'
1071 * but isn't (the library never alters data through the
1072 * next_in pointer, only reads it). The result: this ugly
1073 * cast to remove 'const'.
1075 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff;
1076 zip->stream.avail_in = bytes_avail;
1077 zip->stream.total_in = 0;
1078 zip->stream.next_out = zip->uncompressed_buffer;
1079 zip->stream.avail_out = zip->uncompressed_buffer_size;
1080 zip->stream.total_out = 0;
1082 r = inflate(&zip->stream, 0);
1087 zip->end_of_entry = 1;
1090 archive_set_error(&a->archive, ENOMEM,
1091 "Out of memory for ZIP decompression");
1092 return (ARCHIVE_FATAL);
1094 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1095 "ZIP decompression failed (%d)", r);
1096 return (ARCHIVE_FATAL);
1099 /* Consume as much as the compressor actually used. */
1100 bytes_avail = zip->stream.total_in;
1101 zip_read_consume(a, bytes_avail);
1102 zip->entry_bytes_remaining -= bytes_avail;
1103 zip->entry_compressed_bytes_read += bytes_avail;
1105 *size = zip->stream.total_out;
1106 zip->entry_uncompressed_bytes_read += zip->stream.total_out;
1107 *buff = zip->uncompressed_buffer;
1109 if (zip->end_of_entry && (zip->entry->flags & ZIP_LENGTH_AT_END)) {
1112 if (NULL == (p = __archive_read_ahead(a, 16, NULL))) {
1113 archive_set_error(&a->archive,
1114 ARCHIVE_ERRNO_FILE_FORMAT,
1115 "Truncated ZIP end-of-file record");
1116 return (ARCHIVE_FATAL);
1118 /* Consume the optional PK\007\010 marker. */
1119 if (p[0] == 'P' && p[1] == 'K' && p[2] == '\007' && p[3] == '\010') {
1120 zip->entry->crc32 = archive_le32dec(p + 4);
1121 zip->entry->compressed_size = archive_le32dec(p + 8);
1122 zip->entry->uncompressed_size = archive_le32dec(p + 12);
1123 zip->unconsumed = 16;
1127 return (ARCHIVE_OK);
1132 archive_read_format_zip_read_data_skip(struct archive_read *a)
1136 zip = (struct zip *)(a->format->data);
1138 /* If we've already read to end of data, we're done. */
1139 if (zip->end_of_entry)
1140 return (ARCHIVE_OK);
1142 /* So we know we're streaming... */
1143 if (0 == (zip->entry->flags & ZIP_LENGTH_AT_END)) {
1144 /* We know the compressed length, so we can just skip. */
1145 int64_t bytes_skipped = zip_read_consume(a,
1146 zip->entry_bytes_remaining + zip->unconsumed);
1147 if (bytes_skipped < 0)
1148 return (ARCHIVE_FATAL);
1149 zip->unconsumed = 0;
1150 return (ARCHIVE_OK);
1153 /* We're streaming and we don't know the length. */
1154 /* If the body is compressed and we know the format, we can
1155 * find an exact end-of-entry by decompressing it. */
1156 switch (zip->entry->compression) {
1158 case 8: /* Deflate compression. */
1159 while (!zip->end_of_entry) {
1161 const void *buff = NULL;
1164 r = zip_read_data_deflate(a, &buff, &size, &offset);
1165 if (r != ARCHIVE_OK)
1170 default: /* Uncompressed or unknown. */
1171 /* Scan for a PK\007\010 signature. */
1172 zip_read_consume(a, zip->unconsumed);
1173 zip->unconsumed = 0;
1175 const char *p, *buff;
1176 ssize_t bytes_avail;
1177 buff = __archive_read_ahead(a, 16, &bytes_avail);
1178 if (bytes_avail < 16) {
1179 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1180 "Truncated ZIP file data");
1181 return (ARCHIVE_FATAL);
1184 while (p <= buff + bytes_avail - 16) {
1185 if (p[3] == 'P') { p += 3; }
1186 else if (p[3] == 'K') { p += 2; }
1187 else if (p[3] == '\007') { p += 1; }
1188 else if (p[3] == '\010' && p[2] == '\007'
1189 && p[1] == 'K' && p[0] == 'P') {
1190 zip_read_consume(a, p - buff + 16);
1194 zip_read_consume(a, p - buff);
1200 archive_read_format_zip_cleanup(struct archive_read *a)
1204 zip = (struct zip *)(a->format->data);
1206 if (zip->stream_valid)
1207 inflateEnd(&zip->stream);
1209 free(zip->zip_entries);
1210 free(zip->uncompressed_buffer);
1211 archive_string_free(&(zip->extra));
1213 (a->format->data) = NULL;
1214 return (ARCHIVE_OK);
1218 * The extra data is stored as a list of
1219 * id1+size1+data1 + id2+size2+data2 ...
1220 * triplets. id and size are 2 bytes each.
1223 process_extra(const char *p, size_t extra_length, struct zip_entry* zip_entry)
1225 unsigned offset = 0;
1227 while (offset < extra_length - 4)
1229 unsigned short headerid = archive_le16dec(p + offset);
1230 unsigned short datasize = archive_le16dec(p + offset + 2);
1232 if (offset + datasize > extra_length)
1235 fprintf(stderr, "Header id 0x%x, length %d\n",
1236 headerid, datasize);
1240 /* Zip64 extended information extra field. */
1242 zip_entry->uncompressed_size =
1243 archive_le64dec(p + offset);
1245 zip_entry->compressed_size =
1246 archive_le64dec(p + offset + 8);
1250 /* Extended time field "UT". */
1251 int flags = p[offset];
1254 /* Flag bits indicate which dates are present. */
1258 fprintf(stderr, "mtime: %lld -> %d\n",
1259 (long long)zip_entry->mtime,
1260 archive_le32dec(p + offset));
1264 zip_entry->mtime = archive_le32dec(p + offset);
1272 zip_entry->atime = archive_le32dec(p + offset);
1280 zip_entry->ctime = archive_le32dec(p + offset);
1288 /* Info-ZIP Unix Extra Field (old version) "UX". */
1289 if (datasize >= 8) {
1290 zip_entry->atime = archive_le32dec(p + offset);
1291 zip_entry->mtime = archive_le32dec(p + offset + 4);
1293 if (datasize >= 12) {
1294 zip_entry->uid = archive_le16dec(p + offset + 8);
1295 zip_entry->gid = archive_le16dec(p + offset + 10);
1300 /* Info-ZIP Unix Extra Field (type 2) "Ux". */
1302 fprintf(stderr, "uid %d gid %d\n",
1303 archive_le16dec(p + offset),
1304 archive_le16dec(p + offset + 2));
1307 zip_entry->uid = archive_le16dec(p + offset);
1309 zip_entry->gid = archive_le16dec(p + offset + 2);
1313 /* Info-Zip Unix Extra Field (type 3) "ux". */
1314 int uidsize = 0, gidsize = 0;
1316 if (datasize >= 1 && p[offset] == 1) {/* version=1 */
1317 if (datasize >= 4) {
1318 /* get a uid size. */
1319 uidsize = p[offset+1];
1321 zip_entry->uid = archive_le16dec(
1323 else if (uidsize == 4 && datasize >= 6)
1324 zip_entry->uid = archive_le32dec(
1327 if (datasize >= (2 + uidsize + 3)) {
1328 /* get a gid size. */
1329 gidsize = p[offset+2+uidsize];
1331 zip_entry->gid = archive_le16dec(
1332 p+offset+2+uidsize+1);
1333 else if (gidsize == 4 &&
1334 datasize >= (2 + uidsize + 5))
1335 zip_entry->gid = archive_le32dec(
1336 p+offset+2+uidsize+1);
1347 if (offset != extra_length)
1350 "Extra data field contents do not match reported size!\n");