2 * Copyright (c) 2010-2012 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_private.h"
48 #include "archive_read_private.h"
49 #include "archive_endian.h"
53 /* Decoding status. */
57 * Window to see last decoded data, from 32KBi to 2MBi.
61 /* Window buffer, which is a loop buffer. */
62 unsigned char *w_buff;
63 /* The insert position to the window. */
65 /* The position where we can copy decoded code from the window. */
67 /* The length how many bytes we can copy decoded code from
70 /* Translation reversal for x86 processor CALL byte sequence(E8).
71 * This is used for LZX only. */
72 uint32_t translation_size;
75 #define VERBATIM_BLOCK 1
76 #define ALIGNED_OFFSET_BLOCK 2
77 #define UNCOMPRESSED_BLOCK 3
79 size_t block_bytes_avail;
80 /* Repeated offset. */
82 unsigned char rbytes[4];
96 #define CACHE_TYPE uint64_t
97 #define CACHE_BITS (8 * sizeof(CACHE_TYPE))
99 CACHE_TYPE cache_buffer;
100 /* Indicates how many bits avail in cache_buffer. */
112 unsigned char *bitlen;
115 * Use a index table. It's faster than searching a huffman
116 * coding tree, which is a binary tree. But a use of a large
117 * index table causes L1 cache read miss many times.
125 /* Direct access table. */
127 /* Binary tree table for extra bits over the direct access. */
138 static const int slots[] = {
139 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
142 #define SLOT_MAX 21/*->25*/
145 const unsigned char *next_in;
148 unsigned char *next_out;
155 * Cabinet file definitions.
157 /* CFHEADER offset */
158 #define CFHEADER_signature 0
159 #define CFHEADER_cbCabinet 8
160 #define CFHEADER_coffFiles 16
161 #define CFHEADER_versionMinor 24
162 #define CFHEADER_versionMajor 25
163 #define CFHEADER_cFolders 26
164 #define CFHEADER_cFiles 28
165 #define CFHEADER_flags 30
166 #define CFHEADER_setID 32
167 #define CFHEADER_iCabinet 34
168 #define CFHEADER_cbCFHeader 36
169 #define CFHEADER_cbCFFolder 38
170 #define CFHEADER_cbCFData 39
172 /* CFFOLDER offset */
173 #define CFFOLDER_coffCabStart 0
174 #define CFFOLDER_cCFData 4
175 #define CFFOLDER_typeCompress 6
176 #define CFFOLDER_abReserve 8
179 #define CFFILE_cbFile 0
180 #define CFFILE_uoffFolderStart 4
181 #define CFFILE_iFolder 8
182 #define CFFILE_date_time 10
183 #define CFFILE_attribs 14
186 #define CFDATA_csum 0
187 #define CFDATA_cbData 4
188 #define CFDATA_cbUncomp 6
190 static const char *compression_name[] = {
198 /* Sum value of this CFDATA. */
200 uint16_t compressed_size;
201 uint16_t compressed_bytes_remaining;
202 uint16_t uncompressed_size;
203 uint16_t uncompressed_bytes_remaining;
204 /* To know how many bytes we have decompressed. */
205 uint16_t uncompressed_avail;
206 /* Offset from the beginning of compressed data of this CFDATA */
207 uint16_t read_offset;
209 /* To keep memory image of this CFDATA to compute the sum. */
210 size_t memimage_size;
211 unsigned char *memimage;
212 /* Result of calculation of sum. */
213 uint32_t sum_calculated;
214 unsigned char sum_extra[4];
220 uint32_t cfdata_offset_in_cab;
221 uint16_t cfdata_count;
223 #define COMPTYPE_NONE 0x0000
224 #define COMPTYPE_MSZIP 0x0001
225 #define COMPTYPE_QUANTUM 0x0002
226 #define COMPTYPE_LZX 0x0003
228 const char *compname;
229 /* At the time reading CFDATA */
230 struct cfdata cfdata;
232 /* Flags to mark progress of decompression. */
233 char decompress_init;
237 uint32_t uncompressed_size;
241 #define iFoldCONTINUED_FROM_PREV 0xFFFD
242 #define iFoldCONTINUED_TO_NEXT 0xFFFE
243 #define iFoldCONTINUED_PREV_AND_NEXT 0xFFFF
245 #define ATTR_RDONLY 0x01
246 #define ATTR_NAME_IS_UTF 0x80
247 struct archive_string pathname;
251 /* Total bytes of all file size in a Cabinet. */
252 uint32_t total_bytes;
253 uint32_t files_offset;
254 uint16_t folder_count;
257 #define PREV_CABINET 0x0001
258 #define NEXT_CABINET 0x0002
259 #define RESERVE_PRESENT 0x0004
262 /* Version number. */
265 unsigned char cffolder;
266 unsigned char cfdata;
267 /* All folders in a cabinet. */
268 struct cffolder *folder_array;
269 /* All files in a cabinet. */
270 struct cffile *file_array;
275 /* entry_bytes_remaining is the number of bytes we expect. */
276 int64_t entry_offset;
277 int64_t entry_bytes_remaining;
278 int64_t entry_unconsumed;
279 int64_t entry_compressed_bytes_read;
280 int64_t entry_uncompressed_bytes_read;
281 struct cffolder *entry_cffolder;
282 struct cffile *entry_cffile;
283 struct cfdata *entry_cfdata;
285 /* Offset from beginning of a cabinet file. */
287 struct cfheader cfheader;
288 struct archive_wstring ws;
290 /* Flag to mark progress that an archive was read their first header.*/
294 char end_of_entry_cleanup;
295 char read_data_invoked;
296 int64_t bytes_skipped;
298 unsigned char *uncompressed_buffer;
299 size_t uncompressed_buffer_size;
301 int init_default_conversion;
302 struct archive_string_conv *sconv;
303 struct archive_string_conv *sconv_default;
304 struct archive_string_conv *sconv_utf8;
305 char format_name[64];
311 struct lzx_stream xstrm;
314 static int archive_read_format_cab_bid(struct archive_read *, int);
315 static int archive_read_format_cab_options(struct archive_read *,
316 const char *, const char *);
317 static int archive_read_format_cab_read_header(struct archive_read *,
318 struct archive_entry *);
319 static int archive_read_format_cab_read_data(struct archive_read *,
320 const void **, size_t *, int64_t *);
321 static int archive_read_format_cab_read_data_skip(struct archive_read *);
322 static int archive_read_format_cab_cleanup(struct archive_read *);
324 static int cab_skip_sfx(struct archive_read *);
325 static time_t cab_dos_time(const unsigned char *);
326 static int cab_read_data(struct archive_read *, const void **,
327 size_t *, int64_t *);
328 static int cab_read_header(struct archive_read *);
329 static uint32_t cab_checksum_cfdata_4(const void *, size_t bytes, uint32_t);
330 static uint32_t cab_checksum_cfdata(const void *, size_t bytes, uint32_t);
331 static void cab_checksum_update(struct archive_read *, size_t);
332 static int cab_checksum_finish(struct archive_read *);
333 static int cab_next_cfdata(struct archive_read *);
334 static const void *cab_read_ahead_cfdata(struct archive_read *, ssize_t *);
335 static const void *cab_read_ahead_cfdata_none(struct archive_read *, ssize_t *);
336 static const void *cab_read_ahead_cfdata_deflate(struct archive_read *,
338 static const void *cab_read_ahead_cfdata_lzx(struct archive_read *,
340 static int64_t cab_consume_cfdata(struct archive_read *, int64_t);
341 static int64_t cab_minimum_consume_cfdata(struct archive_read *, int64_t);
342 static int lzx_decode_init(struct lzx_stream *, int);
343 static int lzx_read_blocks(struct lzx_stream *, int);
344 static int lzx_decode_blocks(struct lzx_stream *, int);
345 static void lzx_decode_free(struct lzx_stream *);
346 static void lzx_translation(struct lzx_stream *, void *, size_t, uint32_t);
347 static void lzx_cleanup_bitstream(struct lzx_stream *);
348 static int lzx_decode(struct lzx_stream *, int);
349 static int lzx_read_pre_tree(struct lzx_stream *);
350 static int lzx_read_bitlen(struct lzx_stream *, struct huffman *, int);
351 static int lzx_huffman_init(struct huffman *, size_t, int);
352 static void lzx_huffman_free(struct huffman *);
353 static int lzx_make_huffman_table(struct huffman *);
354 static inline int lzx_decode_huffman(struct huffman *, unsigned);
355 static int lzx_decode_huffman_tree(struct huffman *, unsigned, int);
359 archive_read_support_format_cab(struct archive *_a)
361 struct archive_read *a = (struct archive_read *)_a;
365 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
366 ARCHIVE_STATE_NEW, "archive_read_support_format_cab");
368 cab = (struct cab *)calloc(1, sizeof(*cab));
370 archive_set_error(&a->archive, ENOMEM,
371 "Can't allocate CAB data");
372 return (ARCHIVE_FATAL);
374 archive_string_init(&cab->ws);
375 archive_wstring_ensure(&cab->ws, 256);
377 r = __archive_read_register_format(a,
380 archive_read_format_cab_bid,
381 archive_read_format_cab_options,
382 archive_read_format_cab_read_header,
383 archive_read_format_cab_read_data,
384 archive_read_format_cab_read_data_skip,
386 archive_read_format_cab_cleanup,
396 find_cab_magic(const char *p)
401 * Note: Self-Extraction program has 'MSCF' string in their
402 * program. If we were finding 'MSCF' string only, we got
403 * wrong place for Cabinet header, thus, we have to check
404 * following four bytes which are reserved and must be set
407 if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
419 archive_read_format_cab_bid(struct archive_read *a, int best_bid)
422 ssize_t bytes_avail, offset, window;
424 /* If there's already a better bid than we can ever
425 make, don't bother testing. */
429 if ((p = __archive_read_ahead(a, 8, NULL)) == NULL)
432 if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
436 * Attempt to handle self-extracting archives
437 * by noting a PE header and searching forward
438 * up to 128k for a 'MSCF' marker.
440 if (p[0] == 'M' && p[1] == 'Z') {
443 while (offset < (1024 * 128)) {
444 const char *h = __archive_read_ahead(a, offset + window,
447 /* Remaining bytes are less than window. */
454 while (p + 8 < h + bytes_avail) {
456 if ((next = find_cab_magic(p)) == 0)
467 archive_read_format_cab_options(struct archive_read *a,
468 const char *key, const char *val)
471 int ret = ARCHIVE_FAILED;
473 cab = (struct cab *)(a->format->data);
474 if (strcmp(key, "hdrcharset") == 0) {
475 if (val == NULL || val[0] == 0)
476 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
477 "cab: hdrcharset option needs a character-set name");
479 cab->sconv = archive_string_conversion_from_charset(
480 &a->archive, val, 0);
481 if (cab->sconv != NULL)
489 /* Note: The "warn" return is just to inform the options
490 * supervisor that we didn't handle it. It will generate
491 * a suitable error if no one used this option. */
492 return (ARCHIVE_WARN);
496 cab_skip_sfx(struct archive_read *a)
500 ssize_t bytes, window;
504 const char *h = __archive_read_ahead(a, window, &bytes);
506 /* Remaining size are less than window. */
509 archive_set_error(&a->archive,
510 ARCHIVE_ERRNO_FILE_FORMAT,
511 "Couldn't find out CAB header");
512 return (ARCHIVE_FATAL);
520 * Scan ahead until we find something that looks
521 * like the cab header.
525 if ((next = find_cab_magic(p)) == 0) {
527 __archive_read_consume(a, skip);
533 __archive_read_consume(a, skip);
538 truncated_error(struct archive_read *a)
540 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
541 "Truncated CAB header");
542 return (ARCHIVE_FATAL);
546 cab_strnlen(const unsigned char *p, size_t maxlen)
550 for (i = 0; i <= maxlen; i++) {
555 return (-1);/* invalid */
559 /* Read bytes as much as remaining. */
561 cab_read_ahead_remaining(struct archive_read *a, size_t min, ssize_t *avail)
566 p = __archive_read_ahead(a, min, avail);
574 /* Convert a path separator '\' -> '/' */
576 cab_convert_path_separator_1(struct archive_string *fn, unsigned char attr)
581 /* Easy check if we have '\' in multi-byte string. */
583 for (i = 0; i < archive_strlen(fn); i++) {
584 if (fn->s[i] == '\\') {
586 /* This may be second byte of multi-byte
592 } else if ((fn->s[i] & 0x80) && !(attr & ATTR_NAME_IS_UTF))
597 if (i == archive_strlen(fn))
603 * Replace a character '\' with '/' in wide character.
606 cab_convert_path_separator_2(struct cab *cab, struct archive_entry *entry)
611 /* If a conversion to wide character failed, force the replacement. */
612 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
613 archive_wstrcpy(&(cab->ws), wp);
614 for (i = 0; i < archive_strlen(&(cab->ws)); i++) {
615 if (cab->ws.s[i] == L'\\')
618 archive_entry_copy_pathname_w(entry, cab->ws.s);
623 * Read CFHEADER, CFFOLDER and CFFILE.
626 cab_read_header(struct archive_read *a)
628 const unsigned char *p;
635 int cur_folder, prev_folder;
638 a->archive.archive_format = ARCHIVE_FORMAT_CAB;
639 if (a->archive.archive_format_name == NULL)
640 a->archive.archive_format_name = "CAB";
642 if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
643 return (truncated_error(a));
645 cab = (struct cab *)(a->format->data);
646 if (cab->found_header == 0 &&
647 p[0] == 'M' && p[1] == 'Z') {
648 /* This is an executable? Must be self-extracting... */
649 err = cab_skip_sfx(a);
650 if (err < ARCHIVE_WARN)
653 /* Re-read header after processing the SFX. */
654 if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
655 return (truncated_error(a));
663 if (p[CFHEADER_signature+0] != 'M' || p[CFHEADER_signature+1] != 'S' ||
664 p[CFHEADER_signature+2] != 'C' || p[CFHEADER_signature+3] != 'F') {
665 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
666 "Couldn't find out CAB header");
667 return (ARCHIVE_FATAL);
669 hd->total_bytes = archive_le32dec(p + CFHEADER_cbCabinet);
670 hd->files_offset = archive_le32dec(p + CFHEADER_coffFiles);
671 hd->minor = p[CFHEADER_versionMinor];
672 hd->major = p[CFHEADER_versionMajor];
673 hd->folder_count = archive_le16dec(p + CFHEADER_cFolders);
674 if (hd->folder_count == 0)
676 hd->file_count = archive_le16dec(p + CFHEADER_cFiles);
677 if (hd->file_count == 0)
679 hd->flags = archive_le16dec(p + CFHEADER_flags);
680 hd->setid = archive_le16dec(p + CFHEADER_setID);
681 hd->cabinet = archive_le16dec(p + CFHEADER_iCabinet);
682 used = CFHEADER_iCabinet + 2;
683 if (hd->flags & RESERVE_PRESENT) {
685 cfheader = archive_le16dec(p + CFHEADER_cbCFHeader);
686 if (cfheader > 60000U)
688 hd->cffolder = p[CFHEADER_cbCFFolder];
689 hd->cfdata = p[CFHEADER_cbCFData];
690 used += 4;/* cbCFHeader, cbCFFolder and cbCFData */
691 used += cfheader;/* abReserve */
693 hd->cffolder = 0;/* Avoid compiling warning. */
694 if (hd->flags & PREV_CABINET) {
695 /* How many bytes are used for szCabinetPrev. */
696 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
697 return (truncated_error(a));
698 if ((len = cab_strnlen(p + used, 255)) <= 0)
701 /* How many bytes are used for szDiskPrev. */
702 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
703 return (truncated_error(a));
704 if ((len = cab_strnlen(p + used, 255)) <= 0)
708 if (hd->flags & NEXT_CABINET) {
709 /* How many bytes are used for szCabinetNext. */
710 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
711 return (truncated_error(a));
712 if ((len = cab_strnlen(p + used, 255)) <= 0)
715 /* How many bytes are used for szDiskNext. */
716 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
717 return (truncated_error(a));
718 if ((len = cab_strnlen(p + used, 255)) <= 0)
722 __archive_read_consume(a, used);
723 cab->cab_offset += used;
729 hd->folder_array = (struct cffolder *)calloc(
730 hd->folder_count, sizeof(struct cffolder));
731 if (hd->folder_array == NULL)
735 if (hd->flags & RESERVE_PRESENT)
736 bytes += hd->cffolder;
737 bytes *= hd->folder_count;
738 if ((p = __archive_read_ahead(a, bytes, NULL)) == NULL)
739 return (truncated_error(a));
741 for (i = 0; i < hd->folder_count; i++) {
742 struct cffolder *folder = &(hd->folder_array[i]);
743 folder->cfdata_offset_in_cab =
744 archive_le32dec(p + CFFOLDER_coffCabStart);
745 folder->cfdata_count = archive_le16dec(p+CFFOLDER_cCFData);
747 archive_le16dec(p+CFFOLDER_typeCompress) & 0x0F;
749 archive_le16dec(p+CFFOLDER_typeCompress) >> 8;
750 /* Get a compression name. */
751 if (folder->comptype <
752 sizeof(compression_name) / sizeof(compression_name[0]))
753 folder->compname = compression_name[folder->comptype];
755 folder->compname = "UNKNOWN";
758 if (hd->flags & RESERVE_PRESENT) {
759 p += hd->cffolder;/* abReserve */
760 used += hd->cffolder;
763 * Sanity check if each data is acceptable.
765 if (offset32 >= folder->cfdata_offset_in_cab)
767 offset32 = folder->cfdata_offset_in_cab;
769 /* Set a request to initialize zlib for the CFDATA of
771 folder->decompress_init = 0;
773 __archive_read_consume(a, used);
774 cab->cab_offset += used;
779 /* Seek read pointer to the offset of CFFILE if needed. */
780 skip = (int64_t)hd->files_offset - cab->cab_offset;
782 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
783 "Invalid offset of CFFILE %jd < %jd",
784 (intmax_t)hd->files_offset, (intmax_t)cab->cab_offset);
785 return (ARCHIVE_FATAL);
788 __archive_read_consume(a, skip);
789 cab->cab_offset += skip;
791 /* Allocate memory for CFDATA */
792 hd->file_array = (struct cffile *)calloc(
793 hd->file_count, sizeof(struct cffile));
794 if (hd->file_array == NULL)
798 for (i = 0; i < hd->file_count; i++) {
799 struct cffile *file = &(hd->file_array[i]);
802 if ((p = __archive_read_ahead(a, 16, NULL)) == NULL)
803 return (truncated_error(a));
804 file->uncompressed_size = archive_le32dec(p + CFFILE_cbFile);
805 file->offset = archive_le32dec(p + CFFILE_uoffFolderStart);
806 file->folder = archive_le16dec(p + CFFILE_iFolder);
807 file->mtime = cab_dos_time(p + CFFILE_date_time);
808 file->attr = (uint8_t)archive_le16dec(p + CFFILE_attribs);
809 __archive_read_consume(a, 16);
811 cab->cab_offset += 16;
812 if ((p = cab_read_ahead_remaining(a, 256, &avail)) == NULL)
813 return (truncated_error(a));
814 if ((len = cab_strnlen(p, avail-1)) <= 0)
817 /* Copy a pathname. */
818 archive_string_init(&(file->pathname));
819 archive_strncpy(&(file->pathname), p, len);
820 __archive_read_consume(a, len + 1);
821 cab->cab_offset += len + 1;
824 * Sanity check if each data is acceptable.
826 if (file->uncompressed_size > 0x7FFF8000)
827 goto invalid;/* Too large */
828 if ((int64_t)file->offset + (int64_t)file->uncompressed_size
829 > ARCHIVE_LITERAL_LL(0x7FFF8000))
830 goto invalid;/* Too large */
831 switch (file->folder) {
832 case iFoldCONTINUED_TO_NEXT:
833 /* This must be last file in a folder. */
834 if (i != hd->file_count -1)
836 cur_folder = hd->folder_count -1;
838 case iFoldCONTINUED_PREV_AND_NEXT:
839 /* This must be only one file in a folder. */
840 if (hd->file_count != 1)
843 case iFoldCONTINUED_FROM_PREV:
844 /* This must be first file in a folder. */
847 prev_folder = cur_folder = 0;
848 offset32 = file->offset;
851 if (file->folder >= hd->folder_count)
853 cur_folder = file->folder;
856 /* Dot not back track. */
857 if (cur_folder < prev_folder)
859 if (cur_folder != prev_folder)
861 prev_folder = cur_folder;
863 /* Make sure there are not any blanks from last file
865 if (offset32 != file->offset)
867 offset32 += file->uncompressed_size;
869 /* CFDATA is available for file contents. */
870 if (file->uncompressed_size > 0 &&
871 hd->folder_array[cur_folder].cfdata_count == 0)
875 if (hd->cabinet != 0 || hd->flags & (PREV_CABINET | NEXT_CABINET)) {
876 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
877 "Multivolume cabinet file is unsupported");
878 return (ARCHIVE_WARN);
882 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
883 "Invalid CAB header");
884 return (ARCHIVE_FATAL);
886 archive_set_error(&a->archive, ENOMEM,
887 "Can't allocate memory for CAB data");
888 return (ARCHIVE_FATAL);
892 archive_read_format_cab_read_header(struct archive_read *a,
893 struct archive_entry *entry)
897 struct cffolder *prev_folder;
899 struct archive_string_conv *sconv;
900 int err = ARCHIVE_OK, r;
902 cab = (struct cab *)(a->format->data);
903 if (cab->found_header == 0) {
904 err = cab_read_header(a);
905 if (err < ARCHIVE_WARN)
907 /* We've found the header. */
908 cab->found_header = 1;
912 if (hd->file_index >= hd->file_count) {
913 cab->end_of_archive = 1;
914 return (ARCHIVE_EOF);
916 file = &hd->file_array[hd->file_index++];
918 cab->end_of_entry = 0;
919 cab->end_of_entry_cleanup = 0;
920 cab->entry_compressed_bytes_read = 0;
921 cab->entry_uncompressed_bytes_read = 0;
922 cab->entry_unconsumed = 0;
923 cab->entry_cffile = file;
926 * Choose a proper folder.
928 prev_folder = cab->entry_cffolder;
929 switch (file->folder) {
930 case iFoldCONTINUED_FROM_PREV:
931 case iFoldCONTINUED_PREV_AND_NEXT:
932 cab->entry_cffolder = &hd->folder_array[0];
934 case iFoldCONTINUED_TO_NEXT:
935 cab->entry_cffolder = &hd->folder_array[hd->folder_count-1];
938 cab->entry_cffolder = &hd->folder_array[file->folder];
941 /* If a cffolder of this file is changed, reset a cfdata to read
942 * file contents from next cfdata. */
943 if (prev_folder != cab->entry_cffolder)
944 cab->entry_cfdata = NULL;
946 /* If a pathname is UTF-8, prepare a string conversion object
947 * for UTF-8 and use it. */
948 if (file->attr & ATTR_NAME_IS_UTF) {
949 if (cab->sconv_utf8 == NULL) {
951 archive_string_conversion_from_charset(
952 &(a->archive), "UTF-8", 1);
953 if (cab->sconv_utf8 == NULL)
954 return (ARCHIVE_FATAL);
956 sconv = cab->sconv_utf8;
957 } else if (cab->sconv != NULL) {
958 /* Choose the conversion specified by the option. */
961 /* Choose the default conversion. */
962 if (!cab->init_default_conversion) {
964 archive_string_default_conversion_for_read(
966 cab->init_default_conversion = 1;
968 sconv = cab->sconv_default;
972 * Set a default value and common data
974 r = cab_convert_path_separator_1(&(file->pathname), file->attr);
975 if (archive_entry_copy_pathname_l(entry, file->pathname.s,
976 archive_strlen(&(file->pathname)), sconv) != 0) {
977 if (errno == ENOMEM) {
978 archive_set_error(&a->archive, ENOMEM,
979 "Can't allocate memory for Pathname");
980 return (ARCHIVE_FATAL);
982 archive_set_error(&a->archive,
983 ARCHIVE_ERRNO_FILE_FORMAT,
984 "Pathname cannot be converted "
985 "from %s to current locale.",
986 archive_string_conversion_charset_name(sconv));
990 /* Convert a path separator '\' -> '/' */
991 cab_convert_path_separator_2(cab, entry);
994 archive_entry_set_size(entry, file->uncompressed_size);
995 if (file->attr & ATTR_RDONLY)
996 archive_entry_set_mode(entry, AE_IFREG | 0555);
998 archive_entry_set_mode(entry, AE_IFREG | 0666);
999 archive_entry_set_mtime(entry, file->mtime, 0);
1001 cab->entry_bytes_remaining = file->uncompressed_size;
1002 cab->entry_offset = 0;
1003 /* We don't need compress data. */
1004 if (file->uncompressed_size == 0)
1005 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1007 /* Set up a more descriptive format name. */
1008 sprintf(cab->format_name, "CAB %d.%d (%s)",
1009 hd->major, hd->minor, cab->entry_cffolder->compname);
1010 a->archive.archive_format_name = cab->format_name;
1016 archive_read_format_cab_read_data(struct archive_read *a,
1017 const void **buff, size_t *size, int64_t *offset)
1019 struct cab *cab = (struct cab *)(a->format->data);
1022 switch (cab->entry_cffile->folder) {
1023 case iFoldCONTINUED_FROM_PREV:
1024 case iFoldCONTINUED_TO_NEXT:
1025 case iFoldCONTINUED_PREV_AND_NEXT:
1029 archive_clear_error(&a->archive);
1030 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1031 "Cannot restore this file split in multivolume.");
1032 return (ARCHIVE_FAILED);
1036 if (cab->read_data_invoked == 0) {
1037 if (cab->bytes_skipped) {
1038 if (cab->entry_cfdata == NULL) {
1039 r = cab_next_cfdata(a);
1043 if (cab_consume_cfdata(a, cab->bytes_skipped) < 0)
1044 return (ARCHIVE_FATAL);
1045 cab->bytes_skipped = 0;
1047 cab->read_data_invoked = 1;
1049 if (cab->entry_unconsumed) {
1050 /* Consume as much as the compressor actually used. */
1051 r = (int)cab_consume_cfdata(a, cab->entry_unconsumed);
1052 cab->entry_unconsumed = 0;
1056 if (cab->end_of_archive || cab->end_of_entry) {
1057 if (!cab->end_of_entry_cleanup) {
1058 /* End-of-entry cleanup done. */
1059 cab->end_of_entry_cleanup = 1;
1061 *offset = cab->entry_offset;
1064 return (ARCHIVE_EOF);
1067 return (cab_read_data(a, buff, size, offset));
1071 cab_checksum_cfdata_4(const void *p, size_t bytes, uint32_t seed)
1073 const unsigned char *b;
1077 u32num = (unsigned)bytes / 4;
1080 for (;u32num > 0; --u32num) {
1081 sum ^= archive_le32dec(b);
1088 cab_checksum_cfdata(const void *p, size_t bytes, uint32_t seed)
1090 const unsigned char *b;
1094 sum = cab_checksum_cfdata_4(p, bytes, seed);
1098 switch (bytes & 3) {
1100 t |= ((uint32_t)(*b++)) << 16;
1103 t |= ((uint32_t)(*b++)) << 8;
1117 cab_checksum_update(struct archive_read *a, size_t bytes)
1119 struct cab *cab = (struct cab *)(a->format->data);
1120 struct cfdata *cfdata = cab->entry_cfdata;
1121 const unsigned char *p;
1124 if (cfdata->sum == 0 || cfdata->sum_ptr == NULL)
1127 * Calculate the sum of this CFDATA.
1128 * Make sure CFDATA must be calculated in four bytes.
1130 p = cfdata->sum_ptr;
1132 if (cfdata->sum_extra_avail) {
1133 while (cfdata->sum_extra_avail < 4 && sumbytes > 0) {
1135 cfdata->sum_extra_avail++] = *p++;
1138 if (cfdata->sum_extra_avail == 4) {
1139 cfdata->sum_calculated = cab_checksum_cfdata_4(
1140 cfdata->sum_extra, 4, cfdata->sum_calculated);
1141 cfdata->sum_extra_avail = 0;
1145 int odd = sumbytes & 3;
1146 if (sumbytes - odd > 0)
1147 cfdata->sum_calculated = cab_checksum_cfdata_4(
1148 p, sumbytes - odd, cfdata->sum_calculated);
1150 memcpy(cfdata->sum_extra, p + sumbytes - odd, odd);
1151 cfdata->sum_extra_avail = odd;
1153 cfdata->sum_ptr = NULL;
1157 cab_checksum_finish(struct archive_read *a)
1159 struct cab *cab = (struct cab *)(a->format->data);
1160 struct cfdata *cfdata = cab->entry_cfdata;
1163 /* Do not need to compute a sum. */
1164 if (cfdata->sum == 0)
1165 return (ARCHIVE_OK);
1168 * Calculate the sum of remaining CFDATA.
1170 if (cfdata->sum_extra_avail) {
1171 cfdata->sum_calculated =
1172 cab_checksum_cfdata(cfdata->sum_extra,
1173 cfdata->sum_extra_avail, cfdata->sum_calculated);
1174 cfdata->sum_extra_avail = 0;
1178 if (cab->cfheader.flags & RESERVE_PRESENT)
1179 l += cab->cfheader.cfdata;
1180 cfdata->sum_calculated = cab_checksum_cfdata(
1181 cfdata->memimage + CFDATA_cbData, l, cfdata->sum_calculated);
1182 if (cfdata->sum_calculated != cfdata->sum) {
1183 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1184 "Checksum error CFDATA[%d] %x:%x in %d bytes",
1185 cab->entry_cffolder->cfdata_index -1,
1186 cfdata->sum, cfdata->sum_calculated,
1187 cfdata->compressed_size);
1188 return (ARCHIVE_FAILED);
1190 return (ARCHIVE_OK);
1194 * Read CFDATA if needed.
1197 cab_next_cfdata(struct archive_read *a)
1199 struct cab *cab = (struct cab *)(a->format->data);
1200 struct cfdata *cfdata = cab->entry_cfdata;
1202 /* There are remaining bytes in current CFDATA, use it first. */
1203 if (cfdata != NULL && cfdata->uncompressed_bytes_remaining > 0)
1204 return (ARCHIVE_OK);
1206 if (cfdata == NULL) {
1209 cab->entry_cffolder->cfdata_index = 0;
1211 /* Seek read pointer to the offset of CFDATA if needed. */
1212 skip = cab->entry_cffolder->cfdata_offset_in_cab
1216 switch (cab->entry_cffile->folder) {
1217 case iFoldCONTINUED_FROM_PREV:
1218 case iFoldCONTINUED_PREV_AND_NEXT:
1221 case iFoldCONTINUED_TO_NEXT:
1222 folder_index = cab->cfheader.folder_count-1;
1225 folder_index = cab->entry_cffile->folder;
1228 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1229 "Invalid offset of CFDATA in folder(%d) %jd < %jd",
1231 (intmax_t)cab->entry_cffolder->cfdata_offset_in_cab,
1232 (intmax_t)cab->cab_offset);
1233 return (ARCHIVE_FATAL);
1236 if (__archive_read_consume(a, skip) < 0)
1237 return (ARCHIVE_FATAL);
1239 cab->entry_cffolder->cfdata_offset_in_cab;
1246 if (cab->entry_cffolder->cfdata_index <
1247 cab->entry_cffolder->cfdata_count) {
1248 const unsigned char *p;
1251 cfdata = &(cab->entry_cffolder->cfdata);
1252 cab->entry_cffolder->cfdata_index++;
1253 cab->entry_cfdata = cfdata;
1254 cfdata->sum_calculated = 0;
1255 cfdata->sum_extra_avail = 0;
1256 cfdata->sum_ptr = NULL;
1258 if (cab->cfheader.flags & RESERVE_PRESENT)
1259 l += cab->cfheader.cfdata;
1260 if ((p = __archive_read_ahead(a, l, NULL)) == NULL)
1261 return (truncated_error(a));
1262 cfdata->sum = archive_le32dec(p + CFDATA_csum);
1263 cfdata->compressed_size = archive_le16dec(p + CFDATA_cbData);
1264 cfdata->compressed_bytes_remaining = cfdata->compressed_size;
1265 cfdata->uncompressed_size =
1266 archive_le16dec(p + CFDATA_cbUncomp);
1267 cfdata->uncompressed_bytes_remaining =
1268 cfdata->uncompressed_size;
1269 cfdata->uncompressed_avail = 0;
1270 cfdata->read_offset = 0;
1271 cfdata->unconsumed = 0;
1274 * Sanity check if data size is acceptable.
1276 if (cfdata->compressed_size == 0 ||
1277 cfdata->compressed_size > (0x8000+6144))
1279 if (cfdata->uncompressed_size > 0x8000)
1281 if (cfdata->uncompressed_size == 0) {
1282 switch (cab->entry_cffile->folder) {
1283 case iFoldCONTINUED_PREV_AND_NEXT:
1284 case iFoldCONTINUED_TO_NEXT:
1286 case iFoldCONTINUED_FROM_PREV:
1291 /* If CFDATA is not last in a folder, an uncompressed
1292 * size must be 0x8000(32KBi) */
1293 if ((cab->entry_cffolder->cfdata_index <
1294 cab->entry_cffolder->cfdata_count) &&
1295 cfdata->uncompressed_size != 0x8000)
1298 /* A compressed data size and an uncompressed data size must
1299 * be the same in no compression mode. */
1300 if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
1301 cfdata->compressed_size != cfdata->uncompressed_size)
1305 * Save CFDATA image for sum check.
1307 if (cfdata->memimage_size < (size_t)l) {
1308 free(cfdata->memimage);
1309 cfdata->memimage = malloc(l);
1310 if (cfdata->memimage == NULL) {
1311 archive_set_error(&a->archive, ENOMEM,
1312 "Can't allocate memory for CAB data");
1313 return (ARCHIVE_FATAL);
1315 cfdata->memimage_size = l;
1317 memcpy(cfdata->memimage, p, l);
1319 /* Consume bytes as much as we used. */
1320 __archive_read_consume(a, l);
1321 cab->cab_offset += l;
1322 } else if (cab->entry_cffolder->cfdata_count > 0) {
1323 /* Run out of all CFDATA in a folder. */
1324 cfdata->compressed_size = 0;
1325 cfdata->uncompressed_size = 0;
1326 cfdata->compressed_bytes_remaining = 0;
1327 cfdata->uncompressed_bytes_remaining = 0;
1329 /* Current folder does not have any CFDATA. */
1330 cfdata = &(cab->entry_cffolder->cfdata);
1331 cab->entry_cfdata = cfdata;
1332 memset(cfdata, 0, sizeof(*cfdata));
1334 return (ARCHIVE_OK);
1336 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1338 return (ARCHIVE_FATAL);
1342 * Read ahead CFDATA.
1345 cab_read_ahead_cfdata(struct archive_read *a, ssize_t *avail)
1347 struct cab *cab = (struct cab *)(a->format->data);
1350 err = cab_next_cfdata(a);
1351 if (err < ARCHIVE_OK) {
1356 switch (cab->entry_cffolder->comptype) {
1358 return (cab_read_ahead_cfdata_none(a, avail));
1359 case COMPTYPE_MSZIP:
1360 return (cab_read_ahead_cfdata_deflate(a, avail));
1362 return (cab_read_ahead_cfdata_lzx(a, avail));
1363 default: /* Unsupported compression. */
1364 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1365 "Unsupported CAB compression : %s",
1366 cab->entry_cffolder->compname);
1367 *avail = ARCHIVE_FAILED;
1373 * Read ahead CFDATA as uncompressed data.
1376 cab_read_ahead_cfdata_none(struct archive_read *a, ssize_t *avail)
1378 struct cab *cab = (struct cab *)(a->format->data);
1379 struct cfdata *cfdata;
1382 cfdata = cab->entry_cfdata;
1385 * Note: '1' here is a performance optimization.
1386 * Recall that the decompression layer returns a count of
1387 * available bytes; asking for more than that forces the
1388 * decompressor to combine reads by copying data.
1390 d = __archive_read_ahead(a, 1, avail);
1392 *avail = truncated_error(a);
1395 if (*avail > cfdata->uncompressed_bytes_remaining)
1396 *avail = cfdata->uncompressed_bytes_remaining;
1397 cfdata->uncompressed_avail = cfdata->uncompressed_size;
1398 cfdata->unconsumed = *avail;
1399 cfdata->sum_ptr = d;
1404 * Read ahead CFDATA as deflate data.
1408 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1410 struct cab *cab = (struct cab *)(a->format->data);
1411 struct cfdata *cfdata;
1417 cfdata = cab->entry_cfdata;
1418 /* If the buffer hasn't been allocated, allocate it now. */
1419 if (cab->uncompressed_buffer == NULL) {
1420 cab->uncompressed_buffer_size = 0x8000;
1421 cab->uncompressed_buffer
1422 = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1423 if (cab->uncompressed_buffer == NULL) {
1424 archive_set_error(&a->archive, ENOMEM,
1425 "No memory for CAB reader");
1426 *avail = ARCHIVE_FATAL;
1431 uavail = cfdata->uncompressed_avail;
1432 if (uavail == cfdata->uncompressed_size) {
1433 d = cab->uncompressed_buffer + cfdata->read_offset;
1434 *avail = uavail - cfdata->read_offset;
1438 if (!cab->entry_cffolder->decompress_init) {
1439 cab->stream.next_in = NULL;
1440 cab->stream.avail_in = 0;
1441 cab->stream.total_in = 0;
1442 cab->stream.next_out = NULL;
1443 cab->stream.avail_out = 0;
1444 cab->stream.total_out = 0;
1445 if (cab->stream_valid)
1446 r = inflateReset(&cab->stream);
1448 r = inflateInit2(&cab->stream,
1449 -15 /* Don't check for zlib header */);
1451 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1452 "Can't initialize deflate decompression.");
1453 *avail = ARCHIVE_FATAL;
1456 /* Stream structure has been set up. */
1457 cab->stream_valid = 1;
1458 /* We've initialized decompression for this stream. */
1459 cab->entry_cffolder->decompress_init = 1;
1462 if (cfdata->compressed_bytes_remaining == cfdata->compressed_size)
1467 cab->stream.total_out = uavail;
1469 * We always uncompress all data in current CFDATA.
1471 while (!eod && cab->stream.total_out < cfdata->uncompressed_size) {
1472 ssize_t bytes_avail;
1474 cab->stream.next_out =
1475 cab->uncompressed_buffer + cab->stream.total_out;
1476 cab->stream.avail_out =
1477 cfdata->uncompressed_size - cab->stream.total_out;
1479 d = __archive_read_ahead(a, 1, &bytes_avail);
1480 if (bytes_avail <= 0) {
1481 *avail = truncated_error(a);
1484 if (bytes_avail > cfdata->compressed_bytes_remaining)
1485 bytes_avail = cfdata->compressed_bytes_remaining;
1487 * A bug in zlib.h: stream.next_in should be marked 'const'
1488 * but isn't (the library never alters data through the
1489 * next_in pointer, only reads it). The result: this ugly
1490 * cast to remove 'const'.
1492 cab->stream.next_in = (Bytef *)(uintptr_t)d;
1493 cab->stream.avail_in = (uInt)bytes_avail;
1494 cab->stream.total_in = 0;
1496 /* Cut out a tow-byte MSZIP signature(0x43, 0x4b). */
1498 if (bytes_avail <= mszip) {
1500 if (cab->stream.next_in[0] != 0x43)
1502 if (bytes_avail > 1 &&
1503 cab->stream.next_in[1] != 0x4b)
1505 } else if (cab->stream.next_in[0] != 0x4b)
1507 cfdata->unconsumed = bytes_avail;
1508 cfdata->sum_ptr = d;
1509 if (cab_minimum_consume_cfdata(
1510 a, cfdata->unconsumed) < 0) {
1511 *avail = ARCHIVE_FATAL;
1514 mszip -= (int)bytes_avail;
1517 if (mszip == 1 && cab->stream.next_in[0] != 0x4b)
1519 else if (cab->stream.next_in[0] != 0x43 ||
1520 cab->stream.next_in[1] != 0x4b)
1522 cab->stream.next_in += mszip;
1523 cab->stream.avail_in -= mszip;
1524 cab->stream.total_in += mszip;
1528 r = inflate(&cab->stream, 0);
1538 cfdata->unconsumed = cab->stream.total_in;
1539 cfdata->sum_ptr = d;
1540 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1541 *avail = ARCHIVE_FATAL;
1545 uavail = (uint16_t)cab->stream.total_out;
1547 if (uavail < cfdata->uncompressed_size) {
1548 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1549 "Invalid uncompressed size (%d < %d)",
1550 uavail, cfdata->uncompressed_size);
1551 *avail = ARCHIVE_FATAL;
1556 * Note: I suspect there is a bug in makecab.exe because, in rare
1557 * case, compressed bytes are still remaining regardless we have
1558 * gotten all uncompressed bytes, which size is recorded in CFDATA,
1559 * as much as we need, and we have to use the garbage so as to
1560 * correctly compute the sum of CFDATA accordingly.
1562 if (cfdata->compressed_bytes_remaining > 0) {
1563 ssize_t bytes_avail;
1565 d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1567 if (bytes_avail <= 0) {
1568 *avail = truncated_error(a);
1571 cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1572 cfdata->sum_ptr = d;
1573 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1574 *avail = ARCHIVE_FATAL;
1580 * Set dictionary data for decompressing of next CFDATA, which
1581 * in the same folder. This is why we always do decompress CFDATA
1582 * even if beginning CFDATA or some of CFDATA are not used in
1583 * skipping file data.
1585 if (cab->entry_cffolder->cfdata_index <
1586 cab->entry_cffolder->cfdata_count) {
1587 r = inflateReset(&cab->stream);
1590 r = inflateSetDictionary(&cab->stream,
1591 cab->uncompressed_buffer, cfdata->uncompressed_size);
1596 d = cab->uncompressed_buffer + cfdata->read_offset;
1597 *avail = uavail - cfdata->read_offset;
1598 cfdata->uncompressed_avail = uavail;
1605 archive_set_error(&a->archive, ENOMEM,
1606 "Out of memory for deflate decompression");
1609 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1610 "Deflate decompression failed (%d)", r);
1613 *avail = ARCHIVE_FATAL;
1616 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1617 "CFDATA incorrect(no MSZIP signature)");
1618 *avail = ARCHIVE_FATAL;
1622 #else /* HAVE_ZLIB_H */
1625 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1627 *avail = ARCHIVE_FATAL;
1628 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1629 "libarchive compiled without deflate support (no libz)");
1633 #endif /* HAVE_ZLIB_H */
1636 cab_read_ahead_cfdata_lzx(struct archive_read *a, ssize_t *avail)
1638 struct cab *cab = (struct cab *)(a->format->data);
1639 struct cfdata *cfdata;
1644 cfdata = cab->entry_cfdata;
1645 /* If the buffer hasn't been allocated, allocate it now. */
1646 if (cab->uncompressed_buffer == NULL) {
1647 cab->uncompressed_buffer_size = 0x8000;
1648 cab->uncompressed_buffer
1649 = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1650 if (cab->uncompressed_buffer == NULL) {
1651 archive_set_error(&a->archive, ENOMEM,
1652 "No memory for CAB reader");
1653 *avail = ARCHIVE_FATAL;
1658 uavail = cfdata->uncompressed_avail;
1659 if (uavail == cfdata->uncompressed_size) {
1660 d = cab->uncompressed_buffer + cfdata->read_offset;
1661 *avail = uavail - cfdata->read_offset;
1665 if (!cab->entry_cffolder->decompress_init) {
1666 r = lzx_decode_init(&cab->xstrm,
1667 cab->entry_cffolder->compdata);
1668 if (r != ARCHIVE_OK) {
1669 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1670 "Can't initialize LZX decompression.");
1671 *avail = ARCHIVE_FATAL;
1674 /* We've initialized decompression for this stream. */
1675 cab->entry_cffolder->decompress_init = 1;
1678 /* Clean up remaining bits of previous CFDATA. */
1679 lzx_cleanup_bitstream(&cab->xstrm);
1680 cab->xstrm.total_out = uavail;
1681 while (cab->xstrm.total_out < cfdata->uncompressed_size) {
1682 ssize_t bytes_avail;
1684 cab->xstrm.next_out =
1685 cab->uncompressed_buffer + cab->xstrm.total_out;
1686 cab->xstrm.avail_out =
1687 cfdata->uncompressed_size - cab->xstrm.total_out;
1689 d = __archive_read_ahead(a, 1, &bytes_avail);
1690 if (bytes_avail <= 0) {
1691 archive_set_error(&a->archive,
1692 ARCHIVE_ERRNO_FILE_FORMAT,
1693 "Truncated CAB file data");
1694 *avail = ARCHIVE_FATAL;
1697 if (bytes_avail > cfdata->compressed_bytes_remaining)
1698 bytes_avail = cfdata->compressed_bytes_remaining;
1700 cab->xstrm.next_in = d;
1701 cab->xstrm.avail_in = bytes_avail;
1702 cab->xstrm.total_in = 0;
1703 r = lzx_decode(&cab->xstrm,
1704 cfdata->compressed_bytes_remaining == bytes_avail);
1710 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1711 "LZX decompression failed (%d)", r);
1712 *avail = ARCHIVE_FATAL;
1715 cfdata->unconsumed = cab->xstrm.total_in;
1716 cfdata->sum_ptr = d;
1717 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1718 *avail = ARCHIVE_FATAL;
1723 uavail = (uint16_t)cab->xstrm.total_out;
1725 * Make sure a read pointer advances to next CFDATA.
1727 if (cfdata->compressed_bytes_remaining > 0) {
1728 ssize_t bytes_avail;
1730 d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1732 if (bytes_avail <= 0) {
1733 *avail = truncated_error(a);
1736 cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1737 cfdata->sum_ptr = d;
1738 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1739 *avail = ARCHIVE_FATAL;
1745 * Translation reversal of x86 processor CALL byte sequence(E8).
1747 lzx_translation(&cab->xstrm, cab->uncompressed_buffer,
1748 cfdata->uncompressed_size,
1749 (cab->entry_cffolder->cfdata_index-1) * 0x8000);
1751 d = cab->uncompressed_buffer + cfdata->read_offset;
1752 *avail = uavail - cfdata->read_offset;
1753 cfdata->uncompressed_avail = uavail;
1760 * We always decompress CFDATA to consume CFDATA as much as we need
1761 * in uncompressed bytes because all CFDATA in a folder are related
1762 * so we do not skip any CFDATA without decompressing.
1763 * Note: If the folder of a CFFILE is iFoldCONTINUED_PREV_AND_NEXT or
1764 * iFoldCONTINUED_FROM_PREV, we won't decompress because a CFDATA for
1765 * the CFFILE is remaining bytes of previous Multivolume CAB file.
1768 cab_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1770 struct cab *cab = (struct cab *)(a->format->data);
1771 struct cfdata *cfdata;
1772 int64_t cbytes, rbytes;
1775 rbytes = cab_minimum_consume_cfdata(a, consumed_bytes);
1777 return (ARCHIVE_FATAL);
1779 cfdata = cab->entry_cfdata;
1780 while (rbytes > 0) {
1783 if (cfdata->compressed_size == 0) {
1784 archive_set_error(&a->archive,
1785 ARCHIVE_ERRNO_FILE_FORMAT,
1787 return (ARCHIVE_FATAL);
1789 cbytes = cfdata->uncompressed_bytes_remaining;
1790 if (cbytes > rbytes)
1794 if (cfdata->uncompressed_avail == 0 &&
1795 (cab->entry_cffile->folder == iFoldCONTINUED_PREV_AND_NEXT ||
1796 cab->entry_cffile->folder == iFoldCONTINUED_FROM_PREV)) {
1797 /* We have not read any data yet. */
1798 if (cbytes == cfdata->uncompressed_bytes_remaining) {
1799 /* Skip whole current CFDATA. */
1800 __archive_read_consume(a,
1801 cfdata->compressed_size);
1802 cab->cab_offset += cfdata->compressed_size;
1803 cfdata->compressed_bytes_remaining = 0;
1804 cfdata->uncompressed_bytes_remaining = 0;
1805 err = cab_next_cfdata(a);
1808 cfdata = cab->entry_cfdata;
1809 if (cfdata->uncompressed_size == 0) {
1810 switch (cab->entry_cffile->folder) {
1811 case iFoldCONTINUED_PREV_AND_NEXT:
1812 case iFoldCONTINUED_TO_NEXT:
1813 case iFoldCONTINUED_FROM_PREV:
1822 cfdata->read_offset += (uint16_t)cbytes;
1823 cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1825 } else if (cbytes == 0) {
1826 err = cab_next_cfdata(a);
1829 cfdata = cab->entry_cfdata;
1830 if (cfdata->uncompressed_size == 0) {
1831 switch (cab->entry_cffile->folder) {
1832 case iFoldCONTINUED_PREV_AND_NEXT:
1833 case iFoldCONTINUED_TO_NEXT:
1834 case iFoldCONTINUED_FROM_PREV:
1835 return (ARCHIVE_FATAL);
1842 while (cbytes > 0) {
1843 (void)cab_read_ahead_cfdata(a, &avail);
1845 return (ARCHIVE_FATAL);
1847 avail = (ssize_t)cbytes;
1848 if (cab_minimum_consume_cfdata(a, avail) < 0)
1849 return (ARCHIVE_FATAL);
1853 return (consumed_bytes);
1857 * Consume CFDATA as much as we have already gotten and
1858 * compute the sum of CFDATA.
1861 cab_minimum_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1863 struct cab *cab = (struct cab *)(a->format->data);
1864 struct cfdata *cfdata;
1865 int64_t cbytes, rbytes;
1868 cfdata = cab->entry_cfdata;
1869 rbytes = consumed_bytes;
1870 if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1871 if (consumed_bytes < cfdata->unconsumed)
1872 cbytes = consumed_bytes;
1874 cbytes = cfdata->unconsumed;
1876 cfdata->read_offset += (uint16_t)cbytes;
1877 cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1878 cfdata->unconsumed -= cbytes;
1880 cbytes = cfdata->uncompressed_avail - cfdata->read_offset;
1882 if (consumed_bytes < cbytes)
1883 cbytes = consumed_bytes;
1885 cfdata->read_offset += (uint16_t)cbytes;
1886 cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1889 if (cfdata->unconsumed) {
1890 cbytes = cfdata->unconsumed;
1891 cfdata->unconsumed = 0;
1896 /* Compute the sum. */
1897 cab_checksum_update(a, (size_t)cbytes);
1899 /* Consume as much as the compressor actually used. */
1900 __archive_read_consume(a, cbytes);
1901 cab->cab_offset += cbytes;
1902 cfdata->compressed_bytes_remaining -= (uint16_t)cbytes;
1903 if (cfdata->compressed_bytes_remaining == 0) {
1904 err = cab_checksum_finish(a);
1913 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1914 * cab->end_of_entry if it consumes all of the data.
1917 cab_read_data(struct archive_read *a, const void **buff,
1918 size_t *size, int64_t *offset)
1920 struct cab *cab = (struct cab *)(a->format->data);
1921 ssize_t bytes_avail;
1923 if (cab->entry_bytes_remaining == 0) {
1926 *offset = cab->entry_offset;
1927 cab->end_of_entry = 1;
1928 return (ARCHIVE_OK);
1931 *buff = cab_read_ahead_cfdata(a, &bytes_avail);
1932 if (bytes_avail <= 0) {
1936 if (bytes_avail == 0 &&
1937 cab->entry_cfdata->uncompressed_size == 0) {
1938 /* All of CFDATA in a folder has been handled. */
1939 archive_set_error(&a->archive,
1940 ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA");
1941 return (ARCHIVE_FATAL);
1943 return ((int)bytes_avail);
1945 if (bytes_avail > cab->entry_bytes_remaining)
1946 bytes_avail = (ssize_t)cab->entry_bytes_remaining;
1948 *size = bytes_avail;
1949 *offset = cab->entry_offset;
1950 cab->entry_offset += bytes_avail;
1951 cab->entry_bytes_remaining -= bytes_avail;
1952 if (cab->entry_bytes_remaining == 0)
1953 cab->end_of_entry = 1;
1954 cab->entry_unconsumed = bytes_avail;
1955 if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1956 /* Don't consume more than current entry used. */
1957 if (cab->entry_cfdata->unconsumed > cab->entry_unconsumed)
1958 cab->entry_cfdata->unconsumed = cab->entry_unconsumed;
1960 return (ARCHIVE_OK);
1964 archive_read_format_cab_read_data_skip(struct archive_read *a)
1967 int64_t bytes_skipped;
1970 cab = (struct cab *)(a->format->data);
1972 if (cab->end_of_archive)
1973 return (ARCHIVE_EOF);
1975 if (!cab->read_data_invoked) {
1976 cab->bytes_skipped += cab->entry_bytes_remaining;
1977 cab->entry_bytes_remaining = 0;
1978 /* This entry is finished and done. */
1979 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1980 return (ARCHIVE_OK);
1983 if (cab->entry_unconsumed) {
1984 /* Consume as much as the compressor actually used. */
1985 r = (int)cab_consume_cfdata(a, cab->entry_unconsumed);
1986 cab->entry_unconsumed = 0;
1989 } else if (cab->entry_cfdata == NULL) {
1990 r = cab_next_cfdata(a);
1995 /* if we've already read to end of data, we're done. */
1996 if (cab->end_of_entry_cleanup)
1997 return (ARCHIVE_OK);
2000 * If the length is at the beginning, we can skip the
2001 * compressed data much more quickly.
2003 bytes_skipped = cab_consume_cfdata(a, cab->entry_bytes_remaining);
2004 if (bytes_skipped < 0)
2005 return (ARCHIVE_FATAL);
2007 /* If the compression type is none(uncompressed), we've already
2008 * consumed data as much as the current entry size. */
2009 if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
2010 cab->entry_cfdata != NULL)
2011 cab->entry_cfdata->unconsumed = 0;
2013 /* This entry is finished and done. */
2014 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
2015 return (ARCHIVE_OK);
2019 archive_read_format_cab_cleanup(struct archive_read *a)
2021 struct cab *cab = (struct cab *)(a->format->data);
2022 struct cfheader *hd = &cab->cfheader;
2025 if (hd->folder_array != NULL) {
2026 for (i = 0; i < hd->folder_count; i++)
2027 free(hd->folder_array[i].cfdata.memimage);
2028 free(hd->folder_array);
2030 if (hd->file_array != NULL) {
2031 for (i = 0; i < cab->cfheader.file_count; i++)
2032 archive_string_free(&(hd->file_array[i].pathname));
2033 free(hd->file_array);
2036 if (cab->stream_valid)
2037 inflateEnd(&cab->stream);
2039 lzx_decode_free(&cab->xstrm);
2040 archive_wstring_free(&cab->ws);
2041 free(cab->uncompressed_buffer);
2043 (a->format->data) = NULL;
2044 return (ARCHIVE_OK);
2047 /* Convert an MSDOS-style date/time into Unix-style time. */
2049 cab_dos_time(const unsigned char *p)
2054 msDate = archive_le16dec(p);
2055 msTime = archive_le16dec(p+2);
2057 memset(&ts, 0, sizeof(ts));
2058 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
2059 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
2060 ts.tm_mday = msDate & 0x1f; /* Day of month. */
2061 ts.tm_hour = (msTime >> 11) & 0x1f;
2062 ts.tm_min = (msTime >> 5) & 0x3f;
2063 ts.tm_sec = (msTime << 1) & 0x3e;
2065 return (mktime(&ts));
2068 /*****************************************************************
2070 * LZX decompression code.
2072 *****************************************************************/
2075 * Initialize LZX decoder.
2077 * Returns ARCHIVE_OK if initialization was successful.
2078 * Returns ARCHIVE_FAILED if w_bits has unsupported value.
2079 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
2083 lzx_decode_init(struct lzx_stream *strm, int w_bits)
2086 int slot, w_size, w_slot;
2090 if (strm->ds == NULL) {
2091 strm->ds = calloc(1, sizeof(*strm->ds));
2092 if (strm->ds == NULL)
2093 return (ARCHIVE_FATAL);
2096 ds->error = ARCHIVE_FAILED;
2098 /* Allow bits from 15(32KBi) up to 21(2MBi) */
2099 if (w_bits < SLOT_BASE || w_bits > SLOT_MAX)
2100 return (ARCHIVE_FAILED);
2102 ds->error = ARCHIVE_FATAL;
2107 w_size = ds->w_size;
2108 w_slot = slots[w_bits - SLOT_BASE];
2109 ds->w_size = 1U << w_bits;
2110 ds->w_mask = ds->w_size -1;
2111 if (ds->w_buff == NULL || w_size != ds->w_size) {
2113 ds->w_buff = malloc(ds->w_size);
2114 if (ds->w_buff == NULL)
2115 return (ARCHIVE_FATAL);
2117 ds->pos_tbl = malloc(sizeof(ds->pos_tbl[0]) * w_slot);
2118 if (ds->pos_tbl == NULL)
2119 return (ARCHIVE_FATAL);
2120 lzx_huffman_free(&(ds->mt));
2123 for (footer = 0; footer < 18; footer++)
2124 base_inc[footer] = 1 << footer;
2126 for (slot = 0; slot < w_slot; slot++) {
2131 base += base_inc[footer];
2134 for (n = base; n; n >>= 1)
2139 ds->pos_tbl[slot].base = base;
2140 ds->pos_tbl[slot].footer_bits = footer;
2145 ds->br.cache_buffer = 0;
2146 ds->br.cache_avail = 0;
2147 ds->r0 = ds->r1 = ds->r2 = 1;
2149 /* Initialize aligned offset tree. */
2150 if (lzx_huffman_init(&(ds->at), 8, 8) != ARCHIVE_OK)
2151 return (ARCHIVE_FATAL);
2153 /* Initialize pre-tree. */
2154 if (lzx_huffman_init(&(ds->pt), 20, 10) != ARCHIVE_OK)
2155 return (ARCHIVE_FATAL);
2157 /* Initialize Main tree. */
2158 if (lzx_huffman_init(&(ds->mt), 256+(w_slot<<3), 16)
2160 return (ARCHIVE_FATAL);
2162 /* Initialize Length tree. */
2163 if (lzx_huffman_init(&(ds->lt), 249, 16) != ARCHIVE_OK)
2164 return (ARCHIVE_FATAL);
2168 return (ARCHIVE_OK);
2172 * Release LZX decoder.
2175 lzx_decode_free(struct lzx_stream *strm)
2178 if (strm->ds == NULL)
2180 free(strm->ds->w_buff);
2181 free(strm->ds->pos_tbl);
2182 lzx_huffman_free(&(strm->ds->at));
2183 lzx_huffman_free(&(strm->ds->pt));
2184 lzx_huffman_free(&(strm->ds->mt));
2185 lzx_huffman_free(&(strm->ds->lt));
2191 * E8 Call Translation reversal.
2194 lzx_translation(struct lzx_stream *strm, void *p, size_t size, uint32_t offset)
2196 struct lzx_dec *ds = strm->ds;
2197 unsigned char *b, *end;
2199 if (!ds->translation || size <= 10)
2202 end = b + size - 10;
2203 while (b < end && (b = memchr(b, 0xE8, end - b)) != NULL) {
2204 size_t i = b - (unsigned char *)p;
2205 int32_t cp, displacement, value;
2207 cp = (int32_t)(offset + (uint32_t)i);
2208 value = archive_le32dec(&b[1]);
2209 if (value >= -cp && value < (int32_t)ds->translation_size) {
2211 displacement = value - cp;
2213 displacement = value + ds->translation_size;
2214 archive_le32enc(&b[1], (uint32_t)displacement);
2221 * Bit stream reader.
2223 /* Check that the cache buffer has enough bits. */
2224 #define lzx_br_has(br, n) ((br)->cache_avail >= n)
2225 /* Get compressed data by bit. */
2226 #define lzx_br_bits(br, n) \
2227 (((uint32_t)((br)->cache_buffer >> \
2228 ((br)->cache_avail - (n)))) & cache_masks[n])
2229 #define lzx_br_bits_forced(br, n) \
2230 (((uint32_t)((br)->cache_buffer << \
2231 ((n) - (br)->cache_avail))) & cache_masks[n])
2232 /* Read ahead to make sure the cache buffer has enough compressed data we
2234 * True : completed, there is enough data in the cache buffer.
2235 * False : we met that strm->next_in is empty, we have to get following
2237 #define lzx_br_read_ahead_0(strm, br, n) \
2238 (lzx_br_has((br), (n)) || lzx_br_fillup(strm, br))
2239 /* True : the cache buffer has some bits as much as we need.
2240 * False : there are no enough bits in the cache buffer to be used,
2241 * we have to get following bytes if we could. */
2242 #define lzx_br_read_ahead(strm, br, n) \
2243 (lzx_br_read_ahead_0((strm), (br), (n)) || lzx_br_has((br), (n)))
2245 /* Notify how many bits we consumed. */
2246 #define lzx_br_consume(br, n) ((br)->cache_avail -= (n))
2247 #define lzx_br_consume_unaligned_bits(br) ((br)->cache_avail &= ~0x0f)
2249 #define lzx_br_is_unaligned(br) ((br)->cache_avail & 0x0f)
2251 static const uint32_t cache_masks[] = {
2252 0x00000000, 0x00000001, 0x00000003, 0x00000007,
2253 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
2254 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,
2255 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
2256 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF,
2257 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
2258 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF,
2259 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF,
2260 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
2264 * Shift away used bits in the cache data and fill it up with following bits.
2265 * Call this when cache buffer does not have enough bits you need.
2267 * Returns 1 if the cache buffer is full.
2268 * Returns 0 if the cache buffer is not full; input buffer is empty.
2271 lzx_br_fillup(struct lzx_stream *strm, struct lzx_br *br)
2274 * x86 processor family can read misaligned data without an access error.
2276 int n = CACHE_BITS - br->cache_avail;
2281 if (strm->avail_in >= 8) {
2283 ((uint64_t)strm->next_in[1]) << 56 |
2284 ((uint64_t)strm->next_in[0]) << 48 |
2285 ((uint64_t)strm->next_in[3]) << 40 |
2286 ((uint64_t)strm->next_in[2]) << 32 |
2287 ((uint32_t)strm->next_in[5]) << 24 |
2288 ((uint32_t)strm->next_in[4]) << 16 |
2289 ((uint32_t)strm->next_in[7]) << 8 |
2290 (uint32_t)strm->next_in[6];
2292 strm->avail_in -= 8;
2293 br->cache_avail += 8 * 8;
2298 if (strm->avail_in >= 6) {
2300 (br->cache_buffer << 48) |
2301 ((uint64_t)strm->next_in[1]) << 40 |
2302 ((uint64_t)strm->next_in[0]) << 32 |
2303 ((uint32_t)strm->next_in[3]) << 24 |
2304 ((uint32_t)strm->next_in[2]) << 16 |
2305 ((uint32_t)strm->next_in[5]) << 8 |
2306 (uint32_t)strm->next_in[4];
2308 strm->avail_in -= 6;
2309 br->cache_avail += 6 * 8;
2314 /* We have enough compressed data in
2315 * the cache buffer.*/
2320 if (strm->avail_in < 2) {
2321 /* There is not enough compressed data to
2322 * fill up the cache buffer. */
2323 if (strm->avail_in == 1) {
2324 br->odd = *strm->next_in++;
2331 (br->cache_buffer << 16) |
2332 archive_le16dec(strm->next_in);
2334 strm->avail_in -= 2;
2335 br->cache_avail += 16;
2341 lzx_br_fixup(struct lzx_stream *strm, struct lzx_br *br)
2343 int n = CACHE_BITS - br->cache_avail;
2345 if (br->have_odd && n >= 16 && strm->avail_in > 0) {
2347 (br->cache_buffer << 16) |
2348 ((uint16_t)(*strm->next_in)) << 8 | br->odd;
2351 br->cache_avail += 16;
2357 lzx_cleanup_bitstream(struct lzx_stream *strm)
2359 strm->ds->br.cache_avail = 0;
2360 strm->ds->br.have_odd = 0;
2366 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2367 * Please set available buffer and call this function again.
2368 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2369 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2370 * is broken or you do not set 'last' flag properly.
2372 #define ST_RD_TRANSLATION 0
2373 #define ST_RD_TRANSLATION_SIZE 1
2374 #define ST_RD_BLOCK_TYPE 2
2375 #define ST_RD_BLOCK_SIZE 3
2376 #define ST_RD_ALIGNMENT 4
2380 #define ST_COPY_UNCOMP1 8
2381 #define ST_COPY_UNCOMP2 9
2382 #define ST_RD_ALIGNED_OFFSET 10
2383 #define ST_RD_VERBATIM 11
2384 #define ST_RD_PRE_MAIN_TREE_256 12
2385 #define ST_MAIN_TREE_256 13
2386 #define ST_RD_PRE_MAIN_TREE_REM 14
2387 #define ST_MAIN_TREE_REM 15
2388 #define ST_RD_PRE_LENGTH_TREE 16
2389 #define ST_LENGTH_TREE 17
2391 #define ST_LENGTH 19
2392 #define ST_OFFSET 20
2393 #define ST_REAL_POS 21
2397 lzx_decode(struct lzx_stream *strm, int last)
2399 struct lzx_dec *ds = strm->ds;
2406 avail_in = strm->avail_in;
2407 lzx_br_fixup(strm, &(ds->br));
2409 if (ds->state < ST_MAIN)
2410 r = lzx_read_blocks(strm, last);
2412 int64_t bytes_written = strm->avail_out;
2413 r = lzx_decode_blocks(strm, last);
2414 bytes_written -= strm->avail_out;
2415 strm->next_out += bytes_written;
2416 strm->total_out += bytes_written;
2419 strm->total_in += avail_in - strm->avail_in;
2424 lzx_read_blocks(struct lzx_stream *strm, int last)
2426 struct lzx_dec *ds = strm->ds;
2427 struct lzx_br *br = &(ds->br);
2431 switch (ds->state) {
2432 case ST_RD_TRANSLATION:
2433 if (!lzx_br_read_ahead(strm, br, 1)) {
2434 ds->state = ST_RD_TRANSLATION;
2437 return (ARCHIVE_OK);
2439 ds->translation = lzx_br_bits(br, 1);
2440 lzx_br_consume(br, 1);
2442 case ST_RD_TRANSLATION_SIZE:
2443 if (ds->translation) {
2444 if (!lzx_br_read_ahead(strm, br, 32)) {
2445 ds->state = ST_RD_TRANSLATION_SIZE;
2448 return (ARCHIVE_OK);
2450 ds->translation_size = lzx_br_bits(br, 16);
2451 lzx_br_consume(br, 16);
2452 ds->translation_size <<= 16;
2453 ds->translation_size |= lzx_br_bits(br, 16);
2454 lzx_br_consume(br, 16);
2457 case ST_RD_BLOCK_TYPE:
2458 if (!lzx_br_read_ahead(strm, br, 3)) {
2459 ds->state = ST_RD_BLOCK_TYPE;
2462 return (ARCHIVE_OK);
2464 ds->block_type = lzx_br_bits(br, 3);
2465 lzx_br_consume(br, 3);
2466 /* Check a block type. */
2467 switch (ds->block_type) {
2468 case VERBATIM_BLOCK:
2469 case ALIGNED_OFFSET_BLOCK:
2470 case UNCOMPRESSED_BLOCK:
2473 goto failed;/* Invalid */
2476 case ST_RD_BLOCK_SIZE:
2477 if (!lzx_br_read_ahead(strm, br, 24)) {
2478 ds->state = ST_RD_BLOCK_SIZE;
2481 return (ARCHIVE_OK);
2483 ds->block_size = lzx_br_bits(br, 8);
2484 lzx_br_consume(br, 8);
2485 ds->block_size <<= 16;
2486 ds->block_size |= lzx_br_bits(br, 16);
2487 lzx_br_consume(br, 16);
2488 if (ds->block_size == 0)
2490 ds->block_bytes_avail = ds->block_size;
2491 if (ds->block_type != UNCOMPRESSED_BLOCK) {
2492 if (ds->block_type == VERBATIM_BLOCK)
2493 ds->state = ST_RD_VERBATIM;
2495 ds->state = ST_RD_ALIGNED_OFFSET;
2499 case ST_RD_ALIGNMENT:
2501 * Handle an Uncompressed Block.
2503 /* Skip padding to align following field on
2504 * 16-bit boundary. */
2505 if (lzx_br_is_unaligned(br))
2506 lzx_br_consume_unaligned_bits(br);
2508 if (lzx_br_read_ahead(strm, br, 16))
2509 lzx_br_consume(br, 16);
2511 ds->state = ST_RD_ALIGNMENT;
2514 return (ARCHIVE_OK);
2517 /* Preparation to read repeated offsets R0,R1 and R2. */
2518 ds->rbytes_avail = 0;
2519 ds->state = ST_RD_R0;
2526 /* Drain bits in the cache buffer of
2528 if (lzx_br_has(br, 32)) {
2529 u16 = lzx_br_bits(br, 16);
2530 lzx_br_consume(br, 16);
2531 archive_le16enc(ds->rbytes, u16);
2532 u16 = lzx_br_bits(br, 16);
2533 lzx_br_consume(br, 16);
2534 archive_le16enc(ds->rbytes+2, u16);
2535 ds->rbytes_avail = 4;
2536 } else if (lzx_br_has(br, 16)) {
2537 u16 = lzx_br_bits(br, 16);
2538 lzx_br_consume(br, 16);
2539 archive_le16enc(ds->rbytes, u16);
2540 ds->rbytes_avail = 2;
2542 if (ds->rbytes_avail < 4 && ds->br.have_odd) {
2543 ds->rbytes[ds->rbytes_avail++] =
2545 ds->br.have_odd = 0;
2547 while (ds->rbytes_avail < 4) {
2548 if (strm->avail_in <= 0) {
2551 return (ARCHIVE_OK);
2553 ds->rbytes[ds->rbytes_avail++] =
2557 ds->rbytes_avail = 0;
2558 if (ds->state == ST_RD_R0) {
2559 ds->r0 = archive_le32dec(ds->rbytes);
2562 ds->state = ST_RD_R1;
2563 } else if (ds->state == ST_RD_R1) {
2564 ds->r1 = archive_le32dec(ds->rbytes);
2567 ds->state = ST_RD_R2;
2568 } else if (ds->state == ST_RD_R2) {
2569 ds->r2 = archive_le32dec(ds->rbytes);
2572 /* We've gotten all repeated offsets. */
2573 ds->state = ST_COPY_UNCOMP1;
2575 } while (ds->state != ST_COPY_UNCOMP1);
2577 case ST_COPY_UNCOMP1:
2579 * Copy bytes form next_in to next_out directly.
2581 while (ds->block_bytes_avail) {
2584 if (strm->avail_out <= 0)
2585 /* Output buffer is empty. */
2586 return (ARCHIVE_OK);
2587 if (strm->avail_in <= 0) {
2588 /* Input buffer is empty. */
2591 return (ARCHIVE_OK);
2593 l = (int)ds->block_bytes_avail;
2594 if (l > ds->w_size - ds->w_pos)
2595 l = ds->w_size - ds->w_pos;
2596 if (l > strm->avail_out)
2597 l = (int)strm->avail_out;
2598 if (l > strm->avail_in)
2599 l = (int)strm->avail_in;
2600 memcpy(strm->next_out, strm->next_in, l);
2601 memcpy(&(ds->w_buff[ds->w_pos]),
2604 strm->avail_in -= l;
2605 strm->next_out += l;
2606 strm->avail_out -= l;
2607 strm->total_out += l;
2608 ds->w_pos = (ds->w_pos + l) & ds->w_mask;
2609 ds->block_bytes_avail -= l;
2612 case ST_COPY_UNCOMP2:
2613 /* Re-align; skip padding byte. */
2614 if (ds->block_size & 1) {
2615 if (strm->avail_in <= 0) {
2616 /* Input buffer is empty. */
2617 ds->state = ST_COPY_UNCOMP2;
2620 return (ARCHIVE_OK);
2625 /* This block ended. */
2626 ds->state = ST_RD_BLOCK_TYPE;
2627 return (ARCHIVE_EOF);
2628 /********************/
2629 case ST_RD_ALIGNED_OFFSET:
2631 * Read Aligned offset tree.
2633 if (!lzx_br_read_ahead(strm, br, 3 * ds->at.len_size)) {
2634 ds->state = ST_RD_ALIGNED_OFFSET;
2637 return (ARCHIVE_OK);
2639 memset(ds->at.freq, 0, sizeof(ds->at.freq));
2640 for (i = 0; i < ds->at.len_size; i++) {
2641 ds->at.bitlen[i] = lzx_br_bits(br, 3);
2642 ds->at.freq[ds->at.bitlen[i]]++;
2643 lzx_br_consume(br, 3);
2645 if (!lzx_make_huffman_table(&ds->at))
2648 case ST_RD_VERBATIM:
2651 case ST_RD_PRE_MAIN_TREE_256:
2653 * Read Pre-tree for first 256 elements of main tree.
2655 if (!lzx_read_pre_tree(strm)) {
2656 ds->state = ST_RD_PRE_MAIN_TREE_256;
2659 return (ARCHIVE_OK);
2661 if (!lzx_make_huffman_table(&ds->pt))
2665 case ST_MAIN_TREE_256:
2667 * Get path lengths of first 256 elements of main tree.
2669 r = lzx_read_bitlen(strm, &ds->mt, 256);
2673 ds->state = ST_MAIN_TREE_256;
2676 return (ARCHIVE_OK);
2680 case ST_RD_PRE_MAIN_TREE_REM:
2682 * Read Pre-tree for remaining elements of main tree.
2684 if (!lzx_read_pre_tree(strm)) {
2685 ds->state = ST_RD_PRE_MAIN_TREE_REM;
2688 return (ARCHIVE_OK);
2690 if (!lzx_make_huffman_table(&ds->pt))
2694 case ST_MAIN_TREE_REM:
2696 * Get path lengths of remaining elements of main tree.
2698 r = lzx_read_bitlen(strm, &ds->mt, -1);
2702 ds->state = ST_MAIN_TREE_REM;
2705 return (ARCHIVE_OK);
2707 if (!lzx_make_huffman_table(&ds->mt))
2711 case ST_RD_PRE_LENGTH_TREE:
2713 * Read Pre-tree for remaining elements of main tree.
2715 if (!lzx_read_pre_tree(strm)) {
2716 ds->state = ST_RD_PRE_LENGTH_TREE;
2719 return (ARCHIVE_OK);
2721 if (!lzx_make_huffman_table(&ds->pt))
2725 case ST_LENGTH_TREE:
2727 * Get path lengths of remaining elements of main tree.
2729 r = lzx_read_bitlen(strm, &ds->lt, -1);
2733 ds->state = ST_LENGTH_TREE;
2736 return (ARCHIVE_OK);
2738 if (!lzx_make_huffman_table(&ds->lt))
2740 ds->state = ST_MAIN;
2745 return (ds->error = ARCHIVE_FAILED);
2749 lzx_decode_blocks(struct lzx_stream *strm, int last)
2751 struct lzx_dec *ds = strm->ds;
2752 struct lzx_br bre = ds->br;
2753 struct huffman *at = &(ds->at), *lt = &(ds->lt), *mt = &(ds->mt);
2754 const struct lzx_pos_tbl *pos_tbl = ds->pos_tbl;
2755 unsigned char *noutp = strm->next_out;
2756 unsigned char *endp = noutp + strm->avail_out;
2757 unsigned char *w_buff = ds->w_buff;
2758 unsigned char *at_bitlen = at->bitlen;
2759 unsigned char *lt_bitlen = lt->bitlen;
2760 unsigned char *mt_bitlen = mt->bitlen;
2761 size_t block_bytes_avail = ds->block_bytes_avail;
2762 int at_max_bits = at->max_bits;
2763 int lt_max_bits = lt->max_bits;
2764 int mt_max_bits = mt->max_bits;
2765 int c, copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2766 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2767 int length_header = ds->length_header;
2768 int offset_bits = ds->offset_bits;
2769 int position_slot = ds->position_slot;
2770 int r0 = ds->r0, r1 = ds->r1, r2 = ds->r2;
2771 int state = ds->state;
2772 char block_type = ds->block_type;
2778 if (block_bytes_avail == 0) {
2779 /* This block ended. */
2780 ds->state = ST_RD_BLOCK_TYPE;
2782 ds->block_bytes_avail =
2784 ds->copy_len = copy_len;
2785 ds->copy_pos = copy_pos;
2786 ds->length_header = length_header;
2787 ds->position_slot = position_slot;
2788 ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
2790 strm->avail_out = endp - noutp;
2791 return (ARCHIVE_EOF);
2794 /* Output buffer is empty. */
2797 if (!lzx_br_read_ahead(strm, &bre,
2801 /* Remaining bits are less than
2802 * maximum bits(mt.max_bits) but maybe
2803 * it still remains as much as we need,
2804 * so we should try to use it with
2806 c = lzx_decode_huffman(mt,
2808 &bre, mt_max_bits));
2809 lzx_br_consume(&bre, mt_bitlen[c]);
2810 if (!lzx_br_has(&bre, 0))
2811 goto failed;/* Over read. */
2813 c = lzx_decode_huffman(mt,
2814 lzx_br_bits(&bre, mt_max_bits));
2815 lzx_br_consume(&bre, mt_bitlen[c]);
2820 * 'c' is exactly literal code.
2822 /* Save a decoded code to reference it
2825 w_pos = (w_pos + 1) & w_mask;
2826 /* Store the decoded code to output buffer. */
2828 block_bytes_avail--;
2831 * Get a match code, its length and offset.
2834 length_header = c & 7;
2835 position_slot = c >> 3;
2841 if (length_header == 7) {
2842 if (!lzx_br_read_ahead(strm, &bre,
2848 c = lzx_decode_huffman(lt,
2850 &bre, lt_max_bits));
2851 lzx_br_consume(&bre, lt_bitlen[c]);
2852 if (!lzx_br_has(&bre, 0))
2853 goto failed;/* Over read. */
2855 c = lzx_decode_huffman(lt,
2856 lzx_br_bits(&bre, lt_max_bits));
2857 lzx_br_consume(&bre, lt_bitlen[c]);
2859 copy_len = c + 7 + 2;
2861 copy_len = length_header + 2;
2862 if ((size_t)copy_len > block_bytes_avail)
2867 switch (position_slot) {
2868 case 0: /* Use repeated offset 0. */
2870 state = ST_REAL_POS;
2872 case 1: /* Use repeated offset 1. */
2874 /* Swap repeated offset. */
2877 state = ST_REAL_POS;
2879 case 2: /* Use repeated offset 2. */
2881 /* Swap repeated offset. */
2884 state = ST_REAL_POS;
2888 pos_tbl[position_slot].footer_bits;
2894 * Get the offset, which is a distance from
2895 * current window position.
2897 if (block_type == ALIGNED_OFFSET_BLOCK &&
2899 int offbits = offset_bits - 3;
2901 if (!lzx_br_read_ahead(strm, &bre, offbits)) {
2907 copy_pos = lzx_br_bits(&bre, offbits) << 3;
2909 /* Get an aligned number. */
2910 if (!lzx_br_read_ahead(strm, &bre,
2911 offbits + at_max_bits)) {
2916 lzx_br_consume(&bre, offbits);
2917 c = lzx_decode_huffman(at,
2918 lzx_br_bits_forced(&bre,
2920 lzx_br_consume(&bre, at_bitlen[c]);
2921 if (!lzx_br_has(&bre, 0))
2922 goto failed;/* Over read. */
2924 lzx_br_consume(&bre, offbits);
2925 c = lzx_decode_huffman(at,
2926 lzx_br_bits(&bre, at_max_bits));
2927 lzx_br_consume(&bre, at_bitlen[c]);
2929 /* Add an aligned number. */
2932 if (!lzx_br_read_ahead(strm, &bre,
2939 copy_pos = lzx_br_bits(&bre, offset_bits);
2940 lzx_br_consume(&bre, offset_bits);
2942 copy_pos += pos_tbl[position_slot].base -2;
2944 /* Update repeated offset LRU queue. */
2951 * Compute a real position in window.
2953 copy_pos = (w_pos - copy_pos) & w_mask;
2957 * Copy several bytes as extracted data from the window
2958 * into the output buffer.
2961 const unsigned char *s;
2965 if (copy_pos > w_pos) {
2966 if (l > w_size - copy_pos)
2967 l = w_size - copy_pos;
2969 if (l > w_size - w_pos)
2972 if (noutp + l >= endp)
2973 l = (int)(endp - noutp);
2974 s = w_buff + copy_pos;
2975 if (l >= 8 && ((copy_pos + l < w_pos)
2976 || (w_pos + l < copy_pos))) {
2977 memcpy(w_buff + w_pos, s, l);
2978 memcpy(noutp, s, l);
2984 for (li = 0; li < l; li++)
2985 noutp[li] = d[li] = s[li];
2988 copy_pos = (copy_pos + l) & w_mask;
2989 w_pos = (w_pos + l) & w_mask;
2990 block_bytes_avail -= l;
2992 /* A copy of current pattern ended. */
2995 if (noutp >= endp) {
2996 /* Output buffer is empty. */
3006 return (ds->error = ARCHIVE_FAILED);
3009 ds->block_bytes_avail = block_bytes_avail;
3010 ds->copy_len = copy_len;
3011 ds->copy_pos = copy_pos;
3012 ds->length_header = length_header;
3013 ds->offset_bits = offset_bits;
3014 ds->position_slot = position_slot;
3015 ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
3018 strm->avail_out = endp - noutp;
3019 return (ARCHIVE_OK);
3023 lzx_read_pre_tree(struct lzx_stream *strm)
3025 struct lzx_dec *ds = strm->ds;
3026 struct lzx_br *br = &(ds->br);
3030 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
3031 for (i = ds->loop; i < ds->pt.len_size; i++) {
3032 if (!lzx_br_read_ahead(strm, br, 4)) {
3036 ds->pt.bitlen[i] = lzx_br_bits(br, 4);
3037 ds->pt.freq[ds->pt.bitlen[i]]++;
3038 lzx_br_consume(br, 4);
3045 * Read a bunch of bit-lengths from pre-tree.
3048 lzx_read_bitlen(struct lzx_stream *strm, struct huffman *d, int end)
3050 struct lzx_dec *ds = strm->ds;
3051 struct lzx_br *br = &(ds->br);
3052 int c, i, j, ret, same;
3057 memset(d->freq, 0, sizeof(d->freq));
3063 if (!lzx_br_read_ahead(strm, br, ds->pt.max_bits))
3065 rbits = lzx_br_bits(br, ds->pt.max_bits);
3066 c = lzx_decode_huffman(&(ds->pt), rbits);
3068 case 17:/* several zero lengths, from 4 to 19. */
3069 if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+4))
3071 lzx_br_consume(br, ds->pt.bitlen[c]);
3072 same = lzx_br_bits(br, 4) + 4;
3074 return (-1);/* Invalid */
3075 lzx_br_consume(br, 4);
3076 for (j = 0; j < same; j++)
3079 case 18:/* many zero lengths, from 20 to 51. */
3080 if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+5))
3082 lzx_br_consume(br, ds->pt.bitlen[c]);
3083 same = lzx_br_bits(br, 5) + 20;
3085 return (-1);/* Invalid */
3086 lzx_br_consume(br, 5);
3087 memset(d->bitlen + i, 0, same);
3090 case 19:/* a few same lengths. */
3091 if (!lzx_br_read_ahead(strm, br,
3092 ds->pt.bitlen[c]+1+ds->pt.max_bits))
3094 lzx_br_consume(br, ds->pt.bitlen[c]);
3095 same = lzx_br_bits(br, 1) + 4;
3098 lzx_br_consume(br, 1);
3099 rbits = lzx_br_bits(br, ds->pt.max_bits);
3100 c = lzx_decode_huffman(&(ds->pt), rbits);
3101 lzx_br_consume(br, ds->pt.bitlen[c]);
3102 c = (d->bitlen[i] - c + 17) % 17;
3104 return (-1);/* Invalid */
3105 for (j = 0; j < same; j++)
3110 lzx_br_consume(br, ds->pt.bitlen[c]);
3111 c = (d->bitlen[i] - c + 17) % 17;
3113 return (-1);/* Invalid */
3126 lzx_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
3130 if (hf->bitlen == NULL || hf->len_size != (int)len_size) {
3132 hf->bitlen = calloc(len_size, sizeof(hf->bitlen[0]));
3133 if (hf->bitlen == NULL)
3134 return (ARCHIVE_FATAL);
3135 hf->len_size = (int)len_size;
3137 memset(hf->bitlen, 0, len_size * sizeof(hf->bitlen[0]));
3138 if (hf->tbl == NULL) {
3139 if (tbl_bits < HTBL_BITS)
3143 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
3144 if (hf->tbl == NULL)
3145 return (ARCHIVE_FATAL);
3146 hf->tbl_bits = tbl_bits;
3148 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
3149 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
3150 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
3151 if (hf->tree == NULL)
3152 return (ARCHIVE_FATAL);
3154 return (ARCHIVE_OK);
3158 lzx_huffman_free(struct huffman *hf)
3166 * Make a huffman coding table.
3169 lzx_make_huffman_table(struct huffman *hf)
3172 const unsigned char *bitlen;
3173 int bitptn[17], weight[17];
3174 int i, maxbits = 0, ptn, tbl_size, w;
3175 int diffbits, len_avail;
3178 * Initialize bit patterns.
3181 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
3185 ptn += hf->freq[i] * w;
3189 if ((ptn & 0xffff) != 0 || maxbits > hf->tbl_bits)
3190 return (0);/* Invalid */
3192 hf->max_bits = maxbits;
3195 * Cut out extra bits which we won't house in the table.
3196 * This preparation reduces the same calculation in the for-loop
3200 int ebits = 16 - maxbits;
3201 for (i = 1; i <= maxbits; i++) {
3202 bitptn[i] >>= ebits;
3203 weight[i] >>= ebits;
3206 if (maxbits > HTBL_BITS) {
3210 diffbits = maxbits - HTBL_BITS;
3211 for (i = 1; i <= HTBL_BITS; i++) {
3212 bitptn[i] >>= diffbits;
3213 weight[i] >>= diffbits;
3215 htbl_max = bitptn[HTBL_BITS] +
3216 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
3217 p = &(hf->tbl[htbl_max]);
3218 while (p < &hf->tbl[1U<<HTBL_BITS])
3222 hf->shift_bits = diffbits;
3227 tbl_size = 1 << HTBL_BITS;
3229 bitlen = hf->bitlen;
3230 len_avail = hf->len_size;
3232 for (i = 0; i < len_avail; i++) {
3241 /* Get a bit pattern */
3245 if (len <= HTBL_BITS) {
3246 /* Calculate next bit pattern */
3247 if ((bitptn[len] = ptn + cnt) > tbl_size)
3248 return (0);/* Invalid */
3249 /* Update the table */
3252 p[cnt] = (uint16_t)i;
3257 * A bit length is too big to be housed to a direct table,
3258 * so we use a tree model for its extra bits.
3260 bitptn[len] = ptn + cnt;
3261 bit = 1U << (diffbits -1);
3262 extlen = len - HTBL_BITS;
3264 p = &(tbl[ptn >> diffbits]);
3266 *p = len_avail + hf->tree_used;
3267 ht = &(hf->tree[hf->tree_used++]);
3268 if (hf->tree_used > hf->tree_avail)
3269 return (0);/* Invalid */
3273 if (*p < len_avail ||
3274 *p >= (len_avail + hf->tree_used))
3275 return (0);/* Invalid */
3276 ht = &(hf->tree[*p - len_avail]);
3278 while (--extlen > 0) {
3280 if (ht->left < len_avail) {
3281 ht->left = len_avail + hf->tree_used;
3282 ht = &(hf->tree[hf->tree_used++]);
3283 if (hf->tree_used > hf->tree_avail)
3284 return (0);/* Invalid */
3288 ht = &(hf->tree[ht->left - len_avail]);
3291 if (ht->right < len_avail) {
3292 ht->right = len_avail + hf->tree_used;
3293 ht = &(hf->tree[hf->tree_used++]);
3294 if (hf->tree_used > hf->tree_avail)
3295 return (0);/* Invalid */
3299 ht = &(hf->tree[ht->right - len_avail]);
3306 return (0);/* Invalid */
3307 ht->left = (uint16_t)i;
3310 return (0);/* Invalid */
3311 ht->right = (uint16_t)i;
3318 lzx_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
3324 extlen = hf->shift_bits;
3325 while (c >= hf->len_size) {
3327 if (extlen-- <= 0 || c >= hf->tree_used)
3329 if (rbits & (1U << extlen))
3338 lzx_decode_huffman(struct huffman *hf, unsigned rbits)
3342 * At first search an index table for a bit pattern.
3343 * If it fails, search a huffman tree for.
3345 c = hf->tbl[rbits >> hf->shift_bits];
3346 if (c < hf->len_size)
3348 /* This bit pattern needs to be found out at a huffman tree. */
3349 return (lzx_decode_huffman_tree(hf, rbits, c));