2 * Copyright (c) 2010-2012 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
45 #include "archive_entry.h"
46 #include "archive_entry_locale.h"
47 #include "archive_private.h"
48 #include "archive_read_private.h"
49 #include "archive_endian.h"
53 /* Decoding status. */
57 * Window to see last decoded data, from 32KBi to 2MBi.
61 /* Window buffer, which is a loop buffer. */
62 unsigned char *w_buff;
63 /* The insert position to the window. */
65 /* The position where we can copy decoded code from the window. */
67 /* The length how many bytes we can copy decoded code from
70 /* Translation reversal for x86 processor CALL byte sequence(E8).
71 * This is used for LZX only. */
72 uint32_t translation_size;
75 #define VERBATIM_BLOCK 1
76 #define ALIGNED_OFFSET_BLOCK 2
77 #define UNCOMPRESSED_BLOCK 3
79 size_t block_bytes_avail;
80 /* Repeated offset. */
82 unsigned char rbytes[4];
96 #define CACHE_TYPE uint64_t
97 #define CACHE_BITS (8 * sizeof(CACHE_TYPE))
99 CACHE_TYPE cache_buffer;
100 /* Indicates how many bits avail in cache_buffer. */
112 unsigned char *bitlen;
115 * Use a index table. It's faster than searching a huffman
116 * coding tree, which is a binary tree. But a use of a large
117 * index table causes L1 cache read miss many times.
125 /* Direct access table. */
127 /* Binary tree table for extra bits over the direct access. */
138 static const int slots[] = {
139 30, 32, 34, 36, 38, 42, 50, 66, 98, 162, 290
142 #define SLOT_MAX 21/*->25*/
145 const unsigned char *next_in;
148 unsigned char *next_out;
155 * Cabinet file definitions.
157 /* CFHEADER offset */
158 #define CFHEADER_signature 0
159 #define CFHEADER_cbCabinet 8
160 #define CFHEADER_coffFiles 16
161 #define CFHEADER_versionMinor 24
162 #define CFHEADER_versionMajor 25
163 #define CFHEADER_cFolders 26
164 #define CFHEADER_cFiles 28
165 #define CFHEADER_flags 30
166 #define CFHEADER_setID 32
167 #define CFHEADER_iCabinet 34
168 #define CFHEADER_cbCFHeader 36
169 #define CFHEADER_cbCFFolder 38
170 #define CFHEADER_cbCFData 39
172 /* CFFOLDER offset */
173 #define CFFOLDER_coffCabStart 0
174 #define CFFOLDER_cCFData 4
175 #define CFFOLDER_typeCompress 6
176 #define CFFOLDER_abReserve 8
179 #define CFFILE_cbFile 0
180 #define CFFILE_uoffFolderStart 4
181 #define CFFILE_iFolder 8
182 #define CFFILE_date_time 10
183 #define CFFILE_attribs 14
186 #define CFDATA_csum 0
187 #define CFDATA_cbData 4
188 #define CFDATA_cbUncomp 6
190 static const char *compression_name[] = {
198 /* Sum value of this CFDATA. */
200 uint16_t compressed_size;
201 uint16_t compressed_bytes_remaining;
202 uint16_t uncompressed_size;
203 uint16_t uncompressed_bytes_remaining;
204 /* To know how many bytes we have decompressed. */
205 uint16_t uncompressed_avail;
206 /* Offset from the beginning of compressed data of this CFDATA */
207 uint16_t read_offset;
209 /* To keep memory image of this CFDATA to compute the sum. */
210 size_t memimage_size;
211 unsigned char *memimage;
212 /* Result of calculation of sum. */
213 uint32_t sum_calculated;
214 unsigned char sum_extra[4];
220 uint32_t cfdata_offset_in_cab;
221 uint16_t cfdata_count;
223 #define COMPTYPE_NONE 0x0000
224 #define COMPTYPE_MSZIP 0x0001
225 #define COMPTYPE_QUANTUM 0x0002
226 #define COMPTYPE_LZX 0x0003
228 const char *compname;
229 /* At the time reading CFDATA */
230 struct cfdata cfdata;
232 /* Flags to mark progress of decompression. */
233 char decompress_init;
237 uint32_t uncompressed_size;
241 #define iFoldCONTINUED_FROM_PREV 0xFFFD
242 #define iFoldCONTINUED_TO_NEXT 0xFFFE
243 #define iFoldCONTINUED_PREV_AND_NEXT 0xFFFF
245 #define ATTR_RDONLY 0x01
246 #define ATTR_NAME_IS_UTF 0x80
247 struct archive_string pathname;
251 /* Total bytes of all file size in a Cabinet. */
252 uint32_t total_bytes;
253 uint32_t files_offset;
254 uint16_t folder_count;
257 #define PREV_CABINET 0x0001
258 #define NEXT_CABINET 0x0002
259 #define RESERVE_PRESENT 0x0004
262 /* Version number. */
265 unsigned char cffolder;
266 unsigned char cfdata;
267 /* All folders in a cabinet. */
268 struct cffolder *folder_array;
269 /* All files in a cabinet. */
270 struct cffile *file_array;
275 /* entry_bytes_remaining is the number of bytes we expect. */
276 int64_t entry_offset;
277 int64_t entry_bytes_remaining;
278 int64_t entry_unconsumed;
279 int64_t entry_compressed_bytes_read;
280 int64_t entry_uncompressed_bytes_read;
281 struct cffolder *entry_cffolder;
282 struct cffile *entry_cffile;
283 struct cfdata *entry_cfdata;
285 /* Offset from beginning of a cabinet file. */
287 struct cfheader cfheader;
288 struct archive_wstring ws;
290 /* Flag to mark progress that an archive was read their first header.*/
294 char end_of_entry_cleanup;
295 char read_data_invoked;
296 int64_t bytes_skipped;
298 unsigned char *uncompressed_buffer;
299 size_t uncompressed_buffer_size;
301 int init_default_conversion;
302 struct archive_string_conv *sconv;
303 struct archive_string_conv *sconv_default;
304 struct archive_string_conv *sconv_utf8;
305 char format_name[64];
311 struct lzx_stream xstrm;
314 static int archive_read_format_cab_bid(struct archive_read *, int);
315 static int archive_read_format_cab_options(struct archive_read *,
316 const char *, const char *);
317 static int archive_read_format_cab_read_header(struct archive_read *,
318 struct archive_entry *);
319 static int archive_read_format_cab_read_data(struct archive_read *,
320 const void **, size_t *, int64_t *);
321 static int archive_read_format_cab_read_data_skip(struct archive_read *);
322 static int archive_read_format_cab_cleanup(struct archive_read *);
324 static int cab_skip_sfx(struct archive_read *);
325 static time_t cab_dos_time(const unsigned char *);
326 static int cab_read_data(struct archive_read *, const void **,
327 size_t *, int64_t *);
328 static int cab_read_header(struct archive_read *);
329 static uint32_t cab_checksum_cfdata_4(const void *, size_t bytes, uint32_t);
330 static uint32_t cab_checksum_cfdata(const void *, size_t bytes, uint32_t);
331 static void cab_checksum_update(struct archive_read *, size_t);
332 static int cab_checksum_finish(struct archive_read *);
333 static int cab_next_cfdata(struct archive_read *);
334 static const void *cab_read_ahead_cfdata(struct archive_read *, ssize_t *);
335 static const void *cab_read_ahead_cfdata_none(struct archive_read *, ssize_t *);
336 static const void *cab_read_ahead_cfdata_deflate(struct archive_read *,
338 static const void *cab_read_ahead_cfdata_lzx(struct archive_read *,
340 static int64_t cab_consume_cfdata(struct archive_read *, int64_t);
341 static int64_t cab_minimum_consume_cfdata(struct archive_read *, int64_t);
342 static int lzx_decode_init(struct lzx_stream *, int);
343 static int lzx_read_blocks(struct lzx_stream *, int);
344 static int lzx_decode_blocks(struct lzx_stream *, int);
345 static void lzx_decode_free(struct lzx_stream *);
346 static void lzx_translation(struct lzx_stream *, void *, size_t, uint32_t);
347 static void lzx_cleanup_bitstream(struct lzx_stream *);
348 static int lzx_decode(struct lzx_stream *, int);
349 static int lzx_read_pre_tree(struct lzx_stream *);
350 static int lzx_read_bitlen(struct lzx_stream *, struct huffman *, int);
351 static int lzx_huffman_init(struct huffman *, size_t, int);
352 static void lzx_huffman_free(struct huffman *);
353 static int lzx_make_huffman_table(struct huffman *);
354 static inline int lzx_decode_huffman(struct huffman *, unsigned);
355 static int lzx_decode_huffman_tree(struct huffman *, unsigned, int);
359 archive_read_support_format_cab(struct archive *_a)
361 struct archive_read *a = (struct archive_read *)_a;
365 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
366 ARCHIVE_STATE_NEW, "archive_read_support_format_cab");
368 cab = (struct cab *)calloc(1, sizeof(*cab));
370 archive_set_error(&a->archive, ENOMEM,
371 "Can't allocate CAB data");
372 return (ARCHIVE_FATAL);
374 archive_string_init(&cab->ws);
375 archive_wstring_ensure(&cab->ws, 256);
377 r = __archive_read_register_format(a,
380 archive_read_format_cab_bid,
381 archive_read_format_cab_options,
382 archive_read_format_cab_read_header,
383 archive_read_format_cab_read_data,
384 archive_read_format_cab_read_data_skip,
386 archive_read_format_cab_cleanup,
396 find_cab_magic(const char *p)
401 * Note: Self-Extraction program has 'MSCF' string in their
402 * program. If we were finding 'MSCF' string only, we got
403 * wrong place for Cabinet header, thus, we have to check
404 * following four bytes which are reserved and must be set
407 if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
419 archive_read_format_cab_bid(struct archive_read *a, int best_bid)
422 ssize_t bytes_avail, offset, window;
424 /* If there's already a better bid than we can ever
425 make, don't bother testing. */
429 if ((p = __archive_read_ahead(a, 8, NULL)) == NULL)
432 if (memcmp(p, "MSCF\0\0\0\0", 8) == 0)
436 * Attempt to handle self-extracting archives
437 * by noting a PE header and searching forward
438 * up to 128k for a 'MSCF' marker.
440 if (p[0] == 'M' && p[1] == 'Z') {
443 while (offset < (1024 * 128)) {
444 const char *h = __archive_read_ahead(a, offset + window,
447 /* Remaining bytes are less than window. */
454 while (p + 8 < h + bytes_avail) {
456 if ((next = find_cab_magic(p)) == 0)
467 archive_read_format_cab_options(struct archive_read *a,
468 const char *key, const char *val)
471 int ret = ARCHIVE_FAILED;
473 cab = (struct cab *)(a->format->data);
474 if (strcmp(key, "hdrcharset") == 0) {
475 if (val == NULL || val[0] == 0)
476 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
477 "cab: hdrcharset option needs a character-set name");
479 cab->sconv = archive_string_conversion_from_charset(
480 &a->archive, val, 0);
481 if (cab->sconv != NULL)
489 /* Note: The "warn" return is just to inform the options
490 * supervisor that we didn't handle it. It will generate
491 * a suitable error if no one used this option. */
492 return (ARCHIVE_WARN);
496 cab_skip_sfx(struct archive_read *a)
500 ssize_t bytes, window;
504 const char *h = __archive_read_ahead(a, window, &bytes);
506 /* Remaining size are less than window. */
509 archive_set_error(&a->archive,
510 ARCHIVE_ERRNO_FILE_FORMAT,
511 "Couldn't find out CAB header");
512 return (ARCHIVE_FATAL);
520 * Scan ahead until we find something that looks
521 * like the cab header.
525 if ((next = find_cab_magic(p)) == 0) {
527 __archive_read_consume(a, skip);
533 __archive_read_consume(a, skip);
538 truncated_error(struct archive_read *a)
540 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
541 "Truncated CAB header");
542 return (ARCHIVE_FATAL);
546 cab_strnlen(const unsigned char *p, size_t maxlen)
550 for (i = 0; i <= maxlen; i++) {
555 return (-1);/* invalid */
559 /* Read bytes as much as remaining. */
561 cab_read_ahead_remaining(struct archive_read *a, size_t min, ssize_t *avail)
566 p = __archive_read_ahead(a, min, avail);
574 /* Convert a path separator '\' -> '/' */
576 cab_convert_path_separator_1(struct archive_string *fn, unsigned char attr)
581 /* Easy check if we have '\' in multi-byte string. */
583 for (i = 0; i < archive_strlen(fn); i++) {
584 if (fn->s[i] == '\\') {
586 /* This may be second byte of multi-byte
592 } else if ((fn->s[i] & 0x80) && !(attr & ATTR_NAME_IS_UTF))
597 if (i == archive_strlen(fn))
603 * Replace a character '\' with '/' in wide character.
606 cab_convert_path_separator_2(struct cab *cab, struct archive_entry *entry)
611 /* If a conversion to wide character failed, force the replacement. */
612 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
613 archive_wstrcpy(&(cab->ws), wp);
614 for (i = 0; i < archive_strlen(&(cab->ws)); i++) {
615 if (cab->ws.s[i] == L'\\')
618 archive_entry_copy_pathname_w(entry, cab->ws.s);
623 * Read CFHEADER, CFFOLDER and CFFILE.
626 cab_read_header(struct archive_read *a)
628 const unsigned char *p;
635 int cur_folder, prev_folder;
638 a->archive.archive_format = ARCHIVE_FORMAT_CAB;
639 if (a->archive.archive_format_name == NULL)
640 a->archive.archive_format_name = "CAB";
642 if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
643 return (truncated_error(a));
645 cab = (struct cab *)(a->format->data);
646 if (cab->found_header == 0 &&
647 p[0] == 'M' && p[1] == 'Z') {
648 /* This is an executable? Must be self-extracting... */
649 err = cab_skip_sfx(a);
650 if (err < ARCHIVE_WARN)
653 /* Re-read header after processing the SFX. */
654 if ((p = __archive_read_ahead(a, 42, NULL)) == NULL)
655 return (truncated_error(a));
663 if (p[CFHEADER_signature+0] != 'M' || p[CFHEADER_signature+1] != 'S' ||
664 p[CFHEADER_signature+2] != 'C' || p[CFHEADER_signature+3] != 'F') {
665 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
666 "Couldn't find out CAB header");
667 return (ARCHIVE_FATAL);
669 hd->total_bytes = archive_le32dec(p + CFHEADER_cbCabinet);
670 hd->files_offset = archive_le32dec(p + CFHEADER_coffFiles);
671 hd->minor = p[CFHEADER_versionMinor];
672 hd->major = p[CFHEADER_versionMajor];
673 hd->folder_count = archive_le16dec(p + CFHEADER_cFolders);
674 if (hd->folder_count == 0)
676 hd->file_count = archive_le16dec(p + CFHEADER_cFiles);
677 if (hd->file_count == 0)
679 hd->flags = archive_le16dec(p + CFHEADER_flags);
680 hd->setid = archive_le16dec(p + CFHEADER_setID);
681 hd->cabinet = archive_le16dec(p + CFHEADER_iCabinet);
682 used = CFHEADER_iCabinet + 2;
683 if (hd->flags & RESERVE_PRESENT) {
685 cfheader = archive_le16dec(p + CFHEADER_cbCFHeader);
686 if (cfheader > 60000U)
688 hd->cffolder = p[CFHEADER_cbCFFolder];
689 hd->cfdata = p[CFHEADER_cbCFData];
690 used += 4;/* cbCFHeader, cbCFFolder and cbCFData */
691 used += cfheader;/* abReserve */
693 hd->cffolder = 0;/* Avoid compiling warning. */
694 if (hd->flags & PREV_CABINET) {
695 /* How many bytes are used for szCabinetPrev. */
696 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
697 return (truncated_error(a));
698 if ((len = cab_strnlen(p + used, 255)) <= 0)
701 /* How many bytes are used for szDiskPrev. */
702 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
703 return (truncated_error(a));
704 if ((len = cab_strnlen(p + used, 255)) <= 0)
708 if (hd->flags & NEXT_CABINET) {
709 /* How many bytes are used for szCabinetNext. */
710 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
711 return (truncated_error(a));
712 if ((len = cab_strnlen(p + used, 255)) <= 0)
715 /* How many bytes are used for szDiskNext. */
716 if ((p = __archive_read_ahead(a, used+256, NULL)) == NULL)
717 return (truncated_error(a));
718 if ((len = cab_strnlen(p + used, 255)) <= 0)
722 __archive_read_consume(a, used);
723 cab->cab_offset += used;
729 hd->folder_array = (struct cffolder *)calloc(
730 hd->folder_count, sizeof(struct cffolder));
731 if (hd->folder_array == NULL)
735 if (hd->flags & RESERVE_PRESENT)
736 bytes += hd->cffolder;
737 bytes *= hd->folder_count;
738 if ((p = __archive_read_ahead(a, bytes, NULL)) == NULL)
739 return (truncated_error(a));
741 for (i = 0; i < hd->folder_count; i++) {
742 struct cffolder *folder = &(hd->folder_array[i]);
743 folder->cfdata_offset_in_cab =
744 archive_le32dec(p + CFFOLDER_coffCabStart);
745 folder->cfdata_count = archive_le16dec(p+CFFOLDER_cCFData);
747 archive_le16dec(p+CFFOLDER_typeCompress) & 0x0F;
749 archive_le16dec(p+CFFOLDER_typeCompress) >> 8;
750 /* Get a compression name. */
751 if (folder->comptype <
752 sizeof(compression_name) / sizeof(compression_name[0]))
753 folder->compname = compression_name[folder->comptype];
755 folder->compname = "UNKNOWN";
758 if (hd->flags & RESERVE_PRESENT) {
759 p += hd->cffolder;/* abReserve */
760 used += hd->cffolder;
763 * Sanity check if each data is acceptable.
765 if (offset32 >= folder->cfdata_offset_in_cab)
767 offset32 = folder->cfdata_offset_in_cab;
769 /* Set a request to initialize zlib for the CFDATA of
771 folder->decompress_init = 0;
773 __archive_read_consume(a, used);
774 cab->cab_offset += used;
779 /* Seek read pointer to the offset of CFFILE if needed. */
780 skip = (int64_t)hd->files_offset - cab->cab_offset;
782 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
783 "Invalid offset of CFFILE %jd < %jd",
784 (intmax_t)hd->files_offset, (intmax_t)cab->cab_offset);
785 return (ARCHIVE_FATAL);
788 __archive_read_consume(a, skip);
789 cab->cab_offset += skip;
791 /* Allocate memory for CFDATA */
792 hd->file_array = (struct cffile *)calloc(
793 hd->file_count, sizeof(struct cffile));
794 if (hd->file_array == NULL)
798 for (i = 0; i < hd->file_count; i++) {
799 struct cffile *file = &(hd->file_array[i]);
802 if ((p = __archive_read_ahead(a, 16, NULL)) == NULL)
803 return (truncated_error(a));
804 file->uncompressed_size = archive_le32dec(p + CFFILE_cbFile);
805 file->offset = archive_le32dec(p + CFFILE_uoffFolderStart);
806 file->folder = archive_le16dec(p + CFFILE_iFolder);
807 file->mtime = cab_dos_time(p + CFFILE_date_time);
808 file->attr = (uint8_t)archive_le16dec(p + CFFILE_attribs);
809 __archive_read_consume(a, 16);
811 cab->cab_offset += 16;
812 if ((p = cab_read_ahead_remaining(a, 256, &avail)) == NULL)
813 return (truncated_error(a));
814 if ((len = cab_strnlen(p, avail-1)) <= 0)
817 /* Copy a pathname. */
818 archive_string_init(&(file->pathname));
819 archive_strncpy(&(file->pathname), p, len);
820 __archive_read_consume(a, len + 1);
821 cab->cab_offset += len + 1;
824 * Sanity check if each data is acceptable.
826 if (file->uncompressed_size > 0x7FFF8000)
827 goto invalid;/* Too large */
828 if ((int64_t)file->offset + (int64_t)file->uncompressed_size
829 > ARCHIVE_LITERAL_LL(0x7FFF8000))
830 goto invalid;/* Too large */
831 switch (file->folder) {
832 case iFoldCONTINUED_TO_NEXT:
833 /* This must be last file in a folder. */
834 if (i != hd->file_count -1)
836 cur_folder = hd->folder_count -1;
838 case iFoldCONTINUED_PREV_AND_NEXT:
839 /* This must be only one file in a folder. */
840 if (hd->file_count != 1)
843 case iFoldCONTINUED_FROM_PREV:
844 /* This must be first file in a folder. */
847 prev_folder = cur_folder = 0;
848 offset32 = file->offset;
851 if (file->folder >= hd->folder_count)
853 cur_folder = file->folder;
856 /* Dot not back track. */
857 if (cur_folder < prev_folder)
859 if (cur_folder != prev_folder)
861 prev_folder = cur_folder;
863 /* Make sure there are not any blanks from last file
865 if (offset32 != file->offset)
867 offset32 += file->uncompressed_size;
869 /* CFDATA is available for file contents. */
870 if (file->uncompressed_size > 0 &&
871 hd->folder_array[cur_folder].cfdata_count == 0)
875 if (hd->cabinet != 0 || hd->flags & (PREV_CABINET | NEXT_CABINET)) {
876 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
877 "Multivolume cabinet file is unsupported");
878 return (ARCHIVE_WARN);
882 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
883 "Invalid CAB header");
884 return (ARCHIVE_FATAL);
886 archive_set_error(&a->archive, ENOMEM,
887 "Can't allocate memory for CAB data");
888 return (ARCHIVE_FATAL);
892 archive_read_format_cab_read_header(struct archive_read *a,
893 struct archive_entry *entry)
897 struct cffolder *prev_folder;
899 struct archive_string_conv *sconv;
900 int err = ARCHIVE_OK, r;
902 cab = (struct cab *)(a->format->data);
903 if (cab->found_header == 0) {
904 err = cab_read_header(a);
905 if (err < ARCHIVE_WARN)
907 /* We've found the header. */
908 cab->found_header = 1;
912 if (hd->file_index >= hd->file_count) {
913 cab->end_of_archive = 1;
914 return (ARCHIVE_EOF);
916 file = &hd->file_array[hd->file_index++];
918 cab->end_of_entry = 0;
919 cab->end_of_entry_cleanup = 0;
920 cab->entry_compressed_bytes_read = 0;
921 cab->entry_uncompressed_bytes_read = 0;
922 cab->entry_unconsumed = 0;
923 cab->entry_cffile = file;
926 * Choose a proper folder.
928 prev_folder = cab->entry_cffolder;
929 switch (file->folder) {
930 case iFoldCONTINUED_FROM_PREV:
931 case iFoldCONTINUED_PREV_AND_NEXT:
932 cab->entry_cffolder = &hd->folder_array[0];
934 case iFoldCONTINUED_TO_NEXT:
935 cab->entry_cffolder = &hd->folder_array[hd->folder_count-1];
938 cab->entry_cffolder = &hd->folder_array[file->folder];
941 /* If a cffolder of this file is changed, reset a cfdata to read
942 * file contents from next cfdata. */
943 if (prev_folder != cab->entry_cffolder)
944 cab->entry_cfdata = NULL;
946 /* If a pathname is UTF-8, prepare a string conversion object
947 * for UTF-8 and use it. */
948 if (file->attr & ATTR_NAME_IS_UTF) {
949 if (cab->sconv_utf8 == NULL) {
951 archive_string_conversion_from_charset(
952 &(a->archive), "UTF-8", 1);
953 if (cab->sconv_utf8 == NULL)
954 return (ARCHIVE_FATAL);
956 sconv = cab->sconv_utf8;
957 } else if (cab->sconv != NULL) {
958 /* Choose the conversion specified by the option. */
961 /* Choose the default conversion. */
962 if (!cab->init_default_conversion) {
964 archive_string_default_conversion_for_read(
966 cab->init_default_conversion = 1;
968 sconv = cab->sconv_default;
972 * Set a default value and common data
974 r = cab_convert_path_separator_1(&(file->pathname), file->attr);
975 if (archive_entry_copy_pathname_l(entry, file->pathname.s,
976 archive_strlen(&(file->pathname)), sconv) != 0) {
977 if (errno == ENOMEM) {
978 archive_set_error(&a->archive, ENOMEM,
979 "Can't allocate memory for Pathname");
980 return (ARCHIVE_FATAL);
982 archive_set_error(&a->archive,
983 ARCHIVE_ERRNO_FILE_FORMAT,
984 "Pathname cannot be converted "
985 "from %s to current locale.",
986 archive_string_conversion_charset_name(sconv));
990 /* Convert a path separator '\' -> '/' */
991 cab_convert_path_separator_2(cab, entry);
994 archive_entry_set_size(entry, file->uncompressed_size);
995 if (file->attr & ATTR_RDONLY)
996 archive_entry_set_mode(entry, AE_IFREG | 0555);
998 archive_entry_set_mode(entry, AE_IFREG | 0666);
999 archive_entry_set_mtime(entry, file->mtime, 0);
1001 cab->entry_bytes_remaining = file->uncompressed_size;
1002 cab->entry_offset = 0;
1003 /* We don't need compress data. */
1004 if (file->uncompressed_size == 0)
1005 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1007 /* Set up a more descriptive format name. */
1008 sprintf(cab->format_name, "CAB %d.%d (%s)",
1009 hd->major, hd->minor, cab->entry_cffolder->compname);
1010 a->archive.archive_format_name = cab->format_name;
1016 archive_read_format_cab_read_data(struct archive_read *a,
1017 const void **buff, size_t *size, int64_t *offset)
1019 struct cab *cab = (struct cab *)(a->format->data);
1022 switch (cab->entry_cffile->folder) {
1023 case iFoldCONTINUED_FROM_PREV:
1024 case iFoldCONTINUED_TO_NEXT:
1025 case iFoldCONTINUED_PREV_AND_NEXT:
1029 archive_clear_error(&a->archive);
1030 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1031 "Cannot restore this file split in multivolume.");
1032 return (ARCHIVE_FAILED);
1036 if (cab->read_data_invoked == 0) {
1037 if (cab->bytes_skipped) {
1038 if (cab->entry_cfdata == NULL) {
1039 r = cab_next_cfdata(a);
1043 if (cab_consume_cfdata(a, cab->bytes_skipped) < 0)
1044 return (ARCHIVE_FATAL);
1045 cab->bytes_skipped = 0;
1047 cab->read_data_invoked = 1;
1049 if (cab->entry_unconsumed) {
1050 /* Consume as much as the compressor actually used. */
1051 r = (int)cab_consume_cfdata(a, cab->entry_unconsumed);
1052 cab->entry_unconsumed = 0;
1056 if (cab->end_of_archive || cab->end_of_entry) {
1057 if (!cab->end_of_entry_cleanup) {
1058 /* End-of-entry cleanup done. */
1059 cab->end_of_entry_cleanup = 1;
1061 *offset = cab->entry_offset;
1064 return (ARCHIVE_EOF);
1067 return (cab_read_data(a, buff, size, offset));
1071 cab_checksum_cfdata_4(const void *p, size_t bytes, uint32_t seed)
1073 const unsigned char *b;
1077 u32num = (unsigned)bytes / 4;
1080 for (;u32num > 0; --u32num) {
1081 sum ^= archive_le32dec(b);
1088 cab_checksum_cfdata(const void *p, size_t bytes, uint32_t seed)
1090 const unsigned char *b;
1094 sum = cab_checksum_cfdata_4(p, bytes, seed);
1098 switch (bytes & 3) {
1100 t |= ((uint32_t)(*b++)) << 16;
1103 t |= ((uint32_t)(*b++)) << 8;
1117 cab_checksum_update(struct archive_read *a, size_t bytes)
1119 struct cab *cab = (struct cab *)(a->format->data);
1120 struct cfdata *cfdata = cab->entry_cfdata;
1121 const unsigned char *p;
1124 if (cfdata->sum == 0 || cfdata->sum_ptr == NULL)
1127 * Calculate the sum of this CFDATA.
1128 * Make sure CFDATA must be calculated in four bytes.
1130 p = cfdata->sum_ptr;
1132 if (cfdata->sum_extra_avail) {
1133 while (cfdata->sum_extra_avail < 4 && sumbytes > 0) {
1135 cfdata->sum_extra_avail++] = *p++;
1138 if (cfdata->sum_extra_avail == 4) {
1139 cfdata->sum_calculated = cab_checksum_cfdata_4(
1140 cfdata->sum_extra, 4, cfdata->sum_calculated);
1141 cfdata->sum_extra_avail = 0;
1145 int odd = sumbytes & 3;
1146 if (sumbytes - odd > 0)
1147 cfdata->sum_calculated = cab_checksum_cfdata_4(
1148 p, sumbytes - odd, cfdata->sum_calculated);
1150 memcpy(cfdata->sum_extra, p + sumbytes - odd, odd);
1151 cfdata->sum_extra_avail = odd;
1153 cfdata->sum_ptr = NULL;
1157 cab_checksum_finish(struct archive_read *a)
1159 struct cab *cab = (struct cab *)(a->format->data);
1160 struct cfdata *cfdata = cab->entry_cfdata;
1163 /* Do not need to compute a sum. */
1164 if (cfdata->sum == 0)
1165 return (ARCHIVE_OK);
1168 * Calculate the sum of remaining CFDATA.
1170 if (cfdata->sum_extra_avail) {
1171 cfdata->sum_calculated =
1172 cab_checksum_cfdata(cfdata->sum_extra,
1173 cfdata->sum_extra_avail, cfdata->sum_calculated);
1174 cfdata->sum_extra_avail = 0;
1178 if (cab->cfheader.flags & RESERVE_PRESENT)
1179 l += cab->cfheader.cfdata;
1180 cfdata->sum_calculated = cab_checksum_cfdata(
1181 cfdata->memimage + CFDATA_cbData, l, cfdata->sum_calculated);
1182 if (cfdata->sum_calculated != cfdata->sum) {
1183 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1184 "Checksum error CFDATA[%d] %x:%x in %d bytes",
1185 cab->entry_cffolder->cfdata_index -1,
1186 cfdata->sum, cfdata->sum_calculated,
1187 cfdata->compressed_size);
1188 return (ARCHIVE_FAILED);
1190 return (ARCHIVE_OK);
1194 * Read CFDATA if needed.
1197 cab_next_cfdata(struct archive_read *a)
1199 struct cab *cab = (struct cab *)(a->format->data);
1200 struct cfdata *cfdata = cab->entry_cfdata;
1202 /* There are remaining bytes in current CFDATA, use it first. */
1203 if (cfdata != NULL && cfdata->uncompressed_bytes_remaining > 0)
1204 return (ARCHIVE_OK);
1206 if (cfdata == NULL) {
1209 cab->entry_cffolder->cfdata_index = 0;
1211 /* Seek read pointer to the offset of CFDATA if needed. */
1212 skip = cab->entry_cffolder->cfdata_offset_in_cab
1216 switch (cab->entry_cffile->folder) {
1217 case iFoldCONTINUED_FROM_PREV:
1218 case iFoldCONTINUED_PREV_AND_NEXT:
1221 case iFoldCONTINUED_TO_NEXT:
1222 folder_index = cab->cfheader.folder_count-1;
1225 folder_index = cab->entry_cffile->folder;
1228 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1229 "Invalid offset of CFDATA in folder(%d) %jd < %jd",
1231 (intmax_t)cab->entry_cffolder->cfdata_offset_in_cab,
1232 (intmax_t)cab->cab_offset);
1233 return (ARCHIVE_FATAL);
1236 if (__archive_read_consume(a, skip) < 0)
1237 return (ARCHIVE_FATAL);
1239 cab->entry_cffolder->cfdata_offset_in_cab;
1246 if (cab->entry_cffolder->cfdata_index <
1247 cab->entry_cffolder->cfdata_count) {
1248 const unsigned char *p;
1251 cfdata = &(cab->entry_cffolder->cfdata);
1252 cab->entry_cffolder->cfdata_index++;
1253 cab->entry_cfdata = cfdata;
1254 cfdata->sum_calculated = 0;
1255 cfdata->sum_extra_avail = 0;
1256 cfdata->sum_ptr = NULL;
1258 if (cab->cfheader.flags & RESERVE_PRESENT)
1259 l += cab->cfheader.cfdata;
1260 if ((p = __archive_read_ahead(a, l, NULL)) == NULL)
1261 return (truncated_error(a));
1262 cfdata->sum = archive_le32dec(p + CFDATA_csum);
1263 cfdata->compressed_size = archive_le16dec(p + CFDATA_cbData);
1264 cfdata->compressed_bytes_remaining = cfdata->compressed_size;
1265 cfdata->uncompressed_size =
1266 archive_le16dec(p + CFDATA_cbUncomp);
1267 cfdata->uncompressed_bytes_remaining =
1268 cfdata->uncompressed_size;
1269 cfdata->uncompressed_avail = 0;
1270 cfdata->read_offset = 0;
1271 cfdata->unconsumed = 0;
1274 * Sanity check if data size is acceptable.
1276 if (cfdata->compressed_size == 0 ||
1277 cfdata->compressed_size > (0x8000+6144))
1279 if (cfdata->uncompressed_size > 0x8000)
1281 if (cfdata->uncompressed_size == 0) {
1282 switch (cab->entry_cffile->folder) {
1283 case iFoldCONTINUED_PREV_AND_NEXT:
1284 case iFoldCONTINUED_TO_NEXT:
1286 case iFoldCONTINUED_FROM_PREV:
1291 /* If CFDATA is not last in a folder, an uncompressed
1292 * size must be 0x8000(32KBi) */
1293 if ((cab->entry_cffolder->cfdata_index <
1294 cab->entry_cffolder->cfdata_count) &&
1295 cfdata->uncompressed_size != 0x8000)
1298 /* A compressed data size and an uncompressed data size must
1299 * be the same in no compression mode. */
1300 if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
1301 cfdata->compressed_size != cfdata->uncompressed_size)
1305 * Save CFDATA image for sum check.
1307 if (cfdata->memimage_size < (size_t)l) {
1308 free(cfdata->memimage);
1309 cfdata->memimage = malloc(l);
1310 if (cfdata->memimage == NULL) {
1311 archive_set_error(&a->archive, ENOMEM,
1312 "Can't allocate memory for CAB data");
1313 return (ARCHIVE_FATAL);
1315 cfdata->memimage_size = l;
1317 memcpy(cfdata->memimage, p, l);
1319 /* Consume bytes as much as we used. */
1320 __archive_read_consume(a, l);
1321 cab->cab_offset += l;
1322 } else if (cab->entry_cffolder->cfdata_count > 0) {
1323 /* Run out of all CFDATA in a folder. */
1324 cfdata->compressed_size = 0;
1325 cfdata->uncompressed_size = 0;
1326 cfdata->compressed_bytes_remaining = 0;
1327 cfdata->uncompressed_bytes_remaining = 0;
1329 /* Current folder does not have any CFDATA. */
1330 cfdata = &(cab->entry_cffolder->cfdata);
1331 cab->entry_cfdata = cfdata;
1332 memset(cfdata, 0, sizeof(*cfdata));
1334 return (ARCHIVE_OK);
1336 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1338 return (ARCHIVE_FATAL);
1342 * Read ahead CFDATA.
1345 cab_read_ahead_cfdata(struct archive_read *a, ssize_t *avail)
1347 struct cab *cab = (struct cab *)(a->format->data);
1350 err = cab_next_cfdata(a);
1351 if (err < ARCHIVE_OK) {
1356 switch (cab->entry_cffolder->comptype) {
1358 return (cab_read_ahead_cfdata_none(a, avail));
1359 case COMPTYPE_MSZIP:
1360 return (cab_read_ahead_cfdata_deflate(a, avail));
1362 return (cab_read_ahead_cfdata_lzx(a, avail));
1363 default: /* Unsupported compression. */
1364 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1365 "Unsupported CAB compression : %s",
1366 cab->entry_cffolder->compname);
1367 *avail = ARCHIVE_FAILED;
1373 * Read ahead CFDATA as uncompressed data.
1376 cab_read_ahead_cfdata_none(struct archive_read *a, ssize_t *avail)
1378 struct cab *cab = (struct cab *)(a->format->data);
1379 struct cfdata *cfdata;
1382 cfdata = cab->entry_cfdata;
1385 * Note: '1' here is a performance optimization.
1386 * Recall that the decompression layer returns a count of
1387 * available bytes; asking for more than that forces the
1388 * decompressor to combine reads by copying data.
1390 d = __archive_read_ahead(a, 1, avail);
1392 *avail = truncated_error(a);
1395 if (*avail > cfdata->uncompressed_bytes_remaining)
1396 *avail = cfdata->uncompressed_bytes_remaining;
1397 cfdata->uncompressed_avail = cfdata->uncompressed_size;
1398 cfdata->unconsumed = *avail;
1399 cfdata->sum_ptr = d;
1404 * Read ahead CFDATA as deflate data.
1408 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1410 struct cab *cab = (struct cab *)(a->format->data);
1411 struct cfdata *cfdata;
1417 cfdata = cab->entry_cfdata;
1418 /* If the buffer hasn't been allocated, allocate it now. */
1419 if (cab->uncompressed_buffer == NULL) {
1420 cab->uncompressed_buffer_size = 0x8000;
1421 cab->uncompressed_buffer
1422 = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1423 if (cab->uncompressed_buffer == NULL) {
1424 archive_set_error(&a->archive, ENOMEM,
1425 "No memory for CAB reader");
1426 *avail = ARCHIVE_FATAL;
1431 uavail = cfdata->uncompressed_avail;
1432 if (uavail == cfdata->uncompressed_size) {
1433 d = cab->uncompressed_buffer + cfdata->read_offset;
1434 *avail = uavail - cfdata->read_offset;
1438 if (!cab->entry_cffolder->decompress_init) {
1439 cab->stream.next_in = NULL;
1440 cab->stream.avail_in = 0;
1441 cab->stream.total_in = 0;
1442 cab->stream.next_out = NULL;
1443 cab->stream.avail_out = 0;
1444 cab->stream.total_out = 0;
1445 if (cab->stream_valid)
1446 r = inflateReset(&cab->stream);
1448 r = inflateInit2(&cab->stream,
1449 -15 /* Don't check for zlib header */);
1451 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1452 "Can't initialize deflate decompression.");
1453 *avail = ARCHIVE_FATAL;
1456 /* Stream structure has been set up. */
1457 cab->stream_valid = 1;
1458 /* We've initialized decompression for this stream. */
1459 cab->entry_cffolder->decompress_init = 1;
1462 if (cfdata->compressed_bytes_remaining == cfdata->compressed_size)
1467 cab->stream.total_out = uavail;
1469 * We always uncompress all data in current CFDATA.
1471 while (!eod && cab->stream.total_out < cfdata->uncompressed_size) {
1472 ssize_t bytes_avail;
1474 cab->stream.next_out =
1475 cab->uncompressed_buffer + cab->stream.total_out;
1476 cab->stream.avail_out =
1477 cfdata->uncompressed_size - cab->stream.total_out;
1479 d = __archive_read_ahead(a, 1, &bytes_avail);
1480 if (bytes_avail <= 0) {
1481 *avail = truncated_error(a);
1484 if (bytes_avail > cfdata->compressed_bytes_remaining)
1485 bytes_avail = cfdata->compressed_bytes_remaining;
1487 * A bug in zlib.h: stream.next_in should be marked 'const'
1488 * but isn't (the library never alters data through the
1489 * next_in pointer, only reads it). The result: this ugly
1490 * cast to remove 'const'.
1492 cab->stream.next_in = (Bytef *)(uintptr_t)d;
1493 cab->stream.avail_in = (uInt)bytes_avail;
1494 cab->stream.total_in = 0;
1496 /* Cut out a tow-byte MSZIP signature(0x43, 0x4b). */
1498 if (bytes_avail <= 0)
1500 if (bytes_avail <= mszip) {
1502 if (cab->stream.next_in[0] != 0x43)
1504 if (bytes_avail > 1 &&
1505 cab->stream.next_in[1] != 0x4b)
1507 } else if (cab->stream.next_in[0] != 0x4b)
1509 cfdata->unconsumed = bytes_avail;
1510 cfdata->sum_ptr = d;
1511 if (cab_minimum_consume_cfdata(
1512 a, cfdata->unconsumed) < 0) {
1513 *avail = ARCHIVE_FATAL;
1516 mszip -= (int)bytes_avail;
1519 if (mszip == 1 && cab->stream.next_in[0] != 0x4b)
1521 else if (cab->stream.next_in[0] != 0x43 ||
1522 cab->stream.next_in[1] != 0x4b)
1524 cab->stream.next_in += mszip;
1525 cab->stream.avail_in -= mszip;
1526 cab->stream.total_in += mszip;
1530 r = inflate(&cab->stream, 0);
1540 cfdata->unconsumed = cab->stream.total_in;
1541 cfdata->sum_ptr = d;
1542 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1543 *avail = ARCHIVE_FATAL;
1547 uavail = (uint16_t)cab->stream.total_out;
1549 if (uavail < cfdata->uncompressed_size) {
1550 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1551 "Invalid uncompressed size (%d < %d)",
1552 uavail, cfdata->uncompressed_size);
1553 *avail = ARCHIVE_FATAL;
1558 * Note: I suspect there is a bug in makecab.exe because, in rare
1559 * case, compressed bytes are still remaining regardless we have
1560 * gotten all uncompressed bytes, which size is recorded in CFDATA,
1561 * as much as we need, and we have to use the garbage so as to
1562 * correctly compute the sum of CFDATA accordingly.
1564 if (cfdata->compressed_bytes_remaining > 0) {
1565 ssize_t bytes_avail;
1567 d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1569 if (bytes_avail <= 0) {
1570 *avail = truncated_error(a);
1573 cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1574 cfdata->sum_ptr = d;
1575 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1576 *avail = ARCHIVE_FATAL;
1582 * Set dictionary data for decompressing of next CFDATA, which
1583 * in the same folder. This is why we always do decompress CFDATA
1584 * even if beginning CFDATA or some of CFDATA are not used in
1585 * skipping file data.
1587 if (cab->entry_cffolder->cfdata_index <
1588 cab->entry_cffolder->cfdata_count) {
1589 r = inflateReset(&cab->stream);
1592 r = inflateSetDictionary(&cab->stream,
1593 cab->uncompressed_buffer, cfdata->uncompressed_size);
1598 d = cab->uncompressed_buffer + cfdata->read_offset;
1599 *avail = uavail - cfdata->read_offset;
1600 cfdata->uncompressed_avail = uavail;
1607 archive_set_error(&a->archive, ENOMEM,
1608 "Out of memory for deflate decompression");
1611 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1612 "Deflate decompression failed (%d)", r);
1615 *avail = ARCHIVE_FATAL;
1618 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1619 "CFDATA incorrect(no MSZIP signature)");
1620 *avail = ARCHIVE_FATAL;
1624 #else /* HAVE_ZLIB_H */
1627 cab_read_ahead_cfdata_deflate(struct archive_read *a, ssize_t *avail)
1629 *avail = ARCHIVE_FATAL;
1630 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1631 "libarchive compiled without deflate support (no libz)");
1635 #endif /* HAVE_ZLIB_H */
1638 cab_read_ahead_cfdata_lzx(struct archive_read *a, ssize_t *avail)
1640 struct cab *cab = (struct cab *)(a->format->data);
1641 struct cfdata *cfdata;
1646 cfdata = cab->entry_cfdata;
1647 /* If the buffer hasn't been allocated, allocate it now. */
1648 if (cab->uncompressed_buffer == NULL) {
1649 cab->uncompressed_buffer_size = 0x8000;
1650 cab->uncompressed_buffer
1651 = (unsigned char *)malloc(cab->uncompressed_buffer_size);
1652 if (cab->uncompressed_buffer == NULL) {
1653 archive_set_error(&a->archive, ENOMEM,
1654 "No memory for CAB reader");
1655 *avail = ARCHIVE_FATAL;
1660 uavail = cfdata->uncompressed_avail;
1661 if (uavail == cfdata->uncompressed_size) {
1662 d = cab->uncompressed_buffer + cfdata->read_offset;
1663 *avail = uavail - cfdata->read_offset;
1667 if (!cab->entry_cffolder->decompress_init) {
1668 r = lzx_decode_init(&cab->xstrm,
1669 cab->entry_cffolder->compdata);
1670 if (r != ARCHIVE_OK) {
1671 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1672 "Can't initialize LZX decompression.");
1673 *avail = ARCHIVE_FATAL;
1676 /* We've initialized decompression for this stream. */
1677 cab->entry_cffolder->decompress_init = 1;
1680 /* Clean up remaining bits of previous CFDATA. */
1681 lzx_cleanup_bitstream(&cab->xstrm);
1682 cab->xstrm.total_out = uavail;
1683 while (cab->xstrm.total_out < cfdata->uncompressed_size) {
1684 ssize_t bytes_avail;
1686 cab->xstrm.next_out =
1687 cab->uncompressed_buffer + cab->xstrm.total_out;
1688 cab->xstrm.avail_out =
1689 cfdata->uncompressed_size - cab->xstrm.total_out;
1691 d = __archive_read_ahead(a, 1, &bytes_avail);
1692 if (bytes_avail <= 0) {
1693 archive_set_error(&a->archive,
1694 ARCHIVE_ERRNO_FILE_FORMAT,
1695 "Truncated CAB file data");
1696 *avail = ARCHIVE_FATAL;
1699 if (bytes_avail > cfdata->compressed_bytes_remaining)
1700 bytes_avail = cfdata->compressed_bytes_remaining;
1702 cab->xstrm.next_in = d;
1703 cab->xstrm.avail_in = bytes_avail;
1704 cab->xstrm.total_in = 0;
1705 r = lzx_decode(&cab->xstrm,
1706 cfdata->compressed_bytes_remaining == bytes_avail);
1712 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1713 "LZX decompression failed (%d)", r);
1714 *avail = ARCHIVE_FATAL;
1717 cfdata->unconsumed = cab->xstrm.total_in;
1718 cfdata->sum_ptr = d;
1719 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1720 *avail = ARCHIVE_FATAL;
1725 uavail = (uint16_t)cab->xstrm.total_out;
1727 * Make sure a read pointer advances to next CFDATA.
1729 if (cfdata->compressed_bytes_remaining > 0) {
1730 ssize_t bytes_avail;
1732 d = __archive_read_ahead(a, cfdata->compressed_bytes_remaining,
1734 if (bytes_avail <= 0) {
1735 *avail = truncated_error(a);
1738 cfdata->unconsumed = cfdata->compressed_bytes_remaining;
1739 cfdata->sum_ptr = d;
1740 if (cab_minimum_consume_cfdata(a, cfdata->unconsumed) < 0) {
1741 *avail = ARCHIVE_FATAL;
1747 * Translation reversal of x86 processor CALL byte sequence(E8).
1749 lzx_translation(&cab->xstrm, cab->uncompressed_buffer,
1750 cfdata->uncompressed_size,
1751 (cab->entry_cffolder->cfdata_index-1) * 0x8000);
1753 d = cab->uncompressed_buffer + cfdata->read_offset;
1754 *avail = uavail - cfdata->read_offset;
1755 cfdata->uncompressed_avail = uavail;
1762 * We always decompress CFDATA to consume CFDATA as much as we need
1763 * in uncompressed bytes because all CFDATA in a folder are related
1764 * so we do not skip any CFDATA without decompressing.
1765 * Note: If the folder of a CFFILE is iFoldCONTINUED_PREV_AND_NEXT or
1766 * iFoldCONTINUED_FROM_PREV, we won't decompress because a CFDATA for
1767 * the CFFILE is remaining bytes of previous Multivolume CAB file.
1770 cab_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1772 struct cab *cab = (struct cab *)(a->format->data);
1773 struct cfdata *cfdata;
1774 int64_t cbytes, rbytes;
1777 rbytes = cab_minimum_consume_cfdata(a, consumed_bytes);
1779 return (ARCHIVE_FATAL);
1781 cfdata = cab->entry_cfdata;
1782 while (rbytes > 0) {
1785 if (cfdata->compressed_size == 0) {
1786 archive_set_error(&a->archive,
1787 ARCHIVE_ERRNO_FILE_FORMAT,
1789 return (ARCHIVE_FATAL);
1791 cbytes = cfdata->uncompressed_bytes_remaining;
1792 if (cbytes > rbytes)
1796 if (cfdata->uncompressed_avail == 0 &&
1797 (cab->entry_cffile->folder == iFoldCONTINUED_PREV_AND_NEXT ||
1798 cab->entry_cffile->folder == iFoldCONTINUED_FROM_PREV)) {
1799 /* We have not read any data yet. */
1800 if (cbytes == cfdata->uncompressed_bytes_remaining) {
1801 /* Skip whole current CFDATA. */
1802 __archive_read_consume(a,
1803 cfdata->compressed_size);
1804 cab->cab_offset += cfdata->compressed_size;
1805 cfdata->compressed_bytes_remaining = 0;
1806 cfdata->uncompressed_bytes_remaining = 0;
1807 err = cab_next_cfdata(a);
1810 cfdata = cab->entry_cfdata;
1811 if (cfdata->uncompressed_size == 0) {
1812 switch (cab->entry_cffile->folder) {
1813 case iFoldCONTINUED_PREV_AND_NEXT:
1814 case iFoldCONTINUED_TO_NEXT:
1815 case iFoldCONTINUED_FROM_PREV:
1824 cfdata->read_offset += (uint16_t)cbytes;
1825 cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1827 } else if (cbytes == 0) {
1828 err = cab_next_cfdata(a);
1831 cfdata = cab->entry_cfdata;
1832 if (cfdata->uncompressed_size == 0) {
1833 switch (cab->entry_cffile->folder) {
1834 case iFoldCONTINUED_PREV_AND_NEXT:
1835 case iFoldCONTINUED_TO_NEXT:
1836 case iFoldCONTINUED_FROM_PREV:
1837 return (ARCHIVE_FATAL);
1844 while (cbytes > 0) {
1845 (void)cab_read_ahead_cfdata(a, &avail);
1847 return (ARCHIVE_FATAL);
1849 avail = (ssize_t)cbytes;
1850 if (cab_minimum_consume_cfdata(a, avail) < 0)
1851 return (ARCHIVE_FATAL);
1855 return (consumed_bytes);
1859 * Consume CFDATA as much as we have already gotten and
1860 * compute the sum of CFDATA.
1863 cab_minimum_consume_cfdata(struct archive_read *a, int64_t consumed_bytes)
1865 struct cab *cab = (struct cab *)(a->format->data);
1866 struct cfdata *cfdata;
1867 int64_t cbytes, rbytes;
1870 cfdata = cab->entry_cfdata;
1871 rbytes = consumed_bytes;
1872 if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1873 if (consumed_bytes < cfdata->unconsumed)
1874 cbytes = consumed_bytes;
1876 cbytes = cfdata->unconsumed;
1878 cfdata->read_offset += (uint16_t)cbytes;
1879 cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1880 cfdata->unconsumed -= cbytes;
1882 cbytes = cfdata->uncompressed_avail - cfdata->read_offset;
1884 if (consumed_bytes < cbytes)
1885 cbytes = consumed_bytes;
1887 cfdata->read_offset += (uint16_t)cbytes;
1888 cfdata->uncompressed_bytes_remaining -= (uint16_t)cbytes;
1891 if (cfdata->unconsumed) {
1892 cbytes = cfdata->unconsumed;
1893 cfdata->unconsumed = 0;
1898 /* Compute the sum. */
1899 cab_checksum_update(a, (size_t)cbytes);
1901 /* Consume as much as the compressor actually used. */
1902 __archive_read_consume(a, cbytes);
1903 cab->cab_offset += cbytes;
1904 cfdata->compressed_bytes_remaining -= (uint16_t)cbytes;
1905 if (cfdata->compressed_bytes_remaining == 0) {
1906 err = cab_checksum_finish(a);
1915 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1916 * cab->end_of_entry if it consumes all of the data.
1919 cab_read_data(struct archive_read *a, const void **buff,
1920 size_t *size, int64_t *offset)
1922 struct cab *cab = (struct cab *)(a->format->data);
1923 ssize_t bytes_avail;
1925 if (cab->entry_bytes_remaining == 0) {
1928 *offset = cab->entry_offset;
1929 cab->end_of_entry = 1;
1930 return (ARCHIVE_OK);
1933 *buff = cab_read_ahead_cfdata(a, &bytes_avail);
1934 if (bytes_avail <= 0) {
1938 if (bytes_avail == 0 &&
1939 cab->entry_cfdata->uncompressed_size == 0) {
1940 /* All of CFDATA in a folder has been handled. */
1941 archive_set_error(&a->archive,
1942 ARCHIVE_ERRNO_FILE_FORMAT, "Invalid CFDATA");
1943 return (ARCHIVE_FATAL);
1945 return ((int)bytes_avail);
1947 if (bytes_avail > cab->entry_bytes_remaining)
1948 bytes_avail = (ssize_t)cab->entry_bytes_remaining;
1950 *size = bytes_avail;
1951 *offset = cab->entry_offset;
1952 cab->entry_offset += bytes_avail;
1953 cab->entry_bytes_remaining -= bytes_avail;
1954 if (cab->entry_bytes_remaining == 0)
1955 cab->end_of_entry = 1;
1956 cab->entry_unconsumed = bytes_avail;
1957 if (cab->entry_cffolder->comptype == COMPTYPE_NONE) {
1958 /* Don't consume more than current entry used. */
1959 if (cab->entry_cfdata->unconsumed > cab->entry_unconsumed)
1960 cab->entry_cfdata->unconsumed = cab->entry_unconsumed;
1962 return (ARCHIVE_OK);
1966 archive_read_format_cab_read_data_skip(struct archive_read *a)
1969 int64_t bytes_skipped;
1972 cab = (struct cab *)(a->format->data);
1974 if (cab->end_of_archive)
1975 return (ARCHIVE_EOF);
1977 if (!cab->read_data_invoked) {
1978 cab->bytes_skipped += cab->entry_bytes_remaining;
1979 cab->entry_bytes_remaining = 0;
1980 /* This entry is finished and done. */
1981 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
1982 return (ARCHIVE_OK);
1985 if (cab->entry_unconsumed) {
1986 /* Consume as much as the compressor actually used. */
1987 r = (int)cab_consume_cfdata(a, cab->entry_unconsumed);
1988 cab->entry_unconsumed = 0;
1991 } else if (cab->entry_cfdata == NULL) {
1992 r = cab_next_cfdata(a);
1997 /* if we've already read to end of data, we're done. */
1998 if (cab->end_of_entry_cleanup)
1999 return (ARCHIVE_OK);
2002 * If the length is at the beginning, we can skip the
2003 * compressed data much more quickly.
2005 bytes_skipped = cab_consume_cfdata(a, cab->entry_bytes_remaining);
2006 if (bytes_skipped < 0)
2007 return (ARCHIVE_FATAL);
2009 /* If the compression type is none(uncompressed), we've already
2010 * consumed data as much as the current entry size. */
2011 if (cab->entry_cffolder->comptype == COMPTYPE_NONE &&
2012 cab->entry_cfdata != NULL)
2013 cab->entry_cfdata->unconsumed = 0;
2015 /* This entry is finished and done. */
2016 cab->end_of_entry_cleanup = cab->end_of_entry = 1;
2017 return (ARCHIVE_OK);
2021 archive_read_format_cab_cleanup(struct archive_read *a)
2023 struct cab *cab = (struct cab *)(a->format->data);
2024 struct cfheader *hd = &cab->cfheader;
2027 if (hd->folder_array != NULL) {
2028 for (i = 0; i < hd->folder_count; i++)
2029 free(hd->folder_array[i].cfdata.memimage);
2030 free(hd->folder_array);
2032 if (hd->file_array != NULL) {
2033 for (i = 0; i < cab->cfheader.file_count; i++)
2034 archive_string_free(&(hd->file_array[i].pathname));
2035 free(hd->file_array);
2038 if (cab->stream_valid)
2039 inflateEnd(&cab->stream);
2041 lzx_decode_free(&cab->xstrm);
2042 archive_wstring_free(&cab->ws);
2043 free(cab->uncompressed_buffer);
2045 (a->format->data) = NULL;
2046 return (ARCHIVE_OK);
2049 /* Convert an MSDOS-style date/time into Unix-style time. */
2051 cab_dos_time(const unsigned char *p)
2056 msDate = archive_le16dec(p);
2057 msTime = archive_le16dec(p+2);
2059 memset(&ts, 0, sizeof(ts));
2060 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */
2061 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */
2062 ts.tm_mday = msDate & 0x1f; /* Day of month. */
2063 ts.tm_hour = (msTime >> 11) & 0x1f;
2064 ts.tm_min = (msTime >> 5) & 0x3f;
2065 ts.tm_sec = (msTime << 1) & 0x3e;
2067 return (mktime(&ts));
2070 /*****************************************************************
2072 * LZX decompression code.
2074 *****************************************************************/
2077 * Initialize LZX decoder.
2079 * Returns ARCHIVE_OK if initialization was successful.
2080 * Returns ARCHIVE_FAILED if w_bits has unsupported value.
2081 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
2085 lzx_decode_init(struct lzx_stream *strm, int w_bits)
2088 int slot, w_size, w_slot;
2092 if (strm->ds == NULL) {
2093 strm->ds = calloc(1, sizeof(*strm->ds));
2094 if (strm->ds == NULL)
2095 return (ARCHIVE_FATAL);
2098 ds->error = ARCHIVE_FAILED;
2100 /* Allow bits from 15(32KBi) up to 21(2MBi) */
2101 if (w_bits < SLOT_BASE || w_bits > SLOT_MAX)
2102 return (ARCHIVE_FAILED);
2104 ds->error = ARCHIVE_FATAL;
2109 w_size = ds->w_size;
2110 w_slot = slots[w_bits - SLOT_BASE];
2111 ds->w_size = 1U << w_bits;
2112 ds->w_mask = ds->w_size -1;
2113 if (ds->w_buff == NULL || w_size != ds->w_size) {
2115 ds->w_buff = malloc(ds->w_size);
2116 if (ds->w_buff == NULL)
2117 return (ARCHIVE_FATAL);
2119 ds->pos_tbl = malloc(sizeof(ds->pos_tbl[0]) * w_slot);
2120 if (ds->pos_tbl == NULL)
2121 return (ARCHIVE_FATAL);
2122 lzx_huffman_free(&(ds->mt));
2125 for (footer = 0; footer < 18; footer++)
2126 base_inc[footer] = 1 << footer;
2128 for (slot = 0; slot < w_slot; slot++) {
2133 base += base_inc[footer];
2136 for (n = base; n; n >>= 1)
2141 ds->pos_tbl[slot].base = base;
2142 ds->pos_tbl[slot].footer_bits = footer;
2147 ds->br.cache_buffer = 0;
2148 ds->br.cache_avail = 0;
2149 ds->r0 = ds->r1 = ds->r2 = 1;
2151 /* Initialize aligned offset tree. */
2152 if (lzx_huffman_init(&(ds->at), 8, 8) != ARCHIVE_OK)
2153 return (ARCHIVE_FATAL);
2155 /* Initialize pre-tree. */
2156 if (lzx_huffman_init(&(ds->pt), 20, 10) != ARCHIVE_OK)
2157 return (ARCHIVE_FATAL);
2159 /* Initialize Main tree. */
2160 if (lzx_huffman_init(&(ds->mt), 256+(w_slot<<3), 16)
2162 return (ARCHIVE_FATAL);
2164 /* Initialize Length tree. */
2165 if (lzx_huffman_init(&(ds->lt), 249, 16) != ARCHIVE_OK)
2166 return (ARCHIVE_FATAL);
2170 return (ARCHIVE_OK);
2174 * Release LZX decoder.
2177 lzx_decode_free(struct lzx_stream *strm)
2180 if (strm->ds == NULL)
2182 free(strm->ds->w_buff);
2183 free(strm->ds->pos_tbl);
2184 lzx_huffman_free(&(strm->ds->at));
2185 lzx_huffman_free(&(strm->ds->pt));
2186 lzx_huffman_free(&(strm->ds->mt));
2187 lzx_huffman_free(&(strm->ds->lt));
2193 * E8 Call Translation reversal.
2196 lzx_translation(struct lzx_stream *strm, void *p, size_t size, uint32_t offset)
2198 struct lzx_dec *ds = strm->ds;
2199 unsigned char *b, *end;
2201 if (!ds->translation || size <= 10)
2204 end = b + size - 10;
2205 while (b < end && (b = memchr(b, 0xE8, end - b)) != NULL) {
2206 size_t i = b - (unsigned char *)p;
2207 int32_t cp, displacement, value;
2209 cp = (int32_t)(offset + (uint32_t)i);
2210 value = archive_le32dec(&b[1]);
2211 if (value >= -cp && value < (int32_t)ds->translation_size) {
2213 displacement = value - cp;
2215 displacement = value + ds->translation_size;
2216 archive_le32enc(&b[1], (uint32_t)displacement);
2223 * Bit stream reader.
2225 /* Check that the cache buffer has enough bits. */
2226 #define lzx_br_has(br, n) ((br)->cache_avail >= n)
2227 /* Get compressed data by bit. */
2228 #define lzx_br_bits(br, n) \
2229 (((uint32_t)((br)->cache_buffer >> \
2230 ((br)->cache_avail - (n)))) & cache_masks[n])
2231 #define lzx_br_bits_forced(br, n) \
2232 (((uint32_t)((br)->cache_buffer << \
2233 ((n) - (br)->cache_avail))) & cache_masks[n])
2234 /* Read ahead to make sure the cache buffer has enough compressed data we
2236 * True : completed, there is enough data in the cache buffer.
2237 * False : we met that strm->next_in is empty, we have to get following
2239 #define lzx_br_read_ahead_0(strm, br, n) \
2240 (lzx_br_has((br), (n)) || lzx_br_fillup(strm, br))
2241 /* True : the cache buffer has some bits as much as we need.
2242 * False : there are no enough bits in the cache buffer to be used,
2243 * we have to get following bytes if we could. */
2244 #define lzx_br_read_ahead(strm, br, n) \
2245 (lzx_br_read_ahead_0((strm), (br), (n)) || lzx_br_has((br), (n)))
2247 /* Notify how many bits we consumed. */
2248 #define lzx_br_consume(br, n) ((br)->cache_avail -= (n))
2249 #define lzx_br_consume_unaligned_bits(br) ((br)->cache_avail &= ~0x0f)
2251 #define lzx_br_is_unaligned(br) ((br)->cache_avail & 0x0f)
2253 static const uint32_t cache_masks[] = {
2254 0x00000000, 0x00000001, 0x00000003, 0x00000007,
2255 0x0000000F, 0x0000001F, 0x0000003F, 0x0000007F,
2256 0x000000FF, 0x000001FF, 0x000003FF, 0x000007FF,
2257 0x00000FFF, 0x00001FFF, 0x00003FFF, 0x00007FFF,
2258 0x0000FFFF, 0x0001FFFF, 0x0003FFFF, 0x0007FFFF,
2259 0x000FFFFF, 0x001FFFFF, 0x003FFFFF, 0x007FFFFF,
2260 0x00FFFFFF, 0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF,
2261 0x0FFFFFFF, 0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF,
2262 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
2266 * Shift away used bits in the cache data and fill it up with following bits.
2267 * Call this when cache buffer does not have enough bits you need.
2269 * Returns 1 if the cache buffer is full.
2270 * Returns 0 if the cache buffer is not full; input buffer is empty.
2273 lzx_br_fillup(struct lzx_stream *strm, struct lzx_br *br)
2276 * x86 processor family can read misaligned data without an access error.
2278 int n = CACHE_BITS - br->cache_avail;
2283 if (strm->avail_in >= 8) {
2285 ((uint64_t)strm->next_in[1]) << 56 |
2286 ((uint64_t)strm->next_in[0]) << 48 |
2287 ((uint64_t)strm->next_in[3]) << 40 |
2288 ((uint64_t)strm->next_in[2]) << 32 |
2289 ((uint32_t)strm->next_in[5]) << 24 |
2290 ((uint32_t)strm->next_in[4]) << 16 |
2291 ((uint32_t)strm->next_in[7]) << 8 |
2292 (uint32_t)strm->next_in[6];
2294 strm->avail_in -= 8;
2295 br->cache_avail += 8 * 8;
2300 if (strm->avail_in >= 6) {
2302 (br->cache_buffer << 48) |
2303 ((uint64_t)strm->next_in[1]) << 40 |
2304 ((uint64_t)strm->next_in[0]) << 32 |
2305 ((uint32_t)strm->next_in[3]) << 24 |
2306 ((uint32_t)strm->next_in[2]) << 16 |
2307 ((uint32_t)strm->next_in[5]) << 8 |
2308 (uint32_t)strm->next_in[4];
2310 strm->avail_in -= 6;
2311 br->cache_avail += 6 * 8;
2316 /* We have enough compressed data in
2317 * the cache buffer.*/
2322 if (strm->avail_in < 2) {
2323 /* There is not enough compressed data to
2324 * fill up the cache buffer. */
2325 if (strm->avail_in == 1) {
2326 br->odd = *strm->next_in++;
2333 (br->cache_buffer << 16) |
2334 archive_le16dec(strm->next_in);
2336 strm->avail_in -= 2;
2337 br->cache_avail += 16;
2343 lzx_br_fixup(struct lzx_stream *strm, struct lzx_br *br)
2345 int n = CACHE_BITS - br->cache_avail;
2347 if (br->have_odd && n >= 16 && strm->avail_in > 0) {
2349 (br->cache_buffer << 16) |
2350 ((uint16_t)(*strm->next_in)) << 8 | br->odd;
2353 br->cache_avail += 16;
2359 lzx_cleanup_bitstream(struct lzx_stream *strm)
2361 strm->ds->br.cache_avail = 0;
2362 strm->ds->br.have_odd = 0;
2368 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
2369 * Please set available buffer and call this function again.
2370 * 2. Returns ARCHIVE_EOF if decompression has been completed.
2371 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
2372 * is broken or you do not set 'last' flag properly.
2374 #define ST_RD_TRANSLATION 0
2375 #define ST_RD_TRANSLATION_SIZE 1
2376 #define ST_RD_BLOCK_TYPE 2
2377 #define ST_RD_BLOCK_SIZE 3
2378 #define ST_RD_ALIGNMENT 4
2382 #define ST_COPY_UNCOMP1 8
2383 #define ST_COPY_UNCOMP2 9
2384 #define ST_RD_ALIGNED_OFFSET 10
2385 #define ST_RD_VERBATIM 11
2386 #define ST_RD_PRE_MAIN_TREE_256 12
2387 #define ST_MAIN_TREE_256 13
2388 #define ST_RD_PRE_MAIN_TREE_REM 14
2389 #define ST_MAIN_TREE_REM 15
2390 #define ST_RD_PRE_LENGTH_TREE 16
2391 #define ST_LENGTH_TREE 17
2393 #define ST_LENGTH 19
2394 #define ST_OFFSET 20
2395 #define ST_REAL_POS 21
2399 lzx_decode(struct lzx_stream *strm, int last)
2401 struct lzx_dec *ds = strm->ds;
2408 avail_in = strm->avail_in;
2409 lzx_br_fixup(strm, &(ds->br));
2411 if (ds->state < ST_MAIN)
2412 r = lzx_read_blocks(strm, last);
2414 int64_t bytes_written = strm->avail_out;
2415 r = lzx_decode_blocks(strm, last);
2416 bytes_written -= strm->avail_out;
2417 strm->next_out += bytes_written;
2418 strm->total_out += bytes_written;
2421 strm->total_in += avail_in - strm->avail_in;
2426 lzx_read_blocks(struct lzx_stream *strm, int last)
2428 struct lzx_dec *ds = strm->ds;
2429 struct lzx_br *br = &(ds->br);
2433 switch (ds->state) {
2434 case ST_RD_TRANSLATION:
2435 if (!lzx_br_read_ahead(strm, br, 1)) {
2436 ds->state = ST_RD_TRANSLATION;
2439 return (ARCHIVE_OK);
2441 ds->translation = lzx_br_bits(br, 1);
2442 lzx_br_consume(br, 1);
2444 case ST_RD_TRANSLATION_SIZE:
2445 if (ds->translation) {
2446 if (!lzx_br_read_ahead(strm, br, 32)) {
2447 ds->state = ST_RD_TRANSLATION_SIZE;
2450 return (ARCHIVE_OK);
2452 ds->translation_size = lzx_br_bits(br, 16);
2453 lzx_br_consume(br, 16);
2454 ds->translation_size <<= 16;
2455 ds->translation_size |= lzx_br_bits(br, 16);
2456 lzx_br_consume(br, 16);
2459 case ST_RD_BLOCK_TYPE:
2460 if (!lzx_br_read_ahead(strm, br, 3)) {
2461 ds->state = ST_RD_BLOCK_TYPE;
2464 return (ARCHIVE_OK);
2466 ds->block_type = lzx_br_bits(br, 3);
2467 lzx_br_consume(br, 3);
2468 /* Check a block type. */
2469 switch (ds->block_type) {
2470 case VERBATIM_BLOCK:
2471 case ALIGNED_OFFSET_BLOCK:
2472 case UNCOMPRESSED_BLOCK:
2475 goto failed;/* Invalid */
2478 case ST_RD_BLOCK_SIZE:
2479 if (!lzx_br_read_ahead(strm, br, 24)) {
2480 ds->state = ST_RD_BLOCK_SIZE;
2483 return (ARCHIVE_OK);
2485 ds->block_size = lzx_br_bits(br, 8);
2486 lzx_br_consume(br, 8);
2487 ds->block_size <<= 16;
2488 ds->block_size |= lzx_br_bits(br, 16);
2489 lzx_br_consume(br, 16);
2490 if (ds->block_size == 0)
2492 ds->block_bytes_avail = ds->block_size;
2493 if (ds->block_type != UNCOMPRESSED_BLOCK) {
2494 if (ds->block_type == VERBATIM_BLOCK)
2495 ds->state = ST_RD_VERBATIM;
2497 ds->state = ST_RD_ALIGNED_OFFSET;
2501 case ST_RD_ALIGNMENT:
2503 * Handle an Uncompressed Block.
2505 /* Skip padding to align following field on
2506 * 16-bit boundary. */
2507 if (lzx_br_is_unaligned(br))
2508 lzx_br_consume_unaligned_bits(br);
2510 if (lzx_br_read_ahead(strm, br, 16))
2511 lzx_br_consume(br, 16);
2513 ds->state = ST_RD_ALIGNMENT;
2516 return (ARCHIVE_OK);
2519 /* Preparation to read repeated offsets R0,R1 and R2. */
2520 ds->rbytes_avail = 0;
2521 ds->state = ST_RD_R0;
2528 /* Drain bits in the cache buffer of
2530 if (lzx_br_has(br, 32)) {
2531 u16 = lzx_br_bits(br, 16);
2532 lzx_br_consume(br, 16);
2533 archive_le16enc(ds->rbytes, u16);
2534 u16 = lzx_br_bits(br, 16);
2535 lzx_br_consume(br, 16);
2536 archive_le16enc(ds->rbytes+2, u16);
2537 ds->rbytes_avail = 4;
2538 } else if (lzx_br_has(br, 16)) {
2539 u16 = lzx_br_bits(br, 16);
2540 lzx_br_consume(br, 16);
2541 archive_le16enc(ds->rbytes, u16);
2542 ds->rbytes_avail = 2;
2544 if (ds->rbytes_avail < 4 && ds->br.have_odd) {
2545 ds->rbytes[ds->rbytes_avail++] =
2547 ds->br.have_odd = 0;
2549 while (ds->rbytes_avail < 4) {
2550 if (strm->avail_in <= 0) {
2553 return (ARCHIVE_OK);
2555 ds->rbytes[ds->rbytes_avail++] =
2559 ds->rbytes_avail = 0;
2560 if (ds->state == ST_RD_R0) {
2561 ds->r0 = archive_le32dec(ds->rbytes);
2564 ds->state = ST_RD_R1;
2565 } else if (ds->state == ST_RD_R1) {
2566 ds->r1 = archive_le32dec(ds->rbytes);
2569 ds->state = ST_RD_R2;
2570 } else if (ds->state == ST_RD_R2) {
2571 ds->r2 = archive_le32dec(ds->rbytes);
2574 /* We've gotten all repeated offsets. */
2575 ds->state = ST_COPY_UNCOMP1;
2577 } while (ds->state != ST_COPY_UNCOMP1);
2579 case ST_COPY_UNCOMP1:
2581 * Copy bytes form next_in to next_out directly.
2583 while (ds->block_bytes_avail) {
2586 if (strm->avail_out <= 0)
2587 /* Output buffer is empty. */
2588 return (ARCHIVE_OK);
2589 if (strm->avail_in <= 0) {
2590 /* Input buffer is empty. */
2593 return (ARCHIVE_OK);
2595 l = (int)ds->block_bytes_avail;
2596 if (l > ds->w_size - ds->w_pos)
2597 l = ds->w_size - ds->w_pos;
2598 if (l > strm->avail_out)
2599 l = (int)strm->avail_out;
2600 if (l > strm->avail_in)
2601 l = (int)strm->avail_in;
2602 memcpy(strm->next_out, strm->next_in, l);
2603 memcpy(&(ds->w_buff[ds->w_pos]),
2606 strm->avail_in -= l;
2607 strm->next_out += l;
2608 strm->avail_out -= l;
2609 strm->total_out += l;
2610 ds->w_pos = (ds->w_pos + l) & ds->w_mask;
2611 ds->block_bytes_avail -= l;
2614 case ST_COPY_UNCOMP2:
2615 /* Re-align; skip padding byte. */
2616 if (ds->block_size & 1) {
2617 if (strm->avail_in <= 0) {
2618 /* Input buffer is empty. */
2619 ds->state = ST_COPY_UNCOMP2;
2622 return (ARCHIVE_OK);
2627 /* This block ended. */
2628 ds->state = ST_RD_BLOCK_TYPE;
2629 return (ARCHIVE_EOF);
2630 /********************/
2631 case ST_RD_ALIGNED_OFFSET:
2633 * Read Aligned offset tree.
2635 if (!lzx_br_read_ahead(strm, br, 3 * ds->at.len_size)) {
2636 ds->state = ST_RD_ALIGNED_OFFSET;
2639 return (ARCHIVE_OK);
2641 memset(ds->at.freq, 0, sizeof(ds->at.freq));
2642 for (i = 0; i < ds->at.len_size; i++) {
2643 ds->at.bitlen[i] = lzx_br_bits(br, 3);
2644 ds->at.freq[ds->at.bitlen[i]]++;
2645 lzx_br_consume(br, 3);
2647 if (!lzx_make_huffman_table(&ds->at))
2650 case ST_RD_VERBATIM:
2653 case ST_RD_PRE_MAIN_TREE_256:
2655 * Read Pre-tree for first 256 elements of main tree.
2657 if (!lzx_read_pre_tree(strm)) {
2658 ds->state = ST_RD_PRE_MAIN_TREE_256;
2661 return (ARCHIVE_OK);
2663 if (!lzx_make_huffman_table(&ds->pt))
2667 case ST_MAIN_TREE_256:
2669 * Get path lengths of first 256 elements of main tree.
2671 r = lzx_read_bitlen(strm, &ds->mt, 256);
2675 ds->state = ST_MAIN_TREE_256;
2678 return (ARCHIVE_OK);
2682 case ST_RD_PRE_MAIN_TREE_REM:
2684 * Read Pre-tree for remaining elements of main tree.
2686 if (!lzx_read_pre_tree(strm)) {
2687 ds->state = ST_RD_PRE_MAIN_TREE_REM;
2690 return (ARCHIVE_OK);
2692 if (!lzx_make_huffman_table(&ds->pt))
2696 case ST_MAIN_TREE_REM:
2698 * Get path lengths of remaining elements of main tree.
2700 r = lzx_read_bitlen(strm, &ds->mt, -1);
2704 ds->state = ST_MAIN_TREE_REM;
2707 return (ARCHIVE_OK);
2709 if (!lzx_make_huffman_table(&ds->mt))
2713 case ST_RD_PRE_LENGTH_TREE:
2715 * Read Pre-tree for remaining elements of main tree.
2717 if (!lzx_read_pre_tree(strm)) {
2718 ds->state = ST_RD_PRE_LENGTH_TREE;
2721 return (ARCHIVE_OK);
2723 if (!lzx_make_huffman_table(&ds->pt))
2727 case ST_LENGTH_TREE:
2729 * Get path lengths of remaining elements of main tree.
2731 r = lzx_read_bitlen(strm, &ds->lt, -1);
2735 ds->state = ST_LENGTH_TREE;
2738 return (ARCHIVE_OK);
2740 if (!lzx_make_huffman_table(&ds->lt))
2742 ds->state = ST_MAIN;
2747 return (ds->error = ARCHIVE_FAILED);
2751 lzx_decode_blocks(struct lzx_stream *strm, int last)
2753 struct lzx_dec *ds = strm->ds;
2754 struct lzx_br bre = ds->br;
2755 struct huffman *at = &(ds->at), *lt = &(ds->lt), *mt = &(ds->mt);
2756 const struct lzx_pos_tbl *pos_tbl = ds->pos_tbl;
2757 unsigned char *noutp = strm->next_out;
2758 unsigned char *endp = noutp + strm->avail_out;
2759 unsigned char *w_buff = ds->w_buff;
2760 unsigned char *at_bitlen = at->bitlen;
2761 unsigned char *lt_bitlen = lt->bitlen;
2762 unsigned char *mt_bitlen = mt->bitlen;
2763 size_t block_bytes_avail = ds->block_bytes_avail;
2764 int at_max_bits = at->max_bits;
2765 int lt_max_bits = lt->max_bits;
2766 int mt_max_bits = mt->max_bits;
2767 int c, copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2768 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2769 int length_header = ds->length_header;
2770 int offset_bits = ds->offset_bits;
2771 int position_slot = ds->position_slot;
2772 int r0 = ds->r0, r1 = ds->r1, r2 = ds->r2;
2773 int state = ds->state;
2774 char block_type = ds->block_type;
2780 if (block_bytes_avail == 0) {
2781 /* This block ended. */
2782 ds->state = ST_RD_BLOCK_TYPE;
2784 ds->block_bytes_avail =
2786 ds->copy_len = copy_len;
2787 ds->copy_pos = copy_pos;
2788 ds->length_header = length_header;
2789 ds->position_slot = position_slot;
2790 ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
2792 strm->avail_out = endp - noutp;
2793 return (ARCHIVE_EOF);
2796 /* Output buffer is empty. */
2799 if (!lzx_br_read_ahead(strm, &bre,
2803 /* Remaining bits are less than
2804 * maximum bits(mt.max_bits) but maybe
2805 * it still remains as much as we need,
2806 * so we should try to use it with
2808 c = lzx_decode_huffman(mt,
2810 &bre, mt_max_bits));
2811 lzx_br_consume(&bre, mt_bitlen[c]);
2812 if (!lzx_br_has(&bre, 0))
2813 goto failed;/* Over read. */
2815 c = lzx_decode_huffman(mt,
2816 lzx_br_bits(&bre, mt_max_bits));
2817 lzx_br_consume(&bre, mt_bitlen[c]);
2822 * 'c' is exactly literal code.
2824 /* Save a decoded code to reference it
2827 w_pos = (w_pos + 1) & w_mask;
2828 /* Store the decoded code to output buffer. */
2830 block_bytes_avail--;
2833 * Get a match code, its length and offset.
2836 length_header = c & 7;
2837 position_slot = c >> 3;
2843 if (length_header == 7) {
2844 if (!lzx_br_read_ahead(strm, &bre,
2850 c = lzx_decode_huffman(lt,
2852 &bre, lt_max_bits));
2853 lzx_br_consume(&bre, lt_bitlen[c]);
2854 if (!lzx_br_has(&bre, 0))
2855 goto failed;/* Over read. */
2857 c = lzx_decode_huffman(lt,
2858 lzx_br_bits(&bre, lt_max_bits));
2859 lzx_br_consume(&bre, lt_bitlen[c]);
2861 copy_len = c + 7 + 2;
2863 copy_len = length_header + 2;
2864 if ((size_t)copy_len > block_bytes_avail)
2869 switch (position_slot) {
2870 case 0: /* Use repeated offset 0. */
2872 state = ST_REAL_POS;
2874 case 1: /* Use repeated offset 1. */
2876 /* Swap repeated offset. */
2879 state = ST_REAL_POS;
2881 case 2: /* Use repeated offset 2. */
2883 /* Swap repeated offset. */
2886 state = ST_REAL_POS;
2890 pos_tbl[position_slot].footer_bits;
2896 * Get the offset, which is a distance from
2897 * current window position.
2899 if (block_type == ALIGNED_OFFSET_BLOCK &&
2901 int offbits = offset_bits - 3;
2903 if (!lzx_br_read_ahead(strm, &bre, offbits)) {
2909 copy_pos = lzx_br_bits(&bre, offbits) << 3;
2911 /* Get an aligned number. */
2912 if (!lzx_br_read_ahead(strm, &bre,
2913 offbits + at_max_bits)) {
2918 lzx_br_consume(&bre, offbits);
2919 c = lzx_decode_huffman(at,
2920 lzx_br_bits_forced(&bre,
2922 lzx_br_consume(&bre, at_bitlen[c]);
2923 if (!lzx_br_has(&bre, 0))
2924 goto failed;/* Over read. */
2926 lzx_br_consume(&bre, offbits);
2927 c = lzx_decode_huffman(at,
2928 lzx_br_bits(&bre, at_max_bits));
2929 lzx_br_consume(&bre, at_bitlen[c]);
2931 /* Add an aligned number. */
2934 if (!lzx_br_read_ahead(strm, &bre,
2941 copy_pos = lzx_br_bits(&bre, offset_bits);
2942 lzx_br_consume(&bre, offset_bits);
2944 copy_pos += pos_tbl[position_slot].base -2;
2946 /* Update repeated offset LRU queue. */
2953 * Compute a real position in window.
2955 copy_pos = (w_pos - copy_pos) & w_mask;
2959 * Copy several bytes as extracted data from the window
2960 * into the output buffer.
2963 const unsigned char *s;
2967 if (copy_pos > w_pos) {
2968 if (l > w_size - copy_pos)
2969 l = w_size - copy_pos;
2971 if (l > w_size - w_pos)
2974 if (noutp + l >= endp)
2975 l = (int)(endp - noutp);
2976 s = w_buff + copy_pos;
2977 if (l >= 8 && ((copy_pos + l < w_pos)
2978 || (w_pos + l < copy_pos))) {
2979 memcpy(w_buff + w_pos, s, l);
2980 memcpy(noutp, s, l);
2986 for (li = 0; li < l; li++)
2987 noutp[li] = d[li] = s[li];
2990 copy_pos = (copy_pos + l) & w_mask;
2991 w_pos = (w_pos + l) & w_mask;
2992 block_bytes_avail -= l;
2994 /* A copy of current pattern ended. */
2997 if (noutp >= endp) {
2998 /* Output buffer is empty. */
3008 return (ds->error = ARCHIVE_FAILED);
3011 ds->block_bytes_avail = block_bytes_avail;
3012 ds->copy_len = copy_len;
3013 ds->copy_pos = copy_pos;
3014 ds->length_header = length_header;
3015 ds->offset_bits = offset_bits;
3016 ds->position_slot = position_slot;
3017 ds->r0 = r0; ds->r1 = r1; ds->r2 = r2;
3020 strm->avail_out = endp - noutp;
3021 return (ARCHIVE_OK);
3025 lzx_read_pre_tree(struct lzx_stream *strm)
3027 struct lzx_dec *ds = strm->ds;
3028 struct lzx_br *br = &(ds->br);
3032 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
3033 for (i = ds->loop; i < ds->pt.len_size; i++) {
3034 if (!lzx_br_read_ahead(strm, br, 4)) {
3038 ds->pt.bitlen[i] = lzx_br_bits(br, 4);
3039 ds->pt.freq[ds->pt.bitlen[i]]++;
3040 lzx_br_consume(br, 4);
3047 * Read a bunch of bit-lengths from pre-tree.
3050 lzx_read_bitlen(struct lzx_stream *strm, struct huffman *d, int end)
3052 struct lzx_dec *ds = strm->ds;
3053 struct lzx_br *br = &(ds->br);
3054 int c, i, j, ret, same;
3059 memset(d->freq, 0, sizeof(d->freq));
3065 if (!lzx_br_read_ahead(strm, br, ds->pt.max_bits))
3067 rbits = lzx_br_bits(br, ds->pt.max_bits);
3068 c = lzx_decode_huffman(&(ds->pt), rbits);
3070 case 17:/* several zero lengths, from 4 to 19. */
3071 if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+4))
3073 lzx_br_consume(br, ds->pt.bitlen[c]);
3074 same = lzx_br_bits(br, 4) + 4;
3076 return (-1);/* Invalid */
3077 lzx_br_consume(br, 4);
3078 for (j = 0; j < same; j++)
3081 case 18:/* many zero lengths, from 20 to 51. */
3082 if (!lzx_br_read_ahead(strm, br, ds->pt.bitlen[c]+5))
3084 lzx_br_consume(br, ds->pt.bitlen[c]);
3085 same = lzx_br_bits(br, 5) + 20;
3087 return (-1);/* Invalid */
3088 lzx_br_consume(br, 5);
3089 memset(d->bitlen + i, 0, same);
3092 case 19:/* a few same lengths. */
3093 if (!lzx_br_read_ahead(strm, br,
3094 ds->pt.bitlen[c]+1+ds->pt.max_bits))
3096 lzx_br_consume(br, ds->pt.bitlen[c]);
3097 same = lzx_br_bits(br, 1) + 4;
3100 lzx_br_consume(br, 1);
3101 rbits = lzx_br_bits(br, ds->pt.max_bits);
3102 c = lzx_decode_huffman(&(ds->pt), rbits);
3103 lzx_br_consume(br, ds->pt.bitlen[c]);
3104 c = (d->bitlen[i] - c + 17) % 17;
3106 return (-1);/* Invalid */
3107 for (j = 0; j < same; j++)
3112 lzx_br_consume(br, ds->pt.bitlen[c]);
3113 c = (d->bitlen[i] - c + 17) % 17;
3115 return (-1);/* Invalid */
3128 lzx_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
3132 if (hf->bitlen == NULL || hf->len_size != (int)len_size) {
3134 hf->bitlen = calloc(len_size, sizeof(hf->bitlen[0]));
3135 if (hf->bitlen == NULL)
3136 return (ARCHIVE_FATAL);
3137 hf->len_size = (int)len_size;
3139 memset(hf->bitlen, 0, len_size * sizeof(hf->bitlen[0]));
3140 if (hf->tbl == NULL) {
3141 if (tbl_bits < HTBL_BITS)
3145 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
3146 if (hf->tbl == NULL)
3147 return (ARCHIVE_FATAL);
3148 hf->tbl_bits = tbl_bits;
3150 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
3151 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
3152 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
3153 if (hf->tree == NULL)
3154 return (ARCHIVE_FATAL);
3156 return (ARCHIVE_OK);
3160 lzx_huffman_free(struct huffman *hf)
3168 * Make a huffman coding table.
3171 lzx_make_huffman_table(struct huffman *hf)
3174 const unsigned char *bitlen;
3175 int bitptn[17], weight[17];
3176 int i, maxbits = 0, ptn, tbl_size, w;
3177 int diffbits, len_avail;
3180 * Initialize bit patterns.
3183 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
3187 ptn += hf->freq[i] * w;
3191 if ((ptn & 0xffff) != 0 || maxbits > hf->tbl_bits)
3192 return (0);/* Invalid */
3194 hf->max_bits = maxbits;
3197 * Cut out extra bits which we won't house in the table.
3198 * This preparation reduces the same calculation in the for-loop
3202 int ebits = 16 - maxbits;
3203 for (i = 1; i <= maxbits; i++) {
3204 bitptn[i] >>= ebits;
3205 weight[i] >>= ebits;
3208 if (maxbits > HTBL_BITS) {
3212 diffbits = maxbits - HTBL_BITS;
3213 for (i = 1; i <= HTBL_BITS; i++) {
3214 bitptn[i] >>= diffbits;
3215 weight[i] >>= diffbits;
3217 htbl_max = bitptn[HTBL_BITS] +
3218 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
3219 p = &(hf->tbl[htbl_max]);
3220 while (p < &hf->tbl[1U<<HTBL_BITS])
3224 hf->shift_bits = diffbits;
3229 tbl_size = 1 << HTBL_BITS;
3231 bitlen = hf->bitlen;
3232 len_avail = hf->len_size;
3234 for (i = 0; i < len_avail; i++) {
3243 /* Get a bit pattern */
3247 if (len <= HTBL_BITS) {
3248 /* Calculate next bit pattern */
3249 if ((bitptn[len] = ptn + cnt) > tbl_size)
3250 return (0);/* Invalid */
3251 /* Update the table */
3254 p[cnt] = (uint16_t)i;
3259 * A bit length is too big to be housed to a direct table,
3260 * so we use a tree model for its extra bits.
3262 bitptn[len] = ptn + cnt;
3263 bit = 1U << (diffbits -1);
3264 extlen = len - HTBL_BITS;
3266 p = &(tbl[ptn >> diffbits]);
3268 *p = len_avail + hf->tree_used;
3269 ht = &(hf->tree[hf->tree_used++]);
3270 if (hf->tree_used > hf->tree_avail)
3271 return (0);/* Invalid */
3275 if (*p < len_avail ||
3276 *p >= (len_avail + hf->tree_used))
3277 return (0);/* Invalid */
3278 ht = &(hf->tree[*p - len_avail]);
3280 while (--extlen > 0) {
3282 if (ht->left < len_avail) {
3283 ht->left = len_avail + hf->tree_used;
3284 ht = &(hf->tree[hf->tree_used++]);
3285 if (hf->tree_used > hf->tree_avail)
3286 return (0);/* Invalid */
3290 ht = &(hf->tree[ht->left - len_avail]);
3293 if (ht->right < len_avail) {
3294 ht->right = len_avail + hf->tree_used;
3295 ht = &(hf->tree[hf->tree_used++]);
3296 if (hf->tree_used > hf->tree_avail)
3297 return (0);/* Invalid */
3301 ht = &(hf->tree[ht->right - len_avail]);
3308 return (0);/* Invalid */
3309 ht->left = (uint16_t)i;
3312 return (0);/* Invalid */
3313 ht->right = (uint16_t)i;
3320 lzx_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
3326 extlen = hf->shift_bits;
3327 while (c >= hf->len_size) {
3329 if (extlen-- <= 0 || c >= hf->tree_used)
3331 if (rbits & (1U << extlen))
3340 lzx_decode_huffman(struct huffman *hf, unsigned rbits)
3344 * At first search an index table for a bit pattern.
3345 * If it fails, search a huffman tree for.
3347 c = hf->tbl[rbits >> hf->shift_bits];
3348 if (c < hf->len_size)
3350 /* This bit pattern needs to be found out at a huffman tree. */
3351 return (lzx_decode_huffman_tree(hf, rbits, c));