2 * Copyright (c) 2008-2014 Michihiro NAKAJIMA
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 #include "archive_platform.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
49 #define MAXMATCH 256 /* Maximum match length. */
50 #define MINMATCH 3 /* Minimum match length. */
52 * Literal table format:
54 * +---------------+-------------------------+
55 * | literal code | match length |
56 * | 0 ... 255 | MINMATCH ... MAXMATCH |
57 * +---------------+-------------------------+
58 * <--- LT_BITLEN_SIZE --->
60 /* Literal table size. */
61 #define LT_BITLEN_SIZE (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62 /* Position table size.
63 * Note: this is used for both the position table and the pre-literal table. */
64 #define PT_BITLEN_SIZE (3 + 16)
67 /* Decoding status. */
71 * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
76 /* Window buffer, which is a loop buffer. */
77 unsigned char *w_buff;
78 /* The insert position to the window. */
80 /* The position where we can copy decoded code from the window. */
82 /* The length how many bytes we can copy decoded code from
90 #define CACHE_TYPE uint64_t
91 #define CACHE_BITS (8 * sizeof(CACHE_TYPE))
93 CACHE_TYPE cache_buffer;
94 /* Indicates how many bits avail in cache_buffer. */
106 unsigned char *bitlen;
109 * Use an index table. It's faster than searching a Huffman
110 * coding tree, which is a binary tree. But using a large
111 * index table causes many L1 cache read misses.
119 /* Direct access table. */
121 /* Binary tree table for extra bits over the direct access. */
131 int literal_pt_len_size;
132 int literal_pt_len_bits;
133 int reading_position;
139 const unsigned char *next_in;
142 const unsigned char *ref_ptr;
149 /* entry_bytes_remaining is the number of bytes we expect. */
150 int64_t entry_offset;
151 int64_t entry_bytes_remaining;
152 int64_t entry_unconsumed;
153 uint16_t entry_crc_calculated;
155 size_t header_size; /* header size */
156 unsigned char level; /* header level */
157 char method[3]; /* compress type */
158 int64_t compsize; /* compressed data size */
159 int64_t origsize; /* original file size */
161 #define BIRTHTIME_IS_SET 1
162 #define ATIME_IS_SET 2
163 #define UNIX_MODE_IS_SET 4
166 long birthtime_tv_nsec;
174 struct archive_string uname;
175 struct archive_string gname;
178 struct archive_string_conv *sconv;
179 struct archive_string_conv *opt_sconv;
181 struct archive_string dirname;
182 struct archive_string filename;
183 struct archive_wstring ws;
185 unsigned char dos_attr;
187 /* Flag marking that the first header of the archive has been read. */
188 char found_first_header;
189 /* Flag that indicates an empty directory. */
192 /* Flags to mark progress of decompression. */
193 char decompress_init;
195 char end_of_entry_cleanup;
196 char entry_is_compressed;
198 char format_name[64];
200 struct lzh_stream strm;
204 * LHA header common member offset.
206 #define H_METHOD_OFFSET 2 /* Compress type. */
207 #define H_ATTR_OFFSET 19 /* DOS attribute. */
208 #define H_LEVEL_OFFSET 20 /* Header Level. */
209 #define H_SIZE 22 /* Minimum header size. */
211 static int archive_read_format_lha_bid(struct archive_read *, int);
212 static int archive_read_format_lha_options(struct archive_read *,
213 const char *, const char *);
214 static int archive_read_format_lha_read_header(struct archive_read *,
215 struct archive_entry *);
216 static int archive_read_format_lha_read_data(struct archive_read *,
217 const void **, size_t *, int64_t *);
218 static int archive_read_format_lha_read_data_skip(struct archive_read *);
219 static int archive_read_format_lha_cleanup(struct archive_read *);
221 static void lha_replace_path_separator(struct lha *,
222 struct archive_entry *);
223 static int lha_read_file_header_0(struct archive_read *, struct lha *);
224 static int lha_read_file_header_1(struct archive_read *, struct lha *);
225 static int lha_read_file_header_2(struct archive_read *, struct lha *);
226 static int lha_read_file_header_3(struct archive_read *, struct lha *);
227 static int lha_read_file_extended_header(struct archive_read *,
228 struct lha *, uint16_t *, int, size_t, size_t *);
229 static size_t lha_check_header_format(const void *);
230 static int lha_skip_sfx(struct archive_read *);
231 static time_t lha_dos_time(const unsigned char *);
232 static time_t lha_win_time(uint64_t, long *);
233 static unsigned char lha_calcsum(unsigned char, const void *,
235 static int lha_parse_linkname(struct archive_string *,
236 struct archive_string *);
237 static int lha_read_data_none(struct archive_read *, const void **,
238 size_t *, int64_t *);
239 static int lha_read_data_lzh(struct archive_read *, const void **,
240 size_t *, int64_t *);
241 static void lha_crc16_init(void);
242 static uint16_t lha_crc16(uint16_t, const void *, size_t);
243 static int lzh_decode_init(struct lzh_stream *, const char *);
244 static void lzh_decode_free(struct lzh_stream *);
245 static int lzh_decode(struct lzh_stream *, int);
246 static int lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247 static int lzh_huffman_init(struct huffman *, size_t, int);
248 static void lzh_huffman_free(struct huffman *);
249 static int lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250 static int lzh_make_fake_table(struct huffman *, uint16_t);
251 static int lzh_make_huffman_table(struct huffman *);
252 static inline int lzh_decode_huffman(struct huffman *, unsigned);
253 static int lzh_decode_huffman_tree(struct huffman *, unsigned, int);
/*
 * Enable LHA read support on the archive handle `_a`.
 *
 * A zero-filled struct lha is allocated as the per-format state, and the
 * bid, options, read_header, read_data, read_data_skip and cleanup
 * callbacks defined in this file are passed to
 * __archive_read_register_format().
 * The ENOMEM branch records an error and returns ARCHIVE_FATAL.
 * NOTE(review): that branch is presumably taken when calloc() returns
 * NULL -- confirm.
 */
257 archive_read_support_format_lha(struct archive *_a)
259 struct archive_read *a = (struct archive_read *)_a;
/* NOTE(review): archive_check_magic() is presumably a guard that rejects
 * a handle whose magic/state is not ARCHIVE_READ_MAGIC/ARCHIVE_STATE_NEW
 * -- confirm against its definition. */
263 archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264 ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
/* calloc() zero-fills the state, so every flag and counter starts at 0. */
266 lha = (struct lha *)calloc(1, sizeof(*lha));
268 archive_set_error(&a->archive, ENOMEM,
269 "Can't allocate lha data");
270 return (ARCHIVE_FATAL);
/* lha->ws is the wide-string buffer used by lha_replace_path_separator(). */
272 archive_string_init(&lha->ws);
/* Register the LHA callbacks with the reader. */
274 r = __archive_read_register_format(a,
277 archive_read_format_lha_bid,
278 archive_read_format_lha_options,
279 archive_read_format_lha_read_header,
280 archive_read_format_lha_read_data,
281 archive_read_format_lha_read_data_skip,
283 archive_read_format_lha_cleanup,
/*
 * Test whether the bytes at `h` look like the start of an LHA header.
 *
 * The method field at H_METHOD_OFFSET must be framed as -l??- and the
 * header-level byte (H_LEVEL_OFFSET) and DOS attribute byte
 * (H_ATTR_OFFSET) are examined as well, so `h` must cover at least those
 * offsets.  Callers in this file compare the result with 0 and treat 0 as
 * a recognised header; any other result is next_skip_bytes.
 * NOTE(review): next_skip_bytes is presumably how far a scanner may
 * advance before testing again -- confirm against the scanning loops.
 */
293 lha_check_header_format(const void *h)
295 const unsigned char *p = h;
296 size_t next_skip_bytes;
/* Dispatch on the fourth byte of the method field. */
298 switch (p[H_METHOD_OFFSET+3]) {
300 * "-lh0-" ... "-lh7-" "-lhd-"
303 case '0': case '1': case '2': case '3':
304 case '4': case '5': case '6': case '7':
309 /* b0 == 0 means the end of an LHa archive file. */
/* Framing check: dash, letter l, two method bytes, dash. */
312 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313 || p[H_METHOD_OFFSET+4] != '-')
/* Methods of the -lh?- family. */
316 if (p[H_METHOD_OFFSET+2] == 'h') {
318 if (p[H_METHOD_OFFSET+3] == 's')
/* Level 0 is tested alone; for levels up to 3 the attribute byte must
 * also equal 0x20. */
320 if (p[H_LEVEL_OFFSET] == 0)
322 if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
325 if (p[H_METHOD_OFFSET+2] == 'z') {
326 /* LArc extensions: -lzs-,-lz4- and -lz5- */
/* NOTE(review): a non-zero header level presumably rejects the LArc
 * methods -- confirm. */
327 if (p[H_LEVEL_OFFSET] != 0)
329 if (p[H_METHOD_OFFSET+3] == 's'
330 || p[H_METHOD_OFFSET+3] == '4'
331 || p[H_METHOD_OFFSET+3] == '5')
/* The inspected byte is itself a signature character.  The skip equals
 * the shift that would move it into its own slot of -l??- : h or z
 * belong one byte earlier, l two bytes earlier, the leading dash three.
 * Anything else allows a skip of four. */
335 case 'h': next_skip_bytes = 1; break;
336 case 'z': next_skip_bytes = 1; break;
337 case 'l': next_skip_bytes = 2; break;
338 case '-': next_skip_bytes = 3; break;
339 default : next_skip_bytes = 4; break;
342 return (next_skip_bytes);
/*
 * Bid callback registered for the LHA reader.
 *
 * Looks ahead at the first H_SIZE bytes and tests them with
 * lha_check_header_format().  When the data starts with the bytes M Z
 * instead, it keeps looking ahead in windows and scans for a header while
 * the scan offset stays below 1024 * 20 bytes.
 * NOTE(review): the M Z prefix is presumably the executable stub of a
 * self-extracting archive, as the comment in the header reader suggests.
 */
346 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
350 ssize_t bytes_avail, offset, window;
353 /* If there's already a better bid than we can ever
354 make, don't bother testing. */
/* Look ahead at the minimum header size. */
358 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
361 if (lha_check_header_format(p) == 0)
364 if (p[0] == 'M' && p[1] == 'Z') {
/* Bound the search to offsets below 20 KiB. */
368 while (offset < (1024 * 20)) {
369 buff = __archive_read_ahead(a, offset + window,
372 /* Remaining bytes are less than window. */
374 if (window < (H_SIZE + 3))
378 p = (const char *)buff + offset;
/* Test candidate positions while more than H_SIZE bytes remain after p. */
379 while (p + H_SIZE < (const char *)buff + bytes_avail) {
380 if ((next = lha_check_header_format(p)) == 0)
/* Remember how far the scan got, relative to the buffer start. */
384 offset = p - (const char *)buff;
/*
 * Option callback for the LHA reader.
 *
 * The only key tested here is hdrcharset.  Its value names a character
 * set that is handed to archive_string_conversion_from_charset(); the
 * outcome is then checked through lha->opt_sconv.  A NULL or empty value
 * is reported with ARCHIVE_ERRNO_MISC.  The local result starts out as
 * ARCHIVE_FAILED, and keys not handled here end in ARCHIVE_WARN.
 */
391 archive_read_format_lha_options(struct archive_read *a,
392 const char *key, const char *val)
395 int ret = ARCHIVE_FAILED;
397 lha = (struct lha *)(a->format->data);
398 if (strcmp(key, "hdrcharset") == 0) {
/* The option is meaningless without a charset name. */
399 if (val == NULL || val[0] == 0)
400 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401 "lha: hdrcharset option needs a character-set name");
404 archive_string_conversion_from_charset(
405 &a->archive, val, 0);
406 if (lha->opt_sconv != NULL)
414 /* Note: The "warn" return is just to inform the options
415 * supervisor that we didn't handle it. It will generate
416 * a suitable error if no one used this option. */
417 return (ARCHIVE_WARN);
/*
 * Skip the data in front of the first LHA header (used by the header
 * reader when the stream starts with the bytes M Z).
 *
 * Looks ahead one window at a time and scans for a position accepted by
 * lha_check_header_format().  On a match the bytes in front of that
 * position are consumed.  When the search gives up, a file-format error
 * is recorded and ARCHIVE_FATAL is returned.
 */
421 lha_skip_sfx(struct archive_read *a)
426 ssize_t bytes, window;
430 h = __archive_read_ahead(a, window, &bytes);
432 /* Remaining bytes are less than window. */
/* A window below H_SIZE + 3 bytes is too small to be worth scanning. */
434 if (window < (H_SIZE + 3))
444 * Scan ahead until we find something that looks
445 * like the lha header.
447 while (p + H_SIZE < q) {
448 if ((next = lha_check_header_format(p)) == 0) {
/* Match: consume everything in front of the header. */
449 skip = p - (const char *)h;
450 __archive_read_consume(a, skip);
/* No match: consume the bytes scanned so far. */
455 skip = p - (const char *)h;
456 __archive_read_consume(a, skip);
459 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460 "Couldn't find out LHa header");
461 return (ARCHIVE_FATAL);
/*
 * Shared failure path for short reads while parsing a header: records a
 * file-format error on the archive and returns ARCHIVE_FATAL, so callers
 * can write `return (truncated_error(a));`.
 */
465 truncated_error(struct archive_read *a)
467 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468 "Truncated LHa header");
469 return (ARCHIVE_FATAL);
/*
 * read_header callback: parse the next LHA member header into `entry`.
 *
 * Steps visible below:
 *  - reset the per-entry progress flags;
 *  - return ARCHIVE_EOF when no more data is available or the next byte
 *    is 0 (the end-of-archive mark);
 *  - before the first header has been found, skip a leading M Z
 *    executable stub through lha_skip_sfx();
 *  - validate the header with lha_check_header_format() and dispatch on
 *    the header-level byte to lha_read_file_header_0..3();
 *  - build the pathname from dirname + filename, split off the symlink
 *    target for symlink modes, and copy names, mode, ids and times into
 *    `entry`;
 *  - prime entry_bytes_remaining, entry_offset and entry_crc_calculated
 *    for the data reader, and publish lha->format_name (the text lha
 *    followed by the dash-framed three method characters) as the
 *    archive format name.
 *
 * Errors from the level readers (err < ARCHIVE_WARN) are tested right
 * after the dispatch.  A failed symlink-name split returns
 * ARCHIVE_FAILED; allocation failures return ARCHIVE_FATAL.
 */
473 archive_read_format_lha_read_header(struct archive_read *a,
474 struct archive_entry *entry)
476 struct archive_string linkname;
477 struct archive_string pathname;
479 const unsigned char *p;
480 const char *signature;
485 a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486 if (a->archive.archive_format_name == NULL)
487 a->archive.archive_format_name = "lha";
489 lha = (struct lha *)(a->format->data);
/* Reset the per-entry progress flags. */
490 lha->decompress_init = 0;
491 lha->end_of_entry = 0;
492 lha->end_of_entry_cleanup = 0;
493 lha->entry_unconsumed = 0;
495 if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
497 * LHa archiver added 0 to the tail of its archive file as
498 * the mark of the end of the archive.
500 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501 if (signature == NULL || signature[0] == 0)
502 return (ARCHIVE_EOF);
503 return (truncated_error(a));
506 signature = (const char *)p;
507 if (lha->found_first_header == 0 &&
508 signature[0] == 'M' && signature[1] == 'Z') {
509 /* This is an executable? Must be self-extracting... */
510 err = lha_skip_sfx(a);
511 if (err < ARCHIVE_WARN)
/* NOTE(review): only one byte is requested here although offsets up to
 * H_LEVEL_OFFSET are read from p below; presumably safe because
 * lha_skip_sfx() stopped at a position with a full header in the
 * look-ahead buffer -- confirm. */
514 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515 return (truncated_error(a));
516 signature = (const char *)p;
518 /* signature[0] == 0 means the end of an LHa archive file. */
519 if (signature[0] == 0)
520 return (ARCHIVE_EOF);
523 * Check the header format and method type.
525 if (lha_check_header_format(p) != 0) {
526 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
528 return (ARCHIVE_FATAL);
531 /* We've found the first header. */
532 lha->found_first_header = 1;
533 /* Set a default value and common data */
534 lha->header_size = 0;
535 lha->level = p[H_LEVEL_OFFSET];
/* Keep the three method characters that follow the leading dash. */
536 lha->method[0] = p[H_METHOD_OFFSET+1];
537 lha->method[1] = p[H_METHOD_OFFSET+2];
538 lha->method[2] = p[H_METHOD_OFFSET+3];
/* NOTE(review): method lhd presumably denotes a directory entry --
 * confirm. */
539 if (memcmp(lha->method, "lhd", 3) == 0)
/* lh0 and lz4 are handled as stored (not compressed) data. */
543 if (memcmp(lha->method, "lh0", 3) == 0 ||
544 memcmp(lha->method, "lz4", 3) == 0)
545 lha->entry_is_compressed = 0;
547 lha->entry_is_compressed = 1;
553 lha->birthtime_tv_nsec = 0;
555 lha->mtime_tv_nsec = 0;
557 lha->atime_tv_nsec = 0;
/* Default permissions until a header supplies a mode: 0777 for
 * directories, 0666 otherwise. */
558 lha->mode = (lha->directory)? 0777 : 0666;
561 archive_string_empty(&lha->dirname);
562 archive_string_empty(&lha->filename);
/* A converter chosen through the hdrcharset option takes precedence. */
564 if (lha->opt_sconv != NULL)
565 lha->sconv = lha->opt_sconv;
/* Hand off to the reader for this header level. */
569 switch (p[H_LEVEL_OFFSET]) {
571 err = lha_read_file_header_0(a, lha);
574 err = lha_read_file_header_1(a, lha);
577 err = lha_read_file_header_2(a, lha);
580 err = lha_read_file_header_3(a, lha);
583 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584 "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
588 if (err < ARCHIVE_WARN)
592 if (!lha->directory && archive_strlen(&lha->filename) == 0)
593 /* The filename has not been set */
594 return (truncated_error(a));
597 * Make a pathname from a dirname and a filename.
/* dirname is extended in place and then copied into the local pathname. */
599 archive_string_concat(&lha->dirname, &lha->filename);
600 archive_string_init(&pathname);
601 archive_string_init(&linkname);
602 archive_string_copy(&pathname, &lha->dirname);
604 if ((lha->mode & AE_IFMT) == AE_IFLNK) {
606 * Extract the symlink-name if it's included in the pathname.
608 if (!lha_parse_linkname(&linkname, &pathname)) {
609 /* We couldn't get the symlink-name. */
610 archive_set_error(&a->archive,
611 ARCHIVE_ERRNO_FILE_FORMAT,
612 "Unknown symlink-name");
613 archive_string_free(&pathname);
614 archive_string_free(&linkname);
615 return (ARCHIVE_FAILED);
619 * Make sure a file-type is set.
620 * The mode has been overridden if it is in the extended data.
622 lha->mode = (lha->mode & ~AE_IFMT) |
623 ((lha->directory)? AE_IFDIR: AE_IFREG);
/* Without UNIX mode data, DOS attribute bit 0 clears the write bits. */
625 if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626 (lha->dos_attr & 1) != 0)
627 lha->mode &= ~(0222);/* read only. */
630 * Set basic file parameters.
632 if (archive_entry_copy_pathname_l(entry, pathname.s,
633 pathname.length, lha->sconv) != 0) {
634 if (errno == ENOMEM) {
635 archive_set_error(&a->archive, ENOMEM,
636 "Can't allocate memory for Pathname");
/* NOTE(review): this return skips the archive_string_free() calls for
 * pathname and linkname made further down -- possible leak on this
 * path; confirm. */
637 return (ARCHIVE_FATAL);
639 archive_set_error(&a->archive,
640 ARCHIVE_ERRNO_FILE_FORMAT,
641 "Pathname cannot be converted "
642 "from %s to current locale.",
643 archive_string_conversion_charset_name(lha->sconv));
646 archive_string_free(&pathname);
647 if (archive_strlen(&linkname) > 0) {
648 if (archive_entry_copy_symlink_l(entry, linkname.s,
649 linkname.length, lha->sconv) != 0) {
650 if (errno == ENOMEM) {
651 archive_set_error(&a->archive, ENOMEM,
652 "Can't allocate memory for Linkname");
/* NOTE(review): this return skips archive_string_free(&linkname) below
 * -- possible leak on this path; confirm. */
653 return (ARCHIVE_FATAL);
655 archive_set_error(&a->archive,
656 ARCHIVE_ERRNO_FILE_FORMAT,
657 "Linkname cannot be converted "
658 "from %s to current locale.",
659 archive_string_conversion_charset_name(lha->sconv));
663 archive_entry_set_symlink(entry, NULL);
664 archive_string_free(&linkname);
666 * When a header level is 0, there is a possibility that
667 * a pathname and a symlink has '\' character, a directory
668 * separator in DOS/Windows. So we should convert it to '/'.
670 if (p[H_LEVEL_OFFSET] == 0)
671 lha_replace_path_separator(lha, entry);
673 archive_entry_set_mode(entry, lha->mode);
674 archive_entry_set_uid(entry, lha->uid);
675 archive_entry_set_gid(entry, lha->gid);
676 if (archive_strlen(&lha->uname) > 0)
677 archive_entry_set_uname(entry, lha->uname.s);
678 if (archive_strlen(&lha->gname) > 0)
679 archive_entry_set_gname(entry, lha->gname.s);
680 if (lha->setflag & BIRTHTIME_IS_SET) {
681 archive_entry_set_birthtime(entry, lha->birthtime,
682 lha->birthtime_tv_nsec);
683 archive_entry_set_ctime(entry, lha->birthtime,
684 lha->birthtime_tv_nsec);
686 archive_entry_unset_birthtime(entry);
687 archive_entry_unset_ctime(entry);
689 archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690 if (lha->setflag & ATIME_IS_SET)
691 archive_entry_set_atime(entry, lha->atime,
694 archive_entry_unset_atime(entry);
695 if (lha->directory || archive_entry_symlink(entry) != NULL)
696 archive_entry_unset_size(entry);
698 archive_entry_set_size(entry, lha->origsize);
701 * Prepare variables used to read a file content.
703 lha->entry_bytes_remaining = lha->compsize;
704 lha->entry_offset = 0;
705 lha->entry_crc_calculated = 0;
708 * This file does not have a content.
710 if (lha->directory || lha->compsize == 0)
711 lha->end_of_entry = 1;
713 sprintf(lha->format_name, "lha -%c%c%c-",
714 lha->method[0], lha->method[1], lha->method[2]);
715 a->archive.archive_format_name = lha->format_name;
721 * Replace a DOS path separator '\' by a character '/'.
722 * Some multi-byte character set have a character '\' in its second byte.
/*
 * Rewrite backslashes in the wide-character pathname and symlink target
 * of `entry`.
 *
 * Each name, when present, is copied into the buffer lha->ws, scanned
 * character by character for a wide backslash, and then written back to
 * the entry with the matching archive_entry_copy_*_w() call.
 * NOTE(review): the replacement character is presumably the forward
 * slash, as the comment in front of this function says -- confirm.
 */
725 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
/* Pathname first. */
730 if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731 archive_wstrcpy(&(lha->ws), wp);
732 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733 if (lha->ws.s[i] == L'\\')
736 archive_entry_copy_pathname_w(entry, lha->ws.s);
/* Then the symlink target, reusing the same buffer. */
739 if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740 archive_wstrcpy(&(lha->ws), wp);
741 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742 if (lha->ws.s[i] == L'\\')
745 archive_entry_copy_symlink_w(entry, lha->ws.s);
753 * +---------------+----------+----------------+-------------------+
754 * |header size(*1)|header sum|compression type|compressed size(*2)|
755 * +---------------+----------+----------------+-------------------+
756 * <---------------------(*1)----------*
758 * +11 +15 +17 +19 +20 +21
759 * +-----------------+---------+---------+--------------+----------------+
760 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
761 * +-----------------+---------+---------+--------------+----------------+
762 * *--------------------------------(*1)---------------------------------*
764 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+2+(*4)
765 * +---------------+---------+----------+----------------+------------------+
766 * |name length(*3)|file name|file CRC16|extra header(*4)| compressed data |
767 * +---------------+---------+----------+----------------+------------------+
768 * <--(*3)-> <------(*2)------>
769 * *----------------------(*1)-------------------------->
772 #define H0_HEADER_SIZE_OFFSET 0
773 #define H0_HEADER_SUM_OFFSET 1
774 #define H0_COMP_SIZE_OFFSET 7
775 #define H0_ORIG_SIZE_OFFSET 11
776 #define H0_DOS_TIME_OFFSET 15
777 #define H0_NAME_LEN_OFFSET 21
778 #define H0_FILE_NAME_OFFSET 22
779 #define H0_FIXED_SIZE 24
/*
 * Parse a level 0 header into `lha` and consume it from the stream.
 *
 * Reads the fixed part (H0_FIXED_SIZE bytes), derives header_size,
 * compsize, origsize and the DOS timestamp, validates the name length,
 * then looks ahead at the whole header to copy the file name, the file
 * CRC16 and the optional 12-byte extension whose first byte is U (mtime,
 * mode, uid, gid).  The byte sum computed with lha_calcsum() is compared
 * with the stored header sum after the header has been consumed.
 * Returns ARCHIVE_FATAL for a truncated or invalid header and for a sum
 * mismatch.
 */
781 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
783 const unsigned char *p;
784 int extdsize, namelen;
785 unsigned char headersum, sum_calculated;
787 if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788 return (truncated_error(a));
/* header_size is the stored size byte plus 2.
 * NOTE(review): presumably because the stored value does not count the
 * size and sum bytes -- see the layout diagram in front of this
 * function. */
789 lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790 headersum = p[H0_HEADER_SUM_OFFSET];
791 lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792 lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793 lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794 namelen = p[H0_NAME_LEN_OFFSET];
/* Bytes left in the header after the fixed part and the name. */
795 extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
/* extdsize == -2 is tolerated: it marks a header without the CRC16
 * field (see the comment further down). */
796 if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798 "Invalid LHa header");
799 return (ARCHIVE_FATAL);
801 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802 return (truncated_error(a));
804 archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805 /* When extdsize == -2, A CRC16 value is not present in the header. */
807 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808 lha->setflag |= CRC_IS_SET;
/* NOTE(review): the arguments read as (seed, buffer, offset, length),
 * i.e. the sum covers header_size - 2 bytes starting at offset 2 --
 * confirm against lha_calcsum(). */
810 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
812 /* Read an extended header */
814 /* This extended data is set by 'LHa for UNIX' only.
817 p += H0_FILE_NAME_OFFSET + namelen + 2;
818 if (p[0] == 'U' && extdsize == 12) {
819 /* p[1] is a minor version. */
820 lha->mtime = archive_le32dec(&p[2]);
821 lha->mode = archive_le16dec(&p[6]);
822 lha->uid = archive_le16dec(&p[8]);
823 lha->gid = archive_le16dec(&p[10]);
824 lha->setflag |= UNIX_MODE_IS_SET;
/* The header is consumed before the checksum verdict is reported. */
827 __archive_read_consume(a, lha->header_size);
829 if (sum_calculated != headersum) {
830 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831 "LHa header sum error");
832 return (ARCHIVE_FATAL);
842 * +---------------+----------+----------------+-------------+
843 * |header size(*1)|header sum|compression type|skip size(*2)|
844 * +---------------+----------+----------------+-------------+
845 * <---------------(*1)----------*
847 * +11 +15 +17 +19 +20 +21
848 * +-----------------+---------+---------+--------------+----------------+
849 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850 * +-----------------+---------+---------+--------------+----------------+
851 * *-------------------------------(*1)----------------------------------*
853 * +21 +22 +22+(*3) +22+(*3)+2 +22+(*3)+3 +22+(*3)+3+(*4)
854 * +---------------+---------+----------+-----------+-----------+
855 * |name length(*3)|file name|file CRC16| creator |padding(*4)|
856 * +---------------+---------+----------+-----------+-----------+
858 * *----------------------------(*1)----------------------------*
860 * +22+(*3)+3+(*4) +22+(*3)+3+(*4)+2 +22+(*3)+3+(*4)+2+(*5)
861 * +----------------+---------------------+------------------------+
862 * |next header size| extended header(*5) | compressed data |
863 * +----------------+---------------------+------------------------+
864 * *------(*1)-----> <--------------------(*2)-------------------->
866 #define H1_HEADER_SIZE_OFFSET 0
867 #define H1_HEADER_SUM_OFFSET 1
868 #define H1_COMP_SIZE_OFFSET 7
869 #define H1_ORIG_SIZE_OFFSET 11
870 #define H1_DOS_TIME_OFFSET 15
871 #define H1_NAME_LEN_OFFSET 21
872 #define H1_FILE_NAME_OFFSET 22
873 #define H1_FIXED_SIZE 27
/*
 * Parse a level 1 header into `lha` and consume it from the stream.
 *
 * The fixed part is decoded like level 0 (size byte + 2, header sum,
 * sizes, DOS timestamp, name length).  A name byte of 0xff makes the
 * header invalid.  After the file name and CRC16 are stored, all but the
 * last two header bytes are consumed and the extended headers are read
 * by lha_read_file_extended_header().  compsize, which included the
 * extended headers, is then reduced to the size of the file data.
 * Returns ARCHIVE_FATAL for a truncated or invalid header and for a sum
 * mismatch.
 */
875 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
877 const unsigned char *p;
880 int namelen, padding;
881 unsigned char headersum, sum_calculated;
885 if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886 return (truncated_error(a));
888 lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889 headersum = p[H1_HEADER_SUM_OFFSET];
890 /* Note: An extended header size is included in a compsize. */
891 lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892 lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893 lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894 namelen = p[H1_NAME_LEN_OFFSET];
895 /* Calculate a padding size. The result will be normally 0 only(?) */
896 padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
/* Reject names longer than 230 bytes and headers too short for the name. */
898 if (namelen > 230 || padding < 0)
901 if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902 return (truncated_error(a));
904 for (i = 0; i < namelen; i++) {
905 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906 goto invalid;/* Invalid filename. */
908 archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909 lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910 lha->setflag |= CRC_IS_SET;
912 sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913 /* Consume used bytes but not include `next header size' data
914 * since it will be consumed in lha_read_file_extended_header(). */
915 __archive_read_consume(a, lha->header_size - 2);
917 /* Read extended headers */
/* No CRC accumulator is passed (NULL): this level is verified with the
 * byte sum instead.  Size fields are 2 bytes wide and the total may not
 * exceed compsize + 2. */
918 err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919 (size_t)(lha->compsize + 2), &extdsize);
920 if (err2 < ARCHIVE_WARN)
924 /* Get a real compressed file size. */
/* extdsize starts at the 2-byte size field that was not part of
 * compsize, hence the - 2. */
925 lha->compsize -= extdsize - 2;
927 if (sum_calculated != headersum) {
928 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
929 "LHa header sum error");
930 return (ARCHIVE_FATAL);
934 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
935 "Invalid LHa header");
936 return (ARCHIVE_FATAL);
943 * +---------------+----------------+-------------------+-----------------+
944 * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
945 * +---------------+----------------+-------------------+-----------------+
946 * <--------------------------------(*1)---------------------------------*
948 * +15 +19 +20 +21 +23 +24
949 * +-----------------+------------+----------------+----------+-----------+
950 * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16| creator |
951 * +-----------------+------------+----------------+----------+-----------+
952 * *---------------------------------(*1)---------------------------------*
954 * +24 +26 +26+(*3) +26+(*3)+(*4)
955 * +----------------+-------------------+-------------+-------------------+
956 * |next header size|extended header(*3)| padding(*4) | compressed data |
957 * +----------------+-------------------+-------------+-------------------+
958 * *--------------------------(*1)-------------------> <------(*2)------->
961 #define H2_HEADER_SIZE_OFFSET 0
962 #define H2_COMP_SIZE_OFFSET 7
963 #define H2_ORIG_SIZE_OFFSET 11
964 #define H2_TIME_OFFSET 15
965 #define H2_CRC_OFFSET 21
966 #define H2_FIXED_SIZE 24
/*
 * Parse a level 2 header into `lha` and consume it from the stream.
 *
 * header_size is a 16-bit little-endian value covering the whole header;
 * sizes and the timestamp are 32-bit little-endian, and the timestamp is
 * stored into mtime without DOS conversion.  A CRC16 is accumulated over
 * the fixed part, the extended headers (through
 * lha_read_file_extended_header()) and any trailing padding, and is then
 * compared with lha->header_crc.
 * Returns ARCHIVE_FATAL for a truncated header, a header_size below
 * H2_FIXED_SIZE, or a CRC mismatch.
 */
968 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
970 const unsigned char *p;
975 if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
976 return (truncated_error(a));
978 lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
979 lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
980 lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
981 lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
982 lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
983 lha->setflag |= CRC_IS_SET;
/* A header shorter than its own fixed part cannot be valid. */
985 if (lha->header_size < H2_FIXED_SIZE) {
986 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
987 "Invalid LHa header size");
988 return (ARCHIVE_FATAL);
/* Start the header CRC with the fixed part; the extended-header reader
 * continues it through the pointer passed below. */
991 header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
992 __archive_read_consume(a, H2_FIXED_SIZE);
994 /* Read extended headers */
995 err = lha_read_file_extended_header(a, lha, &header_crc, 2,
996 lha->header_size - H2_FIXED_SIZE, &extdsize);
997 if (err < ARCHIVE_WARN)
1000 /* Calculate a padding size. The result will be normally 0 or 1. */
1001 padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
/* Padding bytes belong to the header, so they enter the CRC as well. */
1003 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1004 return (truncated_error(a));
1005 header_crc = lha_crc16(header_crc, p, padding);
1006 __archive_read_consume(a, padding);
/* lha->header_crc is the value stored by the extended-header reader. */
1009 if (header_crc != lha->header_crc) {
1010 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1011 "LHa header CRC error");
1012 return (ARCHIVE_FATAL);
1021 * +------------+----------------+-------------------+-----------------+
1022 * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1023 * +------------+----------------+-------------------+-----------------+
1024 * <-------------------------------(*1)-------------------------------*
1026 * +15 +19 +20 +21 +23 +24
1027 * +-----------------+------------+----------------+----------+-----------+
1028 * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16| creator |
1029 * +-----------------+------------+----------------+----------+-----------+
1030 * *--------------------------------(*1)----------------------------------*
1032 * +24 +28 +32 +32+(*3)
1033 * +---------------+----------------+-------------------+-----------------+
1034 * |header size(*1)|next header size|extended header(*3)| compressed data |
1035 * +---------------+----------------+-------------------+-----------------+
1036 * *------------------------(*1)-----------------------> <------(*2)----->
1039 #define H3_FIELD_LEN_OFFSET 0
1040 #define H3_COMP_SIZE_OFFSET 7
1041 #define H3_ORIG_SIZE_OFFSET 11
1042 #define H3_TIME_OFFSET 15
1043 #define H3_CRC_OFFSET 21
1044 #define H3_HEADER_SIZE_OFFSET 24
1045 #define H3_FIXED_SIZE 28
/*
 * Parse a level 3 header into `lha` and consume it from the stream.
 *
 * Same flow as level 2, with a 32-bit header_size read from
 * H3_HEADER_SIZE_OFFSET and 4-byte extended-header size fields.  The
 * CRC16 accumulated over the fixed part and the extended headers is
 * compared with lha->header_crc.
 * Returns ARCHIVE_FATAL for a truncated or invalid header and for a CRC
 * mismatch.
 */
1047 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1049 const unsigned char *p;
1052 uint16_t header_crc;
1054 if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1055 return (truncated_error(a));
/* The first two bytes must hold the value 4.
 * NOTE(review): presumably the width of the size fields at this level,
 * matching the 0x04 fixed cell of the layout diagram -- confirm. */
1057 if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1059 lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1060 lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1061 lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1062 lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1063 lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1064 lha->setflag |= CRC_IS_SET;
/* The header must hold at least the fixed part plus 4 more bytes. */
1066 if (lha->header_size < H3_FIXED_SIZE + 4)
1068 header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1069 __archive_read_consume(a, H3_FIXED_SIZE);
1071 /* Read extended headers */
1072 err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1073 lha->header_size - H3_FIXED_SIZE, &extdsize);
1074 if (err < ARCHIVE_WARN)
/* lha->header_crc is the value stored by the extended-header reader. */
1077 if (header_crc != lha->header_crc) {
1078 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1079 "LHa header CRC error");
1080 return (ARCHIVE_FATAL);
1084 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1085 "Invalid LHa header");
1086 return (ARCHIVE_FATAL);
1090 * Extended header format
1092 * +0 +2 +3 -- used in header 1 and 2
1093 * +0 +4 +5 -- used in header 3
1094 * +--------------+---------+-------------------+--------------+--
1095 * |ex-header size|header id| data |ex-header size| .......
1096 * +--------------+---------+-------------------+--------------+--
1097 * <-------------( ex-header size)------------> <-- next extended header --*
1099 * If the ex-header size is zero, it is the mark of the end of extended
1104 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1105 uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1108 const unsigned char *extdheader;
1112 unsigned char extdtype;
1114 #define EXT_HEADER_CRC 0x00 /* Header CRC and information*/
1115 #define EXT_FILENAME 0x01 /* Filename */
1116 #define EXT_DIRECTORY 0x02 /* Directory name */
1117 #define EXT_DOS_ATTR 0x40 /* MS-DOS attribute */
1118 #define EXT_TIMESTAMP 0x41 /* Windows time stamp */
1119 #define EXT_FILESIZE 0x42 /* Large file size */
1120 #define EXT_TIMEZONE 0x43 /* Time zone */
1121 #define EXT_UTF16_FILENAME 0x44 /* UTF-16 filename */
1122 #define EXT_UTF16_DIRECTORY 0x45 /* UTF-16 directory name */
1123 #define EXT_CODEPAGE 0x46 /* Codepage */
1124 #define EXT_UNIX_MODE 0x50 /* File permission */
1125 #define EXT_UNIX_GID_UID 0x51 /* gid,uid */
1126 #define EXT_UNIX_GNAME 0x52 /* Group name */
1127 #define EXT_UNIX_UNAME 0x53 /* User name */
1128 #define EXT_UNIX_MTIME 0x54 /* Modified time */
1129 #define EXT_OS2_NEW_ATTR 0x7f /* new attribute(OS/2 only) */
1130 #define EXT_NEW_ATTR 0xff /* new attribute */
1132 *total_size = sizefield_length;
1135 /* Read an extended header size. */
1137 __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1138 return (truncated_error(a));
1139 /* Check if the size is the zero indicates the end of the
1140 * extended header. */
1141 if (sizefield_length == sizeof(uint16_t))
1142 extdsize = archive_le16dec(h);
1144 extdsize = archive_le32dec(h);
1145 if (extdsize == 0) {
1146 /* End of extended header */
1148 *crc = lha_crc16(*crc, h, sizefield_length);
1149 __archive_read_consume(a, sizefield_length);
1150 return (ARCHIVE_OK);
1153 /* Sanity check to the extended header size. */
1154 if (((uint64_t)*total_size + extdsize) >
1155 (uint64_t)limitsize ||
1156 extdsize <= (size_t)sizefield_length)
1159 /* Read the extended header. */
1160 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1161 return (truncated_error(a));
1162 *total_size += extdsize;
1164 extdheader = (const unsigned char *)h;
1165 /* Get the extended header type. */
1166 extdtype = extdheader[sizefield_length];
1167 /* Calculate an extended data size. */
1168 datasize = extdsize - (1 + sizefield_length);
1169 /* Skip an extended header size field and type field. */
1170 extdheader += sizefield_length + 1;
1172 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1173 *crc = lha_crc16(*crc, h, extdsize);
1175 case EXT_HEADER_CRC:
1176 /* We only use a header CRC. Following data will not
1178 if (datasize >= 2) {
1179 lha->header_crc = archive_le16dec(extdheader);
1181 static const char zeros[2] = {0, 0};
1182 *crc = lha_crc16(*crc, h,
1183 extdsize - datasize);
1184 /* CRC value itself as zero */
1185 *crc = lha_crc16(*crc, zeros, 2);
1186 *crc = lha_crc16(*crc,
1187 extdheader+2, datasize - 2);
1192 if (datasize == 0) {
1193 /* maybe directory header */
1194 archive_string_empty(&lha->filename);
1197 if (extdheader[0] == '\0')
1199 archive_strncpy(&lha->filename,
1200 (const char *)extdheader, datasize);
1203 if (datasize == 0 || extdheader[0] == '\0')
1204 /* no directory name data. exit this case. */
1207 archive_strncpy(&lha->dirname,
1208 (const char *)extdheader, datasize);
1210 * Convert directory delimiter from 0xFF
1211 * to '/' for local system.
1213 for (i = 0; i < lha->dirname.length; i++) {
1214 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1215 lha->dirname.s[i] = '/';
1217 /* Is last character directory separator? */
1218 if (lha->dirname.s[lha->dirname.length-1] != '/')
1219 /* invalid directory data */
1224 lha->dos_attr = (unsigned char)
1225 (archive_le16dec(extdheader) & 0xff);
1228 if (datasize == (sizeof(uint64_t) * 3)) {
1229 lha->birthtime = lha_win_time(
1230 archive_le64dec(extdheader),
1231 &lha->birthtime_tv_nsec);
1232 extdheader += sizeof(uint64_t);
1233 lha->mtime = lha_win_time(
1234 archive_le64dec(extdheader),
1235 &lha->mtime_tv_nsec);
1236 extdheader += sizeof(uint64_t);
1237 lha->atime = lha_win_time(
1238 archive_le64dec(extdheader),
1239 &lha->atime_tv_nsec);
1240 lha->setflag |= BIRTHTIME_IS_SET |
1245 if (datasize == sizeof(uint64_t) * 2) {
1246 lha->compsize = archive_le64dec(extdheader);
1247 extdheader += sizeof(uint64_t);
1248 lha->origsize = archive_le64dec(extdheader);
1252 /* Get an archived filename charset from codepage.
1253 * This overwrites the charset specified by
1254 * hdrcharset option. */
1255 if (datasize == sizeof(uint32_t)) {
1256 struct archive_string cp;
1257 const char *charset;
1259 archive_string_init(&cp);
1260 switch (archive_le32dec(extdheader)) {
1261 case 65001: /* UTF-8 */
1265 archive_string_sprintf(&cp, "CP%d",
1266 (int)archive_le32dec(extdheader));
1271 archive_string_conversion_from_charset(
1272 &(a->archive), charset, 1);
1273 archive_string_free(&cp);
1274 if (lha->sconv == NULL)
1275 return (ARCHIVE_FATAL);
1279 if (datasize == sizeof(uint16_t)) {
1280 lha->mode = archive_le16dec(extdheader);
1281 lha->setflag |= UNIX_MODE_IS_SET;
1284 case EXT_UNIX_GID_UID:
1285 if (datasize == (sizeof(uint16_t) * 2)) {
1286 lha->gid = archive_le16dec(extdheader);
1287 lha->uid = archive_le16dec(extdheader+2);
1290 case EXT_UNIX_GNAME:
1292 archive_strncpy(&lha->gname,
1293 (const char *)extdheader, datasize);
1295 case EXT_UNIX_UNAME:
1297 archive_strncpy(&lha->uname,
1298 (const char *)extdheader, datasize);
1300 case EXT_UNIX_MTIME:
1301 if (datasize == sizeof(uint32_t))
1302 lha->mtime = archive_le32dec(extdheader);
1304 case EXT_OS2_NEW_ATTR:
1305 /* This extended header is OS/2 depend. */
1306 if (datasize == 16) {
1307 lha->dos_attr = (unsigned char)
1308 (archive_le16dec(extdheader) & 0xff);
1309 lha->mode = archive_le16dec(extdheader+2);
1310 lha->gid = archive_le16dec(extdheader+4);
1311 lha->uid = archive_le16dec(extdheader+6);
1312 lha->birthtime = archive_le32dec(extdheader+8);
1313 lha->atime = archive_le32dec(extdheader+12);
1314 lha->setflag |= UNIX_MODE_IS_SET
1315 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1319 if (datasize == 20) {
1320 lha->mode = (mode_t)archive_le32dec(extdheader);
1321 lha->gid = archive_le32dec(extdheader+4);
1322 lha->uid = archive_le32dec(extdheader+8);
1323 lha->birthtime = archive_le32dec(extdheader+12);
1324 lha->atime = archive_le32dec(extdheader+16);
1325 lha->setflag |= UNIX_MODE_IS_SET
1326 | BIRTHTIME_IS_SET | ATIME_IS_SET;
1329 case EXT_TIMEZONE: /* Not supported */
1330 case EXT_UTF16_FILENAME: /* Not supported */
1331 case EXT_UTF16_DIRECTORY: /* Not supported */
1336 __archive_read_consume(a, extdsize);
1339 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1340 "Invalid extended LHa header");
1341 return (ARCHIVE_FATAL);
1345 lha_end_of_entry(struct archive_read *a)
1347 struct lha *lha = (struct lha *)(a->format->data);
1348 int r = ARCHIVE_EOF;
1350 if (!lha->end_of_entry_cleanup) {
1351 if ((lha->setflag & CRC_IS_SET) &&
1352 lha->crc != lha->entry_crc_calculated) {
1353 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1354 "LHa data CRC error");
1358 /* End-of-entry cleanup done. */
1359 lha->end_of_entry_cleanup = 1;
1365 archive_read_format_lha_read_data(struct archive_read *a,
1366 const void **buff, size_t *size, int64_t *offset)
1368 struct lha *lha = (struct lha *)(a->format->data);
1371 if (lha->entry_unconsumed) {
1372 /* Consume as much as the decompressor actually used. */
1373 __archive_read_consume(a, lha->entry_unconsumed);
1374 lha->entry_unconsumed = 0;
1376 if (lha->end_of_entry) {
1377 *offset = lha->entry_offset;
1380 return (lha_end_of_entry(a));
1383 if (lha->entry_is_compressed)
1384 r = lha_read_data_lzh(a, buff, size, offset);
1386 /* No compression. */
1387 r = lha_read_data_none(a, buff, size, offset);
1392 * Read a file content in no compression.
1394 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1395 * lha->end_of_entry if it consumes all of the data.
1398 lha_read_data_none(struct archive_read *a, const void **buff,
1399 size_t *size, int64_t *offset)
1401 struct lha *lha = (struct lha *)(a->format->data);
1402 ssize_t bytes_avail;
1404 if (lha->entry_bytes_remaining == 0) {
1407 *offset = lha->entry_offset;
1408 lha->end_of_entry = 1;
1409 return (ARCHIVE_OK);
1412 * Note: '1' here is a performance optimization.
1413 * Recall that the decompression layer returns a count of
1414 * available bytes; asking for more than that forces the
1415 * decompressor to combine reads by copying data.
1417 *buff = __archive_read_ahead(a, 1, &bytes_avail);
1418 if (bytes_avail <= 0) {
1419 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1420 "Truncated LHa file data");
1421 return (ARCHIVE_FATAL);
1423 if (bytes_avail > lha->entry_bytes_remaining)
1424 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1425 lha->entry_crc_calculated =
1426 lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1427 *size = bytes_avail;
1428 *offset = lha->entry_offset;
1429 lha->entry_offset += bytes_avail;
1430 lha->entry_bytes_remaining -= bytes_avail;
1431 if (lha->entry_bytes_remaining == 0)
1432 lha->end_of_entry = 1;
1433 lha->entry_unconsumed = bytes_avail;
1434 return (ARCHIVE_OK);
1438 * Read a file content in LZHUFF encoding.
1440 * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1441 * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1445 lha_read_data_lzh(struct archive_read *a, const void **buff,
1446 size_t *size, int64_t *offset)
1448 struct lha *lha = (struct lha *)(a->format->data);
1449 ssize_t bytes_avail;
1452 /* If we haven't yet read any data, initialize the decompressor. */
1453 if (!lha->decompress_init) {
1454 r = lzh_decode_init(&(lha->strm), lha->method);
1458 case ARCHIVE_FAILED:
1459 /* Unsupported compression. */
1463 archive_set_error(&a->archive,
1464 ARCHIVE_ERRNO_FILE_FORMAT,
1465 "Unsupported lzh compression method -%c%c%c-",
1466 lha->method[0], lha->method[1], lha->method[2]);
1467 /* We know compressed size; just skip it. */
1468 archive_read_format_lha_read_data_skip(a);
1469 return (ARCHIVE_WARN);
1471 archive_set_error(&a->archive, ENOMEM,
1472 "Couldn't allocate memory "
1473 "for lzh decompression");
1474 return (ARCHIVE_FATAL);
1476 /* We've initialized decompression for this stream. */
1477 lha->decompress_init = 1;
1478 lha->strm.avail_out = 0;
1479 lha->strm.total_out = 0;
1483 * Note: '1' here is a performance optimization.
1484 * Recall that the decompression layer returns a count of
1485 * available bytes; asking for more than that forces the
1486 * decompressor to combine reads by copying data.
1488 lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1489 if (bytes_avail <= 0) {
1490 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1491 "Truncated LHa file body");
1492 return (ARCHIVE_FATAL);
1494 if (bytes_avail > lha->entry_bytes_remaining)
1495 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1497 lha->strm.avail_in = (int)bytes_avail;
1498 lha->strm.total_in = 0;
1499 lha->strm.avail_out = 0;
1501 r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1506 lha->end_of_entry = 1;
1509 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1511 return (ARCHIVE_FAILED);
1513 lha->entry_unconsumed = lha->strm.total_in;
1514 lha->entry_bytes_remaining -= lha->strm.total_in;
1516 if (lha->strm.avail_out) {
1517 *offset = lha->entry_offset;
1518 *size = lha->strm.avail_out;
1519 *buff = lha->strm.ref_ptr;
1520 lha->entry_crc_calculated =
1521 lha_crc16(lha->entry_crc_calculated, *buff, *size);
1522 lha->entry_offset += *size;
1524 *offset = lha->entry_offset;
1527 if (lha->end_of_entry)
1528 return (lha_end_of_entry(a));
1530 return (ARCHIVE_OK);
1534 * Skip a file content.
1537 archive_read_format_lha_read_data_skip(struct archive_read *a)
1540 int64_t bytes_skipped;
1542 lha = (struct lha *)(a->format->data);
1544 if (lha->entry_unconsumed) {
1545 /* Consume as much as the decompressor actually used. */
1546 __archive_read_consume(a, lha->entry_unconsumed);
1547 lha->entry_unconsumed = 0;
1550 /* if we've already read to end of data, we're done. */
1551 if (lha->end_of_entry_cleanup)
1552 return (ARCHIVE_OK);
1555 * If the length is at the beginning, we can skip the
1556 * compressed data much more quickly.
1558 bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1559 if (bytes_skipped < 0)
1560 return (ARCHIVE_FATAL);
1562 /* This entry is finished and done. */
1563 lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1564 return (ARCHIVE_OK);
1568 archive_read_format_lha_cleanup(struct archive_read *a)
1570 struct lha *lha = (struct lha *)(a->format->data);
1572 lzh_decode_free(&(lha->strm));
1573 archive_string_free(&(lha->dirname));
1574 archive_string_free(&(lha->filename));
1575 archive_string_free(&(lha->uname));
1576 archive_string_free(&(lha->gname));
1577 archive_wstring_free(&(lha->ws));
1579 (a->format->data) = NULL;
1580 return (ARCHIVE_OK);
1584 * 'LHa for UNIX' utility has archived a symbolic-link name after
1585 * a pathname with '|' character.
1586 * This function extracts the symbolic-link name from the pathname.
1589 * 1. a symbolic-name is 'aaa/bb/cc'
1590 * 2. a filename is 'xxx/bbb'
1591 * then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1594 lha_parse_linkname(struct archive_string *linkname,
1595 struct archive_string *pathname)
1600 linkptr = strchr(pathname->s, '|');
1601 if (linkptr != NULL) {
1602 symlen = strlen(linkptr + 1);
1603 archive_strncpy(linkname, linkptr+1, symlen);
1606 pathname->length = strlen(pathname->s);
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * `p' points at four bytes: a little-endian 16-bit time followed by a
 * little-endian 16-bit date.  The fields are unpacked into a struct tm
 * and converted with mktime(), i.e. interpreted as local time.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
	int msTime, msDate;
	struct tm ts;

	msTime = archive_le16dec(p);
	msDate = archive_le16dec(p+2);

	memset(&ts, 0, sizeof(ts));
	ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
	ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
	ts.tm_mday = msDate & 0x1f;		    /* Day of month.     */
	ts.tm_hour = (msTime >> 11) & 0x1f;
	ts.tm_min = (msTime >> 5) & 0x3f;
	ts.tm_sec = (msTime << 1) & 0x3e;	    /* 2 second units.   */
	ts.tm_isdst = -1;	/* Let mktime() decide about DST. */
	return (mktime(&ts));
}
/*
 * Convert an MS-Windows-style date/time into Unix-style time.
 *
 * `wintime' counts 100 nanosecond units; EPOC_TIME is the value that
 * corresponds to 1970-01-01 00:00:00 (UTC).  Returns whole seconds since
 * that instant and, when `ns' is not NULL, stores the remaining
 * nanoseconds there.  Values before the Unix epoch yield 0 seconds and
 * 0 nanoseconds.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)

	if (wintime >= EPOC_TIME) {
		wintime -= EPOC_TIME;	/* 1970-01-01 00:00:00 (UTC) */
		if (ns != NULL)
			*ns = (long)(wintime % 10000000) * 100;
		return (wintime / 10000000);
	} else {
		if (ns != NULL)
			*ns = 0;
		return (0);
	}
}
/*
 * Add `size' bytes, starting `offset' bytes into `pp', to the 8-bit
 * checksum `sum' and return the result (arithmetic wraps modulo 256).
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	unsigned char const *p = (unsigned char const *)pp;

	p += offset;
	for (;size > 0; --size)
		sum += *p++;
	return (sum);
}
/*
 * CRC-16 lookup tables (reflected polynomial 0xA001).
 *  crc16tbl[0] : the byte-at-a-time table.
 *  crc16tbl[1] : crc16tbl[0] advanced by one more zero byte, which lets
 *                lha_crc16() fold two input bytes per step.
 * Both are filled in by lha_crc16_init().
 */
static uint16_t crc16tbl[2][256];

/*
 * Build crc16tbl.  Only the first call does any work; the tables must be
 * built before lha_crc16() is used.
 */
static void
lha_crc16_init(void)
{
	unsigned int i;
	static int crc16init = 0;

	if (crc16init)
		return;
	crc16init = 1;

	for (i = 0; i < 256; i++) {
		unsigned int j;
		uint16_t crc = (uint16_t)i;
		for (j = 8; j; j--)
			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
		crc16tbl[0][i] = crc;
	}

	for (i = 0; i < 256; i++) {
		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
	}
}

/*
 * Update the running CRC-16 `crc' with `len' bytes at `pp' and return the
 * new value.  A zero length returns `crc' unchanged.
 *
 * Input is taken two bytes per step as a little-endian 16-bit value built
 * from individual byte loads.  This gives the same result on every host
 * byte order and for every alignment of `pp', without reading the buffer
 * through a uint16_t pointer.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;

	if (len == 0)
		return crc;

	for (; len >= 2; len -= 2, p += 2) {
		crc ^= (uint16_t)(p[0] | ((unsigned int)p[1] << 8));
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];
	}
	/* Odd trailing byte. */
	if (len)
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p) & 0xff];
	return crc;
}
1750 * Initialize LZHUF decoder.
1752 * Returns ARCHIVE_OK if initialization was successful.
1753 * Returns ARCHIVE_FAILED if method is unsupported.
1754 * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1758 lzh_decode_init(struct lzh_stream *strm, const char *method)
1763 if (strm->ds == NULL) {
1764 strm->ds = calloc(1, sizeof(*strm->ds));
1765 if (strm->ds == NULL)
1766 return (ARCHIVE_FATAL);
1769 ds->error = ARCHIVE_FAILED;
1770 if (method == NULL || method[0] != 'l' || method[1] != 'h')
1771 return (ARCHIVE_FAILED);
1772 switch (method[2]) {
1774 w_bits = 13;/* 8KiB for window */
1777 w_bits = 15;/* 32KiB for window */
1780 w_bits = 16;/* 64KiB for window */
1783 return (ARCHIVE_FAILED);/* Not supported. */
1785 ds->error = ARCHIVE_FATAL;
1786 /* Expand a window size up to 128 KiB for decompressing process
1787 * performance whatever its original window size is. */
1788 ds->w_size = 1U << 17;
1789 ds->w_mask = ds->w_size -1;
1790 if (ds->w_buff == NULL) {
1791 ds->w_buff = malloc(ds->w_size);
1792 if (ds->w_buff == NULL)
1793 return (ARCHIVE_FATAL);
1795 w_size = 1U << w_bits;
1796 memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1799 ds->pos_pt_len_size = w_bits + 1;
1800 ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1801 ds->literal_pt_len_size = PT_BITLEN_SIZE;
1802 ds->literal_pt_len_bits = 5;
1803 ds->br.cache_buffer = 0;
1804 ds->br.cache_avail = 0;
1806 if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1808 return (ARCHIVE_FATAL);
1809 ds->lt.len_bits = 9;
1810 if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1812 return (ARCHIVE_FATAL);
1815 return (ARCHIVE_OK);
1819 * Release LZHUF decoder.
1822 lzh_decode_free(struct lzh_stream *strm)
1825 if (strm->ds == NULL)
1827 free(strm->ds->w_buff);
1828 lzh_huffman_free(&(strm->ds->lt));
1829 lzh_huffman_free(&(strm->ds->pt));
/*
 * Bit stream reader.
 *
 * The reader keeps unread bits in br->cache_buffer, with br->cache_avail
 * telling how many of its low bits are still valid.  The macros below
 * return at most 16 bits at a time.
 */
/* Check that the cache buffer has enough bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Get compressed data by bit. */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Same as lzh_br_bits() for the case where fewer than `n' bits are left:
 * the missing low bits are read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : we met that strm->next_in is empty, we have to get following
 *          bytes. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has((br), (n)) || lzh_br_fillup((strm), (br)))
/*  True  : the cache buffer has some bits as much as we need.
 *  False : there are no enough bits in the cache buffer to be used,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Notify how many bits we consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low n bits; entries above 16 saturate. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1872 * Shift away used bits in the cache data and fill it up with following bits.
1873 * Call this when cache buffer does not have enough bits you need.
1875 * Returns 1 if the cache buffer is full.
1876 * Returns 0 if the cache buffer is not full; input buffer is empty.
1879 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1881 int n = CACHE_BITS - br->cache_avail;
1884 const int x = n >> 3;
1885 if (strm->avail_in >= x) {
1889 ((uint64_t)strm->next_in[0]) << 56 |
1890 ((uint64_t)strm->next_in[1]) << 48 |
1891 ((uint64_t)strm->next_in[2]) << 40 |
1892 ((uint64_t)strm->next_in[3]) << 32 |
1893 ((uint32_t)strm->next_in[4]) << 24 |
1894 ((uint32_t)strm->next_in[5]) << 16 |
1895 ((uint32_t)strm->next_in[6]) << 8 |
1896 (uint32_t)strm->next_in[7];
1898 strm->avail_in -= 8;
1899 br->cache_avail += 8 * 8;
1903 (br->cache_buffer << 56) |
1904 ((uint64_t)strm->next_in[0]) << 48 |
1905 ((uint64_t)strm->next_in[1]) << 40 |
1906 ((uint64_t)strm->next_in[2]) << 32 |
1907 ((uint32_t)strm->next_in[3]) << 24 |
1908 ((uint32_t)strm->next_in[4]) << 16 |
1909 ((uint32_t)strm->next_in[5]) << 8 |
1910 (uint32_t)strm->next_in[6];
1912 strm->avail_in -= 7;
1913 br->cache_avail += 7 * 8;
1917 (br->cache_buffer << 48) |
1918 ((uint64_t)strm->next_in[0]) << 40 |
1919 ((uint64_t)strm->next_in[1]) << 32 |
1920 ((uint32_t)strm->next_in[2]) << 24 |
1921 ((uint32_t)strm->next_in[3]) << 16 |
1922 ((uint32_t)strm->next_in[4]) << 8 |
1923 (uint32_t)strm->next_in[5];
1925 strm->avail_in -= 6;
1926 br->cache_avail += 6 * 8;
1929 /* We have enough compressed data in
1930 * the cache buffer.*/
1936 if (strm->avail_in == 0) {
1937 /* There is not enough compressed data to fill up the
1942 (br->cache_buffer << 8) | *strm->next_in++;
1944 br->cache_avail += 8;
1952 * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1953 * Please set available buffer and call this function again.
1954 * 2. Returns ARCHIVE_EOF if decompression has been completed.
1955 * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1956 * is broken or you do not set 'last' flag properly.
1957 * 4. 'last' flag is very important, you must set 1 to the flag if there
1958 * is no input data. The lha compressed data format does not provide how
1959 * to know the compressed data is really finished.
1960 * Note: lha command utility check if the total size of output bytes is
1961 * reached the uncompressed size recorded in its header. it does not mind
1962 * that the decoding process is properly finished.
1963 * GNU ZIP can decompress another compressed file made by SCO LZH compress.
1964 * it handles EOF as null to fill read buffer with zero until the decoding
1965 * process meet 2 bytes of zeros at reading a size of a next chunk, so the
1966 * zeros are treated as the mark of the end of the data although the zeros
1967 * is dummy, not the file data.
1969 static int lzh_read_blocks(struct lzh_stream *, int);
1970 static int lzh_decode_blocks(struct lzh_stream *, int);
1971 #define ST_RD_BLOCK 0
1972 #define ST_RD_PT_1 1
1973 #define ST_RD_PT_2 2
1974 #define ST_RD_PT_3 3
1975 #define ST_RD_PT_4 4
1976 #define ST_RD_LITERAL_1 5
1977 #define ST_RD_LITERAL_2 6
1978 #define ST_RD_LITERAL_3 7
1979 #define ST_RD_POS_DATA_1 8
1980 #define ST_GET_LITERAL 9
1981 #define ST_GET_POS_1 10
1982 #define ST_GET_POS_2 11
1983 #define ST_COPY_DATA 12
1986 lzh_decode(struct lzh_stream *strm, int last)
1988 struct lzh_dec *ds = strm->ds;
1995 avail_in = strm->avail_in;
1997 if (ds->state < ST_GET_LITERAL)
1998 r = lzh_read_blocks(strm, last);
2000 r = lzh_decode_blocks(strm, last);
2002 strm->total_in += avail_in - strm->avail_in;
2007 lzh_emit_window(struct lzh_stream *strm, size_t s)
2009 strm->ref_ptr = strm->ds->w_buff;
2010 strm->avail_out = (int)s;
2011 strm->total_out += s;
2015 lzh_read_blocks(struct lzh_stream *strm, int last)
2017 struct lzh_dec *ds = strm->ds;
2018 struct lzh_br *br = &(ds->br);
2023 switch (ds->state) {
2026 * Read a block number indicates how many blocks
2027 * we will handle. The block is composed of a
2028 * literal and a match, sometimes a literal only
2029 * in particular, there are no reference data at
2030 * the beginning of the decompression.
2032 if (!lzh_br_read_ahead_0(strm, br, 16)) {
2034 /* We need following data. */
2035 return (ARCHIVE_OK);
2036 if (lzh_br_has(br, 8)) {
2038 * It seems there are extra bits.
2039 * 1. Compressed data is broken.
2040 * 2. `last' flag does not properly
2045 if (ds->w_pos > 0) {
2046 lzh_emit_window(strm, ds->w_pos);
2048 return (ARCHIVE_OK);
2050 /* End of compressed data; we have completely
2051 * handled all compressed data. */
2052 return (ARCHIVE_EOF);
2054 ds->blocks_avail = lzh_br_bits(br, 16);
2055 if (ds->blocks_avail == 0)
2057 lzh_br_consume(br, 16);
2059 * Read a literal table compressed in huffman
2062 ds->pt.len_size = ds->literal_pt_len_size;
2063 ds->pt.len_bits = ds->literal_pt_len_bits;
2064 ds->reading_position = 0;
2067 /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2068 * used in reading both a literal table and a
2069 * position table. */
2070 if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2072 goto failed;/* Truncated data. */
2073 ds->state = ST_RD_PT_1;
2074 return (ARCHIVE_OK);
2076 ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2077 lzh_br_consume(br, ds->pt.len_bits);
2080 if (ds->pt.len_avail == 0) {
2081 /* There is no bitlen. */
2082 if (!lzh_br_read_ahead(strm, br,
2085 goto failed;/* Truncated data.*/
2086 ds->state = ST_RD_PT_2;
2087 return (ARCHIVE_OK);
2089 if (!lzh_make_fake_table(&(ds->pt),
2090 lzh_br_bits(br, ds->pt.len_bits)))
2091 goto failed;/* Invalid data. */
2092 lzh_br_consume(br, ds->pt.len_bits);
2093 if (ds->reading_position)
2094 ds->state = ST_GET_LITERAL;
2096 ds->state = ST_RD_LITERAL_1;
2098 } else if (ds->pt.len_avail > ds->pt.len_size)
2099 goto failed;/* Invalid data. */
2101 memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2102 if (ds->pt.len_avail < 3 ||
2103 ds->pt.len_size == ds->pos_pt_len_size) {
2104 ds->state = ST_RD_PT_4;
2109 ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2111 if (ds->loop < 0 || last)
2112 goto failed;/* Invalid data. */
2113 /* Not completed, get following data. */
2114 ds->state = ST_RD_PT_3;
2115 return (ARCHIVE_OK);
2117 /* There are some null in bitlen of the literal. */
2118 if (!lzh_br_read_ahead(strm, br, 2)) {
2120 goto failed;/* Truncated data. */
2121 ds->state = ST_RD_PT_3;
2122 return (ARCHIVE_OK);
2124 c = lzh_br_bits(br, 2);
2125 lzh_br_consume(br, 2);
2126 if (c > ds->pt.len_avail - 3)
2127 goto failed;/* Invalid data. */
2128 for (i = 3; c-- > 0 ;)
2129 ds->pt.bitlen[i++] = 0;
2133 ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2135 if (ds->loop < ds->pt.len_avail) {
2136 if (ds->loop < 0 || last)
2137 goto failed;/* Invalid data. */
2138 /* Not completed, get following data. */
2139 ds->state = ST_RD_PT_4;
2140 return (ARCHIVE_OK);
2142 if (!lzh_make_huffman_table(&(ds->pt)))
2143 goto failed;/* Invalid data */
2144 if (ds->reading_position) {
2145 ds->state = ST_GET_LITERAL;
2149 case ST_RD_LITERAL_1:
2150 if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2152 goto failed;/* Truncated data. */
2153 ds->state = ST_RD_LITERAL_1;
2154 return (ARCHIVE_OK);
2156 ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2157 lzh_br_consume(br, ds->lt.len_bits);
2159 case ST_RD_LITERAL_2:
2160 if (ds->lt.len_avail == 0) {
2161 /* There is no bitlen. */
2162 if (!lzh_br_read_ahead(strm, br,
2165 goto failed;/* Truncated data.*/
2166 ds->state = ST_RD_LITERAL_2;
2167 return (ARCHIVE_OK);
2169 if (!lzh_make_fake_table(&(ds->lt),
2170 lzh_br_bits(br, ds->lt.len_bits)))
2171 goto failed;/* Invalid data */
2172 lzh_br_consume(br, ds->lt.len_bits);
2173 ds->state = ST_RD_POS_DATA_1;
2175 } else if (ds->lt.len_avail > ds->lt.len_size)
2176 goto failed;/* Invalid data */
2178 memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2180 case ST_RD_LITERAL_3:
2182 while (i < ds->lt.len_avail) {
2183 if (!lzh_br_read_ahead(strm, br,
2186 goto failed;/* Truncated data.*/
2188 ds->state = ST_RD_LITERAL_3;
2189 return (ARCHIVE_OK);
2191 rbits = lzh_br_bits(br, ds->pt.max_bits);
2192 c = lzh_decode_huffman(&(ds->pt), rbits);
2194 /* Note: 'c' will never be more than
2195 * eighteen since it's limited by
2196 * PT_BITLEN_SIZE, which is being set
2197 * to ds->pt.len_size through
2198 * ds->literal_pt_len_size. */
2199 lzh_br_consume(br, ds->pt.bitlen[c]);
2202 ds->lt.bitlen[i++] = c;
2203 } else if (c == 0) {
2204 lzh_br_consume(br, ds->pt.bitlen[c]);
2205 ds->lt.bitlen[i++] = 0;
2207 /* c == 1 or c == 2 */
2208 int n = (c == 1)?4:9;
2209 if (!lzh_br_read_ahead(strm, br,
2210 ds->pt.bitlen[c] + n)) {
2211 if (last) /* Truncated data. */
2214 ds->state = ST_RD_LITERAL_3;
2215 return (ARCHIVE_OK);
2217 lzh_br_consume(br, ds->pt.bitlen[c]);
2218 c = lzh_br_bits(br, n);
2219 lzh_br_consume(br, n);
2221 if (i + c > ds->lt.len_avail)
2222 goto failed;/* Invalid data */
2223 memset(&(ds->lt.bitlen[i]), 0, c);
2227 if (i > ds->lt.len_avail ||
2228 !lzh_make_huffman_table(&(ds->lt)))
2229 goto failed;/* Invalid data */
2231 case ST_RD_POS_DATA_1:
2233 * Read a position table compressed in huffman
2236 ds->pt.len_size = ds->pos_pt_len_size;
2237 ds->pt.len_bits = ds->pos_pt_len_bits;
2238 ds->reading_position = 1;
2239 ds->state = ST_RD_PT_1;
2241 case ST_GET_LITERAL:
2246 return (ds->error = ARCHIVE_FAILED);
2250 lzh_decode_blocks(struct lzh_stream *strm, int last)
2252 struct lzh_dec *ds = strm->ds;
2253 struct lzh_br bre = ds->br;
2254 struct huffman *lt = &(ds->lt);
2255 struct huffman *pt = &(ds->pt);
2256 unsigned char *w_buff = ds->w_buff;
2257 unsigned char *lt_bitlen = lt->bitlen;
2258 unsigned char *pt_bitlen = pt->bitlen;
2259 int blocks_avail = ds->blocks_avail, c = 0;
2260 int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2261 int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2262 int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2263 int state = ds->state;
2267 case ST_GET_LITERAL:
2269 if (blocks_avail == 0) {
2270 /* We have decoded all blocks.
2271 * Let's handle next blocks. */
2272 ds->state = ST_RD_BLOCK;
2274 ds->blocks_avail = 0;
2280 /* lzh_br_read_ahead() always try to fill the
2281 * cache buffer up. In specific situation we
2282 * are close to the end of the data, the cache
2283 * buffer will not be full and thus we have to
2284 * determine if the cache buffer has some bits
2285 * as much as we need after lzh_br_read_ahead()
2287 if (!lzh_br_read_ahead(strm, &bre,
2291 /* Remaining bits are less than
2292 * maximum bits(lt.max_bits) but maybe
2293 * it still remains as much as we need,
2294 * so we should try to use it with
2296 c = lzh_decode_huffman(lt,
2297 lzh_br_bits_forced(&bre,
2299 lzh_br_consume(&bre, lt_bitlen[c]);
2300 if (!lzh_br_has(&bre, 0))
2301 goto failed;/* Over read. */
2303 c = lzh_decode_huffman(lt,
2304 lzh_br_bits(&bre, lt_max_bits));
2305 lzh_br_consume(&bre, lt_bitlen[c]);
2309 /* Current block is a match data. */
2312 * 'c' is exactly a literal code.
2314 /* Save a decoded code to reference it
2317 if (++w_pos >= w_size) {
2319 lzh_emit_window(strm, w_size);
2323 /* 'c' is the length of a match pattern we have
2324 * already extracted, which has be stored in
2325 * window(ds->w_buff). */
2326 copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2330 * Get a reference position.
2332 if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2334 state = ST_GET_POS_1;
2335 ds->copy_len = copy_len;
2338 copy_pos = lzh_decode_huffman(pt,
2339 lzh_br_bits_forced(&bre, pt_max_bits));
2340 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2341 if (!lzh_br_has(&bre, 0))
2342 goto failed;/* Over read. */
2344 copy_pos = lzh_decode_huffman(pt,
2345 lzh_br_bits(&bre, pt_max_bits));
2346 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2351 /* We need an additional adjustment number to
2353 int p = copy_pos - 1;
2354 if (!lzh_br_read_ahead(strm, &bre, p)) {
2356 goto failed;/* Truncated data.*/
2357 state = ST_GET_POS_2;
2358 ds->copy_len = copy_len;
2359 ds->copy_pos = copy_pos;
2362 copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2363 lzh_br_consume(&bre, p);
2365 /* The position is actually a distance from the last
2366 * code we had extracted and thus we have to convert
2367 * it to a position of the window. */
2368 copy_pos = (w_pos - copy_pos - 1) & w_mask;
2372 * Copy `copy_len' bytes as extracted data from
2373 * the window into the output buffer.
2379 if (copy_pos > w_pos) {
2380 if (l > w_size - copy_pos)
2381 l = w_size - copy_pos;
2383 if (l > w_size - w_pos)
2386 if ((copy_pos + l < w_pos)
2387 || (w_pos + l < copy_pos)) {
2389 memcpy(w_buff + w_pos,
2390 w_buff + copy_pos, l);
2392 const unsigned char *s;
2397 s = w_buff + copy_pos;
2398 for (li = 0; li < l-1;) {
2406 if (w_pos == w_size) {
2408 lzh_emit_window(strm, w_size);
2410 state = ST_GET_LITERAL;
2412 state = ST_COPY_DATA;
2413 ds->copy_len = copy_len - l;
2415 (copy_pos + l) & w_mask;
2420 /* A copy of current pattern ended. */
2423 copy_pos = (copy_pos + l) & w_mask;
2425 state = ST_GET_LITERAL;
2430 return (ds->error = ARCHIVE_FAILED);
2433 ds->blocks_avail = blocks_avail;
2436 return (ARCHIVE_OK);
2440 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2444 if (hf->bitlen == NULL) {
2445 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2446 if (hf->bitlen == NULL)
2447 return (ARCHIVE_FATAL);
2449 if (hf->tbl == NULL) {
2450 if (tbl_bits < HTBL_BITS)
2454 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2455 if (hf->tbl == NULL)
2456 return (ARCHIVE_FATAL);
2458 if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2459 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2460 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2461 if (hf->tree == NULL)
2462 return (ARCHIVE_FATAL);
2464 hf->len_size = (int)len_size;
2465 hf->tbl_bits = tbl_bits;
2466 return (ARCHIVE_OK);
2470 lzh_huffman_free(struct huffman *hf)
/*
 * Lookup table used by lzh_read_pt_bitlen() for bit lengths whose first
 * three bits are all ones. It is indexed with the low 10 bits of the next
 * 13 bits of input (lzh_br_bits(br, 13) & 0x3FF).
 *
 * Layout (0x400 entries): 512 x 7, 256 x 8, 128 x 9, 64 x 10, 32 x 11,
 * 16 x 12, 8 x 13, 4 x 14, 2 x 15, 1 x 16, and a final 0 at index 0x3FF.
 * Put differently, an entry is 7 plus the number of leading one bits of
 * the 10-bit index, except that the all-ones index maps to 0.
 * NOTE(review): 0 presumably marks an invalid bit pattern - confirm
 * against the check in lzh_read_pt_bitlen().
 */
2477 static char bitlen_tbl[0x400] = {
2478 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2479 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2480 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2481 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2482 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2483 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2484 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2485 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2486 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2487 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2488 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2489 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2490 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2491 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2492 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2493 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2494 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2495 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2496 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2497 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2498 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2499 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2500 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2501 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2502 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2503 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2504 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2505 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2506 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2507 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2508 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2509 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
2510 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2511 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2512 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2513 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2514 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2515 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2516 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2517 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2518 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2519 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2520 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2521 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2522 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2523 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2524 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2525 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
2526 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2527 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2528 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2529 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2530 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2531 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2532 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2533 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
2534 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2535 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2536 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2537 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2538 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2539 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2540 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2541 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16, 0
/*
 * Read variable-length bit lengths from the bit reader of `strm` and store
 * them in ds->pt.bitlen[start] up to ds->pt.bitlen[end - 1].
 *
 * Each value starts with a 3-bit field. A field other than 7 is the value
 * itself and takes 3 bits. A field of 7 means the value is 7 or more: the
 * next 13 bits are peeked and bitlen_tbl gives the value c, whose whole
 * code is c - 3 bits long (for example the 13-bit pattern for 16).
 *
 * Returns -1 on invalid data. NOTE(review): the other return statements
 * (input running short, normal completion) are not visible in this view;
 * presumably they return the index reached so far - confirm.
 */
2544 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2546 struct lzh_dec *ds = strm->ds;
2547 struct lzh_br *br = &(ds->br);
2550 for (i = start; i < end; ) {
2552 * bit pattern the number we need
2561 * 1111111111110 -> 16
2563 if (!lzh_br_read_ahead(strm, br, 3))
2565 if ((c = lzh_br_bits(br, 3)) == 7) {
2566 if (!lzh_br_read_ahead(strm, br, 13))
2568 c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2570 lzh_br_consume(br, c - 3);
2572 return (-1);/* Invalid data. */
2574 lzh_br_consume(br, 3);
2575 ds->pt.bitlen[i++] = c;
/*
 * Set up `hf` without building a real code table.
 *
 * The symbol `c` is checked against hf->len_size first, and the bit length
 * of the symbol held in hf->tbl[0] is set to 0.
 * NOTE(review): the lines that store `c` and the return statements are not
 * visible in this view; presumably this installs `c` as the only symbol of
 * the table and reports failure when `c` is out of range - confirm.
 */
2582 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2584 if (c >= hf->len_size)
2589 hf->bitlen[hf->tbl[0]] = 0;
2594 * Make a huffman coding table.
/*
 * Build the decoding structures of `hf` from hf->freq (counts indexed by
 * code length 1..16) and hf->bitlen (read through `bitlen`; presumably
 * the code length of each symbol - confirm).
 *
 * Codes of up to HTBL_BITS bits are written straight into the lookup
 * table. Longer codes are reached through nodes taken from hf->tree.
 * A table or node value below hf->len_avail is a symbol; a value of
 * len_avail or more refers to tree node (value - len_avail).
 *
 * Side effects visible here: sets hf->max_bits and hf->shift_bits and
 * takes nodes from hf->tree by advancing hf->tree_used.
 *
 * Returns 0 when the code description is invalid.
 * NOTE(review): the success return is not visible in this view;
 * presumably it is non-zero - confirm.
 */
2597 lzh_make_huffman_table(struct huffman *hf)
2600 const unsigned char *bitlen;
2601 int bitptn[17], weight[17];
2602 int i, maxbits = 0, ptn, tbl_size, w;
2603 int diffbits, len_avail;
2606 * Initialize bit patterns.
2609 for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2613 ptn += hf->freq[i] * w;
/*
 * A code of length i weighs 1 << (16 - i), so the weighted counts must add
 * up to exactly 0x10000; the longest code must also fit hf->tbl_bits.
 */
2617 if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2618 return (0);/* Invalid */
2620 hf->max_bits = maxbits;
2623 * Cut out extra bits which we won't house in the table.
2624 * This preparation reduces the same calculation in the for-loop
2628 int ebits = 16 - maxbits;
2629 for (i = 1; i <= maxbits; i++) {
2630 bitptn[i] >>= ebits;
2631 weight[i] >>= ebits;
2634 if (maxbits > HTBL_BITS) {
/*
 * diffbits is the number of code bits beyond HTBL_BITS; patterns and
 * weights of the lengths 1..HTBL_BITS are rescaled by that amount.
 */
2638 diffbits = maxbits - HTBL_BITS;
2639 for (i = 1; i <= HTBL_BITS; i++) {
2640 bitptn[i] >>= diffbits;
2641 weight[i] >>= diffbits;
2643 htbl_max = bitptn[HTBL_BITS] +
2644 weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2645 p = &(hf->tbl[htbl_max]);
2646 while (p < &hf->tbl[1U<<HTBL_BITS])
2650 hf->shift_bits = diffbits;
2655 tbl_size = 1 << HTBL_BITS;
2657 bitlen = hf->bitlen;
2658 len_avail = hf->len_avail;
/* Place every symbol i below len_avail into the table or the tree. */
2660 for (i = 0; i < len_avail; i++) {
2669 /* Get a bit pattern */
2673 if (len <= HTBL_BITS) {
2674 /* Calculate next bit pattern */
2675 if ((bitptn[len] = ptn + cnt) > tbl_size)
2676 return (0);/* Invalid */
2677 /* Update the table */
2684 pc[0] = (uint16_t)i;
2685 pc[1] = (uint16_t)i;
2686 pc[2] = (uint16_t)i;
2687 pc[3] = (uint16_t)i;
2688 pc[4] = (uint16_t)i;
2689 pc[5] = (uint16_t)i;
2690 pc[6] = (uint16_t)i;
2691 pc[7] = (uint16_t)i;
2695 8 * sizeof(uint16_t));
2700 16 * sizeof(uint16_t));
2704 memcpy(p, pc, cnt * sizeof(uint16_t));
2707 p[--cnt] = (uint16_t)i;
2708 p[--cnt] = (uint16_t)i;
2711 p[--cnt] = (uint16_t)i;
2717 * A bit length is too big to be housed to a direct table,
2718 * so we use a tree model for its extra bits.
2720 bitptn[len] = ptn + cnt;
2721 bit = 1U << (diffbits -1);
2722 extlen = len - HTBL_BITS;
2724 p = &(tbl[ptn >> diffbits]);
/*
 * The table slot stores len_avail + node index, which keeps node
 * references apart from plain symbols (those are below len_avail).
 * Running out of preallocated nodes is reported as invalid input.
 */
2726 *p = len_avail + hf->tree_used;
2727 ht = &(hf->tree[hf->tree_used++]);
2728 if (hf->tree_used > hf->tree_avail)
2729 return (0);/* Invalid */
2733 if (*p < len_avail ||
2734 *p >= (len_avail + hf->tree_used))
2735 return (0);/* Invalid */
2736 ht = &(hf->tree[*p - len_avail]);
2738 while (--extlen > 0) {
2740 if (ht->left < len_avail) {
2741 ht->left = len_avail + hf->tree_used;
2742 ht = &(hf->tree[hf->tree_used++]);
2743 if (hf->tree_used > hf->tree_avail)
2744 return (0);/* Invalid */
2748 ht = &(hf->tree[ht->left - len_avail]);
2751 if (ht->right < len_avail) {
2752 ht->right = len_avail + hf->tree_used;
2753 ht = &(hf->tree[hf->tree_used++]);
2754 if (hf->tree_used > hf->tree_avail)
2755 return (0);/* Invalid */
2759 ht = &(hf->tree[ht->right - len_avail]);
2766 return (0);/* Invalid */
2767 ht->left = (uint16_t)i;
2770 return (0);/* Invalid */
2771 ht->right = (uint16_t)i;
/*
 * Resolve a code that did not fit the direct lookup table by walking the
 * tree of `hf`.
 *
 * rbits - the peeked input bits. The bits below position hf->shift_bits
 *         are tested one per step, highest first.
 * c     - value taken from the lookup table. As long as it is at least
 *         hf->len_avail it is treated as a reference to a tree node.
 *
 * The walk is abandoned when the extra bits are used up or when the node
 * index is not below hf->tree_used.
 * NOTE(review): the statements that step to a child and the return
 * statements are not visible in this view; presumably the decoded symbol
 * is returned - confirm.
 */
2778 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2784 extlen = hf->shift_bits;
2785 while (c >= hf->len_avail) {
2787 if (extlen-- <= 0 || c >= hf->tree_used)
2789 if (rbits & (1U << extlen))
/*
 * Decode one symbol from the peeked bits `rbits`.
 *
 * The lookup table is indexed with rbits shifted right by hf->shift_bits.
 * A result below hf->len_avail (or any result while len_avail is 0) is
 * handled by the fast path; every other result is passed on to
 * lzh_decode_huffman_tree() together with rbits.
 * NOTE(review): the fast-path return is not visible in this view;
 * presumably it returns the table value unchanged - confirm.
 */
2798 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2802 * At first search an index table for a bit pattern.
2803 * If it fails, search a huffman tree for.
2805 c = hf->tbl[rbits >> hf->shift_bits];
2806 if (c < hf->len_avail || hf->len_avail == 0)
2808 /* This bit pattern needs to be found out at a huffman tree. */
2809 return (lzh_decode_huffman_tree(hf, rbits, c));