]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/archive_read_support_format_lha.c
MFH r336801,r336854:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / archive_read_support_format_lha.c
1 /*-
2  * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
47
48
49 #define MAXMATCH                256     /* Maximum match length. */
50 #define MINMATCH                3       /* Minimum match length. */
51 /*
52  * Literal table format:
53  * +0              +256                      +510
54  * +---------------+-------------------------+
55  * | literal code  |       match length      |
56  * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
57  * +---------------+-------------------------+
58  *  <---          LT_BITLEN_SIZE         --->
59  */
60 /* Literal table size: one slot per literal byte value plus one slot per
 * match length from MINMATCH to MAXMATCH (510 when UCHAR_MAX is 255). */
61 #define LT_BITLEN_SIZE          (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62 /* Position table size.
63  * Note: this is used for both the position table and the pre literal table. */
64 #define PT_BITLEN_SIZE          (3 + 16)
65
/*
 * Decoder state for LZH-compressed entry data.  It groups a loop-buffer
 * window of recently decoded bytes, a bit-stream reader and two Huffman
 * tables (lt and pt).
 * NOTE(review): lt/pt presumably are the literal/match-length table and the
 * position table described by LT_BITLEN_SIZE/PT_BITLEN_SIZE; confirm against
 * lzh_decode_init().
 */
66 struct lzh_dec {
67         /* Decoding status. */
68         int                      state;
69
70         /*
71          * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72          * data.
73          */
74         int                      w_size;
        /* NOTE(review): presumably w_size - 1, used to wrap window indexes;
         * confirm where the window is set up. */
75         int                      w_mask;
76         /* Window buffer, which is a loop buffer. */
77         unsigned char           *w_buff;
78         /* The insert position to the window. */
79         int                      w_pos;
80         /* The position where we can copy decoded code from the window. */
81         int                      copy_pos;
82         /* The length how many bytes we can copy decoded code from
83          * the window. */
84         int                      copy_len;
85
86         /*
87          * Bit stream reader.
88          */
89         struct lzh_br {
        /* The bit cache is a single 64-bit word; CACHE_BITS is its width
         * in bits. */
90 #define CACHE_TYPE              uint64_t
91 #define CACHE_BITS              (8 * sizeof(CACHE_TYPE))
92                 /* Cache buffer. */
93                 CACHE_TYPE       cache_buffer;
94                 /* Indicates how many bits avail in cache_buffer. */
95                 int              cache_avail;
96         } br;
97
98         /*
99          * Huffman coding.
100          */
101         struct huffman {
102                 int              len_size;
103                 int              len_avail;
104                 int              len_bits;
105                 int              freq[17];
106                 unsigned char   *bitlen;
107
108                 /*
109                  * Use a index table. It's faster than searching a huffman
110                  * coding tree, which is a binary tree. But a use of a large
111                  * index table causes L1 cache read miss many times.
112                  */
                /* NOTE(review): HTBL_BITS presumably caps the number of
                 * code bits resolved through the direct access table;
                 * confirm in lzh_make_huffman_table(). */
113 #define HTBL_BITS       10
114                 int              max_bits;
115                 int              shift_bits;
116                 int              tbl_bits;
117                 int              tree_used;
118                 int              tree_avail;
119                 /* Direct access table. */
120                 uint16_t        *tbl;
121                 /* Binary tree table for extra bits over the direct access. */
122                 struct htree_t {
123                         uint16_t left;
124                         uint16_t right;
125                 }               *tree;
126         }                        lt, pt;
127
        /* Block/table bookkeeping used while decoding.  Their exact roles
         * are not visible in this part of the file; see lzh_decode(). */
128         int                      blocks_avail;
129         int                      pos_pt_len_size;
130         int                      pos_pt_len_bits;
131         int                      literal_pt_len_size;
132         int                      literal_pt_len_bits;
133         int                      reading_position;
134         int                      loop;
135         int                      error;
136 };
137
/*
 * Stream handle handed to lzh_decode_init(), lzh_decode() and
 * lzh_decode_free().
 * NOTE(review): the field names suggest a zlib-like interface (pending
 * input pointer/count, running totals, output budget); their exact contract
 * is defined by lzh_decode(), which is not visible here -- confirm there.
 */
138 struct lzh_stream {
139         const unsigned char     *next_in;
140         int                      avail_in;
141         int64_t                  total_in;
142         const unsigned char     *ref_ptr;
143         int                      avail_out;
144         int64_t                  total_out;
        /* Decoder state associated with this stream. */
145         struct lzh_dec          *ds;
146 };
147
/*
 * Reader state for the LHa format.  One instance is allocated by
 * archive_read_support_format_lha() and registered as the format's private
 * data; the per-entry members are reset by the read_header callback.
 */
148 struct lha {
149         /* entry_bytes_remaining is the number of bytes we expect.          */
150         int64_t                  entry_offset;
151         int64_t                  entry_bytes_remaining;
152         int64_t                  entry_unconsumed;
153         uint16_t                 entry_crc_calculated;
154  
155         size_t                   header_size;   /* header size              */
156         unsigned char            level;         /* header level             */
157         char                     method[3];     /* compress type            */
158         int64_t                  compsize;      /* compressed data size     */
159         int64_t                  origsize;      /* original file size       */
        /* Bitmask of the *_IS_SET flags below; cleared for each header. */
160         int                      setflag;
161 #define BIRTHTIME_IS_SET        1
162 #define ATIME_IS_SET            2
163 #define UNIX_MODE_IS_SET        4
164 #define CRC_IS_SET              8
165         time_t                   birthtime;
166         long                     birthtime_tv_nsec;
167         time_t                   mtime;
168         long                     mtime_tv_nsec;
169         time_t                   atime;
170         long                     atime_tv_nsec;
171         mode_t                   mode;
172         int64_t                  uid;
173         int64_t                  gid;
174         struct archive_string    uname;
175         struct archive_string    gname;
176         uint16_t                 header_crc;
177         uint16_t                 crc;
        /* Conversion applied to the current entry's names.  It is reset for
         * each header to opt_sconv, or to NULL when no option was given. */
178         struct archive_string_conv *sconv;
        /* Conversion requested through the "hdrcharset" option, if any. */
179         struct archive_string_conv *opt_sconv;
180
        /* Directory and file name parts of the current entry; the header
         * callback concatenates them into the entry pathname. */
181         struct archive_string    dirname;
182         struct archive_string    filename;
        /* Scratch wide string used when replacing '\\' with '/'. */
183         struct archive_wstring   ws;
184
        /* DOS attribute byte; bit 0 is treated as read-only when no Unix
         * mode was supplied. */
185         unsigned char            dos_attr;
186
187         /* Set once the first valid header of the archive has been found. */
188         char                     found_first_header;
189         /* Set when the entry's method is "lhd", i.e. a directory entry. */
190         char                     directory;
191
192         /* Flags to mark progress of decompression. */
193         char                     decompress_init;
194         char                     end_of_entry;
195         char                     end_of_entry_cleanup;
        /* Zero for the stored methods "lh0" and "lz4", one otherwise. */
196         char                     entry_is_compressed;
197
        /* Per-entry format name of the form "lha -xxx-". */
198         char                     format_name[64];
199
200         struct lzh_stream        strm;
201 };
202
203 /*
204  * LHA header common member offset.
205  *
 * These byte offsets are read before the header level is known, so they
 * apply to every header level handled in this file.  The method field is
 * the five-byte "-l??-" string that lha_check_header_format() tests.
 */
206 #define H_METHOD_OFFSET 2       /* Compress type. */
207 #define H_ATTR_OFFSET   19      /* DOS attribute. */
208 #define H_LEVEL_OFFSET  20      /* Header Level.  */
209 #define H_SIZE          22      /* Minimum header size. */
210
/*
 * Forward declarations.
 *
 * The archive_read_format_lha_* functions are the callbacks handed to
 * __archive_read_register_format(); the lha_* functions parse headers and
 * entry data, and the lzh_* functions implement the LZH decoder.
 */
211 static int      archive_read_format_lha_bid(struct archive_read *, int);
212 static int      archive_read_format_lha_options(struct archive_read *,
213                     const char *, const char *);
214 static int      archive_read_format_lha_read_header(struct archive_read *,
215                     struct archive_entry *);
216 static int      archive_read_format_lha_read_data(struct archive_read *,
217                     const void **, size_t *, int64_t *);
218 static int      archive_read_format_lha_read_data_skip(struct archive_read *);
219 static int      archive_read_format_lha_cleanup(struct archive_read *);
220
/* Header parsing and entry data helpers. */
221 static void     lha_replace_path_separator(struct lha *,
222                     struct archive_entry *);
223 static int      lha_read_file_header_0(struct archive_read *, struct lha *);
224 static int      lha_read_file_header_1(struct archive_read *, struct lha *);
225 static int      lha_read_file_header_2(struct archive_read *, struct lha *);
226 static int      lha_read_file_header_3(struct archive_read *, struct lha *);
227 static int      lha_read_file_extended_header(struct archive_read *,
228                     struct lha *, uint16_t *, int, size_t, size_t *);
229 static size_t   lha_check_header_format(const void *);
230 static int      lha_skip_sfx(struct archive_read *);
231 static time_t   lha_dos_time(const unsigned char *);
232 static time_t   lha_win_time(uint64_t, long *);
233 static unsigned char    lha_calcsum(unsigned char, const void *,
234                     int, size_t);
235 static int      lha_parse_linkname(struct archive_string *,
236                     struct archive_string *);
237 static int      lha_read_data_none(struct archive_read *, const void **,
238                     size_t *, int64_t *);
239 static int      lha_read_data_lzh(struct archive_read *, const void **,
240                     size_t *, int64_t *);
241 static void     lha_crc16_init(void);
242 static uint16_t lha_crc16(uint16_t, const void *, size_t);
/* LZH decoder. */
243 static int      lzh_decode_init(struct lzh_stream *, const char *);
244 static void     lzh_decode_free(struct lzh_stream *);
245 static int      lzh_decode(struct lzh_stream *, int);
246 static int      lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247 static int      lzh_huffman_init(struct huffman *, size_t, int);
248 static void     lzh_huffman_free(struct huffman *);
249 static int      lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250 static int      lzh_make_fake_table(struct huffman *, uint16_t);
251 static int      lzh_make_huffman_table(struct huffman *);
252 static inline int lzh_decode_huffman(struct huffman *, unsigned);
253 static int      lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
254
255
256 int
257 archive_read_support_format_lha(struct archive *_a)
258 {
259         struct archive_read *a = (struct archive_read *)_a;
260         struct lha *lha;
261         int r;
262
263         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264             ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266         lha = (struct lha *)calloc(1, sizeof(*lha));
267         if (lha == NULL) {
268                 archive_set_error(&a->archive, ENOMEM,
269                     "Can't allocate lha data");
270                 return (ARCHIVE_FATAL);
271         }
272         archive_string_init(&lha->ws);
273
274         r = __archive_read_register_format(a,
275             lha,
276             "lha",
277             archive_read_format_lha_bid,
278             archive_read_format_lha_options,
279             archive_read_format_lha_read_header,
280             archive_read_format_lha_read_data,
281             archive_read_format_lha_read_data_skip,
282             NULL,
283             archive_read_format_lha_cleanup,
284             NULL,
285             NULL);
286
287         if (r != ARCHIVE_OK)
288                 free(lha);
289         return (ARCHIVE_OK);
290 }
291
292 static size_t
293 lha_check_header_format(const void *h)
294 {
295         const unsigned char *p = h;
296         size_t next_skip_bytes;
297
298         switch (p[H_METHOD_OFFSET+3]) {
299         /*
300          * "-lh0-" ... "-lh7-" "-lhd-"
301          * "-lzs-" "-lz5-"
302          */
303         case '0': case '1': case '2': case '3':
304         case '4': case '5': case '6': case '7':
305         case 'd':
306         case 's':
307                 next_skip_bytes = 4;
308
309                 /* b0 == 0 means the end of an LHa archive file.        */
310                 if (p[0] == 0)
311                         break;
312                 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313                     ||  p[H_METHOD_OFFSET+4] != '-')
314                         break;
315
316                 if (p[H_METHOD_OFFSET+2] == 'h') {
317                         /* "-lh?-" */
318                         if (p[H_METHOD_OFFSET+3] == 's')
319                                 break;
320                         if (p[H_LEVEL_OFFSET] == 0)
321                                 return (0);
322                         if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323                                 return (0);
324                 }
325                 if (p[H_METHOD_OFFSET+2] == 'z') {
326                         /* LArc extensions: -lzs-,-lz4- and -lz5- */
327                         if (p[H_LEVEL_OFFSET] != 0)
328                                 break;
329                         if (p[H_METHOD_OFFSET+3] == 's'
330                             || p[H_METHOD_OFFSET+3] == '4'
331                             || p[H_METHOD_OFFSET+3] == '5')
332                                 return (0);
333                 }
334                 break;
335         case 'h': next_skip_bytes = 1; break;
336         case 'z': next_skip_bytes = 1; break;
337         case 'l': next_skip_bytes = 2; break;
338         case '-': next_skip_bytes = 3; break;
339         default : next_skip_bytes = 4; break;
340         }
341
342         return (next_skip_bytes);
343 }
344
345 static int
346 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347 {
348         const char *p;
349         const void *buff;
350         ssize_t bytes_avail, offset, window;
351         size_t next;
352
353         /* If there's already a better bid than we can ever
354            make, don't bother testing. */
355         if (best_bid > 30)
356                 return (-1);
357
358         if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359                 return (-1);
360
361         if (lha_check_header_format(p) == 0)
362                 return (30);
363
364         if (p[0] == 'M' && p[1] == 'Z') {
365                 /* PE file */
366                 offset = 0;
367                 window = 4096;
368                 while (offset < (1024 * 20)) {
369                         buff = __archive_read_ahead(a, offset + window,
370                             &bytes_avail);
371                         if (buff == NULL) {
372                                 /* Remaining bytes are less than window. */
373                                 window >>= 1;
374                                 if (window < (H_SIZE + 3))
375                                         return (0);
376                                 continue;
377                         }
378                         p = (const char *)buff + offset;
379                         while (p + H_SIZE < (const char *)buff + bytes_avail) {
380                                 if ((next = lha_check_header_format(p)) == 0)
381                                         return (30);
382                                 p += next;
383                         }
384                         offset = p - (const char *)buff;
385                 }
386         }
387         return (0);
388 }
389
390 static int
391 archive_read_format_lha_options(struct archive_read *a,
392     const char *key, const char *val)
393 {
394         struct lha *lha;
395         int ret = ARCHIVE_FAILED;
396
397         lha = (struct lha *)(a->format->data);
398         if (strcmp(key, "hdrcharset")  == 0) {
399                 if (val == NULL || val[0] == 0)
400                         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401                             "lha: hdrcharset option needs a character-set name");
402                 else {
403                         lha->opt_sconv =
404                             archive_string_conversion_from_charset(
405                                 &a->archive, val, 0);
406                         if (lha->opt_sconv != NULL)
407                                 ret = ARCHIVE_OK;
408                         else
409                                 ret = ARCHIVE_FATAL;
410                 }
411                 return (ret);
412         }
413
414         /* Note: The "warn" return is just to inform the options
415          * supervisor that we didn't handle it.  It will generate
416          * a suitable error if no one used this option. */
417         return (ARCHIVE_WARN);
418 }
419
420 static int
421 lha_skip_sfx(struct archive_read *a)
422 {
423         const void *h;
424         const char *p, *q;
425         size_t next, skip;
426         ssize_t bytes, window;
427
428         window = 4096;
429         for (;;) {
430                 h = __archive_read_ahead(a, window, &bytes);
431                 if (h == NULL) {
432                         /* Remaining bytes are less than window. */
433                         window >>= 1;
434                         if (window < (H_SIZE + 3))
435                                 goto fatal;
436                         continue;
437                 }
438                 if (bytes < H_SIZE)
439                         goto fatal;
440                 p = h;
441                 q = p + bytes;
442
443                 /*
444                  * Scan ahead until we find something that looks
445                  * like the lha header.
446                  */
447                 while (p + H_SIZE < q) {
448                         if ((next = lha_check_header_format(p)) == 0) {
449                                 skip = p - (const char *)h;
450                                 __archive_read_consume(a, skip);
451                                 return (ARCHIVE_OK);
452                         }
453                         p += next;
454                 }
455                 skip = p - (const char *)h;
456                 __archive_read_consume(a, skip);
457         }
458 fatal:
459         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460             "Couldn't find out LHa header");
461         return (ARCHIVE_FATAL);
462 }
463
464 static int
465 truncated_error(struct archive_read *a)
466 {
467         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468             "Truncated LHa header");
469         return (ARCHIVE_FATAL);
470 }
471
472 static int
473 archive_read_format_lha_read_header(struct archive_read *a,
474     struct archive_entry *entry)
475 {
476         struct archive_string linkname;
477         struct archive_string pathname;
478         struct lha *lha;
479         const unsigned char *p;
480         const char *signature;
481         int err;
482         
483         lha_crc16_init();
484
485         a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486         if (a->archive.archive_format_name == NULL)
487                 a->archive.archive_format_name = "lha";
488
489         lha = (struct lha *)(a->format->data);
490         lha->decompress_init = 0;
491         lha->end_of_entry = 0;
492         lha->end_of_entry_cleanup = 0;
493         lha->entry_unconsumed = 0;
494
495         if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496                 /*
497                  * LHa archiver added 0 to the tail of its archive file as
498                  * the mark of the end of the archive.
499                  */
500                 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501                 if (signature == NULL || signature[0] == 0)
502                         return (ARCHIVE_EOF);
503                 return (truncated_error(a));
504         }
505
506         signature = (const char *)p;
507         if (lha->found_first_header == 0 &&
508             signature[0] == 'M' && signature[1] == 'Z') {
509                 /* This is an executable?  Must be self-extracting...   */
510                 err = lha_skip_sfx(a);
511                 if (err < ARCHIVE_WARN)
512                         return (err);
513
514                 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515                         return (truncated_error(a));
516                 signature = (const char *)p;
517         }
518         /* signature[0] == 0 means the end of an LHa archive file. */
519         if (signature[0] == 0)
520                 return (ARCHIVE_EOF);
521
522         /*
523          * Check the header format and method type.
524          */
525         if (lha_check_header_format(p) != 0) {
526                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527                     "Bad LHa file");
528                 return (ARCHIVE_FATAL);
529         }
530
531         /* We've found the first header. */
532         lha->found_first_header = 1;
533         /* Set a default value and common data */
534         lha->header_size = 0;
535         lha->level = p[H_LEVEL_OFFSET];
536         lha->method[0] = p[H_METHOD_OFFSET+1];
537         lha->method[1] = p[H_METHOD_OFFSET+2];
538         lha->method[2] = p[H_METHOD_OFFSET+3];
539         if (memcmp(lha->method, "lhd", 3) == 0)
540                 lha->directory = 1;
541         else
542                 lha->directory = 0;
543         if (memcmp(lha->method, "lh0", 3) == 0 ||
544             memcmp(lha->method, "lz4", 3) == 0)
545                 lha->entry_is_compressed = 0;
546         else
547                 lha->entry_is_compressed = 1;
548
549         lha->compsize = 0;
550         lha->origsize = 0;
551         lha->setflag = 0;
552         lha->birthtime = 0;
553         lha->birthtime_tv_nsec = 0;
554         lha->mtime = 0;
555         lha->mtime_tv_nsec = 0;
556         lha->atime = 0;
557         lha->atime_tv_nsec = 0;
558         lha->mode = (lha->directory)? 0777 : 0666;
559         lha->uid = 0;
560         lha->gid = 0;
561         archive_string_empty(&lha->dirname);
562         archive_string_empty(&lha->filename);
563         lha->dos_attr = 0;
564         if (lha->opt_sconv != NULL)
565                 lha->sconv = lha->opt_sconv;
566         else
567                 lha->sconv = NULL;
568
569         switch (p[H_LEVEL_OFFSET]) {
570         case 0:
571                 err = lha_read_file_header_0(a, lha);
572                 break;
573         case 1:
574                 err = lha_read_file_header_1(a, lha);
575                 break;
576         case 2:
577                 err = lha_read_file_header_2(a, lha);
578                 break;
579         case 3:
580                 err = lha_read_file_header_3(a, lha);
581                 break;
582         default:
583                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584                     "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585                 err = ARCHIVE_FATAL;
586                 break;
587         }
588         if (err < ARCHIVE_WARN)
589                 return (err);
590
591
592         if (!lha->directory && archive_strlen(&lha->filename) == 0)
593                 /* The filename has not been set */
594                 return (truncated_error(a));
595
596         /*
597          * Make a pathname from a dirname and a filename.
598          */
599         archive_string_concat(&lha->dirname, &lha->filename);
600         archive_string_init(&pathname);
601         archive_string_init(&linkname);
602         archive_string_copy(&pathname, &lha->dirname);
603
604         if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605                 /*
606                  * Extract the symlink-name if it's included in the pathname.
607                  */
608                 if (!lha_parse_linkname(&linkname, &pathname)) {
609                         /* We couldn't get the symlink-name. */
610                         archive_set_error(&a->archive,
611                             ARCHIVE_ERRNO_FILE_FORMAT,
612                             "Unknown symlink-name");
613                         archive_string_free(&pathname);
614                         archive_string_free(&linkname);
615                         return (ARCHIVE_FAILED);
616                 }
617         } else {
618                 /*
619                  * Make sure a file-type is set.
620                  * The mode has been overridden if it is in the extended data.
621                  */
622                 lha->mode = (lha->mode & ~AE_IFMT) |
623                     ((lha->directory)? AE_IFDIR: AE_IFREG);
624         }
625         if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626             (lha->dos_attr & 1) != 0)
627                 lha->mode &= ~(0222);/* read only. */
628
629         /*
630          * Set basic file parameters.
631          */
632         if (archive_entry_copy_pathname_l(entry, pathname.s,
633             pathname.length, lha->sconv) != 0) {
634                 if (errno == ENOMEM) {
635                         archive_set_error(&a->archive, ENOMEM,
636                             "Can't allocate memory for Pathname");
637                         return (ARCHIVE_FATAL);
638                 }
639                 archive_set_error(&a->archive,
640                     ARCHIVE_ERRNO_FILE_FORMAT,
641                     "Pathname cannot be converted "
642                     "from %s to current locale.",
643                     archive_string_conversion_charset_name(lha->sconv));
644                 err = ARCHIVE_WARN;
645         }
646         archive_string_free(&pathname);
647         if (archive_strlen(&linkname) > 0) {
648                 if (archive_entry_copy_symlink_l(entry, linkname.s,
649                     linkname.length, lha->sconv) != 0) {
650                         if (errno == ENOMEM) {
651                                 archive_set_error(&a->archive, ENOMEM,
652                                     "Can't allocate memory for Linkname");
653                                 return (ARCHIVE_FATAL);
654                         }
655                         archive_set_error(&a->archive,
656                             ARCHIVE_ERRNO_FILE_FORMAT,
657                             "Linkname cannot be converted "
658                             "from %s to current locale.",
659                             archive_string_conversion_charset_name(lha->sconv));
660                         err = ARCHIVE_WARN;
661                 }
662         } else
663                 archive_entry_set_symlink(entry, NULL);
664         archive_string_free(&linkname);
665         /*
666          * When a header level is 0, there is a possibility that
667          * a pathname and a symlink has '\' character, a directory
668          * separator in DOS/Windows. So we should convert it to '/'.
669          */
670         if (p[H_LEVEL_OFFSET] == 0)
671                 lha_replace_path_separator(lha, entry);
672
673         archive_entry_set_mode(entry, lha->mode);
674         archive_entry_set_uid(entry, lha->uid);
675         archive_entry_set_gid(entry, lha->gid);
676         if (archive_strlen(&lha->uname) > 0)
677                 archive_entry_set_uname(entry, lha->uname.s);
678         if (archive_strlen(&lha->gname) > 0)
679                 archive_entry_set_gname(entry, lha->gname.s);
680         if (lha->setflag & BIRTHTIME_IS_SET) {
681                 archive_entry_set_birthtime(entry, lha->birthtime,
682                     lha->birthtime_tv_nsec);
683                 archive_entry_set_ctime(entry, lha->birthtime,
684                     lha->birthtime_tv_nsec);
685         } else {
686                 archive_entry_unset_birthtime(entry);
687                 archive_entry_unset_ctime(entry);
688         }
689         archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690         if (lha->setflag & ATIME_IS_SET)
691                 archive_entry_set_atime(entry, lha->atime,
692                     lha->atime_tv_nsec);
693         else
694                 archive_entry_unset_atime(entry);
695         if (lha->directory || archive_entry_symlink(entry) != NULL)
696                 archive_entry_unset_size(entry);
697         else
698                 archive_entry_set_size(entry, lha->origsize);
699
700         /*
701          * Prepare variables used to read a file content.
702          */
703         lha->entry_bytes_remaining = lha->compsize;
704         if (lha->entry_bytes_remaining < 0) {
705                 archive_set_error(&a->archive,
706                     ARCHIVE_ERRNO_FILE_FORMAT,
707                     "Invalid LHa entry size");
708                 return (ARCHIVE_FATAL);
709         }
710         lha->entry_offset = 0;
711         lha->entry_crc_calculated = 0;
712
713         /*
714          * This file does not have a content.
715          */
716         if (lha->directory || lha->compsize == 0)
717                 lha->end_of_entry = 1;
718
719         sprintf(lha->format_name, "lha -%c%c%c-",
720             lha->method[0], lha->method[1], lha->method[2]);
721         a->archive.archive_format_name = lha->format_name;
722
723         return (err);
724 }
725
726 /*
727  * Replace a DOS path separator '\' by a character '/'.
728  * Some multi-byte character set have  a character '\' in its second byte.
729  */
730 static void
731 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
732 {
733         const wchar_t *wp;
734         size_t i;
735
736         if ((wp = archive_entry_pathname_w(entry)) != NULL) {
737                 archive_wstrcpy(&(lha->ws), wp);
738                 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
739                         if (lha->ws.s[i] == L'\\')
740                                 lha->ws.s[i] = L'/';
741                 }
742                 archive_entry_copy_pathname_w(entry, lha->ws.s);
743         }
744
745         if ((wp = archive_entry_symlink_w(entry)) != NULL) {
746                 archive_wstrcpy(&(lha->ws), wp);
747                 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
748                         if (lha->ws.s[i] == L'\\')
749                                 lha->ws.s[i] = L'/';
750                 }
751                 archive_entry_copy_symlink_w(entry, lha->ws.s);
752         }
753 }
754
755 /*
756  * Header 0 format
757  *
758  * +0              +1         +2               +7                  +11
759  * +---------------+----------+----------------+-------------------+
760  * |header size(*1)|header sum|compression type|compressed size(*2)|
761  * +---------------+----------+----------------+-------------------+
762  *                             <---------------------(*1)----------*
763  *
764  * +11               +15       +17       +19            +20              +21
765  * +-----------------+---------+---------+--------------+----------------+
766  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
767  * +-----------------+---------+---------+--------------+----------------+
768  * *--------------------------------(*1)---------------------------------*
769  *
770  * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
771  * +---------------+---------+----------+----------------+------------------+
772  * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
773  * +---------------+---------+----------+----------------+------------------+
774  *                  <--(*3)->                             <------(*2)------>
775  * *----------------------(*1)-------------------------->
776  *
777  */
778 #define H0_HEADER_SIZE_OFFSET   0
779 #define H0_HEADER_SUM_OFFSET    1
780 #define H0_COMP_SIZE_OFFSET     7
781 #define H0_ORIG_SIZE_OFFSET     11
782 #define H0_DOS_TIME_OFFSET      15
783 #define H0_NAME_LEN_OFFSET      21
784 #define H0_FILE_NAME_OFFSET     22
785 #define H0_FIXED_SIZE           24
786 static int
787 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
788 {
789         const unsigned char *p;
790         int extdsize, namelen;
791         unsigned char headersum, sum_calculated;
792
793         if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
794                 return (truncated_error(a));
795         lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
796         headersum = p[H0_HEADER_SUM_OFFSET];
797         lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
798         lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
799         lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
800         namelen = p[H0_NAME_LEN_OFFSET];
801         extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
802         if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
803                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
804                     "Invalid LHa header");
805                 return (ARCHIVE_FATAL);
806         }
807         if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
808                 return (truncated_error(a));
809
810         archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
811         /* When extdsize == -2, A CRC16 value is not present in the header. */
812         if (extdsize >= 0) {
813                 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
814                 lha->setflag |= CRC_IS_SET;
815         }
816         sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
817
818         /* Read an extended header */
819         if (extdsize > 0) {
820                 /* This extended data is set by 'LHa for UNIX' only.
821                  * Maybe fixed size.
822                  */
823                 p += H0_FILE_NAME_OFFSET + namelen + 2;
824                 if (p[0] == 'U' && extdsize == 12) {
825                         /* p[1] is a minor version. */
826                         lha->mtime = archive_le32dec(&p[2]);
827                         lha->mode = archive_le16dec(&p[6]);
828                         lha->uid = archive_le16dec(&p[8]);
829                         lha->gid = archive_le16dec(&p[10]);
830                         lha->setflag |= UNIX_MODE_IS_SET;
831                 }
832         }
833         __archive_read_consume(a, lha->header_size);
834
835         if (sum_calculated != headersum) {
836                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
837                     "LHa header sum error");
838                 return (ARCHIVE_FATAL);
839         }
840
841         return (ARCHIVE_OK);
842 }
843
844 /*
845  * Header 1 format
846  *
847  * +0              +1         +2               +7            +11
848  * +---------------+----------+----------------+-------------+
849  * |header size(*1)|header sum|compression type|skip size(*2)|
850  * +---------------+----------+----------------+-------------+
851  *                             <---------------(*1)----------*
852  *
853  * +11               +15       +17       +19            +20              +21
854  * +-----------------+---------+---------+--------------+----------------+
855  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
856  * +-----------------+---------+---------+--------------+----------------+
857  * *-------------------------------(*1)----------------------------------*
858  *
859  * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
860  * +---------------+---------+----------+-----------+-----------+
861  * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
862  * +---------------+---------+----------+-----------+-----------+
863  *                  <--(*3)->
864  * *----------------------------(*1)----------------------------*
865  *
866  * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
867  * +----------------+---------------------+------------------------+
868  * |next header size| extended header(*5) |     compressed data    |
869  * +----------------+---------------------+------------------------+
870  * *------(*1)-----> <--------------------(*2)-------------------->
871  */
872 #define H1_HEADER_SIZE_OFFSET   0
873 #define H1_HEADER_SUM_OFFSET    1
874 #define H1_COMP_SIZE_OFFSET     7
875 #define H1_ORIG_SIZE_OFFSET     11
876 #define H1_DOS_TIME_OFFSET      15
877 #define H1_NAME_LEN_OFFSET      21
878 #define H1_FILE_NAME_OFFSET     22
879 #define H1_FIXED_SIZE           27
880 static int
881 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
882 {
883         const unsigned char *p;
884         size_t extdsize;
885         int i, err, err2;
886         int namelen, padding;
887         unsigned char headersum, sum_calculated;
888
889         err = ARCHIVE_OK;
890
891         if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
892                 return (truncated_error(a));
893
894         lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
895         headersum = p[H1_HEADER_SUM_OFFSET];
896         /* Note: An extended header size is included in a compsize. */
897         lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
898         lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
899         lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
900         namelen = p[H1_NAME_LEN_OFFSET];
901         /* Calculate a padding size. The result will be normally 0 only(?) */
902         padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
903
904         if (namelen > 230 || padding < 0)
905                 goto invalid;
906
907         if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
908                 return (truncated_error(a));
909
910         for (i = 0; i < namelen; i++) {
911                 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
912                         goto invalid;/* Invalid filename. */
913         }
914         archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
915         lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
916         lha->setflag |= CRC_IS_SET;
917
918         sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
919         /* Consume used bytes but not include `next header size' data
920          * since it will be consumed in lha_read_file_extended_header(). */
921         __archive_read_consume(a, lha->header_size - 2);
922
923         /* Read extended headers */
924         err2 = lha_read_file_extended_header(a, lha, NULL, 2,
925             (size_t)(lha->compsize + 2), &extdsize);
926         if (err2 < ARCHIVE_WARN)
927                 return (err2);
928         if (err2 < err)
929                 err = err2;
930         /* Get a real compressed file size. */
931         lha->compsize -= extdsize - 2;
932
933         if (lha->compsize < 0)
934                 goto invalid;   /* Invalid compressed file size */
935
936         if (sum_calculated != headersum) {
937                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
938                     "LHa header sum error");
939                 return (ARCHIVE_FATAL);
940         }
941         return (err);
942 invalid:
943         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
944             "Invalid LHa header");
945         return (ARCHIVE_FATAL);
946 }
947
948 /*
949  * Header 2 format
950  *
951  * +0              +2               +7                  +11               +15
952  * +---------------+----------------+-------------------+-----------------+
953  * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
954  * +---------------+----------------+-------------------+-----------------+
955  *  <--------------------------------(*1)---------------------------------*
956  *
957  * +15               +19          +20              +21        +23         +24
958  * +-----------------+------------+----------------+----------+-----------+
959  * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
960  * +-----------------+------------+----------------+----------+-----------+
961  * *---------------------------------(*1)---------------------------------*
962  *
963  * +24              +26                 +26+(*3)      +26+(*3)+(*4)
964  * +----------------+-------------------+-------------+-------------------+
965  * |next header size|extended header(*3)| padding(*4) |  compressed data  |
966  * +----------------+-------------------+-------------+-------------------+
967  * *--------------------------(*1)-------------------> <------(*2)------->
968  *
969  */
970 #define H2_HEADER_SIZE_OFFSET   0
971 #define H2_COMP_SIZE_OFFSET     7
972 #define H2_ORIG_SIZE_OFFSET     11
973 #define H2_TIME_OFFSET          15
974 #define H2_CRC_OFFSET           21
975 #define H2_FIXED_SIZE           24
976 static int
977 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
978 {
979         const unsigned char *p;
980         size_t extdsize;
981         int err, padding;
982         uint16_t header_crc;
983
984         if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
985                 return (truncated_error(a));
986
987         lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
988         lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
989         lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
990         lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
991         lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
992         lha->setflag |= CRC_IS_SET;
993
994         if (lha->header_size < H2_FIXED_SIZE) {
995                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
996                     "Invalid LHa header size");
997                 return (ARCHIVE_FATAL);
998         }
999
1000         header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
1001         __archive_read_consume(a, H2_FIXED_SIZE);
1002
1003         /* Read extended headers */
1004         err = lha_read_file_extended_header(a, lha, &header_crc, 2,
1005                   lha->header_size - H2_FIXED_SIZE, &extdsize);
1006         if (err < ARCHIVE_WARN)
1007                 return (err);
1008
1009         /* Calculate a padding size. The result will be normally 0 or 1. */
1010         padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1011         if (padding > 0) {
1012                 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1013                         return (truncated_error(a));
1014                 header_crc = lha_crc16(header_crc, p, padding);
1015                 __archive_read_consume(a, padding);
1016         }
1017
1018         if (header_crc != lha->header_crc) {
1019                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1020                     "LHa header CRC error");
1021                 return (ARCHIVE_FATAL);
1022         }
1023         return (err);
1024 }
1025
1026 /*
1027  * Header 3 format
1028  *
1029  * +0           +2               +7                  +11               +15
1030  * +------------+----------------+-------------------+-----------------+
1031  * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1032  * +------------+----------------+-------------------+-----------------+
1033  *  <-------------------------------(*1)-------------------------------*
1034  *
1035  * +15               +19          +20              +21        +23         +24
1036  * +-----------------+------------+----------------+----------+-----------+
1037  * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1038  * +-----------------+------------+----------------+----------+-----------+
1039  * *--------------------------------(*1)----------------------------------*
1040  *
1041  * +24             +28              +32                 +32+(*3)
1042  * +---------------+----------------+-------------------+-----------------+
1043  * |header size(*1)|next header size|extended header(*3)| compressed data |
1044  * +---------------+----------------+-------------------+-----------------+
1045  * *------------------------(*1)-----------------------> <------(*2)----->
1046  *
1047  */
1048 #define H3_FIELD_LEN_OFFSET     0
1049 #define H3_COMP_SIZE_OFFSET     7
1050 #define H3_ORIG_SIZE_OFFSET     11
1051 #define H3_TIME_OFFSET          15
1052 #define H3_CRC_OFFSET           21
1053 #define H3_HEADER_SIZE_OFFSET   24
1054 #define H3_FIXED_SIZE           28
1055 static int
1056 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1057 {
1058         const unsigned char *p;
1059         size_t extdsize;
1060         int err;
1061         uint16_t header_crc;
1062
1063         if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1064                 return (truncated_error(a));
1065
1066         if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1067                 goto invalid;
1068         lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1069         lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1070         lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1071         lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1072         lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1073         lha->setflag |= CRC_IS_SET;
1074
1075         if (lha->header_size < H3_FIXED_SIZE + 4)
1076                 goto invalid;
1077         header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1078         __archive_read_consume(a, H3_FIXED_SIZE);
1079
1080         /* Read extended headers */
1081         err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1082                   lha->header_size - H3_FIXED_SIZE, &extdsize);
1083         if (err < ARCHIVE_WARN)
1084                 return (err);
1085
1086         if (header_crc != lha->header_crc) {
1087                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1088                     "LHa header CRC error");
1089                 return (ARCHIVE_FATAL);
1090         }
1091         return (err);
1092 invalid:
1093         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1094             "Invalid LHa header");
1095         return (ARCHIVE_FATAL);
1096 }
1097
1098 /*
1099  * Extended header format
1100  *
1101  * +0             +2        +3  -- used in header 1 and 2
1102  * +0             +4        +5  -- used in header 3
1103  * +--------------+---------+-------------------+--------------+--
1104  * |ex-header size|header id|        data       |ex-header size| .......
1105  * +--------------+---------+-------------------+--------------+--
1106  *  <-------------( ex-header size)------------> <-- next extended header --*
1107  *
1108  * If the ex-header size is zero, it is the make of the end of extended
1109  * headers.
1110  *
1111  */
1112 static int
1113 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1114     uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1115 {
1116         const void *h;
1117         const unsigned char *extdheader;
1118         size_t  extdsize;
1119         size_t  datasize;
1120         unsigned int i;
1121         unsigned char extdtype;
1122
1123 #define EXT_HEADER_CRC          0x00            /* Header CRC and information*/
1124 #define EXT_FILENAME            0x01            /* Filename                 */
1125 #define EXT_DIRECTORY           0x02            /* Directory name           */
1126 #define EXT_DOS_ATTR            0x40            /* MS-DOS attribute         */
1127 #define EXT_TIMESTAMP           0x41            /* Windows time stamp       */
1128 #define EXT_FILESIZE            0x42            /* Large file size          */
1129 #define EXT_TIMEZONE            0x43            /* Time zone                */
1130 #define EXT_UTF16_FILENAME      0x44            /* UTF-16 filename          */
1131 #define EXT_UTF16_DIRECTORY     0x45            /* UTF-16 directory name    */
1132 #define EXT_CODEPAGE            0x46            /* Codepage                 */
1133 #define EXT_UNIX_MODE           0x50            /* File permission          */
1134 #define EXT_UNIX_GID_UID        0x51            /* gid,uid                  */
1135 #define EXT_UNIX_GNAME          0x52            /* Group name               */
1136 #define EXT_UNIX_UNAME          0x53            /* User name                */
1137 #define EXT_UNIX_MTIME          0x54            /* Modified time            */
1138 #define EXT_OS2_NEW_ATTR        0x7f            /* new attribute(OS/2 only) */
1139 #define EXT_NEW_ATTR            0xff            /* new attribute            */
1140
1141         *total_size = sizefield_length;
1142
1143         for (;;) {
1144                 /* Read an extended header size. */
1145                 if ((h =
1146                     __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1147                         return (truncated_error(a));
1148                 /* Check if the size is the zero indicates the end of the
1149                  * extended header. */
1150                 if (sizefield_length == sizeof(uint16_t))
1151                         extdsize = archive_le16dec(h);
1152                 else
1153                         extdsize = archive_le32dec(h);
1154                 if (extdsize == 0) {
1155                         /* End of extended header */
1156                         if (crc != NULL)
1157                                 *crc = lha_crc16(*crc, h, sizefield_length);
1158                         __archive_read_consume(a, sizefield_length);
1159                         return (ARCHIVE_OK);
1160                 }
1161
1162                 /* Sanity check to the extended header size. */
1163                 if (((uint64_t)*total_size + extdsize) >
1164                                     (uint64_t)limitsize ||
1165                     extdsize <= (size_t)sizefield_length)
1166                         goto invalid;
1167
1168                 /* Read the extended header. */
1169                 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1170                         return (truncated_error(a));
1171                 *total_size += extdsize;
1172
1173                 extdheader = (const unsigned char *)h;
1174                 /* Get the extended header type. */
1175                 extdtype = extdheader[sizefield_length];
1176                 /* Calculate an extended data size. */
1177                 datasize = extdsize - (1 + sizefield_length);
1178                 /* Skip an extended header size field and type field. */
1179                 extdheader += sizefield_length + 1;
1180
1181                 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1182                         *crc = lha_crc16(*crc, h, extdsize);
1183                 switch (extdtype) {
1184                 case EXT_HEADER_CRC:
1185                         /* We only use a header CRC. Following data will not
1186                          * be used. */
1187                         if (datasize >= 2) {
1188                                 lha->header_crc = archive_le16dec(extdheader);
1189                                 if (crc != NULL) {
1190                                         static const char zeros[2] = {0, 0};
1191                                         *crc = lha_crc16(*crc, h,
1192                                             extdsize - datasize);
1193                                         /* CRC value itself as zero */
1194                                         *crc = lha_crc16(*crc, zeros, 2);
1195                                         *crc = lha_crc16(*crc,
1196                                             extdheader+2, datasize - 2);
1197                                 }
1198                         }
1199                         break;
1200                 case EXT_FILENAME:
1201                         if (datasize == 0) {
1202                                 /* maybe directory header */
1203                                 archive_string_empty(&lha->filename);
1204                                 break;
1205                         }
1206                         if (extdheader[0] == '\0')
1207                                 goto invalid;
1208                         archive_strncpy(&lha->filename,
1209                             (const char *)extdheader, datasize);
1210                         break;
1211                 case EXT_DIRECTORY:
1212                         if (datasize == 0 || extdheader[0] == '\0')
1213                                 /* no directory name data. exit this case. */
1214                                 goto invalid;
1215
1216                         archive_strncpy(&lha->dirname,
1217                             (const char *)extdheader, datasize);
1218                         /*
1219                          * Convert directory delimiter from 0xFF
1220                          * to '/' for local system.
1221                          */
1222                         for (i = 0; i < lha->dirname.length; i++) {
1223                                 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1224                                         lha->dirname.s[i] = '/';
1225                         }
1226                         /* Is last character directory separator? */
1227                         if (lha->dirname.s[lha->dirname.length-1] != '/')
1228                                 /* invalid directory data */
1229                                 goto invalid;
1230                         break;
1231                 case EXT_DOS_ATTR:
1232                         if (datasize == 2)
1233                                 lha->dos_attr = (unsigned char)
1234                                     (archive_le16dec(extdheader) & 0xff);
1235                         break;
1236                 case EXT_TIMESTAMP:
1237                         if (datasize == (sizeof(uint64_t) * 3)) {
1238                                 lha->birthtime = lha_win_time(
1239                                     archive_le64dec(extdheader),
1240                                     &lha->birthtime_tv_nsec);
1241                                 extdheader += sizeof(uint64_t);
1242                                 lha->mtime = lha_win_time(
1243                                     archive_le64dec(extdheader),
1244                                     &lha->mtime_tv_nsec);
1245                                 extdheader += sizeof(uint64_t);
1246                                 lha->atime = lha_win_time(
1247                                     archive_le64dec(extdheader),
1248                                     &lha->atime_tv_nsec);
1249                                 lha->setflag |= BIRTHTIME_IS_SET |
1250                                     ATIME_IS_SET;
1251                         }
1252                         break;
1253                 case EXT_FILESIZE:
1254                         if (datasize == sizeof(uint64_t) * 2) {
1255                                 lha->compsize = archive_le64dec(extdheader);
1256                                 extdheader += sizeof(uint64_t);
1257                                 lha->origsize = archive_le64dec(extdheader);
1258                         }
1259                         break;
1260                 case EXT_CODEPAGE:
1261                         /* Get an archived filename charset from codepage.
1262                          * This overwrites the charset specified by
1263                          * hdrcharset option. */
1264                         if (datasize == sizeof(uint32_t)) {
1265                                 struct archive_string cp;
1266                                 const char *charset;
1267
1268                                 archive_string_init(&cp);
1269                                 switch (archive_le32dec(extdheader)) {
1270                                 case 65001: /* UTF-8 */
1271                                         charset = "UTF-8";
1272                                         break;
1273                                 default:
1274                                         archive_string_sprintf(&cp, "CP%d",
1275                                             (int)archive_le32dec(extdheader));
1276                                         charset = cp.s;
1277                                         break;
1278                                 }
1279                                 lha->sconv =
1280                                     archive_string_conversion_from_charset(
1281                                         &(a->archive), charset, 1);
1282                                 archive_string_free(&cp);
1283                                 if (lha->sconv == NULL)
1284                                         return (ARCHIVE_FATAL);
1285                         }
1286                         break;
1287                 case EXT_UNIX_MODE:
1288                         if (datasize == sizeof(uint16_t)) {
1289                                 lha->mode = archive_le16dec(extdheader);
1290                                 lha->setflag |= UNIX_MODE_IS_SET;
1291                         }
1292                         break;
1293                 case EXT_UNIX_GID_UID:
1294                         if (datasize == (sizeof(uint16_t) * 2)) {
1295                                 lha->gid = archive_le16dec(extdheader);
1296                                 lha->uid = archive_le16dec(extdheader+2);
1297                         }
1298                         break;
1299                 case EXT_UNIX_GNAME:
1300                         if (datasize > 0)
1301                                 archive_strncpy(&lha->gname,
1302                                     (const char *)extdheader, datasize);
1303                         break;
1304                 case EXT_UNIX_UNAME:
1305                         if (datasize > 0)
1306                                 archive_strncpy(&lha->uname,
1307                                     (const char *)extdheader, datasize);
1308                         break;
1309                 case EXT_UNIX_MTIME:
1310                         if (datasize == sizeof(uint32_t))
1311                                 lha->mtime = archive_le32dec(extdheader);
1312                         break;
1313                 case EXT_OS2_NEW_ATTR:
1314                         /* This extended header is OS/2 depend. */
1315                         if (datasize == 16) {
1316                                 lha->dos_attr = (unsigned char)
1317                                     (archive_le16dec(extdheader) & 0xff);
1318                                 lha->mode = archive_le16dec(extdheader+2);
1319                                 lha->gid = archive_le16dec(extdheader+4);
1320                                 lha->uid = archive_le16dec(extdheader+6);
1321                                 lha->birthtime = archive_le32dec(extdheader+8);
1322                                 lha->atime = archive_le32dec(extdheader+12);
1323                                 lha->setflag |= UNIX_MODE_IS_SET
1324                                     | BIRTHTIME_IS_SET | ATIME_IS_SET;
1325                         }
1326                         break;
1327                 case EXT_NEW_ATTR:
1328                         if (datasize == 20) {
1329                                 lha->mode = (mode_t)archive_le32dec(extdheader);
1330                                 lha->gid = archive_le32dec(extdheader+4);
1331                                 lha->uid = archive_le32dec(extdheader+8);
1332                                 lha->birthtime = archive_le32dec(extdheader+12);
1333                                 lha->atime = archive_le32dec(extdheader+16);
1334                                 lha->setflag |= UNIX_MODE_IS_SET
1335                                     | BIRTHTIME_IS_SET | ATIME_IS_SET;
1336                         }
1337                         break;
1338                 case EXT_TIMEZONE:              /* Not supported */
1339                 case EXT_UTF16_FILENAME:        /* Not supported */
1340                 case EXT_UTF16_DIRECTORY:       /* Not supported */
1341                 default:
1342                         break;
1343                 }
1344
1345                 __archive_read_consume(a, extdsize);
1346         }
1347 invalid:
1348         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1349             "Invalid extended LHa header");
1350         return (ARCHIVE_FATAL);
1351 }
1352
1353 static int
1354 lha_end_of_entry(struct archive_read *a)
1355 {
1356         struct lha *lha = (struct lha *)(a->format->data);
1357         int r = ARCHIVE_EOF;
1358
1359         if (!lha->end_of_entry_cleanup) {
1360                 if ((lha->setflag & CRC_IS_SET) &&
1361                     lha->crc != lha->entry_crc_calculated) {
1362                         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1363                             "LHa data CRC error");
1364                         r = ARCHIVE_WARN;
1365                 }
1366
1367                 /* End-of-entry cleanup done. */
1368                 lha->end_of_entry_cleanup = 1;
1369         }
1370         return (r);
1371 }
1372
1373 static int
1374 archive_read_format_lha_read_data(struct archive_read *a,
1375     const void **buff, size_t *size, int64_t *offset)
1376 {
1377         struct lha *lha = (struct lha *)(a->format->data);
1378         int r;
1379
1380         if (lha->entry_unconsumed) {
1381                 /* Consume as much as the decompressor actually used. */
1382                 __archive_read_consume(a, lha->entry_unconsumed);
1383                 lha->entry_unconsumed = 0;
1384         }
1385         if (lha->end_of_entry) {
1386                 *offset = lha->entry_offset;
1387                 *size = 0;
1388                 *buff = NULL;
1389                 return (lha_end_of_entry(a));
1390         }
1391
1392         if (lha->entry_is_compressed)
1393                 r =  lha_read_data_lzh(a, buff, size, offset);
1394         else
1395                 /* No compression. */
1396                 r =  lha_read_data_none(a, buff, size, offset);
1397         return (r);
1398 }
1399
1400 /*
1401  * Read a file content in no compression.
1402  *
1403  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1404  * lha->end_of_entry if it consumes all of the data.
1405  */
1406 static int
1407 lha_read_data_none(struct archive_read *a, const void **buff,
1408     size_t *size, int64_t *offset)
1409 {
1410         struct lha *lha = (struct lha *)(a->format->data);
1411         ssize_t bytes_avail;
1412
1413         if (lha->entry_bytes_remaining == 0) {
1414                 *buff = NULL;
1415                 *size = 0;
1416                 *offset = lha->entry_offset;
1417                 lha->end_of_entry = 1;
1418                 return (ARCHIVE_OK);
1419         }
1420         /*
1421          * Note: '1' here is a performance optimization.
1422          * Recall that the decompression layer returns a count of
1423          * available bytes; asking for more than that forces the
1424          * decompressor to combine reads by copying data.
1425          */
1426         *buff = __archive_read_ahead(a, 1, &bytes_avail);
1427         if (bytes_avail <= 0) {
1428                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1429                     "Truncated LHa file data");
1430                 return (ARCHIVE_FATAL);
1431         }
1432         if (bytes_avail > lha->entry_bytes_remaining)
1433                 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1434         lha->entry_crc_calculated =
1435             lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1436         *size = bytes_avail;
1437         *offset = lha->entry_offset;
1438         lha->entry_offset += bytes_avail;
1439         lha->entry_bytes_remaining -= bytes_avail;
1440         if (lha->entry_bytes_remaining == 0)
1441                 lha->end_of_entry = 1;
1442         lha->entry_unconsumed = bytes_avail;
1443         return (ARCHIVE_OK);
1444 }
1445
1446 /*
1447  * Read a file content in LZHUFF encoding.
1448  *
1449  * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1450  * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1451  * all of the data.
1452  */
1453 static int
1454 lha_read_data_lzh(struct archive_read *a, const void **buff,
1455     size_t *size, int64_t *offset)
1456 {
1457         struct lha *lha = (struct lha *)(a->format->data);
1458         ssize_t bytes_avail;
1459         int r;
1460
1461         /* If we haven't yet read any data, initialize the decompressor. */
1462         if (!lha->decompress_init) {
1463                 r = lzh_decode_init(&(lha->strm), lha->method);
1464                 switch (r) {
1465                 case ARCHIVE_OK:
1466                         break;
1467                 case ARCHIVE_FAILED:
1468                         /* Unsupported compression. */
1469                         *buff = NULL;
1470                         *size = 0;
1471                         *offset = 0;
1472                         archive_set_error(&a->archive,
1473                             ARCHIVE_ERRNO_FILE_FORMAT,
1474                             "Unsupported lzh compression method -%c%c%c-",
1475                             lha->method[0], lha->method[1], lha->method[2]);
1476                         /* We know compressed size; just skip it. */
1477                         archive_read_format_lha_read_data_skip(a);
1478                         return (ARCHIVE_WARN);
1479                 default:
1480                         archive_set_error(&a->archive, ENOMEM,
1481                             "Couldn't allocate memory "
1482                             "for lzh decompression");
1483                         return (ARCHIVE_FATAL);
1484                 }
1485                 /* We've initialized decompression for this stream. */
1486                 lha->decompress_init = 1;
1487                 lha->strm.avail_out = 0;
1488                 lha->strm.total_out = 0;
1489         }
1490
1491         /*
1492          * Note: '1' here is a performance optimization.
1493          * Recall that the decompression layer returns a count of
1494          * available bytes; asking for more than that forces the
1495          * decompressor to combine reads by copying data.
1496          */
1497         lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1498         if (bytes_avail <= 0) {
1499                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1500                     "Truncated LHa file body");
1501                 return (ARCHIVE_FATAL);
1502         }
1503         if (bytes_avail > lha->entry_bytes_remaining)
1504                 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1505
1506         lha->strm.avail_in = (int)bytes_avail;
1507         lha->strm.total_in = 0;
1508         lha->strm.avail_out = 0;
1509
1510         r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1511         switch (r) {
1512         case ARCHIVE_OK:
1513                 break;
1514         case ARCHIVE_EOF:
1515                 lha->end_of_entry = 1;
1516                 break;
1517         default:
1518                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1519                     "Bad lzh data");
1520                 return (ARCHIVE_FAILED);
1521         }
1522         lha->entry_unconsumed = lha->strm.total_in;
1523         lha->entry_bytes_remaining -= lha->strm.total_in;
1524
1525         if (lha->strm.avail_out) {
1526                 *offset = lha->entry_offset;
1527                 *size = lha->strm.avail_out;
1528                 *buff = lha->strm.ref_ptr;
1529                 lha->entry_crc_calculated =
1530                     lha_crc16(lha->entry_crc_calculated, *buff, *size);
1531                 lha->entry_offset += *size;
1532         } else {
1533                 *offset = lha->entry_offset;
1534                 *size = 0;
1535                 *buff = NULL;
1536                 if (lha->end_of_entry)
1537                         return (lha_end_of_entry(a));
1538         }
1539         return (ARCHIVE_OK);
1540 }
1541
1542 /*
1543  * Skip a file content.
1544  */
1545 static int
1546 archive_read_format_lha_read_data_skip(struct archive_read *a)
1547 {
1548         struct lha *lha;
1549         int64_t bytes_skipped;
1550
1551         lha = (struct lha *)(a->format->data);
1552
1553         if (lha->entry_unconsumed) {
1554                 /* Consume as much as the decompressor actually used. */
1555                 __archive_read_consume(a, lha->entry_unconsumed);
1556                 lha->entry_unconsumed = 0;
1557         }
1558
1559         /* if we've already read to end of data, we're done. */
1560         if (lha->end_of_entry_cleanup)
1561                 return (ARCHIVE_OK);
1562
1563         /*
1564          * If the length is at the beginning, we can skip the
1565          * compressed data much more quickly.
1566          */
1567         bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1568         if (bytes_skipped < 0)
1569                 return (ARCHIVE_FATAL);
1570
1571         /* This entry is finished and done. */
1572         lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1573         return (ARCHIVE_OK);
1574 }
1575
1576 static int
1577 archive_read_format_lha_cleanup(struct archive_read *a)
1578 {
1579         struct lha *lha = (struct lha *)(a->format->data);
1580
1581         lzh_decode_free(&(lha->strm));
1582         archive_string_free(&(lha->dirname));
1583         archive_string_free(&(lha->filename));
1584         archive_string_free(&(lha->uname));
1585         archive_string_free(&(lha->gname));
1586         archive_wstring_free(&(lha->ws));
1587         free(lha);
1588         (a->format->data) = NULL;
1589         return (ARCHIVE_OK);
1590 }
1591
1592 /*
1593  * 'LHa for UNIX' utility has archived a symbolic-link name after
1594  * a pathname with '|' character.
1595  * This function extracts the symbolic-link name from the pathname.
1596  *
1597  * example.
1598  *   1. a symbolic-name is 'aaa/bb/cc'
1599  *   2. a filename is 'xxx/bbb'
1600  *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1601  */
1602 static int
1603 lha_parse_linkname(struct archive_string *linkname,
1604     struct archive_string *pathname)
1605 {
1606         char *  linkptr;
1607         size_t  symlen;
1608
1609         linkptr = strchr(pathname->s, '|');
1610         if (linkptr != NULL) {
1611                 symlen = strlen(linkptr + 1);
1612                 archive_strncpy(linkname, linkptr+1, symlen);
1613
1614                 *linkptr = 0;
1615                 pathname->length = strlen(pathname->s);
1616
1617                 return (1);
1618         }
1619         return (0);
1620 }
1621
/*
 * Convert an MSDOS-style date/time into Unix-style time.
 *
 * `p' points at four bytes: a little-endian 16-bit time word followed by
 * a little-endian 16-bit date word.  The fields are unpacked into a
 * struct tm and converted with mktime(), with tm_isdst set to -1.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
	const int dos_time = archive_le16dec(p);
	const int dos_date = archive_le16dec(p + 2);
	struct tm when;

	memset(&when, 0, sizeof(when));

	/* Time word: hhhhh mmmmmm sssss (seconds stored halved). */
	when.tm_sec = (dos_time << 1) & 0x3e;
	when.tm_min = (dos_time >> 5) & 0x3f;
	when.tm_hour = (dos_time >> 11) & 0x1f;

	/* Date word: yyyyyyy mmmm ddddd (years counted from 1980). */
	when.tm_mday = dos_date & 0x1f;
	when.tm_mon = ((dos_date >> 5) & 0x0f) - 1;	/* tm_mon is 0-based */
	when.tm_year = ((dos_date >> 9) & 0x7f) + 80;	/* since 1900 */

	when.tm_isdst = -1;
	return (mktime(&when));
}
1642
/*
 * Convert an MS-Windows-style date/time into Unix-style time.
 *
 * `wintime' counts 100-nanosecond units; EPOC_TIME is the count that
 * corresponds to 1970-01-01 00:00:00 (UTC).  The whole seconds since
 * then are returned and, if `ns' is not NULL, the sub-second part is
 * stored there in nanoseconds.  Values before the Unix epoch yield 0
 * for both.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
	uint64_t since_epoch;

	if (wintime < EPOC_TIME) {
		if (ns != NULL)
			*ns = 0;
		return (0);
	}

	since_epoch = wintime - EPOC_TIME;
	if (ns != NULL)
		*ns = (long)(since_epoch % 10000000) * 100;
	return (since_epoch / 10000000);
}
1660
/*
 * Add `size' bytes, starting `offset' bytes into the buffer `pp', to the
 * 8-bit running sum `sum' (wrapping modulo 256) and return the result.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
	const unsigned char *cursor = (const unsigned char *)pp + offset;
	const unsigned char *const stop = cursor + size;

	while (cursor != stop)
		sum += *cursor++;
	return (sum);
}
1671
/*
 * Lookup tables for the CRC-16 used by LHA (reflected polynomial 0xA001,
 * processed least-significant bit first).
 *
 *   crc16tbl[0][b] : CRC update for the single byte b.
 *   crc16tbl[1][b] : that update followed by one zero byte; it lets
 *                    lha_crc16() fold two input bytes per step.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill crc16tbl.  Only the first call does any work; it must have
 * completed before lha_crc16() is used.
 */
static void
lha_crc16_init(void)
{
	static int crc16init = 0;
	unsigned int i;

	if (crc16init)
		return;
	crc16init = 1;

	for (i = 0; i < 256; i++) {
		uint16_t crc = (uint16_t)i;
		unsigned int bit;

		for (bit = 0; bit < 8; bit++)
			crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
		crc16tbl[0][i] = crc;
	}

	for (i = 0; i < 256; i++) {
		crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
			^ crc16tbl[0][crc16tbl[0][i] & 0xff];
	}
}

/*
 * Update the running CRC-16 `crc' with the `len' bytes at `pp' and
 * return the new value.  A zero `len' returns `crc' unchanged.
 *
 * The input is read strictly byte by byte and combined into 16-bit
 * little-endian words here, so the result does not depend on the
 * buffer's alignment or on the byte order of the host, and no object is
 * ever accessed through a uint16_t pointer.
 */
static uint16_t
lha_crc16(uint16_t crc, const void *pp, size_t len)
{
	const unsigned char *p = (const unsigned char *)pp;

	/* Two bytes per step, using both tables. */
	for (; len >= 2; len -= 2) {
		crc ^= (uint16_t)(p[0] | (p[1] << 8));
		crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];
		p += 2;
	}

	/* A trailing odd byte goes through the single-byte table. */
	if (len != 0)
		crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p) & 0xff];

	return crc;
}
1757
1758 /*
1759  * Initialize LZHUF decoder.
1760  *
1761  * Returns ARCHIVE_OK if initialization was successful.
1762  * Returns ARCHIVE_FAILED if method is unsupported.
1763  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1764  * error occurred.
1765  */
1766 static int
1767 lzh_decode_init(struct lzh_stream *strm, const char *method)
1768 {
1769         struct lzh_dec *ds;
1770         int w_bits, w_size;
1771
1772         if (strm->ds == NULL) {
1773                 strm->ds = calloc(1, sizeof(*strm->ds));
1774                 if (strm->ds == NULL)
1775                         return (ARCHIVE_FATAL);
1776         }
1777         ds = strm->ds;
1778         ds->error = ARCHIVE_FAILED;
1779         if (method == NULL || method[0] != 'l' || method[1] != 'h')
1780                 return (ARCHIVE_FAILED);
1781         switch (method[2]) {
1782         case '5':
1783                 w_bits = 13;/* 8KiB for window */
1784                 break;
1785         case '6':
1786                 w_bits = 15;/* 32KiB for window */
1787                 break;
1788         case '7':
1789                 w_bits = 16;/* 64KiB for window */
1790                 break;
1791         default:
1792                 return (ARCHIVE_FAILED);/* Not supported. */
1793         }
1794         ds->error = ARCHIVE_FATAL;
1795         /* Expand a window size up to 128 KiB for decompressing process
1796          * performance whatever its original window size is. */
1797         ds->w_size = 1U << 17;
1798         ds->w_mask = ds->w_size -1;
1799         if (ds->w_buff == NULL) {
1800                 ds->w_buff = malloc(ds->w_size);
1801                 if (ds->w_buff == NULL)
1802                         return (ARCHIVE_FATAL);
1803         }
1804         w_size = 1U << w_bits;
1805         memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1806         ds->w_pos = 0;
1807         ds->state = 0;
1808         ds->pos_pt_len_size = w_bits + 1;
1809         ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1810         ds->literal_pt_len_size = PT_BITLEN_SIZE;
1811         ds->literal_pt_len_bits = 5;
1812         ds->br.cache_buffer = 0;
1813         ds->br.cache_avail = 0;
1814
1815         if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1816             != ARCHIVE_OK)
1817                 return (ARCHIVE_FATAL);
1818         ds->lt.len_bits = 9;
1819         if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1820             != ARCHIVE_OK)
1821                 return (ARCHIVE_FATAL);
1822         ds->error = 0;
1823
1824         return (ARCHIVE_OK);
1825 }
1826
1827 /*
1828  * Release LZHUF decoder.
1829  */
1830 static void
1831 lzh_decode_free(struct lzh_stream *strm)
1832 {
1833
1834         if (strm->ds == NULL)
1835                 return;
1836         free(strm->ds->w_buff);
1837         lzh_huffman_free(&(strm->ds->lt));
1838         lzh_huffman_free(&(strm->ds->pt));
1839         free(strm->ds);
1840         strm->ds = NULL;
1841 }
1842
/*
 * Bit stream reader.
 *
 * The macros below work on a bit cache with two fields: `cache_buffer'
 * holds the bits (the most recently loaded ones at the low end) and
 * `cache_avail' is the number of low-order bits that are still unread.
 */
/* Check that the cache buffer has enough bits. */
#define lzh_br_has(br, n)	((br)->cache_avail >= (n))
/* Get compressed data by bit: the next n unread bits (n <= 16). */
#define lzh_br_bits(br, n)				\
	(((uint16_t)((br)->cache_buffer >>		\
		((br)->cache_avail - (n)))) & cache_masks[n])
/* Same, for use when fewer than n bits are cached: the bits that are
 * there become the high part of the result and the missing low bits
 * read as zero. */
#define lzh_br_bits_forced(br, n)			\
	(((uint16_t)((br)->cache_buffer <<		\
		((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data we
 * will use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : we met that strm->next_in is empty, we have to get following
 *          bytes. */
#define lzh_br_read_ahead_0(strm, br, n)	\
	(lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has as many bits as we need.
 *  False : there are not enough bits in the cache buffer to be used,
 *          we have to get following bytes if we could. */
#define lzh_br_read_ahead(strm, br, n)	\
	(lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Notify how many bits we consumed. */
#define lzh_br_consume(br, n)	((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n)	((br)->cache_avail += (n))

/* cache_masks[n] keeps the low n bits of a 16-bit value; every entry
 * from index 16 on is the full 16-bit mask. */
static const uint16_t cache_masks[] = {
	0x0000, 0x0001, 0x0003, 0x0007,
	0x000F, 0x001F, 0x003F, 0x007F,
	0x00FF, 0x01FF, 0x03FF, 0x07FF,
	0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
	0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1879
1880 /*
1881  * Shift away used bits in the cache data and fill it up with following bits.
1882  * Call this when cache buffer does not have enough bits you need.
1883  *
1884  * Returns 1 if the cache buffer is full.
1885  * Returns 0 if the cache buffer is not full; input buffer is empty.
1886  */
1887 static int
1888 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1889 {
1890         int n = CACHE_BITS - br->cache_avail;
1891
1892         for (;;) {
1893                 const int x = n >> 3;
1894                 if (strm->avail_in >= x) {
1895                         switch (x) {
1896                         case 8:
1897                                 br->cache_buffer =
1898                                     ((uint64_t)strm->next_in[0]) << 56 |
1899                                     ((uint64_t)strm->next_in[1]) << 48 |
1900                                     ((uint64_t)strm->next_in[2]) << 40 |
1901                                     ((uint64_t)strm->next_in[3]) << 32 |
1902                                     ((uint32_t)strm->next_in[4]) << 24 |
1903                                     ((uint32_t)strm->next_in[5]) << 16 |
1904                                     ((uint32_t)strm->next_in[6]) << 8 |
1905                                      (uint32_t)strm->next_in[7];
1906                                 strm->next_in += 8;
1907                                 strm->avail_in -= 8;
1908                                 br->cache_avail += 8 * 8;
1909                                 return (1);
1910                         case 7:
1911                                 br->cache_buffer =
1912                                    (br->cache_buffer << 56) |
1913                                     ((uint64_t)strm->next_in[0]) << 48 |
1914                                     ((uint64_t)strm->next_in[1]) << 40 |
1915                                     ((uint64_t)strm->next_in[2]) << 32 |
1916                                     ((uint32_t)strm->next_in[3]) << 24 |
1917                                     ((uint32_t)strm->next_in[4]) << 16 |
1918                                     ((uint32_t)strm->next_in[5]) << 8 |
1919                                      (uint32_t)strm->next_in[6];
1920                                 strm->next_in += 7;
1921                                 strm->avail_in -= 7;
1922                                 br->cache_avail += 7 * 8;
1923                                 return (1);
1924                         case 6:
1925                                 br->cache_buffer =
1926                                    (br->cache_buffer << 48) |
1927                                     ((uint64_t)strm->next_in[0]) << 40 |
1928                                     ((uint64_t)strm->next_in[1]) << 32 |
1929                                     ((uint32_t)strm->next_in[2]) << 24 |
1930                                     ((uint32_t)strm->next_in[3]) << 16 |
1931                                     ((uint32_t)strm->next_in[4]) << 8 |
1932                                      (uint32_t)strm->next_in[5];
1933                                 strm->next_in += 6;
1934                                 strm->avail_in -= 6;
1935                                 br->cache_avail += 6 * 8;
1936                                 return (1);
1937                         case 0:
1938                                 /* We have enough compressed data in
1939                                  * the cache buffer.*/
1940                                 return (1);
1941                         default:
1942                                 break;
1943                         }
1944                 }
1945                 if (strm->avail_in == 0) {
1946                         /* There is not enough compressed data to fill up the
1947                          * cache buffer. */
1948                         return (0);
1949                 }
1950                 br->cache_buffer =
1951                    (br->cache_buffer << 8) | *strm->next_in++;
1952                 strm->avail_in--;
1953                 br->cache_avail += 8;
1954                 n -= 8;
1955         }
1956 }
1957
/*
 * Decode LZHUF.
 *
 * 1. Returns ARCHIVE_OK when the output buffer or the input buffer is
 *    empty.  Provide more buffer and call the function again.
 * 2. Returns ARCHIVE_EOF when decompression has been completed.
 * 3. Returns ARCHIVE_FAILED when an error occurred: the compressed data
 *    is broken or the 'last' flag was not set properly.
 * 4. The 'last' flag is very important; it must be set to 1 when there
 *    is no more input data.  The lha compressed data format itself gives
 *    no way to tell that the compressed data has really ended.
 *    Note: the lha command utility checks whether the total number of
 *    output bytes has reached the uncompressed size recorded in the
 *    header; it does not care whether the decoding process finished
 *    properly.
 *    GNU ZIP can decompress files made by SCO LZH compress.  It treats
 *    EOF as null, filling the read buffer with zeros until the decoder
 *    meets two zero bytes where it reads the size of the next chunk, so
 *    those zeros act as the end-of-data mark although they are dummy
 *    bytes, not file data.
 */
static int	lzh_read_blocks(struct lzh_stream *strm, int last);
static int	lzh_decode_blocks(struct lzh_stream *strm, int last);

/*
 * Decoder states.  lzh_decode() hands states below ST_GET_LITERAL to
 * lzh_read_blocks() and all others to lzh_decode_blocks().
 */
#define ST_RD_BLOCK		0
#define ST_RD_PT_1		1
#define ST_RD_PT_2		2
#define ST_RD_PT_3		3
#define ST_RD_PT_4		4
#define ST_RD_LITERAL_1		5
#define ST_RD_LITERAL_2		6
#define ST_RD_LITERAL_3		7
#define ST_RD_POS_DATA_1	8
#define ST_GET_LITERAL		9
#define ST_GET_POS_1		10
#define ST_GET_POS_2		11
#define ST_COPY_DATA		12
1993
1994 static int
1995 lzh_decode(struct lzh_stream *strm, int last)
1996 {
1997         struct lzh_dec *ds = strm->ds;
1998         int avail_in;
1999         int r;
2000
2001         if (ds->error)
2002                 return (ds->error);
2003
2004         avail_in = strm->avail_in;
2005         do {
2006                 if (ds->state < ST_GET_LITERAL)
2007                         r = lzh_read_blocks(strm, last);
2008                 else
2009                         r = lzh_decode_blocks(strm, last);
2010         } while (r == 100);
2011         strm->total_in += avail_in - strm->avail_in;
2012         return (r);
2013 }
2014
2015 static void
2016 lzh_emit_window(struct lzh_stream *strm, size_t s)
2017 {
2018         strm->ref_ptr = strm->ds->w_buff;
2019         strm->avail_out = (int)s;
2020         strm->total_out += s;
2021 }
2022
2023 static int
2024 lzh_read_blocks(struct lzh_stream *strm, int last)
2025 {
2026         struct lzh_dec *ds = strm->ds;
2027         struct lzh_br *br = &(ds->br);
2028         int c = 0, i;
2029         unsigned rbits;
2030
2031         for (;;) {
2032                 switch (ds->state) {
2033                 case ST_RD_BLOCK:
2034                         /*
2035                          * Read a block number indicates how many blocks
2036                          * we will handle. The block is composed of a
2037                          * literal and a match, sometimes a literal only
2038                          * in particular, there are no reference data at
2039                          * the beginning of the decompression.
2040                          */
2041                         if (!lzh_br_read_ahead_0(strm, br, 16)) {
2042                                 if (!last)
2043                                         /* We need following data. */
2044                                         return (ARCHIVE_OK);
2045                                 if (lzh_br_has(br, 8)) {
2046                                         /*
2047                                          * It seems there are extra bits.
2048                                          *  1. Compressed data is broken.
2049                                          *  2. `last' flag does not properly
2050                                          *     set.
2051                                          */
2052                                         goto failed;
2053                                 }
2054                                 if (ds->w_pos > 0) {
2055                                         lzh_emit_window(strm, ds->w_pos);
2056                                         ds->w_pos = 0;
2057                                         return (ARCHIVE_OK);
2058                                 }
2059                                 /* End of compressed data; we have completely
2060                                  * handled all compressed data. */
2061                                 return (ARCHIVE_EOF);
2062                         }
2063                         ds->blocks_avail = lzh_br_bits(br, 16);
2064                         if (ds->blocks_avail == 0)
2065                                 goto failed;
2066                         lzh_br_consume(br, 16);
2067                         /*
2068                          * Read a literal table compressed in huffman
2069                          * coding.
2070                          */
2071                         ds->pt.len_size = ds->literal_pt_len_size;
2072                         ds->pt.len_bits = ds->literal_pt_len_bits;
2073                         ds->reading_position = 0;
2074                         /* FALL THROUGH */
2075                 case ST_RD_PT_1:
2076                         /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2077                          * used in reading both a literal table and a
2078                          * position table. */
2079                         if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2080                                 if (last)
2081                                         goto failed;/* Truncated data. */
2082                                 ds->state = ST_RD_PT_1;
2083                                 return (ARCHIVE_OK);
2084                         }
2085                         ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2086                         lzh_br_consume(br, ds->pt.len_bits);
2087                         /* FALL THROUGH */
2088                 case ST_RD_PT_2:
2089                         if (ds->pt.len_avail == 0) {
2090                                 /* There is no bitlen. */
2091                                 if (!lzh_br_read_ahead(strm, br,
2092                                     ds->pt.len_bits)) {
2093                                         if (last)
2094                                                 goto failed;/* Truncated data.*/
2095                                         ds->state = ST_RD_PT_2;
2096                                         return (ARCHIVE_OK);
2097                                 }
2098                                 if (!lzh_make_fake_table(&(ds->pt),
2099                                     lzh_br_bits(br, ds->pt.len_bits)))
2100                                         goto failed;/* Invalid data. */
2101                                 lzh_br_consume(br, ds->pt.len_bits);
2102                                 if (ds->reading_position)
2103                                         ds->state = ST_GET_LITERAL;
2104                                 else
2105                                         ds->state = ST_RD_LITERAL_1;
2106                                 break;
2107                         } else if (ds->pt.len_avail > ds->pt.len_size)
2108                                 goto failed;/* Invalid data. */
2109                         ds->loop = 0;
2110                         memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2111                         if (ds->pt.len_avail < 3 ||
2112                             ds->pt.len_size == ds->pos_pt_len_size) {
2113                                 ds->state = ST_RD_PT_4;
2114                                 break;
2115                         }
2116                         /* FALL THROUGH */
2117                 case ST_RD_PT_3:
2118                         ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2119                         if (ds->loop < 3) {
2120                                 if (ds->loop < 0 || last)
2121                                         goto failed;/* Invalid data. */
2122                                 /* Not completed, get following data. */
2123                                 ds->state = ST_RD_PT_3;
2124                                 return (ARCHIVE_OK);
2125                         }
2126                         /* There are some null in bitlen of the literal. */
2127                         if (!lzh_br_read_ahead(strm, br, 2)) {
2128                                 if (last)
2129                                         goto failed;/* Truncated data. */
2130                                 ds->state = ST_RD_PT_3;
2131                                 return (ARCHIVE_OK);
2132                         }
2133                         c = lzh_br_bits(br, 2);
2134                         lzh_br_consume(br, 2);
2135                         if (c > ds->pt.len_avail - 3)
2136                                 goto failed;/* Invalid data. */
2137                         for (i = 3; c-- > 0 ;)
2138                                 ds->pt.bitlen[i++] = 0;
2139                         ds->loop = i;
2140                         /* FALL THROUGH */
2141                 case ST_RD_PT_4:
2142                         ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2143                             ds->pt.len_avail);
2144                         if (ds->loop < ds->pt.len_avail) {
2145                                 if (ds->loop < 0 || last)
2146                                         goto failed;/* Invalid data. */
2147                                 /* Not completed, get following data. */
2148                                 ds->state = ST_RD_PT_4;
2149                                 return (ARCHIVE_OK);
2150                         }
2151                         if (!lzh_make_huffman_table(&(ds->pt)))
2152                                 goto failed;/* Invalid data */
2153                         if (ds->reading_position) {
2154                                 ds->state = ST_GET_LITERAL;
2155                                 break;
2156                         }
2157                         /* FALL THROUGH */
2158                 case ST_RD_LITERAL_1:
2159                         if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2160                                 if (last)
2161                                         goto failed;/* Truncated data. */
2162                                 ds->state = ST_RD_LITERAL_1;
2163                                 return (ARCHIVE_OK);
2164                         }
2165                         ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2166                         lzh_br_consume(br, ds->lt.len_bits);
2167                         /* FALL THROUGH */
2168                 case ST_RD_LITERAL_2:
2169                         if (ds->lt.len_avail == 0) {
2170                                 /* There is no bitlen. */
2171                                 if (!lzh_br_read_ahead(strm, br,
2172                                     ds->lt.len_bits)) {
2173                                         if (last)
2174                                                 goto failed;/* Truncated data.*/
2175                                         ds->state = ST_RD_LITERAL_2;
2176                                         return (ARCHIVE_OK);
2177                                 }
2178                                 if (!lzh_make_fake_table(&(ds->lt),
2179                                     lzh_br_bits(br, ds->lt.len_bits)))
2180                                         goto failed;/* Invalid data */
2181                                 lzh_br_consume(br, ds->lt.len_bits);
2182                                 ds->state = ST_RD_POS_DATA_1;
2183                                 break;
2184                         } else if (ds->lt.len_avail > ds->lt.len_size)
2185                                 goto failed;/* Invalid data */
2186                         ds->loop = 0;
2187                         memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2188                         /* FALL THROUGH */
2189                 case ST_RD_LITERAL_3:
2190                         i = ds->loop;
2191                         while (i < ds->lt.len_avail) {
2192                                 if (!lzh_br_read_ahead(strm, br,
2193                                     ds->pt.max_bits)) {
2194                                         if (last)
2195                                                 goto failed;/* Truncated data.*/
2196                                         ds->loop = i;
2197                                         ds->state = ST_RD_LITERAL_3;
2198                                         return (ARCHIVE_OK);
2199                                 }
2200                                 rbits = lzh_br_bits(br, ds->pt.max_bits);
2201                                 c = lzh_decode_huffman(&(ds->pt), rbits);
2202                                 if (c > 2) {
2203                                         /* Note: 'c' will never be more than
2204                                          * eighteen since it's limited by
2205                                          * PT_BITLEN_SIZE, which is being set
2206                                          * to ds->pt.len_size through
2207                                          * ds->literal_pt_len_size. */
2208                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2209                                         c -= 2;
2210                                         ds->lt.freq[c]++;
2211                                         ds->lt.bitlen[i++] = c;
2212                                 } else if (c == 0) {
2213                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2214                                         ds->lt.bitlen[i++] = 0;
2215                                 } else {
2216                                         /* c == 1 or c == 2 */
2217                                         int n = (c == 1)?4:9;
2218                                         if (!lzh_br_read_ahead(strm, br,
2219                                              ds->pt.bitlen[c] + n)) {
2220                                                 if (last) /* Truncated data. */
2221                                                         goto failed;
2222                                                 ds->loop = i;
2223                                                 ds->state = ST_RD_LITERAL_3;
2224                                                 return (ARCHIVE_OK);
2225                                         }
2226                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2227                                         c = lzh_br_bits(br, n);
2228                                         lzh_br_consume(br, n);
2229                                         c += (n == 4)?3:20;
2230                                         if (i + c > ds->lt.len_avail)
2231                                                 goto failed;/* Invalid data */
2232                                         memset(&(ds->lt.bitlen[i]), 0, c);
2233                                         i += c;
2234                                 }
2235                         }
2236                         if (i > ds->lt.len_avail ||
2237                             !lzh_make_huffman_table(&(ds->lt)))
2238                                 goto failed;/* Invalid data */
2239                         /* FALL THROUGH */
2240                 case ST_RD_POS_DATA_1:
2241                         /*
2242                          * Read a position table compressed in huffman
2243                          * coding.
2244                          */
2245                         ds->pt.len_size = ds->pos_pt_len_size;
2246                         ds->pt.len_bits = ds->pos_pt_len_bits;
2247                         ds->reading_position = 1;
2248                         ds->state = ST_RD_PT_1;
2249                         break;
2250                 case ST_GET_LITERAL:
2251                         return (100);
2252                 }
2253         }
2254 failed:
2255         return (ds->error = ARCHIVE_FAILED);
2256 }
2257
/*
 * Decode the literal and match codes of the current block into the
 * sliding window (ds->w_buff).
 *
 * The function is a resumable state machine.  `state' selects where to
 * pick up: ST_GET_LITERAL, ST_GET_POS_1, ST_GET_POS_2 or ST_COPY_DATA.
 * It works on local copies of the bit reader and of several decoder
 * fields and stores them back into `ds' only when it leaves through the
 * `return (100)' path or through `next_data'.
 *
 * `last' is presumably non-zero when the caller has no further input to
 * supply (TODO confirm against the caller): a failed read-ahead is then
 * either retried with lzh_br_bits_forced() (huffman codes) or reported
 * as truncated data (extra position bits) instead of asking for more.
 *
 * Returns:
 *   100             blocks_avail reached zero; ds->state is set to
 *                   ST_RD_BLOCK.
 *   ARCHIVE_OK      more input is needed, or a full window was just
 *                   handed to lzh_emit_window(); progress is saved.
 *   ARCHIVE_FAILED  invalid or truncated data; also stored in ds->error.
 */
2258 static int
2259 lzh_decode_blocks(struct lzh_stream *strm, int last)
2260 {
2261         struct lzh_dec *ds = strm->ds;
             /* Local copy of the bit reader; written back to ds->br on the
              * `return (100)' path and at `next_data', not on failure. */
2262         struct lzh_br bre = ds->br;
2263         struct huffman *lt = &(ds->lt);
2264         struct huffman *pt = &(ds->pt);
2265         unsigned char *w_buff = ds->w_buff;
2266         unsigned char *lt_bitlen = lt->bitlen;
2267         unsigned char *pt_bitlen = pt->bitlen;
2268         int blocks_avail = ds->blocks_avail, c = 0;
2269         int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2270         int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2271         int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2272         int state = ds->state;
2273
2274         for (;;) {
2275                 switch (state) {
2276                 case ST_GET_LITERAL:
2277                         for (;;) {
2278                                 if (blocks_avail == 0) {
2279                                         /* We have decoded all blocks.
2280                                          * Let's handle next blocks. */
2281                                         ds->state = ST_RD_BLOCK;
2282                                         ds->br = bre;
2283                                         ds->blocks_avail = 0;
2284                                         ds->w_pos = w_pos;
2285                                         ds->copy_pos = 0;
2286                                         return (100);
2287                                 }
2288
2289                                 /* lzh_br_read_ahead() always tries to fill the
2290                                  * cache buffer up. In specific situation we
2291                                  * are close to the end of the data, the cache
2292                                  * buffer will not be full and thus we have to
2293                                  * determine if the cache buffer has some bits
2294                                  * as much as we need after lzh_br_read_ahead()
2295                                  * failed. */
2296                                 if (!lzh_br_read_ahead(strm, &bre,
2297                                     lt_max_bits)) {
2298                                         if (!last)
2299                                                 goto next_data;
2300                                         /* Remaining bits are less than
2301                                          * maximum bits(lt.max_bits) but maybe
2302                                          * it still remains as much as we need,
2303                                          * so we should try to use it with
2304                                          * dummy bits. */
2305                                         c = lzh_decode_huffman(lt,
2306                                               lzh_br_bits_forced(&bre,
2307                                                 lt_max_bits));
2308                                         lzh_br_consume(&bre, lt_bitlen[c]);
2309                                         if (!lzh_br_has(&bre, 0))
2310                                                 goto failed;/* Over read. */
2311                                 } else {
2312                                         c = lzh_decode_huffman(lt,
2313                                               lzh_br_bits(&bre, lt_max_bits));
2314                                         lzh_br_consume(&bre, lt_bitlen[c]);
2315                                 }
2316                                 blocks_avail--;
2317                                 if (c > UCHAR_MAX)
2318                                         /* Current block is a match data. */
2319                                         break;
2320                                 /*
2321                                  * 'c' is exactly a literal code.
2322                                  */
2323                                 /* Save a decoded code to reference it
2324                                  * afterward. */
2325                                 w_buff[w_pos] = c;
2326                                 if (++w_pos >= w_size) {
                                             /* The window is full: hand it
                                              * over and return so it can be
                                              * consumed. */
2327                                         w_pos = 0;
2328                                         lzh_emit_window(strm, w_size);
2329                                         goto next_data;
2330                                 }
2331                         }
2332                         /* 'c' is the length of a match pattern we have
2333                          * already extracted, which has been stored in
2334                          * window(ds->w_buff). */
2335                         copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2336                         /* FALL THROUGH */
2337                 case ST_GET_POS_1:
2338                         /*
2339                          * Get a reference position. 
2340                          */
2341                         if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2342                                 if (!last) {
                                             /* Resume here later; copy_len is
                                              * only saved by the jump sites
                                              * that need it. */
2343                                         state = ST_GET_POS_1;
2344                                         ds->copy_len = copy_len;
2345                                         goto next_data;
2346                                 }
2347                                 copy_pos = lzh_decode_huffman(pt,
2348                                     lzh_br_bits_forced(&bre, pt_max_bits));
2349                                 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2350                                 if (!lzh_br_has(&bre, 0))
2351                                         goto failed;/* Over read. */
2352                         } else {
2353                                 copy_pos = lzh_decode_huffman(pt,
2354                                     lzh_br_bits(&bre, pt_max_bits));
2355                                 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2356                         }
2357                         /* FALL THROUGH */
2358                 case ST_GET_POS_2:
2359                         if (copy_pos > 1) {
2360                                 /* We need an additional adjustment number to
2361                                  * the position. */
2362                                 int p = copy_pos - 1;
2363                                 if (!lzh_br_read_ahead(strm, &bre, p)) {
2364                                         if (last)
2365                                                 goto failed;/* Truncated data.*/
2366                                         state = ST_GET_POS_2;
2367                                         ds->copy_len = copy_len;
2368                                         ds->copy_pos = copy_pos;
2369                                         goto next_data;
2370                                 }
                                     /* Decoded code N > 1 means: distance is
                                      * 2^(N-1) plus the next N-1 raw bits. */
2371                                 copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2372                                 lzh_br_consume(&bre, p);
2373                         }
2374                         /* The position is actually a distance from the last
2375                          * code we had extracted and thus we have to convert
2376                          * it to a position of the window. */
2377                         copy_pos = (w_pos - copy_pos - 1) & w_mask;
2378                         /* FALL THROUGH */
2379                 case ST_COPY_DATA:
2380                         /*
2381                          * Copy `copy_len' bytes as extracted data from
2382                          * the window into the output buffer.
2383                          */
2384                         for (;;) {
2385                                 int l;
2386
                                     /* Clamp this pass so that neither the
                                      * read nor the write position runs past
                                      * the end of the window. */
2387                                 l = copy_len;
2388                                 if (copy_pos > w_pos) {
2389                                         if (l > w_size - copy_pos)
2390                                                 l = w_size - copy_pos;
2391                                 } else {
2392                                         if (l > w_size - w_pos)
2393                                                 l = w_size - w_pos;
2394                                 }
2395                                 if ((copy_pos + l < w_pos)
2396                                     || (w_pos + l < copy_pos)) {
2397                                         /* No overlap. */
2398                                         memcpy(w_buff + w_pos,
2399                                             w_buff + copy_pos, l);
2400                                 } else {
                                             /* The ranges may overlap, so copy
                                              * forward one byte at a time (two
                                              * per iteration); bytes written
                                              * earlier in this pass may be read
                                              * again as source. */
2401                                         const unsigned char *s;
2402                                         unsigned char *d;
2403                                         int li;
2404
2405                                         d = w_buff + w_pos;
2406                                         s = w_buff + copy_pos;
2407                                         for (li = 0; li < l-1;) {
2408                                                 d[li] = s[li];li++;
2409                                                 d[li] = s[li];li++;
2410                                         }
2411                                         if (li < l)
2412                                                 d[li] = s[li];
2413                                 }
2414                                 w_pos += l;
2415                                 if (w_pos == w_size) {
                                             /* The window is full: emit it and
                                              * remember whether part of the
                                              * match is still to be copied. */
2416                                         w_pos = 0;
2417                                         lzh_emit_window(strm, w_size);
2418                                         if (copy_len <= l)
2419                                                 state = ST_GET_LITERAL;
2420                                         else {
2421                                                 state = ST_COPY_DATA;
2422                                                 ds->copy_len = copy_len - l;
2423                                                 ds->copy_pos =
2424                                                     (copy_pos + l) & w_mask;
2425                                         }
2426                                         goto next_data;
2427                                 }
2428                                 if (copy_len <= l)
2429                                         /* A copy of current pattern ended. */
2430                                         break;
2431                                 copy_len -= l;
2432                                 copy_pos = (copy_pos + l) & w_mask;
2433                         }
2434                         state = ST_GET_LITERAL;
2435                         break;
2436                 }
2437         }
2438 failed:
2439         return (ds->error = ARCHIVE_FAILED);
2440 next_data:
             /* Save progress for the next call.  copy_len and copy_pos are
              * not stored here; the jump sites that need them do it. */
2441         ds->br = bre;
2442         ds->blocks_avail = blocks_avail;
2443         ds->state = state;
2444         ds->w_pos = w_pos;
2445         return (ARCHIVE_OK);
2446 }
2447
2448 static int
2449 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2450 {
2451         int bits;
2452
2453         if (hf->bitlen == NULL) {
2454                 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2455                 if (hf->bitlen == NULL)
2456                         return (ARCHIVE_FATAL);
2457         }
2458         if (hf->tbl == NULL) {
2459                 if (tbl_bits < HTBL_BITS)
2460                         bits = tbl_bits;
2461                 else
2462                         bits = HTBL_BITS;
2463                 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2464                 if (hf->tbl == NULL)
2465                         return (ARCHIVE_FATAL);
2466         }
2467         if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2468                 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2469                 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2470                 if (hf->tree == NULL)
2471                         return (ARCHIVE_FATAL);
2472         }
2473         hf->len_size = (int)len_size;
2474         hf->tbl_bits = tbl_bits;
2475         return (ARCHIVE_OK);
2476 }
2477
2478 static void
2479 lzh_huffman_free(struct huffman *hf)
2480 {
2481         free(hf->bitlen);
2482         free(hf->tbl);
2483         free(hf->tree);
2484 }
2485
/*
 * Lookup table for the long form of a PT bit length.
 *
 * lzh_read_pt_bitlen() uses it when the first three bits of a length are
 * all ones: it indexes the table with the low 10 bits of a 13-bit
 * look-ahead, stores the value found as the bit length and consumes
 * (value - 3) bits from the stream.
 *
 * Layout: 512 entries of 7, 256 of 8, 128 of 9, 64 of 10, 32 of 11,
 * 16 of 12, 8 of 13, 4 of 14, 2 of 15, 1 of 16 and a final 0.  The 0
 * entry (all look-ahead bits set) is rejected by the reader as invalid
 * data.
 */
2486 static const char bitlen_tbl[0x400] = {
2487          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2488          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2489          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2490          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2491          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2492          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2493          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2494          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2495          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2496          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2497          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2498          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2499          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2500          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2501          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2502          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2503          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2504          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2505          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2506          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2507          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2508          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2509          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2510          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2511          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2512          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2513          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2514          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2515          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2516          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2517          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2518          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2519          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2520          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2521          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2522          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2523          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2524          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2525          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2526          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2527          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2528          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2529          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2530          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2531          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2532          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2533          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2534          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2535          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2536          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2537          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2538          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2539          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2540          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2541          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2542          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2543         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2544         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2545         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2546         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2547         11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2548         11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2549         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2550         13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2551 };
2552 static int
2553 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2554 {
2555         struct lzh_dec *ds = strm->ds;
2556         struct lzh_br *br = &(ds->br);
2557         int c, i;
2558
2559         for (i = start; i < end; ) {
2560                 /*
2561                  *  bit pattern     the number we need
2562                  *     000           ->  0
2563                  *     001           ->  1
2564                  *     010           ->  2
2565                  *     ...
2566                  *     110           ->  6
2567                  *     1110          ->  7
2568                  *     11110         ->  8
2569                  *     ...
2570                  *     1111111111110 ->  16
2571                  */
2572                 if (!lzh_br_read_ahead(strm, br, 3))
2573                         return (i);
2574                 if ((c = lzh_br_bits(br, 3)) == 7) {
2575                         if (!lzh_br_read_ahead(strm, br, 13))
2576                                 return (i);
2577                         c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2578                         if (c)
2579                                 lzh_br_consume(br, c - 3);
2580                         else
2581                                 return (-1);/* Invalid data. */
2582                 } else
2583                         lzh_br_consume(br, 3);
2584                 ds->pt.bitlen[i++] = c;
2585                 ds->pt.freq[c]++;
2586         }
2587         return (i);
2588 }
2589
2590 static int
2591 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2592 {
2593         if (c >= hf->len_size)
2594                 return (0);
2595         hf->tbl[0] = c;
2596         hf->max_bits = 0;
2597         hf->shift_bits = 0;
2598         hf->bitlen[hf->tbl[0]] = 0;
2599         return (1);
2600 }
2601
2602 /*
2603  * Make a huffman coding table.
 *
 * Inputs read from `hf': freq[1..16] (number of codes of each length),
 * bitlen[0..len_avail-1] (code length of each symbol), tbl_bits and
 * len_avail.
 *
 * Outputs written to `hf': max_bits, shift_bits, tree_used, the direct
 * lookup table tbl[] and, for codes longer than HTBL_BITS, the nodes of
 * tree[].  In tbl[] and in the left/right fields of a node, a value below
 * len_avail is a symbol and a value of len_avail or more refers to
 * tree[value - len_avail].
 *
 * Returns 1 on success, or 0 when the lengths do not describe a valid
 * code (marked "Invalid" below).
2604  */
2605 static int
2606 lzh_make_huffman_table(struct huffman *hf)
2607 {
2608         uint16_t *tbl;
2609         const unsigned char *bitlen;
2610         int bitptn[17], weight[17];
2611         int i, maxbits = 0, ptn, tbl_size, w;
2612         int diffbits, len_avail;
2613
2614         /*
2615          * Initialize bit patterns.
              * bitptn[i] is the first code of length i, left-aligned in 16
              * bits, and weight[i] = 1 << (16 - i) is the span of 16-bit
              * values one such code covers.
2616          */
2617         ptn = 0;
2618         for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2619                 bitptn[i] = ptn;
2620                 weight[i] = w;
2621                 if (hf->freq[i]) {
2622                         ptn += hf->freq[i] * w;
2623                         maxbits = i;
2624                 }
2625         }
             /* The codes must fill the 16-bit space exactly, and the longest
              * one must fit in the table size chosen at init time. */
2626         if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2627                 return (0);/* Invalid */
2628
2629         hf->max_bits = maxbits;
2630
2631         /*
2632          * Cut out extra bits which we won't house in the table.
2633          * This preparation reduces the same calculation in the for-loop
2634          * making the table.
2635          */
2636         if (maxbits < 16) {
2637                 int ebits = 16 - maxbits;
2638                 for (i = 1; i <= maxbits; i++) {
2639                         bitptn[i] >>= ebits;
2640                         weight[i] >>= ebits;
2641                 }
2642         }
2643         if (maxbits > HTBL_BITS) {
2644                 unsigned htbl_max;
2645                 uint16_t *p;
2646
                     /* Only the top HTBL_BITS bits index the direct table;
                      * the remaining diffbits bits are resolved in the tree. */
2647                 diffbits = maxbits - HTBL_BITS;
2648                 for (i = 1; i <= HTBL_BITS; i++) {
2649                         bitptn[i] >>= diffbits;
2650                         weight[i] >>= diffbits;
2651                 }
                     /* Clear the direct-table slots that lie past the codes of
                      * length <= HTBL_BITS, so that `*p == 0' below can tell
                      * that a slot has no tree root yet. */
2652                 htbl_max = bitptn[HTBL_BITS] +
2653                     weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2654                 p = &(hf->tbl[htbl_max]);
2655                 while (p < &hf->tbl[1U<<HTBL_BITS])
2656                         *p++ = 0;
2657         } else
2658                 diffbits = 0;
2659         hf->shift_bits = diffbits;
2660
2661         /*
2662          * Make the table.
2663          */
2664         tbl_size = 1 << HTBL_BITS;
2665         tbl = hf->tbl;
2666         bitlen = hf->bitlen;
2667         len_avail = hf->len_avail;
2668         hf->tree_used = 0;
2669         for (i = 0; i < len_avail; i++) {
2670                 uint16_t *p;
2671                 int len, cnt;
2672                 uint16_t bit;
2673                 int extlen;
2674                 struct htree_t *ht;
2675
                     /* A zero length means the symbol is not used. */
2676                 if (bitlen[i] == 0)
2677                         continue;
2678                 /* Get a bit pattern */
2679                 len = bitlen[i];
2680                 ptn = bitptn[len];
2681                 cnt = weight[len];
2682                 if (len <= HTBL_BITS) {
2683                         /* Calculate next bit pattern */
2684                         if ((bitptn[len] = ptn + cnt) > tbl_size)
2685                                 return (0);/* Invalid */
2686                         /* Update the table */
                             /* Store symbol i in the cnt slots starting at
                              * tbl[ptn].  Large runs are filled from the top
                              * down: the last 8 slots by hand, then copies
                              * of 8 and 16 already-filled slots, then the
                              * leading remainder. */
2687                         p = &(tbl[ptn]);
2688                         if (cnt > 7) {
2689                                 uint16_t *pc;
2690
2691                                 cnt -= 8;
2692                                 pc = &p[cnt];
2693                                 pc[0] = (uint16_t)i;
2694                                 pc[1] = (uint16_t)i;
2695                                 pc[2] = (uint16_t)i;
2696                                 pc[3] = (uint16_t)i;
2697                                 pc[4] = (uint16_t)i;
2698                                 pc[5] = (uint16_t)i;
2699                                 pc[6] = (uint16_t)i;
2700                                 pc[7] = (uint16_t)i;
2701                                 if (cnt > 7) {
2702                                         cnt -= 8;
2703                                         memcpy(&p[cnt], pc,
2704                                                 8 * sizeof(uint16_t));
2705                                         pc = &p[cnt];
2706                                         while (cnt > 15) {
2707                                                 cnt -= 16;
2708                                                 memcpy(&p[cnt], pc,
2709                                                         16 * sizeof(uint16_t));
2710                                         }
2711                                 }
2712                                 if (cnt)
2713                                         memcpy(p, pc, cnt * sizeof(uint16_t));
2714                         } else {
2715                                 while (cnt > 1) {
2716                                         p[--cnt] = (uint16_t)i;
2717                                         p[--cnt] = (uint16_t)i;
2718                                 }
2719                                 if (cnt)
2720                                         p[--cnt] = (uint16_t)i;
2721                         }
2722                         continue;
2723                 }
2724
2725                 /*
2726                  * A bit length is too big to be housed in a direct table,
2727                  * so we use a tree model for its extra bits.
2728                  */
2729                 bitptn[len] = ptn + cnt;
                     /* `bit' selects the first code bit below the direct-table
                      * index; `extlen' is the number of such extra bits. */
2730                 bit = 1U << (diffbits -1);
2731                 extlen = len - HTBL_BITS;
2732                 
                     /* Find the tree root for this table slot, or create it. */
2733                 p = &(tbl[ptn >> diffbits]);
2734                 if (*p == 0) {
2735                         *p = len_avail + hf->tree_used;
2736                         ht = &(hf->tree[hf->tree_used++]);
                             /* `ht' is not dereferenced before this check. */
2737                         if (hf->tree_used > hf->tree_avail)
2738                                 return (0);/* Invalid */
2739                         ht->left = 0;
2740                         ht->right = 0;
2741                 } else {
2742                         if (*p < len_avail ||
2743                             *p >= (len_avail + hf->tree_used))
2744                                 return (0);/* Invalid */
2745                         ht = &(hf->tree[*p - len_avail]);
2746                 }
                     /* Walk down one node per extra bit except the last one,
                      * going left on a 1 bit and right on a 0 bit and adding
                      * nodes where a branch does not lead to one yet. */
2747                 while (--extlen > 0) {
2748                         if (ptn & bit) {
2749                                 if (ht->left < len_avail) {
2750                                         ht->left = len_avail + hf->tree_used;
2751                                         ht = &(hf->tree[hf->tree_used++]);
2752                                         if (hf->tree_used > hf->tree_avail)
2753                                                 return (0);/* Invalid */
2754                                         ht->left = 0;
2755                                         ht->right = 0;
2756                                 } else {
2757                                         ht = &(hf->tree[ht->left - len_avail]);
2758                                 }
2759                         } else {
2760                                 if (ht->right < len_avail) {
2761                                         ht->right = len_avail + hf->tree_used;
2762                                         ht = &(hf->tree[hf->tree_used++]);
2763                                         if (hf->tree_used > hf->tree_avail)
2764                                                 return (0);/* Invalid */
2765                                         ht->left = 0;
2766                                         ht->right = 0;
2767                                 } else {
2768                                         ht = &(hf->tree[ht->right - len_avail]);
2769                                 }
2770                         }
2771                         bit >>= 1;
2772                 }
                     /* The last extra bit picks the branch that receives the
                      * symbol; that branch must still be empty. */
2773                 if (ptn & bit) {
2774                         if (ht->left != 0)
2775                                 return (0);/* Invalid */
2776                         ht->left = (uint16_t)i;
2777                 } else {
2778                         if (ht->right != 0)
2779                                 return (0);/* Invalid */
2780                         ht->right = (uint16_t)i;
2781                 }
2782         }
2783         return (1);
2784 }
2785
2786 static int
2787 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2788 {
2789         struct htree_t *ht;
2790         int extlen;
2791
2792         ht = hf->tree;
2793         extlen = hf->shift_bits;
2794         while (c >= hf->len_avail) {
2795                 c -= hf->len_avail;
2796                 if (extlen-- <= 0 || c >= hf->tree_used)
2797                         return (0);
2798                 if (rbits & (1U << extlen))
2799                         c = ht[c].left;
2800                 else
2801                         c = ht[c].right;
2802         }
2803         return (c);
2804 }
2805
2806 static inline int
2807 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2808 {
2809         int c;
2810         /*
2811          * At first search an index table for a bit pattern.
2812          * If it fails, search a huffman tree for.
2813          */
2814         c = hf->tbl[rbits >> hf->shift_bits];
2815         if (c < hf->len_avail || hf->len_avail == 0)
2816                 return (c);
2817         /* This bit pattern needs to be found out at a huffman tree. */
2818         return (lzh_decode_huffman_tree(hf, rbits, c));
2819 }
2820