]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/archive_read_support_format_lha.c
MFC r315636,315876,316095:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / archive_read_support_format_lha.c
1 /*-
2  * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
47
48
49 #define MAXMATCH                256     /* Maximum match length. */
50 #define MINMATCH                3       /* Minimum match length. */
51 /*
52  * Literal table format:
53  * +0              +256                      +510
54  * +---------------+-------------------------+
55  * | literal code  |       match length      |
56  * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
57  * +---------------+-------------------------+
58  *  <---          LT_BITLEN_SIZE         --->
59  */
60 /* Literal table size. */
61 #define LT_BITLEN_SIZE          (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
62 /* Position table size.
63  * Note: this used for both position table and pre literal table.*/
64 #define PT_BITLEN_SIZE          (3 + 16)
65
66 struct lzh_dec {
67         /* Decoding status. */
68         int                      state;
69
70         /*
71          * Window to see last 8Ki(lh5),32Ki(lh6),64Ki(lh7) bytes of decoded
72          * data.
73          */
74         int                      w_size;
75         int                      w_mask;
76         /* Window buffer, which is a loop buffer. */
77         unsigned char           *w_buff;
78         /* The insert position to the window. */
79         int                      w_pos;
80         /* The position where we can copy decoded code from the window. */
81         int                      copy_pos;
82         /* The length how many bytes we can copy decoded code from
83          * the window. */
84         int                      copy_len;
85
86         /*
87          * Bit stream reader.
88          */
89         struct lzh_br {
90 #define CACHE_TYPE              uint64_t
91 #define CACHE_BITS              (8 * sizeof(CACHE_TYPE))
92                 /* Cache buffer. */
93                 CACHE_TYPE       cache_buffer;
94                 /* Indicates how many bits avail in cache_buffer. */
95                 int              cache_avail;
96         } br;
97
98         /*
99          * Huffman coding.
100          */
101         struct huffman {
102                 int              len_size;
103                 int              len_avail;
104                 int              len_bits;
105                 int              freq[17];
106                 unsigned char   *bitlen;
107
108                 /*
109                  * Use a index table. It's faster than searching a huffman
110                  * coding tree, which is a binary tree. But a use of a large
111                  * index table causes L1 cache read miss many times.
112                  */
113 #define HTBL_BITS       10
114                 int              max_bits;
115                 int              shift_bits;
116                 int              tbl_bits;
117                 int              tree_used;
118                 int              tree_avail;
119                 /* Direct access table. */
120                 uint16_t        *tbl;
121                 /* Binary tree table for extra bits over the direct access. */
122                 struct htree_t {
123                         uint16_t left;
124                         uint16_t right;
125                 }               *tree;
126         }                        lt, pt;
127
128         int                      blocks_avail;
129         int                      pos_pt_len_size;
130         int                      pos_pt_len_bits;
131         int                      literal_pt_len_size;
132         int                      literal_pt_len_bits;
133         int                      reading_position;
134         int                      loop;
135         int                      error;
136 };
137
138 struct lzh_stream {
139         const unsigned char     *next_in;
140         int                      avail_in;
141         int64_t                  total_in;
142         const unsigned char     *ref_ptr;
143         int                      avail_out;
144         int64_t                  total_out;
145         struct lzh_dec          *ds;
146 };
147
148 struct lha {
149         /* entry_bytes_remaining is the number of bytes we expect.          */
150         int64_t                  entry_offset;
151         int64_t                  entry_bytes_remaining;
152         int64_t                  entry_unconsumed;
153         uint16_t                 entry_crc_calculated;
154  
155         size_t                   header_size;   /* header size              */
156         unsigned char            level;         /* header level             */
157         char                     method[3];     /* compress type            */
158         int64_t                  compsize;      /* compressed data size     */
159         int64_t                  origsize;      /* original file size       */
160         int                      setflag;
161 #define BIRTHTIME_IS_SET        1
162 #define ATIME_IS_SET            2
163 #define UNIX_MODE_IS_SET        4
164 #define CRC_IS_SET              8
165         time_t                   birthtime;
166         long                     birthtime_tv_nsec;
167         time_t                   mtime;
168         long                     mtime_tv_nsec;
169         time_t                   atime;
170         long                     atime_tv_nsec;
171         mode_t                   mode;
172         int64_t                  uid;
173         int64_t                  gid;
174         struct archive_string    uname;
175         struct archive_string    gname;
176         uint16_t                 header_crc;
177         uint16_t                 crc;
178         struct archive_string_conv *sconv;
179         struct archive_string_conv *opt_sconv;
180
181         struct archive_string    dirname;
182         struct archive_string    filename;
183         struct archive_wstring   ws;
184
185         unsigned char            dos_attr;
186
187         /* Flag to mark progress that an archive was read their first header.*/
188         char                     found_first_header;
189         /* Flag to mark that indicates an empty directory. */
190         char                     directory;
191
192         /* Flags to mark progress of decompression. */
193         char                     decompress_init;
194         char                     end_of_entry;
195         char                     end_of_entry_cleanup;
196         char                     entry_is_compressed;
197
198         char                     format_name[64];
199
200         struct lzh_stream        strm;
201 };
202
203 /*
204  * LHA header common member offset.
205  */
206 #define H_METHOD_OFFSET 2       /* Compress type. */
207 #define H_ATTR_OFFSET   19      /* DOS attribute. */
208 #define H_LEVEL_OFFSET  20      /* Header Level.  */
209 #define H_SIZE          22      /* Minimum header size. */
210
211 static int      archive_read_format_lha_bid(struct archive_read *, int);
212 static int      archive_read_format_lha_options(struct archive_read *,
213                     const char *, const char *);
214 static int      archive_read_format_lha_read_header(struct archive_read *,
215                     struct archive_entry *);
216 static int      archive_read_format_lha_read_data(struct archive_read *,
217                     const void **, size_t *, int64_t *);
218 static int      archive_read_format_lha_read_data_skip(struct archive_read *);
219 static int      archive_read_format_lha_cleanup(struct archive_read *);
220
221 static void     lha_replace_path_separator(struct lha *,
222                     struct archive_entry *);
223 static int      lha_read_file_header_0(struct archive_read *, struct lha *);
224 static int      lha_read_file_header_1(struct archive_read *, struct lha *);
225 static int      lha_read_file_header_2(struct archive_read *, struct lha *);
226 static int      lha_read_file_header_3(struct archive_read *, struct lha *);
227 static int      lha_read_file_extended_header(struct archive_read *,
228                     struct lha *, uint16_t *, int, size_t, size_t *);
229 static size_t   lha_check_header_format(const void *);
230 static int      lha_skip_sfx(struct archive_read *);
231 static time_t   lha_dos_time(const unsigned char *);
232 static time_t   lha_win_time(uint64_t, long *);
233 static unsigned char    lha_calcsum(unsigned char, const void *,
234                     int, size_t);
235 static int      lha_parse_linkname(struct archive_string *,
236                     struct archive_string *);
237 static int      lha_read_data_none(struct archive_read *, const void **,
238                     size_t *, int64_t *);
239 static int      lha_read_data_lzh(struct archive_read *, const void **,
240                     size_t *, int64_t *);
241 static void     lha_crc16_init(void);
242 static uint16_t lha_crc16(uint16_t, const void *, size_t);
243 static int      lzh_decode_init(struct lzh_stream *, const char *);
244 static void     lzh_decode_free(struct lzh_stream *);
245 static int      lzh_decode(struct lzh_stream *, int);
246 static int      lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
247 static int      lzh_huffman_init(struct huffman *, size_t, int);
248 static void     lzh_huffman_free(struct huffman *);
249 static int      lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
250 static int      lzh_make_fake_table(struct huffman *, uint16_t);
251 static int      lzh_make_huffman_table(struct huffman *);
252 static inline int lzh_decode_huffman(struct huffman *, unsigned);
253 static int      lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256 int
257 archive_read_support_format_lha(struct archive *_a)
258 {
259         struct archive_read *a = (struct archive_read *)_a;
260         struct lha *lha;
261         int r;
262
263         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264             ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266         lha = (struct lha *)calloc(1, sizeof(*lha));
267         if (lha == NULL) {
268                 archive_set_error(&a->archive, ENOMEM,
269                     "Can't allocate lha data");
270                 return (ARCHIVE_FATAL);
271         }
272         archive_string_init(&lha->ws);
273
274         r = __archive_read_register_format(a,
275             lha,
276             "lha",
277             archive_read_format_lha_bid,
278             archive_read_format_lha_options,
279             archive_read_format_lha_read_header,
280             archive_read_format_lha_read_data,
281             archive_read_format_lha_read_data_skip,
282             NULL,
283             archive_read_format_lha_cleanup,
284             NULL,
285             NULL);
286
287         if (r != ARCHIVE_OK)
288                 free(lha);
289         return (ARCHIVE_OK);
290 }
291
292 static size_t
293 lha_check_header_format(const void *h)
294 {
295         const unsigned char *p = h;
296         size_t next_skip_bytes;
297
298         switch (p[H_METHOD_OFFSET+3]) {
299         /*
300          * "-lh0-" ... "-lh7-" "-lhd-"
301          * "-lzs-" "-lz5-"
302          */
303         case '0': case '1': case '2': case '3':
304         case '4': case '5': case '6': case '7':
305         case 'd':
306         case 's':
307                 next_skip_bytes = 4;
308
309                 /* b0 == 0 means the end of an LHa archive file.        */
310                 if (p[0] == 0)
311                         break;
312                 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313                     ||  p[H_METHOD_OFFSET+4] != '-')
314                         break;
315
316                 if (p[H_METHOD_OFFSET+2] == 'h') {
317                         /* "-lh?-" */
318                         if (p[H_METHOD_OFFSET+3] == 's')
319                                 break;
320                         if (p[H_LEVEL_OFFSET] == 0)
321                                 return (0);
322                         if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323                                 return (0);
324                 }
325                 if (p[H_METHOD_OFFSET+2] == 'z') {
326                         /* LArc extensions: -lzs-,-lz4- and -lz5- */
327                         if (p[H_LEVEL_OFFSET] != 0)
328                                 break;
329                         if (p[H_METHOD_OFFSET+3] == 's'
330                             || p[H_METHOD_OFFSET+3] == '4'
331                             || p[H_METHOD_OFFSET+3] == '5')
332                                 return (0);
333                 }
334                 break;
335         case 'h': next_skip_bytes = 1; break;
336         case 'z': next_skip_bytes = 1; break;
337         case 'l': next_skip_bytes = 2; break;
338         case '-': next_skip_bytes = 3; break;
339         default : next_skip_bytes = 4; break;
340         }
341
342         return (next_skip_bytes);
343 }
344
345 static int
346 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347 {
348         const char *p;
349         const void *buff;
350         ssize_t bytes_avail, offset, window;
351         size_t next;
352
353         /* If there's already a better bid than we can ever
354            make, don't bother testing. */
355         if (best_bid > 30)
356                 return (-1);
357
358         if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359                 return (-1);
360
361         if (lha_check_header_format(p) == 0)
362                 return (30);
363
364         if (p[0] == 'M' && p[1] == 'Z') {
365                 /* PE file */
366                 offset = 0;
367                 window = 4096;
368                 while (offset < (1024 * 20)) {
369                         buff = __archive_read_ahead(a, offset + window,
370                             &bytes_avail);
371                         if (buff == NULL) {
372                                 /* Remaining bytes are less than window. */
373                                 window >>= 1;
374                                 if (window < (H_SIZE + 3))
375                                         return (0);
376                                 continue;
377                         }
378                         p = (const char *)buff + offset;
379                         while (p + H_SIZE < (const char *)buff + bytes_avail) {
380                                 if ((next = lha_check_header_format(p)) == 0)
381                                         return (30);
382                                 p += next;
383                         }
384                         offset = p - (const char *)buff;
385                 }
386         }
387         return (0);
388 }
389
390 static int
391 archive_read_format_lha_options(struct archive_read *a,
392     const char *key, const char *val)
393 {
394         struct lha *lha;
395         int ret = ARCHIVE_FAILED;
396
397         lha = (struct lha *)(a->format->data);
398         if (strcmp(key, "hdrcharset")  == 0) {
399                 if (val == NULL || val[0] == 0)
400                         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401                             "lha: hdrcharset option needs a character-set name");
402                 else {
403                         lha->opt_sconv =
404                             archive_string_conversion_from_charset(
405                                 &a->archive, val, 0);
406                         if (lha->opt_sconv != NULL)
407                                 ret = ARCHIVE_OK;
408                         else
409                                 ret = ARCHIVE_FATAL;
410                 }
411                 return (ret);
412         }
413
414         /* Note: The "warn" return is just to inform the options
415          * supervisor that we didn't handle it.  It will generate
416          * a suitable error if no one used this option. */
417         return (ARCHIVE_WARN);
418 }
419
420 static int
421 lha_skip_sfx(struct archive_read *a)
422 {
423         const void *h;
424         const char *p, *q;
425         size_t next, skip;
426         ssize_t bytes, window;
427
428         window = 4096;
429         for (;;) {
430                 h = __archive_read_ahead(a, window, &bytes);
431                 if (h == NULL) {
432                         /* Remaining bytes are less than window. */
433                         window >>= 1;
434                         if (window < (H_SIZE + 3))
435                                 goto fatal;
436                         continue;
437                 }
438                 if (bytes < H_SIZE)
439                         goto fatal;
440                 p = h;
441                 q = p + bytes;
442
443                 /*
444                  * Scan ahead until we find something that looks
445                  * like the lha header.
446                  */
447                 while (p + H_SIZE < q) {
448                         if ((next = lha_check_header_format(p)) == 0) {
449                                 skip = p - (const char *)h;
450                                 __archive_read_consume(a, skip);
451                                 return (ARCHIVE_OK);
452                         }
453                         p += next;
454                 }
455                 skip = p - (const char *)h;
456                 __archive_read_consume(a, skip);
457         }
458 fatal:
459         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460             "Couldn't find out LHa header");
461         return (ARCHIVE_FATAL);
462 }
463
464 static int
465 truncated_error(struct archive_read *a)
466 {
467         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468             "Truncated LHa header");
469         return (ARCHIVE_FATAL);
470 }
471
472 static int
473 archive_read_format_lha_read_header(struct archive_read *a,
474     struct archive_entry *entry)
475 {
476         struct archive_string linkname;
477         struct archive_string pathname;
478         struct lha *lha;
479         const unsigned char *p;
480         const char *signature;
481         int err;
482         
483         lha_crc16_init();
484
485         a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486         if (a->archive.archive_format_name == NULL)
487                 a->archive.archive_format_name = "lha";
488
489         lha = (struct lha *)(a->format->data);
490         lha->decompress_init = 0;
491         lha->end_of_entry = 0;
492         lha->end_of_entry_cleanup = 0;
493         lha->entry_unconsumed = 0;
494
495         if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496                 /*
497                  * LHa archiver added 0 to the tail of its archive file as
498                  * the mark of the end of the archive.
499                  */
500                 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501                 if (signature == NULL || signature[0] == 0)
502                         return (ARCHIVE_EOF);
503                 return (truncated_error(a));
504         }
505
506         signature = (const char *)p;
507         if (lha->found_first_header == 0 &&
508             signature[0] == 'M' && signature[1] == 'Z') {
509                 /* This is an executable?  Must be self-extracting...   */
510                 err = lha_skip_sfx(a);
511                 if (err < ARCHIVE_WARN)
512                         return (err);
513
514                 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515                         return (truncated_error(a));
516                 signature = (const char *)p;
517         }
518         /* signature[0] == 0 means the end of an LHa archive file. */
519         if (signature[0] == 0)
520                 return (ARCHIVE_EOF);
521
522         /*
523          * Check the header format and method type.
524          */
525         if (lha_check_header_format(p) != 0) {
526                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527                     "Bad LHa file");
528                 return (ARCHIVE_FATAL);
529         }
530
531         /* We've found the first header. */
532         lha->found_first_header = 1;
533         /* Set a default value and common data */
534         lha->header_size = 0;
535         lha->level = p[H_LEVEL_OFFSET];
536         lha->method[0] = p[H_METHOD_OFFSET+1];
537         lha->method[1] = p[H_METHOD_OFFSET+2];
538         lha->method[2] = p[H_METHOD_OFFSET+3];
539         if (memcmp(lha->method, "lhd", 3) == 0)
540                 lha->directory = 1;
541         else
542                 lha->directory = 0;
543         if (memcmp(lha->method, "lh0", 3) == 0 ||
544             memcmp(lha->method, "lz4", 3) == 0)
545                 lha->entry_is_compressed = 0;
546         else
547                 lha->entry_is_compressed = 1;
548
549         lha->compsize = 0;
550         lha->origsize = 0;
551         lha->setflag = 0;
552         lha->birthtime = 0;
553         lha->birthtime_tv_nsec = 0;
554         lha->mtime = 0;
555         lha->mtime_tv_nsec = 0;
556         lha->atime = 0;
557         lha->atime_tv_nsec = 0;
558         lha->mode = (lha->directory)? 0777 : 0666;
559         lha->uid = 0;
560         lha->gid = 0;
561         archive_string_empty(&lha->dirname);
562         archive_string_empty(&lha->filename);
563         lha->dos_attr = 0;
564         if (lha->opt_sconv != NULL)
565                 lha->sconv = lha->opt_sconv;
566         else
567                 lha->sconv = NULL;
568
569         switch (p[H_LEVEL_OFFSET]) {
570         case 0:
571                 err = lha_read_file_header_0(a, lha);
572                 break;
573         case 1:
574                 err = lha_read_file_header_1(a, lha);
575                 break;
576         case 2:
577                 err = lha_read_file_header_2(a, lha);
578                 break;
579         case 3:
580                 err = lha_read_file_header_3(a, lha);
581                 break;
582         default:
583                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584                     "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585                 err = ARCHIVE_FATAL;
586                 break;
587         }
588         if (err < ARCHIVE_WARN)
589                 return (err);
590
591
592         if (!lha->directory && archive_strlen(&lha->filename) == 0)
593                 /* The filename has not been set */
594                 return (truncated_error(a));
595
596         /*
597          * Make a pathname from a dirname and a filename.
598          */
599         archive_string_concat(&lha->dirname, &lha->filename);
600         archive_string_init(&pathname);
601         archive_string_init(&linkname);
602         archive_string_copy(&pathname, &lha->dirname);
603
604         if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605                 /*
606                  * Extract the symlink-name if it's included in the pathname.
607                  */
608                 if (!lha_parse_linkname(&linkname, &pathname)) {
609                         /* We couldn't get the symlink-name. */
610                         archive_set_error(&a->archive,
611                             ARCHIVE_ERRNO_FILE_FORMAT,
612                             "Unknown symlink-name");
613                         archive_string_free(&pathname);
614                         archive_string_free(&linkname);
615                         return (ARCHIVE_FAILED);
616                 }
617         } else {
618                 /*
619                  * Make sure a file-type is set.
620                  * The mode has been overridden if it is in the extended data.
621                  */
622                 lha->mode = (lha->mode & ~AE_IFMT) |
623                     ((lha->directory)? AE_IFDIR: AE_IFREG);
624         }
625         if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626             (lha->dos_attr & 1) != 0)
627                 lha->mode &= ~(0222);/* read only. */
628
629         /*
630          * Set basic file parameters.
631          */
632         if (archive_entry_copy_pathname_l(entry, pathname.s,
633             pathname.length, lha->sconv) != 0) {
634                 if (errno == ENOMEM) {
635                         archive_set_error(&a->archive, ENOMEM,
636                             "Can't allocate memory for Pathname");
637                         return (ARCHIVE_FATAL);
638                 }
639                 archive_set_error(&a->archive,
640                     ARCHIVE_ERRNO_FILE_FORMAT,
641                     "Pathname cannot be converted "
642                     "from %s to current locale.",
643                     archive_string_conversion_charset_name(lha->sconv));
644                 err = ARCHIVE_WARN;
645         }
646         archive_string_free(&pathname);
647         if (archive_strlen(&linkname) > 0) {
648                 if (archive_entry_copy_symlink_l(entry, linkname.s,
649                     linkname.length, lha->sconv) != 0) {
650                         if (errno == ENOMEM) {
651                                 archive_set_error(&a->archive, ENOMEM,
652                                     "Can't allocate memory for Linkname");
653                                 return (ARCHIVE_FATAL);
654                         }
655                         archive_set_error(&a->archive,
656                             ARCHIVE_ERRNO_FILE_FORMAT,
657                             "Linkname cannot be converted "
658                             "from %s to current locale.",
659                             archive_string_conversion_charset_name(lha->sconv));
660                         err = ARCHIVE_WARN;
661                 }
662         } else
663                 archive_entry_set_symlink(entry, NULL);
664         archive_string_free(&linkname);
665         /*
666          * When a header level is 0, there is a possibility that
667          * a pathname and a symlink has '\' character, a directory
668          * separator in DOS/Windows. So we should convert it to '/'.
669          */
670         if (p[H_LEVEL_OFFSET] == 0)
671                 lha_replace_path_separator(lha, entry);
672
673         archive_entry_set_mode(entry, lha->mode);
674         archive_entry_set_uid(entry, lha->uid);
675         archive_entry_set_gid(entry, lha->gid);
676         if (archive_strlen(&lha->uname) > 0)
677                 archive_entry_set_uname(entry, lha->uname.s);
678         if (archive_strlen(&lha->gname) > 0)
679                 archive_entry_set_gname(entry, lha->gname.s);
680         if (lha->setflag & BIRTHTIME_IS_SET) {
681                 archive_entry_set_birthtime(entry, lha->birthtime,
682                     lha->birthtime_tv_nsec);
683                 archive_entry_set_ctime(entry, lha->birthtime,
684                     lha->birthtime_tv_nsec);
685         } else {
686                 archive_entry_unset_birthtime(entry);
687                 archive_entry_unset_ctime(entry);
688         }
689         archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690         if (lha->setflag & ATIME_IS_SET)
691                 archive_entry_set_atime(entry, lha->atime,
692                     lha->atime_tv_nsec);
693         else
694                 archive_entry_unset_atime(entry);
695         if (lha->directory || archive_entry_symlink(entry) != NULL)
696                 archive_entry_unset_size(entry);
697         else
698                 archive_entry_set_size(entry, lha->origsize);
699
700         /*
701          * Prepare variables used to read a file content.
702          */
703         lha->entry_bytes_remaining = lha->compsize;
704         lha->entry_offset = 0;
705         lha->entry_crc_calculated = 0;
706
707         /*
708          * This file does not have a content.
709          */
710         if (lha->directory || lha->compsize == 0)
711                 lha->end_of_entry = 1;
712
713         sprintf(lha->format_name, "lha -%c%c%c-",
714             lha->method[0], lha->method[1], lha->method[2]);
715         a->archive.archive_format_name = lha->format_name;
716
717         return (err);
718 }
719
720 /*
721  * Replace a DOS path separator '\' by a character '/'.
722  * Some multi-byte character set have  a character '\' in its second byte.
723  */
724 static void
725 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
726 {
727         const wchar_t *wp;
728         size_t i;
729
730         if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731                 archive_wstrcpy(&(lha->ws), wp);
732                 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733                         if (lha->ws.s[i] == L'\\')
734                                 lha->ws.s[i] = L'/';
735                 }
736                 archive_entry_copy_pathname_w(entry, lha->ws.s);
737         }
738
739         if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740                 archive_wstrcpy(&(lha->ws), wp);
741                 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742                         if (lha->ws.s[i] == L'\\')
743                                 lha->ws.s[i] = L'/';
744                 }
745                 archive_entry_copy_symlink_w(entry, lha->ws.s);
746         }
747 }
748
749 /*
750  * Header 0 format
751  *
752  * +0              +1         +2               +7                  +11
753  * +---------------+----------+----------------+-------------------+
754  * |header size(*1)|header sum|compression type|compressed size(*2)|
755  * +---------------+----------+----------------+-------------------+
756  *                             <---------------------(*1)----------*
757  *
758  * +11               +15       +17       +19            +20              +21
759  * +-----------------+---------+---------+--------------+----------------+
760  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
761  * +-----------------+---------+---------+--------------+----------------+
762  * *--------------------------------(*1)---------------------------------*
763  *
764  * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
765  * +---------------+---------+----------+----------------+------------------+
766  * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
767  * +---------------+---------+----------+----------------+------------------+
768  *                  <--(*3)->                             <------(*2)------>
769  * *----------------------(*1)-------------------------->
770  *
771  */
772 #define H0_HEADER_SIZE_OFFSET   0
773 #define H0_HEADER_SUM_OFFSET    1
774 #define H0_COMP_SIZE_OFFSET     7
775 #define H0_ORIG_SIZE_OFFSET     11
776 #define H0_DOS_TIME_OFFSET      15
777 #define H0_NAME_LEN_OFFSET      21
778 #define H0_FILE_NAME_OFFSET     22
779 #define H0_FIXED_SIZE           24
780 static int
781 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
782 {
783         const unsigned char *p;
784         int extdsize, namelen;
785         unsigned char headersum, sum_calculated;
786
787         if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788                 return (truncated_error(a));
789         lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790         headersum = p[H0_HEADER_SUM_OFFSET];
791         lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792         lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793         lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794         namelen = p[H0_NAME_LEN_OFFSET];
795         extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
796         if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798                     "Invalid LHa header");
799                 return (ARCHIVE_FATAL);
800         }
801         if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802                 return (truncated_error(a));
803
804         archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805         /* When extdsize == -2, A CRC16 value is not present in the header. */
806         if (extdsize >= 0) {
807                 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808                 lha->setflag |= CRC_IS_SET;
809         }
810         sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
811
812         /* Read an extended header */
813         if (extdsize > 0) {
814                 /* This extended data is set by 'LHa for UNIX' only.
815                  * Maybe fixed size.
816                  */
817                 p += H0_FILE_NAME_OFFSET + namelen + 2;
818                 if (p[0] == 'U' && extdsize == 12) {
819                         /* p[1] is a minor version. */
820                         lha->mtime = archive_le32dec(&p[2]);
821                         lha->mode = archive_le16dec(&p[6]);
822                         lha->uid = archive_le16dec(&p[8]);
823                         lha->gid = archive_le16dec(&p[10]);
824                         lha->setflag |= UNIX_MODE_IS_SET;
825                 }
826         }
827         __archive_read_consume(a, lha->header_size);
828
829         if (sum_calculated != headersum) {
830                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831                     "LHa header sum error");
832                 return (ARCHIVE_FATAL);
833         }
834
835         return (ARCHIVE_OK);
836 }
837
838 /*
839  * Header 1 format
840  *
841  * +0              +1         +2               +7            +11
842  * +---------------+----------+----------------+-------------+
843  * |header size(*1)|header sum|compression type|skip size(*2)|
844  * +---------------+----------+----------------+-------------+
845  *                             <---------------(*1)----------*
846  *
847  * +11               +15       +17       +19            +20              +21
848  * +-----------------+---------+---------+--------------+----------------+
849  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850  * +-----------------+---------+---------+--------------+----------------+
851  * *-------------------------------(*1)----------------------------------*
852  *
853  * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
854  * +---------------+---------+----------+-----------+-----------+
855  * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
856  * +---------------+---------+----------+-----------+-----------+
857  *                  <--(*3)->
858  * *----------------------------(*1)----------------------------*
859  *
860  * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
861  * +----------------+---------------------+------------------------+
862  * |next header size| extended header(*5) |     compressed data    |
863  * +----------------+---------------------+------------------------+
864  * *------(*1)-----> <--------------------(*2)-------------------->
865  */
866 #define H1_HEADER_SIZE_OFFSET   0
867 #define H1_HEADER_SUM_OFFSET    1
868 #define H1_COMP_SIZE_OFFSET     7
869 #define H1_ORIG_SIZE_OFFSET     11
870 #define H1_DOS_TIME_OFFSET      15
871 #define H1_NAME_LEN_OFFSET      21
872 #define H1_FILE_NAME_OFFSET     22
873 #define H1_FIXED_SIZE           27
874 static int
875 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
876 {
877         const unsigned char *p;
878         size_t extdsize;
879         int i, err, err2;
880         int namelen, padding;
881         unsigned char headersum, sum_calculated;
882
883         err = ARCHIVE_OK;
884
885         if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886                 return (truncated_error(a));
887
888         lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889         headersum = p[H1_HEADER_SUM_OFFSET];
890         /* Note: An extended header size is included in a compsize. */
891         lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892         lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893         lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894         namelen = p[H1_NAME_LEN_OFFSET];
895         /* Calculate a padding size. The result will be normally 0 only(?) */
896         padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
897
898         if (namelen > 230 || padding < 0)
899                 goto invalid;
900
901         if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902                 return (truncated_error(a));
903
904         for (i = 0; i < namelen; i++) {
905                 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906                         goto invalid;/* Invalid filename. */
907         }
908         archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909         lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910         lha->setflag |= CRC_IS_SET;
911
912         sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913         /* Consume used bytes but not include `next header size' data
914          * since it will be consumed in lha_read_file_extended_header(). */
915         __archive_read_consume(a, lha->header_size - 2);
916
917         /* Read extended headers */
918         err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919             (size_t)(lha->compsize + 2), &extdsize);
920         if (err2 < ARCHIVE_WARN)
921                 return (err2);
922         if (err2 < err)
923                 err = err2;
924         /* Get a real compressed file size. */
925         lha->compsize -= extdsize - 2;
926
927         if (lha->compsize < 0)
928                 goto invalid;   /* Invalid compressed file size */
929
930         if (sum_calculated != headersum) {
931                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
932                     "LHa header sum error");
933                 return (ARCHIVE_FATAL);
934         }
935         return (err);
936 invalid:
937         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
938             "Invalid LHa header");
939         return (ARCHIVE_FATAL);
940 }
941
942 /*
943  * Header 2 format
944  *
945  * +0              +2               +7                  +11               +15
946  * +---------------+----------------+-------------------+-----------------+
947  * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
948  * +---------------+----------------+-------------------+-----------------+
949  *  <--------------------------------(*1)---------------------------------*
950  *
951  * +15               +19          +20              +21        +23         +24
952  * +-----------------+------------+----------------+----------+-----------+
953  * |data/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
954  * +-----------------+------------+----------------+----------+-----------+
955  * *---------------------------------(*1)---------------------------------*
956  *
957  * +24              +26                 +26+(*3)      +26+(*3)+(*4)
958  * +----------------+-------------------+-------------+-------------------+
959  * |next header size|extended header(*3)| padding(*4) |  compressed data  |
960  * +----------------+-------------------+-------------+-------------------+
961  * *--------------------------(*1)-------------------> <------(*2)------->
962  *
963  */
964 #define H2_HEADER_SIZE_OFFSET   0
965 #define H2_COMP_SIZE_OFFSET     7
966 #define H2_ORIG_SIZE_OFFSET     11
967 #define H2_TIME_OFFSET          15
968 #define H2_CRC_OFFSET           21
969 #define H2_FIXED_SIZE           24
970 static int
971 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
972 {
973         const unsigned char *p;
974         size_t extdsize;
975         int err, padding;
976         uint16_t header_crc;
977
978         if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
979                 return (truncated_error(a));
980
981         lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
982         lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
983         lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
984         lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
985         lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
986         lha->setflag |= CRC_IS_SET;
987
988         if (lha->header_size < H2_FIXED_SIZE) {
989                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
990                     "Invalid LHa header size");
991                 return (ARCHIVE_FATAL);
992         }
993
994         header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
995         __archive_read_consume(a, H2_FIXED_SIZE);
996
997         /* Read extended headers */
998         err = lha_read_file_extended_header(a, lha, &header_crc, 2,
999                   lha->header_size - H2_FIXED_SIZE, &extdsize);
1000         if (err < ARCHIVE_WARN)
1001                 return (err);
1002
1003         /* Calculate a padding size. The result will be normally 0 or 1. */
1004         padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1005         if (padding > 0) {
1006                 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1007                         return (truncated_error(a));
1008                 header_crc = lha_crc16(header_crc, p, padding);
1009                 __archive_read_consume(a, padding);
1010         }
1011
1012         if (header_crc != lha->header_crc) {
1013                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1014                     "LHa header CRC error");
1015                 return (ARCHIVE_FATAL);
1016         }
1017         return (err);
1018 }
1019
1020 /*
1021  * Header 3 format
1022  *
1023  * +0           +2               +7                  +11               +15
1024  * +------------+----------------+-------------------+-----------------+
1025  * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1026  * +------------+----------------+-------------------+-----------------+
1027  *  <-------------------------------(*1)-------------------------------*
1028  *
1029  * +15               +19          +20              +21        +23         +24
1030  * +-----------------+------------+----------------+----------+-----------+
1031  * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1032  * +-----------------+------------+----------------+----------+-----------+
1033  * *--------------------------------(*1)----------------------------------*
1034  *
1035  * +24             +28              +32                 +32+(*3)
1036  * +---------------+----------------+-------------------+-----------------+
1037  * |header size(*1)|next header size|extended header(*3)| compressed data |
1038  * +---------------+----------------+-------------------+-----------------+
1039  * *------------------------(*1)-----------------------> <------(*2)----->
1040  *
1041  */
1042 #define H3_FIELD_LEN_OFFSET     0
1043 #define H3_COMP_SIZE_OFFSET     7
1044 #define H3_ORIG_SIZE_OFFSET     11
1045 #define H3_TIME_OFFSET          15
1046 #define H3_CRC_OFFSET           21
1047 #define H3_HEADER_SIZE_OFFSET   24
1048 #define H3_FIXED_SIZE           28
1049 static int
1050 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1051 {
1052         const unsigned char *p;
1053         size_t extdsize;
1054         int err;
1055         uint16_t header_crc;
1056
1057         if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1058                 return (truncated_error(a));
1059
1060         if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1061                 goto invalid;
1062         lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1063         lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1064         lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1065         lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1066         lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1067         lha->setflag |= CRC_IS_SET;
1068
1069         if (lha->header_size < H3_FIXED_SIZE + 4)
1070                 goto invalid;
1071         header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1072         __archive_read_consume(a, H3_FIXED_SIZE);
1073
1074         /* Read extended headers */
1075         err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1076                   lha->header_size - H3_FIXED_SIZE, &extdsize);
1077         if (err < ARCHIVE_WARN)
1078                 return (err);
1079
1080         if (header_crc != lha->header_crc) {
1081                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1082                     "LHa header CRC error");
1083                 return (ARCHIVE_FATAL);
1084         }
1085         return (err);
1086 invalid:
1087         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1088             "Invalid LHa header");
1089         return (ARCHIVE_FATAL);
1090 }
1091
1092 /*
1093  * Extended header format
1094  *
1095  * +0             +2        +3  -- used in header 1 and 2
1096  * +0             +4        +5  -- used in header 3
1097  * +--------------+---------+-------------------+--------------+--
1098  * |ex-header size|header id|        data       |ex-header size| .......
1099  * +--------------+---------+-------------------+--------------+--
1100  *  <-------------( ex-header size)------------> <-- next extended header --*
1101  *
1102  * If the ex-header size is zero, it is the make of the end of extended
1103  * headers.
1104  *
1105  */
1106 static int
1107 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1108     uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1109 {
1110         const void *h;
1111         const unsigned char *extdheader;
1112         size_t  extdsize;
1113         size_t  datasize;
1114         unsigned int i;
1115         unsigned char extdtype;
1116
1117 #define EXT_HEADER_CRC          0x00            /* Header CRC and information*/
1118 #define EXT_FILENAME            0x01            /* Filename                 */
1119 #define EXT_DIRECTORY           0x02            /* Directory name           */
1120 #define EXT_DOS_ATTR            0x40            /* MS-DOS attribute         */
1121 #define EXT_TIMESTAMP           0x41            /* Windows time stamp       */
1122 #define EXT_FILESIZE            0x42            /* Large file size          */
1123 #define EXT_TIMEZONE            0x43            /* Time zone                */
1124 #define EXT_UTF16_FILENAME      0x44            /* UTF-16 filename          */
1125 #define EXT_UTF16_DIRECTORY     0x45            /* UTF-16 directory name    */
1126 #define EXT_CODEPAGE            0x46            /* Codepage                 */
1127 #define EXT_UNIX_MODE           0x50            /* File permission          */
1128 #define EXT_UNIX_GID_UID        0x51            /* gid,uid                  */
1129 #define EXT_UNIX_GNAME          0x52            /* Group name               */
1130 #define EXT_UNIX_UNAME          0x53            /* User name                */
1131 #define EXT_UNIX_MTIME          0x54            /* Modified time            */
1132 #define EXT_OS2_NEW_ATTR        0x7f            /* new attribute(OS/2 only) */
1133 #define EXT_NEW_ATTR            0xff            /* new attribute            */
1134
1135         *total_size = sizefield_length;
1136
1137         for (;;) {
1138                 /* Read an extended header size. */
1139                 if ((h =
1140                     __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1141                         return (truncated_error(a));
1142                 /* Check if the size is the zero indicates the end of the
1143                  * extended header. */
1144                 if (sizefield_length == sizeof(uint16_t))
1145                         extdsize = archive_le16dec(h);
1146                 else
1147                         extdsize = archive_le32dec(h);
1148                 if (extdsize == 0) {
1149                         /* End of extended header */
1150                         if (crc != NULL)
1151                                 *crc = lha_crc16(*crc, h, sizefield_length);
1152                         __archive_read_consume(a, sizefield_length);
1153                         return (ARCHIVE_OK);
1154                 }
1155
1156                 /* Sanity check to the extended header size. */
1157                 if (((uint64_t)*total_size + extdsize) >
1158                                     (uint64_t)limitsize ||
1159                     extdsize <= (size_t)sizefield_length)
1160                         goto invalid;
1161
1162                 /* Read the extended header. */
1163                 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1164                         return (truncated_error(a));
1165                 *total_size += extdsize;
1166
1167                 extdheader = (const unsigned char *)h;
1168                 /* Get the extended header type. */
1169                 extdtype = extdheader[sizefield_length];
1170                 /* Calculate an extended data size. */
1171                 datasize = extdsize - (1 + sizefield_length);
1172                 /* Skip an extended header size field and type field. */
1173                 extdheader += sizefield_length + 1;
1174
1175                 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1176                         *crc = lha_crc16(*crc, h, extdsize);
1177                 switch (extdtype) {
1178                 case EXT_HEADER_CRC:
1179                         /* We only use a header CRC. Following data will not
1180                          * be used. */
1181                         if (datasize >= 2) {
1182                                 lha->header_crc = archive_le16dec(extdheader);
1183                                 if (crc != NULL) {
1184                                         static const char zeros[2] = {0, 0};
1185                                         *crc = lha_crc16(*crc, h,
1186                                             extdsize - datasize);
1187                                         /* CRC value itself as zero */
1188                                         *crc = lha_crc16(*crc, zeros, 2);
1189                                         *crc = lha_crc16(*crc,
1190                                             extdheader+2, datasize - 2);
1191                                 }
1192                         }
1193                         break;
1194                 case EXT_FILENAME:
1195                         if (datasize == 0) {
1196                                 /* maybe directory header */
1197                                 archive_string_empty(&lha->filename);
1198                                 break;
1199                         }
1200                         if (extdheader[0] == '\0')
1201                                 goto invalid;
1202                         archive_strncpy(&lha->filename,
1203                             (const char *)extdheader, datasize);
1204                         break;
1205                 case EXT_DIRECTORY:
1206                         if (datasize == 0 || extdheader[0] == '\0')
1207                                 /* no directory name data. exit this case. */
1208                                 goto invalid;
1209
1210                         archive_strncpy(&lha->dirname,
1211                             (const char *)extdheader, datasize);
1212                         /*
1213                          * Convert directory delimiter from 0xFF
1214                          * to '/' for local system.
1215                          */
1216                         for (i = 0; i < lha->dirname.length; i++) {
1217                                 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1218                                         lha->dirname.s[i] = '/';
1219                         }
1220                         /* Is last character directory separator? */
1221                         if (lha->dirname.s[lha->dirname.length-1] != '/')
1222                                 /* invalid directory data */
1223                                 goto invalid;
1224                         break;
1225                 case EXT_DOS_ATTR:
1226                         if (datasize == 2)
1227                                 lha->dos_attr = (unsigned char)
1228                                     (archive_le16dec(extdheader) & 0xff);
1229                         break;
1230                 case EXT_TIMESTAMP:
1231                         if (datasize == (sizeof(uint64_t) * 3)) {
1232                                 lha->birthtime = lha_win_time(
1233                                     archive_le64dec(extdheader),
1234                                     &lha->birthtime_tv_nsec);
1235                                 extdheader += sizeof(uint64_t);
1236                                 lha->mtime = lha_win_time(
1237                                     archive_le64dec(extdheader),
1238                                     &lha->mtime_tv_nsec);
1239                                 extdheader += sizeof(uint64_t);
1240                                 lha->atime = lha_win_time(
1241                                     archive_le64dec(extdheader),
1242                                     &lha->atime_tv_nsec);
1243                                 lha->setflag |= BIRTHTIME_IS_SET |
1244                                     ATIME_IS_SET;
1245                         }
1246                         break;
1247                 case EXT_FILESIZE:
1248                         if (datasize == sizeof(uint64_t) * 2) {
1249                                 lha->compsize = archive_le64dec(extdheader);
1250                                 extdheader += sizeof(uint64_t);
1251                                 lha->origsize = archive_le64dec(extdheader);
1252                         }
1253                         break;
1254                 case EXT_CODEPAGE:
1255                         /* Get an archived filename charset from codepage.
1256                          * This overwrites the charset specified by
1257                          * hdrcharset option. */
1258                         if (datasize == sizeof(uint32_t)) {
1259                                 struct archive_string cp;
1260                                 const char *charset;
1261
1262                                 archive_string_init(&cp);
1263                                 switch (archive_le32dec(extdheader)) {
1264                                 case 65001: /* UTF-8 */
1265                                         charset = "UTF-8";
1266                                         break;
1267                                 default:
1268                                         archive_string_sprintf(&cp, "CP%d",
1269                                             (int)archive_le32dec(extdheader));
1270                                         charset = cp.s;
1271                                         break;
1272                                 }
1273                                 lha->sconv =
1274                                     archive_string_conversion_from_charset(
1275                                         &(a->archive), charset, 1);
1276                                 archive_string_free(&cp);
1277                                 if (lha->sconv == NULL)
1278                                         return (ARCHIVE_FATAL);
1279                         }
1280                         break;
1281                 case EXT_UNIX_MODE:
1282                         if (datasize == sizeof(uint16_t)) {
1283                                 lha->mode = archive_le16dec(extdheader);
1284                                 lha->setflag |= UNIX_MODE_IS_SET;
1285                         }
1286                         break;
1287                 case EXT_UNIX_GID_UID:
1288                         if (datasize == (sizeof(uint16_t) * 2)) {
1289                                 lha->gid = archive_le16dec(extdheader);
1290                                 lha->uid = archive_le16dec(extdheader+2);
1291                         }
1292                         break;
1293                 case EXT_UNIX_GNAME:
1294                         if (datasize > 0)
1295                                 archive_strncpy(&lha->gname,
1296                                     (const char *)extdheader, datasize);
1297                         break;
1298                 case EXT_UNIX_UNAME:
1299                         if (datasize > 0)
1300                                 archive_strncpy(&lha->uname,
1301                                     (const char *)extdheader, datasize);
1302                         break;
1303                 case EXT_UNIX_MTIME:
1304                         if (datasize == sizeof(uint32_t))
1305                                 lha->mtime = archive_le32dec(extdheader);
1306                         break;
1307                 case EXT_OS2_NEW_ATTR:
1308                         /* This extended header is OS/2 depend. */
1309                         if (datasize == 16) {
1310                                 lha->dos_attr = (unsigned char)
1311                                     (archive_le16dec(extdheader) & 0xff);
1312                                 lha->mode = archive_le16dec(extdheader+2);
1313                                 lha->gid = archive_le16dec(extdheader+4);
1314                                 lha->uid = archive_le16dec(extdheader+6);
1315                                 lha->birthtime = archive_le32dec(extdheader+8);
1316                                 lha->atime = archive_le32dec(extdheader+12);
1317                                 lha->setflag |= UNIX_MODE_IS_SET
1318                                     | BIRTHTIME_IS_SET | ATIME_IS_SET;
1319                         }
1320                         break;
1321                 case EXT_NEW_ATTR:
1322                         if (datasize == 20) {
1323                                 lha->mode = (mode_t)archive_le32dec(extdheader);
1324                                 lha->gid = archive_le32dec(extdheader+4);
1325                                 lha->uid = archive_le32dec(extdheader+8);
1326                                 lha->birthtime = archive_le32dec(extdheader+12);
1327                                 lha->atime = archive_le32dec(extdheader+16);
1328                                 lha->setflag |= UNIX_MODE_IS_SET
1329                                     | BIRTHTIME_IS_SET | ATIME_IS_SET;
1330                         }
1331                         break;
1332                 case EXT_TIMEZONE:              /* Not supported */
1333                 case EXT_UTF16_FILENAME:        /* Not supported */
1334                 case EXT_UTF16_DIRECTORY:       /* Not supported */
1335                 default:
1336                         break;
1337                 }
1338
1339                 __archive_read_consume(a, extdsize);
1340         }
1341 invalid:
1342         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1343             "Invalid extended LHa header");
1344         return (ARCHIVE_FATAL);
1345 }
1346
1347 static int
1348 lha_end_of_entry(struct archive_read *a)
1349 {
1350         struct lha *lha = (struct lha *)(a->format->data);
1351         int r = ARCHIVE_EOF;
1352
1353         if (!lha->end_of_entry_cleanup) {
1354                 if ((lha->setflag & CRC_IS_SET) &&
1355                     lha->crc != lha->entry_crc_calculated) {
1356                         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1357                             "LHa data CRC error");
1358                         r = ARCHIVE_WARN;
1359                 }
1360
1361                 /* End-of-entry cleanup done. */
1362                 lha->end_of_entry_cleanup = 1;
1363         }
1364         return (r);
1365 }
1366
1367 static int
1368 archive_read_format_lha_read_data(struct archive_read *a,
1369     const void **buff, size_t *size, int64_t *offset)
1370 {
1371         struct lha *lha = (struct lha *)(a->format->data);
1372         int r;
1373
1374         if (lha->entry_unconsumed) {
1375                 /* Consume as much as the decompressor actually used. */
1376                 __archive_read_consume(a, lha->entry_unconsumed);
1377                 lha->entry_unconsumed = 0;
1378         }
1379         if (lha->end_of_entry) {
1380                 *offset = lha->entry_offset;
1381                 *size = 0;
1382                 *buff = NULL;
1383                 return (lha_end_of_entry(a));
1384         }
1385
1386         if (lha->entry_is_compressed)
1387                 r =  lha_read_data_lzh(a, buff, size, offset);
1388         else
1389                 /* No compression. */
1390                 r =  lha_read_data_none(a, buff, size, offset);
1391         return (r);
1392 }
1393
1394 /*
1395  * Read a file content in no compression.
1396  *
1397  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1398  * lha->end_of_entry if it consumes all of the data.
1399  */
1400 static int
1401 lha_read_data_none(struct archive_read *a, const void **buff,
1402     size_t *size, int64_t *offset)
1403 {
1404         struct lha *lha = (struct lha *)(a->format->data);
1405         ssize_t bytes_avail;
1406
1407         if (lha->entry_bytes_remaining == 0) {
1408                 *buff = NULL;
1409                 *size = 0;
1410                 *offset = lha->entry_offset;
1411                 lha->end_of_entry = 1;
1412                 return (ARCHIVE_OK);
1413         }
1414         /*
1415          * Note: '1' here is a performance optimization.
1416          * Recall that the decompression layer returns a count of
1417          * available bytes; asking for more than that forces the
1418          * decompressor to combine reads by copying data.
1419          */
1420         *buff = __archive_read_ahead(a, 1, &bytes_avail);
1421         if (bytes_avail <= 0) {
1422                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1423                     "Truncated LHa file data");
1424                 return (ARCHIVE_FATAL);
1425         }
1426         if (bytes_avail > lha->entry_bytes_remaining)
1427                 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1428         lha->entry_crc_calculated =
1429             lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1430         *size = bytes_avail;
1431         *offset = lha->entry_offset;
1432         lha->entry_offset += bytes_avail;
1433         lha->entry_bytes_remaining -= bytes_avail;
1434         if (lha->entry_bytes_remaining == 0)
1435                 lha->end_of_entry = 1;
1436         lha->entry_unconsumed = bytes_avail;
1437         return (ARCHIVE_OK);
1438 }
1439
1440 /*
1441  * Read a file content in LZHUFF encoding.
1442  *
1443  * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1444  * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1445  * all of the data.
1446  */
1447 static int
1448 lha_read_data_lzh(struct archive_read *a, const void **buff,
1449     size_t *size, int64_t *offset)
1450 {
1451         struct lha *lha = (struct lha *)(a->format->data);
1452         ssize_t bytes_avail;
1453         int r;
1454
1455         /* If we haven't yet read any data, initialize the decompressor. */
1456         if (!lha->decompress_init) {
1457                 r = lzh_decode_init(&(lha->strm), lha->method);
1458                 switch (r) {
1459                 case ARCHIVE_OK:
1460                         break;
1461                 case ARCHIVE_FAILED:
1462                         /* Unsupported compression. */
1463                         *buff = NULL;
1464                         *size = 0;
1465                         *offset = 0;
1466                         archive_set_error(&a->archive,
1467                             ARCHIVE_ERRNO_FILE_FORMAT,
1468                             "Unsupported lzh compression method -%c%c%c-",
1469                             lha->method[0], lha->method[1], lha->method[2]);
1470                         /* We know compressed size; just skip it. */
1471                         archive_read_format_lha_read_data_skip(a);
1472                         return (ARCHIVE_WARN);
1473                 default:
1474                         archive_set_error(&a->archive, ENOMEM,
1475                             "Couldn't allocate memory "
1476                             "for lzh decompression");
1477                         return (ARCHIVE_FATAL);
1478                 }
1479                 /* We've initialized decompression for this stream. */
1480                 lha->decompress_init = 1;
1481                 lha->strm.avail_out = 0;
1482                 lha->strm.total_out = 0;
1483         }
1484
1485         /*
1486          * Note: '1' here is a performance optimization.
1487          * Recall that the decompression layer returns a count of
1488          * available bytes; asking for more than that forces the
1489          * decompressor to combine reads by copying data.
1490          */
1491         lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1492         if (bytes_avail <= 0) {
1493                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1494                     "Truncated LHa file body");
1495                 return (ARCHIVE_FATAL);
1496         }
1497         if (bytes_avail > lha->entry_bytes_remaining)
1498                 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1499
1500         lha->strm.avail_in = (int)bytes_avail;
1501         lha->strm.total_in = 0;
1502         lha->strm.avail_out = 0;
1503
1504         r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1505         switch (r) {
1506         case ARCHIVE_OK:
1507                 break;
1508         case ARCHIVE_EOF:
1509                 lha->end_of_entry = 1;
1510                 break;
1511         default:
1512                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1513                     "Bad lzh data");
1514                 return (ARCHIVE_FAILED);
1515         }
1516         lha->entry_unconsumed = lha->strm.total_in;
1517         lha->entry_bytes_remaining -= lha->strm.total_in;
1518
1519         if (lha->strm.avail_out) {
1520                 *offset = lha->entry_offset;
1521                 *size = lha->strm.avail_out;
1522                 *buff = lha->strm.ref_ptr;
1523                 lha->entry_crc_calculated =
1524                     lha_crc16(lha->entry_crc_calculated, *buff, *size);
1525                 lha->entry_offset += *size;
1526         } else {
1527                 *offset = lha->entry_offset;
1528                 *size = 0;
1529                 *buff = NULL;
1530                 if (lha->end_of_entry)
1531                         return (lha_end_of_entry(a));
1532         }
1533         return (ARCHIVE_OK);
1534 }
1535
1536 /*
1537  * Skip a file content.
1538  */
1539 static int
1540 archive_read_format_lha_read_data_skip(struct archive_read *a)
1541 {
1542         struct lha *lha;
1543         int64_t bytes_skipped;
1544
1545         lha = (struct lha *)(a->format->data);
1546
1547         if (lha->entry_unconsumed) {
1548                 /* Consume as much as the decompressor actually used. */
1549                 __archive_read_consume(a, lha->entry_unconsumed);
1550                 lha->entry_unconsumed = 0;
1551         }
1552
1553         /* if we've already read to end of data, we're done. */
1554         if (lha->end_of_entry_cleanup)
1555                 return (ARCHIVE_OK);
1556
1557         /*
1558          * If the length is at the beginning, we can skip the
1559          * compressed data much more quickly.
1560          */
1561         bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1562         if (bytes_skipped < 0)
1563                 return (ARCHIVE_FATAL);
1564
1565         /* This entry is finished and done. */
1566         lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1567         return (ARCHIVE_OK);
1568 }
1569
1570 static int
1571 archive_read_format_lha_cleanup(struct archive_read *a)
1572 {
1573         struct lha *lha = (struct lha *)(a->format->data);
1574
1575         lzh_decode_free(&(lha->strm));
1576         archive_string_free(&(lha->dirname));
1577         archive_string_free(&(lha->filename));
1578         archive_string_free(&(lha->uname));
1579         archive_string_free(&(lha->gname));
1580         archive_wstring_free(&(lha->ws));
1581         free(lha);
1582         (a->format->data) = NULL;
1583         return (ARCHIVE_OK);
1584 }
1585
1586 /*
1587  * 'LHa for UNIX' utility has archived a symbolic-link name after
1588  * a pathname with '|' character.
1589  * This function extracts the symbolic-link name from the pathname.
1590  *
1591  * example.
1592  *   1. a symbolic-name is 'aaa/bb/cc'
1593  *   2. a filename is 'xxx/bbb'
1594  *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1595  */
1596 static int
1597 lha_parse_linkname(struct archive_string *linkname,
1598     struct archive_string *pathname)
1599 {
1600         char *  linkptr;
1601         size_t  symlen;
1602
1603         linkptr = strchr(pathname->s, '|');
1604         if (linkptr != NULL) {
1605                 symlen = strlen(linkptr + 1);
1606                 archive_strncpy(linkname, linkptr+1, symlen);
1607
1608                 *linkptr = 0;
1609                 pathname->length = strlen(pathname->s);
1610
1611                 return (1);
1612         }
1613         return (0);
1614 }
1615
1616 /* Convert an MSDOS-style date/time into Unix-style time. */
1617 static time_t
1618 lha_dos_time(const unsigned char *p)
1619 {
1620         int msTime, msDate;
1621         struct tm ts;
1622
1623         msTime = archive_le16dec(p);
1624         msDate = archive_le16dec(p+2);
1625
1626         memset(&ts, 0, sizeof(ts));
1627         ts.tm_year = ((msDate >> 9) & 0x7f) + 80;   /* Years since 1900. */
1628         ts.tm_mon = ((msDate >> 5) & 0x0f) - 1;     /* Month number.     */
1629         ts.tm_mday = msDate & 0x1f;                 /* Day of month.     */
1630         ts.tm_hour = (msTime >> 11) & 0x1f;
1631         ts.tm_min = (msTime >> 5) & 0x3f;
1632         ts.tm_sec = (msTime << 1) & 0x3e;
1633         ts.tm_isdst = -1;
1634         return (mktime(&ts));
1635 }
1636
1637 /* Convert an MS-Windows-style date/time into Unix-style time. */
1638 static time_t
1639 lha_win_time(uint64_t wintime, long *ns)
1640 {
1641 #define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)
1642
1643         if (wintime >= EPOC_TIME) {
1644                 wintime -= EPOC_TIME;   /* 1970-01-01 00:00:00 (UTC) */
1645                 if (ns != NULL)
1646                         *ns = (long)(wintime % 10000000) * 100;
1647                 return (wintime / 10000000);
1648         } else {
1649                 if (ns != NULL)
1650                         *ns = 0;
1651                 return (0);
1652         }
1653 }
1654
1655 static unsigned char
1656 lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
1657 {
1658         unsigned char const *p = (unsigned char const *)pp;
1659
1660         p += offset;
1661         for (;size > 0; --size)
1662                 sum += *p++;
1663         return (sum);
1664 }
1665
1666 static uint16_t crc16tbl[2][256];
1667 static void
1668 lha_crc16_init(void)
1669 {
1670         unsigned int i;
1671         static int crc16init = 0;
1672
1673         if (crc16init)
1674                 return;
1675         crc16init = 1;
1676
1677         for (i = 0; i < 256; i++) {
1678                 unsigned int j;
1679                 uint16_t crc = (uint16_t)i;
1680                 for (j = 8; j; j--)
1681                         crc = (crc >> 1) ^ ((crc & 1) * 0xA001);
1682                 crc16tbl[0][i] = crc;
1683         }
1684
1685         for (i = 0; i < 256; i++) {
1686                 crc16tbl[1][i] = (crc16tbl[0][i] >> 8)
1687                         ^ crc16tbl[0][crc16tbl[0][i] & 0xff];
1688         }
1689 }
1690
1691 static uint16_t
1692 lha_crc16(uint16_t crc, const void *pp, size_t len)
1693 {
1694         const unsigned char *p = (const unsigned char *)pp;
1695         const uint16_t *buff;
1696         const union {
1697                 uint32_t i;
1698                 char c[4];
1699         } u = { 0x01020304 };
1700
1701         if (len == 0)
1702                 return crc;
1703
1704         /* Process unaligned address. */
1705         if (((uintptr_t)p) & (uintptr_t)0x1) {
1706                 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1707                 len--;
1708         }
1709         buff = (const uint16_t *)p;
1710         /*
1711          * Modern C compiler such as GCC does not unroll automatically yet
1712          * without unrolling pragma, and Clang is so. So we should
1713          * unroll this loop for its performance.
1714          */
1715         for (;len >= 8; len -= 8) {
1716                 /* This if statement expects compiler optimization will
1717                  * remove the statement which will not be executed. */
1718 #undef bswap16
1719 #if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1720 #  define bswap16(x) _byteswap_ushort(x)
1721 #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 8) || __GNUC__ > 4)
1722 /* GCC 4.8 and later has __builtin_bswap16() */
1723 #  define bswap16(x) __builtin_bswap16(x)
1724 #elif defined(__clang__)
1725 /* All clang versions have __builtin_bswap16() */
1726 #  define bswap16(x) __builtin_bswap16(x)
1727 #else
1728 #  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1729 #endif
1730 #define CRC16W  do {    \
1731                 if(u.c[0] == 1) { /* Big endian */              \
1732                         crc ^= bswap16(*buff); buff++;          \
1733                 } else                                          \
1734                         crc ^= *buff++;                         \
1735                 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1736 } while (0)
1737                 CRC16W;
1738                 CRC16W;
1739                 CRC16W;
1740                 CRC16W;
1741 #undef CRC16W
1742 #undef bswap16
1743         }
1744
1745         p = (const unsigned char *)buff;
1746         for (;len; len--) {
1747                 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1748         }
1749         return crc;
1750 }
1751
1752 /*
1753  * Initialize LZHUF decoder.
1754  *
1755  * Returns ARCHIVE_OK if initialization was successful.
1756  * Returns ARCHIVE_FAILED if method is unsupported.
1757  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1758  * error occurred.
1759  */
1760 static int
1761 lzh_decode_init(struct lzh_stream *strm, const char *method)
1762 {
1763         struct lzh_dec *ds;
1764         int w_bits, w_size;
1765
1766         if (strm->ds == NULL) {
1767                 strm->ds = calloc(1, sizeof(*strm->ds));
1768                 if (strm->ds == NULL)
1769                         return (ARCHIVE_FATAL);
1770         }
1771         ds = strm->ds;
1772         ds->error = ARCHIVE_FAILED;
1773         if (method == NULL || method[0] != 'l' || method[1] != 'h')
1774                 return (ARCHIVE_FAILED);
1775         switch (method[2]) {
1776         case '5':
1777                 w_bits = 13;/* 8KiB for window */
1778                 break;
1779         case '6':
1780                 w_bits = 15;/* 32KiB for window */
1781                 break;
1782         case '7':
1783                 w_bits = 16;/* 64KiB for window */
1784                 break;
1785         default:
1786                 return (ARCHIVE_FAILED);/* Not supported. */
1787         }
1788         ds->error = ARCHIVE_FATAL;
1789         /* Expand a window size up to 128 KiB for decompressing process
1790          * performance whatever its original window size is. */
1791         ds->w_size = 1U << 17;
1792         ds->w_mask = ds->w_size -1;
1793         if (ds->w_buff == NULL) {
1794                 ds->w_buff = malloc(ds->w_size);
1795                 if (ds->w_buff == NULL)
1796                         return (ARCHIVE_FATAL);
1797         }
1798         w_size = 1U << w_bits;
1799         memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1800         ds->w_pos = 0;
1801         ds->state = 0;
1802         ds->pos_pt_len_size = w_bits + 1;
1803         ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1804         ds->literal_pt_len_size = PT_BITLEN_SIZE;
1805         ds->literal_pt_len_bits = 5;
1806         ds->br.cache_buffer = 0;
1807         ds->br.cache_avail = 0;
1808
1809         if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1810             != ARCHIVE_OK)
1811                 return (ARCHIVE_FATAL);
1812         ds->lt.len_bits = 9;
1813         if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1814             != ARCHIVE_OK)
1815                 return (ARCHIVE_FATAL);
1816         ds->error = 0;
1817
1818         return (ARCHIVE_OK);
1819 }
1820
1821 /*
1822  * Release LZHUF decoder.
1823  */
1824 static void
1825 lzh_decode_free(struct lzh_stream *strm)
1826 {
1827
1828         if (strm->ds == NULL)
1829                 return;
1830         free(strm->ds->w_buff);
1831         lzh_huffman_free(&(strm->ds->lt));
1832         lzh_huffman_free(&(strm->ds->pt));
1833         free(strm->ds);
1834         strm->ds = NULL;
1835 }
1836
1837 /*
1838  * Bit stream reader.
1839  */
1840 /* Check that the cache buffer has enough bits. */
1841 #define lzh_br_has(br, n)       ((br)->cache_avail >= n)
1842 /* Get compressed data by bit. */
1843 #define lzh_br_bits(br, n)                              \
1844         (((uint16_t)((br)->cache_buffer >>              \
1845                 ((br)->cache_avail - (n)))) & cache_masks[n])
1846 #define lzh_br_bits_forced(br, n)                       \
1847         (((uint16_t)((br)->cache_buffer <<              \
1848                 ((n) - (br)->cache_avail))) & cache_masks[n])
1849 /* Read ahead to make sure the cache buffer has enough compressed data we
1850  * will use.
1851  *  True  : completed, there is enough data in the cache buffer.
1852  *  False : we met that strm->next_in is empty, we have to get following
1853  *          bytes. */
1854 #define lzh_br_read_ahead_0(strm, br, n)        \
1855         (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
1856 /*  True  : the cache buffer has some bits as much as we need.
1857  *  False : there are no enough bits in the cache buffer to be used,
1858  *          we have to get following bytes if we could. */
1859 #define lzh_br_read_ahead(strm, br, n)  \
1860         (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))
1861
1862 /* Notify how many bits we consumed. */
1863 #define lzh_br_consume(br, n)   ((br)->cache_avail -= (n))
1864 #define lzh_br_unconsume(br, n) ((br)->cache_avail += (n))
1865
1866 static const uint16_t cache_masks[] = {
1867         0x0000, 0x0001, 0x0003, 0x0007,
1868         0x000F, 0x001F, 0x003F, 0x007F,
1869         0x00FF, 0x01FF, 0x03FF, 0x07FF,
1870         0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
1871         0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
1872 };
1873
1874 /*
1875  * Shift away used bits in the cache data and fill it up with following bits.
1876  * Call this when cache buffer does not have enough bits you need.
1877  *
1878  * Returns 1 if the cache buffer is full.
1879  * Returns 0 if the cache buffer is not full; input buffer is empty.
1880  */
1881 static int
1882 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1883 {
1884         int n = CACHE_BITS - br->cache_avail;
1885
1886         for (;;) {
1887                 const int x = n >> 3;
1888                 if (strm->avail_in >= x) {
1889                         switch (x) {
1890                         case 8:
1891                                 br->cache_buffer =
1892                                     ((uint64_t)strm->next_in[0]) << 56 |
1893                                     ((uint64_t)strm->next_in[1]) << 48 |
1894                                     ((uint64_t)strm->next_in[2]) << 40 |
1895                                     ((uint64_t)strm->next_in[3]) << 32 |
1896                                     ((uint32_t)strm->next_in[4]) << 24 |
1897                                     ((uint32_t)strm->next_in[5]) << 16 |
1898                                     ((uint32_t)strm->next_in[6]) << 8 |
1899                                      (uint32_t)strm->next_in[7];
1900                                 strm->next_in += 8;
1901                                 strm->avail_in -= 8;
1902                                 br->cache_avail += 8 * 8;
1903                                 return (1);
1904                         case 7:
1905                                 br->cache_buffer =
1906                                    (br->cache_buffer << 56) |
1907                                     ((uint64_t)strm->next_in[0]) << 48 |
1908                                     ((uint64_t)strm->next_in[1]) << 40 |
1909                                     ((uint64_t)strm->next_in[2]) << 32 |
1910                                     ((uint32_t)strm->next_in[3]) << 24 |
1911                                     ((uint32_t)strm->next_in[4]) << 16 |
1912                                     ((uint32_t)strm->next_in[5]) << 8 |
1913                                      (uint32_t)strm->next_in[6];
1914                                 strm->next_in += 7;
1915                                 strm->avail_in -= 7;
1916                                 br->cache_avail += 7 * 8;
1917                                 return (1);
1918                         case 6:
1919                                 br->cache_buffer =
1920                                    (br->cache_buffer << 48) |
1921                                     ((uint64_t)strm->next_in[0]) << 40 |
1922                                     ((uint64_t)strm->next_in[1]) << 32 |
1923                                     ((uint32_t)strm->next_in[2]) << 24 |
1924                                     ((uint32_t)strm->next_in[3]) << 16 |
1925                                     ((uint32_t)strm->next_in[4]) << 8 |
1926                                      (uint32_t)strm->next_in[5];
1927                                 strm->next_in += 6;
1928                                 strm->avail_in -= 6;
1929                                 br->cache_avail += 6 * 8;
1930                                 return (1);
1931                         case 0:
1932                                 /* We have enough compressed data in
1933                                  * the cache buffer.*/
1934                                 return (1);
1935                         default:
1936                                 break;
1937                         }
1938                 }
1939                 if (strm->avail_in == 0) {
1940                         /* There is not enough compressed data to fill up the
1941                          * cache buffer. */
1942                         return (0);
1943                 }
1944                 br->cache_buffer =
1945                    (br->cache_buffer << 8) | *strm->next_in++;
1946                 strm->avail_in--;
1947                 br->cache_avail += 8;
1948                 n -= 8;
1949         }
1950 }
1951
1952 /*
1953  * Decode LZHUF.
1954  *
1955  * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1956  *    Please set available buffer and call this function again.
1957  * 2. Returns ARCHIVE_EOF if decompression has been completed.
1958  * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1959  *    is broken or you do not set 'last' flag properly.
1960  * 4. 'last' flag is very important, you must set 1 to the flag if there
1961  *    is no input data. The lha compressed data format does not provide how
1962  *    to know the compressed data is really finished.
1963  *    Note: lha command utility check if the total size of output bytes is
1964  *    reached the uncompressed size recorded in its header. it does not mind
1965  *    that the decoding process is properly finished.
1966  *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
1967  *    it handles EOF as null to fill read buffer with zero until the decoding
1968  *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
1969  *    zeros are treated as the mark of the end of the data although the zeros
1970  *    is dummy, not the file data.
1971  */
1972 static int      lzh_read_blocks(struct lzh_stream *, int);
1973 static int      lzh_decode_blocks(struct lzh_stream *, int);
1974 #define ST_RD_BLOCK             0
1975 #define ST_RD_PT_1              1
1976 #define ST_RD_PT_2              2
1977 #define ST_RD_PT_3              3
1978 #define ST_RD_PT_4              4
1979 #define ST_RD_LITERAL_1         5
1980 #define ST_RD_LITERAL_2         6
1981 #define ST_RD_LITERAL_3         7
1982 #define ST_RD_POS_DATA_1        8
1983 #define ST_GET_LITERAL          9
1984 #define ST_GET_POS_1            10
1985 #define ST_GET_POS_2            11
1986 #define ST_COPY_DATA            12
1987
1988 static int
1989 lzh_decode(struct lzh_stream *strm, int last)
1990 {
1991         struct lzh_dec *ds = strm->ds;
1992         int avail_in;
1993         int r;
1994
1995         if (ds->error)
1996                 return (ds->error);
1997
1998         avail_in = strm->avail_in;
1999         do {
2000                 if (ds->state < ST_GET_LITERAL)
2001                         r = lzh_read_blocks(strm, last);
2002                 else
2003                         r = lzh_decode_blocks(strm, last);
2004         } while (r == 100);
2005         strm->total_in += avail_in - strm->avail_in;
2006         return (r);
2007 }
2008
2009 static void
2010 lzh_emit_window(struct lzh_stream *strm, size_t s)
2011 {
2012         strm->ref_ptr = strm->ds->w_buff;
2013         strm->avail_out = (int)s;
2014         strm->total_out += s;
2015 }
2016
2017 static int
2018 lzh_read_blocks(struct lzh_stream *strm, int last)
2019 {
2020         struct lzh_dec *ds = strm->ds;
2021         struct lzh_br *br = &(ds->br);
2022         int c = 0, i;
2023         unsigned rbits;
2024
2025         for (;;) {
2026                 switch (ds->state) {
2027                 case ST_RD_BLOCK:
2028                         /*
2029                          * Read a block number indicates how many blocks
2030                          * we will handle. The block is composed of a
2031                          * literal and a match, sometimes a literal only
2032                          * in particular, there are no reference data at
2033                          * the beginning of the decompression.
2034                          */
2035                         if (!lzh_br_read_ahead_0(strm, br, 16)) {
2036                                 if (!last)
2037                                         /* We need following data. */
2038                                         return (ARCHIVE_OK);
2039                                 if (lzh_br_has(br, 8)) {
2040                                         /*
2041                                          * It seems there are extra bits.
2042                                          *  1. Compressed data is broken.
2043                                          *  2. `last' flag does not properly
2044                                          *     set.
2045                                          */
2046                                         goto failed;
2047                                 }
2048                                 if (ds->w_pos > 0) {
2049                                         lzh_emit_window(strm, ds->w_pos);
2050                                         ds->w_pos = 0;
2051                                         return (ARCHIVE_OK);
2052                                 }
2053                                 /* End of compressed data; we have completely
2054                                  * handled all compressed data. */
2055                                 return (ARCHIVE_EOF);
2056                         }
2057                         ds->blocks_avail = lzh_br_bits(br, 16);
2058                         if (ds->blocks_avail == 0)
2059                                 goto failed;
2060                         lzh_br_consume(br, 16);
2061                         /*
2062                          * Read a literal table compressed in huffman
2063                          * coding.
2064                          */
2065                         ds->pt.len_size = ds->literal_pt_len_size;
2066                         ds->pt.len_bits = ds->literal_pt_len_bits;
2067                         ds->reading_position = 0;
2068                         /* FALL THROUGH */
2069                 case ST_RD_PT_1:
2070                         /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2071                          * used in reading both a literal table and a
2072                          * position table. */
2073                         if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2074                                 if (last)
2075                                         goto failed;/* Truncated data. */
2076                                 ds->state = ST_RD_PT_1;
2077                                 return (ARCHIVE_OK);
2078                         }
2079                         ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2080                         lzh_br_consume(br, ds->pt.len_bits);
2081                         /* FALL THROUGH */
2082                 case ST_RD_PT_2:
2083                         if (ds->pt.len_avail == 0) {
2084                                 /* There is no bitlen. */
2085                                 if (!lzh_br_read_ahead(strm, br,
2086                                     ds->pt.len_bits)) {
2087                                         if (last)
2088                                                 goto failed;/* Truncated data.*/
2089                                         ds->state = ST_RD_PT_2;
2090                                         return (ARCHIVE_OK);
2091                                 }
2092                                 if (!lzh_make_fake_table(&(ds->pt),
2093                                     lzh_br_bits(br, ds->pt.len_bits)))
2094                                         goto failed;/* Invalid data. */
2095                                 lzh_br_consume(br, ds->pt.len_bits);
2096                                 if (ds->reading_position)
2097                                         ds->state = ST_GET_LITERAL;
2098                                 else
2099                                         ds->state = ST_RD_LITERAL_1;
2100                                 break;
2101                         } else if (ds->pt.len_avail > ds->pt.len_size)
2102                                 goto failed;/* Invalid data. */
2103                         ds->loop = 0;
2104                         memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2105                         if (ds->pt.len_avail < 3 ||
2106                             ds->pt.len_size == ds->pos_pt_len_size) {
2107                                 ds->state = ST_RD_PT_4;
2108                                 break;
2109                         }
2110                         /* FALL THROUGH */
2111                 case ST_RD_PT_3:
2112                         ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2113                         if (ds->loop < 3) {
2114                                 if (ds->loop < 0 || last)
2115                                         goto failed;/* Invalid data. */
2116                                 /* Not completed, get following data. */
2117                                 ds->state = ST_RD_PT_3;
2118                                 return (ARCHIVE_OK);
2119                         }
2120                         /* There are some null in bitlen of the literal. */
2121                         if (!lzh_br_read_ahead(strm, br, 2)) {
2122                                 if (last)
2123                                         goto failed;/* Truncated data. */
2124                                 ds->state = ST_RD_PT_3;
2125                                 return (ARCHIVE_OK);
2126                         }
2127                         c = lzh_br_bits(br, 2);
2128                         lzh_br_consume(br, 2);
2129                         if (c > ds->pt.len_avail - 3)
2130                                 goto failed;/* Invalid data. */
2131                         for (i = 3; c-- > 0 ;)
2132                                 ds->pt.bitlen[i++] = 0;
2133                         ds->loop = i;
2134                         /* FALL THROUGH */
2135                 case ST_RD_PT_4:
2136                         ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2137                             ds->pt.len_avail);
2138                         if (ds->loop < ds->pt.len_avail) {
2139                                 if (ds->loop < 0 || last)
2140                                         goto failed;/* Invalid data. */
2141                                 /* Not completed, get following data. */
2142                                 ds->state = ST_RD_PT_4;
2143                                 return (ARCHIVE_OK);
2144                         }
2145                         if (!lzh_make_huffman_table(&(ds->pt)))
2146                                 goto failed;/* Invalid data */
2147                         if (ds->reading_position) {
2148                                 ds->state = ST_GET_LITERAL;
2149                                 break;
2150                         }
2151                         /* FALL THROUGH */
2152                 case ST_RD_LITERAL_1:
2153                         if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2154                                 if (last)
2155                                         goto failed;/* Truncated data. */
2156                                 ds->state = ST_RD_LITERAL_1;
2157                                 return (ARCHIVE_OK);
2158                         }
2159                         ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2160                         lzh_br_consume(br, ds->lt.len_bits);
2161                         /* FALL THROUGH */
2162                 case ST_RD_LITERAL_2:
2163                         if (ds->lt.len_avail == 0) {
2164                                 /* There is no bitlen. */
2165                                 if (!lzh_br_read_ahead(strm, br,
2166                                     ds->lt.len_bits)) {
2167                                         if (last)
2168                                                 goto failed;/* Truncated data.*/
2169                                         ds->state = ST_RD_LITERAL_2;
2170                                         return (ARCHIVE_OK);
2171                                 }
2172                                 if (!lzh_make_fake_table(&(ds->lt),
2173                                     lzh_br_bits(br, ds->lt.len_bits)))
2174                                         goto failed;/* Invalid data */
2175                                 lzh_br_consume(br, ds->lt.len_bits);
2176                                 ds->state = ST_RD_POS_DATA_1;
2177                                 break;
2178                         } else if (ds->lt.len_avail > ds->lt.len_size)
2179                                 goto failed;/* Invalid data */
2180                         ds->loop = 0;
2181                         memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2182                         /* FALL THROUGH */
2183                 case ST_RD_LITERAL_3:
2184                         i = ds->loop;
2185                         while (i < ds->lt.len_avail) {
2186                                 if (!lzh_br_read_ahead(strm, br,
2187                                     ds->pt.max_bits)) {
2188                                         if (last)
2189                                                 goto failed;/* Truncated data.*/
2190                                         ds->loop = i;
2191                                         ds->state = ST_RD_LITERAL_3;
2192                                         return (ARCHIVE_OK);
2193                                 }
2194                                 rbits = lzh_br_bits(br, ds->pt.max_bits);
2195                                 c = lzh_decode_huffman(&(ds->pt), rbits);
2196                                 if (c > 2) {
2197                                         /* Note: 'c' will never be more than
2198                                          * eighteen since it's limited by
2199                                          * PT_BITLEN_SIZE, which is being set
2200                                          * to ds->pt.len_size through
2201                                          * ds->literal_pt_len_size. */
2202                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2203                                         c -= 2;
2204                                         ds->lt.freq[c]++;
2205                                         ds->lt.bitlen[i++] = c;
2206                                 } else if (c == 0) {
2207                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2208                                         ds->lt.bitlen[i++] = 0;
2209                                 } else {
2210                                         /* c == 1 or c == 2 */
2211                                         int n = (c == 1)?4:9;
2212                                         if (!lzh_br_read_ahead(strm, br,
2213                                              ds->pt.bitlen[c] + n)) {
2214                                                 if (last) /* Truncated data. */
2215                                                         goto failed;
2216                                                 ds->loop = i;
2217                                                 ds->state = ST_RD_LITERAL_3;
2218                                                 return (ARCHIVE_OK);
2219                                         }
2220                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2221                                         c = lzh_br_bits(br, n);
2222                                         lzh_br_consume(br, n);
2223                                         c += (n == 4)?3:20;
2224                                         if (i + c > ds->lt.len_avail)
2225                                                 goto failed;/* Invalid data */
2226                                         memset(&(ds->lt.bitlen[i]), 0, c);
2227                                         i += c;
2228                                 }
2229                         }
2230                         if (i > ds->lt.len_avail ||
2231                             !lzh_make_huffman_table(&(ds->lt)))
2232                                 goto failed;/* Invalid data */
2233                         /* FALL THROUGH */
2234                 case ST_RD_POS_DATA_1:
2235                         /*
2236                          * Read a position table compressed in huffman
2237                          * coding.
2238                          */
2239                         ds->pt.len_size = ds->pos_pt_len_size;
2240                         ds->pt.len_bits = ds->pos_pt_len_bits;
2241                         ds->reading_position = 1;
2242                         ds->state = ST_RD_PT_1;
2243                         break;
2244                 case ST_GET_LITERAL:
2245                         return (100);
2246                 }
2247         }
2248 failed:
2249         return (ds->error = ARCHIVE_FAILED);
2250 }
2251
2252 static int
2253 lzh_decode_blocks(struct lzh_stream *strm, int last)
2254 {
2255         struct lzh_dec *ds = strm->ds;
2256         struct lzh_br bre = ds->br;
2257         struct huffman *lt = &(ds->lt);
2258         struct huffman *pt = &(ds->pt);
2259         unsigned char *w_buff = ds->w_buff;
2260         unsigned char *lt_bitlen = lt->bitlen;
2261         unsigned char *pt_bitlen = pt->bitlen;
2262         int blocks_avail = ds->blocks_avail, c = 0;
2263         int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
2264         int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
2265         int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
2266         int state = ds->state;
2267
2268         for (;;) {
2269                 switch (state) {
2270                 case ST_GET_LITERAL:
2271                         for (;;) {
2272                                 if (blocks_avail == 0) {
2273                                         /* We have decoded all blocks.
2274                                          * Let's handle next blocks. */
2275                                         ds->state = ST_RD_BLOCK;
2276                                         ds->br = bre;
2277                                         ds->blocks_avail = 0;
2278                                         ds->w_pos = w_pos;
2279                                         ds->copy_pos = 0;
2280                                         return (100);
2281                                 }
2282
2283                                 /* lzh_br_read_ahead() always try to fill the
2284                                  * cache buffer up. In specific situation we
2285                                  * are close to the end of the data, the cache
2286                                  * buffer will not be full and thus we have to
2287                                  * determine if the cache buffer has some bits
2288                                  * as much as we need after lzh_br_read_ahead()
2289                                  * failed. */
2290                                 if (!lzh_br_read_ahead(strm, &bre,
2291                                     lt_max_bits)) {
2292                                         if (!last)
2293                                                 goto next_data;
2294                                         /* Remaining bits are less than
2295                                          * maximum bits(lt.max_bits) but maybe
2296                                          * it still remains as much as we need,
2297                                          * so we should try to use it with
2298                                          * dummy bits. */
2299                                         c = lzh_decode_huffman(lt,
2300                                               lzh_br_bits_forced(&bre,
2301                                                 lt_max_bits));
2302                                         lzh_br_consume(&bre, lt_bitlen[c]);
2303                                         if (!lzh_br_has(&bre, 0))
2304                                                 goto failed;/* Over read. */
2305                                 } else {
2306                                         c = lzh_decode_huffman(lt,
2307                                               lzh_br_bits(&bre, lt_max_bits));
2308                                         lzh_br_consume(&bre, lt_bitlen[c]);
2309                                 }
2310                                 blocks_avail--;
2311                                 if (c > UCHAR_MAX)
2312                                         /* Current block is a match data. */
2313                                         break;
2314                                 /*
2315                                  * 'c' is exactly a literal code.
2316                                  */
2317                                 /* Save a decoded code to reference it
2318                                  * afterward. */
2319                                 w_buff[w_pos] = c;
2320                                 if (++w_pos >= w_size) {
2321                                         w_pos = 0;
2322                                         lzh_emit_window(strm, w_size);
2323                                         goto next_data;
2324                                 }
2325                         }
2326                         /* 'c' is the length of a match pattern we have
2327                          * already extracted, which has be stored in
2328                          * window(ds->w_buff). */
2329                         copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
2330                         /* FALL THROUGH */
2331                 case ST_GET_POS_1:
2332                         /*
2333                          * Get a reference position. 
2334                          */
2335                         if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
2336                                 if (!last) {
2337                                         state = ST_GET_POS_1;
2338                                         ds->copy_len = copy_len;
2339                                         goto next_data;
2340                                 }
2341                                 copy_pos = lzh_decode_huffman(pt,
2342                                     lzh_br_bits_forced(&bre, pt_max_bits));
2343                                 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2344                                 if (!lzh_br_has(&bre, 0))
2345                                         goto failed;/* Over read. */
2346                         } else {
2347                                 copy_pos = lzh_decode_huffman(pt,
2348                                     lzh_br_bits(&bre, pt_max_bits));
2349                                 lzh_br_consume(&bre, pt_bitlen[copy_pos]);
2350                         }
2351                         /* FALL THROUGH */
2352                 case ST_GET_POS_2:
2353                         if (copy_pos > 1) {
2354                                 /* We need an additional adjustment number to
2355                                  * the position. */
2356                                 int p = copy_pos - 1;
2357                                 if (!lzh_br_read_ahead(strm, &bre, p)) {
2358                                         if (last)
2359                                                 goto failed;/* Truncated data.*/
2360                                         state = ST_GET_POS_2;
2361                                         ds->copy_len = copy_len;
2362                                         ds->copy_pos = copy_pos;
2363                                         goto next_data;
2364                                 }
2365                                 copy_pos = (1 << p) + lzh_br_bits(&bre, p);
2366                                 lzh_br_consume(&bre, p);
2367                         }
2368                         /* The position is actually a distance from the last
2369                          * code we had extracted and thus we have to convert
2370                          * it to a position of the window. */
2371                         copy_pos = (w_pos - copy_pos - 1) & w_mask;
2372                         /* FALL THROUGH */
2373                 case ST_COPY_DATA:
2374                         /*
2375                          * Copy `copy_len' bytes as extracted data from
2376                          * the window into the output buffer.
2377                          */
2378                         for (;;) {
2379                                 int l;
2380
2381                                 l = copy_len;
2382                                 if (copy_pos > w_pos) {
2383                                         if (l > w_size - copy_pos)
2384                                                 l = w_size - copy_pos;
2385                                 } else {
2386                                         if (l > w_size - w_pos)
2387                                                 l = w_size - w_pos;
2388                                 }
2389                                 if ((copy_pos + l < w_pos)
2390                                     || (w_pos + l < copy_pos)) {
2391                                         /* No overlap. */
2392                                         memcpy(w_buff + w_pos,
2393                                             w_buff + copy_pos, l);
2394                                 } else {
2395                                         const unsigned char *s;
2396                                         unsigned char *d;
2397                                         int li;
2398
2399                                         d = w_buff + w_pos;
2400                                         s = w_buff + copy_pos;
2401                                         for (li = 0; li < l-1;) {
2402                                                 d[li] = s[li];li++;
2403                                                 d[li] = s[li];li++;
2404                                         }
2405                                         if (li < l)
2406                                                 d[li] = s[li];
2407                                 }
2408                                 w_pos += l;
2409                                 if (w_pos == w_size) {
2410                                         w_pos = 0;
2411                                         lzh_emit_window(strm, w_size);
2412                                         if (copy_len <= l)
2413                                                 state = ST_GET_LITERAL;
2414                                         else {
2415                                                 state = ST_COPY_DATA;
2416                                                 ds->copy_len = copy_len - l;
2417                                                 ds->copy_pos =
2418                                                     (copy_pos + l) & w_mask;
2419                                         }
2420                                         goto next_data;
2421                                 }
2422                                 if (copy_len <= l)
2423                                         /* A copy of current pattern ended. */
2424                                         break;
2425                                 copy_len -= l;
2426                                 copy_pos = (copy_pos + l) & w_mask;
2427                         }
2428                         state = ST_GET_LITERAL;
2429                         break;
2430                 }
2431         }
2432 failed:
2433         return (ds->error = ARCHIVE_FAILED);
2434 next_data:
2435         ds->br = bre;
2436         ds->blocks_avail = blocks_avail;
2437         ds->state = state;
2438         ds->w_pos = w_pos;
2439         return (ARCHIVE_OK);
2440 }
2441
2442 static int
2443 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2444 {
2445         int bits;
2446
2447         if (hf->bitlen == NULL) {
2448                 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2449                 if (hf->bitlen == NULL)
2450                         return (ARCHIVE_FATAL);
2451         }
2452         if (hf->tbl == NULL) {
2453                 if (tbl_bits < HTBL_BITS)
2454                         bits = tbl_bits;
2455                 else
2456                         bits = HTBL_BITS;
2457                 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2458                 if (hf->tbl == NULL)
2459                         return (ARCHIVE_FATAL);
2460         }
2461         if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2462                 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2463                 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2464                 if (hf->tree == NULL)
2465                         return (ARCHIVE_FATAL);
2466         }
2467         hf->len_size = (int)len_size;
2468         hf->tbl_bits = tbl_bits;
2469         return (ARCHIVE_OK);
2470 }
2471
2472 static void
2473 lzh_huffman_free(struct huffman *hf)
2474 {
2475         free(hf->bitlen);
2476         free(hf->tbl);
2477         free(hf->tree);
2478 }
2479
2480 static const char bitlen_tbl[0x400] = {
2481          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2482          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2483          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2484          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2485          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2486          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2487          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2488          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2489          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2490          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2491          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2492          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2493          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2494          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2495          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2496          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2497          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2498          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2499          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2500          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2501          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2502          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2503          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2504          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2505          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2506          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2507          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2508          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2509          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2510          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2511          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2512          7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
2513          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2514          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2515          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2516          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2517          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2518          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2519          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2520          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2521          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2522          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2523          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2524          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2525          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2526          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2527          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2528          8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
2529          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2530          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2531          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2532          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2533          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2534          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2535          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2536          9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
2537         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2538         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2539         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2540         10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
2541         11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2542         11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
2543         12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
2544         13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
2545 };
2546 static int
2547 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2548 {
2549         struct lzh_dec *ds = strm->ds;
2550         struct lzh_br *br = &(ds->br);
2551         int c, i;
2552
2553         for (i = start; i < end; ) {
2554                 /*
2555                  *  bit pattern     the number we need
2556                  *     000           ->  0
2557                  *     001           ->  1
2558                  *     010           ->  2
2559                  *     ...
2560                  *     110           ->  6
2561                  *     1110          ->  7
2562                  *     11110         ->  8
2563                  *     ...
2564                  *     1111111111110 ->  16
2565                  */
2566                 if (!lzh_br_read_ahead(strm, br, 3))
2567                         return (i);
2568                 if ((c = lzh_br_bits(br, 3)) == 7) {
2569                         if (!lzh_br_read_ahead(strm, br, 13))
2570                                 return (i);
2571                         c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2572                         if (c)
2573                                 lzh_br_consume(br, c - 3);
2574                         else
2575                                 return (-1);/* Invalid data. */
2576                 } else
2577                         lzh_br_consume(br, 3);
2578                 ds->pt.bitlen[i++] = c;
2579                 ds->pt.freq[c]++;
2580         }
2581         return (i);
2582 }
2583
2584 static int
2585 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2586 {
2587         if (c >= hf->len_size)
2588                 return (0);
2589         hf->tbl[0] = c;
2590         hf->max_bits = 0;
2591         hf->shift_bits = 0;
2592         hf->bitlen[hf->tbl[0]] = 0;
2593         return (1);
2594 }
2595
2596 /*
2597  * Make a huffman coding table.
2598  */
2599 static int
2600 lzh_make_huffman_table(struct huffman *hf)
2601 {
2602         uint16_t *tbl;
2603         const unsigned char *bitlen;
2604         int bitptn[17], weight[17];
2605         int i, maxbits = 0, ptn, tbl_size, w;
2606         int diffbits, len_avail;
2607
2608         /*
2609          * Initialize bit patterns.
2610          */
2611         ptn = 0;
2612         for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
2613                 bitptn[i] = ptn;
2614                 weight[i] = w;
2615                 if (hf->freq[i]) {
2616                         ptn += hf->freq[i] * w;
2617                         maxbits = i;
2618                 }
2619         }
2620         if (ptn != 0x10000 || maxbits > hf->tbl_bits)
2621                 return (0);/* Invalid */
2622
2623         hf->max_bits = maxbits;
2624
2625         /*
2626          * Cut out extra bits which we won't house in the table.
2627          * This preparation reduces the same calculation in the for-loop
2628          * making the table.
2629          */
2630         if (maxbits < 16) {
2631                 int ebits = 16 - maxbits;
2632                 for (i = 1; i <= maxbits; i++) {
2633                         bitptn[i] >>= ebits;
2634                         weight[i] >>= ebits;
2635                 }
2636         }
2637         if (maxbits > HTBL_BITS) {
2638                 unsigned htbl_max;
2639                 uint16_t *p;
2640
2641                 diffbits = maxbits - HTBL_BITS;
2642                 for (i = 1; i <= HTBL_BITS; i++) {
2643                         bitptn[i] >>= diffbits;
2644                         weight[i] >>= diffbits;
2645                 }
2646                 htbl_max = bitptn[HTBL_BITS] +
2647                     weight[HTBL_BITS] * hf->freq[HTBL_BITS];
2648                 p = &(hf->tbl[htbl_max]);
2649                 while (p < &hf->tbl[1U<<HTBL_BITS])
2650                         *p++ = 0;
2651         } else
2652                 diffbits = 0;
2653         hf->shift_bits = diffbits;
2654
2655         /*
2656          * Make the table.
2657          */
2658         tbl_size = 1 << HTBL_BITS;
2659         tbl = hf->tbl;
2660         bitlen = hf->bitlen;
2661         len_avail = hf->len_avail;
2662         hf->tree_used = 0;
2663         for (i = 0; i < len_avail; i++) {
2664                 uint16_t *p;
2665                 int len, cnt;
2666                 uint16_t bit;
2667                 int extlen;
2668                 struct htree_t *ht;
2669
2670                 if (bitlen[i] == 0)
2671                         continue;
2672                 /* Get a bit pattern */
2673                 len = bitlen[i];
2674                 ptn = bitptn[len];
2675                 cnt = weight[len];
2676                 if (len <= HTBL_BITS) {
2677                         /* Calculate next bit pattern */
2678                         if ((bitptn[len] = ptn + cnt) > tbl_size)
2679                                 return (0);/* Invalid */
2680                         /* Update the table */
2681                         p = &(tbl[ptn]);
2682                         if (cnt > 7) {
2683                                 uint16_t *pc;
2684
2685                                 cnt -= 8;
2686                                 pc = &p[cnt];
2687                                 pc[0] = (uint16_t)i;
2688                                 pc[1] = (uint16_t)i;
2689                                 pc[2] = (uint16_t)i;
2690                                 pc[3] = (uint16_t)i;
2691                                 pc[4] = (uint16_t)i;
2692                                 pc[5] = (uint16_t)i;
2693                                 pc[6] = (uint16_t)i;
2694                                 pc[7] = (uint16_t)i;
2695                                 if (cnt > 7) {
2696                                         cnt -= 8;
2697                                         memcpy(&p[cnt], pc,
2698                                                 8 * sizeof(uint16_t));
2699                                         pc = &p[cnt];
2700                                         while (cnt > 15) {
2701                                                 cnt -= 16;
2702                                                 memcpy(&p[cnt], pc,
2703                                                         16 * sizeof(uint16_t));
2704                                         }
2705                                 }
2706                                 if (cnt)
2707                                         memcpy(p, pc, cnt * sizeof(uint16_t));
2708                         } else {
2709                                 while (cnt > 1) {
2710                                         p[--cnt] = (uint16_t)i;
2711                                         p[--cnt] = (uint16_t)i;
2712                                 }
2713                                 if (cnt)
2714                                         p[--cnt] = (uint16_t)i;
2715                         }
2716                         continue;
2717                 }
2718
2719                 /*
2720                  * A bit length is too big to be housed to a direct table,
2721                  * so we use a tree model for its extra bits.
2722                  */
2723                 bitptn[len] = ptn + cnt;
2724                 bit = 1U << (diffbits -1);
2725                 extlen = len - HTBL_BITS;
2726                 
2727                 p = &(tbl[ptn >> diffbits]);
2728                 if (*p == 0) {
2729                         *p = len_avail + hf->tree_used;
2730                         ht = &(hf->tree[hf->tree_used++]);
2731                         if (hf->tree_used > hf->tree_avail)
2732                                 return (0);/* Invalid */
2733                         ht->left = 0;
2734                         ht->right = 0;
2735                 } else {
2736                         if (*p < len_avail ||
2737                             *p >= (len_avail + hf->tree_used))
2738                                 return (0);/* Invalid */
2739                         ht = &(hf->tree[*p - len_avail]);
2740                 }
2741                 while (--extlen > 0) {
2742                         if (ptn & bit) {
2743                                 if (ht->left < len_avail) {
2744                                         ht->left = len_avail + hf->tree_used;
2745                                         ht = &(hf->tree[hf->tree_used++]);
2746                                         if (hf->tree_used > hf->tree_avail)
2747                                                 return (0);/* Invalid */
2748                                         ht->left = 0;
2749                                         ht->right = 0;
2750                                 } else {
2751                                         ht = &(hf->tree[ht->left - len_avail]);
2752                                 }
2753                         } else {
2754                                 if (ht->right < len_avail) {
2755                                         ht->right = len_avail + hf->tree_used;
2756                                         ht = &(hf->tree[hf->tree_used++]);
2757                                         if (hf->tree_used > hf->tree_avail)
2758                                                 return (0);/* Invalid */
2759                                         ht->left = 0;
2760                                         ht->right = 0;
2761                                 } else {
2762                                         ht = &(hf->tree[ht->right - len_avail]);
2763                                 }
2764                         }
2765                         bit >>= 1;
2766                 }
2767                 if (ptn & bit) {
2768                         if (ht->left != 0)
2769                                 return (0);/* Invalid */
2770                         ht->left = (uint16_t)i;
2771                 } else {
2772                         if (ht->right != 0)
2773                                 return (0);/* Invalid */
2774                         ht->right = (uint16_t)i;
2775                 }
2776         }
2777         return (1);
2778 }
2779
2780 static int
2781 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2782 {
2783         struct htree_t *ht;
2784         int extlen;
2785
2786         ht = hf->tree;
2787         extlen = hf->shift_bits;
2788         while (c >= hf->len_avail) {
2789                 c -= hf->len_avail;
2790                 if (extlen-- <= 0 || c >= hf->tree_used)
2791                         return (0);
2792                 if (rbits & (1U << extlen))
2793                         c = ht[c].left;
2794                 else
2795                         c = ht[c].right;
2796         }
2797         return (c);
2798 }
2799
2800 static inline int
2801 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2802 {
2803         int c;
2804         /*
2805          * At first search an index table for a bit pattern.
2806          * If it fails, search a huffman tree for.
2807          */
2808         c = hf->tbl[rbits >> hf->shift_bits];
2809         if (c < hf->len_avail || hf->len_avail == 0)
2810                 return (c);
2811         /* This bit pattern needs to be found out at a huffman tree. */
2812         return (lzh_decode_huffman_tree(hf, rbits, c));
2813 }
2814