]> CyberLeo.Net >> Repos - FreeBSD/stable/10.git/blob - contrib/libarchive/libarchive/archive_read_support_format_lha.c
MFC r299529,r299540,r299576,r299896:
[FreeBSD/stable/10.git] / contrib / libarchive / libarchive / archive_read_support_format_lha.c
1 /*-
2  * Copyright (c) 2008-2014 Michihiro NAKAJIMA
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25
26 #include "archive_platform.h"
27
28 #ifdef HAVE_ERRNO_H
29 #include <errno.h>
30 #endif
31 #ifdef HAVE_LIMITS_H
32 #include <limits.h>
33 #endif
34 #ifdef HAVE_STDLIB_H
35 #include <stdlib.h>
36 #endif
37 #ifdef HAVE_STRING_H
38 #include <string.h>
39 #endif
40
41 #include "archive.h"
42 #include "archive_entry.h"
43 #include "archive_entry_locale.h"
44 #include "archive_private.h"
45 #include "archive_read_private.h"
46 #include "archive_endian.h"
47
48
#define MAXMATCH                256     /* Maximum match length. */
#define MINMATCH                3       /* Minimum match length. */
/*
 * Literal table format:
 * +0              +256                      +510
 * +---------------+-------------------------+
 * | literal code  |       match length      |
 * |   0 ... 255   |  MINMATCH ... MAXMATCH  |
 * +---------------+-------------------------+
 *  <---          LT_BITLEN_SIZE         --->
 */
/*
 * Literal table size: one slot per literal byte value (UCHAR_MAX + 1)
 * followed by one slot per match length in MINMATCH..MAXMATCH
 * (510 slots in total when UCHAR_MAX is 255).
 */
#define LT_BITLEN_SIZE          (UCHAR_MAX + 1 + MAXMATCH - MINMATCH + 1)
/*
 * Position table size.
 * Note: this is used for both the position table and the pre-literal table.
 */
#define PT_BITLEN_SIZE          (3 + 16)
65
/*
 * Private state of one LZH decoder: the sliding window, the bit-stream
 * reader cache and the two Huffman tables (lt and pt).
 */
struct lzh_dec {
        /* Decoding status. */
        int                      state;

        /*
         * Sliding window holding the last 8Ki (lh5), 32Ki (lh6) or
         * 64Ki (lh7) bytes of decoded data.
         */
        int                      w_size;
        /* NOTE(review): presumably w_size - 1, used to wrap positions
         * inside the window -- confirm in lzh_decode_init(). */
        int                      w_mask;
        /* Window buffer, used as a ring buffer. */
        unsigned char           *w_buff;
        /* Position in the window where the next byte is inserted. */
        int                      w_pos;
        /* Position in the window from which decoded bytes can be copied. */
        int                      copy_pos;
        /* Number of bytes that can be copied from the window starting
         * at copy_pos. */
        int                      copy_len;

        /*
         * Bit stream reader.
         */
        struct lzh_br {
#define CACHE_TYPE              uint64_t
#define CACHE_BITS              (8 * sizeof(CACHE_TYPE))
                /* Cache buffer. */
                CACHE_TYPE       cache_buffer;
                /* Number of bits currently available in cache_buffer. */
                int              cache_avail;
        } br;

        /*
         * Huffman coding.  Two instances exist: lt and pt.  Judging by
         * LT_BITLEN_SIZE/PT_BITLEN_SIZE these are the literal table and
         * the position (and pre-literal) table.
         */
        struct huffman {
                int              len_size;
                int              len_avail;
                int              len_bits;
                int              freq[17];
                unsigned char   *bitlen;

                /*
                 * Use an index table.  It is faster than walking a Huffman
                 * coding tree, which is a binary tree.  However, a large
                 * index table causes many L1 cache read misses.
                 */
#define HTBL_BITS       10
                int              max_bits;
                int              shift_bits;
                int              tbl_bits;
                int              tree_used;
                int              tree_avail;
                /* Direct access table. */
                uint16_t        *tbl;
                /* Binary tree table for the extra bits beyond the direct
                 * access table. */
                struct htree_t {
                        uint16_t left;
                        uint16_t right;
                }               *tree;
        }                        lt, pt;

        /* Per-block decoding bookkeeping.  These fields are not used in
         * the part of the file that declares them; see lzh_decode*(). */
        int                      blocks_avail;
        int                      pos_pt_len_size;
        int                      pos_pt_len_bits;
        int                      literal_pt_len_size;
        int                      literal_pt_len_bits;
        int                      reading_position;
        int                      loop;
        int                      error;
};
137
/*
 * Stream descriptor handed to lzh_decode_init(), lzh_decode() and
 * lzh_decode_free().  It carries the input cursor, output accounting and
 * a pointer to the decoder state (ds).
 * NOTE(review): the next_in/avail_in/total_* naming suggests the usual
 * "caller fills input, decoder updates counters" contract -- confirm
 * against lzh_decode().
 */
struct lzh_stream {
        const unsigned char     *next_in;
        int                      avail_in;
        int64_t                  total_in;
        const unsigned char     *ref_ptr;
        int                      avail_out;
        int64_t                  total_out;
        struct lzh_dec          *ds;
};
147
/*
 * Per-archive state of the LHA reader.  It is allocated once in
 * archive_read_support_format_lha(); the per-entry fields are reset by
 * archive_read_format_lha_read_header() for every entry.
 */
struct lha {
        /* entry_bytes_remaining is the number of bytes we expect.          */
        int64_t                  entry_offset;
        int64_t                  entry_bytes_remaining;
        int64_t                  entry_unconsumed;
        uint16_t                 entry_crc_calculated;

        size_t                   header_size;   /* header size              */
        unsigned char            level;         /* header level             */
        char                     method[3];     /* compress type            */
        int64_t                  compsize;      /* compressed data size     */
        int64_t                  origsize;      /* original file size       */
        /* Bit mask of the *_IS_SET flags below. */
        int                      setflag;
#define BIRTHTIME_IS_SET        1
#define ATIME_IS_SET            2
#define UNIX_MODE_IS_SET        4
#define CRC_IS_SET              8
        time_t                   birthtime;
        long                     birthtime_tv_nsec;
        time_t                   mtime;
        long                     mtime_tv_nsec;
        time_t                   atime;
        long                     atime_tv_nsec;
        mode_t                   mode;
        int64_t                  uid;
        int64_t                  gid;
        struct archive_string    uname;
        struct archive_string    gname;
        uint16_t                 header_crc;
        uint16_t                 crc;
        /* Conversion used for names of the current entry; it is set to
         * opt_sconv (or NULL) at the start of every header. */
        struct archive_string_conv *sconv;
        /* Conversion requested through the "hdrcharset" option. */
        struct archive_string_conv *opt_sconv;

        struct archive_string    dirname;
        struct archive_string    filename;
        /* Scratch wide string used while replacing path separators. */
        struct archive_wstring   ws;

        unsigned char            dos_attr;

        /* Set once the first header of the archive has been found. */
        char                     found_first_header;
        /* Set when the entry's method is "lhd", i.e. a directory. */
        char                     directory;

        /* Flags to mark progress of decompression. */
        char                     decompress_init;
        char                     end_of_entry;
        char                     end_of_entry_cleanup;
        char                     entry_is_compressed;

        /* Backing storage for the "lha -xxx-" format name. */
        char                     format_name[64];

        struct lzh_stream        strm;
};
202
/*
 * Offsets of the members shared by all LHA header levels, counted from
 * the first byte of the header.
 */
#define H_METHOD_OFFSET 2       /* Compress type ("-lh?-" / "-lz?-"). */
#define H_ATTR_OFFSET   19      /* DOS attribute. */
#define H_LEVEL_OFFSET  20      /* Header Level.  */
#define H_SIZE          22      /* Minimum header size. */
210
/* Callbacks registered with __archive_read_register_format(). */
static int      archive_read_format_lha_bid(struct archive_read *, int);
static int      archive_read_format_lha_options(struct archive_read *,
                    const char *, const char *);
static int      archive_read_format_lha_read_header(struct archive_read *,
                    struct archive_entry *);
static int      archive_read_format_lha_read_data(struct archive_read *,
                    const void **, size_t *, int64_t *);
static int      archive_read_format_lha_read_data_skip(struct archive_read *);
static int      archive_read_format_lha_cleanup(struct archive_read *);

/* Header parsing helpers. */
static void     lha_replace_path_separator(struct lha *,
                    struct archive_entry *);
static int      lha_read_file_header_0(struct archive_read *, struct lha *);
static int      lha_read_file_header_1(struct archive_read *, struct lha *);
static int      lha_read_file_header_2(struct archive_read *, struct lha *);
static int      lha_read_file_header_3(struct archive_read *, struct lha *);
static int      lha_read_file_extended_header(struct archive_read *,
                    struct lha *, uint16_t *, int, size_t, size_t *);
static size_t   lha_check_header_format(const void *);
static int      lha_skip_sfx(struct archive_read *);
static time_t   lha_dos_time(const unsigned char *);
static time_t   lha_win_time(uint64_t, long *);
static unsigned char    lha_calcsum(unsigned char, const void *,
                    int, size_t);
static int      lha_parse_linkname(struct archive_string *,
                    struct archive_string *);
/* Entry data readers and CRC16. */
static int      lha_read_data_none(struct archive_read *, const void **,
                    size_t *, int64_t *);
static int      lha_read_data_lzh(struct archive_read *, const void **,
                    size_t *, int64_t *);
static void     lha_crc16_init(void);
static uint16_t lha_crc16(uint16_t, const void *, size_t);
/* LZH decoder. */
static int      lzh_decode_init(struct lzh_stream *, const char *);
static void     lzh_decode_free(struct lzh_stream *);
static int      lzh_decode(struct lzh_stream *, int);
static int      lzh_br_fillup(struct lzh_stream *, struct lzh_br *);
static int      lzh_huffman_init(struct huffman *, size_t, int);
static void     lzh_huffman_free(struct huffman *);
static int      lzh_read_pt_bitlen(struct lzh_stream *, int start, int end);
static int      lzh_make_fake_table(struct huffman *, uint16_t);
static int      lzh_make_huffman_table(struct huffman *);
static inline int lzh_decode_huffman(struct huffman *, unsigned);
static int      lzh_decode_huffman_tree(struct huffman *, unsigned, int);
254
255
256 int
257 archive_read_support_format_lha(struct archive *_a)
258 {
259         struct archive_read *a = (struct archive_read *)_a;
260         struct lha *lha;
261         int r;
262
263         archive_check_magic(_a, ARCHIVE_READ_MAGIC,
264             ARCHIVE_STATE_NEW, "archive_read_support_format_lha");
265
266         lha = (struct lha *)calloc(1, sizeof(*lha));
267         if (lha == NULL) {
268                 archive_set_error(&a->archive, ENOMEM,
269                     "Can't allocate lha data");
270                 return (ARCHIVE_FATAL);
271         }
272         archive_string_init(&lha->ws);
273
274         r = __archive_read_register_format(a,
275             lha,
276             "lha",
277             archive_read_format_lha_bid,
278             archive_read_format_lha_options,
279             archive_read_format_lha_read_header,
280             archive_read_format_lha_read_data,
281             archive_read_format_lha_read_data_skip,
282             NULL,
283             archive_read_format_lha_cleanup,
284             NULL,
285             NULL);
286
287         if (r != ARCHIVE_OK)
288                 free(lha);
289         return (ARCHIVE_OK);
290 }
291
292 static size_t
293 lha_check_header_format(const void *h)
294 {
295         const unsigned char *p = h;
296         size_t next_skip_bytes;
297
298         switch (p[H_METHOD_OFFSET+3]) {
299         /*
300          * "-lh0-" ... "-lh7-" "-lhd-"
301          * "-lzs-" "-lz5-"
302          */
303         case '0': case '1': case '2': case '3':
304         case '4': case '5': case '6': case '7':
305         case 'd':
306         case 's':
307                 next_skip_bytes = 4;
308
309                 /* b0 == 0 means the end of an LHa archive file.        */
310                 if (p[0] == 0)
311                         break;
312                 if (p[H_METHOD_OFFSET] != '-' || p[H_METHOD_OFFSET+1] != 'l'
313                     ||  p[H_METHOD_OFFSET+4] != '-')
314                         break;
315
316                 if (p[H_METHOD_OFFSET+2] == 'h') {
317                         /* "-lh?-" */
318                         if (p[H_METHOD_OFFSET+3] == 's')
319                                 break;
320                         if (p[H_LEVEL_OFFSET] == 0)
321                                 return (0);
322                         if (p[H_LEVEL_OFFSET] <= 3 && p[H_ATTR_OFFSET] == 0x20)
323                                 return (0);
324                 }
325                 if (p[H_METHOD_OFFSET+2] == 'z') {
326                         /* LArc extensions: -lzs-,-lz4- and -lz5- */
327                         if (p[H_LEVEL_OFFSET] != 0)
328                                 break;
329                         if (p[H_METHOD_OFFSET+3] == 's'
330                             || p[H_METHOD_OFFSET+3] == '4'
331                             || p[H_METHOD_OFFSET+3] == '5')
332                                 return (0);
333                 }
334                 break;
335         case 'h': next_skip_bytes = 1; break;
336         case 'z': next_skip_bytes = 1; break;
337         case 'l': next_skip_bytes = 2; break;
338         case '-': next_skip_bytes = 3; break;
339         default : next_skip_bytes = 4; break;
340         }
341
342         return (next_skip_bytes);
343 }
344
345 static int
346 archive_read_format_lha_bid(struct archive_read *a, int best_bid)
347 {
348         const char *p;
349         const void *buff;
350         ssize_t bytes_avail, offset, window;
351         size_t next;
352
353         /* If there's already a better bid than we can ever
354            make, don't bother testing. */
355         if (best_bid > 30)
356                 return (-1);
357
358         if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL)
359                 return (-1);
360
361         if (lha_check_header_format(p) == 0)
362                 return (30);
363
364         if (p[0] == 'M' && p[1] == 'Z') {
365                 /* PE file */
366                 offset = 0;
367                 window = 4096;
368                 while (offset < (1024 * 20)) {
369                         buff = __archive_read_ahead(a, offset + window,
370                             &bytes_avail);
371                         if (buff == NULL) {
372                                 /* Remaining bytes are less than window. */
373                                 window >>= 1;
374                                 if (window < (H_SIZE + 3))
375                                         return (0);
376                                 continue;
377                         }
378                         p = (const char *)buff + offset;
379                         while (p + H_SIZE < (const char *)buff + bytes_avail) {
380                                 if ((next = lha_check_header_format(p)) == 0)
381                                         return (30);
382                                 p += next;
383                         }
384                         offset = p - (const char *)buff;
385                 }
386         }
387         return (0);
388 }
389
390 static int
391 archive_read_format_lha_options(struct archive_read *a,
392     const char *key, const char *val)
393 {
394         struct lha *lha;
395         int ret = ARCHIVE_FAILED;
396
397         lha = (struct lha *)(a->format->data);
398         if (strcmp(key, "hdrcharset")  == 0) {
399                 if (val == NULL || val[0] == 0)
400                         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
401                             "lha: hdrcharset option needs a character-set name");
402                 else {
403                         lha->opt_sconv =
404                             archive_string_conversion_from_charset(
405                                 &a->archive, val, 0);
406                         if (lha->opt_sconv != NULL)
407                                 ret = ARCHIVE_OK;
408                         else
409                                 ret = ARCHIVE_FATAL;
410                 }
411                 return (ret);
412         }
413
414         /* Note: The "warn" return is just to inform the options
415          * supervisor that we didn't handle it.  It will generate
416          * a suitable error if no one used this option. */
417         return (ARCHIVE_WARN);
418 }
419
420 static int
421 lha_skip_sfx(struct archive_read *a)
422 {
423         const void *h;
424         const char *p, *q;
425         size_t next, skip;
426         ssize_t bytes, window;
427
428         window = 4096;
429         for (;;) {
430                 h = __archive_read_ahead(a, window, &bytes);
431                 if (h == NULL) {
432                         /* Remaining bytes are less than window. */
433                         window >>= 1;
434                         if (window < (H_SIZE + 3))
435                                 goto fatal;
436                         continue;
437                 }
438                 if (bytes < H_SIZE)
439                         goto fatal;
440                 p = h;
441                 q = p + bytes;
442
443                 /*
444                  * Scan ahead until we find something that looks
445                  * like the lha header.
446                  */
447                 while (p + H_SIZE < q) {
448                         if ((next = lha_check_header_format(p)) == 0) {
449                                 skip = p - (const char *)h;
450                                 __archive_read_consume(a, skip);
451                                 return (ARCHIVE_OK);
452                         }
453                         p += next;
454                 }
455                 skip = p - (const char *)h;
456                 __archive_read_consume(a, skip);
457         }
458 fatal:
459         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
460             "Couldn't find out LHa header");
461         return (ARCHIVE_FATAL);
462 }
463
464 static int
465 truncated_error(struct archive_read *a)
466 {
467         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
468             "Truncated LHa header");
469         return (ARCHIVE_FATAL);
470 }
471
472 static int
473 archive_read_format_lha_read_header(struct archive_read *a,
474     struct archive_entry *entry)
475 {
476         struct archive_string linkname;
477         struct archive_string pathname;
478         struct lha *lha;
479         const unsigned char *p;
480         const char *signature;
481         int err;
482         
483         lha_crc16_init();
484
485         a->archive.archive_format = ARCHIVE_FORMAT_LHA;
486         if (a->archive.archive_format_name == NULL)
487                 a->archive.archive_format_name = "lha";
488
489         lha = (struct lha *)(a->format->data);
490         lha->decompress_init = 0;
491         lha->end_of_entry = 0;
492         lha->end_of_entry_cleanup = 0;
493         lha->entry_unconsumed = 0;
494
495         if ((p = __archive_read_ahead(a, H_SIZE, NULL)) == NULL) {
496                 /*
497                  * LHa archiver added 0 to the tail of its archive file as
498                  * the mark of the end of the archive.
499                  */
500                 signature = __archive_read_ahead(a, sizeof(signature[0]), NULL);
501                 if (signature == NULL || signature[0] == 0)
502                         return (ARCHIVE_EOF);
503                 return (truncated_error(a));
504         }
505
506         signature = (const char *)p;
507         if (lha->found_first_header == 0 &&
508             signature[0] == 'M' && signature[1] == 'Z') {
509                 /* This is an executable?  Must be self-extracting...   */
510                 err = lha_skip_sfx(a);
511                 if (err < ARCHIVE_WARN)
512                         return (err);
513
514                 if ((p = __archive_read_ahead(a, sizeof(*p), NULL)) == NULL)
515                         return (truncated_error(a));
516                 signature = (const char *)p;
517         }
518         /* signature[0] == 0 means the end of an LHa archive file. */
519         if (signature[0] == 0)
520                 return (ARCHIVE_EOF);
521
522         /*
523          * Check the header format and method type.
524          */
525         if (lha_check_header_format(p) != 0) {
526                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
527                     "Bad LHa file");
528                 return (ARCHIVE_FATAL);
529         }
530
531         /* We've found the first header. */
532         lha->found_first_header = 1;
533         /* Set a default value and common data */
534         lha->header_size = 0;
535         lha->level = p[H_LEVEL_OFFSET];
536         lha->method[0] = p[H_METHOD_OFFSET+1];
537         lha->method[1] = p[H_METHOD_OFFSET+2];
538         lha->method[2] = p[H_METHOD_OFFSET+3];
539         if (memcmp(lha->method, "lhd", 3) == 0)
540                 lha->directory = 1;
541         else
542                 lha->directory = 0;
543         if (memcmp(lha->method, "lh0", 3) == 0 ||
544             memcmp(lha->method, "lz4", 3) == 0)
545                 lha->entry_is_compressed = 0;
546         else
547                 lha->entry_is_compressed = 1;
548
549         lha->compsize = 0;
550         lha->origsize = 0;
551         lha->setflag = 0;
552         lha->birthtime = 0;
553         lha->birthtime_tv_nsec = 0;
554         lha->mtime = 0;
555         lha->mtime_tv_nsec = 0;
556         lha->atime = 0;
557         lha->atime_tv_nsec = 0;
558         lha->mode = (lha->directory)? 0777 : 0666;
559         lha->uid = 0;
560         lha->gid = 0;
561         archive_string_empty(&lha->dirname);
562         archive_string_empty(&lha->filename);
563         lha->dos_attr = 0;
564         if (lha->opt_sconv != NULL)
565                 lha->sconv = lha->opt_sconv;
566         else
567                 lha->sconv = NULL;
568
569         switch (p[H_LEVEL_OFFSET]) {
570         case 0:
571                 err = lha_read_file_header_0(a, lha);
572                 break;
573         case 1:
574                 err = lha_read_file_header_1(a, lha);
575                 break;
576         case 2:
577                 err = lha_read_file_header_2(a, lha);
578                 break;
579         case 3:
580                 err = lha_read_file_header_3(a, lha);
581                 break;
582         default:
583                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
584                     "Unsupported LHa header level %d", p[H_LEVEL_OFFSET]);
585                 err = ARCHIVE_FATAL;
586                 break;
587         }
588         if (err < ARCHIVE_WARN)
589                 return (err);
590
591
592         if (!lha->directory && archive_strlen(&lha->filename) == 0)
593                 /* The filename has not been set */
594                 return (truncated_error(a));
595
596         /*
597          * Make a pathname from a dirname and a filename.
598          */
599         archive_string_concat(&lha->dirname, &lha->filename);
600         archive_string_init(&pathname);
601         archive_string_init(&linkname);
602         archive_string_copy(&pathname, &lha->dirname);
603
604         if ((lha->mode & AE_IFMT) == AE_IFLNK) {
605                 /*
606                  * Extract the symlink-name if it's included in the pathname.
607                  */
608                 if (!lha_parse_linkname(&linkname, &pathname)) {
609                         /* We couldn't get the symlink-name. */
610                         archive_set_error(&a->archive,
611                             ARCHIVE_ERRNO_FILE_FORMAT,
612                             "Unknown symlink-name");
613                         archive_string_free(&pathname);
614                         archive_string_free(&linkname);
615                         return (ARCHIVE_FAILED);
616                 }
617         } else {
618                 /*
619                  * Make sure a file-type is set.
620                  * The mode has been overridden if it is in the extended data.
621                  */
622                 lha->mode = (lha->mode & ~AE_IFMT) |
623                     ((lha->directory)? AE_IFDIR: AE_IFREG);
624         }
625         if ((lha->setflag & UNIX_MODE_IS_SET) == 0 &&
626             (lha->dos_attr & 1) != 0)
627                 lha->mode &= ~(0222);/* read only. */
628
629         /*
630          * Set basic file parameters.
631          */
632         if (archive_entry_copy_pathname_l(entry, pathname.s,
633             pathname.length, lha->sconv) != 0) {
634                 if (errno == ENOMEM) {
635                         archive_set_error(&a->archive, ENOMEM,
636                             "Can't allocate memory for Pathname");
637                         return (ARCHIVE_FATAL);
638                 }
639                 archive_set_error(&a->archive,
640                     ARCHIVE_ERRNO_FILE_FORMAT,
641                     "Pathname cannot be converted "
642                     "from %s to current locale.",
643                     archive_string_conversion_charset_name(lha->sconv));
644                 err = ARCHIVE_WARN;
645         }
646         archive_string_free(&pathname);
647         if (archive_strlen(&linkname) > 0) {
648                 if (archive_entry_copy_symlink_l(entry, linkname.s,
649                     linkname.length, lha->sconv) != 0) {
650                         if (errno == ENOMEM) {
651                                 archive_set_error(&a->archive, ENOMEM,
652                                     "Can't allocate memory for Linkname");
653                                 return (ARCHIVE_FATAL);
654                         }
655                         archive_set_error(&a->archive,
656                             ARCHIVE_ERRNO_FILE_FORMAT,
657                             "Linkname cannot be converted "
658                             "from %s to current locale.",
659                             archive_string_conversion_charset_name(lha->sconv));
660                         err = ARCHIVE_WARN;
661                 }
662         } else
663                 archive_entry_set_symlink(entry, NULL);
664         archive_string_free(&linkname);
665         /*
666          * When a header level is 0, there is a possibility that
667          * a pathname and a symlink has '\' character, a directory
668          * separator in DOS/Windows. So we should convert it to '/'.
669          */
670         if (p[H_LEVEL_OFFSET] == 0)
671                 lha_replace_path_separator(lha, entry);
672
673         archive_entry_set_mode(entry, lha->mode);
674         archive_entry_set_uid(entry, lha->uid);
675         archive_entry_set_gid(entry, lha->gid);
676         if (archive_strlen(&lha->uname) > 0)
677                 archive_entry_set_uname(entry, lha->uname.s);
678         if (archive_strlen(&lha->gname) > 0)
679                 archive_entry_set_gname(entry, lha->gname.s);
680         if (lha->setflag & BIRTHTIME_IS_SET) {
681                 archive_entry_set_birthtime(entry, lha->birthtime,
682                     lha->birthtime_tv_nsec);
683                 archive_entry_set_ctime(entry, lha->birthtime,
684                     lha->birthtime_tv_nsec);
685         } else {
686                 archive_entry_unset_birthtime(entry);
687                 archive_entry_unset_ctime(entry);
688         }
689         archive_entry_set_mtime(entry, lha->mtime, lha->mtime_tv_nsec);
690         if (lha->setflag & ATIME_IS_SET)
691                 archive_entry_set_atime(entry, lha->atime,
692                     lha->atime_tv_nsec);
693         else
694                 archive_entry_unset_atime(entry);
695         if (lha->directory || archive_entry_symlink(entry) != NULL)
696                 archive_entry_unset_size(entry);
697         else
698                 archive_entry_set_size(entry, lha->origsize);
699
700         /*
701          * Prepare variables used to read a file content.
702          */
703         lha->entry_bytes_remaining = lha->compsize;
704         lha->entry_offset = 0;
705         lha->entry_crc_calculated = 0;
706
707         /*
708          * This file does not have a content.
709          */
710         if (lha->directory || lha->compsize == 0)
711                 lha->end_of_entry = 1;
712
713         sprintf(lha->format_name, "lha -%c%c%c-",
714             lha->method[0], lha->method[1], lha->method[2]);
715         a->archive.archive_format_name = lha->format_name;
716
717         return (err);
718 }
719
720 /*
721  * Replace a DOS path separator '\' by a character '/'.
722  * Some multi-byte character set have  a character '\' in its second byte.
723  */
724 static void
725 lha_replace_path_separator(struct lha *lha, struct archive_entry *entry)
726 {
727         const wchar_t *wp;
728         size_t i;
729
730         if ((wp = archive_entry_pathname_w(entry)) != NULL) {
731                 archive_wstrcpy(&(lha->ws), wp);
732                 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
733                         if (lha->ws.s[i] == L'\\')
734                                 lha->ws.s[i] = L'/';
735                 }
736                 archive_entry_copy_pathname_w(entry, lha->ws.s);
737         }
738
739         if ((wp = archive_entry_symlink_w(entry)) != NULL) {
740                 archive_wstrcpy(&(lha->ws), wp);
741                 for (i = 0; i < archive_strlen(&(lha->ws)); i++) {
742                         if (lha->ws.s[i] == L'\\')
743                                 lha->ws.s[i] = L'/';
744                 }
745                 archive_entry_copy_symlink_w(entry, lha->ws.s);
746         }
747 }
748
/*
 * Header 0 format
 *
 * +0              +1         +2               +7                  +11
 * +---------------+----------+----------------+-------------------+
 * |header size(*1)|header sum|compression type|compressed size(*2)|
 * +---------------+----------+----------------+-------------------+
 *                             <---------------------(*1)----------*
 *
 * +11               +15       +17       +19            +20              +21
 * +-----------------+---------+---------+--------------+----------------+
 * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=0)|
 * +-----------------+---------+---------+--------------+----------------+
 * *--------------------------------(*1)---------------------------------*
 *
 * +21             +22       +22+(*3)   +22+(*3)+2       +22+(*3)+2+(*4)
 * +---------------+---------+----------+----------------+------------------+
 * |name length(*3)|file name|file CRC16|extra header(*4)|  compressed data |
 * +---------------+---------+----------+----------------+------------------+
 *                  <--(*3)->                             <------(*2)------>
 * *----------------------(*1)-------------------------->
 *
 */
/* Byte offsets of the level-0 header fields shown above. */
#define H0_HEADER_SIZE_OFFSET   0
#define H0_HEADER_SUM_OFFSET    1
#define H0_COMP_SIZE_OFFSET     7
#define H0_ORIG_SIZE_OFFSET     11
#define H0_DOS_TIME_OFFSET      15
#define H0_NAME_LEN_OFFSET      21
#define H0_FILE_NAME_OFFSET     22
/* Size of the header without the file name and extra header:
 * 22 bytes up to the name plus the 2-byte file CRC16. */
#define H0_FIXED_SIZE           24
780 static int
781 lha_read_file_header_0(struct archive_read *a, struct lha *lha)
782 {
783         const unsigned char *p;
784         int extdsize, namelen;
785         unsigned char headersum, sum_calculated;
786
787         if ((p = __archive_read_ahead(a, H0_FIXED_SIZE, NULL)) == NULL)
788                 return (truncated_error(a));
789         lha->header_size = p[H0_HEADER_SIZE_OFFSET] + 2;
790         headersum = p[H0_HEADER_SUM_OFFSET];
791         lha->compsize = archive_le32dec(p + H0_COMP_SIZE_OFFSET);
792         lha->origsize = archive_le32dec(p + H0_ORIG_SIZE_OFFSET);
793         lha->mtime = lha_dos_time(p + H0_DOS_TIME_OFFSET);
794         namelen = p[H0_NAME_LEN_OFFSET];
795         extdsize = (int)lha->header_size - H0_FIXED_SIZE - namelen;
796         if ((namelen > 221 || extdsize < 0) && extdsize != -2) {
797                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
798                     "Invalid LHa header");
799                 return (ARCHIVE_FATAL);
800         }
801         if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
802                 return (truncated_error(a));
803
804         archive_strncpy(&lha->filename, p + H0_FILE_NAME_OFFSET, namelen);
805         /* When extdsize == -2, A CRC16 value is not present in the header. */
806         if (extdsize >= 0) {
807                 lha->crc = archive_le16dec(p + H0_FILE_NAME_OFFSET + namelen);
808                 lha->setflag |= CRC_IS_SET;
809         }
810         sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
811
812         /* Read an extended header */
813         if (extdsize > 0) {
814                 /* This extended data is set by 'LHa for UNIX' only.
815                  * Maybe fixed size.
816                  */
817                 p += H0_FILE_NAME_OFFSET + namelen + 2;
818                 if (p[0] == 'U' && extdsize == 12) {
819                         /* p[1] is a minor version. */
820                         lha->mtime = archive_le32dec(&p[2]);
821                         lha->mode = archive_le16dec(&p[6]);
822                         lha->uid = archive_le16dec(&p[8]);
823                         lha->gid = archive_le16dec(&p[10]);
824                         lha->setflag |= UNIX_MODE_IS_SET;
825                 }
826         }
827         __archive_read_consume(a, lha->header_size);
828
829         if (sum_calculated != headersum) {
830                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
831                     "LHa header sum error");
832                 return (ARCHIVE_FATAL);
833         }
834
835         return (ARCHIVE_OK);
836 }
837
838 /*
839  * Header 1 format
840  *
841  * +0              +1         +2               +7            +11
842  * +---------------+----------+----------------+-------------+
843  * |header size(*1)|header sum|compression type|skip size(*2)|
844  * +---------------+----------+----------------+-------------+
845  *                             <---------------(*1)----------*
846  *
847  * +11               +15       +17       +19            +20              +21
848  * +-----------------+---------+---------+--------------+----------------+
849  * |uncompressed size|time(DOS)|date(DOS)|attribute(DOS)|header level(=1)|
850  * +-----------------+---------+---------+--------------+----------------+
851  * *-------------------------------(*1)----------------------------------*
852  *
853  * +21             +22       +22+(*3)   +22+(*3)+2  +22+(*3)+3  +22+(*3)+3+(*4)
854  * +---------------+---------+----------+-----------+-----------+
855  * |name length(*3)|file name|file CRC16|  creator  |padding(*4)|
856  * +---------------+---------+----------+-----------+-----------+
857  *                  <--(*3)->
858  * *----------------------------(*1)----------------------------*
859  *
860  * +22+(*3)+3+(*4)  +22+(*3)+3+(*4)+2     +22+(*3)+3+(*4)+2+(*5)
861  * +----------------+---------------------+------------------------+
862  * |next header size| extended header(*5) |     compressed data    |
863  * +----------------+---------------------+------------------------+
864  * *------(*1)-----> <--------------------(*2)-------------------->
865  */
866 #define H1_HEADER_SIZE_OFFSET   0
867 #define H1_HEADER_SUM_OFFSET    1
868 #define H1_COMP_SIZE_OFFSET     7
869 #define H1_ORIG_SIZE_OFFSET     11
870 #define H1_DOS_TIME_OFFSET      15
871 #define H1_NAME_LEN_OFFSET      21
872 #define H1_FILE_NAME_OFFSET     22
873 #define H1_FIXED_SIZE           27
874 static int
875 lha_read_file_header_1(struct archive_read *a, struct lha *lha)
876 {
877         const unsigned char *p;
878         size_t extdsize;
879         int i, err, err2;
880         int namelen, padding;
881         unsigned char headersum, sum_calculated;
882
883         err = ARCHIVE_OK;
884
885         if ((p = __archive_read_ahead(a, H1_FIXED_SIZE, NULL)) == NULL)
886                 return (truncated_error(a));
887
888         lha->header_size = p[H1_HEADER_SIZE_OFFSET] + 2;
889         headersum = p[H1_HEADER_SUM_OFFSET];
890         /* Note: An extended header size is included in a compsize. */
891         lha->compsize = archive_le32dec(p + H1_COMP_SIZE_OFFSET);
892         lha->origsize = archive_le32dec(p + H1_ORIG_SIZE_OFFSET);
893         lha->mtime = lha_dos_time(p + H1_DOS_TIME_OFFSET);
894         namelen = p[H1_NAME_LEN_OFFSET];
895         /* Calculate a padding size. The result will be normally 0 only(?) */
896         padding = ((int)lha->header_size) - H1_FIXED_SIZE - namelen;
897
898         if (namelen > 230 || padding < 0)
899                 goto invalid;
900
901         if ((p = __archive_read_ahead(a, lha->header_size, NULL)) == NULL)
902                 return (truncated_error(a));
903
904         for (i = 0; i < namelen; i++) {
905                 if (p[i + H1_FILE_NAME_OFFSET] == 0xff)
906                         goto invalid;/* Invalid filename. */
907         }
908         archive_strncpy(&lha->filename, p + H1_FILE_NAME_OFFSET, namelen);
909         lha->crc = archive_le16dec(p + H1_FILE_NAME_OFFSET + namelen);
910         lha->setflag |= CRC_IS_SET;
911
912         sum_calculated = lha_calcsum(0, p, 2, lha->header_size - 2);
913         /* Consume used bytes but not include `next header size' data
914          * since it will be consumed in lha_read_file_extended_header(). */
915         __archive_read_consume(a, lha->header_size - 2);
916
917         /* Read extended headers */
918         err2 = lha_read_file_extended_header(a, lha, NULL, 2,
919             (size_t)(lha->compsize + 2), &extdsize);
920         if (err2 < ARCHIVE_WARN)
921                 return (err2);
922         if (err2 < err)
923                 err = err2;
924         /* Get a real compressed file size. */
925         lha->compsize -= extdsize - 2;
926
927         if (sum_calculated != headersum) {
928                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
929                     "LHa header sum error");
930                 return (ARCHIVE_FATAL);
931         }
932         return (err);
933 invalid:
934         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
935             "Invalid LHa header");
936         return (ARCHIVE_FATAL);
937 }
938
939 /*
940  * Header 2 format
941  *
942  * +0              +2               +7                  +11               +15
943  * +---------------+----------------+-------------------+-----------------+
944  * |header size(*1)|compression type|compressed size(*2)|uncompressed size|
945  * +---------------+----------------+-------------------+-----------------+
946  *  <--------------------------------(*1)---------------------------------*
947  *
948  * +15               +19          +20              +21        +23         +24
949  * +-----------------+------------+----------------+----------+-----------+
950  * |date/time(time_t)| 0x20 fixed |header level(=2)|file CRC16|  creator  |
951  * +-----------------+------------+----------------+----------+-----------+
952  * *---------------------------------(*1)---------------------------------*
953  *
954  * +24              +26                 +26+(*3)      +26+(*3)+(*4)
955  * +----------------+-------------------+-------------+-------------------+
956  * |next header size|extended header(*3)| padding(*4) |  compressed data  |
957  * +----------------+-------------------+-------------+-------------------+
958  * *--------------------------(*1)-------------------> <------(*2)------->
959  *
960  */
961 #define H2_HEADER_SIZE_OFFSET   0
962 #define H2_COMP_SIZE_OFFSET     7
963 #define H2_ORIG_SIZE_OFFSET     11
964 #define H2_TIME_OFFSET          15
965 #define H2_CRC_OFFSET           21
966 #define H2_FIXED_SIZE           24
967 static int
968 lha_read_file_header_2(struct archive_read *a, struct lha *lha)
969 {
970         const unsigned char *p;
971         size_t extdsize;
972         int err, padding;
973         uint16_t header_crc;
974
975         if ((p = __archive_read_ahead(a, H2_FIXED_SIZE, NULL)) == NULL)
976                 return (truncated_error(a));
977
978         lha->header_size =archive_le16dec(p + H2_HEADER_SIZE_OFFSET);
979         lha->compsize = archive_le32dec(p + H2_COMP_SIZE_OFFSET);
980         lha->origsize = archive_le32dec(p + H2_ORIG_SIZE_OFFSET);
981         lha->mtime = archive_le32dec(p + H2_TIME_OFFSET);
982         lha->crc = archive_le16dec(p + H2_CRC_OFFSET);
983         lha->setflag |= CRC_IS_SET;
984
985         if (lha->header_size < H2_FIXED_SIZE) {
986                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
987                     "Invalid LHa header size");
988                 return (ARCHIVE_FATAL);
989         }
990
991         header_crc = lha_crc16(0, p, H2_FIXED_SIZE);
992         __archive_read_consume(a, H2_FIXED_SIZE);
993
994         /* Read extended headers */
995         err = lha_read_file_extended_header(a, lha, &header_crc, 2,
996                   lha->header_size - H2_FIXED_SIZE, &extdsize);
997         if (err < ARCHIVE_WARN)
998                 return (err);
999
1000         /* Calculate a padding size. The result will be normally 0 or 1. */
1001         padding = (int)lha->header_size - (int)(H2_FIXED_SIZE + extdsize);
1002         if (padding > 0) {
1003                 if ((p = __archive_read_ahead(a, padding, NULL)) == NULL)
1004                         return (truncated_error(a));
1005                 header_crc = lha_crc16(header_crc, p, padding);
1006                 __archive_read_consume(a, padding);
1007         }
1008
1009         if (header_crc != lha->header_crc) {
1010                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1011                     "LHa header CRC error");
1012                 return (ARCHIVE_FATAL);
1013         }
1014         return (err);
1015 }
1016
1017 /*
1018  * Header 3 format
1019  *
1020  * +0           +2               +7                  +11               +15
1021  * +------------+----------------+-------------------+-----------------+
1022  * | 0x04 fixed |compression type|compressed size(*2)|uncompressed size|
1023  * +------------+----------------+-------------------+-----------------+
1024  *  <-------------------------------(*1)-------------------------------*
1025  *
1026  * +15               +19          +20              +21        +23         +24
1027  * +-----------------+------------+----------------+----------+-----------+
1028  * |date/time(time_t)| 0x20 fixed |header level(=3)|file CRC16|  creator  |
1029  * +-----------------+------------+----------------+----------+-----------+
1030  * *--------------------------------(*1)----------------------------------*
1031  *
1032  * +24             +28              +32                 +32+(*3)
1033  * +---------------+----------------+-------------------+-----------------+
1034  * |header size(*1)|next header size|extended header(*3)| compressed data |
1035  * +---------------+----------------+-------------------+-----------------+
1036  * *------------------------(*1)-----------------------> <------(*2)----->
1037  *
1038  */
1039 #define H3_FIELD_LEN_OFFSET     0
1040 #define H3_COMP_SIZE_OFFSET     7
1041 #define H3_ORIG_SIZE_OFFSET     11
1042 #define H3_TIME_OFFSET          15
1043 #define H3_CRC_OFFSET           21
1044 #define H3_HEADER_SIZE_OFFSET   24
1045 #define H3_FIXED_SIZE           28
1046 static int
1047 lha_read_file_header_3(struct archive_read *a, struct lha *lha)
1048 {
1049         const unsigned char *p;
1050         size_t extdsize;
1051         int err;
1052         uint16_t header_crc;
1053
1054         if ((p = __archive_read_ahead(a, H3_FIXED_SIZE, NULL)) == NULL)
1055                 return (truncated_error(a));
1056
1057         if (archive_le16dec(p + H3_FIELD_LEN_OFFSET) != 4)
1058                 goto invalid;
1059         lha->header_size =archive_le32dec(p + H3_HEADER_SIZE_OFFSET);
1060         lha->compsize = archive_le32dec(p + H3_COMP_SIZE_OFFSET);
1061         lha->origsize = archive_le32dec(p + H3_ORIG_SIZE_OFFSET);
1062         lha->mtime = archive_le32dec(p + H3_TIME_OFFSET);
1063         lha->crc = archive_le16dec(p + H3_CRC_OFFSET);
1064         lha->setflag |= CRC_IS_SET;
1065
1066         if (lha->header_size < H3_FIXED_SIZE + 4)
1067                 goto invalid;
1068         header_crc = lha_crc16(0, p, H3_FIXED_SIZE);
1069         __archive_read_consume(a, H3_FIXED_SIZE);
1070
1071         /* Read extended headers */
1072         err = lha_read_file_extended_header(a, lha, &header_crc, 4,
1073                   lha->header_size - H3_FIXED_SIZE, &extdsize);
1074         if (err < ARCHIVE_WARN)
1075                 return (err);
1076
1077         if (header_crc != lha->header_crc) {
1078                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1079                     "LHa header CRC error");
1080                 return (ARCHIVE_FATAL);
1081         }
1082         return (err);
1083 invalid:
1084         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1085             "Invalid LHa header");
1086         return (ARCHIVE_FATAL);
1087 }
1088
1089 /*
1090  * Extended header format
1091  *
1092  * +0             +2        +3  -- used in header 1 and 2
1093  * +0             +4        +5  -- used in header 3
1094  * +--------------+---------+-------------------+--------------+--
1095  * |ex-header size|header id|        data       |ex-header size| .......
1096  * +--------------+---------+-------------------+--------------+--
1097  *  <-------------( ex-header size)------------> <-- next extended header --*
1098  *
1099  * If the ex-header size is zero, it marks the end of the extended
1100  * headers.
1101  *
1102  */
1103 static int
1104 lha_read_file_extended_header(struct archive_read *a, struct lha *lha,
1105     uint16_t *crc, int sizefield_length, size_t limitsize, size_t *total_size)
1106 {
1107         const void *h;
1108         const unsigned char *extdheader;
1109         size_t  extdsize;
1110         size_t  datasize;
1111         unsigned int i;
1112         unsigned char extdtype;
1113
1114 #define EXT_HEADER_CRC          0x00            /* Header CRC and information*/
1115 #define EXT_FILENAME            0x01            /* Filename                 */
1116 #define EXT_DIRECTORY           0x02            /* Directory name           */
1117 #define EXT_DOS_ATTR            0x40            /* MS-DOS attribute         */
1118 #define EXT_TIMESTAMP           0x41            /* Windows time stamp       */
1119 #define EXT_FILESIZE            0x42            /* Large file size          */
1120 #define EXT_TIMEZONE            0x43            /* Time zone                */
1121 #define EXT_UTF16_FILENAME      0x44            /* UTF-16 filename          */
1122 #define EXT_UTF16_DIRECTORY     0x45            /* UTF-16 directory name    */
1123 #define EXT_CODEPAGE            0x46            /* Codepage                 */
1124 #define EXT_UNIX_MODE           0x50            /* File permission          */
1125 #define EXT_UNIX_GID_UID        0x51            /* gid,uid                  */
1126 #define EXT_UNIX_GNAME          0x52            /* Group name               */
1127 #define EXT_UNIX_UNAME          0x53            /* User name                */
1128 #define EXT_UNIX_MTIME          0x54            /* Modified time            */
1129 #define EXT_OS2_NEW_ATTR        0x7f            /* new attribute(OS/2 only) */
1130 #define EXT_NEW_ATTR            0xff            /* new attribute            */
1131
1132         *total_size = sizefield_length;
1133
1134         for (;;) {
1135                 /* Read an extended header size. */
1136                 if ((h =
1137                     __archive_read_ahead(a, sizefield_length, NULL)) == NULL)
1138                         return (truncated_error(a));
1139                 /* Check if the size is the zero indicates the end of the
1140                  * extended header. */
1141                 if (sizefield_length == sizeof(uint16_t))
1142                         extdsize = archive_le16dec(h);
1143                 else
1144                         extdsize = archive_le32dec(h);
1145                 if (extdsize == 0) {
1146                         /* End of extended header */
1147                         if (crc != NULL)
1148                                 *crc = lha_crc16(*crc, h, sizefield_length);
1149                         __archive_read_consume(a, sizefield_length);
1150                         return (ARCHIVE_OK);
1151                 }
1152
1153                 /* Sanity check to the extended header size. */
1154                 if (((uint64_t)*total_size + extdsize) >
1155                                     (uint64_t)limitsize ||
1156                     extdsize <= (size_t)sizefield_length)
1157                         goto invalid;
1158
1159                 /* Read the extended header. */
1160                 if ((h = __archive_read_ahead(a, extdsize, NULL)) == NULL)
1161                         return (truncated_error(a));
1162                 *total_size += extdsize;
1163
1164                 extdheader = (const unsigned char *)h;
1165                 /* Get the extended header type. */
1166                 extdtype = extdheader[sizefield_length];
1167                 /* Calculate an extended data size. */
1168                 datasize = extdsize - (1 + sizefield_length);
1169                 /* Skip an extended header size field and type field. */
1170                 extdheader += sizefield_length + 1;
1171
1172                 if (crc != NULL && extdtype != EXT_HEADER_CRC)
1173                         *crc = lha_crc16(*crc, h, extdsize);
1174                 switch (extdtype) {
1175                 case EXT_HEADER_CRC:
1176                         /* We only use a header CRC. Following data will not
1177                          * be used. */
1178                         if (datasize >= 2) {
1179                                 lha->header_crc = archive_le16dec(extdheader);
1180                                 if (crc != NULL) {
1181                                         static const char zeros[2] = {0, 0};
1182                                         *crc = lha_crc16(*crc, h,
1183                                             extdsize - datasize);
1184                                         /* CRC value itself as zero */
1185                                         *crc = lha_crc16(*crc, zeros, 2);
1186                                         *crc = lha_crc16(*crc,
1187                                             extdheader+2, datasize - 2);
1188                                 }
1189                         }
1190                         break;
1191                 case EXT_FILENAME:
1192                         if (datasize == 0) {
1193                                 /* maybe directory header */
1194                                 archive_string_empty(&lha->filename);
1195                                 break;
1196                         }
1197                         if (extdheader[0] == '\0')
1198                                 goto invalid;
1199                         archive_strncpy(&lha->filename,
1200                             (const char *)extdheader, datasize);
1201                         break;
1202                 case EXT_DIRECTORY:
1203                         if (datasize == 0 || extdheader[0] == '\0')
1204                                 /* no directory name data. exit this case. */
1205                                 goto invalid;
1206
1207                         archive_strncpy(&lha->dirname,
1208                             (const char *)extdheader, datasize);
1209                         /*
1210                          * Convert directory delimiter from 0xFF
1211                          * to '/' for local system.
1212                          */
1213                         for (i = 0; i < lha->dirname.length; i++) {
1214                                 if ((unsigned char)lha->dirname.s[i] == 0xFF)
1215                                         lha->dirname.s[i] = '/';
1216                         }
1217                         /* Is last character directory separator? */
1218                         if (lha->dirname.s[lha->dirname.length-1] != '/')
1219                                 /* invalid directory data */
1220                                 goto invalid;
1221                         break;
1222                 case EXT_DOS_ATTR:
1223                         if (datasize == 2)
1224                                 lha->dos_attr = (unsigned char)
1225                                     (archive_le16dec(extdheader) & 0xff);
1226                         break;
1227                 case EXT_TIMESTAMP:
1228                         if (datasize == (sizeof(uint64_t) * 3)) {
1229                                 lha->birthtime = lha_win_time(
1230                                     archive_le64dec(extdheader),
1231                                     &lha->birthtime_tv_nsec);
1232                                 extdheader += sizeof(uint64_t);
1233                                 lha->mtime = lha_win_time(
1234                                     archive_le64dec(extdheader),
1235                                     &lha->mtime_tv_nsec);
1236                                 extdheader += sizeof(uint64_t);
1237                                 lha->atime = lha_win_time(
1238                                     archive_le64dec(extdheader),
1239                                     &lha->atime_tv_nsec);
1240                                 lha->setflag |= BIRTHTIME_IS_SET |
1241                                     ATIME_IS_SET;
1242                         }
1243                         break;
1244                 case EXT_FILESIZE:
1245                         if (datasize == sizeof(uint64_t) * 2) {
1246                                 lha->compsize = archive_le64dec(extdheader);
1247                                 extdheader += sizeof(uint64_t);
1248                                 lha->origsize = archive_le64dec(extdheader);
1249                         }
1250                         break;
1251                 case EXT_CODEPAGE:
1252                         /* Get an archived filename charset from codepage.
1253                          * This overwrites the charset specified by
1254                          * hdrcharset option. */
1255                         if (datasize == sizeof(uint32_t)) {
1256                                 struct archive_string cp;
1257                                 const char *charset;
1258
1259                                 archive_string_init(&cp);
1260                                 switch (archive_le32dec(extdheader)) {
1261                                 case 65001: /* UTF-8 */
1262                                         charset = "UTF-8";
1263                                         break;
1264                                 default:
1265                                         archive_string_sprintf(&cp, "CP%d",
1266                                             (int)archive_le32dec(extdheader));
1267                                         charset = cp.s;
1268                                         break;
1269                                 }
1270                                 lha->sconv =
1271                                     archive_string_conversion_from_charset(
1272                                         &(a->archive), charset, 1);
1273                                 archive_string_free(&cp);
1274                                 if (lha->sconv == NULL)
1275                                         return (ARCHIVE_FATAL);
1276                         }
1277                         break;
1278                 case EXT_UNIX_MODE:
1279                         if (datasize == sizeof(uint16_t)) {
1280                                 lha->mode = archive_le16dec(extdheader);
1281                                 lha->setflag |= UNIX_MODE_IS_SET;
1282                         }
1283                         break;
1284                 case EXT_UNIX_GID_UID:
1285                         if (datasize == (sizeof(uint16_t) * 2)) {
1286                                 lha->gid = archive_le16dec(extdheader);
1287                                 lha->uid = archive_le16dec(extdheader+2);
1288                         }
1289                         break;
1290                 case EXT_UNIX_GNAME:
1291                         if (datasize > 0)
1292                                 archive_strncpy(&lha->gname,
1293                                     (const char *)extdheader, datasize);
1294                         break;
1295                 case EXT_UNIX_UNAME:
1296                         if (datasize > 0)
1297                                 archive_strncpy(&lha->uname,
1298                                     (const char *)extdheader, datasize);
1299                         break;
1300                 case EXT_UNIX_MTIME:
1301                         if (datasize == sizeof(uint32_t))
1302                                 lha->mtime = archive_le32dec(extdheader);
1303                         break;
1304                 case EXT_OS2_NEW_ATTR:
1305                         /* This extended header is OS/2 depend. */
1306                         if (datasize == 16) {
1307                                 lha->dos_attr = (unsigned char)
1308                                     (archive_le16dec(extdheader) & 0xff);
1309                                 lha->mode = archive_le16dec(extdheader+2);
1310                                 lha->gid = archive_le16dec(extdheader+4);
1311                                 lha->uid = archive_le16dec(extdheader+6);
1312                                 lha->birthtime = archive_le32dec(extdheader+8);
1313                                 lha->atime = archive_le32dec(extdheader+12);
1314                                 lha->setflag |= UNIX_MODE_IS_SET
1315                                     | BIRTHTIME_IS_SET | ATIME_IS_SET;
1316                         }
1317                         break;
1318                 case EXT_NEW_ATTR:
1319                         if (datasize == 20) {
1320                                 lha->mode = (mode_t)archive_le32dec(extdheader);
1321                                 lha->gid = archive_le32dec(extdheader+4);
1322                                 lha->uid = archive_le32dec(extdheader+8);
1323                                 lha->birthtime = archive_le32dec(extdheader+12);
1324                                 lha->atime = archive_le32dec(extdheader+16);
1325                                 lha->setflag |= UNIX_MODE_IS_SET
1326                                     | BIRTHTIME_IS_SET | ATIME_IS_SET;
1327                         }
1328                         break;
1329                 case EXT_TIMEZONE:              /* Not supported */
1330                 case EXT_UTF16_FILENAME:        /* Not supported */
1331                 case EXT_UTF16_DIRECTORY:       /* Not supported */
1332                 default:
1333                         break;
1334                 }
1335
1336                 __archive_read_consume(a, extdsize);
1337         }
1338 invalid:
1339         archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1340             "Invalid extended LHa header");
1341         return (ARCHIVE_FATAL);
1342 }
1343
1344 static int
1345 lha_end_of_entry(struct archive_read *a)
1346 {
1347         struct lha *lha = (struct lha *)(a->format->data);
1348         int r = ARCHIVE_EOF;
1349
1350         if (!lha->end_of_entry_cleanup) {
1351                 if ((lha->setflag & CRC_IS_SET) &&
1352                     lha->crc != lha->entry_crc_calculated) {
1353                         archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1354                             "LHa data CRC error");
1355                         r = ARCHIVE_WARN;
1356                 }
1357
1358                 /* End-of-entry cleanup done. */
1359                 lha->end_of_entry_cleanup = 1;
1360         }
1361         return (r);
1362 }
1363
1364 static int
1365 archive_read_format_lha_read_data(struct archive_read *a,
1366     const void **buff, size_t *size, int64_t *offset)
1367 {
1368         struct lha *lha = (struct lha *)(a->format->data);
1369         int r;
1370
1371         if (lha->entry_unconsumed) {
1372                 /* Consume as much as the decompressor actually used. */
1373                 __archive_read_consume(a, lha->entry_unconsumed);
1374                 lha->entry_unconsumed = 0;
1375         }
1376         if (lha->end_of_entry) {
1377                 *offset = lha->entry_offset;
1378                 *size = 0;
1379                 *buff = NULL;
1380                 return (lha_end_of_entry(a));
1381         }
1382
1383         if (lha->entry_is_compressed)
1384                 r =  lha_read_data_lzh(a, buff, size, offset);
1385         else
1386                 /* No compression. */
1387                 r =  lha_read_data_none(a, buff, size, offset);
1388         return (r);
1389 }
1390
1391 /*
1392  * Read a file content in no compression.
1393  *
1394  * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
1395  * lha->end_of_entry if it consumes all of the data.
1396  */
1397 static int
1398 lha_read_data_none(struct archive_read *a, const void **buff,
1399     size_t *size, int64_t *offset)
1400 {
1401         struct lha *lha = (struct lha *)(a->format->data);
1402         ssize_t bytes_avail;
1403
1404         if (lha->entry_bytes_remaining == 0) {
1405                 *buff = NULL;
1406                 *size = 0;
1407                 *offset = lha->entry_offset;
1408                 lha->end_of_entry = 1;
1409                 return (ARCHIVE_OK);
1410         }
1411         /*
1412          * Note: '1' here is a performance optimization.
1413          * Recall that the decompression layer returns a count of
1414          * available bytes; asking for more than that forces the
1415          * decompressor to combine reads by copying data.
1416          */
1417         *buff = __archive_read_ahead(a, 1, &bytes_avail);
1418         if (bytes_avail <= 0) {
1419                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1420                     "Truncated LHa file data");
1421                 return (ARCHIVE_FATAL);
1422         }
1423         if (bytes_avail > lha->entry_bytes_remaining)
1424                 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1425         lha->entry_crc_calculated =
1426             lha_crc16(lha->entry_crc_calculated, *buff, bytes_avail);
1427         *size = bytes_avail;
1428         *offset = lha->entry_offset;
1429         lha->entry_offset += bytes_avail;
1430         lha->entry_bytes_remaining -= bytes_avail;
1431         if (lha->entry_bytes_remaining == 0)
1432                 lha->end_of_entry = 1;
1433         lha->entry_unconsumed = bytes_avail;
1434         return (ARCHIVE_OK);
1435 }
1436
1437 /*
1438  * Read a file content in LZHUFF encoding.
1439  *
1440  * Returns ARCHIVE_OK if successful, returns ARCHIVE_WARN if compression is
1441  * unsupported, ARCHIVE_FATAL otherwise, sets lha->end_of_entry if it consumes
1442  * all of the data.
1443  */
1444 static int
1445 lha_read_data_lzh(struct archive_read *a, const void **buff,
1446     size_t *size, int64_t *offset)
1447 {
1448         struct lha *lha = (struct lha *)(a->format->data);
1449         ssize_t bytes_avail;
1450         int r;
1451
1452         /* If we haven't yet read any data, initialize the decompressor. */
1453         if (!lha->decompress_init) {
1454                 r = lzh_decode_init(&(lha->strm), lha->method);
1455                 switch (r) {
1456                 case ARCHIVE_OK:
1457                         break;
1458                 case ARCHIVE_FAILED:
1459                         /* Unsupported compression. */
1460                         *buff = NULL;
1461                         *size = 0;
1462                         *offset = 0;
1463                         archive_set_error(&a->archive,
1464                             ARCHIVE_ERRNO_FILE_FORMAT,
1465                             "Unsupported lzh compression method -%c%c%c-",
1466                             lha->method[0], lha->method[1], lha->method[2]);
1467                         /* We know compressed size; just skip it. */
1468                         archive_read_format_lha_read_data_skip(a);
1469                         return (ARCHIVE_WARN);
1470                 default:
1471                         archive_set_error(&a->archive, ENOMEM,
1472                             "Couldn't allocate memory "
1473                             "for lzh decompression");
1474                         return (ARCHIVE_FATAL);
1475                 }
1476                 /* We've initialized decompression for this stream. */
1477                 lha->decompress_init = 1;
1478                 lha->strm.avail_out = 0;
1479                 lha->strm.total_out = 0;
1480         }
1481
1482         /*
1483          * Note: '1' here is a performance optimization.
1484          * Recall that the decompression layer returns a count of
1485          * available bytes; asking for more than that forces the
1486          * decompressor to combine reads by copying data.
1487          */
1488         lha->strm.next_in = __archive_read_ahead(a, 1, &bytes_avail);
1489         if (bytes_avail <= 0) {
1490                 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1491                     "Truncated LHa file body");
1492                 return (ARCHIVE_FATAL);
1493         }
1494         if (bytes_avail > lha->entry_bytes_remaining)
1495                 bytes_avail = (ssize_t)lha->entry_bytes_remaining;
1496
1497         lha->strm.avail_in = (int)bytes_avail;
1498         lha->strm.total_in = 0;
1499         lha->strm.avail_out = 0;
1500
1501         r = lzh_decode(&(lha->strm), bytes_avail == lha->entry_bytes_remaining);
1502         switch (r) {
1503         case ARCHIVE_OK:
1504                 break;
1505         case ARCHIVE_EOF:
1506                 lha->end_of_entry = 1;
1507                 break;
1508         default:
1509                 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1510                     "Bad lzh data");
1511                 return (ARCHIVE_FAILED);
1512         }
1513         lha->entry_unconsumed = lha->strm.total_in;
1514         lha->entry_bytes_remaining -= lha->strm.total_in;
1515
1516         if (lha->strm.avail_out) {
1517                 *offset = lha->entry_offset;
1518                 *size = lha->strm.avail_out;
1519                 *buff = lha->strm.ref_ptr;
1520                 lha->entry_crc_calculated =
1521                     lha_crc16(lha->entry_crc_calculated, *buff, *size);
1522                 lha->entry_offset += *size;
1523         } else {
1524                 *offset = lha->entry_offset;
1525                 *size = 0;
1526                 *buff = NULL;
1527                 if (lha->end_of_entry)
1528                         return (lha_end_of_entry(a));
1529         }
1530         return (ARCHIVE_OK);
1531 }
1532
1533 /*
1534  * Skip a file content.
1535  */
1536 static int
1537 archive_read_format_lha_read_data_skip(struct archive_read *a)
1538 {
1539         struct lha *lha;
1540         int64_t bytes_skipped;
1541
1542         lha = (struct lha *)(a->format->data);
1543
1544         if (lha->entry_unconsumed) {
1545                 /* Consume as much as the decompressor actually used. */
1546                 __archive_read_consume(a, lha->entry_unconsumed);
1547                 lha->entry_unconsumed = 0;
1548         }
1549
1550         /* if we've already read to end of data, we're done. */
1551         if (lha->end_of_entry_cleanup)
1552                 return (ARCHIVE_OK);
1553
1554         /*
1555          * If the length is at the beginning, we can skip the
1556          * compressed data much more quickly.
1557          */
1558         bytes_skipped = __archive_read_consume(a, lha->entry_bytes_remaining);
1559         if (bytes_skipped < 0)
1560                 return (ARCHIVE_FATAL);
1561
1562         /* This entry is finished and done. */
1563         lha->end_of_entry_cleanup = lha->end_of_entry = 1;
1564         return (ARCHIVE_OK);
1565 }
1566
1567 static int
1568 archive_read_format_lha_cleanup(struct archive_read *a)
1569 {
1570         struct lha *lha = (struct lha *)(a->format->data);
1571
1572         lzh_decode_free(&(lha->strm));
1573         archive_string_free(&(lha->dirname));
1574         archive_string_free(&(lha->filename));
1575         archive_string_free(&(lha->uname));
1576         archive_string_free(&(lha->gname));
1577         archive_wstring_free(&(lha->ws));
1578         free(lha);
1579         (a->format->data) = NULL;
1580         return (ARCHIVE_OK);
1581 }
1582
1583 /*
1584  * 'LHa for UNIX' utility has archived a symbolic-link name after
1585  * a pathname with '|' character.
1586  * This function extracts the symbolic-link name from the pathname.
1587  *
1588  * example.
1589  *   1. a symbolic-name is 'aaa/bb/cc'
1590  *   2. a filename is 'xxx/bbb'
1591  *  then a archived pathname is 'xxx/bbb|aaa/bb/cc'
1592  */
1593 static int
1594 lha_parse_linkname(struct archive_string *linkname,
1595     struct archive_string *pathname)
1596 {
1597         char *  linkptr;
1598         size_t  symlen;
1599
1600         linkptr = strchr(pathname->s, '|');
1601         if (linkptr != NULL) {
1602                 symlen = strlen(linkptr + 1);
1603                 archive_strncpy(linkname, linkptr+1, symlen);
1604
1605                 *linkptr = 0;
1606                 pathname->length = strlen(pathname->s);
1607
1608                 return (1);
1609         }
1610         return (0);
1611 }
1612
/*
 * Convert an MS-DOS date/time pair into a time_t.
 *
 * `p' points at four bytes: a little-endian 16-bit time word followed
 * by a little-endian 16-bit date word.  The broken-down time is handed
 * to mktime() with tm_isdst = -1.
 */
static time_t
lha_dos_time(const unsigned char *p)
{
        struct tm ts;
        const int dos_time = archive_le16dec(p);
        const int dos_date = archive_le16dec(p + 2);

        memset(&ts, 0, sizeof(ts));

        /* Date word: 7 bits year (from 1980), 4 bits month, 5 bits day. */
        ts.tm_mday = dos_date & 0x1f;
        ts.tm_mon = ((dos_date >> 5) & 0x0f) - 1;       /* tm_mon is 0-based. */
        ts.tm_year = ((dos_date >> 9) & 0x7f) + 80;     /* Years since 1900. */

        /* Time word: 5 bits hour, 6 bits minute, 5 bits of 2-second units. */
        ts.tm_sec = (dos_time << 1) & 0x3e;
        ts.tm_min = (dos_time >> 5) & 0x3f;
        ts.tm_hour = (dos_time >> 11) & 0x1f;

        ts.tm_isdst = -1;
        return (mktime(&ts));
}
1633
/*
 * Convert an MS-Windows time stamp (a count of 100-nanosecond ticks)
 * into seconds since 1970-01-01 00:00:00 UTC.
 *
 * If `ns' is not NULL it receives the sub-second part in nanoseconds.
 * Time stamps earlier than the Unix epoch yield 0 seconds and 0 ns.
 */
static time_t
lha_win_time(uint64_t wintime, long *ns)
{
        uint64_t ticks;
/* The Unix epoch expressed in Windows ticks. */
#define EPOC_TIME ARCHIVE_LITERAL_ULL(116444736000000000)

        if (wintime < EPOC_TIME) {
                if (ns != NULL)
                        *ns = 0;
                return (0);
        }

        ticks = wintime - EPOC_TIME;    /* Ticks since 1970-01-01 (UTC). */
        if (ns != NULL)
                *ns = (long)(ticks % 10000000) * 100;
        return (ticks / 10000000);
}
1651
/*
 * Add `size' bytes, starting `offset' bytes into `pp', to the 8-bit
 * checksum `sum' and return the result.  The addition wraps around
 * modulo 256.
 */
static unsigned char
lha_calcsum(unsigned char sum, const void *pp, int offset, size_t size)
{
        const unsigned char *cur = (const unsigned char *)pp + offset;
        const unsigned char *const end = cur + size;

        while (cur != end)
                sum += *cur++;
        return (sum);
}
1662
/*
 * Lookup tables for the CRC-16 used by LHA (reflected polynomial
 * 0xA001).  crc16tbl[0] is the ordinary byte-at-a-time table;
 * crc16tbl[1] is derived from it and lets lha_crc16() process two
 * bytes per step.
 */
static uint16_t crc16tbl[2][256];

/*
 * Fill in crc16tbl.  Only the first call does any work; later calls
 * return immediately.
 */
static void
lha_crc16_init(void)
{
        static int crc16init = 0;
        unsigned int idx, bit;

        if (crc16init)
                return;
        crc16init = 1;

        /* Table 0: run each byte value through eight shift/xor rounds. */
        for (idx = 0; idx < 256; idx++) {
                uint16_t crc = (uint16_t)idx;

                for (bit = 0; bit < 8; bit++) {
                        if (crc & 1)
                                crc = (crc >> 1) ^ 0xA001;
                        else
                                crc = crc >> 1;
                }
                crc16tbl[0][idx] = crc;
        }

        /* Table 1: the effect of one further zero byte on each table 0 entry. */
        for (idx = 0; idx < 256; idx++) {
                const uint16_t base = crc16tbl[0][idx];

                crc16tbl[1][idx] = (base >> 8) ^ crc16tbl[0][base & 0xff];
        }
}
1687
1688 static uint16_t
1689 lha_crc16(uint16_t crc, const void *pp, size_t len)
1690 {
1691         const unsigned char *p = (const unsigned char *)pp;
1692         const uint16_t *buff;
1693         const union {
1694                 uint32_t i;
1695                 char c[4];
1696         } u = { 0x01020304 };
1697
1698         if (len == 0)
1699                 return crc;
1700
1701         /* Process unaligned address. */
1702         if (((uintptr_t)p) & (uintptr_t)0x1) {
1703                 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1704                 len--;
1705         }
1706         buff = (const uint16_t *)p;
1707         /*
1708          * Modern C compiler such as GCC does not unroll automatically yet
1709          * without unrolling pragma, and Clang is so. So we should
1710          * unroll this loop for its performance.
1711          */
1712         for (;len >= 8; len -= 8) {
1713                 /* This if statement expects compiler optimization will
1714                  * remove the stament which will not be executed. */
1715 #if defined(_MSC_VER) && _MSC_VER >= 1400  /* Visual Studio */
1716 #  define bswap16(x) _byteswap_ushort(x)
1717 #elif (defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8) \
1718       || defined(__clang__)
1719 #  define bswap16(x) __builtin_bswap16(x)
1720 #else
1721 #  define bswap16(x) ((((x) >> 8) & 0xff) | ((x) << 8))
1722 #endif
1723 #define CRC16W  do {    \
1724                 if(u.c[0] == 1) { /* Big endian */              \
1725                         crc ^= bswap16(*buff); buff++;          \
1726                 } else                                          \
1727                         crc ^= *buff++;                         \
1728                 crc = crc16tbl[1][crc & 0xff] ^ crc16tbl[0][crc >> 8];\
1729 } while (0)
1730                 CRC16W;
1731                 CRC16W;
1732                 CRC16W;
1733                 CRC16W;
1734 #undef CRC16W
1735 #undef bswap16
1736         }
1737
1738         p = (const unsigned char *)buff;
1739         for (;len; len--) {
1740                 crc = (crc >> 8) ^ crc16tbl[0][(crc ^ *p++) & 0xff];
1741         }
1742         return crc;
1743 }
1744
1745 /*
1746  * Initialize LZHUF decoder.
1747  *
1748  * Returns ARCHIVE_OK if initialization was successful.
1749  * Returns ARCHIVE_FAILED if method is unsupported.
1750  * Returns ARCHIVE_FATAL if initialization failed; memory allocation
1751  * error occurred.
1752  */
1753 static int
1754 lzh_decode_init(struct lzh_stream *strm, const char *method)
1755 {
1756         struct lzh_dec *ds;
1757         int w_bits, w_size;
1758
1759         if (strm->ds == NULL) {
1760                 strm->ds = calloc(1, sizeof(*strm->ds));
1761                 if (strm->ds == NULL)
1762                         return (ARCHIVE_FATAL);
1763         }
1764         ds = strm->ds;
1765         ds->error = ARCHIVE_FAILED;
1766         if (method == NULL || method[0] != 'l' || method[1] != 'h')
1767                 return (ARCHIVE_FAILED);
1768         switch (method[2]) {
1769         case '5':
1770                 w_bits = 13;/* 8KiB for window */
1771                 break;
1772         case '6':
1773                 w_bits = 15;/* 32KiB for window */
1774                 break;
1775         case '7':
1776                 w_bits = 16;/* 64KiB for window */
1777                 break;
1778         default:
1779                 return (ARCHIVE_FAILED);/* Not supported. */
1780         }
1781         ds->error = ARCHIVE_FATAL;
1782         /* Expand a window size up to 128 KiB for decompressing process
1783          * performance whatever its original window size is. */
1784         ds->w_size = 1U << 17;
1785         ds->w_mask = ds->w_size -1;
1786         if (ds->w_buff == NULL) {
1787                 ds->w_buff = malloc(ds->w_size);
1788                 if (ds->w_buff == NULL)
1789                         return (ARCHIVE_FATAL);
1790         }
1791         w_size = 1U << w_bits;
1792         memset(ds->w_buff + ds->w_size - w_size, 0x20, w_size);
1793         ds->w_pos = 0;
1794         ds->state = 0;
1795         ds->pos_pt_len_size = w_bits + 1;
1796         ds->pos_pt_len_bits = (w_bits == 15 || w_bits == 16)? 5: 4;
1797         ds->literal_pt_len_size = PT_BITLEN_SIZE;
1798         ds->literal_pt_len_bits = 5;
1799         ds->br.cache_buffer = 0;
1800         ds->br.cache_avail = 0;
1801
1802         if (lzh_huffman_init(&(ds->lt), LT_BITLEN_SIZE, 16)
1803             != ARCHIVE_OK)
1804                 return (ARCHIVE_FATAL);
1805         ds->lt.len_bits = 9;
1806         if (lzh_huffman_init(&(ds->pt), PT_BITLEN_SIZE, 16)
1807             != ARCHIVE_OK)
1808                 return (ARCHIVE_FATAL);
1809         ds->error = 0;
1810
1811         return (ARCHIVE_OK);
1812 }
1813
1814 /*
1815  * Release LZHUF decoder.
1816  */
1817 static void
1818 lzh_decode_free(struct lzh_stream *strm)
1819 {
1820
1821         if (strm->ds == NULL)
1822                 return;
1823         free(strm->ds->w_buff);
1824         lzh_huffman_free(&(strm->ds->lt));
1825         lzh_huffman_free(&(strm->ds->pt));
1826         free(strm->ds);
1827         strm->ds = NULL;
1828 }
1829
/*
 * Bit stream reader.
 *
 * The reader keeps not-yet-consumed bits in br->cache_buffer;
 * br->cache_avail is the number of valid bits, which occupy the
 * low-order end of the buffer.
 */
/* True if the cache buffer holds at least n bits. */
#define lzh_br_has(br, n)       ((br)->cache_avail >= (n))
/* Peek at the next n bits (n <= 16) without consuming them. */
#define lzh_br_bits(br, n)                              \
        (((uint16_t)((br)->cache_buffer >>              \
                ((br)->cache_avail - (n)))) & cache_masks[n])
/* Like lzh_br_bits(), for the case where fewer than n bits are cached:
 * the available bits are shifted up and the missing low bits read as 0. */
#define lzh_br_bits_forced(br, n)                       \
        (((uint16_t)((br)->cache_buffer <<              \
                ((n) - (br)->cache_avail))) & cache_masks[n])
/* Read ahead to make sure the cache buffer has enough compressed data
 * for what we are about to use.
 *  True  : completed, there is enough data in the cache buffer.
 *  False : strm->next_in ran empty; the following bytes have to be
 *          supplied first. */
#define lzh_br_read_ahead_0(strm, br, n)        \
        (lzh_br_has(br, (n)) || lzh_br_fillup(strm, br))
/*  True  : the cache buffer has as many bits as we need.
 *  False : there are not enough bits in the cache buffer;
 *          the following bytes have to be supplied if possible. */
#define lzh_br_read_ahead(strm, br, n)  \
        (lzh_br_read_ahead_0((strm), (br), (n)) || lzh_br_has((br), (n)))

/* Record that n bits were consumed, or give n bits back. */
#define lzh_br_consume(br, n)   ((br)->cache_avail -= (n))
#define lzh_br_unconsume(br, n) ((br)->cache_avail += (n))

/* cache_masks[n] keeps the low n bits of a value (all 16 bits for n >= 16). */
static const uint16_t cache_masks[] = {
        0x0000, 0x0001, 0x0003, 0x0007,
        0x000F, 0x001F, 0x003F, 0x007F,
        0x00FF, 0x01FF, 0x03FF, 0x07FF,
        0x0FFF, 0x1FFF, 0x3FFF, 0x7FFF,
        0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
};
1866
1867 /*
1868  * Shift away used bits in the cache data and fill it up with following bits.
1869  * Call this when cache buffer does not have enough bits you need.
1870  *
1871  * Returns 1 if the cache buffer is full.
1872  * Returns 0 if the cache buffer is not full; input buffer is empty.
1873  */
1874 static int
1875 lzh_br_fillup(struct lzh_stream *strm, struct lzh_br *br)
1876 {
1877         int n = CACHE_BITS - br->cache_avail;
1878
1879         for (;;) {
1880                 const int x = n >> 3;
1881                 if (strm->avail_in >= x) {
1882                         switch (x) {
1883                         case 8:
1884                                 br->cache_buffer =
1885                                     ((uint64_t)strm->next_in[0]) << 56 |
1886                                     ((uint64_t)strm->next_in[1]) << 48 |
1887                                     ((uint64_t)strm->next_in[2]) << 40 |
1888                                     ((uint64_t)strm->next_in[3]) << 32 |
1889                                     ((uint32_t)strm->next_in[4]) << 24 |
1890                                     ((uint32_t)strm->next_in[5]) << 16 |
1891                                     ((uint32_t)strm->next_in[6]) << 8 |
1892                                      (uint32_t)strm->next_in[7];
1893                                 strm->next_in += 8;
1894                                 strm->avail_in -= 8;
1895                                 br->cache_avail += 8 * 8;
1896                                 return (1);
1897                         case 7:
1898                                 br->cache_buffer =
1899                                    (br->cache_buffer << 56) |
1900                                     ((uint64_t)strm->next_in[0]) << 48 |
1901                                     ((uint64_t)strm->next_in[1]) << 40 |
1902                                     ((uint64_t)strm->next_in[2]) << 32 |
1903                                     ((uint32_t)strm->next_in[3]) << 24 |
1904                                     ((uint32_t)strm->next_in[4]) << 16 |
1905                                     ((uint32_t)strm->next_in[5]) << 8 |
1906                                      (uint32_t)strm->next_in[6];
1907                                 strm->next_in += 7;
1908                                 strm->avail_in -= 7;
1909                                 br->cache_avail += 7 * 8;
1910                                 return (1);
1911                         case 6:
1912                                 br->cache_buffer =
1913                                    (br->cache_buffer << 48) |
1914                                     ((uint64_t)strm->next_in[0]) << 40 |
1915                                     ((uint64_t)strm->next_in[1]) << 32 |
1916                                     ((uint32_t)strm->next_in[2]) << 24 |
1917                                     ((uint32_t)strm->next_in[3]) << 16 |
1918                                     ((uint32_t)strm->next_in[4]) << 8 |
1919                                      (uint32_t)strm->next_in[5];
1920                                 strm->next_in += 6;
1921                                 strm->avail_in -= 6;
1922                                 br->cache_avail += 6 * 8;
1923                                 return (1);
1924                         case 0:
1925                                 /* We have enough compressed data in
1926                                  * the cache buffer.*/
1927                                 return (1);
1928                         default:
1929                                 break;
1930                         }
1931                 }
1932                 if (strm->avail_in == 0) {
1933                         /* There is not enough compressed data to fill up the
1934                          * cache buffer. */
1935                         return (0);
1936                 }
1937                 br->cache_buffer =
1938                    (br->cache_buffer << 8) | *strm->next_in++;
1939                 strm->avail_in--;
1940                 br->cache_avail += 8;
1941                 n -= 8;
1942         }
1943 }
1944
1945 /*
1946  * Decode LZHUF.
1947  *
1948  * 1. Returns ARCHIVE_OK if output buffer or input buffer are empty.
1949  *    Please set available buffer and call this function again.
1950  * 2. Returns ARCHIVE_EOF if decompression has been completed.
1951  * 3. Returns ARCHIVE_FAILED if an error occurred; compressed data
1952  *    is broken or you do not set 'last' flag properly.
1953  * 4. 'last' flag is very important, you must set 1 to the flag if there
1954  *    is no input data. The lha compressed data format does not provide how
1955  *    to know the compressed data is really finished.
1956  *    Note: lha command utility check if the total size of output bytes is
1957  *    reached the uncompressed size recorded in its header. it does not mind
1958  *    that the decoding process is properly finished.
1959  *    GNU ZIP can decompress another compressed file made by SCO LZH compress.
1960  *    it handles EOF as null to fill read buffer with zero until the decoding
1961  *    process meet 2 bytes of zeros at reading a size of a next chunk, so the
1962  *    zeros are treated as the mark of the end of the data although the zeros
1963  *    is dummy, not the file data.
1964  */
1965 static int      lzh_read_blocks(struct lzh_stream *, int);
1966 static int      lzh_decode_blocks(struct lzh_stream *, int);
1967 #define ST_RD_BLOCK             0
1968 #define ST_RD_PT_1              1
1969 #define ST_RD_PT_2              2
1970 #define ST_RD_PT_3              3
1971 #define ST_RD_PT_4              4
1972 #define ST_RD_LITERAL_1         5
1973 #define ST_RD_LITERAL_2         6
1974 #define ST_RD_LITERAL_3         7
1975 #define ST_RD_POS_DATA_1        8
1976 #define ST_GET_LITERAL          9
1977 #define ST_GET_POS_1            10
1978 #define ST_GET_POS_2            11
1979 #define ST_COPY_DATA            12
1980
1981 static int
1982 lzh_decode(struct lzh_stream *strm, int last)
1983 {
1984         struct lzh_dec *ds = strm->ds;
1985         int avail_in;
1986         int r;
1987
1988         if (ds->error)
1989                 return (ds->error);
1990
1991         avail_in = strm->avail_in;
1992         do {
1993                 if (ds->state < ST_GET_LITERAL)
1994                         r = lzh_read_blocks(strm, last);
1995                 else
1996                         r = lzh_decode_blocks(strm, last);
1997         } while (r == 100);
1998         strm->total_in += avail_in - strm->avail_in;
1999         return (r);
2000 }
2001
2002 static void
2003 lzh_emit_window(struct lzh_stream *strm, size_t s)
2004 {
2005         strm->ref_ptr = strm->ds->w_buff;
2006         strm->avail_out = (int)s;
2007         strm->total_out += s;
2008 }
2009
2010 static int
2011 lzh_read_blocks(struct lzh_stream *strm, int last)
2012 {
2013         struct lzh_dec *ds = strm->ds;
2014         struct lzh_br *br = &(ds->br);
2015         int c = 0, i;
2016         unsigned rbits;
2017
2018         for (;;) {
2019                 switch (ds->state) {
2020                 case ST_RD_BLOCK:
2021                         /*
2022                          * Read a block number indicates how many blocks
2023                          * we will handle. The block is composed of a
2024                          * literal and a match, sometimes a literal only
2025                          * in particular, there are no reference data at
2026                          * the beginning of the decompression.
2027                          */
2028                         if (!lzh_br_read_ahead_0(strm, br, 16)) {
2029                                 if (!last)
2030                                         /* We need following data. */
2031                                         return (ARCHIVE_OK);
2032                                 if (lzh_br_has(br, 8)) {
2033                                         /*
2034                                          * It seems there are extra bits.
2035                                          *  1. Compressed data is broken.
2036                                          *  2. `last' flag does not properly
2037                                          *     set.
2038                                          */
2039                                         goto failed;
2040                                 }
2041                                 if (ds->w_pos > 0) {
2042                                         lzh_emit_window(strm, ds->w_pos);
2043                                         ds->w_pos = 0;
2044                                         return (ARCHIVE_OK);
2045                                 }
2046                                 /* End of compressed data; we have completely
2047                                  * handled all compressed data. */
2048                                 return (ARCHIVE_EOF);
2049                         }
2050                         ds->blocks_avail = lzh_br_bits(br, 16);
2051                         if (ds->blocks_avail == 0)
2052                                 goto failed;
2053                         lzh_br_consume(br, 16);
2054                         /*
2055                          * Read a literal table compressed in huffman
2056                          * coding.
2057                          */
2058                         ds->pt.len_size = ds->literal_pt_len_size;
2059                         ds->pt.len_bits = ds->literal_pt_len_bits;
2060                         ds->reading_position = 0;
2061                         /* FALL THROUGH */
2062                 case ST_RD_PT_1:
2063                         /* Note: ST_RD_PT_1, ST_RD_PT_2 and ST_RD_PT_4 are
2064                          * used in reading both a literal table and a
2065                          * position table. */
2066                         if (!lzh_br_read_ahead(strm, br, ds->pt.len_bits)) {
2067                                 if (last)
2068                                         goto failed;/* Truncated data. */
2069                                 ds->state = ST_RD_PT_1;
2070                                 return (ARCHIVE_OK);
2071                         }
2072                         ds->pt.len_avail = lzh_br_bits(br, ds->pt.len_bits);
2073                         lzh_br_consume(br, ds->pt.len_bits);
2074                         /* FALL THROUGH */
2075                 case ST_RD_PT_2:
2076                         if (ds->pt.len_avail == 0) {
2077                                 /* There is no bitlen. */
2078                                 if (!lzh_br_read_ahead(strm, br,
2079                                     ds->pt.len_bits)) {
2080                                         if (last)
2081                                                 goto failed;/* Truncated data.*/
2082                                         ds->state = ST_RD_PT_2;
2083                                         return (ARCHIVE_OK);
2084                                 }
2085                                 if (!lzh_make_fake_table(&(ds->pt),
2086                                     lzh_br_bits(br, ds->pt.len_bits)))
2087                                         goto failed;/* Invalid data. */
2088                                 lzh_br_consume(br, ds->pt.len_bits);
2089                                 if (ds->reading_position)
2090                                         ds->state = ST_GET_LITERAL;
2091                                 else
2092                                         ds->state = ST_RD_LITERAL_1;
2093                                 break;
2094                         } else if (ds->pt.len_avail > ds->pt.len_size)
2095                                 goto failed;/* Invalid data. */
2096                         ds->loop = 0;
2097                         memset(ds->pt.freq, 0, sizeof(ds->pt.freq));
2098                         if (ds->pt.len_avail < 3 ||
2099                             ds->pt.len_size == ds->pos_pt_len_size) {
2100                                 ds->state = ST_RD_PT_4;
2101                                 break;
2102                         }
2103                         /* FALL THROUGH */
2104                 case ST_RD_PT_3:
2105                         ds->loop = lzh_read_pt_bitlen(strm, ds->loop, 3);
2106                         if (ds->loop < 3) {
2107                                 if (ds->loop < 0 || last)
2108                                         goto failed;/* Invalid data. */
2109                                 /* Not completed, get following data. */
2110                                 ds->state = ST_RD_PT_3;
2111                                 return (ARCHIVE_OK);
2112                         }
2113                         /* There are some null in bitlen of the literal. */
2114                         if (!lzh_br_read_ahead(strm, br, 2)) {
2115                                 if (last)
2116                                         goto failed;/* Truncated data. */
2117                                 ds->state = ST_RD_PT_3;
2118                                 return (ARCHIVE_OK);
2119                         }
2120                         c = lzh_br_bits(br, 2);
2121                         lzh_br_consume(br, 2);
2122                         if (c > ds->pt.len_avail - 3)
2123                                 goto failed;/* Invalid data. */
2124                         for (i = 3; c-- > 0 ;)
2125                                 ds->pt.bitlen[i++] = 0;
2126                         ds->loop = i;
2127                         /* FALL THROUGH */
2128                 case ST_RD_PT_4:
2129                         ds->loop = lzh_read_pt_bitlen(strm, ds->loop,
2130                             ds->pt.len_avail);
2131                         if (ds->loop < ds->pt.len_avail) {
2132                                 if (ds->loop < 0 || last)
2133                                         goto failed;/* Invalid data. */
2134                                 /* Not completed, get following data. */
2135                                 ds->state = ST_RD_PT_4;
2136                                 return (ARCHIVE_OK);
2137                         }
2138                         if (!lzh_make_huffman_table(&(ds->pt)))
2139                                 goto failed;/* Invalid data */
2140                         if (ds->reading_position) {
2141                                 ds->state = ST_GET_LITERAL;
2142                                 break;
2143                         }
2144                         /* FALL THROUGH */
2145                 case ST_RD_LITERAL_1:
2146                         if (!lzh_br_read_ahead(strm, br, ds->lt.len_bits)) {
2147                                 if (last)
2148                                         goto failed;/* Truncated data. */
2149                                 ds->state = ST_RD_LITERAL_1;
2150                                 return (ARCHIVE_OK);
2151                         }
2152                         ds->lt.len_avail = lzh_br_bits(br, ds->lt.len_bits);
2153                         lzh_br_consume(br, ds->lt.len_bits);
2154                         /* FALL THROUGH */
2155                 case ST_RD_LITERAL_2:
2156                         if (ds->lt.len_avail == 0) {
2157                                 /* There is no bitlen. */
2158                                 if (!lzh_br_read_ahead(strm, br,
2159                                     ds->lt.len_bits)) {
2160                                         if (last)
2161                                                 goto failed;/* Truncated data.*/
2162                                         ds->state = ST_RD_LITERAL_2;
2163                                         return (ARCHIVE_OK);
2164                                 }
2165                                 if (!lzh_make_fake_table(&(ds->lt),
2166                                     lzh_br_bits(br, ds->lt.len_bits)))
2167                                         goto failed;/* Invalid data */
2168                                 lzh_br_consume(br, ds->lt.len_bits);
2169                                 ds->state = ST_RD_POS_DATA_1;
2170                                 break;
2171                         } else if (ds->lt.len_avail > ds->lt.len_size)
2172                                 goto failed;/* Invalid data */
2173                         ds->loop = 0;
2174                         memset(ds->lt.freq, 0, sizeof(ds->lt.freq));
2175                         /* FALL THROUGH */
2176                 case ST_RD_LITERAL_3:
2177                         i = ds->loop;
2178                         while (i < ds->lt.len_avail) {
2179                                 if (!lzh_br_read_ahead(strm, br,
2180                                     ds->pt.max_bits)) {
2181                                         if (last)
2182                                                 goto failed;/* Truncated data.*/
2183                                         ds->loop = i;
2184                                         ds->state = ST_RD_LITERAL_3;
2185                                         return (ARCHIVE_OK);
2186                                 }
2187                                 rbits = lzh_br_bits(br, ds->pt.max_bits);
2188                                 c = lzh_decode_huffman(&(ds->pt), rbits);
2189                                 if (c > 2) {
2190                                         /* Note: 'c' will never be more than
2191                                          * eighteen since it's limited by
2192                                          * PT_BITLEN_SIZE, which is being set
2193                                          * to ds->pt.len_size through
2194                                          * ds->literal_pt_len_size. */
2195                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2196                                         c -= 2;
2197                                         ds->lt.freq[c]++;
2198                                         ds->lt.bitlen[i++] = c;
2199                                 } else if (c == 0) {
2200                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2201                                         ds->lt.bitlen[i++] = 0;
2202                                 } else {
2203                                         /* c == 1 or c == 2 */
2204                                         int n = (c == 1)?4:9;
2205                                         if (!lzh_br_read_ahead(strm, br,
2206                                              ds->pt.bitlen[c] + n)) {
2207                                                 if (last) /* Truncated data. */
2208                                                         goto failed;
2209                                                 ds->loop = i;
2210                                                 ds->state = ST_RD_LITERAL_3;
2211                                                 return (ARCHIVE_OK);
2212                                         }
2213                                         lzh_br_consume(br, ds->pt.bitlen[c]);
2214                                         c = lzh_br_bits(br, n);
2215                                         lzh_br_consume(br, n);
2216                                         c += (n == 4)?3:20;
2217                                         if (i + c > ds->lt.len_avail)
2218                                                 goto failed;/* Invalid data */
2219                                         memset(&(ds->lt.bitlen[i]), 0, c);
2220                                         i += c;
2221                                 }
2222                         }
2223                         if (i > ds->lt.len_avail ||
2224                             !lzh_make_huffman_table(&(ds->lt)))
2225                                 goto failed;/* Invalid data */
2226                         /* FALL THROUGH */
2227                 case ST_RD_POS_DATA_1:
2228                         /*
2229                          * Read a position table compressed in huffman
2230                          * coding.
2231                          */
2232                         ds->pt.len_size = ds->pos_pt_len_size;
2233                         ds->pt.len_bits = ds->pos_pt_len_bits;
2234                         ds->reading_position = 1;
2235                         ds->state = ST_RD_PT_1;
2236                         break;
2237                 case ST_GET_LITERAL:
2238                         return (100);
2239                 }
2240         }
2241 failed:
2242         return (ds->error = ARCHIVE_FAILED);
2243 }
2244
/*
 * Decode the literal/match codes of the current block into the sliding
 * window, resuming at ds->state.
 *
 * The bit reader, window position and copy state are cached in locals
 * and written back to `ds' on the paths that return normally.
 *
 * `last' non-zero means no further input will arrive: when a read-ahead
 * for a huffman code fails the remaining bits are decoded with dummy
 * padding, and the function fails if more bits were consumed than were
 * actually available.
 *
 * Returns:
 *   100             every code of the block was decoded; ds->state is
 *                   set to ST_RD_BLOCK.
 *   ARCHIVE_OK      decoding was suspended, either because more input is
 *                   needed or because a full window was handed to
 *                   lzh_emit_window(); the state to resume from is saved.
 *   ARCHIVE_FAILED  invalid or truncated data (also stored in ds->error).
 */
static int
lzh_decode_blocks(struct lzh_stream *strm, int last)
{
        struct lzh_dec *ds = strm->ds;
        /* Local copy of the bit reader; stored back to ds->br on return. */
        struct lzh_br bre = ds->br;
        struct huffman *lt = &(ds->lt);
        struct huffman *pt = &(ds->pt);
        unsigned char *w_buff = ds->w_buff;
        unsigned char *lt_bitlen = lt->bitlen;
        unsigned char *pt_bitlen = pt->bitlen;
        int blocks_avail = ds->blocks_avail, c = 0;
        int copy_len = ds->copy_len, copy_pos = ds->copy_pos;
        int w_pos = ds->w_pos, w_mask = ds->w_mask, w_size = ds->w_size;
        int lt_max_bits = lt->max_bits, pt_max_bits = pt->max_bits;
        int state = ds->state;

        for (;;) {
                switch (state) {
                case ST_GET_LITERAL:
                        for (;;) {
                                if (blocks_avail == 0) {
                                        /* We have decoded all blocks.
                                         * Let's handle next blocks. */
                                        ds->state = ST_RD_BLOCK;
                                        ds->br = bre;
                                        ds->blocks_avail = 0;
                                        ds->w_pos = w_pos;
                                        ds->copy_pos = 0;
                                        return (100);
                                }

                                /* lzh_br_read_ahead() always try to fill the
                                 * cache buffer up. In specific situation we
                                 * are close to the end of the data, the cache
                                 * buffer will not be full and thus we have to
                                 * determine if the cache buffer has some bits
                                 * as much as we need after lzh_br_read_ahead()
                                 * failed. */
                                if (!lzh_br_read_ahead(strm, &bre,
                                    lt_max_bits)) {
                                        if (!last)
                                                goto next_data;
                                        /* Remaining bits are less than
                                         * maximum bits(lt.max_bits) but maybe
                                         * it still remains as much as we need,
                                         * so we should try to use it with
                                         * dummy bits. */
                                        c = lzh_decode_huffman(lt,
                                              lzh_br_bits_forced(&bre,
                                                lt_max_bits));
                                        lzh_br_consume(&bre, lt_bitlen[c]);
                                        if (!lzh_br_has(&bre, 0))
                                                goto failed;/* Over read. */
                                } else {
                                        c = lzh_decode_huffman(lt,
                                              lzh_br_bits(&bre, lt_max_bits));
                                        lzh_br_consume(&bre, lt_bitlen[c]);
                                }
                                blocks_avail--;
                                if (c > UCHAR_MAX)
                                        /* Current block is a match data. */
                                        break;
                                /*
                                 * 'c' is exactly a literal code.
                                 */
                                /* Save a decoded code to reference it
                                 * afterward. */
                                w_buff[w_pos] = c;
                                if (++w_pos >= w_size) {
                                        /* The window is full: emit it and
                                         * suspend; `state' is still
                                         * ST_GET_LITERAL. */
                                        w_pos = 0;
                                        lzh_emit_window(strm, w_size);
                                        goto next_data;
                                }
                        }
                        /* 'c' is the length of a match pattern we have
                         * already extracted, which has been stored in
                         * window(ds->w_buff). */
                        copy_len = c - (UCHAR_MAX + 1) + MINMATCH;
                        /* FALL THROUGH */
                case ST_GET_POS_1:
                        /*
                         * Get a reference position.
                         */
                        if (!lzh_br_read_ahead(strm, &bre, pt_max_bits)) {
                                if (!last) {
                                        /* Suspend; copy_len must survive
                                         * until we are called again. */
                                        state = ST_GET_POS_1;
                                        ds->copy_len = copy_len;
                                        goto next_data;
                                }
                                /* No more input: decode with dummy bits
                                 * and verify nothing was over read. */
                                copy_pos = lzh_decode_huffman(pt,
                                    lzh_br_bits_forced(&bre, pt_max_bits));
                                lzh_br_consume(&bre, pt_bitlen[copy_pos]);
                                if (!lzh_br_has(&bre, 0))
                                        goto failed;/* Over read. */
                        } else {
                                copy_pos = lzh_decode_huffman(pt,
                                    lzh_br_bits(&bre, pt_max_bits));
                                lzh_br_consume(&bre, pt_bitlen[copy_pos]);
                        }
                        /* FALL THROUGH */
                case ST_GET_POS_2:
                        if (copy_pos > 1) {
                                /* We need an additional adjustment number to
                                 * the position. */
                                int p = copy_pos - 1;
                                if (!lzh_br_read_ahead(strm, &bre, p)) {
                                        if (last)
                                                goto failed;/* Truncated data.*/
                                        state = ST_GET_POS_2;
                                        ds->copy_len = copy_len;
                                        ds->copy_pos = copy_pos;
                                        goto next_data;
                                }
                                /* The decoded code gives the number of extra
                                 * bits (p); the distance is 2^p plus the
                                 * value of those p bits. */
                                copy_pos = (1 << p) + lzh_br_bits(&bre, p);
                                lzh_br_consume(&bre, p);
                        }
                        /* The position is actually a distance from the last
                         * code we had extracted and thus we have to convert
                         * it to a position of the window. */
                        copy_pos = (w_pos - copy_pos - 1) & w_mask;
                        /* FALL THROUGH */
                case ST_COPY_DATA:
                        /*
                         * Copy `copy_len' bytes as extracted data from
                         * the window into the output buffer.
                         */
                        for (;;) {
                                int l;

                                /* Limit this pass so that neither the source
                                 * nor the destination range runs past the
                                 * end of the window; only the cursor nearer
                                 * to the end needs checking. */
                                l = copy_len;
                                if (copy_pos > w_pos) {
                                        if (l > w_size - copy_pos)
                                                l = w_size - copy_pos;
                                } else {
                                        if (l > w_size - w_pos)
                                                l = w_size - w_pos;
                                }
                                if ((copy_pos + l < w_pos)
                                    || (w_pos + l < copy_pos)) {
                                        /* No overlap. */
                                        memcpy(w_buff + w_pos,
                                            w_buff + copy_pos, l);
                                } else {
                                        /* The ranges may overlap: copy
                                         * forward byte by byte (unrolled by
                                         * two) instead of using memcpy(). */
                                        const unsigned char *s;
                                        unsigned char *d;
                                        int li;

                                        d = w_buff + w_pos;
                                        s = w_buff + copy_pos;
                                        for (li = 0; li < l-1;) {
                                                d[li] = s[li];li++;
                                                d[li] = s[li];li++;
                                        }
                                        if (li < l)
                                                d[li] = s[li];
                                }
                                w_pos += l;
                                if (w_pos == w_size) {
                                        /* The window is full: emit it and
                                         * suspend.  If part of the match is
                                         * still pending, save what is left
                                         * so ST_COPY_DATA can resume it. */
                                        w_pos = 0;
                                        lzh_emit_window(strm, w_size);
                                        if (copy_len <= l)
                                                state = ST_GET_LITERAL;
                                        else {
                                                state = ST_COPY_DATA;
                                                ds->copy_len = copy_len - l;
                                                ds->copy_pos =
                                                    (copy_pos + l) & w_mask;
                                        }
                                        goto next_data;
                                }
                                if (copy_len <= l)
                                        /* A copy of current pattern ended. */
                                        break;
                                /* The source reached the end of the window;
                                 * continue with the remainder from the
                                 * wrapped-around position. */
                                copy_len -= l;
                                copy_pos = (copy_pos + l) & w_mask;
                        }
                        state = ST_GET_LITERAL;
                        break;
                }
        }
failed:
        return (ds->error = ARCHIVE_FAILED);
next_data:
        /* Write the cached decoder state back so the next call resumes
         * exactly where this one stopped. */
        ds->br = bre;
        ds->blocks_avail = blocks_avail;
        ds->state = state;
        ds->w_pos = w_pos;
        return (ARCHIVE_OK);
}
2434
2435 static int
2436 lzh_huffman_init(struct huffman *hf, size_t len_size, int tbl_bits)
2437 {
2438         int bits;
2439
2440         if (hf->bitlen == NULL) {
2441                 hf->bitlen = malloc(len_size * sizeof(hf->bitlen[0]));
2442                 if (hf->bitlen == NULL)
2443                         return (ARCHIVE_FATAL);
2444         }
2445         if (hf->tbl == NULL) {
2446                 if (tbl_bits < HTBL_BITS)
2447                         bits = tbl_bits;
2448                 else
2449                         bits = HTBL_BITS;
2450                 hf->tbl = malloc(((size_t)1 << bits) * sizeof(hf->tbl[0]));
2451                 if (hf->tbl == NULL)
2452                         return (ARCHIVE_FATAL);
2453         }
2454         if (hf->tree == NULL && tbl_bits > HTBL_BITS) {
2455                 hf->tree_avail = 1 << (tbl_bits - HTBL_BITS + 4);
2456                 hf->tree = malloc(hf->tree_avail * sizeof(hf->tree[0]));
2457                 if (hf->tree == NULL)
2458                         return (ARCHIVE_FATAL);
2459         }
2460         hf->len_size = (int)len_size;
2461         hf->tbl_bits = tbl_bits;
2462         return (ARCHIVE_OK);
2463 }
2464
2465 static void
2466 lzh_huffman_free(struct huffman *hf)
2467 {
2468         free(hf->bitlen);
2469         free(hf->tbl);
2470         free(hf->tree);
2471 }
2472
/*
 * Lookup table for the long form of a PT bit length (see
 * lzh_read_pt_bitlen()).  When the first three bits of a 13-bit peek are
 * all ones, the table is indexed by the remaining ten bits and yields the
 * bit length (7..16); the code itself occupies (length - 3) bits.  The
 * last entry (all ten bits set) is 0, which marks invalid data.
 *
 * The table is never modified, so it is declared const.
 */
static const char bitlen_tbl[0x400] = {
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
         9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
        11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
        12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
        13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 16,  0
};
2539 static int
2540 lzh_read_pt_bitlen(struct lzh_stream *strm, int start, int end)
2541 {
2542         struct lzh_dec *ds = strm->ds;
2543         struct lzh_br *br = &(ds->br);
2544         int c, i;
2545
2546         for (i = start; i < end; ) {
2547                 /*
2548                  *  bit pattern     the number we need
2549                  *     000           ->  0
2550                  *     001           ->  1
2551                  *     010           ->  2
2552                  *     ...
2553                  *     110           ->  6
2554                  *     1110          ->  7
2555                  *     11110         ->  8
2556                  *     ...
2557                  *     1111111111110 ->  16
2558                  */
2559                 if (!lzh_br_read_ahead(strm, br, 3))
2560                         return (i);
2561                 if ((c = lzh_br_bits(br, 3)) == 7) {
2562                         if (!lzh_br_read_ahead(strm, br, 13))
2563                                 return (i);
2564                         c = bitlen_tbl[lzh_br_bits(br, 13) & 0x3FF];
2565                         if (c)
2566                                 lzh_br_consume(br, c - 3);
2567                         else
2568                                 return (-1);/* Invalid data. */
2569                 } else
2570                         lzh_br_consume(br, 3);
2571                 ds->pt.bitlen[i++] = c;
2572                 ds->pt.freq[c]++;
2573         }
2574         return (i);
2575 }
2576
2577 static int
2578 lzh_make_fake_table(struct huffman *hf, uint16_t c)
2579 {
2580         if (c >= hf->len_size)
2581                 return (0);
2582         hf->tbl[0] = c;
2583         hf->max_bits = 0;
2584         hf->shift_bits = 0;
2585         hf->bitlen[hf->tbl[0]] = 0;
2586         return (1);
2587 }
2588
/*
 * Make a huffman coding table.
 *
 * Builds the decoding structures of `hf' from the bit lengths in
 * hf->bitlen[0 .. hf->len_avail - 1] and the per-length counts in
 * hf->freq[1 .. 16]:
 *  - codes of at most HTBL_BITS bits are expanded directly into hf->tbl;
 *  - longer codes share the hf->tbl slot of their first HTBL_BITS bits,
 *    which refers to a node of hf->tree; the remaining bits are resolved
 *    by walking that tree.  Values >= len_avail denote tree nodes
 *    (value - len_avail is the node index), smaller values are symbols.
 *
 * Sets hf->max_bits, hf->shift_bits and hf->tree_used.
 *
 * Returns 1 on success, or 0 when the lengths are invalid: the weighted
 * sum of the counts is not exactly 0x10000, the longest code exceeds
 * hf->tbl_bits, the table or tree would overflow, or two codes collide.
 */
static int
lzh_make_huffman_table(struct huffman *hf)
{
        uint16_t *tbl;
        const unsigned char *bitlen;
        int bitptn[17], weight[17];
        int i, maxbits = 0, ptn, tbl_size, w;
        int diffbits, len_avail;

        /*
         * Initialize bit patterns.
         *
         * On a 16-bit scale a code of length i has weight 2^(16 - i);
         * bitptn[i] is the first pattern assigned to codes of length i.
         */
        ptn = 0;
        for (i = 1, w = 1 << 15; i <= 16; i++, w >>= 1) {
                bitptn[i] = ptn;
                weight[i] = w;
                if (hf->freq[i]) {
                        ptn += hf->freq[i] * w;
                        maxbits = i;
                }
        }
        /* The weights of all codes must add up to exactly 0x10000. */
        if (ptn != 0x10000 || maxbits > hf->tbl_bits)
                return (0);/* Invalid */

        hf->max_bits = maxbits;

        /*
         * Cut out extra bits which we won't house in the table.
         * This preparation reduces the same calculation in the for-loop
         * making the table.
         */
        if (maxbits < 16) {
                int ebits = 16 - maxbits;
                for (i = 1; i <= maxbits; i++) {
                        bitptn[i] >>= ebits;
                        weight[i] >>= ebits;
                }
        }
        if (maxbits > HTBL_BITS) {
                unsigned htbl_max;
                uint16_t *p;

                /* Lengths that fit the direct table are rescaled further
                 * to HTBL_BITS; longer lengths stay on the maxbits scale. */
                diffbits = maxbits - HTBL_BITS;
                for (i = 1; i <= HTBL_BITS; i++) {
                        bitptn[i] >>= diffbits;
                        weight[i] >>= diffbits;
                }
                /* Zero the slots past the last directly mapped code; a
                 * zero slot is later treated as "no tree node yet". */
                htbl_max = bitptn[HTBL_BITS] +
                    weight[HTBL_BITS] * hf->freq[HTBL_BITS];
                p = &(hf->tbl[htbl_max]);
                while (p < &hf->tbl[1U<<HTBL_BITS])
                        *p++ = 0;
        } else
                diffbits = 0;
        hf->shift_bits = diffbits;

        /*
         * Make the table.
         */
        tbl_size = 1 << HTBL_BITS;
        tbl = hf->tbl;
        bitlen = hf->bitlen;
        len_avail = hf->len_avail;
        hf->tree_used = 0;
        for (i = 0; i < len_avail; i++) {
                uint16_t *p;
                int len, cnt;
                uint16_t bit;
                int extlen;
                struct htree_t *ht;

                /* A zero length means symbol i has no code. */
                if (bitlen[i] == 0)
                        continue;
                /* Get a bit pattern */
                len = bitlen[i];
                ptn = bitptn[len];
                cnt = weight[len];
                if (len <= HTBL_BITS) {
                        /* Calculate next bit pattern */
                        if ((bitptn[len] = ptn + cnt) > tbl_size)
                                return (0);/* Invalid */
                        /* Update the table */
                        /* Fill tbl[ptn .. ptn + cnt - 1] with symbol i. */
                        p = &(tbl[ptn]);
                        if (cnt > 7) {
                                uint16_t *pc;

                                /* Write the topmost eight entries by hand,
                                 * then replicate them downwards with
                                 * memcpy() in chunks of 8 and 16. */
                                cnt -= 8;
                                pc = &p[cnt];
                                pc[0] = (uint16_t)i;
                                pc[1] = (uint16_t)i;
                                pc[2] = (uint16_t)i;
                                pc[3] = (uint16_t)i;
                                pc[4] = (uint16_t)i;
                                pc[5] = (uint16_t)i;
                                pc[6] = (uint16_t)i;
                                pc[7] = (uint16_t)i;
                                if (cnt > 7) {
                                        cnt -= 8;
                                        memcpy(&p[cnt], pc,
                                                8 * sizeof(uint16_t));
                                        pc = &p[cnt];
                                        while (cnt > 15) {
                                                cnt -= 16;
                                                memcpy(&p[cnt], pc,
                                                        16 * sizeof(uint16_t));
                                        }
                                }
                                /* Whatever is left at the bottom. */
                                if (cnt)
                                        memcpy(p, pc, cnt * sizeof(uint16_t));
                        } else {
                                /* Short run: store entries one by one,
                                 * two per iteration. */
                                while (cnt > 1) {
                                        p[--cnt] = (uint16_t)i;
                                        p[--cnt] = (uint16_t)i;
                                }
                                if (cnt)
                                        p[--cnt] = (uint16_t)i;
                        }
                        continue;
                }

                /*
                 * A bit length is too big to be housed to a direct table,
                 * so we use a tree model for its extra bits.
                 */
                bitptn[len] = ptn + cnt;
                /* `bit' selects the first bit below the HTBL_BITS prefix. */
                bit = 1U << (diffbits -1);
                extlen = len - HTBL_BITS;

                /* Find, or create, the tree node attached to the table
                 * slot of this code's HTBL_BITS-bit prefix. */
                p = &(tbl[ptn >> diffbits]);
                if (*p == 0) {
                        *p = len_avail + hf->tree_used;
                        ht = &(hf->tree[hf->tree_used++]);
                        if (hf->tree_used > hf->tree_avail)
                                return (0);/* Invalid */
                        ht->left = 0;
                        ht->right = 0;
                } else {
                        /* The slot must refer to an existing tree node,
                         * not to a symbol. */
                        if (*p < len_avail ||
                            *p >= (len_avail + hf->tree_used))
                                return (0);/* Invalid */
                        ht = &(hf->tree[*p - len_avail]);
                }
                /* Walk down one level per extra bit except the last,
                 * creating inner nodes on demand; a set bit follows
                 * `left', a clear bit follows `right'. */
                while (--extlen > 0) {
                        if (ptn & bit) {
                                if (ht->left < len_avail) {
                                        ht->left = len_avail + hf->tree_used;
                                        ht = &(hf->tree[hf->tree_used++]);
                                        if (hf->tree_used > hf->tree_avail)
                                                return (0);/* Invalid */
                                        ht->left = 0;
                                        ht->right = 0;
                                } else {
                                        ht = &(hf->tree[ht->left - len_avail]);
                                }
                        } else {
                                if (ht->right < len_avail) {
                                        ht->right = len_avail + hf->tree_used;
                                        ht = &(hf->tree[hf->tree_used++]);
                                        if (hf->tree_used > hf->tree_avail)
                                                return (0);/* Invalid */
                                        ht->left = 0;
                                        ht->right = 0;
                                } else {
                                        ht = &(hf->tree[ht->right - len_avail]);
                                }
                        }
                        bit >>= 1;
                }
                /* The last extra bit selects the leaf, which must still
                 * be unused. */
                if (ptn & bit) {
                        if (ht->left != 0)
                                return (0);/* Invalid */
                        ht->left = (uint16_t)i;
                } else {
                        if (ht->right != 0)
                                return (0);/* Invalid */
                        ht->right = (uint16_t)i;
                }
        }
        return (1);
}
2772
2773 static int
2774 lzh_decode_huffman_tree(struct huffman *hf, unsigned rbits, int c)
2775 {
2776         struct htree_t *ht;
2777         int extlen;
2778
2779         ht = hf->tree;
2780         extlen = hf->shift_bits;
2781         while (c >= hf->len_avail) {
2782                 c -= hf->len_avail;
2783                 if (extlen-- <= 0 || c >= hf->tree_used)
2784                         return (0);
2785                 if (rbits & (1U << extlen))
2786                         c = ht[c].left;
2787                 else
2788                         c = ht[c].right;
2789         }
2790         return (c);
2791 }
2792
2793 static inline int
2794 lzh_decode_huffman(struct huffman *hf, unsigned rbits)
2795 {
2796         int c;
2797         /*
2798          * At first search an index table for a bit pattern.
2799          * If it fails, search a huffman tree for.
2800          */
2801         c = hf->tbl[rbits >> hf->shift_bits];
2802         if (c < hf->len_avail || hf->len_avail == 0)
2803                 return (c);
2804         /* This bit pattern needs to be found out at a huffman tree. */
2805         return (lzh_decode_huffman_tree(hf, rbits, c));
2806 }
2807