2 * Copyright (c) 2009 Michihiro NAKAJIMA
3 * Copyright (c) 2003-2008 Tim Kientzle and Miklos Vajna
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "archive_platform.h"
29 __FBSDID("$FreeBSD$");
51 #include "archive_endian.h"
52 #include "archive_private.h"
53 #include "archive_read_private.h"
55 #if HAVE_LZMA_H && HAVE_LIBLZMA
59 unsigned char *out_block;
60 size_t out_block_size;
62 char eof; /* True = found end of compressed data. */
65 /* Combined lzma/xz filter */
66 static ssize_t xz_filter_read(struct archive_read_filter *, const void **);
67 static int xz_filter_close(struct archive_read_filter *);
68 static int xz_lzma_bidder_init(struct archive_read_filter *);
70 #elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
73 lzmadec_stream stream;
74 unsigned char *out_block;
75 size_t out_block_size;
77 char eof; /* True = found end of compressed data. */
80 /* Lzma-only filter */
81 static ssize_t lzma_filter_read(struct archive_read_filter *, const void **);
82 static int lzma_filter_close(struct archive_read_filter *);
86 * Note that we can detect xz and lzma compressed files even if we
87 * can't decompress them. (In fact, we like detecting them because we
88 * can give better error messages.) So the bid framework here gets
89 * compiled even if no lzma library is available.
91 static int xz_bidder_bid(struct archive_read_filter_bidder *,
92 struct archive_read_filter *);
93 static int xz_bidder_init(struct archive_read_filter *);
94 static int lzma_bidder_bid(struct archive_read_filter_bidder *,
95 struct archive_read_filter *);
96 static int lzma_bidder_init(struct archive_read_filter *);
99 archive_read_support_compression_xz(struct archive *_a)
101 struct archive_read *a = (struct archive_read *)_a;
102 struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
104 archive_clear_error(_a);
106 return (ARCHIVE_FATAL);
109 bidder->bid = xz_bidder_bid;
110 bidder->init = xz_bidder_init;
111 bidder->options = NULL;
113 #if HAVE_LZMA_H && HAVE_LIBLZMA
116 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
117 "Using external unxz program for xz decompression");
118 return (ARCHIVE_WARN);
123 archive_read_support_compression_lzma(struct archive *_a)
125 struct archive_read *a = (struct archive_read *)_a;
126 struct archive_read_filter_bidder *bidder = __archive_read_get_bidder(a);
128 archive_clear_error(_a);
130 return (ARCHIVE_FATAL);
133 bidder->bid = lzma_bidder_bid;
134 bidder->init = lzma_bidder_init;
135 bidder->options = NULL;
137 #if HAVE_LZMA_H && HAVE_LIBLZMA
139 #elif HAVE_LZMADEC_H && HAVE_LIBLZMADEC
142 archive_set_error(_a, ARCHIVE_ERRNO_MISC,
143 "Using external unlzma program for lzma decompression");
144 return (ARCHIVE_WARN);
149 * Test whether we can handle this data.
152 xz_bidder_bid(struct archive_read_filter_bidder *self,
153 struct archive_read_filter *filter)
155 const unsigned char *buffer;
159 (void)self; /* UNUSED */
161 buffer = __archive_read_filter_ahead(filter, 6, &avail);
166 * Verify Header Magic Bytes : FD 37 7A 58 5A 00
169 if (buffer[0] != 0xFD)
172 if (buffer[1] != 0x37)
175 if (buffer[2] != 0x7A)
178 if (buffer[3] != 0x58)
181 if (buffer[4] != 0x5A)
184 if (buffer[5] != 0x00)
188 return (bits_checked);
192 * Test whether we can handle this data.
194 * <sigh> LZMA has a rather poor file signature. Zeros do not
195 * make good signature bytes as a rule, and the only non-zero byte
196 * here is an ASCII character. For example, an uncompressed tar
197 * archive whose first file is ']' would satisfy this check. It may
198 * be necessary to exclude LZMA from compression_all() because of
199 * this. Clients of libarchive would then have to explicitly enable
200 * LZMA checking instead of (or in addition to) compression_all() when
201 * they have other evidence (file name, command-line option) to go on.
204 lzma_bidder_bid(struct archive_read_filter_bidder *self,
205 struct archive_read_filter *filter)
207 const unsigned char *buffer;
210 uint64_t uncompressed_size;
213 (void)self; /* UNUSED */
215 buffer = __archive_read_filter_ahead(filter, 14, &avail);
219 /* First byte of raw LZMA stream is commonly 0x5d.
220 * The first byte is a special number, which consists of
221 * three parameters of LZMA compression, a number of literal
222 * context bits(which is from 0 to 8, default is 3), a number
223 * of literal pos bits(which is from 0 to 4, default is 0),
224 * a number of pos bits(which is from 0 to 4, default is 2).
225 * The first byte is made by
226 * (pos bits * 5 + literal pos bits) * 9 + literal context bits,
227 * and so the default value in this field is
228 * (2 * 5 + 0) * 9 + 3 = 0x5d.
229 * lzma of LZMA SDK has options to change those parameters.
230 * It means a range of this field is from 0 to 224. And lzma of
231 * XZ Utils with option -e records 0x5e in this field. */
232 /* NOTE: If this checking of the first byte increases false
233 * recognition, we should allow only 0x5d and 0x5e for the first
234 * byte of LZMA stream. */
236 if (buffer[0] > (4 * 5 + 4) * 9 + 8)
238 /* Most likely value in the first byte of LZMA stream. */
239 if (buffer[0] == 0x5d || buffer[0] == 0x5e)
242 /* Sixth through thirteenth bytes are uncompressed size,
243 * stored in little-endian order. `-1' means uncompressed
244 * size is unknown and lzma of XZ Utils always records `-1'
246 uncompressed_size = archive_le64dec(buffer+5);
247 if (uncompressed_size == (uint64_t)ARCHIVE_LITERAL_LL(-1))
250 /* Second through fifth bytes are dictionary size, stored in
251 * little-endian order. The minimum dictionary size is
252 * 1 << 12(4KiB) which the lzma of LZMA SDK uses with option
253 * -d12 and the maximum dictionary size is 1 << 27(128MiB)
254 * which the one uses with option -d27.
255 * NOTE: A comment of LZMA SDK source code says this dictionary
256 * range is from 1 << 12 to 1 << 30. */
257 dicsize = archive_le32dec(buffer+1);
259 case 0x00001000:/* lzma of LZMA SDK option -d12. */
260 case 0x00002000:/* lzma of LZMA SDK option -d13. */
261 case 0x00004000:/* lzma of LZMA SDK option -d14. */
262 case 0x00008000:/* lzma of LZMA SDK option -d15. */
263 case 0x00010000:/* lzma of XZ Utils option -0 and -1.
264 * lzma of LZMA SDK option -d16. */
265 case 0x00020000:/* lzma of LZMA SDK option -d17. */
266 case 0x00040000:/* lzma of LZMA SDK option -d18. */
267 case 0x00080000:/* lzma of XZ Utils option -2.
268 * lzma of LZMA SDK option -d19. */
269 case 0x00100000:/* lzma of XZ Utils option -3.
270 * lzma of LZMA SDK option -d20. */
271 case 0x00200000:/* lzma of XZ Utils option -4.
272 * lzma of LZMA SDK option -d21. */
273 case 0x00400000:/* lzma of XZ Utils option -5.
274 * lzma of LZMA SDK option -d22. */
275 case 0x00800000:/* lzma of XZ Utils option -6.
276 * lzma of LZMA SDK option -d23. */
277 case 0x01000000:/* lzma of XZ Utils option -7.
278 * lzma of LZMA SDK option -d24. */
279 case 0x02000000:/* lzma of XZ Utils option -8.
280 * lzma of LZMA SDK option -d25. */
281 case 0x04000000:/* lzma of XZ Utils option -9.
282 * lzma of LZMA SDK option -d26. */
283 case 0x08000000:/* lzma of LZMA SDK option -d27. */
287 /* If the memory available for encoding was not enough
288 * on the platform where the LZMA stream was made, lzma
289 * of XZ Utils automatically decreased the dictionary size
290 * until it fit, in decrements of 1Mi bytes
292 if (dicsize <= 0x03F00000 && dicsize >= 0x00300000 &&
293 (dicsize & ((1 << 20)-1)) == 0 &&
294 bits_checked == 8 + 64) {
298 /* Otherwise the dictionary size is unlikely. But it is
299 * possible that someone made the lzma stream with
300 * liblzma/LZMA SDK using a custom dictionary size. */
304 /* TODO: The above test is still very weak. It would be
305 * good to do better. */
307 return (bits_checked);
310 #if HAVE_LZMA_H && HAVE_LIBLZMA
313 * liblzma 4.999.7 and later support both lzma and xz streams.
316 xz_bidder_init(struct archive_read_filter *self)
318 self->code = ARCHIVE_COMPRESSION_XZ;
320 return (xz_lzma_bidder_init(self));
324 lzma_bidder_init(struct archive_read_filter *self)
326 self->code = ARCHIVE_COMPRESSION_LZMA;
328 return (xz_lzma_bidder_init(self));
332 * Setup the callbacks.
335 xz_lzma_bidder_init(struct archive_read_filter *self)
337 static const size_t out_block_size = 64 * 1024;
339 struct private_data *state;
342 state = (struct private_data *)calloc(sizeof(*state), 1);
343 out_block = (unsigned char *)malloc(out_block_size);
344 if (state == NULL || out_block == NULL) {
345 archive_set_error(&self->archive->archive, ENOMEM,
346 "Can't allocate data for xz decompression");
349 return (ARCHIVE_FATAL);
353 state->out_block_size = out_block_size;
354 state->out_block = out_block;
355 self->read = xz_filter_read;
356 self->skip = NULL; /* not supported */
357 self->close = xz_filter_close;
359 state->stream.avail_in = 0;
361 state->stream.next_out = state->out_block;
362 state->stream.avail_out = state->out_block_size;
364 /* Initialize compression library.
365 * TODO: I don't know what value is best for memlimit.
366 * Maybe it should be chosen by checking how much memory
367 * the running system has.
369 if (self->code == ARCHIVE_COMPRESSION_XZ)
370 ret = lzma_stream_decoder(&(state->stream),
371 (1U << 30),/* memlimit */
374 ret = lzma_alone_decoder(&(state->stream),
375 (1U << 30));/* memlimit */
380 /* Library setup failed: Choose an error message and clean up. */
383 archive_set_error(&self->archive->archive, ENOMEM,
384 "Internal error initializing compression library: "
385 "Cannot allocate memory");
387 case LZMA_OPTIONS_ERROR:
388 archive_set_error(&self->archive->archive,
390 "Internal error initializing compression library: "
391 "Invalid or unsupported options");
394 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
395 "Internal error initializing lzma library");
399 free(state->out_block);
402 return (ARCHIVE_FATAL);
406 * Return the next block of decompressed data.
409 xz_filter_read(struct archive_read_filter *self, const void **p)
411 struct private_data *state;
416 state = (struct private_data *)self->data;
418 /* Empty our output buffer. */
419 state->stream.next_out = state->out_block;
420 state->stream.avail_out = state->out_block_size;
422 /* Try to fill the output buffer. */
423 while (state->stream.avail_out > 0 && !state->eof) {
424 state->stream.next_in =
425 __archive_read_filter_ahead(self->upstream, 1, &avail_in);
426 if (state->stream.next_in == NULL && avail_in < 0)
427 return (ARCHIVE_FATAL);
428 state->stream.avail_in = avail_in;
430 /* Decompress as much as we can in one pass. */
431 ret = lzma_code(&(state->stream),
432 (state->stream.avail_in == 0)? LZMA_FINISH: LZMA_RUN);
434 case LZMA_STREAM_END: /* Found end of stream. */
437 case LZMA_OK: /* Decompressor made some progress. */
438 __archive_read_filter_consume(self->upstream,
439 avail_in - state->stream.avail_in);
442 archive_set_error(&self->archive->archive, ENOMEM,
443 "Lzma library error: Cannot allocate memory");
444 return (ARCHIVE_FATAL);
445 case LZMA_MEMLIMIT_ERROR:
446 archive_set_error(&self->archive->archive, ENOMEM,
447 "Lzma library error: Out of memory");
448 return (ARCHIVE_FATAL);
449 case LZMA_FORMAT_ERROR:
450 archive_set_error(&self->archive->archive,
452 "Lzma library error: format not recognized");
453 return (ARCHIVE_FATAL);
454 case LZMA_OPTIONS_ERROR:
455 archive_set_error(&self->archive->archive,
457 "Lzma library error: Invalid options");
458 return (ARCHIVE_FATAL);
459 case LZMA_DATA_ERROR:
460 archive_set_error(&self->archive->archive,
462 "Lzma library error: Corrupted input data");
463 return (ARCHIVE_FATAL);
465 archive_set_error(&self->archive->archive,
467 "Lzma library error: No progress is possible");
468 return (ARCHIVE_FATAL);
470 /* Return an error. */
471 archive_set_error(&self->archive->archive,
473 "Lzma decompression failed: Unknown error");
474 return (ARCHIVE_FATAL);
478 decompressed = state->stream.next_out - state->out_block;
479 state->total_out += decompressed;
480 if (decompressed == 0)
483 *p = state->out_block;
484 return (decompressed);
488 * Clean up the decompressor.
491 xz_filter_close(struct archive_read_filter *self)
493 struct private_data *state;
495 state = (struct private_data *)self->data;
496 lzma_end(&(state->stream));
497 free(state->out_block);
504 #if HAVE_LZMADEC_H && HAVE_LIBLZMADEC
507 * If we have the older liblzmadec library, then we can handle
508 * LZMA streams but not XZ streams.
512 * Setup the callbacks.
515 lzma_bidder_init(struct archive_read_filter *self)
517 static const size_t out_block_size = 64 * 1024;
519 struct private_data *state;
520 ssize_t ret, avail_in;
522 self->code = ARCHIVE_COMPRESSION_LZMA;
525 state = (struct private_data *)calloc(sizeof(*state), 1);
526 out_block = (unsigned char *)malloc(out_block_size);
527 if (state == NULL || out_block == NULL) {
528 archive_set_error(&self->archive->archive, ENOMEM,
529 "Can't allocate data for lzma decompression");
532 return (ARCHIVE_FATAL);
536 state->out_block_size = out_block_size;
537 state->out_block = out_block;
538 self->read = lzma_filter_read;
539 self->skip = NULL; /* not supported */
540 self->close = lzma_filter_close;
542 /* Prime the lzma library with 18 bytes of input. */
543 state->stream.next_in = (unsigned char *)(uintptr_t)
544 __archive_read_filter_ahead(self->upstream, 18, &avail_in);
545 if (state->stream.next_in == NULL)
546 return (ARCHIVE_FATAL);
547 state->stream.avail_in = avail_in;
548 state->stream.next_out = state->out_block;
549 state->stream.avail_out = state->out_block_size;
551 /* Initialize compression library. */
552 ret = lzmadec_init(&(state->stream));
553 __archive_read_filter_consume(self->upstream,
554 avail_in - state->stream.avail_in);
555 if (ret == LZMADEC_OK)
558 /* Library setup failed: Clean up. */
559 archive_set_error(&self->archive->archive, ARCHIVE_ERRNO_MISC,
560 "Internal error initializing lzma library");
562 /* Override the error message if we know what really went wrong. */
564 case LZMADEC_HEADER_ERROR:
565 archive_set_error(&self->archive->archive,
567 "Internal error initializing compression library: "
570 case LZMADEC_MEM_ERROR:
571 archive_set_error(&self->archive->archive, ENOMEM,
572 "Internal error initializing compression library: "
577 free(state->out_block);
580 return (ARCHIVE_FATAL);
584 * Return the next block of decompressed data.
587 lzma_filter_read(struct archive_read_filter *self, const void **p)
589 struct private_data *state;
591 ssize_t avail_in, ret;
593 state = (struct private_data *)self->data;
595 /* Empty our output buffer. */
596 state->stream.next_out = state->out_block;
597 state->stream.avail_out = state->out_block_size;
599 /* Try to fill the output buffer. */
600 while (state->stream.avail_out > 0 && !state->eof) {
601 state->stream.next_in = (unsigned char *)(uintptr_t)
602 __archive_read_filter_ahead(self->upstream, 1, &avail_in);
603 if (state->stream.next_in == NULL && avail_in < 0)
604 return (ARCHIVE_FATAL);
605 state->stream.avail_in = avail_in;
607 /* Decompress as much as we can in one pass. */
608 ret = lzmadec_decode(&(state->stream), avail_in == 0);
610 case LZMADEC_STREAM_END: /* Found end of stream. */
613 case LZMADEC_OK: /* Decompressor made some progress. */
614 __archive_read_filter_consume(self->upstream,
615 avail_in - state->stream.avail_in);
617 case LZMADEC_BUF_ERROR: /* Insufficient input data? */
618 archive_set_error(&self->archive->archive,
620 "Insufficient compressed data");
621 return (ARCHIVE_FATAL);
623 /* Return an error. */
624 archive_set_error(&self->archive->archive,
626 "Lzma decompression failed");
627 return (ARCHIVE_FATAL);
631 decompressed = state->stream.next_out - state->out_block;
632 state->total_out += decompressed;
633 if (decompressed == 0)
636 *p = state->out_block;
637 return (decompressed);
641 * Clean up the decompressor.
644 lzma_filter_close(struct archive_read_filter *self)
646 struct private_data *state;
649 state = (struct private_data *)self->data;
651 switch (lzmadec_end(&(state->stream))) {
655 archive_set_error(&(self->archive->archive),
657 "Failed to clean up %s compressor",
658 self->archive->archive.compression_name);
662 free(state->out_block);
671 * If we have no suitable library on this system, we can't actually do
672 * the decompression. We can, however, still detect compressed
673 * archives and emit a useful message.
677 lzma_bidder_init(struct archive_read_filter *self)
681 r = __archive_read_program(self, "unlzma");
682 /* Note: We set the format here even if __archive_read_program()
683 * above fails. We do, after all, know what the format is
684 * even if we weren't able to read it. */
685 self->code = ARCHIVE_COMPRESSION_LZMA;
690 #endif /* HAVE_LZMADEC_H */
694 xz_bidder_init(struct archive_read_filter *self)
698 r = __archive_read_program(self, "unxz");
699 /* Note: We set the format here even if __archive_read_program()
700 * above fails. We do, after all, know what the format is
701 * even if we weren't able to read it. */
702 self->code = ARCHIVE_COMPRESSION_XZ;
708 #endif /* HAVE_LZMA_H */