1 /* $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $ */
4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD
6 * Copyright (c) 2011 The NetBSD Foundation, Inc.
9 * This code is derived from software contributed to The NetBSD Foundation
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
/*
 * Decode .xz data read from descriptor `i` and write the result to
 * descriptor `o`.
 *
 * The first `prelen` input bytes come from `pre`; they are copied to the
 * start of ibuf and the first read() fills the space after them.
 * `*bytes_in` is set to prelen plus the bytes read, and grows on every
 * later refill. Failures are reported through maybe_err()/maybe_errx().
 *
 * NOTE(review): several lines of this function (declarations, case labels,
 * the return statement) are not visible here; the comments below describe
 * only the statements that are.
 */
43 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
45 lzma_stream strm = LZMA_STREAM_INIT;
// Decoder flags: report unsupported integrity checks, accept concatenated streams.
46 static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
48 lzma_action action = LZMA_RUN;
// Place the already-consumed prefix first, then read into the remaining space.
57 memcpy(ibuf, pre, prelen);
58 strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
// read() returns -1 on error; stored in avail_in it is compared as (size_t)-1.
59 if (strm.avail_in == (size_t)-1)
60 maybe_err("read failed");
// infile_newdata() is given only the freshly read count; prelen is added afterwards.
61 infile_newdata(strm.avail_in);
62 strm.avail_in += prelen;
63 *bytes_in = strm.avail_in;
// UINT64_MAX is passed as the decoder's memory limit.
65 if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
66 maybe_errx("Can't initialize decoder (%d)", ret);
// First decode call; anything other than LZMA_OK is reported as a header failure.
70 if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
71 maybe_errx("Can't read headers (%d)", ret);
75 strm.avail_out = sizeof(obuf);
// Input buffer drained: refill it from the input descriptor.
79 if (strm.avail_in == 0) {
81 strm.avail_in = read(i, ibuf, sizeof(ibuf));
82 switch (strm.avail_in) {
84 maybe_err("read failed");
90 infile_newdata(strm.avail_in);
91 *bytes_in += strm.avail_in;
96 ret = lzma_code(&strm, action);
98 // Write and check write error before checking decoder error.
99 // This way as much data as possible gets written to output
100 // even if decoder detected an error.
101 if (strm.avail_out == 0 || ret != LZMA_OK) {
102 const size_t write_size = sizeof(obuf) - strm.avail_out;
// A short write is treated the same as a failed write.
104 if (write(o, obuf, write_size) != (ssize_t)write_size)
105 maybe_err("write failed");
// Reset the output window so the decoder can reuse all of obuf.
107 strm.next_out = obuf;
108 strm.avail_out = sizeof(obuf);
109 bytes_out += write_size;
112 if (ret != LZMA_OK) {
113 if (ret == LZMA_STREAM_END) {
114 // Check that there's no trailing garbage.
// Any nonzero read() result (one more byte, or -1 on error) counts as garbage.
115 if (strm.avail_in != 0 || read(i, ibuf, 1))
116 ret = LZMA_DATA_ERROR;
// Translate the decoder's return code into a message for maybe_errx().
126 msg = strerror(ENOMEM);
129 case LZMA_FORMAT_ERROR:
130 msg = "File format not recognized";
133 case LZMA_OPTIONS_ERROR:
134 // FIXME: Better message?
135 msg = "Unsupported compression options";
138 case LZMA_DATA_ERROR:
139 msg = "File is corrupt";
143 msg = "Unexpected end of input";
146 case LZMA_MEMLIMIT_ERROR:
147 msg = "Reached memory limit";
// Codes without a dedicated message are reported by number.
151 maybe_errx("Unknown error (%d)", ret);
154 maybe_errx("%s", msg);
163 * Copied various bits and pieces from xz support code or brute force
// Smaller of A and B. Each argument appears twice in the expansion, so it
// may be evaluated twice: do not pass expressions with side effects.
167 #define my_min(A,B) ((A)<(B)?(A):(B))
169 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
170 // IO_BUFFER_SIZE must also be a multiple of 8 (sizeof(uint64_t))
172 # define IO_BUFFER_SIZE 8192
174 # define IO_BUFFER_SIZE (BUFSIZ & ~7U)
177 /// is_sparse() accesses the buffer as uint64_t for maximum speed.
178 /// Use a union to make sure that the buffer is properly aligned.
// Byte view: this is what parse_indexes() hands to the liblzma decoders.
180 uint8_t u8[IO_BUFFER_SIZE];
// 32-bit view: parse_indexes() uses it to test Stream Padding words for zero.
181 uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
// 64-bit view: not referenced by the code visible in this listing.
182 uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
/*
 * Positioned read: seek `fd` to offset `pos`, then read `size` bytes into
 * `buf`. The visible checks distinguish a failed seek, a failed read and a
 * short read; how each case is reported is not visible in this listing
 * (callers treat a nonzero result as failure).
 */
187 io_pread(int fd, io_buf *buf, size_t size, off_t pos)
189 // Using lseek() and read() is more portable than pread() and
190 // for us it is as good as real pread().
191 if (lseek(fd, pos, SEEK_SET) != pos) {
// read() returns -1 on error; once stored in a size_t that value is SIZE_MAX.
195 const size_t amount = read(fd, buf, size);
196 if (amount == SIZE_MAX)
// Getting fewer bytes than requested (including end of file) is checked separately.
199 if (amount != size) {
207 * Most of the following is copied (mostly verbatim) from the xz
208 * distribution, from file src/xz/list.c
211 ///////////////////////////////////////////////////////////////////////////////
214 /// \brief Listing information about .xz files
216 // Author: Lasse Collin
218 // This file has been put into the public domain.
219 // You can do whatever you want with this file.
221 ///////////////////////////////////////////////////////////////////////////////
224 /// Information about a .xz file
226 /// Combined Index of all Streams in the file
229 /// Total amount of Stream Padding
230 uint64_t stream_padding;
232 /// Highest memory usage so far
233 uint64_t memusage_max;
235 /// True if all Blocks so far have Compressed Size and
236 /// Uncompressed Size fields
239 /// Oldest XZ Utils version that will decompress the file
240 uint32_t min_version;
/// Initializer for xz_file_info. The values are positional, so they follow
/// the member order of the struct; the last one seeds min_version.
/// NOTE(review): 50000002 presumably encodes XZ Utils 5.0.0 (stable) in
/// liblzma's version-number format; confirm against LZMA_VERSION.
244 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
247 /// \brief Parse the Index(es) from the given .xz file
249 /// \param xfi Pointer to structure where the decoded information
251 /// \param src_fd File descriptor of the input file
253 /// \return On success, false is returned. On error, true is returned.
255 // TODO: This function is pretty big. liblzma should have a function that
256 // takes a callback function to parse the Index(es) from a .xz file to make
257 // it easy for applications.
259 parse_indexes(xz_file_info *xfi, int src_fd)
// An empty (or negative-size) file is checked before anything else.
264 if (st.st_size <= 0) {
// Anything shorter than a Stream Header plus a Stream Footer is checked next.
268 if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
273 lzma_stream_flags header_flags;
274 lzma_stream_flags footer_flags;
277 // lzma_stream for the Index decoder
278 lzma_stream strm = LZMA_STREAM_INIT;
280 // All Indexes decoded so far
281 lzma_index *combined_index = NULL;
283 // The Index currently being decoded
284 lzma_index *this_index = NULL;
286 // Current position in the file. We parse the file backwards so
287 // initialize it to point to the end of the file.
288 off_t pos = st.st_size;
290 // Each loop iteration decodes one Index.
292 // Check that there is enough data left to contain at least
293 // the Stream Header and Stream Footer. This check cannot
294 // fail in the first pass of this loop.
295 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
// Step back over the last LZMA_STREAM_HEADER_SIZE bytes, where the footer is expected.
299 pos -= LZMA_STREAM_HEADER_SIZE;
300 lzma_vli stream_padding = 0;
302 // Locate the Stream Footer. There may be Stream Padding which
303 // we must skip when reading backwards.
305 if (pos < LZMA_STREAM_HEADER_SIZE) {
309 if (io_pread(src_fd, &buf,
310 LZMA_STREAM_HEADER_SIZE, pos))
313 // Stream Padding is always a multiple of four bytes.
318 // To avoid calling io_pread() for every four bytes
319 // of Stream Padding, take advantage that we read
320 // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
321 // check them too before calling io_pread() again.
// Keep going while the 32-bit word at index i is zero, i.e. still padding.
326 } while (i >= 0 && buf.u32[i] == 0);
329 // Decode the Stream Footer.
330 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
331 if (ret != LZMA_OK) {
335 // Check that the Stream Footer doesn't specify something
336 // that we don't support. This can only happen if the xz
337 // version is older than liblzma and liblzma supports
340 // It is enough to check Stream Footer. Stream Header must
341 // match when it is compared against Stream Footer with
342 // lzma_stream_flags_compare().
343 if (footer_flags.version != 0) {
347 // Check that the size of the Index field looks sane.
348 lzma_vli index_size = footer_flags.backward_size;
// The Index plus a Stream Header must fit in the bytes that precede pos.
349 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
353 // Set pos to the beginning of the Index.
357 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
358 if (ret != LZMA_OK) {
363 // Don't give the decoder more input than the
365 strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
366 if (io_pread(src_fd, &buf, strm.avail_in, pos))
// Advance the file position and reduce the amount of Index data still unread.
369 pos += strm.avail_in;
370 index_size -= strm.avail_in;
372 strm.next_in = buf.u8;
373 ret = lzma_code(&strm, LZMA_RUN);
375 } while (ret == LZMA_OK);
377 // If the decoding seems to be successful, check also that
378 // the Index decoder consumed as much input as indicated
379 // by the Backward Size field.
380 if (ret == LZMA_STREAM_END)
381 if (index_size != 0 || strm.avail_in != 0)
382 ret = LZMA_DATA_ERROR;
384 if (ret != LZMA_STREAM_END) {
385 // LZMA_BUFFER_ERROR means that the Index decoder
386 // would have liked more input than what the Index
387 // size should be according to Stream Footer.
388 // The message for LZMA_DATA_ERROR makes more
389 // sense in that case.
390 if (ret == LZMA_BUF_ERROR)
391 ret = LZMA_DATA_ERROR;
396 // Decode the Stream Header and check that its Stream Flags
397 // match the Stream Footer.
398 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
// The size reported by lzma_index_total_size() must fit in what is left before pos.
399 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
403 pos -= lzma_index_total_size(this_index);
404 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
407 ret = lzma_stream_header_decode(&header_flags, buf.u8);
408 if (ret != LZMA_OK) {
412 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
413 if (ret != LZMA_OK) {
417 // Store the decoded Stream Flags into this_index. This is
418 // needed so that we can print which Check is used in each
420 ret = lzma_index_stream_flags(this_index, &footer_flags);
424 // Store also the size of the Stream Padding field. It is
425 // needed to show the offsets of the Streams correctly.
426 ret = lzma_index_stream_padding(this_index, stream_padding);
// The file is parsed backwards, so Indexes decoded earlier belong after this one.
430 if (combined_index != NULL) {
431 // Append the earlier decoded Indexes
433 ret = lzma_index_cat(
434 this_index, combined_index, NULL);
435 if (ret != LZMA_OK) {
440 combined_index = this_index;
// Accumulate the Stream Padding seen across all Streams.
443 xfi->stream_padding += stream_padding;
449 // All OK. Make combined_index available to the caller.
450 xfi->idx = combined_index;
454 // Something went wrong, free the allocated memory.
456 lzma_index_end(combined_index, NULL);
457 lzma_index_end(this_index, NULL);
461 /***************** end of copy from list.c *************************/
464 * Small wrapper to extract total length of a file
// Start from the default-initialized file info; parse_indexes() fills in idx.
469 xz_file_info xfi = XZ_FILE_INFO_INIT;
// parse_indexes() returns false on success, hence the negation.
470 if (!parse_indexes(&xfi, fd)) {
// Read the total uncompressed size before the combined Index is freed.
471 off_t res = lzma_index_uncompressed_size(xfi.idx);
472 lzma_index_end(xfi.idx, NULL);