usr.bin/gzip/unxz.c

   1 /*      $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $  */
   2
   3 /*-
   4  * SPDX-License-Identifier: BSD-2-Clause
   5  *
   6  * Copyright (c) 2011 The NetBSD Foundation, Inc.
   7  * All rights reserved.
   8  *
   9  * This code is derived from software contributed to The NetBSD Foundation
  10  * by Christos Zoulas.
  11  *
  12  * Redistribution and use in source and binary forms, with or without
  13  * modification, are permitted provided that the following conditions
  14  * are met:
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  */
  33 #include <sys/cdefs.h>
  34 #include <stdarg.h>
  35 #include <errno.h>
  36 #include <stdio.h>
  37 #include <unistd.h>
  38 #include <lzma.h>
  39
  40 static off_t
  41 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
  42 {
  43         lzma_stream strm = LZMA_STREAM_INIT;
  44         static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
  45         lzma_ret ret;
  46         lzma_action action = LZMA_RUN;
  47         off_t bytes_out, bp;
  48         uint8_t ibuf[BUFSIZ];
  49         uint8_t obuf[BUFSIZ];
  50
  51         if (bytes_in == NULL)
  52                 bytes_in = &bp;
  53
  54         strm.next_in = ibuf;
  55         memcpy(ibuf, pre, prelen);
  56         strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
  57         if (strm.avail_in == (size_t)-1)
  58                 maybe_err("read failed");
  59         infile_newdata(strm.avail_in);
  60         strm.avail_in += prelen;
  61         *bytes_in = strm.avail_in;
  62
  63         if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
  64                 maybe_errx("Can't initialize decoder (%d)", ret);
  65
  66         strm.next_out = NULL;
  67         strm.avail_out = 0;
  68         if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
  69                 maybe_errx("Can't read headers (%d)", ret);
  70
  71         bytes_out = 0;
  72         strm.next_out = obuf;
  73         strm.avail_out = sizeof(obuf);
  74
  75         for (;;) {
  76                 check_siginfo();
  77                 if (strm.avail_in == 0) {
  78                         strm.next_in = ibuf;
  79                         strm.avail_in = read(i, ibuf, sizeof(ibuf));
  80                         switch (strm.avail_in) {
  81                         case (size_t)-1:
  82                                 maybe_err("read failed");
  83                                 /*NOTREACHED*/
  84                         case 0:
  85                                 action = LZMA_FINISH;
  86                                 break;
  87                         default:
  88                                 infile_newdata(strm.avail_in);
  89                                 *bytes_in += strm.avail_in;
  90                                 break;
  91                         }
  92                 }
  93
  94                 ret = lzma_code(&strm, action);
  95
  96                 // Write and check write error before checking decoder error.
  97                 // This way as much data as possible gets written to output
  98                 // even if decoder detected an error.
  99                 if (strm.avail_out == 0 || ret != LZMA_OK) {
 100                         const size_t write_size = sizeof(obuf) - strm.avail_out;
 101
 102                         if (write(o, obuf, write_size) != (ssize_t)write_size)
 103                                 maybe_err("write failed");
 104
 105                         strm.next_out = obuf;
 106                         strm.avail_out = sizeof(obuf);
 107                         bytes_out += write_size;
 108                 }
 109
 110                 if (ret != LZMA_OK) {
 111                         if (ret == LZMA_STREAM_END) {
 112                                 // Check that there's no trailing garbage.
 113                                 if (strm.avail_in != 0 || read(i, ibuf, 1))
 114                                         ret = LZMA_DATA_ERROR;
 115                                 else {
 116                                         lzma_end(&strm);
 117                                         return bytes_out;
 118                                 }
 119                         }
 120
 121                         const char *msg;
 122                         switch (ret) {
 123                         case LZMA_MEM_ERROR:
 124                                 msg = strerror(ENOMEM);
 125                                 break;
 126
 127                         case LZMA_FORMAT_ERROR:
 128                                 msg = "File format not recognized";
 129                                 break;
 130
 131                         case LZMA_OPTIONS_ERROR:
 132                                 // FIXME: Better message?
 133                                 msg = "Unsupported compression options";
 134                                 break;
 135
 136                         case LZMA_DATA_ERROR:
 137                                 msg = "File is corrupt";
 138                                 break;
 139
 140                         case LZMA_BUF_ERROR:
 141                                 msg = "Unexpected end of input";
 142                                 break;
 143
 144                         case LZMA_MEMLIMIT_ERROR:
 145                                 msg = "Reached memory limit";
 146                                 break;
 147
 148                         default:
 149                                 maybe_errx("Unknown error (%d)", ret);
 150                                 break;
 151                         }
 152                         maybe_errx("%s", msg);
 153
 154                 }
 155         }
 156 }
 157
 158 #include <stdbool.h>
 159
 160 /*
 161  * Copied various bits and pieces from xz support code or brute force
 162  * replacements.
 163  */
 164
 165 #define my_min(A,B)     ((A)<(B)?(A):(B))
 166
 167 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
 168 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
 169 #if BUFSIZ <= 1024
 170 #       define IO_BUFFER_SIZE 8192
 171 #else
 172 #       define IO_BUFFER_SIZE (BUFSIZ & ~7U)
 173 #endif
 174
 175 /// is_sparse() accesses the buffer as uint64_t for maximum speed.
 176 /// Use an union to make sure that the buffer is properly aligned.
 177 typedef union {
 178         uint8_t u8[IO_BUFFER_SIZE];
 179         uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
 180         uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
 181 } io_buf;
 182
 183
 184 static bool
 185 io_pread(int fd, io_buf *buf, size_t size, off_t pos)
 186 {
 187         // Using lseek() and read() is more portable than pread() and
 188         // for us it is as good as real pread().
 189         if (lseek(fd, pos, SEEK_SET) != pos) {
 190                 return true;
 191         }
 192
 193         const size_t amount = read(fd, buf, size);
 194         if (amount == SIZE_MAX)
 195                 return true;
 196
 197         if (amount != size) {
 198                 return true;
 199         }
 200
 201         return false;
 202 }
 203
 204 /*
 205  * Most of the following is copied (mostly verbatim) from the xz
 206  * distribution, from file src/xz/list.c
 207  */
 208
 209 ///////////////////////////////////////////////////////////////////////////////
 210 //
 211 /// \file       list.c
 212 /// \brief      Listing information about .xz files
 213 //
 214 //  Author:     Lasse Collin
 215 //
 216 //  This file has been put into the public domain.
 217 //  You can do whatever you want with this file.
 218 //
 219 ///////////////////////////////////////////////////////////////////////////////
 220
 221
 222 /// Information about a .xz file
 223 typedef struct {
 224         /// Combined Index of all Streams in the file
 225         lzma_index *idx;
 226
 227         /// Total amount of Stream Padding
 228         uint64_t stream_padding;
 229
 230         /// Highest memory usage so far
 231         uint64_t memusage_max;
 232
 233         /// True if all Blocks so far have Compressed Size and
 234         /// Uncompressed Size fields
 235         bool all_have_sizes;
 236
 237         /// Oldest XZ Utils version that will decompress the file
 238         uint32_t min_version;
 239
 240 } xz_file_info;
 241
 242 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
 243
 244
 245 /// \brief      Parse the Index(es) from the given .xz file
 246 ///
 247 /// \param      xfi     Pointer to structure where the decoded information
 248 ///                     is stored.
 249 /// \param      pair    Input file
 250 ///
 251 /// \return     On success, false is returned. On error, true is returned.
 252 ///
 253 // TODO: This function is pretty big. liblzma should have a function that
 254 // takes a callback function to parse the Index(es) from a .xz file to make
 255 // it easy for applications.
 256 static bool
 257 parse_indexes(xz_file_info *xfi, int src_fd)
 258 {
 259         struct stat st;
 260
 261         if (fstat(src_fd, &st) != 0) {
 262                 return true;
 263         }
 264
 265         if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
 266                 return true;
 267         }
 268
 269         io_buf buf;
 270         lzma_stream_flags header_flags;
 271         lzma_stream_flags footer_flags;
 272         lzma_ret ret;
 273
 274         // lzma_stream for the Index decoder
 275         lzma_stream strm = LZMA_STREAM_INIT;
 276
 277         // All Indexes decoded so far
 278         lzma_index *combined_index = NULL;
 279
 280         // The Index currently being decoded
 281         lzma_index *this_index = NULL;
 282
 283         // Current position in the file. We parse the file backwards so
 284         // initialize it to point to the end of the file.
 285         off_t pos = st.st_size;
 286
 287         // Each loop iteration decodes one Index.
 288         do {
 289                 // Check that there is enough data left to contain at least
 290                 // the Stream Header and Stream Footer. This check cannot
 291                 // fail in the first pass of this loop.
 292                 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
 293                         goto error;
 294                 }
 295
 296                 pos -= LZMA_STREAM_HEADER_SIZE;
 297                 lzma_vli stream_padding = 0;
 298
 299                 // Locate the Stream Footer. There may be Stream Padding which
 300                 // we must skip when reading backwards.
 301                 while (true) {
 302                         if (pos < LZMA_STREAM_HEADER_SIZE) {
 303                                 goto error;
 304                         }
 305
 306                         if (io_pread(src_fd, &buf,
 307                                         LZMA_STREAM_HEADER_SIZE, pos))
 308                                 goto error;
 309
 310                         // Stream Padding is always a multiple of four bytes.
 311                         int i = 2;
 312                         if (buf.u32[i] != 0)
 313                                 break;
 314
 315                         // To avoid calling io_pread() for every four bytes
 316                         // of Stream Padding, take advantage that we read
 317                         // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
 318                         // check them too before calling io_pread() again.
 319                         do {
 320                                 stream_padding += 4;
 321                                 pos -= 4;
 322                                 --i;
 323                         } while (i >= 0 && buf.u32[i] == 0);
 324                 }
 325
 326                 // Decode the Stream Footer.
 327                 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
 328                 if (ret != LZMA_OK) {
 329                         goto error;
 330                 }
 331
 332                 // Check that the Stream Footer doesn't specify something
 333                 // that we don't support. This can only happen if the xz
 334                 // version is older than liblzma and liblzma supports
 335                 // something new.
 336                 //
 337                 // It is enough to check Stream Footer. Stream Header must
 338                 // match when it is compared against Stream Footer with
 339                 // lzma_stream_flags_compare().
 340                 if (footer_flags.version != 0) {
 341                         goto error;
 342                 }
 343
 344                 // Check that the size of the Index field looks sane.
 345                 lzma_vli index_size = footer_flags.backward_size;
 346                 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
 347                         goto error;
 348                 }
 349
 350                 // Set pos to the beginning of the Index.
 351                 pos -= index_size;
 352
 353                 // Decode the Index.
 354                 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
 355                 if (ret != LZMA_OK) {
 356                         goto error;
 357                 }
 358
 359                 do {
 360                         // Don't give the decoder more input than the
 361                         // Index size.
 362                         strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
 363                         if (io_pread(src_fd, &buf, strm.avail_in, pos))
 364                                 goto error;
 365
 366                         pos += strm.avail_in;
 367                         index_size -= strm.avail_in;
 368
 369                         strm.next_in = buf.u8;
 370                         ret = lzma_code(&strm, LZMA_RUN);
 371
 372                 } while (ret == LZMA_OK);
 373
 374                 // If the decoding seems to be successful, check also that
 375                 // the Index decoder consumed as much input as indicated
 376                 // by the Backward Size field.
 377                 if (ret == LZMA_STREAM_END)
 378                         if (index_size != 0 || strm.avail_in != 0)
 379                                 ret = LZMA_DATA_ERROR;
 380
 381                 if (ret != LZMA_STREAM_END) {
 382                         // LZMA_BUFFER_ERROR means that the Index decoder
 383                         // would have liked more input than what the Index
 384                         // size should be according to Stream Footer.
 385                         // The message for LZMA_DATA_ERROR makes more
 386                         // sense in that case.
 387                         if (ret == LZMA_BUF_ERROR)
 388                                 ret = LZMA_DATA_ERROR;
 389
 390                         goto error;
 391                 }
 392
 393                 // Decode the Stream Header and check that its Stream Flags
 394                 // match the Stream Footer.
 395                 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
 396                 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
 397                         goto error;
 398                 }
 399
 400                 pos -= lzma_index_total_size(this_index);
 401                 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
 402                         goto error;
 403
 404                 ret = lzma_stream_header_decode(&header_flags, buf.u8);
 405                 if (ret != LZMA_OK) {
 406                         goto error;
 407                 }
 408
 409                 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
 410                 if (ret != LZMA_OK) {
 411                         goto error;
 412                 }
 413
 414                 // Store the decoded Stream Flags into this_index. This is
 415                 // needed so that we can print which Check is used in each
 416                 // Stream.
 417                 ret = lzma_index_stream_flags(this_index, &footer_flags);
 418                 if (ret != LZMA_OK)
 419                         goto error;
 420
 421                 // Store also the size of the Stream Padding field. It is
 422                 // needed to show the offsets of the Streams correctly.
 423                 ret = lzma_index_stream_padding(this_index, stream_padding);
 424                 if (ret != LZMA_OK)
 425                         goto error;
 426
 427                 if (combined_index != NULL) {
 428                         // Append the earlier decoded Indexes
 429                         // after this_index.
 430                         ret = lzma_index_cat(
 431                                         this_index, combined_index, NULL);
 432                         if (ret != LZMA_OK) {
 433                                 goto error;
 434                         }
 435                 }
 436
 437                 combined_index = this_index;
 438                 this_index = NULL;
 439
 440                 xfi->stream_padding += stream_padding;
 441
 442         } while (pos > 0);
 443
 444         lzma_end(&strm);
 445
 446         // All OK. Make combined_index available to the caller.
 447         xfi->idx = combined_index;
 448         return false;
 449
 450 error:
 451         // Something went wrong, free the allocated memory.
 452         lzma_end(&strm);
 453         lzma_index_end(combined_index, NULL);
 454         lzma_index_end(this_index, NULL);
 455         return true;
 456 }
 457
/***************** end of copy from list.c *************************/
 459
 460 /*
 461  * Small wrapper to extract total length of a file
 462  */
 463 off_t
 464 unxz_len(int fd)
 465 {
 466         xz_file_info xfi = XZ_FILE_INFO_INIT;
 467         if (!parse_indexes(&xfi, fd)) {
 468                 off_t res = lzma_index_uncompressed_size(xfi.idx);
 469                 lzma_index_end(xfi.idx, NULL);
 470                 return res;
 471         }
 472         return 0;
 473 }
 474