usr.bin/gzip/unxz.c

   1 /*      $NetBSD: unxz.c,v 1.8 2018/10/06 16:36:45 martin Exp $  */
   2
   3 /*-
   4  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
   5  *
   6  * Copyright (c) 2011 The NetBSD Foundation, Inc.
   7  * All rights reserved.
   8  *
   9  * This code is derived from software contributed to The NetBSD Foundation
  10  * by Christos Zoulas.
  11  *
  12  * Redistribution and use in source and binary forms, with or without
  13  * modification, are permitted provided that the following conditions
  14  * are met:
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  */
  33 #include <sys/cdefs.h>
  34 __FBSDID("$FreeBSD$");
  35
  36 #include <stdarg.h>
  37 #include <errno.h>
  38 #include <stdio.h>
  39 #include <unistd.h>
  40 #include <lzma.h>
  41
  42 static off_t
  43 unxz(int i, int o, char *pre, size_t prelen, off_t *bytes_in)
  44 {
  45         lzma_stream strm = LZMA_STREAM_INIT;
  46         static const int flags = LZMA_TELL_UNSUPPORTED_CHECK|LZMA_CONCATENATED;
  47         lzma_ret ret;
  48         lzma_action action = LZMA_RUN;
  49         off_t bytes_out, bp;
  50         uint8_t ibuf[BUFSIZ];
  51         uint8_t obuf[BUFSIZ];
  52
  53         if (bytes_in == NULL)
  54                 bytes_in = &bp;
  55
  56         strm.next_in = ibuf;
  57         memcpy(ibuf, pre, prelen);
  58         strm.avail_in = read(i, ibuf + prelen, sizeof(ibuf) - prelen);
  59         if (strm.avail_in == (size_t)-1)
  60                 maybe_err("read failed");
  61         infile_newdata(strm.avail_in);
  62         strm.avail_in += prelen;
  63         *bytes_in = strm.avail_in;
  64
  65         if ((ret = lzma_stream_decoder(&strm, UINT64_MAX, flags)) != LZMA_OK)
  66                 maybe_errx("Can't initialize decoder (%d)", ret);
  67
  68         strm.next_out = NULL;
  69         strm.avail_out = 0;
  70         if ((ret = lzma_code(&strm, LZMA_RUN)) != LZMA_OK)
  71                 maybe_errx("Can't read headers (%d)", ret);
  72
  73         bytes_out = 0;
  74         strm.next_out = obuf;
  75         strm.avail_out = sizeof(obuf);
  76
  77         for (;;) {
  78                 check_siginfo();
  79                 if (strm.avail_in == 0) {
  80                         strm.next_in = ibuf;
  81                         strm.avail_in = read(i, ibuf, sizeof(ibuf));
  82                         switch (strm.avail_in) {
  83                         case (size_t)-1:
  84                                 maybe_err("read failed");
  85                                 /*NOTREACHED*/
  86                         case 0:
  87                                 action = LZMA_FINISH;
  88                                 break;
  89                         default:
  90                                 infile_newdata(strm.avail_in);
  91                                 *bytes_in += strm.avail_in;
  92                                 break;
  93                         }
  94                 }
  95
  96                 ret = lzma_code(&strm, action);
  97
  98                 // Write and check write error before checking decoder error.
  99                 // This way as much data as possible gets written to output
 100                 // even if decoder detected an error.
 101                 if (strm.avail_out == 0 || ret != LZMA_OK) {
 102                         const size_t write_size = sizeof(obuf) - strm.avail_out;
 103
 104                         if (write(o, obuf, write_size) != (ssize_t)write_size)
 105                                 maybe_err("write failed");
 106
 107                         strm.next_out = obuf;
 108                         strm.avail_out = sizeof(obuf);
 109                         bytes_out += write_size;
 110                 }
 111
 112                 if (ret != LZMA_OK) {
 113                         if (ret == LZMA_STREAM_END) {
 114                                 // Check that there's no trailing garbage.
 115                                 if (strm.avail_in != 0 || read(i, ibuf, 1))
 116                                         ret = LZMA_DATA_ERROR;
 117                                 else {
 118                                         lzma_end(&strm);
 119                                         return bytes_out;
 120                                 }
 121                         }
 122
 123                         const char *msg;
 124                         switch (ret) {
 125                         case LZMA_MEM_ERROR:
 126                                 msg = strerror(ENOMEM);
 127                                 break;
 128
 129                         case LZMA_FORMAT_ERROR:
 130                                 msg = "File format not recognized";
 131                                 break;
 132
 133                         case LZMA_OPTIONS_ERROR:
 134                                 // FIXME: Better message?
 135                                 msg = "Unsupported compression options";
 136                                 break;
 137
 138                         case LZMA_DATA_ERROR:
 139                                 msg = "File is corrupt";
 140                                 break;
 141
 142                         case LZMA_BUF_ERROR:
 143                                 msg = "Unexpected end of input";
 144                                 break;
 145
 146                         case LZMA_MEMLIMIT_ERROR:
 147                                 msg = "Reached memory limit";
 148                                 break;
 149
 150                         default:
 151                                 maybe_errx("Unknown error (%d)", ret);
 152                                 break;
 153                         }
 154                         maybe_errx("%s", msg);
 155
 156                 }
 157         }
 158 }
 159
 160 #include <stdbool.h>
 161
 162 /*
 163  * Copied various bits and pieces from xz support code or brute force
 164  * replacements.
 165  */
 166
 167 #define my_min(A,B)     ((A)<(B)?(A):(B))
 168
 169 // Some systems have suboptimal BUFSIZ. Use a bit bigger value on them.
 170 // We also need that IO_BUFFER_SIZE is a multiple of 8 (sizeof(uint64_t))
 171 #if BUFSIZ <= 1024
 172 #       define IO_BUFFER_SIZE 8192
 173 #else
 174 #       define IO_BUFFER_SIZE (BUFSIZ & ~7U)
 175 #endif
 176
/// Buffer of IO_BUFFER_SIZE bytes used for reading parts of the .xz file.
/// The union allows the same storage to be viewed as bytes (u8) or as
/// 32-bit/64-bit words (u32/u64), and makes sure that the buffer is
/// properly aligned for those word accesses.
/// NOTE(review): the comment inherited from xz mentioned is_sparse()
/// reading the buffer as uint64_t; no is_sparse() is visible in this
/// file. Here parse_indexes() uses the u8 and u32 views.
typedef union {
        uint8_t u8[IO_BUFFER_SIZE];
        uint32_t u32[IO_BUFFER_SIZE / sizeof(uint32_t)];
        uint64_t u64[IO_BUFFER_SIZE / sizeof(uint64_t)];
} io_buf;
 184
 185
 186 static bool
 187 io_pread(int fd, io_buf *buf, size_t size, off_t pos)
 188 {
 189         // Using lseek() and read() is more portable than pread() and
 190         // for us it is as good as real pread().
 191         if (lseek(fd, pos, SEEK_SET) != pos) {
 192                 return true;
 193         }
 194
 195         const size_t amount = read(fd, buf, size);
 196         if (amount == SIZE_MAX)
 197                 return true;
 198
 199         if (amount != size) {
 200                 return true;
 201         }
 202
 203         return false;
 204 }
 205
 206 /*
 207  * Most of the following is copied (mostly verbatim) from the xz
 208  * distribution, from file src/xz/list.c
 209  */
 210
 211 ///////////////////////////////////////////////////////////////////////////////
 212 //
 213 /// \file       list.c
 214 /// \brief      Listing information about .xz files
 215 //
 216 //  Author:     Lasse Collin
 217 //
 218 //  This file has been put into the public domain.
 219 //  You can do whatever you want with this file.
 220 //
 221 ///////////////////////////////////////////////////////////////////////////////
 222
 223
 224 /// Information about a .xz file
 225 typedef struct {
 226         /// Combined Index of all Streams in the file
 227         lzma_index *idx;
 228
 229         /// Total amount of Stream Padding
 230         uint64_t stream_padding;
 231
 232         /// Highest memory usage so far
 233         uint64_t memusage_max;
 234
 235         /// True if all Blocks so far have Compressed Size and
 236         /// Uncompressed Size fields
 237         bool all_have_sizes;
 238
 239         /// Oldest XZ Utils version that will decompress the file
 240         uint32_t min_version;
 241
 242 } xz_file_info;
 243
 244 #define XZ_FILE_INFO_INIT { NULL, 0, 0, true, 50000002 }
 245
 246
 247 /// \brief      Parse the Index(es) from the given .xz file
 248 ///
 249 /// \param      xfi     Pointer to structure where the decoded information
 250 ///                     is stored.
 251 /// \param      pair    Input file
 252 ///
 253 /// \return     On success, false is returned. On error, true is returned.
 254 ///
 255 // TODO: This function is pretty big. liblzma should have a function that
 256 // takes a callback function to parse the Index(es) from a .xz file to make
 257 // it easy for applications.
 258 static bool
 259 parse_indexes(xz_file_info *xfi, int src_fd)
 260 {
 261         struct stat st;
 262
 263         fstat(src_fd, &st);
 264         if (st.st_size <= 0) {
 265                 return true;
 266         }
 267
 268         if (st.st_size < 2 * LZMA_STREAM_HEADER_SIZE) {
 269                 return true;
 270         }
 271
 272         io_buf buf;
 273         lzma_stream_flags header_flags;
 274         lzma_stream_flags footer_flags;
 275         lzma_ret ret;
 276
 277         // lzma_stream for the Index decoder
 278         lzma_stream strm = LZMA_STREAM_INIT;
 279
 280         // All Indexes decoded so far
 281         lzma_index *combined_index = NULL;
 282
 283         // The Index currently being decoded
 284         lzma_index *this_index = NULL;
 285
 286         // Current position in the file. We parse the file backwards so
 287         // initialize it to point to the end of the file.
 288         off_t pos = st.st_size;
 289
 290         // Each loop iteration decodes one Index.
 291         do {
 292                 // Check that there is enough data left to contain at least
 293                 // the Stream Header and Stream Footer. This check cannot
 294                 // fail in the first pass of this loop.
 295                 if (pos < 2 * LZMA_STREAM_HEADER_SIZE) {
 296                         goto error;
 297                 }
 298
 299                 pos -= LZMA_STREAM_HEADER_SIZE;
 300                 lzma_vli stream_padding = 0;
 301
 302                 // Locate the Stream Footer. There may be Stream Padding which
 303                 // we must skip when reading backwards.
 304                 while (true) {
 305                         if (pos < LZMA_STREAM_HEADER_SIZE) {
 306                                 goto error;
 307                         }
 308
 309                         if (io_pread(src_fd, &buf,
 310                                         LZMA_STREAM_HEADER_SIZE, pos))
 311                                 goto error;
 312
 313                         // Stream Padding is always a multiple of four bytes.
 314                         int i = 2;
 315                         if (buf.u32[i] != 0)
 316                                 break;
 317
 318                         // To avoid calling io_pread() for every four bytes
 319                         // of Stream Padding, take advantage that we read
 320                         // 12 bytes (LZMA_STREAM_HEADER_SIZE) already and
 321                         // check them too before calling io_pread() again.
 322                         do {
 323                                 stream_padding += 4;
 324                                 pos -= 4;
 325                                 --i;
 326                         } while (i >= 0 && buf.u32[i] == 0);
 327                 }
 328
 329                 // Decode the Stream Footer.
 330                 ret = lzma_stream_footer_decode(&footer_flags, buf.u8);
 331                 if (ret != LZMA_OK) {
 332                         goto error;
 333                 }
 334
 335                 // Check that the Stream Footer doesn't specify something
 336                 // that we don't support. This can only happen if the xz
 337                 // version is older than liblzma and liblzma supports
 338                 // something new.
 339                 //
 340                 // It is enough to check Stream Footer. Stream Header must
 341                 // match when it is compared against Stream Footer with
 342                 // lzma_stream_flags_compare().
 343                 if (footer_flags.version != 0) {
 344                         goto error;
 345                 }
 346
 347                 // Check that the size of the Index field looks sane.
 348                 lzma_vli index_size = footer_flags.backward_size;
 349                 if ((lzma_vli)(pos) < index_size + LZMA_STREAM_HEADER_SIZE) {
 350                         goto error;
 351                 }
 352
 353                 // Set pos to the beginning of the Index.
 354                 pos -= index_size;
 355
 356                 // Decode the Index.
 357                 ret = lzma_index_decoder(&strm, &this_index, UINT64_MAX);
 358                 if (ret != LZMA_OK) {
 359                         goto error;
 360                 }
 361
 362                 do {
 363                         // Don't give the decoder more input than the
 364                         // Index size.
 365                         strm.avail_in = my_min(IO_BUFFER_SIZE, index_size);
 366                         if (io_pread(src_fd, &buf, strm.avail_in, pos))
 367                                 goto error;
 368
 369                         pos += strm.avail_in;
 370                         index_size -= strm.avail_in;
 371
 372                         strm.next_in = buf.u8;
 373                         ret = lzma_code(&strm, LZMA_RUN);
 374
 375                 } while (ret == LZMA_OK);
 376
 377                 // If the decoding seems to be successful, check also that
 378                 // the Index decoder consumed as much input as indicated
 379                 // by the Backward Size field.
 380                 if (ret == LZMA_STREAM_END)
 381                         if (index_size != 0 || strm.avail_in != 0)
 382                                 ret = LZMA_DATA_ERROR;
 383
 384                 if (ret != LZMA_STREAM_END) {
 385                         // LZMA_BUFFER_ERROR means that the Index decoder
 386                         // would have liked more input than what the Index
 387                         // size should be according to Stream Footer.
 388                         // The message for LZMA_DATA_ERROR makes more
 389                         // sense in that case.
 390                         if (ret == LZMA_BUF_ERROR)
 391                                 ret = LZMA_DATA_ERROR;
 392
 393                         goto error;
 394                 }
 395
 396                 // Decode the Stream Header and check that its Stream Flags
 397                 // match the Stream Footer.
 398                 pos -= footer_flags.backward_size + LZMA_STREAM_HEADER_SIZE;
 399                 if ((lzma_vli)(pos) < lzma_index_total_size(this_index)) {
 400                         goto error;
 401                 }
 402
 403                 pos -= lzma_index_total_size(this_index);
 404                 if (io_pread(src_fd, &buf, LZMA_STREAM_HEADER_SIZE, pos))
 405                         goto error;
 406
 407                 ret = lzma_stream_header_decode(&header_flags, buf.u8);
 408                 if (ret != LZMA_OK) {
 409                         goto error;
 410                 }
 411
 412                 ret = lzma_stream_flags_compare(&header_flags, &footer_flags);
 413                 if (ret != LZMA_OK) {
 414                         goto error;
 415                 }
 416
 417                 // Store the decoded Stream Flags into this_index. This is
 418                 // needed so that we can print which Check is used in each
 419                 // Stream.
 420                 ret = lzma_index_stream_flags(this_index, &footer_flags);
 421                 if (ret != LZMA_OK)
 422                         goto error;
 423
 424                 // Store also the size of the Stream Padding field. It is
 425                 // needed to show the offsets of the Streams correctly.
 426                 ret = lzma_index_stream_padding(this_index, stream_padding);
 427                 if (ret != LZMA_OK)
 428                         goto error;
 429
 430                 if (combined_index != NULL) {
 431                         // Append the earlier decoded Indexes
 432                         // after this_index.
 433                         ret = lzma_index_cat(
 434                                         this_index, combined_index, NULL);
 435                         if (ret != LZMA_OK) {
 436                                 goto error;
 437                         }
 438                 }
 439
 440                 combined_index = this_index;
 441                 this_index = NULL;
 442
 443                 xfi->stream_padding += stream_padding;
 444
 445         } while (pos > 0);
 446
 447         lzma_end(&strm);
 448
 449         // All OK. Make combined_index available to the caller.
 450         xfi->idx = combined_index;
 451         return false;
 452
 453 error:
 454         // Something went wrong, free the allocated memory.
 455         lzma_end(&strm);
 456         lzma_index_end(combined_index, NULL);
 457         lzma_index_end(this_index, NULL);
 458         return true;
 459 }
 460
  461 /***************** end of copy from list.c *************************/
 462
 463 /*
 464  * Small wrapper to extract total length of a file
 465  */
 466 off_t
 467 unxz_len(int fd)
 468 {
 469         xz_file_info xfi = XZ_FILE_INFO_INIT;
 470         if (!parse_indexes(&xfi, fd)) {
 471                 off_t res = lzma_index_uncompressed_size(xfi.idx);
 472                 lzma_index_end(xfi.idx, NULL);
 473                 return res;
 474         }
 475         return 0;
 476 }
 477