1 ///////////////////////////////////////////////////////////////////////////////
4 /// \brief Simple single-threaded tool to uncompress .xz or .lzma files
6 // Author: Lasse Collin
8 // This file has been put into the public domain.
9 // You can do whatever you want with this file.
11 ///////////////////////////////////////////////////////////////////////////////
22 #include "tuklib_progname.h"
23 #include "tuklib_exit.h"
32 # define TOOL_FORMAT "lzma"
34 # define TOOL_FORMAT "xz"
38 /// Maximum number of bytes of memory to use
39 static uint64_t memlimit;
41 /// Total amount of physical RAM
42 static uint64_t total_ram;
44 /// Error messages are suppressed if this is zero, which is the case when
45 /// --quiet has been given at least twice.
46 static unsigned int display_errors = 2;
49 static void lzma_attribute((format(printf, 1, 2)))
50 my_errorf(const char *fmt, ...)
56 fprintf(stderr, "%s: ", progname);
57 vfprintf(stderr, fmt, ap);
58 fprintf(stderr, "\n");
66 static void lzma_attribute((noreturn))
69 // Round up to the next MiB and do it correctly also with UINT64_MAX.
70 const uint64_t mem_mib = (memlimit >> 20)
71 + ((memlimit & ((UINT32_C(1) << 20) - 1)) != 0);
74 "Usage: %s [OPTION]... [FILE]...\n"
75 "Uncompress files in the ." TOOL_FORMAT " format to the standard output.\n"
77 " -c, --stdout (ignored)\n"
78 " -d, --decompress (ignored)\n"
79 " -k, --keep (ignored)\n"
80 " -M, --memory=NUM use NUM bytes of memory at maximum (0 means default)\n"
81 " -q, --quiet specify *twice* to suppress errors\n"
82 " -Q, --no-warn (ignored)\n"
83 " -h, --help display this help and exit\n"
84 " -V, --version display the version number and exit\n"
86 "With no FILE, or when FILE is -, read standard input.\n"
88 "On this system and configuration, this program will use a maximum of roughly\n"
89 "%" PRIu64 " MiB RAM.\n"
91 "Report bugs to <" PACKAGE_BUGREPORT "> (in English or Finnish).\n"
92 PACKAGE_NAME " home page: <" PACKAGE_URL ">\n", progname, mem_mib);
93 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
97 static void lzma_attribute((noreturn))
100 printf(TOOL_FORMAT "dec (" PACKAGE_NAME ") " LZMA_VERSION_STRING "\n"
101 "liblzma %s\n", lzma_version_string());
103 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);
107 /// Set the memory usage limit to the given percentage of the total amount
108 /// of physical memory (RAM), as stored in total_ram.
110 memlimit_set_percentage(uint32_t percentage)
112 memlimit = percentage * total_ram / 100;
117 /// Set the memory usage limit to the given number of bytes. Zero is a special
118 /// value to indicate the default limit.
120 memlimit_set(uint64_t new_memlimit)
122 if (new_memlimit != 0) {
123 memlimit = new_memlimit;
125 memlimit = 40 * total_ram / 100;
126 if (memlimit < UINT64_C(80) * 1024 * 1024) {
127 memlimit = 80 * total_ram / 100;
128 if (memlimit > UINT64_C(80) * 1024 * 1024)
129 memlimit = UINT64_C(80) * 1024 * 1024;
137 /// Get the total amount of physical RAM and set the memory usage limit
138 /// to the default value.
142 // If we cannot determine the amount of RAM, use the assumption
143 // defined by the configure script.
144 total_ram = lzma_physmem();
146 total_ram = (uint64_t)(ASSUME_RAM) * 1024 * 1024;
153 /// \brief Convert a string to uint64_t
155 /// This is rudely copied from src/xz/util.c and modified a little. :-(
157 /// \param max Return value when the string "max" was specified.
160 str_to_uint64(const char *value, uint64_t max)
164 // Accept special value "max".
165 if (strcmp(value, "max") == 0)
168 if (*value < '0' || *value > '9') {
169 my_errorf("%s: Value is not a non-negative decimal integer",
176 if (result > (UINT64_MAX - 9) / 10)
180 result += *value - '0';
182 } while (*value >= '0' && *value <= '9');
184 if (*value != '\0') {
186 uint64_t multiplier = 0;
187 if (*value == 'k' || *value == 'K')
188 multiplier = UINT64_C(1) << 10;
189 else if (*value == 'm' || *value == 'M')
190 multiplier = UINT64_C(1) << 20;
191 else if (*value == 'g' || *value == 'G')
192 multiplier = UINT64_C(1) << 30;
196 // Allow also e.g. Ki, KiB, and KB.
197 if (*value != '\0' && strcmp(value, "i") != 0
198 && strcmp(value, "iB") != 0
199 && strcmp(value, "B") != 0)
202 if (multiplier == 0) {
203 my_errorf("%s: Invalid suffix", value - 1);
207 // Don't overflow here either.
208 if (result > UINT64_MAX / multiplier)
211 result *= multiplier;
218 /// Parses command line options.
220 parse_options(int argc, char **argv)
222 static const char short_opts[] = "cdkM:hqQV";
223 static const struct option long_opts[] = {
224 { "stdout", no_argument, NULL, 'c' },
225 { "to-stdout", no_argument, NULL, 'c' },
226 { "decompress", no_argument, NULL, 'd' },
227 { "uncompress", no_argument, NULL, 'd' },
228 { "keep", no_argument, NULL, 'k' },
229 { "memory", required_argument, NULL, 'M' },
230 { "quiet", no_argument, NULL, 'q' },
231 { "no-warn", no_argument, NULL, 'Q' },
232 { "help", no_argument, NULL, 'h' },
233 { "version", no_argument, NULL, 'V' },
239 while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
249 // Support specifying the limit as a percentage of
250 // installed physical RAM.
251 const size_t len = strlen(optarg);
252 if (len > 0 && optarg[len - 1] == '%') {
253 // Memory limit is a percentage of total
255 optarg[len - 1] = '\0';
256 const uint64_t percentage
257 = str_to_uint64(optarg, 100);
258 if (percentage < 1 || percentage > 100) {
259 my_errorf("Percentage must be in "
260 "the range [1, 100]");
264 memlimit_set_percentage(percentage);
266 memlimit_set(str_to_uint64(
267 optarg, UINT64_MAX));
274 if (display_errors > 0)
295 uncompress(lzma_stream *strm, FILE *file, const char *filename)
299 // Initialize the decoder
301 ret = lzma_alone_decoder(strm, memlimit);
303 ret = lzma_stream_decoder(strm, memlimit, LZMA_CONCATENATED);
306 // The only reasonable error here is LZMA_MEM_ERROR.
307 // FIXME: Maybe also LZMA_MEMLIMIT_ERROR in future?
308 if (ret != LZMA_OK) {
309 my_errorf("%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM)
310 : "Internal error (bug)");
314 // Input and output buffers
315 uint8_t in_buf[BUFSIZ];
316 uint8_t out_buf[BUFSIZ];
319 strm->next_out = out_buf;
320 strm->avail_out = BUFSIZ;
322 lzma_action action = LZMA_RUN;
325 if (strm->avail_in == 0) {
326 strm->next_in = in_buf;
327 strm->avail_in = fread(in_buf, 1, BUFSIZ, file);
330 // POSIX says that fread() sets errno if
331 // an error occurred. ferror() doesn't
333 my_errorf("%s: Error reading input file: %s",
334 filename, strerror(errno));
339 // When using LZMA_CONCATENATED, we need to tell
340 // liblzma when it has got all the input.
342 action = LZMA_FINISH;
346 ret = lzma_code(strm, action);
348 // Write and check write error before checking decoder error.
349 // This way as much data as possible gets written to the output
350 // even if the decoder detected an error.
351 if (strm->avail_out == 0 || ret != LZMA_OK) {
352 const size_t write_size = BUFSIZ - strm->avail_out;
354 if (fwrite(out_buf, 1, write_size, stdout)
356 // Wouldn't be a surprise if writing to stderr
357 // would fail too but at least try to show an
359 my_errorf("Cannot write to standard output: "
360 "%s", strerror(errno));
364 strm->next_out = out_buf;
365 strm->avail_out = BUFSIZ;
368 if (ret != LZMA_OK) {
369 if (ret == LZMA_STREAM_END) {
371 // Check that there's no trailing garbage.
372 if (strm->avail_in != 0
373 || fread(in_buf, 1, 1, file)
376 ret = LZMA_DATA_ERROR;
380 // lzma_stream_decoder() already guarantees
381 // that there's no trailing garbage.
382 assert(strm->avail_in == 0);
383 assert(action == LZMA_FINISH);
392 msg = strerror(ENOMEM);
395 case LZMA_MEMLIMIT_ERROR:
396 msg = "Memory usage limit reached";
399 case LZMA_FORMAT_ERROR:
400 msg = "File format not recognized";
403 case LZMA_OPTIONS_ERROR:
404 // FIXME: Better message?
405 msg = "Unsupported compression options";
408 case LZMA_DATA_ERROR:
409 msg = "File is corrupt";
413 msg = "Unexpected end of input";
417 msg = "Internal error (bug)";
421 my_errorf("%s: %s", filename, msg);
429 main(int argc, char **argv)
431 // Initialize progname, which will be used in error messages.
432 tuklib_progname_init(argv);
434 // Set the default memory usage limit. This is needed before parsing
435 // the command line arguments.
438 // Parse the command line options.
439 parse_options(argc, argv);
441 // The same lzma_stream is used for all files that we decode. This way
442 // we don't need to reallocate memory for every file if they use the same
443 // compression settings.
444 lzma_stream strm = LZMA_STREAM_INIT;
446 // Some systems require setting stdin and stdout to binary mode.
447 #ifdef TUKLIB_DOSLIKE
448 setmode(fileno(stdin), O_BINARY);
449 setmode(fileno(stdout), O_BINARY);
452 if (optind == argc) {
453 // No filenames given, decode from stdin.
454 uncompress(&strm, stdin, "(stdin)");
456 // Loop through the filenames given on the command line.
458 // "-" indicates stdin.
459 if (strcmp(argv[optind], "-") == 0) {
460 uncompress(&strm, stdin, "(stdin)");
462 FILE *file = fopen(argv[optind], "rb");
464 my_errorf("%s: %s", argv[optind],
469 uncompress(&strm, file, argv[optind]);
472 } while (++optind < argc);
476 // Free the memory only when debugging. Freeing wastes some time,
477 // but allows detecting possible memory leaks with Valgrind.
481 tuklib_exit(EXIT_SUCCESS, EXIT_FAILURE, display_errors);