]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - contrib/xz/src/xz/args.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / contrib / xz / src / xz / args.c
1 ///////////////////////////////////////////////////////////////////////////////
2 //
3 /// \file       args.c
4 /// \brief      Argument parsing
5 ///
6 /// \note       Filter-specific options parsing is in options.c.
7 //
8 //  Author:     Lasse Collin
9 //
10 //  This file has been put into the public domain.
11 //  You can do whatever you want with this file.
12 //
13 ///////////////////////////////////////////////////////////////////////////////
14
15 #include "private.h"
16
17 #include "getopt.h"
18 #include <ctype.h>
19
20
// Global option flags. All of them start out false and are switched on
// while the arguments are being parsed.

// Set by -c/--stdout and by the xzcat/lzcat command names; args_parse()
// also forces it on in test mode.
bool opt_stdout = false;

// Set by -f/--force.
bool opt_force = false;

// Set by -k/--keep; args_parse() also forces it on whenever opt_stdout
// ends up true.
bool opt_keep_original = false;

// Set by --robot.
bool opt_robot = false;

// Placeholder name stored in place of a real filename when a file list is
// read from standard input. The string is never modified or free()d, but
// it has to be stored through non-const pointers, so the use sites cast
// the const away.
const char *stdin_filename = "(stdin)";
29
30
31 static void
32 parse_real(args_info *args, int argc, char **argv)
33 {
34         enum {
35                 OPT_SUBBLOCK = INT_MIN,
36                 OPT_X86,
37                 OPT_POWERPC,
38                 OPT_IA64,
39                 OPT_ARM,
40                 OPT_ARMTHUMB,
41                 OPT_SPARC,
42                 OPT_DELTA,
43                 OPT_LZMA1,
44                 OPT_LZMA2,
45
46                 OPT_NO_SPARSE,
47                 OPT_FILES,
48                 OPT_FILES0,
49                 OPT_INFO_MEMORY,
50                 OPT_ROBOT,
51         };
52
53         static const char short_opts[]
54                         = "cC:defF:hHlkM:qQrS:tT:vVz0123456789";
55
56         static const struct option long_opts[] = {
57                 // Operation mode
58                 { "compress",     no_argument,       NULL,  'z' },
59                 { "decompress",   no_argument,       NULL,  'd' },
60                 { "uncompress",   no_argument,       NULL,  'd' },
61                 { "test",         no_argument,       NULL,  't' },
62                 { "list",         no_argument,       NULL,  'l' },
63
64                 // Operation modifiers
65                 { "keep",         no_argument,       NULL,  'k' },
66                 { "force",        no_argument,       NULL,  'f' },
67                 { "stdout",       no_argument,       NULL,  'c' },
68                 { "to-stdout",    no_argument,       NULL,  'c' },
69                 { "no-sparse",    no_argument,       NULL,  OPT_NO_SPARSE },
70                 { "suffix",       required_argument, NULL,  'S' },
71                 // { "recursive",      no_argument,       NULL,  'r' }, // TODO
72                 { "files",        optional_argument, NULL,  OPT_FILES },
73                 { "files0",       optional_argument, NULL,  OPT_FILES0 },
74
75                 // Basic compression settings
76                 { "format",       required_argument, NULL,  'F' },
77                 { "check",        required_argument, NULL,  'C' },
78                 { "memory",       required_argument, NULL,  'M' },
79                 { "threads",      required_argument, NULL,  'T' },
80
81                 { "extreme",      no_argument,       NULL,  'e' },
82                 { "fast",         no_argument,       NULL,  '0' },
83                 { "best",         no_argument,       NULL,  '9' },
84
85                 // Filters
86                 { "lzma1",        optional_argument, NULL,  OPT_LZMA1 },
87                 { "lzma2",        optional_argument, NULL,  OPT_LZMA2 },
88                 { "x86",          optional_argument, NULL,  OPT_X86 },
89                 { "powerpc",      optional_argument, NULL,  OPT_POWERPC },
90                 { "ia64",         optional_argument, NULL,  OPT_IA64 },
91                 { "arm",          optional_argument, NULL,  OPT_ARM },
92                 { "armthumb",     optional_argument, NULL,  OPT_ARMTHUMB },
93                 { "sparc",        optional_argument, NULL,  OPT_SPARC },
94                 { "delta",        optional_argument, NULL,  OPT_DELTA },
95                 { "subblock",     optional_argument, NULL,  OPT_SUBBLOCK },
96
97                 // Other options
98                 { "quiet",        no_argument,       NULL,  'q' },
99                 { "verbose",      no_argument,       NULL,  'v' },
100                 { "no-warn",      no_argument,       NULL,  'Q' },
101                 { "robot",        no_argument,       NULL,  OPT_ROBOT },
102                 { "info-memory",  no_argument,       NULL,  OPT_INFO_MEMORY },
103                 { "help",         no_argument,       NULL,  'h' },
104                 { "long-help",    no_argument,       NULL,  'H' },
105                 { "version",      no_argument,       NULL,  'V' },
106
107                 { NULL,                 0,                 NULL,   0 }
108         };
109
110         int c;
111
112         while ((c = getopt_long(argc, argv, short_opts, long_opts, NULL))
113                         != -1) {
114                 switch (c) {
115                 // Compression preset (also for decompression if --format=raw)
116                 case '0': case '1': case '2': case '3': case '4':
117                 case '5': case '6': case '7': case '8': case '9':
118                         coder_set_preset(c - '0');
119                         break;
120
121                 // --memory
122                 case 'M': {
123                         // Support specifying the limit as a percentage of
124                         // installed physical RAM.
125                         size_t len = strlen(optarg);
126                         if (len > 0 && optarg[len - 1] == '%') {
127                                 optarg[len - 1] = '\0';
128                                 hardware_memlimit_set_percentage(
129                                                 str_to_uint64(
130                                                 "memory%", optarg, 1, 100));
131                         } else {
132                                 // On 32-bit systems, SIZE_MAX would make more
133                                 // sense than UINT64_MAX. But use UINT64_MAX
134                                 // still so that scripts that assume > 4 GiB
135                                 // values don't break.
136                                 hardware_memlimit_set(str_to_uint64(
137                                                 "memory", optarg,
138                                                 0, UINT64_MAX));
139                         }
140
141                         break;
142                 }
143
144                 // --suffix
145                 case 'S':
146                         suffix_set(optarg);
147                         break;
148
149                 case 'T':
150                         hardware_threadlimit_set(str_to_uint64(
151                                         "threads", optarg, 0, UINT32_MAX));
152                         break;
153
154                 // --version
155                 case 'V':
156                         // This doesn't return.
157                         message_version();
158
159                 // --stdout
160                 case 'c':
161                         opt_stdout = true;
162                         break;
163
164                 // --decompress
165                 case 'd':
166                         opt_mode = MODE_DECOMPRESS;
167                         break;
168
169                 // --extreme
170                 case 'e':
171                         coder_set_extreme();
172                         break;
173
174                 // --force
175                 case 'f':
176                         opt_force = true;
177                         break;
178
179                 // --info-memory
180                 case OPT_INFO_MEMORY:
181                         // This doesn't return.
182                         message_memlimit();
183
184                 // --help
185                 case 'h':
186                         // This doesn't return.
187                         message_help(false);
188
189                 // --long-help
190                 case 'H':
191                         // This doesn't return.
192                         message_help(true);
193
194                 // --list
195                 case 'l':
196                         opt_mode = MODE_LIST;
197                         break;
198
199                 // --keep
200                 case 'k':
201                         opt_keep_original = true;
202                         break;
203
204                 // --quiet
205                 case 'q':
206                         message_verbosity_decrease();
207                         break;
208
209                 case 'Q':
210                         set_exit_no_warn();
211                         break;
212
213                 case 't':
214                         opt_mode = MODE_TEST;
215                         break;
216
217                 // --verbose
218                 case 'v':
219                         message_verbosity_increase();
220                         break;
221
222                 // --robot
223                 case OPT_ROBOT:
224                         opt_robot = true;
225
226                         // This is to make sure that floating point numbers
227                         // always have a dot as decimal separator.
228                         setlocale(LC_NUMERIC, "C");
229                         break;
230
231                 case 'z':
232                         opt_mode = MODE_COMPRESS;
233                         break;
234
235                 // Filter setup
236
237                 case OPT_SUBBLOCK:
238                         coder_add_filter(LZMA_FILTER_SUBBLOCK,
239                                         options_subblock(optarg));
240                         break;
241
242                 case OPT_X86:
243                         coder_add_filter(LZMA_FILTER_X86,
244                                         options_bcj(optarg));
245                         break;
246
247                 case OPT_POWERPC:
248                         coder_add_filter(LZMA_FILTER_POWERPC,
249                                         options_bcj(optarg));
250                         break;
251
252                 case OPT_IA64:
253                         coder_add_filter(LZMA_FILTER_IA64,
254                                         options_bcj(optarg));
255                         break;
256
257                 case OPT_ARM:
258                         coder_add_filter(LZMA_FILTER_ARM,
259                                         options_bcj(optarg));
260                         break;
261
262                 case OPT_ARMTHUMB:
263                         coder_add_filter(LZMA_FILTER_ARMTHUMB,
264                                         options_bcj(optarg));
265                         break;
266
267                 case OPT_SPARC:
268                         coder_add_filter(LZMA_FILTER_SPARC,
269                                         options_bcj(optarg));
270                         break;
271
272                 case OPT_DELTA:
273                         coder_add_filter(LZMA_FILTER_DELTA,
274                                         options_delta(optarg));
275                         break;
276
277                 case OPT_LZMA1:
278                         coder_add_filter(LZMA_FILTER_LZMA1,
279                                         options_lzma(optarg));
280                         break;
281
282                 case OPT_LZMA2:
283                         coder_add_filter(LZMA_FILTER_LZMA2,
284                                         options_lzma(optarg));
285                         break;
286
287                 // Other
288
289                 // --format
290                 case 'F': {
291                         // Just in case, support both "lzma" and "alone" since
292                         // the latter was used for forward compatibility in
293                         // LZMA Utils 4.32.x.
294                         static const struct {
295                                 char str[8];
296                                 enum format_type format;
297                         } types[] = {
298                                 { "auto",   FORMAT_AUTO },
299                                 { "xz",     FORMAT_XZ },
300                                 { "lzma",   FORMAT_LZMA },
301                                 { "alone",  FORMAT_LZMA },
302                                 // { "gzip",   FORMAT_GZIP },
303                                 // { "gz",     FORMAT_GZIP },
304                                 { "raw",    FORMAT_RAW },
305                         };
306
307                         size_t i = 0;
308                         while (strcmp(types[i].str, optarg) != 0)
309                                 if (++i == ARRAY_SIZE(types))
310                                         message_fatal(_("%s: Unknown file "
311                                                         "format type"),
312                                                         optarg);
313
314                         opt_format = types[i].format;
315                         break;
316                 }
317
318                 // --check
319                 case 'C': {
320                         static const struct {
321                                 char str[8];
322                                 lzma_check check;
323                         } types[] = {
324                                 { "none",   LZMA_CHECK_NONE },
325                                 { "crc32",  LZMA_CHECK_CRC32 },
326                                 { "crc64",  LZMA_CHECK_CRC64 },
327                                 { "sha256", LZMA_CHECK_SHA256 },
328                         };
329
330                         size_t i = 0;
331                         while (strcmp(types[i].str, optarg) != 0) {
332                                 if (++i == ARRAY_SIZE(types))
333                                         message_fatal(_("%s: Unsupported "
334                                                         "integrity "
335                                                         "check type"), optarg);
336                         }
337
338                         // Use a separate check in case we are using different
339                         // liblzma than what was used to compile us.
340                         if (!lzma_check_is_supported(types[i].check))
341                                 message_fatal(_("%s: Unsupported integrity "
342                                                 "check type"), optarg);
343
344                         coder_set_check(types[i].check);
345                         break;
346                 }
347
348                 case OPT_NO_SPARSE:
349                         io_no_sparse();
350                         break;
351
352                 case OPT_FILES:
353                         args->files_delim = '\n';
354
355                 // Fall through
356
357                 case OPT_FILES0:
358                         if (args->files_name != NULL)
359                                 message_fatal(_("Only one file can be "
360                                                 "specified with `--files' "
361                                                 "or `--files0'."));
362
363                         if (optarg == NULL) {
364                                 args->files_name = (char *)stdin_filename;
365                                 args->files_file = stdin;
366                         } else {
367                                 args->files_name = optarg;
368                                 args->files_file = fopen(optarg,
369                                                 c == OPT_FILES ? "r" : "rb");
370                                 if (args->files_file == NULL)
371                                         message_fatal("%s: %s", optarg,
372                                                         strerror(errno));
373                         }
374
375                         break;
376
377                 default:
378                         message_try_help();
379                         tuklib_exit(E_ERROR, E_ERROR, false);
380                 }
381         }
382
383         return;
384 }
385
386
387 static void
388 parse_environment(args_info *args, char *argv0)
389 {
390         char *env = getenv("XZ_OPT");
391         if (env == NULL)
392                 return;
393
394         // We modify the string, so make a copy of it.
395         env = xstrdup(env);
396
397         // Calculate the number of arguments in env. argc stats at one
398         // to include space for the program name.
399         int argc = 1;
400         bool prev_was_space = true;
401         for (size_t i = 0; env[i] != '\0'; ++i) {
402                 // NOTE: Cast to unsigned char is needed so that correct
403                 // value gets passed to isspace(), which expects
404                 // unsigned char cast to int. Casting to int is done
405                 // automatically due to integer promotion, but we need to
406                 // force char to unsigned char manually. Otherwise 8-bit
407                 // characters would get promoted to wrong value if
408                 // char is signed.
409                 if (isspace((unsigned char)env[i])) {
410                         prev_was_space = true;
411                 } else if (prev_was_space) {
412                         prev_was_space = false;
413
414                         // Keep argc small enough to fit into a singed int
415                         // and to keep it usable for memory allocation.
416                         if (++argc == MIN(INT_MAX, SIZE_MAX / sizeof(char *)))
417                                 message_fatal(_("The environment variable "
418                                                 "XZ_OPT contains too many "
419                                                 "arguments"));
420                 }
421         }
422
423         // Allocate memory to hold pointers to the arguments. Add one to get
424         // space for the terminating NULL (if some systems happen to need it).
425         char **argv = xmalloc(((size_t)(argc) + 1) * sizeof(char *));
426         argv[0] = argv0;
427         argv[argc] = NULL;
428
429         // Go through the string again. Split the arguments using '\0'
430         // characters and add pointers to the resulting strings to argv.
431         argc = 1;
432         prev_was_space = true;
433         for (size_t i = 0; env[i] != '\0'; ++i) {
434                 if (isspace((unsigned char)env[i])) {
435                         prev_was_space = true;
436                         env[i] = '\0';
437                 } else if (prev_was_space) {
438                         prev_was_space = false;
439                         argv[argc++] = env + i;
440                 }
441         }
442
443         // Parse the argument list we got from the environment. All non-option
444         // arguments i.e. filenames are ignored.
445         parse_real(args, argc, argv);
446
447         // Reset the state of the getopt_long() so that we can parse the
448         // command line options too. There are two incompatible ways to
449         // do it.
450 #ifdef HAVE_OPTRESET
451         // BSD
452         optind = 1;
453         optreset = 1;
454 #else
455         // GNU, Solaris
456         optind = 0;
457 #endif
458
459         // We don't need the argument list from environment anymore.
460         free(argv);
461         free(env);
462
463         return;
464 }
465
466
467 extern void
468 args_parse(args_info *args, int argc, char **argv)
469 {
470         // Initialize those parts of *args that we need later.
471         args->files_name = NULL;
472         args->files_file = NULL;
473         args->files_delim = '\0';
474
475         // Check how we were called.
476         {
477                 // Remove the leading path name, if any.
478                 const char *name = strrchr(argv[0], '/');
479                 if (name == NULL)
480                         name = argv[0];
481                 else
482                         ++name;
483
484                 // NOTE: It's possible that name[0] is now '\0' if argv[0]
485                 // is weird, but it doesn't matter here.
486
487                 // Look for full command names instead of substrings like
488                 // "un", "cat", and "lz" to reduce possibility of false
489                 // positives when the programs have been renamed.
490                 if (strstr(name, "xzcat") != NULL) {
491                         opt_mode = MODE_DECOMPRESS;
492                         opt_stdout = true;
493                 } else if (strstr(name, "unxz") != NULL) {
494                         opt_mode = MODE_DECOMPRESS;
495                 } else if (strstr(name, "lzcat") != NULL) {
496                         opt_format = FORMAT_LZMA;
497                         opt_mode = MODE_DECOMPRESS;
498                         opt_stdout = true;
499                 } else if (strstr(name, "unlzma") != NULL) {
500                         opt_format = FORMAT_LZMA;
501                         opt_mode = MODE_DECOMPRESS;
502                 } else if (strstr(name, "lzma") != NULL) {
503                         opt_format = FORMAT_LZMA;
504                 }
505         }
506
507         // First the flags from environment
508         parse_environment(args, argv[0]);
509
510         // Then from the command line
511         parse_real(args, argc, argv);
512
513         // Never remove the source file when the destination is not on disk.
514         // In test mode the data is written nowhere, but setting opt_stdout
515         // will make the rest of the code behave well.
516         if (opt_stdout || opt_mode == MODE_TEST) {
517                 opt_keep_original = true;
518                 opt_stdout = true;
519         }
520
521         // When compressing, if no --format flag was used, or it
522         // was --format=auto, we compress to the .xz format.
523         if (opt_mode == MODE_COMPRESS && opt_format == FORMAT_AUTO)
524                 opt_format = FORMAT_XZ;
525
526         // Compression settings need to be validated (options themselves and
527         // their memory usage) when compressing to any file format. It has to
528         // be done also when uncompressing raw data, since for raw decoding
529         // the options given on the command line are used to know what kind
530         // of raw data we are supposed to decode.
531         if (opt_mode == MODE_COMPRESS || opt_format == FORMAT_RAW)
532                 coder_set_compression_settings();
533
534         // If no filenames are given, use stdin.
535         if (argv[optind] == NULL && args->files_name == NULL) {
536                 // We don't modify or free() the "-" constant. The caller
537                 // modifies this so don't make the struct itself const.
538                 static char *names_stdin[2] = { (char *)"-", NULL };
539                 args->arg_names = names_stdin;
540                 args->arg_count = 1;
541         } else {
542                 // We got at least one filename from the command line, or
543                 // --files or --files0 was specified.
544                 args->arg_names = argv + optind;
545                 args->arg_count = argc - optind;
546         }
547
548         return;
549 }