]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/sort.c
Add 'contrib/bsddialog/' from commit '857c66bb5f3c5651b012beb1b5ea6ba39354ea94'
[FreeBSD/FreeBSD.git] / usr.bin / sort / sort.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57
58 #ifndef WITHOUT_NLS
59 #include <nl_types.h>
60 nl_catd catalog;
61 #endif
62
63 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"
64
65 static bool need_random;
66
67 MD5_CTX md5_ctx;
68
/*
 * Built-in English message texts, used when NLS is disabled or no
 * catalogue is found.  The array index is the message number (for
 * example, usage() formats message 12); slot 0 is an empty placeholder
 * so that numbering starts at 1.
 */
const char *nlsstr[] = {
        [0] = "",
        [1] = "mutually exclusive flags",
        [2] = "extra argument not allowed with -c",
        [3] = "Unknown feature",
        [4] = "Wrong memory buffer specification",
        [5] = "0 field in key specs",
        [6] = "0 column in key specs",
        [7] = "Wrong file mode",
        [8] = "Cannot open file for reading",
        [9] = "Radix sort cannot be used with these sort options",
        [10] = "The chosen sort method cannot be used with stable and/or unique sort",
        [11] = "Invalid key position",
        /* Usage text; the --parallel part exists only in threaded builds. */
        [12] =
            "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
            "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
            "[-o outfile] [--batch-size size] [--files0-from file] "
            "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
            "[--mmap] "
#if defined(SORT_THREADS)
            "[--parallel thread_no] "
#endif
            "[--human-numeric-sort] "
            "[--version-sort] [--random-sort [--random-source file]] "
            "[--compress-program program] [file ...]\n"
};
96
97 struct sort_opts sort_opts_vals;
98
99 bool debug_sort;
100 bool need_hint;
101
102 size_t mb_cur_max;
103
104 #if defined(SORT_THREADS)
105 unsigned int ncpu = 1;
106 size_t nthreads = 1;
107 #endif
108
109 static bool gnusort_numeric_compatibility;
110
111 static struct sort_mods default_sort_mods_object;
112 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
113
114 static bool print_symbols_on_debug;
115
116 /*
117  * Arguments read from a file (when the files0-from option is used):
118  */
119 static size_t argc_from_file0 = (size_t)-1;
120 static char **argv_from_file0;
121
/*
 * Codes for the options that have no single-character equivalent.
 * They are used as the "val" member of the long_options entries and
 * start just above CHAR_MAX, so none of them can collide with an
 * option letter.
 */
enum {
        SORT_OPT = CHAR_MAX + 1,        /* --sort */
        HELP_OPT,                       /* --help */
        FF_OPT,                         /* --files0-from */
        BS_OPT,                         /* --batch-size */
        VERSION_OPT,                    /* --version */
        DEBUG_OPT,                      /* --debug */
#if defined(SORT_THREADS)
        PARALLEL_OPT,                   /* --parallel */
#endif
        RANDOMSOURCE_OPT,               /* --random-source */
        COMPRESSPROGRAM_OPT,            /* --compress-program */
        QSORT_OPT,                      /* --qsort */
        MERGESORT_OPT,                  /* --mergesort */
        HEAPSORT_OPT,                   /* --heapsort */
        RADIXSORT_OPT,                  /* --radixsort */
        MMAP_OPT                        /* --mmap */
};
144
/*
 * Option letters of which at most one kind may be given;
 * check_mutually_exclusive_flags() walks this table.
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
        'M', 'n', 'g', 'R', 'h', 'V'
};
147
148 static struct option long_options[] = {
149                                 { "batch-size", required_argument, NULL, BS_OPT },
150                                 { "buffer-size", required_argument, NULL, 'S' },
151                                 { "check", optional_argument, NULL, 'c' },
152                                 { "check=silent|quiet", optional_argument, NULL, 'C' },
153                                 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
154                                 { "debug", no_argument, NULL, DEBUG_OPT },
155                                 { "dictionary-order", no_argument, NULL, 'd' },
156                                 { "field-separator", required_argument, NULL, 't' },
157                                 { "files0-from", required_argument, NULL, FF_OPT },
158                                 { "general-numeric-sort", no_argument, NULL, 'g' },
159                                 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
160                                 { "help",no_argument, NULL, HELP_OPT },
161                                 { "human-numeric-sort", no_argument, NULL, 'h' },
162                                 { "ignore-leading-blanks", no_argument, NULL, 'b' },
163                                 { "ignore-case", no_argument, NULL, 'f' },
164                                 { "ignore-nonprinting", no_argument, NULL, 'i' },
165                                 { "key", required_argument, NULL, 'k' },
166                                 { "merge", no_argument, NULL, 'm' },
167                                 { "mergesort", no_argument, NULL, MERGESORT_OPT },
168                                 { "mmap", no_argument, NULL, MMAP_OPT },
169                                 { "month-sort", no_argument, NULL, 'M' },
170                                 { "numeric-sort", no_argument, NULL, 'n' },
171                                 { "output", required_argument, NULL, 'o' },
172 #if defined(SORT_THREADS)
173                                 { "parallel", required_argument, NULL, PARALLEL_OPT },
174 #endif
175                                 { "qsort", no_argument, NULL, QSORT_OPT },
176                                 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
177                                 { "random-sort", no_argument, NULL, 'R' },
178                                 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
179                                 { "reverse", no_argument, NULL, 'r' },
180                                 { "sort", required_argument, NULL, SORT_OPT },
181                                 { "stable", no_argument, NULL, 's' },
182                                 { "temporary-directory",required_argument, NULL, 'T' },
183                                 { "unique", no_argument, NULL, 'u' },
184                                 { "version", no_argument, NULL, VERSION_OPT },
185                                 { "version-sort",no_argument, NULL, 'V' },
186                                 { "zero-terminated", no_argument, NULL, 'z' },
187                                 { NULL, no_argument, NULL, 0 }
188 };
189
190 void fix_obsolete_keys(int *argc, char **argv);
191
192 /*
193  * Check where sort modifier is present
194  */
195 static bool
196 sort_modifier_empty(struct sort_mods *sm)
197 {
198
199         if (sm == NULL)
200                 return (true);
201         return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
202             sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
203 }
204
/*
 * Print the usage text and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is
 * 2; otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{
        FILE *stream = stderr;
        int status = 2;

        if (!opt_err) {
                stream = stdout;
                status = 0;
        }

        fprintf(stream, getstr(12), getprogname());
        exit(status);
}
220
221 /*
222  * Read input file names from a file (file0-from option).
223  */
224 static void
225 read_fns_from_file0(const char *fn)
226 {
227         FILE *f;
228         char *line = NULL;
229         size_t linesize = 0;
230         ssize_t linelen;
231
232         if (fn == NULL)
233                 return;
234
235         f = fopen(fn, "r");
236         if (f == NULL)
237                 err(2, "%s", fn);
238
239         while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
240                 if (*line != '\0') {
241                         if (argc_from_file0 == (size_t) - 1)
242                                 argc_from_file0 = 0;
243                         ++argc_from_file0;
244                         argv_from_file0 = sort_realloc(argv_from_file0,
245                             argc_from_file0 * sizeof(char *));
246                         if (argv_from_file0 == NULL)
247                                 err(2, NULL);
248                         argv_from_file0[argc_from_file0 - 1] = line;
249                 } else {
250                         free(line);
251                 }
252                 line = NULL;
253                 linesize = 0;
254         }
255         if (ferror(f))
256                 err(2, "%s: getdelim", fn);
257
258         closefile(f, fn);
259 }
260
261 /*
262  * Check how much RAM is available for the sort.
263  */
264 static void
265 set_hw_params(void)
266 {
267         long pages, psize;
268
269 #if defined(SORT_THREADS)
270         ncpu = 1;
271 #endif
272
273         pages = sysconf(_SC_PHYS_PAGES);
274         if (pages < 1) {
275                 perror("sysconf pages");
276                 pages = 1;
277         }
278         psize = sysconf(_SC_PAGESIZE);
279         if (psize < 1) {
280                 perror("sysconf psize");
281                 psize = 4096;
282         }
283 #if defined(SORT_THREADS)
284         ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
285         if (ncpu < 1)
286                 ncpu = 1;
287         else if(ncpu > 32)
288                 ncpu = 32;
289
290         nthreads = ncpu;
291 #endif
292
293         free_memory = (unsigned long long) pages * (unsigned long long) psize;
294         available_free_memory = free_memory / 2;
295
296         if (available_free_memory < 1024)
297                 available_free_memory = 1024;
298 }
299
300 /*
301  * Convert "plain" symbol to wide symbol, with default value.
302  */
303 static void
304 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
305 {
306
307         if (wc && c) {
308                 int res;
309
310                 res = mbtowc(wc, c, mb_cur_max);
311                 if (res < 1)
312                         *wc = def;
313         }
314 }
315
316 /*
317  * Set current locale symbols.
318  */
319 static void
320 set_locale(void)
321 {
322         struct lconv *lc;
323         const char *locale;
324
325         setlocale(LC_ALL, "");
326
327         mb_cur_max = MB_CUR_MAX;
328
329         lc = localeconv();
330
331         if (lc) {
332                 /* obtain LC_NUMERIC info */
333                 /* Convert to wide char form */
334                 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
335                     symbol_decimal_point);
336                 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
337                     symbol_thousands_sep);
338                 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
339                     symbol_positive_sign);
340                 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
341                     symbol_negative_sign);
342         }
343
344         if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
345                 gnusort_numeric_compatibility = true;
346
347         locale = setlocale(LC_COLLATE, NULL);
348
349         if (locale) {
350                 char *tmpl;
351                 const char *cclocale;
352
353                 tmpl = sort_strdup(locale);
354                 cclocale = setlocale(LC_COLLATE, "C");
355                 if (cclocale && !strcmp(cclocale, tmpl))
356                         byte_sort = true;
357                 else {
358                         const char *pclocale;
359
360                         pclocale = setlocale(LC_COLLATE, "POSIX");
361                         if (pclocale && !strcmp(pclocale, tmpl))
362                                 byte_sort = true;
363                 }
364                 setlocale(LC_COLLATE, tmpl);
365                 sort_free(tmpl);
366         }
367 }
368
369 /*
370  * Set directory temporary files.
371  */
372 static void
373 set_tmpdir(void)
374 {
375         char *td;
376
377         td = getenv("TMPDIR");
378         if (td != NULL)
379                 tmpdir = sort_strdup(td);
380 }
381
382 /*
383  * Parse -S option.
384  */
385 static unsigned long long
386 parse_memory_buffer_value(const char *value)
387 {
388
389         if (value == NULL)
390                 return (available_free_memory);
391         else {
392                 char *endptr;
393                 unsigned long long membuf;
394
395                 endptr = NULL;
396                 errno = 0;
397                 membuf = strtoll(value, &endptr, 10);
398
399                 if (errno != 0) {
400                         warn("%s",getstr(4));
401                         membuf = available_free_memory;
402                 } else {
403                         switch (*endptr){
404                         case 'Y':
405                                 membuf *= 1024;
406                                 /* FALLTHROUGH */
407                         case 'Z':
408                                 membuf *= 1024;
409                                 /* FALLTHROUGH */
410                         case 'E':
411                                 membuf *= 1024;
412                                 /* FALLTHROUGH */
413                         case 'P':
414                                 membuf *= 1024;
415                                 /* FALLTHROUGH */
416                         case 'T':
417                                 membuf *= 1024;
418                                 /* FALLTHROUGH */
419                         case 'G':
420                                 membuf *= 1024;
421                                 /* FALLTHROUGH */
422                         case 'M':
423                                 membuf *= 1024;
424                                 /* FALLTHROUGH */
425                         case '\0':
426                         case 'K':
427                                 membuf *= 1024;
428                                 /* FALLTHROUGH */
429                         case 'b':
430                                 break;
431                         case '%':
432                                 membuf = (available_free_memory * membuf) /
433                                     100;
434                                 break;
435                         default:
436                                 warnc(EINVAL, "%s", optarg);
437                                 membuf = available_free_memory;
438                         }
439                 }
440                 return (membuf);
441         }
442 }
443
444 /*
445  * Signal handler that clears the temporary files.
446  */
447 static void
448 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
449     void *context __unused)
450 {
451
452         clear_tmp_files();
453         exit(-1);
454 }
455
456 /*
457  * Set signal handler on panic signals.
458  */
459 static void
460 set_signal_handler(void)
461 {
462         struct sigaction sa;
463
464         memset(&sa, 0, sizeof(sa));
465         sa.sa_sigaction = &sig_handler;
466         sa.sa_flags = SA_SIGINFO;
467
468         if (sigaction(SIGTERM, &sa, NULL) < 0) {
469                 perror("sigaction");
470                 return;
471         }
472         if (sigaction(SIGHUP, &sa, NULL) < 0) {
473                 perror("sigaction");
474                 return;
475         }
476         if (sigaction(SIGINT, &sa, NULL) < 0) {
477                 perror("sigaction");
478                 return;
479         }
480         if (sigaction(SIGQUIT, &sa, NULL) < 0) {
481                 perror("sigaction");
482                 return;
483         }
484         if (sigaction(SIGABRT, &sa, NULL) < 0) {
485                 perror("sigaction");
486                 return;
487         }
488         if (sigaction(SIGBUS, &sa, NULL) < 0) {
489                 perror("sigaction");
490                 return;
491         }
492         if (sigaction(SIGSEGV, &sa, NULL) < 0) {
493                 perror("sigaction");
494                 return;
495         }
496         if (sigaction(SIGUSR1, &sa, NULL) < 0) {
497                 perror("sigaction");
498                 return;
499         }
500         if (sigaction(SIGUSR2, &sa, NULL) < 0) {
501                 perror("sigaction");
502                 return;
503         }
504 }
505
/*
 * Report an unknown feature and exit with status 2.  The message is
 * the text of message 3 followed by the offending name in what.
 */
static void
unknown(const char *what)
{
        const char *prefix = getstr(3);

        errx(2, "%s: %s", prefix, what);
}
515
516 /*
517  * Check whether contradictory input options are used.
518  */
519 static void
520 check_mutually_exclusive_flags(char c, bool *mef_flags)
521 {
522         int fo_index, mec;
523         bool found_others, found_this;
524
525         found_others = found_this = false;
526         fo_index = 0;
527
528         for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
529                 mec = mutually_exclusive_flags[i];
530
531                 if (mec != c) {
532                         if (mef_flags[i]) {
533                                 if (found_this)
534                                         errx(1, "%c:%c: %s", c, mec, getstr(1));
535                                 found_others = true;
536                                 fo_index = i;
537                         }
538                 } else {
539                         if (found_others)
540                                 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
541                         mef_flags[i] = true;
542                         found_this = true;
543                 }
544         }
545 }
546
547 /*
548  * Initialise sort opts data.
549  */
550 static void
551 set_sort_opts(void)
552 {
553
554         memset(&default_sort_mods_object, 0,
555             sizeof(default_sort_mods_object));
556         memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
557         default_sort_mods_object.func =
558             get_sort_func(&default_sort_mods_object);
559 }
560
561 /*
562  * Set a sort modifier on a sort modifiers object.
563  */
564 static bool
565 set_sort_modifier(struct sort_mods *sm, int c)
566 {
567
568         if (sm == NULL)
569                 return (true);
570
571         switch (c){
572         case 'b':
573                 sm->bflag = true;
574                 break;
575         case 'd':
576                 sm->dflag = true;
577                 break;
578         case 'f':
579                 sm->fflag = true;
580                 break;
581         case 'g':
582                 sm->gflag = true;
583                 need_hint = true;
584                 break;
585         case 'i':
586                 sm->iflag = true;
587                 break;
588         case 'R':
589                 sm->Rflag = true;
590                 need_hint = true;
591                 need_random = true;
592                 break;
593         case 'M':
594                 initialise_months();
595                 sm->Mflag = true;
596                 need_hint = true;
597                 break;
598         case 'n':
599                 sm->nflag = true;
600                 need_hint = true;
601                 print_symbols_on_debug = true;
602                 break;
603         case 'r':
604                 sm->rflag = true;
605                 break;
606         case 'V':
607                 sm->Vflag = true;
608                 break;
609         case 'h':
610                 sm->hflag = true;
611                 need_hint = true;
612                 print_symbols_on_debug = true;
613                 break;
614         default:
615                 return (false);
616         }
617
618         sort_opts_vals.complex_sort = true;
619         sm->func = get_sort_func(sm);
620         return (true);
621 }
622
623 /*
624  * Parse POS in -k option.
625  */
626 static int
627 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
628 {
629         regmatch_t pmatch[4];
630         regex_t re;
631         char *c, *f;
632         const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
633         size_t len, nmatch;
634         int ret;
635
636         ret = -1;
637         nmatch = 4;
638         c = f = NULL;
639
640         if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
641                 return (-1);
642
643         if (regexec(&re, s, nmatch, pmatch, 0) != 0)
644                 goto end;
645
646         if (pmatch[0].rm_eo <= pmatch[0].rm_so)
647                 goto end;
648
649         if (pmatch[1].rm_eo <= pmatch[1].rm_so)
650                 goto end;
651
652         len = pmatch[1].rm_eo - pmatch[1].rm_so;
653         f = sort_malloc((len + 1) * sizeof(char));
654
655         strncpy(f, s + pmatch[1].rm_so, len);
656         f[len] = '\0';
657
658         if (second) {
659                 errno = 0;
660                 ks->f2 = (size_t) strtoul(f, NULL, 10);
661                 if (errno != 0)
662                         err(2, "-k");
663                 if (ks->f2 == 0) {
664                         warn("%s",getstr(5));
665                         goto end;
666                 }
667         } else {
668                 errno = 0;
669                 ks->f1 = (size_t) strtoul(f, NULL, 10);
670                 if (errno != 0)
671                         err(2, "-k");
672                 if (ks->f1 == 0) {
673                         warn("%s",getstr(5));
674                         goto end;
675                 }
676         }
677
678         if (pmatch[2].rm_eo > pmatch[2].rm_so) {
679                 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
680                 c = sort_malloc((len + 1) * sizeof(char));
681
682                 strncpy(c, s + pmatch[2].rm_so + 1, len);
683                 c[len] = '\0';
684
685                 if (second) {
686                         errno = 0;
687                         ks->c2 = (size_t) strtoul(c, NULL, 10);
688                         if (errno != 0)
689                                 err(2, "-k");
690                 } else {
691                         errno = 0;
692                         ks->c1 = (size_t) strtoul(c, NULL, 10);
693                         if (errno != 0)
694                                 err(2, "-k");
695                         if (ks->c1 == 0) {
696                                 warn("%s",getstr(6));
697                                 goto end;
698                         }
699                 }
700         } else {
701                 if (second)
702                         ks->c2 = 0;
703                 else
704                         ks->c1 = 1;
705         }
706
707         if (pmatch[3].rm_eo > pmatch[3].rm_so) {
708                 regoff_t i = 0;
709
710                 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
711                         check_mutually_exclusive_flags(s[i], mef_flags);
712                         if (s[i] == 'b') {
713                                 if (second)
714                                         ks->pos2b = true;
715                                 else
716                                         ks->pos1b = true;
717                         } else if (!set_sort_modifier(&(ks->sm), s[i]))
718                                 goto end;
719                 }
720         }
721
722         ret = 0;
723
724 end:
725
726         if (c)
727                 sort_free(c);
728         if (f)
729                 sort_free(f);
730         regfree(&re);
731
732         return (ret);
733 }
734
735 /*
736  * Parse -k option value.
737  */
738 static int
739 parse_k(const char *s, struct key_specs *ks)
740 {
741         int ret = -1;
742         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
743             { false, false, false, false, false, false };
744
745         if (s && *s) {
746                 char *sptr;
747
748                 sptr = strchr(s, ',');
749                 if (sptr) {
750                         size_t size1;
751                         char *pos1, *pos2;
752
753                         size1 = sptr - s;
754
755                         if (size1 < 1)
756                                 return (-1);
757                         pos1 = sort_malloc((size1 + 1) * sizeof(char));
758
759                         strncpy(pos1, s, size1);
760                         pos1[size1] = '\0';
761
762                         ret = parse_pos(pos1, ks, mef_flags, false);
763
764                         sort_free(pos1);
765                         if (ret < 0)
766                                 return (ret);
767
768                         pos2 = sort_strdup(sptr + 1);
769                         ret = parse_pos(pos2, ks, mef_flags, true);
770                         sort_free(pos2);
771                 } else
772                         ret = parse_pos(s, ks, mef_flags, false);
773         }
774
775         return (ret);
776 }
777
/*
 * Size, including the terminating NUL, of the modifier-letter buffers
 * that callers hand to parse_pos_obs().
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse POS in +POS -POS option.
 *
 * s is the text after the leading '+' or '-', of the form
 * FIELD[.COLUMN][LETTERS].  On success the field number is stored in
 * *nf, the column number in *nc (0 when absent) and, if any letters
 * are present, they are copied to sopts, which must point to a buffer
 * of at least OBS_SOPTS_SIZE bytes.  sopts is left untouched when
 * there are no letters.
 *
 * Returns 0 on success and -1 when s does not have that form or
 * carries more letters than fit into sopts.  A number that is out of
 * range terminates the program with status 2; numbers are limited to
 * INT_MAX - 1 because the caller adds one to them.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
        regex_t re;
        regmatch_t pmatch[4];
        const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
        unsigned long val;
        size_t len;
        int ret;

        ret = -1;
        *nc = *nf = 0;

        if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
                return (-1);

        if (regexec(&re, s, sizeof(pmatch) / sizeof(pmatch[0]), pmatch,
            0) != 0)
                goto end;

        if (pmatch[0].rm_eo <= pmatch[0].rm_so)
                goto end;

        if (pmatch[1].rm_eo <= pmatch[1].rm_so)
                goto end;

        /*
         * The pattern guarantees that each digit run is followed by a
         * '.', a letter or the end of the string, so strtoul() can read
         * it in place and stops at the right character.
         */
        errno = 0;
        val = strtoul(s + pmatch[1].rm_so, NULL, 10);
        if (errno != 0 || val >= INT_MAX)
                errx(2, "%s", getstr(11));
        *nf = (int)val;

        if (pmatch[2].rm_eo > pmatch[2].rm_so) {
                /* Skip the dot that starts the column match. */
                errno = 0;
                val = strtoul(s + pmatch[2].rm_so + 1, NULL, 10);
                if (errno != 0 || val >= INT_MAX)
                        errx(2, "%s", getstr(11));
                *nc = (int)val;
        }

        if (pmatch[3].rm_eo > pmatch[3].rm_so) {
                len = pmatch[3].rm_eo - pmatch[3].rm_so;

                /* Never write past the caller's buffer. */
                if (len >= OBS_SOPTS_SIZE)
                        goto end;

                memcpy(sopts, s + pmatch[3].rm_so, len);
                sopts[len] = '\0';
        }

        ret = 0;

end:
        regfree(&re);

        return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax
 *
 * Scans argv[1..*argc-1] up to, but not including, a "--" argument.
 * An argument "+POS1" is replaced by a newly allocated "-k..." string
 * in which the field and column numbers of POS1 are increased by one.
 * If the next argument is a valid "-POS2", it is folded into the same
 * -k string (its field number is increased by one only when its
 * column number is non-zero), removed from argv, and *argc is
 * decremented.  Arguments that do not parse as a position are left
 * unchanged.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
        /*
         * Room for "-k", four decimal ints, the separators and two
         * modifier strings of up to OBS_SOPTS_SIZE - 1 characters each.
         */
        char sopt[2 * OBS_SOPTS_SIZE + 64];

        for (int i = 1; i < *argc; i++) {
                char sopts1[OBS_SOPTS_SIZE];
                char *arg1;
                int c1, f1;

                arg1 = argv[i];

                if (strcmp(arg1, "--") == 0) {
                        /* Following arguments are treated as filenames. */
                        break;
                }

                if (strlen(arg1) <= 1 || arg1[0] != '+')
                        continue;

                sopts1[0] = 0;
                c1 = f1 = 0;

                if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
                        continue;

                f1 += 1;
                c1 += 1;

                if (i + 1 < *argc) {
                        char *arg2 = argv[i + 1];

                        if (strlen(arg2) > 1 && arg2[0] == '-') {
                                char sopts2[OBS_SOPTS_SIZE];
                                int c2, f2;

                                sopts2[0] = 0;
                                c2 = f2 = 0;

                                if (parse_pos_obs(arg2 + 1, &f2, &c2,
                                    sopts2) >= 0) {
                                        if (c2 > 0)
                                                f2 += 1;
                                        snprintf(sopt, sizeof(sopt),
                                            "-k%d.%d%s,%d.%d%s",
                                            f1, c1, sopts1, f2, c2, sopts2);
                                        argv[i] = sort_strdup(sopt);

                                        /* Close the gap left by -POS2. */
                                        for (int j = i + 1; j + 1 < *argc; j++)
                                                argv[j] = argv[j + 1];
                                        *argc -= 1;
                                        continue;
                                }
                        }
                }

                snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
                argv[i] = sort_strdup(sopt);
        }
}
913
914 /*
915  * Seed random sort
916  */
917 static void
918 get_random_seed(const char *random_source)
919 {
920         char randseed[32];
921         struct stat fsb, rsb;
922         ssize_t rd;
923         int rsfd;
924
925         rsfd = -1;
926         rd = sizeof(randseed);
927
928         if (random_source == NULL) {
929                 if (getentropy(randseed, sizeof(randseed)) < 0)
930                         err(EX_SOFTWARE, "getentropy");
931                 goto out;
932         }
933
934         rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
935         if (rsfd < 0)
936                 err(EX_NOINPUT, "open: %s", random_source);
937
938         if (fstat(rsfd, &fsb) != 0)
939                 err(EX_SOFTWARE, "fstat");
940
941         if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
942                 err(EX_USAGE,
943                     "random seed isn't a regular file or /dev/random");
944
945         /*
946          * Regular files: read up to maximum seed size and explicitly
947          * reject longer files.
948          */
949         if (S_ISREG(fsb.st_mode)) {
950                 if (fsb.st_size > (off_t)sizeof(randseed))
951                         errx(EX_USAGE, "random seed is too large (%jd >"
952                             " %zu)!", (intmax_t)fsb.st_size,
953                             sizeof(randseed));
954                 else if (fsb.st_size < 1)
955                         errx(EX_USAGE, "random seed is too small ("
956                             "0 bytes)");
957
958                 memset(randseed, 0, sizeof(randseed));
959
960                 rd = read(rsfd, randseed, fsb.st_size);
961                 if (rd < 0)
962                         err(EX_SOFTWARE, "reading random seed file %s",
963                             random_source);
964                 if (rd < (ssize_t)fsb.st_size)
965                         errx(EX_SOFTWARE, "short read from %s", random_source);
966         } else if (S_ISCHR(fsb.st_mode)) {
967                 if (stat("/dev/random", &rsb) < 0)
968                         err(EX_SOFTWARE, "stat");
969
970                 if (fsb.st_dev != rsb.st_dev ||
971                     fsb.st_ino != rsb.st_ino)
972                         errx(EX_USAGE, "random seed is a character "
973                             "device other than /dev/random");
974
975                 if (getentropy(randseed, sizeof(randseed)) < 0)
976                         err(EX_SOFTWARE, "getentropy");
977         }
978
979 out:
980         if (rsfd >= 0)
981                 close(rsfd);
982
983         MD5Init(&md5_ctx);
984         MD5Update(&md5_ctx, randseed, rd);
985 }
986
987 /*
988  * Main function.
989  */
990 int
991 main(int argc, char **argv)
992 {
993         char *outfile, *real_outfile;
994         char *random_source = NULL;
995         int c, result;
996         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
997             { false, false, false, false, false, false };
998
999         result = 0;
1000         outfile = sort_strdup("-");
1001         real_outfile = NULL;
1002
1003         struct sort_mods *sm = &default_sort_mods_object;
1004
1005         init_tmp_files();
1006
1007         set_signal_handler();
1008
1009         set_hw_params();
1010         set_locale();
1011         set_tmpdir();
1012         set_sort_opts();
1013
1014         fix_obsolete_keys(&argc, argv);
1015
1016         while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1017             != -1)) {
1018
1019                 check_mutually_exclusive_flags(c, mef_flags);
1020
1021                 if (!set_sort_modifier(sm, c)) {
1022
1023                         switch (c) {
1024                         case 'c':
1025                                 sort_opts_vals.cflag = true;
1026                                 if (optarg) {
1027                                         if (!strcmp(optarg, "diagnose-first"))
1028                                                 ;
1029                                         else if (!strcmp(optarg, "silent") ||
1030                                             !strcmp(optarg, "quiet"))
1031                                                 sort_opts_vals.csilentflag = true;
1032                                         else if (*optarg)
1033                                                 unknown(optarg);
1034                                 }
1035                                 break;
1036                         case 'C':
1037                                 sort_opts_vals.cflag = true;
1038                                 sort_opts_vals.csilentflag = true;
1039                                 break;
1040                         case 'k':
1041                         {
1042                                 sort_opts_vals.complex_sort = true;
1043                                 sort_opts_vals.kflag = true;
1044
1045                                 keys_num++;
1046                                 keys = sort_realloc(keys, keys_num *
1047                                     sizeof(struct key_specs));
1048                                 memset(&(keys[keys_num - 1]), 0,
1049                                     sizeof(struct key_specs));
1050
1051                                 if (parse_k(optarg, &(keys[keys_num - 1]))
1052                                     < 0) {
1053                                         errc(2, EINVAL, "-k %s", optarg);
1054                                 }
1055
1056                                 break;
1057                         }
1058                         case 'm':
1059                                 sort_opts_vals.mflag = true;
1060                                 break;
1061                         case 'o':
1062                                 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1063                                 strcpy(outfile, optarg);
1064                                 break;
1065                         case 's':
1066                                 sort_opts_vals.sflag = true;
1067                                 break;
1068                         case 'S':
1069                                 available_free_memory =
1070                                     parse_memory_buffer_value(optarg);
1071                                 break;
1072                         case 'T':
1073                                 tmpdir = sort_strdup(optarg);
1074                                 break;
1075                         case 't':
1076                                 while (strlen(optarg) > 1) {
1077                                         if (optarg[0] != '\\') {
1078                                                 errc(2, EINVAL, "%s", optarg);
1079                                         }
1080                                         optarg += 1;
1081                                         if (*optarg == '0') {
1082                                                 *optarg = 0;
1083                                                 break;
1084                                         }
1085                                 }
1086                                 sort_opts_vals.tflag = true;
1087                                 sort_opts_vals.field_sep = btowc(optarg[0]);
1088                                 if (sort_opts_vals.field_sep == WEOF) {
1089                                         errno = EINVAL;
1090                                         err(2, NULL);
1091                                 }
1092                                 if (!gnusort_numeric_compatibility) {
1093                                         if (symbol_decimal_point == sort_opts_vals.field_sep)
1094                                                 symbol_decimal_point = WEOF;
1095                                         if (symbol_thousands_sep == sort_opts_vals.field_sep)
1096                                                 symbol_thousands_sep = WEOF;
1097                                         if (symbol_negative_sign == sort_opts_vals.field_sep)
1098                                                 symbol_negative_sign = WEOF;
1099                                         if (symbol_positive_sign == sort_opts_vals.field_sep)
1100                                                 symbol_positive_sign = WEOF;
1101                                 }
1102                                 break;
1103                         case 'u':
1104                                 sort_opts_vals.uflag = true;
1105                                 /* stable sort for the correct unique val */
1106                                 sort_opts_vals.sflag = true;
1107                                 break;
1108                         case 'z':
1109                                 sort_opts_vals.zflag = true;
1110                                 break;
1111                         case SORT_OPT:
1112                                 if (optarg) {
1113                                         if (!strcmp(optarg, "general-numeric"))
1114                                                 set_sort_modifier(sm, 'g');
1115                                         else if (!strcmp(optarg, "human-numeric"))
1116                                                 set_sort_modifier(sm, 'h');
1117                                         else if (!strcmp(optarg, "numeric"))
1118                                                 set_sort_modifier(sm, 'n');
1119                                         else if (!strcmp(optarg, "month"))
1120                                                 set_sort_modifier(sm, 'M');
1121                                         else if (!strcmp(optarg, "random"))
1122                                                 set_sort_modifier(sm, 'R');
1123                                         else
1124                                                 unknown(optarg);
1125                                 }
1126                                 break;
1127 #if defined(SORT_THREADS)
1128                         case PARALLEL_OPT:
1129                                 nthreads = (size_t)(atoi(optarg));
1130                                 if (nthreads < 1)
1131                                         nthreads = 1;
1132                                 if (nthreads > 1024)
1133                                         nthreads = 1024;
1134                                 break;
1135 #endif
1136                         case QSORT_OPT:
1137                                 sort_opts_vals.sort_method = SORT_QSORT;
1138                                 break;
1139                         case MERGESORT_OPT:
1140                                 sort_opts_vals.sort_method = SORT_MERGESORT;
1141                                 break;
1142                         case MMAP_OPT:
1143                                 use_mmap = true;
1144                                 break;
1145                         case HEAPSORT_OPT:
1146                                 sort_opts_vals.sort_method = SORT_HEAPSORT;
1147                                 break;
1148                         case RADIXSORT_OPT:
1149                                 sort_opts_vals.sort_method = SORT_RADIXSORT;
1150                                 break;
1151                         case RANDOMSOURCE_OPT:
1152                                 random_source = strdup(optarg);
1153                                 break;
1154                         case COMPRESSPROGRAM_OPT:
1155                                 compress_program = strdup(optarg);
1156                                 break;
1157                         case FF_OPT:
1158                                 read_fns_from_file0(optarg);
1159                                 break;
1160                         case BS_OPT:
1161                         {
1162                                 errno = 0;
1163                                 long mof = strtol(optarg, NULL, 10);
1164                                 if (errno != 0)
1165                                         err(2, "--batch-size");
1166                                 if (mof >= 2)
1167                                         max_open_files = (size_t) mof + 1;
1168                         }
1169                                 break;
1170                         case VERSION_OPT:
1171                                 printf("%s\n", VERSION);
1172                                 exit(EXIT_SUCCESS);
1173                                 /* NOTREACHED */
1174                                 break;
1175                         case DEBUG_OPT:
1176                                 debug_sort = true;
1177                                 break;
1178                         case HELP_OPT:
1179                                 usage(false);
1180                                 /* NOTREACHED */
1181                                 break;
1182                         default:
1183                                 usage(true);
1184                                 /* NOTREACHED */
1185                         }
1186                 }
1187         }
1188
1189         argc -= optind;
1190         argv += optind;
1191
1192         if (argv_from_file0) {
1193                 argc = argc_from_file0;
1194                 argv = argv_from_file0;
1195         }
1196
1197 #ifndef WITHOUT_NLS
1198         catalog = catopen("sort", NL_CAT_LOCALE);
1199 #endif
1200
1201         if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1202                 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1203
1204 #ifndef WITHOUT_NLS
1205         catclose(catalog);
1206 #endif
1207
1208         if (keys_num == 0) {
1209                 keys_num = 1;
1210                 keys = sort_realloc(keys, sizeof(struct key_specs));
1211                 memset(&(keys[0]), 0, sizeof(struct key_specs));
1212                 keys[0].c1 = 1;
1213                 keys[0].pos1b = default_sort_mods->bflag;
1214                 keys[0].pos2b = default_sort_mods->bflag;
1215                 memcpy(&(keys[0].sm), default_sort_mods,
1216                     sizeof(struct sort_mods));
1217         }
1218
1219         for (size_t i = 0; i < keys_num; i++) {
1220                 struct key_specs *ks;
1221
1222                 ks = &(keys[i]);
1223
1224                 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1225                     !(ks->pos2b)) {
1226                         ks->pos1b = sm->bflag;
1227                         ks->pos2b = sm->bflag;
1228                         memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1229                 }
1230
1231                 ks->sm.func = get_sort_func(&(ks->sm));
1232         }
1233
1234         if (debug_sort) {
1235                 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1236 #if defined(SORT_THREADS)
1237                 printf("Number of CPUs: %d\n",(int)ncpu);
1238                 nthreads = 1;
1239 #endif
1240                 printf("Using collate rules of %s locale\n",
1241                     setlocale(LC_COLLATE, NULL));
1242                 if (byte_sort)
1243                         printf("Byte sort is used\n");
1244                 if (print_symbols_on_debug) {
1245                         printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1246                         if (symbol_thousands_sep)
1247                                 printf("Thousands separator: <%lc>\n",
1248                                     symbol_thousands_sep);
1249                         printf("Positive sign: <%lc>\n", symbol_positive_sign);
1250                         printf("Negative sign: <%lc>\n", symbol_negative_sign);
1251                 }
1252         }
1253
1254         if (need_random)
1255                 get_random_seed(random_source);
1256
1257         /* Case when the outfile equals one of the input files: */
1258         if (strcmp(outfile, "-")) {
1259
1260                 for(int i = 0; i < argc; ++i) {
1261                         if (strcmp(argv[i], outfile) == 0) {
1262                                 real_outfile = sort_strdup(outfile);
1263                                 for(;;) {
1264                                         char* tmp = sort_malloc(strlen(outfile) +
1265                                             strlen(".tmp") + 1);
1266
1267                                         strcpy(tmp, outfile);
1268                                         strcpy(tmp + strlen(tmp), ".tmp");
1269                                         sort_free(outfile);
1270                                         outfile = tmp;
1271                                         if (access(outfile, F_OK) < 0)
1272                                                 break;
1273                                 }
1274                                 tmp_file_atexit(outfile);
1275                         }
1276                 }
1277         }
1278
1279 #if defined(SORT_THREADS)
1280         if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1281                 nthreads = 1;
1282 #endif
1283
1284         if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1285                 struct file_list fl;
1286                 struct sort_list list;
1287
1288                 sort_list_init(&list);
1289                 file_list_init(&fl, true);
1290
1291                 if (argc < 1)
1292                         procfile("-", &list, &fl);
1293                 else {
1294                         while (argc > 0) {
1295                                 procfile(*argv, &list, &fl);
1296                                 --argc;
1297                                 ++argv;
1298                         }
1299                 }
1300
1301                 if (fl.count < 1)
1302                         sort_list_to_file(&list, outfile);
1303                 else {
1304                         if (list.count > 0) {
1305                                 char *flast = new_tmp_file_name();
1306
1307                                 sort_list_to_file(&list, flast);
1308                                 file_list_add(&fl, flast, false);
1309                         }
1310                         merge_files(&fl, outfile);
1311                 }
1312
1313                 file_list_clean(&fl);
1314
1315                 /*
1316                  * We are about to exit the program, so we can ignore
1317                  * the clean-up for speed
1318                  *
1319                  * sort_list_clean(&list);
1320                  */
1321
1322         } else if (sort_opts_vals.cflag) {
1323                 result = (argc == 0) ? (check("-")) : (check(*argv));
1324         } else if (sort_opts_vals.mflag) {
1325                 struct file_list fl;
1326
1327                 file_list_init(&fl, false);
1328                 /* No file arguments remaining means "read from stdin." */
1329                 if (argc == 0)
1330                         file_list_add(&fl, "-", true);
1331                 else
1332                         file_list_populate(&fl, argc, argv, true);
1333                 merge_files(&fl, outfile);
1334                 file_list_clean(&fl);
1335         }
1336
1337         if (real_outfile) {
1338                 unlink(real_outfile);
1339                 if (rename(outfile, real_outfile) < 0)
1340                         err(2, NULL);
1341                 sort_free(real_outfile);
1342         }
1343
1344         sort_free(outfile);
1345
1346         return (result);
1347 }