]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/sort.c
Remove spurious newline
[FreeBSD/FreeBSD.git] / usr.bin / sort / sort.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57
58 #ifndef WITHOUT_NLS
59 #include <nl_types.h>
60 nl_catd catalog;
61 #endif
62
/*
 * Short-option specification string.  A trailing ':' marks an option
 * that takes an argument (k, o, S, t and T).
 */
#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Set by set_sort_modifier() when the 'R' modifier is requested. */
static bool need_random;

/* MD5 context initialised and fed with the seed by get_random_seed(). */
MD5_CTX md5_ctx;
68
/*
 * Built-in message texts, indexed by message number.  Entry 0 is an
 * empty placeholder so that the numbered messages start at index 1.
 * The comment at the original definition says these are the defaults
 * used when NLS is disabled or no catalogue is found.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	/* Usage text; "%s" receives the program name (see usage()). */
	[12] = "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
96
97 struct sort_opts sort_opts_vals;
98
99 bool debug_sort;
100 bool need_hint;
101
102 #if defined(SORT_THREADS)
103 unsigned int ncpu = 1;
104 size_t nthreads = 1;
105 #endif
106
107 static bool gnusort_numeric_compatibility;
108
109 static struct sort_mods default_sort_mods_object;
110 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
111
112 static bool print_symbols_on_debug;
113
114 /*
115  * Arguments from file (when file0-from option is used:
116  */
117 static size_t argc_from_file0 = (size_t)-1;
118 static char **argv_from_file0;
119
/*
 * Values returned for long options that have no single-character
 * equivalent.  Numbering starts just above CHAR_MAX so that no value
 * can collide with a short option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,	/* --sort */
	HELP_OPT,			/* --help */
	FF_OPT,				/* --files0-from */
	BS_OPT,				/* --batch-size */
	VERSION_OPT,			/* --version */
	DEBUG_OPT,			/* --debug */
#if defined(SORT_THREADS)
	PARALLEL_OPT,			/* --parallel */
#endif
	RANDOMSOURCE_OPT,		/* --random-source */
	COMPRESSPROGRAM_OPT,		/* --compress-program */
	QSORT_OPT,			/* --qsort */
	MERGESORT_OPT,			/* --mergesort */
	HEAPSORT_OPT,			/* --heapsort */
	RADIXSORT_OPT,			/* --radixsort */
	MMAP_OPT			/* --mmap */
};
142
/*
 * Sort-type flags of which at most one may be given; consulted by
 * check_mutually_exclusive_flags().  The count is also used to size the
 * per-invocation "seen" arrays, so it must match the initializer list.
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char
    mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
145
146 static struct option long_options[] = {
147                                 { "batch-size", required_argument, NULL, BS_OPT },
148                                 { "buffer-size", required_argument, NULL, 'S' },
149                                 { "check", optional_argument, NULL, 'c' },
150                                 { "check=silent|quiet", optional_argument, NULL, 'C' },
151                                 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
152                                 { "debug", no_argument, NULL, DEBUG_OPT },
153                                 { "dictionary-order", no_argument, NULL, 'd' },
154                                 { "field-separator", required_argument, NULL, 't' },
155                                 { "files0-from", required_argument, NULL, FF_OPT },
156                                 { "general-numeric-sort", no_argument, NULL, 'g' },
157                                 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
158                                 { "help",no_argument, NULL, HELP_OPT },
159                                 { "human-numeric-sort", no_argument, NULL, 'h' },
160                                 { "ignore-leading-blanks", no_argument, NULL, 'b' },
161                                 { "ignore-case", no_argument, NULL, 'f' },
162                                 { "ignore-nonprinting", no_argument, NULL, 'i' },
163                                 { "key", required_argument, NULL, 'k' },
164                                 { "merge", no_argument, NULL, 'm' },
165                                 { "mergesort", no_argument, NULL, MERGESORT_OPT },
166                                 { "mmap", no_argument, NULL, MMAP_OPT },
167                                 { "month-sort", no_argument, NULL, 'M' },
168                                 { "numeric-sort", no_argument, NULL, 'n' },
169                                 { "output", required_argument, NULL, 'o' },
170 #if defined(SORT_THREADS)
171                                 { "parallel", required_argument, NULL, PARALLEL_OPT },
172 #endif
173                                 { "qsort", no_argument, NULL, QSORT_OPT },
174                                 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
175                                 { "random-sort", no_argument, NULL, 'R' },
176                                 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
177                                 { "reverse", no_argument, NULL, 'r' },
178                                 { "sort", required_argument, NULL, SORT_OPT },
179                                 { "stable", no_argument, NULL, 's' },
180                                 { "temporary-directory",required_argument, NULL, 'T' },
181                                 { "unique", no_argument, NULL, 'u' },
182                                 { "version", no_argument, NULL, VERSION_OPT },
183                                 { "version-sort",no_argument, NULL, 'V' },
184                                 { "zero-terminated", no_argument, NULL, 'z' },
185                                 { NULL, no_argument, NULL, 0 }
186 };
187
188 void fix_obsolete_keys(int *argc, char **argv);
189
190 /*
191  * Check where sort modifier is present
192  */
193 static bool
194 sort_modifier_empty(struct sort_mods *sm)
195 {
196
197         if (sm == NULL)
198                 return (true);
199         return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
200             sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
201 }
202
/*
 * Print the usage text (message 12) and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.  Never returns.
 */
static void
usage(bool opt_err)
{
	FILE *stream = stdout;
	int status = 0;

	if (opt_err) {
		stream = stderr;
		status = 2;
	}

	/* The message text carries a single "%s" for the program name. */
	fprintf(stream, getstr(12), getprogname());
	exit(status);
}
218
219 /*
220  * Read input file names from a file (file0-from option).
221  */
222 static void
223 read_fns_from_file0(const char *fn)
224 {
225         FILE *f;
226         char *line = NULL;
227         size_t linesize = 0;
228         ssize_t linelen;
229
230         if (fn == NULL)
231                 return;
232
233         f = fopen(fn, "r");
234         if (f == NULL)
235                 err(2, "%s", fn);
236
237         while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
238                 if (*line != '\0') {
239                         if (argc_from_file0 == (size_t) - 1)
240                                 argc_from_file0 = 0;
241                         ++argc_from_file0;
242                         argv_from_file0 = sort_realloc(argv_from_file0,
243                             argc_from_file0 * sizeof(char *));
244                         if (argv_from_file0 == NULL)
245                                 err(2, NULL);
246                         argv_from_file0[argc_from_file0 - 1] = line;
247                 } else {
248                         free(line);
249                 }
250                 line = NULL;
251                 linesize = 0;
252         }
253         if (ferror(f))
254                 err(2, "%s: getdelim", fn);
255
256         closefile(f, fn);
257 }
258
259 /*
260  * Check how much RAM is available for the sort.
261  */
262 static void
263 set_hw_params(void)
264 {
265         long pages, psize;
266
267 #if defined(SORT_THREADS)
268         ncpu = 1;
269 #endif
270
271         pages = sysconf(_SC_PHYS_PAGES);
272         if (pages < 1) {
273                 perror("sysconf pages");
274                 pages = 1;
275         }
276         psize = sysconf(_SC_PAGESIZE);
277         if (psize < 1) {
278                 perror("sysconf psize");
279                 psize = 4096;
280         }
281 #if defined(SORT_THREADS)
282         ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
283         if (ncpu < 1)
284                 ncpu = 1;
285         else if(ncpu > 32)
286                 ncpu = 32;
287
288         nthreads = ncpu;
289 #endif
290
291         free_memory = (unsigned long long) pages * (unsigned long long) psize;
292         available_free_memory = free_memory / 2;
293
294         if (available_free_memory < 1024)
295                 available_free_memory = 1024;
296 }
297
/*
 * Convert the first multibyte character of c into a wide character.
 *
 * The result is stored in *wc.  When mbtowc() reports an error or an
 * empty string (a return value below 1), *wc is set to def instead.
 * Nothing happens when either wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
313
314 /*
315  * Set current locale symbols.
316  */
317 static void
318 set_locale(void)
319 {
320         struct lconv *lc;
321         const char *locale;
322
323         setlocale(LC_ALL, "");
324
325         lc = localeconv();
326
327         if (lc) {
328                 /* obtain LC_NUMERIC info */
329                 /* Convert to wide char form */
330                 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
331                     symbol_decimal_point);
332                 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
333                     symbol_thousands_sep);
334                 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
335                     symbol_positive_sign);
336                 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
337                     symbol_negative_sign);
338         }
339
340         if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
341                 gnusort_numeric_compatibility = true;
342
343         locale = setlocale(LC_COLLATE, NULL);
344
345         if (locale) {
346                 char *tmpl;
347                 const char *cclocale;
348
349                 tmpl = sort_strdup(locale);
350                 cclocale = setlocale(LC_COLLATE, "C");
351                 if (cclocale && !strcmp(cclocale, tmpl))
352                         byte_sort = true;
353                 else {
354                         const char *pclocale;
355
356                         pclocale = setlocale(LC_COLLATE, "POSIX");
357                         if (pclocale && !strcmp(pclocale, tmpl))
358                                 byte_sort = true;
359                 }
360                 setlocale(LC_COLLATE, tmpl);
361                 sort_free(tmpl);
362         }
363 }
364
365 /*
366  * Set directory temporary files.
367  */
368 static void
369 set_tmpdir(void)
370 {
371         char *td;
372
373         td = getenv("TMPDIR");
374         if (td != NULL)
375                 tmpdir = sort_strdup(td);
376 }
377
378 /*
379  * Parse -S option.
380  */
381 static unsigned long long
382 parse_memory_buffer_value(const char *value)
383 {
384
385         if (value == NULL)
386                 return (available_free_memory);
387         else {
388                 char *endptr;
389                 unsigned long long membuf;
390
391                 endptr = NULL;
392                 errno = 0;
393                 membuf = strtoll(value, &endptr, 10);
394
395                 if (errno != 0) {
396                         warn("%s",getstr(4));
397                         membuf = available_free_memory;
398                 } else {
399                         switch (*endptr){
400                         case 'Y':
401                                 membuf *= 1024;
402                                 /* FALLTHROUGH */
403                         case 'Z':
404                                 membuf *= 1024;
405                                 /* FALLTHROUGH */
406                         case 'E':
407                                 membuf *= 1024;
408                                 /* FALLTHROUGH */
409                         case 'P':
410                                 membuf *= 1024;
411                                 /* FALLTHROUGH */
412                         case 'T':
413                                 membuf *= 1024;
414                                 /* FALLTHROUGH */
415                         case 'G':
416                                 membuf *= 1024;
417                                 /* FALLTHROUGH */
418                         case 'M':
419                                 membuf *= 1024;
420                                 /* FALLTHROUGH */
421                         case '\0':
422                         case 'K':
423                                 membuf *= 1024;
424                                 /* FALLTHROUGH */
425                         case 'b':
426                                 break;
427                         case '%':
428                                 membuf = (available_free_memory * membuf) /
429                                     100;
430                                 break;
431                         default:
432                                 warnc(EINVAL, "%s", optarg);
433                                 membuf = available_free_memory;
434                         }
435                 }
436                 return (membuf);
437         }
438 }
439
440 /*
441  * Signal handler that clears the temporary files.
442  */
443 static void
444 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
445     void *context __unused)
446 {
447
448         clear_tmp_files();
449         exit(-1);
450 }
451
452 /*
453  * Set signal handler on panic signals.
454  */
455 static void
456 set_signal_handler(void)
457 {
458         struct sigaction sa;
459
460         memset(&sa, 0, sizeof(sa));
461         sa.sa_sigaction = &sig_handler;
462         sa.sa_flags = SA_SIGINFO;
463
464         if (sigaction(SIGTERM, &sa, NULL) < 0) {
465                 perror("sigaction");
466                 return;
467         }
468         if (sigaction(SIGHUP, &sa, NULL) < 0) {
469                 perror("sigaction");
470                 return;
471         }
472         if (sigaction(SIGINT, &sa, NULL) < 0) {
473                 perror("sigaction");
474                 return;
475         }
476         if (sigaction(SIGQUIT, &sa, NULL) < 0) {
477                 perror("sigaction");
478                 return;
479         }
480         if (sigaction(SIGABRT, &sa, NULL) < 0) {
481                 perror("sigaction");
482                 return;
483         }
484         if (sigaction(SIGBUS, &sa, NULL) < 0) {
485                 perror("sigaction");
486                 return;
487         }
488         if (sigaction(SIGSEGV, &sa, NULL) < 0) {
489                 perror("sigaction");
490                 return;
491         }
492         if (sigaction(SIGUSR1, &sa, NULL) < 0) {
493                 perror("sigaction");
494                 return;
495         }
496         if (sigaction(SIGUSR2, &sa, NULL) < 0) {
497                 perror("sigaction");
498                 return;
499         }
500 }
501
/*
 * Report an unknown feature: print message 3 followed by what, then
 * exit with status 2.  Never returns.
 */
static void
unknown(const char *what)
{
	const char *msg = getstr(3);

	errx(2, "%s: %s", msg, what);
}
511
512 /*
513  * Check whether contradictory input options are used.
514  */
515 static void
516 check_mutually_exclusive_flags(char c, bool *mef_flags)
517 {
518         int fo_index, mec;
519         bool found_others, found_this;
520
521         found_others = found_this = false;
522         fo_index = 0;
523
524         for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
525                 mec = mutually_exclusive_flags[i];
526
527                 if (mec != c) {
528                         if (mef_flags[i]) {
529                                 if (found_this)
530                                         errx(1, "%c:%c: %s", c, mec, getstr(1));
531                                 found_others = true;
532                                 fo_index = i;
533                         }
534                 } else {
535                         if (found_others)
536                                 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
537                         mef_flags[i] = true;
538                         found_this = true;
539                 }
540         }
541 }
542
543 /*
544  * Initialise sort opts data.
545  */
546 static void
547 set_sort_opts(void)
548 {
549
550         memset(&default_sort_mods_object, 0,
551             sizeof(default_sort_mods_object));
552         memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
553         default_sort_mods_object.func =
554             get_sort_func(&default_sort_mods_object);
555 }
556
557 /*
558  * Set a sort modifier on a sort modifiers object.
559  */
560 static bool
561 set_sort_modifier(struct sort_mods *sm, int c)
562 {
563
564         if (sm == NULL)
565                 return (true);
566
567         switch (c){
568         case 'b':
569                 sm->bflag = true;
570                 break;
571         case 'd':
572                 sm->dflag = true;
573                 break;
574         case 'f':
575                 sm->fflag = true;
576                 break;
577         case 'g':
578                 sm->gflag = true;
579                 need_hint = true;
580                 break;
581         case 'i':
582                 sm->iflag = true;
583                 break;
584         case 'R':
585                 sm->Rflag = true;
586                 need_hint = true;
587                 need_random = true;
588                 break;
589         case 'M':
590                 initialise_months();
591                 sm->Mflag = true;
592                 need_hint = true;
593                 break;
594         case 'n':
595                 sm->nflag = true;
596                 need_hint = true;
597                 print_symbols_on_debug = true;
598                 break;
599         case 'r':
600                 sm->rflag = true;
601                 break;
602         case 'V':
603                 sm->Vflag = true;
604                 break;
605         case 'h':
606                 sm->hflag = true;
607                 need_hint = true;
608                 print_symbols_on_debug = true;
609                 break;
610         default:
611                 return (false);
612         }
613
614         sort_opts_vals.complex_sort = true;
615         sm->func = get_sort_func(sm);
616         return (true);
617 }
618
619 /*
620  * Parse POS in -k option.
621  */
622 static int
623 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
624 {
625         regmatch_t pmatch[4];
626         regex_t re;
627         char *c, *f;
628         const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
629         size_t len, nmatch;
630         int ret;
631
632         ret = -1;
633         nmatch = 4;
634         c = f = NULL;
635
636         if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
637                 return (-1);
638
639         if (regexec(&re, s, nmatch, pmatch, 0) != 0)
640                 goto end;
641
642         if (pmatch[0].rm_eo <= pmatch[0].rm_so)
643                 goto end;
644
645         if (pmatch[1].rm_eo <= pmatch[1].rm_so)
646                 goto end;
647
648         len = pmatch[1].rm_eo - pmatch[1].rm_so;
649         f = sort_malloc((len + 1) * sizeof(char));
650
651         strncpy(f, s + pmatch[1].rm_so, len);
652         f[len] = '\0';
653
654         if (second) {
655                 errno = 0;
656                 ks->f2 = (size_t) strtoul(f, NULL, 10);
657                 if (errno != 0)
658                         err(2, "-k");
659                 if (ks->f2 == 0) {
660                         warn("%s",getstr(5));
661                         goto end;
662                 }
663         } else {
664                 errno = 0;
665                 ks->f1 = (size_t) strtoul(f, NULL, 10);
666                 if (errno != 0)
667                         err(2, "-k");
668                 if (ks->f1 == 0) {
669                         warn("%s",getstr(5));
670                         goto end;
671                 }
672         }
673
674         if (pmatch[2].rm_eo > pmatch[2].rm_so) {
675                 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
676                 c = sort_malloc((len + 1) * sizeof(char));
677
678                 strncpy(c, s + pmatch[2].rm_so + 1, len);
679                 c[len] = '\0';
680
681                 if (second) {
682                         errno = 0;
683                         ks->c2 = (size_t) strtoul(c, NULL, 10);
684                         if (errno != 0)
685                                 err(2, "-k");
686                 } else {
687                         errno = 0;
688                         ks->c1 = (size_t) strtoul(c, NULL, 10);
689                         if (errno != 0)
690                                 err(2, "-k");
691                         if (ks->c1 == 0) {
692                                 warn("%s",getstr(6));
693                                 goto end;
694                         }
695                 }
696         } else {
697                 if (second)
698                         ks->c2 = 0;
699                 else
700                         ks->c1 = 1;
701         }
702
703         if (pmatch[3].rm_eo > pmatch[3].rm_so) {
704                 regoff_t i = 0;
705
706                 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
707                         check_mutually_exclusive_flags(s[i], mef_flags);
708                         if (s[i] == 'b') {
709                                 if (second)
710                                         ks->pos2b = true;
711                                 else
712                                         ks->pos1b = true;
713                         } else if (!set_sort_modifier(&(ks->sm), s[i]))
714                                 goto end;
715                 }
716         }
717
718         ret = 0;
719
720 end:
721
722         if (c)
723                 sort_free(c);
724         if (f)
725                 sort_free(f);
726         regfree(&re);
727
728         return (ret);
729 }
730
731 /*
732  * Parse -k option value.
733  */
734 static int
735 parse_k(const char *s, struct key_specs *ks)
736 {
737         int ret = -1;
738         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
739             { false, false, false, false, false, false };
740
741         if (s && *s) {
742                 char *sptr;
743
744                 sptr = strchr(s, ',');
745                 if (sptr) {
746                         size_t size1;
747                         char *pos1, *pos2;
748
749                         size1 = sptr - s;
750
751                         if (size1 < 1)
752                                 return (-1);
753                         pos1 = sort_malloc((size1 + 1) * sizeof(char));
754
755                         strncpy(pos1, s, size1);
756                         pos1[size1] = '\0';
757
758                         ret = parse_pos(pos1, ks, mef_flags, false);
759
760                         sort_free(pos1);
761                         if (ret < 0)
762                                 return (ret);
763
764                         pos2 = sort_strdup(sptr + 1);
765                         ret = parse_pos(pos2, ks, mef_flags, true);
766                         sort_free(pos2);
767                 } else
768                         ret = parse_pos(s, ks, mef_flags, false);
769         }
770
771         return (ret);
772 }
773
/*
 * Size of the modifier buffers handed to parse_pos_obs(), including the
 * terminating NUL.
 */
enum { OBS_SOPTS_SIZE = 128 };

/*
 * Parse one POS of the obsolete "+POS1 -POS2" syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s      position text without the leading '+' or '-'
 * nf     receives the field number (0 when nothing was parsed)
 * nc     receives the column number (0 when absent)
 * sopts  receives the modifier letters as a string; must point to a
 *        buffer of at least OBS_SOPTS_SIZE bytes.  It is written only
 *        when modifier letters are present.
 *
 * Returns 0 on success and -1 when s does not have the expected form or
 * its modifier letters would not fit into sopts.  A number that does
 * not fit (leaving room for the caller's +1 adjustment) terminates the
 * program with status 2 and the "Invalid key position" message.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long num;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Sub-match 1: the field number. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	strncpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	errno = 0;
	num = strtoul(f, NULL, 10);
	/* The caller adds 1, so INT_MAX itself is already too large. */
	if (errno != 0 || num > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	/* Sub-match 2: the optional ".column" part (skip the dot). */
	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		strncpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		num = strtoul(c, NULL, 10);
		if (errno != 0 || num > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	/* Sub-match 3: the optional modifier letters. */
	if (pmatch[3].rm_eo > pmatch[3].rm_so) {

		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's buffer. */
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		strncpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument of the form "+POS1" is replaced by a newly allocated
 * "-k..." string.  When the following argument is a matching "-POS2",
 * it is merged into the same key, removed from argv, and *argc is
 * decremented.  Obsolete positions are zero-based, so field and column
 * of POS1 are incremented by one; the field of POS2 is incremented only
 * when a non-zero column was given.  Arguments that do not parse as an
 * obsolete position are left untouched.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/*
	 * Large enough for "-k", four decimal ints, two dots, a comma and
	 * two maximum-length modifier strings; snprintf() bounds the write
	 * regardless.
	 */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		char sopts1[OBS_SOPTS_SIZE];
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBS_SOPTS_SIZE];
				int c2, f2;

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Close the gap left by -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep argv[argc] == NULL. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
904
/*
 * Seed the random sort: initialise md5_ctx and feed it the seed bytes.
 *
 * random_source selects where the seed comes from:
 *   NULL             32 bytes from getentropy()
 *   regular file     the file's contents; it must be 1 to 32 bytes long
 *   character device must be the same device node as /dev/random;
 *                    32 bytes are then taken from getentropy()
 * Anything else, and any I/O failure, terminates the program with one
 * of the EX_* exit codes.
 */
static void
get_random_seed(const char *random_source)
{
	char randseed[32];
	struct stat fsb, rsb;
	ssize_t rd;
	int rsfd;

	rsfd = -1;
	/* Seed length; replaced by the file length for regular files. */
	rd = sizeof(randseed);

	if (random_source == NULL) {
		if (getentropy(randseed, sizeof(randseed)) < 0)
			err(EX_SOFTWARE, "getentropy");
		goto out;
	}

	rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
	if (rsfd < 0)
		err(EX_NOINPUT, "open: %s", random_source);

	if (fstat(rsfd, &fsb) != 0)
		err(EX_SOFTWARE, "fstat");

	/*
	 * Not a system-call failure, so errno carries nothing useful:
	 * use errx() to avoid appending an unrelated strerror() text.
	 */
	if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
		errx(EX_USAGE,
		    "random seed isn't a regular file or /dev/random");

	/*
	 * Regular files: read up to maximum seed size and explicitly
	 * reject longer files.
	 */
	if (S_ISREG(fsb.st_mode)) {
		if (fsb.st_size > (off_t)sizeof(randseed))
			errx(EX_USAGE, "random seed is too large (%jd >"
			    " %zu)!", (intmax_t)fsb.st_size,
			    sizeof(randseed));
		else if (fsb.st_size < 1)
			errx(EX_USAGE, "random seed is too small ("
			    "0 bytes)");

		memset(randseed, 0, sizeof(randseed));

		rd = read(rsfd, randseed, fsb.st_size);
		if (rd < 0)
			err(EX_SOFTWARE, "reading random seed file %s",
			    random_source);
		if (rd < (ssize_t)fsb.st_size)
			errx(EX_SOFTWARE, "short read from %s", random_source);
	} else if (S_ISCHR(fsb.st_mode)) {
		/* Accept only the device node that /dev/random refers to. */
		if (stat("/dev/random", &rsb) < 0)
			err(EX_SOFTWARE, "stat");

		if (fsb.st_dev != rsb.st_dev ||
		    fsb.st_ino != rsb.st_ino)
			errx(EX_USAGE, "random seed is a character "
			    "device other than /dev/random");

		if (getentropy(randseed, sizeof(randseed)) < 0)
			err(EX_SOFTWARE, "getentropy");
	}

out:
	if (rsfd >= 0)
		close(rsfd);

	MD5Init(&md5_ctx);
	MD5Update(&md5_ctx, randseed, rd);
}
977
978 /*
979  * Main function.
980  */
981 int
982 main(int argc, char **argv)
983 {
984         char *outfile, *real_outfile;
985         char *random_source = NULL;
986         int c, result;
987         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
988             { false, false, false, false, false, false };
989
990         result = 0;
991         outfile = sort_strdup("-");
992         real_outfile = NULL;
993
994         struct sort_mods *sm = &default_sort_mods_object;
995
996         init_tmp_files();
997
998         set_signal_handler();
999
1000         set_hw_params();
1001         set_locale();
1002         set_tmpdir();
1003         set_sort_opts();
1004
1005         fix_obsolete_keys(&argc, argv);
1006
1007         while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1008             != -1)) {
1009
1010                 check_mutually_exclusive_flags(c, mef_flags);
1011
1012                 if (!set_sort_modifier(sm, c)) {
1013
1014                         switch (c) {
1015                         case 'c':
1016                                 sort_opts_vals.cflag = true;
1017                                 if (optarg) {
1018                                         if (!strcmp(optarg, "diagnose-first"))
1019                                                 ;
1020                                         else if (!strcmp(optarg, "silent") ||
1021                                             !strcmp(optarg, "quiet"))
1022                                                 sort_opts_vals.csilentflag = true;
1023                                         else if (*optarg)
1024                                                 unknown(optarg);
1025                                 }
1026                                 break;
1027                         case 'C':
1028                                 sort_opts_vals.cflag = true;
1029                                 sort_opts_vals.csilentflag = true;
1030                                 break;
1031                         case 'k':
1032                         {
1033                                 sort_opts_vals.complex_sort = true;
1034                                 sort_opts_vals.kflag = true;
1035
1036                                 keys_num++;
1037                                 keys = sort_realloc(keys, keys_num *
1038                                     sizeof(struct key_specs));
1039                                 memset(&(keys[keys_num - 1]), 0,
1040                                     sizeof(struct key_specs));
1041
1042                                 if (parse_k(optarg, &(keys[keys_num - 1]))
1043                                     < 0) {
1044                                         errc(2, EINVAL, "-k %s", optarg);
1045                                 }
1046
1047                                 break;
1048                         }
1049                         case 'm':
1050                                 sort_opts_vals.mflag = true;
1051                                 break;
1052                         case 'o':
1053                                 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1054                                 strcpy(outfile, optarg);
1055                                 break;
1056                         case 's':
1057                                 sort_opts_vals.sflag = true;
1058                                 break;
1059                         case 'S':
1060                                 available_free_memory =
1061                                     parse_memory_buffer_value(optarg);
1062                                 break;
1063                         case 'T':
1064                                 tmpdir = sort_strdup(optarg);
1065                                 break;
1066                         case 't':
1067                                 while (strlen(optarg) > 1) {
1068                                         if (optarg[0] != '\\') {
1069                                                 errc(2, EINVAL, "%s", optarg);
1070                                         }
1071                                         optarg += 1;
1072                                         if (*optarg == '0') {
1073                                                 *optarg = 0;
1074                                                 break;
1075                                         }
1076                                 }
1077                                 sort_opts_vals.tflag = true;
1078                                 sort_opts_vals.field_sep = btowc(optarg[0]);
1079                                 if (sort_opts_vals.field_sep == WEOF) {
1080                                         errno = EINVAL;
1081                                         err(2, NULL);
1082                                 }
1083                                 if (!gnusort_numeric_compatibility) {
1084                                         if (symbol_decimal_point == sort_opts_vals.field_sep)
1085                                                 symbol_decimal_point = WEOF;
1086                                         if (symbol_thousands_sep == sort_opts_vals.field_sep)
1087                                                 symbol_thousands_sep = WEOF;
1088                                         if (symbol_negative_sign == sort_opts_vals.field_sep)
1089                                                 symbol_negative_sign = WEOF;
1090                                         if (symbol_positive_sign == sort_opts_vals.field_sep)
1091                                                 symbol_positive_sign = WEOF;
1092                                 }
1093                                 break;
1094                         case 'u':
1095                                 sort_opts_vals.uflag = true;
1096                                 /* stable sort for the correct unique val */
1097                                 sort_opts_vals.sflag = true;
1098                                 break;
1099                         case 'z':
1100                                 sort_opts_vals.zflag = true;
1101                                 break;
1102                         case SORT_OPT:
1103                                 if (optarg) {
1104                                         if (!strcmp(optarg, "general-numeric"))
1105                                                 set_sort_modifier(sm, 'g');
1106                                         else if (!strcmp(optarg, "human-numeric"))
1107                                                 set_sort_modifier(sm, 'h');
1108                                         else if (!strcmp(optarg, "numeric"))
1109                                                 set_sort_modifier(sm, 'n');
1110                                         else if (!strcmp(optarg, "month"))
1111                                                 set_sort_modifier(sm, 'M');
1112                                         else if (!strcmp(optarg, "random"))
1113                                                 set_sort_modifier(sm, 'R');
1114                                         else
1115                                                 unknown(optarg);
1116                                 }
1117                                 break;
1118 #if defined(SORT_THREADS)
1119                         case PARALLEL_OPT:
1120                                 nthreads = (size_t)(atoi(optarg));
1121                                 if (nthreads < 1)
1122                                         nthreads = 1;
1123                                 if (nthreads > 1024)
1124                                         nthreads = 1024;
1125                                 break;
1126 #endif
1127                         case QSORT_OPT:
1128                                 sort_opts_vals.sort_method = SORT_QSORT;
1129                                 break;
1130                         case MERGESORT_OPT:
1131                                 sort_opts_vals.sort_method = SORT_MERGESORT;
1132                                 break;
1133                         case MMAP_OPT:
1134                                 use_mmap = true;
1135                                 break;
1136                         case HEAPSORT_OPT:
1137                                 sort_opts_vals.sort_method = SORT_HEAPSORT;
1138                                 break;
1139                         case RADIXSORT_OPT:
1140                                 sort_opts_vals.sort_method = SORT_RADIXSORT;
1141                                 break;
1142                         case RANDOMSOURCE_OPT:
1143                                 random_source = strdup(optarg);
1144                                 break;
1145                         case COMPRESSPROGRAM_OPT:
1146                                 compress_program = strdup(optarg);
1147                                 break;
1148                         case FF_OPT:
1149                                 read_fns_from_file0(optarg);
1150                                 break;
1151                         case BS_OPT:
1152                         {
1153                                 errno = 0;
1154                                 long mof = strtol(optarg, NULL, 10);
1155                                 if (errno != 0)
1156                                         err(2, "--batch-size");
1157                                 if (mof >= 2)
1158                                         max_open_files = (size_t) mof + 1;
1159                         }
1160                                 break;
1161                         case VERSION_OPT:
1162                                 printf("%s\n", VERSION);
1163                                 exit(EXIT_SUCCESS);
1164                                 /* NOTREACHED */
1165                                 break;
1166                         case DEBUG_OPT:
1167                                 debug_sort = true;
1168                                 break;
1169                         case HELP_OPT:
1170                                 usage(false);
1171                                 /* NOTREACHED */
1172                                 break;
1173                         default:
1174                                 usage(true);
1175                                 /* NOTREACHED */
1176                         }
1177                 }
1178         }
1179
1180         argc -= optind;
1181         argv += optind;
1182
1183         if (argv_from_file0) {
1184                 argc = argc_from_file0;
1185                 argv = argv_from_file0;
1186         }
1187
1188 #ifndef WITHOUT_NLS
1189         catalog = catopen("sort", NL_CAT_LOCALE);
1190 #endif
1191
1192         if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1193                 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1194
1195 #ifndef WITHOUT_NLS
1196         catclose(catalog);
1197 #endif
1198
1199         if (keys_num == 0) {
1200                 keys_num = 1;
1201                 keys = sort_realloc(keys, sizeof(struct key_specs));
1202                 memset(&(keys[0]), 0, sizeof(struct key_specs));
1203                 keys[0].c1 = 1;
1204                 keys[0].pos1b = default_sort_mods->bflag;
1205                 keys[0].pos2b = default_sort_mods->bflag;
1206                 memcpy(&(keys[0].sm), default_sort_mods,
1207                     sizeof(struct sort_mods));
1208         }
1209
1210         for (size_t i = 0; i < keys_num; i++) {
1211                 struct key_specs *ks;
1212
1213                 ks = &(keys[i]);
1214
1215                 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1216                     !(ks->pos2b)) {
1217                         ks->pos1b = sm->bflag;
1218                         ks->pos2b = sm->bflag;
1219                         memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1220                 }
1221
1222                 ks->sm.func = get_sort_func(&(ks->sm));
1223         }
1224
1225         if (debug_sort) {
1226                 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1227 #if defined(SORT_THREADS)
1228                 printf("Number of CPUs: %d\n",(int)ncpu);
1229                 nthreads = 1;
1230 #endif
1231                 printf("Using collate rules of %s locale\n",
1232                     setlocale(LC_COLLATE, NULL));
1233                 if (byte_sort)
1234                         printf("Byte sort is used\n");
1235                 if (print_symbols_on_debug) {
1236                         printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1237                         if (symbol_thousands_sep)
1238                                 printf("Thousands separator: <%lc>\n",
1239                                     symbol_thousands_sep);
1240                         printf("Positive sign: <%lc>\n", symbol_positive_sign);
1241                         printf("Negative sign: <%lc>\n", symbol_negative_sign);
1242                 }
1243         }
1244
1245         if (need_random)
1246                 get_random_seed(random_source);
1247
1248         /* Case when the outfile equals one of the input files: */
1249         if (strcmp(outfile, "-")) {
1250
1251                 for(int i = 0; i < argc; ++i) {
1252                         if (strcmp(argv[i], outfile) == 0) {
1253                                 real_outfile = sort_strdup(outfile);
1254                                 for(;;) {
1255                                         char* tmp = sort_malloc(strlen(outfile) +
1256                                             strlen(".tmp") + 1);
1257
1258                                         strcpy(tmp, outfile);
1259                                         strcpy(tmp + strlen(tmp), ".tmp");
1260                                         sort_free(outfile);
1261                                         outfile = tmp;
1262                                         if (access(outfile, F_OK) < 0)
1263                                                 break;
1264                                 }
1265                                 tmp_file_atexit(outfile);
1266                         }
1267                 }
1268         }
1269
1270 #if defined(SORT_THREADS)
1271         if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1272                 nthreads = 1;
1273 #endif
1274
1275         if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1276                 struct file_list fl;
1277                 struct sort_list list;
1278
1279                 sort_list_init(&list);
1280                 file_list_init(&fl, true);
1281
1282                 if (argc < 1)
1283                         procfile("-", &list, &fl);
1284                 else {
1285                         while (argc > 0) {
1286                                 procfile(*argv, &list, &fl);
1287                                 --argc;
1288                                 ++argv;
1289                         }
1290                 }
1291
1292                 if (fl.count < 1)
1293                         sort_list_to_file(&list, outfile);
1294                 else {
1295                         if (list.count > 0) {
1296                                 char *flast = new_tmp_file_name();
1297
1298                                 sort_list_to_file(&list, flast);
1299                                 file_list_add(&fl, flast, false);
1300                         }
1301                         merge_files(&fl, outfile);
1302                 }
1303
1304                 file_list_clean(&fl);
1305
1306                 /*
1307                  * We are about to exit the program, so we can ignore
1308                  * the clean-up for speed
1309                  *
1310                  * sort_list_clean(&list);
1311                  */
1312
1313         } else if (sort_opts_vals.cflag) {
1314                 result = (argc == 0) ? (check("-")) : (check(*argv));
1315         } else if (sort_opts_vals.mflag) {
1316                 struct file_list fl;
1317
1318                 file_list_init(&fl, false);
1319                 /* No file arguments remaining means "read from stdin." */
1320                 if (argc == 0)
1321                         file_list_add(&fl, "-", true);
1322                 else
1323                         file_list_populate(&fl, argc, argv, true);
1324                 merge_files(&fl, outfile);
1325                 file_list_clean(&fl);
1326         }
1327
1328         if (real_outfile) {
1329                 unlink(real_outfile);
1330                 if (rename(outfile, real_outfile) < 0)
1331                         err(2, NULL);
1332                 sort_free(real_outfile);
1333         }
1334
1335         sort_free(outfile);
1336
1337         return (result);
1338 }