]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.bin/sort/sort.c
Merge bmake-20230414
[FreeBSD/FreeBSD.git] / usr.bin / sort / sort.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32
33 #include <sys/stat.h>
34 #include <sys/sysctl.h>
35 #include <sys/types.h>
36
37 #include <err.h>
38 #include <errno.h>
39 #include <fcntl.h>
40 #include <getopt.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <md5.h>
44 #include <regex.h>
45 #include <signal.h>
46 #include <stdbool.h>
47 #include <stdio.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <unistd.h>
51 #include <wchar.h>
52 #include <wctype.h>
53
54 #include "coll.h"
55 #include "file.h"
56 #include "sort.h"
57
58 #define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"
59
60 static bool need_random;
61
62 MD5_CTX md5_ctx;
63
/*
 * Default message table, indexed by message number.  Slot 0 is an empty
 * placeholder so that the real messages start at index 1.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	/* Usage text; the single %s receives the program name. */
	[12] =
	    "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
90
91 struct sort_opts sort_opts_vals;
92
93 bool debug_sort;
94 bool need_hint;
95
96 size_t mb_cur_max;
97
98 #if defined(SORT_THREADS)
99 unsigned int ncpu = 1;
100 size_t nthreads = 1;
101 #endif
102
103 static bool gnusort_numeric_compatibility;
104
105 static struct sort_mods default_sort_mods_object;
106 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
107
108 static bool print_symbols_on_debug;
109
/*
 * Arguments read from a file (when the files0-from option is used).
 * The count stays at (size_t)-1 until read_fns_from_file0() stores the
 * first name.
 */
static size_t argc_from_file0 = (size_t)-1;
static char **argv_from_file0;
115
/*
 * Identifiers for long options that have no single-character equivalent.
 * Numbering starts just above CHAR_MAX so that these values cannot clash
 * with any option letter.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
138
/* Sort-type modifiers of which at most one may be given for a key. */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char
mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
141
142 static struct option long_options[] = {
143                                 { "batch-size", required_argument, NULL, BS_OPT },
144                                 { "buffer-size", required_argument, NULL, 'S' },
145                                 { "check", optional_argument, NULL, 'c' },
146                                 { "check=silent|quiet", optional_argument, NULL, 'C' },
147                                 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
148                                 { "debug", no_argument, NULL, DEBUG_OPT },
149                                 { "dictionary-order", no_argument, NULL, 'd' },
150                                 { "field-separator", required_argument, NULL, 't' },
151                                 { "files0-from", required_argument, NULL, FF_OPT },
152                                 { "general-numeric-sort", no_argument, NULL, 'g' },
153                                 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
154                                 { "help",no_argument, NULL, HELP_OPT },
155                                 { "human-numeric-sort", no_argument, NULL, 'h' },
156                                 { "ignore-leading-blanks", no_argument, NULL, 'b' },
157                                 { "ignore-case", no_argument, NULL, 'f' },
158                                 { "ignore-nonprinting", no_argument, NULL, 'i' },
159                                 { "key", required_argument, NULL, 'k' },
160                                 { "merge", no_argument, NULL, 'm' },
161                                 { "mergesort", no_argument, NULL, MERGESORT_OPT },
162                                 { "mmap", no_argument, NULL, MMAP_OPT },
163                                 { "month-sort", no_argument, NULL, 'M' },
164                                 { "numeric-sort", no_argument, NULL, 'n' },
165                                 { "output", required_argument, NULL, 'o' },
166 #if defined(SORT_THREADS)
167                                 { "parallel", required_argument, NULL, PARALLEL_OPT },
168 #endif
169                                 { "qsort", no_argument, NULL, QSORT_OPT },
170                                 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
171                                 { "random-sort", no_argument, NULL, 'R' },
172                                 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
173                                 { "reverse", no_argument, NULL, 'r' },
174                                 { "sort", required_argument, NULL, SORT_OPT },
175                                 { "stable", no_argument, NULL, 's' },
176                                 { "temporary-directory",required_argument, NULL, 'T' },
177                                 { "unique", no_argument, NULL, 'u' },
178                                 { "version", no_argument, NULL, VERSION_OPT },
179                                 { "version-sort",no_argument, NULL, 'V' },
180                                 { "zero-terminated", no_argument, NULL, 'z' },
181                                 { NULL, no_argument, NULL, 0 }
182 };
183
/* Rewrites obsolete "+POS1 [-POS2]" arguments in argv as -k options. */
void fix_obsolete_keys(int *argc, char **argv);
185
186 /*
187  * Check where sort modifier is present
188  */
189 static bool
190 sort_modifier_empty(struct sort_mods *sm)
191 {
192
193         if (sm == NULL)
194                 return (true);
195         return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
196             sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
197 }
198
/*
 * Print the usage text and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.
 */
static void
usage(bool opt_err)
{

	if (opt_err) {
		fprintf(stderr, getstr(12), getprogname());
		exit(2);
	}

	fprintf(stdout, getstr(12), getprogname());
	exit(0);
}
214
215 /*
216  * Read input file names from a file (file0-from option).
217  */
218 static void
219 read_fns_from_file0(const char *fn)
220 {
221         FILE *f;
222         char *line = NULL;
223         size_t linesize = 0;
224         ssize_t linelen;
225
226         if (fn == NULL)
227                 return;
228
229         f = fopen(fn, "r");
230         if (f == NULL)
231                 err(2, "%s", fn);
232
233         while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
234                 if (*line != '\0') {
235                         if (argc_from_file0 == (size_t) - 1)
236                                 argc_from_file0 = 0;
237                         ++argc_from_file0;
238                         argv_from_file0 = sort_realloc(argv_from_file0,
239                             argc_from_file0 * sizeof(char *));
240                         if (argv_from_file0 == NULL)
241                                 err(2, NULL);
242                         argv_from_file0[argc_from_file0 - 1] = line;
243                 } else {
244                         free(line);
245                 }
246                 line = NULL;
247                 linesize = 0;
248         }
249         if (ferror(f))
250                 err(2, "%s: getdelim", fn);
251
252         closefile(f, fn);
253 }
254
255 /*
256  * Check how much RAM is available for the sort.
257  */
258 static void
259 set_hw_params(void)
260 {
261         long pages, psize;
262
263 #if defined(SORT_THREADS)
264         ncpu = 1;
265 #endif
266
267         pages = sysconf(_SC_PHYS_PAGES);
268         if (pages < 1) {
269                 perror("sysconf pages");
270                 pages = 1;
271         }
272         psize = sysconf(_SC_PAGESIZE);
273         if (psize < 1) {
274                 perror("sysconf psize");
275                 psize = 4096;
276         }
277 #if defined(SORT_THREADS)
278         ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
279         if (ncpu < 1)
280                 ncpu = 1;
281         else if(ncpu > 32)
282                 ncpu = 32;
283
284         nthreads = ncpu;
285 #endif
286
287         free_memory = (unsigned long long) pages * (unsigned long long) psize;
288         available_free_memory = free_memory / 2;
289
290         if (available_free_memory < 1024)
291                 available_free_memory = 1024;
292 }
293
294 /*
295  * Convert "plain" symbol to wide symbol, with default value.
296  */
297 static void
298 conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
299 {
300
301         if (wc && c) {
302                 int res;
303
304                 res = mbtowc(wc, c, mb_cur_max);
305                 if (res < 1)
306                         *wc = def;
307         }
308 }
309
310 /*
311  * Set current locale symbols.
312  */
313 static void
314 set_locale(void)
315 {
316         struct lconv *lc;
317         const char *locale;
318
319         setlocale(LC_ALL, "");
320
321         mb_cur_max = MB_CUR_MAX;
322
323         lc = localeconv();
324
325         if (lc) {
326                 /* obtain LC_NUMERIC info */
327                 /* Convert to wide char form */
328                 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
329                     symbol_decimal_point);
330                 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
331                     symbol_thousands_sep);
332                 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
333                     symbol_positive_sign);
334                 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
335                     symbol_negative_sign);
336         }
337
338         if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
339                 gnusort_numeric_compatibility = true;
340
341         locale = setlocale(LC_COLLATE, NULL);
342
343         if (locale) {
344                 char *tmpl;
345                 const char *cclocale;
346
347                 tmpl = sort_strdup(locale);
348                 cclocale = setlocale(LC_COLLATE, "C");
349                 if (cclocale && !strcmp(cclocale, tmpl))
350                         byte_sort = true;
351                 else {
352                         const char *pclocale;
353
354                         pclocale = setlocale(LC_COLLATE, "POSIX");
355                         if (pclocale && !strcmp(pclocale, tmpl))
356                                 byte_sort = true;
357                 }
358                 setlocale(LC_COLLATE, tmpl);
359                 sort_free(tmpl);
360         }
361 }
362
363 /*
364  * Set directory temporary files.
365  */
366 static void
367 set_tmpdir(void)
368 {
369         char *td;
370
371         td = getenv("TMPDIR");
372         if (td != NULL)
373                 tmpdir = sort_strdup(td);
374 }
375
376 /*
377  * Parse -S option.
378  */
379 static unsigned long long
380 parse_memory_buffer_value(const char *value)
381 {
382
383         if (value == NULL)
384                 return (available_free_memory);
385         else {
386                 char *endptr;
387                 unsigned long long membuf;
388
389                 endptr = NULL;
390                 errno = 0;
391                 membuf = strtoll(value, &endptr, 10);
392
393                 if (errno != 0) {
394                         warn("%s",getstr(4));
395                         membuf = available_free_memory;
396                 } else {
397                         switch (*endptr){
398                         case 'Y':
399                                 membuf *= 1024;
400                                 /* FALLTHROUGH */
401                         case 'Z':
402                                 membuf *= 1024;
403                                 /* FALLTHROUGH */
404                         case 'E':
405                                 membuf *= 1024;
406                                 /* FALLTHROUGH */
407                         case 'P':
408                                 membuf *= 1024;
409                                 /* FALLTHROUGH */
410                         case 'T':
411                                 membuf *= 1024;
412                                 /* FALLTHROUGH */
413                         case 'G':
414                                 membuf *= 1024;
415                                 /* FALLTHROUGH */
416                         case 'M':
417                                 membuf *= 1024;
418                                 /* FALLTHROUGH */
419                         case '\0':
420                         case 'K':
421                                 membuf *= 1024;
422                                 /* FALLTHROUGH */
423                         case 'b':
424                                 break;
425                         case '%':
426                                 membuf = (available_free_memory * membuf) /
427                                     100;
428                                 break;
429                         default:
430                                 warnc(EINVAL, "%s", optarg);
431                                 membuf = available_free_memory;
432                         }
433                 }
434                 return (membuf);
435         }
436 }
437
438 /*
439  * Signal handler that clears the temporary files.
440  */
441 static void
442 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
443     void *context __unused)
444 {
445
446         clear_tmp_files();
447         exit(-1);
448 }
449
450 /*
451  * Set signal handler on panic signals.
452  */
453 static void
454 set_signal_handler(void)
455 {
456         struct sigaction sa;
457
458         memset(&sa, 0, sizeof(sa));
459         sa.sa_sigaction = &sig_handler;
460         sa.sa_flags = SA_SIGINFO;
461
462         if (sigaction(SIGTERM, &sa, NULL) < 0) {
463                 perror("sigaction");
464                 return;
465         }
466         if (sigaction(SIGHUP, &sa, NULL) < 0) {
467                 perror("sigaction");
468                 return;
469         }
470         if (sigaction(SIGINT, &sa, NULL) < 0) {
471                 perror("sigaction");
472                 return;
473         }
474         if (sigaction(SIGQUIT, &sa, NULL) < 0) {
475                 perror("sigaction");
476                 return;
477         }
478         if (sigaction(SIGABRT, &sa, NULL) < 0) {
479                 perror("sigaction");
480                 return;
481         }
482         if (sigaction(SIGBUS, &sa, NULL) < 0) {
483                 perror("sigaction");
484                 return;
485         }
486         if (sigaction(SIGSEGV, &sa, NULL) < 0) {
487                 perror("sigaction");
488                 return;
489         }
490         if (sigaction(SIGUSR1, &sa, NULL) < 0) {
491                 perror("sigaction");
492                 return;
493         }
494         if (sigaction(SIGUSR2, &sa, NULL) < 0) {
495                 perror("sigaction");
496                 return;
497         }
498 }
499
/*
 * Report an unknown feature: print message 3 followed by the offending
 * text `what', then exit with status 2.
 */
static void
unknown(const char *what)
{
	const char *prefix = getstr(3);

	errx(2, "%s: %s", prefix, what);
}
509
510 /*
511  * Check whether contradictory input options are used.
512  */
513 static void
514 check_mutually_exclusive_flags(char c, bool *mef_flags)
515 {
516         int fo_index, mec;
517         bool found_others, found_this;
518
519         found_others = found_this = false;
520         fo_index = 0;
521
522         for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
523                 mec = mutually_exclusive_flags[i];
524
525                 if (mec != c) {
526                         if (mef_flags[i]) {
527                                 if (found_this)
528                                         errx(1, "%c:%c: %s", c, mec, getstr(1));
529                                 found_others = true;
530                                 fo_index = i;
531                         }
532                 } else {
533                         if (found_others)
534                                 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
535                         mef_flags[i] = true;
536                         found_this = true;
537                 }
538         }
539 }
540
541 /*
542  * Initialise sort opts data.
543  */
544 static void
545 set_sort_opts(void)
546 {
547
548         memset(&default_sort_mods_object, 0,
549             sizeof(default_sort_mods_object));
550         memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
551         default_sort_mods_object.func =
552             get_sort_func(&default_sort_mods_object);
553 }
554
555 /*
556  * Set a sort modifier on a sort modifiers object.
557  */
558 static bool
559 set_sort_modifier(struct sort_mods *sm, int c)
560 {
561
562         if (sm == NULL)
563                 return (true);
564
565         switch (c){
566         case 'b':
567                 sm->bflag = true;
568                 break;
569         case 'd':
570                 sm->dflag = true;
571                 break;
572         case 'f':
573                 sm->fflag = true;
574                 break;
575         case 'g':
576                 sm->gflag = true;
577                 need_hint = true;
578                 break;
579         case 'i':
580                 sm->iflag = true;
581                 break;
582         case 'R':
583                 sm->Rflag = true;
584                 need_hint = true;
585                 need_random = true;
586                 break;
587         case 'M':
588                 initialise_months();
589                 sm->Mflag = true;
590                 need_hint = true;
591                 break;
592         case 'n':
593                 sm->nflag = true;
594                 need_hint = true;
595                 print_symbols_on_debug = true;
596                 break;
597         case 'r':
598                 sm->rflag = true;
599                 break;
600         case 'V':
601                 sm->Vflag = true;
602                 break;
603         case 'h':
604                 sm->hflag = true;
605                 need_hint = true;
606                 print_symbols_on_debug = true;
607                 break;
608         default:
609                 return (false);
610         }
611
612         sort_opts_vals.complex_sort = true;
613         sm->func = get_sort_func(sm);
614         return (true);
615 }
616
617 /*
618  * Parse POS in -k option.
619  */
620 static int
621 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
622 {
623         regmatch_t pmatch[4];
624         regex_t re;
625         char *c, *f;
626         const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
627         size_t len, nmatch;
628         int ret;
629
630         ret = -1;
631         nmatch = 4;
632         c = f = NULL;
633
634         if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
635                 return (-1);
636
637         if (regexec(&re, s, nmatch, pmatch, 0) != 0)
638                 goto end;
639
640         if (pmatch[0].rm_eo <= pmatch[0].rm_so)
641                 goto end;
642
643         if (pmatch[1].rm_eo <= pmatch[1].rm_so)
644                 goto end;
645
646         len = pmatch[1].rm_eo - pmatch[1].rm_so;
647         f = sort_malloc((len + 1) * sizeof(char));
648
649         strncpy(f, s + pmatch[1].rm_so, len);
650         f[len] = '\0';
651
652         if (second) {
653                 errno = 0;
654                 ks->f2 = (size_t) strtoul(f, NULL, 10);
655                 if (errno != 0)
656                         err(2, "-k");
657                 if (ks->f2 == 0) {
658                         warn("%s",getstr(5));
659                         goto end;
660                 }
661         } else {
662                 errno = 0;
663                 ks->f1 = (size_t) strtoul(f, NULL, 10);
664                 if (errno != 0)
665                         err(2, "-k");
666                 if (ks->f1 == 0) {
667                         warn("%s",getstr(5));
668                         goto end;
669                 }
670         }
671
672         if (pmatch[2].rm_eo > pmatch[2].rm_so) {
673                 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
674                 c = sort_malloc((len + 1) * sizeof(char));
675
676                 strncpy(c, s + pmatch[2].rm_so + 1, len);
677                 c[len] = '\0';
678
679                 if (second) {
680                         errno = 0;
681                         ks->c2 = (size_t) strtoul(c, NULL, 10);
682                         if (errno != 0)
683                                 err(2, "-k");
684                 } else {
685                         errno = 0;
686                         ks->c1 = (size_t) strtoul(c, NULL, 10);
687                         if (errno != 0)
688                                 err(2, "-k");
689                         if (ks->c1 == 0) {
690                                 warn("%s",getstr(6));
691                                 goto end;
692                         }
693                 }
694         } else {
695                 if (second)
696                         ks->c2 = 0;
697                 else
698                         ks->c1 = 1;
699         }
700
701         if (pmatch[3].rm_eo > pmatch[3].rm_so) {
702                 regoff_t i = 0;
703
704                 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
705                         check_mutually_exclusive_flags(s[i], mef_flags);
706                         if (s[i] == 'b') {
707                                 if (second)
708                                         ks->pos2b = true;
709                                 else
710                                         ks->pos1b = true;
711                         } else if (!set_sort_modifier(&(ks->sm), s[i]))
712                                 goto end;
713                 }
714         }
715
716         ret = 0;
717
718 end:
719
720         if (c)
721                 sort_free(c);
722         if (f)
723                 sort_free(f);
724         regfree(&re);
725
726         return (ret);
727 }
728
729 /*
730  * Parse -k option value.
731  */
732 static int
733 parse_k(const char *s, struct key_specs *ks)
734 {
735         int ret = -1;
736         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
737             { false, false, false, false, false, false };
738
739         if (s && *s) {
740                 char *sptr;
741
742                 sptr = strchr(s, ',');
743                 if (sptr) {
744                         size_t size1;
745                         char *pos1, *pos2;
746
747                         size1 = sptr - s;
748
749                         if (size1 < 1)
750                                 return (-1);
751                         pos1 = sort_malloc((size1 + 1) * sizeof(char));
752
753                         strncpy(pos1, s, size1);
754                         pos1[size1] = '\0';
755
756                         ret = parse_pos(pos1, ks, mef_flags, false);
757
758                         sort_free(pos1);
759                         if (ret < 0)
760                                 return (ret);
761
762                         pos2 = sort_strdup(sptr + 1);
763                         ret = parse_pos(pos2, ks, mef_flags, true);
764                         sort_free(pos2);
765                 } else
766                         ret = parse_pos(s, ks, mef_flags, false);
767         }
768
769         return (ret);
770 }
771
/*
 * Parse POS in the obsolete +POS -POS option syntax.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  The field number is stored in
 * *nf, the column number (0 when absent) in *nc, and the trailing letters
 * are copied into sopts as a NUL-terminated string.  sopts is left
 * untouched when there are no letters.
 *
 * sopts must point to a buffer of at least SOPTS_SIZE (128) bytes, which
 * is the size fix_obsolete_keys() provides.
 *
 * Returns 0 on success and -1 when s does not match the syntax or when
 * the letter suffix would not fit into sopts.  A number that is out of
 * range for the int results terminates the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	enum { SOPTS_SIZE = 128 };
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long num;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Group 1: the field number. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/*
	 * The results are ints that the caller increments by one, so a
	 * value of INT_MAX or more is rejected instead of being truncated.
	 */
	errno = 0;
	num = strtoul(f, NULL, 10);
	if (errno != 0 || num >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)num;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Group 2: ".COLUMN"; skip the leading dot. */
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		num = strtoul(c, NULL, 10);
		if (errno != 0 || num >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)num;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		/* Never write past the caller's fixed-size buffer. */
		if (len >= SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}
845
/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * argv[1..*argc-1] is scanned up to a "--" argument.  Each "+POS1"
 * argument is replaced by a newly allocated "-kF.C[opts]" string, where
 * field and column are the obsolete values plus one.  If it is directly
 * followed by a parsable "-POS2", both arguments are merged into a single
 * "-kF1.C1[opts],F2.C2[opts]" string, the following arguments are shifted
 * down by one and *argc is decremented.  Arguments that do not parse as an
 * obsolete position are left unchanged.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Size of the modifier buffers handed to parse_pos_obs(). */
	enum { SOPTS_SIZE = 128 };
	/*
	 * Holds "-k", four "%d" values, two dots, a comma and two modifier
	 * strings; 64 bytes cover everything except the modifier strings.
	 */
	char sopt[2 * SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char sopts1[SOPTS_SIZE];
		char *arg1, *arg2;
		int c1, f1;

		arg1 = argv[i];

		if (strcmp(arg1, "--") == 0) {
			/* Following arguments are treated as filenames. */
			break;
		}

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		/* Obsolete positions count from 0, -k positions from 1. */
		f1 += 1;
		c1 += 1;

		arg2 = (i + 1 < *argc) ? argv[i + 1] : NULL;
		if (arg2 != NULL && strlen(arg2) > 1 && arg2[0] == '-') {
			char sopts2[SOPTS_SIZE];
			int c2, f2;

			sopts2[0] = '\0';
			c2 = f2 = 0;

			if (parse_pos_obs(arg2 + 1, &f2, &c2, sopts2) >= 0) {
				if (c2 > 0)
					f2 += 1;
				snprintf(sopt, sizeof(sopt),
				    "-k%d.%d%s,%d.%d%s",
				    f1, c1, sopts1, f2, c2, sopts2);
				argv[i] = sort_strdup(sopt);

				/* Drop the consumed "-POS2" argument. */
				for (int j = i + 1; j + 1 < *argc; j++)
					argv[j] = argv[j + 1];
				*argc -= 1;
				/* Keep the shortened vector NULL-terminated. */
				argv[*argc] = NULL;
				continue;
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
907
908 /*
909  * Seed random sort
910  */
911 static void
912 get_random_seed(const char *random_source)
913 {
914         char randseed[32];
915         struct stat fsb, rsb;
916         ssize_t rd;
917         int rsfd;
918
919         rsfd = -1;
920         rd = sizeof(randseed);
921
922         if (random_source == NULL) {
923                 if (getentropy(randseed, sizeof(randseed)) < 0)
924                         err(EX_SOFTWARE, "getentropy");
925                 goto out;
926         }
927
928         rsfd = open(random_source, O_RDONLY | O_CLOEXEC);
929         if (rsfd < 0)
930                 err(EX_NOINPUT, "open: %s", random_source);
931
932         if (fstat(rsfd, &fsb) != 0)
933                 err(EX_SOFTWARE, "fstat");
934
935         if (!S_ISREG(fsb.st_mode) && !S_ISCHR(fsb.st_mode))
936                 err(EX_USAGE,
937                     "random seed isn't a regular file or /dev/random");
938
939         /*
940          * Regular files: read up to maximum seed size and explicitly
941          * reject longer files.
942          */
943         if (S_ISREG(fsb.st_mode)) {
944                 if (fsb.st_size > (off_t)sizeof(randseed))
945                         errx(EX_USAGE, "random seed is too large (%jd >"
946                             " %zu)!", (intmax_t)fsb.st_size,
947                             sizeof(randseed));
948                 else if (fsb.st_size < 1)
949                         errx(EX_USAGE, "random seed is too small ("
950                             "0 bytes)");
951
952                 memset(randseed, 0, sizeof(randseed));
953
954                 rd = read(rsfd, randseed, fsb.st_size);
955                 if (rd < 0)
956                         err(EX_SOFTWARE, "reading random seed file %s",
957                             random_source);
958                 if (rd < (ssize_t)fsb.st_size)
959                         errx(EX_SOFTWARE, "short read from %s", random_source);
960         } else if (S_ISCHR(fsb.st_mode)) {
961                 if (stat("/dev/random", &rsb) < 0)
962                         err(EX_SOFTWARE, "stat");
963
964                 if (fsb.st_dev != rsb.st_dev ||
965                     fsb.st_ino != rsb.st_ino)
966                         errx(EX_USAGE, "random seed is a character "
967                             "device other than /dev/random");
968
969                 if (getentropy(randseed, sizeof(randseed)) < 0)
970                         err(EX_SOFTWARE, "getentropy");
971         }
972
973 out:
974         if (rsfd >= 0)
975                 close(rsfd);
976
977         MD5Init(&md5_ctx);
978         MD5Update(&md5_ctx, randseed, rd);
979 }
980
981 /*
982  * Main function.
983  */
984 int
985 main(int argc, char **argv)
986 {
987         char *outfile, *real_outfile;
988         char *random_source = NULL;
989         int c, result;
990         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
991             { false, false, false, false, false, false };
992
993         result = 0;
994         outfile = sort_strdup("-");
995         real_outfile = NULL;
996
997         struct sort_mods *sm = &default_sort_mods_object;
998
999         init_tmp_files();
1000
1001         set_signal_handler();
1002
1003         set_hw_params();
1004         set_locale();
1005         set_tmpdir();
1006         set_sort_opts();
1007
1008         fix_obsolete_keys(&argc, argv);
1009
1010         while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
1011             != -1)) {
1012
1013                 check_mutually_exclusive_flags(c, mef_flags);
1014
1015                 if (!set_sort_modifier(sm, c)) {
1016
1017                         switch (c) {
1018                         case 'c':
1019                                 sort_opts_vals.cflag = true;
1020                                 if (optarg) {
1021                                         if (!strcmp(optarg, "diagnose-first"))
1022                                                 ;
1023                                         else if (!strcmp(optarg, "silent") ||
1024                                             !strcmp(optarg, "quiet"))
1025                                                 sort_opts_vals.csilentflag = true;
1026                                         else if (*optarg)
1027                                                 unknown(optarg);
1028                                 }
1029                                 break;
1030                         case 'C':
1031                                 sort_opts_vals.cflag = true;
1032                                 sort_opts_vals.csilentflag = true;
1033                                 break;
1034                         case 'k':
1035                         {
1036                                 sort_opts_vals.complex_sort = true;
1037                                 sort_opts_vals.kflag = true;
1038
1039                                 keys_num++;
1040                                 keys = sort_realloc(keys, keys_num *
1041                                     sizeof(struct key_specs));
1042                                 memset(&(keys[keys_num - 1]), 0,
1043                                     sizeof(struct key_specs));
1044
1045                                 if (parse_k(optarg, &(keys[keys_num - 1]))
1046                                     < 0) {
1047                                         errc(2, EINVAL, "-k %s", optarg);
1048                                 }
1049
1050                                 break;
1051                         }
1052                         case 'm':
1053                                 sort_opts_vals.mflag = true;
1054                                 break;
1055                         case 'o':
1056                                 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1057                                 strcpy(outfile, optarg);
1058                                 break;
1059                         case 's':
1060                                 sort_opts_vals.sflag = true;
1061                                 break;
1062                         case 'S':
1063                                 available_free_memory =
1064                                     parse_memory_buffer_value(optarg);
1065                                 break;
1066                         case 'T':
1067                                 tmpdir = sort_strdup(optarg);
1068                                 break;
1069                         case 't':
1070                                 while (strlen(optarg) > 1) {
1071                                         if (optarg[0] != '\\') {
1072                                                 errc(2, EINVAL, "%s", optarg);
1073                                         }
1074                                         optarg += 1;
1075                                         if (*optarg == '0') {
1076                                                 *optarg = 0;
1077                                                 break;
1078                                         }
1079                                 }
1080                                 sort_opts_vals.tflag = true;
1081                                 sort_opts_vals.field_sep = btowc(optarg[0]);
1082                                 if (sort_opts_vals.field_sep == WEOF) {
1083                                         errno = EINVAL;
1084                                         err(2, NULL);
1085                                 }
1086                                 if (!gnusort_numeric_compatibility) {
1087                                         if (symbol_decimal_point == sort_opts_vals.field_sep)
1088                                                 symbol_decimal_point = WEOF;
1089                                         if (symbol_thousands_sep == sort_opts_vals.field_sep)
1090                                                 symbol_thousands_sep = WEOF;
1091                                         if (symbol_negative_sign == sort_opts_vals.field_sep)
1092                                                 symbol_negative_sign = WEOF;
1093                                         if (symbol_positive_sign == sort_opts_vals.field_sep)
1094                                                 symbol_positive_sign = WEOF;
1095                                 }
1096                                 break;
1097                         case 'u':
1098                                 sort_opts_vals.uflag = true;
1099                                 /* stable sort for the correct unique val */
1100                                 sort_opts_vals.sflag = true;
1101                                 break;
1102                         case 'z':
1103                                 sort_opts_vals.zflag = true;
1104                                 break;
1105                         case SORT_OPT:
1106                                 if (optarg) {
1107                                         if (!strcmp(optarg, "general-numeric"))
1108                                                 set_sort_modifier(sm, 'g');
1109                                         else if (!strcmp(optarg, "human-numeric"))
1110                                                 set_sort_modifier(sm, 'h');
1111                                         else if (!strcmp(optarg, "numeric"))
1112                                                 set_sort_modifier(sm, 'n');
1113                                         else if (!strcmp(optarg, "month"))
1114                                                 set_sort_modifier(sm, 'M');
1115                                         else if (!strcmp(optarg, "random"))
1116                                                 set_sort_modifier(sm, 'R');
1117                                         else
1118                                                 unknown(optarg);
1119                                 }
1120                                 break;
1121 #if defined(SORT_THREADS)
1122                         case PARALLEL_OPT:
1123                                 nthreads = (size_t)(atoi(optarg));
1124                                 if (nthreads < 1)
1125                                         nthreads = 1;
1126                                 if (nthreads > 1024)
1127                                         nthreads = 1024;
1128                                 break;
1129 #endif
1130                         case QSORT_OPT:
1131                                 sort_opts_vals.sort_method = SORT_QSORT;
1132                                 break;
1133                         case MERGESORT_OPT:
1134                                 sort_opts_vals.sort_method = SORT_MERGESORT;
1135                                 break;
1136                         case MMAP_OPT:
1137                                 use_mmap = true;
1138                                 break;
1139                         case HEAPSORT_OPT:
1140                                 sort_opts_vals.sort_method = SORT_HEAPSORT;
1141                                 break;
1142                         case RADIXSORT_OPT:
1143                                 sort_opts_vals.sort_method = SORT_RADIXSORT;
1144                                 break;
1145                         case RANDOMSOURCE_OPT:
1146                                 random_source = strdup(optarg);
1147                                 break;
1148                         case COMPRESSPROGRAM_OPT:
1149                                 compress_program = strdup(optarg);
1150                                 break;
1151                         case FF_OPT:
1152                                 read_fns_from_file0(optarg);
1153                                 break;
1154                         case BS_OPT:
1155                         {
1156                                 errno = 0;
1157                                 long mof = strtol(optarg, NULL, 10);
1158                                 if (errno != 0)
1159                                         err(2, "--batch-size");
1160                                 if (mof >= 2)
1161                                         max_open_files = (size_t) mof + 1;
1162                         }
1163                                 break;
1164                         case VERSION_OPT:
1165                                 printf("%s\n", VERSION);
1166                                 exit(EXIT_SUCCESS);
1167                                 /* NOTREACHED */
1168                                 break;
1169                         case DEBUG_OPT:
1170                                 debug_sort = true;
1171                                 break;
1172                         case HELP_OPT:
1173                                 usage(false);
1174                                 /* NOTREACHED */
1175                                 break;
1176                         default:
1177                                 usage(true);
1178                                 /* NOTREACHED */
1179                         }
1180                 }
1181         }
1182
1183         argc -= optind;
1184         argv += optind;
1185
1186         if (argv_from_file0) {
1187                 argc = argc_from_file0;
1188                 argv = argv_from_file0;
1189         }
1190
1191         if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1192                 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1193
1194         if (keys_num == 0) {
1195                 keys_num = 1;
1196                 keys = sort_realloc(keys, sizeof(struct key_specs));
1197                 memset(&(keys[0]), 0, sizeof(struct key_specs));
1198                 keys[0].c1 = 1;
1199                 keys[0].pos1b = default_sort_mods->bflag;
1200                 keys[0].pos2b = default_sort_mods->bflag;
1201                 memcpy(&(keys[0].sm), default_sort_mods,
1202                     sizeof(struct sort_mods));
1203         }
1204
1205         for (size_t i = 0; i < keys_num; i++) {
1206                 struct key_specs *ks;
1207
1208                 ks = &(keys[i]);
1209
1210                 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1211                     !(ks->pos2b)) {
1212                         ks->pos1b = sm->bflag;
1213                         ks->pos2b = sm->bflag;
1214                         memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1215                 }
1216
1217                 ks->sm.func = get_sort_func(&(ks->sm));
1218         }
1219
1220         if (debug_sort) {
1221                 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1222 #if defined(SORT_THREADS)
1223                 printf("Number of CPUs: %d\n",(int)ncpu);
1224                 nthreads = 1;
1225 #endif
1226                 printf("Using collate rules of %s locale\n",
1227                     setlocale(LC_COLLATE, NULL));
1228                 if (byte_sort)
1229                         printf("Byte sort is used\n");
1230                 if (print_symbols_on_debug) {
1231                         printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1232                         if (symbol_thousands_sep)
1233                                 printf("Thousands separator: <%lc>\n",
1234                                     symbol_thousands_sep);
1235                         printf("Positive sign: <%lc>\n", symbol_positive_sign);
1236                         printf("Negative sign: <%lc>\n", symbol_negative_sign);
1237                 }
1238         }
1239
1240         if (need_random)
1241                 get_random_seed(random_source);
1242
1243         /* Case when the outfile equals one of the input files: */
1244         if (strcmp(outfile, "-")) {
1245
1246                 for(int i = 0; i < argc; ++i) {
1247                         if (strcmp(argv[i], outfile) == 0) {
1248                                 real_outfile = sort_strdup(outfile);
1249                                 for(;;) {
1250                                         char* tmp = sort_malloc(strlen(outfile) +
1251                                             strlen(".tmp") + 1);
1252
1253                                         strcpy(tmp, outfile);
1254                                         strcpy(tmp + strlen(tmp), ".tmp");
1255                                         sort_free(outfile);
1256                                         outfile = tmp;
1257                                         if (access(outfile, F_OK) < 0)
1258                                                 break;
1259                                 }
1260                                 tmp_file_atexit(outfile);
1261                         }
1262                 }
1263         }
1264
1265 #if defined(SORT_THREADS)
1266         if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1267                 nthreads = 1;
1268 #endif
1269
1270         if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1271                 struct file_list fl;
1272                 struct sort_list list;
1273
1274                 sort_list_init(&list);
1275                 file_list_init(&fl, true);
1276
1277                 if (argc < 1)
1278                         procfile("-", &list, &fl);
1279                 else {
1280                         while (argc > 0) {
1281                                 procfile(*argv, &list, &fl);
1282                                 --argc;
1283                                 ++argv;
1284                         }
1285                 }
1286
1287                 if (fl.count < 1)
1288                         sort_list_to_file(&list, outfile);
1289                 else {
1290                         if (list.count > 0) {
1291                                 char *flast = new_tmp_file_name();
1292
1293                                 sort_list_to_file(&list, flast);
1294                                 file_list_add(&fl, flast, false);
1295                         }
1296                         merge_files(&fl, outfile);
1297                 }
1298
1299                 file_list_clean(&fl);
1300
1301                 /*
1302                  * We are about to exit the program, so we can ignore
1303                  * the clean-up for speed
1304                  *
1305                  * sort_list_clean(&list);
1306                  */
1307
1308         } else if (sort_opts_vals.cflag) {
1309                 result = (argc == 0) ? (check("-")) : (check(*argv));
1310         } else if (sort_opts_vals.mflag) {
1311                 struct file_list fl;
1312
1313                 file_list_init(&fl, false);
1314                 /* No file arguments remaining means "read from stdin." */
1315                 if (argc == 0)
1316                         file_list_add(&fl, "-", true);
1317                 else
1318                         file_list_populate(&fl, argc, argv, true);
1319                 merge_files(&fl, outfile);
1320                 file_list_clean(&fl);
1321         }
1322
1323         if (real_outfile) {
1324                 unlink(real_outfile);
1325                 if (rename(outfile, real_outfile) < 0)
1326                         err(2, NULL);
1327                 sort_free(real_outfile);
1328         }
1329
1330         sort_free(outfile);
1331
1332         return (result);
1333 }