]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - usr.bin/sort/sort.c
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / usr.bin / sort / sort.c
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
54
55 #ifndef WITHOUT_NLS
56 #include <nl_types.h>
57 nl_catd catalog;
58 #endif
59
/* Single-character option letters, in getopt(3) option-string syntax. */
#define OPTIONS "bcCdfghik:Mmno:RrsS:t:T:uVz"

/*
 * Default source of seed data for random sort, and the maximum number of
 * bytes read from it.
 */
#define DEFAULT_RANDOM_SORT_SEED_FILE ("/dev/random")
#define MAX_DEFAULT_RANDOM_SEED_DATA_SIZE (1024)

/* Random-sort state: whether a seed is needed, where it comes from, and
 * the seed itself once set_random_seed() has computed it. */
static bool need_random;
static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
static const void *random_seed;
static size_t random_seed_size;
69
/* MD5 context that set_random_seed() initialises and feeds with the seed. */
MD5_CTX md5_ctx;
71
/*
 * Built-in message table, indexed by message number.  These are the
 * default messages to use when NLS is disabled or no catalogue is found.
 * Slot 0 is an empty placeholder, so real messages start at index 1.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	[12] =
	    "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
99
100 struct sort_opts sort_opts_vals;
101
102 bool debug_sort;
103 bool need_hint;
104
105 #if defined(SORT_THREADS)
106 unsigned int ncpu = 1;
107 size_t nthreads = 1;
108 #endif
109
110 static bool gnusort_numeric_compatibility;
111
112 static struct sort_mods default_sort_mods_object;
113 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
114
115 static bool print_symbols_on_debug;
116
117 /*
118  * Arguments from file (when file0-from option is used:
119  */
120 static size_t argc_from_file0 = (size_t)-1;
121 static char **argv_from_file0;
122
/*
 * Identifiers for long options that have no single-character equivalent.
 * Numbering starts above CHAR_MAX so that none of them can collide with
 * an option letter.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#if defined(SORT_THREADS)
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
145
/*
 * Option letters that may not be combined with one another; see
 * check_mutually_exclusive_flags().
 */
#define NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS 6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
148
149 static struct option long_options[] = {
150                                 { "batch-size", required_argument, NULL, BS_OPT },
151                                 { "buffer-size", required_argument, NULL, 'S' },
152                                 { "check", optional_argument, NULL, 'c' },
153                                 { "check=silent|quiet", optional_argument, NULL, 'C' },
154                                 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
155                                 { "debug", no_argument, NULL, DEBUG_OPT },
156                                 { "dictionary-order", no_argument, NULL, 'd' },
157                                 { "field-separator", required_argument, NULL, 't' },
158                                 { "files0-from", required_argument, NULL, FF_OPT },
159                                 { "general-numeric-sort", no_argument, NULL, 'g' },
160                                 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
161                                 { "help",no_argument, NULL, HELP_OPT },
162                                 { "human-numeric-sort", no_argument, NULL, 'h' },
163                                 { "ignore-leading-blanks", no_argument, NULL, 'b' },
164                                 { "ignore-case", no_argument, NULL, 'f' },
165                                 { "ignore-nonprinting", no_argument, NULL, 'i' },
166                                 { "key", required_argument, NULL, 'k' },
167                                 { "merge", no_argument, NULL, 'm' },
168                                 { "mergesort", no_argument, NULL, MERGESORT_OPT },
169                                 { "mmap", no_argument, NULL, MMAP_OPT },
170                                 { "month-sort", no_argument, NULL, 'M' },
171                                 { "numeric-sort", no_argument, NULL, 'n' },
172                                 { "output", required_argument, NULL, 'o' },
173 #if defined(SORT_THREADS)
174                                 { "parallel", required_argument, NULL, PARALLEL_OPT },
175 #endif
176                                 { "qsort", no_argument, NULL, QSORT_OPT },
177                                 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
178                                 { "random-sort", no_argument, NULL, 'R' },
179                                 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
180                                 { "reverse", no_argument, NULL, 'r' },
181                                 { "sort", required_argument, NULL, SORT_OPT },
182                                 { "stable", no_argument, NULL, 's' },
183                                 { "temporary-directory",required_argument, NULL, 'T' },
184                                 { "unique", no_argument, NULL, 'u' },
185                                 { "version", no_argument, NULL, VERSION_OPT },
186                                 { "version-sort",no_argument, NULL, 'V' },
187                                 { "zero-terminated", no_argument, NULL, 'z' },
188                                 { NULL, no_argument, NULL, 0 }
189 };
190
/* Rewrites obsolete +POS1 [-POS2] arguments in argv as -k options. */
void fix_obsolete_keys(int *argc, char **argv);
192
193 /*
194  * Check where sort modifier is present
195  */
196 static bool
197 sort_modifier_empty(struct sort_mods *sm)
198 {
199
200         if (sm == NULL)
201                 return (true);
202         return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203             sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
204 }
205
/*
 * Print the usage text and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is 2;
 * otherwise it goes to stdout and the exit status is 0.  Never returns.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;

	/* Message 12 is the usage format; its single %s is the program name. */
	fprintf(out, getstr(12), getprogname());

	exit(opt_err ? 2 : 0);
}
225
226 /*
227  * Read input file names from a file (file0-from option).
228  */
229 static void
230 read_fns_from_file0(const char *fn)
231 {
232         FILE *f;
233         char *line = NULL;
234         size_t linesize = 0;
235         ssize_t linelen;
236
237         if (fn == NULL)
238                 return;
239
240         f = fopen(fn, "r");
241         if (f == NULL)
242                 err(2, "%s", fn);
243
244         while ((linelen = getdelim(&line, &linesize, '\0', f)) != -1) {
245                 if (*line != '\0') {
246                         if (argc_from_file0 == (size_t) - 1)
247                                 argc_from_file0 = 0;
248                         ++argc_from_file0;
249                         argv_from_file0 = sort_realloc(argv_from_file0,
250                             argc_from_file0 * sizeof(char *));
251                         if (argv_from_file0 == NULL)
252                                 err(2, NULL);
253                         argv_from_file0[argc_from_file0 - 1] = line;
254                 } else {
255                         free(line);
256                 }
257                 line = NULL;
258                 linesize = 0;
259         }
260         if (ferror(f))
261                 err(2, "%s: getdelim", fn);
262
263         closefile(f, fn);
264 }
265
266 /*
267  * Check how much RAM is available for the sort.
268  */
269 static void
270 set_hw_params(void)
271 {
272         long pages, psize;
273
274         pages = psize = 0;
275
276 #if defined(SORT_THREADS)
277         ncpu = 1;
278 #endif
279
280         pages = sysconf(_SC_PHYS_PAGES);
281         if (pages < 1) {
282                 perror("sysconf pages");
283                 psize = 1;
284         }
285         psize = sysconf(_SC_PAGESIZE);
286         if (psize < 1) {
287                 perror("sysconf psize");
288                 psize = 4096;
289         }
290 #if defined(SORT_THREADS)
291         ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
292         if (ncpu < 1)
293                 ncpu = 1;
294         else if(ncpu > 32)
295                 ncpu = 32;
296
297         nthreads = ncpu;
298 #endif
299
300         free_memory = (unsigned long long) pages * (unsigned long long) psize;
301         available_free_memory = free_memory / 2;
302
303         if (available_free_memory < 1024)
304                 available_free_memory = 1024;
305 }
306
/*
 * Convert the first multibyte character of c to a wide character.
 *
 * The result is stored in *wc; when mbtowc() fails or reports an empty
 * string, *wc is set to def instead.  Does nothing if wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
322
323 /*
324  * Set current locale symbols.
325  */
326 static void
327 set_locale(void)
328 {
329         struct lconv *lc;
330         const char *locale;
331
332         setlocale(LC_ALL, "");
333
334         lc = localeconv();
335
336         if (lc) {
337                 /* obtain LC_NUMERIC info */
338                 /* Convert to wide char form */
339                 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
340                     symbol_decimal_point);
341                 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
342                     symbol_thousands_sep);
343                 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
344                     symbol_positive_sign);
345                 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
346                     symbol_negative_sign);
347         }
348
349         if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
350                 gnusort_numeric_compatibility = true;
351
352         locale = setlocale(LC_COLLATE, NULL);
353
354         if (locale) {
355                 char *tmpl;
356                 const char *cclocale;
357
358                 tmpl = sort_strdup(locale);
359                 cclocale = setlocale(LC_COLLATE, "C");
360                 if (cclocale && !strcmp(cclocale, tmpl))
361                         byte_sort = true;
362                 else {
363                         const char *pclocale;
364
365                         pclocale = setlocale(LC_COLLATE, "POSIX");
366                         if (pclocale && !strcmp(pclocale, tmpl))
367                                 byte_sort = true;
368                 }
369                 setlocale(LC_COLLATE, tmpl);
370                 sort_free(tmpl);
371         }
372 }
373
374 /*
375  * Set directory temporary files.
376  */
377 static void
378 set_tmpdir(void)
379 {
380         char *td;
381
382         td = getenv("TMPDIR");
383         if (td != NULL)
384                 tmpdir = sort_strdup(td);
385 }
386
387 /*
388  * Parse -S option.
389  */
390 static unsigned long long
391 parse_memory_buffer_value(const char *value)
392 {
393
394         if (value == NULL)
395                 return (available_free_memory);
396         else {
397                 char *endptr;
398                 unsigned long long membuf;
399
400                 endptr = NULL;
401                 errno = 0;
402                 membuf = strtoll(value, &endptr, 10);
403
404                 if (errno != 0) {
405                         warn("%s",getstr(4));
406                         membuf = available_free_memory;
407                 } else {
408                         switch (*endptr){
409                         case 'Y':
410                                 membuf *= 1024;
411                                 /* FALLTHROUGH */
412                         case 'Z':
413                                 membuf *= 1024;
414                                 /* FALLTHROUGH */
415                         case 'E':
416                                 membuf *= 1024;
417                                 /* FALLTHROUGH */
418                         case 'P':
419                                 membuf *= 1024;
420                                 /* FALLTHROUGH */
421                         case 'T':
422                                 membuf *= 1024;
423                                 /* FALLTHROUGH */
424                         case 'G':
425                                 membuf *= 1024;
426                                 /* FALLTHROUGH */
427                         case 'M':
428                                 membuf *= 1024;
429                                 /* FALLTHROUGH */
430                         case '\0':
431                         case 'K':
432                                 membuf *= 1024;
433                                 /* FALLTHROUGH */
434                         case 'b':
435                                 break;
436                         case '%':
437                                 membuf = (available_free_memory * membuf) /
438                                     100;
439                                 break;
440                         default:
441                                 warnc(EINVAL, "%s", optarg);
442                                 membuf = available_free_memory;
443                         }
444                 }
445                 return (membuf);
446         }
447 }
448
449 /*
450  * Signal handler that clears the temporary files.
451  */
452 static void
453 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
454     void *context __unused)
455 {
456
457         clear_tmp_files();
458         exit(-1);
459 }
460
461 /*
462  * Set signal handler on panic signals.
463  */
464 static void
465 set_signal_handler(void)
466 {
467         struct sigaction sa;
468
469         memset(&sa, 0, sizeof(sa));
470         sa.sa_sigaction = &sig_handler;
471         sa.sa_flags = SA_SIGINFO;
472
473         if (sigaction(SIGTERM, &sa, NULL) < 0) {
474                 perror("sigaction");
475                 return;
476         }
477         if (sigaction(SIGHUP, &sa, NULL) < 0) {
478                 perror("sigaction");
479                 return;
480         }
481         if (sigaction(SIGINT, &sa, NULL) < 0) {
482                 perror("sigaction");
483                 return;
484         }
485         if (sigaction(SIGQUIT, &sa, NULL) < 0) {
486                 perror("sigaction");
487                 return;
488         }
489         if (sigaction(SIGABRT, &sa, NULL) < 0) {
490                 perror("sigaction");
491                 return;
492         }
493         if (sigaction(SIGBUS, &sa, NULL) < 0) {
494                 perror("sigaction");
495                 return;
496         }
497         if (sigaction(SIGSEGV, &sa, NULL) < 0) {
498                 perror("sigaction");
499                 return;
500         }
501         if (sigaction(SIGUSR1, &sa, NULL) < 0) {
502                 perror("sigaction");
503                 return;
504         }
505         if (sigaction(SIGUSR2, &sa, NULL) < 0) {
506                 perror("sigaction");
507                 return;
508         }
509 }
510
/*
 * Report an unknown feature: prints message 3 followed by what, then
 * exits with status 2.
 */
static void
unknown(const char *what)
{
	const char *msg;

	msg = getstr(3);
	errx(2, "%s: %s", msg, what);
}
520
521 /*
522  * Check whether contradictory input options are used.
523  */
524 static void
525 check_mutually_exclusive_flags(char c, bool *mef_flags)
526 {
527         int fo_index, mec;
528         bool found_others, found_this;
529
530         found_others = found_this =false;
531         fo_index = 0;
532
533         for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
534                 mec = mutually_exclusive_flags[i];
535
536                 if (mec != c) {
537                         if (mef_flags[i]) {
538                                 if (found_this)
539                                         errx(1, "%c:%c: %s", c, mec, getstr(1));
540                                 found_others = true;
541                                 fo_index = i;
542                         }
543                 } else {
544                         if (found_others)
545                                 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
546                         mef_flags[i] = true;
547                         found_this = true;
548                 }
549         }
550 }
551
552 /*
553  * Initialise sort opts data.
554  */
555 static void
556 set_sort_opts(void)
557 {
558
559         memset(&default_sort_mods_object, 0,
560             sizeof(default_sort_mods_object));
561         memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
562         default_sort_mods_object.func =
563             get_sort_func(&default_sort_mods_object);
564 }
565
566 /*
567  * Set a sort modifier on a sort modifiers object.
568  */
569 static bool
570 set_sort_modifier(struct sort_mods *sm, int c)
571 {
572
573         if (sm) {
574                 switch (c){
575                 case 'b':
576                         sm->bflag = true;
577                         break;
578                 case 'd':
579                         sm->dflag = true;
580                         break;
581                 case 'f':
582                         sm->fflag = true;
583                         break;
584                 case 'g':
585                         sm->gflag = true;
586                         need_hint = true;
587                         break;
588                 case 'i':
589                         sm->iflag = true;
590                         break;
591                 case 'R':
592                         sm->Rflag = true;
593                         need_random = true;
594                         break;
595                 case 'M':
596                         initialise_months();
597                         sm->Mflag = true;
598                         need_hint = true;
599                         break;
600                 case 'n':
601                         sm->nflag = true;
602                         need_hint = true;
603                         print_symbols_on_debug = true;
604                         break;
605                 case 'r':
606                         sm->rflag = true;
607                         break;
608                 case 'V':
609                         sm->Vflag = true;
610                         break;
611                 case 'h':
612                         sm->hflag = true;
613                         need_hint = true;
614                         print_symbols_on_debug = true;
615                         break;
616                 default:
617                         return false;
618                 }
619                 sort_opts_vals.complex_sort = true;
620                 sm->func = get_sort_func(sm);
621         }
622         return (true);
623 }
624
625 /*
626  * Parse POS in -k option.
627  */
628 static int
629 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
630 {
631         regmatch_t pmatch[4];
632         regex_t re;
633         char *c, *f;
634         const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
635         size_t len, nmatch;
636         int ret;
637
638         ret = -1;
639         nmatch = 4;
640         c = f = NULL;
641
642         if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
643                 return (-1);
644
645         if (regexec(&re, s, nmatch, pmatch, 0) != 0)
646                 goto end;
647
648         if (pmatch[0].rm_eo <= pmatch[0].rm_so)
649                 goto end;
650
651         if (pmatch[1].rm_eo <= pmatch[1].rm_so)
652                 goto end;
653
654         len = pmatch[1].rm_eo - pmatch[1].rm_so;
655         f = sort_malloc((len + 1) * sizeof(char));
656
657         strncpy(f, s + pmatch[1].rm_so, len);
658         f[len] = '\0';
659
660         if (second) {
661                 errno = 0;
662                 ks->f2 = (size_t) strtoul(f, NULL, 10);
663                 if (errno != 0)
664                         err(2, "-k");
665                 if (ks->f2 == 0) {
666                         warn("%s",getstr(5));
667                         goto end;
668                 }
669         } else {
670                 errno = 0;
671                 ks->f1 = (size_t) strtoul(f, NULL, 10);
672                 if (errno != 0)
673                         err(2, "-k");
674                 if (ks->f1 == 0) {
675                         warn("%s",getstr(5));
676                         goto end;
677                 }
678         }
679
680         if (pmatch[2].rm_eo > pmatch[2].rm_so) {
681                 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
682                 c = sort_malloc((len + 1) * sizeof(char));
683
684                 strncpy(c, s + pmatch[2].rm_so + 1, len);
685                 c[len] = '\0';
686
687                 if (second) {
688                         errno = 0;
689                         ks->c2 = (size_t) strtoul(c, NULL, 10);
690                         if (errno != 0)
691                                 err(2, "-k");
692                 } else {
693                         errno = 0;
694                         ks->c1 = (size_t) strtoul(c, NULL, 10);
695                         if (errno != 0)
696                                 err(2, "-k");
697                         if (ks->c1 == 0) {
698                                 warn("%s",getstr(6));
699                                 goto end;
700                         }
701                 }
702         } else {
703                 if (second)
704                         ks->c2 = 0;
705                 else
706                         ks->c1 = 1;
707         }
708
709         if (pmatch[3].rm_eo > pmatch[3].rm_so) {
710                 regoff_t i = 0;
711
712                 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
713                         check_mutually_exclusive_flags(s[i], mef_flags);
714                         if (s[i] == 'b') {
715                                 if (second)
716                                         ks->pos2b = true;
717                                 else
718                                         ks->pos1b = true;
719                         } else if (!set_sort_modifier(&(ks->sm), s[i]))
720                                 goto end;
721                 }
722         }
723
724         ret = 0;
725
726 end:
727
728         if (c)
729                 sort_free(c);
730         if (f)
731                 sort_free(f);
732         regfree(&re);
733
734         return (ret);
735 }
736
737 /*
738  * Parse -k option value.
739  */
740 static int
741 parse_k(const char *s, struct key_specs *ks)
742 {
743         int ret = -1;
744         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
745             { false, false, false, false, false, false };
746
747         if (s && *s) {
748                 char *sptr;
749
750                 sptr = strchr(s, ',');
751                 if (sptr) {
752                         size_t size1;
753                         char *pos1, *pos2;
754
755                         size1 = sptr - s;
756
757                         if (size1 < 1)
758                                 return (-1);
759                         pos1 = sort_malloc((size1 + 1) * sizeof(char));
760
761                         strncpy(pos1, s, size1);
762                         pos1[size1] = '\0';
763
764                         ret = parse_pos(pos1, ks, mef_flags, false);
765
766                         sort_free(pos1);
767                         if (ret < 0)
768                                 return (ret);
769
770                         pos2 = sort_strdup(sptr + 1);
771                         ret = parse_pos(pos2, ks, mef_flags, true);
772                         sort_free(pos2);
773                 } else
774                         ret = parse_pos(s, ks, mef_flags, false);
775         }
776
777         return (ret);
778 }
779
/*
 * Size, including the terminating NUL, of the buffers that receive the
 * modifier letters of an obsolete +POS / -POS argument.
 */
#define OBSOLETE_KEY_SOPTS_SIZE 128

/*
 * Parse POS in +POS -POS option.
 *
 * s has the form FIELD[.COLUMN][LETTERS].  The field is stored in *nf and
 * the column in *nc (0 when absent).  The letters, if any, are copied
 * into sopts, which must provide OBSOLETE_KEY_SOPTS_SIZE bytes; sopts is
 * left untouched when there are none.
 *
 * Returns 0 on success and -1 when s does not match or carries more
 * letters than sopts can hold.  A number that cannot be represented ends
 * the program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	unsigned long val;
	int ret;
	size_t len, nmatch;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/*
	 * Check the modifier length first so that an oversized argument
	 * can never be copied past the end of the caller's buffer.
	 */
	if (pmatch[3].rm_eo > pmatch[3].rm_so &&
	    (size_t)(pmatch[3].rm_eo - pmatch[3].rm_so) >=
	    OBSOLETE_KEY_SOPTS_SIZE)
		goto end;

	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));

	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	/* The caller adds 1 to the result, so INT_MAX itself is refused. */
	errno = 0;
	val = strtoul(f, NULL, 10);
	if (errno != 0 || val >= INT_MAX)
		errx(2, "%s", getstr(11));
	*nf = (int)val;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Group 2 includes the leading '.', which is skipped. */
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));

		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		val = strtoul(c, NULL, 10);
		if (errno != 0 || val >= INT_MAX)
			errx(2, "%s", getstr(11));
		*nc = (int)val;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		len = pmatch[3].rm_eo - pmatch[3].rm_so;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 -POS2 syntax into new -kPOS1,POS2 syntax.
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-k..." string with field and column converted from 0-based to 1-based.
 * If the next argument is a valid -POS2 it is folded into the same key,
 * the remaining arguments are shifted down and *argc is decremented.
 * Arguments that do not parse as a position are left untouched.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for "-k", four ints, punctuation and both modifier strings. */
	char sopt[2 * OBSOLETE_KEY_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char sopts1[OBSOLETE_KEY_SOPTS_SIZE];
		char *arg1;
		int c1, f1;

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = '\0';
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				char sopts2[OBSOLETE_KEY_SOPTS_SIZE];
				int c2, f2;

				sopts2[0] = '\0';
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);
					/* Drop the consumed -POS2 argument. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
910
/*
 * Set random seed.
 *
 * Does nothing unless need_random is set.  With the default random source
 * up to MAX_DEFAULT_RANDOM_SEED_DATA_SIZE bytes are read from it and
 * hashed; with any other source the whole file is hashed by MD5File().
 * The resulting MD5 hex string becomes random_seed and is fed into the
 * freshly initialised global md5_ctx.  Exits with status 2 when no digest
 * can be produced.
 */
static void
set_random_seed(void)
{
	char *digest;

	if (!need_random)
		return;

	if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
		FILE *fseed;
		MD5_CTX ctx;
		char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
		size_t sz;

		/* NOTE(review): assumes openfile() never returns NULL - confirm. */
		fseed = openfile(random_source, "r");
		/* A short read (EOF or error) just yields less seed data. */
		sz = fread(rsd, 1, sizeof(rsd), fseed);
		closefile(fseed, random_source);

		MD5Init(&ctx);
		MD5Update(&ctx, rsd, sz);
		digest = MD5End(&ctx, NULL);
	} else {
		digest = MD5File(random_source, NULL);
	}

	/* Both calls return NULL when they cannot produce a digest. */
	if (digest == NULL)
		err(2, NULL);

	random_seed = digest;
	random_seed_size = strlen(digest);

	MD5Init(&md5_ctx);
	if (random_seed_size > 0)
		MD5Update(&md5_ctx, random_seed, random_seed_size);
}
966
967 /*
968  * Main function.
969  */
970 int
971 main(int argc, char **argv)
972 {
973         char *outfile, *real_outfile;
974         int c, result;
975         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
976             { false, false, false, false, false, false };
977
978         result = 0;
979         outfile = sort_strdup("-");
980         real_outfile = NULL;
981
982         struct sort_mods *sm = &default_sort_mods_object;
983
984         init_tmp_files();
985
986         set_signal_handler();
987
988         set_hw_params();
989         set_locale();
990         set_tmpdir();
991         set_sort_opts();
992
993         fix_obsolete_keys(&argc, argv);
994
995         while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
996             != -1)) {
997
998                 check_mutually_exclusive_flags(c, mef_flags);
999
1000                 if (!set_sort_modifier(sm, c)) {
1001
1002                         switch (c) {
1003                         case 'c':
1004                                 sort_opts_vals.cflag = true;
1005                                 if (optarg) {
1006                                         if (!strcmp(optarg, "diagnose-first"))
1007                                                 ;
1008                                         else if (!strcmp(optarg, "silent") ||
1009                                             !strcmp(optarg, "quiet"))
1010                                                 sort_opts_vals.csilentflag = true;
1011                                         else if (*optarg)
1012                                                 unknown(optarg);
1013                                 }
1014                                 break;
1015                         case 'C':
1016                                 sort_opts_vals.cflag = true;
1017                                 sort_opts_vals.csilentflag = true;
1018                                 break;
1019                         case 'k':
1020                         {
1021                                 sort_opts_vals.complex_sort = true;
1022                                 sort_opts_vals.kflag = true;
1023
1024                                 keys_num++;
1025                                 keys = sort_realloc(keys, keys_num *
1026                                     sizeof(struct key_specs));
1027                                 memset(&(keys[keys_num - 1]), 0,
1028                                     sizeof(struct key_specs));
1029
1030                                 if (parse_k(optarg, &(keys[keys_num - 1]))
1031                                     < 0) {
1032                                         errc(2, EINVAL, "-k %s", optarg);
1033                                 }
1034
1035                                 break;
1036                         }
1037                         case 'm':
1038                                 sort_opts_vals.mflag = true;
1039                                 break;
1040                         case 'o':
1041                                 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1042                                 strcpy(outfile, optarg);
1043                                 break;
1044                         case 's':
1045                                 sort_opts_vals.sflag = true;
1046                                 break;
1047                         case 'S':
1048                                 available_free_memory =
1049                                     parse_memory_buffer_value(optarg);
1050                                 break;
1051                         case 'T':
1052                                 tmpdir = sort_strdup(optarg);
1053                                 break;
1054                         case 't':
1055                                 while (strlen(optarg) > 1) {
1056                                         if (optarg[0] != '\\') {
1057                                                 errc(2, EINVAL, "%s", optarg);
1058                                         }
1059                                         optarg += 1;
1060                                         if (*optarg == '0') {
1061                                                 *optarg = 0;
1062                                                 break;
1063                                         }
1064                                 }
1065                                 sort_opts_vals.tflag = true;
1066                                 sort_opts_vals.field_sep = btowc(optarg[0]);
1067                                 if (sort_opts_vals.field_sep == WEOF) {
1068                                         errno = EINVAL;
1069                                         err(2, NULL);
1070                                 }
1071                                 if (!gnusort_numeric_compatibility) {
1072                                         if (symbol_decimal_point == sort_opts_vals.field_sep)
1073                                                 symbol_decimal_point = WEOF;
1074                                         if (symbol_thousands_sep == sort_opts_vals.field_sep)
1075                                                 symbol_thousands_sep = WEOF;
1076                                         if (symbol_negative_sign == sort_opts_vals.field_sep)
1077                                                 symbol_negative_sign = WEOF;
1078                                         if (symbol_positive_sign == sort_opts_vals.field_sep)
1079                                                 symbol_positive_sign = WEOF;
1080                                 }
1081                                 break;
1082                         case 'u':
1083                                 sort_opts_vals.uflag = true;
1084                                 /* stable sort for the correct unique val */
1085                                 sort_opts_vals.sflag = true;
1086                                 break;
1087                         case 'z':
1088                                 sort_opts_vals.zflag = true;
1089                                 break;
1090                         case SORT_OPT:
1091                                 if (optarg) {
1092                                         if (!strcmp(optarg, "general-numeric"))
1093                                                 set_sort_modifier(sm, 'g');
1094                                         else if (!strcmp(optarg, "human-numeric"))
1095                                                 set_sort_modifier(sm, 'h');
1096                                         else if (!strcmp(optarg, "numeric"))
1097                                                 set_sort_modifier(sm, 'n');
1098                                         else if (!strcmp(optarg, "month"))
1099                                                 set_sort_modifier(sm, 'M');
1100                                         else if (!strcmp(optarg, "random"))
1101                                                 set_sort_modifier(sm, 'R');
1102                                         else
1103                                                 unknown(optarg);
1104                                 }
1105                                 break;
1106 #if defined(SORT_THREADS)
1107                         case PARALLEL_OPT:
1108                                 nthreads = (size_t)(atoi(optarg));
1109                                 if (nthreads < 1)
1110                                         nthreads = 1;
1111                                 if (nthreads > 1024)
1112                                         nthreads = 1024;
1113                                 break;
1114 #endif
1115                         case QSORT_OPT:
1116                                 sort_opts_vals.sort_method = SORT_QSORT;
1117                                 break;
1118                         case MERGESORT_OPT:
1119                                 sort_opts_vals.sort_method = SORT_MERGESORT;
1120                                 break;
1121                         case MMAP_OPT:
1122                                 use_mmap = true;
1123                                 break;
1124                         case HEAPSORT_OPT:
1125                                 sort_opts_vals.sort_method = SORT_HEAPSORT;
1126                                 break;
1127                         case RADIXSORT_OPT:
1128                                 sort_opts_vals.sort_method = SORT_RADIXSORT;
1129                                 break;
1130                         case RANDOMSOURCE_OPT:
1131                                 random_source = strdup(optarg);
1132                                 break;
1133                         case COMPRESSPROGRAM_OPT:
1134                                 compress_program = strdup(optarg);
1135                                 break;
1136                         case FF_OPT:
1137                                 read_fns_from_file0(optarg);
1138                                 break;
1139                         case BS_OPT:
1140                         {
1141                                 errno = 0;
1142                                 long mof = strtol(optarg, NULL, 10);
1143                                 if (errno != 0)
1144                                         err(2, "--batch-size");
1145                                 if (mof >= 2)
1146                                         max_open_files = (size_t) mof + 1;
1147                         }
1148                                 break;
1149                         case VERSION_OPT:
1150                                 printf("%s\n", VERSION);
1151                                 exit(EXIT_SUCCESS);
1152                                 /* NOTREACHED */
1153                                 break;
1154                         case DEBUG_OPT:
1155                                 debug_sort = true;
1156                                 break;
1157                         case HELP_OPT:
1158                                 usage(false);
1159                                 /* NOTREACHED */
1160                                 break;
1161                         default:
1162                                 usage(true);
1163                                 /* NOTREACHED */
1164                         }
1165                 }
1166         }
1167
1168         argc -= optind;
1169         argv += optind;
1170
1171 #ifndef WITHOUT_NLS
1172         catalog = catopen("sort", NL_CAT_LOCALE);
1173 #endif
1174
1175         if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1176                 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1177
1178 #ifndef WITHOUT_NLS
1179         catclose(catalog);
1180 #endif
1181
1182         if (keys_num == 0) {
1183                 keys_num = 1;
1184                 keys = sort_realloc(keys, sizeof(struct key_specs));
1185                 memset(&(keys[0]), 0, sizeof(struct key_specs));
1186                 keys[0].c1 = 1;
1187                 keys[0].pos1b = default_sort_mods->bflag;
1188                 keys[0].pos2b = default_sort_mods->bflag;
1189                 memcpy(&(keys[0].sm), default_sort_mods,
1190                     sizeof(struct sort_mods));
1191         }
1192
1193         for (size_t i = 0; i < keys_num; i++) {
1194                 struct key_specs *ks;
1195
1196                 ks = &(keys[i]);
1197
1198                 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1199                     !(ks->pos2b)) {
1200                         ks->pos1b = sm->bflag;
1201                         ks->pos2b = sm->bflag;
1202                         memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1203                 }
1204
1205                 ks->sm.func = get_sort_func(&(ks->sm));
1206         }
1207
1208         if (argv_from_file0) {
1209                 argc = argc_from_file0;
1210                 argv = argv_from_file0;
1211         }
1212
1213         if (debug_sort) {
1214                 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1215 #if defined(SORT_THREADS)
1216                 printf("Number of CPUs: %d\n",(int)ncpu);
1217                 nthreads = 1;
1218 #endif
1219                 printf("Using collate rules of %s locale\n",
1220                     setlocale(LC_COLLATE, NULL));
1221                 if (byte_sort)
1222                         printf("Byte sort is used\n");
1223                 if (print_symbols_on_debug) {
1224                         printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1225                         if (symbol_thousands_sep)
1226                                 printf("Thousands separator: <%lc>\n",
1227                                     symbol_thousands_sep);
1228                         printf("Positive sign: <%lc>\n", symbol_positive_sign);
1229                         printf("Negative sign: <%lc>\n", symbol_negative_sign);
1230                 }
1231         }
1232
1233         set_random_seed();
1234
1235         /* Case when the outfile equals one of the input files: */
1236         if (strcmp(outfile, "-")) {
1237
1238                 for(int i = 0; i < argc; ++i) {
1239                         if (strcmp(argv[i], outfile) == 0) {
1240                                 real_outfile = sort_strdup(outfile);
1241                                 for(;;) {
1242                                         char* tmp = sort_malloc(strlen(outfile) +
1243                                             strlen(".tmp") + 1);
1244
1245                                         strcpy(tmp, outfile);
1246                                         strcpy(tmp + strlen(tmp), ".tmp");
1247                                         sort_free(outfile);
1248                                         outfile = tmp;
1249                                         if (access(outfile, F_OK) < 0)
1250                                                 break;
1251                                 }
1252                                 tmp_file_atexit(outfile);
1253                         }
1254                 }
1255         }
1256
1257 #if defined(SORT_THREADS)
1258         if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1259                 nthreads = 1;
1260 #endif
1261
1262         if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1263                 struct file_list fl;
1264                 struct sort_list list;
1265
1266                 sort_list_init(&list);
1267                 file_list_init(&fl, true);
1268
1269                 if (argc < 1)
1270                         procfile("-", &list, &fl);
1271                 else {
1272                         while (argc > 0) {
1273                                 procfile(*argv, &list, &fl);
1274                                 --argc;
1275                                 ++argv;
1276                         }
1277                 }
1278
1279                 if (fl.count < 1)
1280                         sort_list_to_file(&list, outfile);
1281                 else {
1282                         if (list.count > 0) {
1283                                 char *flast = new_tmp_file_name();
1284
1285                                 sort_list_to_file(&list, flast);
1286                                 file_list_add(&fl, flast, false);
1287                         }
1288                         merge_files(&fl, outfile);
1289                 }
1290
1291                 file_list_clean(&fl);
1292
1293                 /*
1294                  * We are about to exit the program, so we can ignore
1295                  * the clean-up for speed
1296                  *
1297                  * sort_list_clean(&list);
1298                  */
1299
1300         } else if (sort_opts_vals.cflag) {
1301                 result = (argc == 0) ? (check("-")) : (check(*argv));
1302         } else if (sort_opts_vals.mflag) {
1303                 struct file_list fl;
1304
1305                 file_list_init(&fl, false);
1306                 file_list_populate(&fl, argc, argv, true);
1307                 merge_files(&fl, outfile);
1308                 file_list_clean(&fl);
1309         }
1310
1311         if (real_outfile) {
1312                 unlink(real_outfile);
1313                 if (rename(outfile, real_outfile) < 0)
1314                         err(2, NULL);
1315                 sort_free(real_outfile);
1316         }
1317
1318         sort_free(outfile);
1319
1320         return (result);
1321 }