]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - usr.bin/sort/sort.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / usr.bin / sort / sort.c
1 /*-
2  * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3  * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30
31 #include <sys/stat.h>
32 #include <sys/sysctl.h>
33 #include <sys/types.h>
34
35 #include <err.h>
36 #include <errno.h>
37 #include <getopt.h>
38 #include <limits.h>
39 #include <locale.h>
40 #include <md5.h>
41 #include <regex.h>
42 #include <signal.h>
43 #include <stdbool.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include <unistd.h>
48 #include <wchar.h>
49 #include <wctype.h>
50
51 #include "coll.h"
52 #include "file.h"
53 #include "sort.h"
54
55 #ifndef WITHOUT_NLS
56 #include <nl_types.h>
57 nl_catd catalog;
58 #endif
59
/*
 * Short-option specification in getopt(3) syntax: a letter followed by
 * ':' takes an argument.
 */
#define	OPTIONS	"bcCdfghik:Mmno:RrsS:t:T:uVz"

/* Seed source used for -R unless another random source is selected. */
#define	DEFAULT_RANDOM_SORT_SEED_FILE	("/dev/random")
/* Upper bound on the number of bytes read from the default seed source. */
#define	MAX_DEFAULT_RANDOM_SEED_DATA_SIZE	(1024)
64
65 static bool need_random;
66 static const char *random_source = DEFAULT_RANDOM_SORT_SEED_FILE;
67 static const void *random_seed;
68 static size_t random_seed_size;
69
70 MD5_CTX md5_ctx;
71
/*
 * Built-in message table, indexed by message number.  These are the
 * texts used when NLS is disabled or no catalogue is found.  Slot 0 is
 * an empty placeholder so that message numbers start at 1.
 */
const char *nlsstr[] = {
	[0] = "",
	[1] = "mutually exclusive flags",
	[2] = "extra argument not allowed with -c",
	[3] = "Unknown feature",
	[4] = "Wrong memory buffer specification",
	[5] = "0 field in key specs",
	[6] = "0 column in key specs",
	[7] = "Wrong file mode",
	[8] = "Cannot open file for reading",
	[9] = "Radix sort cannot be used with these sort options",
	[10] = "The chosen sort method cannot be used with stable and/or unique sort",
	[11] = "Invalid key position",
	[12] = "Usage: %s [-bcCdfigMmnrsuz] [-kPOS1[,POS2] ... ] "
	    "[+POS1 [-POS2]] [-S memsize] [-T tmpdir] [-t separator] "
	    "[-o outfile] [--batch-size size] [--files0-from file] "
	    "[--heapsort] [--mergesort] [--radixsort] [--qsort] "
	    "[--mmap] "
#if defined(SORT_THREADS)
	    "[--parallel thread_no] "
#endif
	    "[--human-numeric-sort] "
	    "[--version-sort] [--random-sort [--random-source file]] "
	    "[--compress-program program] [file ...]\n"
};
99
100 struct sort_opts sort_opts_vals;
101
102 bool debug_sort;
103 bool need_hint;
104
105 #if defined(SORT_THREADS)
106 unsigned int ncpu = 1;
107 size_t nthreads = 1;
108 #endif
109
110 static bool gnusort_numeric_compatibility;
111
112 static struct sort_mods default_sort_mods_object;
113 struct sort_mods * const default_sort_mods = &default_sort_mods_object;
114
115 static bool print_symbols_on_debug;
116
117 /*
118  * Arguments from file (when file0-from option is used:
119  */
120 static size_t argc_from_file0 = (size_t)-1;
121 static char **argv_from_file0;
122
/*
 * Return codes for long options that have no single-character
 * equivalent.  Numbering starts just above CHAR_MAX so that no value
 * can coincide with a short option character.
 */
enum {
	SORT_OPT = CHAR_MAX + 1,
	HELP_OPT,
	FF_OPT,
	BS_OPT,
	VERSION_OPT,
	DEBUG_OPT,
#ifdef SORT_THREADS
	PARALLEL_OPT,
#endif
	RANDOMSOURCE_OPT,
	COMPRESSPROGRAM_OPT,
	QSORT_OPT,
	MERGESORT_OPT,
	HEAPSORT_OPT,
	RADIXSORT_OPT,
	MMAP_OPT
};
145
/* Sort-type option letters of which at most one may be given. */
#define	NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS	6
static const char mutually_exclusive_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] = {
	'M', 'n', 'g', 'R', 'h', 'V'
};
148
149 static struct option long_options[] = {
150                                 { "batch-size", required_argument, NULL, BS_OPT },
151                                 { "buffer-size", required_argument, NULL, 'S' },
152                                 { "check", optional_argument, NULL, 'c' },
153                                 { "check=silent|quiet", optional_argument, NULL, 'C' },
154                                 { "compress-program", required_argument, NULL, COMPRESSPROGRAM_OPT },
155                                 { "debug", no_argument, NULL, DEBUG_OPT },
156                                 { "dictionary-order", no_argument, NULL, 'd' },
157                                 { "field-separator", required_argument, NULL, 't' },
158                                 { "files0-from", required_argument, NULL, FF_OPT },
159                                 { "general-numeric-sort", no_argument, NULL, 'g' },
160                                 { "heapsort", no_argument, NULL, HEAPSORT_OPT },
161                                 { "help",no_argument, NULL, HELP_OPT },
162                                 { "human-numeric-sort", no_argument, NULL, 'h' },
163                                 { "ignore-leading-blanks", no_argument, NULL, 'b' },
164                                 { "ignore-case", no_argument, NULL, 'f' },
165                                 { "ignore-nonprinting", no_argument, NULL, 'i' },
166                                 { "key", required_argument, NULL, 'k' },
167                                 { "merge", no_argument, NULL, 'm' },
168                                 { "mergesort", no_argument, NULL, MERGESORT_OPT },
169                                 { "mmap", no_argument, NULL, MMAP_OPT },
170                                 { "month-sort", no_argument, NULL, 'M' },
171                                 { "numeric-sort", no_argument, NULL, 'n' },
172                                 { "output", required_argument, NULL, 'o' },
173 #if defined(SORT_THREADS)
174                                 { "parallel", required_argument, NULL, PARALLEL_OPT },
175 #endif
176                                 { "qsort", no_argument, NULL, QSORT_OPT },
177                                 { "radixsort", no_argument, NULL, RADIXSORT_OPT },
178                                 { "random-sort", no_argument, NULL, 'R' },
179                                 { "random-source", required_argument, NULL, RANDOMSOURCE_OPT },
180                                 { "reverse", no_argument, NULL, 'r' },
181                                 { "sort", required_argument, NULL, SORT_OPT },
182                                 { "stable", no_argument, NULL, 's' },
183                                 { "temporary-directory",required_argument, NULL, 'T' },
184                                 { "unique", no_argument, NULL, 'u' },
185                                 { "version", no_argument, NULL, VERSION_OPT },
186                                 { "version-sort",no_argument, NULL, 'V' },
187                                 { "zero-terminated", no_argument, NULL, 'z' },
188                                 { NULL, no_argument, NULL, 0 }
189 };
190
/* Rewrites obsolete "+POS1 [-POS2]" arguments in argv; defined below. */
void	fix_obsolete_keys(int *argc, char **argv);
192
193 /*
194  * Check where sort modifier is present
195  */
196 static bool
197 sort_modifier_empty(struct sort_mods *sm)
198 {
199
200         if (sm == NULL)
201                 return (true);
202         return (!(sm->Mflag || sm->Vflag || sm->nflag || sm->gflag ||
203             sm->rflag || sm->Rflag || sm->hflag || sm->dflag || sm->fflag));
204 }
205
/*
 * Print the usage text (message 12) and terminate the program.
 *
 * When opt_err is true the text goes to stderr and the exit status is
 * 2; otherwise it goes to stdout and the exit status is 0.  This
 * function does not return.
 */
static void
usage(bool opt_err)
{
	FILE *out;

	out = opt_err ? stderr : stdout;

	/* The message text supplies the format; it expects the program name. */
	fprintf(out, getstr(12), getprogname());
	exit(opt_err ? 2 : 0);
}
225
226 /*
227  * Read input file names from a file (file0-from option).
228  */
229 static void
230 read_fns_from_file0(const char *fn)
231 {
232         if (fn) {
233                 struct file0_reader f0r;
234                 FILE *f;
235
236                 f = fopen(fn, "r");
237                 if (f == NULL)
238                         err(2, NULL);
239
240                 memset(&f0r, 0, sizeof(f0r));
241                 f0r.f = f;
242
243                 while (!feof(f)) {
244                         char *line = read_file0_line(&f0r);
245
246                         if (line && *line) {
247                                 if (argc_from_file0 == (size_t)-1)
248                                         argc_from_file0 = 0;
249                                 ++argc_from_file0;
250                                 argv_from_file0 = sort_realloc(argv_from_file0,
251                                     argc_from_file0 * sizeof(char *));
252                                 if (argv_from_file0 == NULL)
253                                         err(2, NULL);
254                                 argv_from_file0[argc_from_file0 - 1] =
255                                     sort_strdup(line);
256                         }
257                 }
258                 closefile(f, fn);
259         }
260 }
261
262 /*
263  * Check how much RAM is available for the sort.
264  */
265 static void
266 set_hw_params(void)
267 {
268         long pages, psize;
269
270         pages = psize = 0;
271
272 #if defined(SORT_THREADS)
273         ncpu = 1;
274 #endif
275
276         pages = sysconf(_SC_PHYS_PAGES);
277         if (pages < 1) {
278                 perror("sysconf pages");
279                 psize = 1;
280         }
281         psize = sysconf(_SC_PAGESIZE);
282         if (psize < 1) {
283                 perror("sysconf psize");
284                 psize = 4096;
285         }
286 #if defined(SORT_THREADS)
287         ncpu = (unsigned int)sysconf(_SC_NPROCESSORS_ONLN);
288         if (ncpu < 1)
289                 ncpu = 1;
290         else if(ncpu > 32)
291                 ncpu = 32;
292
293         nthreads = ncpu;
294 #endif
295
296         free_memory = (unsigned long long) pages * (unsigned long long) psize;
297         available_free_memory = free_memory / 2;
298
299         if (available_free_memory < 1024)
300                 available_free_memory = 1024;
301 }
302
/*
 * Convert the leading multibyte character of c into a wide character.
 *
 * The result is stored in *wc.  When mbtowc() reports an error or an
 * empty string (a return value below 1), *wc is set to def instead.
 * Nothing happens if either wc or c is NULL.
 */
static void
conv_mbtowc(wchar_t *wc, const char *c, const wchar_t def)
{

	if (wc == NULL || c == NULL)
		return;

	if (mbtowc(wc, c, MB_CUR_MAX) < 1)
		*wc = def;
}
318
319 /*
320  * Set current locale symbols.
321  */
322 static void
323 set_locale(void)
324 {
325         struct lconv *lc;
326         const char *locale;
327
328         setlocale(LC_ALL, "");
329
330         lc = localeconv();
331
332         if (lc) {
333                 /* obtain LC_NUMERIC info */
334                 /* Convert to wide char form */
335                 conv_mbtowc(&symbol_decimal_point, lc->decimal_point,
336                     symbol_decimal_point);
337                 conv_mbtowc(&symbol_thousands_sep, lc->thousands_sep,
338                     symbol_thousands_sep);
339                 conv_mbtowc(&symbol_positive_sign, lc->positive_sign,
340                     symbol_positive_sign);
341                 conv_mbtowc(&symbol_negative_sign, lc->negative_sign,
342                     symbol_negative_sign);
343         }
344
345         if (getenv("GNUSORT_NUMERIC_COMPATIBILITY"))
346                 gnusort_numeric_compatibility = true;
347
348         locale = setlocale(LC_COLLATE, NULL);
349
350         if (locale) {
351                 char *tmpl;
352                 const char *cclocale;
353
354                 tmpl = sort_strdup(locale);
355                 cclocale = setlocale(LC_COLLATE, "C");
356                 if (cclocale && !strcmp(cclocale, tmpl))
357                         byte_sort = true;
358                 else {
359                         const char *pclocale;
360
361                         pclocale = setlocale(LC_COLLATE, "POSIX");
362                         if (pclocale && !strcmp(pclocale, tmpl))
363                                 byte_sort = true;
364                 }
365                 setlocale(LC_COLLATE, tmpl);
366                 sort_free(tmpl);
367         }
368 }
369
370 /*
371  * Set directory temporary files.
372  */
373 static void
374 set_tmpdir(void)
375 {
376         char *td;
377
378         td = getenv("TMPDIR");
379         if (td != NULL)
380                 tmpdir = sort_strdup(td);
381 }
382
383 /*
384  * Parse -S option.
385  */
386 static unsigned long long
387 parse_memory_buffer_value(const char *value)
388 {
389
390         if (value == NULL)
391                 return (available_free_memory);
392         else {
393                 char *endptr;
394                 unsigned long long membuf;
395
396                 endptr = NULL;
397                 errno = 0;
398                 membuf = strtoll(value, &endptr, 10);
399
400                 if (errno != 0) {
401                         warn("%s",getstr(4));
402                         membuf = available_free_memory;
403                 } else {
404                         switch (*endptr){
405                         case 'Y':
406                                 membuf *= 1024;
407                                 /* FALLTHROUGH */
408                         case 'Z':
409                                 membuf *= 1024;
410                                 /* FALLTHROUGH */
411                         case 'E':
412                                 membuf *= 1024;
413                                 /* FALLTHROUGH */
414                         case 'P':
415                                 membuf *= 1024;
416                                 /* FALLTHROUGH */
417                         case 'T':
418                                 membuf *= 1024;
419                                 /* FALLTHROUGH */
420                         case 'G':
421                                 membuf *= 1024;
422                                 /* FALLTHROUGH */
423                         case 'M':
424                                 membuf *= 1024;
425                                 /* FALLTHROUGH */
426                         case '\0':
427                         case 'K':
428                                 membuf *= 1024;
429                                 /* FALLTHROUGH */
430                         case 'b':
431                                 break;
432                         case '%':
433                                 membuf = (available_free_memory * membuf) /
434                                     100;
435                                 break;
436                         default:
437                                 warnc(EINVAL, "%s", optarg);
438                                 membuf = available_free_memory;
439                         }
440                 }
441                 return (membuf);
442         }
443 }
444
445 /*
446  * Signal handler that clears the temporary files.
447  */
448 static void
449 sig_handler(int sig __unused, siginfo_t *siginfo __unused,
450     void *context __unused)
451 {
452
453         clear_tmp_files();
454         exit(-1);
455 }
456
457 /*
458  * Set signal handler on panic signals.
459  */
460 static void
461 set_signal_handler(void)
462 {
463         struct sigaction sa;
464
465         memset(&sa, 0, sizeof(sa));
466         sa.sa_sigaction = &sig_handler;
467         sa.sa_flags = SA_SIGINFO;
468
469         if (sigaction(SIGTERM, &sa, NULL) < 0) {
470                 perror("sigaction");
471                 return;
472         }
473         if (sigaction(SIGHUP, &sa, NULL) < 0) {
474                 perror("sigaction");
475                 return;
476         }
477         if (sigaction(SIGINT, &sa, NULL) < 0) {
478                 perror("sigaction");
479                 return;
480         }
481         if (sigaction(SIGQUIT, &sa, NULL) < 0) {
482                 perror("sigaction");
483                 return;
484         }
485         if (sigaction(SIGABRT, &sa, NULL) < 0) {
486                 perror("sigaction");
487                 return;
488         }
489         if (sigaction(SIGBUS, &sa, NULL) < 0) {
490                 perror("sigaction");
491                 return;
492         }
493         if (sigaction(SIGSEGV, &sa, NULL) < 0) {
494                 perror("sigaction");
495                 return;
496         }
497         if (sigaction(SIGUSR1, &sa, NULL) < 0) {
498                 perror("sigaction");
499                 return;
500         }
501         if (sigaction(SIGUSR2, &sa, NULL) < 0) {
502                 perror("sigaction");
503                 return;
504         }
505 }
506
/*
 * Report an unknown feature and exit with status 2.  The diagnostic is
 * message 3 followed by the offending name what.
 */
static void
unknown(const char *what)
{
	const char *msg = getstr(3);

	errx(2, "%s: %s", msg, what);
}
516
517 /*
518  * Check whether contradictory input options are used.
519  */
520 static void
521 check_mutually_exclusive_flags(char c, bool *mef_flags)
522 {
523         int fo_index, mec;
524         bool found_others, found_this;
525
526         found_others = found_this =false;
527         fo_index = 0;
528
529         for (int i = 0; i < NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS; i++) {
530                 mec = mutually_exclusive_flags[i];
531
532                 if (mec != c) {
533                         if (mef_flags[i]) {
534                                 if (found_this)
535                                         errx(1, "%c:%c: %s", c, mec, getstr(1));
536                                 found_others = true;
537                                 fo_index = i;
538                         }
539                 } else {
540                         if (found_others)
541                                 errx(1, "%c:%c: %s", c, mutually_exclusive_flags[fo_index], getstr(1));
542                         mef_flags[i] = true;
543                         found_this = true;
544                 }
545         }
546 }
547
548 /*
549  * Initialise sort opts data.
550  */
551 static void
552 set_sort_opts(void)
553 {
554
555         memset(&default_sort_mods_object, 0,
556             sizeof(default_sort_mods_object));
557         memset(&sort_opts_vals, 0, sizeof(sort_opts_vals));
558         default_sort_mods_object.func =
559             get_sort_func(&default_sort_mods_object);
560 }
561
562 /*
563  * Set a sort modifier on a sort modifiers object.
564  */
565 static bool
566 set_sort_modifier(struct sort_mods *sm, int c)
567 {
568
569         if (sm) {
570                 switch (c){
571                 case 'b':
572                         sm->bflag = true;
573                         break;
574                 case 'd':
575                         sm->dflag = true;
576                         break;
577                 case 'f':
578                         sm->fflag = true;
579                         break;
580                 case 'g':
581                         sm->gflag = true;
582                         need_hint = true;
583                         break;
584                 case 'i':
585                         sm->iflag = true;
586                         break;
587                 case 'R':
588                         sm->Rflag = true;
589                         need_random = true;
590                         break;
591                 case 'M':
592                         initialise_months();
593                         sm->Mflag = true;
594                         need_hint = true;
595                         break;
596                 case 'n':
597                         sm->nflag = true;
598                         need_hint = true;
599                         print_symbols_on_debug = true;
600                         break;
601                 case 'r':
602                         sm->rflag = true;
603                         break;
604                 case 'V':
605                         sm->Vflag = true;
606                         break;
607                 case 'h':
608                         sm->hflag = true;
609                         need_hint = true;
610                         print_symbols_on_debug = true;
611                         break;
612                 default:
613                         return false;
614                 }
615                 sort_opts_vals.complex_sort = true;
616                 sm->func = get_sort_func(sm);
617         }
618         return (true);
619 }
620
621 /*
622  * Parse POS in -k option.
623  */
624 static int
625 parse_pos(const char *s, struct key_specs *ks, bool *mef_flags, bool second)
626 {
627         regmatch_t pmatch[4];
628         regex_t re;
629         char *c, *f;
630         const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([bdfirMngRhV]+)?$";
631         size_t len, nmatch;
632         int ret;
633
634         ret = -1;
635         nmatch = 4;
636         c = f = NULL;
637
638         if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
639                 return (-1);
640
641         if (regexec(&re, s, nmatch, pmatch, 0) != 0)
642                 goto end;
643
644         if (pmatch[0].rm_eo <= pmatch[0].rm_so)
645                 goto end;
646
647         if (pmatch[1].rm_eo <= pmatch[1].rm_so)
648                 goto end;
649
650         len = pmatch[1].rm_eo - pmatch[1].rm_so;
651         f = sort_malloc((len + 1) * sizeof(char));
652
653         strncpy(f, s + pmatch[1].rm_so, len);
654         f[len] = '\0';
655
656         if (second) {
657                 errno = 0;
658                 ks->f2 = (size_t) strtoul(f, NULL, 10);
659                 if (errno != 0)
660                         err(2, "-k");
661                 if (ks->f2 == 0) {
662                         warn("%s",getstr(5));
663                         goto end;
664                 }
665         } else {
666                 errno = 0;
667                 ks->f1 = (size_t) strtoul(f, NULL, 10);
668                 if (errno != 0)
669                         err(2, "-k");
670                 if (ks->f1 == 0) {
671                         warn("%s",getstr(5));
672                         goto end;
673                 }
674         }
675
676         if (pmatch[2].rm_eo > pmatch[2].rm_so) {
677                 len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
678                 c = sort_malloc((len + 1) * sizeof(char));
679
680                 strncpy(c, s + pmatch[2].rm_so + 1, len);
681                 c[len] = '\0';
682
683                 if (second) {
684                         errno = 0;
685                         ks->c2 = (size_t) strtoul(c, NULL, 10);
686                         if (errno != 0)
687                                 err(2, "-k");
688                 } else {
689                         errno = 0;
690                         ks->c1 = (size_t) strtoul(c, NULL, 10);
691                         if (errno != 0)
692                                 err(2, "-k");
693                         if (ks->c1 == 0) {
694                                 warn("%s",getstr(6));
695                                 goto end;
696                         }
697                 }
698         } else {
699                 if (second)
700                         ks->c2 = 0;
701                 else
702                         ks->c1 = 1;
703         }
704
705         if (pmatch[3].rm_eo > pmatch[3].rm_so) {
706                 regoff_t i = 0;
707
708                 for (i = pmatch[3].rm_so; i < pmatch[3].rm_eo; i++) {
709                         check_mutually_exclusive_flags(s[i], mef_flags);
710                         if (s[i] == 'b') {
711                                 if (second)
712                                         ks->pos2b = true;
713                                 else
714                                         ks->pos1b = true;
715                         } else if (!set_sort_modifier(&(ks->sm), s[i]))
716                                 goto end;
717                 }
718         }
719
720         ret = 0;
721
722 end:
723
724         if (c)
725                 sort_free(c);
726         if (f)
727                 sort_free(f);
728         regfree(&re);
729
730         return (ret);
731 }
732
733 /*
734  * Parse -k option value.
735  */
736 static int
737 parse_k(const char *s, struct key_specs *ks)
738 {
739         int ret = -1;
740         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
741             { false, false, false, false, false, false };
742
743         if (s && *s) {
744                 char *sptr;
745
746                 sptr = strchr(s, ',');
747                 if (sptr) {
748                         size_t size1;
749                         char *pos1, *pos2;
750
751                         size1 = sptr - s;
752
753                         if (size1 < 1)
754                                 return (-1);
755                         pos1 = sort_malloc((size1 + 1) * sizeof(char));
756
757                         strncpy(pos1, s, size1);
758                         pos1[size1] = '\0';
759
760                         ret = parse_pos(pos1, ks, mef_flags, false);
761
762                         sort_free(pos1);
763                         if (ret < 0)
764                                 return (ret);
765
766                         pos2 = sort_strdup(sptr + 1);
767                         ret = parse_pos(pos2, ks, mef_flags, true);
768                         sort_free(pos2);
769                 } else
770                         ret = parse_pos(s, ks, mef_flags, false);
771         }
772
773         return (ret);
774 }
775
/*
 * Size of the buffers that receive the modifier letters of an obsolete
 * position, terminating NUL included.
 */
#define	OBS_SOPTS_SIZE	128

/*
 * Parse one POS of the obsolete "+POS -POS" syntax:
 * FIELD[.COLUMN][LETTERS].
 *
 * s      the POS text without its leading '+' or '-'
 * nf     receives the field number
 * nc     receives the column number, 0 when absent
 * sopts  buffer of at least OBS_SOPTS_SIZE bytes; receives the trailing
 *        letters when present and is left untouched otherwise
 *
 * Returns 0 on success and -1 when s does not have this form or carries
 * more letters than sopts can hold.  A number that cannot be stored in
 * an int, with room left for the caller's +1 adjustment, terminates the
 * program with status 2.
 */
static int
parse_pos_obs(const char *s, int *nf, int *nc, char* sopts)
{
	regex_t re;
	regmatch_t pmatch[4];
	char *c, *f;
	const char *sregexp = "^([0-9]+)(\\.[0-9]+)?([A-Za-z]+)?$";
	int ret;
	size_t len, nmatch;
	unsigned long value;

	ret = -1;
	nmatch = 4;
	c = f = NULL;
	*nc = *nf = 0;

	if (regcomp(&re, sregexp, REG_EXTENDED) != 0)
		return (-1);

	if (regexec(&re, s, nmatch, pmatch, 0) != 0)
		goto end;

	if (pmatch[0].rm_eo <= pmatch[0].rm_so)
		goto end;

	if (pmatch[1].rm_eo <= pmatch[1].rm_so)
		goto end;

	/* Group 1: the field number. */
	len = pmatch[1].rm_eo - pmatch[1].rm_so;
	f = sort_malloc((len + 1) * sizeof(char));
	memcpy(f, s + pmatch[1].rm_so, len);
	f[len] = '\0';

	errno = 0;
	value = strtoul(f, NULL, 10);
	if (errno != 0 || value > (unsigned long)(INT_MAX - 1))
		errx(2, "%s", getstr(11));
	*nf = (int)value;

	if (pmatch[2].rm_eo > pmatch[2].rm_so) {
		/* Group 2: ".COLUMN"; skip the leading dot. */
		len = pmatch[2].rm_eo - pmatch[2].rm_so - 1;
		c = sort_malloc((len + 1) * sizeof(char));
		memcpy(c, s + pmatch[2].rm_so + 1, len);
		c[len] = '\0';

		errno = 0;
		value = strtoul(c, NULL, 10);
		if (errno != 0 || value > (unsigned long)(INT_MAX - 1))
			errx(2, "%s", getstr(11));
		*nc = (int)value;
	}

	if (pmatch[3].rm_eo > pmatch[3].rm_so) {
		/* Group 3: the letters; never write past the caller's buffer. */
		len = pmatch[3].rm_eo - pmatch[3].rm_so;
		if (len >= OBS_SOPTS_SIZE)
			goto end;

		memcpy(sopts, s + pmatch[3].rm_so, len);
		sopts[len] = '\0';
	}

	ret = 0;

end:
	if (c)
		sort_free(c);
	if (f)
		sort_free(f);
	regfree(&re);

	return (ret);
}

/*
 * "Translate" obsolete +POS1 [-POS2] syntax into new -kPOS1[,POS2]
 * syntax.
 *
 * Every argument of the form +POS1 is replaced by a newly allocated
 * "-k..." string.  If the next argument has the form -POS2 it is merged
 * into the same key and removed from argv, and *argc is decremented.
 * Obsolete positions count from zero, so field and column of POS1 are
 * incremented; the field of POS2 is incremented only when its column is
 * non-zero.  Arguments that do not parse as a position are left alone.
 */
void
fix_obsolete_keys(int *argc, char **argv)
{
	/* Room for "-k", four numbers, separators and both letter strings. */
	char sopt[2 * OBS_SOPTS_SIZE + 64];

	for (int i = 1; i < *argc; i++) {
		char *arg1;
		int c1, f1;
		char sopts1[OBS_SOPTS_SIZE];

		arg1 = argv[i];

		if (strlen(arg1) <= 1 || arg1[0] != '+')
			continue;

		sopts1[0] = 0;
		c1 = f1 = 0;

		if (parse_pos_obs(arg1 + 1, &f1, &c1, sopts1) < 0)
			continue;

		f1 += 1;
		c1 += 1;

		if (i + 1 < *argc) {
			char *arg2 = argv[i + 1];

			if (strlen(arg2) > 1 && arg2[0] == '-') {
				int c2, f2;
				char sopts2[OBS_SOPTS_SIZE];

				sopts2[0] = 0;
				c2 = f2 = 0;

				if (parse_pos_obs(arg2 + 1,
				    &f2, &c2, sopts2) >= 0) {
					if (c2 > 0)
						f2 += 1;
					snprintf(sopt, sizeof(sopt),
					    "-k%d.%d%s,%d.%d%s",
					    f1, c1, sopts1, f2, c2, sopts2);
					argv[i] = sort_strdup(sopt);

					/* Close the gap left by the merged -POS2. */
					for (int j = i + 1; j + 1 < *argc; j++)
						argv[j] = argv[j + 1];
					*argc -= 1;
					/* Keep the vector NULL-terminated. */
					argv[*argc] = NULL;
					continue;
				}
			}
		}

		/* A lone +POS1 keeps its modifier letters. */
		snprintf(sopt, sizeof(sopt), "-k%d.%d%s", f1, c1, sopts1);
		argv[i] = sort_strdup(sopt);
	}
}
906
907 /*
908  * Set random seed
909  */
910 static void
911 set_random_seed(void)
912 {
913         if (need_random) {
914
915                 if (strcmp(random_source, DEFAULT_RANDOM_SORT_SEED_FILE) == 0) {
916                         FILE* fseed;
917                         MD5_CTX ctx;
918                         char rsd[MAX_DEFAULT_RANDOM_SEED_DATA_SIZE];
919                         size_t sz = 0;
920
921                         fseed = openfile(random_source, "r");
922                         while (!feof(fseed)) {
923                                 int cr;
924
925                                 cr = fgetc(fseed);
926                                 if (cr == EOF)
927                                         break;
928
929                                 rsd[sz++] = (char) cr;
930
931                                 if (sz >= MAX_DEFAULT_RANDOM_SEED_DATA_SIZE)
932                                         break;
933                         }
934
935                         closefile(fseed, random_source);
936
937                         MD5Init(&ctx);
938                         MD5Update(&ctx, rsd, sz);
939
940                         random_seed = MD5End(&ctx, NULL);
941                         random_seed_size = strlen(random_seed);
942
943                 } else {
944                         MD5_CTX ctx;
945                         char *b;
946
947                         MD5Init(&ctx);
948                         b = MD5File(random_source, NULL);
949                         if (b == NULL)
950                                 err(2, NULL);
951
952                         random_seed = b;
953                         random_seed_size = strlen(b);
954                 }
955
956                 MD5Init(&md5_ctx);
957                 if(random_seed_size>0) {
958                         MD5Update(&md5_ctx, random_seed, random_seed_size);
959                 }
960         }
961 }
962
963 /*
964  * Main function.
965  */
966 int
967 main(int argc, char **argv)
968 {
969         char *outfile, *real_outfile;
970         int c, result;
971         bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
972             { false, false, false, false, false, false };
973
974         result = 0;
975         outfile = sort_strdup("-");
976         real_outfile = NULL;
977
978         struct sort_mods *sm = &default_sort_mods_object;
979
980         init_tmp_files();
981
982         set_signal_handler();
983
984         set_hw_params();
985         set_locale();
986         set_tmpdir();
987         set_sort_opts();
988
989         fix_obsolete_keys(&argc, argv);
990
991         while (((c = getopt_long(argc, argv, OPTIONS, long_options, NULL))
992             != -1)) {
993
994                 check_mutually_exclusive_flags(c, mef_flags);
995
996                 if (!set_sort_modifier(sm, c)) {
997
998                         switch (c) {
999                         case 'c':
1000                                 sort_opts_vals.cflag = true;
1001                                 if (optarg) {
1002                                         if (!strcmp(optarg, "diagnose-first"))
1003                                                 ;
1004                                         else if (!strcmp(optarg, "silent") ||
1005                                             !strcmp(optarg, "quiet"))
1006                                                 sort_opts_vals.csilentflag = true;
1007                                         else if (*optarg)
1008                                                 unknown(optarg);
1009                                 }
1010                                 break;
1011                         case 'C':
1012                                 sort_opts_vals.cflag = true;
1013                                 sort_opts_vals.csilentflag = true;
1014                                 break;
1015                         case 'k':
1016                         {
1017                                 sort_opts_vals.complex_sort = true;
1018                                 sort_opts_vals.kflag = true;
1019
1020                                 keys_num++;
1021                                 keys = sort_realloc(keys, keys_num *
1022                                     sizeof(struct key_specs));
1023                                 memset(&(keys[keys_num - 1]), 0,
1024                                     sizeof(struct key_specs));
1025
1026                                 if (parse_k(optarg, &(keys[keys_num - 1]))
1027                                     < 0) {
1028                                         errc(2, EINVAL, "-k %s", optarg);
1029                                 }
1030
1031                                 break;
1032                         }
1033                         case 'm':
1034                                 sort_opts_vals.mflag = true;
1035                                 break;
1036                         case 'o':
1037                                 outfile = sort_realloc(outfile, (strlen(optarg) + 1));
1038                                 strcpy(outfile, optarg);
1039                                 break;
1040                         case 's':
1041                                 sort_opts_vals.sflag = true;
1042                                 break;
1043                         case 'S':
1044                                 available_free_memory =
1045                                     parse_memory_buffer_value(optarg);
1046                                 break;
1047                         case 'T':
1048                                 tmpdir = sort_strdup(optarg);
1049                                 break;
1050                         case 't':
1051                                 while (strlen(optarg) > 1) {
1052                                         if (optarg[0] != '\\') {
1053                                                 errc(2, EINVAL, "%s", optarg);
1054                                         }
1055                                         optarg += 1;
1056                                         if (*optarg == '0') {
1057                                                 *optarg = 0;
1058                                                 break;
1059                                         }
1060                                 }
1061                                 sort_opts_vals.tflag = true;
1062                                 sort_opts_vals.field_sep = btowc(optarg[0]);
1063                                 if (sort_opts_vals.field_sep == WEOF) {
1064                                         errno = EINVAL;
1065                                         err(2, NULL);
1066                                 }
1067                                 if (!gnusort_numeric_compatibility) {
1068                                         if (symbol_decimal_point == sort_opts_vals.field_sep)
1069                                                 symbol_decimal_point = WEOF;
1070                                         if (symbol_thousands_sep == sort_opts_vals.field_sep)
1071                                                 symbol_thousands_sep = WEOF;
1072                                         if (symbol_negative_sign == sort_opts_vals.field_sep)
1073                                                 symbol_negative_sign = WEOF;
1074                                         if (symbol_positive_sign == sort_opts_vals.field_sep)
1075                                                 symbol_positive_sign = WEOF;
1076                                 }
1077                                 break;
1078                         case 'u':
1079                                 sort_opts_vals.uflag = true;
1080                                 /* stable sort for the correct unique val */
1081                                 sort_opts_vals.sflag = true;
1082                                 break;
1083                         case 'z':
1084                                 sort_opts_vals.zflag = true;
1085                                 break;
1086                         case SORT_OPT:
1087                                 if (optarg) {
1088                                         if (!strcmp(optarg, "general-numeric"))
1089                                                 set_sort_modifier(sm, 'g');
1090                                         else if (!strcmp(optarg, "human-numeric"))
1091                                                 set_sort_modifier(sm, 'h');
1092                                         else if (!strcmp(optarg, "numeric"))
1093                                                 set_sort_modifier(sm, 'n');
1094                                         else if (!strcmp(optarg, "month"))
1095                                                 set_sort_modifier(sm, 'M');
1096                                         else if (!strcmp(optarg, "random"))
1097                                                 set_sort_modifier(sm, 'R');
1098                                         else
1099                                                 unknown(optarg);
1100                                 }
1101                                 break;
1102 #if defined(SORT_THREADS)
1103                         case PARALLEL_OPT:
1104                                 nthreads = (size_t)(atoi(optarg));
1105                                 if (nthreads < 1)
1106                                         nthreads = 1;
1107                                 if (nthreads > 1024)
1108                                         nthreads = 1024;
1109                                 break;
1110 #endif
1111                         case QSORT_OPT:
1112                                 sort_opts_vals.sort_method = SORT_QSORT;
1113                                 break;
1114                         case MERGESORT_OPT:
1115                                 sort_opts_vals.sort_method = SORT_MERGESORT;
1116                                 break;
1117                         case MMAP_OPT:
1118                                 use_mmap = true;
1119                                 break;
1120                         case HEAPSORT_OPT:
1121                                 sort_opts_vals.sort_method = SORT_HEAPSORT;
1122                                 break;
1123                         case RADIXSORT_OPT:
1124                                 sort_opts_vals.sort_method = SORT_RADIXSORT;
1125                                 break;
1126                         case RANDOMSOURCE_OPT:
1127                                 random_source = strdup(optarg);
1128                                 break;
1129                         case COMPRESSPROGRAM_OPT:
1130                                 compress_program = strdup(optarg);
1131                                 break;
1132                         case FF_OPT:
1133                                 read_fns_from_file0(optarg);
1134                                 break;
1135                         case BS_OPT:
1136                         {
1137                                 errno = 0;
1138                                 long mof = strtol(optarg, NULL, 10);
1139                                 if (errno != 0)
1140                                         err(2, "--batch-size");
1141                                 if (mof >= 2)
1142                                         max_open_files = (size_t) mof + 1;
1143                         }
1144                                 break;
1145                         case VERSION_OPT:
1146                                 printf("%s\n", VERSION);
1147                                 exit(EXIT_SUCCESS);
1148                                 /* NOTREACHED */
1149                                 break;
1150                         case DEBUG_OPT:
1151                                 debug_sort = true;
1152                                 break;
1153                         case HELP_OPT:
1154                                 usage(false);
1155                                 /* NOTREACHED */
1156                                 break;
1157                         default:
1158                                 usage(true);
1159                                 /* NOTREACHED */
1160                         }
1161                 }
1162         }
1163
1164         argc -= optind;
1165         argv += optind;
1166
1167 #ifndef WITHOUT_NLS
1168         catalog = catopen("sort", NL_CAT_LOCALE);
1169 #endif
1170
1171         if (sort_opts_vals.cflag && sort_opts_vals.mflag)
1172                 errx(1, "%c:%c: %s", 'm', 'c', getstr(1));
1173
1174 #ifndef WITHOUT_NLS
1175         catclose(catalog);
1176 #endif
1177
1178         if (keys_num == 0) {
1179                 keys_num = 1;
1180                 keys = sort_realloc(keys, sizeof(struct key_specs));
1181                 memset(&(keys[0]), 0, sizeof(struct key_specs));
1182                 keys[0].c1 = 1;
1183                 keys[0].pos1b = default_sort_mods->bflag;
1184                 keys[0].pos2b = default_sort_mods->bflag;
1185                 memcpy(&(keys[0].sm), default_sort_mods,
1186                     sizeof(struct sort_mods));
1187         }
1188
1189         for (size_t i = 0; i < keys_num; i++) {
1190                 struct key_specs *ks;
1191
1192                 ks = &(keys[i]);
1193
1194                 if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
1195                     !(ks->pos2b)) {
1196                         ks->pos1b = sm->bflag;
1197                         ks->pos2b = sm->bflag;
1198                         memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
1199                 }
1200
1201                 ks->sm.func = get_sort_func(&(ks->sm));
1202         }
1203
1204         if (argv_from_file0) {
1205                 argc = argc_from_file0;
1206                 argv = argv_from_file0;
1207         }
1208
1209         if (debug_sort) {
1210                 printf("Memory to be used for sorting: %llu\n",available_free_memory);
1211 #if defined(SORT_THREADS)
1212                 printf("Number of CPUs: %d\n",(int)ncpu);
1213                 nthreads = 1;
1214 #endif
1215                 printf("Using collate rules of %s locale\n",
1216                     setlocale(LC_COLLATE, NULL));
1217                 if (byte_sort)
1218                         printf("Byte sort is used\n");
1219                 if (print_symbols_on_debug) {
1220                         printf("Decimal Point: <%lc>\n", symbol_decimal_point);
1221                         if (symbol_thousands_sep)
1222                                 printf("Thousands separator: <%lc>\n",
1223                                     symbol_thousands_sep);
1224                         printf("Positive sign: <%lc>\n", symbol_positive_sign);
1225                         printf("Negative sign: <%lc>\n", symbol_negative_sign);
1226                 }
1227         }
1228
1229         set_random_seed();
1230
1231         /* Case when the outfile equals one of the input files: */
1232         if (strcmp(outfile, "-")) {
1233
1234                 for(int i = 0; i < argc; ++i) {
1235                         if (strcmp(argv[i], outfile) == 0) {
1236                                 real_outfile = sort_strdup(outfile);
1237                                 for(;;) {
1238                                         char* tmp = sort_malloc(strlen(outfile) +
1239                                             strlen(".tmp") + 1);
1240
1241                                         strcpy(tmp, outfile);
1242                                         strcpy(tmp + strlen(tmp), ".tmp");
1243                                         sort_free(outfile);
1244                                         outfile = tmp;
1245                                         if (access(outfile, F_OK) < 0)
1246                                                 break;
1247                                 }
1248                                 tmp_file_atexit(outfile);
1249                         }
1250                 }
1251         }
1252
1253 #if defined(SORT_THREADS)
1254         if ((argc < 1) || (strcmp(outfile, "-") == 0) || (*outfile == 0))
1255                 nthreads = 1;
1256 #endif
1257
1258         if (!sort_opts_vals.cflag && !sort_opts_vals.mflag) {
1259                 struct file_list fl;
1260                 struct sort_list list;
1261
1262                 sort_list_init(&list);
1263                 file_list_init(&fl, true);
1264
1265                 if (argc < 1)
1266                         procfile("-", &list, &fl);
1267                 else {
1268                         while (argc > 0) {
1269                                 procfile(*argv, &list, &fl);
1270                                 --argc;
1271                                 ++argv;
1272                         }
1273                 }
1274
1275                 if (fl.count < 1)
1276                         sort_list_to_file(&list, outfile);
1277                 else {
1278                         if (list.count > 0) {
1279                                 char *flast = new_tmp_file_name();
1280
1281                                 sort_list_to_file(&list, flast);
1282                                 file_list_add(&fl, flast, false);
1283                         }
1284                         merge_files(&fl, outfile);
1285                 }
1286
1287                 file_list_clean(&fl);
1288
1289                 /*
1290                  * We are about to exit the program, so we can ignore
1291                  * the clean-up for speed
1292                  *
1293                  * sort_list_clean(&list);
1294                  */
1295
1296         } else if (sort_opts_vals.cflag) {
1297                 result = (argc == 0) ? (check("-")) : (check(*argv));
1298         } else if (sort_opts_vals.mflag) {
1299                 struct file_list fl;
1300
1301                 file_list_init(&fl, false);
1302                 file_list_populate(&fl, argc, argv, true);
1303                 merge_files(&fl, outfile);
1304                 file_list_clean(&fl);
1305         }
1306
1307         if (real_outfile) {
1308                 unlink(real_outfile);
1309                 if (rename(outfile, real_outfile) < 0)
1310                         err(2, NULL);
1311                 sort_free(real_outfile);
1312         }
1313
1314         sort_free(outfile);
1315
1316         return (result);
1317 }