2 /* sort - sort lines of text (with all kinds of options).
3 Copyright (C) 88, 1991-2004 Free Software Foundation, Inc.
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 Written December 1988 by Mike Haertel.
20 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
21 or (US mail) as Mike Haertel c/o Free Software Foundation.
23 Ørn E. Hansen added NLS support in 1997. */
29 #include <sys/types.h>
33 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */
34 /* Get mbstate_t, mbrtowc(), wcwidth(). */
39 /* Get isw* functions. */
44 /* Get nl_langinfo(). */
45 #if HAVE_LANGINFO_CODESET
46 # include <langinfo.h>
49 /* Include this after wctype.h so that we `#undef' ISPRINT
50 (from Solaris's euc.h, from widec.h, from wctype.h) before
51 redefining and using it. */
54 #include "hard-locale.h"
56 #include "long-options.h"
60 #include "stdio-safer.h"
64 #if HAVE_SYS_RESOURCE_H
65 # include <sys/resource.h>
68 struct rlimit { size_t rlim_cur; };
69 # define getrlimit(Resource, Rlp) (-1)
72 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
73 installation; work around this configuration error. */
74 #if !defined MB_LEN_MAX || MB_LEN_MAX == 1
75 # define MB_LEN_MAX 16
78 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */
79 #if HAVE_MBRTOWC && defined mbstate_t
80 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
83 /* The official name of this program (e.g., no `g' prefix). */
84 #define PROGRAM_NAME "sort"
86 #define AUTHORS "Mike Haertel", "Paul Eggert"
88 #if HAVE_LANGINFO_CODESET
89 # include <langinfo.h>
93 # define sigprocmask(How, Set, Oset) /* empty */
101 #define UCHAR_LIM (UCHAR_MAX + 1)
103 #ifndef DEFAULT_TMPDIR
104 # define DEFAULT_TMPDIR "/tmp"
110 /* POSIX says to exit with status 1 if invoked with -c and the
111 input is not properly sorted. */
112 SORT_OUT_OF_ORDER = 1,
114 /* POSIX says any other irregular exit must exit with a status
115 code greater than 1. */
119 #define C_DECIMAL_POINT '.'
120 #define NEGATION_SIGN '-'
121 #define NUMERIC_ZERO '0'
125 static char decimal_point;
126 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
127 static int force_general_numcompare = 0;
129 /* Nonzero if the corresponding locales are hard. */
130 static bool hard_LC_COLLATE;
131 # if HAVE_NL_LANGINFO
132 static bool hard_LC_TIME;
135 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
139 # define decimal_point C_DECIMAL_POINT
140 # define IS_THOUSANDS_SEP(x) false
/* Evaluate to 1 if X is nonzero, and to 0 otherwise.  X is parenthesized
   so that an argument containing an operator of lower precedence than
   `!=' (for example `a & b') is compared as a whole.  */
#define NONZERO(x) ((x) != 0)
146 /* Get the byte length of a multibyte character. */
147 #define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE) \
151 mbstate_t state_bak; \
154 mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE); \
161 /* Fall through. */ \
168 /* The kind of blanks for '-b' to skip in various options. */
169 enum blanktype { bl_start, bl_end, bl_both };
171 /* The character marking end of line. Default to \n. */
172 static char eolchar = '\n';
174 /* Lines are held in core as counted strings. */
177 char *text; /* Text of the line. */
178 size_t length; /* Length including final newline. */
179 char *keybeg; /* Start of first key. */
180 char *keylim; /* Limit of first key. */
186 char *buf; /* Dynamically allocated buffer,
187 partitioned into 3 regions:
190 - an array of lines, in reverse order. */
191 size_t used; /* Number of bytes used for input data. */
192 size_t nlines; /* Number of lines in the line array. */
193 size_t alloc; /* Number of bytes allocated. */
194 size_t left; /* Number of bytes left from previous reads. */
195 size_t line_bytes; /* Number of bytes to reserve for each line. */
196 bool eof; /* An EOF has been read. */
201 size_t sword; /* Zero-origin 'word' to start at. */
202 size_t schar; /* Additional characters to skip. */
203 size_t eword; /* Zero-origin first word after field. */
204 size_t echar; /* Additional characters in field. */
205 bool const *ignore; /* Boolean array of characters to ignore. */
206 char const *translate; /* Translation applied to characters. */
207 bool skipsblanks; /* Skip leading blanks when finding start. */
208 bool skipeblanks; /* Skip leading blanks when finding end. */
209 bool numeric; /* Flag for numeric comparison. Handle
210 strings of digits with optional decimal
211 point, but no exponential notation. */
212 bool general_numeric; /* Flag for general, numeric comparison.
213 Handle numbers in exponential notation. */
214 bool month; /* Flag for comparison by month name. */
215 bool reverse; /* Reverse the sense of comparison. */
216 struct keyfield *next; /* Next keyfield to try. */
225 /* The name this program was run with. */
228 /* FIXME: None of these tables work with multibyte character sets.
229 Also, there are many other bugs when handling multibyte characters.
230 One way to fix this is to rewrite `sort' to use wide characters
231 internally, but doing this with good performance is a bit
234 /* Table of blanks. */
235 static bool blanks[UCHAR_LIM];
237 /* Table of non-printing characters. */
238 static bool nonprinting[UCHAR_LIM];
240 /* Table of non-dictionary characters (not letters, digits, or blanks). */
241 static bool nondictionary[UCHAR_LIM];
243 /* Translation table folding lower case to upper. */
244 static char fold_toupper[UCHAR_LIM];
246 #define MONTHS_PER_YEAR 12
248 /* Table mapping month names to integers.
249 Alphabetic order allows binary search. */
250 static struct month monthtab[] =
266 /* During the merge phase, the number of files to merge at once. */
269 /* Minimum size for a merge or check buffer. */
270 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
272 /* Minimum sort size; the code might not work with smaller sizes. */
273 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
275 /* The number of bytes needed for a merge or check buffer, which can
276 function relatively efficiently even if it holds only one line. If
277 a longer line is seen, this value is increased. */
278 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
280 /* The approximate maximum number of bytes of main memory to use, as
281 specified by the user. Zero if the user has not specified a size. */
282 static size_t sort_size;
284 /* The guessed size for non-regular files. */
285 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
287 /* Array of directory names in which any temporary files are to be created. */
288 static char const **temp_dirs;
290 /* Number of temporary directory names used. */
291 static size_t temp_dir_count;
293 /* Number of allocated slots in temp_dirs. */
294 static size_t temp_dir_alloc;
296 /* Flag to reverse the order of all comparisons. */
299 /* Flag for stable sort. This turns off the last ditch bytewise
300 comparison of lines, and instead leaves lines in the same order
301 they were read if all keys compare equal. */
304 /* Tab character separating fields. If tab_default, then fields are
305 separated by the empty string between a non-blank character and a blank
307 static bool tab_default = true;
308 static unsigned char tab[MB_LEN_MAX + 1];
309 static size_t tab_length = 1;
311 /* Flag to remove consecutive duplicate lines from the output.
312 Only the last of a sequence of equal lines will be output. */
315 /* Nonzero if any of the input files are the standard input. */
316 static bool have_read_stdin;
318 /* List of key field comparisons to be tried. */
319 static struct keyfield *keylist;
321 static void sortlines_temp (struct line *, size_t, struct line *);
326 if (status != EXIT_SUCCESS)
327 fprintf (stderr, _("Try `%s --help' for more information.\n"),
332 Usage: %s [OPTION]... [FILE]...\n\
336 Write sorted concatenation of all FILE(s) to standard output.\n\
342 Mandatory arguments to long options are mandatory for short options too.\n\
345 -b, --ignore-leading-blanks ignore leading blanks\n\
346 -d, --dictionary-order consider only blanks and alphanumeric characters\n\
347 -f, --ignore-case fold lower case to upper case characters\n\
350 -g, --general-numeric-sort compare according to general numerical value\n\
351 -i, --ignore-nonprinting consider only printable characters\n\
352 -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
353 -n, --numeric-sort compare according to string numerical value\n\
354 -r, --reverse reverse the result of comparisons\n\
360 -c, --check check whether input is sorted; do not sort\n\
361 -k, --key=POS1[,POS2] start a key at POS1, end it at POS 2 (origin 1)\n\
362 -m, --merge merge already sorted files; do not sort\n\
363 -o, --output=FILE write result to FILE instead of standard output\n\
364 -s, --stable stabilize sort by disabling last-resort comparison\n\
365 -S, --buffer-size=SIZE use SIZE for main memory buffer\n\
368 -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
369 -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\
370 multiple options specify multiple directories\n\
371 -u, --unique with -c, check for strict ordering;\n\
372 without -c, output only the first of an equal run\n\
375 -z, --zero-terminated end lines with 0 byte, not newline\n\
377 fputs (HELP_OPTION_DESCRIPTION, stdout);
378 fputs (VERSION_OPTION_DESCRIPTION, stdout);
381 POS is F[.C][OPTS], where F is the field number and C the character position\n\
382 in the field. OPTS is one or more single-letter ordering options, which\n\
383 override global ordering options for that key. If no key is given, use the\n\
384 entire line as the key.\n\
386 SIZE may be followed by the following multiplicative suffixes:\n\
389 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
391 With no FILE, or when FILE is -, read standard input.\n\
394 The locale specified by the environment affects sort order.\n\
395 Set LC_ALL=C to get the traditional sort order that uses\n\
396 native byte values.\n\
398 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
404 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
406 static struct option const long_options[] =
408 {"ignore-leading-blanks", no_argument, NULL, 'b'},
409 {"check", no_argument, NULL, 'c'},
410 {"dictionary-order", no_argument, NULL, 'd'},
411 {"ignore-case", no_argument, NULL, 'f'},
412 {"general-numeric-sort", no_argument, NULL, 'g'},
413 {"ignore-nonprinting", no_argument, NULL, 'i'},
414 {"key", required_argument, NULL, 'k'},
415 {"merge", no_argument, NULL, 'm'},
416 {"month-sort", no_argument, NULL, 'M'},
417 {"numeric-sort", no_argument, NULL, 'n'},
418 {"output", required_argument, NULL, 'o'},
419 {"reverse", no_argument, NULL, 'r'},
420 {"stable", no_argument, NULL, 's'},
421 {"buffer-size", required_argument, NULL, 'S'},
422 {"field-separator", required_argument, NULL, 't'},
423 {"temporary-directory", required_argument, NULL, 'T'},
424 {"unique", no_argument, NULL, 'u'},
425 {"zero-terminated", no_argument, NULL, 'z'},
426 {GETOPT_HELP_OPTION_DECL},
427 {GETOPT_VERSION_OPTION_DECL},
431 /* The set of signals that are caught. */
432 static sigset_t caught_signals;
434 /* The list of temporary files. */
437 struct tempnode *volatile next;
438 char name[1]; /* Actual size is 1 + file name length. */
440 static struct tempnode *volatile temphead;
442 /* Function pointers. */
444 (*inittables) (void);
447 (* begfield) (const struct line *line, const struct keyfield *key);
450 (* limfield) (const struct line *line, const struct keyfield *key);
453 (*getmonth) (const char *s, size_t len);
456 (* keycompare) (const struct line *a, const struct line *b);
458 /* Test for white space multibyte character.
459 Set LENGTH to the byte length of the investigated multibyte character. */
462 ismbblank (const char *str, size_t len, size_t *length)
468 memset (&state, '\0', sizeof(mbstate_t));
469 mblength = mbrtowc (&wc, str, len, &state);
471 if (mblength == (size_t)-1 || mblength == (size_t)-2)
477 *length = (mblength < 1) ? 1 : mblength;
478 return iswblank (wc);
482 /* Clean up any remaining temporary files. */
487 struct tempnode const *node;
489 for (node = temphead; node; node = node->next)
493 /* Report MESSAGE for FILE, then clean up and exit.
494 If FILE is null, it represents standard output. */
496 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
498 die (char const *message, char const *file)
500 error (0, errno, "%s: %s", message, file ? file : _("standard output"));
504 /* Create a new temporary file, returning its newly allocated name.
505 Store into *PFP a stream open for writing. */
508 create_temp_file (FILE **pfp)
510 static char const slashbase[] = "/sortXXXXXX";
511 static size_t temp_dir_index;
515 char const *temp_dir = temp_dirs[temp_dir_index];
516 size_t len = strlen (temp_dir);
517 struct tempnode *node =
518 xmalloc (sizeof node->next + len + sizeof slashbase);
519 char *file = node->name;
521 memcpy (file, temp_dir, len);
522 memcpy (file + len, slashbase, sizeof slashbase);
523 node->next = temphead;
524 if (++temp_dir_index == temp_dir_count)
527 /* Create the temporary file in a critical section, to avoid races. */
528 sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
533 sigprocmask (SIG_SETMASK, &oldset, NULL);
536 if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
537 die (_("cannot create temporary file"), file);
542 /* Return a stream for FILE, opened with mode HOW. A null FILE means
543 standard output; HOW should be "w". When opening for input, "-"
544 means standard input. To avoid confusion, do not return file
545 descriptors 0, 1, or 2. */
548 xfopen (const char *file, const char *how)
554 else if (STREQ (file, "-") && *how == 'r')
556 have_read_stdin = true;
561 if ((fp = fopen_safer (file, how)) == NULL)
562 die (_("open failed"), file);
568 /* Close FP, whose name is FILE, and report any errors. */
571 xfclose (FILE *fp, char const *file)
575 /* Allow reading stdin from tty more than once. */
581 if (fclose (fp) != 0)
582 die (_("close failed"), file);
587 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
589 if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
590 die (_("write failed"), output_file);
593 /* Append DIR to the array of temporary directory names. */
595 add_temp_dir (char const *dir)
597 if (temp_dir_count == temp_dir_alloc)
598 temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs);
600 temp_dirs[temp_dir_count++] = dir;
603 /* Search through the list of temporary files for NAME;
604 remove it if it is found on the list. */
607 zaptemp (const char *name)
609 struct tempnode *volatile *pnode;
610 struct tempnode *node;
612 for (pnode = &temphead; (node = *pnode); pnode = &node->next)
613 if (node->name == name)
622 #if HAVE_LANGINFO_CODESET
625 struct_month_cmp (const void *m1, const void *m2)
627 struct month const *month1 = m1;
628 struct month const *month2 = m2;
629 return strcmp (month1->name, month2->name);
634 /* Initialize the character class tables. */
637 inittables_uni (void)
641 for (i = 0; i < UCHAR_LIM; ++i)
643 blanks[i] = !!ISBLANK (i);
644 nonprinting[i] = !ISPRINT (i);
645 nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
646 fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
650 /* If we're not in the "C" locale, read different names for months. */
653 for (i = 0; i < MONTHS_PER_YEAR; i++)
660 s = (char *) nl_langinfo (ABMON_1 + i);
662 monthtab[i].name = name = xmalloc (s_len + 1);
663 monthtab[i].val = i + 1;
665 for (j = 0; j < s_len; j++)
666 name[j] = fold_toupper[to_uchar (s[j])];
669 qsort ((void *) monthtab, MONTHS_PER_YEAR,
670 sizeof *monthtab, struct_month_cmp);
681 size_t s_len, mblength;
682 char mbc[MB_LEN_MAX];
684 mbstate_t state_mb, state_wc;
686 for (i = 0; i < MONTHS_PER_YEAR; i++)
688 s = (char *) nl_langinfo (ABMON_1 + i);
690 monthtab[i].name = name = (char *) xmalloc (s_len + 1);
691 monthtab[i].val = i + 1;
693 memset (&state_mb, '\0', sizeof (mbstate_t));
694 memset (&state_wc, '\0', sizeof (mbstate_t));
696 for (j = 0; j < s_len;)
698 if (!ismbblank (s + j, s_len - j, &mblength))
703 for (k = 0; j < s_len;)
705 mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
706 assert (mblength != (size_t)-1 && mblength != (size_t)-2);
713 memcpy (mbc, s + j, mblength);
719 mblength = wcrtomb (mbc, pwc, &state_wc);
720 assert (mblength != (size_t)0 && mblength != (size_t)-1);
723 for (l = 0; l < mblength; l++)
728 qsort ((void *) monthtab, MONTHS_PER_YEAR,
729 sizeof (struct month), struct_month_cmp);
733 /* Specify the amount of main memory to use when sorting. */
735 specify_sort_size (char const *s)
739 enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
741 /* The default unit is KiB. */
742 if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
744 if (n <= UINTMAX_MAX / 1024)
747 e = LONGINT_OVERFLOW;
750 /* A 'b' suffix means bytes; a '%' suffix means percent of memory. */
751 if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
760 double mem = physmem_total () * n / 100;
762 /* Use "<", not "<=", to avoid problems with rounding. */
763 if (mem < UINTMAX_MAX)
769 e = LONGINT_OVERFLOW;
776 /* If multiple sort sizes are specified, take the maximum, so
777 that option order does not matter. */
784 sort_size = MAX (sort_size, MIN_SORT_SIZE);
788 e = LONGINT_OVERFLOW;
791 STRTOL_FATAL_ERROR (s, _("sort size"), e);
794 /* Return the default sort size. */
796 default_sort_size (void)
798 /* Let MEM be available memory or 1/8 of total memory, whichever
800 double avail = physmem_available ();
801 double total = physmem_total ();
802 double mem = MAX (avail, total / 8);
803 struct rlimit rlimit;
805 /* Let SIZE be MEM, but no more than the maximum object size or
806 system resource limits. Avoid the MIN macro here, as it is not
807 quite right when only one argument is floating point. Don't
808 bother to check for values like RLIM_INFINITY since in practice
809 they are not much less than SIZE_MAX. */
810 size_t size = SIZE_MAX;
813 if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
814 size = rlimit.rlim_cur;
816 if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
817 size = rlimit.rlim_cur;
820 /* Leave a large safety margin for the above limits, as failure can
821 occur when they are exceeded. */
825 /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
826 Exceeding RSS is not fatal, but can be quite slow. */
827 if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
828 size = rlimit.rlim_cur / 16 * 15;
831 /* Use no less than the minimum. */
832 return MAX (size, MIN_SORT_SIZE);
835 /* Return the sort buffer size to use with the input files identified
836 by FPS and FILES, which are alternate paths to the same files.
837 NFILES gives the number of input files; NFPS may be less. Assume
838 that each input line requires LINE_BYTES extra bytes' worth of line
839 information. Do not exceed a bound on the size: if the bound is
840 not specified by the user, use a default. */
843 sort_buffer_size (FILE *const *fps, int nfps,
844 char *const *files, int nfiles,
847 /* A bound on the input size. If zero, the bound hasn't been
849 static size_t size_bound;
851 /* In the worst case, each input byte is a newline. */
852 size_t worst_case_per_input_byte = line_bytes + 1;
854 /* Keep enough room for one extra input line and an extra byte.
855 This extra room might be needed when preparing to read EOF. */
856 size_t size = worst_case_per_input_byte + 1;
860 for (i = 0; i < nfiles; i++)
866 if ((i < nfps ? fstat (fileno (fps[i]), &st)
867 : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
868 : stat (files[i], &st))
870 die (_("stat failed"), files[i]);
872 if (S_ISREG (st.st_mode))
873 file_size = st.st_size;
876 /* The file has unknown size. If the user specified a sort
877 buffer size, use that; otherwise, guess the size. */
880 file_size = INPUT_FILE_SIZE_GUESS;
885 size_bound = sort_size;
887 size_bound = default_sort_size ();
890 /* Add the amount of memory needed to represent the worst case
891 where the input consists entirely of newlines followed by a
892 single non-newline. Check for overflow. */
893 worst_case = file_size * worst_case_per_input_byte + 1;
894 if (file_size != worst_case / worst_case_per_input_byte
895 || size_bound - size <= worst_case)
903 /* Initialize BUF. Reserve LINE_BYTES bytes for each line; LINE_BYTES
904 must be at least sizeof (struct line). Allocate ALLOC bytes
908 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
910 /* Ensure that the line array is properly aligned. If the desired
911 size cannot be allocated, repeatedly halve it until allocation
912 succeeds. The smaller allocation may hurt overall performance,
913 but that's better than failing. */
916 alloc += sizeof (struct line) - alloc % sizeof (struct line);
917 buf->buf = malloc (alloc);
921 if (alloc <= line_bytes + 1)
925 buf->line_bytes = line_bytes;
927 buf->used = buf->left = buf->nlines = 0;
931 /* Return one past the limit of the line array. */
933 static inline struct line *
934 buffer_linelim (struct buffer const *buf)
936 return (struct line *) (buf->buf + buf->alloc);
939 /* Return a pointer to the first character of the field specified
943 begfield_uni (const struct line *line, const struct keyfield *key)
945 register char *ptr = line->text, *lim = ptr + line->length - 1;
946 register size_t sword = key->sword;
947 register size_t schar = key->schar;
948 register size_t remaining_bytes;
950 /* The leading field separator itself is included in a field when -t
954 while (ptr < lim && sword--)
956 while (ptr < lim && *ptr != tab[0])
962 while (ptr < lim && sword--)
964 while (ptr < lim && blanks[to_uchar (*ptr)])
966 while (ptr < lim && !blanks[to_uchar (*ptr)])
970 if (key->skipsblanks)
971 while (ptr < lim && blanks[to_uchar (*ptr)])
974 /* Advance PTR by SCHAR (if possible), but no further than LIM. */
975 remaining_bytes = lim - ptr;
976 if (schar < remaining_bytes)
986 begfield_mb (const struct line *line, const struct keyfield *key)
989 char *ptr = line->text, *lim = ptr + line->length - 1;
990 size_t sword = key->sword;
991 size_t schar = key->schar;
995 memset (&state, '\0', sizeof(mbstate_t));
998 while (ptr < lim && sword--)
1000 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1002 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1007 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1012 while (ptr < lim && sword--)
1014 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1018 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1021 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1025 if (key->skipsblanks)
1026 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1029 for (i = 0; i < schar; i++)
1031 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1033 if (ptr + mblength > lim)
1043 /* Return the limit of (a pointer to the first character after) the field
1044 in LINE specified by KEY. */
1047 limfield_uni (const struct line *line, const struct keyfield *key)
1049 register char *ptr = line->text, *lim = ptr + line->length - 1;
1050 register size_t eword = key->eword, echar = key->echar;
1051 register size_t remaining_bytes;
1053 /* Move PTR past EWORD fields or to one past the last byte on LINE,
1054 whichever comes first. If there are more than EWORD fields, leave
1055 PTR pointing at the beginning of the field having zero-based index,
1056 EWORD. If a delimiter character was specified (via -t), then that
1057 `beginning' is the first character following the delimiting TAB.
1058 Otherwise, leave PTR pointing at the first `blank' character after
1059 the preceding field. */
1061 while (ptr < lim && eword--)
1063 while (ptr < lim && *ptr != tab[0])
1065 if (ptr < lim && (eword | echar))
1069 while (ptr < lim && eword--)
1071 while (ptr < lim && blanks[to_uchar (*ptr)])
1073 while (ptr < lim && !blanks[to_uchar (*ptr)])
1077 #ifdef POSIX_UNSPECIFIED
1078 /* The following block of code makes GNU sort incompatible with
1079 standard Unix sort, so it's ifdef'd out for now.
1080 The POSIX spec isn't clear on how to interpret this.
1081 FIXME: request clarification.
1083 From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1084 Date: Thu, 30 May 96 12:20:41 -0400
1085 [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1087 [...]I believe I've found another bug in `sort'.
1092 $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1095 $ /bin/sort -k1.7,1.7 </tmp/sort.in
1099 Unix sort produced the answer I expected: sort on the single character
1100 in column 7. GNU sort produced different results, because it disagrees
1101 on the interpretation of the key-end spec "M.N". Unix sort reads this
1102 as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1103 "skip M-1 fields, then either N-1 characters or the rest of the current
1104 field, whichever comes first". This extra clause applies only to
1105 key-ends, not key-starts.
1108 /* Make LIM point to the end of (one byte past) the current field. */
1112 newlim = memchr (ptr, tab[0], lim - ptr);
1120 while (newlim < lim && blanks[to_uchar (*newlim)])
1122 while (newlim < lim && !blanks[to_uchar (*newlim)])
1128 /* If we're ignoring leading blanks when computing the End
1129 of the field, don't start counting bytes until after skipping
1130 past any leading blanks. */
1131 if (key->skipeblanks)
1132 while (ptr < lim && blanks[to_uchar (*ptr)])
1135 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1136 remaining_bytes = lim - ptr;
1137 if (echar < remaining_bytes)
1147 limfield_mb (const struct line *line, const struct keyfield *key)
1149 char *ptr = line->text, *lim = ptr + line->length - 1;
1150 size_t eword = key->eword, echar = key->echar;
1155 memset (&state, '\0', sizeof(mbstate_t));
1158 while (ptr < lim && eword--)
1160 while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1162 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1165 if (ptr < lim && (eword | echar))
1167 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1172 while (ptr < lim && eword--)
1174 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1178 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1181 while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1186 # ifdef POSIX_UNSPECIFIED
1187 /* Make LIM point to the end of (one byte past) the current field. */
1193 for (p = ptr; p < lim;)
1195 if (memcmp (p, tab, tab_length) == 0)
1201 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1210 while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
1214 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1217 while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
1223 /* If we're skipping leading blanks, don't start counting characters
1224 * until after skipping past any leading blanks. */
1225 if (key->skipeblanks)
1226 while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1229 memset (&state, '\0', sizeof(mbstate_t));
1231 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
1232 for (i = 0; i < echar; i++)
1234 GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1236 if (ptr + mblength > lim)
1246 /* Fill BUF reading from FP, moving buf->left bytes from the end
1247 of buf->buf to the beginning first. If EOF is reached and the
1248 file wasn't terminated by a newline, supply one. Set up BUF's line
1249 table too. FILE is the name of the file corresponding to FP.
1250 Return true if some input was read. */
1253 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
1255 struct keyfield const *key = keylist;
1257 size_t line_bytes = buf->line_bytes;
1258 size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1263 if (buf->used != buf->left)
1265 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1266 buf->used = buf->left;
1272 char *ptr = buf->buf + buf->used;
1273 struct line *linelim = buffer_linelim (buf);
1274 struct line *line = linelim - buf->nlines;
1275 size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1276 char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1278 while (line_bytes + 1 < avail)
1280 /* Read as many bytes as possible, but do not read so many
1281 bytes that there might not be enough room for the
1282 corresponding line array. The worst case is when the
1283 rest of the input file consists entirely of newlines,
1284 except that the last byte is not a newline. */
1285 size_t readsize = (avail - 1) / (line_bytes + 1);
1286 size_t bytes_read = fread (ptr, 1, readsize, fp);
1287 char *ptrlim = ptr + bytes_read;
1289 avail -= bytes_read;
1291 if (bytes_read != readsize)
1294 die (_("read failed"), file);
1298 if (buf->buf == ptrlim)
1300 if (ptrlim[-1] != eol)
1305 /* Find and record each line in the just-read input. */
1306 while ((p = memchr (ptr, eol, ptrlim - ptr)))
1310 line->text = line_start;
1311 line->length = ptr - line_start;
1312 mergesize = MAX (mergesize, line->length);
1313 avail -= line_bytes;
1317 /* Precompute the position of the first key for
1319 line->keylim = (key->eword == SIZE_MAX
1321 : limfield (line, key));
1323 if (key->sword != SIZE_MAX)
1324 line->keybeg = begfield (line, key);
1327 if (key->skipsblanks)
1334 while (ismbblank (line_start, ptr - line_start, &mblength))
1335 line_start += mblength;
1340 while (blanks[to_uchar (*line_start)])
1344 line->keybeg = line_start;
1356 buf->used = ptr - buf->buf;
1357 buf->nlines = buffer_linelim (buf) - line;
1358 if (buf->nlines != 0)
1360 buf->left = ptr - line_start;
1361 merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1365 /* The current input line is too long to fit in the buffer.
1366 Double the buffer size and try again. */
1367 buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf));
1371 /* Compare strings A and B containing decimal fractions < 1. Each string
1372 should begin with a decimal point followed immediately by the digits
1373 of the fraction. Strings not of this form are considered to be zero. */
1375 /* The goal here is to take two numbers a and b and compare them
1376 in parallel, instead of converting each one and then comparing the
1377 results. The comparison will most likely stop before a full
1378 conversion would have completed. The algorithm used in the old sort:
1380 Algorithm: fraccompare
1381 Action : compare two decimal fractions
1382 accepts : char *a, char *b
1383 returns : -1 if a<b, 0 if a=b, 1 if a>b.
1386 if *a == decimal_point AND *b == decimal_point
1387 find first character different in a and b.
1388 if both are digits, return the difference *a - *b.
1391 if digit return 1, else 0
1394 if digit return -1, else 0
1395 if *a is a decimal_point
1396 skip past decimal_point and zeros
1397 if digit return 1, else 0
1398 if *b is a decimal_point
1399 skip past decimal_point and zeros
1400 if digit return -1, else 0
1404 fraccompare (register const char *a, register const char *b)
1406 if (*a == decimal_point && *b == decimal_point)
1408 while (*++a == *++b)
1411 if (ISDIGIT (*a) && ISDIGIT (*b))
1414 goto a_trailing_nonzero;
1416 goto b_trailing_nonzero;
1419 else if (*a++ == decimal_point)
1422 while (*a == NUMERIC_ZERO)
1424 return ISDIGIT (*a);
1426 else if (*b++ == decimal_point)
1429 while (*b == NUMERIC_ZERO)
1431 return - ISDIGIT (*b);
1436 /* Compare strings A and B as numbers without explicitly converting them to
1437 machine numbers. Comparatively slow for short strings, but asymptotically
/* Compare the numeric strings A and B character by character.  The
   visible code handles three sign combinations separately and falls
   back on fraccompare once the integer parts have been matched.
   NOTE(review): this listing omits lines of the body (declarations,
   loop bodies, several returns); comments cover only what is visible.  */
1441 numcompare (register const char *a, register const char *b)
/* Skip leading multibyte blanks.  ALEN and BLEN start at no more than
   MB_LEN_MAX and only shrink as blanks are consumed.
   NOTE(review): this appears to cap how many leading blank bytes can
   be skipped here -- confirm against ismbblank's handling of a zero
   length.  */
1453 size_t alen = strnlen (a, MB_LEN_MAX);
1454 size_t blen = strnlen (b, MB_LEN_MAX);
1456 while (ismbblank (a, alen, &mblength))
1457 a += mblength, alen -= mblength;
1458 while (ismbblank (b, blen, &mblength))
1459 b += mblength, blen -= mblength;
/* Single-byte blank skipping via the BLANKS table.  */
1470 while (blanks[to_uchar (tmpa)])
1472 while (blanks[to_uchar (tmpb)])
/* A carries a negation sign.  */
1476 if (tmpa == NEGATION_SIGN)
/* Discard A's leading zeros and thousands separators.  */
1480 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
/* B carries no negation sign: the outcome depends only on whether
   either operand has a nonzero digit.  */
1481 if (tmpb != NEGATION_SIGN)
1483 if (tmpa == decimal_point)
1486 while (tmpa == NUMERIC_ZERO);
1489 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1491 if (tmpb == decimal_point)
1494 while (tmpb == NUMERIC_ZERO);
/* Presumably the both-negative case (the branch structure is not fully
   visible): results below are negated relative to the unsigned
   comparison at the end of the function.  */
1501 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
/* Walk over the common prefix of digits, ignoring thousands
   separators on either side.  */
1503 while (tmpa == tmpb && ISDIGIT (tmpa))
1507 while (IS_THOUSANDS_SEP (tmpa));
1510 while (IS_THOUSANDS_SEP (tmpb));
/* Integer parts are identical: decide on the fractions.  */
1513 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1514 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1515 return -fraccompare (a, b);
/* Count the remaining integer digits of each operand.  */
1519 for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1522 while (IS_THOUSANDS_SEP (tmpa));
1524 for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1527 while (IS_THOUSANDS_SEP (tmpb));
/* Fewer remaining digits in A yields a positive result here.  */
1530 return log_a < log_b ? 1 : -1;
/* A has no negation sign but B does: mirror of the first mixed-sign
   case above.  */
1537 else if (tmpb == NEGATION_SIGN)
1541 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1542 if (tmpb == decimal_point)
1545 while (tmpb == NUMERIC_ZERO);
1548 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1550 if (tmpa == decimal_point)
1553 while (tmpa == NUMERIC_ZERO);
/* Neither operand has a negation sign: strip leading zeros and
   separators from both, then compare as above without negation.  */
1560 while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1562 while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1565 while (tmpa == tmpb && ISDIGIT (tmpa))
1569 while (IS_THOUSANDS_SEP (tmpa));
1572 while (IS_THOUSANDS_SEP (tmpb));
1575 if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1576 || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1577 return fraccompare (a, b);
1581 for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1584 while (IS_THOUSANDS_SEP (tmpa));
1586 for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1589 while (IS_THOUSANDS_SEP (tmpb));
/* Fewer remaining digits in A yields a negative result here.  */
1592 return log_a < log_b ? -1 : 1;
/* Compare SA and SB as floating-point numbers parsed with strtod.
   Both strings are parsed from heap buffers BUFA and BUFB; when
   force_general_numcompare is set, occurrences of the locale's
   thousands separator found where strtod stopped are removed from
   the buffer.
   NOTE(review): no release of BUFA/BUFB is visible in this listing --
   confirm they are freed on every return path.  */
1602 general_numcompare (const char *sa, const char *sb)
1604 /* FIXME: add option to warn about failed conversions. */
1605 /* FIXME: maybe add option to try expensive FP conversion
1606 only if A and B can't be compared more cheaply/accurately. */
1614 struct lconv *lconvp = localeconv ();
1615 size_t thousands_sep_len = strlen (lconvp->thousands_sep);
/* Buffers sized to hold SA and SB including the terminating NUL.  */
1617 bufa = (char *) xmalloc (strlen (sa) + 1);
1618 bufb = (char *) xmalloc (strlen (sb) + 1);
1622 if (force_general_numcompare)
/* EA points at the first character strtod did not consume.  If a
   thousands separator sits there, shift the rest of the buffer left
   over it.
   NOTE(review): memcmp with a zero length returns 0, so an empty
   thousands_sep would always match here -- confirm that case is
   excluded.  memcmp may also read THOUSANDS_SEP_LEN bytes starting at
   EA even when fewer remain before the NUL -- verify.  */
1626 a = strtod (bufa, &ea);
1627 if (memcmp (ea, lconvp->thousands_sep, thousands_sep_len) == 0)
1629 for (p = ea; *(p + thousands_sep_len) != '\0'; p++)
1630 *p = *(p + thousands_sep_len);
/* Same separator removal for the second operand.  */
1639 b = strtod (bufb, &eb);
1640 if (memcmp (eb, lconvp->thousands_sep, thousands_sep_len) == 0)
1642 for (p = eb; *(p + thousands_sep_len) != '\0'; p++)
1643 *p = *(p + thousands_sep_len);
/* Plain conversion, without separator removal.  */
1652 a = strtod (bufa, &ea);
1653 b = strtod (bufb, &eb);
1656 /* Put conversion errors at the start of the collating sequence. */
/* BUFB == EB means strtod consumed nothing from B.  */
1660 return bufb == eb ? 0 : -1;
1664 /* Sort numbers in the usual way, where -0 == +0. Put NaNs after
1665 conversion errors but before numbers; sort them by internal
1666 bit-pattern, for lack of a more portable alternative. */
1672 : memcmp ((char *) &a, (char *) &b, sizeof a));
1675 /* Return the number, in 1..12, of the month named by S, which has length LEN.
1676 Return 0 if the name in S is not recognized. */
/* Single-byte implementation of the month lookup: upper-case the key
   S of length LEN through FOLD_TOUPPER and search MONTHTAB for a name
   that is a prefix of it.  Yields the matching entry's VAL, or 0.
   NOTE(review): lines of the body are missing from this listing.  */
1679 getmonth_uni (const char *s, size_t len)
1683 register int lo = 0, hi = MONTHS_PER_YEAR, result;
/* Skip leading blanks of the key.  */
1685 while (len > 0 && blanks[to_uchar (*s)])
/* Stack copy of the key, folded to upper case.  The allocation size
   follows the key length.  */
1694 month = alloca (len + 1);
1695 for (i = 0; i < len; ++i)
1696 month[i] = fold_toupper[to_uchar (s[i])];
/* Binary search over MONTHTAB; presumably the table is ordered by
   name -- verify where it is initialized.  Only as many characters as
   the table name has are compared.  */
1701 int ix = (lo + hi) / 2;
1703 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1708 while (hi - lo > 1);
/* Final check of the single remaining candidate.  */
1710 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1711 ? monthtab[lo].val : 0);
/* Multibyte implementation of the month lookup.  The key S of length
   LEN is converted to wide characters, upper-cased with towupper,
   converted back to multibyte form in MONTH, and then looked up in
   MONTHTAB exactly as the single-byte variant does.
   NOTE(review): lines of the body are missing from this listing.  */
1718 getmonth_mb (const char *s, size_t len)
1722 register int lo = 0, hi = MONTHS_PER_YEAR, result;
1724 size_t wclength, mblength;
1726 const wchar_t **wpp;
/* Skip leading (multibyte) blanks of the key.  */
1730 while (len > 0 && ismbblank (s, len, &mblength))
/* All scratch buffers live on the stack and scale with LEN.  */
1739 month = (char *) alloca (len + 1);
1741 tmp = (char *) alloca (len + 1);
1742 memcpy (tmp, s, len);
/* mbsrtowcs updates the source pointer through PP; it is expected to
   be NULL afterwards, i.e. the whole input was converted.  */
1744 pp = (const char **)&tmp;
1745 month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
1746 memset (&state, '\0', sizeof(mbstate_t));
1748 wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
/* NOTE(review): this assertion depends on the input bytes; an invalid
   multibyte sequence in the key would make it fail -- confirm that is
   acceptable.  */
1749 assert (wclength != (size_t)-1 && *pp == NULL);
/* Upper-case in the wide-character domain.  */
1751 for (i = 0; i < wclength; i++)
1752 month_wcs[i] = towupper(month_wcs[i]);
1753 month_wcs[i] = L'\0';
/* Convert back into MONTH, which holds at most LEN + 1 bytes.
   NOTE(review): if the upper-cased text needs more bytes than the
   original, the conversion stops early and the assertion below fails
   -- verify.  */
1755 wpp = (const wchar_t **)&month_wcs;
1757 mblength = wcsrtombs (month, wpp, len + 1, &state);
1758 assert (mblength != (-1) && *wpp == NULL);
/* Binary search over MONTHTAB, comparing only as many characters as
   each table name has.  */
1762 int ix = (lo + hi) / 2;
1764 if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1769 while (hi - lo > 1);
1771 result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1772 ? monthtab[lo].val : 0);
1778 /* Compare two lines A and B trying every key in sequence until there
1779 are no more keys or a difference is found. */
/* Single-byte key comparison of lines A and B.  Starting from the
   head of KEYLIST, each key field is compared with the method the key
   selects (numeric, month, locale collation, or byte comparison); the
   result is negated when the deciding key has REVERSE set.
   NOTE(review): this listing omits lines of the body (loop header,
   braces, some branches); comments cover only what is visible.  */
1782 keycompare_uni (const struct line *a, const struct line *b)
1784 struct keyfield const *key = keylist;
1786 /* For the first iteration only, the key positions have been
1787 precomputed for us. */
1788 register char *texta = a->keybeg;
1789 register char *textb = b->keybeg;
1790 register char *lima = a->keylim;
1791 register char *limb = b->keylim;
1797 register char const *translate = key->translate;
1798 register bool const *ignore = key->ignore;
1800 /* Find the lengths. */
/* A limit at or before the start yields an empty field.  */
1801 size_t lena = lima <= texta ? 0 : lima - texta;
1802 size_t lenb = limb <= textb ? 0 : limb - textb;
1804 /* Actually compare the fields. */
1805 if (key->numeric | key->general_numeric)
/* Temporarily NUL-terminate both fields in place so the numeric
   comparators can treat them as C strings; the saved bytes are
   restored right after the call.  */
1807 char savea = *lima, saveb = *limb;
1809 *lima = *limb = '\0';
1810 diff = ((key->numeric ? numcompare : general_numcompare)
1812 *lima = savea, *limb = saveb;
1814 else if (key->month)
1815 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1816 /* Sorting like this may become slow, so in a simple locale the user
1817 can select a faster sort that is similar to ASCII sort. */
1818 else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1820 if (ignore || translate)
/* One stack allocation holds both filtered copies, each with room for
   a terminating byte.  */
1822 char *copy_a = alloca (lena + 1 + lenb + 1);
1823 char *copy_b = copy_a + lena + 1;
1824 size_t new_len_a, new_len_b, i;
1826 /* Ignore and/or translate chars before comparing. */
1827 for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1831 copy_a[new_len_a] = (translate
1832 ? translate[to_uchar (texta[i])]
1834 if (!ignore || !ignore[to_uchar (texta[i])])
1839 copy_b[new_len_b] = (translate
1840 ? translate[to_uchar (textb[i])]
1842 if (!ignore || !ignore[to_uchar (textb[i])])
1847 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1850 diff = - NONZERO (lenb);
1854 diff = xmemcoll (texta, lena, textb, lenb);
/* Byte-wise comparison that skips characters flagged in IGNORE; A and
   B are the expressions giving the current byte of each side.  */
1858 #define CMP_WITH_IGNORE(A, B) \
1863 while (texta < lima && ignore[to_uchar (*texta)]) \
1865 while (textb < limb && ignore[to_uchar (*textb)]) \
1867 if (! (texta < lima && textb < limb)) \
1869 diff = to_uchar (A) - to_uchar (B); \
1876 diff = (texta < lima) - (textb < limb); \
1881 CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1882 translate[to_uchar (*textb)]);
1884 CMP_WITH_IGNORE (*texta, *textb);
1887 diff = - NONZERO (lenb);
/* Translation without an ignore table: compare translated bytes.  */
1894 while (texta < lima && textb < limb)
1896 diff = (to_uchar (translate[to_uchar (*texta++)])
1897 - to_uchar (translate[to_uchar (*textb++)]));
/* Plain byte comparison of the common prefix, then by length.  */
1904 diff = memcmp (texta, textb, MIN (lena, lenb));
1908 diff = lena < lenb ? -1 : lena != lenb;
1918 /* Find the beginning and limit of the next field. */
/* SIZE_MAX in EWORD or SWORD selects the line-based alternative on the
   following visible line instead of limfield or begfield.  */
1919 if (key->eword != SIZE_MAX)
1920 lima = limfield (a, key), limb = limfield (b, key);
1922 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1924 if (key->sword != SIZE_MAX)
1925 texta = begfield (a, key), textb = begfield (b, key);
1928 texta = a->text, textb = b->text;
1929 if (key->skipsblanks)
1931 while (texta < lima && blanks[to_uchar (*texta)])
1933 while (textb < limb && blanks[to_uchar (*textb)])
1944 return key->reverse ? -diff : diff;
/* Multibyte key comparison of lines A and B.  Starting from the head
   of KEYLIST, each key field is compared with the method the key
   selects (numeric, month, or locale collation, optionally after
   dropping ignored characters and upper-casing); the result is negated
   when the deciding key has REVERSE set.
   NOTE(review): this listing omits lines of the body (loop header,
   braces, some declarations); comments cover only what is visible.  */
1949 keycompare_mb (const struct line *a, const struct line *b)
1951 struct keyfield *key = keylist;
1953 /* For the first iteration only, the key positions have been
1954 precomputed for us. */
1955 char *texta = a->keybeg;
1956 char *textb = b->keybeg;
1957 char *lima = a->keylim;
1958 char *limb = b->keylim;
1960 size_t mblength_a, mblength_b;
1962 mbstate_t state_a, state_b;
/* Start both conversions from the initial shift state.  */
1966 memset (&state_a, '\0', sizeof(mbstate_t));
1967 memset (&state_b, '\0', sizeof(mbstate_t));
1971 unsigned char *translate = (unsigned char *) key->translate;
1972 bool const *ignore = key->ignore;
1974 /* Find the lengths. */
/* A limit at or before the start yields an empty field.  */
1975 size_t lena = lima <= texta ? 0 : lima - texta;
1976 size_t lenb = limb <= textb ? 0 : limb - textb;
1978 /* Actually compare the fields. */
1979 if (key->numeric | key->general_numeric)
/* Temporarily NUL-terminate both fields in place so the numeric
   comparators can treat them as C strings; the saved bytes are
   restored right after the call.  */
1981 char savea = *lima, saveb = *limb;
1983 *lima = *limb = '\0';
1984 if (force_general_numcompare)
1985 diff = general_numcompare (texta, textb);
1987 diff = ((key->numeric ? numcompare : general_numcompare)
1989 *lima = savea, *limb = saveb;
1991 else if (key->month)
1992 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1995 if (ignore || translate)
/* One stack allocation holds both filtered copies.
   NOTE(review): each copy has room for the field length plus one
   byte; if an upper-cased character needs more bytes than the
   original, the copy could outgrow that -- verify.  */
1997 char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1998 char *copy_b = copy_a + lena + 1;
1999 size_t new_len_a, new_len_b;
2002 /* Ignore and/or translate chars before comparing. */
/* IGNORE_CHARS copies TEXT into COPY one multibyte character at a
   time.  Bytes that mbrtowc cannot decode are copied through
   unchanged after restoring the saved shift state.  The upper-case
   mapping uses towupper, not toupper: WC is a wide character filled
   in by mbrtowc, and toupper is only defined for values representable
   as unsigned char (or EOF).  */
2003 # define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE) \
2007 char mbc[MB_LEN_MAX]; \
2008 mbstate_t state_wc; \
2010 for (NEW_LEN = i = 0; i < LEN;) \
2012 mbstate_t state_bak; \
2014 state_bak = STATE; \
2015 MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE); \
2017 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1 \
2020 if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1) \
2021 STATE = state_bak; \
2023 COPY[NEW_LEN++] = TEXT[i++]; \
2029 if ((ignore == nonprinting && !iswprint (WC)) \
2030 || (ignore == nondictionary \
2031 && !iswalnum (WC) && !iswblank (WC))) \
2041 uwc = towupper(WC); \
2044 memcpy (mbc, TEXT + i, MBLENGTH); \
2051 memset (&state_wc, '\0', sizeof (mbstate_t)); \
2053 MBLENGTH = wcrtomb (mbc, WC, &state_wc); \
2054 assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0); \
2057 for (j = 0; j < MBLENGTH; j++) \
2058 COPY[NEW_LEN++] = mbc[j]; \
2061 for (j = 0; j < MBLENGTH; j++) \
2062 COPY[NEW_LEN++] = TEXT[i++]; \
2064 COPY[NEW_LEN] = '\0'; \
2067 IGNORE_CHARS (new_len_a, lena, texta, copy_a,
2068 wc_a, mblength_a, state_a);
2069 IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
2070 wc_b, mblength_b, state_b);
2071 diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2074 diff = - NONZERO (lenb);
2078 diff = xmemcoll (texta, lena, textb, lenb);
2088 /* Find the beginning and limit of the next field. */
/* SIZE_MAX (spelled the same way as in keycompare_uni) in EWORD or
   SWORD selects the line-based alternative on the following visible
   line instead of limfield or begfield.  */
2089 if (key->eword != SIZE_MAX)
2090 lima = limfield (a, key), limb = limfield (b, key);
2092 lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2094 if (key->sword != SIZE_MAX)
2095 texta = begfield (a, key), textb = begfield (b, key);
2098 texta = a->text, textb = b->text;
2099 if (key->skipsblanks)
/* Skip leading multibyte blanks of both fields.  */
2101 while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
2102 texta += mblength_a;
2103 while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
2104 textb += mblength_b;
2114 return key->reverse ? -diff : diff;
2118 /* Compare two lines A and B, returning negative, zero, or positive
2119 depending on whether A compares less than, equal to, or greater than B. */
/* Top-level line comparison.  Keys are tried first through
   keycompare; the whole-line comparison below is the fallback, and
   its result is negated when the global REVERSE flag is set.
   NOTE(review): lines of the body are missing from this listing.  */
2122 compare (register const struct line *a, register const struct line *b)
2127 /* First try to compare on the specified keys (if any).
2128 The only two cases with no key at all are unadorned sort,
2129 and unadorned sort -r. */
2132 diff = keycompare (a, b);
/* A nonzero key result, or either of the UNIQUE and STABLE flags,
   makes this condition true.  */
2134 if (diff | unique | stable)
2138 /* If the keys all compare equal (or no keys were specified)
2139 fall through to the default comparison. */
/* The lengths used for comparison leave out the last byte of each
   line.  */
2140 alen = a->length - 1, blen = b->length - 1;
2143 diff = - NONZERO (blen);
2146 else if (HAVE_SETLOCALE && hard_LC_COLLATE)
2147 diff = xmemcoll (a->text, alen, b->text, blen);
/* Byte comparison of the common prefix; equal prefixes are ordered by
   length.  */
2148 else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2149 diff = alen < blen ? -1 : alen != blen;
2151 return reverse ? -diff : diff;
2154 /* Check that the lines read from FILE_NAME come in order. Print a
2155 diagnostic (FILE_NAME, line number, contents of line) to stderr and return
2156 false if they are not in order. Otherwise, print no diagnostic
/* Read FILE_NAME buffer by buffer and compare each line with its
   neighbour.  On the first out-of-order pair, write a
   "disorder" diagnostic with the line number and the line's text to
   stderr.
   NOTE(review): lines of the body are missing from this listing.  */
2160 check (char const *file_name)
2162 FILE *fp = xfopen (file_name, "r");
2163 struct buffer buf; /* Input buffer. */
2164 struct line temp; /* Copy of previous line. */
2166 uintmax_t line_number = 0;
2167 struct keyfield const *key = keylist;
/* NONUNIQUE is the threshold a compare result must reach to count as
   disorder: 0 when UNIQUE is set (so equal lines qualify as well), 1
   otherwise.  */
2168 bool nonunique = ! unique;
2169 bool ordered = true;
2171 initbuf (&buf, sizeof (struct line),
2172 MAX (merge_buffer_size, sort_size));
2175 while (fillbuf (&buf, fp, file_name))
/* LINE starts at the buffer's line limit and LINEBASE lies NLINES
   entries below it; lines are visited by decreasing pointer.  */
2177 struct line const *line = buffer_linelim (&buf);
2178 struct line const *linebase = line - buf.nlines;
2180 /* Make sure the line saved from the old buffer contents is
2181 less than or equal to the first line of the new buffer. */
/* A nonzero ALLOC is what guards the use of TEMP here.  */
2182 if (alloc && nonunique <= compare (&temp, line - 1))
2186 struct line const *disorder_line = line - 1;
/* Line number of the offending line, counted across all buffers read
   so far.  */
2187 uintmax_t disorder_line_number =
2188 buffer_linelim (&buf) - disorder_line + line_number;
2189 char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2190 fprintf (stderr, _("%s: %s:%s: disorder: "),
2191 program_name, file_name,
2192 umaxtostr (disorder_line_number, hr_buf));
2193 write_bytes (disorder_line->text, disorder_line->length, stderr,
2194 _("standard error"));
2200 /* Compare each line in the buffer with its successor. */
2201 while (linebase < --line)
2202 if (nonunique <= compare (line, line - 1))
2203 goto found_disorder;
2205 line_number += buf.nlines;
2207 /* Save the last line of the buffer. */
/* Grow TEMP's storage when the line does not fit.  */
2208 if (alloc < line->length)
2215 alloc = line->length;
2219 while (alloc < line->length);
2221 temp.text = xrealloc (temp.text, alloc);
2223 memcpy (temp.text, line->text, line->length);
2224 temp.length = line->length;
/* Re-base the key pointers so they point into the copied text.  */
2227 temp.keybeg = temp.text + (line->keybeg - line->text);
2228 temp.keylim = temp.text + (line->keylim - line->text);
2232 xfclose (fp, file_name);
2239 /* Merge lines from FILES onto OFP. NFILES cannot be greater than
2240 NMERGE. Close input and output files before returning.
2241 OUTPUT_FILE gives the name of the output file. If it is NULL,
2242 the output file is standard output. If OFP is NULL, the output
2243 file has not been opened yet (or written to, if standard output). */
/* Merge the sorted inputs FILES onto OFP.  Each input gets its own
   buffer; ORD is kept ordered so that cur[ord[0]] is always the next
   line to write.  With UNIQUE set, a line is held back in SAVED and
   written only once a different line shows up.
   NOTE(review): this listing omits lines of the body (braces, loop
   headers, several statements); comments cover only what is visible.  */
2246 mergefps (char **files, register int nfiles,
2247 FILE *ofp, const char *output_file)
2249 FILE *fps[NMERGE]; /* Input streams for each file. */
2250 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2251 struct line saved; /* Saved line storage for unique check. */
2252 struct line const *savedline = NULL;
2253 /* &saved if there is a saved line. */
2254 size_t savealloc = 0; /* Size allocated for the saved line. */
2255 struct line const *cur[NMERGE]; /* Current line in each line table. */
2256 struct line const *base[NMERGE]; /* Base of each line table. */
2257 int ord[NMERGE]; /* Table representing a permutation of fps,
2258 such that cur[ord[0]] is the smallest line
2259 and will be next output. */
2260 register int i, j, t;
2261 struct keyfield const *key = keylist;
2264 /* Read initial lines from each input file. */
/* The loop header has no increment; I is presumably advanced in the
   body on the non-empty path -- those lines are not visible.  */
2265 for (i = 0; i < nfiles; )
2267 fps[i] = xfopen (files[i], "r");
2268 initbuf (&buffer[i], sizeof (struct line),
2269 MAX (merge_buffer_size, sort_size / nfiles));
2270 if (fillbuf (&buffer[i], fps[i], files[i]))
/* CUR starts one entry below the buffer's line limit and BASE lies
   NLINES entries below the limit.  */
2272 struct line const *linelim = buffer_linelim (&buffer[i]);
2273 cur[i] = linelim - 1;
2274 base[i] = linelim - buffer[i].nlines;
2279 /* fps[i] is empty; eliminate it from future consideration. */
2280 xfclose (fps[i], files[i]);
2282 free (buffer[i].buf);
/* Close the gap in FILES left by the removed entry.  */
2284 for (j = i; j < nfiles; ++j)
2285 files[j] = files[j + 1];
2290 ofp = xfopen (output_file, "w");
2292 /* Set up the ord table according to comparisons among input lines.
2293 Since this only reorders two items if one is strictly greater than
2294 the other, it is stable. */
2295 for (i = 0; i < nfiles; ++i)
/* After each swap I is reset to 0, so the scan restarts from the
   first pair on the next iteration.  */
2297 for (i = 1; i < nfiles; ++i)
2298 if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2299 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2301 /* Repeatedly output the smallest line until no input remains. */
2304 struct line const *smallest = cur[ord[0]];
2306 /* If uniquified output is turned on, output only the first of
2307 an identical series of lines. */
/* A nonzero compare result means SMALLEST differs from the saved
   line, so the saved line can be written.  */
2310 if (savedline && compare (savedline, smallest))
2313 write_bytes (saved.text, saved.length, ofp, output_file);
/* Grow the save area by doubling until SMALLEST fits, then copy the
   line and re-base its key pointers into the copy.  */
2318 if (savealloc < smallest->length)
2323 savealloc = smallest->length;
2326 while ((savealloc *= 2) < smallest->length);
2328 saved.text = xrealloc (saved.text, savealloc);
2330 saved.length = smallest->length;
2331 memcpy (saved.text, smallest->text, saved.length);
2335 saved.text + (smallest->keybeg - smallest->text);
2337 saved.text + (smallest->keylim - smallest->text);
2342 write_bytes (smallest->text, smallest->length, ofp, output_file);
2344 /* Check if we need to read more lines into core. */
/* Lines remain in this input's buffer: step to the next one.  */
2345 if (base[ord[0]] < smallest)
2346 cur[ord[0]] = smallest - 1;
/* Buffer exhausted: try to refill it from the same input.  */
2349 if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
2351 struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2352 cur[ord[0]] = linelim - 1;
2353 base[ord[0]] = linelim - buffer[ord[0]].nlines;
2357 /* We reached EOF on fps[ord[0]]. */
2358 for (i = 1; i < nfiles; ++i)
2359 if (ord[i] > ord[0])
/* Release the finished input and compact every per-input table.  */
2362 xfclose (fps[ord[0]], files[ord[0]]);
2363 zaptemp (files[ord[0]]);
2364 free (buffer[ord[0]].buf);
2365 for (i = ord[0]; i < nfiles; ++i)
2367 fps[i] = fps[i + 1];
2368 files[i] = files[i + 1];
2369 buffer[i] = buffer[i + 1];
2370 cur[i] = cur[i + 1];
2371 base[i] = base[i + 1];
2373 for (i = 0; i < nfiles; ++i)
2374 ord[i] = ord[i + 1];
2379 /* The new line just read in may be larger than other lines
2380 already in core; push it back in the queue until we encounter
2381 a line larger than it. */
2382 for (i = 1; i < nfiles; ++i)
2384 t = compare (cur[ord[0]], cur[ord[i]]);
/* Presumably applied when the lines compare equal, ordering them by
   input index -- the guarding condition is not visible.  */
2386 t = ord[0] - ord[i];
2391 for (j = 1; j < i; ++j)
2392 ord[j - 1] = ord[j];
/* Write out the line still held back for the unique check.  */
2396 if (unique && savedline)
2398 write_bytes (saved.text, saved.length, ofp, output_file);
2402 xfclose (ofp, output_file);
2405 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2406 and HI (with NHI members). T, LO, and HI point just past their
2407 respective arrays, and the arrays are in reverse order. NLO and
2408 NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */
/* Merge the line arrays LO (NLO entries) and HI (NHI entries) into T.
   All three pointers address the slot just past their array, so the
   next candidates are lo[-1] and hi[-1].
   NOTE(review): most of the loop body is missing from this listing.  */
2411 mergelines (struct line *t,
2412 struct line const *lo, size_t nlo,
2413 struct line const *hi, size_t nhi)
/* "<=" also covers the case where the two lines compare equal.  */
2416 if (compare (lo - 1, hi - 1) <= 0)
2421 /* HI - NHI equalled T - (NLO + NHI) when this function
2422 began. Therefore HI must equal T now, and there is no
2423 need to copy from HI to T. */
2441 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2442 NLINES must be at least 2.
2443 The input and output arrays are in reverse order, and LINES and
2444 TEMP point just past the end of their respective arrays.
2446 Use a recursive divide-and-conquer algorithm, in the style
2447 suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use
2448 the optimization suggested by exercise 5.2.4-10; this requires room
2449 for only 1.5*N lines, rather than the usual 2*N lines. Knuth
2450 writes that this memory optimization was originally published by
2451 D. A. Bell, Comp J. 1 (1958), 75. */
/* Sort the NLINES entries that end just before LINES, using TEMP as
   scratch space.  The array is split into a low part of NLINES / 2
   entries and a high part holding the rest.
   NOTE(review): branch structure (braces, else) is missing from this
   listing; comments cover only the visible statements.  */
2454 sortlines (struct line *lines, size_t nlines, struct line *temp)
/* Pair handling: exchange lines[-1] and lines[-2] when lines[-1]
   compares greater.  */
2458 if (0 < compare (&lines[-1], &lines[-2]))
2460 struct line tmp = lines[-1];
2461 lines[-1] = lines[-2];
/* LO shares its end with LINES; HI ends NLO entries lower.  */
2467 size_t nlo = nlines / 2;
2468 size_t nhi = nlines - nlo;
2469 struct line *lo = lines;
2470 struct line *hi = lines - nlo;
2471 struct line *sorted_lo = temp;
/* The high part is sorted in place.  */
2473 sortlines (hi, nhi, temp);
/* The low part is sorted into TEMP; the single-entry copy on the next
   visible line is presumably the alternative for a one-line low part
   -- the guard is not visible.  */
2475 sortlines_temp (lo, nlo, sorted_lo);
2477 sorted_lo[-1] = lo[-1];
2479 mergelines (lines, sorted_lo, nlo, hi, nhi);
2483 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2484 rather than sorting in place. */
/* Variant of sortlines that leaves the sorted result in TEMP.
   NOTE(review): branch structure (braces, else) is missing from this
   listing; comments cover only the visible statements.  */
2487 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
/* Pair handling: SWAP is 1 when lines[-1] compares greater than
   lines[-2]; it is used as an index offset so the two entries land in
   TEMP exchanged in that case and unchanged otherwise.  */
2491 bool swap = (0 < compare (&lines[-1], &lines[-2]));
2492 temp[-1] = lines[-1 - swap];
2493 temp[-2] = lines[-2 + swap];
/* LO shares its end with LINES; HI ends NLO entries lower, and its
   sorted copy goes to the matching position inside TEMP.  */
2497 size_t nlo = nlines / 2;
2498 size_t nhi = nlines - nlo;
2499 struct line *lo = lines;
2500 struct line *hi = lines - nlo;
2501 struct line *sorted_hi = temp - nlo;
2503 sortlines_temp (hi, nhi, sorted_hi);
/* The low part is sorted in place before the final merge into TEMP.  */
2505 sortlines (lo, nlo, temp);
2507 mergelines (temp, lo, nlo, sorted_hi, nhi);
2511 /* Return the index of the first of NFILES FILES that is the same file
2512 as OUTFILE. If none can be the same, return NFILES.
2514 This test ensures that an otherwise-erroneous use like
2515 "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2516 It's not clear that POSIX requires this nicety.
2517 Detect common error cases, but don't try to catch obscure cases like
2518 "cat ... FILE ... | sort -m -o FILE"
2519 where traditional "sort" doesn't copy the input and where
2520 people should know that they're getting into trouble anyway.
2521 Catching these obscure cases would slow down performance in
/* Scan the NFILES entries of FILES for one that refers to the same
   file as OUTFILE, first by name and then by comparing stat results.
   NOTE(review): return statements and some conditions are missing
   from this listing.  */
2525 first_same_file (char * const *files, int nfiles, char const *outfile)
/* Presumably records that OUTSTAT has been filled in, so the output
   is stat'ed only once -- the assignment is not visible.  */
2528 bool got_outstat = false;
2529 struct stat instat, outstat;
2531 for (i = 0; i < nfiles; i++)
/* "-" names standard input.  */
2533 bool standard_input = STREQ (files[i], "-");
/* Name match; an input of "-" is excluded from this test.  */
2535 if (outfile && STREQ (outfile, files[i]) && ! standard_input)
/* Stat the output: by name, or via STDOUT_FILENO in the other arm of
   the conditional.  */
2542 ? stat (outfile, &outstat)
2543 : fstat (STDOUT_FILENO, &outstat))
/* Stat the input (STDIN_FILENO for "-") and compare with SAME_INODE.  */
2548 if (((standard_input
2549 ? fstat (STDIN_FILENO, &instat)
2550 : stat (files[i], &instat))
2552 && SAME_INODE (instat, outstat))
2559 /* Merge NFILES FILES onto OUTPUT_FILE. However, merge at most
2560 MAX_MERGE input files directly onto OUTPUT_FILE. MAX_MERGE cannot
2561 exceed NMERGE. A null OUTPUT_FILE stands for standard output. */
/* Merge NFILES inputs onto OUTPUT_FILE.  While more than MAX_MERGE
   inputs remain, groups of inputs are merged into temporary files;
   the last call merges what is left directly onto OUTPUT_FILE.
   NOTE(review): lines of the body are missing from this listing.  */
2564 merge (char **files, int nfiles, int max_merge, char const *output_file)
2566 while (max_merge < nfiles)
/* Full groups of NMERGE inputs, each merged into a new temp file.  */
2571 for (i = 0; i < nfiles / NMERGE; ++i)
2573 temp = create_temp_file (&tfp);
2574 mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
/* The remaining NFILES % NMERGE inputs form a final, smaller group.  */
2577 temp = create_temp_file (&tfp);
2578 mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
/* A NULL stream is passed for the final merge.  */
2585 mergefps (files, nfiles, NULL, output_file);
2588 /* Sort NFILES FILES onto OUTPUT_FILE. */
/* Sort the NFILES inputs FILES onto OUTPUT_FILE.  Input is read into
   one buffer; each buffer-full is sorted with sortlines and written
   either straight to OUTPUT_FILE (when it is the only run: end of
   input, no files left, no temp files yet, nothing left over in the
   buffer) or to a new temporary file.  If the output was not created
   that way, the temporary files are collected from the TEMPHEAD list
   and handed to merge.
   NOTE(review): this listing omits lines of the body; comments cover
   only what is visible.  */
2591 sort (char * const *files, int nfiles, char const *output_file)
2594 int n_temp_files = 0;
2595 bool output_file_created = false;
2601 char const *temp_output;
2602 char const *file = *files;
2603 FILE *fp = xfopen (file, "r");
/* Per-line bookkeeping estimate: one and a half struct line.  */
2605 size_t bytes_per_line = (2 * sizeof (struct line)
2606 - sizeof (struct line) / 2);
2609 initbuf (&buf, bytes_per_line,
2610 sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2615 while (fillbuf (&buf, fp, file))
2618 struct line *linebase;
/* At end of file with inputs remaining, check whether the buffer
   still has room for more than one additional line.  */
2620 if (buf.eof && nfiles
2621 && (bytes_per_line + 1
2622 < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2624 /* End of file, but there is more input and buffer room.
2625 Concatenate the next input file; this is faster in
2627 buf.left = buf.used;
2631 line = buffer_linelim (&buf);
2632 linebase = line - buf.nlines;
2634 sortlines (line, buf.nlines, linebase);
2635 if (buf.eof && !nfiles && !n_temp_files && !buf.left)
2638 tfp = xfopen (output_file, "w");
2639 temp_output = output_file;
2640 output_file_created = true;
2645 temp_output = create_temp_file (&tfp);
2651 write_bytes (line->text, line->length, tfp, temp_output);
2653 while (linebase < line && compare (line, line - 1) == 0)
2656 while (linebase < line);
2658 xfclose (tfp, temp_output);
2660 if (output_file_created)
2669 if (! output_file_created)
2671 int i = n_temp_files;
2672 struct tempnode *node;
2673 char **tempfiles = xnmalloc (n_temp_files, sizeof *tempfiles);
2674 for (node = temphead; i > 0; node = node->next)
2675 tempfiles[--i] = node->name;
2676 merge (tempfiles, n_temp_files, NMERGE, output_file);
2681 /* Insert key KEY at the end of the key list. */
/* Link KEY into the global KEYLIST.
   NOTE(review): the statements that store KEY are missing from this
   listing.  */
2684 insertkey (struct keyfield *key)
/* P walks the chain of NEXT links and stops at the first null link.  */
2686 struct keyfield **p;
2688 for (p = &keylist; *p; p = &(*p)->next)
2694 /* Report a bad field specification SPEC, with extra info MSGID. */
/* Forward declaration, followed by the definition.  The function
   reports SPEC as an invalid field specification through error with
   status SORT_FAILURE.
   NOTE(review): the remaining arguments of the error call are missing
   from this listing.  */
2696 static void badfieldspec (char const *, char const *)
2699 badfieldspec (char const *spec, char const *msgid)
2701 error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2706 /* Parse the leading integer in STRING and store the resulting value
2707 (which must fit into size_t) into *VAL. Return the address of the
2708 suffix after the integer. If MSGID is NULL, return NULL after
2709 failure; otherwise, report MSGID and exit on failure. */
/* Parse an unsigned decimal count at the start of STRING with
   xstrtoumax and dispatch on its status code.  The visible error
   paths report through error with status SORT_FAILURE, using the
   translated MSGID as the message prefix.
   NOTE(review): the success path and the stores into *VAL are missing
   from this listing.  */
2712 parse_field_count (char const *string, size_t *val, char const *msgid)
/* Base 10, with an empty string passed for the valid suffixes.  */
2717 switch (xstrtoumax (string, &suffix, 10, &n, ""))
2720 case LONGINT_INVALID_SUFFIX_CHAR:
/* Overflow, with or without a trailing suffix character.  */
2725 case LONGINT_OVERFLOW:
2726 case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
/* "%.*s" prints only the part of STRING before SUFFIX.  */
2728 error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2729 _(msgid), (int) (suffix - string), string);
2732 case LONGINT_INVALID:
2734 error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2742 /* Handle interrupts and hangups. */
/* Handler installed for the signals caught by this program.
   NOTE(review): the statements between the two signal calls are
   missing from this listing.  */
2745 sighandler (int sig)
/* Without SA_NOCLDSTOP, ignore SIG first.  */
2747 #ifndef SA_NOCLDSTOP
2748 signal (sig, SIG_IGN);
/* Restore the default disposition for SIG.  */
2753 signal (sig, SIG_DFL);
2757 /* Set the ordering options for KEY specified in S.
2758 Return the address of the first character in S that
2759 is not a valid ordering option.
2760 BLANKTYPE is the kind of blanks that 'b' should skip. */
/* Apply ordering option characters from S to KEY.
   NOTE(review): the loop, the switch, and its case labels are missing
   from this listing, so the option letter that triggers each
   assignment below cannot be read off the visible code.  */
2763 set_ordering (register const char *s, struct keyfield *key,
2764 enum blanktype blanktype)
/* BLANKTYPE selects which end(s) of the field have blanks skipped:
   bl_start, bl_end, or bl_both for the two together.  */
2771 if (blanktype == bl_start || blanktype == bl_both)
2772 key->skipsblanks = true;
2773 if (blanktype == bl_end || blanktype == bl_both)
2774 key->skipeblanks = true;
2777 key->ignore = nondictionary;
2780 key->translate = fold_toupper;
2783 key->general_numeric = true;
2786 /* Option order should not matter, so don't let -i override
2787 -d. -d implies -i, but -i does not imply -d. */
2789 key->ignore = nonprinting;
2795 key->numeric = true;
2798 key->reverse = true;
2808 static struct keyfield *
2811 struct keyfield *key = xzalloc (sizeof *key);
2812 key->eword = SIZE_MAX;
2817 main (int argc, char **argv)
2819 struct keyfield *key;
2820 struct keyfield gkey;
2823 bool checkonly = false;
2824 bool mergeonly = false;
2826 bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2827 bool obsolete_usage = (posix2_version () < 200112);
2828 char const *short_options = (obsolete_usage
2829 ? COMMON_SHORT_OPTIONS "y::"
2830 : COMMON_SHORT_OPTIONS "y:");
2831 char *minus = "-", **files;
2832 char const *outfile = NULL;
2834 initialize_main (&argc, &argv);
2835 program_name = argv[0];
2836 setlocale (LC_ALL, "");
2837 bindtextdomain (PACKAGE, LOCALEDIR);
2838 textdomain (PACKAGE);
2842 initialize_exit_failure (SORT_FAILURE);
2843 atexit (close_stdout);
2845 hard_LC_COLLATE = hard_locale (LC_COLLATE);
2846 #if HAVE_NL_LANGINFO
2847 hard_LC_TIME = hard_locale (LC_TIME);
2851 /* Let's get locale's representation of the decimal point */
2853 struct lconv const *lconvp = localeconv ();
2855 decimal_point = *lconvp->decimal_point;
2856 if (! decimal_point || lconvp->decimal_point[1])
2858 decimal_point = C_DECIMAL_POINT;
2859 if (lconvp->decimal_point[0] && lconvp->decimal_point[1])
2860 force_general_numcompare = 1;
2863 /* We don't support multibyte thousands separators yet. */
2864 th_sep = *lconvp->thousands_sep;
2865 if (! th_sep || lconvp->thousands_sep[1])
2867 th_sep = CHAR_MAX + 1;
2868 if (lconvp->thousands_sep[0] && lconvp->thousands_sep[1])
2869 force_general_numcompare = 1;
2877 inittables = inittables_mb;
2878 begfield = begfield_mb;
2879 limfield = limfield_mb;
2880 getmonth = getmonth_mb;
2881 keycompare = keycompare_mb;
2886 inittables = inittables_uni;
2887 begfield = begfield_uni;
2888 limfield = limfield_uni;
2889 keycompare = keycompare_uni;
2890 getmonth = getmonth_uni;
2893 have_read_stdin = false;
2898 static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2899 enum { nsigs = sizeof sig / sizeof sig[0] };
2902 struct sigaction act;
2904 sigemptyset (&caught_signals);
2905 for (i = 0; i < nsigs; i++)
2907 sigaction (sig[i], NULL, &act);
2908 if (act.sa_handler != SIG_IGN)
2909 sigaddset (&caught_signals, sig[i]);
2912 act.sa_handler = sighandler;
2913 act.sa_mask = caught_signals;
2916 for (i = 0; i < nsigs; i++)
2917 if (sigismember (&caught_signals, sig[i]))
2918 sigaction (sig[i], &act, NULL);
2920 for (i = 0; i < nsigs; i++)
2921 if (signal (sig[i], SIG_IGN) != SIG_IGN)
2922 signal (sig[i], sighandler);
2926 gkey.sword = gkey.eword = SIZE_MAX;
2928 gkey.translate = NULL;
2929 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2930 gkey.skipsblanks = gkey.skipeblanks = false;
2932 files = xnmalloc (argc, sizeof *files);
2936 /* Parse an operand as a file after "--" was seen; or if
2937 pedantic and a file was seen, unless the POSIX version
2938 predates 1003.1-2001 and -c was not seen and the operand is
2939 "-o FILE" or "-oFILE". */
2942 || (posixly_correct && nfiles != 0
2943 && ! (obsolete_usage
2946 && argv[optind][0] == '-' && argv[optind][1] == 'o'
2947 && (argv[optind][2] || optind + 1 != argc)))
2948 || ((c = getopt_long (argc, argv, short_options,
2949 long_options, NULL))
2954 files[nfiles++] = argv[optind++];
2960 if (obsolete_usage && optarg[0] == '+')
2962 /* Treat +POS1 [-POS2] as a key if possible; but silently
2963 treat an operand as a file if it is not a valid +POS1. */
2965 s = parse_field_count (optarg + 1, &key->sword, NULL);
2967 s = parse_field_count (s + 1, &key->schar, NULL);
2968 if (! (key->sword | key->schar))
2969 key->sword = SIZE_MAX;
2970 if (! s || *set_ordering (s, key, bl_start))
2977 if (optind != argc && argv[optind][0] == '-'
2978 && ISDIGIT (argv[optind][1]))
2980 char const *optarg1 = argv[optind++];
2981 s = parse_field_count (optarg1 + 1, &key->eword,
2982 N_("invalid number after `-'"));
2984 s = parse_field_count (s + 1, &key->echar,
2985 N_("invalid number after `.'"));
2986 if (*set_ordering (s, key, bl_end))
2987 badfieldspec (optarg1,
2988 N_("stray character in field spec"));
2994 files[nfiles++] = optarg;
3009 set_ordering (str, &gkey, bl_both);
3021 s = parse_field_count (optarg, &key->sword,
3022 N_("invalid number at field start"));
3025 /* Provoke with `sort -k0' */
3026 badfieldspec (optarg, N_("field number is zero"));
3030 s = parse_field_count (s + 1, &key->schar,
3031 N_("invalid number after `.'"));
3034 /* Provoke with `sort -k1.0' */
3035 badfieldspec (optarg, N_("character offset is zero"));
3038 if (! (key->sword | key->schar))
3039 key->sword = SIZE_MAX;
3040 s = set_ordering (s, key, bl_start);
3043 key->eword = SIZE_MAX;
3049 s = parse_field_count (s + 1, &key->eword,
3050 N_("invalid number after `,'"));
3053 /* Provoke with `sort -k1,0' */
3054 badfieldspec (optarg, N_("field number is zero"));
3057 s = parse_field_count (s + 1, &key->echar,
3058 N_("invalid number after `.'"));
3061 /* `-k 2,3' is equivalent to `+1 -3'. */
3064 s = set_ordering (s, key, bl_end);
3067 badfieldspec (optarg, N_("stray character in field spec"));
3076 if (outfile && !STREQ (outfile, optarg))
3077 error (SORT_FAILURE, 0, _("multiple output files specified"));
3086 specify_sort_size (optarg);
3091 char newtab[MB_LEN_MAX + 1];
3092 size_t newtab_length = 1;
3093 strncpy (newtab, optarg, MB_LEN_MAX);
3095 error (SORT_FAILURE, 0, _("empty tab"));
3103 memset (&state, '\0', sizeof (mbstate_t));
3104 newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, MB_LEN_MAX), &state);
3105 switch (newtab_length)
3113 if (optarg[newtab_length])
3115 /* Provoke with `sort -txx'. Complain about
3116 "multi-character tab" instead of "multibyte tab", so
3117 that the diagnostic's wording does not need to be
3118 changed once multibyte characters are supported. */
3119 error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
3128 if (STREQ (optarg, "\\0"))
3132 /* Provoke with `sort -txx'. Complain about
3133 "multi-character tab" instead of "multibyte tab", so
3134 that the diagnostic's wording does not need to be
3135 changed once multibyte characters are supported. */
3136 error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
3140 if (!tab_default && (tab_length != newtab_length
3141 || memcmp(tab, newtab, tab_length) != 0))
3142 error (SORT_FAILURE, 0, _("incompatible tabs"));
3143 memcpy(tab, newtab, newtab_length);
3144 tab_length = newtab_length;
3145 tab_default = false;
3150 add_temp_dir (optarg);
3158 /* Accept and ignore e.g. -y0 for compatibility with Solaris
3159 2.x through Solaris 7. -y is marked as obsolete starting
3167 case_GETOPT_HELP_CHAR;
3169 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3172 usage (SORT_FAILURE);
3176 /* Inheritance of global options to individual keys. */
3177 for (key = keylist; key; key = key->next)
3178 if (! (key->ignore || key->translate
3179 || (key->skipsblanks | key->reverse
3180 | key->skipeblanks | key->month | key->numeric
3181 | key->general_numeric)))
3183 key->ignore = gkey.ignore;
3184 key->translate = gkey.translate;
3185 key->skipsblanks = gkey.skipsblanks;
3186 key->skipeblanks = gkey.skipeblanks;
3187 key->month = gkey.month;
3188 key->numeric = gkey.numeric;
3189 key->general_numeric = gkey.general_numeric;
3190 key->reverse = gkey.reverse;
3193 if (!keylist && (gkey.ignore || gkey.translate
3194 || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3195 | gkey.numeric | gkey.general_numeric)))
3197 reverse = gkey.reverse;
3199 if (temp_dir_count == 0)
3201 char const *tmp_dir = getenv ("TMPDIR");
3202 add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3215 error (0, 0, _("extra operand %s not allowed with -c"),
3217 usage (SORT_FAILURE);
3220 /* POSIX requires that sort return 1 IFF invoked with -c and the
3221 input is not properly sorted. */
3222 exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3227 int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
3228 merge (files, nfiles, max_merge, outfile);
3231 sort (files, nfiles, outfile);
3233 if (have_read_stdin && fclose (stdin) == EOF)
3234 die (_("close failed"), "-");
3236 exit (EXIT_SUCCESS);