contrib/gnu-sort/src/sort.c

   1 /* $FreeBSD$ */
   2 /* sort - sort lines of text (with all kinds of options).
   3    Copyright (C) 88, 1991-2004 Free Software Foundation, Inc.
   4
   5    This program is free software; you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 2, or (at your option)
   8    any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program; if not, write to the Free Software
  17    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18
  19    Written December 1988 by Mike Haertel.
  20    The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
  21    or (US mail) as Mike Haertel c/o Free Software Foundation.
  22
  23    Ørn E. Hansen added NLS support in 1997.  */
  24
  25 #include <config.h>
  26
  27 #include <assert.h>
  28 #include <getopt.h>
  29 #include <sys/types.h>
  30 #include <signal.h>
  31 #include <stdio.h>
  32
  33 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
  34 /* Get mbstate_t, mbrtowc(), wcwidth().  */
  35 #if HAVE_WCHAR_H
  36 # include <wchar.h>
  37 #endif
  38
  39 /* Get isw* functions. */
  40 #if HAVE_WCTYPE_H
  41 # include <wctype.h>
  42 #endif
  43
  44 /* Get nl_langinfo(). */
  45 #if HAVE_LANGINFO_CODESET
  46 # include <langinfo.h>
  47 #endif
  48
  49 /* Include this after wctype.h so that we `#undef' ISPRINT
  50    (from Solaris's euc.h, from widec.h, from wctype.h) before
  51    redefining and using it. */
  52 #include "system.h"
  53 #include "error.h"
  54 #include "hard-locale.h"
  55 #include "inttostr.h"
  56 #include "long-options.h"
  57 #include "physmem.h"
  58 #include "posixver.h"
  59 #include "quote.h"
  60 #include "stdio-safer.h"
  61 #include "xmemcoll.h"
  62 #include "xstrtol.h"
  63
  64 #if HAVE_SYS_RESOURCE_H
  65 # include <sys/resource.h>
  66 #endif
  67 #ifndef RLIMIT_DATA
  68 struct rlimit { size_t rlim_cur; };
  69 # define getrlimit(Resource, Rlp) (-1)
  70 #endif
  71
  72 /* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
  73    installation; work around this configuration error.  */
  74 #if !defined MB_LEN_MAX || MB_LEN_MAX == 1
  75 # define MB_LEN_MAX 16
  76 #endif
  77
  78 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
  79 #if HAVE_MBRTOWC && defined mbstate_t
  80 # define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
  81 #endif
  82
  83 /* The official name of this program (e.g., no `g' prefix).  */
  84 #define PROGRAM_NAME "sort"
  85
  86 #define AUTHORS "Mike Haertel", "Paul Eggert"
  87
  88 #if HAVE_LANGINFO_CODESET
  89 # include <langinfo.h>
  90 #endif
  91
  92 #ifndef SA_NOCLDSTOP
  93 # define sigprocmask(How, Set, Oset) /* empty */
  94 # define sigset_t int
  95 #endif
  96
  97 #ifndef STDC_HEADERS
  98 double strtod ();
  99 #endif
 100
 101 #define UCHAR_LIM (UCHAR_MAX + 1)
 102
 103 #ifndef DEFAULT_TMPDIR
 104 # define DEFAULT_TMPDIR "/tmp"
 105 #endif
 106
 107 /* Exit statuses.  */
 108 enum
 109   {
 110     /* POSIX says to exit with status 1 if invoked with -c and the
 111        input is not properly sorted.  */
 112     SORT_OUT_OF_ORDER = 1,
 113
 114     /* POSIX says any other irregular exit must exit with a status
 115        code greater than 1.  */
 116     SORT_FAILURE = 2
 117   };
 118
 119 #define C_DECIMAL_POINT '.'
 120 #define NEGATION_SIGN   '-'
 121 #define NUMERIC_ZERO    '0'
 122
 123 #if HAVE_SETLOCALE
 124
 125 static char decimal_point;
 126 static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */
 127 static int force_general_numcompare = 0;
 128
 129 /* Nonzero if the corresponding locales are hard.  */
 130 static bool hard_LC_COLLATE;
 131 # if HAVE_NL_LANGINFO
 132 static bool hard_LC_TIME;
 133 # endif
 134
 135 # define IS_THOUSANDS_SEP(x) ((x) == th_sep)
 136
 137 #else
 138
 139 # define decimal_point C_DECIMAL_POINT
 140 # define IS_THOUSANDS_SEP(x) false
 141
 142 #endif
 143
 144 #define NONZERO(x) (x != 0)
 145
 146 /* get a multibyte character's byte length. */
 147 #define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE)                  \
 148   do                                                                    \
 149     {                                                                   \
 150       wchar_t wc;                                                       \
 151       mbstate_t state_bak;                                              \
 152                                                                         \
 153       state_bak = STATE;                                                \
 154       mblength = mbrtowc (&wc, PTR, LIM - PTR, &STATE);                 \
 155                                                                         \
 156       switch (MBLENGTH)                                                 \
 157         {                                                               \
 158         case (size_t)-1:                                                \
 159         case (size_t)-2:                                                \
 160           STATE = state_bak;                                            \
 161                 /* Fall through. */                                     \
 162         case 0:                                                         \
 163           MBLENGTH = 1;                                                 \
 164       }                                                                 \
 165     }                                                                   \
 166   while (0)
 167
 168 /* The kind of blanks for '-b' to skip in various options. */
 169 enum blanktype { bl_start, bl_end, bl_both };
 170
 171 /* The character marking end of line. Default to \n. */
 172 static char eolchar = '\n';
 173
 174 /* Lines are held in core as counted strings. */
 175 struct line
 176 {
 177   char *text;                   /* Text of the line. */
 178   size_t length;                /* Length including final newline. */
 179   char *keybeg;                 /* Start of first key. */
 180   char *keylim;                 /* Limit of first key. */
 181 };
 182
 183 /* Input buffers. */
 184 struct buffer
 185 {
 186   char *buf;                    /* Dynamically allocated buffer,
 187                                    partitioned into 3 regions:
 188                                    - input data;
 189                                    - unused area;
 190                                    - an array of lines, in reverse order.  */
 191   size_t used;                  /* Number of bytes used for input data.  */
 192   size_t nlines;                /* Number of lines in the line array.  */
 193   size_t alloc;                 /* Number of bytes allocated. */
 194   size_t left;                  /* Number of bytes left from previous reads. */
 195   size_t line_bytes;            /* Number of bytes to reserve for each line. */
 196   bool eof;                     /* An EOF has been read.  */
 197 };
 198
 199 struct keyfield
 200 {
 201   size_t sword;                 /* Zero-origin 'word' to start at. */
 202   size_t schar;                 /* Additional characters to skip. */
 203   size_t eword;                 /* Zero-origin first word after field. */
 204   size_t echar;                 /* Additional characters in field. */
 205   bool const *ignore;           /* Boolean array of characters to ignore. */
 206   char const *translate;        /* Translation applied to characters. */
 207   bool skipsblanks;             /* Skip leading blanks when finding start.  */
 208   bool skipeblanks;             /* Skip leading blanks when finding end.  */
 209   bool numeric;                 /* Flag for numeric comparison.  Handle
 210                                    strings of digits with optional decimal
 211                                    point, but no exponential notation. */
 212   bool general_numeric;         /* Flag for general, numeric comparison.
 213                                    Handle numbers in exponential notation. */
 214   bool month;                   /* Flag for comparison by month name. */
 215   bool reverse;                 /* Reverse the sense of comparison. */
 216   struct keyfield *next;        /* Next keyfield to try. */
 217 };
 218
 219 struct month
 220 {
 221   char const *name;
 222   int val;
 223 };
 224
 225 /* The name this program was run with. */
 226 char *program_name;
 227
 228 /* FIXME: None of these tables work with multibyte character sets.
 229    Also, there are many other bugs when handling multibyte characters.
 230    One way to fix this is to rewrite `sort' to use wide characters
 231    internally, but doing this with good performance is a bit
 232    tricky.  */
 233
 234 /* Table of blanks.  */
 235 static bool blanks[UCHAR_LIM];
 236
 237 /* Table of non-printing characters. */
 238 static bool nonprinting[UCHAR_LIM];
 239
 240 /* Table of non-dictionary characters (not letters, digits, or blanks). */
 241 static bool nondictionary[UCHAR_LIM];
 242
 243 /* Translation table folding lower case to upper.  */
 244 static char fold_toupper[UCHAR_LIM];
 245
 246 #define MONTHS_PER_YEAR 12
 247
 248 /* Table mapping month names to integers.
 249    Alphabetic order allows binary search. */
 250 static struct month monthtab[] =
 251 {
 252   {"APR", 4},
 253   {"AUG", 8},
 254   {"DEC", 12},
 255   {"FEB", 2},
 256   {"JAN", 1},
 257   {"JUL", 7},
 258   {"JUN", 6},
 259   {"MAR", 3},
 260   {"MAY", 5},
 261   {"NOV", 11},
 262   {"OCT", 10},
 263   {"SEP", 9}
 264 };
 265
 266 /* During the merge phase, the number of files to merge at once. */
 267 #define NMERGE 16
 268
 269 /* Minimum size for a merge or check buffer.  */
 270 #define MIN_MERGE_BUFFER_SIZE (2 + sizeof (struct line))
 271
 272 /* Minimum sort size; the code might not work with smaller sizes.  */
 273 #define MIN_SORT_SIZE (NMERGE * MIN_MERGE_BUFFER_SIZE)
 274
 275 /* The number of bytes needed for a merge or check buffer, which can
 276    function relatively efficiently even if it holds only one line.  If
 277    a longer line is seen, this value is increased.  */
 278 static size_t merge_buffer_size = MAX (MIN_MERGE_BUFFER_SIZE, 256 * 1024);
 279
 280 /* The approximate maximum number of bytes of main memory to use, as
 281    specified by the user.  Zero if the user has not specified a size.  */
 282 static size_t sort_size;
 283
 284 /* The guessed size for non-regular files.  */
 285 #define INPUT_FILE_SIZE_GUESS (1024 * 1024)
 286
 287 /* Array of directory names in which any temporary files are to be created. */
 288 static char const **temp_dirs;
 289
 290 /* Number of temporary directory names used.  */
 291 static size_t temp_dir_count;
 292
 293 /* Number of allocated slots in temp_dirs.  */
 294 static size_t temp_dir_alloc;
 295
 296 /* Flag to reverse the order of all comparisons. */
 297 static bool reverse;
 298
 299 /* Flag for stable sort.  This turns off the last ditch bytewise
 300    comparison of lines, and instead leaves lines in the same order
 301    they were read if all keys compare equal.  */
 302 static bool stable;
 303
 304 /* Tab character separating fields.  If tab_default, then fields are
 305    separated by the empty string between a non-blank character and a blank
 306    character. */
 307 static bool tab_default = true;
 308 static unsigned char tab[MB_LEN_MAX + 1];
 309 static size_t tab_length = 1;
 310
 311 /* Flag to remove consecutive duplicate lines from the output.
 312    Only the last of a sequence of equal lines will be output. */
 313 static bool unique;
 314
 315 /* Nonzero if any of the input files are the standard input. */
 316 static bool have_read_stdin;
 317
 318 /* List of key field comparisons to be tried.  */
 319 static struct keyfield *keylist;
 320
 321 static void sortlines_temp (struct line *, size_t, struct line *);
 322
 323 void
 324 usage (int status)
 325 {
 326   if (status != EXIT_SUCCESS)
 327     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 328              program_name);
 329   else
 330     {
 331       printf (_("\
 332 Usage: %s [OPTION]... [FILE]...\n\
 333 "),
 334               program_name);
 335       fputs (_("\
 336 Write sorted concatenation of all FILE(s) to standard output.\n\
 337 \n\
 338 Ordering options:\n\
 339 \n\
 340 "), stdout);
 341       fputs (_("\
 342 Mandatory arguments to long options are mandatory for short options too.\n\
 343 "), stdout);
 344       fputs (_("\
 345   -b, --ignore-leading-blanks ignore leading blanks\n\
 346   -d, --dictionary-order      consider only blanks and alphanumeric characters\n\
 347   -f, --ignore-case           fold lower case to upper case characters\n\
 348 "), stdout);
 349       fputs (_("\
 350   -g, --general-numeric-sort  compare according to general numerical value\n\
 351   -i, --ignore-nonprinting    consider only printable characters\n\
 352   -M, --month-sort            compare (unknown) < `JAN' < ... < `DEC'\n\
 353   -n, --numeric-sort          compare according to string numerical value\n\
 354   -r, --reverse               reverse the result of comparisons\n\
 355 \n\
 356 "), stdout);
 357       fputs (_("\
 358 Other options:\n\
 359 \n\
 360   -c, --check               check whether input is sorted; do not sort\n\
 361   -k, --key=POS1[,POS2]     start a key at POS1, end it at POS 2 (origin 1)\n\
 362   -m, --merge               merge already sorted files; do not sort\n\
 363   -o, --output=FILE         write result to FILE instead of standard output\n\
 364   -s, --stable              stabilize sort by disabling last-resort comparison\n\
 365   -S, --buffer-size=SIZE    use SIZE for main memory buffer\n\
 366 "), stdout);
 367       printf (_("\
 368   -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\
 369   -T, --temporary-directory=DIR  use DIR for temporaries, not $TMPDIR or %s;\n\
 370                               multiple options specify multiple directories\n\
 371   -u, --unique              with -c, check for strict ordering;\n\
 372                               without -c, output only the first of an equal run\n\
 373 "), DEFAULT_TMPDIR);
 374       fputs (_("\
 375   -z, --zero-terminated     end lines with 0 byte, not newline\n\
 376 "), stdout);
 377       fputs (HELP_OPTION_DESCRIPTION, stdout);
 378       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 379       fputs (_("\
 380 \n\
 381 POS is F[.C][OPTS], where F is the field number and C the character position\n\
 382 in the field.  OPTS is one or more single-letter ordering options, which\n\
 383 override global ordering options for that key.  If no key is given, use the\n\
 384 entire line as the key.\n\
 385 \n\
 386 SIZE may be followed by the following multiplicative suffixes:\n\
 387 "), stdout);
 388       fputs (_("\
 389 % 1% of memory, b 1, K 1024 (default), and so on for M, G, T, P, E, Z, Y.\n\
 390 \n\
 391 With no FILE, or when FILE is -, read standard input.\n\
 392 \n\
 393 *** WARNING ***\n\
 394 The locale specified by the environment affects sort order.\n\
 395 Set LC_ALL=C to get the traditional sort order that uses\n\
 396 native byte values.\n\
 397 "), stdout );
 398       printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
 399     }
 400
 401   exit (status);
 402 }
 403
 404 #define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
 405
 406 static struct option const long_options[] =
 407 {
 408   {"ignore-leading-blanks", no_argument, NULL, 'b'},
 409   {"check", no_argument, NULL, 'c'},
 410   {"dictionary-order", no_argument, NULL, 'd'},
 411   {"ignore-case", no_argument, NULL, 'f'},
 412   {"general-numeric-sort", no_argument, NULL, 'g'},
 413   {"ignore-nonprinting", no_argument, NULL, 'i'},
 414   {"key", required_argument, NULL, 'k'},
 415   {"merge", no_argument, NULL, 'm'},
 416   {"month-sort", no_argument, NULL, 'M'},
 417   {"numeric-sort", no_argument, NULL, 'n'},
 418   {"output", required_argument, NULL, 'o'},
 419   {"reverse", no_argument, NULL, 'r'},
 420   {"stable", no_argument, NULL, 's'},
 421   {"buffer-size", required_argument, NULL, 'S'},
 422   {"field-separator", required_argument, NULL, 't'},
 423   {"temporary-directory", required_argument, NULL, 'T'},
 424   {"unique", no_argument, NULL, 'u'},
 425   {"zero-terminated", no_argument, NULL, 'z'},
 426   {GETOPT_HELP_OPTION_DECL},
 427   {GETOPT_VERSION_OPTION_DECL},
 428   {0, 0, 0, 0},
 429 };
 430
 431 /* The set of signals that are caught.  */
 432 static sigset_t caught_signals;
 433
 434 /* The list of temporary files. */
 435 struct tempnode
 436 {
 437   struct tempnode *volatile next;
 438   char name[1];  /* Actual size is 1 + file name length.  */
 439 };
 440 static struct tempnode *volatile temphead;
 441
 442 /* Fucntion pointers. */
 443 static void
 444 (*inittables) (void);
 445
 446 static char *
 447 (* begfield) (const struct line *line, const struct keyfield *key);
 448
 449 static char *
 450 (* limfield) (const struct line *line, const struct keyfield *key);
 451
 452 static int
 453 (*getmonth) (const char *s, size_t len);
 454
 455 static int
 456 (* keycompare) (const struct line *a, const struct line *b);
 457
 458 /* Test for white space multibyte character.
 459    Set LENGTH the byte length of investigated multibyte character. */
 460 #if HAVE_MBRTOWC
 461 static int
 462 ismbblank (const char *str, size_t len, size_t *length)
 463 {
 464   size_t mblength;
 465   wchar_t wc;
 466   mbstate_t state;
 467
 468   memset (&state, '\0', sizeof(mbstate_t));
 469   mblength = mbrtowc (&wc, str, len, &state);
 470
 471   if (mblength == (size_t)-1 || mblength == (size_t)-2)
 472     {
 473       *length = 1;
 474       return 0;
 475     }
 476
 477   *length = (mblength < 1) ? 1 : mblength;
 478   return iswblank (wc);
 479 }
 480 #endif
 481
 482 /* Clean up any remaining temporary files. */
 483
 484 static void
 485 cleanup (void)
 486 {
 487   struct tempnode const *node;
 488
 489   for (node = temphead; node; node = node->next)
 490     unlink (node->name);
 491 }
 492
 493 /* Report MESSAGE for FILE, then clean up and exit.
 494    If FILE is null, it represents standard output.  */
 495
 496 static void die (char const *, char const *) ATTRIBUTE_NORETURN;
 497 static void
 498 die (char const *message, char const *file)
 499 {
 500   error (0, errno, "%s: %s", message, file ? file : _("standard output"));
 501   exit (SORT_FAILURE);
 502 }
 503
 504 /* Create a new temporary file, returning its newly allocated name.
 505    Store into *PFP a stream open for writing.  */
 506
 507 static char *
 508 create_temp_file (FILE **pfp)
 509 {
 510   static char const slashbase[] = "/sortXXXXXX";
 511   static size_t temp_dir_index;
 512   sigset_t oldset;
 513   int fd;
 514   int saved_errno;
 515   char const *temp_dir = temp_dirs[temp_dir_index];
 516   size_t len = strlen (temp_dir);
 517   struct tempnode *node =
 518     xmalloc (sizeof node->next + len + sizeof slashbase);
 519   char *file = node->name;
 520
 521   memcpy (file, temp_dir, len);
 522   memcpy (file + len, slashbase, sizeof slashbase);
 523   node->next = temphead;
 524   if (++temp_dir_index == temp_dir_count)
 525     temp_dir_index = 0;
 526
 527   /* Create the temporary file in a critical section, to avoid races.  */
 528   sigprocmask (SIG_BLOCK, &caught_signals, &oldset);
 529   fd = mkstemp (file);
 530   if (0 <= fd)
 531     temphead = node;
 532   saved_errno = errno;
 533   sigprocmask (SIG_SETMASK, &oldset, NULL);
 534   errno = saved_errno;
 535
 536   if (fd < 0 || (*pfp = fdopen (fd, "w")) == NULL)
 537     die (_("cannot create temporary file"), file);
 538
 539   return file;
 540 }
 541
 542 /* Return a stream for FILE, opened with mode HOW.  A null FILE means
 543    standard output; HOW should be "w".  When opening for input, "-"
 544    means standard input.  To avoid confusion, do not return file
 545    descriptors 0, 1, or 2.  */
 546
 547 static FILE *
 548 xfopen (const char *file, const char *how)
 549 {
 550   FILE *fp;
 551
 552   if (!file)
 553     fp = stdout;
 554   else if (STREQ (file, "-") && *how == 'r')
 555     {
 556       have_read_stdin = true;
 557       fp = stdin;
 558     }
 559   else
 560     {
 561       if ((fp = fopen_safer (file, how)) == NULL)
 562         die (_("open failed"), file);
 563     }
 564
 565   return fp;
 566 }
 567
 568 /* Close FP, whose name is FILE, and report any errors.  */
 569
 570 static void
 571 xfclose (FILE *fp, char const *file)
 572 {
 573   if (fp == stdin)
 574     {
 575       /* Allow reading stdin from tty more than once. */
 576       if (feof (fp))
 577         clearerr (fp);
 578     }
 579   else
 580     {
 581       if (fclose (fp) != 0)
 582         die (_("close failed"), file);
 583     }
 584 }
 585
 586 static void
 587 write_bytes (const char *buf, size_t n_bytes, FILE *fp, const char *output_file)
 588 {
 589   if (fwrite (buf, 1, n_bytes, fp) != n_bytes)
 590     die (_("write failed"), output_file);
 591 }
 592
 593 /* Append DIR to the array of temporary directory names.  */
 594 static void
 595 add_temp_dir (char const *dir)
 596 {
 597   if (temp_dir_count == temp_dir_alloc)
 598     temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs);
 599
 600   temp_dirs[temp_dir_count++] = dir;
 601 }
 602
 603 /* Search through the list of temporary files for NAME;
 604    remove it if it is found on the list. */
 605
 606 static void
 607 zaptemp (const char *name)
 608 {
 609   struct tempnode *volatile *pnode;
 610   struct tempnode *node;
 611
 612   for (pnode = &temphead; (node = *pnode); pnode = &node->next)
 613     if (node->name == name)
 614       {
 615         unlink (name);
 616         *pnode = node->next;
 617         free (node);
 618         break;
 619       }
 620 }
 621
 622 #if HAVE_LANGINFO_CODESET
 623
 624 static int
 625 struct_month_cmp (const void *m1, const void *m2)
 626 {
 627   struct month const *month1 = m1;
 628   struct month const *month2 = m2;
 629   return strcmp (month1->name, month2->name);
 630 }
 631
 632 #endif
 633
 634 /* Initialize the character class tables. */
 635
 636 static void
 637 inittables_uni (void)
 638 {
 639   int i;
 640
 641   for (i = 0; i < UCHAR_LIM; ++i)
 642     {
 643       blanks[i] = !!ISBLANK (i);
 644       nonprinting[i] = !ISPRINT (i);
 645       nondictionary[i] = !ISALNUM (i) && !ISBLANK (i);
 646       fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i);
 647     }
 648
 649 #if HAVE_NL_LANGINFO
 650   /* If we're not in the "C" locale, read different names for months.  */
 651   if (hard_LC_TIME)
 652     {
 653       for (i = 0; i < MONTHS_PER_YEAR; i++)
 654         {
 655           char const *s;
 656           size_t s_len;
 657           size_t j;
 658           char *name;
 659
 660           s = (char *) nl_langinfo (ABMON_1 + i);
 661           s_len = strlen (s);
 662           monthtab[i].name = name = xmalloc (s_len + 1);
 663           monthtab[i].val = i + 1;
 664
 665           for (j = 0; j < s_len; j++)
 666             name[j] = fold_toupper[to_uchar (s[j])];
 667           name[j] = '\0';
 668         }
 669       qsort ((void *) monthtab, MONTHS_PER_YEAR,
 670              sizeof *monthtab, struct_month_cmp);
 671     }
 672 #endif
 673 }
 674
 675 #if HAVE_MBRTOWC
 676 static void
 677 inittables_mb (void)
 678 {
 679   int i, j, k, l;
 680   char *name, *s;
 681   size_t s_len, mblength;
 682   char mbc[MB_LEN_MAX];
 683   wchar_t wc, pwc;
 684   mbstate_t state_mb, state_wc;
 685
 686   for (i = 0; i < MONTHS_PER_YEAR; i++)
 687     {
 688       s = (char *) nl_langinfo (ABMON_1 + i);
 689       s_len = strlen (s);
 690       monthtab[i].name = name = (char *) xmalloc (s_len + 1);
 691       monthtab[i].val = i + 1;
 692
 693       memset (&state_mb, '\0', sizeof (mbstate_t));
 694       memset (&state_wc, '\0', sizeof (mbstate_t));
 695
 696       for (j = 0; j < s_len;)
 697         {
 698           if (!ismbblank (s + j, s_len - j, &mblength))
 699             break;
 700           j += mblength;
 701         }
 702
 703       for (k = 0; j < s_len;)
 704         {
 705           mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
 706           assert (mblength != (size_t)-1 && mblength != (size_t)-2);
 707           if (mblength == 0)
 708             break;
 709
 710           pwc = towupper (wc);
 711           if (pwc == wc)
 712             {
 713               memcpy (mbc, s + j, mblength);
 714               j += mblength;
 715             }
 716           else
 717             {
 718               j += mblength;
 719               mblength = wcrtomb (mbc, pwc, &state_wc);
 720               assert (mblength != (size_t)0 && mblength != (size_t)-1);
 721             }
 722
 723           for (l = 0; l < mblength; l++)
 724             name[k++] = mbc[l];
 725         }
 726       name[k] = '\0';
 727     }
 728   qsort ((void *) monthtab, MONTHS_PER_YEAR,
 729       sizeof (struct month), struct_month_cmp);
 730 }
 731 #endif
 732
 733 /* Specify the amount of main memory to use when sorting.  */
 734 static void
 735 specify_sort_size (char const *s)
 736 {
 737   uintmax_t n;
 738   char *suffix;
 739   enum strtol_error e = xstrtoumax (s, &suffix, 10, &n, "EgGkKmMPtTYZ");
 740
 741   /* The default unit is KiB.  */
 742   if (e == LONGINT_OK && ISDIGIT (suffix[-1]))
 743     {
 744       if (n <= UINTMAX_MAX / 1024)
 745         n *= 1024;
 746       else
 747         e = LONGINT_OVERFLOW;
 748     }
 749
 750   /* A 'b' suffix means bytes; a '%' suffix means percent of memory.  */
 751   if (e == LONGINT_INVALID_SUFFIX_CHAR && ISDIGIT (suffix[-1]) && ! suffix[1])
 752     switch (suffix[0])
 753       {
 754       case 'b':
 755         e = LONGINT_OK;
 756         break;
 757
 758       case '%':
 759         {
 760           double mem = physmem_total () * n / 100;
 761
 762           /* Use "<", not "<=", to avoid problems with rounding.  */
 763           if (mem < UINTMAX_MAX)
 764             {
 765               n = mem;
 766               e = LONGINT_OK;
 767             }
 768           else
 769             e = LONGINT_OVERFLOW;
 770         }
 771         break;
 772       }
 773
 774   if (e == LONGINT_OK)
 775     {
 776       /* If multiple sort sizes are specified, take the maximum, so
 777          that option order does not matter.  */
 778       if (n < sort_size)
 779         return;
 780
 781       sort_size = n;
 782       if (sort_size == n)
 783         {
 784           sort_size = MAX (sort_size, MIN_SORT_SIZE);
 785           return;
 786         }
 787
 788       e = LONGINT_OVERFLOW;
 789     }
 790
 791   STRTOL_FATAL_ERROR (s, _("sort size"), e);
 792 }
 793
 794 /* Return the default sort size.  */
 795 static size_t
 796 default_sort_size (void)
 797 {
 798   /* Let MEM be available memory or 1/8 of total memory, whichever
 799      is greater.  */
 800   double avail = physmem_available ();
 801   double total = physmem_total ();
 802   double mem = MAX (avail, total / 8);
 803   struct rlimit rlimit;
 804
 805   /* Let SIZE be MEM, but no more than the maximum object size or
 806      system resource limits.  Avoid the MIN macro here, as it is not
 807      quite right when only one argument is floating point.  Don't
 808      bother to check for values like RLIM_INFINITY since in practice
 809      they are not much less than SIZE_MAX.  */
 810   size_t size = SIZE_MAX;
 811   if (mem < size)
 812     size = mem;
 813   if (getrlimit (RLIMIT_DATA, &rlimit) == 0 && rlimit.rlim_cur < size)
 814     size = rlimit.rlim_cur;
 815 #ifdef RLIMIT_AS
 816   if (getrlimit (RLIMIT_AS, &rlimit) == 0 && rlimit.rlim_cur < size)
 817     size = rlimit.rlim_cur;
 818 #endif
 819
 820   /* Leave a large safety margin for the above limits, as failure can
 821      occur when they are exceeded.  */
 822   size /= 2;
 823
 824 #ifdef RLIMIT_RSS
 825   /* Leave a 1/16 margin for RSS to leave room for code, stack, etc.
 826      Exceeding RSS is not fatal, but can be quite slow.  */
 827   if (getrlimit (RLIMIT_RSS, &rlimit) == 0 && rlimit.rlim_cur / 16 * 15 < size)
 828     size = rlimit.rlim_cur / 16 * 15;
 829 #endif
 830
 831   /* Use no less than the minimum.  */
 832   return MAX (size, MIN_SORT_SIZE);
 833 }
 834
 835 /* Return the sort buffer size to use with the input files identified
 836    by FPS and FILES, which are alternate paths to the same files.
 837    NFILES gives the number of input files; NFPS may be less.  Assume
 838    that each input line requires LINE_BYTES extra bytes' worth of line
 839    information.  Do not exceed a bound on the size: if the bound is
 840    not specified by the user, use a default.  */
 841
 842 static size_t
 843 sort_buffer_size (FILE *const *fps, int nfps,
 844                   char *const *files, int nfiles,
 845                   size_t line_bytes)
 846 {
 847   /* A bound on the input size.  If zero, the bound hasn't been
 848      determined yet.  */
 849   static size_t size_bound;
 850
 851   /* In the worst case, each input byte is a newline.  */
 852   size_t worst_case_per_input_byte = line_bytes + 1;
 853
 854   /* Keep enough room for one extra input line and an extra byte.
 855      This extra room might be needed when preparing to read EOF.  */
 856   size_t size = worst_case_per_input_byte + 1;
 857
 858   int i;
 859
 860   for (i = 0; i < nfiles; i++)
 861     {
 862       struct stat st;
 863       off_t file_size;
 864       size_t worst_case;
 865
 866       if ((i < nfps ? fstat (fileno (fps[i]), &st)
 867            : STREQ (files[i], "-") ? fstat (STDIN_FILENO, &st)
 868            : stat (files[i], &st))
 869           != 0)
 870         die (_("stat failed"), files[i]);
 871
 872       if (S_ISREG (st.st_mode))
 873         file_size = st.st_size;
 874       else
 875         {
 876           /* The file has unknown size.  If the user specified a sort
 877              buffer size, use that; otherwise, guess the size.  */
 878           if (sort_size)
 879             return sort_size;
 880           file_size = INPUT_FILE_SIZE_GUESS;
 881         }
 882
 883       if (! size_bound)
 884         {
 885           size_bound = sort_size;
 886           if (! size_bound)
 887             size_bound = default_sort_size ();
 888         }
 889
 890       /* Add the amount of memory needed to represent the worst case
 891          where the input consists entirely of newlines followed by a
 892          single non-newline.  Check for overflow.  */
 893       worst_case = file_size * worst_case_per_input_byte + 1;
 894       if (file_size != worst_case / worst_case_per_input_byte
 895           || size_bound - size <= worst_case)
 896         return size_bound;
 897       size += worst_case;
 898     }
 899
 900   return size;
 901 }
 902
 903 /* Initialize BUF.  Reserve LINE_BYTES bytes for each line; LINE_BYTES
 904    must be at least sizeof (struct line).  Allocate ALLOC bytes
 905    initially.  */
 906
 907 static void
 908 initbuf (struct buffer *buf, size_t line_bytes, size_t alloc)
 909 {
 910   /* Ensure that the line array is properly aligned.  If the desired
 911      size cannot be allocated, repeatedly halve it until allocation
 912      succeeds.  The smaller allocation may hurt overall performance,
 913      but that's better than failing.  */
 914   for (;;)
 915     {
 916       alloc += sizeof (struct line) - alloc % sizeof (struct line);
 917       buf->buf = malloc (alloc);
 918       if (buf->buf)
 919         break;
 920       alloc /= 2;
 921       if (alloc <= line_bytes + 1)
 922         xalloc_die ();
 923     }
 924
 925   buf->line_bytes = line_bytes;
 926   buf->alloc = alloc;
 927   buf->used = buf->left = buf->nlines = 0;
 928   buf->eof = false;
 929 }
 930
 931 /* Return one past the limit of the line array.  */
 932
 933 static inline struct line *
 934 buffer_linelim (struct buffer const *buf)
 935 {
 936   return (struct line *) (buf->buf + buf->alloc);
 937 }
 938
 939 /* Return a pointer to the first character of the field specified
 940    by KEY in LINE. */
 941
 942 static char *
 943 begfield_uni (const struct line *line, const struct keyfield *key)
 944 {
 945   register char *ptr = line->text, *lim = ptr + line->length - 1;
 946   register size_t sword = key->sword;
 947   register size_t schar = key->schar;
 948   register size_t remaining_bytes;
 949
 950   /* The leading field separator itself is included in a field when -t
 951      is absent.  */
 952
 953   if (!tab_default)
 954     while (ptr < lim && sword--)
 955       {
 956         while (ptr < lim && *ptr != tab[0])
 957           ++ptr;
 958         if (ptr < lim)
 959           ++ptr;
 960       }
 961   else
 962     while (ptr < lim && sword--)
 963       {
 964         while (ptr < lim && blanks[to_uchar (*ptr)])
 965           ++ptr;
 966         while (ptr < lim && !blanks[to_uchar (*ptr)])
 967           ++ptr;
 968       }
 969
 970   if (key->skipsblanks)
 971     while (ptr < lim && blanks[to_uchar (*ptr)])
 972       ++ptr;
 973
 974   /* Advance PTR by SCHAR (if possible), but no further than LIM.  */
 975   remaining_bytes = lim - ptr;
 976   if (schar < remaining_bytes)
 977     ptr += schar;
 978   else
 979     ptr = lim;
 980
 981   return ptr;
 982 }
 983
 984 #if HAVE_MBRTOWC
 985 static char *
 986 begfield_mb (const struct line *line, const struct keyfield *key)
 987 {
 988   int i;
 989   char *ptr = line->text, *lim = ptr + line->length - 1;
 990   size_t sword = key->sword;
 991   size_t schar = key->schar;
 992   size_t mblength;
 993   mbstate_t state;
 994
 995   memset (&state, '\0', sizeof(mbstate_t));
 996
 997   if (!tab_default)
 998     while (ptr < lim && sword--)
 999       {
1000         while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1001           {
1002             GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1003             ptr += mblength;
1004           }
1005         if (ptr < lim)
1006           {
1007             GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1008             ptr += mblength;
1009           }
1010       }
1011   else
1012     while (ptr < lim && sword--)
1013       {
1014         while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1015           ptr += mblength;
1016         if (ptr < lim)
1017           {
1018             GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1019             ptr += mblength;
1020           }
1021         while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1022           ptr += mblength;
1023       }
1024
1025   if (key->skipsblanks)
1026     while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1027       ptr += mblength;
1028
1029   for (i = 0; i < schar; i++)
1030     {
1031       GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1032
1033       if (ptr + mblength > lim)
1034         break;
1035       else
1036         ptr += mblength;
1037     }
1038
1039   return ptr;
1040 }
1041 #endif
1042
1043 /* Return the limit of (a pointer to the first character after) the field
1044    in LINE specified by KEY. */
1045
1046 static char *
1047 limfield_uni (const struct line *line, const struct keyfield *key)
1048 {
1049   register char *ptr = line->text, *lim = ptr + line->length - 1;
1050   register size_t eword = key->eword, echar = key->echar;
1051   register size_t remaining_bytes;
1052
1053   /* Move PTR past EWORD fields or to one past the last byte on LINE,
1054      whichever comes first.  If there are more than EWORD fields, leave
1055      PTR pointing at the beginning of the field having zero-based index,
1056      EWORD.  If a delimiter character was specified (via -t), then that
1057      `beginning' is the first character following the delimiting TAB.
1058      Otherwise, leave PTR pointing at the first `blank' character after
1059      the preceding field.  */
1060   if (!tab_default)
1061     while (ptr < lim && eword--)
1062       {
1063         while (ptr < lim && *ptr != tab[0])
1064           ++ptr;
1065         if (ptr < lim && (eword | echar))
1066           ++ptr;
1067       }
1068   else
1069     while (ptr < lim && eword--)
1070       {
1071         while (ptr < lim && blanks[to_uchar (*ptr)])
1072           ++ptr;
1073         while (ptr < lim && !blanks[to_uchar (*ptr)])
1074           ++ptr;
1075       }
1076
1077 #ifdef POSIX_UNSPECIFIED
1078   /* The following block of code makes GNU sort incompatible with
1079      standard Unix sort, so it's ifdef'd out for now.
1080      The POSIX spec isn't clear on how to interpret this.
1081      FIXME: request clarification.
1082
1083      From: kwzh@gnu.ai.mit.edu (Karl Heuer)
1084      Date: Thu, 30 May 96 12:20:41 -0400
1085      [Translated to POSIX 1003.1-2001 terminology by Paul Eggert.]
1086
1087      [...]I believe I've found another bug in `sort'.
1088
1089      $ cat /tmp/sort.in
1090      a b c 2 d
1091      pq rs 1 t
1092      $ textutils-1.15/src/sort -k1.7,1.7 </tmp/sort.in
1093      a b c 2 d
1094      pq rs 1 t
1095      $ /bin/sort -k1.7,1.7 </tmp/sort.in
1096      pq rs 1 t
1097      a b c 2 d
1098
1099      Unix sort produced the answer I expected: sort on the single character
1100      in column 7.  GNU sort produced different results, because it disagrees
1101      on the interpretation of the key-end spec "M.N".  Unix sort reads this
1102      as "skip M-1 fields, then N-1 characters"; but GNU sort wants it to mean
1103      "skip M-1 fields, then either N-1 characters or the rest of the current
1104      field, whichever comes first".  This extra clause applies only to
1105      key-ends, not key-starts.
1106      */
1107
1108   /* Make LIM point to the end of (one byte past) the current field.  */
1109   if (!tab_default)
1110     {
1111       char *newlim;
1112       newlim = memchr (ptr, tab[0], lim - ptr);
1113       if (newlim)
1114         lim = newlim;
1115     }
1116   else
1117     {
1118       char *newlim;
1119       newlim = ptr;
1120       while (newlim < lim && blanks[to_uchar (*newlim)])
1121         ++newlim;
1122       while (newlim < lim && !blanks[to_uchar (*newlim)])
1123         ++newlim;
1124       lim = newlim;
1125     }
1126 #endif
1127
1128   /* If we're ignoring leading blanks when computing the End
1129      of the field, don't start counting bytes until after skipping
1130      past any leading blanks. */
1131   if (key->skipeblanks)
1132     while (ptr < lim && blanks[to_uchar (*ptr)])
1133       ++ptr;
1134
1135   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1136   remaining_bytes = lim - ptr;
1137   if (echar < remaining_bytes)
1138     ptr += echar;
1139   else
1140     ptr = lim;
1141
1142   return ptr;
1143 }
1144
1145 #if HAVE_MBRTOWC
1146 static char *
1147 limfield_mb (const struct line *line, const struct keyfield *key)
1148 {
1149   char *ptr = line->text, *lim = ptr + line->length - 1;
1150   size_t eword = key->eword, echar = key->echar;
1151   int i;
1152   size_t mblength;
1153   mbstate_t state;
1154
1155   memset (&state, '\0', sizeof(mbstate_t));
1156
1157   if (!tab_default)
1158     while (ptr < lim && eword--)
1159       {
1160         while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
1161           {
1162             GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1163             ptr += mblength;
1164           }
1165         if (ptr < lim && (eword | echar))
1166           {
1167             GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1168             ptr += mblength;
1169           }
1170       }
1171   else
1172     while (ptr < lim && eword--)
1173       {
1174         while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1175           ptr += mblength;
1176         if (ptr < lim)
1177           {
1178             GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1179             ptr += mblength;
1180           }
1181         while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
1182           ptr += mblength;
1183       }
1184
1185
1186 # ifdef POSIX_UNSPECIFIED
1187   /* Make LIM point to the end of (one byte past) the current field.  */
1188   if (!tab_default)
1189     {
1190       char *newlim, *p;
1191
1192       newlim = NULL;
1193       for (p = ptr; p < lim;)
1194         {
1195           if (memcmp (p, tab, tab_length) == 0)
1196             {
1197               newlim = p;
1198               break;
1199             }
1200
1201           GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1202           p += mblength;
1203         }
1204     }
1205   else
1206     {
1207       char *newlim;
1208       newlim = ptr;
1209
1210       while (newlim < lim && ismbblank (newlim, lim - newlim, &mblength))
1211         newlim += mblength;
1212       if (ptr < lim)
1213         {
1214           GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1215           ptr += mblength;
1216         }
1217       while (newlim < lim && !ismbblank (newlim, lim - newlim, &mblength))
1218         newlim += mblength;
1219       lim = newlim;
1220     }
1221 # endif
1222
1223   /* If we're skipping leading blanks, don't start counting characters
1224    *      until after skipping past any leading blanks.  */
1225   if (key->skipeblanks)
1226     while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
1227       ptr += mblength;
1228
1229   memset (&state, '\0', sizeof(mbstate_t));
1230
1231   /* Advance PTR by ECHAR (if possible), but no further than LIM.  */
1232   for (i = 0; i < echar; i++)
1233     {
1234       GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
1235
1236       if (ptr + mblength > lim)
1237         break;
1238       else
1239         ptr += mblength;
1240     }
1241
1242   return ptr;
1243 }
1244 #endif
1245
1246 /* Fill BUF reading from FP, moving buf->left bytes from the end
1247    of buf->buf to the beginning first.  If EOF is reached and the
1248    file wasn't terminated by a newline, supply one.  Set up BUF's line
1249    table too.  FILE is the name of the file corresponding to FP.
1250    Return true if some input was read.  */
1251
1252 static bool
1253 fillbuf (struct buffer *buf, register FILE *fp, char const *file)
1254 {
1255   struct keyfield const *key = keylist;
1256   char eol = eolchar;
1257   size_t line_bytes = buf->line_bytes;
1258   size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE;
1259
1260   if (buf->eof)
1261     return false;
1262
1263   if (buf->used != buf->left)
1264     {
1265       memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
1266       buf->used = buf->left;
1267       buf->nlines = 0;
1268     }
1269
1270   for (;;)
1271     {
1272       char *ptr = buf->buf + buf->used;
1273       struct line *linelim = buffer_linelim (buf);
1274       struct line *line = linelim - buf->nlines;
1275       size_t avail = (char *) linelim - buf->nlines * line_bytes - ptr;
1276       char *line_start = buf->nlines ? line->text + line->length : buf->buf;
1277
1278       while (line_bytes + 1 < avail)
1279         {
1280           /* Read as many bytes as possible, but do not read so many
1281              bytes that there might not be enough room for the
1282              corresponding line array.  The worst case is when the
1283              rest of the input file consists entirely of newlines,
1284              except that the last byte is not a newline.  */
1285           size_t readsize = (avail - 1) / (line_bytes + 1);
1286           size_t bytes_read = fread (ptr, 1, readsize, fp);
1287           char *ptrlim = ptr + bytes_read;
1288           char *p;
1289           avail -= bytes_read;
1290
1291           if (bytes_read != readsize)
1292             {
1293               if (ferror (fp))
1294                 die (_("read failed"), file);
1295               if (feof (fp))
1296                 {
1297                   buf->eof = true;
1298                   if (buf->buf == ptrlim)
1299                     return false;
1300                   if (ptrlim[-1] != eol)
1301                     *ptrlim++ = eol;
1302                 }
1303             }
1304
1305           /* Find and record each line in the just-read input.  */
1306           while ((p = memchr (ptr, eol, ptrlim - ptr)))
1307             {
1308               ptr = p + 1;
1309               line--;
1310               line->text = line_start;
1311               line->length = ptr - line_start;
1312               mergesize = MAX (mergesize, line->length);
1313               avail -= line_bytes;
1314
1315               if (key)
1316                 {
1317                   /* Precompute the position of the first key for
1318                      efficiency. */
1319                   line->keylim = (key->eword == SIZE_MAX
1320                                   ? p
1321                                   : limfield (line, key));
1322
1323                   if (key->sword != SIZE_MAX)
1324                     line->keybeg = begfield (line, key);
1325                   else
1326                     {
1327                       if (key->skipsblanks)
1328 #if HAVE_MBRTOWC
1329                         {
1330                           if (MB_CUR_MAX > 1)
1331                             {
1332                               size_t mblength;
1333
1334                               while (ismbblank (line_start, ptr - line_start, &mblength))
1335                                 line_start += mblength;
1336                             }
1337                           else
1338 #endif
1339                             {
1340                               while (blanks[to_uchar (*line_start)])
1341                                 line_start++;
1342                             }
1343                         }
1344                       line->keybeg = line_start;
1345                     }
1346                 }
1347
1348               line_start = ptr;
1349             }
1350
1351           ptr = ptrlim;
1352           if (buf->eof)
1353             break;
1354         }
1355
1356       buf->used = ptr - buf->buf;
1357       buf->nlines = buffer_linelim (buf) - line;
1358       if (buf->nlines != 0)
1359         {
1360           buf->left = ptr - line_start;
1361           merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE;
1362           return true;
1363         }
1364
1365       /* The current input line is too long to fit in the buffer.
1366          Double the buffer size and try again.  */
1367       buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf));
1368     }
1369 }
1370
1371 /* Compare strings A and B containing decimal fractions < 1.  Each string
1372    should begin with a decimal point followed immediately by the digits
1373    of the fraction.  Strings not of this form are considered to be zero. */
1374
1375 /* The goal here, is to take two numbers a and b... compare these
1376    in parallel.  Instead of converting each, and then comparing the
1377    outcome.  Most likely stopping the comparison before the conversion
1378    is complete.  The algorithm used, in the old sort:
1379
1380    Algorithm: fraccompare
1381    Action   : compare two decimal fractions
1382    accepts  : char *a, char *b
1383    returns  : -1 if a<b, 0 if a=b, 1 if a>b.
1384    implement:
1385
1386    if *a == decimal_point AND *b == decimal_point
1387      find first character different in a and b.
1388      if both are digits, return the difference *a - *b.
1389      if *a is a digit
1390        skip past zeros
1391        if digit return 1, else 0
1392      if *b is a digit
1393        skip past zeros
1394        if digit return -1, else 0
1395    if *a is a decimal_point
1396      skip past decimal_point and zeros
1397      if digit return 1, else 0
1398    if *b is a decimal_point
1399      skip past decimal_point and zeros
1400      if digit return -1, else 0
1401    return 0 */
1402
1403 static int
1404 fraccompare (register const char *a, register const char *b)
1405 {
1406   if (*a == decimal_point && *b == decimal_point)
1407     {
1408       while (*++a == *++b)
1409         if (! ISDIGIT (*a))
1410           return 0;
1411       if (ISDIGIT (*a) && ISDIGIT (*b))
1412         return *a - *b;
1413       if (ISDIGIT (*a))
1414         goto a_trailing_nonzero;
1415       if (ISDIGIT (*b))
1416         goto b_trailing_nonzero;
1417       return 0;
1418     }
1419   else if (*a++ == decimal_point)
1420     {
1421     a_trailing_nonzero:
1422       while (*a == NUMERIC_ZERO)
1423         a++;
1424       return ISDIGIT (*a);
1425     }
1426   else if (*b++ == decimal_point)
1427     {
1428     b_trailing_nonzero:
1429       while (*b == NUMERIC_ZERO)
1430         b++;
1431       return - ISDIGIT (*b);
1432     }
1433   return 0;
1434 }
1435
1436 /* Compare strings A and B as numbers without explicitly converting them to
1437    machine numbers.  Comparatively slow for short strings, but asymptotically
1438    hideously fast. */
1439
1440 static int
1441 numcompare (register const char *a, register const char *b)
1442 {
1443   char tmpa;
1444   char tmpb;
1445   int tmp;
1446   size_t log_a;
1447   size_t log_b;
1448
1449 #if HAVE_MBRTOWC
1450   if (MB_CUR_MAX > 1)
1451     {
1452       size_t mblength;
1453       size_t alen = strnlen (a, MB_LEN_MAX);
1454       size_t blen = strnlen (b, MB_LEN_MAX);
1455
1456       while (ismbblank (a, alen, &mblength))
1457         a += mblength, alen -= mblength;
1458       while (ismbblank (b, blen, &mblength))
1459         b += mblength, blen -= mblength;
1460
1461       tmpa = *a;
1462       tmpb = *b;
1463     }
1464   else
1465 #endif
1466     {
1467       tmpa = *a;
1468       tmpb = *b;
1469
1470       while (blanks[to_uchar (tmpa)])
1471         tmpa = *++a;
1472       while (blanks[to_uchar (tmpb)])
1473         tmpb = *++b;
1474     }
1475
1476   if (tmpa == NEGATION_SIGN)
1477     {
1478       do
1479         tmpa = *++a;
1480       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa));
1481       if (tmpb != NEGATION_SIGN)
1482         {
1483           if (tmpa == decimal_point)
1484             do
1485               tmpa = *++a;
1486             while (tmpa == NUMERIC_ZERO);
1487           if (ISDIGIT (tmpa))
1488             return -1;
1489           while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1490             tmpb = *++b;
1491           if (tmpb == decimal_point)
1492             do
1493               tmpb = *++b;
1494             while (tmpb == NUMERIC_ZERO);
1495           if (ISDIGIT (tmpb))
1496             return -1;
1497           return 0;
1498         }
1499       do
1500         tmpb = *++b;
1501       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1502
1503       while (tmpa == tmpb && ISDIGIT (tmpa))
1504         {
1505           do
1506             tmpa = *++a;
1507           while (IS_THOUSANDS_SEP (tmpa));
1508           do
1509             tmpb = *++b;
1510           while (IS_THOUSANDS_SEP (tmpb));
1511         }
1512
1513       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1514           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1515         return -fraccompare (a, b);
1516
1517       tmp = tmpb - tmpa;
1518
1519       for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1520         do
1521           tmpa = *++a;
1522         while (IS_THOUSANDS_SEP (tmpa));
1523
1524       for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1525         do
1526           tmpb = *++b;
1527         while (IS_THOUSANDS_SEP (tmpb));
1528
1529       if (log_a != log_b)
1530         return log_a < log_b ? 1 : -1;
1531
1532       if (!log_a)
1533         return 0;
1534
1535       return tmp;
1536     }
1537   else if (tmpb == NEGATION_SIGN)
1538     {
1539       do
1540         tmpb = *++b;
1541       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb));
1542       if (tmpb == decimal_point)
1543         do
1544           tmpb = *++b;
1545         while (tmpb == NUMERIC_ZERO);
1546       if (ISDIGIT (tmpb))
1547         return 1;
1548       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1549         tmpa = *++a;
1550       if (tmpa == decimal_point)
1551         do
1552           tmpa = *++a;
1553         while (tmpa == NUMERIC_ZERO);
1554       if (ISDIGIT (tmpa))
1555         return 1;
1556       return 0;
1557     }
1558   else
1559     {
1560       while (tmpa == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpa))
1561         tmpa = *++a;
1562       while (tmpb == NUMERIC_ZERO || IS_THOUSANDS_SEP (tmpb))
1563         tmpb = *++b;
1564
1565       while (tmpa == tmpb && ISDIGIT (tmpa))
1566         {
1567           do
1568             tmpa = *++a;
1569           while (IS_THOUSANDS_SEP (tmpa));
1570           do
1571             tmpb = *++b;
1572           while (IS_THOUSANDS_SEP (tmpb));
1573         }
1574
1575       if ((tmpa == decimal_point && !ISDIGIT (tmpb))
1576           || (tmpb == decimal_point && !ISDIGIT (tmpa)))
1577         return fraccompare (a, b);
1578
1579       tmp = tmpa - tmpb;
1580
1581       for (log_a = 0; ISDIGIT (tmpa); ++log_a)
1582         do
1583           tmpa = *++a;
1584         while (IS_THOUSANDS_SEP (tmpa));
1585
1586       for (log_b = 0; ISDIGIT (tmpb); ++log_b)
1587         do
1588           tmpb = *++b;
1589         while (IS_THOUSANDS_SEP (tmpb));
1590
1591       if (log_a != log_b)
1592         return log_a < log_b ? -1 : 1;
1593
1594       if (!log_a)
1595         return 0;
1596
1597       return tmp;
1598     }
1599 }
1600
1601 static int
1602 general_numcompare (const char *sa, const char *sb)
1603 {
1604   /* FIXME: add option to warn about failed conversions.  */
1605   /* FIXME: maybe add option to try expensive FP conversion
1606      only if A and B can't be compared more cheaply/accurately.  */
1607
1608   char *bufa, *ea;
1609   char *bufb, *eb;
1610   double a;
1611   double b;
1612
1613   char *p;
1614   struct lconv *lconvp = localeconv ();
1615   size_t thousands_sep_len = strlen (lconvp->thousands_sep);
1616
1617   bufa = (char *) xmalloc (strlen (sa) + 1);
1618   bufb = (char *) xmalloc (strlen (sb) + 1);
1619   strcpy (bufa, sa);
1620   strcpy (bufb, sb);
1621
1622   if (force_general_numcompare)
1623     {
1624       while (1)
1625         {
1626           a = strtod (bufa, &ea);
1627           if (memcmp (ea, lconvp->thousands_sep, thousands_sep_len) == 0)
1628             {
1629               for (p = ea; *(p + thousands_sep_len) != '\0'; p++)
1630                 *p = *(p + thousands_sep_len);
1631               *p = '\0';
1632               continue;
1633             }
1634           break;
1635         }
1636
1637       while (1)
1638         {
1639           b = strtod (bufb, &eb);
1640           if (memcmp (eb, lconvp->thousands_sep, thousands_sep_len) == 0)
1641             {
1642               for (p = eb; *(p + thousands_sep_len) != '\0'; p++)
1643                 *p = *(p + thousands_sep_len);
1644               *p = '\0';
1645               continue;
1646             }
1647           break;
1648         }
1649     }
1650   else
1651     {
1652       a = strtod (bufa, &ea);
1653       b = strtod (bufb, &eb);
1654     }
1655
1656   /* Put conversion errors at the start of the collating sequence.  */
1657   free (bufa);
1658   free (bufb);
1659   if (bufa == ea)
1660     return bufb == eb ? 0 : -1;
1661   if (bufb == eb)
1662     return 1;
1663
1664   /* Sort numbers in the usual way, where -0 == +0.  Put NaNs after
1665      conversion errors but before numbers; sort them by internal
1666      bit-pattern, for lack of a more portable alternative.  */
1667   return (a < b ? -1
1668           : a > b ? 1
1669           : a == b ? 0
1670           : b == b ? -1
1671           : a == a ? 1
1672           : memcmp ((char *) &a, (char *) &b, sizeof a));
1673 }
1674
1675 /* Return an integer in 1..12 of the month name S with length LEN.
1676    Return 0 if the name in S is not recognized.  */
1677
1678 static int
1679 getmonth_uni (const char *s, size_t len)
1680 {
1681   char *month;
1682   register size_t i;
1683   register int lo = 0, hi = MONTHS_PER_YEAR, result;
1684
1685   while (len > 0 && blanks[to_uchar (*s)])
1686     {
1687       ++s;
1688       --len;
1689     }
1690
1691   if (len == 0)
1692     return 0;
1693
1694   month = alloca (len + 1);
1695   for (i = 0; i < len; ++i)
1696     month[i] = fold_toupper[to_uchar (s[i])];
1697   month[len] = '\0';
1698
1699   do
1700     {
1701       int ix = (lo + hi) / 2;
1702
1703       if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1704         hi = ix;
1705       else
1706         lo = ix;
1707     }
1708   while (hi - lo > 1);
1709
1710   result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1711             ? monthtab[lo].val : 0);
1712
1713   return result;
1714 }
1715
1716 #if HAVE_MBRTOWC
1717 static int
1718 getmonth_mb (const char *s, size_t len)
1719 {
1720   char *month;
1721   register size_t i;
1722   register int lo = 0, hi = MONTHS_PER_YEAR, result;
1723   char *tmp;
1724   size_t wclength, mblength;
1725   const char **pp;
1726   const wchar_t **wpp;
1727   wchar_t *month_wcs;
1728   mbstate_t state;
1729
1730   while (len > 0 && ismbblank (s, len, &mblength))
1731     {
1732       s += mblength;
1733       len -= mblength;
1734     }
1735
1736   if (len == 0)
1737     return 0;
1738
1739   month = (char *) alloca (len + 1);
1740
1741   tmp = (char *) alloca (len + 1);
1742   memcpy (tmp, s, len);
1743   tmp[len] = '\0';
1744   pp = (const char **)&tmp;
1745   month_wcs = (wchar_t *) alloca ((len + 1) * sizeof (wchar_t));
1746   memset (&state, '\0', sizeof(mbstate_t));
1747
1748   wclength = mbsrtowcs (month_wcs, pp, len + 1, &state);
1749   assert (wclength != (size_t)-1 && *pp == NULL);
1750
1751   for (i = 0; i < wclength; i++)
1752       month_wcs[i] = towupper(month_wcs[i]);
1753   month_wcs[i] = L'\0';
1754
1755   wpp = (const wchar_t **)&month_wcs;
1756
1757   mblength = wcsrtombs (month, wpp, len + 1, &state);
1758   assert (mblength != (-1) && *wpp == NULL);
1759
1760   do
1761     {
1762       int ix = (lo + hi) / 2;
1763
1764       if (strncmp (month, monthtab[ix].name, strlen (monthtab[ix].name)) < 0)
1765         hi = ix;
1766       else
1767         lo = ix;
1768     }
1769   while (hi - lo > 1);
1770
1771   result = (!strncmp (month, monthtab[lo].name, strlen (monthtab[lo].name))
1772       ? monthtab[lo].val : 0);
1773
1774   return result;
1775 }
1776 #endif
1777
1778 /* Compare two lines A and B trying every key in sequence until there
1779    are no more keys or a difference is found. */
1780
1781 static int
1782 keycompare_uni (const struct line *a, const struct line *b)
1783 {
1784   struct keyfield const *key = keylist;
1785
1786   /* For the first iteration only, the key positions have been
1787      precomputed for us. */
1788   register char *texta = a->keybeg;
1789   register char *textb = b->keybeg;
1790   register char *lima = a->keylim;
1791   register char *limb = b->keylim;
1792
1793   int diff;
1794
1795   for (;;)
1796     {
1797       register char const *translate = key->translate;
1798       register bool const *ignore = key->ignore;
1799
1800       /* Find the lengths. */
1801       size_t lena = lima <= texta ? 0 : lima - texta;
1802       size_t lenb = limb <= textb ? 0 : limb - textb;
1803
1804       /* Actually compare the fields. */
1805       if (key->numeric | key->general_numeric)
1806         {
1807           char savea = *lima, saveb = *limb;
1808
1809           *lima = *limb = '\0';
1810           diff = ((key->numeric ? numcompare : general_numcompare)
1811                   (texta, textb));
1812           *lima = savea, *limb = saveb;
1813         }
1814       else if (key->month)
1815         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1816       /* Sorting like this may become slow, so in a simple locale the user
1817          can select a faster sort that is similar to ascii sort  */
1818       else if (HAVE_SETLOCALE && hard_LC_COLLATE)
1819         {
1820           if (ignore || translate)
1821             {
1822               char *copy_a = alloca (lena + 1 + lenb + 1);
1823               char *copy_b = copy_a + lena + 1;
1824               size_t new_len_a, new_len_b, i;
1825
1826               /* Ignore and/or translate chars before comparing.  */
1827               for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++)
1828                 {
1829                   if (i < lena)
1830                     {
1831                       copy_a[new_len_a] = (translate
1832                                            ? translate[to_uchar (texta[i])]
1833                                            : texta[i]);
1834                       if (!ignore || !ignore[to_uchar (texta[i])])
1835                         ++new_len_a;
1836                     }
1837                   if (i < lenb)
1838                     {
1839                       copy_b[new_len_b] = (translate
1840                                            ? translate[to_uchar (textb[i])]
1841                                            : textb [i]);
1842                       if (!ignore || !ignore[to_uchar (textb[i])])
1843                         ++new_len_b;
1844                     }
1845                 }
1846
1847               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
1848             }
1849           else if (lena == 0)
1850             diff = - NONZERO (lenb);
1851           else if (lenb == 0)
1852             goto greater;
1853           else
1854             diff = xmemcoll (texta, lena, textb, lenb);
1855         }
1856       else if (ignore)
1857         {
1858 #define CMP_WITH_IGNORE(A, B)                                           \
1859   do                                                                    \
1860     {                                                                   \
1861           for (;;)                                                      \
1862             {                                                           \
1863               while (texta < lima && ignore[to_uchar (*texta)])         \
1864                 ++texta;                                                \
1865               while (textb < limb && ignore[to_uchar (*textb)])         \
1866                 ++textb;                                                \
1867               if (! (texta < lima && textb < limb))                     \
1868                 break;                                                  \
1869               diff = to_uchar (A) - to_uchar (B);                       \
1870               if (diff)                                                 \
1871                 goto not_equal;                                         \
1872               ++texta;                                                  \
1873               ++textb;                                                  \
1874             }                                                           \
1875                                                                         \
1876           diff = (texta < lima) - (textb < limb);                       \
1877     }                                                                   \
1878   while (0)
1879
1880           if (translate)
1881             CMP_WITH_IGNORE (translate[to_uchar (*texta)],
1882                              translate[to_uchar (*textb)]);
1883           else
1884             CMP_WITH_IGNORE (*texta, *textb);
1885         }
1886       else if (lena == 0)
1887         diff = - NONZERO (lenb);
1888       else if (lenb == 0)
1889         goto greater;
1890       else
1891         {
1892           if (translate)
1893             {
1894               while (texta < lima && textb < limb)
1895                 {
1896                   diff = (to_uchar (translate[to_uchar (*texta++)])
1897                           - to_uchar (translate[to_uchar (*textb++)]));
1898                   if (diff)
1899                     goto not_equal;
1900                 }
1901             }
1902           else
1903             {
1904               diff = memcmp (texta, textb, MIN (lena, lenb));
1905               if (diff)
1906                 goto not_equal;
1907             }
1908           diff = lena < lenb ? -1 : lena != lenb;
1909         }
1910
1911       if (diff)
1912         goto not_equal;
1913
1914       key = key->next;
1915       if (! key)
1916         break;
1917
1918       /* Find the beginning and limit of the next field.  */
1919       if (key->eword != SIZE_MAX)
1920         lima = limfield (a, key), limb = limfield (b, key);
1921       else
1922         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
1923
1924       if (key->sword != SIZE_MAX)
1925         texta = begfield (a, key), textb = begfield (b, key);
1926       else
1927         {
1928           texta = a->text, textb = b->text;
1929           if (key->skipsblanks)
1930             {
1931               while (texta < lima && blanks[to_uchar (*texta)])
1932                 ++texta;
1933               while (textb < limb && blanks[to_uchar (*textb)])
1934                 ++textb;
1935             }
1936         }
1937     }
1938
1939   return 0;
1940
1941  greater:
1942   diff = 1;
1943  not_equal:
1944   return key->reverse ? -diff : diff;
1945 }
1946
1947 #if HAVE_MBRTOWC
1948 static int
1949 keycompare_mb (const struct line *a, const struct line *b)
1950 {
1951   struct keyfield *key = keylist;
1952
1953   /* For the first iteration only, the key positions have been
1954      precomputed for us. */
1955   char *texta = a->keybeg;
1956   char *textb = b->keybeg;
1957   char *lima = a->keylim;
1958   char *limb = b->keylim;
1959
1960   size_t mblength_a, mblength_b;
1961   wchar_t wc_a, wc_b;
1962   mbstate_t state_a, state_b;
1963
1964   int diff;
1965
1966   memset (&state_a, '\0', sizeof(mbstate_t));
1967   memset (&state_b, '\0', sizeof(mbstate_t));
1968
1969   for (;;)
1970     {
1971       unsigned char *translate = (unsigned char *) key->translate;
1972       bool const *ignore = key->ignore;
1973
1974       /* Find the lengths. */
1975       size_t lena = lima <= texta ? 0 : lima - texta;
1976       size_t lenb = limb <= textb ? 0 : limb - textb;
1977
1978       /* Actually compare the fields. */
1979       if (key->numeric | key->general_numeric)
1980         {
1981           char savea = *lima, saveb = *limb;
1982
1983           *lima = *limb = '\0';
1984           if (force_general_numcompare)
1985             diff = general_numcompare (texta, textb);
1986           else
1987             diff = ((key->numeric ? numcompare : general_numcompare)
1988                 (texta, textb));
1989           *lima = savea, *limb = saveb;
1990         }
1991       else if (key->month)
1992         diff = getmonth (texta, lena) - getmonth (textb, lenb);
1993       else
1994         {
1995           if (ignore || translate)
1996             {
1997               char *copy_a = (char *) alloca (lena + 1 + lenb + 1);
1998               char *copy_b = copy_a + lena + 1;
1999               size_t new_len_a, new_len_b;
2000               size_t i, j;
2001
2002               /* Ignore and/or translate chars before comparing.  */
2003 # define IGNORE_CHARS(NEW_LEN, LEN, TEXT, COPY, WC, MBLENGTH, STATE)    \
2004   do                                                                    \
2005     {                                                                   \
2006       wchar_t uwc;                                                      \
2007       char mbc[MB_LEN_MAX];                                             \
2008       mbstate_t state_wc;                                               \
2009                                                                         \
2010       for (NEW_LEN = i = 0; i < LEN;)                                   \
2011         {                                                               \
2012           mbstate_t state_bak;                                          \
2013                                                                         \
2014           state_bak = STATE;                                            \
2015           MBLENGTH = mbrtowc (&WC, TEXT + i, LEN - i, &STATE);          \
2016                                                                         \
2017           if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1          \
2018               || MBLENGTH == 0)                                         \
2019             {                                                           \
2020               if (MBLENGTH == (size_t)-2 || MBLENGTH == (size_t)-1)     \
2021                 STATE = state_bak;                                      \
2022               if (!ignore)                                              \
2023                 COPY[NEW_LEN++] = TEXT[i++];                            \
2024               continue;                                                 \
2025             }                                                           \
2026                                                                         \
2027           if (ignore)                                                   \
2028             {                                                           \
2029               if ((ignore == nonprinting && !iswprint (WC))             \
2030                    || (ignore == nondictionary                          \
2031                        && !iswalnum (WC) && !iswblank (WC)))            \
2032                 {                                                       \
2033                   i += MBLENGTH;                                        \
2034                   continue;                                             \
2035                 }                                                       \
2036             }                                                           \
2037                                                                         \
2038           if (translate)                                                \
2039             {                                                           \
2040                                                                         \
2041               uwc = toupper(WC);                                        \
2042               if (WC == uwc)                                            \
2043                 {                                                       \
2044                   memcpy (mbc, TEXT + i, MBLENGTH);                     \
2045                   i += MBLENGTH;                                        \
2046                 }                                                       \
2047               else                                                      \
2048                 {                                                       \
2049                   i += MBLENGTH;                                        \
2050                   WC = uwc;                                             \
2051                   memset (&state_wc, '\0', sizeof (mbstate_t));         \
2052                                                                         \
2053                   MBLENGTH = wcrtomb (mbc, WC, &state_wc);              \
2054                   assert (MBLENGTH != (size_t)-1 && MBLENGTH != 0);     \
2055                 }                                                       \
2056                                                                         \
2057               for (j = 0; j < MBLENGTH; j++)                            \
2058                 COPY[NEW_LEN++] = mbc[j];                               \
2059             }                                                           \
2060           else                                                          \
2061             for (j = 0; j < MBLENGTH; j++)                              \
2062               COPY[NEW_LEN++] = TEXT[i++];                              \
2063         }                                                               \
2064       COPY[NEW_LEN] = '\0';                                             \
2065     }                                                                   \
2066   while (0)
2067               IGNORE_CHARS (new_len_a, lena, texta, copy_a,
2068                             wc_a, mblength_a, state_a);
2069               IGNORE_CHARS (new_len_b, lenb, textb, copy_b,
2070                             wc_b, mblength_b, state_b);
2071               diff = xmemcoll (copy_a, new_len_a, copy_b, new_len_b);
2072             }
2073           else if (lena == 0)
2074             diff = - NONZERO (lenb);
2075           else if (lenb == 0)
2076             goto greater;
2077           else
2078             diff = xmemcoll (texta, lena, textb, lenb);
2079         }
2080
2081       if (diff)
2082         goto not_equal;
2083
2084       key = key->next;
2085       if (! key)
2086         break;
2087
2088       /* Find the beginning and limit of the next field.  */
2089       if (key->eword != -1)
2090         lima = limfield (a, key), limb = limfield (b, key);
2091       else
2092         lima = a->text + a->length - 1, limb = b->text + b->length - 1;
2093
2094       if (key->sword != -1)
2095         texta = begfield (a, key), textb = begfield (b, key);
2096       else
2097         {
2098           texta = a->text, textb = b->text;
2099           if (key->skipsblanks)
2100             {
2101               while (texta < lima && ismbblank (texta, lima - texta, &mblength_a))
2102                 texta += mblength_a;
2103               while (textb < limb && ismbblank (textb, limb - textb, &mblength_b))
2104                 textb += mblength_b;
2105             }
2106         }
2107     }
2108
2109   return 0;
2110
2111 greater:
2112   diff = 1;
2113 not_equal:
2114   return key->reverse ? -diff : diff;
2115 }
2116 #endif
2117
2118 /* Compare two lines A and B, returning negative, zero, or positive
2119    depending on whether A compares less than, equal to, or greater than B. */
2120
2121 static int
2122 compare (register const struct line *a, register const struct line *b)
2123 {
2124   int diff;
2125   size_t alen, blen;
2126
2127   /* First try to compare on the specified keys (if any).
2128      The only two cases with no key at all are unadorned sort,
2129      and unadorned sort -r. */
2130   if (keylist)
2131     {
2132       diff = keycompare (a, b);
2133       alloca (0);
2134       if (diff | unique | stable)
2135         return diff;
2136     }
2137
2138   /* If the keys all compare equal (or no keys were specified)
2139      fall through to the default comparison.  */
2140   alen = a->length - 1, blen = b->length - 1;
2141
2142   if (alen == 0)
2143     diff = - NONZERO (blen);
2144   else if (blen == 0)
2145     diff = 1;
2146   else if (HAVE_SETLOCALE && hard_LC_COLLATE)
2147     diff = xmemcoll (a->text, alen, b->text, blen);
2148   else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen))))
2149     diff = alen < blen ? -1 : alen != blen;
2150
2151   return reverse ? -diff : diff;
2152 }
2153
2154 /* Check that the lines read from FILE_NAME come in order.  Print a
2155    diagnostic (FILE_NAME, line number, contents of line) to stderr and return
2156    false if they are not in order.  Otherwise, print no diagnostic
2157    and return true.  */
2158
2159 static bool
2160 check (char const *file_name)
2161 {
2162   FILE *fp = xfopen (file_name, "r");
2163   struct buffer buf;            /* Input buffer. */
2164   struct line temp;             /* Copy of previous line. */
2165   size_t alloc = 0;
2166   uintmax_t line_number = 0;
2167   struct keyfield const *key = keylist;
2168   bool nonunique = ! unique;
2169   bool ordered = true;
2170
2171   initbuf (&buf, sizeof (struct line),
2172            MAX (merge_buffer_size, sort_size));
2173   temp.text = NULL;
2174
2175   while (fillbuf (&buf, fp, file_name))
2176     {
2177       struct line const *line = buffer_linelim (&buf);
2178       struct line const *linebase = line - buf.nlines;
2179
2180       /* Make sure the line saved from the old buffer contents is
2181          less than or equal to the first line of the new buffer. */
2182       if (alloc && nonunique <= compare (&temp, line - 1))
2183         {
2184         found_disorder:
2185           {
2186             struct line const *disorder_line = line - 1;
2187             uintmax_t disorder_line_number =
2188               buffer_linelim (&buf) - disorder_line + line_number;
2189             char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)];
2190             fprintf (stderr, _("%s: %s:%s: disorder: "),
2191                      program_name, file_name,
2192                      umaxtostr (disorder_line_number, hr_buf));
2193             write_bytes (disorder_line->text, disorder_line->length, stderr,
2194                          _("standard error"));
2195             ordered = false;
2196             break;
2197           }
2198         }
2199
2200       /* Compare each line in the buffer with its successor.  */
2201       while (linebase < --line)
2202         if (nonunique <= compare (line, line - 1))
2203           goto found_disorder;
2204
2205       line_number += buf.nlines;
2206
2207       /* Save the last line of the buffer.  */
2208       if (alloc < line->length)
2209         {
2210           do
2211             {
2212               alloc *= 2;
2213               if (! alloc)
2214                 {
2215                   alloc = line->length;
2216                   break;
2217                 }
2218             }
2219           while (alloc < line->length);
2220
2221           temp.text = xrealloc (temp.text, alloc);
2222         }
2223       memcpy (temp.text, line->text, line->length);
2224       temp.length = line->length;
2225       if (key)
2226         {
2227           temp.keybeg = temp.text + (line->keybeg - line->text);
2228           temp.keylim = temp.text + (line->keylim - line->text);
2229         }
2230     }
2231
2232   xfclose (fp, file_name);
2233   free (buf.buf);
2234   if (temp.text)
2235     free (temp.text);
2236   return ordered;
2237 }
2238
2239 /* Merge lines from FILES onto OFP.  NFILES cannot be greater than
2240    NMERGE.  Close input and output files before returning.
2241    OUTPUT_FILE gives the name of the output file.  If it is NULL,
2242    the output file is standard output.  If OFP is NULL, the output
2243    file has not been opened yet (or written to, if standard output).  */
2244
2245 static void
2246 mergefps (char **files, register int nfiles,
2247           FILE *ofp, const char *output_file)
2248 {
2249   FILE *fps[NMERGE];            /* Input streams for each file.  */
2250   struct buffer buffer[NMERGE]; /* Input buffers for each file. */
2251   struct line saved;            /* Saved line storage for unique check. */
2252   struct line const *savedline = NULL;
2253                                 /* &saved if there is a saved line. */
2254   size_t savealloc = 0;         /* Size allocated for the saved line. */
2255   struct line const *cur[NMERGE]; /* Current line in each line table. */
2256   struct line const *base[NMERGE]; /* Base of each line table.  */
2257   int ord[NMERGE];              /* Table representing a permutation of fps,
2258                                    such that cur[ord[0]] is the smallest line
2259                                    and will be next output. */
2260   register int i, j, t;
2261   struct keyfield const *key = keylist;
2262   saved.text = NULL;
2263
2264   /* Read initial lines from each input file. */
2265   for (i = 0; i < nfiles; )
2266     {
2267       fps[i] = xfopen (files[i], "r");
2268       initbuf (&buffer[i], sizeof (struct line),
2269                MAX (merge_buffer_size, sort_size / nfiles));
2270       if (fillbuf (&buffer[i], fps[i], files[i]))
2271         {
2272           struct line const *linelim = buffer_linelim (&buffer[i]);
2273           cur[i] = linelim - 1;
2274           base[i] = linelim - buffer[i].nlines;
2275           i++;
2276         }
2277       else
2278         {
2279           /* fps[i] is empty; eliminate it from future consideration.  */
2280           xfclose (fps[i], files[i]);
2281           zaptemp (files[i]);
2282           free (buffer[i].buf);
2283           --nfiles;
2284           for (j = i; j < nfiles; ++j)
2285             files[j] = files[j + 1];
2286         }
2287     }
2288
2289   if (! ofp)
2290     ofp = xfopen (output_file, "w");
2291
2292   /* Set up the ord table according to comparisons among input lines.
2293      Since this only reorders two items if one is strictly greater than
2294      the other, it is stable. */
2295   for (i = 0; i < nfiles; ++i)
2296     ord[i] = i;
2297   for (i = 1; i < nfiles; ++i)
2298     if (0 < compare (cur[ord[i - 1]], cur[ord[i]]))
2299       t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
2300
2301   /* Repeatedly output the smallest line until no input remains. */
2302   while (nfiles)
2303     {
2304       struct line const *smallest = cur[ord[0]];
2305
2306       /* If uniquified output is turned on, output only the first of
2307          an identical series of lines. */
2308       if (unique)
2309         {
2310           if (savedline && compare (savedline, smallest))
2311             {
2312               savedline = 0;
2313               write_bytes (saved.text, saved.length, ofp, output_file);
2314             }
2315           if (!savedline)
2316             {
2317               savedline = &saved;
2318               if (savealloc < smallest->length)
2319                 {
2320                   do
2321                     if (! savealloc)
2322                       {
2323                         savealloc = smallest->length;
2324                         break;
2325                       }
2326                   while ((savealloc *= 2) < smallest->length);
2327
2328                   saved.text = xrealloc (saved.text, savealloc);
2329                 }
2330               saved.length = smallest->length;
2331               memcpy (saved.text, smallest->text, saved.length);
2332               if (key)
2333                 {
2334                   saved.keybeg =
2335                     saved.text + (smallest->keybeg - smallest->text);
2336                   saved.keylim =
2337                     saved.text + (smallest->keylim - smallest->text);
2338                 }
2339             }
2340         }
2341       else
2342         write_bytes (smallest->text, smallest->length, ofp, output_file);
2343
2344       /* Check if we need to read more lines into core. */
2345       if (base[ord[0]] < smallest)
2346         cur[ord[0]] = smallest - 1;
2347       else
2348         {
2349           if (fillbuf (&buffer[ord[0]], fps[ord[0]], files[ord[0]]))
2350             {
2351               struct line const *linelim = buffer_linelim (&buffer[ord[0]]);
2352               cur[ord[0]] = linelim - 1;
2353               base[ord[0]] = linelim - buffer[ord[0]].nlines;
2354             }
2355           else
2356             {
2357               /* We reached EOF on fps[ord[0]]. */
2358               for (i = 1; i < nfiles; ++i)
2359                 if (ord[i] > ord[0])
2360                   --ord[i];
2361               --nfiles;
2362               xfclose (fps[ord[0]], files[ord[0]]);
2363               zaptemp (files[ord[0]]);
2364               free (buffer[ord[0]].buf);
2365               for (i = ord[0]; i < nfiles; ++i)
2366                 {
2367                   fps[i] = fps[i + 1];
2368                   files[i] = files[i + 1];
2369                   buffer[i] = buffer[i + 1];
2370                   cur[i] = cur[i + 1];
2371                   base[i] = base[i + 1];
2372                 }
2373               for (i = 0; i < nfiles; ++i)
2374                 ord[i] = ord[i + 1];
2375               continue;
2376             }
2377         }
2378
2379       /* The new line just read in may be larger than other lines
2380          already in core; push it back in the queue until we encounter
2381          a line larger than it. */
2382       for (i = 1; i < nfiles; ++i)
2383         {
2384           t = compare (cur[ord[0]], cur[ord[i]]);
2385           if (!t)
2386             t = ord[0] - ord[i];
2387           if (t < 0)
2388             break;
2389         }
2390       t = ord[0];
2391       for (j = 1; j < i; ++j)
2392         ord[j - 1] = ord[j];
2393       ord[i - 1] = t;
2394     }
2395
2396   if (unique && savedline)
2397     {
2398       write_bytes (saved.text, saved.length, ofp, output_file);
2399       free (saved.text);
2400     }
2401
2402   xfclose (ofp, output_file);
2403 }
2404
2405 /* Merge into T the two sorted arrays of lines LO (with NLO members)
2406    and HI (with NHI members).  T, LO, and HI point just past their
2407    respective arrays, and the arrays are in reverse order.  NLO and
2408    NHI must be positive, and HI - NHI must equal T - (NLO + NHI).  */
2409
2410 static inline void
2411 mergelines (struct line *t,
2412             struct line const *lo, size_t nlo,
2413             struct line const *hi, size_t nhi)
2414 {
2415   for (;;)
2416     if (compare (lo - 1, hi - 1) <= 0)
2417       {
2418         *--t = *--lo;
2419         if (! --nlo)
2420           {
2421             /* HI - NHI equalled T - (NLO + NHI) when this function
2422                began.  Therefore HI must equal T now, and there is no
2423                need to copy from HI to T.  */
2424             return;
2425           }
2426       }
2427     else
2428       {
2429         *--t = *--hi;
2430         if (! --nhi)
2431           {
2432             do
2433               *--t = *--lo;
2434             while (--nlo);
2435
2436             return;
2437           }
2438       }
2439 }
2440
2441 /* Sort the array LINES with NLINES members, using TEMP for temporary space.
2442    NLINES must be at least 2.
2443    The input and output arrays are in reverse order, and LINES and
2444    TEMP point just past the end of their respective arrays.
2445
2446    Use a recursive divide-and-conquer algorithm, in the style
2447    suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23.  Use
2448    the optimization suggested by exercise 5.2.4-10; this requires room
2449    for only 1.5*N lines, rather than the usual 2*N lines.  Knuth
2450    writes that this memory optimization was originally published by
2451    D. A. Bell, Comp J. 1 (1958), 75.  */
2452
2453 static void
2454 sortlines (struct line *lines, size_t nlines, struct line *temp)
2455 {
2456   if (nlines == 2)
2457     {
2458       if (0 < compare (&lines[-1], &lines[-2]))
2459         {
2460           struct line tmp = lines[-1];
2461           lines[-1] = lines[-2];
2462           lines[-2] = tmp;
2463         }
2464     }
2465   else
2466     {
2467       size_t nlo = nlines / 2;
2468       size_t nhi = nlines - nlo;
2469       struct line *lo = lines;
2470       struct line *hi = lines - nlo;
2471       struct line *sorted_lo = temp;
2472
2473       sortlines (hi, nhi, temp);
2474       if (1 < nlo)
2475         sortlines_temp (lo, nlo, sorted_lo);
2476       else
2477         sorted_lo[-1] = lo[-1];
2478
2479       mergelines (lines, sorted_lo, nlo, hi, nhi);
2480     }
2481 }
2482
2483 /* Like sortlines (LINES, NLINES, TEMP), except output into TEMP
2484    rather than sorting in place.  */
2485
2486 static void
2487 sortlines_temp (struct line *lines, size_t nlines, struct line *temp)
2488 {
2489   if (nlines == 2)
2490     {
2491       bool swap = (0 < compare (&lines[-1], &lines[-2]));
2492       temp[-1] = lines[-1 - swap];
2493       temp[-2] = lines[-2 + swap];
2494     }
2495   else
2496     {
2497       size_t nlo = nlines / 2;
2498       size_t nhi = nlines - nlo;
2499       struct line *lo = lines;
2500       struct line *hi = lines - nlo;
2501       struct line *sorted_hi = temp - nlo;
2502
2503       sortlines_temp (hi, nhi, sorted_hi);
2504       if (1 < nlo)
2505         sortlines (lo, nlo, temp);
2506
2507       mergelines (temp, lo, nlo, sorted_hi, nhi);
2508     }
2509 }
2510
2511 /* Return the index of the first of NFILES FILES that is the same file
2512    as OUTFILE.  If none can be the same, return NFILES.
2513
2514    This test ensures that an otherwise-erroneous use like
2515    "sort -m -o FILE ... FILE ..." copies FILE before writing to it.
2516    It's not clear that POSIX requires this nicety.
2517    Detect common error cases, but don't try to catch obscure cases like
2518    "cat ... FILE ... | sort -m -o FILE"
2519    where traditional "sort" doesn't copy the input and where
2520    people should know that they're getting into trouble anyway.
2521    Catching these obscure cases would slow down performance in
2522    common cases.  */
2523
2524 static int
2525 first_same_file (char * const *files, int nfiles, char const *outfile)
2526 {
2527   int i;
2528   bool got_outstat = false;
2529   struct stat instat, outstat;
2530
2531   for (i = 0; i < nfiles; i++)
2532     {
2533       bool standard_input = STREQ (files[i], "-");
2534
2535       if (outfile && STREQ (outfile, files[i]) && ! standard_input)
2536         return i;
2537
2538       if (! got_outstat)
2539         {
2540           got_outstat = true;
2541           if ((outfile
2542                ? stat (outfile, &outstat)
2543                : fstat (STDOUT_FILENO, &outstat))
2544               != 0)
2545             return nfiles;
2546         }
2547
2548       if (((standard_input
2549             ? fstat (STDIN_FILENO, &instat)
2550             : stat (files[i], &instat))
2551            == 0)
2552           && SAME_INODE (instat, outstat))
2553         return i;
2554     }
2555
2556   return nfiles;
2557 }
2558
2559 /* Merge NFILES FILES onto OUTPUT_FILE.  However, merge at most
2560    MAX_MERGE input files directly onto OUTPUT_FILE.  MAX_MERGE cannot
2561    exceed NMERGE.  A null OUTPUT_FILE stands for standard output.  */
2562
2563 static void
2564 merge (char **files, int nfiles, int max_merge, char const *output_file)
2565 {
2566   while (max_merge < nfiles)
2567     {
2568       FILE *tfp;
2569       int i, t = 0;
2570       char *temp;
2571       for (i = 0; i < nfiles / NMERGE; ++i)
2572         {
2573           temp = create_temp_file (&tfp);
2574           mergefps (&files[i * NMERGE], NMERGE, tfp, temp);
2575           files[t++] = temp;
2576         }
2577       temp = create_temp_file (&tfp);
2578       mergefps (&files[i * NMERGE], nfiles % NMERGE, tfp, temp);
2579       files[t++] = temp;
2580       nfiles = t;
2581       if (nfiles == 1)
2582         break;
2583     }
2584
2585   mergefps (files, nfiles, NULL, output_file);
2586 }
2587
2588 /* Sort NFILES FILES onto OUTPUT_FILE. */
2589
2590 static void
2591 sort (char * const *files, int nfiles, char const *output_file)
2592 {
2593   struct buffer buf;
2594   int n_temp_files = 0;
2595   bool output_file_created = false;
2596
2597   buf.alloc = 0;
2598
2599   while (nfiles)
2600     {
2601       char const *temp_output;
2602       char const *file = *files;
2603       FILE *fp = xfopen (file, "r");
2604       FILE *tfp;
2605       size_t bytes_per_line = (2 * sizeof (struct line)
2606                                - sizeof (struct line) / 2);
2607
2608       if (! buf.alloc)
2609         initbuf (&buf, bytes_per_line,
2610                  sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line));
2611       buf.eof = false;
2612       files++;
2613       nfiles--;
2614
2615       while (fillbuf (&buf, fp, file))
2616         {
2617           struct line *line;
2618           struct line *linebase;
2619
2620           if (buf.eof && nfiles
2621               && (bytes_per_line + 1
2622                   < (buf.alloc - buf.used - bytes_per_line * buf.nlines)))
2623             {
2624               /* End of file, but there is more input and buffer room.
2625                  Concatenate the next input file; this is faster in
2626                  the usual case.  */
2627               buf.left = buf.used;
2628               break;
2629             }
2630
2631           line = buffer_linelim (&buf);
2632           linebase = line - buf.nlines;
2633           if (1 < buf.nlines)
2634             sortlines (line, buf.nlines, linebase);
2635           if (buf.eof && !nfiles && !n_temp_files && !buf.left)
2636             {
2637               xfclose (fp, file);
2638               tfp = xfopen (output_file, "w");
2639               temp_output = output_file;
2640               output_file_created = true;
2641             }
2642           else
2643             {
2644               ++n_temp_files;
2645               temp_output = create_temp_file (&tfp);
2646             }
2647
2648           do
2649             {
2650               line--;
2651               write_bytes (line->text, line->length, tfp, temp_output);
2652               if (unique)
2653                 while (linebase < line && compare (line, line - 1) == 0)
2654                   line--;
2655             }
2656           while (linebase < line);
2657
2658           xfclose (tfp, temp_output);
2659
2660           if (output_file_created)
2661             goto finish;
2662         }
2663       xfclose (fp, file);
2664     }
2665
2666  finish:
2667   free (buf.buf);
2668
2669   if (! output_file_created)
2670     {
2671       int i = n_temp_files;
2672       struct tempnode *node;
2673       char **tempfiles = xnmalloc (n_temp_files, sizeof *tempfiles);
2674       for (node = temphead; i > 0; node = node->next)
2675         tempfiles[--i] = node->name;
2676       merge (tempfiles, n_temp_files, NMERGE, output_file);
2677       free (tempfiles);
2678     }
2679 }
2680
2681 /* Insert key KEY at the end of the key list.  */
2682
2683 static void
2684 insertkey (struct keyfield *key)
2685 {
2686   struct keyfield **p;
2687
2688   for (p = &keylist; *p; p = &(*p)->next)
2689     continue;
2690   *p = key;
2691   key->next = NULL;
2692 }
2693
2694 /* Report a bad field specification SPEC, with extra info MSGID.  */
2695
2696 static void badfieldspec (char const *, char const *)
2697      ATTRIBUTE_NORETURN;
2698 static void
2699 badfieldspec (char const *spec, char const *msgid)
2700 {
2701   error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
2702          _(msgid), spec);
2703   abort ();
2704 }
2705
2706 /* Parse the leading integer in STRING and store the resulting value
2707    (which must fit into size_t) into *VAL.  Return the address of the
2708    suffix after the integer.  If MSGID is NULL, return NULL after
2709    failure; otherwise, report MSGID and exit on failure.  */
2710
2711 static char const *
2712 parse_field_count (char const *string, size_t *val, char const *msgid)
2713 {
2714   char *suffix;
2715   uintmax_t n;
2716
2717   switch (xstrtoumax (string, &suffix, 10, &n, ""))
2718     {
2719     case LONGINT_OK:
2720     case LONGINT_INVALID_SUFFIX_CHAR:
2721       *val = n;
2722       if (*val == n)
2723         break;
2724       /* Fall through.  */
2725     case LONGINT_OVERFLOW:
2726     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
2727       if (msgid)
2728         error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
2729                _(msgid), (int) (suffix - string), string);
2730       return NULL;
2731
2732     case LONGINT_INVALID:
2733       if (msgid)
2734         error (SORT_FAILURE, 0, _("%s: invalid count at start of `%s'"),
2735                _(msgid), string);
2736       return NULL;
2737     }
2738
2739   return suffix;
2740 }
2741
2742 /* Handle interrupts and hangups. */
2743
2744 static void
2745 sighandler (int sig)
2746 {
2747 #ifndef SA_NOCLDSTOP
2748   signal (sig, SIG_IGN);
2749 #endif
2750
2751   cleanup ();
2752
2753   signal (sig, SIG_DFL);
2754   raise (sig);
2755 }
2756
2757 /* Set the ordering options for KEY specified in S.
2758    Return the address of the first character in S that
2759    is not a valid ordering option.
2760    BLANKTYPE is the kind of blanks that 'b' should skip. */
2761
2762 static char *
2763 set_ordering (register const char *s, struct keyfield *key,
2764               enum blanktype blanktype)
2765 {
2766   while (*s)
2767     {
2768       switch (*s)
2769         {
2770         case 'b':
2771           if (blanktype == bl_start || blanktype == bl_both)
2772             key->skipsblanks = true;
2773           if (blanktype == bl_end || blanktype == bl_both)
2774             key->skipeblanks = true;
2775           break;
2776         case 'd':
2777           key->ignore = nondictionary;
2778           break;
2779         case 'f':
2780           key->translate = fold_toupper;
2781           break;
2782         case 'g':
2783           key->general_numeric = true;
2784           break;
2785         case 'i':
2786           /* Option order should not matter, so don't let -i override
2787              -d.  -d implies -i, but -i does not imply -d.  */
2788           if (! key->ignore)
2789             key->ignore = nonprinting;
2790           break;
2791         case 'M':
2792           key->month = true;
2793           break;
2794         case 'n':
2795           key->numeric = true;
2796           break;
2797         case 'r':
2798           key->reverse = true;
2799           break;
2800         default:
2801           return (char *) s;
2802         }
2803       ++s;
2804     }
2805   return (char *) s;
2806 }
2807
2808 static struct keyfield *
2809 new_key (void)
2810 {
2811   struct keyfield *key = xzalloc (sizeof *key);
2812   key->eword = SIZE_MAX;
2813   return key;
2814 }
2815
2816 int
2817 main (int argc, char **argv)
2818 {
2819   struct keyfield *key;
2820   struct keyfield gkey;
2821   char const *s;
2822   int c = 0;
2823   bool checkonly = false;
2824   bool mergeonly = false;
2825   int nfiles = 0;
2826   bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
2827   bool obsolete_usage = (posix2_version () < 200112);
2828   char const *short_options = (obsolete_usage
2829                                ? COMMON_SHORT_OPTIONS "y::"
2830                                : COMMON_SHORT_OPTIONS "y:");
2831   char *minus = "-", **files;
2832   char const *outfile = NULL;
2833
2834   initialize_main (&argc, &argv);
2835   program_name = argv[0];
2836   setlocale (LC_ALL, "");
2837   bindtextdomain (PACKAGE, LOCALEDIR);
2838   textdomain (PACKAGE);
2839
2840   atexit (cleanup);
2841
2842   initialize_exit_failure (SORT_FAILURE);
2843   atexit (close_stdout);
2844
2845   hard_LC_COLLATE = hard_locale (LC_COLLATE);
2846 #if HAVE_NL_LANGINFO
2847   hard_LC_TIME = hard_locale (LC_TIME);
2848 #endif
2849
2850 #if HAVE_SETLOCALE
2851   /* Let's get locale's representation of the decimal point */
2852   {
2853     struct lconv const *lconvp = localeconv ();
2854
2855     decimal_point = *lconvp->decimal_point;
2856     if (! decimal_point || lconvp->decimal_point[1])
2857       {
2858         decimal_point = C_DECIMAL_POINT;
2859         if (lconvp->decimal_point[0] && lconvp->decimal_point[1])
2860           force_general_numcompare = 1;
2861       }
2862
2863     /* We don't support multibyte thousands separators yet.  */
2864     th_sep = *lconvp->thousands_sep;
2865     if (! th_sep || lconvp->thousands_sep[1])
2866       {
2867         th_sep = CHAR_MAX + 1;
2868         if (lconvp->thousands_sep[0] && lconvp->thousands_sep[1])
2869           force_general_numcompare = 1;
2870       }
2871   }
2872 #endif
2873
2874 #if HAVE_MBRTOWC
2875   if (MB_CUR_MAX > 1)
2876     {
2877       inittables = inittables_mb;
2878       begfield = begfield_mb;
2879       limfield = limfield_mb;
2880       getmonth = getmonth_mb;
2881       keycompare = keycompare_mb;
2882     }
2883   else
2884 #endif
2885     {
2886       inittables = inittables_uni;
2887       begfield = begfield_uni;
2888       limfield = limfield_uni;
2889       keycompare = keycompare_uni;
2890       getmonth = getmonth_uni;
2891     }
2892
2893   have_read_stdin = false;
2894   inittables ();
2895
2896   {
2897     int i;
2898     static int const sig[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM };
2899     enum { nsigs = sizeof sig / sizeof sig[0] };
2900
2901 #ifdef SA_NOCLDSTOP
2902     struct sigaction act;
2903
2904     sigemptyset (&caught_signals);
2905     for (i = 0; i < nsigs; i++)
2906       {
2907         sigaction (sig[i], NULL, &act);
2908         if (act.sa_handler != SIG_IGN)
2909           sigaddset (&caught_signals, sig[i]);
2910       }
2911
2912     act.sa_handler = sighandler;
2913     act.sa_mask = caught_signals;
2914     act.sa_flags = 0;
2915
2916     for (i = 0; i < nsigs; i++)
2917       if (sigismember (&caught_signals, sig[i]))
2918         sigaction (sig[i], &act, NULL);
2919 #else
2920     for (i = 0; i < nsigs; i++)
2921       if (signal (sig[i], SIG_IGN) != SIG_IGN)
2922         signal (sig[i], sighandler);
2923 #endif
2924   }
2925
2926   gkey.sword = gkey.eword = SIZE_MAX;
2927   gkey.ignore = NULL;
2928   gkey.translate = NULL;
2929   gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
2930   gkey.skipsblanks = gkey.skipeblanks = false;
2931
2932   files = xnmalloc (argc, sizeof *files);
2933
2934   for (;;)
2935     {
2936       /* Parse an operand as a file after "--" was seen; or if
2937          pedantic and a file was seen, unless the POSIX version
2938          predates 1003.1-2001 and -c was not seen and the operand is
2939          "-o FILE" or "-oFILE".  */
2940
2941       if (c == -1
2942           || (posixly_correct && nfiles != 0
2943               && ! (obsolete_usage
2944                     && ! checkonly
2945                     && optind != argc
2946                     && argv[optind][0] == '-' && argv[optind][1] == 'o'
2947                     && (argv[optind][2] || optind + 1 != argc)))
2948           || ((c = getopt_long (argc, argv, short_options,
2949                                 long_options, NULL))
2950               == -1))
2951         {
2952           if (argc <= optind)
2953             break;
2954           files[nfiles++] = argv[optind++];
2955         }
2956       else switch (c)
2957         {
2958         case 1:
2959           key = NULL;
2960           if (obsolete_usage && optarg[0] == '+')
2961             {
2962               /* Treat +POS1 [-POS2] as a key if possible; but silently
2963                  treat an operand as a file if it is not a valid +POS1.  */
2964               key = new_key ();
2965               s = parse_field_count (optarg + 1, &key->sword, NULL);
2966               if (s && *s == '.')
2967                 s = parse_field_count (s + 1, &key->schar, NULL);
2968               if (! (key->sword | key->schar))
2969                 key->sword = SIZE_MAX;
2970               if (! s || *set_ordering (s, key, bl_start))
2971                 {
2972                   free (key);
2973                   key = NULL;
2974                 }
2975               else
2976                 {
2977                   if (optind != argc && argv[optind][0] == '-'
2978                       && ISDIGIT (argv[optind][1]))
2979                     {
2980                       char const *optarg1 = argv[optind++];
2981                       s = parse_field_count (optarg1 + 1, &key->eword,
2982                                              N_("invalid number after `-'"));
2983                       if (*s == '.')
2984                         s = parse_field_count (s + 1, &key->echar,
2985                                                N_("invalid number after `.'"));
2986                       if (*set_ordering (s, key, bl_end))
2987                         badfieldspec (optarg1,
2988                                       N_("stray character in field spec"));
2989                     }
2990                   insertkey (key);
2991                 }
2992             }
2993           if (! key)
2994             files[nfiles++] = optarg;
2995           break;
2996
2997         case 'b':
2998         case 'd':
2999         case 'f':
3000         case 'g':
3001         case 'i':
3002         case 'M':
3003         case 'n':
3004         case 'r':
3005           {
3006             char str[2];
3007             str[0] = c;
3008             str[1] = '\0';
3009             set_ordering (str, &gkey, bl_both);
3010           }
3011           break;
3012
3013         case 'c':
3014           checkonly = true;
3015           break;
3016
3017         case 'k':
3018           key = new_key ();
3019
3020           /* Get POS1. */
3021           s = parse_field_count (optarg, &key->sword,
3022                                  N_("invalid number at field start"));
3023           if (! key->sword--)
3024             {
3025               /* Provoke with `sort -k0' */
3026               badfieldspec (optarg, N_("field number is zero"));
3027             }
3028           if (*s == '.')
3029             {
3030               s = parse_field_count (s + 1, &key->schar,
3031                                      N_("invalid number after `.'"));
3032               if (! key->schar--)
3033                 {
3034                   /* Provoke with `sort -k1.0' */
3035                   badfieldspec (optarg, N_("character offset is zero"));
3036                 }
3037             }
3038           if (! (key->sword | key->schar))
3039             key->sword = SIZE_MAX;
3040           s = set_ordering (s, key, bl_start);
3041           if (*s != ',')
3042             {
3043               key->eword = SIZE_MAX;
3044               key->echar = 0;
3045             }
3046           else
3047             {
3048               /* Get POS2. */
3049               s = parse_field_count (s + 1, &key->eword,
3050                                      N_("invalid number after `,'"));
3051               if (! key->eword--)
3052                 {
3053                   /* Provoke with `sort -k1,0' */
3054                   badfieldspec (optarg, N_("field number is zero"));
3055                 }
3056               if (*s == '.')
3057                 s = parse_field_count (s + 1, &key->echar,
3058                                        N_("invalid number after `.'"));
3059               else
3060                 {
3061                   /* `-k 2,3' is equivalent to `+1 -3'.  */
3062                   key->eword++;
3063                 }
3064               s = set_ordering (s, key, bl_end);
3065             }
3066           if (*s)
3067             badfieldspec (optarg, N_("stray character in field spec"));
3068           insertkey (key);
3069           break;
3070
3071         case 'm':
3072           mergeonly = true;
3073           break;
3074
3075         case 'o':
3076           if (outfile && !STREQ (outfile, optarg))
3077             error (SORT_FAILURE, 0, _("multiple output files specified"));
3078           outfile = optarg;
3079           break;
3080
3081         case 's':
3082           stable = true;
3083           break;
3084
3085         case 'S':
3086           specify_sort_size (optarg);
3087           break;
3088
3089         case 't':
3090           {
3091             char newtab[MB_LEN_MAX + 1];
3092             size_t newtab_length = 1;
3093             strncpy (newtab, optarg, MB_LEN_MAX);
3094             if (! newtab[0])
3095               error (SORT_FAILURE, 0, _("empty tab"));
3096 #if HAVE_MBRTOWC
3097             if (MB_CUR_MAX > 1)
3098               {
3099                 wchar_t wc;
3100                 mbstate_t state;
3101                 size_t i;
3102
3103                 memset (&state, '\0', sizeof (mbstate_t));
3104                 newtab_length = mbrtowc (&wc, newtab, strnlen (newtab, MB_LEN_MAX), &state);
3105                 switch (newtab_length)
3106                   {
3107                   case (size_t) -1:
3108                   case (size_t) -2:
3109                   case 0:
3110                     newtab_length = 1;
3111                   }
3112
3113                 if (optarg[newtab_length])
3114                   {
3115                     /* Provoke with `sort -txx'.  Complain about
3116                        "multi-character tab" instead of "multibyte tab", so
3117                        that the diagnostic's wording does not need to be
3118                        changed once multibyte characters are supported.  */
3119                     error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
3120                            optarg);
3121                   }
3122               }
3123             else
3124 #endif
3125
3126             if (optarg[1])
3127               {
3128                 if (STREQ (optarg, "\\0"))
3129                   newtab[0] = '\0';
3130                 else
3131                   {
3132                     /* Provoke with `sort -txx'.  Complain about
3133                        "multi-character tab" instead of "multibyte tab", so
3134                        that the diagnostic's wording does not need to be
3135                        changed once multibyte characters are supported.  */
3136                     error (SORT_FAILURE, 0, _("multi-character tab `%s'"),
3137                            optarg);
3138                   }
3139               }
3140             if (!tab_default && (tab_length != newtab_length
3141                 || memcmp(tab, newtab, tab_length) != 0))
3142               error (SORT_FAILURE, 0, _("incompatible tabs"));
3143             memcpy(tab, newtab, newtab_length);
3144             tab_length = newtab_length;
3145             tab_default = false;
3146           }
3147           break;
3148
3149         case 'T':
3150           add_temp_dir (optarg);
3151           break;
3152
3153         case 'u':
3154           unique = true;
3155           break;
3156
3157         case 'y':
3158           /* Accept and ignore e.g. -y0 for compatibility with Solaris
3159              2.x through Solaris 7.  -y is marked as obsolete starting
3160              with Solaris 8.  */
3161           break;
3162
3163         case 'z':
3164           eolchar = 0;
3165           break;
3166
3167         case_GETOPT_HELP_CHAR;
3168
3169         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
3170
3171         default:
3172           usage (SORT_FAILURE);
3173         }
3174     }
3175
3176   /* Inheritance of global options to individual keys. */
3177   for (key = keylist; key; key = key->next)
3178     if (! (key->ignore || key->translate
3179            || (key->skipsblanks | key->reverse
3180                | key->skipeblanks | key->month | key->numeric
3181                | key->general_numeric)))
3182       {
3183         key->ignore = gkey.ignore;
3184         key->translate = gkey.translate;
3185         key->skipsblanks = gkey.skipsblanks;
3186         key->skipeblanks = gkey.skipeblanks;
3187         key->month = gkey.month;
3188         key->numeric = gkey.numeric;
3189         key->general_numeric = gkey.general_numeric;
3190         key->reverse = gkey.reverse;
3191       }
3192
3193   if (!keylist && (gkey.ignore || gkey.translate
3194                    || (gkey.skipsblanks | gkey.skipeblanks | gkey.month
3195                        | gkey.numeric | gkey.general_numeric)))
3196     insertkey (&gkey);
3197   reverse = gkey.reverse;
3198
3199   if (temp_dir_count == 0)
3200     {
3201       char const *tmp_dir = getenv ("TMPDIR");
3202       add_temp_dir (tmp_dir ? tmp_dir : DEFAULT_TMPDIR);
3203     }
3204
3205   if (nfiles == 0)
3206     {
3207       nfiles = 1;
3208       files = &minus;
3209     }
3210
3211   if (checkonly)
3212     {
3213       if (nfiles > 1)
3214         {
3215           error (0, 0, _("extra operand %s not allowed with -c"),
3216                  quote (files[1]));
3217           usage (SORT_FAILURE);
3218         }
3219
3220       /* POSIX requires that sort return 1 IFF invoked with -c and the
3221          input is not properly sorted.  */
3222       exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER);
3223     }
3224
3225   if (mergeonly)
3226     {
3227       int max_merge = first_same_file (files, MIN (nfiles, NMERGE), outfile);
3228       merge (files, nfiles, max_merge, outfile);
3229     }
3230   else
3231     sort (files, nfiles, outfile);
3232
3233   if (have_read_stdin && fclose (stdin) == EOF)
3234     die (_("close failed"), "-");
3235
3236   exit (EXIT_SUCCESS);
3237 }