1 /* sort - sort lines of text (with all kinds of options).
2 Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
18 Written December 1988 by Mike Haertel.
19 The author may be reached (Email) at the address mike@gnu.ai.mit.edu,
20 or (US mail) as Mike Haertel c/o Free Software Foundation. */
24 /* Get isblank from GNU libc. */
27 #include <sys/types.h>
35 #include "long-options.h"
52 /* Undefine, to avoid warning about redefinition on some systems. */
54 #define min(a, b) ((a) < (b) ? (a) : (b))
56 #define UCHAR_LIM (UCHAR_MAX + 1)
57 #define UCHAR(c) ((unsigned char) (c))
59 #ifndef DEFAULT_TMPDIR
60 #define DEFAULT_TMPDIR "/tmp"
63 /* The kind of blanks for '-b' to skip in various options. */
64 enum blanktype { bl_start, bl_end, bl_both };
66 /* Lines are held in core as counted strings. */
69 char *text; /* Text of the line. */
70 int length; /* Length not including final newline. */
71 char *keybeg; /* Start of first key. */
72 char *keylim; /* Limit of first key. */
75 /* Arrays of lines. */
78 struct line *lines; /* Dynamically allocated array of lines. */
79 int used; /* Number of slots used. */
80 int alloc; /* Number of slots allocated. */
81 int limit; /* Max number of slots to allocate. */
87 char *buf; /* Dynamically allocated buffer. */
88 int used; /* Number of bytes used. */
89 int alloc; /* Number of bytes allocated. */
90 int left; /* Number of bytes left after line parsing. */
95 int sword; /* Zero-origin 'word' to start at. */
96 int schar; /* Additional characters to skip. */
97 int skipsblanks; /* Skip leading white space at start. */
98 int eword; /* Zero-origin first word after field. */
99 int echar; /* Additional characters in field. */
100 int skipeblanks; /* Skip trailing white space at finish. */
101 int *ignore; /* Boolean array of characters to ignore. */
102 char *translate; /* Translation applied to characters. */
103 int numeric; /* Flag for numeric comparison. Handle
104 strings of digits with optional decimal
105 point, but no exponential notation. */
106 int general_numeric; /* Flag for general, numeric comparison.
107 Handle numbers in exponential notation. */
108 int month; /* Flag for comparison by month name. */
109 int reverse; /* Reverse the sense of comparison. */
110 struct keyfield *next; /* Next keyfield to try. */
119 /* The name this program was run with. */
122 /* Table of digits. */
123 static int digits[UCHAR_LIM];
125 /* Table of white space. */
126 static int blanks[UCHAR_LIM];
128 /* Table of non-printing characters. */
129 static int nonprinting[UCHAR_LIM];
131 /* Table of non-dictionary characters (not letters, digits, or blanks). */
132 static int nondictionary[UCHAR_LIM];
134 /* Translation table folding lower case to upper. */
135 static char fold_toupper[UCHAR_LIM];
137 /* Table mapping 3-letter month names to integers.
138 Alphabetic order allows binary search. */
139 static struct month const monthtab[] =
155 /* During the merge phase, the number of files to merge at once. */
158 /* Initial buffer size for in core sorting. Will not grow unless a
159 line longer than this is seen. */
160 static int sortalloc = 512 * 1024;
162 /* Initial buffer size for in core merge buffers. Bear in mind that
163 up to NMERGE * mergealloc bytes may be allocated for merge buffers. */
164 static int mergealloc = 16 * 1024;
166 /* Guess of average line length. */
167 static int linelength = 30;
169 /* Maximum number of elements for the array(s) of struct line's, in bytes. */
170 #define LINEALLOC (256 * 1024)
172 /* Prefix for temporary file names. */
173 static char *temp_file_prefix;
175 /* Flag to reverse the order of all comparisons. */
178 /* Flag for stable sort. This turns off the last ditch bytewise
179 comparison of lines, and instead leaves lines in the same order
180 they were read if all keys compare equal. */
183 /* Tab character separating fields. If NUL, then fields are separated
184 by the empty string between a non-whitespace character and a whitespace
188 /* Flag to remove consecutive duplicate lines from the output.
189 Only the last of a sequence of equal lines will be output. */
192 /* Nonzero if any of the input files are the standard input. */
193 static int have_read_stdin;
195 /* Lists of key field comparisons to be tried. */
196 static struct keyfield keyhead;
200 COLLDIFF (int a, int b)
204 if ((unsigned char)a == (unsigned char)b)
208 return strcoll(s[0], s[1]);
212 collcmp(char *a, char *b, int mini)
226 #endif /* __FreeBSD__ */
232 fprintf (stderr, _("Try `%s --help' for more information.\n"),
237 Usage: %s [OPTION]... [FILE]...\n\
241 Write sorted concatenation of all FILE(s) to standard output.\n\
243 +POS1 [-POS2] start a key at POS1, end it before POS2\n\
244 -M compare (unknown) < `JAN' < ... < `DEC', imply -b\n\
245 -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\
246 -b ignore leading blanks in sort fields or keys\n\
247 -c check if given files already sorted, do not sort\n\
248 -d consider only [a-zA-Z0-9 ] characters in keys\n\
249 -f fold lower case to upper case characters in keys\n\
250 -g compare according to general numerical value, imply -b\n\
251 -i consider only [\\040-\\0176] characters in keys\n\
252 -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\
253 -m merge already sorted files, do not sort\n\
254 -n compare according to string numerical value, imply -b\n\
255 -o FILE write result on FILE instead of standard output\n\
256 -r reverse the result of comparisons\n\
257 -s stabilize sort by disabling last resort comparison\n\
258 -t SEP use SEParator instead of non- to whitespace transition\n\
259 -u with -c, check for strict ordering\n\
260 -u with -m, only output the first of an equal sequence\n\
261 --help display this help and exit\n\
262 --version output version information and exit\n\
264 POS is F[.C][OPTS], where F is the field number and C the character\n\
265 position in the field, both counted from zero. OPTS is made up of one\n\
266 or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\
267 for that key. If no key given, use the entire line as key. With no\n\
268 FILE, or when FILE is -, read standard input.\n\
275 /* The list of temporary files. */
276 static struct tempnode
279 struct tempnode *next;
282 /* Clean up any remaining temporary files. */
287 struct tempnode *node;
289 for (node = temphead.next; node; node = node->next)
293 /* Allocate N bytes of memory dynamically, with error checking. */
296 xmalloc (unsigned int n)
303 error (0, 0, _("virtual memory exhausted"));
310 /* Change the size of an allocated block of memory P to N bytes,
312 If P is NULL, run xmalloc.
313 If N is 0, run free and return NULL. */
316 xrealloc (char *p, unsigned int n)
328 error (0, 0, _("virtual memory exhausted"));
336 xtmpfopen (const char *file)
341 fd = open (file, O_EXCL | O_WRONLY | O_CREAT | O_TRUNC, 0600);
342 if (fd < 0 || (fp = fdopen (fd, "w")) == NULL)
344 error (0, errno, "%s", file);
353 xfopen (const char *file, const char *how)
357 if (strcmp (file, "-") == 0)
363 if ((fp = fopen (file, how)) == NULL)
365 error (0, errno, "%s", file);
381 /* Allow reading stdin from tty more than once. */
385 else if (fp == stdout)
387 if (fflush (fp) != 0)
389 error (0, errno, _("flushing file"));
396 if (fclose (fp) != 0)
398 error (0, errno, _("error closing file"));
406 xfwrite (const char *buf, int size, int nelem, FILE *fp)
408 if (fwrite (buf, size, nelem, fp) != nelem)
410 error (0, errno, _("write error"));
416 /* Return a name for a temporary file. */
421 static unsigned int seq;
422 int len = strlen (temp_file_prefix);
423 char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1);
424 struct tempnode *node;
426 node = (struct tempnode *) xmalloc (sizeof (struct tempnode));
428 "%s%ssort%5.5d%5.5d",
430 (len && temp_file_prefix[len - 1] != '/') ? "/" : "",
431 (unsigned int) getpid () & 0xffff, seq);
433 /* Make sure that SEQ's value fits in 5 digits. */
439 node->next = temphead.next;
440 temphead.next = node;
444 /* Search through the list of temporary files for NAME;
445 remove it if it is found on the list. */
450 struct tempnode *node, *temp;
452 for (node = &temphead; node->next; node = node->next)
453 if (!strcmp (name, node->next->name))
460 node->next = temp->next;
461 free ((char *) temp);
465 /* Initialize the character class tables. */
472 for (i = 0; i < UCHAR_LIM; ++i)
480 if (!ISALNUM (i) && !ISBLANK (i))
481 nondictionary[i] = 1;
483 fold_toupper[i] = toupper (i);
489 /* Initialize BUF, allocating ALLOC bytes initially. */
492 initbuf (struct buffer *buf, int alloc)
495 buf->buf = xmalloc (buf->alloc);
496 buf->used = buf->left = 0;
499 /* Fill BUF reading from FP, moving buf->left bytes from the end
500 of buf->buf to the beginning first. If EOF is reached and the
501 file wasn't terminated by a newline, supply one. Return a count
502 of bytes buffered. */
505 fillbuf (struct buffer *buf, FILE *fp)
509 memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left);
510 buf->used = buf->left;
512 while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used)))
514 if (buf->used == buf->alloc)
517 buf->buf = xrealloc (buf->buf, buf->alloc);
519 cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp);
522 error (0, errno, _("read error"));
529 if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n')
531 if (buf->used == buf->alloc)
534 buf->buf = xrealloc (buf->buf, buf->alloc);
536 buf->buf[buf->used++] = '\n';
542 /* Initialize LINES, allocating space for ALLOC lines initially.
543 LIMIT is the maximum possible number of lines to allocate space
547 initlines (struct lines *lines, int alloc, int limit)
549 lines->alloc = alloc;
550 lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line));
552 lines->limit = limit;
555 /* Return a pointer to the first character of the field specified
559 begfield (const struct line *line, const struct keyfield *key)
561 register char *ptr = line->text, *lim = ptr + line->length;
562 register int sword = key->sword, schar = key->schar;
565 while (ptr < lim && sword--)
567 while (ptr < lim && *ptr != tab)
573 while (ptr < lim && sword--)
575 while (ptr < lim && blanks[UCHAR (*ptr)])
577 while (ptr < lim && !blanks[UCHAR (*ptr)])
581 if (key->skipsblanks)
582 while (ptr < lim && blanks[UCHAR (*ptr)])
585 if (ptr + schar <= lim)
593 /* Return the limit of (a pointer to the first character after) the field
594 in LINE specified by KEY. */
597 limfield (const struct line *line, const struct keyfield *key)
599 register char *ptr = line->text, *lim = ptr + line->length;
600 register int eword = key->eword, echar = key->echar;
602 /* Note: from the POSIX spec:
603 The leading field separator itself is included in
604 a field when -t is not used. FIXME: move this comment up... */
606 /* Move PTR past EWORD fields or to one past the last byte on LINE,
607 whichever comes first. If there are more than EWORD fields, leave
608 PTR pointing at the beginning of the field having zero-based index,
609 EWORD. If a delimiter character was specified (via -t), then that
610 `beginning' is the first character following the delimiting TAB.
611 Otherwise, leave PTR pointing at the first `blank' character after
612 the preceding field. */
614 while (ptr < lim && eword--)
616 while (ptr < lim && *ptr != tab)
618 if (ptr < lim && (eword || echar > 0))
622 while (ptr < lim && eword--)
624 while (ptr < lim && blanks[UCHAR (*ptr)])
626 while (ptr < lim && !blanks[UCHAR (*ptr)])
630 /* Make LIM point to the end of (one byte past) the current field. */
634 newlim = memchr (ptr, tab, lim - ptr);
642 while (newlim < lim && blanks[UCHAR (*newlim)])
644 while (newlim < lim && !blanks[UCHAR (*newlim)])
649 /* If we're skipping leading blanks, don't start counting characters
650 until after skipping past any leading blanks. */
651 if (key->skipsblanks)
652 while (ptr < lim && blanks[UCHAR (*ptr)])
655 /* Advance PTR by ECHAR (if possible), but no further than LIM. */
656 if (ptr + echar <= lim)
667 trim_trailing_blanks (const char *a_start, char **a_end)
669 while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))])
673 /* Find the lines in BUF, storing pointers and lengths in LINES.
674 Also replace newlines in BUF with NULs. */
677 findlines (struct buffer *buf, struct lines *lines)
679 register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr;
680 struct keyfield *key = keyhead.next;
684 while (beg < lim && (ptr = memchr (beg, '\n', lim - beg))
685 && lines->used < lines->limit)
687 /* There are various places in the code that rely on a NUL
688 being at the end of in-core lines; NULs inside the lines
689 will not cause trouble, though. */
692 if (lines->used == lines->alloc)
695 lines->lines = (struct line *)
696 xrealloc ((char *) lines->lines,
697 lines->alloc * sizeof (struct line));
700 lines->lines[lines->used].text = beg;
701 lines->lines[lines->used].length = ptr - beg;
703 /* Precompute the position of the first key for efficiency. */
707 lines->lines[lines->used].keylim =
708 limfield (&lines->lines[lines->used], key);
710 lines->lines[lines->used].keylim = ptr;
713 lines->lines[lines->used].keybeg =
714 begfield (&lines->lines[lines->used], key);
717 if (key->skipsblanks)
718 while (blanks[UCHAR (*beg)])
720 lines->lines[lines->used].keybeg = beg;
722 if (key->skipeblanks)
724 trim_trailing_blanks (lines->lines[lines->used].keybeg,
725 &lines->lines[lines->used].keylim);
730 lines->lines[lines->used].keybeg = 0;
731 lines->lines[lines->used].keylim = 0;
738 buf->left = lim - beg;
741 /* Compare strings A and B containing decimal fractions < 1. Each string
742 should begin with a decimal point followed immediately by the digits
743 of the fraction. Strings not of this form are considered to be zero. */
746 fraccompare (register const char *a, register const char *b)
748 register tmpa = UCHAR (*a), tmpb = UCHAR (*b);
750 if (tmpa == '.' && tmpb == '.')
753 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
754 while (tmpa == tmpb && digits[tmpa]);
755 if (digits[tmpa] && digits[tmpb])
775 else if (tmpa == '.')
784 else if (tmpb == '.')
796 /* Compare strings A and B as numbers without explicitly converting them to
797 machine numbers. Comparatively slow for short strings, but asymptotically
801 numcompare (register const char *a, register const char *b)
803 register int tmpa, tmpb, loga, logb, tmp;
840 while (tmpa == tmpb && digits[tmpa])
841 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
843 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
844 return -fraccompare (a, b);
847 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
853 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
858 if ((tmp = logb - loga) != 0)
865 return COLLDIFF (tmpb, tmpa);
870 else if (tmpb == '-')
898 while (tmpa == tmpb && digits[tmpa])
899 tmpa = UCHAR (*++a), tmpb = UCHAR (*++b);
901 if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa]))
902 return fraccompare (a, b);
905 for (loga = 1; digits[UCHAR (*++a)]; ++loga)
911 for (logb = 1; digits[UCHAR (*++b)]; ++logb)
916 if ((tmp = loga - logb) != 0)
923 return COLLDIFF (tmpa, tmpb);
931 general_numcompare (const char *sa, const char *sb)
934 /* FIXME: add option to warn about failed conversions. */
935 /* FIXME: maybe add option to try expensive FP conversion
936 only if A and B can't be compared more cheaply/accurately. */
937 if (xstrtod (sa, NULL, &a))
941 if (xstrtod (sb, NULL, &b))
945 return a == b ? 0 : a < b ? -1 : 1;
948 /* Return an integer <= 12 associated with month name S with length LEN,
949 0 if the name in S is not recognized. */
952 getmonth (const char *s, int len)
955 register int i, lo = 0, hi = 12;
957 while (len > 0 && blanks[UCHAR(*s)])
963 for (i = 0; i < 3; ++i)
964 month[i] = fold_toupper[UCHAR (s[i])];
968 if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0)
972 if (!strcmp (month, monthtab[lo].name))
973 return monthtab[lo].val;
977 /* Compare two lines A and B trying every key in sequence until there
978 are no more keys or a difference is found. */
981 keycompare (const struct line *a, const struct line *b)
983 register char *texta, *textb, *lima, *limb, *translate;
984 register int *ignore;
985 struct keyfield *key;
986 int diff = 0, iter = 0, lena, lenb;
988 for (key = keyhead.next; key; key = key->next, ++iter)
990 ignore = key->ignore;
991 translate = key->translate;
993 /* Find the beginning and limit of each field. */
994 if (iter || a->keybeg == NULL || b->keybeg == NULL)
997 lima = limfield (a, key), limb = limfield (b, key);
999 lima = a->text + a->length, limb = b->text + b->length;
1001 if (key->sword >= 0)
1002 texta = begfield (a, key), textb = begfield (b, key);
1005 texta = a->text, textb = b->text;
1006 if (key->skipsblanks)
1008 while (texta < lima && blanks[UCHAR (*texta)])
1010 while (textb < limb && blanks[UCHAR (*textb)])
1017 /* For the first iteration only, the key positions have
1018 been precomputed for us. */
1019 texta = a->keybeg, lima = a->keylim;
1020 textb = b->keybeg, limb = b->keylim;
1023 /* Find the lengths. */
1024 lena = lima - texta, lenb = limb - textb;
1030 if (key->skipeblanks)
1032 char *a_end = texta + lena;
1033 char *b_end = textb + lenb;
1034 trim_trailing_blanks (texta, &a_end);
1035 trim_trailing_blanks (textb, &b_end);
1036 lena = a_end - texta;
1037 lenb = b_end - textb;
1040 /* Actually compare the fields. */
1045 char savea = *lima, saveb = *limb;
1047 *lima = *limb = '\0';
1048 diff = numcompare (texta, textb);
1049 *lima = savea, *limb = saveb;
1052 diff = numcompare (texta, textb);
1055 return key->reverse ? -diff : diff;
1058 else if (key->general_numeric)
1062 char savea = *lima, saveb = *limb;
1064 *lima = *limb = '\0';
1065 diff = general_numcompare (texta, textb);
1066 *lima = savea, *limb = saveb;
1069 diff = general_numcompare (texta, textb);
1072 return key->reverse ? -diff : diff;
1075 else if (key->month)
1077 diff = getmonth (texta, lena) - getmonth (textb, lenb);
1079 return key->reverse ? -diff : diff;
1082 else if (ignore && translate)
1085 #define CMP_FUNC(A, B) COLLDIFF ((A), (B))
1087 #define CMP_FUNC(A, B) (A) - (B)
1089 #define CMP_WITH_IGNORE(A, B) \
1092 while (texta < lima && textb < limb) \
1094 while (texta < lima && ignore[UCHAR (*texta)]) \
1096 while (textb < limb && ignore[UCHAR (*textb)]) \
1098 if (texta < lima && textb < limb) \
1102 diff = CMP_FUNC((A), (B)); \
1109 if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \
1111 else if (texta < lima && textb == limb \
1112 && !ignore[UCHAR (*texta)]) \
1118 while (texta < lima && ignore[UCHAR (*texta)]) \
1120 while (textb < limb && ignore[UCHAR (*textb)]) \
1123 if (texta == lima && textb < limb) \
1125 else if (texta < lima && textb == limb) \
1128 /* Relative lengths are meaningless if characters were ignored. \
1129 Handling this case here avoids what might be an invalid length \
1130 comparison below. */ \
1131 if (diff == 0 && texta == lima && textb == limb) \
1136 CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]);
1138 CMP_WITH_IGNORE (*texta, *textb);
1140 while (texta < lima && textb < limb)
1142 if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)])
1145 diff = COLLDIFF (translate[UCHAR (*--texta)],
1146 translate[UCHAR (*--textb)]);
1148 diff = (translate[UCHAR (*--texta)]
1149 - translate[UCHAR (*--textb)]);
1156 diff = collcmp (texta, textb, min (lena, lenb));
1158 diff = memcmp (texta, textb, min (lena, lenb));
1162 return key->reverse ? -diff : diff;
1163 if ((diff = lena - lenb) != 0)
1164 return key->reverse ? -diff : diff;
1170 /* Compare two lines A and B, returning negative, zero, or positive
1171 depending on whether A compares less than, equal to, or greater than B. */
1174 compare (register const struct line *a, register const struct line *b)
1176 int diff, tmpa, tmpb, mini;
1178 /* First try to compare on the specified keys (if any).
1179 The only two cases with no key at all are unadorned sort,
1180 and unadorned sort -r. */
1183 diff = keycompare (a, b);
1186 if (unique || stable)
1190 /* If the keys all compare equal (or no keys were specified)
1191 fall through to the default byte-by-byte comparison. */
1192 tmpa = a->length, tmpb = b->length;
1193 mini = min (tmpa, tmpb);
1198 char *ap = a->text, *bp = b->text;
1201 diff = UCHAR (*ap) - UCHAR (*bp);
1206 diff = collcmp (ap, bp, mini);
1208 diff = memcmp (ap, bp, mini);
1217 return reverse ? -diff : diff;
1220 /* Check that the lines read from the given FP come in order. Return
1221 1 if they do and 0 if there is a disorder.
1222 FIXME: return number of first out-of-order line if not sorted. */
1227 struct buffer buf; /* Input buffer. */
1228 struct lines lines; /* Lines scanned from the buffer. */
1229 struct line temp; /* Copy of previous line. */
1230 int cc; /* Character count. */
1231 int alloc, sorted = 1;
1233 initbuf (&buf, mergealloc);
1234 initlines (&lines, mergealloc / linelength + 1,
1235 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1237 temp.text = xmalloc (alloc);
1239 cc = fillbuf (&buf, fp);
1243 findlines (&buf, &lines);
1247 struct line *prev_line; /* Pointer to previous line. */
1248 int cmp; /* Result of calling compare. */
1251 /* Compare each line in the buffer with its successor. */
1252 for (i = 0; i < lines.used - 1; ++i)
1254 cmp = compare (&lines.lines[i], &lines.lines[i + 1]);
1255 if ((unique && cmp >= 0) || (cmp > 0))
1262 /* Save the last line of the buffer and refill the buffer. */
1263 prev_line = lines.lines + (lines.used - 1);
1264 if (prev_line->length > alloc)
1266 while (prev_line->length + 1 > alloc)
1268 temp.text = xrealloc (temp.text, alloc);
1270 memcpy (temp.text, prev_line->text, prev_line->length + 1);
1271 temp.length = prev_line->length;
1272 temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text);
1273 temp.keylim = temp.text + (prev_line->keylim - prev_line->text);
1275 cc = fillbuf (&buf, fp);
1279 findlines (&buf, &lines);
1280 /* Make sure the line saved from the old buffer contents is
1281 less than or equal to the first line of the new buffer. */
1282 cmp = compare (&temp, &lines.lines[0]);
1283 if ((unique && cmp >= 0) || (cmp > 0))
1293 free ((char *) lines.lines);
1298 /* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE.
1299 Close FPS before returning. */
1302 mergefps (FILE **fps, register int nfps, FILE *ofp)
1304 struct buffer buffer[NMERGE]; /* Input buffers for each file. */
1305 struct lines lines[NMERGE]; /* Line tables for each buffer. */
1306 struct line saved; /* Saved line for unique check. */
1307 int savedflag = 0; /* True if there is a saved line. */
1308 int savealloc; /* Size allocated for the saved line. */
1309 int cur[NMERGE]; /* Current line in each line table. */
1310 int ord[NMERGE]; /* Table representing a permutation of fps,
1311 such that lines[ord[0]].lines[cur[ord[0]]]
1312 is the smallest line and will be next
1314 register int i, j, t;
1316 #ifdef lint /* Suppress `used before initialized' warning. */
1320 /* Allocate space for a saved line if necessary. */
1323 savealloc = linelength;
1324 saved.text = xmalloc (savealloc);
1327 /* Read initial lines from each input file. */
1328 for (i = 0; i < nfps; ++i)
1330 initbuf (&buffer[i], mergealloc);
1331 /* If a file is empty, eliminate it from future consideration. */
1332 while (i < nfps && !fillbuf (&buffer[i], fps[i]))
1336 for (j = i; j < nfps; ++j)
1337 fps[j] = fps[j + 1];
1340 free (buffer[i].buf);
1343 initlines (&lines[i], mergealloc / linelength + 1,
1344 LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line)));
1345 findlines (&buffer[i], &lines[i]);
1350 /* Set up the ord table according to comparisons among input lines.
1351 Since this only reorders two items if one is strictly greater than
1352 the other, it is stable. */
1353 for (i = 0; i < nfps; ++i)
1355 for (i = 1; i < nfps; ++i)
1356 if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]],
1357 &lines[ord[i]].lines[cur[ord[i]]]) > 0)
1358 t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0;
1360 /* Repeatedly output the smallest line until no input remains. */
1363 /* If uniqified output is turned on, output only the first of
1364 an identical series of lines. */
1367 if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]]))
1369 xfwrite (saved.text, 1, saved.length, ofp);
1375 if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1377 while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1)
1379 saved.text = xrealloc (saved.text, savealloc);
1381 saved.length = lines[ord[0]].lines[cur[ord[0]]].length;
1382 memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text,
1384 if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL)
1386 saved.keybeg = saved.text +
1387 (lines[ord[0]].lines[cur[ord[0]]].keybeg
1388 - lines[ord[0]].lines[cur[ord[0]]].text);
1390 if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL)
1392 saved.keylim = saved.text +
1393 (lines[ord[0]].lines[cur[ord[0]]].keylim
1394 - lines[ord[0]].lines[cur[ord[0]]].text);
1401 xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1,
1402 lines[ord[0]].lines[cur[ord[0]]].length, ofp);
1406 /* Check if we need to read more lines into core. */
1407 if (++cur[ord[0]] == lines[ord[0]].used)
1408 if (fillbuf (&buffer[ord[0]], fps[ord[0]]))
1410 findlines (&buffer[ord[0]], &lines[ord[0]]);
1415 /* We reached EOF on fps[ord[0]]. */
1416 for (i = 1; i < nfps; ++i)
1417 if (ord[i] > ord[0])
1420 xfclose (fps[ord[0]]);
1421 free (buffer[ord[0]].buf);
1422 free ((char *) lines[ord[0]].lines);
1423 for (i = ord[0]; i < nfps; ++i)
1425 fps[i] = fps[i + 1];
1426 buffer[i] = buffer[i + 1];
1427 lines[i] = lines[i + 1];
1428 cur[i] = cur[i + 1];
1430 for (i = 0; i < nfps; ++i)
1431 ord[i] = ord[i + 1];
1435 /* The new line just read in may be larger than other lines
1436 already in core; push it back in the queue until we encounter
1437 a line larger than it. */
1438 for (i = 1; i < nfps; ++i)
1440 t = compare (&lines[ord[0]].lines[cur[ord[0]]],
1441 &lines[ord[i]].lines[cur[ord[i]]]);
1443 t = ord[0] - ord[i];
1448 for (j = 1; j < i; ++j)
1449 ord[j - 1] = ord[j];
1453 if (unique && savedflag)
1455 xfwrite (saved.text, 1, saved.length, ofp);
1461 /* Sort the array LINES with NLINES members, using TEMP for temporary space. */
1464 sortlines (struct line *lines, int nlines, struct line *temp)
1466 register struct line *lo, *hi, *t;
1467 register int nlo, nhi;
1471 if (compare (&lines[0], &lines[1]) > 0)
1472 *temp = lines[0], lines[0] = lines[1], lines[1] = *temp;
1482 sortlines (lo, nlo, temp);
1485 sortlines (hi, nhi, temp);
1490 if (compare (lo, hi) <= 0)
1491 *t++ = *lo++, --nlo;
1493 *t++ = *hi++, --nhi;
1497 for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo)
1501 /* Check that each of the NFILES FILES is ordered.
1502 Return a count of disordered files. */
1505 check (char **files, int nfiles)
1507 int i, disorders = 0;
1510 for (i = 0; i < nfiles; ++i)
1512 fp = xfopen (files[i], "r");
1515 fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]);
1522 /* Merge NFILES FILES onto OFP. */
1525 merge (char **files, int nfiles, FILE *ofp)
1529 FILE *fps[NMERGE], *tfp;
1531 while (nfiles > NMERGE)
1534 for (i = 0; i < nfiles / NMERGE; ++i)
1536 for (j = 0; j < NMERGE; ++j)
1537 fps[j] = xfopen (files[i * NMERGE + j], "r");
1538 tfp = xtmpfopen (temp = tempname ());
1539 mergefps (fps, NMERGE, tfp);
1541 for (j = 0; j < NMERGE; ++j)
1542 zaptemp (files[i * NMERGE + j]);
1545 for (j = 0; j < nfiles % NMERGE; ++j)
1546 fps[j] = xfopen (files[i * NMERGE + j], "r");
1547 tfp = xtmpfopen (temp = tempname ());
1548 mergefps (fps, nfiles % NMERGE, tfp);
1550 for (j = 0; j < nfiles % NMERGE; ++j)
1551 zaptemp (files[i * NMERGE + j]);
1556 for (i = 0; i < nfiles; ++i)
1557 fps[i] = xfopen (files[i], "r");
1558 mergefps (fps, i, ofp);
1559 for (i = 0; i < nfiles; ++i)
1563 /* Sort NFILES FILES onto OFP. */
1566 sort (char **files, int nfiles, FILE *ofp)
1573 struct tempnode *node;
1574 int n_temp_files = 0;
1577 initbuf (&buf, sortalloc);
1578 initlines (&lines, sortalloc / linelength + 1,
1579 LINEALLOC / sizeof (struct line));
1581 tmp = (struct line *) xmalloc (ntmp * sizeof (struct line));
1585 fp = xfopen (*files++, "r");
1586 while (fillbuf (&buf, fp))
1588 findlines (&buf, &lines);
1589 if (lines.used > ntmp)
1591 while (lines.used > ntmp)
1593 tmp = (struct line *)
1594 xrealloc ((char *) tmp, ntmp * sizeof (struct line));
1596 sortlines (lines.lines, lines.used, tmp);
1597 if (feof (fp) && !nfiles && !n_temp_files && !buf.left)
1602 tfp = xtmpfopen (tempname ());
1604 for (i = 0; i < lines.used; ++i)
1605 if (!unique || i == 0
1606 || compare (&lines.lines[i], &lines.lines[i - 1]))
1608 xfwrite (lines.lines[i].text, 1, lines.lines[i].length, tfp);
1618 free ((char *) lines.lines);
1619 free ((char *) tmp);
1623 tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *));
1625 for (node = temphead.next; i > 0; node = node->next)
1626 tempfiles[--i] = node->name;
1627 merge (tempfiles, n_temp_files, ofp);
1628 free ((char *) tempfiles);
1632 /* Insert key KEY at the end of the list (`keyhead'). */
1635 insertkey (struct keyfield *key)
1637 struct keyfield *k = &keyhead;
1646 badfieldspec (const char *s)
1648 error (2, 0, _("invalid field specification `%s'"), s);
1651 /* Handle interrupts and hangups. */
1654 sighandler (int sig)
1657 struct sigaction sigact;
1659 sigact.sa_handler = SIG_DFL;
1660 sigemptyset (&sigact.sa_mask);
1661 sigact.sa_flags = 0;
1662 sigaction (sig, &sigact, NULL);
1663 #else /* !SA_INTERRUPT */
1664 signal (sig, SIG_DFL);
1665 #endif /* SA_INTERRUPT */
1667 kill (getpid (), sig);
1670 /* Set the ordering options for KEY specified in S.
1671 Return the address of the first character in S that
1672 is not a valid ordering option.
1673 BLANKTYPE is the kind of blanks that 'b' should skip. */
1676 set_ordering (register const char *s, struct keyfield *key,
1677 enum blanktype blanktype)
1684 if (blanktype == bl_start || blanktype == bl_both)
1685 key->skipsblanks = 1;
1686 if (blanktype == bl_end || blanktype == bl_both)
1687 key->skipeblanks = 1;
1690 key->ignore = nondictionary;
1693 key->translate = fold_toupper;
1696 key->general_numeric = 1;
1699 key->ignore = nonprinting;
1706 if (blanktype == bl_start || blanktype == bl_both)
1707 key->skipsblanks = 1;
1708 if (blanktype == bl_end || blanktype == bl_both)
1709 key->skipeblanks = 1;
1723 main (int argc, char **argv)
1725 struct keyfield *key = NULL, gkey;
1728 int checkonly = 0, mergeonly = 0, nfiles = 0;
1729 char *minus = "-", *outfile = minus, **files, *tmp;
1732 struct sigaction oldact, newact;
1733 #endif /* SA_INTERRUPT */
1736 (void) setlocale(LC_ALL, "");
1738 program_name = argv[0];
1740 parse_long_options (argc, argv, "sort", version_string, usage);
1742 have_read_stdin = 0;
1745 temp_file_prefix = getenv ("TMPDIR");
1746 if (temp_file_prefix == NULL)
1747 temp_file_prefix = DEFAULT_TMPDIR;
1750 newact.sa_handler = sighandler;
1751 sigemptyset (&newact.sa_mask);
1752 newact.sa_flags = 0;
1754 sigaction (SIGINT, NULL, &oldact);
1755 if (oldact.sa_handler != SIG_IGN)
1756 sigaction (SIGINT, &newact, NULL);
1757 sigaction (SIGHUP, NULL, &oldact);
1758 if (oldact.sa_handler != SIG_IGN)
1759 sigaction (SIGHUP, &newact, NULL);
1760 sigaction (SIGPIPE, NULL, &oldact);
1761 if (oldact.sa_handler != SIG_IGN)
1762 sigaction (SIGPIPE, &newact, NULL);
1763 sigaction (SIGTERM, NULL, &oldact);
1764 if (oldact.sa_handler != SIG_IGN)
1765 sigaction (SIGTERM, &newact, NULL);
1766 #else /* !SA_INTERRUPT */
1767 if (signal (SIGINT, SIG_IGN) != SIG_IGN)
1768 signal (SIGINT, sighandler);
1769 if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
1770 signal (SIGHUP, sighandler);
1771 if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
1772 signal (SIGPIPE, sighandler);
1773 if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
1774 signal (SIGTERM, sighandler);
1775 #endif /* !SA_INTERRUPT */
1777 gkey.sword = gkey.eword = -1;
1779 gkey.translate = NULL;
1780 gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0;
1781 gkey.skipsblanks = gkey.skipeblanks = 0;
1783 files = (char **) xmalloc (sizeof (char *) * argc);
1785 for (i = 1; i < argc; ++i)
1787 if (argv[i][0] == '+')
1791 key = (struct keyfield *) xmalloc (sizeof (struct keyfield));
1794 key->translate = NULL;
1795 key->skipsblanks = key->skipeblanks = 0;
1796 key->numeric = key->general_numeric = key->month = key->reverse = 0;
1798 if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])])))
1799 badfieldspec (argv[i]);
1800 for (t = 0; digits[UCHAR (*s)]; ++s)
1801 t = 10 * t + *s - '0';
1804 for (++s; digits[UCHAR (*s)]; ++s)
1805 t2 = 10 * t2 + *s - '0';
1813 s = set_ordering (s, key, bl_start);
1815 badfieldspec (argv[i]);
1817 else if (argv[i][0] == '-' && argv[i][1])
1820 if (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))
1824 for (t = 0; digits[UCHAR (*s)]; ++s)
1825 t = t * 10 + *s - '0';
1828 for (++s; digits[UCHAR (*s)]; ++s)
1829 t2 = t2 * 10 + *s - '0';
1832 s = set_ordering (s, key, bl_end);
1834 badfieldspec (argv[i]);
1841 s = set_ordering (s, &gkey, bl_both);
1855 error (2, 0, _("option `-k' requires an argument"));
1861 key = (struct keyfield *)
1862 xmalloc (sizeof (struct keyfield));
1865 key->translate = NULL;
1866 key->skipsblanks = key->skipeblanks = 0;
1867 key->numeric = key->month = key->reverse = 0;
1869 if (!digits[UCHAR (*s)])
1870 badfieldspec (argv[i]);
1871 for (t = 0; digits[UCHAR (*s)]; ++s)
1872 t = 10 * t + *s - '0';
1875 /* Provoke with `sort -k0' */
1876 error (0, 0, _("the starting field number argument \
1877 to the `-k' option must be positive"));
1878 badfieldspec (argv[i]);
1884 if (!digits[UCHAR (s[1])])
1886 /* Provoke with `sort -k1.' */
1887 error (0, 0, _("starting field spec has `.' but \
1888 lacks following character offset"));
1889 badfieldspec (argv[i]);
1891 for (++s; digits[UCHAR (*s)]; ++s)
1892 t2 = 10 * t2 + *s - '0';
1895 /* Provoke with `sort -k1.0' */
1896 error (0, 0, _("starting field character offset \
1897 argument to the `-k' option\nmust be positive"));
1898 badfieldspec (argv[i]);
1909 s = set_ordering (s, key, bl_start);
1916 badfieldspec (argv[i]);
1919 /* Skip over comma. */
1923 /* Provoke with `sort -k1,' */
1924 error (0, 0, _("field specification has `,' but \
1925 lacks following field spec"));
1926 badfieldspec (argv[i]);
1929 for (t = 0; digits[UCHAR (*s)]; ++s)
1930 t = t * 10 + *s - '0';
1933 /* Provoke with `sort -k1,0' */
1934 error (0, 0, _("ending field number argument \
1935 to the `-k' option must be positive"));
1936 badfieldspec (argv[i]);
1942 if (!digits[UCHAR (s[1])])
1944 /* Provoke with `sort -k1,1.' */
1945 error (0, 0, _("ending field spec has `.' \
1946 but lacks following character offset"));
1947 badfieldspec (argv[i]);
1949 for (++s; digits[UCHAR (*s)]; ++s)
1950 t2 = t2 * 10 + *s - '0';
1954 /* `-k 2,3' is equivalent to `+1 -3'. */
1959 s = set_ordering (s, key, bl_end);
1961 badfieldspec (argv[i]);
1975 error (2, 0, _("option `-o' requires an argument"));
1977 outfile = argv[++i];
1986 else if (i < argc - 1)
1992 error (2, 0, _("option `-t' requires an argument"));
1996 temp_file_prefix = ++s;
2000 temp_file_prefix = argv[++i];
2002 error (2, 0, _("option `-T' requires an argument"));
2010 /* Accept and ignore e.g. -y0 for compatibility with
2014 fprintf (stderr, _("%s: unrecognized option `-%c'\n"),
2022 else /* Not an option. */
2024 files[nfiles++] = argv[i];
2032 /* Inheritance of global options to individual keys. */
2033 for (key = keyhead.next; key; key = key->next)
2034 if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse
2035 && !key->skipeblanks && !key->month && !key->numeric
2036 && !key->general_numeric)
2038 key->ignore = gkey.ignore;
2039 key->translate = gkey.translate;
2040 key->skipsblanks = gkey.skipsblanks;
2041 key->skipeblanks = gkey.skipeblanks;
2042 key->month = gkey.month;
2043 key->numeric = gkey.numeric;
2044 key->general_numeric = gkey.general_numeric;
2045 key->reverse = gkey.reverse;
2048 if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks
2049 || gkey.skipeblanks || gkey.month || gkey.numeric
2050 || gkey.general_numeric))
2052 reverse = gkey.reverse;
2061 exit (check (files, nfiles) != 0);
2063 if (strcmp (outfile, "-"))
2065 struct stat outstat;
2066 if (stat (outfile, &outstat) == 0)
2068 /* The following code prevents a race condition when
2069 people use the brain dead shell programming idiom:
2070 cat file | sort -o file
2071 This feature is provided for historical compatibility,
2072 but we strongly discourage ever relying on this in
2073 new shell programs. */
2075 /* Temporarily copy each input file that might be another name
2076 for the output file. When in doubt (e.g. a pipe), copy. */
2077 for (i = 0; i < nfiles; ++i)
2083 if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i]))
2086 if ((strcmp (files[i], "-")
2087 ? stat (files[i], &instat)
2088 : fstat (fileno (stdin), &instat)) != 0)
2090 error (0, errno, "%s", files[i]);
2094 if (S_ISREG (instat.st_mode)
2095 && (instat.st_ino != outstat.st_ino
2096 || instat.st_dev != outstat.st_dev))
2098 /* We know the files are distinct. */
2103 fp = xfopen (files[i], "r");
2105 ofp = xtmpfopen (tmp);
2106 while ((cc = fread (buf, 1, sizeof buf, fp)) > 0)
2107 xfwrite (buf, 1, cc, ofp);
2110 error (0, errno, "%s", files[i]);
2119 ofp = xfopen (outfile, "w");
2125 merge (files, nfiles, ofp);
2127 sort (files, nfiles, ofp);
2130 /* If we wait for the implicit flush on exit, and the parent process
2131 has closed stdout (e.g., exec >&- in a shell), then the output file
2132 winds up empty. I don't understand why. This is under SunOS,
2133 Solaris, Ultrix, and Irix. This premature fflush makes the output
2134 reappear. --karl@cs.umb.edu */
2135 if (fflush (ofp) < 0)
2136 error (1, errno, _("%s: write error"), outfile);
2138 if (have_read_stdin && fclose (stdin) == EOF)
2139 error (1, errno, outfile);
2140 if (ferror (stdout) || fclose (stdout) == EOF)
2141 error (1, errno, _("%s: write error"), outfile);