1 /* cmp - compare two files byte by byte
3 Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1998, 2001,
4 2002, 2004 Free Software Foundation, Inc.
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
   any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14 See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; see the file COPYING.
18 If not, write to the Free Software Foundation,
19 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
31 #include <file-type.h>
33 #include <hard-locale.h>
36 #include <unlocked-io.h>
37 #include <version-etc.h>
/* hard_locale_LC_MESSAGES is what cmp consults when choosing between the
   byte and char wording of its first-difference message.  It is either a
   call to hard_locale (LC_MESSAGES), under the condition tested here, or
   the constant 0.  */
41 #if defined LC_MESSAGES && ENABLE_NLS
42 # define hard_locale_LC_MESSAGES hard_locale (LC_MESSAGES)
44 # define hard_locale_LC_MESSAGES 0
/* Forward declarations of functions defined later in this file.  */
47 static int cmp (void);
48 static off_t file_position (int);
49 static size_t block_compare (word const *, word const *);
50 static size_t block_compare_and_count (word const *, word const *, off_t *);
51 static void sprintc (char *, unsigned char);
53 /* Name under which this program was invoked. */
56 /* Filenames of the compared files. main stores the FILE1 operand in
   file[0] and the FILE2 operand, or a single dash when it is absent,
   in file[1]. */
57 static char const *file[2];
59 /* File descriptors of the files. */
60 static int file_desc[2];
62 /* Status of the files, filled in by fstat in main. */
63 static struct stat stat_buf[2];
65 /* Read buffers for the files. main carves both out of one allocation. */
66 static word *buffer[2];
68 /* Optimal block size for the files, computed in main with buffer_lcm. */
69 static size_t buf_size;
71 /* Initial prefix to ignore for each file, in bytes; raised by
   specify_ignore_initial. */
72 static off_t ignore_initial[2];
74 /* Number of bytes to compare. The initial value UINTMAX_MAX is
   treated by cmp as meaning no limit. */
75 static uintmax_t bytes = UINTMAX_MAX;
/* Kind of output requested. The first enumerator has the value 0,
   which specify_comparison_type treats as not yet chosen. */
78 static enum comparison_type
80 type_first_diff, /* Print the first difference. */
81 type_all_diffs, /* Print all differences. */
82 type_status /* Exit status only. */
85 /* If nonzero, print values of bytes quoted like cat -t does. */
86 static bool opt_print_bytes;
88 /* Values for long options that do not have single-letter equivalents.
   Starting at CHAR_MAX + 1 keeps them distinct from every option letter. */
91 HELP_OPTION = CHAR_MAX + 1
/* Long options passed to getopt_long in main. The last field of each
   entry is the value getopt_long returns for it: the matching short
   option letter, or HELP_OPTION for --help. A 1 in the second field
   marks an option that takes an argument. */
94 static struct option const long_options[] =
96 {"print-bytes", 0, 0, 'b'},
97 {"print-chars", 0, 0, 'c'}, /* obsolescent as of diffutils 2.7.3 */
98 {"ignore-initial", 1, 0, 'i'},
99 {"verbose", 0, 0, 'l'},
100 {"bytes", 1, 0, 'n'},
101 {"silent", 0, 0, 's'},
102 {"quiet", 0, 0, 's'},
103 {"version", 0, 0, 'v'},
104 {"help", 0, 0, HELP_OPTION},
/* Report a usage problem through error and point the user at --help.
   REASON_MSGID is an untranslated format string (it is passed through
   _ before use) and OPERAND is its argument; callers in this file pass
   0 for OPERAND when the message has no conversion. The final call
   uses status EXIT_TROUBLE, and the function is declared noreturn. */
108 static void try_help (char const *, char const *) __attribute__((noreturn));
110 try_help (char const *reason_msgid, char const *operand)
113 error (0, 0, _(reason_msgid), operand);
114 error (EXIT_TROUBLE, 0,
115 _("Try `%s --help' for more information."), program_name);
/* Suffix characters handed to xstrtoumax when parsing the
   --ignore-initial and --bytes operands. */
119 static char const valid_suffixes[] = "kKMGTPEZY0";
121 /* Update ignore_initial[F] according to the result of parsing the
122 operand *ARGPTR of --ignore-initial, updating *ARGPTR to point
123 after the operand. If DELIMITER is nonzero, the operand may be
124 followed by DELIMITER; otherwise it must be null-terminated.
   A value that fails to parse, that does not survive assignment to O
   unchanged and non-negative, or that equals UINTMAX_MAX is rejected
   through try_help. ignore_initial[F] is only ever raised here, never
   lowered.
   NOTE(review): the declarations of VAL and O are not visible in this
   listing; O is compared against an off_t below. */
126 specify_ignore_initial (int f, char **argptr, char delimiter)
130 char const *arg = *argptr;
131 strtol_error e = xstrtoumax (arg, argptr, 0, &val, valid_suffixes);
/* Accept a clean parse, or a parse that stopped at DELIMITER. */
132 if (! (e == LONGINT_OK
133 || (e == LONGINT_INVALID_SUFFIX_CHAR && **argptr == delimiter))
134 || (o = val) < 0 || o != val || val == UINTMAX_MAX)
135 try_help ("invalid --ignore-initial value `%s'", arg);
136 if (ignore_initial[f] < o)
137 ignore_initial[f] = o;
140 /* Specify the output format. Asking for T after a different nonzero
   type has already been chosen is reported through try_help as the
   incompatible -l and -s combination. */
142 specify_comparison_type (enum comparison_type t)
144 if (comparison_type && comparison_type != t)
145 try_help ("options -l and -s are incompatible", 0);
/* NOTE(review): the head of this function, including the test that
   guards the first call below, is not visible in this listing. The
   visible part reports a write failure, or otherwise closes stdout
   and reports a failing fclose together with errno. Both reports use
   status EXIT_TROUBLE. */
153 error (EXIT_TROUBLE, 0, "%s", _("write failed"));
154 else if (fclose (stdout) != 0)
155 error (EXIT_TROUBLE, errno, "%s", _("standard output"));
/* Untranslated help lines, one string per line of option help, each
   marked with N_. The loop that prints them translates each entry
   with _ and stops at the first null entry. */
158 static char const * const option_help_msgid[] = {
159 N_("-b --print-bytes Print differing bytes."),
160 N_("-i SKIP --ignore-initial=SKIP Skip the first SKIP bytes of input."),
161 N_("-i SKIP1:SKIP2 --ignore-initial=SKIP1:SKIP2"),
162 N_(" Skip the first SKIP1 bytes of FILE1 and the first SKIP2 bytes of FILE2."),
163 N_("-l --verbose Output byte numbers and values of all differing bytes."),
164 N_("-n LIMIT --bytes=LIMIT Compare at most LIMIT bytes."),
165 N_("-s --quiet --silent Output nothing; yield exit status only."),
166 N_("-v --version Output version info."),
167 N_("--help Output this help."),
/* Body of the help printer (its head is not visible in this listing).
   Prints the synopsis, a one-line description, every entry of
   option_help_msgid, and closing notes on SKIP values, standard input,
   exit status and bug reporting. All text goes through printf, so it
   is written to standard output. */
174 char const * const *p;
176 printf (_("Usage: %s [OPTION]... FILE1 [FILE2 [SKIP1 [SKIP2]]]\n"),
178 printf ("%s\n\n", _("Compare two files byte by byte."));
179 for (p = option_help_msgid; *p; p++)
180 printf (" %s\n", _(*p));
181 printf ("\n%s\n%s\n\n%s\n%s\n\n%s\n",
182 _("SKIP1 and SKIP2 are the number of bytes to skip in each file."),
183 _("SKIP values may be followed by the following multiplicative suffixes:\n\
184 kB 1000, K 1024, MB 1,000,000, M 1,048,576,\n\
185 GB 1,000,000,000, G 1,073,741,824, and so on for T, P, E, Z, Y."),
186 _("If a FILE is `-' or missing, read standard input."),
187 _("Exit status is 0 if inputs are the same, 1 if different, 2 if trouble."),
188 _("Report bugs to <bug-gnu-utils@gnu.org>."));
/* Program entry point (only part of the body is visible in this
   listing). Parses options with getopt_long, takes FILE1, an optional
   FILE2 and up to two SKIP operands, opens both inputs, applies the
   shortcuts described in the comments below, allocates the read
   buffers, runs cmp, and closes the inputs. */
192 main (int argc, char **argv)
194 int c, f, exit_status;
195 size_t words_per_buffer;
197 exit_failure = EXIT_TROUBLE;
198 initialize_main (&argc, &argv);
199 program_name = argv[0];
200 setlocale (LC_ALL, "");
201 bindtextdomain (PACKAGE, LOCALEDIR);
202 textdomain (PACKAGE);
205 /* Parse command line options. */
207 while ((c = getopt_long (argc, argv, "bci:ln:sv", long_options, 0))
212 case 'c': /* 'c' is obsolescent as of diffutils 2.7.3 */
213 opt_print_bytes = true;
/* Skip operand: parse the value for file 0, allowing a colon after
   it. If a colon follows, parse a second value for file 1; otherwise
   file 1 is made to skip at least as much as file 0. */
217 specify_ignore_initial (0, &optarg, ':');
218 if (*optarg++ == ':')
219 specify_ignore_initial (1, &optarg, 0);
220 else if (ignore_initial[1] < ignore_initial[0])
221 ignore_initial[1] = ignore_initial[0];
225 specify_comparison_type (type_all_diffs);
/* Byte limit operand: it must parse completely, with an optional
   suffix from valid_suffixes. */
231 if (xstrtoumax (optarg, 0, 0, &n, valid_suffixes) != LONGINT_OK)
232 try_help ("invalid --bytes value `%s'", optarg);
239 specify_comparison_type (type_status);
243 /* TRANSLATORS: Please translate the second "o" in "Torbjorn
244 Granlund" to an o-with-umlaut (U+00F6, LATIN SMALL LETTER O
245 WITH DIAERESIS) if possible. */
246 version_etc (stdout, "cmp", PACKAGE_NAME, PACKAGE_VERSION,
247 _("Torbjorn Granlund"), "David MacKenzie", (char *) 0);
261 try_help ("missing operand after `%s'", argv[argc - 1]);
/* The first operand names file 0; the second, when present, names
   file 1, which otherwise is a single dash. */
263 file[0] = argv[optind++];
264 file[1] = optind < argc ? argv[optind++] : "-";
/* Up to two further operands are skip values for file 0 and file 1. */
266 for (f = 0; f < 2 && optind < argc; f++)
268 char *arg = argv[optind++];
269 specify_ignore_initial (f, &arg, 0);
273 try_help ("extra operand `%s'", argv[optind]);
275 for (f = 0; f < 2; f++)
277 /* If file[1] is "-", treat it first; this avoids a misdiagnostic if
278 stdin is closed and opening file[0] yields file descriptor 0. */
279 int f1 = f ^ (strcmp (file[1], "-") == 0);
281 /* Two files with the same name and offset are identical.
282 But wait until we open the file once, for proper diagnostics. */
283 if (f && ignore_initial[0] == ignore_initial[1]
284 && file_name_cmp (file[0], file[1]) == 0)
287 file_desc[f1] = (strcmp (file[f1], "-") == 0
289 : open (file[f1], O_RDONLY, 0));
290 if (file_desc[f1] < 0 || fstat (file_desc[f1], stat_buf + f1) != 0)
292 if (file_desc[f1] < 0 && comparison_type == type_status)
295 error (EXIT_TROUBLE, errno, "%s", file[f1]);
298 set_binary_mode (file_desc[f1], true);
301 /* If the files are links to the same inode and have the same file position,
302 they are identical. */
304 if (0 < same_file (&stat_buf[0], &stat_buf[1])
305 && same_file_attributes (&stat_buf[0], &stat_buf[1])
306 && file_position (0) == file_position (1))
309 /* If output is redirected to the null device, we may assume `-s'. */
311 if (comparison_type != type_status)
313 struct stat outstat, nullstat;
315 if (fstat (STDOUT_FILENO, &outstat) == 0
316 && stat (NULL_DEVICE, &nullstat) == 0
317 && 0 < same_file (&outstat, &nullstat))
318 comparison_type = type_status;
321 /* If only a return code is needed,
322 and if both input descriptors are associated with plain files,
323 conclude that the files differ if they have different sizes
324 and if more bytes will be compared than are in the smaller file. */
326 if (comparison_type == type_status
327 && S_ISREG (stat_buf[0].st_mode)
328 && S_ISREG (stat_buf[1].st_mode))
330 off_t s0 = stat_buf[0].st_size - file_position (0);
331 off_t s1 = stat_buf[1].st_size - file_position (1);
336 if (s0 != s1 && MIN (s0, s1) < bytes)
340 /* Get the optimal block size of the files. */
342 buf_size = buffer_lcm (STAT_BLOCKSIZE (stat_buf[0]),
343 STAT_BLOCKSIZE (stat_buf[1]),
344 PTRDIFF_MAX - sizeof (word));
346 /* Allocate word-aligned buffers, with space for sentinels at the end.
   Both buffers come from a single allocation: buffer[1] starts
   words_per_buffer words after buffer[0]. */
348 words_per_buffer = (buf_size + 2 * sizeof (word) - 1) / sizeof (word);
349 buffer[0] = xmalloc (2 * sizeof (word) * words_per_buffer);
350 buffer[1] = buffer[0] + words_per_buffer;
352 exit_status = cmp ();
/* Close both inputs; a failing close is reported with status
   EXIT_TROUBLE. */
354 for (f = 0; f < 2; f++)
355 if (close (file_desc[f]) != 0)
356 error (EXIT_TROUBLE, errno, "%s", file[f]);
357 if (exit_status != 0 && comparison_type != type_status)
363 /* Compare the two files already open on `file_desc[0]' and `file_desc[1]',
364 using `buffer[0]' and `buffer[1]'.
365 Return EXIT_SUCCESS if identical, EXIT_FAILURE if different;
   read failures are reported through error (EXIT_TROUBLE, ...). */
371 off_t line_number = 1; /* Line number (1...) of difference. */
372 off_t byte_number = 1; /* Byte number (1...) of difference. */
373 uintmax_t remaining = bytes; /* Remaining number of bytes to compare. */
374 size_t read0, read1; /* Number of bytes read from each file. */
375 size_t first_diff; /* Offset (0...) in buffers of 1st diff. */
376 size_t smaller; /* The lesser of `read0' and `read1'. */
377 word *buffer0 = buffer[0];
378 word *buffer1 = buffer[1];
379 char *buf0 = (char *) buffer0;
380 char *buf1 = (char *) buffer1;
381 int ret = EXIT_SUCCESS;
/* When every difference is listed, compute offset_width: the number
   of decimal digits in the largest byte number that can be printed.
   That number is bounded by the byte limit, by the maximum off_t
   value, and by the remaining size of any input that is a regular
   file. */
385 if (comparison_type == type_all_diffs)
387 off_t byte_number_max = MIN (bytes, TYPE_MAXIMUM (off_t));
389 for (f = 0; f < 2; f++)
390 if (S_ISREG (stat_buf[f].st_mode))
392 off_t file_bytes = stat_buf[f].st_size - file_position (f);
393 if (file_bytes < byte_number_max)
394 byte_number_max = file_bytes;
397 for (offset_width = 1; (byte_number_max /= 10) != 0; offset_width++)
/* Apply the initial skip of each file. */
401 for (f = 0; f < 2; f++)
403 off_t ig = ignore_initial[f];
404 if (ig && file_position (f) == -1)
406 /* lseek failed; read and discard the ignored initial prefix. */
409 size_t bytes_to_read = MIN (ig, buf_size);
410 size_t r = block_read (file_desc[f], buf0, bytes_to_read);
411 if (r != bytes_to_read)
414 error (EXIT_TROUBLE, errno, "%s", file[f]);
/* Main loop body: read up to buf_size bytes from each file, or fewer
   when the remaining byte limit is smaller, then compare the two
   blocks. A remaining count of UINTMAX_MAX means no limit and is
   never decremented. */
425 size_t bytes_to_read = buf_size;
427 if (remaining != UINTMAX_MAX)
429 if (remaining < bytes_to_read)
430 bytes_to_read = remaining;
431 remaining -= bytes_to_read;
434 read0 = block_read (file_desc[0], buf0, bytes_to_read);
435 if (read0 == SIZE_MAX)
436 error (EXIT_TROUBLE, errno, "%s", file[0]);
437 read1 = block_read (file_desc[1], buf1, bytes_to_read);
438 if (read1 == SIZE_MAX)
439 error (EXIT_TROUBLE, errno, "%s", file[1]);
441 /* Insert sentinels for the block compare. Each sentinel is the
   complement of the byte at the same index in the other buffer, so
   the two buffers are guaranteed to differ at that index. */
443 buf0[read0] = ~buf1[read0];
444 buf1[read1] = ~buf0[read1];
446 /* If the line number should be written for differing files,
447 compare the blocks and count the number of newlines
449 first_diff = (comparison_type == type_first_diff
450 ? block_compare_and_count (buffer0, buffer1, &line_number)
451 : block_compare (buffer0, buffer1));
453 byte_number += first_diff;
454 smaller = MIN (read0, read1);
456 if (first_diff < smaller)
458 switch (comparison_type)
460 case type_first_diff:
462 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
463 char line_buf[INT_BUFSIZE_BOUND (off_t)];
464 char const *byte_num = offtostr (byte_number, byte_buf);
465 char const *line_num = offtostr (line_number, line_buf);
466 if (!opt_print_bytes)
468 /* See POSIX 1003.1-2001 for this format. This
469 message is used only in the POSIX locale, so it
470 need not be translated. */
471 static char const char_message[] =
472 "%s %s differ: char %s, line %s\n";
474 /* The POSIX rationale recommends using the word
475 "byte" outside the POSIX locale. Some gettext
476 implementations translate even in the POSIX
477 locale if certain other environment variables
478 are set, so use "byte" if a translation is
479 available, or if outside the POSIX locale. */
480 static char const byte_msgid[] =
481 N_("%s %s differ: byte %s, line %s\n");
482 char const *byte_message = _(byte_msgid);
483 bool use_byte_message = (byte_message != byte_msgid
484 || hard_locale_LC_MESSAGES);
486 printf (use_byte_message ? byte_message : char_message,
487 file[0], file[1], byte_num, line_num);
/* With byte values requested, the message also shows the two
   differing bytes in octal, each followed by a string. */
491 unsigned char c0 = buf0[first_diff];
492 unsigned char c1 = buf1[first_diff];
497 printf (_("%s %s differ: byte %s, line %s is %3o %s %3o %s\n"),
498 file[0], file[1], byte_num, line_num,
/* Listing of all differences: one output line per differing byte,
   with the byte number right-aligned in offset_width columns. */
509 unsigned char c0 = buf0[first_diff];
510 unsigned char c1 = buf1[first_diff];
513 char byte_buf[INT_BUFSIZE_BOUND (off_t)];
514 char const *byte_num = offtostr (byte_number, byte_buf);
515 if (!opt_print_bytes)
517 /* See POSIX 1003.1-2001 for this format. */
518 printf ("%*s %3o %3o\n",
519 offset_width, byte_num, c0, c1);
527 printf ("%*s %3o %-4s %3o %s\n",
528 offset_width, byte_num, c0, s0, c1, s1);
534 while (first_diff < smaller);
/* Unless only the exit status is wanted, name the input that
   returned fewer bytes: index 1 when read1 < read0, otherwise
   index 0. */
542 if (comparison_type != type_status)
544 /* See POSIX 1003.1-2001 for this format. */
545 fprintf (stderr, _("cmp: EOF on %s\n"), file[read1 < read0]);
/* Keep going only while the first file filled a whole buffer. */
551 while (read0 == buf_size);
556 /* Compare two blocks of memory P0 and P1 until they differ,
557 and count the number of '\n' occurrences in the common
   part of P0 and P1.
559 If the blocks are not guaranteed to be different, put sentinels at the ends
560 of the blocks before calling this function.
   The scan has no length argument; it stops only at a difference,
   which is why the sentinels are required.
562 Return the offset of the first byte that differs.
563 Increment *COUNT by the count of '\n' occurrences. */
566 block_compare_and_count (word const *p0, word const *p1, off_t *count)
568 word l; /* One word from first buffer. */
569 word const *l0, *l1; /* Pointers into each buffer. */
570 char const *c0, *c1; /* Pointers for finding exact address. */
571 size_t cnt = 0; /* Number of '\n' occurrences. */
572 word nnnn; /* Newline, sizeof (word) times. */
/* Build nnnn by shifting in one newline byte per iteration. */
576 for (i = 0; i < sizeof nnnn; i++)
577 nnnn = (nnnn << CHAR_BIT) | '\n';
579 /* Find the rough position of the first difference by reading words,
582 for (l0 = p0, l1 = p1; (l = *l0) == *l1; l0++, l1++)
585 for (i = 0; i < sizeof l; i++)
587 unsigned char uc = l;
593 /* Find the exact differing position (endianness independent). */
595 for (c0 = (char const *) l0, c1 = (char const *) l1;
/* The result is a byte offset measured from the start of P0. */
601 return c0 - (char const *) p0;
604 /* Compare two blocks of memory P0 and P1 until they differ.
605 If the blocks are not guaranteed to be different, put sentinels at the ends
606 of the blocks before calling this function.
   The scan has no length argument; it stops only at a difference,
   which is why the sentinels are required. The caller in this file
   stores complementary bytes just past the data it read.
608 Return the offset of the first byte that differs. */
611 block_compare (word const *p0, word const *p1)
616 /* Find the rough position of the first difference by reading words,
619 for (l0 = p0, l1 = p1; *l0 == *l1; l0++, l1++)
622 /* Find the exact differing position (endianness independent). */
624 for (c0 = (char const *) l0, c1 = (char const *) l1;
/* The result is a byte offset measured from the start of P0. */
629 return c0 - (char const *) p0;
632 /* Put into BUF the unsigned char C, making unprintable bytes
633 visible by quoting like cat -t does. */
636 sprintc (char *buf, unsigned char c)
662 /* Position file F to ignore_initial[F] bytes from its initial position,
663 and yield its new position. Don't try more than once.
   The static arrays below record, per file, that the lseek has been
   attempted and what it returned. Callers in this file compare the
   result with -1 to detect a failed lseek. */
666 file_position (int f)
668 static bool positioned[2];
669 static off_t position[2];
673 positioned[f] = true;
/* Seek relative to the current offset of the descriptor. */
674 position[f] = lseek (file_desc[f], ignore_initial[f], SEEK_CUR);