1 /* $OpenBSD: sdiff.c,v 1.36 2015/12/29 19:04:46 gsoares Exp $ */
4 * Written by Raymond Lai <ray@cyth.net>.
11 #include <sys/param.h>
12 #include <sys/queue.h>
14 #include <sys/types.h>
32 #define DIFF_PATH "/usr/bin/diff"
36 * Each column must be at least one character wide, plus three
37 * characters between the columns (space, [<|>], space).
41 /* 3 kilobytes of chars */
44 /* A single diff line. */
46 STAILQ_ENTRY(diffline) diffentries;
52 static void astrcat(char **, const char *);
53 static void enqueue(char *, char, char *);
54 static char *mktmpcpy(const char *);
55 static int istextfile(FILE *);
56 static void binexec(char *, char *, char *) __dead2;
57 static void freediff(struct diffline *);
58 static void int_usage(void);
59 static int parsecmd(FILE *, FILE *, FILE *);
60 static void printa(FILE *, size_t);
61 static void printc(FILE *, size_t, FILE *, size_t);
62 static void printcol(const char *, size_t *, const size_t);
63 static void printd(FILE *, size_t);
64 static void println(const char *, const char, const char *);
65 static void processq(void);
66 static void prompt(const char *, const char *);
67 static void usage(void) __dead2;
68 static char *xfgets(FILE *);
70 static STAILQ_HEAD(, diffline) diffhead = STAILQ_HEAD_INITIALIZER(diffhead);
71 static size_t line_width; /* width of a line (two columns and divider) */
72 static size_t width; /* width of each column */
73 static size_t file1ln, file2ln; /* line number of file1 and file2 */
74 static int Iflag = 0; /* ignore sets matching regexp */
75 static int lflag; /* print only left column for identical lines */
76 static int sflag; /* skip identical lines */
77 FILE *outfp; /* file to save changes to */
78 const char *tmpdir; /* TMPDIR or /tmp */
81 HELP_OPT = CHAR_MAX + 1,
93 /* the following groupings must be in sequence */
101 /* end order-sensitive enums */
110 static struct option longopts[] = {
111 /* options only processed in sdiff */
112 { "left-column", no_argument, NULL, LEFTC_OPT },
113 { "suppress-common-lines", no_argument, NULL, 's' },
114 { "width", required_argument, NULL, 'w' },
116 { "output", required_argument, NULL, 'o' },
117 { "diff-program", required_argument, NULL, DIFFPROG_OPT },
119 /* Options processed by diff. */
120 { "ignore-file-name-case", no_argument, NULL, FCASE_IGNORE_OPT },
121 { "no-ignore-file-name-case", no_argument, NULL, FCASE_SENSITIVE_OPT },
122 { "strip-trailing-cr", no_argument, NULL, STRIPCR_OPT },
123 { "tabsize", required_argument, NULL, TSIZE_OPT },
124 { "help", no_argument, NULL, HELP_OPT },
125 { "text", no_argument, NULL, 'a' },
126 { "ignore-blank-lines", no_argument, NULL, 'B' },
127 { "ignore-space-change", no_argument, NULL, 'b' },
128 { "minimal", no_argument, NULL, 'd' },
129 { "ignore-tab-expansion", no_argument, NULL, 'E' },
130 { "ignore-matching-lines", required_argument, NULL, 'I' },
131 { "ignore-case", no_argument, NULL, 'i' },
132 { "expand-tabs", no_argument, NULL, 't' },
133 { "speed-large-files", no_argument, NULL, 'H' },
134 { "ignore-all-space", no_argument, NULL, 'W' },
136 { NULL, 0, NULL, '\0'}
139 static const char *help_msg[] = {
140 "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1 file2\n",
141 "-l, --left-column: only print the left column for identical lines.",
142 "-o OUTFILE, --output=OUTFILE: interactively merge file1 and file2 into outfile.",
143 "-s, --suppress-common-lines: skip identical lines.",
144 "-w WIDTH, --width=WIDTH: print a maximum of WIDTH characters on each line.",
146 "Options passed to diff(1) are:",
147 "\t-a, --text: treat file1 and file2 as text files.",
148 "\t-b, --ignore-trailing-cr: ignore trailing blank spaces.",
149 "\t-d, --minimal: minimize diff size.",
150 "\t-I RE, --ignore-matching-lines=RE: ignore changes whose line matches RE.",
151 "\t-i, --ignore-case: do a case-insensitive comparison.",
152 "\t-t, --expand-tabs: sxpand tabs to spaces.",
153 "\t-W, --ignore-all-spaces: ignore all spaces.",
154 "\t--speed-large-files: assume large file with scattered changes.",
155 "\t--strip-trailing-cr: strip trailing carriage return.",
156 "\t--ignore-file-name-case: ignore case of file names.",
157 "\t--no-ignore-file-name-case: do not ignore file name case",
158 "\t--tabsize NUM: change size of tabs (default 8.)",
164 * Create temporary file if source_file is not a regular file.
165 * Returns temporary file name if one was malloced, NULL if unnecessary.
168 mktmpcpy(const char *source_file)
176 /* Open input and output. */
177 ifd = open(source_file, O_RDONLY, 0);
178 /* File was opened successfully. */
180 if (fstat(ifd, &sb) == -1)
181 err(2, "error getting file status from %s", source_file);
184 if (S_ISREG(sb.st_mode)) {
189 /* If ``-'' does not exist the user meant stdin. */
190 if (errno == ENOENT && strcmp(source_file, "-") == 0)
193 err(2, "error opening %s", source_file);
196 /* Not a regular file, so copy input into temporary file. */
197 if (asprintf(&target_file, "%s/sdiff.XXXXXXXXXX", tmpdir) == -1)
199 if ((ofd = mkstemp(target_file)) == -1) {
200 warn("error opening %s", target_file);
203 while ((rcount = read(ifd, buf, sizeof(buf))) != -1 &&
207 wcount = write(ofd, buf, (size_t)rcount);
208 if (-1 == wcount || rcount != wcount) {
209 warn("error writing to %s", target_file);
214 warn("error reading from %s", source_file);
221 return (target_file);
229 main(int argc, char **argv)
231 FILE *diffpipe=NULL, *file1, *file2;
232 size_t diffargc = 0, wflag = WIDTH;
233 int ch, fd[2] = {-1}, status;
235 const char *outfile = NULL;
237 char **diffargv, *diffprog = DIFF_PATH, *filename1, *filename2,
238 *tmp1, *tmp2, *s1, *s2;
242 * Process diff flags.
245 * Allocate memory for diff arguments and NULL.
246 * Each flag has at most one argument, so doubling argc gives an
247 * upper limit of how many diff args can be passed. argv[0],
248 * file1, and file2 won't have arguments so doubling them will
249 * waste some memory; however we need an extra space for the
250 * NULL at the end, so it sort of works out.
252 if (!(diffargv = calloc(argc, sizeof(char **) * 2)))
255 /* Add first argument, the program name. */
256 diffargv[diffargc++] = diffprog;
258 /* create a dynamic string for merging single-switch options */
259 if ( asprintf(&diffargv[diffargc++], "-") < 0 )
262 while ((ch = getopt_long(argc, argv, "aBbdEHI:ilo:stWw:",
263 longopts, NULL)) != -1) {
267 /* only compatible --long-name-form with diff */
268 case FCASE_IGNORE_OPT:
269 case FCASE_SENSITIVE_OPT:
274 /* combine no-arg single switches */
284 for(popt = longopts; ch != popt->val && popt->name != NULL; popt++);
285 diffargv[1] = realloc(diffargv[1], sizeof(char) * strlen(diffargv[1]) + 2);
287 * In diff, the 'W' option is 'w' and the 'w' is 'W'.
290 sprintf(diffargv[1], "%sw", diffargv[1]);
292 sprintf(diffargv[1], "%s%c", diffargv[1], ch);
295 diffargv[0] = diffprog = optarg;
299 diffargv[diffargc++] = "-I";
300 diffargv[diffargc++] = optarg;
312 wflag = strtonum(optarg, WIDTH_MIN,
315 errx(2, "width is %s: %s", errstr, optarg);
318 for (i = 0; help_msg[i] != NULL; i++)
319 printf("%s\n", help_msg[i]);
328 /* no single switches were used */
329 if (strcmp(diffargv[1], "-") == 0 ) {
330 for ( i = 1; i < argc-1; i++) {
331 diffargv[i] = diffargv[i+1];
333 diffargv[diffargc-1] = NULL;
343 if (outfile && (outfp = fopen(outfile, "w")) == NULL)
344 err(2, "could not open: %s", optarg);
346 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
353 * Create temporary files for diff and sdiff to share if file1
354 * or file2 are not regular files. This allows sdiff and diff
355 * to read the same inputs if one or both inputs are stdin.
357 * If any temporary files were created, their names would be
358 * saved in tmp1 or tmp2. tmp1 should never equal tmp2.
361 /* file1 and file2 are the same, so copy to same temp file. */
362 if (strcmp(filename1, filename2) == 0) {
363 if ((tmp1 = mktmpcpy(filename1)))
364 filename1 = filename2 = tmp1;
365 /* Copy file1 and file2 into separate temp files. */
367 if ((tmp1 = mktmpcpy(filename1)))
369 if ((tmp2 = mktmpcpy(filename2)))
373 diffargv[diffargc++] = filename1;
374 diffargv[diffargc++] = filename2;
375 /* Add NULL to end of array to indicate end of array. */
376 diffargv[diffargc++] = NULL;
378 /* Subtract column divider and divide by two. */
379 width = (wflag - 3) / 2;
380 /* Make sure line_width can fit in size_t. */
381 if (width > (SIZE_MAX - 3) / 2)
382 errx(2, "width is too large: %zu", width);
383 line_width = width * 2 + 3;
388 switch (pid = fork()) {
391 /* We don't read from the pipe. */
393 if (dup2(fd[1], STDOUT_FILENO) == -1)
394 err(2, "child could not duplicate descriptor");
395 /* Free unused descriptor. */
397 execvp(diffprog, diffargv);
398 err(2, "could not execute diff: %s", diffprog);
401 err(2, "could not fork");
406 /* We don't write to the pipe. */
409 /* Open pipe to diff command. */
410 if ((diffpipe = fdopen(fd[0], "r")) == NULL)
411 err(2, "could not open diff pipe");
413 if ((file1 = fopen(filename1, "r")) == NULL)
414 err(2, "could not open %s", filename1);
415 if ((file2 = fopen(filename2, "r")) == NULL)
416 err(2, "could not open %s", filename2);
417 if (!istextfile(file1) || !istextfile(file2)) {
418 /* Close open files and pipe, delete temps */
421 if (diffpipe != NULL)
425 warn("Error deleting %s.", tmp1);
428 warn("Error deleting %s.", tmp2);
431 binexec(diffprog, filename1, filename2);
433 /* Line numbers start at one. */
434 file1ln = file2ln = 1;
436 /* Read and parse diff output. */
437 while (parsecmd(diffpipe, file1, file2) != EOF)
441 /* Wait for diff to exit. */
442 if (waitpid(pid, &status, 0) == -1 || !WIFEXITED(status) ||
443 WEXITSTATUS(status) >= 2)
444 err(2, "diff exited abnormally.");
446 /* Delete and free unneeded temporary files. */
449 warn("Error deleting %s.", tmp1);
452 warn("Error deleting %s.", tmp2);
455 filename1 = filename2 = tmp1 = tmp2 = NULL;
457 /* No more diffs, so print common lines. */
459 while ((s1 = xfgets(file1)))
460 enqueue(s1, ' ', NULL);
466 enqueue(s1, ' ', s2);
472 /* Process unmodified lines. */
475 /* Return diff exit status. */
476 return (WEXITSTATUS(status));
480 * When sdiff detects a binary file as input, it executes diff on
481 * both files to maintain the same behavior as GNU sdiff with binary input.
484 binexec(char *diffprog, char *f1, char *f2)
487 char *args[] = {diffprog, f1, f2, (char *) 0};
488 execv(diffprog, args);
490 /* If execv() fails, sdiff's execution will continue below. */
491 errx(1, "could not execute diff process");
495 * Checks whether a file appears to be a text file.
505 for (i = 0; i <= MAX_CHECK; i++) {
519 * Prints an individual column (left or right), taking into account
520 * that tabs are variable-width. Takes a string, the current column
521 * the cursor is on the screen, and the maximum value of the column.
522 * The column value is updated as we go along.
525 printcol(const char *s, size_t *col, const size_t col_max)
528 for (; *s && *col < col_max; ++s) {
534 * If rounding to next multiple of eight causes
535 * an integer overflow, just return.
537 if (*col > SIZE_MAX - 8)
540 /* Round to next multiple of eight. */
541 new_col = (*col / 8 + 1) * 8;
544 * If printing the tab goes past the column
545 * width, don't print it and just quit.
547 if (new_col > col_max)
559 * Prompts user to either choose between two strings or edit one, both,
563 prompt(const char *s1, const char *s2)
567 /* Print command prompt. */
570 /* Get user input. */
571 for (; (cmd = xfgets(stdin)); free(cmd)) {
574 /* Skip leading whitespace. */
575 for (p = cmd; isspace(*p); ++p)
581 if (eparse(p, s1, s2) == -1)
586 /* Choose left column as-is. */
588 fprintf(outfp, "%s\n", s1);
589 /* End of command parsing. */
595 /* Choose right column as-is. */
597 fprintf(outfp, "%s\n", s2);
598 /* End of command parsing. */
607 /* Interactive usage help. */
613 /* Prompt user again. */
621 * If there was no error, we received an EOF from stdin, so we
630 * Takes two strings, separated by a column divider. NULL strings are
631 * treated as empty columns. If the divider is the ` ' character, the
632 * second column is not printed (-l flag). In this case, the second
633 * string must be NULL. When the second column is NULL, the divider
634 * does not print the trailing space following the divider character.
636 * Takes into account that tabs can take multiple columns.
639 println(const char *s1, const char div, const char *s2)
643 /* Print first column. Skips if s1 == NULL. */
646 /* Skip angle bracket and space. */
647 printcol(s1, &col, width);
651 /* Otherwise, we pad this column up to width. */
652 for (; col < width; ++col)
655 /* Only print left column. */
656 if (div == ' ' && !s2) {
662 * Print column divider. If there is no second column, we don't
663 * need to add the space for padding.
666 printf(" %c\n", div);
672 /* Skip angle bracket and space. */
673 printcol(s2, &col, line_width);
679 * Reads a line from file and returns as a string. If EOF is reached,
680 * NULL is returned. The returned string must be freed afterwards.
693 if ((l = getline(&s, &linecap, file)) == -1) {
695 err(2, "error reading file");
706 * Parse ed commands from diffpipe and print lines from file1 (lines
707 * to change or delete) or file2 (lines to add or change).
711 parsecmd(FILE *diffpipe, FILE *file1, FILE *file2)
713 size_t file1start, file1end, file2start, file2end, n;
714 /* ed command line and pointer to characters in line */
719 /* Read ed command. */
720 if (!(line = xfgets(diffpipe)))
724 /* Go to character after line number. */
729 file1start = strtonum(line, 0, INT_MAX, &errstr);
731 errx(2, "file1 start is %s: %s", errstr, line);
733 /* A range is specified for file1. */
736 /* Go to character after file1end. */
741 file1end = strtonum(q, 0, INT_MAX, &errstr);
743 errx(2, "file1 end is %s: %s", errstr, line);
744 if (file1start > file1end)
745 errx(2, "invalid line range in file1: %s", line);
747 file1end = file1start;
750 /* Check that cmd is valid. */
751 if (!(cmd == 'a' || cmd == 'c' || cmd == 'd'))
752 errx(2, "ed command not recognized: %c: %s", cmd, line);
755 /* Go to character after line number. */
760 file2start = strtonum(q, 0, INT_MAX, &errstr);
762 errx(2, "file2 start is %s: %s", errstr, line);
765 * There should either be a comma signifying a second line
766 * number or the line should just end here.
768 if (c != ',' && c != '\0')
769 errx(2, "invalid line range in file2: %c: %s", c, line);
773 file2end = strtonum(p, 0, INT_MAX, &errstr);
775 errx(2, "file2 end is %s: %s", errstr, line);
776 if (file2start >= file2end)
777 errx(2, "invalid line range in file2: %s", line);
779 file2end = file2start;
781 /* Appends happen _after_ stated line. */
783 if (file1start != file1end)
784 errx(2, "append cannot have a file1 range: %s",
786 if (file1start == SIZE_MAX)
787 errx(2, "file1 line range too high: %s", line);
788 file1start = ++file1end;
791 * I'm not sure what the deal is with the line numbers for
794 else if (cmd == 'd') {
795 if (file2start != file2end)
796 errx(2, "delete cannot have a file2 range: %s",
798 if (file2start == SIZE_MAX)
799 errx(2, "file2 line range too high: %s", line);
800 file2start = ++file2end;
804 * Continue reading file1 and file2 until we reach line numbers
805 * specified by diff. Should only happen with -I flag.
807 for (; file1ln < file1start && file2ln < file2start;
808 ++file1ln, ++file2ln) {
811 if (!(s1 = xfgets(file1)))
812 errx(2, "file1 shorter than expected");
813 if (!(s2 = xfgets(file2)))
814 errx(2, "file2 shorter than expected");
816 /* If the -l flag was specified, print only left column. */
820 * XXX - If -l and -I are both specified, all
821 * unchanged or ignored lines are shown with a
822 * `(' divider. This matches GNU sdiff, but I
823 * believe it is a bug. Just check out:
824 * gsdiff -l -I '^$' samefile samefile.
827 enqueue(s1, '(', NULL);
829 enqueue(s1, ' ', NULL);
831 enqueue(s1, ' ', s2);
833 /* Ignore deleted lines. */
834 for (; file1ln < file1start; ++file1ln) {
837 if (!(s = xfgets(file1)))
838 errx(2, "file1 shorter than expected");
840 enqueue(s, '(', NULL);
842 /* Ignore added lines. */
843 for (; file2ln < file2start; ++file2ln) {
846 if (!(s = xfgets(file2)))
847 errx(2, "file2 shorter than expected");
849 /* If -l flag was given, don't print right column. */
853 enqueue(NULL, ')', s);
856 /* Process unmodified or skipped lines. */
861 printa(file2, file2end);
862 n = file2end - file2start + 1;
865 printc(file1, file1end, file2, file2end);
866 n = file1end - file1start + 1 + 1 + file2end - file2start + 1;
869 printd(file1, file1end);
870 n = file1end - file1start + 1;
873 errx(2, "invalid diff command: %c: %s", cmd, line);
877 /* Skip to next ed line. */
879 if (!(line = xfgets(diffpipe)))
880 errx(2, "diff ended early");
888 * Queues up a diff line.
891 enqueue(char *left, char div, char *right)
893 struct diffline *diffp;
895 if (!(diffp = malloc(sizeof(struct diffline))))
899 diffp->right = right;
900 STAILQ_INSERT_TAIL(&diffhead, diffp, diffentries);
904 * Free a diffline structure and its elements.
907 freediff(struct diffline *diffp)
916 * Append second string into first. Repeated appends to the same string
917 * are cached, making this an O(n) function, where n = strlen(append).
920 astrcat(char **s, const char *append)
922 /* Length of string in previous run. */
923 static size_t offset = 0;
926 * String from previous run. Compared to *s to see if we are
927 * dealing with the same string. If so, we can use offset.
929 static const char *oldstr = NULL;
933 * First string is NULL, so just copy append.
936 if (!(*s = strdup(append)))
939 /* Keep track of string. */
947 * *s is a string so concatenate.
950 /* Did we process the same string in the last run? */
952 * If this is a different string from the one we just processed
960 /* Size = strlen(*s) + \n + strlen(append) + '\0'. */
961 newsiz = offset + 1 + strlen(append) + 1;
963 /* Resize *s to fit new string. */
964 newstr = realloc(*s, newsiz);
969 /* *s + offset should be end of string. */
971 strlcpy(*s + offset, "\n", newsiz - offset);
972 strlcat(*s + offset, append, newsiz - offset);
974 /* New string length should be exactly newsiz - 1 characters. */
975 /* Store generated string's values. */
981 * Process diff set queue, printing, prompting, and saving each diff
982 * line stored in queue.
987 struct diffline *diffp;
988 char divc, *left, *right;
990 /* Don't process empty queue. */
991 if (STAILQ_EMPTY(&diffhead))
994 /* Remember the divider. */
995 divc = STAILQ_FIRST(&diffhead)->div;
1000 * Go through set of diffs, concatenating each line in left or
1001 * right column into two long strings, `left' and `right'.
1003 STAILQ_FOREACH(diffp, &diffhead, diffentries) {
1005 * Print changed lines if -s was given,
1006 * print all lines if -s was not given.
1008 if (!sflag || diffp->div == '|' || diffp->div == '<' ||
1010 println(diffp->left, diffp->div, diffp->right);
1012 /* Append new lines to diff set. */
1014 astrcat(&left, diffp->left);
1016 astrcat(&right, diffp->right);
1019 /* Empty queue and free each diff line and its elements. */
1020 while (!STAILQ_EMPTY(&diffhead)) {
1021 diffp = STAILQ_FIRST(&diffhead);
1022 STAILQ_REMOVE_HEAD(&diffhead, diffentries);
1026 /* Write to outfp, prompting user if lines are different. */
1029 case ' ': case '(': case ')':
1030 fprintf(outfp, "%s\n", left);
1032 case '|': case '<': case '>':
1033 prompt(left, right);
1036 errx(2, "invalid divider: %c", divc);
1039 /* Free left and right. */
1045 * Print lines following an (a)ppend command.
1048 printa(FILE *file, size_t line2)
1052 for (; file2ln <= line2; ++file2ln) {
1053 if (!(line = xfgets(file)))
1054 errx(2, "append ended early");
1055 enqueue(NULL, '>', line);
1061 * Print lines following a (c)hange command, from file1ln to file1end
1062 * and from file2ln to file2end.
1065 printc(FILE *file1, size_t file1end, FILE *file2, size_t file2end)
1068 STAILQ_ENTRY(fileline) fileentries;
1071 STAILQ_HEAD(, fileline) delqhead = STAILQ_HEAD_INITIALIZER(delqhead);
1073 /* Read lines to be deleted. */
1074 for (; file1ln <= file1end; ++file1ln) {
1075 struct fileline *linep;
1078 /* Read lines from both. */
1079 if (!(line1 = xfgets(file1)))
1080 errx(2, "error reading file1 in delete in change");
1082 /* Add to delete queue. */
1083 if (!(linep = malloc(sizeof(struct fileline))))
1085 linep->line = line1;
1086 STAILQ_INSERT_TAIL(&delqhead, linep, fileentries);
1089 /* Process changed lines. */
1090 for (; !STAILQ_EMPTY(&delqhead) && file2ln <= file2end;
1092 struct fileline *del;
1096 if (!(add = xfgets(file2)))
1097 errx(2, "error reading add in change");
1099 del = STAILQ_FIRST(&delqhead);
1100 enqueue(del->line, '|', add);
1101 STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1103 * Free fileline structure but not its elements since
1104 * they are queued up.
1110 /* Process remaining lines to add. */
1111 for (; file2ln <= file2end; ++file2ln) {
1115 if (!(add = xfgets(file2)))
1116 errx(2, "error reading add in change");
1118 enqueue(NULL, '>', add);
1122 /* Process remaining lines to delete. */
1123 while (!STAILQ_EMPTY(&delqhead)) {
1124 struct fileline *filep;
1126 filep = STAILQ_FIRST(&delqhead);
1127 enqueue(filep->line, '<', NULL);
1128 STAILQ_REMOVE_HEAD(&delqhead, fileentries);
1135 * Print deleted lines from file, from file1ln to file1end.
1138 printd(FILE *file1, size_t file1end)
1142 /* Print out lines file1ln to file1end. */
1143 for (; file1ln <= file1end; ++file1ln) {
1144 if (!(line1 = xfgets(file1)))
1145 errx(2, "file1 ended early in delete");
1146 enqueue(line1, '<', NULL);
1152 * Interactive mode usage.
1158 puts("e:\tedit blank diff\n"
1159 "eb:\tedit both diffs concatenated\n"
1160 "el:\tedit left diff\n"
1161 "er:\tedit right diff\n"
1162 "l | 1:\tchoose left diff\n"
1163 "r | 2:\tchoose right diff\n"
1164 "s:\tsilent mode--don't print identical lines\n"
1165 "v:\tverbose mode--print identical lines\n"
1174 "usage: sdiff [-abdilstW] [-I regexp] [-o outfile] [-w width] file1"