1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */
4 * Copyright (C) Caldera International Inc. 2001-2002.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code and documentation must retain the above
11 * copyright notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement:
17 * This product includes software developed or owned by Caldera
19 * 4. Neither the name of Caldera International, Inc. nor the names of other
20 * contributors may be used to endorse or promote products derived from
21 * this software without specific prior written permission.
23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
37 * Copyright (c) 1991, 1993
38 * The Regents of the University of California. All rights reserved.
40 * Redistribution and use in source and binary forms, with or without
41 * modification, are permitted provided that the following conditions
43 * 1. Redistributions of source code must retain the above copyright
44 * notice, this list of conditions and the following disclaimer.
45 * 2. Redistributions in binary form must reproduce the above copyright
46 * notice, this list of conditions and the following disclaimer in the
47 * documentation and/or other materials provided with the distribution.
48 * 3. Neither the name of the University nor the names of its contributors
49 * may be used to endorse or promote products derived from this software
50 * without specific prior written permission.
52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
77 #include <capsicum_helpers.h>
96 #define _PATH_PR "/usr/bin/pr"
99 * diff - compare two files.
103 * Uses an algorithm due to Harold Stone, which finds
104 * a pair of longest identical subsequences in the two
107 * The major goal is to generate the match vector J.
108 * J[i] is the index of the line in file1 corresponding
109 * to line i file0. J[i] = 0 if there is no
110 * such line in file1.
112 * Lines are hashed so as to work in core. All potential
113 * matches are located by sorting the lines of each file
114 * on the hash (called ``value''). In particular, this
115 * collects the equivalence classes in file1 together.
116 * Subroutine equiv replaces the value of each line in
117 * file0 by the index of the first element of its
118 * matching equivalence in (the reordered) file1.
119 * To save space equiv squeezes file1 into a single
120 * array member in which the equivalence classes
121 * are simply concatenated, except that their first
122 * members are flagged by changing sign.
124 * Next the indices that point into member are unsorted into
125 * array class according to the original order of file0.
127 * The cleverness lies in routine stone. This marches
128 * through the lines of file0, developing a vector klist
129 * of "k-candidates". At step i a k-candidate is a matched
130 * pair of lines x,y (x in file0 y in file1) such that
131 * there is a common subsequence of length k
132 * between the first i lines of file0 and the first y
133 * lines of file1, but there is no such subsequence for
134 * any smaller y. x is the earliest possible mate to y
135 * that occurs in such a subsequence.
137 * Whenever any of the members of the equivalence class of
138 * lines in file1 matable to a line in file0 has serial number
139 * less than the y of some k-candidate, that k-candidate
140 * with the smallest such y is replaced. The new
141 * k-candidate is chained (via pred) to the current
142 * k-1 candidate so that the actual subsequence can
143 * be recovered. When a member has serial number greater
144 * that the y of all k-candidates, the klist is extended.
145 * At the end, the longest subsequence is pulled out
146 * and placed in the array J by unravel
148 * With J in hand, the matches there recorded are
149 * check'ed against reality to assure that no spurious
150 * matches have crept in due to hashing. If they have,
151 * they are broken, and "jackpot" is recorded--a harmless
152 * matter except that a true match for a spuriously
153 * mated line may now be unnecessarily reported as a change.
155 * Much of the complexity of the program comes simply
156 * from trying to minimize core utilization and
157 * maximize the range of doable problems by dynamically
158 * allocating what is needed and reusing what is not.
159 * The core requirements for problems larger than somewhat
160 * are (in words) 2*length(file0) + length(file1) +
161 * 3*(number of k-candidates installed), typically about
162 * 6n words for files of length n.
177 * The following struct is used to record change information when
178 * doing a "context" or "unified" diff. (see routine "change" to
179 * understand the highly mnemonic field names)
182 int a; /* start line in old file */
183 int b; /* end line in old file */
184 int c; /* start line in new file */
185 int d; /* end line in new file */
188 #define diff_output printf
189 static FILE *opentemp(const char *);
190 static void output(char *, FILE *, char *, FILE *, int);
191 static void check(FILE *, FILE *, int);
192 static void range(int, int, const char *);
193 static void uni_range(int, int);
194 static void dump_context_vec(FILE *, FILE *, int);
195 static void dump_unified_vec(FILE *, FILE *, int);
196 static void prepare(int, FILE *, size_t, int);
197 static void prune(void);
198 static void equiv(struct line *, int, struct line *, int, int *);
199 static void unravel(int);
200 static void unsort(struct line *, int, int *);
201 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
202 static void sort(struct line *, int);
203 static void print_header(const char *, const char *);
204 static int ignoreline(char *);
205 static int asciifile(FILE *);
206 static int fetch(long *, int, int, FILE *, int, int, int);
207 static int newcand(int, int, int);
208 static int search(int *, int, int);
209 static int skipline(FILE *);
210 static int isqrt(int);
211 static int stone(int *, int, int *, int *, int);
212 static int readhash(FILE *, int);
213 static int files_differ(FILE *, FILE *, int);
214 static char *match_function(const long *, int, FILE *);
215 static char *preadline(int, size_t, off_t);
217 static int *J; /* will be overlaid on class */
218 static int *class; /* will be overlaid on file[0] */
219 static int *klist; /* will be overlaid on file[0] after class */
220 static int *member; /* will be overlaid on file[1] */
222 static int inifdef; /* whether or not we are in a #ifdef block */
224 static int pref, suff; /* length of prefix and suffix */
226 static int anychange;
227 static long *ixnew; /* will be overlaid on file[1] */
228 static long *ixold; /* will be overlaid on klist */
229 static struct cand *clist; /* merely a free storage pot for candidates */
230 static int clistlen; /* the length of clist */
231 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */
232 static u_char *chrtran; /* translation table for case-folding */
233 static struct context_vec *context_vec_start;
234 static struct context_vec *context_vec_end;
235 static struct context_vec *context_vec_ptr;
237 #define FUNCTION_CONTEXT_SIZE 55
238 static char lastbuf[FUNCTION_CONTEXT_SIZE];
240 static int lastmatchline;
244 * chrtran points to one of 2 translation tables: cup2low if folding upper to
245 * lower case clow2low if not folding case
247 static u_char clow2low[256] = {
248 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
249 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
250 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
251 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
252 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
253 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
254 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
255 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
256 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
257 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
258 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
259 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
260 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
261 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
262 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
263 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
264 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
265 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
266 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
267 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
268 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
269 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
270 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
274 static u_char cup2low[256] = {
275 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
276 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
277 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
278 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
279 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
280 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
281 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
282 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
283 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
284 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
285 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
286 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
287 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
288 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
289 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
290 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
291 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
292 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
293 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
294 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
295 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
296 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
297 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
302 diffreg(char *file1, char *file2, int flags, int capsicum)
309 cap_rights_t rights_ro;
318 context_vec_ptr = context_vec_start - 1;
319 if (flags & D_IGNORECASE)
323 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
324 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
325 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
328 if (flags & D_EMPTY1)
329 f1 = fopen(_PATH_DEVNULL, "r");
331 if (!S_ISREG(stb1.st_mode)) {
332 if ((f1 = opentemp(file1)) == NULL ||
333 fstat(fileno(f1), &stb1) < 0) {
338 } else if (strcmp(file1, "-") == 0)
341 f1 = fopen(file1, "r");
349 if (flags & D_EMPTY2)
350 f2 = fopen(_PATH_DEVNULL, "r");
352 if (!S_ISREG(stb2.st_mode)) {
353 if ((f2 = opentemp(file2)) == NULL ||
354 fstat(fileno(f2), &stb2) < 0) {
359 } else if (strcmp(file2, "-") == 0)
362 f2 = fopen(file2, "r");
371 /* redirect stdout to pr */
376 xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377 signal(SIGPIPE, SIG_IGN);
381 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
385 err(2, "No more processes");
388 if (pfd[0] != STDIN_FILENO) {
389 dup2(pfd[0], STDIN_FILENO);
393 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
398 if (pfd[1] != STDOUT_FILENO) {
399 ostdout = dup(STDOUT_FILENO);
400 dup2(pfd[1], STDOUT_FILENO);
409 e = xmalloc(sizeof(struct kevent));
410 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
412 if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
418 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
419 if (cap_rights_limit(fileno(f1), &rights_ro) < 0
421 err(2, "unable to limit rights on: %s", file1);
422 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
424 err(2, "unable to limit rights on: %s", file2);
425 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
426 /* stding has already been limited */
427 if (caph_limit_stderr() == -1)
428 err(2, "unable to limit stderr");
429 if (caph_limit_stdout() == -1)
430 err(2, "unable to limit stdout");
431 } else if (caph_limit_stdio() == -1)
432 err(2, "unable to limit stdio");
434 caph_cache_catpages();
436 if (cap_enter() < 0 && errno != ENOSYS)
437 err(2, "unable to enter capability mode");
440 switch (files_differ(f1, f2, flags)) {
451 if ((flags & D_FORCEASCII) == 0 &&
452 (!asciifile(f1) || !asciifile(f2))) {
457 prepare(0, f1, stb1.st_size, flags);
458 prepare(1, f2, stb2.st_size, flags);
461 sort(sfile[0], slen[0]);
462 sort(sfile[1], slen[1]);
464 member = (int *)file[1];
465 equiv(sfile[0], slen[0], sfile[1], slen[1], member);
466 member = xreallocarray(member, slen[1] + 2, sizeof(*member));
468 class = (int *)file[0];
469 unsort(sfile[0], slen[0], class);
470 class = xreallocarray(class, slen[0] + 2, sizeof(*class));
472 klist = xcalloc(slen[0] + 2, sizeof(*klist));
475 clist = xcalloc(clistlen, sizeof(*clist));
476 i = stone(class, slen[0], member, klist, flags);
480 J = xreallocarray(J, len[0] + 2, sizeof(*J));
485 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
486 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
487 check(f1, f2, flags);
488 output(file1, f1, file2, f2, flags);
489 if (ostdout != -1 && e != NULL) {
490 /* close the pipe to pr and restore stdout */
494 if (ostdout != STDOUT_FILENO) {
495 close(STDOUT_FILENO);
496 dup2(ostdout, STDOUT_FILENO);
499 if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
503 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
504 errx(2, "pr exited abnormally");
505 else if (WIFSIGNALED(wstatus))
506 errx(2, "pr killed by signal %d",
525 * Check to see if the given files differ.
526 * Returns 0 if they are the same, 1 if different, and -1 on error.
527 * XXX - could use code from cmp(1) [faster]
530 files_differ(FILE *f1, FILE *f2, int flags)
532 char buf1[BUFSIZ], buf2[BUFSIZ];
535 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
536 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
539 i = fread(buf1, 1, sizeof(buf1), f1);
540 j = fread(buf2, 1, sizeof(buf2), f2);
541 if ((!i && ferror(f1)) || (!j && ferror(f2)))
547 if (memcmp(buf1, buf2, i) != 0)
553 opentemp(const char *f)
555 char buf[BUFSIZ], tempfile[PATH_MAX];
559 if (strcmp(f, "-") == 0)
561 else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
564 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
566 if ((ofd = mkstemp(tempfile)) < 0) {
571 while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
572 if (write(ofd, buf, nread) != nread) {
579 lseek(ofd, (off_t)0, SEEK_SET);
580 return (fdopen(ofd, "r"));
584 splice(char *dir, char *path)
589 dirlen = strlen(dir);
590 while (dirlen != 0 && dir[dirlen - 1] == '/')
592 if ((tail = strrchr(path, '/')) == NULL)
596 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
601 prepare(int i, FILE *fd, size_t filesize, int flags)
609 sz = MIN(filesize, SIZE_MAX) / 25;
613 p = xcalloc(sz + 3, sizeof(*p));
614 for (j = 0; (h = readhash(fd, flags));) {
617 p = xreallocarray(p, sz + 3, sizeof(*p));
630 for (pref = 0; pref < len[0] && pref < len[1] &&
631 file[0][pref + 1].value == file[1][pref + 1].value;
634 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
635 file[0][len[0] - suff].value == file[1][len[1] - suff].value;
638 for (j = 0; j < 2; j++) {
639 sfile[j] = file[j] + pref;
640 slen[j] = len[j] - pref - suff;
641 for (i = 0; i <= slen[j]; i++)
642 sfile[j][i].serial = i;
647 equiv(struct line *a, int n, struct line *b, int m, int *c)
652 while (i <= n && j <= m) {
653 if (a[i].value < b[j].value)
655 else if (a[i].value == b[j].value)
666 while (b[j + 1].value == b[j].value) {
674 /* Code taken from ping.c */
683 do { /* newton was a stinker */
688 } while ((x - y) > 1 || (x - y) < -1);
694 stone(int *a, int n, int *b, int *c, int flags)
697 int oldc, tc, oldl, sq;
698 u_int numtries, bound;
700 if (flags & D_MINIMAL)
704 bound = MAX(256, sq);
708 c[0] = newcand(0, 0, 0);
709 for (i = 1; i <= n; i++) {
718 if (y <= clist[oldc].y)
724 if (clist[c[l]].y <= y)
727 c[l] = newcand(i, y, oldc);
732 c[l] = newcand(i, y, oldc);
736 } while ((y = b[++j]) > 0 && numtries < bound);
742 newcand(int x, int y, int pred)
746 if (clen == clistlen) {
747 clistlen = clistlen * 11 / 10;
748 clist = xreallocarray(clist, clistlen, sizeof(*clist));
758 search(int *c, int k, int y)
762 if (clist[c[k]].y < y) /* quick look for typical case */
787 for (i = 0; i <= len[0]; i++)
788 J[i] = i <= pref ? i :
789 i > len[0] - suff ? i + len[1] - len[0] : 0;
790 for (q = clist + p; q->y != 0; q = clist + q->pred)
791 J[q->x + pref] = q->y + pref;
795 * Check does double duty:
796 * 1. ferret out any fortuitous correspondences due
797 * to confounding by hashing (which result in "jackpot")
798 * 2. collect random access indexes to the two files
801 check(FILE *f1, FILE *f2, int flags)
803 int i, j, jackpot, c, d;
809 ixold[0] = ixnew[0] = 0;
812 for (i = 1; i <= len[0]; i++) {
814 ixold[i] = ctold += skipline(f1);
818 ixnew[j] = ctnew += skipline(f2);
821 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
826 * GNU diff ignores a missing newline
827 * in one file for -b or -w.
829 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
830 if (c == EOF && d == '\n') {
833 } else if (c == '\n' && d == EOF) {
840 if (flags & D_STRIPCR) {
842 if ((c = getc(f1)) == '\n') {
848 if ((d = getc(f2)) == '\n') {
854 if ((flags & D_FOLDBLANKS) && isspace(c) &&
860 } while (isspace(c = getc(f1)));
865 } while (isspace(d = getc(f2)));
866 } else if ((flags & D_IGNOREBLANKS)) {
867 while (isspace(c) && c != '\n') {
871 while (isspace(d) && d != '\n') {
876 if (chrtran[c] != chrtran[d]) {
879 if (c != '\n' && c != EOF)
880 ctold += skipline(f1);
881 if (d != '\n' && c != EOF)
882 ctnew += skipline(f2);
885 if (c == '\n' || c == EOF)
892 if ((c = getc(f1)) != (d = getc(f2))) {
895 if (c != '\n' && c != EOF)
896 ctold += skipline(f1);
897 if (d != '\n' && c != EOF)
898 ctnew += skipline(f2);
901 if (c == '\n' || c == EOF)
909 for (; j <= len[1]; j++) {
910 ixnew[j] = ctnew += skipline(f2);
914 * fprintf(stderr, "jackpot\n");
918 /* shellsort CACM #201 */
920 sort(struct line *a, int n)
922 struct line *ai, *aim, w;
927 for (j = 1; j <= n; j *= 2)
929 for (m /= 2; m != 0; m /= 2) {
931 for (j = 1; j <= k; j++) {
932 for (ai = &a[j]; ai > a; ai -= m) {
935 break; /* wraparound */
936 if (aim->value > ai[0].value ||
937 (aim->value == ai[0].value &&
938 aim->serial > ai[0].serial))
940 w.value = ai[0].value;
941 ai[0].value = aim->value;
942 aim->value = w.value;
943 w.serial = ai[0].serial;
944 ai[0].serial = aim->serial;
945 aim->serial = w.serial;
952 unsort(struct line *f, int l, int *b)
956 a = xcalloc(l + 1, sizeof(*a));
957 for (i = 1; i <= l; i++)
958 a[f[i].serial] = f[i].value;
959 for (i = 1; i <= l; i++)
969 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
977 int m, i0, i1, j0, j1;
983 J[m + 1] = len[1] + 1;
984 if (diff_format != D_EDIT) {
985 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
986 while (i0 <= m && J[i0] == J[i0 - 1] + 1)
990 while (i1 < m && J[i1 + 1] == 0)
994 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
997 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
998 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
1002 while (i1 > 1 && J[i1 - 1] == 0)
1006 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1010 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1011 if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
1014 if ((c = getc(f1)) == EOF)
1016 diff_output("%c", c);
1020 if (anychange != 0) {
1021 if (diff_format == D_CONTEXT)
1022 dump_context_vec(f1, f2, flags);
1023 else if (diff_format == D_UNIFIED)
1024 dump_unified_vec(f1, f2, flags);
1029 range(int a, int b, const char *separator)
1031 diff_output("%d", a > b ? b : a);
1033 diff_output("%s%d", separator, b);
1037 uni_range(int a, int b)
1040 diff_output("%d,%d", a, b - a + 1);
1042 diff_output("%d", b);
1044 diff_output("%d,0", b);
1048 preadline(int fd, size_t rlen, off_t off)
1053 line = xmalloc(rlen + 1);
1054 if ((nr = pread(fd, line, rlen, off)) < 0)
1055 err(2, "preadline");
1056 if (nr > 0 && line[nr-1] == '\n')
1063 ignoreline(char *line)
1067 ret = regexec(&ignore_re, line, 0, NULL, 0);
1069 return (ret == 0); /* if it matched, it should be ignored. */
1073 * Indicate that there is a difference between lines a and b of the from file
1074 * to get to lines c to d of the to file. If a is greater then b then there
1075 * are no lines in the from file involved and this means that there were
1076 * lines appended (beginning at b). If c is greater than d then there are
1077 * lines missing from the to file.
1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1083 static size_t max_context = 64;
1089 if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1092 if (ignore_pats != NULL) {
1095 * All lines in the change, insert, or delete must
1096 * match an ignore pattern for the change to be
1099 if (a <= b) { /* Changes and deletes. */
1100 for (i = a; i <= b; i++) {
1101 line = preadline(fileno(f1),
1102 ixold[i] - ixold[i - 1], ixold[i - 1]);
1103 if (!ignoreline(line))
1107 if (a > b || c <= d) { /* Changes and inserts. */
1108 for (i = c; i <= d; i++) {
1109 line = preadline(fileno(f2),
1110 ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1111 if (!ignoreline(line))
1118 if (*pflags & D_HEADER) {
1119 diff_output("%s %s %s\n", diffargs, file1, file2);
1120 *pflags &= ~D_HEADER;
1122 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1124 * Allocate change records as needed.
1126 if (context_vec_ptr == context_vec_end - 1) {
1127 ptrdiff_t offset = context_vec_ptr - context_vec_start;
1129 context_vec_start = xreallocarray(context_vec_start,
1130 max_context, sizeof(*context_vec_start));
1131 context_vec_end = context_vec_start + max_context;
1132 context_vec_ptr = context_vec_start + offset;
1134 if (anychange == 0) {
1136 * Print the context/unidiff header first time through.
1138 print_header(file1, file2);
1140 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1141 c > context_vec_ptr->d + (2 * diff_context) + 1) {
1143 * If this change is more than 'diff_context' lines from the
1144 * previous change, dump the record and reset it.
1146 if (diff_format == D_CONTEXT)
1147 dump_context_vec(f1, f2, *pflags);
1149 dump_unified_vec(f1, f2, *pflags);
1152 context_vec_ptr->a = a;
1153 context_vec_ptr->b = b;
1154 context_vec_ptr->c = c;
1155 context_vec_ptr->d = d;
1160 switch (diff_format) {
1166 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1167 if (diff_format == D_NORMAL)
1172 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1178 diff_output("a%d %d\n", b, d - c + 1);
1180 diff_output("d%d %d\n", a, b - a + 1);
1182 /* add changed lines */
1183 diff_output("a%d %d\n", b, d - c + 1);
1187 if (diff_format == D_GFORMAT) {
1189 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1190 nc = ixold[a > b ? b : a - 1] - curpos;
1191 for (i = 0; i < nc; i++)
1192 diff_output("%c", getc(f1));
1193 for (walk = group_format; *walk != '\0'; walk++) {
1198 fetch(ixold, a, b, f1, '<', 1, *pflags);
1201 fetch(ixnew, c, d, f2, '>', 0, *pflags);
1204 diff_output("%%%c", *walk);
1209 diff_output("%c", *walk);
1212 if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1213 fetch(ixold, a, b, f1, '<', 1, *pflags);
1214 if (a <= b && c <= d && diff_format == D_NORMAL)
1215 diff_output("---\n");
1218 if (diff_format != D_GFORMAT)
1219 f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1220 if (f != 0 && diff_format == D_EDIT) {
1222 * A non-zero return value for D_EDIT indicates that the
1223 * last line printed was a bare dot (".") that has been
1224 * escaped as ".." to prevent ed(1) from misinterpreting
1225 * it. We have to add a substitute command to change this
1226 * back and restart where we left off.
1229 diff_output("%ds/.//\n", a + f - 1);
1235 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1238 diff_output("#endif /* %s */\n", ifdefname);
1244 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1246 int i, j, c, lastc, col, nc;
1250 * When doing #ifdef's, copy down to current line
1251 * if this is the first file, so that stuff makes it to output.
1253 if ((diff_format == D_IFDEF) && oldfile) {
1254 long curpos = ftell(lb);
1255 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1256 nc = f[a > b ? b : a - 1] - curpos;
1257 for (i = 0; i < nc; i++)
1258 diff_output("%c", getc(lb));
1262 if (diff_format == D_IFDEF) {
1264 diff_output("#else /* %s%s */\n",
1265 oldfile == 1 ? "!" : "", ifdefname);
1268 diff_output("#ifndef %s\n", ifdefname);
1270 diff_output("#ifdef %s\n", ifdefname);
1272 inifdef = 1 + oldfile;
1274 for (i = a; i <= b; i++) {
1275 fseek(lb, f[i - 1], SEEK_SET);
1276 nc = f[i] - f[i - 1];
1277 if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
1279 diff_output("%c", ch);
1280 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1281 || diff_format == D_UNIFIED))
1283 else if (diff_format != D_UNIFIED)
1287 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1288 if ((c = getc(lb)) == EOF) {
1289 if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1290 diff_format == D_NREVERSE)
1291 warnx("No newline at end of file");
1293 diff_output("\n\\ No newline at end of "
1297 if (c == '\t' && (flags & D_EXPANDTABS)) {
1298 newcol = ((col/tabsize)+1)*tabsize;
1301 } while (++col < newcol);
1303 if (diff_format == D_EDIT && j == 1 && c == '\n'
1306 * Don't print a bare "." line
1307 * since that will confuse ed(1).
1308 * Print ".." instead and return,
1309 * giving the caller an offset
1310 * from which to restart.
1315 diff_output("%c", c);
1324 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1327 readhash(FILE *f, int flags)
1334 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1335 if (flags & D_IGNORECASE)
1336 for (i = 0; (t = getc(f)) != '\n'; i++) {
1337 if (flags & D_STRIPCR && t == '\r') {
1348 sum = sum * 127 + chrtran[t];
1351 for (i = 0; (t = getc(f)) != '\n'; i++) {
1352 if (flags & D_STRIPCR && t == '\r') {
1363 sum = sum * 127 + t;
1367 switch (t = getc(f)) {
1376 if (space && (flags & D_IGNOREBLANKS) == 0) {
1380 sum = sum * 127 + chrtran[t];
1394 * There is a remote possibility that we end up with a zero sum.
1395 * Zero is used as an EOF marker, so return 1 instead.
1397 return (sum == 0 ? 1 : sum);
1403 unsigned char buf[BUFSIZ];
1410 cnt = fread(buf, 1, sizeof(buf), f);
1411 return (memchr(buf, '\0', cnt) == NULL);
1414 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1417 match_function(const long *f, int pos, FILE *fp)
1419 unsigned char buf[FUNCTION_CONTEXT_SIZE];
1421 int last = lastline;
1422 const char *state = NULL;
1425 while (pos > last) {
1426 fseek(fp, f[pos - 1], SEEK_SET);
1427 nc = f[pos] - f[pos - 1];
1428 if (nc >= sizeof(buf))
1429 nc = sizeof(buf) - 1;
1430 nc = fread(buf, 1, nc, fp);
1433 buf[strcspn(buf, "\n")] = '\0';
1434 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1435 if (begins_with(buf, "private:")) {
1437 state = " (private)";
1438 } else if (begins_with(buf, "protected:")) {
1440 state = " (protected)";
1441 } else if (begins_with(buf, "public:")) {
1443 state = " (public)";
1445 strlcpy(lastbuf, buf, sizeof lastbuf);
1447 strlcat(lastbuf, state,
1449 lastmatchline = pos;
1456 return lastmatchline > 0 ? lastbuf : NULL;
1459 /* dump accumulated "context" diff changes */
1461 dump_context_vec(FILE *f1, FILE *f2, int flags)
1463 struct context_vec *cvp = context_vec_start;
1464 int lowa, upb, lowc, upd, do_output;
1468 if (context_vec_start > context_vec_ptr)
1471 b = d = 0; /* gcc */
1472 lowa = MAX(1, cvp->a - diff_context);
1473 upb = MIN(len[0], context_vec_ptr->b + diff_context);
1474 lowc = MAX(1, cvp->c - diff_context);
1475 upd = MIN(len[1], context_vec_ptr->d + diff_context);
1477 diff_output("***************");
1478 if ((flags & D_PROTOTYPE)) {
1479 f = match_function(ixold, lowa-1, f1);
1481 diff_output(" %s", f);
1483 diff_output("\n*** ");
1484 range(lowa, upb, ",");
1485 diff_output(" ****\n");
1488 * Output changes to the "old" file. The first loop suppresses
1489 * output if there were no changes to the "old" file (we'll see
1490 * the "old" lines as context in the "new" list).
1493 for (; cvp <= context_vec_ptr; cvp++)
1494 if (cvp->a <= cvp->b) {
1495 cvp = context_vec_start;
1500 while (cvp <= context_vec_ptr) {
1506 if (a <= b && c <= d)
1509 ch = (a <= b) ? 'd' : 'a';
1512 fetch(ixold, lowa, b, f1, ' ', 0, flags);
1514 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515 fetch(ixold, a, b, f1,
1516 ch == 'c' ? '!' : '-', 0, flags);
1521 fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1523 /* output changes to the "new" file */
1524 diff_output("--- ");
1525 range(lowc, upd, ",");
1526 diff_output(" ----\n");
1529 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1530 if (cvp->c <= cvp->d) {
1531 cvp = context_vec_start;
1536 while (cvp <= context_vec_ptr) {
1542 if (a <= b && c <= d)
1545 ch = (a <= b) ? 'd' : 'a';
1548 fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1550 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1551 fetch(ixnew, c, d, f2,
1552 ch == 'c' ? '!' : '+', 0, flags);
1557 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1559 context_vec_ptr = context_vec_start - 1;
1562 /* dump accumulated "unified" diff changes */
1564 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1566 struct context_vec *cvp = context_vec_start;
1567 int lowa, upb, lowc, upd;
1571 if (context_vec_start > context_vec_ptr)
1574 b = d = 0; /* gcc */
1575 lowa = MAX(1, cvp->a - diff_context);
1576 upb = MIN(len[0], context_vec_ptr->b + diff_context);
1577 lowc = MAX(1, cvp->c - diff_context);
1578 upd = MIN(len[1], context_vec_ptr->d + diff_context);
1580 diff_output("@@ -");
1581 uni_range(lowa, upb);
1583 uni_range(lowc, upd);
1585 if ((flags & D_PROTOTYPE)) {
1586 f = match_function(ixold, lowa-1, f1);
1588 diff_output(" %s", f);
1593 * Output changes in "unified" diff format--the old and new lines
1594 * are printed together.
1596 for (; cvp <= context_vec_ptr; cvp++) {
1603 * c: both new and old changes
1604 * d: only changes in the old file
1605 * a: only changes in the new file
1607 if (a <= b && c <= d)
1610 ch = (a <= b) ? 'd' : 'a';
1614 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1615 fetch(ixold, a, b, f1, '-', 0, flags);
1616 fetch(ixnew, c, d, f2, '+', 0, flags);
1619 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1620 fetch(ixold, a, b, f1, '-', 0, flags);
1623 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1624 fetch(ixnew, c, d, f2, '+', 0, flags);
1630 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1632 context_vec_ptr = context_vec_start - 1;
1636 print_header(const char *file1, const char *file2)
1638 const char *time_format;
1643 struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1644 int nsec1 = stb1.st_mtim.tv_nsec;
1645 int nsec2 = stb2.st_mtim.tv_nsec;
1647 time_format = "%Y-%m-%d %H:%M:%S";
1651 tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1652 tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1653 strftime(buf1, 256, time_format, tm_ptr1);
1654 strftime(buf2, 256, time_format, tm_ptr2);
1656 strftime(end1, 10, "%z", tm_ptr1);
1657 strftime(end2, 10, "%z", tm_ptr2);
1658 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1659 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1661 if (label[0] != NULL)
1662 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1665 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1667 if (label[1] != NULL)
1668 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1671 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",