usr.bin/diff/diffreg.c

   1 /*      $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $     */
   2
   3 /*
   4  * Copyright (C) Caldera International Inc.  2001-2002.
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code and documentation must retain the above
  11  *    copyright notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. All advertising materials mentioning features or use of this software
  16  *    must display the following acknowledgement:
  17  *      This product includes software developed or owned by Caldera
  18  *      International, Inc.
  19  * 4. Neither the name of Caldera International, Inc. nor the names of other
  20  *    contributors may be used to endorse or promote products derived from
  21  *    this software without specific prior written permission.
  22  *
  23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
  24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
  25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
  28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
  33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  34  * POSSIBILITY OF SUCH DAMAGE.
  35  */
  36 /*-
  37  * Copyright (c) 1991, 1993
  38  *      The Regents of the University of California.  All rights reserved.
  39  *
  40  * Redistribution and use in source and binary forms, with or without
  41  * modification, are permitted provided that the following conditions
  42  * are met:
  43  * 1. Redistributions of source code must retain the above copyright
  44  *    notice, this list of conditions and the following disclaimer.
  45  * 2. Redistributions in binary form must reproduce the above copyright
  46  *    notice, this list of conditions and the following disclaimer in the
  47  *    documentation and/or other materials provided with the distribution.
  48  * 3. Neither the name of the University nor the names of its contributors
  49  *    may be used to endorse or promote products derived from this software
  50  *    without specific prior written permission.
  51  *
  52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  62  * SUCH DAMAGE.
  63  *
  64  *      @(#)diffreg.c   8.1 (Berkeley) 6/6/93
  65  */
  66
  67 #include <sys/cdefs.h>
  68 __FBSDID("$FreeBSD$");
  69
  70 #include <sys/capsicum.h>
  71 #include <sys/procdesc.h>
  72 #include <sys/stat.h>
  73 #include <sys/types.h>
  74 #include <sys/event.h>
  75 #include <sys/wait.h>
  76
  77 #include <capsicum_helpers.h>
  78 #include <ctype.h>
  79 #include <err.h>
  80 #include <errno.h>
  81 #include <fcntl.h>
  82 #include <paths.h>
  83 #include <regex.h>
  84 #include <stddef.h>
  85 #include <stdint.h>
  86 #include <stdio.h>
  87 #include <stdlib.h>
  88 #include <string.h>
  89 #include <unistd.h>
  90 #include <limits.h>
  91 #include <signal.h>
  92
  93 #include "diff.h"
  94 #include "xmalloc.h"
  95
  96 #define _PATH_PR "/usr/bin/pr"
  97
  98 /*
  99  * diff - compare two files.
 100  */
 101
 102 /*
 103  *      Uses an algorithm due to Harold Stone, which finds
 104  *      a pair of longest identical subsequences in the two
 105  *      files.
 106  *
 107  *      The major goal is to generate the match vector J.
 108  *      J[i] is the index of the line in file1 corresponding
 109  *      to line i file0. J[i] = 0 if there is no
 110  *      such line in file1.
 111  *
 112  *      Lines are hashed so as to work in core. All potential
 113  *      matches are located by sorting the lines of each file
 114  *      on the hash (called ``value''). In particular, this
 115  *      collects the equivalence classes in file1 together.
 116  *      Subroutine equiv replaces the value of each line in
 117  *      file0 by the index of the first element of its
 118  *      matching equivalence in (the reordered) file1.
 119  *      To save space equiv squeezes file1 into a single
 120  *      array member in which the equivalence classes
 121  *      are simply concatenated, except that their first
 122  *      members are flagged by changing sign.
 123  *
 124  *      Next the indices that point into member are unsorted into
 125  *      array class according to the original order of file0.
 126  *
 127  *      The cleverness lies in routine stone. This marches
 128  *      through the lines of file0, developing a vector klist
 129  *      of "k-candidates". At step i a k-candidate is a matched
 130  *      pair of lines x,y (x in file0 y in file1) such that
 131  *      there is a common subsequence of length k
 132  *      between the first i lines of file0 and the first y
 133  *      lines of file1, but there is no such subsequence for
 134  *      any smaller y. x is the earliest possible mate to y
 135  *      that occurs in such a subsequence.
 136  *
 137  *      Whenever any of the members of the equivalence class of
 138  *      lines in file1 matable to a line in file0 has serial number
 139  *      less than the y of some k-candidate, that k-candidate
 140  *      with the smallest such y is replaced. The new
 141  *      k-candidate is chained (via pred) to the current
 142  *      k-1 candidate so that the actual subsequence can
 143  *      be recovered. When a member has serial number greater
 144  *      that the y of all k-candidates, the klist is extended.
 145  *      At the end, the longest subsequence is pulled out
 146  *      and placed in the array J by unravel
 147  *
 148  *      With J in hand, the matches there recorded are
 149  *      check'ed against reality to assure that no spurious
 150  *      matches have crept in due to hashing. If they have,
 151  *      they are broken, and "jackpot" is recorded--a harmless
 152  *      matter except that a true match for a spuriously
 153  *      mated line may now be unnecessarily reported as a change.
 154  *
 155  *      Much of the complexity of the program comes simply
 156  *      from trying to minimize core utilization and
 157  *      maximize the range of doable problems by dynamically
 158  *      allocating what is needed and reusing what is not.
 159  *      The core requirements for problems larger than somewhat
 160  *      are (in words) 2*length(file0) + length(file1) +
 161  *      3*(number of k-candidates installed),  typically about
 162  *      6n words for files of length n.
 163  */
 164
 165 struct cand {
 166         int     x;
 167         int     y;
 168         int     pred;
 169 };
 170
 171 static struct line {
 172         int     serial;
 173         int     value;
 174 } *file[2];
 175
 176 /*
 177  * The following struct is used to record change information when
 178  * doing a "context" or "unified" diff.  (see routine "change" to
 179  * understand the highly mnemonic field names)
 180  */
 181 struct context_vec {
 182         int     a;              /* start line in old file */
 183         int     b;              /* end line in old file */
 184         int     c;              /* start line in new file */
 185         int     d;              /* end line in new file */
 186 };
 187
 188 #define diff_output     printf
 189 static FILE     *opentemp(const char *);
 190 static void      output(char *, FILE *, char *, FILE *, int);
 191 static void      check(FILE *, FILE *, int);
 192 static void      range(int, int, const char *);
 193 static void      uni_range(int, int);
 194 static void      dump_context_vec(FILE *, FILE *, int);
 195 static void      dump_unified_vec(FILE *, FILE *, int);
 196 static void      prepare(int, FILE *, size_t, int);
 197 static void      prune(void);
 198 static void      equiv(struct line *, int, struct line *, int, int *);
 199 static void      unravel(int);
 200 static void      unsort(struct line *, int, int *);
 201 static void      change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
 202 static void      sort(struct line *, int);
 203 static void      print_header(const char *, const char *);
 204 static int       ignoreline(char *);
 205 static int       asciifile(FILE *);
 206 static int       fetch(long *, int, int, FILE *, int, int, int);
 207 static int       newcand(int, int, int);
 208 static int       search(int *, int, int);
 209 static int       skipline(FILE *);
 210 static int       isqrt(int);
 211 static int       stone(int *, int, int *, int *, int);
 212 static int       readhash(FILE *, int);
 213 static int       files_differ(FILE *, FILE *, int);
 214 static char     *match_function(const long *, int, FILE *);
 215 static char     *preadline(int, size_t, off_t);
 216
 217 static int  *J;                 /* will be overlaid on class */
 218 static int  *class;             /* will be overlaid on file[0] */
 219 static int  *klist;             /* will be overlaid on file[0] after class */
 220 static int  *member;            /* will be overlaid on file[1] */
 221 static int   clen;
 222 static int   inifdef;           /* whether or not we are in a #ifdef block */
 223 static int   len[2];
 224 static int   pref, suff;        /* length of prefix and suffix */
 225 static int   slen[2];
 226 static int   anychange;
 227 static long *ixnew;             /* will be overlaid on file[1] */
 228 static long *ixold;             /* will be overlaid on klist */
 229 static struct cand *clist;      /* merely a free storage pot for candidates */
 230 static int   clistlen;          /* the length of clist */
 231 static struct line *sfile[2];   /* shortened by pruning common prefix/suffix */
 232 static u_char *chrtran;         /* translation table for case-folding */
 233 static struct context_vec *context_vec_start;
 234 static struct context_vec *context_vec_end;
 235 static struct context_vec *context_vec_ptr;
 236
 237 #define FUNCTION_CONTEXT_SIZE   55
 238 static char lastbuf[FUNCTION_CONTEXT_SIZE];
 239 static int lastline;
 240 static int lastmatchline;
 241
 242
 243 /*
 244  * chrtran points to one of 2 translation tables: cup2low if folding upper to
 245  * lower case clow2low if not folding case
 246  */
 247 static u_char clow2low[256] = {
 248         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
 249         0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
 250         0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 251         0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
 252         0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
 253         0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
 254         0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
 255         0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
 256         0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
 257         0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
 258         0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
 259         0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
 260         0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
 261         0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
 262         0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
 263         0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
 264         0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
 265         0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
 266         0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
 267         0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
 268         0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
 269         0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
 270         0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
 271         0xfd, 0xfe, 0xff
 272 };
 273
 274 static u_char cup2low[256] = {
 275         0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
 276         0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
 277         0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
 278         0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
 279         0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
 280         0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
 281         0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
 282         0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
 283         0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
 284         0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
 285         0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
 286         0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
 287         0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
 288         0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
 289         0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
 290         0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
 291         0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
 292         0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
 293         0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
 294         0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
 295         0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
 296         0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
 297         0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
 298         0xfd, 0xfe, 0xff
 299 };
 300
 301 int
 302 diffreg(char *file1, char *file2, int flags, int capsicum)
 303 {
 304         FILE *f1, *f2;
 305         int i, rval;
 306         int     ostdout = -1;
 307         int pr_pd, kq;
 308         struct kevent *e;
 309         cap_rights_t rights_ro;
 310
 311         e = NULL;
 312         kq = -1;
 313         f1 = f2 = NULL;
 314         rval = D_SAME;
 315         anychange = 0;
 316         lastline = 0;
 317         lastmatchline = 0;
 318         context_vec_ptr = context_vec_start - 1;
 319         if (flags & D_IGNORECASE)
 320                 chrtran = cup2low;
 321         else
 322                 chrtran = clow2low;
 323         if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
 324                 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
 325         if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
 326                 goto closem;
 327
 328         if (flags & D_EMPTY1)
 329                 f1 = fopen(_PATH_DEVNULL, "r");
 330         else {
 331                 if (!S_ISREG(stb1.st_mode)) {
 332                         if ((f1 = opentemp(file1)) == NULL ||
 333                             fstat(fileno(f1), &stb1) < 0) {
 334                                 warn("%s", file1);
 335                                 status |= 2;
 336                                 goto closem;
 337                         }
 338                 } else if (strcmp(file1, "-") == 0)
 339                         f1 = stdin;
 340                 else
 341                         f1 = fopen(file1, "r");
 342         }
 343         if (f1 == NULL) {
 344                 warn("%s", file1);
 345                 status |= 2;
 346                 goto closem;
 347         }
 348
 349         if (flags & D_EMPTY2)
 350                 f2 = fopen(_PATH_DEVNULL, "r");
 351         else {
 352                 if (!S_ISREG(stb2.st_mode)) {
 353                         if ((f2 = opentemp(file2)) == NULL ||
 354                             fstat(fileno(f2), &stb2) < 0) {
 355                                 warn("%s", file2);
 356                                 status |= 2;
 357                                 goto closem;
 358                         }
 359                 } else if (strcmp(file2, "-") == 0)
 360                         f2 = stdin;
 361                 else
 362                         f2 = fopen(file2, "r");
 363         }
 364         if (f2 == NULL) {
 365                 warn("%s", file2);
 366                 status |= 2;
 367                 goto closem;
 368         }
 369
 370         if (lflag) {
 371                 /* redirect stdout to pr */
 372                 int      pfd[2];
 373                 pid_t   pid;
 374                 char    *header;
 375
 376                 xasprintf(&header, "%s %s %s", diffargs, file1, file2);
 377                 signal(SIGPIPE, SIG_IGN);
 378                 fflush(stdout);
 379                 rewind(stdout);
 380                 pipe(pfd);
 381                 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
 382                 case -1:
 383                         status |= 2;
 384                         free(header);
 385                         err(2, "No more processes");
 386                 case 0:
 387                         /* child */
 388                         if (pfd[0] != STDIN_FILENO) {
 389                                 dup2(pfd[0], STDIN_FILENO);
 390                                 close(pfd[0]);
 391                         }
 392                         close(pfd[1]);
 393                         execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
 394                         _exit(127);
 395                 default:
 396
 397                         /* parent */
 398                         if (pfd[1] != STDOUT_FILENO) {
 399                                 ostdout = dup(STDOUT_FILENO);
 400                                 dup2(pfd[1], STDOUT_FILENO);
 401                                 close(pfd[1]);
 402                         }
 403                         close(pfd[0]);
 404                         rewind(stdout);
 405                         free(header);
 406                         kq = kqueue();
 407                         if (kq == -1)
 408                                 err(2, "kqueue");
 409                         e = xmalloc(sizeof(struct kevent));
 410                         EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
 411                             NULL);
 412                         if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
 413                                 err(2, "kevent");
 414                 }
 415         }
 416
 417         if (capsicum) {
 418                 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
 419                 if (cap_rights_limit(fileno(f1), &rights_ro) < 0
 420                     && errno != ENOSYS)
 421                         err(2, "unable to limit rights on: %s", file1);
 422                 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
 423                     errno != ENOSYS)
 424                         err(2, "unable to limit rights on: %s", file2);
 425                 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
 426                         /* stding has already been limited */
 427                         if (caph_limit_stderr() == -1)
 428                                 err(2, "unable to limit stderr");
 429                         if (caph_limit_stdout() == -1)
 430                                 err(2, "unable to limit stdout");
 431                 } else if (caph_limit_stdio() == -1)
 432                                 err(2, "unable to limit stdio");
 433
 434                 caph_cache_catpages();
 435                 caph_cache_tzdata();
 436                 if (cap_enter() < 0 && errno != ENOSYS)
 437                         err(2, "unable to enter capability mode");
 438         }
 439
 440         switch (files_differ(f1, f2, flags)) {
 441         case 0:
 442                 goto closem;
 443         case 1:
 444                 break;
 445         default:
 446                 /* error */
 447                 status |= 2;
 448                 goto closem;
 449         }
 450
 451         if ((flags & D_FORCEASCII) == 0 &&
 452             (!asciifile(f1) || !asciifile(f2))) {
 453                 rval = D_BINARY;
 454                 status |= 1;
 455                 goto closem;
 456         }
 457         prepare(0, f1, stb1.st_size, flags);
 458         prepare(1, f2, stb2.st_size, flags);
 459
 460         prune();
 461         sort(sfile[0], slen[0]);
 462         sort(sfile[1], slen[1]);
 463
 464         member = (int *)file[1];
 465         equiv(sfile[0], slen[0], sfile[1], slen[1], member);
 466         member = xreallocarray(member, slen[1] + 2, sizeof(*member));
 467
 468         class = (int *)file[0];
 469         unsort(sfile[0], slen[0], class);
 470         class = xreallocarray(class, slen[0] + 2, sizeof(*class));
 471
 472         klist = xcalloc(slen[0] + 2, sizeof(*klist));
 473         clen = 0;
 474         clistlen = 100;
 475         clist = xcalloc(clistlen, sizeof(*clist));
 476         i = stone(class, slen[0], member, klist, flags);
 477         free(member);
 478         free(class);
 479
 480         J = xreallocarray(J, len[0] + 2, sizeof(*J));
 481         unravel(klist[i]);
 482         free(clist);
 483         free(klist);
 484
 485         ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
 486         ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
 487         check(f1, f2, flags);
 488         output(file1, f1, file2, f2, flags);
 489         if (ostdout != -1 && e != NULL) {
 490                 /* close the pipe to pr and restore stdout */
 491                 int wstatus;
 492
 493                 fflush(stdout);
 494                 if (ostdout != STDOUT_FILENO) {
 495                         close(STDOUT_FILENO);
 496                         dup2(ostdout, STDOUT_FILENO);
 497                         close(ostdout);
 498                 }
 499                 if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
 500                         err(2, "kevent");
 501                 wstatus = e[0].data;
 502                 close(kq);
 503                 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
 504                         errx(2, "pr exited abnormally");
 505                 else if (WIFSIGNALED(wstatus))
 506                         errx(2, "pr killed by signal %d",
 507                             WTERMSIG(wstatus));
 508         }
 509
 510 closem:
 511         if (anychange) {
 512                 status |= 1;
 513                 if (rval == D_SAME)
 514                         rval = D_DIFFER;
 515         }
 516         if (f1 != NULL)
 517                 fclose(f1);
 518         if (f2 != NULL)
 519                 fclose(f2);
 520
 521         return (rval);
 522 }
 523
 524 /*
 525  * Check to see if the given files differ.
 526  * Returns 0 if they are the same, 1 if different, and -1 on error.
 527  * XXX - could use code from cmp(1) [faster]
 528  */
 529 static int
 530 files_differ(FILE *f1, FILE *f2, int flags)
 531 {
 532         char buf1[BUFSIZ], buf2[BUFSIZ];
 533         size_t i, j;
 534
 535         if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
 536             (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
 537                 return (1);
 538         for (;;) {
 539                 i = fread(buf1, 1, sizeof(buf1), f1);
 540                 j = fread(buf2, 1, sizeof(buf2), f2);
 541                 if ((!i && ferror(f1)) || (!j && ferror(f2)))
 542                         return (-1);
 543                 if (i != j)
 544                         return (1);
 545                 if (i == 0)
 546                         return (0);
 547                 if (memcmp(buf1, buf2, i) != 0)
 548                         return (1);
 549         }
 550 }
 551
 552 static FILE *
 553 opentemp(const char *f)
 554 {
 555         char buf[BUFSIZ], tempfile[PATH_MAX];
 556         ssize_t nread;
 557         int ifd, ofd;
 558
 559         if (strcmp(f, "-") == 0)
 560                 ifd = STDIN_FILENO;
 561         else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
 562                 return (NULL);
 563
 564         (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
 565
 566         if ((ofd = mkstemp(tempfile)) < 0) {
 567                 close(ifd);
 568                 return (NULL);
 569         }
 570         unlink(tempfile);
 571         while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
 572                 if (write(ofd, buf, nread) != nread) {
 573                         close(ifd);
 574                         close(ofd);
 575                         return (NULL);
 576                 }
 577         }
 578         close(ifd);
 579         lseek(ofd, (off_t)0, SEEK_SET);
 580         return (fdopen(ofd, "r"));
 581 }
 582
 583 char *
 584 splice(char *dir, char *path)
 585 {
 586         char *tail, *buf;
 587         size_t dirlen;
 588
 589         dirlen = strlen(dir);
 590         while (dirlen != 0 && dir[dirlen - 1] == '/')
 591             dirlen--;
 592         if ((tail = strrchr(path, '/')) == NULL)
 593                 tail = path;
 594         else
 595                 tail++;
 596         xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
 597         return (buf);
 598 }
 599
 600 static void
 601 prepare(int i, FILE *fd, size_t filesize, int flags)
 602 {
 603         struct line *p;
 604         int h;
 605         size_t sz, j;
 606
 607         rewind(fd);
 608
 609         sz = MIN(filesize, SIZE_MAX) / 25;
 610         if (sz < 100)
 611                 sz = 100;
 612
 613         p = xcalloc(sz + 3, sizeof(*p));
 614         for (j = 0; (h = readhash(fd, flags));) {
 615                 if (j == sz) {
 616                         sz = sz * 3 / 2;
 617                         p = xreallocarray(p, sz + 3, sizeof(*p));
 618                 }
 619                 p[++j].value = h;
 620         }
 621         len[i] = j;
 622         file[i] = p;
 623 }
 624
 625 static void
 626 prune(void)
 627 {
 628         int i, j;
 629
 630         for (pref = 0; pref < len[0] && pref < len[1] &&
 631             file[0][pref + 1].value == file[1][pref + 1].value;
 632             pref++)
 633                 ;
 634         for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
 635             file[0][len[0] - suff].value == file[1][len[1] - suff].value;
 636             suff++)
 637                 ;
 638         for (j = 0; j < 2; j++) {
 639                 sfile[j] = file[j] + pref;
 640                 slen[j] = len[j] - pref - suff;
 641                 for (i = 0; i <= slen[j]; i++)
 642                         sfile[j][i].serial = i;
 643         }
 644 }
 645
 646 static void
 647 equiv(struct line *a, int n, struct line *b, int m, int *c)
 648 {
 649         int i, j;
 650
 651         i = j = 1;
 652         while (i <= n && j <= m) {
 653                 if (a[i].value < b[j].value)
 654                         a[i++].value = 0;
 655                 else if (a[i].value == b[j].value)
 656                         a[i++].value = j;
 657                 else
 658                         j++;
 659         }
 660         while (i <= n)
 661                 a[i++].value = 0;
 662         b[m + 1].value = 0;
 663         j = 0;
 664         while (++j <= m) {
 665                 c[j] = -b[j].serial;
 666                 while (b[j + 1].value == b[j].value) {
 667                         j++;
 668                         c[j] = b[j].serial;
 669                 }
 670         }
 671         c[j] = -1;
 672 }
 673
 674 /* Code taken from ping.c */
 675 static int
 676 isqrt(int n)
 677 {
 678         int y, x = 1;
 679
 680         if (n == 0)
 681                 return (0);
 682
 683         do { /* newton was a stinker */
 684                 y = x;
 685                 x = n / x;
 686                 x += y;
 687                 x /= 2;
 688         } while ((x - y) > 1 || (x - y) < -1);
 689
 690         return (x);
 691 }
 692
 693 static int
 694 stone(int *a, int n, int *b, int *c, int flags)
 695 {
 696         int i, k, y, j, l;
 697         int oldc, tc, oldl, sq;
 698         u_int numtries, bound;
 699
 700         if (flags & D_MINIMAL)
 701                 bound = UINT_MAX;
 702         else {
 703                 sq = isqrt(n);
 704                 bound = MAX(256, sq);
 705         }
 706
 707         k = 0;
 708         c[0] = newcand(0, 0, 0);
 709         for (i = 1; i <= n; i++) {
 710                 j = a[i];
 711                 if (j == 0)
 712                         continue;
 713                 y = -b[j];
 714                 oldl = 0;
 715                 oldc = c[0];
 716                 numtries = 0;
 717                 do {
 718                         if (y <= clist[oldc].y)
 719                                 continue;
 720                         l = search(c, k, y);
 721                         if (l != oldl + 1)
 722                                 oldc = c[l - 1];
 723                         if (l <= k) {
 724                                 if (clist[c[l]].y <= y)
 725                                         continue;
 726                                 tc = c[l];
 727                                 c[l] = newcand(i, y, oldc);
 728                                 oldc = tc;
 729                                 oldl = l;
 730                                 numtries++;
 731                         } else {
 732                                 c[l] = newcand(i, y, oldc);
 733                                 k++;
 734                                 break;
 735                         }
 736                 } while ((y = b[++j]) > 0 && numtries < bound);
 737         }
 738         return (k);
 739 }
 740
 741 static int
 742 newcand(int x, int y, int pred)
 743 {
 744         struct cand *q;
 745
 746         if (clen == clistlen) {
 747                 clistlen = clistlen * 11 / 10;
 748                 clist = xreallocarray(clist, clistlen, sizeof(*clist));
 749         }
 750         q = clist + clen;
 751         q->x = x;
 752         q->y = y;
 753         q->pred = pred;
 754         return (clen++);
 755 }
 756
 757 static int
 758 search(int *c, int k, int y)
 759 {
 760         int i, j, l, t;
 761
 762         if (clist[c[k]].y < y)  /* quick look for typical case */
 763                 return (k + 1);
 764         i = 0;
 765         j = k + 1;
 766         for (;;) {
 767                 l = (i + j) / 2;
 768                 if (l <= i)
 769                         break;
 770                 t = clist[c[l]].y;
 771                 if (t > y)
 772                         j = l;
 773                 else if (t < y)
 774                         i = l;
 775                 else
 776                         return (l);
 777         }
 778         return (l + 1);
 779 }
 780
 781 static void
 782 unravel(int p)
 783 {
 784         struct cand *q;
 785         int i;
 786
 787         for (i = 0; i <= len[0]; i++)
 788                 J[i] = i <= pref ? i :
 789                     i > len[0] - suff ? i + len[1] - len[0] : 0;
 790         for (q = clist + p; q->y != 0; q = clist + q->pred)
 791                 J[q->x + pref] = q->y + pref;
 792 }
 793
 794 /*
 795  * Check does double duty:
 796  *  1.  ferret out any fortuitous correspondences due
 797  *      to confounding by hashing (which result in "jackpot")
 798  *  2.  collect random access indexes to the two files
 799  */
 800 static void
 801 check(FILE *f1, FILE *f2, int flags)
 802 {
 803         int i, j, jackpot, c, d;
 804         long ctold, ctnew;
 805
 806         rewind(f1);
 807         rewind(f2);
 808         j = 1;
 809         ixold[0] = ixnew[0] = 0;
 810         jackpot = 0;
 811         ctold = ctnew = 0;
 812         for (i = 1; i <= len[0]; i++) {
 813                 if (J[i] == 0) {
 814                         ixold[i] = ctold += skipline(f1);
 815                         continue;
 816                 }
 817                 while (j < J[i]) {
 818                         ixnew[j] = ctnew += skipline(f2);
 819                         j++;
 820                 }
 821                 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
 822                         for (;;) {
 823                                 c = getc(f1);
 824                                 d = getc(f2);
 825                                 /*
 826                                  * GNU diff ignores a missing newline
 827                                  * in one file for -b or -w.
 828                                  */
 829                                 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
 830                                         if (c == EOF && d == '\n') {
 831                                                 ctnew++;
 832                                                 break;
 833                                         } else if (c == '\n' && d == EOF) {
 834                                                 ctold++;
 835                                                 break;
 836                                         }
 837                                 }
 838                                 ctold++;
 839                                 ctnew++;
 840                                 if (flags & D_STRIPCR) {
 841                                         if (c == '\r') {
 842                                                 if ((c = getc(f1)) == '\n') {
 843                                                         ctnew++;
 844                                                         break;
 845                                                 }
 846                                         }
 847                                         if (d == '\r') {
 848                                                 if ((d = getc(f2)) == '\n') {
 849                                                         ctold++;
 850                                                         break;
 851                                                 }
 852                                         }
 853                                 }
 854                                 if ((flags & D_FOLDBLANKS) && isspace(c) &&
 855                                     isspace(d)) {
 856                                         do {
 857                                                 if (c == '\n')
 858                                                         break;
 859                                                 ctold++;
 860                                         } while (isspace(c = getc(f1)));
 861                                         do {
 862                                                 if (d == '\n')
 863                                                         break;
 864                                                 ctnew++;
 865                                         } while (isspace(d = getc(f2)));
 866                                 } else if ((flags & D_IGNOREBLANKS)) {
 867                                         while (isspace(c) && c != '\n') {
 868                                                 c = getc(f1);
 869                                                 ctold++;
 870                                         }
 871                                         while (isspace(d) && d != '\n') {
 872                                                 d = getc(f2);
 873                                                 ctnew++;
 874                                         }
 875                                 }
 876                                 if (chrtran[c] != chrtran[d]) {
 877                                         jackpot++;
 878                                         J[i] = 0;
 879                                         if (c != '\n' && c != EOF)
 880                                                 ctold += skipline(f1);
 881                                         if (d != '\n' && c != EOF)
 882                                                 ctnew += skipline(f2);
 883                                         break;
 884                                 }
 885                                 if (c == '\n' || c == EOF)
 886                                         break;
 887                         }
 888                 } else {
 889                         for (;;) {
 890                                 ctold++;
 891                                 ctnew++;
 892                                 if ((c = getc(f1)) != (d = getc(f2))) {
 893                                         /* jackpot++; */
 894                                         J[i] = 0;
 895                                         if (c != '\n' && c != EOF)
 896                                                 ctold += skipline(f1);
 897                                         if (d != '\n' && c != EOF)
 898                                                 ctnew += skipline(f2);
 899                                         break;
 900                                 }
 901                                 if (c == '\n' || c == EOF)
 902                                         break;
 903                         }
 904                 }
 905                 ixold[i] = ctold;
 906                 ixnew[j] = ctnew;
 907                 j++;
 908         }
 909         for (; j <= len[1]; j++) {
 910                 ixnew[j] = ctnew += skipline(f2);
 911         }
 912         /*
 913          * if (jackpot)
 914          *      fprintf(stderr, "jackpot\n");
 915          */
 916 }
 917
 918 /* shellsort CACM #201 */
 919 static void
 920 sort(struct line *a, int n)
 921 {
 922         struct line *ai, *aim, w;
 923         int j, m = 0, k;
 924
 925         if (n == 0)
 926                 return;
 927         for (j = 1; j <= n; j *= 2)
 928                 m = 2 * j - 1;
 929         for (m /= 2; m != 0; m /= 2) {
 930                 k = n - m;
 931                 for (j = 1; j <= k; j++) {
 932                         for (ai = &a[j]; ai > a; ai -= m) {
 933                                 aim = &ai[m];
 934                                 if (aim < ai)
 935                                         break;  /* wraparound */
 936                                 if (aim->value > ai[0].value ||
 937                                     (aim->value == ai[0].value &&
 938                                         aim->serial > ai[0].serial))
 939                                         break;
 940                                 w.value = ai[0].value;
 941                                 ai[0].value = aim->value;
 942                                 aim->value = w.value;
 943                                 w.serial = ai[0].serial;
 944                                 ai[0].serial = aim->serial;
 945                                 aim->serial = w.serial;
 946                         }
 947                 }
 948         }
 949 }
 950
 951 static void
 952 unsort(struct line *f, int l, int *b)
 953 {
 954         int *a, i;
 955
 956         a = xcalloc(l + 1, sizeof(*a));
 957         for (i = 1; i <= l; i++)
 958                 a[f[i].serial] = f[i].value;
 959         for (i = 1; i <= l; i++)
 960                 b[i] = a[i];
 961         free(a);
 962 }
 963
 964 static int
 965 skipline(FILE *f)
 966 {
 967         int i, c;
 968
 969         for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
 970                 continue;
 971         return (i);
 972 }
 973
 974 static void
 975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
 976 {
 977         int m, i0, i1, j0, j1;
 978
 979         rewind(f1);
 980         rewind(f2);
 981         m = len[0];
 982         J[0] = 0;
 983         J[m + 1] = len[1] + 1;
 984         if (diff_format != D_EDIT) {
 985                 for (i0 = 1; i0 <= m; i0 = i1 + 1) {
 986                         while (i0 <= m && J[i0] == J[i0 - 1] + 1)
 987                                 i0++;
 988                         j0 = J[i0 - 1] + 1;
 989                         i1 = i0 - 1;
 990                         while (i1 < m && J[i1 + 1] == 0)
 991                                 i1++;
 992                         j1 = J[i1 + 1] - 1;
 993                         J[i1] = j1;
 994                         change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
 995                 }
 996         } else {
 997                 for (i0 = m; i0 >= 1; i0 = i1 - 1) {
 998                         while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
 999                                 i0--;
1000                         j0 = J[i0 + 1] - 1;
1001                         i1 = i0 + 1;
1002                         while (i1 > 1 && J[i1 - 1] == 0)
1003                                 i1--;
1004                         j1 = J[i1 - 1] + 1;
1005                         J[i1] = j1;
1006                         change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1007                 }
1008         }
1009         if (m == 0)
1010                 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1011         if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
1012                 for (;;) {
1013 #define c i0
1014                         if ((c = getc(f1)) == EOF)
1015                                 return;
1016                         diff_output("%c", c);
1017                 }
1018 #undef c
1019         }
1020         if (anychange != 0) {
1021                 if (diff_format == D_CONTEXT)
1022                         dump_context_vec(f1, f2, flags);
1023                 else if (diff_format == D_UNIFIED)
1024                         dump_unified_vec(f1, f2, flags);
1025         }
1026 }
1027
1028 static void
1029 range(int a, int b, const char *separator)
1030 {
1031         diff_output("%d", a > b ? b : a);
1032         if (a < b)
1033                 diff_output("%s%d", separator, b);
1034 }
1035
1036 static void
1037 uni_range(int a, int b)
1038 {
1039         if (a < b)
1040                 diff_output("%d,%d", a, b - a + 1);
1041         else if (a == b)
1042                 diff_output("%d", b);
1043         else
1044                 diff_output("%d,0", b);
1045 }
1046
1047 static char *
1048 preadline(int fd, size_t rlen, off_t off)
1049 {
1050         char *line;
1051         ssize_t nr;
1052
1053         line = xmalloc(rlen + 1);
1054         if ((nr = pread(fd, line, rlen, off)) < 0)
1055                 err(2, "preadline");
1056         if (nr > 0 && line[nr-1] == '\n')
1057                 nr--;
1058         line[nr] = '\0';
1059         return (line);
1060 }
1061
1062 static int
1063 ignoreline(char *line)
1064 {
1065         int ret;
1066
1067         ret = regexec(&ignore_re, line, 0, NULL, 0);
1068         free(line);
1069         return (ret == 0);      /* if it matched, it should be ignored. */
1070 }
1071
1072 /*
1073  * Indicate that there is a difference between lines a and b of the from file
1074  * to get to lines c to d of the to file.  If a is greater then b then there
1075  * are no lines in the from file involved and this means that there were
1076  * lines appended (beginning at b).  If c is greater than d then there are
1077  * lines missing from the to file.
1078  */
1079 static void
1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1081     int *pflags)
1082 {
1083         static size_t max_context = 64;
1084         long curpos;
1085         int i, nc, f;
1086         const char *walk;
1087
1088 restart:
1089         if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1090             a > b && c > d)
1091                 return;
1092         if (ignore_pats != NULL) {
1093                 char *line;
1094                 /*
1095                  * All lines in the change, insert, or delete must
1096                  * match an ignore pattern for the change to be
1097                  * ignored.
1098                  */
1099                 if (a <= b) {           /* Changes and deletes. */
1100                         for (i = a; i <= b; i++) {
1101                                 line = preadline(fileno(f1),
1102                                     ixold[i] - ixold[i - 1], ixold[i - 1]);
1103                                 if (!ignoreline(line))
1104                                         goto proceed;
1105                         }
1106                 }
1107                 if (a > b || c <= d) {  /* Changes and inserts. */
1108                         for (i = c; i <= d; i++) {
1109                                 line = preadline(fileno(f2),
1110                                     ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1111                                 if (!ignoreline(line))
1112                                         goto proceed;
1113                         }
1114                 }
1115                 return;
1116         }
1117 proceed:
1118         if (*pflags & D_HEADER) {
1119                 diff_output("%s %s %s\n", diffargs, file1, file2);
1120                 *pflags &= ~D_HEADER;
1121         }
1122         if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1123                 /*
1124                  * Allocate change records as needed.
1125                  */
1126                 if (context_vec_ptr == context_vec_end - 1) {
1127                         ptrdiff_t offset = context_vec_ptr - context_vec_start;
1128                         max_context <<= 1;
1129                         context_vec_start = xreallocarray(context_vec_start,
1130                             max_context, sizeof(*context_vec_start));
1131                         context_vec_end = context_vec_start + max_context;
1132                         context_vec_ptr = context_vec_start + offset;
1133                 }
1134                 if (anychange == 0) {
1135                         /*
1136                          * Print the context/unidiff header first time through.
1137                          */
1138                         print_header(file1, file2);
1139                         anychange = 1;
1140                 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1141                     c > context_vec_ptr->d + (2 * diff_context) + 1) {
1142                         /*
1143                          * If this change is more than 'diff_context' lines from the
1144                          * previous change, dump the record and reset it.
1145                          */
1146                         if (diff_format == D_CONTEXT)
1147                                 dump_context_vec(f1, f2, *pflags);
1148                         else
1149                                 dump_unified_vec(f1, f2, *pflags);
1150                 }
1151                 context_vec_ptr++;
1152                 context_vec_ptr->a = a;
1153                 context_vec_ptr->b = b;
1154                 context_vec_ptr->c = c;
1155                 context_vec_ptr->d = d;
1156                 return;
1157         }
1158         if (anychange == 0)
1159                 anychange = 1;
1160         switch (diff_format) {
1161         case D_BRIEF:
1162                 return;
1163         case D_NORMAL:
1164         case D_EDIT:
1165                 range(a, b, ",");
1166                 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1167                 if (diff_format == D_NORMAL)
1168                         range(c, d, ",");
1169                 diff_output("\n");
1170                 break;
1171         case D_REVERSE:
1172                 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1173                 range(a, b, " ");
1174                 diff_output("\n");
1175                 break;
1176         case D_NREVERSE:
1177                 if (a > b)
1178                         diff_output("a%d %d\n", b, d - c + 1);
1179                 else {
1180                         diff_output("d%d %d\n", a, b - a + 1);
1181                         if (!(c > d))
1182                                 /* add changed lines */
1183                                 diff_output("a%d %d\n", b, d - c + 1);
1184                 }
1185                 break;
1186         }
1187         if (diff_format == D_GFORMAT) {
1188                 curpos = ftell(f1);
1189                 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1190                 nc = ixold[a > b ? b : a - 1] - curpos;
1191                 for (i = 0; i < nc; i++)
1192                         diff_output("%c", getc(f1));
1193                 for (walk = group_format; *walk != '\0'; walk++) {
1194                         if (*walk == '%') {
1195                                 walk++;
1196                                 switch (*walk) {
1197                                 case '<':
1198                                         fetch(ixold, a, b, f1, '<', 1, *pflags);
1199                                         break;
1200                                 case '>':
1201                                         fetch(ixnew, c, d, f2, '>', 0, *pflags);
1202                                         break;
1203                                 default:
1204                                         diff_output("%%%c", *walk);
1205                                         break;
1206                                 }
1207                                 continue;
1208                         }
1209                         diff_output("%c", *walk);
1210                 }
1211         }
1212         if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1213                 fetch(ixold, a, b, f1, '<', 1, *pflags);
1214                 if (a <= b && c <= d && diff_format == D_NORMAL)
1215                         diff_output("---\n");
1216         }
1217         f = 0;
1218         if (diff_format != D_GFORMAT)
1219                 f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1220         if (f != 0 && diff_format == D_EDIT) {
1221                 /*
1222                  * A non-zero return value for D_EDIT indicates that the
1223                  * last line printed was a bare dot (".") that has been
1224                  * escaped as ".." to prevent ed(1) from misinterpreting
1225                  * it.  We have to add a substitute command to change this
1226                  * back and restart where we left off.
1227                  */
1228                 diff_output(".\n");
1229                 diff_output("%ds/.//\n", a + f - 1);
1230                 b = a + f - 1;
1231                 a = b + 1;
1232                 c += f;
1233                 goto restart;
1234         }
1235         if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1236                 diff_output(".\n");
1237         if (inifdef) {
1238                 diff_output("#endif /* %s */\n", ifdefname);
1239                 inifdef = 0;
1240         }
1241 }
1242
1243 static int
1244 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1245 {
1246         int i, j, c, lastc, col, nc;
1247         int     newcol;
1248
1249         /*
1250          * When doing #ifdef's, copy down to current line
1251          * if this is the first file, so that stuff makes it to output.
1252          */
1253         if ((diff_format == D_IFDEF) && oldfile) {
1254                 long curpos = ftell(lb);
1255                 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1256                 nc = f[a > b ? b : a - 1] - curpos;
1257                 for (i = 0; i < nc; i++)
1258                         diff_output("%c", getc(lb));
1259         }
1260         if (a > b)
1261                 return (0);
1262         if (diff_format == D_IFDEF) {
1263                 if (inifdef) {
1264                         diff_output("#else /* %s%s */\n",
1265                             oldfile == 1 ? "!" : "", ifdefname);
1266                 } else {
1267                         if (oldfile)
1268                                 diff_output("#ifndef %s\n", ifdefname);
1269                         else
1270                                 diff_output("#ifdef %s\n", ifdefname);
1271                 }
1272                 inifdef = 1 + oldfile;
1273         }
1274         for (i = a; i <= b; i++) {
1275                 fseek(lb, f[i - 1], SEEK_SET);
1276                 nc = f[i] - f[i - 1];
1277                 if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
1278                     ch != '\0') {
1279                         diff_output("%c", ch);
1280                         if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1281                             || diff_format == D_UNIFIED))
1282                                 diff_output("\t");
1283                         else if (diff_format != D_UNIFIED)
1284                                 diff_output(" ");
1285                 }
1286                 col = 0;
1287                 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1288                         if ((c = getc(lb)) == EOF) {
1289                                 if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1290                                     diff_format == D_NREVERSE)
1291                                         warnx("No newline at end of file");
1292                                 else
1293                                         diff_output("\n\\ No newline at end of "
1294                                             "file\n");
1295                                 return (0);
1296                         }
1297                         if (c == '\t' && (flags & D_EXPANDTABS)) {
1298                                 newcol = ((col/tabsize)+1)*tabsize;
1299                                 do {
1300                                         diff_output(" ");
1301                                 } while (++col < newcol);
1302                         } else {
1303                                 if (diff_format == D_EDIT && j == 1 && c == '\n'
1304                                     && lastc == '.') {
1305                                         /*
1306                                          * Don't print a bare "." line
1307                                          * since that will confuse ed(1).
1308                                          * Print ".." instead and return,
1309                                          * giving the caller an offset
1310                                          * from which to restart.
1311                                          */
1312                                         diff_output(".\n");
1313                                         return (i - a + 1);
1314                                 }
1315                                 diff_output("%c", c);
1316                                 col++;
1317                         }
1318                 }
1319         }
1320         return (0);
1321 }
1322
1323 /*
1324  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1325  */
1326 static int
1327 readhash(FILE *f, int flags)
1328 {
1329         int i, t, space;
1330         int sum;
1331
1332         sum = 1;
1333         space = 0;
1334         if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1335                 if (flags & D_IGNORECASE)
1336                         for (i = 0; (t = getc(f)) != '\n'; i++) {
1337                                 if (flags & D_STRIPCR && t == '\r') {
1338                                         t = getc(f);
1339                                         if (t == '\n')
1340                                                 break;
1341                                         ungetc(t, f);
1342                                 }
1343                                 if (t == EOF) {
1344                                         if (i == 0)
1345                                                 return (0);
1346                                         break;
1347                                 }
1348                                 sum = sum * 127 + chrtran[t];
1349                         }
1350                 else
1351                         for (i = 0; (t = getc(f)) != '\n'; i++) {
1352                                 if (flags & D_STRIPCR && t == '\r') {
1353                                         t = getc(f);
1354                                         if (t == '\n')
1355                                                 break;
1356                                         ungetc(t, f);
1357                                 }
1358                                 if (t == EOF) {
1359                                         if (i == 0)
1360                                                 return (0);
1361                                         break;
1362                                 }
1363                                 sum = sum * 127 + t;
1364                         }
1365         } else {
1366                 for (i = 0;;) {
1367                         switch (t = getc(f)) {
1368                         case '\r':
1369                         case '\t':
1370                         case '\v':
1371                         case '\f':
1372                         case ' ':
1373                                 space++;
1374                                 continue;
1375                         default:
1376                                 if (space && (flags & D_IGNOREBLANKS) == 0) {
1377                                         i++;
1378                                         space = 0;
1379                                 }
1380                                 sum = sum * 127 + chrtran[t];
1381                                 i++;
1382                                 continue;
1383                         case EOF:
1384                                 if (i == 0)
1385                                         return (0);
1386                                 /* FALLTHROUGH */
1387                         case '\n':
1388                                 break;
1389                         }
1390                         break;
1391                 }
1392         }
1393         /*
1394          * There is a remote possibility that we end up with a zero sum.
1395          * Zero is used as an EOF marker, so return 1 instead.
1396          */
1397         return (sum == 0 ? 1 : sum);
1398 }
1399
1400 static int
1401 asciifile(FILE *f)
1402 {
1403         unsigned char buf[BUFSIZ];
1404         size_t cnt;
1405
1406         if (f == NULL)
1407                 return (1);
1408
1409         rewind(f);
1410         cnt = fread(buf, 1, sizeof(buf), f);
1411         return (memchr(buf, '\0', cnt) == NULL);
1412 }
1413
1414 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1415
1416 static char *
1417 match_function(const long *f, int pos, FILE *fp)
1418 {
1419         unsigned char buf[FUNCTION_CONTEXT_SIZE];
1420         size_t nc;
1421         int last = lastline;
1422         const char *state = NULL;
1423
1424         lastline = pos;
1425         while (pos > last) {
1426                 fseek(fp, f[pos - 1], SEEK_SET);
1427                 nc = f[pos] - f[pos - 1];
1428                 if (nc >= sizeof(buf))
1429                         nc = sizeof(buf) - 1;
1430                 nc = fread(buf, 1, nc, fp);
1431                 if (nc > 0) {
1432                         buf[nc] = '\0';
1433                         buf[strcspn(buf, "\n")] = '\0';
1434                         if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1435                                 if (begins_with(buf, "private:")) {
1436                                         if (!state)
1437                                                 state = " (private)";
1438                                 } else if (begins_with(buf, "protected:")) {
1439                                         if (!state)
1440                                                 state = " (protected)";
1441                                 } else if (begins_with(buf, "public:")) {
1442                                         if (!state)
1443                                                 state = " (public)";
1444                                 } else {
1445                                         strlcpy(lastbuf, buf, sizeof lastbuf);
1446                                         if (state)
1447                                                 strlcat(lastbuf, state,
1448                                                     sizeof lastbuf);
1449                                         lastmatchline = pos;
1450                                         return lastbuf;
1451                                 }
1452                         }
1453                 }
1454                 pos--;
1455         }
1456         return lastmatchline > 0 ? lastbuf : NULL;
1457 }
1458
1459 /* dump accumulated "context" diff changes */
1460 static void
1461 dump_context_vec(FILE *f1, FILE *f2, int flags)
1462 {
1463         struct context_vec *cvp = context_vec_start;
1464         int lowa, upb, lowc, upd, do_output;
1465         int a, b, c, d;
1466         char ch, *f;
1467
1468         if (context_vec_start > context_vec_ptr)
1469                 return;
1470
1471         b = d = 0;              /* gcc */
1472         lowa = MAX(1, cvp->a - diff_context);
1473         upb = MIN(len[0], context_vec_ptr->b + diff_context);
1474         lowc = MAX(1, cvp->c - diff_context);
1475         upd = MIN(len[1], context_vec_ptr->d + diff_context);
1476
1477         diff_output("***************");
1478         if ((flags & D_PROTOTYPE)) {
1479                 f = match_function(ixold, lowa-1, f1);
1480                 if (f != NULL)
1481                         diff_output(" %s", f);
1482         }
1483         diff_output("\n*** ");
1484         range(lowa, upb, ",");
1485         diff_output(" ****\n");
1486
1487         /*
1488          * Output changes to the "old" file.  The first loop suppresses
1489          * output if there were no changes to the "old" file (we'll see
1490          * the "old" lines as context in the "new" list).
1491          */
1492         do_output = 0;
1493         for (; cvp <= context_vec_ptr; cvp++)
1494                 if (cvp->a <= cvp->b) {
1495                         cvp = context_vec_start;
1496                         do_output++;
1497                         break;
1498                 }
1499         if (do_output) {
1500                 while (cvp <= context_vec_ptr) {
1501                         a = cvp->a;
1502                         b = cvp->b;
1503                         c = cvp->c;
1504                         d = cvp->d;
1505
1506                         if (a <= b && c <= d)
1507                                 ch = 'c';
1508                         else
1509                                 ch = (a <= b) ? 'd' : 'a';
1510
1511                         if (ch == 'a')
1512                                 fetch(ixold, lowa, b, f1, ' ', 0, flags);
1513                         else {
1514                                 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1515                                 fetch(ixold, a, b, f1,
1516                                     ch == 'c' ? '!' : '-', 0, flags);
1517                         }
1518                         lowa = b + 1;
1519                         cvp++;
1520                 }
1521                 fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1522         }
1523         /* output changes to the "new" file */
1524         diff_output("--- ");
1525         range(lowc, upd, ",");
1526         diff_output(" ----\n");
1527
1528         do_output = 0;
1529         for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1530                 if (cvp->c <= cvp->d) {
1531                         cvp = context_vec_start;
1532                         do_output++;
1533                         break;
1534                 }
1535         if (do_output) {
1536                 while (cvp <= context_vec_ptr) {
1537                         a = cvp->a;
1538                         b = cvp->b;
1539                         c = cvp->c;
1540                         d = cvp->d;
1541
1542                         if (a <= b && c <= d)
1543                                 ch = 'c';
1544                         else
1545                                 ch = (a <= b) ? 'd' : 'a';
1546
1547                         if (ch == 'd')
1548                                 fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1549                         else {
1550                                 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1551                                 fetch(ixnew, c, d, f2,
1552                                     ch == 'c' ? '!' : '+', 0, flags);
1553                         }
1554                         lowc = d + 1;
1555                         cvp++;
1556                 }
1557                 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1558         }
1559         context_vec_ptr = context_vec_start - 1;
1560 }
1561
1562 /* dump accumulated "unified" diff changes */
1563 static void
1564 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1565 {
1566         struct context_vec *cvp = context_vec_start;
1567         int lowa, upb, lowc, upd;
1568         int a, b, c, d;
1569         char ch, *f;
1570
1571         if (context_vec_start > context_vec_ptr)
1572                 return;
1573
1574         b = d = 0;              /* gcc */
1575         lowa = MAX(1, cvp->a - diff_context);
1576         upb = MIN(len[0], context_vec_ptr->b + diff_context);
1577         lowc = MAX(1, cvp->c - diff_context);
1578         upd = MIN(len[1], context_vec_ptr->d + diff_context);
1579
1580         diff_output("@@ -");
1581         uni_range(lowa, upb);
1582         diff_output(" +");
1583         uni_range(lowc, upd);
1584         diff_output(" @@");
1585         if ((flags & D_PROTOTYPE)) {
1586                 f = match_function(ixold, lowa-1, f1);
1587                 if (f != NULL)
1588                         diff_output(" %s", f);
1589         }
1590         diff_output("\n");
1591
1592         /*
1593          * Output changes in "unified" diff format--the old and new lines
1594          * are printed together.
1595          */
1596         for (; cvp <= context_vec_ptr; cvp++) {
1597                 a = cvp->a;
1598                 b = cvp->b;
1599                 c = cvp->c;
1600                 d = cvp->d;
1601
1602                 /*
1603                  * c: both new and old changes
1604                  * d: only changes in the old file
1605                  * a: only changes in the new file
1606                  */
1607                 if (a <= b && c <= d)
1608                         ch = 'c';
1609                 else
1610                         ch = (a <= b) ? 'd' : 'a';
1611
1612                 switch (ch) {
1613                 case 'c':
1614                         fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1615                         fetch(ixold, a, b, f1, '-', 0, flags);
1616                         fetch(ixnew, c, d, f2, '+', 0, flags);
1617                         break;
1618                 case 'd':
1619                         fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1620                         fetch(ixold, a, b, f1, '-', 0, flags);
1621                         break;
1622                 case 'a':
1623                         fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1624                         fetch(ixnew, c, d, f2, '+', 0, flags);
1625                         break;
1626                 }
1627                 lowa = b + 1;
1628                 lowc = d + 1;
1629         }
1630         fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1631
1632         context_vec_ptr = context_vec_start - 1;
1633 }
1634
1635 static void
1636 print_header(const char *file1, const char *file2)
1637 {
1638         const char *time_format;
1639         char buf1[256];
1640         char buf2[256];
1641         char end1[10];
1642         char end2[10];
1643         struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1644         int nsec1 = stb1.st_mtim.tv_nsec;
1645         int nsec2 = stb2.st_mtim.tv_nsec;
1646
1647         time_format = "%Y-%m-%d %H:%M:%S";
1648
1649         if (cflag)
1650                 time_format = "%c";
1651         tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1652         tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1653         strftime(buf1, 256, time_format, tm_ptr1);
1654         strftime(buf2, 256, time_format, tm_ptr2);
1655         if (!cflag) {
1656                 strftime(end1, 10, "%z", tm_ptr1);
1657                 strftime(end2, 10, "%z", tm_ptr2);
1658                 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1659                 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1660         }
1661         if (label[0] != NULL)
1662                 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1663                     label[0]);
1664         else
1665                 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1666                     file1, buf1);
1667         if (label[1] != NULL)
1668                 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1669                     label[1]);
1670         else
1671                 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1672                     file2, buf2);
1673 }