contrib/diff/io.c

   1 /* File I/O for GNU DIFF.
   2    Copyright (C) 1988, 1989, 1992, 1993, 1994 Free Software Foundation, Inc.
   3
   4 This file is part of GNU DIFF.
   5
   6 GNU DIFF is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU DIFF is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU DIFF; see the file COPYING.  If not, write to
  18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
  19
  20 #include "diff.h"
  21
  22 /* Rotate a value n bits to the left. */
  23 #define UINT_BIT (sizeof (unsigned) * CHAR_BIT)
  24 #define ROL(v, n) ((v) << (n) | (v) >> (UINT_BIT - (n)))
  25
  26 /* Given a hash value and a new character, return a new hash value. */
  27 #define HASH(h, c) ((c) + ROL (h, 7))
  28
  29 /* Guess remaining number of lines from number N of lines so far,
  30    size S so far, and total size T.  */
  31 #define GUESS_LINES(n,s,t) (((t) - (s)) / ((n) < 10 ? 32 : (s) / ((n)-1)) + 5)
  32
  33 /* Type used for fast prefix comparison in find_identical_ends.  */
  34 #ifndef word
  35 #define word int
  36 #endif
  37 \f
  38 /* Lines are put into equivalence classes (of lines that match in line_cmp).
  39    Each equivalence class is represented by one of these structures,
  40    but only while the classes are being computed.
  41    Afterward, each class is represented by a number.  */
  42 struct equivclass
  43 {
  44   int next;     /* Next item in this bucket. */
  45   unsigned hash;        /* Hash of lines in this class.  */
  46   char const *line;     /* A line that fits this class. */
  47   size_t length;        /* That line's length, not counting its newline.  */
  48 };
  49
  50 /* Hash-table: array of buckets, each being a chain of equivalence classes.
  51    buckets[-1] is reserved for incomplete lines.  */
  52 static int *buckets;
  53
  54 /* Number of buckets in the hash table array, not counting buckets[-1]. */
  55 static int nbuckets;
  56
  57 /* Array in which the equivalence classes are allocated.
  58    The bucket-chains go through the elements in this array.
  59    The number of an equivalence class is its index in this array.  */
  60 static struct equivclass *equivs;
  61
  62 /* Index of first free element in the array `equivs'.  */
  63 static int equivs_index;
  64
  65 /* Number of elements allocated in the array `equivs'.  */
  66 static int equivs_alloc;
  67
  68 static void find_and_hash_each_line PARAMS((struct file_data *));
  69 static void find_identical_ends PARAMS((struct file_data[]));
  70 static void prepare_text_end PARAMS((struct file_data *));
  71 \f
  72 /* Check for binary files and compare them for exact identity.  */
  73
  74 /* Return 1 if BUF contains a non text character.
  75    SIZE is the number of characters in BUF.  */
  76
  77 #define binary_file_p(buf, size) (memchr (buf, '\0', size) != 0)
  78
  79 /* Get ready to read the current file.
  80    Return nonzero if SKIP_TEST is zero,
  81    and if it appears to be a binary file.  */
  82
  83 int
  84 sip (current, skip_test)
  85      struct file_data *current;
  86      int skip_test;
  87 {
  88   /* If we have a nonexistent file at this stage, treat it as empty.  */
  89   if (current->desc < 0)
  90     {
  91       /* Leave room for a sentinel.  */
  92       current->bufsize = sizeof (word);
  93       current->buffer = xmalloc (current->bufsize);
  94     }
  95   else
  96     {
  97       current->bufsize = STAT_BLOCKSIZE (current->stat);
  98       current->buffer = xmalloc (current->bufsize);
  99
 100       if (! skip_test)
 101         {
 102           /* Check first part of file to see if it's a binary file.  */
 103 #if HAVE_SETMODE
 104           int oldmode = setmode (current->desc, O_BINARY);
 105 #endif
 106           size_t n = read (current->desc, current->buffer, current->bufsize);
 107           if (n == -1)
 108             pfatal_with_name (current->name);
 109           current->buffered_chars = n;
 110 #if HAVE_SETMODE
 111           if (oldmode != O_BINARY)
 112             {
 113               if (lseek (current->desc, - (off_t) n, SEEK_CUR) == -1)
 114                 pfatal_with_name (current->name);
 115               setmode (current->desc, oldmode);
 116               current->buffered_chars = 0;
 117             }
 118 #endif
 119           return binary_file_p (current->buffer, n);
 120         }
 121     }
 122
 123   current->buffered_chars = 0;
 124   return 0;
 125 }
 126
 127 /* Slurp the rest of the current file completely into memory.  */
 128
 129 void
 130 slurp (current)
 131      struct file_data *current;
 132 {
 133   size_t cc;
 134
 135   if (current->desc < 0)
 136     /* The file is nonexistent.  */
 137     ;
 138   else if (S_ISREG (current->stat.st_mode))
 139     {
 140       /* It's a regular file; slurp in the rest all at once.  */
 141
 142       /* Get the size out of the stat block.
 143          Allocate enough room for appended newline and sentinel.  */
 144       cc = current->stat.st_size + 1 + sizeof (word);
 145       if (current->bufsize < cc)
 146         {
 147           current->bufsize = cc;
 148           current->buffer = xrealloc (current->buffer, cc);
 149         }
 150
 151       if (current->buffered_chars < current->stat.st_size)
 152         {
 153           cc = read (current->desc,
 154                      current->buffer + current->buffered_chars,
 155                      current->stat.st_size - current->buffered_chars);
 156           if (cc == -1)
 157             pfatal_with_name (current->name);
 158           current->buffered_chars += cc;
 159         }
 160     }
 161   /* It's not a regular file; read it, growing the buffer as needed.  */
 162   else if (always_text_flag || current->buffered_chars != 0)
 163     {
 164       for (;;)
 165         {
 166           if (current->buffered_chars == current->bufsize)
 167             {
 168               current->bufsize = current->bufsize * 2;
 169               current->buffer = xrealloc (current->buffer, current->bufsize);
 170             }
 171           cc = read (current->desc,
 172                      current->buffer + current->buffered_chars,
 173                      current->bufsize - current->buffered_chars);
 174           if (cc == 0)
 175             break;
 176           if (cc == -1)
 177             pfatal_with_name (current->name);
 178           current->buffered_chars += cc;
 179         }
 180       /* Allocate just enough room for appended newline and sentinel.  */
 181       current->bufsize = current->buffered_chars + 1 + sizeof (word);
 182       current->buffer = xrealloc (current->buffer, current->bufsize);
 183     }
 184 }
 185 \f
 186 /* Split the file into lines, simultaneously computing the equivalence class for
 187    each line. */
 188
 189 static void
 190 find_and_hash_each_line (current)
 191      struct file_data *current;
 192 {
 193   unsigned h;
 194   unsigned char const *p = (unsigned char const *) current->prefix_end;
 195   unsigned char c;
 196   int i, *bucket;
 197   size_t length;
 198
 199   /* Cache often-used quantities in local variables to help the compiler.  */
 200   char const **linbuf = current->linbuf;
 201   int alloc_lines = current->alloc_lines;
 202   int line = 0;
 203   int linbuf_base = current->linbuf_base;
 204   int *cureqs = (int *) xmalloc (alloc_lines * sizeof (int));
 205   struct equivclass *eqs = equivs;
 206   int eqs_index = equivs_index;
 207   int eqs_alloc = equivs_alloc;
 208   char const *suffix_begin = current->suffix_begin;
 209   char const *bufend = current->buffer + current->buffered_chars;
 210   int use_line_cmp = ignore_some_line_changes;
 211
 212   while ((char const *) p < suffix_begin)
 213     {
 214       char const *ip = (char const *) p;
 215
 216       /* Compute the equivalence class for this line.  */
 217
 218       h = 0;
 219
 220       /* Hash this line until we find a newline. */
 221       if (ignore_case_flag)
 222         {
 223           if (ignore_all_space_flag)
 224             while ((c = *p++) != '\n')
 225               {
 226                 if (! ISSPACE (c))
 227                   h = HASH (h, ISUPPER (c) ? tolower (c) : c);
 228               }
 229           else if (ignore_space_change_flag)
 230             while ((c = *p++) != '\n')
 231               {
 232                 if (ISSPACE (c))
 233                   {
 234                     for (;;)
 235                       {
 236                         c = *p++;
 237                         if (!ISSPACE (c))
 238                           break;
 239                         if (c == '\n')
 240                           goto hashing_done;
 241                       }
 242                     h = HASH (h, ' ');
 243                   }
 244                 /* C is now the first non-space.  */
 245                 h = HASH (h, ISUPPER (c) ? tolower (c) : c);
 246               }
 247           else
 248             while ((c = *p++) != '\n')
 249               h = HASH (h, ISUPPER (c) ? tolower (c) : c);
 250         }
 251       else
 252         {
 253           if (ignore_all_space_flag)
 254             while ((c = *p++) != '\n')
 255               {
 256                 if (! ISSPACE (c))
 257                   h = HASH (h, c);
 258               }
 259           else if (ignore_space_change_flag)
 260             while ((c = *p++) != '\n')
 261               {
 262                 if (ISSPACE (c))
 263                   {
 264                     for (;;)
 265                       {
 266                         c = *p++;
 267                         if (!ISSPACE (c))
 268                           break;
 269                         if (c == '\n')
 270                           goto hashing_done;
 271                       }
 272                     h = HASH (h, ' ');
 273                   }
 274                 /* C is now the first non-space.  */
 275                 h = HASH (h, c);
 276               }
 277           else
 278             while ((c = *p++) != '\n')
 279               h = HASH (h, c);
 280         }
 281    hashing_done:;
 282
 283       bucket = &buckets[h % nbuckets];
 284       length = (char const *) p - ip - 1;
 285
 286       if ((char const *) p == bufend
 287           && current->missing_newline
 288           && ROBUST_OUTPUT_STYLE (output_style))
 289         {
 290           /* This line is incomplete.  If this is significant,
 291              put the line into bucket[-1].  */
 292           if (! (ignore_space_change_flag | ignore_all_space_flag))
 293             bucket = &buckets[-1];
 294
 295           /* Omit the inserted newline when computing linbuf later.  */
 296           p--;
 297           bufend = suffix_begin = (char const *) p;
 298         }
 299
 300       for (i = *bucket;  ;  i = eqs[i].next)
 301         if (!i)
 302           {
 303             /* Create a new equivalence class in this bucket. */
 304             i = eqs_index++;
 305             if (i == eqs_alloc)
 306               eqs = (struct equivclass *)
 307                       xrealloc (eqs, (eqs_alloc*=2) * sizeof(*eqs));
 308             eqs[i].next = *bucket;
 309             eqs[i].hash = h;
 310             eqs[i].line = ip;
 311             eqs[i].length = length;
 312             *bucket = i;
 313             break;
 314           }
 315         else if (eqs[i].hash == h)
 316           {
 317             char const *eqline = eqs[i].line;
 318
 319             /* Reuse existing equivalence class if the lines are identical.
 320                This detects the common case of exact identity
 321                faster than complete comparison would.  */
 322             if (eqs[i].length == length && memcmp (eqline, ip, length) == 0)
 323               break;
 324
 325             /* Reuse existing class if line_cmp reports the lines equal.  */
 326             if (use_line_cmp && line_cmp (eqline, ip) == 0)
 327               break;
 328           }
 329
 330       /* Maybe increase the size of the line table. */
 331       if (line == alloc_lines)
 332         {
 333           /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
 334           alloc_lines = 2 * alloc_lines - linbuf_base;
 335           cureqs = (int *) xrealloc (cureqs, alloc_lines * sizeof (*cureqs));
 336           linbuf = (char const **) xrealloc (linbuf + linbuf_base,
 337                                              (alloc_lines - linbuf_base)
 338                                              * sizeof (*linbuf))
 339                    - linbuf_base;
 340         }
 341       linbuf[line] = ip;
 342       cureqs[line] = i;
 343       ++line;
 344     }
 345
 346   current->buffered_lines = line;
 347
 348   for (i = 0;  ;  i++)
 349     {
 350       /* Record the line start for lines in the suffix that we care about.
 351          Record one more line start than lines,
 352          so that we can compute the length of any buffered line.  */
 353       if (line == alloc_lines)
 354         {
 355           /* Double (alloc_lines - linbuf_base) by adding to alloc_lines.  */
 356           alloc_lines = 2 * alloc_lines - linbuf_base;
 357           linbuf = (char const **) xrealloc (linbuf + linbuf_base,
 358                                              (alloc_lines - linbuf_base)
 359                                              * sizeof (*linbuf))
 360                    - linbuf_base;
 361         }
 362       linbuf[line] = (char const *) p;
 363
 364       if ((char const *) p == bufend)
 365         break;
 366
 367       if (context <= i && no_diff_means_no_output)
 368         break;
 369
 370       line++;
 371
 372       while (*p++ != '\n')
 373         ;
 374     }
 375
 376   /* Done with cache in local variables.  */
 377   current->linbuf = linbuf;
 378   current->valid_lines = line;
 379   current->alloc_lines = alloc_lines;
 380   current->equivs = cureqs;
 381   equivs = eqs;
 382   equivs_alloc = eqs_alloc;
 383   equivs_index = eqs_index;
 384 }
 385 \f
 386 /* Prepare the end of the text.  Make sure it's initialized.
 387    Make sure text ends in a newline,
 388    but remember that we had to add one.  */
 389
 390 static void
 391 prepare_text_end (current)
 392      struct file_data *current;
 393 {
 394   size_t buffered_chars = current->buffered_chars;
 395   char *p = current->buffer;
 396
 397   if (buffered_chars == 0 || p[buffered_chars - 1] == '\n')
 398     current->missing_newline = 0;
 399   else
 400     {
 401       p[buffered_chars++] = '\n';
 402       current->buffered_chars = buffered_chars;
 403       current->missing_newline = 1;
 404     }
 405
 406   /* Don't use uninitialized storage when planting or using sentinels.  */
 407   if (p)
 408     bzero (p + buffered_chars, sizeof (word));
 409 }
 410 \f
 411 /* Given a vector of two file_data objects, find the identical
 412    prefixes and suffixes of each object. */
 413
 414 static void
 415 find_identical_ends (filevec)
 416      struct file_data filevec[];
 417 {
 418   word *w0, *w1;
 419   char *p0, *p1, *buffer0, *buffer1;
 420   char const *end0, *beg0;
 421   char const **linbuf0, **linbuf1;
 422   int i, lines;
 423   size_t n0, n1, tem;
 424   int alloc_lines0, alloc_lines1;
 425   int buffered_prefix, prefix_count, prefix_mask;
 426
 427   slurp (&filevec[0]);
 428   if (filevec[0].desc != filevec[1].desc)
 429     slurp (&filevec[1]);
 430   else
 431     {
 432       filevec[1].buffer = filevec[0].buffer;
 433       filevec[1].bufsize = filevec[0].bufsize;
 434       filevec[1].buffered_chars = filevec[0].buffered_chars;
 435     }
 436   for (i = 0; i < 2; i++)
 437     prepare_text_end (&filevec[i]);
 438
 439   /* Find identical prefix.  */
 440
 441   p0 = buffer0 = filevec[0].buffer;
 442   p1 = buffer1 = filevec[1].buffer;
 443
 444   n0 = filevec[0].buffered_chars;
 445   n1 = filevec[1].buffered_chars;
 446
 447   if (p0 == p1)
 448     /* The buffers are the same; sentinels won't work.  */
 449     p0 = p1 += n1;
 450   else
 451     {
 452       /* Insert end sentinels, in this case characters that are guaranteed
 453          to make the equality test false, and thus terminate the loop.  */
 454
 455       if (n0 < n1)
 456         p0[n0] = ~p1[n0];
 457       else
 458         p1[n1] = ~p0[n1];
 459
 460       /* Loop until first mismatch, or to the sentinel characters.  */
 461
 462       /* Compare a word at a time for speed.  */
 463       w0 = (word *) p0;
 464       w1 = (word *) p1;
 465       while (*w0++ == *w1++)
 466         ;
 467       --w0, --w1;
 468
 469       /* Do the last few bytes of comparison a byte at a time.  */
 470       p0 = (char *) w0;
 471       p1 = (char *) w1;
 472       while (*p0++ == *p1++)
 473         ;
 474       --p0, --p1;
 475
 476       /* Don't mistakenly count missing newline as part of prefix. */
 477       if (ROBUST_OUTPUT_STYLE (output_style)
 478           && (buffer0 + n0 - filevec[0].missing_newline < p0)
 479              !=
 480              (buffer1 + n1 - filevec[1].missing_newline < p1))
 481         --p0, --p1;
 482     }
 483
 484   /* Now P0 and P1 point at the first nonmatching characters.  */
 485
 486   /* Skip back to last line-beginning in the prefix,
 487      and then discard up to HORIZON_LINES lines from the prefix.  */
 488   i = horizon_lines;
 489   while (p0 != buffer0 && (p0[-1] != '\n' || i--))
 490     --p0, --p1;
 491
 492   /* Record the prefix.  */
 493   filevec[0].prefix_end = p0;
 494   filevec[1].prefix_end = p1;
 495
 496   /* Find identical suffix.  */
 497
 498   /* P0 and P1 point beyond the last chars not yet compared.  */
 499   p0 = buffer0 + n0;
 500   p1 = buffer1 + n1;
 501
 502   if (! ROBUST_OUTPUT_STYLE (output_style)
 503       || filevec[0].missing_newline == filevec[1].missing_newline)
 504     {
 505       end0 = p0;        /* Addr of last char in file 0.  */
 506
 507       /* Get value of P0 at which we should stop scanning backward:
 508          this is when either P0 or P1 points just past the last char
 509          of the identical prefix.  */
 510       beg0 = filevec[0].prefix_end + (n0 < n1 ? 0 : n0 - n1);
 511
 512       /* Scan back until chars don't match or we reach that point.  */
 513       while (p0 != beg0)
 514         if (*--p0 != *--p1)
 515           {
 516             /* Point at the first char of the matching suffix.  */
 517             ++p0, ++p1;
 518             beg0 = p0;
 519             break;
 520           }
 521
 522       /* Are we at a line-beginning in both files?  If not, add the rest of
 523          this line to the main body.  Discard up to HORIZON_LINES lines from
 524          the identical suffix.  Also, discard one extra line,
 525          because shift_boundaries may need it.  */
 526       i = horizon_lines + !((buffer0 == p0 || p0[-1] == '\n')
 527                             &&
 528                             (buffer1 == p1 || p1[-1] == '\n'));
 529       while (i-- && p0 != end0)
 530         while (*p0++ != '\n')
 531           ;
 532
 533       p1 += p0 - beg0;
 534     }
 535
 536   /* Record the suffix.  */
 537   filevec[0].suffix_begin = p0;
 538   filevec[1].suffix_begin = p1;
 539
 540   /* Calculate number of lines of prefix to save.
 541
 542      prefix_count == 0 means save the whole prefix;
 543      we need this with for options like -D that output the whole file.
 544      We also need it for options like -F that output some preceding line;
 545      at least we will need to find the last few lines,
 546      but since we don't know how many, it's easiest to find them all.
 547
 548      Otherwise, prefix_count != 0.  Save just prefix_count lines at start
 549      of the line buffer; they'll be moved to the proper location later.
 550      Handle 1 more line than the context says (because we count 1 too many),
 551      rounded up to the next power of 2 to speed index computation.  */
 552
 553   if (no_diff_means_no_output && ! function_regexp_list)
 554     {
 555       for (prefix_count = 1;  prefix_count < context + 1;  prefix_count *= 2)
 556         ;
 557       prefix_mask = prefix_count - 1;
 558       alloc_lines0
 559         = prefix_count
 560           + GUESS_LINES (0, 0, p0 - filevec[0].prefix_end)
 561           + context;
 562     }
 563   else
 564     {
 565       prefix_count = 0;
 566       prefix_mask = ~0;
 567       alloc_lines0 = GUESS_LINES (0, 0, n0);
 568     }
 569
 570   lines = 0;
 571   linbuf0 = (char const **) xmalloc (alloc_lines0 * sizeof (*linbuf0));
 572
 573   /* If the prefix is needed, find the prefix lines.  */
 574   if (! (no_diff_means_no_output
 575          && filevec[0].prefix_end == p0
 576          && filevec[1].prefix_end == p1))
 577     {
 578       p0 = buffer0;
 579       end0 = filevec[0].prefix_end;
 580       while (p0 != end0)
 581         {
 582           int l = lines++ & prefix_mask;
 583           if (l == alloc_lines0)
 584             linbuf0 = (char const **) xrealloc (linbuf0, (alloc_lines0 *= 2)
 585                                                          * sizeof(*linbuf0));
 586           linbuf0[l] = p0;
 587           while (*p0++ != '\n')
 588             ;
 589         }
 590     }
 591   buffered_prefix = prefix_count && context < lines ? context : lines;
 592
 593   /* Allocate line buffer 1.  */
 594   tem = prefix_count ? filevec[1].suffix_begin - buffer1 : n1;
 595
 596   alloc_lines1
 597     = (buffered_prefix
 598        + GUESS_LINES (lines, filevec[1].prefix_end - buffer1, tem)
 599        + context);
 600   linbuf1 = (char const **) xmalloc (alloc_lines1 * sizeof (*linbuf1));
 601
 602   if (buffered_prefix != lines)
 603     {
 604       /* Rotate prefix lines to proper location.  */
 605       for (i = 0;  i < buffered_prefix;  i++)
 606         linbuf1[i] = linbuf0[(lines - context + i) & prefix_mask];
 607       for (i = 0;  i < buffered_prefix;  i++)
 608         linbuf0[i] = linbuf1[i];
 609     }
 610
 611   /* Initialize line buffer 1 from line buffer 0.  */
 612   for (i = 0; i < buffered_prefix; i++)
 613     linbuf1[i] = linbuf0[i] - buffer0 + buffer1;
 614
 615   /* Record the line buffer, adjusted so that
 616      linbuf*[0] points at the first differing line.  */
 617   filevec[0].linbuf = linbuf0 + buffered_prefix;
 618   filevec[1].linbuf = linbuf1 + buffered_prefix;
 619   filevec[0].linbuf_base = filevec[1].linbuf_base = - buffered_prefix;
 620   filevec[0].alloc_lines = alloc_lines0 - buffered_prefix;
 621   filevec[1].alloc_lines = alloc_lines1 - buffered_prefix;
 622   filevec[0].prefix_lines = filevec[1].prefix_lines = lines;
 623 }
 624 \f
 625 /* Largest primes less than some power of two, for nbuckets.  Values range
 626    from useful to preposterous.  If one of these numbers isn't prime
 627    after all, don't blame it on me, blame it on primes (6) . . . */
 628 static int const primes[] =
 629 {
 630   509,
 631   1021,
 632   2039,
 633   4093,
 634   8191,
 635   16381,
 636   32749,
 637 #if 32767 < INT_MAX
 638   65521,
 639   131071,
 640   262139,
 641   524287,
 642   1048573,
 643   2097143,
 644   4194301,
 645   8388593,
 646   16777213,
 647   33554393,
 648   67108859,                     /* Preposterously large . . . */
 649   134217689,
 650   268435399,
 651   536870909,
 652   1073741789,
 653   2147483647,
 654 #endif
 655   0
 656 };
 657
 658 /* Given a vector of two file_data objects, read the file associated
 659    with each one, and build the table of equivalence classes.
 660    Return 1 if either file appears to be a binary file.
 661    If PRETEND_BINARY is nonzero, pretend they are binary regardless.  */
 662
 663 int
 664 read_files (filevec, pretend_binary)
 665      struct file_data filevec[];
 666      int pretend_binary;
 667 {
 668   int i;
 669   int skip_test = always_text_flag | pretend_binary;
 670   int appears_binary = pretend_binary | sip (&filevec[0], skip_test);
 671
 672   if (filevec[0].desc != filevec[1].desc)
 673     appears_binary |= sip (&filevec[1], skip_test | appears_binary);
 674   else
 675     {
 676       filevec[1].buffer = filevec[0].buffer;
 677       filevec[1].bufsize = filevec[0].bufsize;
 678       filevec[1].buffered_chars = filevec[0].buffered_chars;
 679     }
 680   if (appears_binary)
 681     {
 682 #if HAVE_SETMODE
 683       setmode (filevec[0].desc, O_BINARY);
 684       setmode (filevec[1].desc, O_BINARY);
 685 #endif
 686       return 1;
 687     }
 688
 689   find_identical_ends (filevec);
 690
 691   equivs_alloc = filevec[0].alloc_lines + filevec[1].alloc_lines + 1;
 692   equivs = (struct equivclass *) xmalloc (equivs_alloc * sizeof (struct equivclass));
 693   /* Equivalence class 0 is permanently safe for lines that were not
 694      hashed.  Real equivalence classes start at 1. */
 695   equivs_index = 1;
 696
 697   for (i = 0;  primes[i] < equivs_alloc / 3;  i++)
 698     if (! primes[i])
 699       abort ();
 700   nbuckets = primes[i];
 701
 702   buckets = (int *) xmalloc ((nbuckets + 1) * sizeof (*buckets));
 703   bzero (buckets++, (nbuckets + 1) * sizeof (*buckets));
 704
 705   for (i = 0; i < 2; i++)
 706     find_and_hash_each_line (&filevec[i]);
 707
 708   filevec[0].equiv_max = filevec[1].equiv_max = equivs_index;
 709
 710   free (equivs);
 711   free (buckets - 1);
 712
 713   return 0;
 714 }