contrib/subversion/subversion/libsvn_diff/diff_file.c

   1 /*
   2  * diff_file.c :  routines for doing diffs on files
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_pools.h>
  27 #include <apr_general.h>
  28 #include <apr_file_io.h>
  29 #include <apr_file_info.h>
  30 #include <apr_time.h>
  31 #include <apr_mmap.h>
  32 #include <apr_getopt.h>
  33
  34 #include <assert.h>
  35
  36 #include "svn_error.h"
  37 #include "svn_diff.h"
  38 #include "svn_types.h"
  39 #include "svn_string.h"
  40 #include "svn_subst.h"
  41 #include "svn_io.h"
  42 #include "svn_utf.h"
  43 #include "svn_pools.h"
  44 #include "diff.h"
  45 #include "svn_private_config.h"
  46 #include "svn_path.h"
  47 #include "svn_ctype.h"
  48
  49 #include "private/svn_utf_private.h"
  50 #include "private/svn_eol_private.h"
  51 #include "private/svn_dep_compat.h"
  52 #include "private/svn_adler32.h"
  53 #include "private/svn_diff_private.h"
  54
  55 /* A token, i.e. a line read from a file. */
     /* The token only records where the line lives (datasource, offsets and
      * lengths); it does not hold a copy of the line's bytes. */
  56 typedef struct svn_diff__file_token_t
  57 {
  58   /* Next token in free list. */
  59   struct svn_diff__file_token_t *next;
       /* Datasource this token refers to (presumably the one the line was
          read from -- confirm against datasource_get_next_token). */
  60   svn_diff_datasource_e datasource;
  61   /* Offset in the datasource. */
  62   apr_off_t offset;
  63   /* Offset of the normalized token (may skip leading whitespace) */
  64   apr_off_t norm_offset;
  65   /* Total length - before normalization. */
  66   apr_off_t raw_length;
  67   /* Total length - after normalization. */
  68   apr_off_t length;
  69 } svn_diff__file_token_t;
  70
  71
  72 typedef struct svn_diff__file_baton_t
  73 {
       /* Diff options supplied by the caller (presumably consulted while
          normalizing tokens -- not used by the scanning code in this part
          of the file; confirm). */
  74   const svn_diff_file_options_t *options;
  75
       /* Per-datasource state.  The slot for a datasource is selected with
          datasource_to_index(), hence four entries. */
  76   struct file_info {
  77     const char *path;  /* path to this file, absolute or relative to CWD */
  78
  79     /* All the following fields are active while this datasource is open */
  80     apr_file_t *file;  /* handle of this file */
  81     apr_off_t size;    /* total raw size in bytes of this file */
  82
  83     /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
         /* CHUNK is set to -1 by decrement_chunk() to signal "before the
            beginning of the file"; CURP == ENDP signals end of file. */
  84     int chunk;     /* the current chunk number, zero-based */
  85     char *buffer;  /* a buffer containing the current chunk */
  86     char *curp;    /* current position in the current chunk */
  87     char *endp;    /* next memory address after the current chunk */
  88
  89     svn_diff__normalize_state_t normalize_state;
  90
  91     /* Where the identical suffix starts in this datasource */
         /* datasources_open() presets suffix_start_chunk to -1 as a guard
            value meaning "no suffix was determined". */
  92     int suffix_start_chunk;
  93     apr_off_t suffix_offset_in_chunk;
  94   } files[4];
  95
  96   /* List of free tokens that may be reused. */
  97   svn_diff__file_token_t *tokens;
  98
       /* Pool from which datasources_open() opens the files and allocates
          the chunk buffers. */
  99   apr_pool_t *pool;
 100 } svn_diff__file_baton_t;
 101
 102 static int
 103 datasource_to_index(svn_diff_datasource_e datasource)
 104 {
 105   switch (datasource)
 106     {
 107     case svn_diff_datasource_original:
 108       return 0;
 109
 110     case svn_diff_datasource_modified:
 111       return 1;
 112
 113     case svn_diff_datasource_latest:
 114       return 2;
 115
 116     case svn_diff_datasource_ancestor:
 117       return 3;
 118     }
 119
 120   return -1;
 121 }
 122
 /* Files are read in chunks of 128k.  There is no support for this number
  * whatsoever.  If there is a number someone comes up with that has some
  * argumentation, let's use that.
  */
 /* If you change this number, update test_norm_offset(),
  * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
  */
 #define CHUNK_SHIFT 17
 #define CHUNK_SIZE (1 << CHUNK_SHIFT)

 /* Conversions between a byte offset in a file and (chunk number, offset
  * within that chunk).
  *
  * chunk_to_offset() widens its argument to apr_off_t before shifting:
  * chunk numbers are stored as plain ints, and shifting an int left by
  * CHUNK_SHIFT overflows (undefined behaviour) as soon as the resulting
  * offset reaches 2 GiB, i.e. for chunk numbers >= 16384.
  */
 #define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)
 #define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
 #define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
 136
 137
 138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
 139  * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
 140  */
 141 static APR_INLINE svn_error_t *
 142 read_chunk(apr_file_t *file,
 143            char *buffer, apr_off_t length,
 144            apr_off_t offset, apr_pool_t *scratch_pool)
 145 {
 146   /* XXX: The final offset may not be the one we asked for.
 147    * XXX: Check.
 148    */
 149   SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
 150   return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
 151                                 NULL, NULL, scratch_pool);
 152 }
 153
 154
 155 /* Map or read a file at PATH. *BUFFER will point to the file
 156  * contents; if the file was mapped, *FILE and *MM will contain the
 157  * mmap context; otherwise they will be NULL.  SIZE will contain the
 158  * file size.  Allocate from POOL.
 159  */
 160 #if APR_HAS_MMAP
 161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
 162 #define MMAP_T_ARG(NAME)   &(NAME),
 163 #else
 164 #define MMAP_T_PARAM(NAME)
 165 #define MMAP_T_ARG(NAME)
 166 #endif
 167
 168 static svn_error_t *
 169 map_or_read_file(apr_file_t **file,
 170                  MMAP_T_PARAM(mm)
 171                  char **buffer, apr_size_t *size_p,
 172                  const char *path, apr_pool_t *pool)
 173 {
 174   apr_finfo_t finfo;
 175   apr_status_t rv;
 176   apr_size_t size;
 177
 178   *buffer = NULL;
 179
 180   SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
 181   SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
 182
 183   if (finfo.size > APR_SIZE_MAX)
 184     {
 185       return svn_error_createf(APR_ENOMEM, NULL,
 186                                _("File '%s' is too large to be read in "
 187                                  "to memory"), path);
 188     }
 189
 190   size = (apr_size_t) finfo.size;
 191 #if APR_HAS_MMAP
 192   if (size > APR_MMAP_THRESHOLD)
 193     {
 194       rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
 195       if (rv == APR_SUCCESS)
 196         {
 197           *buffer = (*mm)->mm;
 198         }
 199       else
 200         {
 201           /* Clear *MM because output parameters are undefined on error. */
 202           *mm = NULL;
 203         }
 204
 205       /* On failure we just fall through and try reading the file into
 206        * memory instead.
 207        */
 208     }
 209 #endif /* APR_HAS_MMAP */
 210
 211    if (*buffer == NULL && size > 0)
 212     {
 213       *buffer = apr_palloc(pool, size);
 214
 215       SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
 216
 217       /* Since we have the entire contents of the file we can
 218        * close it now.
 219        */
 220       SVN_ERR(svn_io_file_close(*file, pool));
 221
 222       *file = NULL;
 223     }
 224
 225   *size_p = size;
 226
 227   return SVN_NO_ERROR;
 228 }
 229
 230
 231 /* For all files in the FILE array, increment the curp pointer.  If a file
 232  * points before the beginning of file, let it point at the first byte again.
 233  * If the end of the current chunk is reached, read the next chunk in the
 234  * buffer and point curp to the start of the chunk.  If EOF is reached, set
 235  * curp equal to endp to indicate EOF. */
     /* Usage notes:
      *  - ALL_FILES is evaluated several times, so pass a plain array name.
      *  - The slow path goes through SVN_ERR(increment_chunk(...)); SVN_ERR is
      *    defined elsewhere and is expected to return the error from the
      *    enclosing function, so use this only inside functions returning
      *    svn_error_t *.
      *  - The fast path is taken only while curp is before the last byte of
      *    the chunk.  At BOF decrement_chunk() leaves curp at endp - 1, so the
      *    BOF case always lands in increment_chunk(). */
 236 #define INCREMENT_POINTERS(all_files, files_len, pool)                       \
 237   do {                                                                       \
 238     apr_size_t svn_macro__i;                                                 \
 239                                                                              \
 240     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 241     {                                                                        \
 242       if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
 243         (all_files)[svn_macro__i].curp++;                                    \
 244       else                                                                   \
 245         SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
 246     }                                                                        \
 247   } while (0)
 248
 249
 250 /* For all files in the FILE array, decrement the curp pointer.  If the
 251  * start of a chunk is reached, read the previous chunk in the buffer and
 252  * point curp to the last byte of the chunk.  If the beginning of a FILE is
 253  * reached, set chunk to -1 to indicate BOF. */
     /* Usage notes:
      *  - ALL_FILES is evaluated several times, so pass a plain array name.
      *  - The slow path goes through SVN_ERR(decrement_chunk(...)); SVN_ERR is
      *    defined elsewhere and is expected to return the error from the
      *    enclosing function, so use this only inside functions returning
      *    svn_error_t *.
      *  - The fast path is taken while curp is past the first byte of the
      *    buffer; everything else (chunk switch, BOF marking) is done by
      *    decrement_chunk(). */
 254 #define DECREMENT_POINTERS(all_files, files_len, pool)                       \
 255   do {                                                                       \
 256     apr_size_t svn_macro__i;                                                 \
 257                                                                              \
 258     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 259     {                                                                        \
 260       if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
 261         (all_files)[svn_macro__i].curp--;                                    \
 262       else                                                                   \
 263         SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
 264     }                                                                        \
 265   } while (0)
 266
 267
 268 static svn_error_t *
 269 increment_chunk(struct file_info *file, apr_pool_t *pool)
 270 {
 271   apr_off_t length;
 272   apr_off_t last_chunk = offset_to_chunk(file->size);
 273
 274   if (file->chunk == -1)
 275     {
 276       /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
 277       file->chunk = 0;
 278       file->curp = file->buffer;
 279     }
 280   else if (file->chunk == last_chunk)
 281     {
 282       /* We are at the last chunk. Indicate EOF by setting curp == endp. */
 283       file->curp = file->endp;
 284     }
 285   else
 286     {
 287       /* There are still chunks left. Read next chunk and reset pointers. */
 288       file->chunk++;
 289       length = file->chunk == last_chunk ?
 290         offset_in_chunk(file->size) : CHUNK_SIZE;
 291       SVN_ERR(read_chunk(file->file, file->buffer,
 292                          length, chunk_to_offset(file->chunk),
 293                          pool));
 294       file->endp = file->buffer + length;
 295       file->curp = file->buffer;
 296     }
 297
 298   return SVN_NO_ERROR;
 299 }
 300
 301
 302 static svn_error_t *
 303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
 304 {
 305   if (file->chunk == 0)
 306     {
 307       /* We are already at the first chunk. Indicate BOF (Beginning Of File)
 308          by setting chunk = -1 and curp = endp - 1. Both conditions are
 309          important. They help the increment step to catch the BOF situation
 310          in an efficient way. */
 311       file->chunk--;
 312       file->curp = file->endp - 1;
 313     }
 314   else
 315     {
 316       /* Read previous chunk and reset pointers. */
 317       file->chunk--;
 318       SVN_ERR(read_chunk(file->file, file->buffer,
 319                          CHUNK_SIZE, chunk_to_offset(file->chunk),
 320                          pool));
 321       file->endp = file->buffer + CHUNK_SIZE;
 322       file->curp = file->endp - 1;
 323     }
 324
 325   return SVN_NO_ERROR;
 326 }
 327
 328
 329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
 330  * the file (this can happen while scanning backwards). This is the case if
 331  * one of them has chunk == -1. */
 332 static svn_boolean_t
 333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
 334 {
 335   apr_size_t i;
 336
 337   for (i = 0; i < file_len; i++)
 338     if (file[i].chunk == -1)
 339       return TRUE;
 340
 341   return FALSE;
 342 }
 343
 344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
 345  * one of them has curp == endp (this can only happen at the last chunk)) */
 346 static svn_boolean_t
 347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
 348 {
 349   apr_size_t i;
 350
 351   for (i = 0; i < file_len; i++)
 352     if (file[i].curp == file[i].endp)
 353       return TRUE;
 354
 355   return FALSE;
 356 }
 357
 /* Quickly determine whether the machine word CHUNK contains an eol char
  * ('\r' or '\n') in any of its bytes.
  * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
  *
  * Presumably SVN__R_MASK / SVN__N_MASK hold '\r' / '\n' replicated into
  * every byte, so the XOR turns matching bytes into zero -- the masks are
  * defined elsewhere, confirm there.
  */

 #if SVN_UNALIGNED_ACCESS_IS_OK
 static svn_boolean_t contains_eol(apr_uintptr_t chunk)
 {
   apr_uintptr_t cr_bits = chunk ^ SVN__R_MASK;
   apr_uintptr_t lf_bits = chunk ^ SVN__N_MASK;
   apr_uintptr_t high_bits;

   cr_bits |= (cr_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
   lf_bits |= (lf_bits & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

   /* Only if every tested bit survived in both words is there no eol. */
   high_bits = cr_bits & lf_bits & SVN__BIT_7_SET;
   if (high_bits == SVN__BIT_7_SET)
     return FALSE;

   return TRUE;
 }
 #endif
 374
 375 /* Find the prefix which is identical between all elements of the FILE array.
 376  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
 377  * set to TRUE if one of the FILEs reached its end while scanning prefix,
 378  * i.e. at least one file consisted entirely of prefix.  Otherwise,
 379  * REACHED_ONE_EOF is set to FALSE.
 380  *
 381  * After this function is finished, the buffers, chunks, curp's and endp's
 382  * of the FILEs are set to point at the first byte after the prefix. */
     /* Notes:
      *  - The very first comparison dereferences curp of every file without
      *    an EOF check, so no file may be empty; datasources_open() returns
      *    before calling this function if any first chunk has length 0.
      *  - The prefix is cut back to the last eol sequence, so that it always
      *    consists of whole lines; LINES counts the eol sequences that were
      *    passed while scanning forward.
      *  - POOL is only handed to the chunk reading done via
      *    INCREMENT_POINTERS / DECREMENT_POINTERS. */
 383 static svn_error_t *
 384 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
 385                       struct file_info file[], apr_size_t file_len,
 386                       apr_pool_t *pool)
 387 {
       /* HAD_CR remembers that the previous byte was '\r', so that the '\n'
          of a "\r\n" pair is not counted as a second line. */
 388   svn_boolean_t had_cr = FALSE;
 389   svn_boolean_t is_match;
 390   apr_off_t lines = 0;
 391   apr_size_t i;
 392
 393   *reached_one_eof = FALSE;
 394
       /* Do all files agree on the byte at the current position? */
 395   for (i = 1, is_match = TRUE; i < file_len; i++)
 396     is_match = is_match && *file[0].curp == *file[i].curp;
 397   while (is_match)
 398     {
 399 #if SVN_UNALIGNED_ACCESS_IS_OK
 400       apr_ssize_t max_delta, delta;
 401 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 402
 403       /* ### TODO: see if we can take advantage of
 404          diff options like ignore_eol_style or ignore_space. */
 405       /* check for eol, and count */
 406       if (*file[0].curp == '\r')
 407         {
 408           lines++;
 409           had_cr = TRUE;
 410         }
 411       else if (*file[0].curp == '\n' && !had_cr)
 412         {
 413           lines++;
 414         }
 415       else
 416         {
 417           had_cr = FALSE;
 418         }
 419
 420       INCREMENT_POINTERS(file, file_len, pool);
 421
 422 #if SVN_UNALIGNED_ACCESS_IS_OK
 423
 424       /* Try to advance as far as possible with machine-word granularity.
 425        * Determine how far we may advance with chunky ops without reaching
 426        * endp for any of the files.
 427        * Signedness is important here if curp gets close to endp.
 428        */
 429       max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
 430       for (i = 1; i < file_len; i++)
 431         {
 432           delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
 433           if (delta < max_delta)
 434             max_delta = delta;
 435         }
 436
           /* Compare whole machine words; stop at the first word that holds
              an eol char or differs between the files.  That word is then
              examined byte by byte by the surrounding loop. */
 437       is_match = TRUE;
 438       for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
 439         {
 440           apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
 441           if (contains_eol(chunk))
 442             break;
 443
 444           for (i = 1; i < file_len; i++)
 445             if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
 446               {
 447                 is_match = FALSE;
 448                 break;
 449               }
 450
 451           if (! is_match)
 452             break;
 453         }
 454
 455       if (delta /* > 0*/)
 456         {
 457           /* We either found a mismatch or an EOL at or shortly behind curp+delta
 458            * or we cannot proceed with chunky ops without exceeding endp.
 459            * In any way, everything up to curp + delta is equal and not an EOL.
 460            */
 461           for (i = 0; i < file_len; i++)
 462             file[i].curp += delta;
 463
 464           /* Skipped data without EOL markers, so last char was not a CR. */
 465           had_cr = FALSE;
 466         }
 467 #endif
 468
           /* Stop as soon as any file is exhausted; otherwise re-evaluate
              the byte-wise match at the new position. */
 469       *reached_one_eof = is_one_at_eof(file, file_len);
 470       if (*reached_one_eof)
 471         break;
 472       else
 473         for (i = 1, is_match = TRUE; i < file_len; i++)
 474           is_match = is_match && *file[0].curp == *file[i].curp;
 475     }
 476
 477   if (had_cr)
 478     {
 479       /* Check if we ended in the middle of a \r\n for one file, but \r for
 480          another. If so, back up one byte, so the next loop will back up
 481          the entire line. Also decrement lines, since we counted one
 482          too many for the \r. */
 483       svn_boolean_t ended_at_nonmatching_newline = FALSE;
 484       for (i = 0; i < file_len; i++)
 485         if (file[i].curp < file[i].endp)
 486           ended_at_nonmatching_newline = ended_at_nonmatching_newline
 487                                          || *file[i].curp == '\n';
 488       if (ended_at_nonmatching_newline)
 489         {
 490           lines--;
 491           DECREMENT_POINTERS(file, file_len, pool);
 492         }
 493     }
 494
 495   /* Back up one byte, so we point at the last identical byte */
 496   DECREMENT_POINTERS(file, file_len, pool);
 497
 498   /* Back up to the last eol sequence (\n, \r\n or \r) */
 499   while (!is_one_at_bof(file, file_len) &&
 500          *file[0].curp != '\n' && *file[0].curp != '\r')
 501     DECREMENT_POINTERS(file, file_len, pool);
 502
 503   /* Slide one byte forward, to point past the eol sequence */
 504   INCREMENT_POINTERS(file, file_len, pool);
 505
 506   *prefix_lines = lines;
 507
 508   return SVN_NO_ERROR;
 509 }
 510
 511
 512 /* The number of identical suffix lines to keep with the middle section. These
 513  * lines are not eliminated as suffix, and can be picked up by the token
 514  * parsing and lcs steps. This is mainly for backward compatibility with
 515  * the previous diff (and blame) output (if there are multiple diff solutions,
 516  * our lcs algorithm prefers taking common lines from the start, rather than
 517  * from the end. By giving it back some suffix lines, we give it some wiggle
 518  * room to find the exact same diff as before).
 519  *
 520  * The number 50 is more or less arbitrary, based on some real-world tests
 521  * with big files (and then doubling the required number to be on the safe
 522  * side). This has a negligible effect on the power of the optimization. */
 523 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
 524 #ifndef SUFFIX_LINES_TO_KEEP
 525 #define SUFFIX_LINES_TO_KEEP 50
 526 #endif
 527
 528 /* Find the suffix which is identical between all elements of the FILE array.
 529  * Return the number of suffix lines in SUFFIX_LINES.
 530  *
 531  * Before this function is called the FILEs' pointers and chunks should be
 532  * positioned right after the identical prefix (which is the case after
 533  * find_identical_prefix), so we can determine where suffix scanning should
 534  * ultimately stop. */
 535 static svn_error_t *
 536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
 537                       apr_size_t file_len, apr_pool_t *pool)
 538 {
 539   struct file_info file_for_suffix[4] = { { 0 }  };
 540   apr_off_t length[4];
 541   apr_off_t suffix_min_chunk0;
 542   apr_off_t suffix_min_offset0;
 543   apr_off_t min_file_size;
 544   int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
 545   svn_boolean_t is_match;
 546   apr_off_t lines = 0;
 547   svn_boolean_t had_nl;
 548   apr_size_t i;
 549
 550   /* Initialize file_for_suffix[].
 551      Read last chunk, position curp at last byte. */
 552   for (i = 0; i < file_len; i++)
 553     {
 554       file_for_suffix[i].path = file[i].path;
 555       file_for_suffix[i].file = file[i].file;
 556       file_for_suffix[i].size = file[i].size;
 557       file_for_suffix[i].chunk =
 558         (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
 559       length[i] = offset_in_chunk(file_for_suffix[i].size);
 560       if (length[i] == 0)
 561         {
 562           /* last chunk is an empty chunk -> start at next-to-last chunk */
 563           file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
 564           length[i] = CHUNK_SIZE;
 565         }
 566
 567       if (file_for_suffix[i].chunk == file[i].chunk)
 568         {
 569           /* Prefix ended in last chunk, so we can reuse the prefix buffer */
 570           file_for_suffix[i].buffer = file[i].buffer;
 571         }
 572       else
 573         {
 574           /* There is at least more than 1 chunk,
 575              so allocate full chunk size buffer */
 576           file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
 577           SVN_ERR(read_chunk(file_for_suffix[i].file,
 578                              file_for_suffix[i].buffer, length[i],
 579                              chunk_to_offset(file_for_suffix[i].chunk),
 580                              pool));
 581         }
 582       file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
 583       file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
 584     }
 585
 586   /* Get the chunk and pointer offset (for file[0]) at which we should stop
 587      scanning backward for the identical suffix, i.e. when we reach prefix. */
 588   suffix_min_chunk0 = file[0].chunk;
 589   suffix_min_offset0 = file[0].curp - file[0].buffer;
 590
 591   /* Compensate if other files are smaller than file[0] */
 592   for (i = 1, min_file_size = file[0].size; i < file_len; i++)
 593     if (file[i].size < min_file_size)
 594       min_file_size = file[i].size;
 595   if (file[0].size > min_file_size)
 596     {
 597       suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
 598       suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
 599     }
 600
 601   /* Scan backwards until mismatch or until we reach the prefix. */
 602   for (i = 1, is_match = TRUE; i < file_len; i++)
 603     is_match = is_match
 604                && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 605   if (is_match && *file_for_suffix[0].curp != '\r'
 606                && *file_for_suffix[0].curp != '\n')
 607     /* Count an extra line for the last line not ending in an eol. */
 608     lines++;
 609
 610   had_nl = FALSE;
 611   while (is_match)
 612     {
 613       svn_boolean_t reached_prefix;
 614 #if SVN_UNALIGNED_ACCESS_IS_OK
 615       /* Initialize the minimum pointer positions. */
 616       const char *min_curp[4];
 617       svn_boolean_t can_read_word;
 618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 619
 620       /* ### TODO: see if we can take advantage of
 621          diff options like ignore_eol_style or ignore_space. */
 622       /* check for eol, and count */
 623       if (*file_for_suffix[0].curp == '\n')
 624         {
 625           lines++;
 626           had_nl = TRUE;
 627         }
 628       else if (*file_for_suffix[0].curp == '\r' && !had_nl)
 629         {
 630           lines++;
 631         }
 632       else
 633         {
 634           had_nl = FALSE;
 635         }
 636
 637       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
 638
 639 #if SVN_UNALIGNED_ACCESS_IS_OK
 640       for (i = 0; i < file_len; i++)
 641         min_curp[i] = file_for_suffix[i].buffer;
 642
 643       /* If we are in the same chunk that contains the last part of the common
 644          prefix, use the min_curp[0] pointer to make sure we don't get a
 645          suffix that overlaps the already determined common prefix. */
 646       if (file_for_suffix[0].chunk == suffix_min_chunk0)
 647         min_curp[0] += suffix_min_offset0;
 648
 649       /* Scan quickly by reading with machine-word granularity. */
 650       for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
 651         can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
 652                          > min_curp[i]);
 653
 654       while (can_read_word)
 655         {
 656           apr_uintptr_t chunk;
 657
 658           /* For each file curp is positioned at the current byte, but we
 659              want to examine the current byte and the ones before the current
 660              location as one machine word. */
 661
 662           chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
 663                                              - sizeof(apr_uintptr_t));
 664           if (contains_eol(chunk))
 665             break;
 666
 667           for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
 668             is_match = (chunk
 669                            == *(const apr_uintptr_t *)
 670                                     (file_for_suffix[i].curp + 1
 671                                        - sizeof(apr_uintptr_t)));
 672
 673           if (! is_match)
 674             break;
 675
 676           for (i = 0; i < file_len; i++)
 677             {
 678               file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
 679               can_read_word = can_read_word
 680                               && (  (file_for_suffix[i].curp + 1
 681                                        - sizeof(apr_uintptr_t))
 682                                   > min_curp[i]);
 683             }
 684
 685           /* We skipped some bytes, so there are no closing EOLs */
 686           had_nl = FALSE;
 687         }
 688
 689       /* The > min_curp[i] check leaves at least one final byte for checking
 690          in the non block optimized case below. */
 691 #endif
 692
 693       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
 694                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
 695                           == suffix_min_offset0;
 696       if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
 697         break;
 698
 699       is_match = TRUE;
 700       for (i = 1; i < file_len; i++)
 701         is_match = is_match
 702                    && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 703     }
 704
 705   /* Slide one byte forward, to point at the first byte of identical suffix */
 706   INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 707
 708   /* Slide forward until we find an eol sequence to add the rest of the line
 709      we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
 710      one file reaches its end. */
 711   do
 712     {
 713       svn_boolean_t had_cr = FALSE;
 714       while (!is_one_at_eof(file_for_suffix, file_len)
 715              && *file_for_suffix[0].curp != '\n'
 716              && *file_for_suffix[0].curp != '\r')
 717         INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 718
 719       /* Slide one or two more bytes, to point past the eol. */
 720       if (!is_one_at_eof(file_for_suffix, file_len)
 721           && *file_for_suffix[0].curp == '\r')
 722         {
 723           lines--;
 724           had_cr = TRUE;
 725           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 726         }
 727       if (!is_one_at_eof(file_for_suffix, file_len)
 728           && *file_for_suffix[0].curp == '\n')
 729         {
 730           if (!had_cr)
 731             lines--;
 732           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 733         }
 734     }
 735   while (!is_one_at_eof(file_for_suffix, file_len)
 736          && suffix_lines_to_keep--);
 737
 738   if (is_one_at_eof(file_for_suffix, file_len))
 739     lines = 0;
 740
 741   /* Save the final suffix information in the original file_info */
 742   for (i = 0; i < file_len; i++)
 743     {
 744       file[i].suffix_start_chunk = file_for_suffix[i].chunk;
 745       file[i].suffix_offset_in_chunk =
 746         file_for_suffix[i].curp - file_for_suffix[i].buffer;
 747     }
 748
 749   *suffix_lines = lines;
 750
 751   return SVN_NO_ERROR;
 752 }
 753
 754
 755 /* Let FILE stand for the array of file_info struct elements of BATON->files
 756  * that are indexed by the elements of the DATASOURCE array.
 757  * BATON's type is (svn_diff__file_baton_t *).
 758  *
 759  * For each file in the FILE array, open the file at FILE.path; initialize
 760  * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
 761  * buffer and read the first chunk.  Then find the prefix and suffix lines
 762  * which are identical between all the files.  Return the number of identical
 763  * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
 764  * SUFFIX_LINES.
 765  *
 766  * Finding the identical prefix and suffix allows us to exclude those from the
 767  * rest of the diff algorithm, which increases performance by reducing the
 768  * problem space.
 769  *
 770  * Implements svn_diff_fns2_t::datasources_open. */
 771 static svn_error_t *
 772 datasources_open(void *baton,
 773                  apr_off_t *prefix_lines,
 774                  apr_off_t *suffix_lines,
 775                  const svn_diff_datasource_e *datasources,
 776                  apr_size_t datasources_len)
 777 {
 778   svn_diff__file_baton_t *file_baton = baton;
 779   struct file_info files[4];
 780   apr_off_t length[4];
 781 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 782   svn_boolean_t reached_one_eof;
 783 #endif
 784   apr_size_t i;
 785
 786   /* Make sure prefix_lines and suffix_lines are set correctly, even if we
 787    * exit early because one of the files is empty. */
 788   *prefix_lines = 0;
 789   *suffix_lines = 0;
 790
 791   /* Open datasources and read first chunk */
 792   for (i = 0; i < datasources_len; i++)
 793     {
 794       svn_filesize_t filesize;
 795       struct file_info *file
 796           = &file_baton->files[datasource_to_index(datasources[i])];
 797       SVN_ERR(svn_io_file_open(&file->file, file->path,
 798                                APR_READ, APR_OS_DEFAULT, file_baton->pool));
 799       SVN_ERR(svn_io_file_size_get(&filesize, file->file, file_baton->pool));
 800       file->size = filesize;
 801       length[i] = filesize > CHUNK_SIZE ? CHUNK_SIZE : filesize;
 802       file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
 803       SVN_ERR(read_chunk(file->file, file->buffer,
 804                          length[i], 0, file_baton->pool));
 805       file->endp = file->buffer + length[i];
 806       file->curp = file->buffer;
 807       /* Set suffix_start_chunk to a guard value, so if suffix scanning is
 808        * skipped because one of the files is empty, or because of
 809        * reached_one_eof, we can still easily check for the suffix during
 810        * token reading (datasource_get_next_token). */
 811       file->suffix_start_chunk = -1;
 812
 813       files[i] = *file;
 814     }
 815
 816   for (i = 0; i < datasources_len; i++)
 817     if (length[i] == 0)
 818       /* There will not be any identical prefix/suffix, so we're done. */
 819       return SVN_NO_ERROR;
 820
 821 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 822
 823   SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
 824                                 files, datasources_len, file_baton->pool));
 825
 826   if (!reached_one_eof)
 827     /* No file consisted totally of identical prefix,
 828      * so there may be some identical suffix.  */
 829     SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
 830                                   file_baton->pool));
 831
 832 #endif
 833
 834   /* Copy local results back to baton. */
 835   for (i = 0; i < datasources_len; i++)
 836     file_baton->files[datasource_to_index(datasources[i])] = files[i];
 837
 838   return SVN_NO_ERROR;
 839 }
 840
 841
 842 /* Implements svn_diff_fns2_t::datasource_close */
 843 static svn_error_t *
 844 datasource_close(void *baton, svn_diff_datasource_e datasource)
 845 {
 846   /* Do nothing.  The compare_token function needs previous datasources
 847    * to stay available until all datasources are processed.
 848    */
 849
 850   return SVN_NO_ERROR;
 851 }
 852
 853 /* Implements svn_diff_fns2_t::datasource_get_next_token */
 854 static svn_error_t *
 855 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
 856                           svn_diff_datasource_e datasource)
 857 {
 858   svn_diff__file_baton_t *file_baton = baton;
 859   svn_diff__file_token_t *file_token;
 860   struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
 861   char *endp;
 862   char *curp;
 863   char *eol;
 864   apr_off_t last_chunk;
 865   apr_off_t length;
 866   apr_uint32_t h = 0;
 867   /* Did the last chunk end in a CR character? */
 868   svn_boolean_t had_cr = FALSE;
 869
 870   *token = NULL;
 871
 872   curp = file->curp;
 873   endp = file->endp;
 874
 875   last_chunk = offset_to_chunk(file->size);
 876
 877   /* Are we already at the end of a chunk? */
 878   if (curp == endp)
 879     {
 880       /* Are we at EOF */
 881       if (last_chunk == file->chunk)
 882         return SVN_NO_ERROR; /* EOF */
 883
 884       /* Or right before an identical suffix in the next chunk? */
 885       if (file->chunk + 1 == file->suffix_start_chunk
 886           && file->suffix_offset_in_chunk == 0)
 887         return SVN_NO_ERROR;
 888     }
 889
 890   /* Stop when we encounter the identical suffix. If suffix scanning was not
 891    * performed, suffix_start_chunk will be -1, so this condition will never
 892    * be true. */
 893   if (file->chunk == file->suffix_start_chunk
 894       && (curp - file->buffer) == file->suffix_offset_in_chunk)
 895     return SVN_NO_ERROR;
 896
 897   /* Allocate a new token, or fetch one from the "reusable tokens" list. */
 898   file_token = file_baton->tokens;
 899   if (file_token)
 900     {
 901       file_baton->tokens = file_token->next;
 902     }
 903   else
 904     {
 905       file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
 906     }
 907
 908   file_token->datasource = datasource;
 909   file_token->offset = chunk_to_offset(file->chunk)
 910                        + (curp - file->buffer);
 911   file_token->norm_offset = file_token->offset;
 912   file_token->raw_length = 0;
 913   file_token->length = 0;
 914
 915   while (1)
 916     {
 917       eol = svn_eol__find_eol_start(curp, endp - curp);
 918       if (eol)
 919         {
 920           had_cr = (*eol == '\r');
 921           eol++;
 922           /* If we have the whole eol sequence in the chunk... */
 923           if (!(had_cr && eol == endp))
 924             {
 925               /* Also skip past the '\n' in an '\r\n' sequence. */
 926               if (had_cr && *eol == '\n')
 927                 eol++;
 928               break;
 929             }
 930         }
 931
 932       if (file->chunk == last_chunk)
 933         {
 934           eol = endp;
 935           break;
 936         }
 937
 938       length = endp - curp;
 939       file_token->raw_length += length;
 940       {
 941         char *c = curp;
 942
 943         svn_diff__normalize_buffer(&c, &length,
 944                                    &file->normalize_state,
 945                                    curp, file_baton->options);
 946         if (file_token->length == 0)
 947           {
 948             /* When we are reading the first part of the token, move the
 949                normalized offset past leading ignored characters, if any. */
 950             file_token->norm_offset += (c - curp);
 951           }
 952         file_token->length += length;
 953         h = svn__adler32(h, c, length);
 954       }
 955
 956       curp = endp = file->buffer;
 957       file->chunk++;
 958       length = file->chunk == last_chunk ?
 959         offset_in_chunk(file->size) : CHUNK_SIZE;
 960       endp += length;
 961       file->endp = endp;
 962
 963       /* Issue #4283: Normally we should have checked for reaching the skipped
 964          suffix here, but because we assume that a suffix always starts on a
 965          line and token boundary we rely on catching the suffix earlier in this
 966          function.
 967
 968          When changing things here, make sure the whitespace settings are
 969          applied, or we might not reach the exact suffix boundary as token
 970          boundary. */
 971       SVN_ERR(read_chunk(file->file,
 972                          curp, length,
 973                          chunk_to_offset(file->chunk),
 974                          file_baton->pool));
 975
 976       /* If the last chunk ended in a CR, we're done. */
 977       if (had_cr)
 978         {
 979           eol = curp;
 980           if (*curp == '\n')
 981             ++eol;
 982           break;
 983         }
 984     }
 985
 986   length = eol - curp;
 987   file_token->raw_length += length;
 988   file->curp = eol;
 989
 990   /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
 991    * with a spurious empty token.  Avoid returning it.
 992    * Note that we use the unnormalized length; we don't want a line containing
 993    * only spaces (and no trailing newline) to appear like a non-existent
 994    * line. */
 995   if (file_token->raw_length > 0)
 996     {
 997       char *c = curp;
 998       svn_diff__normalize_buffer(&c, &length,
 999                                  &file->normalize_state,
1000                                  curp, file_baton->options);
1001       if (file_token->length == 0)
1002         {
1003           /* When we are reading the first part of the token, move the
1004              normalized offset past leading ignored characters, if any. */
1005           file_token->norm_offset += (c - curp);
1006         }
1007
1008       file_token->length += length;
1009
1010       *hash = svn__adler32(h, c, length);
1011       *token = file_token;
1012     }
1013
1014   return SVN_NO_ERROR;
1015 }
1016
/* Size of the per-token stack buffers token_compare() uses when it has to
 * re-read token data from disk. */
#define COMPARE_CHUNK_SIZE 4096

/* Implements svn_diff_fns2_t::token_compare.
 *
 * Compare the two svn_diff__file_token_t tokens TOKEN1 and TOKEN2 and
 * store the result in *COMPARE: negative, zero or positive.
 *
 * Tokens are first ordered by their normalized length; only tokens of
 * equal, non-zero length have their normalized bytes compared with
 * memcmp().  Token data is taken from the file's chunk buffer when the
 * token starts in the chunk currently held there, and is otherwise
 * re-read from disk in pieces of at most COMPARE_CHUNK_SIZE bytes and
 * normalized again.
 *
 * Returns an SVN_ERR_DIFF_DATASOURCE_MODIFIED error when more data is
 * needed for a token but none of its raw bytes are left to read.
 */
static svn_error_t *
token_compare(void *baton, void *token1, void *token2, int *compare)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token[2];
  /* Scratch space for token data re-read from disk. */
  char buffer[2][COMPARE_CHUNK_SIZE];
  /* Current read position in each token's (normalized) data. */
  char *bufp[2];
  /* File offset of the next raw bytes to read for each token. */
  apr_off_t offset[2];
  struct file_info *file[2];
  /* Normalized bytes currently available at bufp[i]. */
  apr_off_t length[2];
  /* Normalized bytes that still have to be compared. */
  apr_off_t total_length;
  /* How much is left to read of each token from the file. */
  apr_off_t raw_length[2];
  int i;
  svn_diff__normalize_state_t state[2];

  file_token[0] = token1;
  file_token[1] = token2;
  /* Tokens of different normalized length can never be equal; order them
   * by length without looking at the data. */
  if (file_token[0]->length < file_token[1]->length)
    {
      *compare = -1;
      return SVN_NO_ERROR;
    }

  if (file_token[0]->length > file_token[1]->length)
    {
      *compare = 1;
      return SVN_NO_ERROR;
    }

  total_length = file_token[0]->length;
  if (total_length == 0)
    {
      *compare = 0;
      return SVN_NO_ERROR;
    }

  /* Decide, per token, whether its data can be used straight from the
   * chunk buffer or has to be read back from disk. */
  for (i = 0; i < 2; ++i)
    {
      int idx = datasource_to_index(file_token[i]->datasource);

      file[i] = &file_baton->files[idx];
      offset[i] = file_token[i]->norm_offset;
      state[i] = svn_diff__normalize_state_normal;

      if (offset_to_chunk(offset[i]) == file[i]->chunk)
        {
          /* If the start of the token is in memory, the entire token is
           * in memory.
           */
          bufp[i] = file[i]->buffer;
          bufp[i] += offset_in_chunk(offset[i]);

          length[i] = total_length;
          raw_length[i] = 0;
        }
      else
        {
          apr_off_t skipped;

          /* Nothing buffered yet; the loop below will read from disk. */
          length[i] = 0;

          /* When we skipped the first part of the token via the whitespace
             normalization we must reduce the raw length of the token */
          skipped = (file_token[i]->norm_offset - file_token[i]->offset);

          raw_length[i] = file_token[i]->raw_length - skipped;
        }
    }

  do
    {
      apr_off_t len;
      /* Refill whichever side has run out of buffered data. */
      for (i = 0; i < 2; i++)
        {
          if (length[i] == 0)
            {
              /* Error if raw_length is 0, that's an unexpected change
               * of the file that can happen when ignoring whitespace
               * and that can lead to an infinite loop. */
              if (raw_length[i] == 0)
                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
                                         NULL,
                                         _("The file '%s' changed unexpectedly"
                                           " during diff"),
                                         file[i]->path);

              /* Read a chunk from disk into a buffer */
              bufp[i] = buffer[i];
              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
                COMPARE_CHUNK_SIZE : raw_length[i];

              SVN_ERR(read_chunk(file[i]->file,
                                 bufp[i], length[i], offset[i],
                                 file_baton->pool));
              offset[i] += length[i];
              raw_length[i] -= length[i];
              /* bufp[i] gets reset to buffer[i] before reading each chunk,
                 so, overwriting it isn't a problem */
              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
                                         bufp[i], file_baton->options);

              /* assert(length[i] == file_token[i]->length); */
            }
        }

      /* Compare only as many bytes as both sides currently have. */
      len = length[0] > length[1] ? length[1] : length[0];

      /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
      if (*compare != 0)
        return SVN_NO_ERROR;

      /* Consume the compared bytes on both sides. */
      total_length -= len;
      length[0] -= len;
      length[1] -= len;
      bufp[0] += len;
      bufp[1] += len;
    }
  while(total_length > 0);

  *compare = 0;
  return SVN_NO_ERROR;
}
1146
1147
1148 /* Implements svn_diff_fns2_t::token_discard */
1149 static void
1150 token_discard(void *baton, void *token)
1151 {
1152   svn_diff__file_baton_t *file_baton = baton;
1153   svn_diff__file_token_t *file_token = token;
1154
1155   /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1156   file_token->next = file_baton->tokens;
1157   file_baton->tokens = file_token;
1158 }
1159
1160
1161 /* Implements svn_diff_fns2_t::token_discard_all */
1162 static void
1163 token_discard_all(void *baton)
1164 {
1165   svn_diff__file_baton_t *file_baton = baton;
1166
1167   /* Discard all memory in use by the tokens, and close all open files. */
1168   svn_pool_clear(file_baton->pool);
1169 }
1170
1171
/* The svn_diff_fns2_t callback table for file-based diffs.  It is handed
 * to svn_diff_diff_2(), svn_diff_diff3_2() and svn_diff_diff4_2() together
 * with a svn_diff__file_baton_t.  The initializers are positional, so
 * their order must match the member order of svn_diff_fns2_t. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,           /* open the files, scan prefix/suffix */
  datasource_close,           /* no-op */
  datasource_get_next_token,  /* read and hash the next line */
  token_compare,              /* compare two lines */
  token_discard,              /* recycle one token */
  token_discard_all           /* clear the baton's pool */
};
1181
/* Id for the --ignore-eol-style option, which doesn't have a short name. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse().
 *
 * Each entry is an apr_getopt_option_t; the second member is the id that
 * svn_diff_file_options_parse() switches on.  Of the entries below only
 * "context" (-U) is declared with a non-zero third member, and it is the
 * only one whose option argument the parser reads.  "unified" (-u) is
 * accepted but has no effect in the parser. */
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support. */
  { "unified", 'u', 0, NULL },
  { "context", 'U', 1, NULL },
  /* Terminating entry. */
  { NULL, 0, 0, NULL }
};
1198
1199 svn_diff_file_options_t *
1200 svn_diff_file_options_create(apr_pool_t *pool)
1201 {
1202   svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1203
1204   opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1205
1206   return opts;
1207 }
1208
/* A baton for use with opt_parsing_error_func(). */
struct opt_parsing_error_baton_t
{
  /* The error built by opt_parsing_error_func(); NULL until it is called
   * (svn_diff_file_options_parse() initializes it to NULL). */
  svn_error_t *err;
  /* Pool in which opt_parsing_error_func() formats the message text. */
  apr_pool_t *pool;
};
1215
1216 /* Store an error message from apr_getopt_long().  Set BATON->err to a new
1217  * error with a message generated from FMT and the remaining arguments.
1218  * Implements apr_getopt_err_fn_t. */
1219 static void
1220 opt_parsing_error_func(void *baton,
1221                        const char *fmt, ...)
1222 {
1223   struct opt_parsing_error_baton_t *b = baton;
1224   const char *message;
1225   va_list ap;
1226
1227   va_start(ap, fmt);
1228   message = apr_pvsprintf(b->pool, fmt, ap);
1229   va_end(ap);
1230
1231   /* Skip leading ": " (if present, which it always is in known cases). */
1232   if (strncmp(message, ": ", 2) == 0)
1233     message += 2;
1234
1235   b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1236 }
1237
1238 svn_error_t *
1239 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1240                             const apr_array_header_t *args,
1241                             apr_pool_t *pool)
1242 {
1243   apr_getopt_t *os;
1244   struct opt_parsing_error_baton_t opt_parsing_error_baton;
1245   apr_array_header_t *argv;
1246
1247   opt_parsing_error_baton.err = NULL;
1248   opt_parsing_error_baton.pool = pool;
1249
1250   /* Make room for each option (starting at index 1) plus trailing NULL. */
1251   argv = apr_array_make(pool, args->nelts + 2, sizeof(char*));
1252   APR_ARRAY_PUSH(argv, const char *) = "";
1253   apr_array_cat(argv, args);
1254   APR_ARRAY_PUSH(argv, const char *) = NULL;
1255
1256   apr_getopt_init(&os, pool,
1257                   argv->nelts - 1 /* Exclude trailing NULL */,
1258                   (const char *const *) argv->elts);
1259
1260   /* Capture any error message from apr_getopt_long().  This will typically
1261    * say which option is wrong, which we would not otherwise know. */
1262   os->errfn = opt_parsing_error_func;
1263   os->errarg = &opt_parsing_error_baton;
1264
1265   while (1)
1266     {
1267       const char *opt_arg;
1268       int opt_id;
1269       apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1270
1271       if (APR_STATUS_IS_EOF(err))
1272         break;
1273       if (err)
1274         /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1275          * it always will produce one, but never mind if it doesn't.  Avoid
1276          * using the message associated with the return code ERR, because
1277          * it refers to the "command line" which may be misleading here. */
1278         return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1279                                 opt_parsing_error_baton.err,
1280                                 _("Error in options to internal diff"));
1281
1282       switch (opt_id)
1283         {
1284         case 'b':
1285           /* -w takes precedence over -b. */
1286           if (! options->ignore_space)
1287             options->ignore_space = svn_diff_file_ignore_space_change;
1288           break;
1289         case 'w':
1290           options->ignore_space = svn_diff_file_ignore_space_all;
1291           break;
1292         case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1293           options->ignore_eol_style = TRUE;
1294           break;
1295         case 'p':
1296           options->show_c_function = TRUE;
1297           break;
1298         case 'U':
1299           SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1300           break;
1301         default:
1302           break;
1303         }
1304     }
1305
1306   /* Check for spurious arguments. */
1307   if (os->ind < os->argc)
1308     return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1309                              _("Invalid argument '%s' in diff options"),
1310                              os->argv[os->ind]);
1311
1312   return SVN_NO_ERROR;
1313 }
1314
1315 svn_error_t *
1316 svn_diff_file_diff_2(svn_diff_t **diff,
1317                      const char *original,
1318                      const char *modified,
1319                      const svn_diff_file_options_t *options,
1320                      apr_pool_t *pool)
1321 {
1322   svn_diff__file_baton_t baton = { 0 };
1323
1324   baton.options = options;
1325   baton.files[0].path = original;
1326   baton.files[1].path = modified;
1327   baton.pool = svn_pool_create(pool);
1328
1329   SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1330
1331   svn_pool_destroy(baton.pool);
1332   return SVN_NO_ERROR;
1333 }
1334
1335 svn_error_t *
1336 svn_diff_file_diff3_2(svn_diff_t **diff,
1337                       const char *original,
1338                       const char *modified,
1339                       const char *latest,
1340                       const svn_diff_file_options_t *options,
1341                       apr_pool_t *pool)
1342 {
1343   svn_diff__file_baton_t baton = { 0 };
1344
1345   baton.options = options;
1346   baton.files[0].path = original;
1347   baton.files[1].path = modified;
1348   baton.files[2].path = latest;
1349   baton.pool = svn_pool_create(pool);
1350
1351   SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1352
1353   svn_pool_destroy(baton.pool);
1354   return SVN_NO_ERROR;
1355 }
1356
1357 svn_error_t *
1358 svn_diff_file_diff4_2(svn_diff_t **diff,
1359                       const char *original,
1360                       const char *modified,
1361                       const char *latest,
1362                       const char *ancestor,
1363                       const svn_diff_file_options_t *options,
1364                       apr_pool_t *pool)
1365 {
1366   svn_diff__file_baton_t baton = { 0 };
1367
1368   baton.options = options;
1369   baton.files[0].path = original;
1370   baton.files[1].path = modified;
1371   baton.files[2].path = latest;
1372   baton.files[3].path = ancestor;
1373   baton.pool = svn_pool_create(pool);
1374
1375   SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1376
1377   svn_pool_destroy(baton.pool);
1378   return SVN_NO_ERROR;
1379 }
1380
1381 \f
1382 /** Display unified context diffs **/
1383
/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead. */
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
/* State shared by the unified-diff output callbacks.  In the two-element
 * arrays, index 0 refers to the original file and index 1 to the modified
 * file. */
typedef struct svn_diff__file_output_baton_t
{
  /* Stream that hunk headers and hunk contents are written to. */
  svn_stream_t *output_stream;
  /* Encoding passed along when writing the hunk header and the
   * "no newline" message. */
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  const char *path[2];
  /* The files that output_unified_line() reads from. */
  apr_file_t *file[2];

  /* Incremented once per output_unified_line() call for that file. */
  apr_off_t   current_line[2];

  /* Read buffer per file, with the number of unconsumed bytes in it and
   * the position of the first unconsumed byte. */
  char        buffer[2][4096];
  apr_size_t  length[2];
  char       *curp[2];

  /* Start line (zero based) and number of lines of the hunk that is
   * currently being collected, per file. */
  apr_off_t   hunk_start[2];
  apr_off_t   hunk_length[2];
  /* Text of the hunk collected so far; written out and emptied by
   * output_unified_flush_hunk(). */
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  /* Number of context lines to show around a change. */
  int context_size;

  /* Cancel handler */
  svn_cancel_func_t cancel_func;
  void *cancel_baton;

  /* Pool used for file reads and for writing hunk headers. */
  apr_pool_t *pool;
} svn_diff__file_output_baton_t;
1428
/* How output_unified_line() treats the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Read the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts in both hunk lengths. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts in hunk_length[0]. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts in hunk_length[1]. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1436
1437
/* Consume one line from file IDX of BATON.
 *
 * The file's current_line counter is always incremented, even when the
 * file is already at EOF.  Unless TYPE is
 * svn_diff__file_output_unified_skip, the line is appended to BATON->hunk,
 * preceded by the marker string that belongs to TYPE, and the matching
 * hunk_length counters are incremented.  When show_c_function is set and
 * a skipped or context line starts with a letter, '$' or '_' (and does not
 * match extra_skip_match), the line also replaces BATON->extra_context.
 *
 * Lines may span several reads of the 4096-byte buffer, and a CR at the
 * very end of one read is paired with an LF at the start of the next.
 *
 * Returns any read error other than EOF.  At EOF, if data of a non-skipped
 * line was already appended and it did not end in a CR, the "no newline"
 * message is appended to the hunk.
 */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  /* Set once part of the line has been consumed without finding its EOL. */
  svn_boolean_t bytes_processed = FALSE;
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  /* Resume where the previous call left off in the read buffer. */
  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          /* First data seen for this line: emit the marker and decide
           * whether to collect extra context. */
          if (!bytes_processed)
            {
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* The EOL is complete unless it is a CR in the very last
               * byte of the buffer, in which case an LF may still follow
               * in the next read. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  /* Remember the buffer position for the next call. */
                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          /* No complete EOL in the buffered data: consume all of it and
           * go on reading. */
          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      /* Refill the buffer from the file. */
      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  /* Anything but EOF is a real error. */
  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}
1602
1603 static APR_INLINE svn_error_t *
1604 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1605                           int source,
1606                           svn_diff__file_output_unified_type_e type,
1607                           apr_off_t until,
1608                           svn_cancel_func_t cancel_func,
1609                           void *cancel_baton)
1610 {
1611   while (output_baton->current_line[source] < until)
1612     {
1613       if (cancel_func)
1614         SVN_ERR(cancel_func(cancel_baton));
1615
1616       SVN_ERR(output_unified_line(output_baton, type, source));
1617     }
1618   return SVN_NO_ERROR;
1619 }
1620
1621 static svn_error_t *
1622 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1623 {
1624   apr_off_t target_line;
1625   apr_size_t hunk_len;
1626   apr_off_t old_start;
1627   apr_off_t new_start;
1628
1629   if (svn_stringbuf_isempty(baton->hunk))
1630     {
1631       /* Nothing to flush */
1632       return SVN_NO_ERROR;
1633     }
1634
1635   target_line = baton->hunk_start[0] + baton->hunk_length[0]
1636                 + baton->context_size;
1637
1638   /* Add trailing context to the hunk */
1639   SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1640                                     svn_diff__file_output_unified_context,
1641                                     target_line,
1642                                     baton->cancel_func, baton->cancel_baton));
1643
1644   old_start = baton->hunk_start[0];
1645   new_start = baton->hunk_start[1];
1646
1647   /* If the file is non-empty, convert the line indexes from
1648      zero based to one based */
1649   if (baton->hunk_length[0])
1650     old_start++;
1651   if (baton->hunk_length[1])
1652     new_start++;
1653
1654   /* Write the hunk header */
1655   SVN_ERR(svn_diff__unified_write_hunk_header(
1656             baton->output_stream, baton->header_encoding, "@@",
1657             old_start, baton->hunk_length[0],
1658             new_start, baton->hunk_length[1],
1659             baton->hunk_extra_context,
1660             baton->pool));
1661
1662   /* Output the hunk content */
1663   hunk_len = baton->hunk->len;
1664   SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1665                            &hunk_len));
1666
1667   /* Prepare for the next hunk */
1668   baton->hunk_length[0] = 0;
1669   baton->hunk_length[1] = 0;
1670   baton->hunk_start[0] = 0;
1671   baton->hunk_start[1] = 0;
1672   svn_stringbuf_setempty(baton->hunk);
1673
1674   return SVN_NO_ERROR;
1675 }
1676
1677 static svn_error_t *
1678 output_unified_diff_modified(void *baton,
1679   apr_off_t original_start, apr_off_t original_length,
1680   apr_off_t modified_start, apr_off_t modified_length,
1681   apr_off_t latest_start, apr_off_t latest_length)
1682 {
1683   svn_diff__file_output_baton_t *output_baton = baton;
1684   apr_off_t context_prefix_length;
1685   apr_off_t prev_context_end;
1686   svn_boolean_t init_hunk = FALSE;
1687
1688   if (original_start > output_baton->context_size)
1689     context_prefix_length = output_baton->context_size;
1690   else
1691     context_prefix_length = original_start;
1692
1693   /* Calculate where the previous hunk will end if we would write it now
1694      (including the necessary context at the end) */
1695   if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1696     {
1697       prev_context_end = output_baton->hunk_start[0]
1698                          + output_baton->hunk_length[0]
1699                          + output_baton->context_size;
1700     }
1701   else
1702     {
1703       prev_context_end = -1;
1704
1705       if (output_baton->hunk_start[0] == 0
1706           && (original_length > 0 || modified_length > 0))
1707         init_hunk = TRUE;
1708     }
1709
1710   /* If the changed range is far enough from the previous range, flush the current
1711      hunk. */
1712   {
1713     apr_off_t new_hunk_start = (original_start - context_prefix_length);
1714
1715     if (output_baton->current_line[0] < new_hunk_start
1716           && prev_context_end <= new_hunk_start)
1717       {
1718         SVN_ERR(output_unified_flush_hunk(output_baton));
1719         init_hunk = TRUE;
1720       }
1721     else if (output_baton->hunk_length[0] > 0
1722              || output_baton->hunk_length[1] > 0)
1723       {
1724         /* We extend the current hunk */
1725
1726
1727         /* Original: Output the context preceding the changed range */
1728         SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1729                                           svn_diff__file_output_unified_context,
1730                                           original_start,
1731                                           output_baton->cancel_func,
1732                                           output_baton->cancel_baton));
1733       }
1734   }
1735
1736   /* Original: Skip lines until we are at the beginning of the context we want
1737      to display */
1738   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1739                                     svn_diff__file_output_unified_skip,
1740                                     original_start - context_prefix_length,
1741                                     output_baton->cancel_func,
1742                                     output_baton->cancel_baton));
1743
1744   /* Note that the above skip stores data for the show_c_function support below */
1745
1746   if (init_hunk)
1747     {
1748       SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1749                      && output_baton->hunk_length[1] == 0);
1750
1751       output_baton->hunk_start[0] = original_start - context_prefix_length;
1752       output_baton->hunk_start[1] = modified_start - context_prefix_length;
1753     }
1754
1755   if (init_hunk && output_baton->show_c_function)
1756     {
1757       apr_size_t p;
1758       const char *invalid_character;
1759
1760       /* Save the extra context for later use.
1761        * Note that the last byte of the hunk_extra_context array is never
1762        * touched after it is zero-initialized, so the array is always
1763        * 0-terminated. */
1764       strncpy(output_baton->hunk_extra_context,
1765               output_baton->extra_context->data,
1766               SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1767       /* Trim whitespace at the end, most notably to get rid of any
1768        * newline characters. */
1769       p = strlen(output_baton->hunk_extra_context);
1770       while (p > 0
1771              && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1772         {
1773           output_baton->hunk_extra_context[--p] = '\0';
1774         }
1775       invalid_character =
1776         svn_utf__last_valid(output_baton->hunk_extra_context,
1777                             SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1778       for (p = invalid_character - output_baton->hunk_extra_context;
1779            p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1780         {
1781           output_baton->hunk_extra_context[p] = '\0';
1782         }
1783     }
1784
1785   /* Modified: Skip lines until we are at the start of the changed range */
1786   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1787                                     svn_diff__file_output_unified_skip,
1788                                     modified_start,
1789                                     output_baton->cancel_func,
1790                                     output_baton->cancel_baton));
1791
1792   /* Original: Output the context preceding the changed range */
1793   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1794                                     svn_diff__file_output_unified_context,
1795                                     original_start,
1796                                     output_baton->cancel_func,
1797                                     output_baton->cancel_baton));
1798
1799   /* Both: Output the changed range */
1800   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1801                                     svn_diff__file_output_unified_delete,
1802                                     original_start + original_length,
1803                                     output_baton->cancel_func,
1804                                     output_baton->cancel_baton));
1805   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1806                                     svn_diff__file_output_unified_insert,
1807                                     modified_start + modified_length,
1808                                     output_baton->cancel_func,
1809                                     output_baton->cancel_baton));
1810
1811   return SVN_NO_ERROR;
1812 }
1813
1814 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1815 static svn_error_t *
1816 output_unified_default_hdr(const char **header, const char *path,
1817                            apr_pool_t *pool)
1818 {
1819   apr_finfo_t file_info;
1820   apr_time_exp_t exploded_time;
1821   char time_buffer[64];
1822   apr_size_t time_len;
1823   const char *utf8_timestr;
1824
1825   SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1826   apr_time_exp_lt(&exploded_time, file_info.mtime);
1827
1828   apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1829   /* Order of date components can be different in different languages */
1830                _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1831
1832   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1833
1834   *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1835
1836   return SVN_NO_ERROR;
1837 }
1838
1839 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1840 {
1841   NULL, /* output_common */
1842   output_unified_diff_modified,
1843   NULL, /* output_diff_latest */
1844   NULL, /* output_diff_common */
1845   NULL  /* output_conflict */
1846 };
1847
1848 svn_error_t *
1849 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1850                               svn_diff_t *diff,
1851                               const char *original_path,
1852                               const char *modified_path,
1853                               const char *original_header,
1854                               const char *modified_header,
1855                               const char *header_encoding,
1856                               const char *relative_to_dir,
1857                               svn_boolean_t show_c_function,
1858                               int context_size,
1859                               svn_cancel_func_t cancel_func,
1860                               void *cancel_baton,
1861                               apr_pool_t *pool)
1862 {
1863   if (svn_diff_contains_diffs(diff))
1864     {
1865       svn_diff__file_output_baton_t baton;
1866       int i;
1867
1868       memset(&baton, 0, sizeof(baton));
1869       baton.output_stream = output_stream;
1870       baton.cancel_func = cancel_func;
1871       baton.cancel_baton = cancel_baton;
1872       baton.pool = pool;
1873       baton.header_encoding = header_encoding;
1874       baton.path[0] = original_path;
1875       baton.path[1] = modified_path;
1876       baton.hunk = svn_stringbuf_create_empty(pool);
1877       baton.show_c_function = show_c_function;
1878       baton.extra_context = svn_stringbuf_create_empty(pool);
1879       baton.context_size = (context_size >= 0) ? context_size
1880                                               : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1881
1882       if (show_c_function)
1883         {
1884           baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1885
1886           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1887           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1888           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1889         }
1890
1891       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1892                                             header_encoding, pool));
1893       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1894                                             header_encoding, pool));
1895       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1896                                             header_encoding, pool));
1897
1898       if (relative_to_dir)
1899         {
1900           /* Possibly adjust the "original" and "modified" paths shown in
1901              the output (see issue #2723). */
1902           const char *child_path;
1903
1904           if (! original_header)
1905             {
1906               child_path = svn_dirent_is_child(relative_to_dir,
1907                                                original_path, pool);
1908               if (child_path)
1909                 original_path = child_path;
1910               else
1911                 return svn_error_createf(
1912                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1913                                    _("Path '%s' must be inside "
1914                                      "the directory '%s'"),
1915                                    svn_dirent_local_style(original_path, pool),
1916                                    svn_dirent_local_style(relative_to_dir,
1917                                                           pool));
1918             }
1919
1920           if (! modified_header)
1921             {
1922               child_path = svn_dirent_is_child(relative_to_dir,
1923                                                modified_path, pool);
1924               if (child_path)
1925                 modified_path = child_path;
1926               else
1927                 return svn_error_createf(
1928                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1929                                    _("Path '%s' must be inside "
1930                                      "the directory '%s'"),
1931                                    svn_dirent_local_style(modified_path, pool),
1932                                    svn_dirent_local_style(relative_to_dir,
1933                                                           pool));
1934             }
1935         }
1936
1937       for (i = 0; i < 2; i++)
1938         {
1939           SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1940                                    APR_READ, APR_OS_DEFAULT, pool));
1941         }
1942
1943       if (original_header == NULL)
1944         {
1945           SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1946                                              pool));
1947         }
1948
1949       if (modified_header == NULL)
1950         {
1951           SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1952                                              pool));
1953         }
1954
1955       SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1956                                              original_header, modified_header,
1957                                              pool));
1958
1959       SVN_ERR(svn_diff_output2(diff, &baton,
1960                                &svn_diff__file_output_unified_vtable,
1961                                cancel_func, cancel_baton));
1962       SVN_ERR(output_unified_flush_hunk(&baton));
1963
1964       for (i = 0; i < 2; i++)
1965         {
1966           SVN_ERR(svn_io_file_close(baton.file[i], pool));
1967         }
1968     }
1969
1970   return SVN_NO_ERROR;
1971 }
1972
1973 \f
1974 /** Display diff3 **/
1975
1976 /* A stream to remember *leading* context.  Note that this stream does
1977    *not* copy the data that it is remembering; it just saves
1978    *pointers! */
1979 typedef struct context_saver_t {
1980   svn_stream_t *stream;
1981   int context_size;
1982   const char **data; /* const char *data[context_size] */
1983   apr_size_t *len;   /* apr_size_t len[context_size] */
1984   apr_size_t next_slot;
1985   apr_ssize_t total_writes;
1986 } context_saver_t;
1987
1988
1989 static svn_error_t *
1990 context_saver_stream_write(void *baton,
1991                            const char *data,
1992                            apr_size_t *len)
1993 {
1994   context_saver_t *cs = baton;
1995
1996   if (cs->context_size > 0)
1997     {
1998       cs->data[cs->next_slot] = data;
1999       cs->len[cs->next_slot] = *len;
2000       cs->next_slot = (cs->next_slot + 1) % cs->context_size;
2001       cs->total_writes++;
2002     }
2003   return SVN_NO_ERROR;
2004 }
2005
2006 typedef struct svn_diff3__file_output_baton_t
2007 {
2008   svn_stream_t *output_stream;
2009
2010   const char *path[3];
2011
2012   apr_off_t   current_line[3];
2013
2014   char       *buffer[3];
2015   char       *endp[3];
2016   char       *curp[3];
2017
2018   /* The following four members are in the encoding used for the output. */
2019   const char *conflict_modified;
2020   const char *conflict_original;
2021   const char *conflict_separator;
2022   const char *conflict_latest;
2023
2024   const char *marker_eol;
2025
2026   svn_diff_conflict_display_style_t conflict_style;
2027   int context_size;
2028
2029   /* cancel support */
2030   svn_cancel_func_t cancel_func;
2031   void *cancel_baton;
2032
2033   /* The rest of the fields are for
2034      svn_diff_conflict_display_only_conflicts only.  Note that for
2035      these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
2036      (soon after a conflict) a "trailing context stream", never the
2037      actual output stream.*/
2038   /* The actual output stream. */
2039   svn_stream_t *real_output_stream;
2040   context_saver_t *context_saver;
2041   /* Used to allocate context_saver and trailing context streams, and
2042      for some printfs. */
2043   apr_pool_t *pool;
2044 } svn_diff3__file_output_baton_t;
2045
2046 static svn_error_t *
2047 flush_context_saver(context_saver_t *cs,
2048                     svn_stream_t *output_stream)
2049 {
2050   int i;
2051   for (i = 0; i < cs->context_size; i++)
2052     {
2053       apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2054       if (cs->data[slot])
2055         {
2056           apr_size_t len = cs->len[slot];
2057           SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2058         }
2059     }
2060   return SVN_NO_ERROR;
2061 }
2062
2063 static void
2064 make_context_saver(svn_diff3__file_output_baton_t *fob)
2065 {
2066   context_saver_t *cs;
2067
2068   assert(fob->context_size > 0); /* Or nothing to save */
2069
2070   svn_pool_clear(fob->pool);
2071   cs = apr_pcalloc(fob->pool, sizeof(*cs));
2072   cs->stream = svn_stream_empty(fob->pool);
2073   svn_stream_set_baton(cs->stream, cs);
2074   svn_stream_set_write(cs->stream, context_saver_stream_write);
2075   fob->context_saver = cs;
2076   fob->output_stream = cs->stream;
2077   cs->context_size = fob->context_size;
2078   cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2079   cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2080 }
2081
2082
2083 /* A stream which prints LINES_TO_PRINT (based on context size) lines to
2084    BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2085    a context_saver; used for *trailing* context. */
2086
2087 struct trailing_context_printer {
2088   apr_size_t lines_to_print;
2089   svn_diff3__file_output_baton_t *fob;
2090 };
2091
2092
2093
2094 static svn_error_t *
2095 trailing_context_printer_write(void *baton,
2096                                const char *data,
2097                                apr_size_t *len)
2098 {
2099   struct trailing_context_printer *tcp = baton;
2100   SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2101   SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2102   tcp->lines_to_print--;
2103   if (tcp->lines_to_print == 0)
2104     make_context_saver(tcp->fob);
2105   return SVN_NO_ERROR;
2106 }
2107
2108
2109 static void
2110 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2111 {
2112   struct trailing_context_printer *tcp;
2113   svn_stream_t *s;
2114
2115   svn_pool_clear(btn->pool);
2116
2117   tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2118   tcp->lines_to_print = btn->context_size;
2119   tcp->fob = btn;
2120   s = svn_stream_empty(btn->pool);
2121   svn_stream_set_baton(s, tcp);
2122   svn_stream_set_write(s, trailing_context_printer_write);
2123   btn->output_stream = s;
2124 }
2125
2126
2127
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the baton's output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2133
2134
2135 static svn_error_t *
2136 output_line(svn_diff3__file_output_baton_t *baton,
2137             svn_diff3__file_output_type_e type, int idx)
2138 {
2139   char *curp;
2140   char *endp;
2141   char *eol;
2142   apr_size_t len;
2143
2144   curp = baton->curp[idx];
2145   endp = baton->endp[idx];
2146
2147   /* Lazily update the current line even if we're at EOF.
2148    */
2149   baton->current_line[idx]++;
2150
2151   if (curp == endp)
2152     return SVN_NO_ERROR;
2153
2154   eol = svn_eol__find_eol_start(curp, endp - curp);
2155   if (!eol)
2156     eol = endp;
2157   else
2158     {
2159       svn_boolean_t had_cr = (*eol == '\r');
2160       eol++;
2161       if (had_cr && eol != endp && *eol == '\n')
2162         eol++;
2163     }
2164
2165   if (type != svn_diff3__file_output_skip)
2166     {
2167       len = eol - curp;
2168       /* Note that the trailing context printer assumes that
2169          svn_stream_write is called exactly once per line. */
2170       SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2171     }
2172
2173   baton->curp[idx] = eol;
2174
2175   return SVN_NO_ERROR;
2176 }
2177
2178 static svn_error_t *
2179 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2180 {
2181   return svn_stream_puts(btn->output_stream, btn->marker_eol);
2182 }
2183
2184 static svn_error_t *
2185 output_hunk(void *baton, int idx, apr_off_t target_line,
2186             apr_off_t target_length)
2187 {
2188   svn_diff3__file_output_baton_t *output_baton = baton;
2189
2190   /* Skip lines until we are at the start of the changed range */
2191   while (output_baton->current_line[idx] < target_line)
2192     {
2193       SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2194     }
2195
2196   target_line += target_length;
2197
2198   while (output_baton->current_line[idx] < target_line)
2199     {
2200       SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2201     }
2202
2203   return SVN_NO_ERROR;
2204 }
2205
2206 static svn_error_t *
2207 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2208               apr_off_t modified_start, apr_off_t modified_length,
2209               apr_off_t latest_start, apr_off_t latest_length)
2210 {
2211   return output_hunk(baton, 1, modified_start, modified_length);
2212 }
2213
2214 static svn_error_t *
2215 output_diff_modified(void *baton,
2216                      apr_off_t original_start, apr_off_t original_length,
2217                      apr_off_t modified_start, apr_off_t modified_length,
2218                      apr_off_t latest_start, apr_off_t latest_length)
2219 {
2220   return output_hunk(baton, 1, modified_start, modified_length);
2221 }
2222
2223 static svn_error_t *
2224 output_diff_latest(void *baton,
2225                    apr_off_t original_start, apr_off_t original_length,
2226                    apr_off_t modified_start, apr_off_t modified_length,
2227                    apr_off_t latest_start, apr_off_t latest_length)
2228 {
2229   return output_hunk(baton, 2, latest_start, latest_length);
2230 }
2231
2232 static svn_error_t *
2233 output_conflict(void *baton,
2234                 apr_off_t original_start, apr_off_t original_length,
2235                 apr_off_t modified_start, apr_off_t modified_length,
2236                 apr_off_t latest_start, apr_off_t latest_length,
2237                 svn_diff_t *diff);
2238
2239 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2240 {
2241   output_common,
2242   output_diff_modified,
2243   output_diff_latest,
2244   output_diff_modified, /* output_diff_common */
2245   output_conflict
2246 };
2247
2248 static svn_error_t *
2249 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2250                                     const char *label,
2251                                     apr_off_t start,
2252                                     apr_off_t length)
2253 {
2254   if (length == 1)
2255     SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2256                               "%s (%" APR_OFF_T_FMT ")",
2257                               label, start + 1));
2258   else
2259     SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2260                               "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2261                               label, start + 1, length));
2262
2263   SVN_ERR(output_marker_eol(btn));
2264
2265   return SVN_NO_ERROR;
2266 }
2267
2268 static svn_error_t *
2269 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2270                              apr_off_t original_start,
2271                              apr_off_t original_length,
2272                              apr_off_t modified_start,
2273                              apr_off_t modified_length,
2274                              apr_off_t latest_start,
2275                              apr_off_t latest_length)
2276 {
2277   /* Are we currently saving starting context (as opposed to printing
2278      trailing context)?  If so, flush it. */
2279   if (btn->output_stream == btn->context_saver->stream)
2280     {
2281       if (btn->context_saver->total_writes > btn->context_size)
2282         SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2283       SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2284     }
2285
2286   /* Print to the real output stream. */
2287   btn->output_stream = btn->real_output_stream;
2288
2289   /* Output the conflict itself. */
2290   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2291                                               modified_start, modified_length));
2292   SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2293
2294   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2295                                               original_start, original_length));
2296   SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2297
2298   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2299                             "%s%s", btn->conflict_separator, btn->marker_eol));
2300   SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2301   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2302                                               latest_start, latest_length));
2303
2304   /* Go into print-trailing-context mode instead. */
2305   make_trailing_context_printer(btn);
2306
2307   return SVN_NO_ERROR;
2308 }
2309
2310
2311 static svn_error_t *
2312 output_conflict(void *baton,
2313                 apr_off_t original_start, apr_off_t original_length,
2314                 apr_off_t modified_start, apr_off_t modified_length,
2315                 apr_off_t latest_start, apr_off_t latest_length,
2316                 svn_diff_t *diff)
2317 {
2318   svn_diff3__file_output_baton_t *file_baton = baton;
2319
2320   svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2321
2322   if (style == svn_diff_conflict_display_only_conflicts)
2323     return output_conflict_with_context(file_baton,
2324                                         original_start, original_length,
2325                                         modified_start, modified_length,
2326                                         latest_start, latest_length);
2327
2328   if (style == svn_diff_conflict_display_resolved_modified_latest)
2329     {
2330       if (diff)
2331         return svn_diff_output2(diff, baton,
2332                                 &svn_diff3__file_output_vtable,
2333                                 file_baton->cancel_func,
2334                                 file_baton->cancel_baton);
2335       else
2336         style = svn_diff_conflict_display_modified_latest;
2337     }
2338
2339   if (style == svn_diff_conflict_display_modified_latest ||
2340       style == svn_diff_conflict_display_modified_original_latest)
2341     {
2342       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2343                                file_baton->conflict_modified));
2344       SVN_ERR(output_marker_eol(file_baton));
2345
2346       SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2347
2348       if (style == svn_diff_conflict_display_modified_original_latest)
2349         {
2350           SVN_ERR(svn_stream_puts(file_baton->output_stream,
2351                                    file_baton->conflict_original));
2352           SVN_ERR(output_marker_eol(file_baton));
2353           SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2354         }
2355
2356       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2357                               file_baton->conflict_separator));
2358       SVN_ERR(output_marker_eol(file_baton));
2359
2360       SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2361
2362       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2363                               file_baton->conflict_latest));
2364       SVN_ERR(output_marker_eol(file_baton));
2365     }
2366   else if (style == svn_diff_conflict_display_modified)
2367     SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2368   else if (style == svn_diff_conflict_display_latest)
2369     SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2370   else /* unknown style */
2371     SVN_ERR_MALFUNCTION();
2372
2373   return SVN_NO_ERROR;
2374 }
2375
2376 svn_error_t *
2377 svn_diff_file_output_merge3(svn_stream_t *output_stream,
2378                             svn_diff_t *diff,
2379                             const char *original_path,
2380                             const char *modified_path,
2381                             const char *latest_path,
2382                             const char *conflict_original,
2383                             const char *conflict_modified,
2384                             const char *conflict_latest,
2385                             const char *conflict_separator,
2386                             svn_diff_conflict_display_style_t style,
2387                             svn_cancel_func_t cancel_func,
2388                             void *cancel_baton,
2389                             apr_pool_t *scratch_pool)
2390 {
2391   svn_diff3__file_output_baton_t baton;
2392   apr_file_t *file[3];
2393   int idx;
2394 #if APR_HAS_MMAP
2395   apr_mmap_t *mm[3] = { 0 };
2396 #endif /* APR_HAS_MMAP */
2397   const char *eol;
2398   svn_boolean_t conflicts_only =
2399     (style == svn_diff_conflict_display_only_conflicts);
2400
2401   memset(&baton, 0, sizeof(baton));
2402   baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2403   if (conflicts_only)
2404     {
2405       baton.pool = svn_pool_create(scratch_pool);
2406       make_context_saver(&baton);
2407       baton.real_output_stream = output_stream;
2408     }
2409   else
2410     baton.output_stream = output_stream;
2411   baton.path[0] = original_path;
2412   baton.path[1] = modified_path;
2413   baton.path[2] = latest_path;
2414   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2415                                     conflict_modified ? conflict_modified
2416                                     : apr_psprintf(scratch_pool, "<<<<<<< %s",
2417                                                    modified_path),
2418                                     scratch_pool));
2419   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2420                                     conflict_original ? conflict_original
2421                                     : apr_psprintf(scratch_pool, "||||||| %s",
2422                                                    original_path),
2423                                     scratch_pool));
2424   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2425                                     conflict_separator ? conflict_separator
2426                                     : "=======", scratch_pool));
2427   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2428                                     conflict_latest ? conflict_latest
2429                                     : apr_psprintf(scratch_pool, ">>>>>>> %s",
2430                                                    latest_path),
2431                                     scratch_pool));
2432
2433   baton.conflict_style = style;
2434
2435   for (idx = 0; idx < 3; idx++)
2436     {
2437       apr_size_t size;
2438
2439       SVN_ERR(map_or_read_file(&file[idx],
2440                                MMAP_T_ARG(mm[idx])
2441                                &baton.buffer[idx], &size,
2442                                baton.path[idx], scratch_pool));
2443
2444       baton.curp[idx] = baton.buffer[idx];
2445       baton.endp[idx] = baton.buffer[idx];
2446
2447       if (baton.endp[idx])
2448         baton.endp[idx] += size;
2449     }
2450
2451   /* Check what eol marker we should use for conflict markers.
2452      We use the eol marker of the modified file and fall back on the
2453      platform's eol marker if that file doesn't contain any newlines. */
2454   eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2455                             NULL);
2456   if (! eol)
2457     eol = APR_EOL_STR;
2458   baton.marker_eol = eol;
2459
2460   baton.cancel_func = cancel_func;
2461   baton.cancel_baton = cancel_baton;
2462
2463   SVN_ERR(svn_diff_output2(diff, &baton,
2464                           &svn_diff3__file_output_vtable,
2465                           cancel_func, cancel_baton));
2466
2467   for (idx = 0; idx < 3; idx++)
2468     {
2469 #if APR_HAS_MMAP
2470       if (mm[idx])
2471         {
2472           apr_status_t rv = apr_mmap_delete(mm[idx]);
2473           if (rv != APR_SUCCESS)
2474             {
2475               return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2476                                         baton.path[idx]);
2477             }
2478         }
2479 #endif /* APR_HAS_MMAP */
2480
2481       if (file[idx])
2482         {
2483           SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2484         }
2485     }
2486
2487   if (conflicts_only)
2488     svn_pool_destroy(baton.pool);
2489
2490   return SVN_NO_ERROR;
2491 }
2492