contrib/subversion/subversion/libsvn_diff/diff_file.c

   1 /*
   2  * diff_file.c :  routines for doing diffs on files
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_pools.h>
  27 #include <apr_general.h>
  28 #include <apr_file_io.h>
  29 #include <apr_file_info.h>
  30 #include <apr_time.h>
  31 #include <apr_mmap.h>
  32 #include <apr_getopt.h>
  33
  34 #include "svn_error.h"
  35 #include "svn_diff.h"
  36 #include "svn_types.h"
  37 #include "svn_string.h"
  38 #include "svn_subst.h"
  39 #include "svn_io.h"
  40 #include "svn_utf.h"
  41 #include "svn_pools.h"
  42 #include "diff.h"
  43 #include "svn_private_config.h"
  44 #include "svn_path.h"
  45 #include "svn_ctype.h"
  46
  47 #include "private/svn_utf_private.h"
  48 #include "private/svn_eol_private.h"
  49 #include "private/svn_dep_compat.h"
  50 #include "private/svn_adler32.h"
  51 #include "private/svn_diff_private.h"
  52
  53 /* A token, i.e. a line read from a file. */
  54 typedef struct svn_diff__file_token_t
  55 {
  56   /* Next token in free list. */
  57   struct svn_diff__file_token_t *next;
  58   svn_diff_datasource_e datasource;
  59   /* Offset in the datasource. */
  60   apr_off_t offset;
  61   /* Offset of the normalized token (may skip leading whitespace) */
  62   apr_off_t norm_offset;
  63   /* Total length - before normalization. */
  64   apr_off_t raw_length;
  65   /* Total length - after normalization. */
  66   apr_off_t length;
  67 } svn_diff__file_token_t;
  68
  69
  70 typedef struct svn_diff__file_baton_t
  71 {
  72   const svn_diff_file_options_t *options;
  73
  74   struct file_info {
  75     const char *path;  /* path to this file, absolute or relative to CWD */
  76
  77     /* All the following fields are active while this datasource is open */
  78     apr_file_t *file;  /* handle of this file */
  79     apr_off_t size;    /* total raw size in bytes of this file */
  80
  81     /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
  82     int chunk;     /* the current chunk number, zero-based */
  83     char *buffer;  /* a buffer containing the current chunk */
  84     char *curp;    /* current position in the current chunk */
  85     char *endp;    /* next memory address after the current chunk */
  86
  87     svn_diff__normalize_state_t normalize_state;
  88
  89     /* Where the identical suffix starts in this datasource */
  90     int suffix_start_chunk;
  91     apr_off_t suffix_offset_in_chunk;
  92   } files[4];
  93
  94   /* List of free tokens that may be reused. */
  95   svn_diff__file_token_t *tokens;
  96
  97   apr_pool_t *pool;
  98 } svn_diff__file_baton_t;
  99
 100 static int
 101 datasource_to_index(svn_diff_datasource_e datasource)
 102 {
 103   switch (datasource)
 104     {
 105     case svn_diff_datasource_original:
 106       return 0;
 107
 108     case svn_diff_datasource_modified:
 109       return 1;
 110
 111     case svn_diff_datasource_latest:
 112       return 2;
 113
 114     case svn_diff_datasource_ancestor:
 115       return 3;
 116     }
 117
 118   return -1;
 119 }
 120
 121 /* Files are read in chunks of 128k.  There is no support for this number
 122  * whatsoever.  If there is a number someone comes up with that has some
 123  * argumentation, let's use that.
 124  */
 125 /* If you change this number, update test_norm_offset(),
 126  * test_identical_suffix() and and test_token_compare()  in diff-diff3-test.c.
 127  */
 128 #define CHUNK_SHIFT 17
 129 #define CHUNK_SIZE (1 << CHUNK_SHIFT)
 130
 131 #define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT)
 132 #define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
 133 #define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
 134
 135
 136 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
 137  * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
 138  */
 139 static APR_INLINE svn_error_t *
 140 read_chunk(apr_file_t *file, const char *path,
 141            char *buffer, apr_off_t length,
 142            apr_off_t offset, apr_pool_t *pool)
 143 {
 144   /* XXX: The final offset may not be the one we asked for.
 145    * XXX: Check.
 146    */
 147   SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
 148   return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
 149                                 NULL, NULL, pool);
 150 }
 151
 152
 153 /* Map or read a file at PATH. *BUFFER will point to the file
 154  * contents; if the file was mapped, *FILE and *MM will contain the
 155  * mmap context; otherwise they will be NULL.  SIZE will contain the
 156  * file size.  Allocate from POOL.
 157  */
 158 #if APR_HAS_MMAP
 159 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
 160 #define MMAP_T_ARG(NAME)   &(NAME),
 161 #else
 162 #define MMAP_T_PARAM(NAME)
 163 #define MMAP_T_ARG(NAME)
 164 #endif
 165
 166 static svn_error_t *
 167 map_or_read_file(apr_file_t **file,
 168                  MMAP_T_PARAM(mm)
 169                  char **buffer, apr_off_t *size,
 170                  const char *path, apr_pool_t *pool)
 171 {
 172   apr_finfo_t finfo;
 173   apr_status_t rv;
 174
 175   *buffer = NULL;
 176
 177   SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
 178   SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
 179
 180 #if APR_HAS_MMAP
 181   if (finfo.size > APR_MMAP_THRESHOLD)
 182     {
 183       rv = apr_mmap_create(mm, *file, 0, (apr_size_t) finfo.size,
 184                            APR_MMAP_READ, pool);
 185       if (rv == APR_SUCCESS)
 186         {
 187           *buffer = (*mm)->mm;
 188         }
 189
 190       /* On failure we just fall through and try reading the file into
 191        * memory instead.
 192        */
 193     }
 194 #endif /* APR_HAS_MMAP */
 195
 196    if (*buffer == NULL && finfo.size > 0)
 197     {
 198       *buffer = apr_palloc(pool, (apr_size_t) finfo.size);
 199
 200       SVN_ERR(svn_io_file_read_full2(*file, *buffer, (apr_size_t) finfo.size,
 201                                      NULL, NULL, pool));
 202
 203       /* Since we have the entire contents of the file we can
 204        * close it now.
 205        */
 206       SVN_ERR(svn_io_file_close(*file, pool));
 207
 208       *file = NULL;
 209     }
 210
 211   *size = finfo.size;
 212
 213   return SVN_NO_ERROR;
 214 }
 215
 216
 217 /* For all files in the FILE array, increment the curp pointer.  If a file
 218  * points before the beginning of file, let it point at the first byte again.
 219  * If the end of the current chunk is reached, read the next chunk in the
 220  * buffer and point curp to the start of the chunk.  If EOF is reached, set
 221  * curp equal to endp to indicate EOF. */
 222 #define INCREMENT_POINTERS(all_files, files_len, pool)                       \
 223   do {                                                                       \
 224     apr_size_t svn_macro__i;                                                 \
 225                                                                              \
 226     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 227     {                                                                        \
 228       if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
 229         (all_files)[svn_macro__i].curp++;                                    \
 230       else                                                                   \
 231         SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
 232     }                                                                        \
 233   } while (0)
 234
 235
 236 /* For all files in the FILE array, decrement the curp pointer.  If the
 237  * start of a chunk is reached, read the previous chunk in the buffer and
 238  * point curp to the last byte of the chunk.  If the beginning of a FILE is
 239  * reached, set chunk to -1 to indicate BOF. */
 240 #define DECREMENT_POINTERS(all_files, files_len, pool)                       \
 241   do {                                                                       \
 242     apr_size_t svn_macro__i;                                                 \
 243                                                                              \
 244     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 245     {                                                                        \
 246       if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
 247         (all_files)[svn_macro__i].curp--;                                    \
 248       else                                                                   \
 249         SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
 250     }                                                                        \
 251   } while (0)
 252
 253
 254 static svn_error_t *
 255 increment_chunk(struct file_info *file, apr_pool_t *pool)
 256 {
 257   apr_off_t length;
 258   apr_off_t last_chunk = offset_to_chunk(file->size);
 259
 260   if (file->chunk == -1)
 261     {
 262       /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
 263       file->chunk = 0;
 264       file->curp = file->buffer;
 265     }
 266   else if (file->chunk == last_chunk)
 267     {
 268       /* We are at the last chunk. Indicate EOF by setting curp == endp. */
 269       file->curp = file->endp;
 270     }
 271   else
 272     {
 273       /* There are still chunks left. Read next chunk and reset pointers. */
 274       file->chunk++;
 275       length = file->chunk == last_chunk ?
 276         offset_in_chunk(file->size) : CHUNK_SIZE;
 277       SVN_ERR(read_chunk(file->file, file->path, file->buffer,
 278                          length, chunk_to_offset(file->chunk),
 279                          pool));
 280       file->endp = file->buffer + length;
 281       file->curp = file->buffer;
 282     }
 283
 284   return SVN_NO_ERROR;
 285 }
 286
 287
 288 static svn_error_t *
 289 decrement_chunk(struct file_info *file, apr_pool_t *pool)
 290 {
 291   if (file->chunk == 0)
 292     {
 293       /* We are already at the first chunk. Indicate BOF (Beginning Of File)
 294          by setting chunk = -1 and curp = endp - 1. Both conditions are
 295          important. They help the increment step to catch the BOF situation
 296          in an efficient way. */
 297       file->chunk--;
 298       file->curp = file->endp - 1;
 299     }
 300   else
 301     {
 302       /* Read previous chunk and reset pointers. */
 303       file->chunk--;
 304       SVN_ERR(read_chunk(file->file, file->path, file->buffer,
 305                          CHUNK_SIZE, chunk_to_offset(file->chunk),
 306                          pool));
 307       file->endp = file->buffer + CHUNK_SIZE;
 308       file->curp = file->endp - 1;
 309     }
 310
 311   return SVN_NO_ERROR;
 312 }
 313
 314
 315 /* Check whether one of the FILEs has its pointers 'before' the beginning of
 316  * the file (this can happen while scanning backwards). This is the case if
 317  * one of them has chunk == -1. */
 318 static svn_boolean_t
 319 is_one_at_bof(struct file_info file[], apr_size_t file_len)
 320 {
 321   apr_size_t i;
 322
 323   for (i = 0; i < file_len; i++)
 324     if (file[i].chunk == -1)
 325       return TRUE;
 326
 327   return FALSE;
 328 }
 329
 330 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
 331  * one of them has curp == endp (this can only happen at the last chunk)) */
 332 static svn_boolean_t
 333 is_one_at_eof(struct file_info file[], apr_size_t file_len)
 334 {
 335   apr_size_t i;
 336
 337   for (i = 0; i < file_len; i++)
 338     if (file[i].curp == file[i].endp)
 339       return TRUE;
 340
 341   return FALSE;
 342 }
 343
 344 /* Quickly determine whether there is a eol char in CHUNK.
 345  * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 346  */
 347
 348 #if SVN_UNALIGNED_ACCESS_IS_OK
 349 static svn_boolean_t contains_eol(apr_uintptr_t chunk)
 350 {
 351   apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
 352   apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
 353
 354   r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
 355   n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
 356
 357   return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
 358 }
 359 #endif
 360
 361 /* Find the prefix which is identical between all elements of the FILE array.
 362  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
 363  * set to TRUE if one of the FILEs reached its end while scanning prefix,
 364  * i.e. at least one file consisted entirely of prefix.  Otherwise,
 365  * REACHED_ONE_EOF is set to FALSE.
 366  *
 367  * After this function is finished, the buffers, chunks, curp's and endp's
 368  * of the FILEs are set to point at the first byte after the prefix. */
 369 static svn_error_t *
 370 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
 371                       struct file_info file[], apr_size_t file_len,
 372                       apr_pool_t *pool)
 373 {
 374   svn_boolean_t had_cr = FALSE;
 375   svn_boolean_t is_match;
 376   apr_off_t lines = 0;
 377   apr_size_t i;
 378
 379   *reached_one_eof = FALSE;
 380
 381   for (i = 1, is_match = TRUE; i < file_len; i++)
 382     is_match = is_match && *file[0].curp == *file[i].curp;
 383   while (is_match)
 384     {
 385 #if SVN_UNALIGNED_ACCESS_IS_OK
 386       apr_ssize_t max_delta, delta;
 387 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 388
 389       /* ### TODO: see if we can take advantage of
 390          diff options like ignore_eol_style or ignore_space. */
 391       /* check for eol, and count */
 392       if (*file[0].curp == '\r')
 393         {
 394           lines++;
 395           had_cr = TRUE;
 396         }
 397       else if (*file[0].curp == '\n' && !had_cr)
 398         {
 399           lines++;
 400         }
 401       else
 402         {
 403           had_cr = FALSE;
 404         }
 405
 406       INCREMENT_POINTERS(file, file_len, pool);
 407
 408 #if SVN_UNALIGNED_ACCESS_IS_OK
 409
 410       /* Try to advance as far as possible with machine-word granularity.
 411        * Determine how far we may advance with chunky ops without reaching
 412        * endp for any of the files.
 413        * Signedness is important here if curp gets close to endp.
 414        */
 415       max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
 416       for (i = 1; i < file_len; i++)
 417         {
 418           delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
 419           if (delta < max_delta)
 420             max_delta = delta;
 421         }
 422
 423       is_match = TRUE;
 424       for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
 425         {
 426           apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
 427           if (contains_eol(chunk))
 428             break;
 429
 430           for (i = 1; i < file_len; i++)
 431             if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
 432               {
 433                 is_match = FALSE;
 434                 break;
 435               }
 436
 437           if (! is_match)
 438             break;
 439         }
 440
 441       if (delta /* > 0*/)
 442         {
 443           /* We either found a mismatch or an EOL at or shortly behind curp+delta
 444            * or we cannot proceed with chunky ops without exceeding endp.
 445            * In any way, everything up to curp + delta is equal and not an EOL.
 446            */
 447           for (i = 0; i < file_len; i++)
 448             file[i].curp += delta;
 449
 450           /* Skipped data without EOL markers, so last char was not a CR. */
 451           had_cr = FALSE;
 452         }
 453 #endif
 454
 455       *reached_one_eof = is_one_at_eof(file, file_len);
 456       if (*reached_one_eof)
 457         break;
 458       else
 459         for (i = 1, is_match = TRUE; i < file_len; i++)
 460           is_match = is_match && *file[0].curp == *file[i].curp;
 461     }
 462
 463   if (had_cr)
 464     {
 465       /* Check if we ended in the middle of a \r\n for one file, but \r for
 466          another. If so, back up one byte, so the next loop will back up
 467          the entire line. Also decrement lines, since we counted one
 468          too many for the \r. */
 469       svn_boolean_t ended_at_nonmatching_newline = FALSE;
 470       for (i = 0; i < file_len; i++)
 471         if (file[i].curp < file[i].endp)
 472           ended_at_nonmatching_newline = ended_at_nonmatching_newline
 473                                          || *file[i].curp == '\n';
 474       if (ended_at_nonmatching_newline)
 475         {
 476           lines--;
 477           DECREMENT_POINTERS(file, file_len, pool);
 478         }
 479     }
 480
 481   /* Back up one byte, so we point at the last identical byte */
 482   DECREMENT_POINTERS(file, file_len, pool);
 483
 484   /* Back up to the last eol sequence (\n, \r\n or \r) */
 485   while (!is_one_at_bof(file, file_len) &&
 486          *file[0].curp != '\n' && *file[0].curp != '\r')
 487     DECREMENT_POINTERS(file, file_len, pool);
 488
 489   /* Slide one byte forward, to point past the eol sequence */
 490   INCREMENT_POINTERS(file, file_len, pool);
 491
 492   *prefix_lines = lines;
 493
 494   return SVN_NO_ERROR;
 495 }
 496
 497
 498 /* The number of identical suffix lines to keep with the middle section. These
 499  * lines are not eliminated as suffix, and can be picked up by the token
 500  * parsing and lcs steps. This is mainly for backward compatibility with
 501  * the previous diff (and blame) output (if there are multiple diff solutions,
 502  * our lcs algorithm prefers taking common lines from the start, rather than
 503  * from the end. By giving it back some suffix lines, we give it some wiggle
 504  * room to find the exact same diff as before).
 505  *
 506  * The number 50 is more or less arbitrary, based on some real-world tests
 507  * with big files (and then doubling the required number to be on the safe
 508  * side). This has a negligible effect on the power of the optimization. */
 509 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
 510 #ifndef SUFFIX_LINES_TO_KEEP
 511 #define SUFFIX_LINES_TO_KEEP 50
 512 #endif
 513
 514 /* Find the suffix which is identical between all elements of the FILE array.
 515  * Return the number of suffix lines in SUFFIX_LINES.
 516  *
 517  * Before this function is called the FILEs' pointers and chunks should be
 518  * positioned right after the identical prefix (which is the case after
 519  * find_identical_prefix), so we can determine where suffix scanning should
 520  * ultimately stop. */
 521 static svn_error_t *
 522 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
 523                       apr_size_t file_len, apr_pool_t *pool)
 524 {
 525   struct file_info file_for_suffix[4] = { { 0 }  };
 526   apr_off_t length[4];
 527   apr_off_t suffix_min_chunk0;
 528   apr_off_t suffix_min_offset0;
 529   apr_off_t min_file_size;
 530   int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
 531   svn_boolean_t is_match;
 532   apr_off_t lines = 0;
 533   svn_boolean_t had_cr;
 534   svn_boolean_t had_nl;
 535   apr_size_t i;
 536
 537   /* Initialize file_for_suffix[].
 538      Read last chunk, position curp at last byte. */
 539   for (i = 0; i < file_len; i++)
 540     {
 541       file_for_suffix[i].path = file[i].path;
 542       file_for_suffix[i].file = file[i].file;
 543       file_for_suffix[i].size = file[i].size;
 544       file_for_suffix[i].chunk =
 545         (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
 546       length[i] = offset_in_chunk(file_for_suffix[i].size);
 547       if (length[i] == 0)
 548         {
 549           /* last chunk is an empty chunk -> start at next-to-last chunk */
 550           file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
 551           length[i] = CHUNK_SIZE;
 552         }
 553
 554       if (file_for_suffix[i].chunk == file[i].chunk)
 555         {
 556           /* Prefix ended in last chunk, so we can reuse the prefix buffer */
 557           file_for_suffix[i].buffer = file[i].buffer;
 558         }
 559       else
 560         {
 561           /* There is at least more than 1 chunk,
 562              so allocate full chunk size buffer */
 563           file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
 564           SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path,
 565                              file_for_suffix[i].buffer, length[i],
 566                              chunk_to_offset(file_for_suffix[i].chunk),
 567                              pool));
 568         }
 569       file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
 570       file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
 571     }
 572
 573   /* Get the chunk and pointer offset (for file[0]) at which we should stop
 574      scanning backward for the identical suffix, i.e. when we reach prefix. */
 575   suffix_min_chunk0 = file[0].chunk;
 576   suffix_min_offset0 = file[0].curp - file[0].buffer;
 577
 578   /* Compensate if other files are smaller than file[0] */
 579   for (i = 1, min_file_size = file[0].size; i < file_len; i++)
 580     if (file[i].size < min_file_size)
 581       min_file_size = file[i].size;
 582   if (file[0].size > min_file_size)
 583     {
 584       suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
 585       suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
 586     }
 587
 588   /* Scan backwards until mismatch or until we reach the prefix. */
 589   for (i = 1, is_match = TRUE; i < file_len; i++)
 590     is_match = is_match
 591                && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 592   if (is_match && *file_for_suffix[0].curp != '\r'
 593                && *file_for_suffix[0].curp != '\n')
 594     /* Count an extra line for the last line not ending in an eol. */
 595     lines++;
 596
 597   had_nl = FALSE;
 598   while (is_match)
 599     {
 600       svn_boolean_t reached_prefix;
 601 #if SVN_UNALIGNED_ACCESS_IS_OK
 602       /* Initialize the minimum pointer positions. */
 603       const char *min_curp[4];
 604       svn_boolean_t can_read_word;
 605 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 606
 607       /* ### TODO: see if we can take advantage of
 608          diff options like ignore_eol_style or ignore_space. */
 609       /* check for eol, and count */
 610       if (*file_for_suffix[0].curp == '\n')
 611         {
 612           lines++;
 613           had_nl = TRUE;
 614         }
 615       else if (*file_for_suffix[0].curp == '\r' && !had_nl)
 616         {
 617           lines++;
 618         }
 619       else
 620         {
 621           had_nl = FALSE;
 622         }
 623
 624       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
 625
 626 #if SVN_UNALIGNED_ACCESS_IS_OK
 627       for (i = 0; i < file_len; i++)
 628         min_curp[i] = file_for_suffix[i].buffer;
 629
 630       /* If we are in the same chunk that contains the last part of the common
 631          prefix, use the min_curp[0] pointer to make sure we don't get a
 632          suffix that overlaps the already determined common prefix. */
 633       if (file_for_suffix[0].chunk == suffix_min_chunk0)
 634         min_curp[0] += suffix_min_offset0;
 635
 636       /* Scan quickly by reading with machine-word granularity. */
 637       for (i = 0, can_read_word = TRUE; i < file_len; i++)
 638         can_read_word = can_read_word
 639                         && (  (file_for_suffix[i].curp + 1
 640                                  - sizeof(apr_uintptr_t))
 641                             > min_curp[i]);
 642       while (can_read_word)
 643         {
 644           apr_uintptr_t chunk;
 645
 646           /* For each file curp is positioned at the current byte, but we
 647              want to examine the current byte and the ones before the current
 648              location as one machine word. */
 649
 650           chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
 651                                              - sizeof(apr_uintptr_t));
 652           if (contains_eol(chunk))
 653             break;
 654
 655           for (i = 1, is_match = TRUE; i < file_len; i++)
 656             is_match = is_match
 657                        && (   chunk
 658                            == *(const apr_uintptr_t *)
 659                                     (file_for_suffix[i].curp + 1
 660                                        - sizeof(apr_uintptr_t)));
 661
 662           if (! is_match)
 663             break;
 664
 665           for (i = 0; i < file_len; i++)
 666             {
 667               file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
 668               can_read_word = can_read_word
 669                               && (  (file_for_suffix[i].curp + 1
 670                                        - sizeof(apr_uintptr_t))
 671                                   > min_curp[i]);
 672             }
 673
 674           /* We skipped some bytes, so there are no closing EOLs */
 675           had_nl = FALSE;
 676           had_cr = FALSE;
 677         }
 678
 679       /* The > min_curp[i] check leaves at least one final byte for checking
 680          in the non block optimized case below. */
 681 #endif
 682
 683       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
 684                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
 685                           == suffix_min_offset0;
 686       if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
 687         break;
 688
 689       is_match = TRUE;
 690       for (i = 1; i < file_len; i++)
 691         is_match = is_match
 692                    && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 693     }
 694
 695   /* Slide one byte forward, to point at the first byte of identical suffix */
 696   INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 697
 698   /* Slide forward until we find an eol sequence to add the rest of the line
 699      we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
 700      one file reaches its end. */
 701   do
 702     {
 703       had_cr = FALSE;
 704       while (!is_one_at_eof(file_for_suffix, file_len)
 705              && *file_for_suffix[0].curp != '\n'
 706              && *file_for_suffix[0].curp != '\r')
 707         INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 708
 709       /* Slide one or two more bytes, to point past the eol. */
 710       if (!is_one_at_eof(file_for_suffix, file_len)
 711           && *file_for_suffix[0].curp == '\r')
 712         {
 713           lines--;
 714           had_cr = TRUE;
 715           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 716         }
 717       if (!is_one_at_eof(file_for_suffix, file_len)
 718           && *file_for_suffix[0].curp == '\n')
 719         {
 720           if (!had_cr)
 721             lines--;
 722           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 723         }
 724     }
 725   while (!is_one_at_eof(file_for_suffix, file_len)
 726          && suffix_lines_to_keep--);
 727
 728   if (is_one_at_eof(file_for_suffix, file_len))
 729     lines = 0;
 730
 731   /* Save the final suffix information in the original file_info */
 732   for (i = 0; i < file_len; i++)
 733     {
 734       file[i].suffix_start_chunk = file_for_suffix[i].chunk;
 735       file[i].suffix_offset_in_chunk =
 736         file_for_suffix[i].curp - file_for_suffix[i].buffer;
 737     }
 738
 739   *suffix_lines = lines;
 740
 741   return SVN_NO_ERROR;
 742 }
 743
 744
 745 /* Let FILE stand for the array of file_info struct elements of BATON->files
 746  * that are indexed by the elements of the DATASOURCE array.
 747  * BATON's type is (svn_diff__file_baton_t *).
 748  *
 749  * For each file in the FILE array, open the file at FILE.path; initialize
 750  * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
 751  * buffer and read the first chunk.  Then find the prefix and suffix lines
 752  * which are identical between all the files.  Return the number of identical
 753  * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
 754  * SUFFIX_LINES.
 755  *
 756  * Finding the identical prefix and suffix allows us to exclude those from the
 757  * rest of the diff algorithm, which increases performance by reducing the
 758  * problem space.
 759  *
 760  * Implements svn_diff_fns2_t::datasources_open. */
 761 static svn_error_t *
 762 datasources_open(void *baton,
 763                  apr_off_t *prefix_lines,
 764                  apr_off_t *suffix_lines,
 765                  const svn_diff_datasource_e *datasources,
 766                  apr_size_t datasources_len)
 767 {
 768   svn_diff__file_baton_t *file_baton = baton;
 769   struct file_info files[4];
 770   apr_finfo_t finfo[4];
 771   apr_off_t length[4];
 772 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 773   svn_boolean_t reached_one_eof;
 774 #endif
 775   apr_size_t i;
 776
 777   /* Make sure prefix_lines and suffix_lines are set correctly, even if we
 778    * exit early because one of the files is empty. */
 779   *prefix_lines = 0;
 780   *suffix_lines = 0;
 781
 782   /* Open datasources and read first chunk */
 783   for (i = 0; i < datasources_len; i++)
 784     {
 785       struct file_info *file
 786           = &file_baton->files[datasource_to_index(datasources[i])];
 787       SVN_ERR(svn_io_file_open(&file->file, file->path,
 788                                APR_READ, APR_OS_DEFAULT, file_baton->pool));
 789       SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
 790                                    file->file, file_baton->pool));
 791       file->size = finfo[i].size;
 792       length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
 793       file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
 794       SVN_ERR(read_chunk(file->file, file->path, file->buffer,
 795                          length[i], 0, file_baton->pool));
 796       file->endp = file->buffer + length[i];
 797       file->curp = file->buffer;
 798       /* Set suffix_start_chunk to a guard value, so if suffix scanning is
 799        * skipped because one of the files is empty, or because of
 800        * reached_one_eof, we can still easily check for the suffix during
 801        * token reading (datasource_get_next_token). */
 802       file->suffix_start_chunk = -1;
 803
 804       files[i] = *file;
 805     }
 806
 807   for (i = 0; i < datasources_len; i++)
 808     if (length[i] == 0)
 809       /* There will not be any identical prefix/suffix, so we're done. */
 810       return SVN_NO_ERROR;
 811
 812 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 813
 814   SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
 815                                 files, datasources_len, file_baton->pool));
 816
 817   if (!reached_one_eof)
 818     /* No file consisted totally of identical prefix,
 819      * so there may be some identical suffix.  */
 820     SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
 821                                   file_baton->pool));
 822
 823 #endif
 824
 825   /* Copy local results back to baton. */
 826   for (i = 0; i < datasources_len; i++)
 827     file_baton->files[datasource_to_index(datasources[i])] = files[i];
 828
 829   return SVN_NO_ERROR;
 830 }
 831
 832
 833 /* Implements svn_diff_fns2_t::datasource_close */
 834 static svn_error_t *
 835 datasource_close(void *baton, svn_diff_datasource_e datasource)
 836 {
 837   /* Do nothing.  The compare_token function needs previous datasources
 838    * to stay available until all datasources are processed.
 839    */
 840
 841   return SVN_NO_ERROR;
 842 }
 843
 844 /* Implements svn_diff_fns2_t::datasource_get_next_token */
 845 static svn_error_t *
 846 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
 847                           svn_diff_datasource_e datasource)
 848 {
 849   svn_diff__file_baton_t *file_baton = baton;
 850   svn_diff__file_token_t *file_token;
 851   struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
 852   char *endp;
 853   char *curp;
 854   char *eol;
 855   apr_off_t last_chunk;
 856   apr_off_t length;
 857   apr_uint32_t h = 0;
 858   /* Did the last chunk end in a CR character? */
 859   svn_boolean_t had_cr = FALSE;
 860
 861   *token = NULL;
 862
 863   curp = file->curp;
 864   endp = file->endp;
 865
 866   last_chunk = offset_to_chunk(file->size);
 867
 868   /* Are we already at the end of a chunk? */
 869   if (curp == endp)
 870     {
 871       /* Are we at EOF */
 872       if (last_chunk == file->chunk)
 873         return SVN_NO_ERROR; /* EOF */
 874
 875       /* Or right before an identical suffix in the next chunk? */
 876       if (file->chunk + 1 == file->suffix_start_chunk
 877           && file->suffix_offset_in_chunk == 0)
 878         return SVN_NO_ERROR;
 879     }
 880
 881   /* Stop when we encounter the identical suffix. If suffix scanning was not
 882    * performed, suffix_start_chunk will be -1, so this condition will never
 883    * be true. */
 884   if (file->chunk == file->suffix_start_chunk
 885       && (curp - file->buffer) == file->suffix_offset_in_chunk)
 886     return SVN_NO_ERROR;
 887
 888   /* Allocate a new token, or fetch one from the "reusable tokens" list. */
 889   file_token = file_baton->tokens;
 890   if (file_token)
 891     {
 892       file_baton->tokens = file_token->next;
 893     }
 894   else
 895     {
 896       file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
 897     }
 898
 899   file_token->datasource = datasource;
 900   file_token->offset = chunk_to_offset(file->chunk)
 901                        + (curp - file->buffer);
 902   file_token->norm_offset = file_token->offset;
 903   file_token->raw_length = 0;
 904   file_token->length = 0;
 905
 906   while (1)
 907     {
 908       eol = svn_eol__find_eol_start(curp, endp - curp);
 909       if (eol)
 910         {
 911           had_cr = (*eol == '\r');
 912           eol++;
 913           /* If we have the whole eol sequence in the chunk... */
 914           if (!(had_cr && eol == endp))
 915             {
 916               /* Also skip past the '\n' in an '\r\n' sequence. */
 917               if (had_cr && *eol == '\n')
 918                 eol++;
 919               break;
 920             }
 921         }
 922
 923       if (file->chunk == last_chunk)
 924         {
 925           eol = endp;
 926           break;
 927         }
 928
 929       length = endp - curp;
 930       file_token->raw_length += length;
 931       {
 932         char *c = curp;
 933
 934         svn_diff__normalize_buffer(&c, &length,
 935                                    &file->normalize_state,
 936                                    curp, file_baton->options);
 937         if (file_token->length == 0)
 938           {
 939             /* When we are reading the first part of the token, move the
 940                normalized offset past leading ignored characters, if any. */
 941             file_token->norm_offset += (c - curp);
 942           }
 943         file_token->length += length;
 944         h = svn__adler32(h, c, length);
 945       }
 946
 947       curp = endp = file->buffer;
 948       file->chunk++;
 949       length = file->chunk == last_chunk ?
 950         offset_in_chunk(file->size) : CHUNK_SIZE;
 951       endp += length;
 952       file->endp = endp;
 953
 954       /* Issue #4283: Normally we should have checked for reaching the skipped
 955          suffix here, but because we assume that a suffix always starts on a
 956          line and token boundary we rely on catching the suffix earlier in this
 957          function.
 958
 959          When changing things here, make sure the whitespace settings are
 960          applied, or we mught not reach the exact suffix boundary as token
 961          boundary. */
 962       SVN_ERR(read_chunk(file->file, file->path,
 963                          curp, length,
 964                          chunk_to_offset(file->chunk),
 965                          file_baton->pool));
 966
 967       /* If the last chunk ended in a CR, we're done. */
 968       if (had_cr)
 969         {
 970           eol = curp;
 971           if (*curp == '\n')
 972             ++eol;
 973           break;
 974         }
 975     }
 976
 977   length = eol - curp;
 978   file_token->raw_length += length;
 979   file->curp = eol;
 980
 981   /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
 982    * with a spurious empty token.  Avoid returning it.
 983    * Note that we use the unnormalized length; we don't want a line containing
 984    * only spaces (and no trailing newline) to appear like a non-existent
 985    * line. */
 986   if (file_token->raw_length > 0)
 987     {
 988       char *c = curp;
 989       svn_diff__normalize_buffer(&c, &length,
 990                                  &file->normalize_state,
 991                                  curp, file_baton->options);
 992       if (file_token->length == 0)
 993         {
 994           /* When we are reading the first part of the token, move the
 995              normalized offset past leading ignored characters, if any. */
 996           file_token->norm_offset += (c - curp);
 997         }
 998
 999       file_token->length += length;
1000
1001       *hash = svn__adler32(h, c, length);
1002       *token = file_token;
1003     }
1004
1005   return SVN_NO_ERROR;
1006 }
1007
1008 #define COMPARE_CHUNK_SIZE 4096
1009
1010 /* Implements svn_diff_fns2_t::token_compare */
1011 static svn_error_t *
1012 token_compare(void *baton, void *token1, void *token2, int *compare)
1013 {
1014   svn_diff__file_baton_t *file_baton = baton;
1015   svn_diff__file_token_t *file_token[2];
1016   char buffer[2][COMPARE_CHUNK_SIZE];
1017   char *bufp[2];
1018   apr_off_t offset[2];
1019   struct file_info *file[2];
1020   apr_off_t length[2];
1021   apr_off_t total_length;
1022   /* How much is left to read of each token from the file. */
1023   apr_off_t raw_length[2];
1024   int i;
1025   svn_diff__normalize_state_t state[2];
1026
1027   file_token[0] = token1;
1028   file_token[1] = token2;
1029   if (file_token[0]->length < file_token[1]->length)
1030     {
1031       *compare = -1;
1032       return SVN_NO_ERROR;
1033     }
1034
1035   if (file_token[0]->length > file_token[1]->length)
1036     {
1037       *compare = 1;
1038       return SVN_NO_ERROR;
1039     }
1040
1041   total_length = file_token[0]->length;
1042   if (total_length == 0)
1043     {
1044       *compare = 0;
1045       return SVN_NO_ERROR;
1046     }
1047
1048   for (i = 0; i < 2; ++i)
1049     {
1050       int idx = datasource_to_index(file_token[i]->datasource);
1051
1052       file[i] = &file_baton->files[idx];
1053       offset[i] = file_token[i]->norm_offset;
1054       state[i] = svn_diff__normalize_state_normal;
1055
1056       if (offset_to_chunk(offset[i]) == file[i]->chunk)
1057         {
1058           /* If the start of the token is in memory, the entire token is
1059            * in memory.
1060            */
1061           bufp[i] = file[i]->buffer;
1062           bufp[i] += offset_in_chunk(offset[i]);
1063
1064           length[i] = total_length;
1065           raw_length[i] = 0;
1066         }
1067       else
1068         {
1069           apr_off_t skipped;
1070
1071           length[i] = 0;
1072
1073           /* When we skipped the first part of the token via the whitespace
1074              normalization we must reduce the raw length of the token */
1075           skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1076
1077           raw_length[i] = file_token[i]->raw_length - skipped;
1078         }
1079     }
1080
1081   do
1082     {
1083       apr_off_t len;
1084       for (i = 0; i < 2; i++)
1085         {
1086           if (length[i] == 0)
1087             {
1088               /* Error if raw_length is 0, that's an unexpected change
1089                * of the file that can happen when ingoring whitespace
1090                * and that can lead to an infinite loop. */
1091               if (raw_length[i] == 0)
1092                 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1093                                          NULL,
1094                                          _("The file '%s' changed unexpectedly"
1095                                            " during diff"),
1096                                          file[i]->path);
1097
1098               /* Read a chunk from disk into a buffer */
1099               bufp[i] = buffer[i];
1100               length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1101                 COMPARE_CHUNK_SIZE : raw_length[i];
1102
1103               SVN_ERR(read_chunk(file[i]->file,
1104                                  file[i]->path,
1105                                  bufp[i], length[i], offset[i],
1106                                  file_baton->pool));
1107               offset[i] += length[i];
1108               raw_length[i] -= length[i];
1109               /* bufp[i] gets reset to buffer[i] before reading each chunk,
1110                  so, overwriting it isn't a problem */
1111               svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1112                                          bufp[i], file_baton->options);
1113
1114               /* assert(length[i] == file_token[i]->length); */
1115             }
1116         }
1117
1118       len = length[0] > length[1] ? length[1] : length[0];
1119
1120       /* Compare two chunks (that could be entire tokens if they both reside
1121        * in memory).
1122        */
1123       *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1124       if (*compare != 0)
1125         return SVN_NO_ERROR;
1126
1127       total_length -= len;
1128       length[0] -= len;
1129       length[1] -= len;
1130       bufp[0] += len;
1131       bufp[1] += len;
1132     }
1133   while(total_length > 0);
1134
1135   *compare = 0;
1136   return SVN_NO_ERROR;
1137 }
1138
1139
1140 /* Implements svn_diff_fns2_t::token_discard */
1141 static void
1142 token_discard(void *baton, void *token)
1143 {
1144   svn_diff__file_baton_t *file_baton = baton;
1145   svn_diff__file_token_t *file_token = token;
1146
1147   /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1148   file_token->next = file_baton->tokens;
1149   file_baton->tokens = file_token;
1150 }
1151
1152
1153 /* Implements svn_diff_fns2_t::token_discard_all */
1154 static void
1155 token_discard_all(void *baton)
1156 {
1157   svn_diff__file_baton_t *file_baton = baton;
1158
1159   /* Discard all memory in use by the tokens, and close all open files. */
1160   svn_pool_clear(file_baton->pool);
1161 }
1162
1163
/* The svn_diff_fns2_t callback table for diffing files.  It is handed to
 * svn_diff_diff_2(), svn_diff_diff3_2() and svn_diff_diff4_2() together
 * with a svn_diff__file_baton_t.  The initializer is positional, so the
 * entries must stay in the order of the members of svn_diff_fns2_t. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,           /* open files, read first chunks, scan
                                 for identical prefix/suffix */
  datasource_close,           /* no-op */
  datasource_get_next_token,  /* read one line and hash it */
  token_compare,              /* order two lines */
  token_discard,              /* recycle one token */
  token_discard_all           /* clear the baton's pool */
};
1173
/* Id for the --ignore-eol-style option, which doesn't have a short name.
 * NOTE(review): 256 is presumably chosen because it lies outside the range
 * of single-character option codes -- confirm against the APR getopt docs. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse().
 * Each entry is given as { long name, option id, 0, NULL }; the table is
 * terminated by an entry whose name is NULL.  The option id is what
 * svn_diff_file_options_parse() switches on. */
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support. */
  /* The parser accepts this option but takes no action for it. */
  { "unified", 'u', 0, NULL },
  { NULL, 0, 0, NULL }
};
1189
1190 svn_diff_file_options_t *
1191 svn_diff_file_options_create(apr_pool_t *pool)
1192 {
1193   return apr_pcalloc(pool, sizeof(svn_diff_file_options_t));
1194 }
1195
/* A baton for use with opt_parsing_error_func(). */
struct opt_parsing_error_baton_t
{
  /* The error built by opt_parsing_error_func() from the most recent
   * message it was given.  svn_diff_file_options_parse() initializes it
   * to NULL before parsing starts. */
  svn_error_t *err;
  /* Pool in which opt_parsing_error_func() formats the message text. */
  apr_pool_t *pool;
};
1202
1203 /* Store an error message from apr_getopt_long().  Set BATON->err to a new
1204  * error with a message generated from FMT and the remaining arguments.
1205  * Implements apr_getopt_err_fn_t. */
1206 static void
1207 opt_parsing_error_func(void *baton,
1208                        const char *fmt, ...)
1209 {
1210   struct opt_parsing_error_baton_t *b = baton;
1211   const char *message;
1212   va_list ap;
1213
1214   va_start(ap, fmt);
1215   message = apr_pvsprintf(b->pool, fmt, ap);
1216   va_end(ap);
1217
1218   /* Skip leading ": " (if present, which it always is in known cases). */
1219   if (strncmp(message, ": ", 2) == 0)
1220     message += 2;
1221
1222   b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1223 }
1224
1225 svn_error_t *
1226 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1227                             const apr_array_header_t *args,
1228                             apr_pool_t *pool)
1229 {
1230   apr_getopt_t *os;
1231   struct opt_parsing_error_baton_t opt_parsing_error_baton;
1232   /* Make room for each option (starting at index 1) plus trailing NULL. */
1233   const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1234
1235   opt_parsing_error_baton.err = NULL;
1236   opt_parsing_error_baton.pool = pool;
1237
1238   argv[0] = "";
1239   memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
1240   argv[args->nelts + 1] = NULL;
1241
1242   apr_getopt_init(&os, pool, args->nelts + 1, argv);
1243
1244   /* Capture any error message from apr_getopt_long().  This will typically
1245    * say which option is wrong, which we would not otherwise know. */
1246   os->errfn = opt_parsing_error_func;
1247   os->errarg = &opt_parsing_error_baton;
1248
1249   while (1)
1250     {
1251       const char *opt_arg;
1252       int opt_id;
1253       apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1254
1255       if (APR_STATUS_IS_EOF(err))
1256         break;
1257       if (err)
1258         /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1259          * it always will produce one, but never mind if it doesn't.  Avoid
1260          * using the message associated with the return code ERR, because
1261          * it refers to the "command line" which may be misleading here. */
1262         return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1263                                 opt_parsing_error_baton.err,
1264                                 _("Error in options to internal diff"));
1265
1266       switch (opt_id)
1267         {
1268         case 'b':
1269           /* -w takes precedence over -b. */
1270           if (! options->ignore_space)
1271             options->ignore_space = svn_diff_file_ignore_space_change;
1272           break;
1273         case 'w':
1274           options->ignore_space = svn_diff_file_ignore_space_all;
1275           break;
1276         case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1277           options->ignore_eol_style = TRUE;
1278           break;
1279         case 'p':
1280           options->show_c_function = TRUE;
1281           break;
1282         default:
1283           break;
1284         }
1285     }
1286
1287   /* Check for spurious arguments. */
1288   if (os->ind < os->argc)
1289     return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1290                              _("Invalid argument '%s' in diff options"),
1291                              os->argv[os->ind]);
1292
1293   return SVN_NO_ERROR;
1294 }
1295
1296 svn_error_t *
1297 svn_diff_file_diff_2(svn_diff_t **diff,
1298                      const char *original,
1299                      const char *modified,
1300                      const svn_diff_file_options_t *options,
1301                      apr_pool_t *pool)
1302 {
1303   svn_diff__file_baton_t baton = { 0 };
1304
1305   baton.options = options;
1306   baton.files[0].path = original;
1307   baton.files[1].path = modified;
1308   baton.pool = svn_pool_create(pool);
1309
1310   SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1311
1312   svn_pool_destroy(baton.pool);
1313   return SVN_NO_ERROR;
1314 }
1315
1316 svn_error_t *
1317 svn_diff_file_diff3_2(svn_diff_t **diff,
1318                       const char *original,
1319                       const char *modified,
1320                       const char *latest,
1321                       const svn_diff_file_options_t *options,
1322                       apr_pool_t *pool)
1323 {
1324   svn_diff__file_baton_t baton = { 0 };
1325
1326   baton.options = options;
1327   baton.files[0].path = original;
1328   baton.files[1].path = modified;
1329   baton.files[2].path = latest;
1330   baton.pool = svn_pool_create(pool);
1331
1332   SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1333
1334   svn_pool_destroy(baton.pool);
1335   return SVN_NO_ERROR;
1336 }
1337
1338 svn_error_t *
1339 svn_diff_file_diff4_2(svn_diff_t **diff,
1340                       const char *original,
1341                       const char *modified,
1342                       const char *latest,
1343                       const char *ancestor,
1344                       const svn_diff_file_options_t *options,
1345                       apr_pool_t *pool)
1346 {
1347   svn_diff__file_baton_t baton = { 0 };
1348
1349   baton.options = options;
1350   baton.files[0].path = original;
1351   baton.files[1].path = modified;
1352   baton.files[2].path = latest;
1353   baton.files[3].path = ancestor;
1354   baton.pool = svn_pool_create(pool);
1355
1356   SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1357
1358   svn_pool_destroy(baton.pool);
1359   return SVN_NO_ERROR;
1360 }
1361
1362 \f
1363 /** Display unified context diffs **/
1364
/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead. */
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
/* State shared by the unified-diff output callbacks.  In the two-element
 * arrays, index 0 refers to the original file and index 1 to the modified
 * file. */
typedef struct svn_diff__file_output_baton_t
{
  /* Stream that finished hunks (header and body) are written to. */
  svn_stream_t *output_stream;
  /* Encoding passed on when writing hunk headers and the
   * "no newline at end of file" message. */
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  /* Each marker is appended to the hunk in front of a line of the
   * corresponding type. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  /* NOTE(review): presumably the paths of the two files being diffed;
   * the output code below works on FILE and does not read these. */
  const char *path[2];
  /* The files that lines are read from. */
  apr_file_t *file[2];

  /* Number of lines consumed so far from each file. */
  apr_off_t   current_line[2];

  /* Read buffer for each file, the number of unconsumed bytes left in it,
   * and the position of the first unconsumed byte. */
  char        buffer[2][4096];
  apr_size_t  length[2];
  char       *curp[2];

  /* Zero-based first line of the pending hunk in each file, and the number
   * of lines the pending hunk covers in each file. */
  apr_off_t   hunk_start[2];
  apr_off_t   hunk_length[2];
  /* Body text of the pending hunk; empty when no hunk is pending. */
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  /* Pool used for file reads and for writing hunk headers. */
  apr_pool_t *pool;
} svn_diff__file_output_baton_t;
1403
/* What output_unified_line() should do with the line it consumes. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts for the original file. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts for the modified file. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1411
1412
/* Consume one line from file IDX of BATON (0 = original, 1 = modified) and
 * handle it according to TYPE.
 *
 * BATON->current_line[IDX] is always incremented, even when the file is
 * already exhausted.  Unless TYPE is svn_diff__file_output_unified_skip,
 * the matching marker string followed by the line's bytes (including its
 * end-of-line sequence) is appended to BATON->hunk and the affected
 * BATON->hunk_length counters are incremented.
 *
 * When BATON->show_c_function is set, a skipped or context line that starts
 * with a letter, '$' or '_' and is not matched by BATON->extra_skip_match
 * replaces the contents of BATON->extra_context.
 *
 * The file is read through BATON->buffer[IDX]; a line may span several
 * buffer refills, including a CR LF pair split across two refills.  If end
 * of file is hit after bytes of an output line were processed and the line
 * did not end in CR, the "no newline" message is appended to the hunk.
 *
 * Returns any read error other than end-of-file. */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  /* Has a buffer been consumed completely without finding the line's end? */
  svn_boolean_t bytes_processed = FALSE;
  /* Was the last EOL character seen a CR (possibly at the end of a buffer)? */
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  /* Nothing buffered and nothing left in the file: no line to output. */
  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          /* First bytes of this line: emit the marker and decide whether
           * the line is a candidate for the extra (function) context. */
          if (!bytes_processed)
            {
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              /* NOTE(review): CURP points into the read buffer, which this
               * function never NUL-terminates; confirm that
               * svn_cstring_match_glob_list() cannot read past the
               * buffered data. */
              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* The EOL is complete unless it is a CR sitting on the very
               * last buffered byte; in that case the following byte has to
               * be read first to see whether it is an LF. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  /* Remember where the next line starts. */
                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          /* No complete EOL in the buffered data: take all of it and go on
           * with the next buffer. */
          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      /* Refill the buffer from the file. */
      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  /* A real read error is passed on to the caller; EOF is handled below. */
  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      /* Nothing is buffered any more. */
      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}
1577
1578 static APR_INLINE svn_error_t *
1579 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1580                           int source,
1581                           svn_diff__file_output_unified_type_e type,
1582                           apr_off_t until)
1583 {
1584   while (output_baton->current_line[source] < until)
1585     {
1586       SVN_ERR(output_unified_line(output_baton, type, source));
1587     }
1588   return SVN_NO_ERROR;
1589 }
1590
1591 static svn_error_t *
1592 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1593 {
1594   apr_off_t target_line;
1595   apr_size_t hunk_len;
1596   apr_off_t old_start;
1597   apr_off_t new_start;
1598
1599   if (svn_stringbuf_isempty(baton->hunk))
1600     {
1601       /* Nothing to flush */
1602       return SVN_NO_ERROR;
1603     }
1604
1605   target_line = baton->hunk_start[0] + baton->hunk_length[0]
1606                 + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1607
1608   /* Add trailing context to the hunk */
1609   SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1610                                     svn_diff__file_output_unified_context,
1611                                     target_line));
1612
1613   old_start = baton->hunk_start[0];
1614   new_start = baton->hunk_start[1];
1615
1616   /* If the file is non-empty, convert the line indexes from
1617      zero based to one based */
1618   if (baton->hunk_length[0])
1619     old_start++;
1620   if (baton->hunk_length[1])
1621     new_start++;
1622
1623   /* Write the hunk header */
1624   SVN_ERR(svn_diff__unified_write_hunk_header(
1625             baton->output_stream, baton->header_encoding, "@@",
1626             old_start, baton->hunk_length[0],
1627             new_start, baton->hunk_length[1],
1628             baton->hunk_extra_context,
1629             baton->pool));
1630
1631   /* Output the hunk content */
1632   hunk_len = baton->hunk->len;
1633   SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1634                            &hunk_len));
1635
1636   /* Prepare for the next hunk */
1637   baton->hunk_length[0] = 0;
1638   baton->hunk_length[1] = 0;
1639   baton->hunk_start[0] = 0;
1640   baton->hunk_start[1] = 0;
1641   svn_stringbuf_setempty(baton->hunk);
1642
1643   return SVN_NO_ERROR;
1644 }
1645
1646 static svn_error_t *
1647 output_unified_diff_modified(void *baton,
1648   apr_off_t original_start, apr_off_t original_length,
1649   apr_off_t modified_start, apr_off_t modified_length,
1650   apr_off_t latest_start, apr_off_t latest_length)
1651 {
1652   svn_diff__file_output_baton_t *output_baton = baton;
1653   apr_off_t context_prefix_length;
1654   apr_off_t prev_context_end;
1655   svn_boolean_t init_hunk = FALSE;
1656
1657   if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
1658     context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1659   else
1660     context_prefix_length = original_start;
1661
1662   /* Calculate where the previous hunk will end if we would write it now
1663      (including the necessary context at the end) */
1664   if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1665     {
1666       prev_context_end = output_baton->hunk_start[0]
1667                          + output_baton->hunk_length[0]
1668                          + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1669     }
1670   else
1671     {
1672       prev_context_end = -1;
1673
1674       if (output_baton->hunk_start[0] == 0
1675           && (original_length > 0 || modified_length > 0))
1676         init_hunk = TRUE;
1677     }
1678
1679   /* If the changed range is far enough from the previous range, flush the current
1680      hunk. */
1681   {
1682     apr_off_t new_hunk_start = (original_start - context_prefix_length);
1683
1684     if (output_baton->current_line[0] < new_hunk_start
1685           && prev_context_end <= new_hunk_start)
1686       {
1687         SVN_ERR(output_unified_flush_hunk(output_baton));
1688         init_hunk = TRUE;
1689       }
1690     else if (output_baton->hunk_length[0] > 0
1691              || output_baton->hunk_length[1] > 0)
1692       {
1693         /* We extend the current hunk */
1694
1695
1696         /* Original: Output the context preceding the changed range */
1697         SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1698                                           svn_diff__file_output_unified_context,
1699                                           original_start));
1700       }
1701   }
1702
1703   /* Original: Skip lines until we are at the beginning of the context we want
1704      to display */
1705   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1706                                     svn_diff__file_output_unified_skip,
1707                                     original_start - context_prefix_length));
1708
1709   /* Note that the above skip stores data for the show_c_function support below */
1710
1711   if (init_hunk)
1712     {
1713       SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1714                      && output_baton->hunk_length[1] == 0);
1715
1716       output_baton->hunk_start[0] = original_start - context_prefix_length;
1717       output_baton->hunk_start[1] = modified_start - context_prefix_length;
1718     }
1719
1720   if (init_hunk && output_baton->show_c_function)
1721     {
1722       apr_size_t p;
1723       const char *invalid_character;
1724
1725       /* Save the extra context for later use.
1726        * Note that the last byte of the hunk_extra_context array is never
1727        * touched after it is zero-initialized, so the array is always
1728        * 0-terminated. */
1729       strncpy(output_baton->hunk_extra_context,
1730               output_baton->extra_context->data,
1731               SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1732       /* Trim whitespace at the end, most notably to get rid of any
1733        * newline characters. */
1734       p = strlen(output_baton->hunk_extra_context);
1735       while (p > 0
1736              && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1737         {
1738           output_baton->hunk_extra_context[--p] = '\0';
1739         }
1740       invalid_character =
1741         svn_utf__last_valid(output_baton->hunk_extra_context,
1742                             SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1743       for (p = invalid_character - output_baton->hunk_extra_context;
1744            p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1745         {
1746           output_baton->hunk_extra_context[p] = '\0';
1747         }
1748     }
1749
1750   /* Modified: Skip lines until we are at the start of the changed range */
1751   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1752                                     svn_diff__file_output_unified_skip,
1753                                     modified_start));
1754
1755   /* Original: Output the context preceding the changed range */
1756   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1757                                     svn_diff__file_output_unified_context,
1758                                     original_start));
1759
1760   /* Both: Output the changed range */
1761   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1762                                     svn_diff__file_output_unified_delete,
1763                                     original_start + original_length));
1764   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1765                                     svn_diff__file_output_unified_insert,
1766                                     modified_start + modified_length));
1767
1768   return SVN_NO_ERROR;
1769 }
1770
1771 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1772 static svn_error_t *
1773 output_unified_default_hdr(const char **header, const char *path,
1774                            apr_pool_t *pool)
1775 {
1776   apr_finfo_t file_info;
1777   apr_time_exp_t exploded_time;
1778   char time_buffer[64];
1779   apr_size_t time_len;
1780   const char *utf8_timestr;
1781
1782   SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1783   apr_time_exp_lt(&exploded_time, file_info.mtime);
1784
1785   apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1786   /* Order of date components can be different in different languages */
1787                _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1788
1789   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1790
1791   *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1792
1793   return SVN_NO_ERROR;
1794 }
1795
1796 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1797 {
1798   NULL, /* output_common */
1799   output_unified_diff_modified,
1800   NULL, /* output_diff_latest */
1801   NULL, /* output_diff_common */
1802   NULL  /* output_conflict */
1803 };
1804
1805 svn_error_t *
1806 svn_diff_file_output_unified3(svn_stream_t *output_stream,
1807                               svn_diff_t *diff,
1808                               const char *original_path,
1809                               const char *modified_path,
1810                               const char *original_header,
1811                               const char *modified_header,
1812                               const char *header_encoding,
1813                               const char *relative_to_dir,
1814                               svn_boolean_t show_c_function,
1815                               apr_pool_t *pool)
1816 {
1817   if (svn_diff_contains_diffs(diff))
1818     {
1819       svn_diff__file_output_baton_t baton;
1820       int i;
1821
1822       memset(&baton, 0, sizeof(baton));
1823       baton.output_stream = output_stream;
1824       baton.pool = pool;
1825       baton.header_encoding = header_encoding;
1826       baton.path[0] = original_path;
1827       baton.path[1] = modified_path;
1828       baton.hunk = svn_stringbuf_create_empty(pool);
1829       baton.show_c_function = show_c_function;
1830       baton.extra_context = svn_stringbuf_create_empty(pool);
1831
1832       if (show_c_function)
1833         {
1834           baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1835
1836           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1837           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1838           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1839         }
1840
1841       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1842                                             header_encoding, pool));
1843       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1844                                             header_encoding, pool));
1845       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1846                                             header_encoding, pool));
1847
1848       if (relative_to_dir)
1849         {
1850           /* Possibly adjust the "original" and "modified" paths shown in
1851              the output (see issue #2723). */
1852           const char *child_path;
1853
1854           if (! original_header)
1855             {
1856               child_path = svn_dirent_is_child(relative_to_dir,
1857                                                original_path, pool);
1858               if (child_path)
1859                 original_path = child_path;
1860               else
1861                 return svn_error_createf(
1862                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1863                                    _("Path '%s' must be inside "
1864                                      "the directory '%s'"),
1865                                    svn_dirent_local_style(original_path, pool),
1866                                    svn_dirent_local_style(relative_to_dir,
1867                                                           pool));
1868             }
1869
1870           if (! modified_header)
1871             {
1872               child_path = svn_dirent_is_child(relative_to_dir,
1873                                                modified_path, pool);
1874               if (child_path)
1875                 modified_path = child_path;
1876               else
1877                 return svn_error_createf(
1878                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1879                                    _("Path '%s' must be inside "
1880                                      "the directory '%s'"),
1881                                    svn_dirent_local_style(modified_path, pool),
1882                                    svn_dirent_local_style(relative_to_dir,
1883                                                           pool));
1884             }
1885         }
1886
1887       for (i = 0; i < 2; i++)
1888         {
1889           SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1890                                    APR_READ, APR_OS_DEFAULT, pool));
1891         }
1892
1893       if (original_header == NULL)
1894         {
1895           SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1896                                              pool));
1897         }
1898
1899       if (modified_header == NULL)
1900         {
1901           SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1902                                              pool));
1903         }
1904
1905       SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1906                                              original_header, modified_header,
1907                                              pool));
1908
1909       SVN_ERR(svn_diff_output(diff, &baton,
1910                               &svn_diff__file_output_unified_vtable));
1911       SVN_ERR(output_unified_flush_hunk(&baton));
1912
1913       for (i = 0; i < 2; i++)
1914         {
1915           SVN_ERR(svn_io_file_close(baton.file[i], pool));
1916         }
1917     }
1918
1919   return SVN_NO_ERROR;
1920 }
1921
1922 \f
1923 /** Display diff3 **/
1924
1925 /* A stream to remember *leading* context.  Note that this stream does
1926    *not* copy the data that it is remembering; it just saves
1927    *pointers! */
1928 typedef struct context_saver_t {
1929   svn_stream_t *stream;
1930   const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1931   apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1932   apr_size_t next_slot;
1933   apr_size_t total_written;
1934 } context_saver_t;
1935
1936
1937 static svn_error_t *
1938 context_saver_stream_write(void *baton,
1939                            const char *data,
1940                            apr_size_t *len)
1941 {
1942   context_saver_t *cs = baton;
1943   cs->data[cs->next_slot] = data;
1944   cs->len[cs->next_slot] = *len;
1945   cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1946   cs->total_written++;
1947   return SVN_NO_ERROR;
1948 }
1949
1950 typedef struct svn_diff3__file_output_baton_t
1951 {
1952   svn_stream_t *output_stream;
1953
1954   const char *path[3];
1955
1956   apr_off_t   current_line[3];
1957
1958   char       *buffer[3];
1959   char       *endp[3];
1960   char       *curp[3];
1961
1962   /* The following four members are in the encoding used for the output. */
1963   const char *conflict_modified;
1964   const char *conflict_original;
1965   const char *conflict_separator;
1966   const char *conflict_latest;
1967
1968   const char *marker_eol;
1969
1970   svn_diff_conflict_display_style_t conflict_style;
1971
1972   /* The rest of the fields are for
1973      svn_diff_conflict_display_only_conflicts only.  Note that for
1974      these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
1975      (soon after a conflict) a "trailing context stream", never the
1976      actual output stream.*/
1977   /* The actual output stream. */
1978   svn_stream_t *real_output_stream;
1979   context_saver_t *context_saver;
1980   /* Used to allocate context_saver and trailing context streams, and
1981      for some printfs. */
1982   apr_pool_t *pool;
1983 } svn_diff3__file_output_baton_t;
1984
1985 static svn_error_t *
1986 flush_context_saver(context_saver_t *cs,
1987                     svn_stream_t *output_stream)
1988 {
1989   int i;
1990   for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++)
1991     {
1992       apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1993       if (cs->data[slot])
1994         {
1995           apr_size_t len = cs->len[slot];
1996           SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
1997         }
1998     }
1999   return SVN_NO_ERROR;
2000 }
2001
2002 static void
2003 make_context_saver(svn_diff3__file_output_baton_t *fob)
2004 {
2005   context_saver_t *cs;
2006
2007   svn_pool_clear(fob->pool);
2008   cs = apr_pcalloc(fob->pool, sizeof(*cs));
2009   cs->stream = svn_stream_empty(fob->pool);
2010   svn_stream_set_baton(cs->stream, cs);
2011   svn_stream_set_write(cs->stream, context_saver_stream_write);
2012   fob->context_saver = cs;
2013   fob->output_stream = cs->stream;
2014 }
2015
2016
2017 /* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
2018    BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2019    a context_saver; used for *trailing* context. */
2020
2021 struct trailing_context_printer {
2022   apr_size_t lines_to_print;
2023   svn_diff3__file_output_baton_t *fob;
2024 };
2025
2026
2027
2028 static svn_error_t *
2029 trailing_context_printer_write(void *baton,
2030                                const char *data,
2031                                apr_size_t *len)
2032 {
2033   struct trailing_context_printer *tcp = baton;
2034   SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2035   SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2036   tcp->lines_to_print--;
2037   if (tcp->lines_to_print == 0)
2038     make_context_saver(tcp->fob);
2039   return SVN_NO_ERROR;
2040 }
2041
2042
2043 static void
2044 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2045 {
2046   struct trailing_context_printer *tcp;
2047   svn_stream_t *s;
2048
2049   svn_pool_clear(btn->pool);
2050
2051   tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2052   tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2053   tcp->fob = btn;
2054   s = svn_stream_empty(btn->pool);
2055   svn_stream_set_baton(s, tcp);
2056   svn_stream_set_write(s, trailing_context_printer_write);
2057   btn->output_stream = s;
2058 }
2059
2060
2061
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the baton's output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2067
2068
2069 static svn_error_t *
2070 output_line(svn_diff3__file_output_baton_t *baton,
2071             svn_diff3__file_output_type_e type, int idx)
2072 {
2073   char *curp;
2074   char *endp;
2075   char *eol;
2076   apr_size_t len;
2077
2078   curp = baton->curp[idx];
2079   endp = baton->endp[idx];
2080
2081   /* Lazily update the current line even if we're at EOF.
2082    */
2083   baton->current_line[idx]++;
2084
2085   if (curp == endp)
2086     return SVN_NO_ERROR;
2087
2088   eol = svn_eol__find_eol_start(curp, endp - curp);
2089   if (!eol)
2090     eol = endp;
2091   else
2092     {
2093       svn_boolean_t had_cr = (*eol == '\r');
2094       eol++;
2095       if (had_cr && eol != endp && *eol == '\n')
2096         eol++;
2097     }
2098
2099   if (type != svn_diff3__file_output_skip)
2100     {
2101       len = eol - curp;
2102       /* Note that the trailing context printer assumes that
2103          svn_stream_write is called exactly once per line. */
2104       SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2105     }
2106
2107   baton->curp[idx] = eol;
2108
2109   return SVN_NO_ERROR;
2110 }
2111
2112 static svn_error_t *
2113 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2114 {
2115   return svn_stream_puts(btn->output_stream, btn->marker_eol);
2116 }
2117
2118 static svn_error_t *
2119 output_hunk(void *baton, int idx, apr_off_t target_line,
2120             apr_off_t target_length)
2121 {
2122   svn_diff3__file_output_baton_t *output_baton = baton;
2123
2124   /* Skip lines until we are at the start of the changed range */
2125   while (output_baton->current_line[idx] < target_line)
2126     {
2127       SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2128     }
2129
2130   target_line += target_length;
2131
2132   while (output_baton->current_line[idx] < target_line)
2133     {
2134       SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2135     }
2136
2137   return SVN_NO_ERROR;
2138 }
2139
2140 static svn_error_t *
2141 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2142               apr_off_t modified_start, apr_off_t modified_length,
2143               apr_off_t latest_start, apr_off_t latest_length)
2144 {
2145   return output_hunk(baton, 1, modified_start, modified_length);
2146 }
2147
2148 static svn_error_t *
2149 output_diff_modified(void *baton,
2150                      apr_off_t original_start, apr_off_t original_length,
2151                      apr_off_t modified_start, apr_off_t modified_length,
2152                      apr_off_t latest_start, apr_off_t latest_length)
2153 {
2154   return output_hunk(baton, 1, modified_start, modified_length);
2155 }
2156
2157 static svn_error_t *
2158 output_diff_latest(void *baton,
2159                    apr_off_t original_start, apr_off_t original_length,
2160                    apr_off_t modified_start, apr_off_t modified_length,
2161                    apr_off_t latest_start, apr_off_t latest_length)
2162 {
2163   return output_hunk(baton, 2, latest_start, latest_length);
2164 }
2165
2166 static svn_error_t *
2167 output_conflict(void *baton,
2168                 apr_off_t original_start, apr_off_t original_length,
2169                 apr_off_t modified_start, apr_off_t modified_length,
2170                 apr_off_t latest_start, apr_off_t latest_length,
2171                 svn_diff_t *diff);
2172
2173 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2174 {
2175   output_common,
2176   output_diff_modified,
2177   output_diff_latest,
2178   output_diff_modified, /* output_diff_common */
2179   output_conflict
2180 };
2181
2182
2183
2184 static svn_error_t *
2185 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2186                              apr_off_t original_start,
2187                              apr_off_t original_length,
2188                              apr_off_t modified_start,
2189                              apr_off_t modified_length,
2190                              apr_off_t latest_start,
2191                              apr_off_t latest_length)
2192 {
2193   /* Are we currently saving starting context (as opposed to printing
2194      trailing context)?  If so, flush it. */
2195   if (btn->output_stream == btn->context_saver->stream)
2196     {
2197       if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
2198         SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2199       SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2200     }
2201
2202   /* Print to the real output stream. */
2203   btn->output_stream = btn->real_output_stream;
2204
2205   /* Output the conflict itself. */
2206   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2207                             (modified_length == 1
2208                              ? "%s (%" APR_OFF_T_FMT ")"
2209                              : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2210                             btn->conflict_modified,
2211                             modified_start + 1, modified_length));
2212   SVN_ERR(output_marker_eol(btn));
2213   SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2214
2215   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2216                             (original_length == 1
2217                              ? "%s (%" APR_OFF_T_FMT ")"
2218                              : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2219                             btn->conflict_original,
2220                             original_start + 1, original_length));
2221   SVN_ERR(output_marker_eol(btn));
2222   SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2223
2224   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2225                             "%s%s", btn->conflict_separator, btn->marker_eol));
2226   SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2227   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2228                             (latest_length == 1
2229                              ? "%s (%" APR_OFF_T_FMT ")"
2230                              : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2231                             btn->conflict_latest,
2232                             latest_start + 1, latest_length));
2233   SVN_ERR(output_marker_eol(btn));
2234
2235   /* Go into print-trailing-context mode instead. */
2236   make_trailing_context_printer(btn);
2237
2238   return SVN_NO_ERROR;
2239 }
2240
2241
2242 static svn_error_t *
2243 output_conflict(void *baton,
2244                 apr_off_t original_start, apr_off_t original_length,
2245                 apr_off_t modified_start, apr_off_t modified_length,
2246                 apr_off_t latest_start, apr_off_t latest_length,
2247                 svn_diff_t *diff)
2248 {
2249   svn_diff3__file_output_baton_t *file_baton = baton;
2250
2251   svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2252
2253   if (style == svn_diff_conflict_display_only_conflicts)
2254     return output_conflict_with_context(file_baton,
2255                                         original_start, original_length,
2256                                         modified_start, modified_length,
2257                                         latest_start, latest_length);
2258
2259   if (style == svn_diff_conflict_display_resolved_modified_latest)
2260     {
2261       if (diff)
2262         return svn_diff_output(diff, baton,
2263                                &svn_diff3__file_output_vtable);
2264       else
2265         style = svn_diff_conflict_display_modified_latest;
2266     }
2267
2268   if (style == svn_diff_conflict_display_modified_latest ||
2269       style == svn_diff_conflict_display_modified_original_latest)
2270     {
2271       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2272                                file_baton->conflict_modified));
2273       SVN_ERR(output_marker_eol(file_baton));
2274
2275       SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2276
2277       if (style == svn_diff_conflict_display_modified_original_latest)
2278         {
2279           SVN_ERR(svn_stream_puts(file_baton->output_stream,
2280                                    file_baton->conflict_original));
2281           SVN_ERR(output_marker_eol(file_baton));
2282           SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2283         }
2284
2285       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2286                               file_baton->conflict_separator));
2287       SVN_ERR(output_marker_eol(file_baton));
2288
2289       SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2290
2291       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2292                               file_baton->conflict_latest));
2293       SVN_ERR(output_marker_eol(file_baton));
2294     }
2295   else if (style == svn_diff_conflict_display_modified)
2296     SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2297   else if (style == svn_diff_conflict_display_latest)
2298     SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2299   else /* unknown style */
2300     SVN_ERR_MALFUNCTION();
2301
2302   return SVN_NO_ERROR;
2303 }
2304
2305 svn_error_t *
2306 svn_diff_file_output_merge2(svn_stream_t *output_stream,
2307                             svn_diff_t *diff,
2308                             const char *original_path,
2309                             const char *modified_path,
2310                             const char *latest_path,
2311                             const char *conflict_original,
2312                             const char *conflict_modified,
2313                             const char *conflict_latest,
2314                             const char *conflict_separator,
2315                             svn_diff_conflict_display_style_t style,
2316                             apr_pool_t *pool)
2317 {
2318   svn_diff3__file_output_baton_t baton;
2319   apr_file_t *file[3];
2320   int idx;
2321 #if APR_HAS_MMAP
2322   apr_mmap_t *mm[3] = { 0 };
2323 #endif /* APR_HAS_MMAP */
2324   const char *eol;
2325   svn_boolean_t conflicts_only =
2326     (style == svn_diff_conflict_display_only_conflicts);
2327
2328   memset(&baton, 0, sizeof(baton));
2329   if (conflicts_only)
2330     {
2331       baton.pool = svn_pool_create(pool);
2332       make_context_saver(&baton);
2333       baton.real_output_stream = output_stream;
2334     }
2335   else
2336     baton.output_stream = output_stream;
2337   baton.path[0] = original_path;
2338   baton.path[1] = modified_path;
2339   baton.path[2] = latest_path;
2340   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2341                                     conflict_modified ? conflict_modified
2342                                     : apr_psprintf(pool, "<<<<<<< %s",
2343                                                    modified_path),
2344                                     pool));
2345   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2346                                     conflict_original ? conflict_original
2347                                     : apr_psprintf(pool, "||||||| %s",
2348                                                    original_path),
2349                                     pool));
2350   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2351                                     conflict_separator ? conflict_separator
2352                                     : "=======", pool));
2353   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2354                                     conflict_latest ? conflict_latest
2355                                     : apr_psprintf(pool, ">>>>>>> %s",
2356                                                    latest_path),
2357                                     pool));
2358
2359   baton.conflict_style = style;
2360
2361   for (idx = 0; idx < 3; idx++)
2362     {
2363       apr_off_t size;
2364
2365       SVN_ERR(map_or_read_file(&file[idx],
2366                                MMAP_T_ARG(mm[idx])
2367                                &baton.buffer[idx], &size,
2368                                baton.path[idx], pool));
2369
2370       baton.curp[idx] = baton.buffer[idx];
2371       baton.endp[idx] = baton.buffer[idx];
2372
2373       if (baton.endp[idx])
2374         baton.endp[idx] += size;
2375     }
2376
2377   /* Check what eol marker we should use for conflict markers.
2378      We use the eol marker of the modified file and fall back on the
2379      platform's eol marker if that file doesn't contain any newlines. */
2380   eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2381                             NULL);
2382   if (! eol)
2383     eol = APR_EOL_STR;
2384   baton.marker_eol = eol;
2385
2386   SVN_ERR(svn_diff_output(diff, &baton,
2387                           &svn_diff3__file_output_vtable));
2388
2389   for (idx = 0; idx < 3; idx++)
2390     {
2391 #if APR_HAS_MMAP
2392       if (mm[idx])
2393         {
2394           apr_status_t rv = apr_mmap_delete(mm[idx]);
2395           if (rv != APR_SUCCESS)
2396             {
2397               return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2398                                         baton.path[idx]);
2399             }
2400         }
2401 #endif /* APR_HAS_MMAP */
2402
2403       if (file[idx])
2404         {
2405           SVN_ERR(svn_io_file_close(file[idx], pool));
2406         }
2407     }
2408
2409   if (conflicts_only)
2410     svn_pool_destroy(baton.pool);
2411
2412   return SVN_NO_ERROR;
2413 }
2414