contrib/subversion/subversion/libsvn_diff/diff_file.c

   1 /*
   2  * diff_file.c :  routines for doing diffs on files
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_pools.h>
  27 #include <apr_general.h>
  28 #include <apr_file_io.h>
  29 #include <apr_file_info.h>
  30 #include <apr_time.h>
  31 #include <apr_mmap.h>
  32 #include <apr_getopt.h>
  33
  34 #include "svn_error.h"
  35 #include "svn_diff.h"
  36 #include "svn_types.h"
  37 #include "svn_string.h"
  38 #include "svn_subst.h"
  39 #include "svn_io.h"
  40 #include "svn_utf.h"
  41 #include "svn_pools.h"
  42 #include "diff.h"
  43 #include "svn_private_config.h"
  44 #include "svn_path.h"
  45 #include "svn_ctype.h"
  46
  47 #include "private/svn_utf_private.h"
  48 #include "private/svn_eol_private.h"
  49 #include "private/svn_dep_compat.h"
  50 #include "private/svn_adler32.h"
  51 #include "private/svn_diff_private.h"
  52
  53 /* A token, i.e. a line read from a file. */
  54 typedef struct svn_diff__file_token_t
  55 {
  56   /* Next token in free list. */
  57   struct svn_diff__file_token_t *next;
  58   svn_diff_datasource_e datasource;
  59   /* Offset in the datasource. */
  60   apr_off_t offset;
  61   /* Offset of the normalized token (may skip leading whitespace) */
  62   apr_off_t norm_offset;
  63   /* Total length - before normalization. */
  64   apr_off_t raw_length;
  65   /* Total length - after normalization. */
  66   apr_off_t length;
  67 } svn_diff__file_token_t;
  68
  69
  70 typedef struct svn_diff__file_baton_t
  71 {
  72   const svn_diff_file_options_t *options;
  73
  74   struct file_info {
  75     const char *path;  /* path to this file, absolute or relative to CWD */
  76
  77     /* All the following fields are active while this datasource is open */
  78     apr_file_t *file;  /* handle of this file */
  79     apr_off_t size;    /* total raw size in bytes of this file */
  80
  81     /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
  82     int chunk;     /* the current chunk number, zero-based */
  83     char *buffer;  /* a buffer containing the current chunk */
  84     char *curp;    /* current position in the current chunk */
  85     char *endp;    /* next memory address after the current chunk */
  86
  87     svn_diff__normalize_state_t normalize_state;
  88
  89     /* Where the identical suffix starts in this datasource */
  90     int suffix_start_chunk;
  91     apr_off_t suffix_offset_in_chunk;
  92   } files[4];
  93
  94   /* List of free tokens that may be reused. */
  95   svn_diff__file_token_t *tokens;
  96
  97   apr_pool_t *pool;
  98 } svn_diff__file_baton_t;
  99
 100 static int
 101 datasource_to_index(svn_diff_datasource_e datasource)
 102 {
 103   switch (datasource)
 104     {
 105     case svn_diff_datasource_original:
 106       return 0;
 107
 108     case svn_diff_datasource_modified:
 109       return 1;
 110
 111     case svn_diff_datasource_latest:
 112       return 2;
 113
 114     case svn_diff_datasource_ancestor:
 115       return 3;
 116     }
 117
 118   return -1;
 119 }
 120
 121 /* Files are read in chunks of 128k.  There is no support for this number
 122  * whatsoever.  If there is a number someone comes up with that has some
 123  * argumentation, let's use that.
 124  */
 125 /* If you change this number, update test_norm_offset(),
 126  * test_identical_suffix() and and test_token_compare()  in diff-diff3-test.c.
 127  */
 128 #define CHUNK_SHIFT 17
 129 #define CHUNK_SIZE (1 << CHUNK_SHIFT)
 130
 131 #define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT)
 132 #define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
 133 #define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
 134
 135
 136 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
 137  * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
 138  */
 139 static APR_INLINE svn_error_t *
 140 read_chunk(apr_file_t *file, const char *path,
 141            char *buffer, apr_off_t length,
 142            apr_off_t offset, apr_pool_t *pool)
 143 {
 144   /* XXX: The final offset may not be the one we asked for.
 145    * XXX: Check.
 146    */
 147   SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
 148   return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
 149                                 NULL, NULL, pool);
 150 }
 151
 152
 153 /* Map or read a file at PATH. *BUFFER will point to the file
 154  * contents; if the file was mapped, *FILE and *MM will contain the
 155  * mmap context; otherwise they will be NULL.  SIZE will contain the
 156  * file size.  Allocate from POOL.
 157  */
 158 #if APR_HAS_MMAP
 159 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
 160 #define MMAP_T_ARG(NAME)   &(NAME),
 161 #else
 162 #define MMAP_T_PARAM(NAME)
 163 #define MMAP_T_ARG(NAME)
 164 #endif
 165
 166 static svn_error_t *
 167 map_or_read_file(apr_file_t **file,
 168                  MMAP_T_PARAM(mm)
 169                  char **buffer, apr_size_t *size_p,
 170                  const char *path, apr_pool_t *pool)
 171 {
 172   apr_finfo_t finfo;
 173   apr_status_t rv;
 174   apr_size_t size;
 175
 176   *buffer = NULL;
 177
 178   SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
 179   SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
 180
 181   if (finfo.size > APR_SIZE_MAX)
 182     {
 183       return svn_error_createf(APR_ENOMEM, NULL,
 184                                _("File '%s' is too large to be read in "
 185                                  "to memory"), path);
 186     }
 187
 188   size = (apr_size_t) finfo.size;
 189 #if APR_HAS_MMAP
 190   if (size > APR_MMAP_THRESHOLD)
 191     {
 192       rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
 193       if (rv == APR_SUCCESS)
 194         {
 195           *buffer = (*mm)->mm;
 196         }
 197       else
 198         {
 199           /* Clear *MM because output parameters are undefined on error. */
 200           *mm = NULL;
 201         }
 202
 203       /* On failure we just fall through and try reading the file into
 204        * memory instead.
 205        */
 206     }
 207 #endif /* APR_HAS_MMAP */
 208
 209    if (*buffer == NULL && size > 0)
 210     {
 211       *buffer = apr_palloc(pool, size);
 212
 213       SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
 214
 215       /* Since we have the entire contents of the file we can
 216        * close it now.
 217        */
 218       SVN_ERR(svn_io_file_close(*file, pool));
 219
 220       *file = NULL;
 221     }
 222
 223   *size_p = size;
 224
 225   return SVN_NO_ERROR;
 226 }
 227
 228
 229 /* For all files in the FILE array, increment the curp pointer.  If a file
 230  * points before the beginning of file, let it point at the first byte again.
 231  * If the end of the current chunk is reached, read the next chunk in the
 232  * buffer and point curp to the start of the chunk.  If EOF is reached, set
 233  * curp equal to endp to indicate EOF. */
 234 #define INCREMENT_POINTERS(all_files, files_len, pool)                       \
 235   do {                                                                       \
 236     apr_size_t svn_macro__i;                                                 \
 237                                                                              \
 238     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 239     {                                                                        \
 240       if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
 241         (all_files)[svn_macro__i].curp++;                                    \
 242       else                                                                   \
 243         SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
 244     }                                                                        \
 245   } while (0)
 246
 247
 248 /* For all files in the FILE array, decrement the curp pointer.  If the
 249  * start of a chunk is reached, read the previous chunk in the buffer and
 250  * point curp to the last byte of the chunk.  If the beginning of a FILE is
 251  * reached, set chunk to -1 to indicate BOF. */
 252 #define DECREMENT_POINTERS(all_files, files_len, pool)                       \
 253   do {                                                                       \
 254     apr_size_t svn_macro__i;                                                 \
 255                                                                              \
 256     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 257     {                                                                        \
 258       if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
 259         (all_files)[svn_macro__i].curp--;                                    \
 260       else                                                                   \
 261         SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
 262     }                                                                        \
 263   } while (0)
 264
 265
 266 static svn_error_t *
 267 increment_chunk(struct file_info *file, apr_pool_t *pool)
 268 {
 269   apr_off_t length;
 270   apr_off_t last_chunk = offset_to_chunk(file->size);
 271
 272   if (file->chunk == -1)
 273     {
 274       /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
 275       file->chunk = 0;
 276       file->curp = file->buffer;
 277     }
 278   else if (file->chunk == last_chunk)
 279     {
 280       /* We are at the last chunk. Indicate EOF by setting curp == endp. */
 281       file->curp = file->endp;
 282     }
 283   else
 284     {
 285       /* There are still chunks left. Read next chunk and reset pointers. */
 286       file->chunk++;
 287       length = file->chunk == last_chunk ?
 288         offset_in_chunk(file->size) : CHUNK_SIZE;
 289       SVN_ERR(read_chunk(file->file, file->path, file->buffer,
 290                          length, chunk_to_offset(file->chunk),
 291                          pool));
 292       file->endp = file->buffer + length;
 293       file->curp = file->buffer;
 294     }
 295
 296   return SVN_NO_ERROR;
 297 }
 298
 299
 300 static svn_error_t *
 301 decrement_chunk(struct file_info *file, apr_pool_t *pool)
 302 {
 303   if (file->chunk == 0)
 304     {
 305       /* We are already at the first chunk. Indicate BOF (Beginning Of File)
 306          by setting chunk = -1 and curp = endp - 1. Both conditions are
 307          important. They help the increment step to catch the BOF situation
 308          in an efficient way. */
 309       file->chunk--;
 310       file->curp = file->endp - 1;
 311     }
 312   else
 313     {
 314       /* Read previous chunk and reset pointers. */
 315       file->chunk--;
 316       SVN_ERR(read_chunk(file->file, file->path, file->buffer,
 317                          CHUNK_SIZE, chunk_to_offset(file->chunk),
 318                          pool));
 319       file->endp = file->buffer + CHUNK_SIZE;
 320       file->curp = file->endp - 1;
 321     }
 322
 323   return SVN_NO_ERROR;
 324 }
 325
 326
 327 /* Check whether one of the FILEs has its pointers 'before' the beginning of
 328  * the file (this can happen while scanning backwards). This is the case if
 329  * one of them has chunk == -1. */
 330 static svn_boolean_t
 331 is_one_at_bof(struct file_info file[], apr_size_t file_len)
 332 {
 333   apr_size_t i;
 334
 335   for (i = 0; i < file_len; i++)
 336     if (file[i].chunk == -1)
 337       return TRUE;
 338
 339   return FALSE;
 340 }
 341
 342 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
 343  * one of them has curp == endp (this can only happen at the last chunk)) */
 344 static svn_boolean_t
 345 is_one_at_eof(struct file_info file[], apr_size_t file_len)
 346 {
 347   apr_size_t i;
 348
 349   for (i = 0; i < file_len; i++)
 350     if (file[i].curp == file[i].endp)
 351       return TRUE;
 352
 353   return FALSE;
 354 }
 355
 356 /* Quickly determine whether there is a eol char in CHUNK.
 357  * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 358  */
 359
 360 #if SVN_UNALIGNED_ACCESS_IS_OK
 361 static svn_boolean_t contains_eol(apr_uintptr_t chunk)
 362 {
 363   apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
 364   apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
 365
 366   r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
 367   n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
 368
 369   return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
 370 }
 371 #endif
 372
 373 /* Find the prefix which is identical between all elements of the FILE array.
 374  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
 375  * set to TRUE if one of the FILEs reached its end while scanning prefix,
 376  * i.e. at least one file consisted entirely of prefix.  Otherwise,
 377  * REACHED_ONE_EOF is set to FALSE.
 378  *
 379  * After this function is finished, the buffers, chunks, curp's and endp's
 380  * of the FILEs are set to point at the first byte after the prefix. */
 381 static svn_error_t *
 382 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
 383                       struct file_info file[], apr_size_t file_len,
 384                       apr_pool_t *pool)
 385 {
 386   svn_boolean_t had_cr = FALSE;
 387   svn_boolean_t is_match;
 388   apr_off_t lines = 0;
 389   apr_size_t i;
 390
 391   *reached_one_eof = FALSE;
 392
 393   for (i = 1, is_match = TRUE; i < file_len; i++)
 394     is_match = is_match && *file[0].curp == *file[i].curp;
 395   while (is_match)
 396     {
 397 #if SVN_UNALIGNED_ACCESS_IS_OK
 398       apr_ssize_t max_delta, delta;
 399 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 400
 401       /* ### TODO: see if we can take advantage of
 402          diff options like ignore_eol_style or ignore_space. */
 403       /* check for eol, and count */
 404       if (*file[0].curp == '\r')
 405         {
 406           lines++;
 407           had_cr = TRUE;
 408         }
 409       else if (*file[0].curp == '\n' && !had_cr)
 410         {
 411           lines++;
 412         }
 413       else
 414         {
 415           had_cr = FALSE;
 416         }
 417
 418       INCREMENT_POINTERS(file, file_len, pool);
 419
 420 #if SVN_UNALIGNED_ACCESS_IS_OK
 421
 422       /* Try to advance as far as possible with machine-word granularity.
 423        * Determine how far we may advance with chunky ops without reaching
 424        * endp for any of the files.
 425        * Signedness is important here if curp gets close to endp.
 426        */
 427       max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
 428       for (i = 1; i < file_len; i++)
 429         {
 430           delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
 431           if (delta < max_delta)
 432             max_delta = delta;
 433         }
 434
 435       is_match = TRUE;
 436       for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
 437         {
 438           apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
 439           if (contains_eol(chunk))
 440             break;
 441
 442           for (i = 1; i < file_len; i++)
 443             if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
 444               {
 445                 is_match = FALSE;
 446                 break;
 447               }
 448
 449           if (! is_match)
 450             break;
 451         }
 452
 453       if (delta /* > 0*/)
 454         {
 455           /* We either found a mismatch or an EOL at or shortly behind curp+delta
 456            * or we cannot proceed with chunky ops without exceeding endp.
 457            * In any way, everything up to curp + delta is equal and not an EOL.
 458            */
 459           for (i = 0; i < file_len; i++)
 460             file[i].curp += delta;
 461
 462           /* Skipped data without EOL markers, so last char was not a CR. */
 463           had_cr = FALSE;
 464         }
 465 #endif
 466
 467       *reached_one_eof = is_one_at_eof(file, file_len);
 468       if (*reached_one_eof)
 469         break;
 470       else
 471         for (i = 1, is_match = TRUE; i < file_len; i++)
 472           is_match = is_match && *file[0].curp == *file[i].curp;
 473     }
 474
 475   if (had_cr)
 476     {
 477       /* Check if we ended in the middle of a \r\n for one file, but \r for
 478          another. If so, back up one byte, so the next loop will back up
 479          the entire line. Also decrement lines, since we counted one
 480          too many for the \r. */
 481       svn_boolean_t ended_at_nonmatching_newline = FALSE;
 482       for (i = 0; i < file_len; i++)
 483         if (file[i].curp < file[i].endp)
 484           ended_at_nonmatching_newline = ended_at_nonmatching_newline
 485                                          || *file[i].curp == '\n';
 486       if (ended_at_nonmatching_newline)
 487         {
 488           lines--;
 489           DECREMENT_POINTERS(file, file_len, pool);
 490         }
 491     }
 492
 493   /* Back up one byte, so we point at the last identical byte */
 494   DECREMENT_POINTERS(file, file_len, pool);
 495
 496   /* Back up to the last eol sequence (\n, \r\n or \r) */
 497   while (!is_one_at_bof(file, file_len) &&
 498          *file[0].curp != '\n' && *file[0].curp != '\r')
 499     DECREMENT_POINTERS(file, file_len, pool);
 500
 501   /* Slide one byte forward, to point past the eol sequence */
 502   INCREMENT_POINTERS(file, file_len, pool);
 503
 504   *prefix_lines = lines;
 505
 506   return SVN_NO_ERROR;
 507 }
 508
 509
 510 /* The number of identical suffix lines to keep with the middle section. These
 511  * lines are not eliminated as suffix, and can be picked up by the token
 512  * parsing and lcs steps. This is mainly for backward compatibility with
 513  * the previous diff (and blame) output (if there are multiple diff solutions,
 514  * our lcs algorithm prefers taking common lines from the start, rather than
 515  * from the end. By giving it back some suffix lines, we give it some wiggle
 516  * room to find the exact same diff as before).
 517  *
 518  * The number 50 is more or less arbitrary, based on some real-world tests
 519  * with big files (and then doubling the required number to be on the safe
 520  * side). This has a negligible effect on the power of the optimization. */
 521 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
 522 #ifndef SUFFIX_LINES_TO_KEEP
 523 #define SUFFIX_LINES_TO_KEEP 50
 524 #endif
 525
 526 /* Find the suffix which is identical between all elements of the FILE array.
 527  * Return the number of suffix lines in SUFFIX_LINES.
 528  *
 529  * Before this function is called the FILEs' pointers and chunks should be
 530  * positioned right after the identical prefix (which is the case after
 531  * find_identical_prefix), so we can determine where suffix scanning should
 532  * ultimately stop. */
 533 static svn_error_t *
 534 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
 535                       apr_size_t file_len, apr_pool_t *pool)
 536 {
 537   struct file_info file_for_suffix[4] = { { 0 }  };
 538   apr_off_t length[4];
 539   apr_off_t suffix_min_chunk0;
 540   apr_off_t suffix_min_offset0;
 541   apr_off_t min_file_size;
 542   int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
 543   svn_boolean_t is_match;
 544   apr_off_t lines = 0;
 545   svn_boolean_t had_cr;
 546   svn_boolean_t had_nl;
 547   apr_size_t i;
 548
 549   /* Initialize file_for_suffix[].
 550      Read last chunk, position curp at last byte. */
 551   for (i = 0; i < file_len; i++)
 552     {
 553       file_for_suffix[i].path = file[i].path;
 554       file_for_suffix[i].file = file[i].file;
 555       file_for_suffix[i].size = file[i].size;
 556       file_for_suffix[i].chunk =
 557         (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
 558       length[i] = offset_in_chunk(file_for_suffix[i].size);
 559       if (length[i] == 0)
 560         {
 561           /* last chunk is an empty chunk -> start at next-to-last chunk */
 562           file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
 563           length[i] = CHUNK_SIZE;
 564         }
 565
 566       if (file_for_suffix[i].chunk == file[i].chunk)
 567         {
 568           /* Prefix ended in last chunk, so we can reuse the prefix buffer */
 569           file_for_suffix[i].buffer = file[i].buffer;
 570         }
 571       else
 572         {
 573           /* There is at least more than 1 chunk,
 574              so allocate full chunk size buffer */
 575           file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
 576           SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path,
 577                              file_for_suffix[i].buffer, length[i],
 578                              chunk_to_offset(file_for_suffix[i].chunk),
 579                              pool));
 580         }
 581       file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
 582       file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
 583     }
 584
 585   /* Get the chunk and pointer offset (for file[0]) at which we should stop
 586      scanning backward for the identical suffix, i.e. when we reach prefix. */
 587   suffix_min_chunk0 = file[0].chunk;
 588   suffix_min_offset0 = file[0].curp - file[0].buffer;
 589
 590   /* Compensate if other files are smaller than file[0] */
 591   for (i = 1, min_file_size = file[0].size; i < file_len; i++)
 592     if (file[i].size < min_file_size)
 593       min_file_size = file[i].size;
 594   if (file[0].size > min_file_size)
 595     {
 596       suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
 597       suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
 598     }
 599
 600   /* Scan backwards until mismatch or until we reach the prefix. */
 601   for (i = 1, is_match = TRUE; i < file_len; i++)
 602     is_match = is_match
 603                && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 604   if (is_match && *file_for_suffix[0].curp != '\r'
 605                && *file_for_suffix[0].curp != '\n')
 606     /* Count an extra line for the last line not ending in an eol. */
 607     lines++;
 608
 609   had_nl = FALSE;
 610   while (is_match)
 611     {
 612       svn_boolean_t reached_prefix;
 613 #if SVN_UNALIGNED_ACCESS_IS_OK
 614       /* Initialize the minimum pointer positions. */
 615       const char *min_curp[4];
 616       svn_boolean_t can_read_word;
 617 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 618
 619       /* ### TODO: see if we can take advantage of
 620          diff options like ignore_eol_style or ignore_space. */
 621       /* check for eol, and count */
 622       if (*file_for_suffix[0].curp == '\n')
 623         {
 624           lines++;
 625           had_nl = TRUE;
 626         }
 627       else if (*file_for_suffix[0].curp == '\r' && !had_nl)
 628         {
 629           lines++;
 630         }
 631       else
 632         {
 633           had_nl = FALSE;
 634         }
 635
 636       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
 637
 638 #if SVN_UNALIGNED_ACCESS_IS_OK
 639       for (i = 0; i < file_len; i++)
 640         min_curp[i] = file_for_suffix[i].buffer;
 641
 642       /* If we are in the same chunk that contains the last part of the common
 643          prefix, use the min_curp[0] pointer to make sure we don't get a
 644          suffix that overlaps the already determined common prefix. */
 645       if (file_for_suffix[0].chunk == suffix_min_chunk0)
 646         min_curp[0] += suffix_min_offset0;
 647
 648       /* Scan quickly by reading with machine-word granularity. */
 649       for (i = 0, can_read_word = TRUE; i < file_len; i++)
 650         can_read_word = can_read_word
 651                         && (  (file_for_suffix[i].curp + 1
 652                                  - sizeof(apr_uintptr_t))
 653                             > min_curp[i]);
 654       while (can_read_word)
 655         {
 656           apr_uintptr_t chunk;
 657
 658           /* For each file curp is positioned at the current byte, but we
 659              want to examine the current byte and the ones before the current
 660              location as one machine word. */
 661
 662           chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
 663                                              - sizeof(apr_uintptr_t));
 664           if (contains_eol(chunk))
 665             break;
 666
 667           for (i = 1, is_match = TRUE; i < file_len; i++)
 668             is_match = is_match
 669                        && (   chunk
 670                            == *(const apr_uintptr_t *)
 671                                     (file_for_suffix[i].curp + 1
 672                                        - sizeof(apr_uintptr_t)));
 673
 674           if (! is_match)
 675             break;
 676
 677           for (i = 0; i < file_len; i++)
 678             {
 679               file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
 680               can_read_word = can_read_word
 681                               && (  (file_for_suffix[i].curp + 1
 682                                        - sizeof(apr_uintptr_t))
 683                                   > min_curp[i]);
 684             }
 685
 686           /* We skipped some bytes, so there are no closing EOLs */
 687           had_nl = FALSE;
 688           had_cr = FALSE;
 689         }
 690
 691       /* The > min_curp[i] check leaves at least one final byte for checking
 692          in the non block optimized case below. */
 693 #endif
 694
 695       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
 696                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
 697                           == suffix_min_offset0;
 698       if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
 699         break;
 700
 701       is_match = TRUE;
 702       for (i = 1; i < file_len; i++)
 703         is_match = is_match
 704                    && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 705     }
 706
 707   /* Slide one byte forward, to point at the first byte of identical suffix */
 708   INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 709
 710   /* Slide forward until we find an eol sequence to add the rest of the line
 711      we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
 712      one file reaches its end. */
 713   do
 714     {
 715       had_cr = FALSE;
 716       while (!is_one_at_eof(file_for_suffix, file_len)
 717              && *file_for_suffix[0].curp != '\n'
 718              && *file_for_suffix[0].curp != '\r')
 719         INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 720
 721       /* Slide one or two more bytes, to point past the eol. */
 722       if (!is_one_at_eof(file_for_suffix, file_len)
 723           && *file_for_suffix[0].curp == '\r')
 724         {
 725           lines--;
 726           had_cr = TRUE;
 727           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 728         }
 729       if (!is_one_at_eof(file_for_suffix, file_len)
 730           && *file_for_suffix[0].curp == '\n')
 731         {
 732           if (!had_cr)
 733             lines--;
 734           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 735         }
 736     }
 737   while (!is_one_at_eof(file_for_suffix, file_len)
 738          && suffix_lines_to_keep--);
 739
 740   if (is_one_at_eof(file_for_suffix, file_len))
 741     lines = 0;
 742
 743   /* Save the final suffix information in the original file_info */
 744   for (i = 0; i < file_len; i++)
 745     {
 746       file[i].suffix_start_chunk = file_for_suffix[i].chunk;
 747       file[i].suffix_offset_in_chunk =
 748         file_for_suffix[i].curp - file_for_suffix[i].buffer;
 749     }
 750
 751   *suffix_lines = lines;
 752
 753   return SVN_NO_ERROR;
 754 }
 755
 756
 757 /* Let FILE stand for the array of file_info struct elements of BATON->files
 758  * that are indexed by the elements of the DATASOURCE array.
 759  * BATON's type is (svn_diff__file_baton_t *).
 760  *
 761  * For each file in the FILE array, open the file at FILE.path; initialize
 762  * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
 763  * buffer and read the first chunk.  Then find the prefix and suffix lines
 764  * which are identical between all the files.  Return the number of identical
 765  * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
 766  * SUFFIX_LINES.
 767  *
 768  * Finding the identical prefix and suffix allows us to exclude those from the
 769  * rest of the diff algorithm, which increases performance by reducing the
 770  * problem space.
 771  *
 772  * Implements svn_diff_fns2_t::datasources_open. */
 773 static svn_error_t *
 774 datasources_open(void *baton,
 775                  apr_off_t *prefix_lines,
 776                  apr_off_t *suffix_lines,
 777                  const svn_diff_datasource_e *datasources,
 778                  apr_size_t datasources_len)
 779 {
 780   svn_diff__file_baton_t *file_baton = baton;
 781   struct file_info files[4];
 782   apr_finfo_t finfo[4];
 783   apr_off_t length[4];
 784 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 785   svn_boolean_t reached_one_eof;
 786 #endif
 787   apr_size_t i;
 788
 789   /* Make sure prefix_lines and suffix_lines are set correctly, even if we
 790    * exit early because one of the files is empty. */
 791   *prefix_lines = 0;
 792   *suffix_lines = 0;
 793
 794   /* Open datasources and read first chunk */
 795   for (i = 0; i < datasources_len; i++)
 796     {
 797       struct file_info *file
 798           = &file_baton->files[datasource_to_index(datasources[i])];
 799       SVN_ERR(svn_io_file_open(&file->file, file->path,
 800                                APR_READ, APR_OS_DEFAULT, file_baton->pool));
 801       SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
 802                                    file->file, file_baton->pool));
 803       file->size = finfo[i].size;
 804       length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
 805       file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
 806       SVN_ERR(read_chunk(file->file, file->path, file->buffer,
 807                          length[i], 0, file_baton->pool));
 808       file->endp = file->buffer + length[i];
 809       file->curp = file->buffer;
 810       /* Set suffix_start_chunk to a guard value, so if suffix scanning is
 811        * skipped because one of the files is empty, or because of
 812        * reached_one_eof, we can still easily check for the suffix during
 813        * token reading (datasource_get_next_token). */
 814       file->suffix_start_chunk = -1;
 815
 816       files[i] = *file;
 817     }
 818
 819   for (i = 0; i < datasources_len; i++)
 820     if (length[i] == 0)
 821       /* There will not be any identical prefix/suffix, so we're done. */
 822       return SVN_NO_ERROR;
 823
 824 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 825
 826   SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
 827                                 files, datasources_len, file_baton->pool));
 828
 829   if (!reached_one_eof)
 830     /* No file consisted totally of identical prefix,
 831      * so there may be some identical suffix.  */
 832     SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
 833                                   file_baton->pool));
 834
 835 #endif
 836
 837   /* Copy local results back to baton. */
 838   for (i = 0; i < datasources_len; i++)
 839     file_baton->files[datasource_to_index(datasources[i])] = files[i];
 840
 841   return SVN_NO_ERROR;
 842 }
 843
 844
 845 /* Implements svn_diff_fns2_t::datasource_close */
 846 static svn_error_t *
 847 datasource_close(void *baton, svn_diff_datasource_e datasource)
 848 {
 849   /* Do nothing.  The compare_token function needs previous datasources
 850    * to stay available until all datasources are processed.
 851    */
 852
 853   return SVN_NO_ERROR;
 854 }
 855
 856 /* Implements svn_diff_fns2_t::datasource_get_next_token */
 857 static svn_error_t *
 858 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
 859                           svn_diff_datasource_e datasource)
 860 {
 861   svn_diff__file_baton_t *file_baton = baton;
 862   svn_diff__file_token_t *file_token;
 863   struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
 864   char *endp;
 865   char *curp;
 866   char *eol;
 867   apr_off_t last_chunk;
 868   apr_off_t length;
 869   apr_uint32_t h = 0;
 870   /* Did the last chunk end in a CR character? */
 871   svn_boolean_t had_cr = FALSE;
 872
 873   *token = NULL;
 874
 875   curp = file->curp;
 876   endp = file->endp;
 877
 878   last_chunk = offset_to_chunk(file->size);
 879
 880   /* Are we already at the end of a chunk? */
 881   if (curp == endp)
 882     {
 883       /* Are we at EOF */
 884       if (last_chunk == file->chunk)
 885         return SVN_NO_ERROR; /* EOF */
 886
 887       /* Or right before an identical suffix in the next chunk? */
 888       if (file->chunk + 1 == file->suffix_start_chunk
 889           && file->suffix_offset_in_chunk == 0)
 890         return SVN_NO_ERROR;
 891     }
 892
 893   /* Stop when we encounter the identical suffix. If suffix scanning was not
 894    * performed, suffix_start_chunk will be -1, so this condition will never
 895    * be true. */
 896   if (file->chunk == file->suffix_start_chunk
 897       && (curp - file->buffer) == file->suffix_offset_in_chunk)
 898     return SVN_NO_ERROR;
 899
 900   /* Allocate a new token, or fetch one from the "reusable tokens" list. */
 901   file_token = file_baton->tokens;
 902   if (file_token)
 903     {
 904       file_baton->tokens = file_token->next;
 905     }
 906   else
 907     {
 908       file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
 909     }
 910
 911   file_token->datasource = datasource;
 912   file_token->offset = chunk_to_offset(file->chunk)
 913                        + (curp - file->buffer);
 914   file_token->norm_offset = file_token->offset;
 915   file_token->raw_length = 0;
 916   file_token->length = 0;
 917
 918   while (1)
 919     {
 920       eol = svn_eol__find_eol_start(curp, endp - curp);
 921       if (eol)
 922         {
 923           had_cr = (*eol == '\r');
 924           eol++;
 925           /* If we have the whole eol sequence in the chunk... */
 926           if (!(had_cr && eol == endp))
 927             {
 928               /* Also skip past the '\n' in an '\r\n' sequence. */
 929               if (had_cr && *eol == '\n')
 930                 eol++;
 931               break;
 932             }
 933         }
 934
 935       if (file->chunk == last_chunk)
 936         {
 937           eol = endp;
 938           break;
 939         }
 940
 941       length = endp - curp;
 942       file_token->raw_length += length;
 943       {
 944         char *c = curp;
 945
 946         svn_diff__normalize_buffer(&c, &length,
 947                                    &file->normalize_state,
 948                                    curp, file_baton->options);
 949         if (file_token->length == 0)
 950           {
 951             /* When we are reading the first part of the token, move the
 952                normalized offset past leading ignored characters, if any. */
 953             file_token->norm_offset += (c - curp);
 954           }
 955         file_token->length += length;
 956         h = svn__adler32(h, c, length);
 957       }
 958
 959       curp = endp = file->buffer;
 960       file->chunk++;
 961       length = file->chunk == last_chunk ?
 962         offset_in_chunk(file->size) : CHUNK_SIZE;
 963       endp += length;
 964       file->endp = endp;
 965
 966       /* Issue #4283: Normally we should have checked for reaching the skipped
 967          suffix here, but because we assume that a suffix always starts on a
 968          line and token boundary we rely on catching the suffix earlier in this
 969          function.
 970
 971          When changing things here, make sure the whitespace settings are
 972          applied, or we mught not reach the exact suffix boundary as token
 973          boundary. */
 974       SVN_ERR(read_chunk(file->file, file->path,
 975                          curp, length,
 976                          chunk_to_offset(file->chunk),
 977                          file_baton->pool));
 978
 979       /* If the last chunk ended in a CR, we're done. */
 980       if (had_cr)
 981         {
 982           eol = curp;
 983           if (*curp == '\n')
 984             ++eol;
 985           break;
 986         }
 987     }
 988
 989   length = eol - curp;
 990   file_token->raw_length += length;
 991   file->curp = eol;
 992
 993   /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
 994    * with a spurious empty token.  Avoid returning it.
 995    * Note that we use the unnormalized length; we don't want a line containing
 996    * only spaces (and no trailing newline) to appear like a non-existent
 997    * line. */
 998   if (file_token->raw_length > 0)
 999     {
1000       char *c = curp;
1001       svn_diff__normalize_buffer(&c, &length,
1002                                  &file->normalize_state,
1003                                  curp, file_baton->options);
1004       if (file_token->length == 0)
1005         {
1006           /* When we are reading the first part of the token, move the
1007              normalized offset past leading ignored characters, if any. */
1008           file_token->norm_offset += (c - curp);
1009         }
1010
1011       file_token->length += length;
1012
1013       *hash = svn__adler32(h, c, length);
1014       *token = file_token;
1015     }
1016
1017   return SVN_NO_ERROR;
1018 }
1019
1020 #define COMPARE_CHUNK_SIZE 4096
1021
1022 /* Implements svn_diff_fns2_t::token_compare */
1023 static svn_error_t *
1024 token_compare(void *baton, void *token1, void *token2, int *compare)
1025 {
1026   svn_diff__file_baton_t *file_baton = baton;
1027   svn_diff__file_token_t *file_token[2];
1028   char buffer[2][COMPARE_CHUNK_SIZE];
1029   char *bufp[2];
1030   apr_off_t offset[2];
1031   struct file_info *file[2];
1032   apr_off_t length[2];
1033   apr_off_t total_length;
1034   /* How much is left to read of each token from the file. */
1035   apr_off_t raw_length[2];
1036   int i;
1037   svn_diff__normalize_state_t state[2];
1038
1039   file_token[0] = token1;
1040   file_token[1] = token2;
1041   if (file_token[0]->length < file_token[1]->length)
1042     {
1043       *compare = -1;
1044       return SVN_NO_ERROR;
1045     }
1046
1047   if (file_token[0]->length > file_token[1]->length)
1048     {
1049       *compare = 1;
1050       return SVN_NO_ERROR;
1051     }
1052
1053   total_length = file_token[0]->length;
1054   if (total_length == 0)
1055     {
1056       *compare = 0;
1057       return SVN_NO_ERROR;
1058     }
1059
1060   for (i = 0; i < 2; ++i)
1061     {
1062       int idx = datasource_to_index(file_token[i]->datasource);
1063
1064       file[i] = &file_baton->files[idx];
1065       offset[i] = file_token[i]->norm_offset;
1066       state[i] = svn_diff__normalize_state_normal;
1067
1068       if (offset_to_chunk(offset[i]) == file[i]->chunk)
1069         {
1070           /* If the start of the token is in memory, the entire token is
1071            * in memory.
1072            */
1073           bufp[i] = file[i]->buffer;
1074           bufp[i] += offset_in_chunk(offset[i]);
1075
1076           length[i] = total_length;
1077           raw_length[i] = 0;
1078         }
1079       else
1080         {
1081           apr_off_t skipped;
1082
1083           length[i] = 0;
1084
1085           /* When we skipped the first part of the token via the whitespace
1086              normalization we must reduce the raw length of the token */
1087           skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1088
1089           raw_length[i] = file_token[i]->raw_length - skipped;
1090         }
1091     }
1092
1093   do
1094     {
1095       apr_off_t len;
1096       for (i = 0; i < 2; i++)
1097         {
1098           if (length[i] == 0)
1099             {
1100               /* Error if raw_length is 0, that's an unexpected change
1101                * of the file that can happen when ingoring whitespace
1102                * and that can lead to an infinite loop. */
1103               if (raw_length[i] == 0)
1104                 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1105                                          NULL,
1106                                          _("The file '%s' changed unexpectedly"
1107                                            " during diff"),
1108                                          file[i]->path);
1109
1110               /* Read a chunk from disk into a buffer */
1111               bufp[i] = buffer[i];
1112               length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1113                 COMPARE_CHUNK_SIZE : raw_length[i];
1114
1115               SVN_ERR(read_chunk(file[i]->file,
1116                                  file[i]->path,
1117                                  bufp[i], length[i], offset[i],
1118                                  file_baton->pool));
1119               offset[i] += length[i];
1120               raw_length[i] -= length[i];
1121               /* bufp[i] gets reset to buffer[i] before reading each chunk,
1122                  so, overwriting it isn't a problem */
1123               svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1124                                          bufp[i], file_baton->options);
1125
1126               /* assert(length[i] == file_token[i]->length); */
1127             }
1128         }
1129
1130       len = length[0] > length[1] ? length[1] : length[0];
1131
1132       /* Compare two chunks (that could be entire tokens if they both reside
1133        * in memory).
1134        */
1135       *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1136       if (*compare != 0)
1137         return SVN_NO_ERROR;
1138
1139       total_length -= len;
1140       length[0] -= len;
1141       length[1] -= len;
1142       bufp[0] += len;
1143       bufp[1] += len;
1144     }
1145   while(total_length > 0);
1146
1147   *compare = 0;
1148   return SVN_NO_ERROR;
1149 }
1150
1151
1152 /* Implements svn_diff_fns2_t::token_discard */
1153 static void
1154 token_discard(void *baton, void *token)
1155 {
1156   svn_diff__file_baton_t *file_baton = baton;
1157   svn_diff__file_token_t *file_token = token;
1158
1159   /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1160   file_token->next = file_baton->tokens;
1161   file_baton->tokens = file_token;
1162 }
1163
1164
1165 /* Implements svn_diff_fns2_t::token_discard_all */
1166 static void
1167 token_discard_all(void *baton)
1168 {
1169   svn_diff__file_baton_t *file_baton = baton;
1170
1171   /* Discard all memory in use by the tokens, and close all open files. */
1172   svn_pool_clear(file_baton->pool);
1173 }
1174
1175
1176 static const svn_diff_fns2_t svn_diff__file_vtable =
1177 {
1178   datasources_open,
1179   datasource_close,
1180   datasource_get_next_token,
1181   token_compare,
1182   token_discard,
1183   token_discard_all
1184 };
1185
1186 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
1187 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1188
1189 /* Options supported by svn_diff_file_options_parse(). */
1190 static const apr_getopt_option_t diff_options[] =
1191 {
1192   { "ignore-space-change", 'b', 0, NULL },
1193   { "ignore-all-space", 'w', 0, NULL },
1194   { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1195   { "show-c-function", 'p', 0, NULL },
1196   /* ### For compatibility; we don't support the argument to -u, because
1197    * ### we don't have optional argument support. */
1198   { "unified", 'u', 0, NULL },
1199   { NULL, 0, 0, NULL }
1200 };
1201
1202 svn_diff_file_options_t *
1203 svn_diff_file_options_create(apr_pool_t *pool)
1204 {
1205   return apr_pcalloc(pool, sizeof(svn_diff_file_options_t));
1206 }
1207
1208 /* A baton for use with opt_parsing_error_func(). */
1209 struct opt_parsing_error_baton_t
1210 {
1211   svn_error_t *err;
1212   apr_pool_t *pool;
1213 };
1214
1215 /* Store an error message from apr_getopt_long().  Set BATON->err to a new
1216  * error with a message generated from FMT and the remaining arguments.
1217  * Implements apr_getopt_err_fn_t. */
1218 static void
1219 opt_parsing_error_func(void *baton,
1220                        const char *fmt, ...)
1221 {
1222   struct opt_parsing_error_baton_t *b = baton;
1223   const char *message;
1224   va_list ap;
1225
1226   va_start(ap, fmt);
1227   message = apr_pvsprintf(b->pool, fmt, ap);
1228   va_end(ap);
1229
1230   /* Skip leading ": " (if present, which it always is in known cases). */
1231   if (strncmp(message, ": ", 2) == 0)
1232     message += 2;
1233
1234   b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1235 }
1236
1237 svn_error_t *
1238 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1239                             const apr_array_header_t *args,
1240                             apr_pool_t *pool)
1241 {
1242   apr_getopt_t *os;
1243   struct opt_parsing_error_baton_t opt_parsing_error_baton;
1244   /* Make room for each option (starting at index 1) plus trailing NULL. */
1245   const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1246
1247   opt_parsing_error_baton.err = NULL;
1248   opt_parsing_error_baton.pool = pool;
1249
1250   argv[0] = "";
1251   memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
1252   argv[args->nelts + 1] = NULL;
1253
1254   apr_getopt_init(&os, pool, args->nelts + 1, argv);
1255
1256   /* Capture any error message from apr_getopt_long().  This will typically
1257    * say which option is wrong, which we would not otherwise know. */
1258   os->errfn = opt_parsing_error_func;
1259   os->errarg = &opt_parsing_error_baton;
1260
1261   while (1)
1262     {
1263       const char *opt_arg;
1264       int opt_id;
1265       apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1266
1267       if (APR_STATUS_IS_EOF(err))
1268         break;
1269       if (err)
1270         /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1271          * it always will produce one, but never mind if it doesn't.  Avoid
1272          * using the message associated with the return code ERR, because
1273          * it refers to the "command line" which may be misleading here. */
1274         return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1275                                 opt_parsing_error_baton.err,
1276                                 _("Error in options to internal diff"));
1277
1278       switch (opt_id)
1279         {
1280         case 'b':
1281           /* -w takes precedence over -b. */
1282           if (! options->ignore_space)
1283             options->ignore_space = svn_diff_file_ignore_space_change;
1284           break;
1285         case 'w':
1286           options->ignore_space = svn_diff_file_ignore_space_all;
1287           break;
1288         case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1289           options->ignore_eol_style = TRUE;
1290           break;
1291         case 'p':
1292           options->show_c_function = TRUE;
1293           break;
1294         default:
1295           break;
1296         }
1297     }
1298
1299   /* Check for spurious arguments. */
1300   if (os->ind < os->argc)
1301     return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1302                              _("Invalid argument '%s' in diff options"),
1303                              os->argv[os->ind]);
1304
1305   return SVN_NO_ERROR;
1306 }
1307
1308 svn_error_t *
1309 svn_diff_file_diff_2(svn_diff_t **diff,
1310                      const char *original,
1311                      const char *modified,
1312                      const svn_diff_file_options_t *options,
1313                      apr_pool_t *pool)
1314 {
1315   svn_diff__file_baton_t baton = { 0 };
1316
1317   baton.options = options;
1318   baton.files[0].path = original;
1319   baton.files[1].path = modified;
1320   baton.pool = svn_pool_create(pool);
1321
1322   SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1323
1324   svn_pool_destroy(baton.pool);
1325   return SVN_NO_ERROR;
1326 }
1327
1328 svn_error_t *
1329 svn_diff_file_diff3_2(svn_diff_t **diff,
1330                       const char *original,
1331                       const char *modified,
1332                       const char *latest,
1333                       const svn_diff_file_options_t *options,
1334                       apr_pool_t *pool)
1335 {
1336   svn_diff__file_baton_t baton = { 0 };
1337
1338   baton.options = options;
1339   baton.files[0].path = original;
1340   baton.files[1].path = modified;
1341   baton.files[2].path = latest;
1342   baton.pool = svn_pool_create(pool);
1343
1344   SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1345
1346   svn_pool_destroy(baton.pool);
1347   return SVN_NO_ERROR;
1348 }
1349
1350 svn_error_t *
1351 svn_diff_file_diff4_2(svn_diff_t **diff,
1352                       const char *original,
1353                       const char *modified,
1354                       const char *latest,
1355                       const char *ancestor,
1356                       const svn_diff_file_options_t *options,
1357                       apr_pool_t *pool)
1358 {
1359   svn_diff__file_baton_t baton = { 0 };
1360
1361   baton.options = options;
1362   baton.files[0].path = original;
1363   baton.files[1].path = modified;
1364   baton.files[2].path = latest;
1365   baton.files[3].path = ancestor;
1366   baton.pool = svn_pool_create(pool);
1367
1368   SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1369
1370   svn_pool_destroy(baton.pool);
1371   return SVN_NO_ERROR;
1372 }
1373
1374 \f
1375 /** Display unified context diffs **/
1376
1377 /* Maximum length of the extra context to show when show_c_function is set.
1378  * GNU diff uses 40, let's be brave and use 50 instead. */
1379 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
1380 typedef struct svn_diff__file_output_baton_t
1381 {
1382   svn_stream_t *output_stream;
1383   const char *header_encoding;
1384
1385   /* Cached markers, in header_encoding. */
1386   const char *context_str;
1387   const char *delete_str;
1388   const char *insert_str;
1389
1390   const char *path[2];
1391   apr_file_t *file[2];
1392
1393   apr_off_t   current_line[2];
1394
1395   char        buffer[2][4096];
1396   apr_size_t  length[2];
1397   char       *curp[2];
1398
1399   apr_off_t   hunk_start[2];
1400   apr_off_t   hunk_length[2];
1401   svn_stringbuf_t *hunk;
1402
1403   /* Should we emit C functions in the unified diff header */
1404   svn_boolean_t show_c_function;
1405   /* Extra strings to skip over if we match. */
1406   apr_array_header_t *extra_skip_match;
1407   /* "Context" to append to the @@ line when the show_c_function option
1408    * is set. */
1409   svn_stringbuf_t *extra_context;
1410   /* Extra context for the current hunk. */
1411   char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1412
1413   apr_pool_t *pool;
1414 } svn_diff__file_output_baton_t;
1415
/* What output_unified_line() does with the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts for the original. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts for the modified file. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1423
1424
1425 static svn_error_t *
1426 output_unified_line(svn_diff__file_output_baton_t *baton,
1427                     svn_diff__file_output_unified_type_e type, int idx)
1428 {
1429   char *curp;
1430   char *eol;
1431   apr_size_t length;
1432   svn_error_t *err;
1433   svn_boolean_t bytes_processed = FALSE;
1434   svn_boolean_t had_cr = FALSE;
1435   /* Are we collecting extra context? */
1436   svn_boolean_t collect_extra = FALSE;
1437
1438   length = baton->length[idx];
1439   curp = baton->curp[idx];
1440
1441   /* Lazily update the current line even if we're at EOF.
1442    * This way we fake output of context at EOF
1443    */
1444   baton->current_line[idx]++;
1445
1446   if (length == 0 && apr_file_eof(baton->file[idx]))
1447     {
1448       return SVN_NO_ERROR;
1449     }
1450
1451   do
1452     {
1453       if (length > 0)
1454         {
1455           if (!bytes_processed)
1456             {
1457               switch (type)
1458                 {
1459                 case svn_diff__file_output_unified_context:
1460                   svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1461                   baton->hunk_length[0]++;
1462                   baton->hunk_length[1]++;
1463                   break;
1464                 case svn_diff__file_output_unified_delete:
1465                   svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1466                   baton->hunk_length[0]++;
1467                   break;
1468                 case svn_diff__file_output_unified_insert:
1469                   svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1470                   baton->hunk_length[1]++;
1471                   break;
1472                 default:
1473                   break;
1474                 }
1475
1476               if (baton->show_c_function
1477                   && (type == svn_diff__file_output_unified_skip
1478                       || type == svn_diff__file_output_unified_context)
1479                   && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1480                   && !svn_cstring_match_glob_list(curp,
1481                                                   baton->extra_skip_match))
1482                 {
1483                   svn_stringbuf_setempty(baton->extra_context);
1484                   collect_extra = TRUE;
1485                 }
1486             }
1487
1488           eol = svn_eol__find_eol_start(curp, length);
1489
1490           if (eol != NULL)
1491             {
1492               apr_size_t len;
1493
1494               had_cr = (*eol == '\r');
1495               eol++;
1496               len = (apr_size_t)(eol - curp);
1497
1498               if (! had_cr || len < length)
1499                 {
1500                   if (had_cr && *eol == '\n')
1501                     {
1502                       ++eol;
1503                       ++len;
1504                     }
1505
1506                   length -= len;
1507
1508                   if (type != svn_diff__file_output_unified_skip)
1509                     {
1510                       svn_stringbuf_appendbytes(baton->hunk, curp, len);
1511                     }
1512                   if (collect_extra)
1513                     {
1514                       svn_stringbuf_appendbytes(baton->extra_context,
1515                                                 curp, len);
1516                     }
1517
1518                   baton->curp[idx] = eol;
1519                   baton->length[idx] = length;
1520
1521                   err = SVN_NO_ERROR;
1522
1523                   break;
1524                 }
1525             }
1526
1527           if (type != svn_diff__file_output_unified_skip)
1528             {
1529               svn_stringbuf_appendbytes(baton->hunk, curp, length);
1530             }
1531
1532           if (collect_extra)
1533             {
1534               svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1535             }
1536
1537           bytes_processed = TRUE;
1538         }
1539
1540       curp = baton->buffer[idx];
1541       length = sizeof(baton->buffer[idx]);
1542
1543       err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1544
1545       /* If the last chunk ended with a CR, we look for an LF at the start
1546          of this chunk. */
1547       if (had_cr)
1548         {
1549           if (! err && length > 0 && *curp == '\n')
1550             {
1551               if (type != svn_diff__file_output_unified_skip)
1552                 {
1553                   svn_stringbuf_appendbyte(baton->hunk, *curp);
1554                 }
1555               /* We don't append the LF to extra_context, since it would
1556                * just be stripped anyway. */
1557               ++curp;
1558               --length;
1559             }
1560
1561           baton->curp[idx] = curp;
1562           baton->length[idx] = length;
1563
1564           break;
1565         }
1566     }
1567   while (! err);
1568
1569   if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1570     return err;
1571
1572   if (err && APR_STATUS_IS_EOF(err->apr_err))
1573     {
1574       svn_error_clear(err);
1575       /* Special case if we reach the end of file AND the last line is in the
1576          changed range AND the file doesn't end with a newline */
1577       if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1578           && ! had_cr)
1579         {
1580           SVN_ERR(svn_diff__unified_append_no_newline_msg(
1581                     baton->hunk, baton->header_encoding, baton->pool));
1582         }
1583
1584       baton->length[idx] = 0;
1585     }
1586
1587   return SVN_NO_ERROR;
1588 }
1589
1590 static APR_INLINE svn_error_t *
1591 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1592                           int source,
1593                           svn_diff__file_output_unified_type_e type,
1594                           apr_off_t until)
1595 {
1596   while (output_baton->current_line[source] < until)
1597     {
1598       SVN_ERR(output_unified_line(output_baton, type, source));
1599     }
1600   return SVN_NO_ERROR;
1601 }
1602
1603 static svn_error_t *
1604 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1605 {
1606   apr_off_t target_line;
1607   apr_size_t hunk_len;
1608   apr_off_t old_start;
1609   apr_off_t new_start;
1610
1611   if (svn_stringbuf_isempty(baton->hunk))
1612     {
1613       /* Nothing to flush */
1614       return SVN_NO_ERROR;
1615     }
1616
1617   target_line = baton->hunk_start[0] + baton->hunk_length[0]
1618                 + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1619
1620   /* Add trailing context to the hunk */
1621   SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1622                                     svn_diff__file_output_unified_context,
1623                                     target_line));
1624
1625   old_start = baton->hunk_start[0];
1626   new_start = baton->hunk_start[1];
1627
1628   /* If the file is non-empty, convert the line indexes from
1629      zero based to one based */
1630   if (baton->hunk_length[0])
1631     old_start++;
1632   if (baton->hunk_length[1])
1633     new_start++;
1634
1635   /* Write the hunk header */
1636   SVN_ERR(svn_diff__unified_write_hunk_header(
1637             baton->output_stream, baton->header_encoding, "@@",
1638             old_start, baton->hunk_length[0],
1639             new_start, baton->hunk_length[1],
1640             baton->hunk_extra_context,
1641             baton->pool));
1642
1643   /* Output the hunk content */
1644   hunk_len = baton->hunk->len;
1645   SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1646                            &hunk_len));
1647
1648   /* Prepare for the next hunk */
1649   baton->hunk_length[0] = 0;
1650   baton->hunk_length[1] = 0;
1651   baton->hunk_start[0] = 0;
1652   baton->hunk_start[1] = 0;
1653   svn_stringbuf_setempty(baton->hunk);
1654
1655   return SVN_NO_ERROR;
1656 }
1657
1658 static svn_error_t *
1659 output_unified_diff_modified(void *baton,
1660   apr_off_t original_start, apr_off_t original_length,
1661   apr_off_t modified_start, apr_off_t modified_length,
1662   apr_off_t latest_start, apr_off_t latest_length)
1663 {
1664   svn_diff__file_output_baton_t *output_baton = baton;
1665   apr_off_t context_prefix_length;
1666   apr_off_t prev_context_end;
1667   svn_boolean_t init_hunk = FALSE;
1668
1669   if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
1670     context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1671   else
1672     context_prefix_length = original_start;
1673
1674   /* Calculate where the previous hunk will end if we would write it now
1675      (including the necessary context at the end) */
1676   if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1677     {
1678       prev_context_end = output_baton->hunk_start[0]
1679                          + output_baton->hunk_length[0]
1680                          + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1681     }
1682   else
1683     {
1684       prev_context_end = -1;
1685
1686       if (output_baton->hunk_start[0] == 0
1687           && (original_length > 0 || modified_length > 0))
1688         init_hunk = TRUE;
1689     }
1690
1691   /* If the changed range is far enough from the previous range, flush the current
1692      hunk. */
1693   {
1694     apr_off_t new_hunk_start = (original_start - context_prefix_length);
1695
1696     if (output_baton->current_line[0] < new_hunk_start
1697           && prev_context_end <= new_hunk_start)
1698       {
1699         SVN_ERR(output_unified_flush_hunk(output_baton));
1700         init_hunk = TRUE;
1701       }
1702     else if (output_baton->hunk_length[0] > 0
1703              || output_baton->hunk_length[1] > 0)
1704       {
1705         /* We extend the current hunk */
1706
1707
1708         /* Original: Output the context preceding the changed range */
1709         SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1710                                           svn_diff__file_output_unified_context,
1711                                           original_start));
1712       }
1713   }
1714
1715   /* Original: Skip lines until we are at the beginning of the context we want
1716      to display */
1717   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1718                                     svn_diff__file_output_unified_skip,
1719                                     original_start - context_prefix_length));
1720
1721   /* Note that the above skip stores data for the show_c_function support below */
1722
1723   if (init_hunk)
1724     {
1725       SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1726                      && output_baton->hunk_length[1] == 0);
1727
1728       output_baton->hunk_start[0] = original_start - context_prefix_length;
1729       output_baton->hunk_start[1] = modified_start - context_prefix_length;
1730     }
1731
1732   if (init_hunk && output_baton->show_c_function)
1733     {
1734       apr_size_t p;
1735       const char *invalid_character;
1736
1737       /* Save the extra context for later use.
1738        * Note that the last byte of the hunk_extra_context array is never
1739        * touched after it is zero-initialized, so the array is always
1740        * 0-terminated. */
1741       strncpy(output_baton->hunk_extra_context,
1742               output_baton->extra_context->data,
1743               SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1744       /* Trim whitespace at the end, most notably to get rid of any
1745        * newline characters. */
1746       p = strlen(output_baton->hunk_extra_context);
1747       while (p > 0
1748              && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1749         {
1750           output_baton->hunk_extra_context[--p] = '\0';
1751         }
1752       invalid_character =
1753         svn_utf__last_valid(output_baton->hunk_extra_context,
1754                             SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1755       for (p = invalid_character - output_baton->hunk_extra_context;
1756            p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1757         {
1758           output_baton->hunk_extra_context[p] = '\0';
1759         }
1760     }
1761
1762   /* Modified: Skip lines until we are at the start of the changed range */
1763   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1764                                     svn_diff__file_output_unified_skip,
1765                                     modified_start));
1766
1767   /* Original: Output the context preceding the changed range */
1768   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1769                                     svn_diff__file_output_unified_context,
1770                                     original_start));
1771
1772   /* Both: Output the changed range */
1773   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1774                                     svn_diff__file_output_unified_delete,
1775                                     original_start + original_length));
1776   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1777                                     svn_diff__file_output_unified_insert,
1778                                     modified_start + modified_length));
1779
1780   return SVN_NO_ERROR;
1781 }
1782
1783 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1784 static svn_error_t *
1785 output_unified_default_hdr(const char **header, const char *path,
1786                            apr_pool_t *pool)
1787 {
1788   apr_finfo_t file_info;
1789   apr_time_exp_t exploded_time;
1790   char time_buffer[64];
1791   apr_size_t time_len;
1792   const char *utf8_timestr;
1793
1794   SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1795   apr_time_exp_lt(&exploded_time, file_info.mtime);
1796
1797   apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1798   /* Order of date components can be different in different languages */
1799                _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1800
1801   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1802
1803   *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1804
1805   return SVN_NO_ERROR;
1806 }
1807
1808 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1809 {
1810   NULL, /* output_common */
1811   output_unified_diff_modified,
1812   NULL, /* output_diff_latest */
1813   NULL, /* output_diff_common */
1814   NULL  /* output_conflict */
1815 };
1816
1817 svn_error_t *
1818 svn_diff_file_output_unified3(svn_stream_t *output_stream,
1819                               svn_diff_t *diff,
1820                               const char *original_path,
1821                               const char *modified_path,
1822                               const char *original_header,
1823                               const char *modified_header,
1824                               const char *header_encoding,
1825                               const char *relative_to_dir,
1826                               svn_boolean_t show_c_function,
1827                               apr_pool_t *pool)
1828 {
1829   if (svn_diff_contains_diffs(diff))
1830     {
1831       svn_diff__file_output_baton_t baton;
1832       int i;
1833
1834       memset(&baton, 0, sizeof(baton));
1835       baton.output_stream = output_stream;
1836       baton.pool = pool;
1837       baton.header_encoding = header_encoding;
1838       baton.path[0] = original_path;
1839       baton.path[1] = modified_path;
1840       baton.hunk = svn_stringbuf_create_empty(pool);
1841       baton.show_c_function = show_c_function;
1842       baton.extra_context = svn_stringbuf_create_empty(pool);
1843
1844       if (show_c_function)
1845         {
1846           baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1847
1848           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1849           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1850           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1851         }
1852
1853       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1854                                             header_encoding, pool));
1855       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1856                                             header_encoding, pool));
1857       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1858                                             header_encoding, pool));
1859
1860       if (relative_to_dir)
1861         {
1862           /* Possibly adjust the "original" and "modified" paths shown in
1863              the output (see issue #2723). */
1864           const char *child_path;
1865
1866           if (! original_header)
1867             {
1868               child_path = svn_dirent_is_child(relative_to_dir,
1869                                                original_path, pool);
1870               if (child_path)
1871                 original_path = child_path;
1872               else
1873                 return svn_error_createf(
1874                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1875                                    _("Path '%s' must be inside "
1876                                      "the directory '%s'"),
1877                                    svn_dirent_local_style(original_path, pool),
1878                                    svn_dirent_local_style(relative_to_dir,
1879                                                           pool));
1880             }
1881
1882           if (! modified_header)
1883             {
1884               child_path = svn_dirent_is_child(relative_to_dir,
1885                                                modified_path, pool);
1886               if (child_path)
1887                 modified_path = child_path;
1888               else
1889                 return svn_error_createf(
1890                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1891                                    _("Path '%s' must be inside "
1892                                      "the directory '%s'"),
1893                                    svn_dirent_local_style(modified_path, pool),
1894                                    svn_dirent_local_style(relative_to_dir,
1895                                                           pool));
1896             }
1897         }
1898
1899       for (i = 0; i < 2; i++)
1900         {
1901           SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1902                                    APR_READ, APR_OS_DEFAULT, pool));
1903         }
1904
1905       if (original_header == NULL)
1906         {
1907           SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1908                                              pool));
1909         }
1910
1911       if (modified_header == NULL)
1912         {
1913           SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1914                                              pool));
1915         }
1916
1917       SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1918                                              original_header, modified_header,
1919                                              pool));
1920
1921       SVN_ERR(svn_diff_output(diff, &baton,
1922                               &svn_diff__file_output_unified_vtable));
1923       SVN_ERR(output_unified_flush_hunk(&baton));
1924
1925       for (i = 0; i < 2; i++)
1926         {
1927           SVN_ERR(svn_io_file_close(baton.file[i], pool));
1928         }
1929     }
1930
1931   return SVN_NO_ERROR;
1932 }
1933
1934 \f
1935 /** Display diff3 **/
1936
1937 /* A stream to remember *leading* context.  Note that this stream does
1938    *not* copy the data that it is remembering; it just saves
1939    *pointers! */
1940 typedef struct context_saver_t {
1941   svn_stream_t *stream;
1942   const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1943   apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1944   apr_size_t next_slot;
1945   apr_size_t total_written;
1946 } context_saver_t;
1947
1948
1949 static svn_error_t *
1950 context_saver_stream_write(void *baton,
1951                            const char *data,
1952                            apr_size_t *len)
1953 {
1954   context_saver_t *cs = baton;
1955   cs->data[cs->next_slot] = data;
1956   cs->len[cs->next_slot] = *len;
1957   cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1958   cs->total_written++;
1959   return SVN_NO_ERROR;
1960 }
1961
1962 typedef struct svn_diff3__file_output_baton_t
1963 {
1964   svn_stream_t *output_stream;
1965
1966   const char *path[3];
1967
1968   apr_off_t   current_line[3];
1969
1970   char       *buffer[3];
1971   char       *endp[3];
1972   char       *curp[3];
1973
1974   /* The following four members are in the encoding used for the output. */
1975   const char *conflict_modified;
1976   const char *conflict_original;
1977   const char *conflict_separator;
1978   const char *conflict_latest;
1979
1980   const char *marker_eol;
1981
1982   svn_diff_conflict_display_style_t conflict_style;
1983
1984   /* The rest of the fields are for
1985      svn_diff_conflict_display_only_conflicts only.  Note that for
1986      these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
1987      (soon after a conflict) a "trailing context stream", never the
1988      actual output stream.*/
1989   /* The actual output stream. */
1990   svn_stream_t *real_output_stream;
1991   context_saver_t *context_saver;
1992   /* Used to allocate context_saver and trailing context streams, and
1993      for some printfs. */
1994   apr_pool_t *pool;
1995 } svn_diff3__file_output_baton_t;
1996
1997 static svn_error_t *
1998 flush_context_saver(context_saver_t *cs,
1999                     svn_stream_t *output_stream)
2000 {
2001   int i;
2002   for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++)
2003     {
2004       apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2005       if (cs->data[slot])
2006         {
2007           apr_size_t len = cs->len[slot];
2008           SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2009         }
2010     }
2011   return SVN_NO_ERROR;
2012 }
2013
2014 static void
2015 make_context_saver(svn_diff3__file_output_baton_t *fob)
2016 {
2017   context_saver_t *cs;
2018
2019   svn_pool_clear(fob->pool);
2020   cs = apr_pcalloc(fob->pool, sizeof(*cs));
2021   cs->stream = svn_stream_empty(fob->pool);
2022   svn_stream_set_baton(cs->stream, cs);
2023   svn_stream_set_write(cs->stream, context_saver_stream_write);
2024   fob->context_saver = cs;
2025   fob->output_stream = cs->stream;
2026 }
2027
2028
2029 /* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
2030    BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2031    a context_saver; used for *trailing* context. */
2032
2033 struct trailing_context_printer {
2034   apr_size_t lines_to_print;
2035   svn_diff3__file_output_baton_t *fob;
2036 };
2037
2038
2039
2040 static svn_error_t *
2041 trailing_context_printer_write(void *baton,
2042                                const char *data,
2043                                apr_size_t *len)
2044 {
2045   struct trailing_context_printer *tcp = baton;
2046   SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2047   SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2048   tcp->lines_to_print--;
2049   if (tcp->lines_to_print == 0)
2050     make_context_saver(tcp->fob);
2051   return SVN_NO_ERROR;
2052 }
2053
2054
2055 static void
2056 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2057 {
2058   struct trailing_context_printer *tcp;
2059   svn_stream_t *s;
2060
2061   svn_pool_clear(btn->pool);
2062
2063   tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2064   tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2065   tcp->fob = btn;
2066   s = svn_stream_empty(btn->pool);
2067   svn_stream_set_baton(s, tcp);
2068   svn_stream_set_write(s, trailing_context_printer_write);
2069   btn->output_stream = s;
2070 }
2071
2072
2073
/* What output_line() does with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2079
2080
2081 static svn_error_t *
2082 output_line(svn_diff3__file_output_baton_t *baton,
2083             svn_diff3__file_output_type_e type, int idx)
2084 {
2085   char *curp;
2086   char *endp;
2087   char *eol;
2088   apr_size_t len;
2089
2090   curp = baton->curp[idx];
2091   endp = baton->endp[idx];
2092
2093   /* Lazily update the current line even if we're at EOF.
2094    */
2095   baton->current_line[idx]++;
2096
2097   if (curp == endp)
2098     return SVN_NO_ERROR;
2099
2100   eol = svn_eol__find_eol_start(curp, endp - curp);
2101   if (!eol)
2102     eol = endp;
2103   else
2104     {
2105       svn_boolean_t had_cr = (*eol == '\r');
2106       eol++;
2107       if (had_cr && eol != endp && *eol == '\n')
2108         eol++;
2109     }
2110
2111   if (type != svn_diff3__file_output_skip)
2112     {
2113       len = eol - curp;
2114       /* Note that the trailing context printer assumes that
2115          svn_stream_write is called exactly once per line. */
2116       SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2117     }
2118
2119   baton->curp[idx] = eol;
2120
2121   return SVN_NO_ERROR;
2122 }
2123
2124 static svn_error_t *
2125 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2126 {
2127   return svn_stream_puts(btn->output_stream, btn->marker_eol);
2128 }
2129
2130 static svn_error_t *
2131 output_hunk(void *baton, int idx, apr_off_t target_line,
2132             apr_off_t target_length)
2133 {
2134   svn_diff3__file_output_baton_t *output_baton = baton;
2135
2136   /* Skip lines until we are at the start of the changed range */
2137   while (output_baton->current_line[idx] < target_line)
2138     {
2139       SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2140     }
2141
2142   target_line += target_length;
2143
2144   while (output_baton->current_line[idx] < target_line)
2145     {
2146       SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2147     }
2148
2149   return SVN_NO_ERROR;
2150 }
2151
2152 static svn_error_t *
2153 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2154               apr_off_t modified_start, apr_off_t modified_length,
2155               apr_off_t latest_start, apr_off_t latest_length)
2156 {
2157   return output_hunk(baton, 1, modified_start, modified_length);
2158 }
2159
2160 static svn_error_t *
2161 output_diff_modified(void *baton,
2162                      apr_off_t original_start, apr_off_t original_length,
2163                      apr_off_t modified_start, apr_off_t modified_length,
2164                      apr_off_t latest_start, apr_off_t latest_length)
2165 {
2166   return output_hunk(baton, 1, modified_start, modified_length);
2167 }
2168
2169 static svn_error_t *
2170 output_diff_latest(void *baton,
2171                    apr_off_t original_start, apr_off_t original_length,
2172                    apr_off_t modified_start, apr_off_t modified_length,
2173                    apr_off_t latest_start, apr_off_t latest_length)
2174 {
2175   return output_hunk(baton, 2, latest_start, latest_length);
2176 }
2177
2178 static svn_error_t *
2179 output_conflict(void *baton,
2180                 apr_off_t original_start, apr_off_t original_length,
2181                 apr_off_t modified_start, apr_off_t modified_length,
2182                 apr_off_t latest_start, apr_off_t latest_length,
2183                 svn_diff_t *diff);
2184
2185 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2186 {
2187   output_common,
2188   output_diff_modified,
2189   output_diff_latest,
2190   output_diff_modified, /* output_diff_common */
2191   output_conflict
2192 };
2193
2194
2195
2196 static svn_error_t *
2197 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2198                              apr_off_t original_start,
2199                              apr_off_t original_length,
2200                              apr_off_t modified_start,
2201                              apr_off_t modified_length,
2202                              apr_off_t latest_start,
2203                              apr_off_t latest_length)
2204 {
2205   /* Are we currently saving starting context (as opposed to printing
2206      trailing context)?  If so, flush it. */
2207   if (btn->output_stream == btn->context_saver->stream)
2208     {
2209       if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
2210         SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2211       SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2212     }
2213
2214   /* Print to the real output stream. */
2215   btn->output_stream = btn->real_output_stream;
2216
2217   /* Output the conflict itself. */
2218   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2219                             (modified_length == 1
2220                              ? "%s (%" APR_OFF_T_FMT ")"
2221                              : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2222                             btn->conflict_modified,
2223                             modified_start + 1, modified_length));
2224   SVN_ERR(output_marker_eol(btn));
2225   SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2226
2227   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2228                             (original_length == 1
2229                              ? "%s (%" APR_OFF_T_FMT ")"
2230                              : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2231                             btn->conflict_original,
2232                             original_start + 1, original_length));
2233   SVN_ERR(output_marker_eol(btn));
2234   SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2235
2236   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2237                             "%s%s", btn->conflict_separator, btn->marker_eol));
2238   SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2239   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2240                             (latest_length == 1
2241                              ? "%s (%" APR_OFF_T_FMT ")"
2242                              : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2243                             btn->conflict_latest,
2244                             latest_start + 1, latest_length));
2245   SVN_ERR(output_marker_eol(btn));
2246
2247   /* Go into print-trailing-context mode instead. */
2248   make_trailing_context_printer(btn);
2249
2250   return SVN_NO_ERROR;
2251 }
2252
2253
2254 static svn_error_t *
2255 output_conflict(void *baton,
2256                 apr_off_t original_start, apr_off_t original_length,
2257                 apr_off_t modified_start, apr_off_t modified_length,
2258                 apr_off_t latest_start, apr_off_t latest_length,
2259                 svn_diff_t *diff)
2260 {
2261   svn_diff3__file_output_baton_t *file_baton = baton;
2262
2263   svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2264
2265   if (style == svn_diff_conflict_display_only_conflicts)
2266     return output_conflict_with_context(file_baton,
2267                                         original_start, original_length,
2268                                         modified_start, modified_length,
2269                                         latest_start, latest_length);
2270
2271   if (style == svn_diff_conflict_display_resolved_modified_latest)
2272     {
2273       if (diff)
2274         return svn_diff_output(diff, baton,
2275                                &svn_diff3__file_output_vtable);
2276       else
2277         style = svn_diff_conflict_display_modified_latest;
2278     }
2279
2280   if (style == svn_diff_conflict_display_modified_latest ||
2281       style == svn_diff_conflict_display_modified_original_latest)
2282     {
2283       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2284                                file_baton->conflict_modified));
2285       SVN_ERR(output_marker_eol(file_baton));
2286
2287       SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2288
2289       if (style == svn_diff_conflict_display_modified_original_latest)
2290         {
2291           SVN_ERR(svn_stream_puts(file_baton->output_stream,
2292                                    file_baton->conflict_original));
2293           SVN_ERR(output_marker_eol(file_baton));
2294           SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2295         }
2296
2297       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2298                               file_baton->conflict_separator));
2299       SVN_ERR(output_marker_eol(file_baton));
2300
2301       SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2302
2303       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2304                               file_baton->conflict_latest));
2305       SVN_ERR(output_marker_eol(file_baton));
2306     }
2307   else if (style == svn_diff_conflict_display_modified)
2308     SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2309   else if (style == svn_diff_conflict_display_latest)
2310     SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2311   else /* unknown style */
2312     SVN_ERR_MALFUNCTION();
2313
2314   return SVN_NO_ERROR;
2315 }
2316
2317 svn_error_t *
2318 svn_diff_file_output_merge2(svn_stream_t *output_stream,
2319                             svn_diff_t *diff,
2320                             const char *original_path,
2321                             const char *modified_path,
2322                             const char *latest_path,
2323                             const char *conflict_original,
2324                             const char *conflict_modified,
2325                             const char *conflict_latest,
2326                             const char *conflict_separator,
2327                             svn_diff_conflict_display_style_t style,
2328                             apr_pool_t *pool)
2329 {
2330   svn_diff3__file_output_baton_t baton;
2331   apr_file_t *file[3];
2332   int idx;
2333 #if APR_HAS_MMAP
2334   apr_mmap_t *mm[3] = { 0 };
2335 #endif /* APR_HAS_MMAP */
2336   const char *eol;
2337   svn_boolean_t conflicts_only =
2338     (style == svn_diff_conflict_display_only_conflicts);
2339
2340   memset(&baton, 0, sizeof(baton));
2341   if (conflicts_only)
2342     {
2343       baton.pool = svn_pool_create(pool);
2344       make_context_saver(&baton);
2345       baton.real_output_stream = output_stream;
2346     }
2347   else
2348     baton.output_stream = output_stream;
2349   baton.path[0] = original_path;
2350   baton.path[1] = modified_path;
2351   baton.path[2] = latest_path;
2352   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2353                                     conflict_modified ? conflict_modified
2354                                     : apr_psprintf(pool, "<<<<<<< %s",
2355                                                    modified_path),
2356                                     pool));
2357   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2358                                     conflict_original ? conflict_original
2359                                     : apr_psprintf(pool, "||||||| %s",
2360                                                    original_path),
2361                                     pool));
2362   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2363                                     conflict_separator ? conflict_separator
2364                                     : "=======", pool));
2365   SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2366                                     conflict_latest ? conflict_latest
2367                                     : apr_psprintf(pool, ">>>>>>> %s",
2368                                                    latest_path),
2369                                     pool));
2370
2371   baton.conflict_style = style;
2372
2373   for (idx = 0; idx < 3; idx++)
2374     {
2375       apr_size_t size;
2376
2377       SVN_ERR(map_or_read_file(&file[idx],
2378                                MMAP_T_ARG(mm[idx])
2379                                &baton.buffer[idx], &size,
2380                                baton.path[idx], pool));
2381
2382       baton.curp[idx] = baton.buffer[idx];
2383       baton.endp[idx] = baton.buffer[idx];
2384
2385       if (baton.endp[idx])
2386         baton.endp[idx] += size;
2387     }
2388
2389   /* Check what eol marker we should use for conflict markers.
2390      We use the eol marker of the modified file and fall back on the
2391      platform's eol marker if that file doesn't contain any newlines. */
2392   eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2393                             NULL);
2394   if (! eol)
2395     eol = APR_EOL_STR;
2396   baton.marker_eol = eol;
2397
2398   SVN_ERR(svn_diff_output(diff, &baton,
2399                           &svn_diff3__file_output_vtable));
2400
2401   for (idx = 0; idx < 3; idx++)
2402     {
2403 #if APR_HAS_MMAP
2404       if (mm[idx])
2405         {
2406           apr_status_t rv = apr_mmap_delete(mm[idx]);
2407           if (rv != APR_SUCCESS)
2408             {
2409               return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2410                                         baton.path[idx]);
2411             }
2412         }
2413 #endif /* APR_HAS_MMAP */
2414
2415       if (file[idx])
2416         {
2417           SVN_ERR(svn_io_file_close(file[idx], pool));
2418         }
2419     }
2420
2421   if (conflicts_only)
2422     svn_pool_destroy(baton.pool);
2423
2424   return SVN_NO_ERROR;
2425 }
2426