contrib/subversion/subversion/libsvn_diff/diff_file.c

   1 /*
   2  * diff_file.c :  routines for doing diffs on files
   3  *
   4  * ====================================================================
   5  *    Licensed to the Apache Software Foundation (ASF) under one
   6  *    or more contributor license agreements.  See the NOTICE file
   7  *    distributed with this work for additional information
   8  *    regarding copyright ownership.  The ASF licenses this file
   9  *    to you under the Apache License, Version 2.0 (the
  10  *    "License"); you may not use this file except in compliance
  11  *    with the License.  You may obtain a copy of the License at
  12  *
  13  *      http://www.apache.org/licenses/LICENSE-2.0
  14  *
  15  *    Unless required by applicable law or agreed to in writing,
  16  *    software distributed under the License is distributed on an
  17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  18  *    KIND, either express or implied.  See the License for the
  19  *    specific language governing permissions and limitations
  20  *    under the License.
  21  * ====================================================================
  22  */
  23
  24
  25 #include <apr.h>
  26 #include <apr_pools.h>
  27 #include <apr_general.h>
  28 #include <apr_file_io.h>
  29 #include <apr_file_info.h>
  30 #include <apr_time.h>
  31 #include <apr_mmap.h>
  32 #include <apr_getopt.h>
  33
  34 #include <assert.h>
  35
  36 #include "svn_error.h"
  37 #include "svn_diff.h"
  38 #include "svn_types.h"
  39 #include "svn_string.h"
  40 #include "svn_subst.h"
  41 #include "svn_io.h"
  42 #include "svn_utf.h"
  43 #include "svn_pools.h"
  44 #include "diff.h"
  45 #include "svn_private_config.h"
  46 #include "svn_path.h"
  47 #include "svn_ctype.h"
  48
  49 #include "private/svn_utf_private.h"
  50 #include "private/svn_eol_private.h"
  51 #include "private/svn_dep_compat.h"
  52 #include "private/svn_adler32.h"
  53 #include "private/svn_diff_private.h"
  54
  55 /* A token, i.e. a line read from a file, described by location only. */
  56 typedef struct svn_diff__file_token_t
  57 {
  58   /* Next token in free list (see svn_diff__file_baton_t::tokens). */
  59   struct svn_diff__file_token_t *next;
  60   svn_diff_datasource_e datasource;  /* datasource the token belongs to */
  61   /* Offset in the datasource (start of the raw, un-normalized line). */
  62   apr_off_t offset;
  63   /* Offset of the normalized token (may skip leading whitespace) */
  64   apr_off_t norm_offset;
  65   /* Total length - before normalization. */
  66   apr_off_t raw_length;
  67   /* Total length - after normalization. */
  68   apr_off_t length;
  69 } svn_diff__file_token_t;
  70
  71
  72 typedef struct svn_diff__file_baton_t
  73 {
  74   const svn_diff_file_options_t *options;  /* diff options -- presumably read during token normalization; confirm */
  75   /* One file_info slot per datasource; datasource_to_index() picks it. */
  76   struct file_info {
  77     const char *path;  /* path to this file, absolute or relative to CWD */
  78
  79     /* All the following fields are active while this datasource is open */
  80     apr_file_t *file;  /* handle of this file */
  81     apr_off_t size;    /* total raw size in bytes of this file */
  82
  83     /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
  84     int chunk;     /* the current chunk number, zero-based; -1 marks BOF */
  85     char *buffer;  /* a buffer containing the current chunk */
  86     char *curp;    /* current position in the current chunk */
  87     char *endp;    /* next memory address after the current chunk */
  88
  89     svn_diff__normalize_state_t normalize_state;
  90
  91     /* Where the identical suffix starts in this datasource */
  92     int suffix_start_chunk;  /* -1 is the guard set by datasources_open() */
  93     apr_off_t suffix_offset_in_chunk;  /* byte offset from BUFFER */
  94   } files[4];
  95
  96   /* List of free tokens that may be reused. */
  97   svn_diff__file_token_t *tokens;
  98
  99   apr_pool_t *pool;  /* used by datasources_open() for files and buffers */
 100 } svn_diff__file_baton_t;
 101
 102 static int
 103 datasource_to_index(svn_diff_datasource_e datasource)
 104 {
 105   switch (datasource)
 106     {
 107     case svn_diff_datasource_original:
 108       return 0;
 109
 110     case svn_diff_datasource_modified:
 111       return 1;
 112
 113     case svn_diff_datasource_latest:
 114       return 2;
 115
 116     case svn_diff_datasource_ancestor:
 117       return 3;
 118     }
 119
 120   return -1;
 121 }
 122
 123 /* Files are read in chunks of 128k.  There is no support for this number
 124  * whatsoever.  If there is a number someone comes up with that has some
 125  * argumentation, let's use that.
 126  */
 127 /* If you change this number, update test_norm_offset(),
 128  * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 129  */
 130 #define CHUNK_SHIFT 17
 131 #define CHUNK_SIZE (1 << CHUNK_SHIFT)
 132
 133 #define chunk_to_offset(chunk) ((chunk) << CHUNK_SHIFT)
 134 #define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
 135 #define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
 136
 137
 138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
 139  * *LENGTH.  The actual bytes read are stored in *LENGTH on return.
 140  */
 141 static APR_INLINE svn_error_t *
 142 read_chunk(apr_file_t *file,
 143            char *buffer, apr_off_t length,
 144            apr_off_t offset, apr_pool_t *scratch_pool)
 145 {
 146   /* XXX: The final offset may not be the one we asked for.
 147    * XXX: Check.
 148    */
 149   SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
 150   return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
 151                                 NULL, NULL, scratch_pool);
 152 }
 153
 154
 155 /* Map or read a file at PATH. *BUFFER will point to the file
 156  * contents; if the file was mapped, *FILE and *MM will contain the
 157  * mmap context; otherwise they will be NULL.  SIZE will contain the
 158  * file size.  Allocate from POOL.
 159  */
 160 #if APR_HAS_MMAP
 161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
 162 #define MMAP_T_ARG(NAME)   &(NAME),
 163 #else
 164 #define MMAP_T_PARAM(NAME)
 165 #define MMAP_T_ARG(NAME)
 166 #endif
 167
 168 static svn_error_t *
 169 map_or_read_file(apr_file_t **file,
 170                  MMAP_T_PARAM(mm)
 171                  char **buffer, apr_size_t *size_p,
 172                  const char *path, apr_pool_t *pool)
 173 {
 174   apr_finfo_t finfo;
 175   apr_status_t rv;
 176   apr_size_t size;
 177
 178   *buffer = NULL;
 179
 180   SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
 181   SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
 182
 183   if (finfo.size > APR_SIZE_MAX)
 184     {
 185       return svn_error_createf(APR_ENOMEM, NULL,
 186                                _("File '%s' is too large to be read in "
 187                                  "to memory"), path);
 188     }
 189
 190   size = (apr_size_t) finfo.size;
 191 #if APR_HAS_MMAP
 192   if (size > APR_MMAP_THRESHOLD)
 193     {
 194       rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
 195       if (rv == APR_SUCCESS)
 196         {
 197           *buffer = (*mm)->mm;
 198         }
 199       else
 200         {
 201           /* Clear *MM because output parameters are undefined on error. */
 202           *mm = NULL;
 203         }
 204
 205       /* On failure we just fall through and try reading the file into
 206        * memory instead.
 207        */
 208     }
 209 #endif /* APR_HAS_MMAP */
 210
 211    if (*buffer == NULL && size > 0)
 212     {
 213       *buffer = apr_palloc(pool, size);
 214
 215       SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
 216
 217       /* Since we have the entire contents of the file we can
 218        * close it now.
 219        */
 220       SVN_ERR(svn_io_file_close(*file, pool));
 221
 222       *file = NULL;
 223     }
 224
 225   *size_p = size;
 226
 227   return SVN_NO_ERROR;
 228 }
 229
 230
 231 /* For all files in the FILE array, increment the curp pointer.  If a file
 232  * points before the beginning of file, let it point at the first byte again.
 233  * If the end of the current chunk is reached, read the next chunk in the
 234  * buffer and point curp to the start of the chunk.  If EOF is reached, set
 235  * curp equal to endp to indicate EOF. */
 236 #define INCREMENT_POINTERS(all_files, files_len, pool)                       \
 237   do {                                                                       \
 238     apr_size_t svn_macro__i;                                                 \
 239                                                                              \
 240     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 241     {                                                                        \
 242       if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
 243         (all_files)[svn_macro__i].curp++;                                    \
 244       else                                                                   \
 245         SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
 246     }                                                                        \
 247   } while (0)
 248
 249
 250 /* For all files in the FILE array, decrement the curp pointer.  If the
 251  * start of a chunk is reached, read the previous chunk in the buffer and
 252  * point curp to the last byte of the chunk.  If the beginning of a FILE is
 253  * reached, set chunk to -1 to indicate BOF. */
 254 #define DECREMENT_POINTERS(all_files, files_len, pool)                       \
 255   do {                                                                       \
 256     apr_size_t svn_macro__i;                                                 \
 257                                                                              \
 258     for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
 259     {                                                                        \
 260       if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
 261         (all_files)[svn_macro__i].curp--;                                    \
 262       else                                                                   \
 263         SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
 264     }                                                                        \
 265   } while (0)
 266
 267
 268 static svn_error_t *
 269 increment_chunk(struct file_info *file, apr_pool_t *pool)
 270 {
 271   apr_off_t length;
 272   apr_off_t last_chunk = offset_to_chunk(file->size);
 273
 274   if (file->chunk == -1)
 275     {
 276       /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
 277       file->chunk = 0;
 278       file->curp = file->buffer;
 279     }
 280   else if (file->chunk == last_chunk)
 281     {
 282       /* We are at the last chunk. Indicate EOF by setting curp == endp. */
 283       file->curp = file->endp;
 284     }
 285   else
 286     {
 287       /* There are still chunks left. Read next chunk and reset pointers. */
 288       file->chunk++;
 289       length = file->chunk == last_chunk ?
 290         offset_in_chunk(file->size) : CHUNK_SIZE;
 291       SVN_ERR(read_chunk(file->file, file->buffer,
 292                          length, chunk_to_offset(file->chunk),
 293                          pool));
 294       file->endp = file->buffer + length;
 295       file->curp = file->buffer;
 296     }
 297
 298   return SVN_NO_ERROR;
 299 }
 300
 301
 302 static svn_error_t *
 303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
 304 {
 305   if (file->chunk == 0)
 306     {
 307       /* We are already at the first chunk. Indicate BOF (Beginning Of File)
 308          by setting chunk = -1 and curp = endp - 1. Both conditions are
 309          important. They help the increment step to catch the BOF situation
 310          in an efficient way. */
 311       file->chunk--;
 312       file->curp = file->endp - 1;
 313     }
 314   else
 315     {
 316       /* Read previous chunk and reset pointers. */
 317       file->chunk--;
 318       SVN_ERR(read_chunk(file->file, file->buffer,
 319                          CHUNK_SIZE, chunk_to_offset(file->chunk),
 320                          pool));
 321       file->endp = file->buffer + CHUNK_SIZE;
 322       file->curp = file->endp - 1;
 323     }
 324
 325   return SVN_NO_ERROR;
 326 }
 327
 328
 329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
 330  * the file (this can happen while scanning backwards). This is the case if
 331  * one of them has chunk == -1. */
 332 static svn_boolean_t
 333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
 334 {
 335   apr_size_t i;
 336
 337   for (i = 0; i < file_len; i++)
 338     if (file[i].chunk == -1)
 339       return TRUE;
 340
 341   return FALSE;
 342 }
 343
 344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
 345  * one of them has curp == endp (this can only happen at the last chunk)) */
 346 static svn_boolean_t
 347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
 348 {
 349   apr_size_t i;
 350
 351   for (i = 0; i < file_len; i++)
 352     if (file[i].curp == file[i].endp)
 353       return TRUE;
 354
 355   return FALSE;
 356 }
 357
 358 /* Quickly determine whether there is a eol char in CHUNK.
 359  * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 360  */
 361
 362 #if SVN_UNALIGNED_ACCESS_IS_OK
 363 static svn_boolean_t contains_eol(apr_uintptr_t chunk)
 364 {
 365   apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
 366   apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
 367
 368   r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
 369   n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
 370
 371   return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
 372 }
 373 #endif
 374
 375 /* Find the prefix which is identical between all elements of the FILE array.
 376  * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
 377  * set to TRUE if one of the FILEs reached its end while scanning prefix,
 378  * i.e. at least one file consisted entirely of prefix.  Otherwise,
 379  * REACHED_ONE_EOF is set to FALSE.
 380  *
 381  * After this function is finished, the buffers, chunks, curp's and endp's
 382  * of the FILEs are set to point at the first byte after the prefix. */
 383 static svn_error_t *
 384 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
 385                       struct file_info file[], apr_size_t file_len,
 386                       apr_pool_t *pool)
 387 {
 388   svn_boolean_t had_cr = FALSE;
 389   svn_boolean_t is_match;
 390   apr_off_t lines = 0;
 391   apr_size_t i;
 392
 393   *reached_one_eof = FALSE;
 394
 395   for (i = 1, is_match = TRUE; i < file_len; i++)
 396     is_match = is_match && *file[0].curp == *file[i].curp;
 397   while (is_match)
 398     {
 399 #if SVN_UNALIGNED_ACCESS_IS_OK
 400       apr_ssize_t max_delta, delta;
 401 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 402
 403       /* ### TODO: see if we can take advantage of
 404          diff options like ignore_eol_style or ignore_space. */
 405       /* check for eol, and count */
 406       if (*file[0].curp == '\r')
 407         {
 408           lines++;
 409           had_cr = TRUE;
 410         }
 411       else if (*file[0].curp == '\n' && !had_cr)
 412         {
 413           lines++;
 414         }
 415       else
 416         {
 417           had_cr = FALSE;
 418         }
 419
 420       INCREMENT_POINTERS(file, file_len, pool);
 421
 422 #if SVN_UNALIGNED_ACCESS_IS_OK
 423
 424       /* Try to advance as far as possible with machine-word granularity.
 425        * Determine how far we may advance with chunky ops without reaching
 426        * endp for any of the files.
 427        * Signedness is important here if curp gets close to endp.
 428        */
 429       max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
 430       for (i = 1; i < file_len; i++)
 431         {
 432           delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
 433           if (delta < max_delta)
 434             max_delta = delta;
 435         }
 436
 437       is_match = TRUE;
 438       for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
 439         {
 440           apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
 441           if (contains_eol(chunk))
 442             break;
 443
 444           for (i = 1; i < file_len; i++)
 445             if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
 446               {
 447                 is_match = FALSE;
 448                 break;
 449               }
 450
 451           if (! is_match)
 452             break;
 453         }
 454
 455       if (delta /* > 0*/)
 456         {
 457           /* We either found a mismatch or an EOL at or shortly behind curp+delta
 458            * or we cannot proceed with chunky ops without exceeding endp.
 459            * In any way, everything up to curp + delta is equal and not an EOL.
 460            */
 461           for (i = 0; i < file_len; i++)
 462             file[i].curp += delta;
 463
 464           /* Skipped data without EOL markers, so last char was not a CR. */
 465           had_cr = FALSE;
 466         }
 467 #endif
 468
 469       *reached_one_eof = is_one_at_eof(file, file_len);
 470       if (*reached_one_eof)
 471         break;
 472       else
 473         for (i = 1, is_match = TRUE; i < file_len; i++)
 474           is_match = is_match && *file[0].curp == *file[i].curp;
 475     }
 476
 477   if (had_cr)
 478     {
 479       /* Check if we ended in the middle of a \r\n for one file, but \r for
 480          another. If so, back up one byte, so the next loop will back up
 481          the entire line. Also decrement lines, since we counted one
 482          too many for the \r. */
 483       svn_boolean_t ended_at_nonmatching_newline = FALSE;
 484       for (i = 0; i < file_len; i++)
 485         if (file[i].curp < file[i].endp)
 486           ended_at_nonmatching_newline = ended_at_nonmatching_newline
 487                                          || *file[i].curp == '\n';
 488       if (ended_at_nonmatching_newline)
 489         {
 490           lines--;
 491           DECREMENT_POINTERS(file, file_len, pool);
 492         }
 493     }
 494
 495   /* Back up one byte, so we point at the last identical byte */
 496   DECREMENT_POINTERS(file, file_len, pool);
 497
 498   /* Back up to the last eol sequence (\n, \r\n or \r) */
 499   while (!is_one_at_bof(file, file_len) &&
 500          *file[0].curp != '\n' && *file[0].curp != '\r')
 501     DECREMENT_POINTERS(file, file_len, pool);
 502
 503   /* Slide one byte forward, to point past the eol sequence */
 504   INCREMENT_POINTERS(file, file_len, pool);
 505
 506   *prefix_lines = lines;
 507
 508   return SVN_NO_ERROR;
 509 }
 510
 511
 512 /* The number of identical suffix lines to keep with the middle section. These
 513  * lines are not eliminated as suffix, and can be picked up by the token
 514  * parsing and lcs steps. This is mainly for backward compatibility with
 515  * the previous diff (and blame) output (if there are multiple diff solutions,
 516  * our lcs algorithm prefers taking common lines from the start, rather than
 517  * from the end. By giving it back some suffix lines, we give it some wiggle
 518  * room to find the exact same diff as before).
 519  *
 520  * The number 50 is more or less arbitrary, based on some real-world tests
 521  * with big files (and then doubling the required number to be on the safe
 522  * side). This has a negligible effect on the power of the optimization. */
 523 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
 524 #ifndef SUFFIX_LINES_TO_KEEP  /* a build may pre-define another value */
 525 #define SUFFIX_LINES_TO_KEEP 50  /* default, see rationale above */
 526 #endif
 527
 528 /* Find the suffix which is identical between all elements of the FILE array.
 529  * Return the number of suffix lines in SUFFIX_LINES.
 530  *
 531  * Before this function is called the FILEs' pointers and chunks should be
 532  * positioned right after the identical prefix (which is the case after
 533  * find_identical_prefix), so we can determine where suffix scanning should
 534  * ultimately stop. */
 535 static svn_error_t *
 536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
 537                       apr_size_t file_len, apr_pool_t *pool)
 538 {
 539   struct file_info file_for_suffix[4] = { { 0 }  };
 540   apr_off_t length[4];
 541   apr_off_t suffix_min_chunk0;
 542   apr_off_t suffix_min_offset0;
 543   apr_off_t min_file_size;
 544   int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
 545   svn_boolean_t is_match;
 546   apr_off_t lines = 0;
 547   svn_boolean_t had_nl;
 548   apr_size_t i;
 549
 550   /* Initialize file_for_suffix[].
 551      Read last chunk, position curp at last byte. */
 552   for (i = 0; i < file_len; i++)
 553     {
 554       file_for_suffix[i].path = file[i].path;
 555       file_for_suffix[i].file = file[i].file;
 556       file_for_suffix[i].size = file[i].size;
 557       file_for_suffix[i].chunk =
 558         (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
 559       length[i] = offset_in_chunk(file_for_suffix[i].size);
 560       if (length[i] == 0)
 561         {
 562           /* last chunk is an empty chunk -> start at next-to-last chunk */
 563           file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
 564           length[i] = CHUNK_SIZE;
 565         }
 566
 567       if (file_for_suffix[i].chunk == file[i].chunk)
 568         {
 569           /* Prefix ended in last chunk, so we can reuse the prefix buffer */
 570           file_for_suffix[i].buffer = file[i].buffer;
 571         }
 572       else
 573         {
 574           /* There is at least more than 1 chunk,
 575              so allocate full chunk size buffer */
 576           file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
 577           SVN_ERR(read_chunk(file_for_suffix[i].file,
 578                              file_for_suffix[i].buffer, length[i],
 579                              chunk_to_offset(file_for_suffix[i].chunk),
 580                              pool));
 581         }
 582       file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
 583       file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
 584     }
 585
 586   /* Get the chunk and pointer offset (for file[0]) at which we should stop
 587      scanning backward for the identical suffix, i.e. when we reach prefix. */
 588   suffix_min_chunk0 = file[0].chunk;
 589   suffix_min_offset0 = file[0].curp - file[0].buffer;
 590
 591   /* Compensate if other files are smaller than file[0] */
 592   for (i = 1, min_file_size = file[0].size; i < file_len; i++)
 593     if (file[i].size < min_file_size)
 594       min_file_size = file[i].size;
 595   if (file[0].size > min_file_size)
 596     {
 597       suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
 598       suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
 599     }
 600
 601   /* Scan backwards until mismatch or until we reach the prefix. */
 602   for (i = 1, is_match = TRUE; i < file_len; i++)
 603     is_match = is_match
 604                && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 605   if (is_match && *file_for_suffix[0].curp != '\r'
 606                && *file_for_suffix[0].curp != '\n')
 607     /* Count an extra line for the last line not ending in an eol. */
 608     lines++;
 609
 610   had_nl = FALSE;
 611   while (is_match)
 612     {
 613       svn_boolean_t reached_prefix;
 614 #if SVN_UNALIGNED_ACCESS_IS_OK
 615       /* Initialize the minimum pointer positions. */
 616       const char *min_curp[4];
 617       svn_boolean_t can_read_word;
 618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
 619
 620       /* ### TODO: see if we can take advantage of
 621          diff options like ignore_eol_style or ignore_space. */
 622       /* check for eol, and count */
 623       if (*file_for_suffix[0].curp == '\n')
 624         {
 625           lines++;
 626           had_nl = TRUE;
 627         }
 628       else if (*file_for_suffix[0].curp == '\r' && !had_nl)
 629         {
 630           lines++;
 631         }
 632       else
 633         {
 634           had_nl = FALSE;
 635         }
 636
 637       DECREMENT_POINTERS(file_for_suffix, file_len, pool);
 638
 639 #if SVN_UNALIGNED_ACCESS_IS_OK
 640       for (i = 0; i < file_len; i++)
 641         min_curp[i] = file_for_suffix[i].buffer;
 642
 643       /* If we are in the same chunk that contains the last part of the common
 644          prefix, use the min_curp[0] pointer to make sure we don't get a
 645          suffix that overlaps the already determined common prefix. */
 646       if (file_for_suffix[0].chunk == suffix_min_chunk0)
 647         min_curp[0] += suffix_min_offset0;
 648
 649       /* Scan quickly by reading with machine-word granularity. */
 650       for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
 651         can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
 652                          > min_curp[i]);
 653
 654       while (can_read_word)
 655         {
 656           apr_uintptr_t chunk;
 657
 658           /* For each file curp is positioned at the current byte, but we
 659              want to examine the current byte and the ones before the current
 660              location as one machine word. */
 661
 662           chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
 663                                              - sizeof(apr_uintptr_t));
 664           if (contains_eol(chunk))
 665             break;
 666
 667           for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
 668             is_match = (chunk
 669                            == *(const apr_uintptr_t *)
 670                                     (file_for_suffix[i].curp + 1
 671                                        - sizeof(apr_uintptr_t)));
 672
 673           if (! is_match)
 674             break;
 675
 676           for (i = 0; i < file_len; i++)
 677             {
 678               file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
 679               can_read_word = can_read_word
 680                               && (  (file_for_suffix[i].curp + 1
 681                                        - sizeof(apr_uintptr_t))
 682                                   > min_curp[i]);
 683             }
 684
 685           /* We skipped some bytes, so there are no closing EOLs */
 686           had_nl = FALSE;
 687         }
 688
 689       /* The > min_curp[i] check leaves at least one final byte for checking
 690          in the non block optimized case below. */
 691 #endif
 692
 693       reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
 694                        && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
 695                           == suffix_min_offset0;
 696       if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
 697         break;
 698
 699       is_match = TRUE;
 700       for (i = 1; i < file_len; i++)
 701         is_match = is_match
 702                    && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
 703     }
 704
 705   /* Slide one byte forward, to point at the first byte of identical suffix */
 706   INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 707
 708   /* Slide forward until we find an eol sequence to add the rest of the line
 709      we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
 710      one file reaches its end. */
 711   do
 712     {
 713       svn_boolean_t had_cr = FALSE;
 714       while (!is_one_at_eof(file_for_suffix, file_len)
 715              && *file_for_suffix[0].curp != '\n'
 716              && *file_for_suffix[0].curp != '\r')
 717         INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 718
 719       /* Slide one or two more bytes, to point past the eol. */
 720       if (!is_one_at_eof(file_for_suffix, file_len)
 721           && *file_for_suffix[0].curp == '\r')
 722         {
 723           lines--;
 724           had_cr = TRUE;
 725           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 726         }
 727       if (!is_one_at_eof(file_for_suffix, file_len)
 728           && *file_for_suffix[0].curp == '\n')
 729         {
 730           if (!had_cr)
 731             lines--;
 732           INCREMENT_POINTERS(file_for_suffix, file_len, pool);
 733         }
 734     }
 735   while (!is_one_at_eof(file_for_suffix, file_len)
 736          && suffix_lines_to_keep--);
 737
 738   if (is_one_at_eof(file_for_suffix, file_len))
 739     lines = 0;
 740
 741   /* Save the final suffix information in the original file_info */
 742   for (i = 0; i < file_len; i++)
 743     {
 744       file[i].suffix_start_chunk = file_for_suffix[i].chunk;
 745       file[i].suffix_offset_in_chunk =
 746         file_for_suffix[i].curp - file_for_suffix[i].buffer;
 747     }
 748
 749   *suffix_lines = lines;
 750
 751   return SVN_NO_ERROR;
 752 }
 753
 754
 755 /* Let FILE stand for the array of file_info struct elements of BATON->files
 756  * that are indexed by the elements of the DATASOURCE array.
 757  * BATON's type is (svn_diff__file_baton_t *).
 758  *
 759  * For each file in the FILE array, open the file at FILE.path; initialize
 760  * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
 761  * buffer and read the first chunk.  Then find the prefix and suffix lines
 762  * which are identical between all the files.  Return the number of identical
 763  * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
 764  * SUFFIX_LINES.
 765  *
 766  * Finding the identical prefix and suffix allows us to exclude those from the
 767  * rest of the diff algorithm, which increases performance by reducing the
 768  * problem space.
 769  *
 770  * Implements svn_diff_fns2_t::datasources_open. */
 771 static svn_error_t *
 772 datasources_open(void *baton,
 773                  apr_off_t *prefix_lines,
 774                  apr_off_t *suffix_lines,
 775                  const svn_diff_datasource_e *datasources,
 776                  apr_size_t datasources_len)
 777 {
 778   svn_diff__file_baton_t *file_baton = baton;
 779   struct file_info files[4];
 780   apr_finfo_t finfo[4];
 781   apr_off_t length[4];
 782 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 783   svn_boolean_t reached_one_eof;
 784 #endif
 785   apr_size_t i;
 786
 787   /* Make sure prefix_lines and suffix_lines are set correctly, even if we
 788    * exit early because one of the files is empty. */
 789   *prefix_lines = 0;
 790   *suffix_lines = 0;
 791
 792   /* Open datasources and read first chunk */
 793   for (i = 0; i < datasources_len; i++)
 794     {
 795       struct file_info *file
 796           = &file_baton->files[datasource_to_index(datasources[i])];
 797       SVN_ERR(svn_io_file_open(&file->file, file->path,
 798                                APR_READ, APR_OS_DEFAULT, file_baton->pool));
 799       SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
 800                                    file->file, file_baton->pool));
 801       file->size = finfo[i].size;
 802       length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
 803       file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
 804       SVN_ERR(read_chunk(file->file, file->buffer,
 805                          length[i], 0, file_baton->pool));
 806       file->endp = file->buffer + length[i];
 807       file->curp = file->buffer;
 808       /* Set suffix_start_chunk to a guard value, so if suffix scanning is
 809        * skipped because one of the files is empty, or because of
 810        * reached_one_eof, we can still easily check for the suffix during
 811        * token reading (datasource_get_next_token). */
 812       file->suffix_start_chunk = -1;
 813
 814       files[i] = *file;
 815     }
 816
 817   for (i = 0; i < datasources_len; i++)
 818     if (length[i] == 0)
 819       /* There will not be any identical prefix/suffix, so we're done. */
 820       return SVN_NO_ERROR;
 821
 822 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
 823
 824   SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
 825                                 files, datasources_len, file_baton->pool));
 826
 827   if (!reached_one_eof)
 828     /* No file consisted totally of identical prefix,
 829      * so there may be some identical suffix.  */
 830     SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
 831                                   file_baton->pool));
 832
 833 #endif
 834
 835   /* Copy local results back to baton. */
 836   for (i = 0; i < datasources_len; i++)
 837     file_baton->files[datasource_to_index(datasources[i])] = files[i];
 838
 839   return SVN_NO_ERROR;
 840 }
 841
 842
 843 /* Implements svn_diff_fns2_t::datasource_close */
 844 static svn_error_t *
 845 datasource_close(void *baton, svn_diff_datasource_e datasource)
 846 {
 847   /* Do nothing.  The compare_token function needs previous datasources
 848    * to stay available until all datasources are processed.
 849    */
 850
 851   return SVN_NO_ERROR;
 852 }
 853
 854 /* Implements svn_diff_fns2_t::datasource_get_next_token */
 855 static svn_error_t *
 856 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
 857                           svn_diff_datasource_e datasource)
 858 {
 859   svn_diff__file_baton_t *file_baton = baton;
 860   svn_diff__file_token_t *file_token;
 861   struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
 862   char *endp;
 863   char *curp;
 864   char *eol;
 865   apr_off_t last_chunk;
 866   apr_off_t length;
 867   apr_uint32_t h = 0;
 868   /* Did the last chunk end in a CR character? */
 869   svn_boolean_t had_cr = FALSE;
 870
 871   *token = NULL;
 872
 873   curp = file->curp;
 874   endp = file->endp;
 875
 876   last_chunk = offset_to_chunk(file->size);
 877
 878   /* Are we already at the end of a chunk? */
 879   if (curp == endp)
 880     {
 881       /* Are we at EOF */
 882       if (last_chunk == file->chunk)
 883         return SVN_NO_ERROR; /* EOF */
 884
 885       /* Or right before an identical suffix in the next chunk? */
 886       if (file->chunk + 1 == file->suffix_start_chunk
 887           && file->suffix_offset_in_chunk == 0)
 888         return SVN_NO_ERROR;
 889     }
 890
 891   /* Stop when we encounter the identical suffix. If suffix scanning was not
 892    * performed, suffix_start_chunk will be -1, so this condition will never
 893    * be true. */
 894   if (file->chunk == file->suffix_start_chunk
 895       && (curp - file->buffer) == file->suffix_offset_in_chunk)
 896     return SVN_NO_ERROR;
 897
 898   /* Allocate a new token, or fetch one from the "reusable tokens" list. */
 899   file_token = file_baton->tokens;
 900   if (file_token)
 901     {
 902       file_baton->tokens = file_token->next;
 903     }
 904   else
 905     {
 906       file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
 907     }
 908
 909   file_token->datasource = datasource;
 910   file_token->offset = chunk_to_offset(file->chunk)
 911                        + (curp - file->buffer);
 912   file_token->norm_offset = file_token->offset;
 913   file_token->raw_length = 0;
 914   file_token->length = 0;
 915
 916   while (1)
 917     {
 918       eol = svn_eol__find_eol_start(curp, endp - curp);
 919       if (eol)
 920         {
 921           had_cr = (*eol == '\r');
 922           eol++;
 923           /* If we have the whole eol sequence in the chunk... */
 924           if (!(had_cr && eol == endp))
 925             {
 926               /* Also skip past the '\n' in an '\r\n' sequence. */
 927               if (had_cr && *eol == '\n')
 928                 eol++;
 929               break;
 930             }
 931         }
 932
 933       if (file->chunk == last_chunk)
 934         {
 935           eol = endp;
 936           break;
 937         }
 938
 939       length = endp - curp;
 940       file_token->raw_length += length;
 941       {
 942         char *c = curp;
 943
 944         svn_diff__normalize_buffer(&c, &length,
 945                                    &file->normalize_state,
 946                                    curp, file_baton->options);
 947         if (file_token->length == 0)
 948           {
 949             /* When we are reading the first part of the token, move the
 950                normalized offset past leading ignored characters, if any. */
 951             file_token->norm_offset += (c - curp);
 952           }
 953         file_token->length += length;
 954         h = svn__adler32(h, c, length);
 955       }
 956
 957       curp = endp = file->buffer;
 958       file->chunk++;
 959       length = file->chunk == last_chunk ?
 960         offset_in_chunk(file->size) : CHUNK_SIZE;
 961       endp += length;
 962       file->endp = endp;
 963
 964       /* Issue #4283: Normally we should have checked for reaching the skipped
 965          suffix here, but because we assume that a suffix always starts on a
 966          line and token boundary we rely on catching the suffix earlier in this
 967          function.
 968
 969          When changing things here, make sure the whitespace settings are
 970          applied, or we might not reach the exact suffix boundary as token
 971          boundary. */
 972       SVN_ERR(read_chunk(file->file,
 973                          curp, length,
 974                          chunk_to_offset(file->chunk),
 975                          file_baton->pool));
 976
 977       /* If the last chunk ended in a CR, we're done. */
 978       if (had_cr)
 979         {
 980           eol = curp;
 981           if (*curp == '\n')
 982             ++eol;
 983           break;
 984         }
 985     }
 986
 987   length = eol - curp;
 988   file_token->raw_length += length;
 989   file->curp = eol;
 990
 991   /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
 992    * with a spurious empty token.  Avoid returning it.
 993    * Note that we use the unnormalized length; we don't want a line containing
 994    * only spaces (and no trailing newline) to appear like a non-existent
 995    * line. */
 996   if (file_token->raw_length > 0)
 997     {
 998       char *c = curp;
 999       svn_diff__normalize_buffer(&c, &length,
1000                                  &file->normalize_state,
1001                                  curp, file_baton->options);
1002       if (file_token->length == 0)
1003         {
1004           /* When we are reading the first part of the token, move the
1005              normalized offset past leading ignored characters, if any. */
1006           file_token->norm_offset += (c - curp);
1007         }
1008
1009       file_token->length += length;
1010
1011       *hash = svn__adler32(h, c, length);
1012       *token = file_token;
1013     }
1014
1015   return SVN_NO_ERROR;
1016 }
1017
1018 #define COMPARE_CHUNK_SIZE 4096
1019
1020 /* Implements svn_diff_fns2_t::token_compare */
1021 static svn_error_t *
1022 token_compare(void *baton, void *token1, void *token2, int *compare)
1023 {
1024   svn_diff__file_baton_t *file_baton = baton;
1025   svn_diff__file_token_t *file_token[2];
1026   char buffer[2][COMPARE_CHUNK_SIZE];
1027   char *bufp[2];
1028   apr_off_t offset[2];
1029   struct file_info *file[2];
1030   apr_off_t length[2];
1031   apr_off_t total_length;
1032   /* How much is left to read of each token from the file. */
1033   apr_off_t raw_length[2];
1034   int i;
1035   svn_diff__normalize_state_t state[2];
1036
1037   file_token[0] = token1;
1038   file_token[1] = token2;
1039   if (file_token[0]->length < file_token[1]->length)
1040     {
1041       *compare = -1;
1042       return SVN_NO_ERROR;
1043     }
1044
1045   if (file_token[0]->length > file_token[1]->length)
1046     {
1047       *compare = 1;
1048       return SVN_NO_ERROR;
1049     }
1050
1051   total_length = file_token[0]->length;
1052   if (total_length == 0)
1053     {
1054       *compare = 0;
1055       return SVN_NO_ERROR;
1056     }
1057
1058   for (i = 0; i < 2; ++i)
1059     {
1060       int idx = datasource_to_index(file_token[i]->datasource);
1061
1062       file[i] = &file_baton->files[idx];
1063       offset[i] = file_token[i]->norm_offset;
1064       state[i] = svn_diff__normalize_state_normal;
1065
1066       if (offset_to_chunk(offset[i]) == file[i]->chunk)
1067         {
1068           /* If the start of the token is in memory, the entire token is
1069            * in memory.
1070            */
1071           bufp[i] = file[i]->buffer;
1072           bufp[i] += offset_in_chunk(offset[i]);
1073
1074           length[i] = total_length;
1075           raw_length[i] = 0;
1076         }
1077       else
1078         {
1079           apr_off_t skipped;
1080
1081           length[i] = 0;
1082
1083           /* When we skipped the first part of the token via the whitespace
1084              normalization we must reduce the raw length of the token */
1085           skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1086
1087           raw_length[i] = file_token[i]->raw_length - skipped;
1088         }
1089     }
1090
1091   do
1092     {
1093       apr_off_t len;
1094       for (i = 0; i < 2; i++)
1095         {
1096           if (length[i] == 0)
1097             {
1098               /* Error if raw_length is 0, that's an unexpected change
1099                * of the file that can happen when ingoring whitespace
1100                * and that can lead to an infinite loop. */
1101               if (raw_length[i] == 0)
1102                 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1103                                          NULL,
1104                                          _("The file '%s' changed unexpectedly"
1105                                            " during diff"),
1106                                          file[i]->path);
1107
1108               /* Read a chunk from disk into a buffer */
1109               bufp[i] = buffer[i];
1110               length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1111                 COMPARE_CHUNK_SIZE : raw_length[i];
1112
1113               SVN_ERR(read_chunk(file[i]->file,
1114                                  bufp[i], length[i], offset[i],
1115                                  file_baton->pool));
1116               offset[i] += length[i];
1117               raw_length[i] -= length[i];
1118               /* bufp[i] gets reset to buffer[i] before reading each chunk,
1119                  so, overwriting it isn't a problem */
1120               svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1121                                          bufp[i], file_baton->options);
1122
1123               /* assert(length[i] == file_token[i]->length); */
1124             }
1125         }
1126
1127       len = length[0] > length[1] ? length[1] : length[0];
1128
1129       /* Compare two chunks (that could be entire tokens if they both reside
1130        * in memory).
1131        */
1132       *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1133       if (*compare != 0)
1134         return SVN_NO_ERROR;
1135
1136       total_length -= len;
1137       length[0] -= len;
1138       length[1] -= len;
1139       bufp[0] += len;
1140       bufp[1] += len;
1141     }
1142   while(total_length > 0);
1143
1144   *compare = 0;
1145   return SVN_NO_ERROR;
1146 }
1147
1148
1149 /* Implements svn_diff_fns2_t::token_discard */
1150 static void
1151 token_discard(void *baton, void *token)
1152 {
1153   svn_diff__file_baton_t *file_baton = baton;
1154   svn_diff__file_token_t *file_token = token;
1155
1156   /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1157   file_token->next = file_baton->tokens;
1158   file_baton->tokens = file_token;
1159 }
1160
1161
1162 /* Implements svn_diff_fns2_t::token_discard_all */
1163 static void
1164 token_discard_all(void *baton)
1165 {
1166   svn_diff__file_baton_t *file_baton = baton;
1167
1168   /* Discard all memory in use by the tokens, and close all open files. */
1169   svn_pool_clear(file_baton->pool);
1170 }
1171
1172
/* Callback table handed to the svn_diff_diff*_2() drivers by the
 * svn_diff_file_diff*_2() functions below.  The initializer is positional:
 * the entries must stay in the member order of svn_diff_fns2_t. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,
  datasource_close,
  datasource_get_next_token,
  token_compare,
  token_discard,
  token_discard_all
};
1182
/* Id for the --ignore-eol-style option, which doesn't have a short name.
 * The value lies outside the range of a single character, so it cannot
 * collide with any of the short option letters below. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse().
 * Each entry is { long name, option id, takes-argument flag, description };
 * only "context" (-U) takes an argument.  The table is terminated by an
 * all-zero entry. */
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support. */
  { "unified", 'u', 0, NULL },
  { "context", 'U', 1, NULL },
  { NULL, 0, 0, NULL }
};
1199
1200 svn_diff_file_options_t *
1201 svn_diff_file_options_create(apr_pool_t *pool)
1202 {
1203   svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1204
1205   opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1206
1207   return opts;
1208 }
1209
/* A baton for use with opt_parsing_error_func(). */
struct opt_parsing_error_baton_t
{
  /* Receives the error built from the message reported by
   * apr_getopt_long(); the caller sets it to NULL before parsing. */
  svn_error_t *err;
  /* Pool in which the formatted message text is allocated. */
  apr_pool_t *pool;
};
1216
1217 /* Store an error message from apr_getopt_long().  Set BATON->err to a new
1218  * error with a message generated from FMT and the remaining arguments.
1219  * Implements apr_getopt_err_fn_t. */
1220 static void
1221 opt_parsing_error_func(void *baton,
1222                        const char *fmt, ...)
1223 {
1224   struct opt_parsing_error_baton_t *b = baton;
1225   const char *message;
1226   va_list ap;
1227
1228   va_start(ap, fmt);
1229   message = apr_pvsprintf(b->pool, fmt, ap);
1230   va_end(ap);
1231
1232   /* Skip leading ": " (if present, which it always is in known cases). */
1233   if (strncmp(message, ": ", 2) == 0)
1234     message += 2;
1235
1236   b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1237 }
1238
1239 svn_error_t *
1240 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1241                             const apr_array_header_t *args,
1242                             apr_pool_t *pool)
1243 {
1244   apr_getopt_t *os;
1245   struct opt_parsing_error_baton_t opt_parsing_error_baton;
1246   /* Make room for each option (starting at index 1) plus trailing NULL. */
1247   const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1248
1249   opt_parsing_error_baton.err = NULL;
1250   opt_parsing_error_baton.pool = pool;
1251
1252   argv[0] = "";
1253   memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts);
1254   argv[args->nelts + 1] = NULL;
1255
1256   apr_getopt_init(&os, pool, args->nelts + 1, argv);
1257
1258   /* Capture any error message from apr_getopt_long().  This will typically
1259    * say which option is wrong, which we would not otherwise know. */
1260   os->errfn = opt_parsing_error_func;
1261   os->errarg = &opt_parsing_error_baton;
1262
1263   while (1)
1264     {
1265       const char *opt_arg;
1266       int opt_id;
1267       apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1268
1269       if (APR_STATUS_IS_EOF(err))
1270         break;
1271       if (err)
1272         /* Wrap apr_getopt_long()'s error message.  Its doc string implies
1273          * it always will produce one, but never mind if it doesn't.  Avoid
1274          * using the message associated with the return code ERR, because
1275          * it refers to the "command line" which may be misleading here. */
1276         return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1277                                 opt_parsing_error_baton.err,
1278                                 _("Error in options to internal diff"));
1279
1280       switch (opt_id)
1281         {
1282         case 'b':
1283           /* -w takes precedence over -b. */
1284           if (! options->ignore_space)
1285             options->ignore_space = svn_diff_file_ignore_space_change;
1286           break;
1287         case 'w':
1288           options->ignore_space = svn_diff_file_ignore_space_all;
1289           break;
1290         case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1291           options->ignore_eol_style = TRUE;
1292           break;
1293         case 'p':
1294           options->show_c_function = TRUE;
1295           break;
1296         case 'U':
1297           SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1298           break;
1299         default:
1300           break;
1301         }
1302     }
1303
1304   /* Check for spurious arguments. */
1305   if (os->ind < os->argc)
1306     return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1307                              _("Invalid argument '%s' in diff options"),
1308                              os->argv[os->ind]);
1309
1310   return SVN_NO_ERROR;
1311 }
1312
1313 svn_error_t *
1314 svn_diff_file_diff_2(svn_diff_t **diff,
1315                      const char *original,
1316                      const char *modified,
1317                      const svn_diff_file_options_t *options,
1318                      apr_pool_t *pool)
1319 {
1320   svn_diff__file_baton_t baton = { 0 };
1321
1322   baton.options = options;
1323   baton.files[0].path = original;
1324   baton.files[1].path = modified;
1325   baton.pool = svn_pool_create(pool);
1326
1327   SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1328
1329   svn_pool_destroy(baton.pool);
1330   return SVN_NO_ERROR;
1331 }
1332
1333 svn_error_t *
1334 svn_diff_file_diff3_2(svn_diff_t **diff,
1335                       const char *original,
1336                       const char *modified,
1337                       const char *latest,
1338                       const svn_diff_file_options_t *options,
1339                       apr_pool_t *pool)
1340 {
1341   svn_diff__file_baton_t baton = { 0 };
1342
1343   baton.options = options;
1344   baton.files[0].path = original;
1345   baton.files[1].path = modified;
1346   baton.files[2].path = latest;
1347   baton.pool = svn_pool_create(pool);
1348
1349   SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1350
1351   svn_pool_destroy(baton.pool);
1352   return SVN_NO_ERROR;
1353 }
1354
1355 svn_error_t *
1356 svn_diff_file_diff4_2(svn_diff_t **diff,
1357                       const char *original,
1358                       const char *modified,
1359                       const char *latest,
1360                       const char *ancestor,
1361                       const svn_diff_file_options_t *options,
1362                       apr_pool_t *pool)
1363 {
1364   svn_diff__file_baton_t baton = { 0 };
1365
1366   baton.options = options;
1367   baton.files[0].path = original;
1368   baton.files[1].path = modified;
1369   baton.files[2].path = latest;
1370   baton.files[3].path = ancestor;
1371   baton.pool = svn_pool_create(pool);
1372
1373   SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1374
1375   svn_pool_destroy(baton.pool);
1376   return SVN_NO_ERROR;
1377 }
1378
1379 \f
/** Display unified context diffs **/

/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead. */
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50

/* State shared by the unified-diff output callbacks.  Two-element arrays
 * are indexed by file: 0 is the original, 1 is the modified file. */
typedef struct svn_diff__file_output_baton_t
{
  /* Stream that receives each hunk header and hunk body. */
  svn_stream_t *output_stream;
  /* Encoding passed to the helpers that write the hunk header and the
   * "no newline" message. */
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  /* NOTE(review): presumably the paths of the two files; not used by the
   * functions visible in this part of the file. */
  const char *path[2];
  /* Open handles the lines are read from. */
  apr_file_t *file[2];

  /* Number of lines consumed so far from each file; incremented once per
   * output_unified_line() call, even at EOF. */
  apr_off_t   current_line[2];

  /* Per-file read buffer, the number of not yet consumed bytes in it, and
   * the position of the first such byte. */
  char        buffer[2][4096];
  apr_size_t  length[2];
  char       *curp[2];

  /* Zero-based first line and line count of the hunk being built, per
   * file, and the hunk text accumulated so far. */
  apr_off_t   hunk_start[2];
  apr_off_t   hunk_length[2];
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  /* Number of unchanged context lines shown around a change. */
  int context_size;

  /* Pool used for reads and helper calls while writing output. */
  apr_pool_t *pool;
} svn_diff__file_output_baton_t;
1422
/* How output_unified_line() treats the line it consumes. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts for both files. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts for the original only. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts for the modified only. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1430
1431
/* Consume one line from BATON->file[IDX] (0 = original, 1 = modified).
 *
 * Unless TYPE is svn_diff__file_output_unified_skip, the line is appended
 * to BATON->hunk, preceded by the marker that belongs to TYPE, and the
 * matching BATON->hunk_length counters are incremented.  When
 * BATON->show_c_function is set, skipped and context lines that start
 * with a letter, '$' or '_' and do not match BATON->extra_skip_match
 * replace the contents of BATON->extra_context.
 *
 * BATON->current_line[IDX] is always incremented, even at end of file.
 * A line may extend over several buffer refills; a CR that is the last
 * byte of a buffer is only resolved after the next read, which shows
 * whether an LF follows.  If end of file is reached inside an emitted
 * line that has no line terminator, the "no newline" message is appended
 * to the hunk. */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  /* Has any part of the current line been handled yet? */
  svn_boolean_t bytes_processed = FALSE;
  /* Did the data seen so far end in a CR? */
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  /* Nothing buffered and nothing left in the file: no line to output. */
  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          /* First bytes of this line: emit the marker and decide whether
           * the line is a candidate for the extra hunk context. */
          if (!bytes_processed)
            {
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* The line ends here unless the terminator is a CR in the
               * very last buffered byte; in that case an LF may still
               * follow in the next read. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  /* Remember where the next line starts. */
                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          /* The line continues beyond the buffered data: hand over what
           * we have and go on to read more. */
          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      /* Refill the buffer from the file. */
      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  /* Any read error other than end-of-file is passed on to the caller. */
  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}
1596
1597 static APR_INLINE svn_error_t *
1598 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1599                           int source,
1600                           svn_diff__file_output_unified_type_e type,
1601                           apr_off_t until)
1602 {
1603   while (output_baton->current_line[source] < until)
1604     {
1605       SVN_ERR(output_unified_line(output_baton, type, source));
1606     }
1607   return SVN_NO_ERROR;
1608 }
1609
1610 static svn_error_t *
1611 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1612 {
1613   apr_off_t target_line;
1614   apr_size_t hunk_len;
1615   apr_off_t old_start;
1616   apr_off_t new_start;
1617
1618   if (svn_stringbuf_isempty(baton->hunk))
1619     {
1620       /* Nothing to flush */
1621       return SVN_NO_ERROR;
1622     }
1623
1624   target_line = baton->hunk_start[0] + baton->hunk_length[0]
1625                 + baton->context_size;
1626
1627   /* Add trailing context to the hunk */
1628   SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1629                                     svn_diff__file_output_unified_context,
1630                                     target_line));
1631
1632   old_start = baton->hunk_start[0];
1633   new_start = baton->hunk_start[1];
1634
1635   /* If the file is non-empty, convert the line indexes from
1636      zero based to one based */
1637   if (baton->hunk_length[0])
1638     old_start++;
1639   if (baton->hunk_length[1])
1640     new_start++;
1641
1642   /* Write the hunk header */
1643   SVN_ERR(svn_diff__unified_write_hunk_header(
1644             baton->output_stream, baton->header_encoding, "@@",
1645             old_start, baton->hunk_length[0],
1646             new_start, baton->hunk_length[1],
1647             baton->hunk_extra_context,
1648             baton->pool));
1649
1650   /* Output the hunk content */
1651   hunk_len = baton->hunk->len;
1652   SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1653                            &hunk_len));
1654
1655   /* Prepare for the next hunk */
1656   baton->hunk_length[0] = 0;
1657   baton->hunk_length[1] = 0;
1658   baton->hunk_start[0] = 0;
1659   baton->hunk_start[1] = 0;
1660   svn_stringbuf_setempty(baton->hunk);
1661
1662   return SVN_NO_ERROR;
1663 }
1664
1665 static svn_error_t *
1666 output_unified_diff_modified(void *baton,
1667   apr_off_t original_start, apr_off_t original_length,
1668   apr_off_t modified_start, apr_off_t modified_length,
1669   apr_off_t latest_start, apr_off_t latest_length)
1670 {
1671   svn_diff__file_output_baton_t *output_baton = baton;
1672   apr_off_t context_prefix_length;
1673   apr_off_t prev_context_end;
1674   svn_boolean_t init_hunk = FALSE;
1675
1676   if (original_start > output_baton->context_size)
1677     context_prefix_length = output_baton->context_size;
1678   else
1679     context_prefix_length = original_start;
1680
1681   /* Calculate where the previous hunk will end if we would write it now
1682      (including the necessary context at the end) */
1683   if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1684     {
1685       prev_context_end = output_baton->hunk_start[0]
1686                          + output_baton->hunk_length[0]
1687                          + output_baton->context_size;
1688     }
1689   else
1690     {
1691       prev_context_end = -1;
1692
1693       if (output_baton->hunk_start[0] == 0
1694           && (original_length > 0 || modified_length > 0))
1695         init_hunk = TRUE;
1696     }
1697
1698   /* If the changed range is far enough from the previous range, flush the current
1699      hunk. */
1700   {
1701     apr_off_t new_hunk_start = (original_start - context_prefix_length);
1702
1703     if (output_baton->current_line[0] < new_hunk_start
1704           && prev_context_end <= new_hunk_start)
1705       {
1706         SVN_ERR(output_unified_flush_hunk(output_baton));
1707         init_hunk = TRUE;
1708       }
1709     else if (output_baton->hunk_length[0] > 0
1710              || output_baton->hunk_length[1] > 0)
1711       {
1712         /* We extend the current hunk */
1713
1714
1715         /* Original: Output the context preceding the changed range */
1716         SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1717                                           svn_diff__file_output_unified_context,
1718                                           original_start));
1719       }
1720   }
1721
1722   /* Original: Skip lines until we are at the beginning of the context we want
1723      to display */
1724   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1725                                     svn_diff__file_output_unified_skip,
1726                                     original_start - context_prefix_length));
1727
1728   /* Note that the above skip stores data for the show_c_function support below */
1729
1730   if (init_hunk)
1731     {
1732       SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1733                      && output_baton->hunk_length[1] == 0);
1734
1735       output_baton->hunk_start[0] = original_start - context_prefix_length;
1736       output_baton->hunk_start[1] = modified_start - context_prefix_length;
1737     }
1738
1739   if (init_hunk && output_baton->show_c_function)
1740     {
1741       apr_size_t p;
1742       const char *invalid_character;
1743
1744       /* Save the extra context for later use.
1745        * Note that the last byte of the hunk_extra_context array is never
1746        * touched after it is zero-initialized, so the array is always
1747        * 0-terminated. */
1748       strncpy(output_baton->hunk_extra_context,
1749               output_baton->extra_context->data,
1750               SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1751       /* Trim whitespace at the end, most notably to get rid of any
1752        * newline characters. */
1753       p = strlen(output_baton->hunk_extra_context);
1754       while (p > 0
1755              && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1756         {
1757           output_baton->hunk_extra_context[--p] = '\0';
1758         }
1759       invalid_character =
1760         svn_utf__last_valid(output_baton->hunk_extra_context,
1761                             SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1762       for (p = invalid_character - output_baton->hunk_extra_context;
1763            p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1764         {
1765           output_baton->hunk_extra_context[p] = '\0';
1766         }
1767     }
1768
1769   /* Modified: Skip lines until we are at the start of the changed range */
1770   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1771                                     svn_diff__file_output_unified_skip,
1772                                     modified_start));
1773
1774   /* Original: Output the context preceding the changed range */
1775   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1776                                     svn_diff__file_output_unified_context,
1777                                     original_start));
1778
1779   /* Both: Output the changed range */
1780   SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1781                                     svn_diff__file_output_unified_delete,
1782                                     original_start + original_length));
1783   SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1784                                     svn_diff__file_output_unified_insert,
1785                                     modified_start + modified_length));
1786
1787   return SVN_NO_ERROR;
1788 }
1789
1790 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1791 static svn_error_t *
1792 output_unified_default_hdr(const char **header, const char *path,
1793                            apr_pool_t *pool)
1794 {
1795   apr_finfo_t file_info;
1796   apr_time_exp_t exploded_time;
1797   char time_buffer[64];
1798   apr_size_t time_len;
1799   const char *utf8_timestr;
1800
1801   SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1802   apr_time_exp_lt(&exploded_time, file_info.mtime);
1803
1804   apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1805   /* Order of date components can be different in different languages */
1806                _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1807
1808   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1809
1810   *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1811
1812   return SVN_NO_ERROR;
1813 }
1814
/* Output callbacks handed to svn_diff_output2() by
   svn_diff_file_output_unified4().  Only the second slot is populated
   (with output_unified_diff_modified); every other slot is NULL. */
static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
{
  NULL, /* output_common */
  output_unified_diff_modified,
  NULL, /* output_diff_latest */
  NULL, /* output_diff_common */
  NULL  /* output_conflict */
};
1823
1824 svn_error_t *
1825 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1826                               svn_diff_t *diff,
1827                               const char *original_path,
1828                               const char *modified_path,
1829                               const char *original_header,
1830                               const char *modified_header,
1831                               const char *header_encoding,
1832                               const char *relative_to_dir,
1833                               svn_boolean_t show_c_function,
1834                               int context_size,
1835                               svn_cancel_func_t cancel_func,
1836                               void *cancel_baton,
1837                               apr_pool_t *pool)
1838 {
1839   if (svn_diff_contains_diffs(diff))
1840     {
1841       svn_diff__file_output_baton_t baton;
1842       int i;
1843
1844       memset(&baton, 0, sizeof(baton));
1845       baton.output_stream = output_stream;
1846       baton.pool = pool;
1847       baton.header_encoding = header_encoding;
1848       baton.path[0] = original_path;
1849       baton.path[1] = modified_path;
1850       baton.hunk = svn_stringbuf_create_empty(pool);
1851       baton.show_c_function = show_c_function;
1852       baton.extra_context = svn_stringbuf_create_empty(pool);
1853       baton.context_size = (context_size >= 0) ? context_size
1854                                               : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1855
1856       if (show_c_function)
1857         {
1858           baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1859
1860           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1861           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1862           APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1863         }
1864
1865       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1866                                             header_encoding, pool));
1867       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1868                                             header_encoding, pool));
1869       SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1870                                             header_encoding, pool));
1871
1872       if (relative_to_dir)
1873         {
1874           /* Possibly adjust the "original" and "modified" paths shown in
1875              the output (see issue #2723). */
1876           const char *child_path;
1877
1878           if (! original_header)
1879             {
1880               child_path = svn_dirent_is_child(relative_to_dir,
1881                                                original_path, pool);
1882               if (child_path)
1883                 original_path = child_path;
1884               else
1885                 return svn_error_createf(
1886                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1887                                    _("Path '%s' must be inside "
1888                                      "the directory '%s'"),
1889                                    svn_dirent_local_style(original_path, pool),
1890                                    svn_dirent_local_style(relative_to_dir,
1891                                                           pool));
1892             }
1893
1894           if (! modified_header)
1895             {
1896               child_path = svn_dirent_is_child(relative_to_dir,
1897                                                modified_path, pool);
1898               if (child_path)
1899                 modified_path = child_path;
1900               else
1901                 return svn_error_createf(
1902                                    SVN_ERR_BAD_RELATIVE_PATH, NULL,
1903                                    _("Path '%s' must be inside "
1904                                      "the directory '%s'"),
1905                                    svn_dirent_local_style(modified_path, pool),
1906                                    svn_dirent_local_style(relative_to_dir,
1907                                                           pool));
1908             }
1909         }
1910
1911       for (i = 0; i < 2; i++)
1912         {
1913           SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1914                                    APR_READ, APR_OS_DEFAULT, pool));
1915         }
1916
1917       if (original_header == NULL)
1918         {
1919           SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1920                                              pool));
1921         }
1922
1923       if (modified_header == NULL)
1924         {
1925           SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1926                                              pool));
1927         }
1928
1929       SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1930                                              original_header, modified_header,
1931                                              pool));
1932
1933       SVN_ERR(svn_diff_output2(diff, &baton,
1934                                &svn_diff__file_output_unified_vtable,
1935                                cancel_func, cancel_baton));
1936       SVN_ERR(output_unified_flush_hunk(&baton));
1937
1938       for (i = 0; i < 2; i++)
1939         {
1940           SVN_ERR(svn_io_file_close(baton.file[i], pool));
1941         }
1942     }
1943
1944   return SVN_NO_ERROR;
1945 }
1946
1947 \f
1948 /** Display diff3 **/
1949
/* A stream to remember *leading* context.  Note that this stream does
   *not* copy the data that it is remembering; it just saves
   *pointers*!

   DATA and LEN are used as a ring buffer of CONTEXT_SIZE entries: each
   write stores into NEXT_SLOT and then advances it modulo CONTEXT_SIZE. */
typedef struct context_saver_t {
  /* The stream whose write handler records into this structure. */
  svn_stream_t *stream;
  /* Number of slots in DATA and LEN. */
  int context_size;
  const char **data; /* const char *data[context_size] */
  apr_size_t *len;   /* apr_size_t len[context_size] */
  /* Index of the slot the next write will overwrite. */
  apr_size_t next_slot;
  /* Number of writes recorded so far (not capped at CONTEXT_SIZE). */
  apr_size_t total_written;
} context_saver_t;
1961
1962
1963 static svn_error_t *
1964 context_saver_stream_write(void *baton,
1965                            const char *data,
1966                            apr_size_t *len)
1967 {
1968   context_saver_t *cs = baton;
1969
1970   if (cs->context_size > 0)
1971     {
1972       cs->data[cs->next_slot] = data;
1973       cs->len[cs->next_slot] = *len;
1974       cs->next_slot = (cs->next_slot + 1) % cs->context_size;
1975       cs->total_written++;
1976     }
1977   return SVN_NO_ERROR;
1978 }
1979
/* Baton for the diff3 (merge) output callbacks in this file. */
typedef struct svn_diff3__file_output_baton_t
{
  /* Stream that output_line() and the marker helpers write to. */
  svn_stream_t *output_stream;

  /* Paths of the three files; svn_diff_file_output_merge3() stores
     original, modified and latest at index 0, 1 and 2 respectively. */
  const char *path[3];

  /* Per file: number of lines consumed so far by output_line(). */
  apr_off_t   current_line[3];

  /* Per file: start of the file contents, one past their end, and the
     current read position. */
  char       *buffer[3];
  char       *endp[3];
  char       *curp[3];

  /* The following four members are in the encoding used for the output. */
  const char *conflict_modified;
  const char *conflict_original;
  const char *conflict_separator;
  const char *conflict_latest;

  /* End-of-line string written after each conflict marker. */
  const char *marker_eol;

  svn_diff_conflict_display_style_t conflict_style;
  /* Number of context lines kept around conflicts. */
  int context_size;

  /* cancel support */
  svn_cancel_func_t cancel_func;
  void *cancel_baton;

  /* The rest of the fields are for
     svn_diff_conflict_display_only_conflicts only.  Note that for
     these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
     (soon after a conflict) a "trailing context stream", never the
     actual output stream.*/
  /* The actual output stream. */
  svn_stream_t *real_output_stream;
  context_saver_t *context_saver;
  /* Used to allocate context_saver and trailing context streams, and
     for some printfs. */
  apr_pool_t *pool;
} svn_diff3__file_output_baton_t;
2019
2020 static svn_error_t *
2021 flush_context_saver(context_saver_t *cs,
2022                     svn_stream_t *output_stream)
2023 {
2024   int i;
2025   for (i = 0; i < cs->context_size; i++)
2026     {
2027       apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2028       if (cs->data[slot])
2029         {
2030           apr_size_t len = cs->len[slot];
2031           SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2032         }
2033     }
2034   return SVN_NO_ERROR;
2035 }
2036
2037 static void
2038 make_context_saver(svn_diff3__file_output_baton_t *fob)
2039 {
2040   context_saver_t *cs;
2041
2042   assert(fob->context_size > 0); /* Or nothing to save */
2043
2044   svn_pool_clear(fob->pool);
2045   cs = apr_pcalloc(fob->pool, sizeof(*cs));
2046   cs->stream = svn_stream_empty(fob->pool);
2047   svn_stream_set_baton(cs->stream, cs);
2048   svn_stream_set_write(cs->stream, context_saver_stream_write);
2049   fob->context_saver = cs;
2050   fob->output_stream = cs->stream;
2051   cs->context_size = fob->context_size;
2052   cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2053   cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2054 }
2055
2056
/* A stream which prints LINES_TO_PRINT (based on context size) lines to
   BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
   a context_saver; used for *trailing* context. */

struct trailing_context_printer {
  /* Number of writes still to be passed through to the real output. */
  apr_size_t lines_to_print;
  /* The output baton this printer belongs to. */
  svn_diff3__file_output_baton_t *fob;
};
2065
2066
2067
2068 static svn_error_t *
2069 trailing_context_printer_write(void *baton,
2070                                const char *data,
2071                                apr_size_t *len)
2072 {
2073   struct trailing_context_printer *tcp = baton;
2074   SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2075   SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2076   tcp->lines_to_print--;
2077   if (tcp->lines_to_print == 0)
2078     make_context_saver(tcp->fob);
2079   return SVN_NO_ERROR;
2080 }
2081
2082
2083 static void
2084 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2085 {
2086   struct trailing_context_printer *tcp;
2087   svn_stream_t *s;
2088
2089   svn_pool_clear(btn->pool);
2090
2091   tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2092   tcp->lines_to_print = btn->context_size;
2093   tcp->fob = btn;
2094   s = svn_stream_empty(btn->pool);
2095   svn_stream_set_baton(s, tcp);
2096   svn_stream_set_write(s, trailing_context_printer_write);
2097   btn->output_stream = s;
2098 }
2099
2100
2101
/* What output_line() should do with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,
  /* Write the line to the baton's output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2107
2108
2109 static svn_error_t *
2110 output_line(svn_diff3__file_output_baton_t *baton,
2111             svn_diff3__file_output_type_e type, int idx)
2112 {
2113   char *curp;
2114   char *endp;
2115   char *eol;
2116   apr_size_t len;
2117
2118   curp = baton->curp[idx];
2119   endp = baton->endp[idx];
2120
2121   /* Lazily update the current line even if we're at EOF.
2122    */
2123   baton->current_line[idx]++;
2124
2125   if (curp == endp)
2126     return SVN_NO_ERROR;
2127
2128   eol = svn_eol__find_eol_start(curp, endp - curp);
2129   if (!eol)
2130     eol = endp;
2131   else
2132     {
2133       svn_boolean_t had_cr = (*eol == '\r');
2134       eol++;
2135       if (had_cr && eol != endp && *eol == '\n')
2136         eol++;
2137     }
2138
2139   if (type != svn_diff3__file_output_skip)
2140     {
2141       len = eol - curp;
2142       /* Note that the trailing context printer assumes that
2143          svn_stream_write is called exactly once per line. */
2144       SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2145     }
2146
2147   baton->curp[idx] = eol;
2148
2149   return SVN_NO_ERROR;
2150 }
2151
2152 static svn_error_t *
2153 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2154 {
2155   return svn_stream_puts(btn->output_stream, btn->marker_eol);
2156 }
2157
2158 static svn_error_t *
2159 output_hunk(void *baton, int idx, apr_off_t target_line,
2160             apr_off_t target_length)
2161 {
2162   svn_diff3__file_output_baton_t *output_baton = baton;
2163
2164   /* Skip lines until we are at the start of the changed range */
2165   while (output_baton->current_line[idx] < target_line)
2166     {
2167       SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2168     }
2169
2170   target_line += target_length;
2171
2172   while (output_baton->current_line[idx] < target_line)
2173     {
2174       SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2175     }
2176
2177   return SVN_NO_ERROR;
2178 }
2179
2180 static svn_error_t *
2181 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2182               apr_off_t modified_start, apr_off_t modified_length,
2183               apr_off_t latest_start, apr_off_t latest_length)
2184 {
2185   return output_hunk(baton, 1, modified_start, modified_length);
2186 }
2187
2188 static svn_error_t *
2189 output_diff_modified(void *baton,
2190                      apr_off_t original_start, apr_off_t original_length,
2191                      apr_off_t modified_start, apr_off_t modified_length,
2192                      apr_off_t latest_start, apr_off_t latest_length)
2193 {
2194   return output_hunk(baton, 1, modified_start, modified_length);
2195 }
2196
2197 static svn_error_t *
2198 output_diff_latest(void *baton,
2199                    apr_off_t original_start, apr_off_t original_length,
2200                    apr_off_t modified_start, apr_off_t modified_length,
2201                    apr_off_t latest_start, apr_off_t latest_length)
2202 {
2203   return output_hunk(baton, 2, latest_start, latest_length);
2204 }
2205
/* Forward declaration: output_conflict() is referenced by
   svn_diff3__file_output_vtable below, and its definition in turn refers
   to that vtable. */
static svn_error_t *
output_conflict(void *baton,
                apr_off_t original_start, apr_off_t original_length,
                apr_off_t modified_start, apr_off_t modified_length,
                apr_off_t latest_start, apr_off_t latest_length,
                svn_diff_t *diff);
2212
/* Output callbacks handed to svn_diff_output2() for diff3 (merge) output.
   Note that output_diff_modified also serves the output_diff_common
   slot. */
static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
{
  output_common,
  output_diff_modified,
  output_diff_latest,
  output_diff_modified, /* output_diff_common */
  output_conflict
};
2221
2222 static svn_error_t *
2223 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2224                                     const char *label,
2225                                     apr_off_t start,
2226                                     apr_off_t length)
2227 {
2228   if (length == 1)
2229     SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2230                               "%s (%" APR_OFF_T_FMT ")",
2231                               label, start + 1));
2232   else
2233     SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2234                               "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2235                               label, start + 1, length));
2236
2237   SVN_ERR(output_marker_eol(btn));
2238
2239   return SVN_NO_ERROR;
2240 }
2241
2242 static svn_error_t *
2243 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2244                              apr_off_t original_start,
2245                              apr_off_t original_length,
2246                              apr_off_t modified_start,
2247                              apr_off_t modified_length,
2248                              apr_off_t latest_start,
2249                              apr_off_t latest_length)
2250 {
2251   /* Are we currently saving starting context (as opposed to printing
2252      trailing context)?  If so, flush it. */
2253   if (btn->output_stream == btn->context_saver->stream)
2254     {
2255       if (btn->context_saver->total_written > btn->context_size)
2256         SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2257       SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2258     }
2259
2260   /* Print to the real output stream. */
2261   btn->output_stream = btn->real_output_stream;
2262
2263   /* Output the conflict itself. */
2264   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2265                                               modified_start, modified_length));
2266   SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2267
2268   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2269                                               original_start, original_length));
2270   SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2271
2272   SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2273                             "%s%s", btn->conflict_separator, btn->marker_eol));
2274   SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2275   SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2276                                               latest_start, latest_length));
2277
2278   /* Go into print-trailing-context mode instead. */
2279   make_trailing_context_printer(btn);
2280
2281   return SVN_NO_ERROR;
2282 }
2283
2284
2285 static svn_error_t *
2286 output_conflict(void *baton,
2287                 apr_off_t original_start, apr_off_t original_length,
2288                 apr_off_t modified_start, apr_off_t modified_length,
2289                 apr_off_t latest_start, apr_off_t latest_length,
2290                 svn_diff_t *diff)
2291 {
2292   svn_diff3__file_output_baton_t *file_baton = baton;
2293
2294   svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2295
2296   if (style == svn_diff_conflict_display_only_conflicts)
2297     return output_conflict_with_context(file_baton,
2298                                         original_start, original_length,
2299                                         modified_start, modified_length,
2300                                         latest_start, latest_length);
2301
2302   if (style == svn_diff_conflict_display_resolved_modified_latest)
2303     {
2304       if (diff)
2305         return svn_diff_output2(diff, baton,
2306                                 &svn_diff3__file_output_vtable,
2307                                 file_baton->cancel_func,
2308                                 file_baton->cancel_baton);
2309       else
2310         style = svn_diff_conflict_display_modified_latest;
2311     }
2312
2313   if (style == svn_diff_conflict_display_modified_latest ||
2314       style == svn_diff_conflict_display_modified_original_latest)
2315     {
2316       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2317                                file_baton->conflict_modified));
2318       SVN_ERR(output_marker_eol(file_baton));
2319
2320       SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2321
2322       if (style == svn_diff_conflict_display_modified_original_latest)
2323         {
2324           SVN_ERR(svn_stream_puts(file_baton->output_stream,
2325                                    file_baton->conflict_original));
2326           SVN_ERR(output_marker_eol(file_baton));
2327           SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2328         }
2329
2330       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2331                               file_baton->conflict_separator));
2332       SVN_ERR(output_marker_eol(file_baton));
2333
2334       SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2335
2336       SVN_ERR(svn_stream_puts(file_baton->output_stream,
2337                               file_baton->conflict_latest));
2338       SVN_ERR(output_marker_eol(file_baton));
2339     }
2340   else if (style == svn_diff_conflict_display_modified)
2341     SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2342   else if (style == svn_diff_conflict_display_latest)
2343     SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2344   else /* unknown style */
2345     SVN_ERR_MALFUNCTION();
2346
2347   return SVN_NO_ERROR;
2348 }
2349
/* Write the merged result described by DIFF to OUTPUT_STREAM.
 *
 * ORIGINAL_PATH, MODIFIED_PATH and LATEST_PATH name the three files the
 * diff refers to; their contents are obtained with map_or_read_file().
 *
 * CONFLICT_ORIGINAL, CONFLICT_MODIFIED, CONFLICT_LATEST and
 * CONFLICT_SEPARATOR are the conflict marker texts.  Each may be NULL, in
 * which case a default is used: "||||||| <original_path>",
 * "<<<<<<< <modified_path>", ">>>>>>> <latest_path>" and "=======".
 *
 * STYLE selects how conflicts are rendered (see output_conflict()).
 * CANCEL_FUNC and CANCEL_BATON are forwarded to svn_diff_output2().
 * Temporary allocations are made in SCRATCH_POOL.
 */
svn_error_t *
svn_diff_file_output_merge3(svn_stream_t *output_stream,
                            svn_diff_t *diff,
                            const char *original_path,
                            const char *modified_path,
                            const char *latest_path,
                            const char *conflict_original,
                            const char *conflict_modified,
                            const char *conflict_latest,
                            const char *conflict_separator,
                            svn_diff_conflict_display_style_t style,
                            svn_cancel_func_t cancel_func,
                            void *cancel_baton,
                            apr_pool_t *scratch_pool)
{
  svn_diff3__file_output_baton_t baton;
  apr_file_t *file[3];
  int idx;
#if APR_HAS_MMAP
  apr_mmap_t *mm[3] = { 0 };
#endif /* APR_HAS_MMAP */
  const char *eol;
  svn_boolean_t conflicts_only =
    (style == svn_diff_conflict_display_only_conflicts);

  memset(&baton, 0, sizeof(baton));
  baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
  if (conflicts_only)
    {
      /* In this mode nothing is written to OUTPUT_STREAM directly; output
         first goes through a context saver living in its own subpool.
         make_context_saver() needs both the pool and the context size, so
         they must be set before the call. */
      baton.pool = svn_pool_create(scratch_pool);
      make_context_saver(&baton);
      baton.real_output_stream = output_stream;
    }
  else
    baton.output_stream = output_stream;
  baton.path[0] = original_path;
  baton.path[1] = modified_path;
  baton.path[2] = latest_path;
  /* Convert the conflict markers (or their defaults) from UTF-8. */
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
                                    conflict_modified ? conflict_modified
                                    : apr_psprintf(scratch_pool, "<<<<<<< %s",
                                                   modified_path),
                                    scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
                                    conflict_original ? conflict_original
                                    : apr_psprintf(scratch_pool, "||||||| %s",
                                                   original_path),
                                    scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
                                    conflict_separator ? conflict_separator
                                    : "=======", scratch_pool));
  SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
                                    conflict_latest ? conflict_latest
                                    : apr_psprintf(scratch_pool, ">>>>>>> %s",
                                                   latest_path),
                                    scratch_pool));

  baton.conflict_style = style;

  /* Get the contents of all three files and set up the per-file read
     positions. */
  for (idx = 0; idx < 3; idx++)
    {
      apr_size_t size;

      SVN_ERR(map_or_read_file(&file[idx],
                               MMAP_T_ARG(mm[idx])
                               &baton.buffer[idx], &size,
                               baton.path[idx], scratch_pool));

      baton.curp[idx] = baton.buffer[idx];
      baton.endp[idx] = baton.buffer[idx];

      /* Only advance the end pointer when there is a buffer at all. */
      if (baton.endp[idx])
        baton.endp[idx] += size;
    }

  /* Check what eol marker we should use for conflict markers.
     We use the eol marker of the modified file and fall back on the
     platform's eol marker if that file doesn't contain any newlines. */
  eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
                            NULL);
  if (! eol)
    eol = APR_EOL_STR;
  baton.marker_eol = eol;

  /* Also kept in the baton, for output_conflict()'s nested
     svn_diff_output2() call. */
  baton.cancel_func = cancel_func;
  baton.cancel_baton = cancel_baton;

  SVN_ERR(svn_diff_output2(diff, &baton,
                          &svn_diff3__file_output_vtable,
                          cancel_func, cancel_baton));

  /* Release the mappings and close the files. */
  for (idx = 0; idx < 3; idx++)
    {
#if APR_HAS_MMAP
      if (mm[idx])
        {
          apr_status_t rv = apr_mmap_delete(mm[idx]);
          if (rv != APR_SUCCESS)
            {
              return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
                                        baton.path[idx]);
            }
        }
#endif /* APR_HAS_MMAP */

      if (file[idx])
        {
          SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
        }
    }

  if (conflicts_only)
    svn_pool_destroy(baton.pool);

  return SVN_NO_ERROR;
}
2466