2 * diff_file.c : routines for doing diffs on files
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 #include <apr_file_io.h>
29 #include <apr_file_info.h>
32 #include <apr_getopt.h>
34 #include "svn_error.h"
36 #include "svn_types.h"
37 #include "svn_string.h"
38 #include "svn_subst.h"
41 #include "svn_pools.h"
43 #include "svn_private_config.h"
45 #include "svn_ctype.h"
47 #include "private/svn_utf_private.h"
48 #include "private/svn_eol_private.h"
49 #include "private/svn_dep_compat.h"
50 #include "private/svn_adler32.h"
51 #include "private/svn_diff_private.h"
53 /* A token, i.e. a line read from a file. */
54 typedef struct svn_diff__file_token_t
56 /* Next token in the free list headed by the file baton's TOKENS member
   (see token_discard()). */
57 struct svn_diff__file_token_t *next;
/* Datasource this token was read from; token_compare() uses it to find
   the owning file via datasource_to_index(). */
58 svn_diff_datasource_e datasource;
59 /* Offset in the datasource. */
61 /* Offset of the normalized token (may skip leading whitespace).
   token_compare() starts comparing from this offset. */
62 apr_off_t norm_offset;
63 /* Total length - before normalization. */
65 /* Total length - after normalization. */
67 } svn_diff__file_token_t;
/* Baton shared by the svn_diff_fns2_t callbacks in this file; each of them
   receives it as a `void *baton' and converts it back to this type. */
70 typedef struct svn_diff__file_baton_t
/* Diff options; handed to svn_diff__normalize_buffer() whenever token
   data is normalized. */
72 const svn_diff_file_options_t *options;
75 const char *path; /* path to this file, absolute or relative to CWD */
77 /* All the following fields are active while this datasource is open */
78 apr_file_t *file; /* handle of this file */
79 apr_off_t size; /* total raw size in bytes of this file */
81 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
82 int chunk; /* the current chunk number, zero-based; -1 marks "before the beginning of the file" */
83 char *buffer; /* a buffer containing the current chunk */
84 char *curp; /* current position in the current chunk */
85 char *endp; /* next memory address after the current chunk */
/* Passed by address to svn_diff__normalize_buffer() while tokens are read
   from this file. */
87 svn_diff__normalize_state_t normalize_state;
89 /* Where the identical suffix starts in this datasource.
   datasources_open() presets suffix_start_chunk to -1 as a guard value
   meaning that no suffix has been recorded. */
90 int suffix_start_chunk;
91 apr_off_t suffix_offset_in_chunk;
94 /* List of free tokens that may be reused. */
95 svn_diff__file_token_t *tokens;
98 } svn_diff__file_baton_t;
/* Translate DATASOURCE into the index that callers use to select an entry
   of the baton's files[] array, as in
   file_baton->files[datasource_to_index(...)].  There is one case per
   datasource kind: original, modified, latest and ancestor. */
101 datasource_to_index(svn_diff_datasource_e datasource)
105 case svn_diff_datasource_original:
108 case svn_diff_datasource_modified:
111 case svn_diff_datasource_latest:
114 case svn_diff_datasource_ancestor:
121 /* Files are read in chunks of 128k. There is no particular justification
122 * for this number. If someone comes up with a number that is backed by
123 * some argumentation, let's use that.
125 /* If you change this number, update test_norm_offset(),
126 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
/* log2 of the chunk size: files are processed in 2^17 byte (128k) chunks. */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Byte offset at which the zero-based chunk number CHUNK starts.
 * The chunk number is widened to apr_off_t before shifting: callers pass an
 * 'int', and shifting an int left by CHUNK_SHIFT overflows (undefined
 * behaviour) as soon as the offset reaches 2 GiB with a 32-bit int. */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)
/* Number of the chunk that contains byte OFFSET. */
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
/* Position of byte OFFSET inside its chunk. */
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
137 * LENGTH bytes. LENGTH is passed by value, so the number of bytes actually
 * read is not handed back to the caller. The file is positioned with
 * svn_io_file_seek() before svn_io_file_read_full2() is called.
139 static APR_INLINE svn_error_t *
140 read_chunk(apr_file_t *file, const char *path,
141 char *buffer, apr_off_t length,
142 apr_off_t offset, apr_pool_t *pool)
144 /* XXX: The final offset may not be the one we asked for.
147 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
148 return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
153 /* Map or read a file at PATH. *BUFFER will point to the file
154 * contents; if the file was mapped, *FILE and *MM will contain the
155 * mmap context; otherwise they will be NULL. *SIZE_P will contain the
156 * file size. Allocate from POOL.
 *
 * A file whose size exceeds APR_SIZE_MAX is rejected with an APR_ENOMEM
 * error. Mapping is only attempted for files larger than
 * APR_MMAP_THRESHOLD; smaller files, and files for which mapping failed,
 * are read into a buffer allocated from POOL.
159 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
160 #define MMAP_T_ARG(NAME) &(NAME),
162 #define MMAP_T_PARAM(NAME)
163 #define MMAP_T_ARG(NAME)
167 map_or_read_file(apr_file_t **file,
169 char **buffer, apr_size_t *size_p,
170 const char *path, apr_pool_t *pool)
178 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
179 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
181 if (finfo.size > APR_SIZE_MAX)
183 return svn_error_createf(APR_ENOMEM, NULL,
184 _("File '%s' is too large to be read in "
188 size = (apr_size_t) finfo.size;
190 if (size > APR_MMAP_THRESHOLD)
192 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
193 if (rv == APR_SUCCESS)
199 /* Clear *MM because output parameters are undefined on error. */
203 /* On failure we just fall through and try reading the file into
207 #endif /* APR_HAS_MMAP */
/* No buffer yet and the file is not empty: allocate SIZE bytes from POOL
   and read the whole file into them. */
209 if (*buffer == NULL && size > 0)
211 *buffer = apr_palloc(pool, size);
213 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
215 /* Since we have the entire contents of the file we can
218 SVN_ERR(svn_io_file_close(*file, pool));
229 /* For all FILES_LEN files in the ALL_FILES array, increment the curp
230 * pointer. If a file points before the beginning of file, let it point at
231 * the first byte again. If the end of the current chunk is reached, read
232 * the next chunk in the buffer and point curp to the start of the chunk.
233 * If EOF is reached, set curp equal to endp to indicate EOF.
 *
 * Only the simple case (curp < endp - 1) is handled inline; everything else
 * is delegated to increment_chunk(), whose result is wrapped in SVN_ERR(). */
234 #define INCREMENT_POINTERS(all_files, files_len, pool) \
236 apr_size_t svn_macro__i; \
238 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \
240 if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
241 (all_files)[svn_macro__i].curp++; \
243 SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \
248 /* For all FILES_LEN files in the ALL_FILES array, decrement the curp
249 * pointer. If the start of a chunk is reached, read the previous chunk in
250 * the buffer and point curp to the last byte of the chunk. If the
251 * beginning of a FILE is reached, set chunk to -1 to indicate BOF.
 *
 * Only the simple case (curp > buffer) is handled inline; everything else
 * is delegated to decrement_chunk(), whose result is wrapped in SVN_ERR(). */
252 #define DECREMENT_POINTERS(all_files, files_len, pool) \
254 apr_size_t svn_macro__i; \
256 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \
258 if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
259 (all_files)[svn_macro__i].curp--; \
261 SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \
/* Move FILE forward across a chunk boundary; this is the slow path of
   INCREMENT_POINTERS.  Three situations are told apart by FILE->chunk:
   -1 (positioned before the beginning of the file), the last chunk, or
   any earlier chunk. */
267 increment_chunk(struct file_info *file, apr_pool_t *pool)
/* Number of the chunk that contains the end of the file. */
270 apr_off_t last_chunk = offset_to_chunk(file->size);
272 if (file->chunk == -1)
274 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
276 file->curp = file->buffer;
278 else if (file->chunk == last_chunk)
280 /* We are at the last chunk. Indicate EOF by setting curp == endp. */
281 file->curp = file->endp;
285 /* There are still chunks left. Read next chunk and reset pointers. */
/* The final chunk only holds the remainder of the file; every other chunk
   is a full CHUNK_SIZE bytes. */
287 length = file->chunk == last_chunk ?
288 offset_in_chunk(file->size) : CHUNK_SIZE;
289 SVN_ERR(read_chunk(file->file, file->path, file->buffer,
290 length, chunk_to_offset(file->chunk),
292 file->endp = file->buffer + length;
293 file->curp = file->buffer;
/* Move FILE backwards across a chunk boundary; this is the slow path of
   DECREMENT_POINTERS. */
301 decrement_chunk(struct file_info *file, apr_pool_t *pool)
303 if (file->chunk == 0)
305 /* We are already at the first chunk. Indicate BOF (Beginning Of File)
306 by setting chunk = -1 and curp = endp - 1. Both conditions are
307 important. They help the increment step to catch the BOF situation
308 in an efficient way: with curp == endp - 1 the inline test in
   INCREMENT_POINTERS (curp < endp - 1) fails, so increment_chunk() is
   called and finds chunk == -1. */
310 file->curp = file->endp - 1;
314 /* Read previous chunk and reset pointers. */
316 SVN_ERR(read_chunk(file->file, file->path, file->buffer,
317 CHUNK_SIZE, chunk_to_offset(file->chunk),
/* The chunk read here is a full CHUNK_SIZE bytes; curp is placed on its
   final byte. */
319 file->endp = file->buffer + CHUNK_SIZE;
320 file->curp = file->endp - 1;
327 /* Check whether one of the FILEs has its pointers 'before' the beginning of
328 * the file (this can happen while scanning backwards). This is the case if
329 * one of them has chunk == -1. */
/* Looks at the first FILE_LEN entries of FILE; chunk == -1 is the BOF
   marker described in decrement_chunk(). */
331 is_one_at_bof(struct file_info file[], apr_size_t file_len)
335 for (i = 0; i < file_len; i++)
336 if (file[i].chunk == -1)
342 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
343 * one of them has curp == endp (this can only happen at the last chunk)) */
/* Looks at the first FILE_LEN entries of FILE; curp == endp is the EOF
   marker set by increment_chunk() once the last chunk is exhausted. */
345 is_one_at_eof(struct file_info file[], apr_size_t file_len)
349 for (i = 0; i < file_len; i++)
350 if (file[i].curp == file[i].endp)
356 /* Quickly determine whether there is an EOL character in CHUNK
357 * (mainly copied and pasted from eol.c#svn_eol__find_eol_start).
360 #if SVN_UNALIGNED_ACCESS_IS_OK
361 static svn_boolean_t contains_eol(apr_uintptr_t chunk)
363 apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
364 apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
366 r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
367 n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
369 return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
373 /* Find the prefix which is identical between all elements of the FILE array.
374 * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
375 * set to TRUE if one of the FILEs reached its end while scanning prefix,
376 * i.e. at least one file consisted entirely of prefix. Otherwise,
377 * REACHED_ONE_EOF is set to FALSE.
379 * After this function is finished, the buffers, chunks, curp's and endp's
380 * of the FILEs are set to point at the first byte after the prefix. */
382 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
383 struct file_info file[], apr_size_t file_len,
386 svn_boolean_t had_cr = FALSE;
387 svn_boolean_t is_match;
391 *reached_one_eof = FALSE;
/* Seed IS_MATCH: compare the current byte of every other file with the
   current byte of file[0]. */
393 for (i = 1, is_match = TRUE; i < file_len; i++)
394 is_match = is_match && *file[0].curp == *file[i].curp;
397 #if SVN_UNALIGNED_ACCESS_IS_OK
398 apr_ssize_t max_delta, delta;
399 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
401 /* ### TODO: see if we can take advantage of
402 diff options like ignore_eol_style or ignore_space. */
403 /* check for eol, and count; a '\n' that directly follows a '\r' is not
   treated as a new eol (see the !had_cr test below) */
404 if (*file[0].curp == '\r')
409 else if (*file[0].curp == '\n' && !had_cr)
418 INCREMENT_POINTERS(file, file_len, pool);
420 #if SVN_UNALIGNED_ACCESS_IS_OK
422 /* Try to advance as far as possible with machine-word granularity.
423 * Determine how far we may advance with chunky ops without reaching
424 * endp for any of the files.
425 * Signedness is important here if curp gets close to endp: MAX_DELTA and
 * DELTA are apr_ssize_t, so when fewer than sizeof(apr_uintptr_t) bytes
 * are left the result is negative and the word-wise loop is skipped.
427 max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
428 for (i = 1; i < file_len; i++)
430 delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
431 if (delta < max_delta)
436 for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
438 apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
439 if (contains_eol(chunk))
442 for (i = 1; i < file_len; i++)
443 if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
455 /* We either found a mismatch or an EOL at or shortly behind curp+delta
456 * or we cannot proceed with chunky ops without exceeding endp.
457 * In any way, everything up to curp + delta is equal and not an EOL.
459 for (i = 0; i < file_len; i++)
460 file[i].curp += delta;
462 /* Skipped data without EOL markers, so last char was not a CR. */
/* Record whether any of the files has run out of data (curp == endp). */
467 *reached_one_eof = is_one_at_eof(file, file_len);
468 if (*reached_one_eof)
/* Re-evaluate the byte-wise match at the new position. */
471 for (i = 1, is_match = TRUE; i < file_len; i++)
472 is_match = is_match && *file[0].curp == *file[i].curp;
477 /* Check if we ended in the middle of a \r\n for one file, but \r for
478 another. If so, back up one byte, so the next loop will back up
479 the entire line. Also decrement lines, since we counted one
480 too many for the \r. */
481 svn_boolean_t ended_at_nonmatching_newline = FALSE;
482 for (i = 0; i < file_len; i++)
483 if (file[i].curp < file[i].endp)
484 ended_at_nonmatching_newline = ended_at_nonmatching_newline
485 || *file[i].curp == '\n';
486 if (ended_at_nonmatching_newline)
489 DECREMENT_POINTERS(file, file_len, pool);
493 /* Back up one byte, so we point at the last identical byte */
494 DECREMENT_POINTERS(file, file_len, pool);
496 /* Back up to the last eol sequence (\n, \r\n or \r) */
497 while (!is_one_at_bof(file, file_len) &&
498 *file[0].curp != '\n' && *file[0].curp != '\r')
499 DECREMENT_POINTERS(file, file_len, pool);
501 /* Slide one byte forward, to point past the eol sequence */
502 INCREMENT_POINTERS(file, file_len, pool);
/* Hand the number of counted prefix lines back to the caller. */
504 *prefix_lines = lines;
510 /* The number of identical suffix lines to keep with the middle section. These
511 * lines are not eliminated as suffix, and can be picked up by the token
512 * parsing and lcs steps. This is mainly for backward compatibility with
513 * the previous diff (and blame) output (if there are multiple diff solutions,
514 * our lcs algorithm prefers taking common lines from the start, rather than
515 * from the end. By giving it back some suffix lines, we give it some wiggle
516 * room to find the exact same diff as before).
518 * The number 50 is more or less arbitrary, based on some real-world tests
519 * with big files (and then doubling the required number to be on the safe
520 * side). This has a negligible effect on the power of the optimization. */
521 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
/* Only defined here when the build has not already provided a value. */
522 #ifndef SUFFIX_LINES_TO_KEEP
523 #define SUFFIX_LINES_TO_KEEP 50
526 /* Find the suffix which is identical between all elements of the FILE array.
527 * Return the number of suffix lines in SUFFIX_LINES.
529 * Before this function is called the FILEs' pointers and chunks should be
530 * positioned right after the identical prefix (which is the case after
531 * find_identical_prefix), so we can determine where suffix scanning should
532 * ultimately stop. */
534 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
535 apr_size_t file_len, apr_pool_t *pool)
/* Private cursors for the backward scan.  FILE's own positions (which mark
   the end of the prefix) are only read here; the suffix results are copied
   into FILE at the very end.  The array has room for 4 files. */
537 struct file_info file_for_suffix[4] = { { 0 } };
539 apr_off_t suffix_min_chunk0;
540 apr_off_t suffix_min_offset0;
541 apr_off_t min_file_size;
542 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
543 svn_boolean_t is_match;
545 svn_boolean_t had_cr;
546 svn_boolean_t had_nl;
549 /* Initialize file_for_suffix[].
550 Read last chunk, position curp at last byte. */
551 for (i = 0; i < file_len; i++)
553 file_for_suffix[i].path = file[i].path;
554 file_for_suffix[i].file = file[i].file;
555 file_for_suffix[i].size = file[i].size;
556 file_for_suffix[i].chunk =
557 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
558 length[i] = offset_in_chunk(file_for_suffix[i].size);
561 /* last chunk is an empty chunk -> start at next-to-last chunk */
562 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
563 length[i] = CHUNK_SIZE;
566 if (file_for_suffix[i].chunk == file[i].chunk)
568 /* Prefix ended in last chunk, so we can reuse the prefix buffer */
569 file_for_suffix[i].buffer = file[i].buffer;
573 /* There is at least more than 1 chunk,
574 so allocate full chunk size buffer */
575 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
576 SVN_ERR(read_chunk(file_for_suffix[i].file, file_for_suffix[i].path,
577 file_for_suffix[i].buffer, length[i],
578 chunk_to_offset(file_for_suffix[i].chunk),
581 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
582 file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
585 /* Get the chunk and pointer offset (for file[0]) at which we should stop
586 scanning backward for the identical suffix, i.e. when we reach prefix. */
587 suffix_min_chunk0 = file[0].chunk;
588 suffix_min_offset0 = file[0].curp - file[0].buffer;
590 /* Compensate if other files are smaller than file[0] */
591 for (i = 1, min_file_size = file[0].size; i < file_len; i++)
592 if (file[i].size < min_file_size)
593 min_file_size = file[i].size;
594 if (file[0].size > min_file_size)
/* NOTE(review): the offset sum below is not normalised, so
   suffix_min_offset0 can reach or exceed CHUNK_SIZE without carrying
   into suffix_min_chunk0 - confirm that this is intended. */
596 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
597 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
600 /* Scan backwards until mismatch or until we reach the prefix. */
601 for (i = 1, is_match = TRUE; i < file_len; i++)
603 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
604 if (is_match && *file_for_suffix[0].curp != '\r'
605 && *file_for_suffix[0].curp != '\n')
606 /* Count an extra line for the last line not ending in an eol. */
612 svn_boolean_t reached_prefix;
613 #if SVN_UNALIGNED_ACCESS_IS_OK
614 /* Initialize the minimum pointer positions. */
615 const char *min_curp[4];
616 svn_boolean_t can_read_word;
617 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619 /* ### TODO: see if we can take advantage of
620 diff options like ignore_eol_style or ignore_space. */
621 /* check for eol, and count; scanning backwards, a '\r' that directly
   precedes a '\n' is not treated as a new eol (see the !had_nl test) */
622 if (*file_for_suffix[0].curp == '\n')
627 else if (*file_for_suffix[0].curp == '\r' && !had_nl)
636 DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638 #if SVN_UNALIGNED_ACCESS_IS_OK
639 for (i = 0; i < file_len; i++)
640 min_curp[i] = file_for_suffix[i].buffer;
642 /* If we are in the same chunk that contains the last part of the common
643 prefix, use the min_curp[0] pointer to make sure we don't get a
644 suffix that overlaps the already determined common prefix. */
645 if (file_for_suffix[0].chunk == suffix_min_chunk0)
646 min_curp[0] += suffix_min_offset0;
648 /* Scan quickly by reading with machine-word granularity. */
649 for (i = 0, can_read_word = TRUE; i < file_len; i++)
650 can_read_word = can_read_word
651 && ( (file_for_suffix[i].curp + 1
652 - sizeof(apr_uintptr_t))
654 while (can_read_word)
658 /* For each file curp is positioned at the current byte, but we
659 want to examine the current byte and the ones before the current
660 location as one machine word. */
662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663 - sizeof(apr_uintptr_t));
664 if (contains_eol(chunk))
667 for (i = 1, is_match = TRUE; i < file_len; i++)
670 == *(const apr_uintptr_t *)
671 (file_for_suffix[i].curp + 1
672 - sizeof(apr_uintptr_t)));
677 for (i = 0; i < file_len; i++)
679 file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
680 can_read_word = can_read_word
681 && ( (file_for_suffix[i].curp + 1
682 - sizeof(apr_uintptr_t))
686 /* We skipped some bytes, so there are no closing EOLs */
691 /* The > min_curp[i] check leaves at least one final byte for checking
692 in the non block optimized case below. */
/* The prefix boundary is only tracked for file[0]: same chunk and same
   offset within the buffer as computed above. */
695 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
696 && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
697 == suffix_min_offset0;
698 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
702 for (i = 1; i < file_len; i++)
704 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
707 /* Slide one byte forward, to point at the first byte of identical suffix */
708 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
710 /* Slide forward until we find an eol sequence to add the rest of the line
711 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
712 one file reaches its end. */
716 while (!is_one_at_eof(file_for_suffix, file_len)
717 && *file_for_suffix[0].curp != '\n'
718 && *file_for_suffix[0].curp != '\r')
719 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
721 /* Slide one or two more bytes, to point past the eol. */
722 if (!is_one_at_eof(file_for_suffix, file_len)
723 && *file_for_suffix[0].curp == '\r')
727 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
729 if (!is_one_at_eof(file_for_suffix, file_len)
730 && *file_for_suffix[0].curp == '\n')
734 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
737 while (!is_one_at_eof(file_for_suffix, file_len)
738 && suffix_lines_to_keep--);
740 if (is_one_at_eof(file_for_suffix, file_len))
743 /* Save the final suffix information in the original file_info */
744 for (i = 0; i < file_len; i++)
746 file[i].suffix_start_chunk = file_for_suffix[i].chunk;
747 file[i].suffix_offset_in_chunk =
748 file_for_suffix[i].curp - file_for_suffix[i].buffer;
751 *suffix_lines = lines;
757 /* Let FILE stand for the array of file_info struct elements of BATON->files
758 * that are indexed by the elements of the DATASOURCE array.
759 * BATON's type is (svn_diff__file_baton_t *).
761 * For each file in the FILE array, open the file at FILE.path; initialize
762 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
763 * buffer and read the first chunk. Then find the prefix and suffix lines
764 * which are identical between all the files. Return the number of identical
765 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
768 * Finding the identical prefix and suffix allows us to exclude those from the
769 * rest of the diff algorithm, which increases performance by reducing the
772 * Implements svn_diff_fns2_t::datasources_open. */
774 datasources_open(void *baton,
775 apr_off_t *prefix_lines,
776 apr_off_t *suffix_lines,
777 const svn_diff_datasource_e *datasources,
778 apr_size_t datasources_len)
780 svn_diff__file_baton_t *file_baton = baton;
/* NOTE(review): these arrays hold 4 entries and are indexed up to
   DATASOURCES_LEN below; no bound check is visible here - confirm that
   callers never pass more than 4 datasources. */
781 struct file_info files[4];
782 apr_finfo_t finfo[4];
784 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
785 svn_boolean_t reached_one_eof;
789 /* Make sure prefix_lines and suffix_lines are set correctly, even if we
790 * exit early because one of the files is empty. */
794 /* Open datasources and read first chunk */
795 for (i = 0; i < datasources_len; i++)
797 struct file_info *file
798 = &file_baton->files[datasource_to_index(datasources[i])];
799 SVN_ERR(svn_io_file_open(&file->file, file->path,
800 APR_READ, APR_OS_DEFAULT, file_baton->pool));
801 SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
802 file->file, file_baton->pool));
803 file->size = finfo[i].size;
/* The first chunk is the whole file when the file is not larger than
   CHUNK_SIZE, and exactly CHUNK_SIZE bytes otherwise. */
804 length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
805 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
806 SVN_ERR(read_chunk(file->file, file->path, file->buffer,
807 length[i], 0, file_baton->pool));
808 file->endp = file->buffer + length[i];
809 file->curp = file->buffer;
810 /* Set suffix_start_chunk to a guard value, so if suffix scanning is
811 * skipped because one of the files is empty, or because of
812 * reached_one_eof, we can still easily check for the suffix during
813 * token reading (datasource_get_next_token). */
814 file->suffix_start_chunk = -1;
819 for (i = 0; i < datasources_len; i++)
821 /* There will not be any identical prefix/suffix, so we're done. */
824 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
826 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
827 files, datasources_len, file_baton->pool));
829 if (!reached_one_eof)
830 /* No file consisted totally of identical prefix,
831 * so there may be some identical suffix. */
832 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
837 /* Copy local results back to baton. */
838 for (i = 0; i < datasources_len; i++)
839 file_baton->files[datasource_to_index(datasources[i])] = files[i];
845 /* Implements svn_diff_fns2_t::datasource_close */
/* Deliberately performs no work; see the explanation in the body. */
847 datasource_close(void *baton, svn_diff_datasource_e datasource)
849 /* Do nothing. The token_compare() function needs previous datasources
850 * to stay available until all datasources are processed.
856 /* Implements svn_diff_fns2_t::datasource_get_next_token */
858 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
859 svn_diff_datasource_e datasource)
861 svn_diff__file_baton_t *file_baton = baton;
862 svn_diff__file_token_t *file_token;
863 struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
867 apr_off_t last_chunk;
870 /* Did the last chunk end in a CR character? */
871 svn_boolean_t had_cr = FALSE;
/* Number of the chunk that contains the end of the file. */
878 last_chunk = offset_to_chunk(file->size);
880 /* Are we already at the end of a chunk? */
884 if (last_chunk == file->chunk)
885 return SVN_NO_ERROR; /* EOF */
887 /* Or right before an identical suffix in the next chunk? */
888 if (file->chunk + 1 == file->suffix_start_chunk
889 && file->suffix_offset_in_chunk == 0)
893 /* Stop when we encounter the identical suffix. If suffix scanning was not
894 * performed, suffix_start_chunk will be -1, so this condition will never
896 if (file->chunk == file->suffix_start_chunk
897 && (curp - file->buffer) == file->suffix_offset_in_chunk)
900 /* Allocate a new token, or fetch one from the "reusable tokens" list. */
901 file_token = file_baton->tokens;
904 file_baton->tokens = file_token->next;
908 file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
/* Start a fresh token at the current read position.  Both lengths start
   at zero and are accumulated below while the line is consumed, possibly
   across several chunks. */
911 file_token->datasource = datasource;
912 file_token->offset = chunk_to_offset(file->chunk)
913 + (curp - file->buffer);
914 file_token->norm_offset = file_token->offset;
915 file_token->raw_length = 0;
916 file_token->length = 0;
920 eol = svn_eol__find_eol_start(curp, endp - curp);
923 had_cr = (*eol == '\r');
925 /* If we have the whole eol sequence in the chunk... */
926 if (!(had_cr && eol == endp))
928 /* Also skip past the '\n' in an '\r\n' sequence. */
929 if (had_cr && *eol == '\n')
935 if (file->chunk == last_chunk)
941 length = endp - curp;
942 file_token->raw_length += length;
946 svn_diff__normalize_buffer(&c, &length,
947 &file->normalize_state,
948 curp, file_baton->options);
949 if (file_token->length == 0)
951 /* When we are reading the first part of the token, move the
952 normalized offset past leading ignored characters, if any. */
953 file_token->norm_offset += (c - curp);
955 file_token->length += length;
/* Fold the normalized bytes of this piece into the running checksum H. */
956 h = svn__adler32(h, c, length);
959 curp = endp = file->buffer;
961 length = file->chunk == last_chunk ?
962 offset_in_chunk(file->size) : CHUNK_SIZE;
966 /* Issue #4283: Normally we should have checked for reaching the skipped
967 suffix here, but because we assume that a suffix always starts on a
968 line and token boundary we rely on catching the suffix earlier in this
971 When changing things here, make sure the whitespace settings are
972 applied, or we might not reach the exact suffix boundary as token
974 SVN_ERR(read_chunk(file->file, file->path,
976 chunk_to_offset(file->chunk),
979 /* If the last chunk ended in a CR, we're done. */
990 file_token->raw_length += length;
993 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
994 * with a spurious empty token. Avoid returning it.
995 * Note that we use the unnormalized length; we don't want a line containing
996 * only spaces (and no trailing newline) to appear like a non-existent
998 if (file_token->raw_length > 0)
1001 svn_diff__normalize_buffer(&c, &length,
1002 &file->normalize_state,
1003 curp, file_baton->options);
1004 if (file_token->length == 0)
1006 /* When we are reading the first part of the token, move the
1007 normalized offset past leading ignored characters, if any. */
1008 file_token->norm_offset += (c - curp);
1011 file_token->length += length;
/* Hand the final checksum and the finished token to the caller. */
1013 *hash = svn__adler32(h, c, length);
1014 *token = file_token;
1017 return SVN_NO_ERROR;
1020 #define COMPARE_CHUNK_SIZE 4096
1022 /* Implements svn_diff_fns2_t::token_compare */
/* Compare the normalized contents of TOKEN1 and TOKEN2 (both are
   svn_diff__file_token_t pointers) and store the outcome in *COMPARE.
   Tokens of different normalized length are decided on the length alone.
   Token data that is still in its file's current chunk buffer is compared
   in place; otherwise it is re-read from disk in pieces of at most
   COMPARE_CHUNK_SIZE bytes and normalized again before the memcmp(). */
1023 static svn_error_t *
1024 token_compare(void *baton, void *token1, void *token2, int *compare)
1026 svn_diff__file_baton_t *file_baton = baton;
1027 svn_diff__file_token_t *file_token[2];
1028 char buffer[2][COMPARE_CHUNK_SIZE];
1030 apr_off_t offset[2];
1031 struct file_info *file[2];
1032 apr_off_t length[2];
1033 apr_off_t total_length;
1034 /* How much is left to read of each token from the file. */
1035 apr_off_t raw_length[2];
1037 svn_diff__normalize_state_t state[2];
1039 file_token[0] = token1;
1040 file_token[1] = token2;
/* Different normalized lengths: return without reading any file data. */
1041 if (file_token[0]->length < file_token[1]->length)
1044 return SVN_NO_ERROR;
1047 if (file_token[0]->length > file_token[1]->length)
1050 return SVN_NO_ERROR;
1053 total_length = file_token[0]->length;
1054 if (total_length == 0)
1057 return SVN_NO_ERROR;
1060 for (i = 0; i < 2; ++i)
1062 int idx = datasource_to_index(file_token[i]->datasource);
1064 file[i] = &file_baton->files[idx];
1065 offset[i] = file_token[i]->norm_offset;
1066 state[i] = svn_diff__normalize_state_normal;
1068 if (offset_to_chunk(offset[i]) == file[i]->chunk)
1070 /* If the start of the token is in memory, the entire token is
1073 bufp[i] = file[i]->buffer;
1074 bufp[i] += offset_in_chunk(offset[i]);
1076 length[i] = total_length;
1085 /* When we skipped the first part of the token via the whitespace
1086 normalization we must reduce the raw length of the token */
1087 skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1089 raw_length[i] = file_token[i]->raw_length - skipped;
1096 for (i = 0; i < 2; i++)
1100 /* Error if raw_length is 0, that's an unexpected change
1101 * of the file that can happen when ignoring whitespace
1102 * and that can lead to an infinite loop. */
1103 if (raw_length[i] == 0)
1104 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1106 _("The file '%s' changed unexpectedly"
1110 /* Read a chunk from disk into a buffer */
1111 bufp[i] = buffer[i];
1112 length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1113 COMPARE_CHUNK_SIZE : raw_length[i];
1115 SVN_ERR(read_chunk(file[i]->file,
1117 bufp[i], length[i], offset[i],
1119 offset[i] += length[i];
1120 raw_length[i] -= length[i];
1121 /* bufp[i] gets reset to buffer[i] before reading each chunk,
1122 so, overwriting it isn't a problem */
1123 svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1124 bufp[i], file_baton->options);
1126 /* assert(length[i] == file_token[i]->length); */
/* Only as many bytes as both sides currently have available can be
   compared in this round. */
1130 len = length[0] > length[1] ? length[1] : length[0];
1132 /* Compare two chunks (that could be entire tokens if they both reside
1135 *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1137 return SVN_NO_ERROR;
1139 total_length -= len;
1145 while(total_length > 0);
1148 return SVN_NO_ERROR;
1152 /* Implements svn_diff_fns2_t::token_discard */
/* Put TOKEN back on the baton's free list instead of releasing its memory;
   datasource_get_next_token() takes tokens from that list before it
   allocates new ones. */
1154 token_discard(void *baton, void *token)
1156 svn_diff__file_baton_t *file_baton = baton;
1157 svn_diff__file_token_t *file_token = token;
1159 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1160 file_token->next = file_baton->tokens;
1161 file_baton->tokens = file_token;
1165 /* Implements svn_diff_fns2_t::token_discard_all */
/* Release everything this file allocated from the baton's pool (tokens and
   chunk buffers are both allocated from file_baton->pool).
   NOTE(review): file_baton->tokens is not reset in the visible code, so
   the free list must not be used after this call - confirm. */
1167 token_discard_all(void *baton)
1169 svn_diff__file_baton_t *file_baton = baton;
1171 /* Discard all memory in use by the tokens, and close all open files. */
1172 svn_pool_clear(file_baton->pool);
/* Callback table of type svn_diff_fns2_t that plugs the file-based
   implementations from this file into the diff machinery. */
1176 static const svn_diff_fns2_t svn_diff__file_vtable =
1180 datasource_get_next_token,
1186 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
1187 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1189 /* Options supported by svn_diff_file_options_parse(). */
1190 static const apr_getopt_option_t diff_options[] =
1192 { "ignore-space-change", 'b', 0, NULL },
1193 { "ignore-all-space", 'w', 0, NULL },
/* Long-only option: identified by SVN_DIFF__OPT_IGNORE_EOL_STYLE instead
   of an option letter. */
1194 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1195 { "show-c-function", 'p', 0, NULL },
1196 /* ### For compatibility; we don't support the argument to -u, because
1197 * ### we don't have optional argument support. */
1198 { "unified", 'u', 0, NULL },
/* Trailing entry with a NULL name; presumably the end-of-table marker
   expected by apr_getopt_long() - confirm against the APR documentation. */
1199 { NULL, 0, 0, NULL }
1202 svn_diff_file_options_t *
1203 svn_diff_file_options_create(apr_pool_t *pool)
1205 return apr_pcalloc(pool, sizeof(svn_diff_file_options_t));
1208 /* A baton for use with opt_parsing_error_func(). */
1209 struct opt_parsing_error_baton_t
1215 /* Store an error message from apr_getopt_long(). Set BATON->err to a new
1216 * error with a message generated from FMT and the remaining arguments.
1217 * Implements apr_getopt_err_fn_t. */
1219 opt_parsing_error_func(void *baton,
1220 const char *fmt, ...)
1222 struct opt_parsing_error_baton_t *b = baton;
1223 const char *message;
/* Render FMT and its arguments into a string allocated from B->pool. */
1227 message = apr_pvsprintf(b->pool, fmt, ap);
1230 /* Skip leading ": " (if present, which it always is in known cases). */
1231 if (strncmp(message, ": ", 2) == 0)
/* NOTE(review): this assignment replaces whatever B->err held before; an
   error stored by an earlier call is neither chained nor cleared here. */
1234 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
/* Parse the diff option strings in ARGS (an array of char pointers) and
 * record the recognised settings in OPTIONS.
 *
 * Returns SVN_NO_ERROR on success.  An option that apr_getopt_long()
 * rejects, or an argument left over after the options, produces an
 * SVN_ERR_INVALID_DIFF_OPTION error.  Scratch data, including the argument
 * vector handed to APR, is allocated in the pool passed by the caller. */
1238 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1239 const apr_array_header_t *args,
1243 struct opt_parsing_error_baton_t opt_parsing_error_baton;
1244 /* Make room for each option (starting at index 1) plus trailing NULL. */
1245 const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1247 opt_parsing_error_baton.err = NULL;
1248 opt_parsing_error_baton.pool = pool;
/* The caller's strings go into argv[1] onward; argv[0] is not taken from
 * ARGS (presumably it stands in for the program name that getopt-style
 * parsers skip -- confirm). */
1251 memcpy((void *) (argv + 1), args->elts, sizeof(char*) * args->nelts);
1252 argv[args->nelts + 1] = NULL;
1254 apr_getopt_init(&os, pool, args->nelts + 1, argv);
1256 /* Capture any error message from apr_getopt_long(). This will typically
1257 * say which option is wrong, which we would not otherwise know. */
1258 os->errfn = opt_parsing_error_func;
1259 os->errarg = &opt_parsing_error_baton;
1263 const char *opt_arg;
1265 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
/* APR reports end-of-file status once all options have been consumed. */
1267 if (APR_STATUS_IS_EOF(err))
1270 /* Wrap apr_getopt_long()'s error message. Its doc string implies
1271 * it always will produce one, but never mind if it doesn't. Avoid
1272 * using the message associated with the return code ERR, because
1273 * it refers to the "command line" which may be misleading here. */
1274 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1275 opt_parsing_error_baton.err,
1276 _("Error in options to internal diff"));
1281 /* -w takes precedence over -b. */
1282 if (! options->ignore_space)
1283 options->ignore_space = svn_diff_file_ignore_space_change;
1286 options->ignore_space = svn_diff_file_ignore_space_all;
1288 case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1289 options->ignore_eol_style = TRUE;
1292 options->show_c_function = TRUE;
1299 /* Check for spurious arguments. */
1300 if (os->ind < os->argc)
1301 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1302 _("Invalid argument '%s' in diff options"),
1305 return SVN_NO_ERROR;
/* Compute the differences between the files ORIGINAL and MODIFIED according
 * to OPTIONS and return the result in *DIFF.  The caller's pool is passed on
 * to svn_diff_diff_2().
 *
 * Working data lives in a subpool that is destroyed before a successful
 * return.
 * NOTE(review): if svn_diff_diff_2() fails, SVN_ERR returns early and the
 * subpool is not destroyed here; presumably it is reclaimed when the parent
 * pool is cleared or destroyed -- confirm that is acceptable for callers. */
1309 svn_diff_file_diff_2(svn_diff_t **diff,
1310 const char *original,
1311 const char *modified,
1312 const svn_diff_file_options_t *options,
1315 svn_diff__file_baton_t baton = { 0 };
1317 baton.options = options;
1318 baton.files[0].path = original;
1319 baton.files[1].path = modified;
1320 baton.pool = svn_pool_create(pool);
1322 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1324 svn_pool_destroy(baton.pool);
1325 return SVN_NO_ERROR;
/* Three-way variant: compute the differences between the files ORIGINAL,
 * MODIFIED and LATEST (baton slots 0, 1 and 2) according to OPTIONS and
 * return the result in *DIFF.  The caller's pool is passed on to
 * svn_diff_diff3_2().
 *
 * Working data lives in a subpool that is destroyed before a successful
 * return.
 * NOTE(review): if svn_diff_diff3_2() fails, SVN_ERR returns early and the
 * subpool is not destroyed here; presumably it is reclaimed when the parent
 * pool is cleared or destroyed -- confirm that is acceptable for callers. */
1329 svn_diff_file_diff3_2(svn_diff_t **diff,
1330 const char *original,
1331 const char *modified,
1333 const svn_diff_file_options_t *options,
1336 svn_diff__file_baton_t baton = { 0 };
1338 baton.options = options;
1339 baton.files[0].path = original;
1340 baton.files[1].path = modified;
1341 baton.files[2].path = latest;
1342 baton.pool = svn_pool_create(pool);
1344 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1346 svn_pool_destroy(baton.pool);
1347 return SVN_NO_ERROR;
/* Four-way variant: compute the differences between the files ORIGINAL,
 * MODIFIED, LATEST and ANCESTOR (baton slots 0 to 3) according to OPTIONS
 * and return the result in *DIFF.  The caller's pool is passed on to
 * svn_diff_diff4_2().
 *
 * Working data lives in a subpool that is destroyed before a successful
 * return.
 * NOTE(review): if svn_diff_diff4_2() fails, SVN_ERR returns early and the
 * subpool is not destroyed here; presumably it is reclaimed when the parent
 * pool is cleared or destroyed -- confirm that is acceptable for callers. */
1351 svn_diff_file_diff4_2(svn_diff_t **diff,
1352 const char *original,
1353 const char *modified,
1355 const char *ancestor,
1356 const svn_diff_file_options_t *options,
1359 svn_diff__file_baton_t baton = { 0 };
1361 baton.options = options;
1362 baton.files[0].path = original;
1363 baton.files[1].path = modified;
1364 baton.files[2].path = latest;
1365 baton.files[3].path = ancestor;
1366 baton.pool = svn_pool_create(pool);
1368 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1370 svn_pool_destroy(baton.pool);
1371 return SVN_NO_ERROR;
1375 /** Display unified context diffs **/
1377 /* Maximum length of the extra context to show when show_c_function is set.
1378 * GNU diff uses 40, let's be brave and use 50 instead. */
/* The hunk_extra_context buffer of svn_diff__file_output_baton_t is
 * dimensioned as this value plus one byte. */
1379 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
/* State shared by the routines that write a unified diff of two files.
 * In the two-element arrays, index 0 refers to the original file and
 * index 1 to the modified file. */
1380 typedef struct svn_diff__file_output_baton_t
/* Where the diff is written, and the encoding used for header text. */
1382 svn_stream_t *output_stream;
1383 const char *header_encoding;
1385 /* Cached markers, in header_encoding. */
1386 const char *context_str;
1387 const char *delete_str;
1388 const char *insert_str;
/* Path and open file handle of each of the two files. */
1390 const char *path[2];
1391 apr_file_t *file[2];
/* Number of lines consumed so far from each file; output_unified_line()
 * increments it once per call. */
1393 apr_off_t current_line[2];
/* Read buffer for each file and the number of bytes in it that have not
 * been consumed yet. */
1395 char buffer[2][4096];
1396 apr_size_t length[2];
/* Start line and line count of the pending hunk on each side, and the hunk
 * body accumulated so far. */
1399 apr_off_t hunk_start[2];
1400 apr_off_t hunk_length[2];
1401 svn_stringbuf_t *hunk;
1403 /* Should we emit C functions in the unified diff header */
1404 svn_boolean_t show_c_function;
1405 /* Extra strings to skip over if we match. */
1406 apr_array_header_t *extra_skip_match;
1407 /* "Context" to append to the @@ line when the show_c_function option
1409 svn_stringbuf_t *extra_context;
1410 /* Extra context for the current hunk. */
1411 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1414 } svn_diff__file_output_baton_t;
/* Tells output_unified_line() what to do with the line it consumes. */
1416 typedef enum svn_diff__file_output_unified_type_e
/* Consume the line without adding it to the hunk. */
1418 svn_diff__file_output_unified_skip,
/* Add the line with the context marker; counted on both sides. */
1419 svn_diff__file_output_unified_context,
/* Add the line with the delete marker; counted on the original side. */
1420 svn_diff__file_output_unified_delete,
/* Add the line with the insert marker; counted on the modified side. */
1421 svn_diff__file_output_unified_insert
1422 } svn_diff__file_output_unified_type_e;
/* Consume the next line of file IDX (0 = original, 1 = modified) and, unless
 * TYPE is svn_diff__file_output_unified_skip, append it to BATON->hunk
 * preceded by the marker string that belongs to TYPE.
 *
 * BATON->current_line[IDX] is incremented on every call, including calls
 * made after the end of the file was reached.  The hunk_length counters are
 * incremented according to TYPE.  A line may extend over several buffer
 * refills; each refill reads up to sizeof(BATON->buffer[IDX]) bytes with
 * svn_io_file_read().
 *
 * When BATON->show_c_function is set, a skipped or context line that starts
 * with a letter, '$' or '_' and matches none of the globs in
 * BATON->extra_skip_match empties BATON->extra_context, which then collects
 * the text of that line.
 *
 * At end of file, when part of a non-skipped line has already been
 * processed, the hunk may additionally receive the text produced by
 * svn_diff__unified_append_no_newline_msg(). */
1425 static svn_error_t *
1426 output_unified_line(svn_diff__file_output_baton_t *baton,
1427 svn_diff__file_output_unified_type_e type, int idx)
1433 svn_boolean_t bytes_processed = FALSE;
1434 svn_boolean_t had_cr = FALSE;
1435 /* Are we collecting extra context? */
1436 svn_boolean_t collect_extra = FALSE;
/* Resume from the read position stored by the previous call. */
1438 length = baton->length[idx];
1439 curp = baton->curp[idx];
1441 /* Lazily update the current line even if we're at EOF.
1442 * This way we fake output of context at EOF
1444 baton->current_line[idx]++;
1446 if (length == 0 && apr_file_eof(baton->file[idx]))
1448 return SVN_NO_ERROR;
1455 if (!bytes_processed)
1459 case svn_diff__file_output_unified_context:
1460 svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1461 baton->hunk_length[0]++;
1462 baton->hunk_length[1]++;
1464 case svn_diff__file_output_unified_delete:
1465 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1466 baton->hunk_length[0]++;
1468 case svn_diff__file_output_unified_insert:
1469 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1470 baton->hunk_length[1]++;
1476 if (baton->show_c_function
1477 && (type == svn_diff__file_output_unified_skip
1478 || type == svn_diff__file_output_unified_context)
1479 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1480 && !svn_cstring_match_glob_list(curp,
1481 baton->extra_skip_match))
1483 svn_stringbuf_setempty(baton->extra_context);
1484 collect_extra = TRUE;
1488 eol = svn_eol__find_eol_start(curp, length);
1494 had_cr = (*eol == '\r');
1496 len = (apr_size_t)(eol - curp);
1498 if (! had_cr || len < length)
1500 if (had_cr && *eol == '\n')
1508 if (type != svn_diff__file_output_unified_skip)
1510 svn_stringbuf_appendbytes(baton->hunk, curp, len);
1514 svn_stringbuf_appendbytes(baton->extra_context,
1518 baton->curp[idx] = eol;
1519 baton->length[idx] = length;
1527 if (type != svn_diff__file_output_unified_skip)
1529 svn_stringbuf_appendbytes(baton->hunk, curp, length);
1534 svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1537 bytes_processed = TRUE;
1540 curp = baton->buffer[idx];
1541 length = sizeof(baton->buffer[idx]);
1543 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1545 /* If the last chunk ended with a CR, we look for an LF at the start
1549 if (! err && length > 0 && *curp == '\n')
1551 if (type != svn_diff__file_output_unified_skip)
1553 svn_stringbuf_appendbyte(baton->hunk, *curp);
1555 /* We don't append the LF to extra_context, since it would
1556 * just be stripped anyway. */
1561 baton->curp[idx] = curp;
1562 baton->length[idx] = length;
1569 if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1572 if (err && APR_STATUS_IS_EOF(err->apr_err))
1574 svn_error_clear(err);
1575 /* Special case if we reach the end of file AND the last line is in the
1576 changed range AND the file doesn't end with a newline */
1577 if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1580 SVN_ERR(svn_diff__unified_append_no_newline_msg(
1581 baton->hunk, baton->header_encoding, baton->pool));
/* Record that no buffered bytes remain, so that the next call takes the
 * early end-of-file return near the top of this function. */
1584 baton->length[idx] = 0;
1587 return SVN_NO_ERROR;
/* Call output_unified_line() with TYPE on file SOURCE repeatedly until
 * OUTPUT_BATON->current_line[SOURCE] has reached UNTIL.  Does nothing when
 * that line has already been reached.  The first error from
 * output_unified_line() is returned immediately. */
1590 static APR_INLINE svn_error_t *
1591 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1593 svn_diff__file_output_unified_type_e type,
1596 while (output_baton->current_line[source] < until)
1598 SVN_ERR(output_unified_line(output_baton, type, source));
1600 return SVN_NO_ERROR;
/* Write the pending hunk, if there is one, to BATON->output_stream and
 * reset the hunk state for the next hunk.
 *
 * Trailing context from the original file is appended to the hunk first;
 * then the hunk header and the accumulated hunk text are written.  Returns
 * without writing anything when BATON->hunk is empty. */
1603 static svn_error_t *
1604 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1606 apr_off_t target_line;
1607 apr_size_t hunk_len;
1608 apr_off_t old_start;
1609 apr_off_t new_start;
1611 if (svn_stringbuf_isempty(baton->hunk))
1613 /* Nothing to flush */
1614 return SVN_NO_ERROR;
/* The trailing context extends SVN_DIFF__UNIFIED_CONTEXT_SIZE lines past
 * the last original line that is already part of the hunk. */
1617 target_line = baton->hunk_start[0] + baton->hunk_length[0]
1618 + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1620 /* Add trailing context to the hunk */
1621 SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1622 svn_diff__file_output_unified_context,
1625 old_start = baton->hunk_start[0];
1626 new_start = baton->hunk_start[1];
1628 /* If the file is non-empty, convert the line indexes from
1629 zero based to one based */
1630 if (baton->hunk_length[0])
1632 if (baton->hunk_length[1])
1635 /* Write the hunk header */
1636 SVN_ERR(svn_diff__unified_write_hunk_header(
1637 baton->output_stream, baton->header_encoding, "@@",
1638 old_start, baton->hunk_length[0],
1639 new_start, baton->hunk_length[1],
1640 baton->hunk_extra_context,
1643 /* Output the hunk content */
1644 hunk_len = baton->hunk->len;
1645 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1648 /* Prepare for the next hunk */
1649 baton->hunk_length[0] = 0;
1650 baton->hunk_length[1] = 0;
1651 baton->hunk_start[0] = 0;
1652 baton->hunk_start[1] = 0;
1653 svn_stringbuf_setempty(baton->hunk);
1655 return SVN_NO_ERROR;
/* Callback of svn_diff__file_output_unified_vtable for a range that differs
 * between the original and the modified file.
 *
 * Either extends the pending hunk or flushes it and starts a new one, then
 * adds the leading context, the deleted original lines and the inserted
 * modified lines to the hunk.  BATON is a svn_diff__file_output_baton_t.
 * The LATEST_START and LATEST_LENGTH arguments are not used by the code
 * shown here. */
1658 static svn_error_t *
1659 output_unified_diff_modified(void *baton,
1660 apr_off_t original_start, apr_off_t original_length,
1661 apr_off_t modified_start, apr_off_t modified_length,
1662 apr_off_t latest_start, apr_off_t latest_length)
1664 svn_diff__file_output_baton_t *output_baton = baton;
1665 apr_off_t context_prefix_length;
1666 apr_off_t prev_context_end;
1667 svn_boolean_t init_hunk = FALSE;
/* Leading context is at most SVN_DIFF__UNIFIED_CONTEXT_SIZE lines and cannot
 * reach back past the first line of the file. */
1669 if (original_start > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
1670 context_prefix_length = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1672 context_prefix_length = original_start;
1674 /* Calculate where the previous hunk will end if we would write it now
1675 (including the necessary context at the end) */
1676 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1678 prev_context_end = output_baton->hunk_start[0]
1679 + output_baton->hunk_length[0]
1680 + SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1684 prev_context_end = -1;
1686 if (output_baton->hunk_start[0] == 0
1687 && (original_length > 0 || modified_length > 0))
1691 /* If the changed range is far enough from the previous range, flush the current
1694 apr_off_t new_hunk_start = (original_start - context_prefix_length);
1696 if (output_baton->current_line[0] < new_hunk_start
1697 && prev_context_end <= new_hunk_start)
1699 SVN_ERR(output_unified_flush_hunk(output_baton));
1702 else if (output_baton->hunk_length[0] > 0
1703 || output_baton->hunk_length[1] > 0)
1705 /* We extend the current hunk */
1708 /* Original: Output the context preceding the changed range */
1709 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1710 svn_diff__file_output_unified_context,
1715 /* Original: Skip lines until we are at the beginning of the context we want
1717 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1718 svn_diff__file_output_unified_skip,
1719 original_start - context_prefix_length));
1721 /* Note that the above skip stores data for the show_c_function support below */
/* A new hunk may only be started while no hunk lines are pending. */
1725 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1726 && output_baton->hunk_length[1] == 0);
1728 output_baton->hunk_start[0] = original_start - context_prefix_length;
1729 output_baton->hunk_start[1] = modified_start - context_prefix_length;
1732 if (init_hunk && output_baton->show_c_function)
1735 const char *invalid_character;
1737 /* Save the extra context for later use.
1738 * Note that the last byte of the hunk_extra_context array is never
1739 * touched after it is zero-initialized, so the array is always
1741 strncpy(output_baton->hunk_extra_context,
1742 output_baton->extra_context->data,
1743 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1744 /* Trim whitespace at the end, most notably to get rid of any
1745 * newline characters. */
1746 p = strlen(output_baton->hunk_extra_context);
1748 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1750 output_baton->hunk_extra_context[--p] = '\0';
/* Zero every byte from the position reported by svn_utf__last_valid() to
 * the end of the buffer (presumably so that the length cut-off cannot
 * leave an incomplete multi-byte character behind -- confirm). */
1753 svn_utf__last_valid(output_baton->hunk_extra_context,
1754 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1755 for (p = invalid_character - output_baton->hunk_extra_context;
1756 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1758 output_baton->hunk_extra_context[p] = '\0';
1762 /* Modified: Skip lines until we are at the start of the changed range */
1763 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1764 svn_diff__file_output_unified_skip,
1767 /* Original: Output the context preceding the changed range */
1768 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1769 svn_diff__file_output_unified_context,
1772 /* Both: Output the changed range */
1773 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1774 svn_diff__file_output_unified_delete,
1775 original_start + original_length));
1776 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1777 svn_diff__file_output_unified_insert,
1778 modified_start + modified_length));
1780 return SVN_NO_ERROR;
1783 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
/* The modification time is obtained with svn_io_stat(), broken down with
 * apr_time_exp_lt(), formatted with a translatable strftime pattern and
 * converted to UTF-8.  The result is allocated in the caller's pool.
 * Errors from svn_io_stat() and from the UTF-8 conversion are returned.
 * NOTE(review): the return value of apr_strftime() is not checked. */
1784 static svn_error_t *
1785 output_unified_default_hdr(const char **header, const char *path,
1788 apr_finfo_t file_info;
1789 apr_time_exp_t exploded_time;
1790 char time_buffer[64];
1791 apr_size_t time_len;
1792 const char *utf8_timestr;
1794 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1795 apr_time_exp_lt(&exploded_time, file_info.mtime);
1797 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1798 /* Order of date components can be different in different languages */
1799 _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1801 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1803 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1805 return SVN_NO_ERROR;
/* Output callbacks used by svn_diff_file_output_unified3().  Only the slot
 * filled with output_unified_diff_modified is set; the other four are NULL. */
1808 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1810 NULL, /* output_common */
1811 output_unified_diff_modified,
1812 NULL, /* output_diff_latest */
1813 NULL, /* output_diff_common */
1814 NULL /* output_conflict */
/* Write DIFF as a unified diff of ORIGINAL_PATH against MODIFIED_PATH to
 * OUTPUT_STREAM.  The work is done only when svn_diff_contains_diffs()
 * reports differences for DIFF.
 *
 * ORIGINAL_HEADER, MODIFIED_HEADER: header text for each side; when NULL, a
 *   default made of the path, a tab and the file's mtime is generated.
 * HEADER_ENCODING: encoding for the header lines and the line markers.
 * RELATIVE_TO_DIR: when non-NULL, a path whose header has to be generated
 *   must lie inside this directory and is replaced by the child path;
 *   otherwise an SVN_ERR_BAD_RELATIVE_PATH error is returned.
 * SHOW_C_FUNCTION: enables collection of the extra context that is added to
 *   the hunk headers.
 *
 * NOTE(review): the two files are closed at the end of the successful path
 * only; an early SVN_ERR return leaves them open until, presumably, the
 * pool they were opened in is cleaned up -- confirm. */
1818 svn_diff_file_output_unified3(svn_stream_t *output_stream,
1820 const char *original_path,
1821 const char *modified_path,
1822 const char *original_header,
1823 const char *modified_header,
1824 const char *header_encoding,
1825 const char *relative_to_dir,
1826 svn_boolean_t show_c_function,
1829 if (svn_diff_contains_diffs(diff))
1831 svn_diff__file_output_baton_t baton;
1834 memset(&baton, 0, sizeof(baton));
1835 baton.output_stream = output_stream;
1837 baton.header_encoding = header_encoding;
1838 baton.path[0] = original_path;
1839 baton.path[1] = modified_path;
1840 baton.hunk = svn_stringbuf_create_empty(pool);
1841 baton.show_c_function = show_c_function;
1842 baton.extra_context = svn_stringbuf_create_empty(pool);
1844 if (show_c_function)
/* Lines matching these globs are not taken as extra context.
 * NOTE(review): the element size is written as sizeof(char **) although
 * the elements pushed are const char *; both are object pointers. */
1846 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1848 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1849 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1850 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
/* Convert the three one-character line markers to HEADER_ENCODING once. */
1853 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1854 header_encoding, pool));
1855 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1856 header_encoding, pool));
1857 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1858 header_encoding, pool));
1860 if (relative_to_dir)
1862 /* Possibly adjust the "original" and "modified" paths shown in
1863 the output (see issue #2723). */
1864 const char *child_path;
1866 if (! original_header)
1868 child_path = svn_dirent_is_child(relative_to_dir,
1869 original_path, pool);
1871 original_path = child_path;
1873 return svn_error_createf(
1874 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1875 _("Path '%s' must be inside "
1876 "the directory '%s'"),
1877 svn_dirent_local_style(original_path, pool),
1878 svn_dirent_local_style(relative_to_dir,
1882 if (! modified_header)
1884 child_path = svn_dirent_is_child(relative_to_dir,
1885 modified_path, pool);
1887 modified_path = child_path;
1889 return svn_error_createf(
1890 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1891 _("Path '%s' must be inside "
1892 "the directory '%s'"),
1893 svn_dirent_local_style(modified_path, pool),
1894 svn_dirent_local_style(relative_to_dir,
/* The files are opened through baton.path[], which still holds the paths
 * as passed in, not the child paths computed above. */
1899 for (i = 0; i < 2; i++)
1901 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1902 APR_READ, APR_OS_DEFAULT, pool));
1905 if (original_header == NULL)
1907 SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1911 if (modified_header == NULL)
1913 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1917 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1918 original_header, modified_header,
1921 SVN_ERR(svn_diff_output(diff, &baton,
1922 &svn_diff__file_output_unified_vtable));
/* The last hunk is still pending after the walk over DIFF; write it. */
1923 SVN_ERR(output_unified_flush_hunk(&baton));
1925 for (i = 0; i < 2; i++)
1927 SVN_ERR(svn_io_file_close(baton.file[i], pool));
1931 return SVN_NO_ERROR;
1935 /** Display diff3 **/
/* Ring buffer over the last SVN_DIFF__UNIFIED_CONTEXT_SIZE writes made to
 * STREAM: DATA and LEN hold the pointer and length of each remembered
 * write, NEXT_SLOT is the slot the next write goes to, and TOTAL_WRITTEN
 * counts every write made so far. */
1937 /* A stream to remember *leading* context. Note that this stream does
1938 *not* copy the data that it is remembering; it just saves
1940 typedef struct context_saver_t {
1941 svn_stream_t *stream;
1942 const char *data[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1943 apr_size_t len[SVN_DIFF__UNIFIED_CONTEXT_SIZE];
1944 apr_size_t next_slot;
1945 apr_size_t total_written;
/* Write handler of a context_saver_t stream; BATON is the context_saver_t.
 * Stores the DATA pointer and *LEN in the next slot without copying the
 * bytes, advances the slot index with wrap-around and counts the write.
 * Always returns SVN_NO_ERROR. */
1949 static svn_error_t *
1950 context_saver_stream_write(void *baton,
1954 context_saver_t *cs = baton;
1955 cs->data[cs->next_slot] = data;
1956 cs->len[cs->next_slot] = *len;
1957 cs->next_slot = (cs->next_slot + 1) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1958 cs->total_written++;
1959 return SVN_NO_ERROR;
/* State for writing the result of a three-way merge.  In the three-element
 * arrays, index 0 is the original file, index 1 the modified file and
 * index 2 the latest file. */
1962 typedef struct svn_diff3__file_output_baton_t
1964 svn_stream_t *output_stream;
1966 const char *path[3];
1968 apr_off_t current_line[3];
1974 /* The following four members are in the encoding used for the output. */
1975 const char *conflict_modified;
1976 const char *conflict_original;
1977 const char *conflict_separator;
1978 const char *conflict_latest;
/* Line ending that is written after each conflict marker. */
1980 const char *marker_eol;
/* Selects how output_conflict() renders a conflicting range. */
1982 svn_diff_conflict_display_style_t conflict_style;
1984 /* The rest of the fields are for
1985 svn_diff_conflict_display_only_conflicts only. Note that for
1986 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
1987 (soon after a conflict) a "trailing context stream", never the
1988 actual output stream.*/
1989 /* The actual output stream. */
1990 svn_stream_t *real_output_stream;
1991 context_saver_t *context_saver;
1992 /* Used to allocate context_saver and trailing context streams, and
1993 for some printfs. */
1995 } svn_diff3__file_output_baton_t;
/* Write the lines remembered in CS to OUTPUT_STREAM.  The slots are visited
 * in ring order beginning at CS->next_slot, which is the oldest entry once
 * the ring has wrapped.  The first write error is returned. */
1997 static svn_error_t *
1998 flush_context_saver(context_saver_t *cs,
1999 svn_stream_t *output_stream)
2002 for (i = 0; i < SVN_DIFF__UNIFIED_CONTEXT_SIZE; i++)
2004 apr_size_t slot = (i + cs->next_slot) % SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2007 apr_size_t len = cs->len[slot];
2008 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2011 return SVN_NO_ERROR;
/* Put FOB into "save leading context" mode: clear FOB->pool, allocate a
 * zeroed context_saver_t in it, attach it to a new stream whose write
 * handler is context_saver_stream_write(), and make that stream
 * FOB->output_stream.  Whatever FOB->pool held before is discarded. */
2015 make_context_saver(svn_diff3__file_output_baton_t *fob)
2017 context_saver_t *cs;
2019 svn_pool_clear(fob->pool);
2020 cs = apr_pcalloc(fob->pool, sizeof(*cs));
2021 cs->stream = svn_stream_empty(fob->pool);
2022 svn_stream_set_baton(cs->stream, cs);
2023 svn_stream_set_write(cs->stream, context_saver_stream_write);
2024 fob->context_saver = cs;
2025 fob->output_stream = cs->stream;
2029 /* A stream which prints SVN_DIFF__UNIFIED_CONTEXT_SIZE lines to
2030 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2031 a context_saver; used for *trailing* context. */
/* LINES_TO_PRINT is the number of writes still to be passed through; FOB is
 * the output baton whose streams are used and switched. */
2033 struct trailing_context_printer {
2034 apr_size_t lines_to_print;
2035 svn_diff3__file_output_baton_t *fob;
/* Write handler of a trailing context stream; BATON is the struct
 * trailing_context_printer.  Passes the write on to the real output stream
 * and decrements the remaining count; when the count reaches zero the
 * output baton is switched back to saving context.  Asserts that it is not
 * called once the count is already zero. */
2040 static svn_error_t *
2041 trailing_context_printer_write(void *baton,
2045 struct trailing_context_printer *tcp = baton;
2046 SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2047 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2048 tcp->lines_to_print--;
2049 if (tcp->lines_to_print == 0)
2050 make_context_saver(tcp->fob);
2051 return SVN_NO_ERROR;
/* Put BTN into "print trailing context" mode: clear BTN->pool, allocate a
 * struct trailing_context_printer in it that allows
 * SVN_DIFF__UNIFIED_CONTEXT_SIZE writes, and make a new stream with
 * trailing_context_printer_write() as write handler BTN->output_stream. */
2056 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2058 struct trailing_context_printer *tcp;
2061 svn_pool_clear(btn->pool);
2063 tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2064 tcp->lines_to_print = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2066 s = svn_stream_empty(btn->pool);
2067 svn_stream_set_baton(s, tcp);
2068 svn_stream_set_write(s, trailing_context_printer_write);
2069 btn->output_stream = s;
/* Tells output_line() whether the line it consumes is written out. */
2074 typedef enum svn_diff3__file_output_type_e
/* Consume the line without writing it. */
2076 svn_diff3__file_output_skip,
/* Consume the line and write it to the output stream. */
2077 svn_diff3__file_output_normal
2078 } svn_diff3__file_output_type_e;
/* Consume one line of file IDX from the in-memory data between
 * BATON->curp[IDX] and BATON->endp[IDX].  Unless TYPE is
 * svn_diff3__file_output_skip, the line is written to BATON->output_stream
 * with a single svn_stream_write() call.  BATON->current_line[IDX] is
 * incremented on every call. */
2081 static svn_error_t *
2082 output_line(svn_diff3__file_output_baton_t *baton,
2083 svn_diff3__file_output_type_e type, int idx)
2090 curp = baton->curp[idx];
2091 endp = baton->endp[idx];
2093 /* Lazily update the current line even if we're at EOF.
2095 baton->current_line[idx]++;
2098 return SVN_NO_ERROR;
2100 eol = svn_eol__find_eol_start(curp, endp - curp);
2105 svn_boolean_t had_cr = (*eol == '\r');
2107 if (had_cr && eol != endp && *eol == '\n')
2111 if (type != svn_diff3__file_output_skip)
2114 /* Note that the trailing context printer assumes that
2115 svn_stream_write is called exactly once per line. */
2116 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
/* The next call continues right behind this line. */
2119 baton->curp[idx] = eol;
2121 return SVN_NO_ERROR;
/* Write BTN->marker_eol, the line ending used after conflict markers, to
 * BTN->output_stream. */
2124 static svn_error_t *
2125 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2127 return svn_stream_puts(btn->output_stream, btn->marker_eol);
/* Write TARGET_LENGTH lines of file IDX, starting at line TARGET_LINE.
 * Lines before TARGET_LINE that have not been consumed yet are skipped
 * first.  BATON is a svn_diff3__file_output_baton_t. */
2130 static svn_error_t *
2131 output_hunk(void *baton, int idx, apr_off_t target_line,
2132 apr_off_t target_length)
2134 svn_diff3__file_output_baton_t *output_baton = baton;
2136 /* Skip lines until we are at the start of the changed range */
2137 while (output_baton->current_line[idx] < target_line)
2139 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
/* From here on TARGET_LINE is the first line behind the range. */
2142 target_line += target_length;
2144 while (output_baton->current_line[idx] < target_line)
2146 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2149 return SVN_NO_ERROR;
/* Output callback for a range common to all files: the range is taken from
 * the modified file (index 1).  The other range arguments are unused. */
2152 static svn_error_t *
2153 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2154 apr_off_t modified_start, apr_off_t modified_length,
2155 apr_off_t latest_start, apr_off_t latest_length)
2157 return output_hunk(baton, 1, modified_start, modified_length);
/* Output callback that writes the range of the modified file (index 1).
 * The original and latest range arguments are unused. */
2160 static svn_error_t *
2161 output_diff_modified(void *baton,
2162 apr_off_t original_start, apr_off_t original_length,
2163 apr_off_t modified_start, apr_off_t modified_length,
2164 apr_off_t latest_start, apr_off_t latest_length)
2166 return output_hunk(baton, 1, modified_start, modified_length);
/* Output callback that writes the range of the latest file (index 2).
 * The original and modified range arguments are unused. */
2169 static svn_error_t *
2170 output_diff_latest(void *baton,
2171 apr_off_t original_start, apr_off_t original_length,
2172 apr_off_t modified_start, apr_off_t modified_length,
2173 apr_off_t latest_start, apr_off_t latest_length)
2175 return output_hunk(baton, 2, latest_start, latest_length);
/* Forward declaration of output_conflict(), which is defined further down
 * (presumably needed so that the callback table that follows can refer to
 * it before its definition). */
2178 static svn_error_t *
2179 output_conflict(void *baton,
2180 apr_off_t original_start, apr_off_t original_length,
2181 apr_off_t modified_start, apr_off_t modified_length,
2182 apr_off_t latest_start, apr_off_t latest_length,
/* Output callbacks used when writing a three-way merge result.  A range
 * reported as "diff common" is written from the modified file, just like a
 * range that differs only in the modified file. */
2185 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2188 output_diff_modified,
2190 output_diff_modified, /* output_diff_common */
/* Write one conflict in the svn_diff_conflict_display_only_conflicts style.
 *
 * If BTN is currently saving leading context, that context is flushed to
 * the real output stream first, preceded by an "@@" line when more lines
 * were seen than the saver holds.  Then the modified range, the original
 * range, the separator and the latest range are written to the real output
 * stream.  The modified and original marker lines precede their range, the
 * latest marker line follows its range.  Each of these marker lines carries
 * the 1-based start line of the range, and the range length as well in the
 * two-number form of the format.  Finally BTN is switched to printing
 * trailing context. */
2196 static svn_error_t *
2197 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2198 apr_off_t original_start,
2199 apr_off_t original_length,
2200 apr_off_t modified_start,
2201 apr_off_t modified_length,
2202 apr_off_t latest_start,
2203 apr_off_t latest_length)
2205 /* Are we currently saving starting context (as opposed to printing
2206 trailing context)? If so, flush it. */
2207 if (btn->output_stream == btn->context_saver->stream)
2209 if (btn->context_saver->total_written > SVN_DIFF__UNIFIED_CONTEXT_SIZE)
2210 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2211 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2214 /* Print to the real output stream. */
2215 btn->output_stream = btn->real_output_stream;
2217 /* Output the conflict itself. */
2218 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2219 (modified_length == 1
2220 ? "%s (%" APR_OFF_T_FMT ")"
2221 : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2222 btn->conflict_modified,
2223 modified_start + 1, modified_length));
2224 SVN_ERR(output_marker_eol(btn));
2225 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2227 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2228 (original_length == 1
2229 ? "%s (%" APR_OFF_T_FMT ")"
2230 : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2231 btn->conflict_original,
2232 original_start + 1, original_length));
2233 SVN_ERR(output_marker_eol(btn));
2234 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2236 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2237 "%s%s", btn->conflict_separator, btn->marker_eol));
2238 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2239 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2241 ? "%s (%" APR_OFF_T_FMT ")"
2242 : "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")"),
2243 btn->conflict_latest,
2244 latest_start + 1, latest_length));
2245 SVN_ERR(output_marker_eol(btn));
2247 /* Go into print-trailing-context mode instead. */
2248 make_trailing_context_printer(btn);
2250 return SVN_NO_ERROR;
/* Output callback for a conflicting range.  BATON is a
 * svn_diff3__file_output_baton_t whose conflict_style selects the rendering:
 *
 * - only_conflicts: delegated to output_conflict_with_context().
 * - resolved_modified_latest: may hand the nested diff to svn_diff_output();
 *   the visible code otherwise continues as for modified_latest.
 * - modified_latest and modified_original_latest: modified marker and range,
 *   then (in the second style only) original marker and range, then the
 *   separator, the latest range and the latest marker.
 * - modified / latest: only the range of that file, without markers.
 *
 * Any other style value is reported with SVN_ERR_MALFUNCTION(). */
2254 static svn_error_t *
2255 output_conflict(void *baton,
2256 apr_off_t original_start, apr_off_t original_length,
2257 apr_off_t modified_start, apr_off_t modified_length,
2258 apr_off_t latest_start, apr_off_t latest_length,
2261 svn_diff3__file_output_baton_t *file_baton = baton;
2263 svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2265 if (style == svn_diff_conflict_display_only_conflicts)
2266 return output_conflict_with_context(file_baton,
2267 original_start, original_length,
2268 modified_start, modified_length,
2269 latest_start, latest_length);
2271 if (style == svn_diff_conflict_display_resolved_modified_latest)
2274 return svn_diff_output(diff, baton,
2275 &svn_diff3__file_output_vtable);
2277 style = svn_diff_conflict_display_modified_latest;
2280 if (style == svn_diff_conflict_display_modified_latest ||
2281 style == svn_diff_conflict_display_modified_original_latest)
2283 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2284 file_baton->conflict_modified));
2285 SVN_ERR(output_marker_eol(file_baton));
2287 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2289 if (style == svn_diff_conflict_display_modified_original_latest)
2291 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2292 file_baton->conflict_original));
2293 SVN_ERR(output_marker_eol(file_baton));
2294 SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2297 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2298 file_baton->conflict_separator));
2299 SVN_ERR(output_marker_eol(file_baton));
2301 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2303 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2304 file_baton->conflict_latest));
2305 SVN_ERR(output_marker_eol(file_baton));
2307 else if (style == svn_diff_conflict_display_modified)
2308 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2309 else if (style == svn_diff_conflict_display_latest)
2310 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2311 else /* unknown style */
2312 SVN_ERR_MALFUNCTION();
2314 return SVN_NO_ERROR;
/* Write the merge result described by DIFF for the files ORIGINAL_PATH,
 * MODIFIED_PATH and LATEST_PATH to OUTPUT_STREAM, rendering conflicts
 * according to STYLE.
 *
 * CONFLICT_ORIGINAL, CONFLICT_MODIFIED, CONFLICT_LATEST and
 * CONFLICT_SEPARATOR are the marker texts; for each one that is NULL a
 * default is used ("=======" for the separator, a formatted string
 * starting with "|||||||", "<<<<<<<" or ">>>>>>>" for the others).  The
 * markers are converted with svn_utf_cstring_from_utf8().
 *
 * The three files are loaded with map_or_read_file() and released again
 * after the output has been produced.
 * NOTE(review): an early SVN_ERR return skips that release and the
 * destruction of baton.pool; presumably the caller's pool cleanup covers
 * this -- confirm. */
2318 svn_diff_file_output_merge2(svn_stream_t *output_stream,
2320 const char *original_path,
2321 const char *modified_path,
2322 const char *latest_path,
2323 const char *conflict_original,
2324 const char *conflict_modified,
2325 const char *conflict_latest,
2326 const char *conflict_separator,
2327 svn_diff_conflict_display_style_t style,
2330 svn_diff3__file_output_baton_t baton;
2331 apr_file_t *file[3];
2334 apr_mmap_t *mm[3] = { 0 };
2335 #endif /* APR_HAS_MMAP */
2337 svn_boolean_t conflicts_only =
2338 (style == svn_diff_conflict_display_only_conflicts);
2340 memset(&baton, 0, sizeof(baton));
2343 baton.pool = svn_pool_create(pool);
2344 make_context_saver(&baton);
2345 baton.real_output_stream = output_stream;
2348 baton.output_stream = output_stream;
2349 baton.path[0] = original_path;
2350 baton.path[1] = modified_path;
2351 baton.path[2] = latest_path;
2352 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2353 conflict_modified ? conflict_modified
2354 : apr_psprintf(pool, "<<<<<<< %s",
2357 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2358 conflict_original ? conflict_original
2359 : apr_psprintf(pool, "||||||| %s",
2362 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2363 conflict_separator ? conflict_separator
2364 : "=======", pool));
2365 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2366 conflict_latest ? conflict_latest
2367 : apr_psprintf(pool, ">>>>>>> %s",
2371 baton.conflict_style = style;
/* Load each file and set up its read window [curp, endp). */
2373 for (idx = 0; idx < 3; idx++)
2377 SVN_ERR(map_or_read_file(&file[idx],
2379 &baton.buffer[idx], &size,
2380 baton.path[idx], pool));
2382 baton.curp[idx] = baton.buffer[idx];
2383 baton.endp[idx] = baton.buffer[idx];
/* A NULL buffer keeps a NULL end pointer; only a real buffer is extended by
 * the size that was read. */
2385 if (baton.endp[idx])
2386 baton.endp[idx] += size;
2389 /* Check what eol marker we should use for conflict markers.
2390 We use the eol marker of the modified file and fall back on the
2391 platform's eol marker if that file doesn't contain any newlines. */
2392 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2396 baton.marker_eol = eol;
2398 SVN_ERR(svn_diff_output(diff, &baton,
2399 &svn_diff3__file_output_vtable));
2401 for (idx = 0; idx < 3; idx++)
2406 apr_status_t rv = apr_mmap_delete(mm[idx]);
2407 if (rv != APR_SUCCESS)
2409 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2413 #endif /* APR_HAS_MMAP */
2417 SVN_ERR(svn_io_file_close(file[idx], pool));
2422 svn_pool_destroy(baton.pool);
2424 return SVN_NO_ERROR;