2 * diff_file.c : routines for doing diffs on files
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 #include <apr_file_io.h>
29 #include <apr_file_info.h>
32 #include <apr_getopt.h>
36 #include "svn_error.h"
38 #include "svn_types.h"
39 #include "svn_string.h"
40 #include "svn_subst.h"
43 #include "svn_pools.h"
45 #include "svn_private_config.h"
47 #include "svn_ctype.h"
49 #include "private/svn_utf_private.h"
50 #include "private/svn_eol_private.h"
51 #include "private/svn_dep_compat.h"
52 #include "private/svn_adler32.h"
53 #include "private/svn_diff_private.h"
55 /* A token, i.e. a line read from a file. */
56 typedef struct svn_diff__file_token_t
58 /* Next token in free list. */
59 struct svn_diff__file_token_t *next;
60 svn_diff_datasource_e datasource;
61 /* Offset in the datasource. */
63 /* Offset of the normalized token (may skip leading whitespace) */
64 apr_off_t norm_offset;
65 /* Total length - before normalization. */
67 /* Total length - after normalization. */
69 } svn_diff__file_token_t;
72 typedef struct svn_diff__file_baton_t
74 const svn_diff_file_options_t *options;
77 const char *path; /* path to this file, absolute or relative to CWD */
79 /* All the following fields are active while this datasource is open */
80 apr_file_t *file; /* handle of this file */
81 apr_off_t size; /* total raw size in bytes of this file */
83 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84 int chunk; /* the current chunk number, zero-based */
85 char *buffer; /* a buffer containing the current chunk */
86 char *curp; /* current position in the current chunk */
87 char *endp; /* next memory address after the current chunk */
89 svn_diff__normalize_state_t normalize_state;
91 /* Where the identical suffix starts in this datasource */
92 int suffix_start_chunk;
93 apr_off_t suffix_offset_in_chunk;
96 /* List of free tokens that may be reused. */
97 svn_diff__file_token_t *tokens;
100 } svn_diff__file_baton_t;
103 datasource_to_index(svn_diff_datasource_e datasource)
107 case svn_diff_datasource_original:
110 case svn_diff_datasource_modified:
113 case svn_diff_datasource_latest:
116 case svn_diff_datasource_ancestor:
/* Files are read in chunks of 128k.  There is no support for this number
 * whatsoever.  If there is a number someone comes up with that has some
 * argumentation, let's use that.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Conversions between chunk numbers and byte offsets.
 *
 * chunk_to_offset() widens its argument to apr_off_t before shifting:
 * chunk numbers are kept in an 'int', and shifting a 32-bit int left by
 * CHUNK_SHIFT overflows (undefined behaviour) once the offset reaches
 * 2GB. */
#define chunk_to_offset(chunk) (((apr_off_t) (chunk)) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139 * *LENGTH. The actual bytes read are stored in *LENGTH on return.
141 static APR_INLINE svn_error_t *
142 read_chunk(apr_file_t *file,
143 char *buffer, apr_off_t length,
144 apr_off_t offset, apr_pool_t *scratch_pool)
146 /* XXX: The final offset may not be the one we asked for.
149 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150 return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151 NULL, NULL, scratch_pool);
155 /* Map or read a file at PATH. *BUFFER will point to the file
156 * contents; if the file was mapped, *FILE and *MM will contain the
157 * mmap context; otherwise they will be NULL. SIZE will contain the
158 * file size. Allocate from POOL.
161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162 #define MMAP_T_ARG(NAME) &(NAME),
164 #define MMAP_T_PARAM(NAME)
165 #define MMAP_T_ARG(NAME)
169 map_or_read_file(apr_file_t **file,
171 char **buffer, apr_size_t *size_p,
172 const char *path, apr_pool_t *pool)
180 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
183 if (finfo.size > APR_SIZE_MAX)
185 return svn_error_createf(APR_ENOMEM, NULL,
186 _("File '%s' is too large to be read in "
190 size = (apr_size_t) finfo.size;
192 if (size > APR_MMAP_THRESHOLD)
194 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195 if (rv == APR_SUCCESS)
201 /* Clear *MM because output parameters are undefined on error. */
205 /* On failure we just fall through and try reading the file into
209 #endif /* APR_HAS_MMAP */
211 if (*buffer == NULL && size > 0)
213 *buffer = apr_palloc(pool, size);
215 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
217 /* Since we have the entire contents of the file we can
220 SVN_ERR(svn_io_file_close(*file, pool));
/* For all files in the FILE array, increment the curp pointer.  If a file
 * points before the beginning of file, let it point at the first byte again.
 * If the end of the current chunk is reached, read the next chunk in the
 * buffer and point curp to the start of the chunk.  If EOF is reached, set
 * curp equal to endp to indicate EOF.
 *
 * The macro expands to a single statement and uses SVN_ERR, so it may only
 * be used inside a function returning svn_error_t *. */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i;                                                 \
                                                                             \
    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
    {                                                                        \
      if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
        (all_files)[svn_macro__i].curp++;                                    \
      else                                                                   \
        SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
    }                                                                        \
  } while (0)
/* For all files in the FILE array, decrement the curp pointer.  If the
 * start of a chunk is reached, read the previous chunk in the buffer and
 * point curp to the last byte of the chunk.  If the beginning of a FILE is
 * reached, set chunk to -1 to indicate BOF.
 *
 * The macro expands to a single statement and uses SVN_ERR, so it may only
 * be used inside a function returning svn_error_t *. */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i;                                                 \
                                                                             \
    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
    {                                                                        \
      if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
        (all_files)[svn_macro__i].curp--;                                    \
      else                                                                   \
        SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
    }                                                                        \
  } while (0)
269 increment_chunk(struct file_info *file, apr_pool_t *pool)
272 apr_off_t last_chunk = offset_to_chunk(file->size);
274 if (file->chunk == -1)
276 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
278 file->curp = file->buffer;
280 else if (file->chunk == last_chunk)
282 /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283 file->curp = file->endp;
287 /* There are still chunks left. Read next chunk and reset pointers. */
289 length = file->chunk == last_chunk ?
290 offset_in_chunk(file->size) : CHUNK_SIZE;
291 SVN_ERR(read_chunk(file->file, file->buffer,
292 length, chunk_to_offset(file->chunk),
294 file->endp = file->buffer + length;
295 file->curp = file->buffer;
303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
305 if (file->chunk == 0)
307 /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308 by setting chunk = -1 and curp = endp - 1. Both conditions are
309 important. They help the increment step to catch the BOF situation
310 in an efficient way. */
312 file->curp = file->endp - 1;
316 /* Read previous chunk and reset pointers. */
318 SVN_ERR(read_chunk(file->file, file->buffer,
319 CHUNK_SIZE, chunk_to_offset(file->chunk),
321 file->endp = file->buffer + CHUNK_SIZE;
322 file->curp = file->endp - 1;
329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
330 * the file (this can happen while scanning backwards). This is the case if
331 * one of them has chunk == -1. */
333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
337 for (i = 0; i < file_len; i++)
338 if (file[i].chunk == -1)
344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
345 * one of them has curp == endp (this can only happen at the last chunk)) */
347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
351 for (i = 0; i < file_len; i++)
352 if (file[i].curp == file[i].endp)
/* Quickly determine whether there is an eol char in CHUNK.
 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 *
 * CHUNK is one machine word of file data.  Return TRUE if any of its
 * bytes is '\r' or '\n'.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  /* After the XOR, a byte is zero exactly where CHUNK had a CR (resp. LF). */
  apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
  apr_uintptr_t n_test = chunk ^ SVN__N_MASK;

  /* Set bit 7 of every byte that is non-zero; zero bytes keep it clear. */
  r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
  n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;

  /* Some byte has bit 7 clear in either test word iff it was an eol char. */
  return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
}
#endif
375 /* Find the prefix which is identical between all elements of the FILE array.
376 * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
377 * set to TRUE if one of the FILEs reached its end while scanning prefix,
378 * i.e. at least one file consisted entirely of prefix. Otherwise,
379 * REACHED_ONE_EOF is set to FALSE.
381 * After this function is finished, the buffers, chunks, curp's and endp's
382 * of the FILEs are set to point at the first byte after the prefix. */
384 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385 struct file_info file[], apr_size_t file_len,
388 svn_boolean_t had_cr = FALSE;
389 svn_boolean_t is_match;
393 *reached_one_eof = FALSE;
395 for (i = 1, is_match = TRUE; i < file_len; i++)
396 is_match = is_match && *file[0].curp == *file[i].curp;
399 #if SVN_UNALIGNED_ACCESS_IS_OK
400 apr_ssize_t max_delta, delta;
401 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
403 /* ### TODO: see if we can take advantage of
404 diff options like ignore_eol_style or ignore_space. */
405 /* check for eol, and count */
406 if (*file[0].curp == '\r')
411 else if (*file[0].curp == '\n' && !had_cr)
420 INCREMENT_POINTERS(file, file_len, pool);
422 #if SVN_UNALIGNED_ACCESS_IS_OK
424 /* Try to advance as far as possible with machine-word granularity.
425 * Determine how far we may advance with chunky ops without reaching
426 * endp for any of the files.
427 * Signedness is important here if curp gets close to endp.
429 max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430 for (i = 1; i < file_len; i++)
432 delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433 if (delta < max_delta)
438 for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
440 apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441 if (contains_eol(chunk))
444 for (i = 1; i < file_len; i++)
445 if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
457 /* We either found a mismatch or an EOL at or shortly behind curp+delta
458 * or we cannot proceed with chunky ops without exceeding endp.
459 * In any way, everything up to curp + delta is equal and not an EOL.
461 for (i = 0; i < file_len; i++)
462 file[i].curp += delta;
464 /* Skipped data without EOL markers, so last char was not a CR. */
469 *reached_one_eof = is_one_at_eof(file, file_len);
470 if (*reached_one_eof)
473 for (i = 1, is_match = TRUE; i < file_len; i++)
474 is_match = is_match && *file[0].curp == *file[i].curp;
479 /* Check if we ended in the middle of a \r\n for one file, but \r for
480 another. If so, back up one byte, so the next loop will back up
481 the entire line. Also decrement lines, since we counted one
482 too many for the \r. */
483 svn_boolean_t ended_at_nonmatching_newline = FALSE;
484 for (i = 0; i < file_len; i++)
485 if (file[i].curp < file[i].endp)
486 ended_at_nonmatching_newline = ended_at_nonmatching_newline
487 || *file[i].curp == '\n';
488 if (ended_at_nonmatching_newline)
491 DECREMENT_POINTERS(file, file_len, pool);
495 /* Back up one byte, so we point at the last identical byte */
496 DECREMENT_POINTERS(file, file_len, pool);
498 /* Back up to the last eol sequence (\n, \r\n or \r) */
499 while (!is_one_at_bof(file, file_len) &&
500 *file[0].curp != '\n' && *file[0].curp != '\r')
501 DECREMENT_POINTERS(file, file_len, pool);
503 /* Slide one byte forward, to point past the eol sequence */
504 INCREMENT_POINTERS(file, file_len, pool);
506 *prefix_lines = lines;
/* The number of identical suffix lines to keep with the middle section. These
 * lines are not eliminated as suffix, and can be picked up by the token
 * parsing and lcs steps. This is mainly for backward compatibility with
 * the previous diff (and blame) output (if there are multiple diff solutions,
 * our lcs algorithm prefers taking common lines from the start, rather than
 * from the end. By giving it back some suffix lines, we give it some wiggle
 * room to find the exact same diff as before).
 *
 * The number 50 is more or less arbitrary, based on some real-world tests
 * with big files (and then doubling the required number to be on the safe
 * side). This has a negligible effect on the power of the optimization. */
/* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
#ifndef SUFFIX_LINES_TO_KEEP
#define SUFFIX_LINES_TO_KEEP 50
#endif
528 /* Find the suffix which is identical between all elements of the FILE array.
529 * Return the number of suffix lines in SUFFIX_LINES.
531 * Before this function is called the FILEs' pointers and chunks should be
532 * positioned right after the identical prefix (which is the case after
533 * find_identical_prefix), so we can determine where suffix scanning should
534 * ultimately stop. */
536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537 apr_size_t file_len, apr_pool_t *pool)
539 struct file_info file_for_suffix[4] = { { 0 } };
541 apr_off_t suffix_min_chunk0;
542 apr_off_t suffix_min_offset0;
543 apr_off_t min_file_size;
544 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545 svn_boolean_t is_match;
547 svn_boolean_t had_nl;
550 /* Initialize file_for_suffix[].
551 Read last chunk, position curp at last byte. */
552 for (i = 0; i < file_len; i++)
554 file_for_suffix[i].path = file[i].path;
555 file_for_suffix[i].file = file[i].file;
556 file_for_suffix[i].size = file[i].size;
557 file_for_suffix[i].chunk =
558 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559 length[i] = offset_in_chunk(file_for_suffix[i].size);
562 /* last chunk is an empty chunk -> start at next-to-last chunk */
563 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564 length[i] = CHUNK_SIZE;
567 if (file_for_suffix[i].chunk == file[i].chunk)
569 /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570 file_for_suffix[i].buffer = file[i].buffer;
574 /* There is at least more than 1 chunk,
575 so allocate full chunk size buffer */
576 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577 SVN_ERR(read_chunk(file_for_suffix[i].file,
578 file_for_suffix[i].buffer, length[i],
579 chunk_to_offset(file_for_suffix[i].chunk),
582 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583 file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
586 /* Get the chunk and pointer offset (for file[0]) at which we should stop
587 scanning backward for the identical suffix, i.e. when we reach prefix. */
588 suffix_min_chunk0 = file[0].chunk;
589 suffix_min_offset0 = file[0].curp - file[0].buffer;
591 /* Compensate if other files are smaller than file[0] */
592 for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593 if (file[i].size < min_file_size)
594 min_file_size = file[i].size;
595 if (file[0].size > min_file_size)
597 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
601 /* Scan backwards until mismatch or until we reach the prefix. */
602 for (i = 1, is_match = TRUE; i < file_len; i++)
604 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605 if (is_match && *file_for_suffix[0].curp != '\r'
606 && *file_for_suffix[0].curp != '\n')
607 /* Count an extra line for the last line not ending in an eol. */
613 svn_boolean_t reached_prefix;
614 #if SVN_UNALIGNED_ACCESS_IS_OK
615 /* Initialize the minimum pointer positions. */
616 const char *min_curp[4];
617 svn_boolean_t can_read_word;
618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
620 /* ### TODO: see if we can take advantage of
621 diff options like ignore_eol_style or ignore_space. */
622 /* check for eol, and count */
623 if (*file_for_suffix[0].curp == '\n')
628 else if (*file_for_suffix[0].curp == '\r' && !had_nl)
637 DECREMENT_POINTERS(file_for_suffix, file_len, pool);
639 #if SVN_UNALIGNED_ACCESS_IS_OK
640 for (i = 0; i < file_len; i++)
641 min_curp[i] = file_for_suffix[i].buffer;
643 /* If we are in the same chunk that contains the last part of the common
644 prefix, use the min_curp[0] pointer to make sure we don't get a
645 suffix that overlaps the already determined common prefix. */
646 if (file_for_suffix[0].chunk == suffix_min_chunk0)
647 min_curp[0] += suffix_min_offset0;
649 /* Scan quickly by reading with machine-word granularity. */
650 for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651 can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
654 while (can_read_word)
658 /* For each file curp is positioned at the current byte, but we
659 want to examine the current byte and the ones before the current
660 location as one machine word. */
662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663 - sizeof(apr_uintptr_t));
664 if (contains_eol(chunk))
667 for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
669 == *(const apr_uintptr_t *)
670 (file_for_suffix[i].curp + 1
671 - sizeof(apr_uintptr_t)));
676 for (i = 0; i < file_len; i++)
678 file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679 can_read_word = can_read_word
680 && ( (file_for_suffix[i].curp + 1
681 - sizeof(apr_uintptr_t))
685 /* We skipped some bytes, so there are no closing EOLs */
689 /* The > min_curp[i] check leaves at least one final byte for checking
690 in the non block optimized case below. */
693 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694 && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695 == suffix_min_offset0;
696 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
700 for (i = 1; i < file_len; i++)
702 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
705 /* Slide one byte forward, to point at the first byte of identical suffix */
706 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
708 /* Slide forward until we find an eol sequence to add the rest of the line
709 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710 one file reaches its end. */
713 svn_boolean_t had_cr = FALSE;
714 while (!is_one_at_eof(file_for_suffix, file_len)
715 && *file_for_suffix[0].curp != '\n'
716 && *file_for_suffix[0].curp != '\r')
717 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
719 /* Slide one or two more bytes, to point past the eol. */
720 if (!is_one_at_eof(file_for_suffix, file_len)
721 && *file_for_suffix[0].curp == '\r')
725 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
727 if (!is_one_at_eof(file_for_suffix, file_len)
728 && *file_for_suffix[0].curp == '\n')
732 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
735 while (!is_one_at_eof(file_for_suffix, file_len)
736 && suffix_lines_to_keep--);
738 if (is_one_at_eof(file_for_suffix, file_len))
741 /* Save the final suffix information in the original file_info */
742 for (i = 0; i < file_len; i++)
744 file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745 file[i].suffix_offset_in_chunk =
746 file_for_suffix[i].curp - file_for_suffix[i].buffer;
749 *suffix_lines = lines;
755 /* Let FILE stand for the array of file_info struct elements of BATON->files
756 * that are indexed by the elements of the DATASOURCE array.
757 * BATON's type is (svn_diff__file_baton_t *).
759 * For each file in the FILE array, open the file at FILE.path; initialize
760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761 * buffer and read the first chunk. Then find the prefix and suffix lines
762 * which are identical between all the files. Return the number of identical
763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
766 * Finding the identical prefix and suffix allows us to exclude those from the
767 * rest of the diff algorithm, which increases performance by reducing the
770 * Implements svn_diff_fns2_t::datasources_open. */
772 datasources_open(void *baton,
773 apr_off_t *prefix_lines,
774 apr_off_t *suffix_lines,
775 const svn_diff_datasource_e *datasources,
776 apr_size_t datasources_len)
778 svn_diff__file_baton_t *file_baton = baton;
779 struct file_info files[4];
781 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
782 svn_boolean_t reached_one_eof;
786 /* Make sure prefix_lines and suffix_lines are set correctly, even if we
787 * exit early because one of the files is empty. */
791 /* Open datasources and read first chunk */
792 for (i = 0; i < datasources_len; i++)
794 svn_filesize_t filesize;
795 struct file_info *file
796 = &file_baton->files[datasource_to_index(datasources[i])];
797 SVN_ERR(svn_io_file_open(&file->file, file->path,
798 APR_READ, APR_OS_DEFAULT, file_baton->pool));
799 SVN_ERR(svn_io_file_size_get(&filesize, file->file, file_baton->pool));
800 file->size = filesize;
801 length[i] = filesize > CHUNK_SIZE ? CHUNK_SIZE : filesize;
802 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
803 SVN_ERR(read_chunk(file->file, file->buffer,
804 length[i], 0, file_baton->pool));
805 file->endp = file->buffer + length[i];
806 file->curp = file->buffer;
807 /* Set suffix_start_chunk to a guard value, so if suffix scanning is
808 * skipped because one of the files is empty, or because of
809 * reached_one_eof, we can still easily check for the suffix during
810 * token reading (datasource_get_next_token). */
811 file->suffix_start_chunk = -1;
816 for (i = 0; i < datasources_len; i++)
818 /* There will not be any identical prefix/suffix, so we're done. */
821 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
823 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
824 files, datasources_len, file_baton->pool));
826 if (!reached_one_eof)
827 /* No file consisted totally of identical prefix,
828 * so there may be some identical suffix. */
829 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
834 /* Copy local results back to baton. */
835 for (i = 0; i < datasources_len; i++)
836 file_baton->files[datasource_to_index(datasources[i])] = files[i];
842 /* Implements svn_diff_fns2_t::datasource_close */
844 datasource_close(void *baton, svn_diff_datasource_e datasource)
846 /* Do nothing. The compare_token function needs previous datasources
847 * to stay available until all datasources are processed.
853 /* Implements svn_diff_fns2_t::datasource_get_next_token */
855 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
856 svn_diff_datasource_e datasource)
858 svn_diff__file_baton_t *file_baton = baton;
859 svn_diff__file_token_t *file_token;
860 struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
864 apr_off_t last_chunk;
867 /* Did the last chunk end in a CR character? */
868 svn_boolean_t had_cr = FALSE;
875 last_chunk = offset_to_chunk(file->size);
877 /* Are we already at the end of a chunk? */
881 if (last_chunk == file->chunk)
882 return SVN_NO_ERROR; /* EOF */
884 /* Or right before an identical suffix in the next chunk? */
885 if (file->chunk + 1 == file->suffix_start_chunk
886 && file->suffix_offset_in_chunk == 0)
890 /* Stop when we encounter the identical suffix. If suffix scanning was not
891 * performed, suffix_start_chunk will be -1, so this condition will never
893 if (file->chunk == file->suffix_start_chunk
894 && (curp - file->buffer) == file->suffix_offset_in_chunk)
897 /* Allocate a new token, or fetch one from the "reusable tokens" list. */
898 file_token = file_baton->tokens;
901 file_baton->tokens = file_token->next;
905 file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
908 file_token->datasource = datasource;
909 file_token->offset = chunk_to_offset(file->chunk)
910 + (curp - file->buffer);
911 file_token->norm_offset = file_token->offset;
912 file_token->raw_length = 0;
913 file_token->length = 0;
917 eol = svn_eol__find_eol_start(curp, endp - curp);
920 had_cr = (*eol == '\r');
922 /* If we have the whole eol sequence in the chunk... */
923 if (!(had_cr && eol == endp))
925 /* Also skip past the '\n' in an '\r\n' sequence. */
926 if (had_cr && *eol == '\n')
932 if (file->chunk == last_chunk)
938 length = endp - curp;
939 file_token->raw_length += length;
943 svn_diff__normalize_buffer(&c, &length,
944 &file->normalize_state,
945 curp, file_baton->options);
946 if (file_token->length == 0)
948 /* When we are reading the first part of the token, move the
949 normalized offset past leading ignored characters, if any. */
950 file_token->norm_offset += (c - curp);
952 file_token->length += length;
953 h = svn__adler32(h, c, length);
956 curp = endp = file->buffer;
958 length = file->chunk == last_chunk ?
959 offset_in_chunk(file->size) : CHUNK_SIZE;
963 /* Issue #4283: Normally we should have checked for reaching the skipped
964 suffix here, but because we assume that a suffix always starts on a
965 line and token boundary we rely on catching the suffix earlier in this
968 When changing things here, make sure the whitespace settings are
969 applied, or we might not reach the exact suffix boundary as token
971 SVN_ERR(read_chunk(file->file,
973 chunk_to_offset(file->chunk),
976 /* If the last chunk ended in a CR, we're done. */
987 file_token->raw_length += length;
990 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
991 * with a spurious empty token. Avoid returning it.
992 * Note that we use the unnormalized length; we don't want a line containing
993 * only spaces (and no trailing newline) to appear like a non-existent
995 if (file_token->raw_length > 0)
998 svn_diff__normalize_buffer(&c, &length,
999 &file->normalize_state,
1000 curp, file_baton->options);
1001 if (file_token->length == 0)
1003 /* When we are reading the first part of the token, move the
1004 normalized offset past leading ignored characters, if any. */
1005 file_token->norm_offset += (c - curp);
1008 file_token->length += length;
1010 *hash = svn__adler32(h, c, length);
1011 *token = file_token;
1014 return SVN_NO_ERROR;
1017 #define COMPARE_CHUNK_SIZE 4096
1019 /* Implements svn_diff_fns2_t::token_compare */
1020 static svn_error_t *
1021 token_compare(void *baton, void *token1, void *token2, int *compare)
1023 svn_diff__file_baton_t *file_baton = baton;
1024 svn_diff__file_token_t *file_token[2];
1025 char buffer[2][COMPARE_CHUNK_SIZE];
1027 apr_off_t offset[2];
1028 struct file_info *file[2];
1029 apr_off_t length[2];
1030 apr_off_t total_length;
1031 /* How much is left to read of each token from the file. */
1032 apr_off_t raw_length[2];
1034 svn_diff__normalize_state_t state[2];
1036 file_token[0] = token1;
1037 file_token[1] = token2;
1038 if (file_token[0]->length < file_token[1]->length)
1041 return SVN_NO_ERROR;
1044 if (file_token[0]->length > file_token[1]->length)
1047 return SVN_NO_ERROR;
1050 total_length = file_token[0]->length;
1051 if (total_length == 0)
1054 return SVN_NO_ERROR;
1057 for (i = 0; i < 2; ++i)
1059 int idx = datasource_to_index(file_token[i]->datasource);
1061 file[i] = &file_baton->files[idx];
1062 offset[i] = file_token[i]->norm_offset;
1063 state[i] = svn_diff__normalize_state_normal;
1065 if (offset_to_chunk(offset[i]) == file[i]->chunk)
1067 /* If the start of the token is in memory, the entire token is
1070 bufp[i] = file[i]->buffer;
1071 bufp[i] += offset_in_chunk(offset[i]);
1073 length[i] = total_length;
1082 /* When we skipped the first part of the token via the whitespace
1083 normalization we must reduce the raw length of the token */
1084 skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1086 raw_length[i] = file_token[i]->raw_length - skipped;
1093 for (i = 0; i < 2; i++)
1097 /* Error if raw_length is 0, that's an unexpected change
1098 * of the file that can happen when ingoring whitespace
1099 * and that can lead to an infinite loop. */
1100 if (raw_length[i] == 0)
1101 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1103 _("The file '%s' changed unexpectedly"
1107 /* Read a chunk from disk into a buffer */
1108 bufp[i] = buffer[i];
1109 length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1110 COMPARE_CHUNK_SIZE : raw_length[i];
1112 SVN_ERR(read_chunk(file[i]->file,
1113 bufp[i], length[i], offset[i],
1115 offset[i] += length[i];
1116 raw_length[i] -= length[i];
1117 /* bufp[i] gets reset to buffer[i] before reading each chunk,
1118 so, overwriting it isn't a problem */
1119 svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1120 bufp[i], file_baton->options);
1122 /* assert(length[i] == file_token[i]->length); */
1126 len = length[0] > length[1] ? length[1] : length[0];
1128 /* Compare two chunks (that could be entire tokens if they both reside
1131 *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1133 return SVN_NO_ERROR;
1135 total_length -= len;
1141 while(total_length > 0);
1144 return SVN_NO_ERROR;
1148 /* Implements svn_diff_fns2_t::token_discard */
1150 token_discard(void *baton, void *token)
1152 svn_diff__file_baton_t *file_baton = baton;
1153 svn_diff__file_token_t *file_token = token;
1155 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1156 file_token->next = file_baton->tokens;
1157 file_baton->tokens = file_token;
1161 /* Implements svn_diff_fns2_t::token_discard_all */
1163 token_discard_all(void *baton)
1165 svn_diff__file_baton_t *file_baton = baton;
1167 /* Discard all memory in use by the tokens, and close all open files. */
1168 svn_pool_clear(file_baton->pool);
1172 static const svn_diff_fns2_t svn_diff__file_vtable =
1176 datasource_get_next_token,
1182 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
1183 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1185 /* Options supported by svn_diff_file_options_parse(). */
1186 static const apr_getopt_option_t diff_options[] =
1188 { "ignore-space-change", 'b', 0, NULL },
1189 { "ignore-all-space", 'w', 0, NULL },
1190 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1191 { "show-c-function", 'p', 0, NULL },
1192 /* ### For compatibility; we don't support the argument to -u, because
1193 * ### we don't have optional argument support. */
1194 { "unified", 'u', 0, NULL },
1195 { "context", 'U', 1, NULL },
1196 { NULL, 0, 0, NULL }
1199 svn_diff_file_options_t *
1200 svn_diff_file_options_create(apr_pool_t *pool)
1202 svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1204 opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1209 /* A baton for use with opt_parsing_error_func(). */
1210 struct opt_parsing_error_baton_t
1216 /* Store an error message from apr_getopt_long(). Set BATON->err to a new
1217 * error with a message generated from FMT and the remaining arguments.
1218 * Implements apr_getopt_err_fn_t. */
1220 opt_parsing_error_func(void *baton,
1221 const char *fmt, ...)
1223 struct opt_parsing_error_baton_t *b = baton;
1224 const char *message;
1228 message = apr_pvsprintf(b->pool, fmt, ap);
1231 /* Skip leading ": " (if present, which it always is in known cases). */
1232 if (strncmp(message, ": ", 2) == 0)
1235 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
/* Parse the diff option strings in ARGS and record the result in OPTIONS.
 *
 * The visible code builds an argv-style array in POOL (an empty string at
 * index 0, then the elements of ARGS, then a trailing NULL) and runs
 * apr_getopt_long() over it with the diff_options table.  The message that
 * apr_getopt_long() produces on failure is captured through
 * opt_parsing_error_func() and chained below an
 * SVN_ERR_INVALID_DIFF_OPTION error.  Arguments left over after option
 * parsing are rejected with the same error code.
 *
 * NOTE(review): this listing omits lines of the function (return type, the
 * POOL parameter, some declarations, the loop/switch scaffolding and the
 * `case' labels), so which option triggers which assignment below is
 * presumed from the diff_options table -- confirm against the full file. */
1239 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1240 const apr_array_header_t *args,
1244 struct opt_parsing_error_baton_t opt_parsing_error_baton;
1245 apr_array_header_t *argv;
1247 opt_parsing_error_baton.err = NULL;
1248 opt_parsing_error_baton.pool = pool;
1250 /* Make room for each option (starting at index 1) plus trailing NULL. */
1251 argv = apr_array_make(pool, args->nelts + 2, sizeof(char*));
1252 APR_ARRAY_PUSH(argv, const char *) = "";
1253 apr_array_cat(argv, args);
1254 APR_ARRAY_PUSH(argv, const char *) = NULL;
1256 apr_getopt_init(&os, pool,
1257 argv->nelts - 1 /* Exclude trailing NULL */,
1258 (const char *const *) argv->elts);
1260 /* Capture any error message from apr_getopt_long(). This will typically
1261 * say which option is wrong, which we would not otherwise know. */
1262 os->errfn = opt_parsing_error_func;
1263 os->errarg = &opt_parsing_error_baton;
1267 const char *opt_arg;
1269 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1271 if (APR_STATUS_IS_EOF(err))
1274 /* Wrap apr_getopt_long()'s error message. Its doc string implies
1275 * it always will produce one, but never mind if it doesn't. Avoid
1276 * using the message associated with the return code ERR, because
1277 * it refers to the "command line" which may be misleading here. */
1278 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1279 opt_parsing_error_baton.err,
1280 _("Error in options to internal diff"));
1285 /* -w takes precedence over -b. */
1286 if (! options->ignore_space)
1287 options->ignore_space = svn_diff_file_ignore_space_change;
1290 options->ignore_space = svn_diff_file_ignore_space_all;
1292 case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1293 options->ignore_eol_style = TRUE;
1296 options->show_c_function = TRUE;
/* The option argument is converted to an integer and stored as the
 * context size; a conversion failure is returned to the caller. */
1299 SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1306 /* Check for spurious arguments. */
1307 if (os->ind < os->argc)
1308 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1309 _("Invalid argument '%s' in diff options"),
1312 return SVN_NO_ERROR;
1316 svn_diff_file_diff_2(svn_diff_t **diff,
1317 const char *original,
1318 const char *modified,
1319 const svn_diff_file_options_t *options,
1322 svn_diff__file_baton_t baton = { 0 };
1324 baton.options = options;
1325 baton.files[0].path = original;
1326 baton.files[1].path = modified;
1327 baton.pool = svn_pool_create(pool);
1329 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1331 svn_pool_destroy(baton.pool);
1332 return SVN_NO_ERROR;
1336 svn_diff_file_diff3_2(svn_diff_t **diff,
1337 const char *original,
1338 const char *modified,
1340 const svn_diff_file_options_t *options,
1343 svn_diff__file_baton_t baton = { 0 };
1345 baton.options = options;
1346 baton.files[0].path = original;
1347 baton.files[1].path = modified;
1348 baton.files[2].path = latest;
1349 baton.pool = svn_pool_create(pool);
1351 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1353 svn_pool_destroy(baton.pool);
1354 return SVN_NO_ERROR;
1358 svn_diff_file_diff4_2(svn_diff_t **diff,
1359 const char *original,
1360 const char *modified,
1362 const char *ancestor,
1363 const svn_diff_file_options_t *options,
1366 svn_diff__file_baton_t baton = { 0 };
1368 baton.options = options;
1369 baton.files[0].path = original;
1370 baton.files[1].path = modified;
1371 baton.files[2].path = latest;
1372 baton.files[3].path = ancestor;
1373 baton.pool = svn_pool_create(pool);
1375 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1377 svn_pool_destroy(baton.pool);
1378 return SVN_NO_ERROR;
1382 /** Display unified context diffs **/
1384 /* Maximum length of the extra context to show when show_c_function is set.
1385 * GNU diff uses 40, let's be brave and use 50 instead. */
1386 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
/* State shared by the unified-diff output routines below
 * (output_unified_line(), output_unified_flush_hunk(),
 * output_unified_diff_modified()); it is set up by
 * svn_diff_file_output_unified4().
 * NOTE(review): this listing omits some member declarations.  Code further
 * down also uses `curp', `context_size', `cancel_baton' and `pool' members
 * of this structure; one comment below has lost its final line. */
1387 typedef struct svn_diff__file_output_baton_t
1389 svn_stream_t *output_stream;
1390 const char *header_encoding;
1392 /* Cached markers, in header_encoding. */
1393 const char *context_str;
1394 const char *delete_str;
1395 const char *insert_str;
/* Paths and open files; index 0 is the original, index 1 the modified
 * file. */
1397 const char *path[2];
1398 apr_file_t *file[2];
/* Per-file line counter, incremented by output_unified_line() for each
 * line it handles. */
1400 apr_off_t current_line[2];
/* Per-file read buffer and the number of bytes in it that still have to
 * be consumed. */
1402 char buffer[2][4096];
1403 apr_size_t length[2];
/* Start line and number of lines, per file, of the hunk being built, and
 * the text of that hunk. */
1406 apr_off_t hunk_start[2];
1407 apr_off_t hunk_length[2];
1408 svn_stringbuf_t *hunk;
1410 /* Should we emit C functions in the unified diff header */
1411 svn_boolean_t show_c_function;
1412 /* Extra strings to skip over if we match. */
1413 apr_array_header_t *extra_skip_match;
1414 /* "Context" to append to the @@ line when the show_c_function option
1416 svn_stringbuf_t *extra_context;
1417 /* Extra context for the current hunk. */
1418 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1422 /* Cancel handler */
1423 svn_cancel_func_t cancel_func;
1427 } svn_diff__file_output_baton_t;
/* What output_unified_line() does with the line it reads:
 *  - skip:    consume it without adding it to the hunk;
 *  - context: add it behind the context marker, counted for both files;
 *  - delete:  add it behind the delete marker, counted for the original;
 *  - insert:  add it behind the insert marker, counted for the modified
 *             file. */
typedef enum svn_diff__file_output_unified_type_e
{
  svn_diff__file_output_unified_skip = 0,
  svn_diff__file_output_unified_context = 1,
  svn_diff__file_output_unified_delete = 2,
  svn_diff__file_output_unified_insert = 3
} svn_diff__file_output_unified_type_e;
/* Handle the next line of file IDX (0 = original, 1 = modified).
 *
 * The line is taken from BATON's read buffer for that file, which is
 * refilled with svn_io_file_read() as needed, and
 * BATON->current_line[IDX] is incremented.
 *
 * Unless TYPE is svn_diff__file_output_unified_skip, the line is appended
 * to BATON->hunk behind the marker string that belongs to TYPE, and the
 * matching BATON->hunk_length counters are incremented.
 *
 * When BATON->show_c_function is set, a skipped or context line that
 * starts with a letter, '$' or '_' and does not match
 * BATON->extra_skip_match replaces the contents of BATON->extra_context.
 *
 * Per the comment near the end: if the end of file is reached and the last
 * line is in the changed range and the file does not end with a newline,
 * the "no newline" message is appended to the hunk.
 *
 * NOTE(review): many lines of this function are absent from this listing
 * (declarations, loop and branch scaffolding, some statements, and the
 * terminators of two comments); read the remaining statements with that in
 * mind. */
1438 static svn_error_t *
1439 output_unified_line(svn_diff__file_output_baton_t *baton,
1440 svn_diff__file_output_unified_type_e type, int idx)
1446 svn_boolean_t bytes_processed = FALSE;
1447 svn_boolean_t had_cr = FALSE;
1448 /* Are we collecting extra context? */
1449 svn_boolean_t collect_extra = FALSE;
1451 length = baton->length[idx];
1452 curp = baton->curp[idx];
1454 /* Lazily update the current line even if we're at EOF.
1455 * This way we fake output of context at EOF
1457 baton->current_line[idx]++;
1459 if (length == 0 && apr_file_eof(baton->file[idx]))
1461 return SVN_NO_ERROR;
1468 if (!bytes_processed)
1472 case svn_diff__file_output_unified_context:
1473 svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1474 baton->hunk_length[0]++;
1475 baton->hunk_length[1]++;
1477 case svn_diff__file_output_unified_delete:
1478 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1479 baton->hunk_length[0]++;
1481 case svn_diff__file_output_unified_insert:
1482 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1483 baton->hunk_length[1]++;
1489 if (baton->show_c_function
1490 && (type == svn_diff__file_output_unified_skip
1491 || type == svn_diff__file_output_unified_context)
1492 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1493 && !svn_cstring_match_glob_list(curp,
1494 baton->extra_skip_match))
1496 svn_stringbuf_setempty(baton->extra_context);
1497 collect_extra = TRUE;
1501 eol = svn_eol__find_eol_start(curp, length);
1507 had_cr = (*eol == '\r');
1509 len = (apr_size_t)(eol - curp);
1511 if (! had_cr || len < length)
1513 if (had_cr && *eol == '\n')
1521 if (type != svn_diff__file_output_unified_skip)
1523 svn_stringbuf_appendbytes(baton->hunk, curp, len);
1527 svn_stringbuf_appendbytes(baton->extra_context,
1531 baton->curp[idx] = eol;
1532 baton->length[idx] = length;
1540 if (type != svn_diff__file_output_unified_skip)
1542 svn_stringbuf_appendbytes(baton->hunk, curp, length);
1547 svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1550 bytes_processed = TRUE;
1553 curp = baton->buffer[idx];
1554 length = sizeof(baton->buffer[idx]);
1556 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1558 /* If the last chunk ended with a CR, we look for an LF at the start
1562 if (! err && length > 0 && *curp == '\n')
1564 if (type != svn_diff__file_output_unified_skip)
1566 svn_stringbuf_appendbyte(baton->hunk, *curp);
1568 /* We don't append the LF to extra_context, since it would
1569 * just be stripped anyway. */
1574 baton->curp[idx] = curp;
1575 baton->length[idx] = length;
1582 if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1585 if (err && APR_STATUS_IS_EOF(err->apr_err))
1587 svn_error_clear(err);
1588 /* Special case if we reach the end of file AND the last line is in the
1589 changed range AND the file doesn't end with a newline */
1590 if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1593 SVN_ERR(svn_diff__unified_append_no_newline_msg(
1594 baton->hunk, baton->header_encoding, baton->pool));
1597 baton->length[idx] = 0;
1600 return SVN_NO_ERROR;
1603 static APR_INLINE svn_error_t *
1604 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1606 svn_diff__file_output_unified_type_e type,
1608 svn_cancel_func_t cancel_func,
1611 while (output_baton->current_line[source] < until)
1614 SVN_ERR(cancel_func(cancel_baton));
1616 SVN_ERR(output_unified_line(output_baton, type, source));
1618 return SVN_NO_ERROR;
/* Write the hunk accumulated in BATON->hunk to BATON->output_stream and
 * reset the hunk state for the next hunk.  Nothing happens if the hunk
 * buffer is empty.
 *
 * Before writing, context lines from the original file are appended up to
 * line hunk_start[0] + hunk_length[0] + context_size.  The header is
 * written with the "@@" delimiter, the start/length pair of each file and
 * BATON->hunk_extra_context.
 *
 * NOTE(review): some lines are absent from this listing, among them the
 * statements guarded by the two hunk_length tests and trailing call
 * arguments. */
1621 static svn_error_t *
1622 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1624 apr_off_t target_line;
1625 apr_size_t hunk_len;
1626 apr_off_t old_start;
1627 apr_off_t new_start;
1629 if (svn_stringbuf_isempty(baton->hunk))
1631 /* Nothing to flush */
1632 return SVN_NO_ERROR;
1635 target_line = baton->hunk_start[0] + baton->hunk_length[0]
1636 + baton->context_size;
1638 /* Add trailing context to the hunk */
1639 SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1640 svn_diff__file_output_unified_context,
1642 baton->cancel_func, baton->cancel_baton));
1644 old_start = baton->hunk_start[0];
1645 new_start = baton->hunk_start[1];
1647 /* If the file is non-empty, convert the line indexes from
1648 zero based to one based */
1649 if (baton->hunk_length[0])
1651 if (baton->hunk_length[1])
1654 /* Write the hunk header */
1655 SVN_ERR(svn_diff__unified_write_hunk_header(
1656 baton->output_stream, baton->header_encoding, "@@",
1657 old_start, baton->hunk_length[0],
1658 new_start, baton->hunk_length[1],
1659 baton->hunk_extra_context,
1662 /* Output the hunk content */
1663 hunk_len = baton->hunk->len;
1664 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1667 /* Prepare for the next hunk */
1668 baton->hunk_length[0] = 0;
1669 baton->hunk_length[1] = 0;
1670 baton->hunk_start[0] = 0;
1671 baton->hunk_start[1] = 0;
1672 svn_stringbuf_setempty(baton->hunk);
1674 return SVN_NO_ERROR;
/* Output callback for a changed range; it is the second entry of
 * svn_diff__file_output_unified_vtable.  BATON is an
 * svn_diff__file_output_baton_t.
 *
 * What the visible code does:
 *  - limits the leading context to context_size lines, or to
 *    ORIGINAL_START lines when the change starts closer to the top;
 *  - computes where the pending hunk would end including its trailing
 *    context, and flushes that hunk when the new range starts at or
 *    beyond that point and beyond the current line; otherwise the pending
 *    hunk is extended with the context in between;
 *  - for a new hunk, skips the original file up to the start of the
 *    leading context and records the hunk start for both files;
 *  - when a hunk is initialized and show_c_function is set, copies at
 *    most SVN_DIFF__EXTRA_CONTEXT_LENGTH bytes of the collected extra
 *    context into hunk_extra_context, trims trailing whitespace and
 *    zero-fills the buffer from the position returned by
 *    svn_utf__last_valid();
 *  - skips the modified file to the start of the change, outputs the
 *    leading context from the original, then the deleted lines and the
 *    inserted lines.
 *
 * LATEST_START and LATEST_LENGTH are not referenced in the visible code.
 *
 * NOTE(review): many lines are absent from this listing (branch
 * scaffolding, some call arguments and statements, and the terminators of
 * three comments); confirm details against the full file. */
1677 static svn_error_t *
1678 output_unified_diff_modified(void *baton,
1679 apr_off_t original_start, apr_off_t original_length,
1680 apr_off_t modified_start, apr_off_t modified_length,
1681 apr_off_t latest_start, apr_off_t latest_length)
1683 svn_diff__file_output_baton_t *output_baton = baton;
1684 apr_off_t context_prefix_length;
1685 apr_off_t prev_context_end;
1686 svn_boolean_t init_hunk = FALSE;
1688 if (original_start > output_baton->context_size)
1689 context_prefix_length = output_baton->context_size;
1691 context_prefix_length = original_start;
1693 /* Calculate where the previous hunk will end if we would write it now
1694 (including the necessary context at the end) */
1695 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1697 prev_context_end = output_baton->hunk_start[0]
1698 + output_baton->hunk_length[0]
1699 + output_baton->context_size;
1703 prev_context_end = -1;
1705 if (output_baton->hunk_start[0] == 0
1706 && (original_length > 0 || modified_length > 0))
1710 /* If the changed range is far enough from the previous range, flush the current
1713 apr_off_t new_hunk_start = (original_start - context_prefix_length);
1715 if (output_baton->current_line[0] < new_hunk_start
1716 && prev_context_end <= new_hunk_start)
1718 SVN_ERR(output_unified_flush_hunk(output_baton));
1721 else if (output_baton->hunk_length[0] > 0
1722 || output_baton->hunk_length[1] > 0)
1724 /* We extend the current hunk */
1727 /* Original: Output the context preceding the changed range */
1728 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1729 svn_diff__file_output_unified_context,
1731 output_baton->cancel_func,
1732 output_baton->cancel_baton));
1736 /* Original: Skip lines until we are at the beginning of the context we want
1738 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1739 svn_diff__file_output_unified_skip,
1740 original_start - context_prefix_length,
1741 output_baton->cancel_func,
1742 output_baton->cancel_baton));
1744 /* Note that the above skip stores data for the show_c_function support below */
1748 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1749 && output_baton->hunk_length[1] == 0);
1751 output_baton->hunk_start[0] = original_start - context_prefix_length;
1752 output_baton->hunk_start[1] = modified_start - context_prefix_length;
1755 if (init_hunk && output_baton->show_c_function)
1758 const char *invalid_character;
1760 /* Save the extra context for later use.
1761 * Note that the last byte of the hunk_extra_context array is never
1762 * touched after it is zero-initialized, so the array is always
1764 strncpy(output_baton->hunk_extra_context,
1765 output_baton->extra_context->data,
1766 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1767 /* Trim whitespace at the end, most notably to get rid of any
1768 * newline characters. */
1769 p = strlen(output_baton->hunk_extra_context);
1771 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1773 output_baton->hunk_extra_context[--p] = '\0';
1776 svn_utf__last_valid(output_baton->hunk_extra_context,
1777 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1778 for (p = invalid_character - output_baton->hunk_extra_context;
1779 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1781 output_baton->hunk_extra_context[p] = '\0';
1785 /* Modified: Skip lines until we are at the start of the changed range */
1786 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1787 svn_diff__file_output_unified_skip,
1789 output_baton->cancel_func,
1790 output_baton->cancel_baton));
1792 /* Original: Output the context preceding the changed range */
1793 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1794 svn_diff__file_output_unified_context,
1796 output_baton->cancel_func,
1797 output_baton->cancel_baton));
1799 /* Both: Output the changed range */
1800 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1801 svn_diff__file_output_unified_delete,
1802 original_start + original_length,
1803 output_baton->cancel_func,
1804 output_baton->cancel_baton));
1805 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1806 svn_diff__file_output_unified_insert,
1807 modified_start + modified_length,
1808 output_baton->cancel_func,
1809 output_baton->cancel_baton));
1811 return SVN_NO_ERROR;
1814 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1815 static svn_error_t *
1816 output_unified_default_hdr(const char **header, const char *path,
1819 apr_finfo_t file_info;
1820 apr_time_exp_t exploded_time;
1821 char time_buffer[64];
1822 apr_size_t time_len;
1823 const char *utf8_timestr;
1825 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1826 apr_time_exp_lt(&exploded_time, file_info.mtime);
1828 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1829 /* Order of date components can be different in different languages */
1830 _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1832 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1834 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1836 return SVN_NO_ERROR;
1839 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1841 NULL, /* output_common */
1842 output_unified_diff_modified,
1843 NULL, /* output_diff_latest */
1844 NULL, /* output_diff_common */
1845 NULL /* output_conflict */
/* Write DIFF as a unified diff to OUTPUT_STREAM.
 *
 * Nothing is written when DIFF contains no differences.  Otherwise the
 * visible code opens ORIGINAL_PATH and MODIFIED_PATH for reading, writes
 * the diff header, drives svn_diff_output2() with
 * svn_diff__file_output_unified_vtable, flushes the last hunk and closes
 * both files.
 *
 * ORIGINAL_HEADER / MODIFIED_HEADER are the header labels; when one is
 * NULL, a default of the form "path<TAB>mtime" is generated for it.  If
 * RELATIVE_TO_DIR is also given, the path used in that default label is
 * made relative to RELATIVE_TO_DIR; the visible error return for this case
 * is SVN_ERR_BAD_RELATIVE_PATH ("must be inside the directory").
 *
 * HEADER_ENCODING is the encoding of the header and of the line markers.
 * A negative CONTEXT_SIZE selects SVN_DIFF__UNIFIED_CONTEXT_SIZE.  With
 * SHOW_C_FUNCTION, lines matching "public:*", "private:*" or "protected:*"
 * are not used as extra context.  CANCEL_FUNC / CANCEL_BATON are stored in
 * the baton and also passed to svn_diff_output2().
 *
 * NOTE(review): lines are absent from this listing (return type, some
 * parameters and declarations, branch scaffolding, trailing call
 * arguments); confirm details against the full file. */
1849 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1851 const char *original_path,
1852 const char *modified_path,
1853 const char *original_header,
1854 const char *modified_header,
1855 const char *header_encoding,
1856 const char *relative_to_dir,
1857 svn_boolean_t show_c_function,
1859 svn_cancel_func_t cancel_func,
1863 if (svn_diff_contains_diffs(diff))
1865 svn_diff__file_output_baton_t baton;
1868 memset(&baton, 0, sizeof(baton));
1869 baton.output_stream = output_stream;
1870 baton.cancel_func = cancel_func;
1871 baton.cancel_baton = cancel_baton;
1873 baton.header_encoding = header_encoding;
1874 baton.path[0] = original_path;
1875 baton.path[1] = modified_path;
1876 baton.hunk = svn_stringbuf_create_empty(pool);
1877 baton.show_c_function = show_c_function;
1878 baton.extra_context = svn_stringbuf_create_empty(pool);
1879 baton.context_size = (context_size >= 0) ? context_size
1880 : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1882 if (show_c_function)
1884 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1886 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1887 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1888 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
/* Convert the one-character line markers to the header encoding once, up
 * front. */
1891 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1892 header_encoding, pool));
1893 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1894 header_encoding, pool));
1895 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1896 header_encoding, pool));
1898 if (relative_to_dir)
1900 /* Possibly adjust the "original" and "modified" paths shown in
1901 the output (see issue #2723). */
1902 const char *child_path;
1904 if (! original_header)
1906 child_path = svn_dirent_is_child(relative_to_dir,
1907 original_path, pool);
1909 original_path = child_path;
1911 return svn_error_createf(
1912 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1913 _("Path '%s' must be inside "
1914 "the directory '%s'"),
1915 svn_dirent_local_style(original_path, pool),
1916 svn_dirent_local_style(relative_to_dir,
1920 if (! modified_header)
1922 child_path = svn_dirent_is_child(relative_to_dir,
1923 modified_path, pool);
1925 modified_path = child_path;
1927 return svn_error_createf(
1928 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1929 _("Path '%s' must be inside "
1930 "the directory '%s'"),
1931 svn_dirent_local_style(modified_path, pool),
1932 svn_dirent_local_style(relative_to_dir,
/* The files are opened from baton.path[], which was set before the
 * relative-path adjustment above. */
1937 for (i = 0; i < 2; i++)
1939 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1940 APR_READ, APR_OS_DEFAULT, pool));
1943 if (original_header == NULL)
1945 SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1949 if (modified_header == NULL)
1951 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1955 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1956 original_header, modified_header,
1959 SVN_ERR(svn_diff_output2(diff, &baton,
1960 &svn_diff__file_output_unified_vtable,
1961 cancel_func, cancel_baton));
1962 SVN_ERR(output_unified_flush_hunk(&baton));
1964 for (i = 0; i < 2; i++)
1966 SVN_ERR(svn_io_file_close(baton.file[i], pool));
1970 return SVN_NO_ERROR;
1974 /** Display diff3 **/
/* Bookkeeping of the context saver stream.  context_saver_stream_write()
 * uses DATA and LEN as ring buffers with CONTEXT_SIZE entries, NEXT_SLOT
 * being the entry written next; flush_context_saver() reads them back.
 * NOTE(review): in this listing the comment below has lost its last line,
 * and the declaration of the context_size member used by those functions
 * is not visible. */
1976 /* A stream to remember *leading* context. Note that this stream does
1977 *not* copy the data that it is remembering; it just saves
1979 typedef struct context_saver_t {
1980 svn_stream_t *stream;
1982 const char **data; /* const char *data[context_size] */
1983 apr_size_t *len; /* apr_size_t len[context_size] */
1984 apr_size_t next_slot;
1985 apr_ssize_t total_writes;
/* Write handler installed on the context saver stream by
 * make_context_saver(); BATON is a context_saver_t.
 *
 * When the saver's context size is positive, the DATA pointer and *LEN are
 * stored in slot NEXT_SLOT (the bytes themselves are not copied) and
 * NEXT_SLOT advances modulo the context size.  Always returns
 * SVN_NO_ERROR.
 *
 * NOTE(review): lines are absent from this listing (the remaining
 * parameters and possibly a statement before the return). */
1989 static svn_error_t *
1990 context_saver_stream_write(void *baton,
1994 context_saver_t *cs = baton;
1996 if (cs->context_size > 0)
1998 cs->data[cs->next_slot] = data;
1999 cs->len[cs->next_slot] = *len;
2000 cs->next_slot = (cs->next_slot + 1) % cs->context_size;
2003 return SVN_NO_ERROR;
/* State shared by the three-way merge output routines below; it is set up
 * by svn_diff_file_output_merge3().
 * NOTE(review): this listing omits some member declarations.  Code further
 * down also uses `buffer', `curp', `endp', `context_size', `cancel_baton'
 * and `pool' members of this structure. */
2006 typedef struct svn_diff3__file_output_baton_t
/* Stream that output_line() and the marker helpers write to. */
2008 svn_stream_t *output_stream;
/* Paths of the original [0], modified [1] and latest [2] files. */
2010 const char *path[3];
/* Per-file line counter, incremented by output_line() for each line it
 * handles. */
2012 apr_off_t current_line[3];
2018 /* The following four members are in the encoding used for the output. */
2019 const char *conflict_modified;
2020 const char *conflict_original;
2021 const char *conflict_separator;
2022 const char *conflict_latest;
/* End-of-line string written after each conflict marker. */
2024 const char *marker_eol;
2026 svn_diff_conflict_display_style_t conflict_style;
2029 /* cancel support */
2030 svn_cancel_func_t cancel_func;
2033 /* The rest of the fields are for
2034 svn_diff_conflict_display_only_conflicts only. Note that for
2035 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
2036 (soon after a conflict) a "trailing context stream", never the
2037 actual output stream.*/
2038 /* The actual output stream. */
2039 svn_stream_t *real_output_stream;
2040 context_saver_t *context_saver;
2041 /* Used to allocate context_saver and trailing context streams, and
2042 for some printfs. */
2044 } svn_diff3__file_output_baton_t;
/* Write the lines remembered by CS to OUTPUT_STREAM.
 *
 * The slots are visited starting at CS->next_slot and wrapping around,
 * i.e. beginning with the slot that would be overwritten next.  Each
 * write uses a local copy of the stored length.
 *
 * NOTE(review): lines are absent from this listing (the loop variable's
 * declaration and whatever guards the write inside the loop). */
2046 static svn_error_t *
2047 flush_context_saver(context_saver_t *cs,
2048 svn_stream_t *output_stream)
2051 for (i = 0; i < cs->context_size; i++)
2053 apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2056 apr_size_t len = cs->len[slot];
2057 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2060 return SVN_NO_ERROR;
2064 make_context_saver(svn_diff3__file_output_baton_t *fob)
2066 context_saver_t *cs;
2068 assert(fob->context_size > 0); /* Or nothing to save */
2070 svn_pool_clear(fob->pool);
2071 cs = apr_pcalloc(fob->pool, sizeof(*cs));
2072 cs->stream = svn_stream_empty(fob->pool);
2073 svn_stream_set_baton(cs->stream, cs);
2074 svn_stream_set_write(cs->stream, context_saver_stream_write);
2075 fob->context_saver = cs;
2076 fob->output_stream = cs->stream;
2077 cs->context_size = fob->context_size;
2078 cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2079 cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2083 /* A stream which prints LINES_TO_PRINT (based on context size) lines to
2084 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2085 a context_saver; used for *trailing* context. */
2087 struct trailing_context_printer {
2088 apr_size_t lines_to_print;
2089 svn_diff3__file_output_baton_t *fob;
2094 static svn_error_t *
2095 trailing_context_printer_write(void *baton,
2099 struct trailing_context_printer *tcp = baton;
2100 SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2101 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2102 tcp->lines_to_print--;
2103 if (tcp->lines_to_print == 0)
2104 make_context_saver(tcp->fob);
2105 return SVN_NO_ERROR;
2110 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2112 struct trailing_context_printer *tcp;
2115 svn_pool_clear(btn->pool);
2117 tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2118 tcp->lines_to_print = btn->context_size;
2120 s = svn_stream_empty(btn->pool);
2121 svn_stream_set_baton(s, tcp);
2122 svn_stream_set_write(s, trailing_context_printer_write);
2123 btn->output_stream = s;
/* What output_line() does with the line it reads: skip it, or write it to
 * the current output stream. */
typedef enum svn_diff3__file_output_type_e
{
  svn_diff3__file_output_skip = 0,
  svn_diff3__file_output_normal = 1
} svn_diff3__file_output_type_e;
/* Handle the next line of file IDX from BATON's in-memory buffer for that
 * file (the range BATON->curp[IDX] .. BATON->endp[IDX]).
 *
 * BATON->current_line[IDX] is incremented on every call.  The end of the
 * line is located with svn_eol__find_eol_start().  Unless TYPE is
 * svn_diff3__file_output_skip, the line is written to
 * BATON->output_stream with a single svn_stream_write() call, which the
 * trailing context printer relies on.  BATON->curp[IDX] is then moved
 * past the line.
 *
 * NOTE(review): many lines of this function are absent from this listing
 * (declarations, the end-of-buffer test, the end-of-line handling and the
 * terminator of one comment); confirm details against the full file. */
2135 static svn_error_t *
2136 output_line(svn_diff3__file_output_baton_t *baton,
2137 svn_diff3__file_output_type_e type, int idx)
2144 curp = baton->curp[idx];
2145 endp = baton->endp[idx];
2147 /* Lazily update the current line even if we're at EOF.
2149 baton->current_line[idx]++;
2152 return SVN_NO_ERROR;
2154 eol = svn_eol__find_eol_start(curp, endp - curp);
2159 svn_boolean_t had_cr = (*eol == '\r');
2161 if (had_cr && eol != endp && *eol == '\n')
2165 if (type != svn_diff3__file_output_skip)
2168 /* Note that the trailing context printer assumes that
2169 svn_stream_write is called exactly once per line. */
2170 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2173 baton->curp[idx] = eol;
2175 return SVN_NO_ERROR;
2178 static svn_error_t *
2179 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2181 return svn_stream_puts(btn->output_stream, btn->marker_eol);
2184 static svn_error_t *
2185 output_hunk(void *baton, int idx, apr_off_t target_line,
2186 apr_off_t target_length)
2188 svn_diff3__file_output_baton_t *output_baton = baton;
2190 /* Skip lines until we are at the start of the changed range */
2191 while (output_baton->current_line[idx] < target_line)
2193 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2196 target_line += target_length;
2198 while (output_baton->current_line[idx] < target_line)
2200 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2203 return SVN_NO_ERROR;
2206 static svn_error_t *
2207 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2208 apr_off_t modified_start, apr_off_t modified_length,
2209 apr_off_t latest_start, apr_off_t latest_length)
2211 return output_hunk(baton, 1, modified_start, modified_length);
2214 static svn_error_t *
2215 output_diff_modified(void *baton,
2216 apr_off_t original_start, apr_off_t original_length,
2217 apr_off_t modified_start, apr_off_t modified_length,
2218 apr_off_t latest_start, apr_off_t latest_length)
2220 return output_hunk(baton, 1, modified_start, modified_length);
2223 static svn_error_t *
2224 output_diff_latest(void *baton,
2225 apr_off_t original_start, apr_off_t original_length,
2226 apr_off_t modified_start, apr_off_t modified_length,
2227 apr_off_t latest_start, apr_off_t latest_length)
2229 return output_hunk(baton, 2, latest_start, latest_length);
/* Forward declaration of output_conflict(), which is defined further down
 * and itself passes svn_diff3__file_output_vtable to svn_diff_output2().
 * NOTE(review): the end of this declaration is not visible in this
 * listing. */
2232 static svn_error_t *
2233 output_conflict(void *baton,
2234 apr_off_t original_start, apr_off_t original_length,
2235 apr_off_t modified_start, apr_off_t modified_length,
2236 apr_off_t latest_start, apr_off_t latest_length,
/* Output callbacks for three-way merge output; passed to
 * svn_diff_output2() by svn_diff_file_output_merge3() and by
 * output_conflict().
 * NOTE(review): only two of the table's entries are visible in this
 * listing. */
2239 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2242 output_diff_modified,
2244 output_diff_modified, /* output_diff_common */
/* Write a conflict marker line with a line reference to
 * BTN->output_stream, followed by the marker end-of-line string.
 *
 * Two formats are visible: "<label> (<n>)" and
 * "<label> (<start + 1>,<length>)", i.e. the start is printed one-based
 * in the latter.
 *
 * NOTE(review): lines are absent from this listing: the parameters after
 * BTN, the test that selects between the two formats, and the arguments
 * of the first format. */
2248 static svn_error_t *
2249 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2255 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2256 "%s (%" APR_OFF_T_FMT ")",
2259 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2260 "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2261 label, start + 1, length));
2263 SVN_ERR(output_marker_eol(btn));
2265 return SVN_NO_ERROR;
2268 static svn_error_t *
2269 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2270 apr_off_t original_start,
2271 apr_off_t original_length,
2272 apr_off_t modified_start,
2273 apr_off_t modified_length,
2274 apr_off_t latest_start,
2275 apr_off_t latest_length)
2277 /* Are we currently saving starting context (as opposed to printing
2278 trailing context)? If so, flush it. */
2279 if (btn->output_stream == btn->context_saver->stream)
2281 if (btn->context_saver->total_writes > btn->context_size)
2282 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2283 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2286 /* Print to the real output stream. */
2287 btn->output_stream = btn->real_output_stream;
2289 /* Output the conflict itself. */
2290 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2291 modified_start, modified_length));
2292 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2294 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2295 original_start, original_length));
2296 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2298 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2299 "%s%s", btn->conflict_separator, btn->marker_eol));
2300 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2301 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2302 latest_start, latest_length));
2304 /* Go into print-trailing-context mode instead. */
2305 make_trailing_context_printer(btn);
2307 return SVN_NO_ERROR;
/* Output callback for a conflicting range in three-way merge output.
 * BATON is an svn_diff3__file_output_baton_t; its conflict_style selects
 * the rendering:
 *  - only_conflicts: delegate to output_conflict_with_context();
 *  - resolved_modified_latest: output DIFF through svn_diff_output2()
 *    with svn_diff3__file_output_vtable; the visible code also contains a
 *    fallback to the modified_latest style, whose guarding condition is
 *    not visible in this listing;
 *  - modified_latest and modified_original_latest: the modified marker
 *    and lines, then (for modified_original_latest only) the original
 *    marker and lines, then the separator, the latest lines and the
 *    latest marker;
 *  - modified: the modified lines only;
 *  - latest: the latest lines only;
 *  - any other style: SVN_ERR_MALFUNCTION().
 *
 * NOTE(review): lines are absent from this listing (the last parameter
 * and branch scaffolding). */
2311 static svn_error_t *
2312 output_conflict(void *baton,
2313 apr_off_t original_start, apr_off_t original_length,
2314 apr_off_t modified_start, apr_off_t modified_length,
2315 apr_off_t latest_start, apr_off_t latest_length,
2318 svn_diff3__file_output_baton_t *file_baton = baton;
2320 svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2322 if (style == svn_diff_conflict_display_only_conflicts)
2323 return output_conflict_with_context(file_baton,
2324 original_start, original_length,
2325 modified_start, modified_length,
2326 latest_start, latest_length);
2328 if (style == svn_diff_conflict_display_resolved_modified_latest)
2331 return svn_diff_output2(diff, baton,
2332 &svn_diff3__file_output_vtable,
2333 file_baton->cancel_func,
2334 file_baton->cancel_baton);
2336 style = svn_diff_conflict_display_modified_latest;
2339 if (style == svn_diff_conflict_display_modified_latest ||
2340 style == svn_diff_conflict_display_modified_original_latest)
2342 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2343 file_baton->conflict_modified));
2344 SVN_ERR(output_marker_eol(file_baton));
2346 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2348 if (style == svn_diff_conflict_display_modified_original_latest)
2350 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2351 file_baton->conflict_original));
2352 SVN_ERR(output_marker_eol(file_baton));
2353 SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2356 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2357 file_baton->conflict_separator));
2358 SVN_ERR(output_marker_eol(file_baton));
2360 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2362 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2363 file_baton->conflict_latest));
2364 SVN_ERR(output_marker_eol(file_baton));
2366 else if (style == svn_diff_conflict_display_modified)
2367 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2368 else if (style == svn_diff_conflict_display_latest)
2369 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2370 else /* unknown style */
2371 SVN_ERR_MALFUNCTION();
2373 return SVN_NO_ERROR;
/* Write the three-way merge of ORIGINAL_PATH, MODIFIED_PATH and
 * LATEST_PATH described by DIFF to OUTPUT_STREAM.
 *
 * Conflicts are rendered according to STYLE.  CONFLICT_ORIGINAL,
 * CONFLICT_MODIFIED, CONFLICT_LATEST and CONFLICT_SEPARATOR are the marker
 * strings; when one is NULL a default is used: "=======" for the
 * separator, and "<<<<<<< ", "||||||| " or ">>>>>>> " followed by a string
 * argument that is not visible in this listing for the others.  The
 * markers are converted with svn_utf_cstring_from_utf8().
 *
 * Each file is loaded with map_or_read_file().  Conflict markers are
 * terminated with the end-of-line string detected in the modified file
 * (per the comment below, falling back on the platform's end-of-line
 * marker if that file contains no newlines).
 *
 * CANCEL_FUNC / CANCEL_BATON are stored in the baton and passed to
 * svn_diff_output2().  Allocations are made in SCRATCH_POOL or in a
 * subpool of it.
 *
 * NOTE(review): many lines are absent from this listing (return type,
 * some parameters and declarations, preprocessor guards, branch
 * scaffolding and trailing call arguments); in particular the conditions
 * around the context saver setup, the mmap cleanup and the final pool
 * destruction are not visible. */
2377 svn_diff_file_output_merge3(svn_stream_t *output_stream,
2379 const char *original_path,
2380 const char *modified_path,
2381 const char *latest_path,
2382 const char *conflict_original,
2383 const char *conflict_modified,
2384 const char *conflict_latest,
2385 const char *conflict_separator,
2386 svn_diff_conflict_display_style_t style,
2387 svn_cancel_func_t cancel_func,
2389 apr_pool_t *scratch_pool)
2391 svn_diff3__file_output_baton_t baton;
2392 apr_file_t *file[3];
2395 apr_mmap_t *mm[3] = { 0 };
2396 #endif /* APR_HAS_MMAP */
2398 svn_boolean_t conflicts_only =
2399 (style == svn_diff_conflict_display_only_conflicts);
2401 memset(&baton, 0, sizeof(baton));
2402 baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2405 baton.pool = svn_pool_create(scratch_pool);
2406 make_context_saver(&baton);
2407 baton.real_output_stream = output_stream;
2410 baton.output_stream = output_stream;
2411 baton.path[0] = original_path;
2412 baton.path[1] = modified_path;
2413 baton.path[2] = latest_path;
2414 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2415 conflict_modified ? conflict_modified
2416 : apr_psprintf(scratch_pool, "<<<<<<< %s",
2419 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2420 conflict_original ? conflict_original
2421 : apr_psprintf(scratch_pool, "||||||| %s",
2424 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2425 conflict_separator ? conflict_separator
2426 : "=======", scratch_pool));
2427 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2428 conflict_latest ? conflict_latest
2429 : apr_psprintf(scratch_pool, ">>>>>>> %s",
2433 baton.conflict_style = style;
2435 for (idx = 0; idx < 3; idx++)
2439 SVN_ERR(map_or_read_file(&file[idx],
2441 &baton.buffer[idx], &size,
2442 baton.path[idx], scratch_pool));
/* The read position starts at the beginning of the buffer; the end
 * pointer is only advanced when the buffer pointer is non-NULL. */
2444 baton.curp[idx] = baton.buffer[idx];
2445 baton.endp[idx] = baton.buffer[idx];
2447 if (baton.endp[idx])
2448 baton.endp[idx] += size;
2451 /* Check what eol marker we should use for conflict markers.
2452 We use the eol marker of the modified file and fall back on the
2453 platform's eol marker if that file doesn't contain any newlines. */
2454 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2458 baton.marker_eol = eol;
2460 baton.cancel_func = cancel_func;
2461 baton.cancel_baton = cancel_baton;
2463 SVN_ERR(svn_diff_output2(diff, &baton,
2464 &svn_diff3__file_output_vtable,
2465 cancel_func, cancel_baton));
2467 for (idx = 0; idx < 3; idx++)
2472 apr_status_t rv = apr_mmap_delete(mm[idx]);
2473 if (rv != APR_SUCCESS)
2475 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2479 #endif /* APR_HAS_MMAP */
2483 SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2488 svn_pool_destroy(baton.pool);
2490 return SVN_NO_ERROR;