2 * diff_file.c : routines for doing diffs on files
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
13 * http://www.apache.org/licenses/LICENSE-2.0
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
21 * ====================================================================
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 #include <apr_file_io.h>
29 #include <apr_file_info.h>
32 #include <apr_getopt.h>
36 #include "svn_error.h"
38 #include "svn_types.h"
39 #include "svn_string.h"
40 #include "svn_subst.h"
43 #include "svn_pools.h"
45 #include "svn_private_config.h"
47 #include "svn_ctype.h"
49 #include "private/svn_utf_private.h"
50 #include "private/svn_eol_private.h"
51 #include "private/svn_dep_compat.h"
52 #include "private/svn_adler32.h"
53 #include "private/svn_diff_private.h"
55 /* A token, i.e. a line read from a file. */
56 typedef struct svn_diff__file_token_t
58 /* Next token in free list. */
59 struct svn_diff__file_token_t *next;
60 svn_diff_datasource_e datasource;
61 /* Offset in the datasource. */
63 /* Offset of the normalized token (may skip leading whitespace) */
64 apr_off_t norm_offset;
65 /* Total length - before normalization. */
67 /* Total length - after normalization. */
69 } svn_diff__file_token_t;
72 typedef struct svn_diff__file_baton_t
74 const svn_diff_file_options_t *options;
77 const char *path; /* path to this file, absolute or relative to CWD */
79 /* All the following fields are active while this datasource is open */
80 apr_file_t *file; /* handle of this file */
81 apr_off_t size; /* total raw size in bytes of this file */
83 /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
84 int chunk; /* the current chunk number, zero-based */
85 char *buffer; /* a buffer containing the current chunk */
86 char *curp; /* current position in the current chunk */
87 char *endp; /* next memory address after the current chunk */
89 svn_diff__normalize_state_t normalize_state;
91 /* Where the identical suffix starts in this datasource */
92 int suffix_start_chunk;
93 apr_off_t suffix_offset_in_chunk;
96 /* List of free tokens that may be reused. */
97 svn_diff__file_token_t *tokens;
100 } svn_diff__file_baton_t;
103 datasource_to_index(svn_diff_datasource_e datasource)
107 case svn_diff_datasource_original:
110 case svn_diff_datasource_modified:
113 case svn_diff_datasource_latest:
116 case svn_diff_datasource_ancestor:
123 /* Files are read in chunks of 128k. There is no support for this number
124 * whatsoever. If there is a number someone comes up with that has some
125 * argumentation, let's use that.
127 /* If you change this number, update test_norm_offset(),
128 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
/* Base-2 logarithm of the chunk size, and the chunk size itself in bytes
 * (1 << 17 == 128k). */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Byte offset in the file of the first byte of the zero-based chunk number
 * CHUNK.  The chunk number is widened to apr_off_t before shifting: chunk
 * numbers are kept in plain 'int' fields, and shifting an 'int' left by
 * CHUNK_SHIFT overflows (undefined behavior) as soon as the resulting
 * offset no longer fits in an 'int', i.e. at 2GB where 'int' is 32 bits. */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)

/* Number of the chunk that contains the byte at file offset OFFSET. */
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)

/* Position of the byte at file offset OFFSET relative to the start of the
 * chunk that contains it. */
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139 * *LENGTH. The actual bytes read are stored in *LENGTH on return.
141 static APR_INLINE svn_error_t *
142 read_chunk(apr_file_t *file,
143 char *buffer, apr_off_t length,
144 apr_off_t offset, apr_pool_t *scratch_pool)
146 /* XXX: The final offset may not be the one we asked for.
149 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150 return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151 NULL, NULL, scratch_pool);
155 /* Map or read a file at PATH. *BUFFER will point to the file
156 * contents; if the file was mapped, *FILE and *MM will contain the
157 * mmap context; otherwise they will be NULL. SIZE will contain the
158 * file size. Allocate from POOL.
161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162 #define MMAP_T_ARG(NAME) &(NAME),
164 #define MMAP_T_PARAM(NAME)
165 #define MMAP_T_ARG(NAME)
169 map_or_read_file(apr_file_t **file,
171 char **buffer, apr_size_t *size_p,
172 const char *path, apr_pool_t *pool)
180 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
183 if (finfo.size > APR_SIZE_MAX)
185 return svn_error_createf(APR_ENOMEM, NULL,
186 _("File '%s' is too large to be read in "
190 size = (apr_size_t) finfo.size;
192 if (size > APR_MMAP_THRESHOLD)
194 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195 if (rv == APR_SUCCESS)
201 /* Clear *MM because output parameters are undefined on error. */
205 /* On failure we just fall through and try reading the file into
209 #endif /* APR_HAS_MMAP */
211 if (*buffer == NULL && size > 0)
213 *buffer = apr_palloc(pool, size);
215 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
217 /* Since we have the entire contents of the file we can
220 SVN_ERR(svn_io_file_close(*file, pool));
231 /* For all files in the FILE array, increment the curp pointer. If a file
232 * points before the beginning of file, let it point at the first byte again.
233 * If the end of the current chunk is reached, read the next chunk in the
234 * buffer and point curp to the start of the chunk. If EOF is reached, set
235 * curp equal to endp to indicate EOF. */
236 #define INCREMENT_POINTERS(all_files, files_len, pool) \
238 apr_size_t svn_macro__i; \
240 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \
242 if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
243 (all_files)[svn_macro__i].curp++; \
245 SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool))); \
250 /* For all files in the FILE array, decrement the curp pointer. If the
251 * start of a chunk is reached, read the previous chunk in the buffer and
252 * point curp to the last byte of the chunk. If the beginning of a FILE is
253 * reached, set chunk to -1 to indicate BOF. */
254 #define DECREMENT_POINTERS(all_files, files_len, pool) \
256 apr_size_t svn_macro__i; \
258 for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++) \
260 if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
261 (all_files)[svn_macro__i].curp--; \
263 SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool))); \
269 increment_chunk(struct file_info *file, apr_pool_t *pool)
272 apr_off_t last_chunk = offset_to_chunk(file->size);
274 if (file->chunk == -1)
276 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
278 file->curp = file->buffer;
280 else if (file->chunk == last_chunk)
282 /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283 file->curp = file->endp;
287 /* There are still chunks left. Read next chunk and reset pointers. */
289 length = file->chunk == last_chunk ?
290 offset_in_chunk(file->size) : CHUNK_SIZE;
291 SVN_ERR(read_chunk(file->file, file->buffer,
292 length, chunk_to_offset(file->chunk),
294 file->endp = file->buffer + length;
295 file->curp = file->buffer;
303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
305 if (file->chunk == 0)
307 /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308 by setting chunk = -1 and curp = endp - 1. Both conditions are
309 important. They help the increment step to catch the BOF situation
310 in an efficient way. */
312 file->curp = file->endp - 1;
316 /* Read previous chunk and reset pointers. */
318 SVN_ERR(read_chunk(file->file, file->buffer,
319 CHUNK_SIZE, chunk_to_offset(file->chunk),
321 file->endp = file->buffer + CHUNK_SIZE;
322 file->curp = file->endp - 1;
329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
330 * the file (this can happen while scanning backwards). This is the case if
331 * one of them has chunk == -1. */
333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
337 for (i = 0; i < file_len; i++)
338 if (file[i].chunk == -1)
344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
345 * one of them has curp == endp (this can only happen at the last chunk)) */
347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
351 for (i = 0; i < file_len; i++)
352 if (file[i].curp == file[i].endp)
358 /* Quickly determine whether there is an eol char in CHUNK.
359 * (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
362 #if SVN_UNALIGNED_ACCESS_IS_OK
363 static svn_boolean_t contains_eol(apr_uintptr_t chunk)
365 apr_uintptr_t r_test = chunk ^ SVN__R_MASK;
366 apr_uintptr_t n_test = chunk ^ SVN__N_MASK;
368 r_test |= (r_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
369 n_test |= (n_test & SVN__LOWER_7BITS_SET) + SVN__LOWER_7BITS_SET;
371 return (r_test & n_test & SVN__BIT_7_SET) != SVN__BIT_7_SET;
375 /* Find the prefix which is identical between all elements of the FILE array.
376 * Return the number of prefix lines in PREFIX_LINES. REACHED_ONE_EOF will be
377 * set to TRUE if one of the FILEs reached its end while scanning prefix,
378 * i.e. at least one file consisted entirely of prefix. Otherwise,
379 * REACHED_ONE_EOF is set to FALSE.
381 * After this function is finished, the buffers, chunks, curp's and endp's
382 * of the FILEs are set to point at the first byte after the prefix. */
384 find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
385 struct file_info file[], apr_size_t file_len,
388 svn_boolean_t had_cr = FALSE;
389 svn_boolean_t is_match;
393 *reached_one_eof = FALSE;
395 for (i = 1, is_match = TRUE; i < file_len; i++)
396 is_match = is_match && *file[0].curp == *file[i].curp;
399 #if SVN_UNALIGNED_ACCESS_IS_OK
400 apr_ssize_t max_delta, delta;
401 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
403 /* ### TODO: see if we can take advantage of
404 diff options like ignore_eol_style or ignore_space. */
405 /* check for eol, and count */
406 if (*file[0].curp == '\r')
411 else if (*file[0].curp == '\n' && !had_cr)
420 INCREMENT_POINTERS(file, file_len, pool);
422 #if SVN_UNALIGNED_ACCESS_IS_OK
424 /* Try to advance as far as possible with machine-word granularity.
425 * Determine how far we may advance with chunky ops without reaching
426 * endp for any of the files.
427 * Signedness is important here if curp gets close to endp.
429 max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
430 for (i = 1; i < file_len; i++)
432 delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
433 if (delta < max_delta)
438 for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
440 apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
441 if (contains_eol(chunk))
444 for (i = 1; i < file_len; i++)
445 if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
457 /* We either found a mismatch or an EOL at or shortly behind curp+delta
458 * or we cannot proceed with chunky ops without exceeding endp.
459 * In any case, everything up to curp + delta is equal and not an EOL.
461 for (i = 0; i < file_len; i++)
462 file[i].curp += delta;
464 /* Skipped data without EOL markers, so last char was not a CR. */
469 *reached_one_eof = is_one_at_eof(file, file_len);
470 if (*reached_one_eof)
473 for (i = 1, is_match = TRUE; i < file_len; i++)
474 is_match = is_match && *file[0].curp == *file[i].curp;
479 /* Check if we ended in the middle of a \r\n for one file, but \r for
480 another. If so, back up one byte, so the next loop will back up
481 the entire line. Also decrement lines, since we counted one
482 too many for the \r. */
483 svn_boolean_t ended_at_nonmatching_newline = FALSE;
484 for (i = 0; i < file_len; i++)
485 if (file[i].curp < file[i].endp)
486 ended_at_nonmatching_newline = ended_at_nonmatching_newline
487 || *file[i].curp == '\n';
488 if (ended_at_nonmatching_newline)
491 DECREMENT_POINTERS(file, file_len, pool);
495 /* Back up one byte, so we point at the last identical byte */
496 DECREMENT_POINTERS(file, file_len, pool);
498 /* Back up to the last eol sequence (\n, \r\n or \r) */
499 while (!is_one_at_bof(file, file_len) &&
500 *file[0].curp != '\n' && *file[0].curp != '\r')
501 DECREMENT_POINTERS(file, file_len, pool);
503 /* Slide one byte forward, to point past the eol sequence */
504 INCREMENT_POINTERS(file, file_len, pool);
506 *prefix_lines = lines;
512 /* The number of identical suffix lines to keep with the middle section. These
513 * lines are not eliminated as suffix, and can be picked up by the token
514 * parsing and lcs steps. This is mainly for backward compatibility with
515 * the previous diff (and blame) output (if there are multiple diff solutions,
516 * our lcs algorithm prefers taking common lines from the start, rather than
517 * from the end. By giving it back some suffix lines, we give it some wiggle
518 * room to find the exact same diff as before).
520 * The number 50 is more or less arbitrary, based on some real-world tests
521 * with big files (and then doubling the required number to be on the safe
522 * side). This has a negligible effect on the power of the optimization. */
523 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
524 #ifndef SUFFIX_LINES_TO_KEEP
525 #define SUFFIX_LINES_TO_KEEP 50
528 /* Find the suffix which is identical between all elements of the FILE array.
529 * Return the number of suffix lines in SUFFIX_LINES.
531 * Before this function is called the FILEs' pointers and chunks should be
532 * positioned right after the identical prefix (which is the case after
533 * find_identical_prefix), so we can determine where suffix scanning should
534 * ultimately stop. */
536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537 apr_size_t file_len, apr_pool_t *pool)
539 struct file_info file_for_suffix[4] = { { 0 } };
541 apr_off_t suffix_min_chunk0;
542 apr_off_t suffix_min_offset0;
543 apr_off_t min_file_size;
544 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545 svn_boolean_t is_match;
547 svn_boolean_t had_nl;
550 /* Initialize file_for_suffix[].
551 Read last chunk, position curp at last byte. */
552 for (i = 0; i < file_len; i++)
554 file_for_suffix[i].path = file[i].path;
555 file_for_suffix[i].file = file[i].file;
556 file_for_suffix[i].size = file[i].size;
557 file_for_suffix[i].chunk =
558 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559 length[i] = offset_in_chunk(file_for_suffix[i].size);
562 /* last chunk is an empty chunk -> start at next-to-last chunk */
563 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564 length[i] = CHUNK_SIZE;
567 if (file_for_suffix[i].chunk == file[i].chunk)
569 /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570 file_for_suffix[i].buffer = file[i].buffer;
574 /* There is more than 1 chunk,
575 so allocate full chunk size buffer */
576 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577 SVN_ERR(read_chunk(file_for_suffix[i].file,
578 file_for_suffix[i].buffer, length[i],
579 chunk_to_offset(file_for_suffix[i].chunk),
582 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583 file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
586 /* Get the chunk and pointer offset (for file[0]) at which we should stop
587 scanning backward for the identical suffix, i.e. when we reach prefix. */
588 suffix_min_chunk0 = file[0].chunk;
589 suffix_min_offset0 = file[0].curp - file[0].buffer;
591 /* Compensate if other files are smaller than file[0] */
592 for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593 if (file[i].size < min_file_size)
594 min_file_size = file[i].size;
595 if (file[0].size > min_file_size)
597 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
601 /* Scan backwards until mismatch or until we reach the prefix. */
602 for (i = 1, is_match = TRUE; i < file_len; i++)
604 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605 if (is_match && *file_for_suffix[0].curp != '\r'
606 && *file_for_suffix[0].curp != '\n')
607 /* Count an extra line for the last line not ending in an eol. */
613 svn_boolean_t reached_prefix;
614 #if SVN_UNALIGNED_ACCESS_IS_OK
615 /* Initialize the minimum pointer positions. */
616 const char *min_curp[4];
617 svn_boolean_t can_read_word;
618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
620 /* ### TODO: see if we can take advantage of
621 diff options like ignore_eol_style or ignore_space. */
622 /* check for eol, and count */
623 if (*file_for_suffix[0].curp == '\n')
628 else if (*file_for_suffix[0].curp == '\r' && !had_nl)
637 DECREMENT_POINTERS(file_for_suffix, file_len, pool);
639 #if SVN_UNALIGNED_ACCESS_IS_OK
640 for (i = 0; i < file_len; i++)
641 min_curp[i] = file_for_suffix[i].buffer;
643 /* If we are in the same chunk that contains the last part of the common
644 prefix, use the min_curp[0] pointer to make sure we don't get a
645 suffix that overlaps the already determined common prefix. */
646 if (file_for_suffix[0].chunk == suffix_min_chunk0)
647 min_curp[0] += suffix_min_offset0;
649 /* Scan quickly by reading with machine-word granularity. */
650 for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651 can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
654 while (can_read_word)
658 /* For each file curp is positioned at the current byte, but we
659 want to examine the current byte and the ones before the current
660 location as one machine word. */
662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663 - sizeof(apr_uintptr_t));
664 if (contains_eol(chunk))
667 for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
669 == *(const apr_uintptr_t *)
670 (file_for_suffix[i].curp + 1
671 - sizeof(apr_uintptr_t)));
676 for (i = 0; i < file_len; i++)
678 file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679 can_read_word = can_read_word
680 && ( (file_for_suffix[i].curp + 1
681 - sizeof(apr_uintptr_t))
685 /* We skipped some bytes, so there are no closing EOLs */
689 /* The > min_curp[i] check leaves at least one final byte for checking
690 in the non block optimized case below. */
693 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694 && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695 == suffix_min_offset0;
696 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
700 for (i = 1; i < file_len; i++)
702 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
705 /* Slide one byte forward, to point at the first byte of identical suffix */
706 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
708 /* Slide forward until we find an eol sequence to add the rest of the line
709 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710 one file reaches its end. */
713 svn_boolean_t had_cr = FALSE;
714 while (!is_one_at_eof(file_for_suffix, file_len)
715 && *file_for_suffix[0].curp != '\n'
716 && *file_for_suffix[0].curp != '\r')
717 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
719 /* Slide one or two more bytes, to point past the eol. */
720 if (!is_one_at_eof(file_for_suffix, file_len)
721 && *file_for_suffix[0].curp == '\r')
725 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
727 if (!is_one_at_eof(file_for_suffix, file_len)
728 && *file_for_suffix[0].curp == '\n')
732 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
735 while (!is_one_at_eof(file_for_suffix, file_len)
736 && suffix_lines_to_keep--);
738 if (is_one_at_eof(file_for_suffix, file_len))
741 /* Save the final suffix information in the original file_info */
742 for (i = 0; i < file_len; i++)
744 file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745 file[i].suffix_offset_in_chunk =
746 file_for_suffix[i].curp - file_for_suffix[i].buffer;
749 *suffix_lines = lines;
755 /* Let FILE stand for the array of file_info struct elements of BATON->files
756 * that are indexed by the elements of the DATASOURCE array.
757 * BATON's type is (svn_diff__file_baton_t *).
759 * For each file in the FILE array, open the file at FILE.path; initialize
760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761 * buffer and read the first chunk. Then find the prefix and suffix lines
762 * which are identical between all the files. Return the number of identical
763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
766 * Finding the identical prefix and suffix allows us to exclude those from the
767 * rest of the diff algorithm, which increases performance by reducing the
770 * Implements svn_diff_fns2_t::datasources_open. */
772 datasources_open(void *baton,
773 apr_off_t *prefix_lines,
774 apr_off_t *suffix_lines,
775 const svn_diff_datasource_e *datasources,
776 apr_size_t datasources_len)
778 svn_diff__file_baton_t *file_baton = baton;
779 struct file_info files[4];
780 apr_finfo_t finfo[4];
782 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
783 svn_boolean_t reached_one_eof;
787 /* Make sure prefix_lines and suffix_lines are set correctly, even if we
788 * exit early because one of the files is empty. */
792 /* Open datasources and read first chunk */
793 for (i = 0; i < datasources_len; i++)
795 struct file_info *file
796 = &file_baton->files[datasource_to_index(datasources[i])];
797 SVN_ERR(svn_io_file_open(&file->file, file->path,
798 APR_READ, APR_OS_DEFAULT, file_baton->pool));
799 SVN_ERR(svn_io_file_info_get(&finfo[i], APR_FINFO_SIZE,
800 file->file, file_baton->pool));
801 file->size = finfo[i].size;
802 length[i] = finfo[i].size > CHUNK_SIZE ? CHUNK_SIZE : finfo[i].size;
803 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
804 SVN_ERR(read_chunk(file->file, file->buffer,
805 length[i], 0, file_baton->pool));
806 file->endp = file->buffer + length[i];
807 file->curp = file->buffer;
808 /* Set suffix_start_chunk to a guard value, so if suffix scanning is
809 * skipped because one of the files is empty, or because of
810 * reached_one_eof, we can still easily check for the suffix during
811 * token reading (datasource_get_next_token). */
812 file->suffix_start_chunk = -1;
817 for (i = 0; i < datasources_len; i++)
819 /* There will not be any identical prefix/suffix, so we're done. */
822 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
824 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
825 files, datasources_len, file_baton->pool));
827 if (!reached_one_eof)
828 /* No file consisted totally of identical prefix,
829 * so there may be some identical suffix. */
830 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
835 /* Copy local results back to baton. */
836 for (i = 0; i < datasources_len; i++)
837 file_baton->files[datasource_to_index(datasources[i])] = files[i];
843 /* Implements svn_diff_fns2_t::datasource_close */
845 datasource_close(void *baton, svn_diff_datasource_e datasource)
847 /* Do nothing. The compare_token function needs previous datasources
848 * to stay available until all datasources are processed.
854 /* Implements svn_diff_fns2_t::datasource_get_next_token */
856 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
857 svn_diff_datasource_e datasource)
859 svn_diff__file_baton_t *file_baton = baton;
860 svn_diff__file_token_t *file_token;
861 struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
865 apr_off_t last_chunk;
868 /* Did the last chunk end in a CR character? */
869 svn_boolean_t had_cr = FALSE;
876 last_chunk = offset_to_chunk(file->size);
878 /* Are we already at the end of a chunk? */
882 if (last_chunk == file->chunk)
883 return SVN_NO_ERROR; /* EOF */
885 /* Or right before an identical suffix in the next chunk? */
886 if (file->chunk + 1 == file->suffix_start_chunk
887 && file->suffix_offset_in_chunk == 0)
891 /* Stop when we encounter the identical suffix. If suffix scanning was not
892 * performed, suffix_start_chunk will be -1, so this condition will never
894 if (file->chunk == file->suffix_start_chunk
895 && (curp - file->buffer) == file->suffix_offset_in_chunk)
898 /* Allocate a new token, or fetch one from the "reusable tokens" list. */
899 file_token = file_baton->tokens;
902 file_baton->tokens = file_token->next;
906 file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
909 file_token->datasource = datasource;
910 file_token->offset = chunk_to_offset(file->chunk)
911 + (curp - file->buffer);
912 file_token->norm_offset = file_token->offset;
913 file_token->raw_length = 0;
914 file_token->length = 0;
918 eol = svn_eol__find_eol_start(curp, endp - curp);
921 had_cr = (*eol == '\r');
923 /* If we have the whole eol sequence in the chunk... */
924 if (!(had_cr && eol == endp))
926 /* Also skip past the '\n' in an '\r\n' sequence. */
927 if (had_cr && *eol == '\n')
933 if (file->chunk == last_chunk)
939 length = endp - curp;
940 file_token->raw_length += length;
944 svn_diff__normalize_buffer(&c, &length,
945 &file->normalize_state,
946 curp, file_baton->options);
947 if (file_token->length == 0)
949 /* When we are reading the first part of the token, move the
950 normalized offset past leading ignored characters, if any. */
951 file_token->norm_offset += (c - curp);
953 file_token->length += length;
954 h = svn__adler32(h, c, length);
957 curp = endp = file->buffer;
959 length = file->chunk == last_chunk ?
960 offset_in_chunk(file->size) : CHUNK_SIZE;
964 /* Issue #4283: Normally we should have checked for reaching the skipped
965 suffix here, but because we assume that a suffix always starts on a
966 line and token boundary we rely on catching the suffix earlier in this
969 When changing things here, make sure the whitespace settings are
970 applied, or we might not reach the exact suffix boundary as token
972 SVN_ERR(read_chunk(file->file,
974 chunk_to_offset(file->chunk),
977 /* If the last chunk ended in a CR, we're done. */
988 file_token->raw_length += length;
991 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
992 * with a spurious empty token. Avoid returning it.
993 * Note that we use the unnormalized length; we don't want a line containing
994 * only spaces (and no trailing newline) to appear like a non-existent
996 if (file_token->raw_length > 0)
999 svn_diff__normalize_buffer(&c, &length,
1000 &file->normalize_state,
1001 curp, file_baton->options);
1002 if (file_token->length == 0)
1004 /* When we are reading the first part of the token, move the
1005 normalized offset past leading ignored characters, if any. */
1006 file_token->norm_offset += (c - curp);
1009 file_token->length += length;
1011 *hash = svn__adler32(h, c, length);
1012 *token = file_token;
1015 return SVN_NO_ERROR;
1018 #define COMPARE_CHUNK_SIZE 4096
1020 /* Implements svn_diff_fns2_t::token_compare */
1021 static svn_error_t *
1022 token_compare(void *baton, void *token1, void *token2, int *compare)
1024 svn_diff__file_baton_t *file_baton = baton;
1025 svn_diff__file_token_t *file_token[2];
1026 char buffer[2][COMPARE_CHUNK_SIZE];
1028 apr_off_t offset[2];
1029 struct file_info *file[2];
1030 apr_off_t length[2];
1031 apr_off_t total_length;
1032 /* How much is left to read of each token from the file. */
1033 apr_off_t raw_length[2];
1035 svn_diff__normalize_state_t state[2];
1037 file_token[0] = token1;
1038 file_token[1] = token2;
1039 if (file_token[0]->length < file_token[1]->length)
1042 return SVN_NO_ERROR;
1045 if (file_token[0]->length > file_token[1]->length)
1048 return SVN_NO_ERROR;
1051 total_length = file_token[0]->length;
1052 if (total_length == 0)
1055 return SVN_NO_ERROR;
1058 for (i = 0; i < 2; ++i)
1060 int idx = datasource_to_index(file_token[i]->datasource);
1062 file[i] = &file_baton->files[idx];
1063 offset[i] = file_token[i]->norm_offset;
1064 state[i] = svn_diff__normalize_state_normal;
1066 if (offset_to_chunk(offset[i]) == file[i]->chunk)
1068 /* If the start of the token is in memory, the entire token is
1071 bufp[i] = file[i]->buffer;
1072 bufp[i] += offset_in_chunk(offset[i]);
1074 length[i] = total_length;
1083 /* When we skipped the first part of the token via the whitespace
1084 normalization we must reduce the raw length of the token */
1085 skipped = (file_token[i]->norm_offset - file_token[i]->offset);
1087 raw_length[i] = file_token[i]->raw_length - skipped;
1094 for (i = 0; i < 2; i++)
1098 /* Error if raw_length is 0, that's an unexpected change
1099 * of the file that can happen when ignoring whitespace
1100 * and that can lead to an infinite loop. */
1101 if (raw_length[i] == 0)
1102 return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
1104 _("The file '%s' changed unexpectedly"
1108 /* Read a chunk from disk into a buffer */
1109 bufp[i] = buffer[i];
1110 length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
1111 COMPARE_CHUNK_SIZE : raw_length[i];
1113 SVN_ERR(read_chunk(file[i]->file,
1114 bufp[i], length[i], offset[i],
1116 offset[i] += length[i];
1117 raw_length[i] -= length[i];
1118 /* bufp[i] gets reset to buffer[i] before reading each chunk,
1119 so, overwriting it isn't a problem */
1120 svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
1121 bufp[i], file_baton->options);
1123 /* assert(length[i] == file_token[i]->length); */
1127 len = length[0] > length[1] ? length[1] : length[0];
1129 /* Compare two chunks (that could be entire tokens if they both reside
1132 *compare = memcmp(bufp[0], bufp[1], (size_t) len);
1134 return SVN_NO_ERROR;
1136 total_length -= len;
1142 while(total_length > 0);
1145 return SVN_NO_ERROR;
1149 /* Implements svn_diff_fns2_t::token_discard */
1151 token_discard(void *baton, void *token)
1153 svn_diff__file_baton_t *file_baton = baton;
1154 svn_diff__file_token_t *file_token = token;
1156 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1157 file_token->next = file_baton->tokens;
1158 file_baton->tokens = file_token;
1162 /* Implements svn_diff_fns2_t::token_discard_all */
1164 token_discard_all(void *baton)
1166 svn_diff__file_baton_t *file_baton = baton;
1168 /* Discard all memory in use by the tokens, and close all open files. */
1169 svn_pool_clear(file_baton->pool);
1173 static const svn_diff_fns2_t svn_diff__file_vtable =
1177 datasource_get_next_token,
1183 /* Id for the --ignore-eol-style option, which doesn't have a short name. */
1184 #define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256
1186 /* Options supported by svn_diff_file_options_parse(). */
1187 static const apr_getopt_option_t diff_options[] =
1189 { "ignore-space-change", 'b', 0, NULL },
1190 { "ignore-all-space", 'w', 0, NULL },
1191 { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
1192 { "show-c-function", 'p', 0, NULL },
1193 /* ### For compatibility; we don't support the argument to -u, because
1194 * ### we don't have optional argument support. */
1195 { "unified", 'u', 0, NULL },
1196 { "context", 'U', 1, NULL },
1197 { NULL, 0, 0, NULL }
1200 svn_diff_file_options_t *
1201 svn_diff_file_options_create(apr_pool_t *pool)
1203 svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1205 opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1210 /* A baton for use with opt_parsing_error_func(). */
1211 struct opt_parsing_error_baton_t
1217 /* Store an error message from apr_getopt_long(). Set BATON->err to a new
1218 * error with a message generated from FMT and the remaining arguments.
1219 * Implements apr_getopt_err_fn_t. */
1221 opt_parsing_error_func(void *baton,
1222 const char *fmt, ...)
1224 struct opt_parsing_error_baton_t *b = baton;
1225 const char *message;
/* Expand FMT and its variadic arguments into a string allocated from
 * B->pool. */
1229 message = apr_pvsprintf(b->pool, fmt, ap);
1232 /* Skip leading ": " (if present, which it always is in known cases). */
1233 if (strncmp(message, ": ", 2) == 0)
/* NOTE(review): an error already stored in B->err is overwritten here
 * without being cleared -- confirm this callback fires at most once per
 * parse. */
1236 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
/* Parse the diff option strings in ARGS and record them in OPTIONS.
 *
 * ARGS holds char pointer elements, each one command-line style option
 * word.  The recognised options are those listed in diff_options.
 *
 * Returns an SVN_ERR_INVALID_DIFF_OPTION error when apr_getopt_long()
 * rejects an option or when a non-option argument is left over, and
 * the error from svn_cstring_atoi() when the context size cannot be
 * parsed. */
1240 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1241 const apr_array_header_t *args,
1245 struct opt_parsing_error_baton_t opt_parsing_error_baton;
1246 /* Make room for each option (starting at index 1) plus trailing NULL. */
1247 const char **argv = apr_palloc(pool, sizeof(char*) * (args->nelts + 2));
1249 opt_parsing_error_baton.err = NULL;
1250 opt_parsing_error_baton.pool = pool;
/* Copy the option pointers into argv[1..nelts]; the strings themselves
 * are shared with ARGS, not duplicated. */
1253 memcpy(argv + 1, args->elts, sizeof(char*) * args->nelts);
1254 argv[args->nelts + 1] = NULL;
1256 apr_getopt_init(&os, pool, args->nelts + 1, argv);
1258 /* Capture any error message from apr_getopt_long(). This will typically
1259 * say which option is wrong, which we would not otherwise know. */
1260 os->errfn = opt_parsing_error_func;
1261 os->errarg = &opt_parsing_error_baton;
1265 const char *opt_arg;
1267 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1269 if (APR_STATUS_IS_EOF(err))
1272 /* Wrap apr_getopt_long()'s error message. Its doc string implies
1273 * it always will produce one, but never mind if it doesn't. Avoid
1274 * using the message associated with the return code ERR, because
1275 * it refers to the "command line" which may be misleading here. */
1276 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1277 opt_parsing_error_baton.err,
1278 _("Error in options to internal diff"));
1283 /* -w takes precedence over -b. */
1284 if (! options->ignore_space)
1285 options->ignore_space = svn_diff_file_ignore_space_change;
1288 options->ignore_space = svn_diff_file_ignore_space_all;
1290 case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1291 options->ignore_eol_style = TRUE;
1294 options->show_c_function = TRUE;
/* Parse the option argument straight into the context size. */
1297 SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1304 /* Check for spurious arguments. */
1305 if (os->ind < os->argc)
1306 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1307 _("Invalid argument '%s' in diff options"),
1310 return SVN_NO_ERROR;
/* Compute the two-way diff of the files ORIGINAL and MODIFIED and store
 * the result in *DIFF.
 *
 * OPTIONS is stored in the file baton that is passed to the callbacks of
 * svn_diff__file_vtable.  Working data lives in a subpool of POOL which
 * is destroyed before a successful return.
 *
 * NOTE(review): when svn_diff_diff_2() fails, SVN_ERR returns early and
 * the subpool is only released once POOL itself is cleared or destroyed. */
1314 svn_diff_file_diff_2(svn_diff_t **diff,
1315 const char *original,
1316 const char *modified,
1317 const svn_diff_file_options_t *options,
1320 svn_diff__file_baton_t baton = { 0 };
1322 baton.options = options;
1323 baton.files[0].path = original;
1324 baton.files[1].path = modified;
1325 baton.pool = svn_pool_create(pool);
1327 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1329 svn_pool_destroy(baton.pool);
1330 return SVN_NO_ERROR;
/* Compute the three-way diff of the files ORIGINAL, MODIFIED and LATEST
 * and store the result in *DIFF.
 *
 * The paths are stored in baton.files[0], [1] and [2] in that order.
 * OPTIONS is stored in the file baton that is passed to the callbacks of
 * svn_diff__file_vtable.  Working data lives in a subpool of POOL which
 * is destroyed before a successful return.
 *
 * NOTE(review): when svn_diff_diff3_2() fails, SVN_ERR returns early and
 * the subpool is only released once POOL itself is cleared or destroyed. */
1334 svn_diff_file_diff3_2(svn_diff_t **diff,
1335 const char *original,
1336 const char *modified,
1338 const svn_diff_file_options_t *options,
1341 svn_diff__file_baton_t baton = { 0 };
1343 baton.options = options;
1344 baton.files[0].path = original;
1345 baton.files[1].path = modified;
1346 baton.files[2].path = latest;
1347 baton.pool = svn_pool_create(pool);
1349 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1351 svn_pool_destroy(baton.pool);
1352 return SVN_NO_ERROR;
/* Compute the four-way diff of the files ORIGINAL, MODIFIED, LATEST and
 * ANCESTOR and store the result in *DIFF.
 *
 * The paths are stored in baton.files[0] through [3] in that order.
 * OPTIONS is stored in the file baton that is passed to the callbacks of
 * svn_diff__file_vtable.  Working data lives in a subpool of POOL which
 * is destroyed before a successful return.
 *
 * NOTE(review): when svn_diff_diff4_2() fails, SVN_ERR returns early and
 * the subpool is only released once POOL itself is cleared or destroyed. */
1356 svn_diff_file_diff4_2(svn_diff_t **diff,
1357 const char *original,
1358 const char *modified,
1360 const char *ancestor,
1361 const svn_diff_file_options_t *options,
1364 svn_diff__file_baton_t baton = { 0 };
1366 baton.options = options;
1367 baton.files[0].path = original;
1368 baton.files[1].path = modified;
1369 baton.files[2].path = latest;
1370 baton.files[3].path = ancestor;
1371 baton.pool = svn_pool_create(pool);
1373 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1375 svn_pool_destroy(baton.pool);
1376 return SVN_NO_ERROR;
1380 /** Display unified context diffs **/
1382 /* Maximum length of the extra context to show when show_c_function is set.
1383 * GNU diff uses 40, let's be brave and use 50 instead. */
1384 #define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50
/* State shared by the unified-diff output callbacks.  In the two-element
 * arrays, index 0 refers to the original file and index 1 to the
 * modified file. */
1385 typedef struct svn_diff__file_output_baton_t
/* Destination of the unified diff, and the encoding used for the hunk
 * markers and headers. */
1387 svn_stream_t *output_stream;
1388 const char *header_encoding;
1390 /* Cached markers, in header_encoding. */
1391 const char *context_str;
1392 const char *delete_str;
1393 const char *insert_str;
/* Paths of the two files and their open file handles. */
1395 const char *path[2];
1396 apr_file_t *file[2];
/* Number of lines consumed so far from each file. */
1398 apr_off_t current_line[2];
/* Read buffer for each file and the number of unconsumed bytes in it. */
1400 char buffer[2][4096];
1401 apr_size_t length[2];
/* Start line and line count of the pending hunk on each side, and the
 * hunk text collected so far. */
1404 apr_off_t hunk_start[2];
1405 apr_off_t hunk_length[2];
1406 svn_stringbuf_t *hunk;
1408 /* Should we emit C functions in the unified diff header */
1409 svn_boolean_t show_c_function;
1410 /* Extra strings to skip over if we match. */
1411 apr_array_header_t *extra_skip_match;
1412 /* "Context" to append to the @@ line when the show_c_function option
1414 svn_stringbuf_t *extra_context;
1415 /* Extra context for the current hunk. */
1416 char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];
1421 } svn_diff__file_output_baton_t;
/* What output_unified_line() does with the line it consumes: skip it
 * without adding it to the hunk, or add it to the hunk as a context,
 * deleted or inserted line with the matching marker string. */
1423 typedef enum svn_diff__file_output_unified_type_e
1425 svn_diff__file_output_unified_skip,
1426 svn_diff__file_output_unified_context,
1427 svn_diff__file_output_unified_delete,
1428 svn_diff__file_output_unified_insert
1429 } svn_diff__file_output_unified_type_e;
/* Consume one line of BATON->file[IDX] and advance
 * BATON->current_line[IDX].
 *
 * TYPE selects what happens to the line.  For
 * svn_diff__file_output_unified_skip nothing is added to BATON->hunk.
 * For the other types the matching marker string (context_str,
 * delete_str or insert_str) is appended to BATON->hunk, followed by the
 * bytes of the line, and the hunk_length counter of each affected side
 * is incremented.
 *
 * With BATON->show_c_function set, a skipped or context line that starts
 * with a letter, a dollar sign or an underscore and does not match
 * BATON->extra_skip_match is collected into BATON->extra_context after
 * emptying it.
 *
 * Input comes from BATON->buffer[IDX], refilled with svn_io_file_read()
 * when the buffered data runs out; a line may therefore span several
 * reads, including a CR/LF pair split across two reads.  When EOF is
 * reached while a non-skipped line was being output, the no-newline
 * message may be appended to the hunk. */
1432 static svn_error_t *
1433 output_unified_line(svn_diff__file_output_baton_t *baton,
1434 svn_diff__file_output_unified_type_e type, int idx)
1440 svn_boolean_t bytes_processed = FALSE;
1441 svn_boolean_t had_cr = FALSE;
1442 /* Are we collecting extra context? */
1443 svn_boolean_t collect_extra = FALSE;
1445 length = baton->length[idx];
1446 curp = baton->curp[idx];
1448 /* Lazily update the current line even if we're at EOF.
1449 * This way we fake output of context at EOF
1451 baton->current_line[idx]++;
1453 if (length == 0 && apr_file_eof(baton->file[idx]))
1455 return SVN_NO_ERROR;
1462 if (!bytes_processed)
1466 case svn_diff__file_output_unified_context:
1467 svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
1468 baton->hunk_length[0]++;
1469 baton->hunk_length[1]++;
1471 case svn_diff__file_output_unified_delete:
1472 svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
1473 baton->hunk_length[0]++;
1475 case svn_diff__file_output_unified_insert:
1476 svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
1477 baton->hunk_length[1]++;
1483 if (baton->show_c_function
1484 && (type == svn_diff__file_output_unified_skip
1485 || type == svn_diff__file_output_unified_context)
1486 && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
1487 && !svn_cstring_match_glob_list(curp,
1488 baton->extra_skip_match))
1490 svn_stringbuf_setempty(baton->extra_context);
1491 collect_extra = TRUE;
1495 eol = svn_eol__find_eol_start(curp, length);
1501 had_cr = (*eol == '\r');
1503 len = (apr_size_t)(eol - curp);
1505 if (! had_cr || len < length)
1507 if (had_cr && *eol == '\n')
1515 if (type != svn_diff__file_output_unified_skip)
1517 svn_stringbuf_appendbytes(baton->hunk, curp, len);
1521 svn_stringbuf_appendbytes(baton->extra_context,
1525 baton->curp[idx] = eol;
1526 baton->length[idx] = length;
1534 if (type != svn_diff__file_output_unified_skip)
1536 svn_stringbuf_appendbytes(baton->hunk, curp, length);
1541 svn_stringbuf_appendbytes(baton->extra_context, curp, length);
1544 bytes_processed = TRUE;
1547 curp = baton->buffer[idx];
1548 length = sizeof(baton->buffer[idx]);
1550 err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);
1552 /* If the last chunk ended with a CR, we look for an LF at the start
1556 if (! err && length > 0 && *curp == '\n')
1558 if (type != svn_diff__file_output_unified_skip)
1560 svn_stringbuf_appendbyte(baton->hunk, *curp);
1562 /* We don't append the LF to extra_context, since it would
1563 * just be stripped anyway. */
1568 baton->curp[idx] = curp;
1569 baton->length[idx] = length;
1576 if (err && ! APR_STATUS_IS_EOF(err->apr_err))
1579 if (err && APR_STATUS_IS_EOF(err->apr_err))
1581 svn_error_clear(err);
1582 /* Special case if we reach the end of file AND the last line is in the
1583 changed range AND the file doesn't end with a newline */
1584 if (bytes_processed && (type != svn_diff__file_output_unified_skip)
1587 SVN_ERR(svn_diff__unified_append_no_newline_msg(
1588 baton->hunk, baton->header_encoding, baton->pool));
1591 baton->length[idx] = 0;
1594 return SVN_NO_ERROR;
/* Call output_unified_line() with TYPE on file SOURCE of OUTPUT_BATON
 * until current_line[SOURCE] has reached UNTIL.  Does nothing when the
 * file is already at or past that line.  Stops at the first error. */
1597 static APR_INLINE svn_error_t *
1598 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1600 svn_diff__file_output_unified_type_e type,
1603 while (output_baton->current_line[source] < until)
1605 SVN_ERR(output_unified_line(output_baton, type, source));
1607 return SVN_NO_ERROR;
/* Write the pending hunk of BATON, if any, to BATON->output_stream.
 *
 * Trailing context lines from the original file are appended first, then
 * the hunk header and the collected hunk text are written, and finally
 * the hunk bookkeeping in BATON is reset for the next hunk.  Returns
 * immediately when BATON->hunk is empty. */
1610 static svn_error_t *
1611 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1613 apr_off_t target_line;
1614 apr_size_t hunk_len;
1615 apr_off_t old_start;
1616 apr_off_t new_start;
1618 if (svn_stringbuf_isempty(baton->hunk))
1620 /* Nothing to flush */
1621 return SVN_NO_ERROR;
/* Last line of trailing context: end of the hunk in the original file
 * plus the configured context size. */
1624 target_line = baton->hunk_start[0] + baton->hunk_length[0]
1625 + baton->context_size;
1627 /* Add trailing context to the hunk */
1628 SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1629 svn_diff__file_output_unified_context,
1632 old_start = baton->hunk_start[0];
1633 new_start = baton->hunk_start[1];
1635 /* If the file is non-empty, convert the line indexes from
1636 zero based to one based */
1637 if (baton->hunk_length[0])
1639 if (baton->hunk_length[1])
1642 /* Write the hunk header */
1643 SVN_ERR(svn_diff__unified_write_hunk_header(
1644 baton->output_stream, baton->header_encoding, "@@",
1645 old_start, baton->hunk_length[0],
1646 new_start, baton->hunk_length[1],
1647 baton->hunk_extra_context,
1650 /* Output the hunk content */
1651 hunk_len = baton->hunk->len;
1652 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1655 /* Prepare for the next hunk */
1656 baton->hunk_length[0] = 0;
1657 baton->hunk_length[1] = 0;
1658 baton->hunk_start[0] = 0;
1659 baton->hunk_start[1] = 0;
1660 svn_stringbuf_setempty(baton->hunk);
1662 return SVN_NO_ERROR;
/* The output_diff_modified callback of
 * svn_diff__file_output_unified_vtable; BATON is a
 * svn_diff__file_output_baton_t.
 *
 * Called for a changed range covering ORIGINAL_LENGTH lines from
 * ORIGINAL_START in the original file and MODIFIED_LENGTH lines from
 * MODIFIED_START in the modified file.  Depending on the distance to the
 * pending hunk, that hunk is either flushed and a new one started, or
 * extended with the context lines in between.  The range itself is then
 * added as deleted lines from the original file followed by inserted
 * lines from the modified file. */
1665 static svn_error_t *
1666 output_unified_diff_modified(void *baton,
1667 apr_off_t original_start, apr_off_t original_length,
1668 apr_off_t modified_start, apr_off_t modified_length,
1669 apr_off_t latest_start, apr_off_t latest_length)
1671 svn_diff__file_output_baton_t *output_baton = baton;
1672 apr_off_t context_prefix_length;
1673 apr_off_t prev_context_end;
1674 svn_boolean_t init_hunk = FALSE;
/* The leading context cannot reach back before the start of the file. */
1676 if (original_start > output_baton->context_size)
1677 context_prefix_length = output_baton->context_size;
1679 context_prefix_length = original_start;
1681 /* Calculate where the previous hunk will end if we would write it now
1682 (including the necessary context at the end) */
1683 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1685 prev_context_end = output_baton->hunk_start[0]
1686 + output_baton->hunk_length[0]
1687 + output_baton->context_size;
1691 prev_context_end = -1;
1693 if (output_baton->hunk_start[0] == 0
1694 && (original_length > 0 || modified_length > 0))
1698 /* If the changed range is far enough from the previous range, flush the current
1701 apr_off_t new_hunk_start = (original_start - context_prefix_length);
1703 if (output_baton->current_line[0] < new_hunk_start
1704 && prev_context_end <= new_hunk_start)
1706 SVN_ERR(output_unified_flush_hunk(output_baton));
1709 else if (output_baton->hunk_length[0] > 0
1710 || output_baton->hunk_length[1] > 0)
1712 /* We extend the current hunk */
1715 /* Original: Output the context preceding the changed range */
1716 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1717 svn_diff__file_output_unified_context,
1722 /* Original: Skip lines until we are at the beginning of the context we want
1724 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1725 svn_diff__file_output_unified_skip,
1726 original_start - context_prefix_length));
1728 /* Note that the above skip stores data for the show_c_function support below */
1732 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1733 && output_baton->hunk_length[1] == 0);
1735 output_baton->hunk_start[0] = original_start - context_prefix_length;
1736 output_baton->hunk_start[1] = modified_start - context_prefix_length;
1739 if (init_hunk && output_baton->show_c_function)
1742 const char *invalid_character;
1744 /* Save the extra context for later use.
1745 * Note that the last byte of the hunk_extra_context array is never
1746 * touched after it is zero-initialized, so the array is always
1748 strncpy(output_baton->hunk_extra_context,
1749 output_baton->extra_context->data,
1750 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1751 /* Trim whitespace at the end, most notably to get rid of any
1752 * newline characters. */
1753 p = strlen(output_baton->hunk_extra_context);
1755 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1757 output_baton->hunk_extra_context[--p] = '\0';
1760 svn_utf__last_valid(output_baton->hunk_extra_context,
1761 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1762 for (p = invalid_character - output_baton->hunk_extra_context;
1763 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1765 output_baton->hunk_extra_context[p] = '\0';
1769 /* Modified: Skip lines until we are at the start of the changed range */
1770 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1771 svn_diff__file_output_unified_skip,
1774 /* Original: Output the context preceding the changed range */
1775 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1776 svn_diff__file_output_unified_context,
1779 /* Both: Output the changed range */
1780 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1781 svn_diff__file_output_unified_delete,
1782 original_start + original_length));
1783 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1784 svn_diff__file_output_unified_insert,
1785 modified_start + modified_length));
1787 return SVN_NO_ERROR;
1790 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime.
 *
 * The mtime is exploded as local time, formatted with a translatable
 * strftime pattern and converted to UTF-8.  The result is allocated in
 * POOL.  Fails if PATH cannot be stat'ed or the conversion fails. */
1791 static svn_error_t *
1792 output_unified_default_hdr(const char **header, const char *path,
1795 apr_finfo_t file_info;
1796 apr_time_exp_t exploded_time;
1797 char time_buffer[64];
1798 apr_size_t time_len;
1799 const char *utf8_timestr;
1801 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1802 apr_time_exp_lt(&exploded_time, file_info.mtime);
/* NOTE(review): the status returned by apr_strftime() is not checked;
 * confirm TIME_BUFFER is always NUL-terminated before it is read below. */
1804 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1805 /* Order of date components can be different in different languages */
1806 _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1808 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1810 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1812 return SVN_NO_ERROR;
/* Output callbacks for unified diffs.  Only output_diff_modified is
 * provided; every other slot is NULL. */
1815 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1817 NULL, /* output_common */
1818 output_unified_diff_modified,
1819 NULL, /* output_diff_latest */
1820 NULL, /* output_diff_common */
1821 NULL /* output_conflict */
/* Write DIFF to OUTPUT_STREAM in unified diff format.  Nothing is
 * written when DIFF contains no differences.
 *
 * ORIGINAL_PATH and MODIFIED_PATH name the files whose lines are read
 * for the hunk text.  ORIGINAL_HEADER and MODIFIED_HEADER are the labels
 * for the file header lines; a NULL label is replaced by the path, a tab
 * and the file's mtime.  HEADER_ENCODING is the encoding used for the
 * markers and headers.
 *
 * When RELATIVE_TO_DIR is non-NULL, each path whose label is NULL must
 * be a child of that directory and is shown relative to it; otherwise an
 * SVN_ERR_BAD_RELATIVE_PATH error is returned.
 *
 * SHOW_C_FUNCTION enables collecting extra context for the hunk headers.
 * A negative CONTEXT_SIZE selects SVN_DIFF__UNIFIED_CONTEXT_SIZE.
 * CANCEL_FUNC and CANCEL_BATON are passed on to svn_diff_output2(). */
1825 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1827 const char *original_path,
1828 const char *modified_path,
1829 const char *original_header,
1830 const char *modified_header,
1831 const char *header_encoding,
1832 const char *relative_to_dir,
1833 svn_boolean_t show_c_function,
1835 svn_cancel_func_t cancel_func,
1839 if (svn_diff_contains_diffs(diff))
1841 svn_diff__file_output_baton_t baton;
1844 memset(&baton, 0, sizeof(baton));
1845 baton.output_stream = output_stream;
1847 baton.header_encoding = header_encoding;
/* The baton keeps the paths as given; the RELATIVE_TO_DIR adjustment
 * below changes only the local copies used for the default headers, so
 * the files are still opened by their original paths. */
1848 baton.path[0] = original_path;
1849 baton.path[1] = modified_path;
1850 baton.hunk = svn_stringbuf_create_empty(pool);
1851 baton.show_c_function = show_c_function;
1852 baton.extra_context = svn_stringbuf_create_empty(pool);
1853 baton.context_size = (context_size >= 0) ? context_size
1854 : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1856 if (show_c_function)
/* Lines matching these glob patterns are not used as extra context.
 * NOTE(review): the elements pushed are const char *, while the element
 * size is given as sizeof(char **); both are pointer sizes. */
1858 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1860 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1861 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1862 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
/* Convert the one-character line markers to HEADER_ENCODING once, up
 * front. */
1865 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1866 header_encoding, pool));
1867 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1868 header_encoding, pool));
1869 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1870 header_encoding, pool));
1872 if (relative_to_dir)
1874 /* Possibly adjust the "original" and "modified" paths shown in
1875 the output (see issue #2723). */
1876 const char *child_path;
1878 if (! original_header)
1880 child_path = svn_dirent_is_child(relative_to_dir,
1881 original_path, pool);
1883 original_path = child_path;
1885 return svn_error_createf(
1886 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1887 _("Path '%s' must be inside "
1888 "the directory '%s'"),
1889 svn_dirent_local_style(original_path, pool),
1890 svn_dirent_local_style(relative_to_dir,
1894 if (! modified_header)
1896 child_path = svn_dirent_is_child(relative_to_dir,
1897 modified_path, pool);
1899 modified_path = child_path;
1901 return svn_error_createf(
1902 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1903 _("Path '%s' must be inside "
1904 "the directory '%s'"),
1905 svn_dirent_local_style(modified_path, pool),
1906 svn_dirent_local_style(relative_to_dir,
1911 for (i = 0; i < 2; i++)
1913 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1914 APR_READ, APR_OS_DEFAULT, pool));
1917 if (original_header == NULL)
1919 SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1923 if (modified_header == NULL)
1925 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1929 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1930 original_header, modified_header,
/* Emit the hunks; the last one is still pending when the walk over DIFF
 * ends, so flush it explicitly. */
1933 SVN_ERR(svn_diff_output2(diff, &baton,
1934 &svn_diff__file_output_unified_vtable,
1935 cancel_func, cancel_baton));
1936 SVN_ERR(output_unified_flush_hunk(&baton));
1938 for (i = 0; i < 2; i++)
1940 SVN_ERR(svn_io_file_close(baton.file[i], pool));
1944 return SVN_NO_ERROR;
1948 /** Display diff3 **/
1950 /* A stream to remember *leading* context. Note that this stream does
1951 *not* copy the data that it is remembering; it just saves
1953 typedef struct context_saver_t {
1954 svn_stream_t *stream;
1956 const char **data; /* const char *data[context_size] */
1957 apr_size_t *len; /* apr_size_t len[context_size] */
/* Slot that the next write will fill; wraps around at context_size. */
1958 apr_size_t next_slot;
/* Number of writes recorded since the saver was created. */
1959 apr_size_t total_written;
/* Write handler of a context saver stream; BATON is its context_saver_t.
 *
 * Stores the DATA pointer and *LEN of this write in the next slot,
 * replacing the oldest entry once context_size writes have been
 * recorded.  The bytes themselves are not copied, and nothing is
 * recorded when context_size is zero.  Always succeeds. */
1963 static svn_error_t *
1964 context_saver_stream_write(void *baton,
1968 context_saver_t *cs = baton;
1970 if (cs->context_size > 0)
1972 cs->data[cs->next_slot] = data;
1973 cs->len[cs->next_slot] = *len;
1974 cs->next_slot = (cs->next_slot + 1) % cs->context_size;
1975 cs->total_written++;
1977 return SVN_NO_ERROR;
/* State shared by the merge (diff3) output callbacks.  In the
 * three-element arrays, index 0 refers to the original file, 1 to the
 * modified file and 2 to the latest file. */
1980 typedef struct svn_diff3__file_output_baton_t
/* Stream that lines and markers are currently written to. */
1982 svn_stream_t *output_stream;
1984 const char *path[3];
/* Number of lines consumed so far from each file. */
1986 apr_off_t current_line[3];
1992 /* The following four members are in the encoding used for the output. */
1993 const char *conflict_modified;
1994 const char *conflict_original;
1995 const char *conflict_separator;
1996 const char *conflict_latest;
/* End-of-line string written after each conflict marker. */
1998 const char *marker_eol;
2000 svn_diff_conflict_display_style_t conflict_style;
2003 /* cancel support */
2004 svn_cancel_func_t cancel_func;
2007 /* The rest of the fields are for
2008 svn_diff_conflict_display_only_conflicts only. Note that for
2009 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
2010 (soon after a conflict) a "trailing context stream", never the
2011 actual output stream.*/
2012 /* The actual output stream. */
2013 svn_stream_t *real_output_stream;
2014 context_saver_t *context_saver;
2015 /* Used to allocate context_saver and trailing context streams, and
2016 for some printfs. */
2018 } svn_diff3__file_output_baton_t;
/* Write the lines remembered by CS to OUTPUT_STREAM.
 *
 * The slots are visited in order starting at cs->next_slot, which is the
 * oldest entry once the saver has wrapped around, so lines come out in
 * the order they were recorded. */
2020 static svn_error_t *
2021 flush_context_saver(context_saver_t *cs,
2022 svn_stream_t *output_stream)
2025 for (i = 0; i < cs->context_size; i++)
2027 apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2030 apr_size_t len = cs->len[slot];
2031 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2034 return SVN_NO_ERROR;
/* Put FOB into save-leading-context mode.
 *
 * FOB->pool is cleared first, which discards whatever was allocated
 * there before.  A new context saver with FOB->context_size slots is
 * then allocated in that pool and its stream becomes FOB->output_stream.
 * FOB->context_size must be greater than zero. */
2038 make_context_saver(svn_diff3__file_output_baton_t *fob)
2040 context_saver_t *cs;
2042 assert(fob->context_size > 0); /* Or nothing to save */
2044 svn_pool_clear(fob->pool);
2045 cs = apr_pcalloc(fob->pool, sizeof(*cs));
2046 cs->stream = svn_stream_empty(fob->pool);
2047 svn_stream_set_baton(cs->stream, cs);
2048 svn_stream_set_write(cs->stream, context_saver_stream_write);
2049 fob->context_saver = cs;
2050 fob->output_stream = cs->stream;
2051 cs->context_size = fob->context_size;
2052 cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2053 cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2057 /* A stream which prints LINES_TO_PRINT (based on context size) lines to
2058 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2059 a context_saver; used for *trailing* context. */
2061 struct trailing_context_printer {
/* Lines still to be passed through before switching back. */
2062 apr_size_t lines_to_print;
/* The output baton whose streams this printer operates on. */
2063 svn_diff3__file_output_baton_t *fob;
/* Write handler of a trailing context stream; BATON is its
 * struct trailing_context_printer.
 *
 * Passes the write through to the real output stream and counts it as
 * one line.  After the last expected line, the output baton is switched
 * back to a context saver.  Each call is treated as exactly one line. */
2068 static svn_error_t *
2069 trailing_context_printer_write(void *baton,
2073 struct trailing_context_printer *tcp = baton;
2074 SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2075 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2076 tcp->lines_to_print--;
/* make_context_saver() clears the pool that TCP was allocated from, so
 * TCP must not be touched after this call. */
2077 if (tcp->lines_to_print == 0)
2078 make_context_saver(tcp->fob);
2079 return SVN_NO_ERROR;
/* Put BTN into print-trailing-context mode.
 *
 * BTN->pool is cleared first, which discards the previous context saver
 * or printer allocated there.  A new printer that passes through
 * BTN->context_size lines is then allocated in that pool and its stream
 * becomes BTN->output_stream. */
2084 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2086 struct trailing_context_printer *tcp;
2089 svn_pool_clear(btn->pool);
2091 tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2092 tcp->lines_to_print = btn->context_size;
2094 s = svn_stream_empty(btn->pool);
2095 svn_stream_set_baton(s, tcp);
2096 svn_stream_set_write(s, trailing_context_printer_write);
2097 btn->output_stream = s;
/* What output_line() does with the line it consumes: skip it silently,
 * or write it to the current output stream. */
2102 typedef enum svn_diff3__file_output_type_e
2104 svn_diff3__file_output_skip,
2105 svn_diff3__file_output_normal
2106 } svn_diff3__file_output_type_e;
/* Consume one line of file IDX from the in-memory range delimited by
 * BATON->curp[IDX] and BATON->endp[IDX].
 *
 * BATON->current_line[IDX] is incremented and BATON->curp[IDX] is moved
 * past the line.  Unless TYPE is svn_diff3__file_output_skip, the line
 * is written to BATON->output_stream with a single svn_stream_write()
 * call. */
2109 static svn_error_t *
2110 output_line(svn_diff3__file_output_baton_t *baton,
2111 svn_diff3__file_output_type_e type, int idx)
2118 curp = baton->curp[idx];
2119 endp = baton->endp[idx];
2121 /* Lazily update the current line even if we're at EOF.
2123 baton->current_line[idx]++;
2126 return SVN_NO_ERROR;
2128 eol = svn_eol__find_eol_start(curp, endp - curp);
2133 svn_boolean_t had_cr = (*eol == '\r');
2135 if (had_cr && eol != endp && *eol == '\n')
2139 if (type != svn_diff3__file_output_skip)
2142 /* Note that the trailing context printer assumes that
2143 svn_stream_write is called exactly once per line. */
2144 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2147 baton->curp[idx] = eol;
2149 return SVN_NO_ERROR;
/* Write BTN->marker_eol, the end-of-line string used after conflict
 * markers, to BTN->output_stream. */
2152 static svn_error_t *
2153 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2155 return svn_stream_puts(btn->output_stream, btn->marker_eol);
/* Output TARGET_LENGTH lines of file IDX starting at line TARGET_LINE.
 *
 * BATON is a svn_diff3__file_output_baton_t.  Lines before TARGET_LINE
 * that have not been consumed yet are skipped without output; the
 * following TARGET_LENGTH lines are written through output_line(). */
2158 static svn_error_t *
2159 output_hunk(void *baton, int idx, apr_off_t target_line,
2160 apr_off_t target_length)
2162 svn_diff3__file_output_baton_t *output_baton = baton;
2164 /* Skip lines until we are at the start of the changed range */
2165 while (output_baton->current_line[idx] < target_line)
2167 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
/* From here on TARGET_LINE is the first line after the range. */
2170 target_line += target_length;
2172 while (output_baton->current_line[idx] < target_line)
2174 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2177 return SVN_NO_ERROR;
/* Merge output callback for a range common to all files: the lines are
 * written from the modified file (index 1).  The original and latest
 * ranges are not used. */
2180 static svn_error_t *
2181 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2182 apr_off_t modified_start, apr_off_t modified_length,
2183 apr_off_t latest_start, apr_off_t latest_length)
2185 return output_hunk(baton, 1, modified_start, modified_length);
/* Merge output callback for a range changed in the modified file: the
 * lines are written from the modified file (index 1).  The original and
 * latest ranges are not used. */
2188 static svn_error_t *
2189 output_diff_modified(void *baton,
2190 apr_off_t original_start, apr_off_t original_length,
2191 apr_off_t modified_start, apr_off_t modified_length,
2192 apr_off_t latest_start, apr_off_t latest_length)
2194 return output_hunk(baton, 1, modified_start, modified_length);
/* Merge output callback for a range changed in the latest file: the
 * lines are written from the latest file (index 2).  The original and
 * modified ranges are not used. */
2197 static svn_error_t *
2198 output_diff_latest(void *baton,
2199 apr_off_t original_start, apr_off_t original_length,
2200 apr_off_t modified_start, apr_off_t modified_length,
2201 apr_off_t latest_start, apr_off_t latest_length)
2203 return output_hunk(baton, 2, latest_start, latest_length);
/* Forward declaration.  The definition comes after
 * svn_diff3__file_output_vtable because it uses that table itself. */
2206 static svn_error_t *
2207 output_conflict(void *baton,
2208 apr_off_t original_start, apr_off_t original_length,
2209 apr_off_t modified_start, apr_off_t modified_length,
2210 apr_off_t latest_start, apr_off_t latest_length,
/* Output callbacks for merged (diff3) output.  Ranges reported through
 * the output_diff_common slot are written from the modified file, the
 * same as for output_diff_modified. */
2213 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2216 output_diff_modified,
2218 output_diff_modified, /* output_diff_common */
/* Write one conflict marker line for the only-conflicts display style to
 * BTN->output_stream.
 *
 * The line consists of the marker text followed by the location of the
 * range in parentheses, printed either as a single number or as the
 * one-based start line and the length separated by a comma, and ends
 * with the marker EOL.  BTN->pool is used for the formatting. */
2222 static svn_error_t *
2223 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2229 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2230 "%s (%" APR_OFF_T_FMT ")",
2233 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2234 "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2235 label, start + 1, length));
2237 SVN_ERR(output_marker_eol(btn));
2239 return SVN_NO_ERROR;
/* Output one conflict in the only-conflicts display style.
 *
 * Any leading context saved so far is written to the real output stream
 * first, preceded by an "@@" line when more lines were seen than could
 * be saved.  The conflict is then written as: modified marker and lines,
 * original marker and lines, separator, latest lines, latest marker.
 * Afterwards BTN is switched to print-trailing-context mode. */
2242 static svn_error_t *
2243 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2244 apr_off_t original_start,
2245 apr_off_t original_length,
2246 apr_off_t modified_start,
2247 apr_off_t modified_length,
2248 apr_off_t latest_start,
2249 apr_off_t latest_length)
2251 /* Are we currently saving starting context (as opposed to printing
2252 trailing context)? If so, flush it. */
2253 if (btn->output_stream == btn->context_saver->stream)
2255 if (btn->context_saver->total_written > btn->context_size)
2256 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2257 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2260 /* Print to the real output stream. */
2261 btn->output_stream = btn->real_output_stream;
2263 /* Output the conflict itself. */
2264 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2265 modified_start, modified_length));
2266 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2268 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2269 original_start, original_length));
2270 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
/* The separator carries no location information, unlike the markers. */
2272 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2273 "%s%s", btn->conflict_separator, btn->marker_eol));
2274 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2275 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2276 latest_start, latest_length));
2278 /* Go into print-trailing-context mode instead. */
2279 make_trailing_context_printer(btn);
2281 return SVN_NO_ERROR;
/* Merge output callback for a conflicting range; BATON is a
 * svn_diff3__file_output_baton_t.
 *
 * What is written depends on the conflict style stored in the baton:
 * - only_conflicts: handled by output_conflict_with_context().
 * - resolved_modified_latest: may output a nested diff through
 *   svn_diff3__file_output_vtable, otherwise falls back to
 *   modified_latest.
 * - modified_latest / modified_original_latest: conflict markers around
 *   the modified lines, the original lines (second style only) and the
 *   latest lines.
 * - modified / latest: only the lines of that side, without markers.
 * Any other style is reported as a malfunction. */
2285 static svn_error_t *
2286 output_conflict(void *baton,
2287 apr_off_t original_start, apr_off_t original_length,
2288 apr_off_t modified_start, apr_off_t modified_length,
2289 apr_off_t latest_start, apr_off_t latest_length,
2292 svn_diff3__file_output_baton_t *file_baton = baton;
2294 svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2296 if (style == svn_diff_conflict_display_only_conflicts)
2297 return output_conflict_with_context(file_baton,
2298 original_start, original_length,
2299 modified_start, modified_length,
2300 latest_start, latest_length);
2302 if (style == svn_diff_conflict_display_resolved_modified_latest)
2305 return svn_diff_output2(diff, baton,
2306 &svn_diff3__file_output_vtable,
2307 file_baton->cancel_func,
2308 file_baton->cancel_baton);
2310 style = svn_diff_conflict_display_modified_latest;
2313 if (style == svn_diff_conflict_display_modified_latest ||
2314 style == svn_diff_conflict_display_modified_original_latest)
2316 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2317 file_baton->conflict_modified));
2318 SVN_ERR(output_marker_eol(file_baton));
2320 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2322 if (style == svn_diff_conflict_display_modified_original_latest)
2324 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2325 file_baton->conflict_original));
2326 SVN_ERR(output_marker_eol(file_baton));
2327 SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2330 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2331 file_baton->conflict_separator));
2332 SVN_ERR(output_marker_eol(file_baton));
2334 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2336 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2337 file_baton->conflict_latest));
2338 SVN_ERR(output_marker_eol(file_baton));
2340 else if (style == svn_diff_conflict_display_modified)
2341 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2342 else if (style == svn_diff_conflict_display_latest)
2343 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2344 else /* unknown style */
2345 SVN_ERR_MALFUNCTION();
2347 return SVN_NO_ERROR;
/* Write the merged result described by DIFF to OUTPUT_STREAM, reading
 * the line content from ORIGINAL_PATH, MODIFIED_PATH and LATEST_PATH.
 *
 * CONFLICT_ORIGINAL, CONFLICT_MODIFIED, CONFLICT_LATEST and
 * CONFLICT_SEPARATOR are the conflict marker strings; a NULL marker is
 * replaced by a default built from "|||||||", "<<<<<<<", ">>>>>>>" and
 * "=======" respectively.  STYLE selects how conflicts are displayed.
 * CANCEL_FUNC and CANCEL_BATON are passed on to svn_diff_output2() and
 * stored for nested output.  SCRATCH_POOL is used for all allocations.
 *
 * The three files are made available in memory (mapped or read) for the
 * duration of the call and released again before a successful return. */
2351 svn_diff_file_output_merge3(svn_stream_t *output_stream,
2353 const char *original_path,
2354 const char *modified_path,
2355 const char *latest_path,
2356 const char *conflict_original,
2357 const char *conflict_modified,
2358 const char *conflict_latest,
2359 const char *conflict_separator,
2360 svn_diff_conflict_display_style_t style,
2361 svn_cancel_func_t cancel_func,
2363 apr_pool_t *scratch_pool)
2365 svn_diff3__file_output_baton_t baton;
2366 apr_file_t *file[3];
2369 apr_mmap_t *mm[3] = { 0 };
2370 #endif /* APR_HAS_MMAP */
2372 svn_boolean_t conflicts_only =
2373 (style == svn_diff_conflict_display_only_conflicts);
2375 memset(&baton, 0, sizeof(baton));
2376 baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2379 baton.pool = svn_pool_create(scratch_pool);
2380 make_context_saver(&baton);
2381 baton.real_output_stream = output_stream;
2384 baton.output_stream = output_stream;
2385 baton.path[0] = original_path;
2386 baton.path[1] = modified_path;
2387 baton.path[2] = latest_path;
/* Convert the marker strings from UTF-8, substituting a default for each
 * marker the caller left NULL. */
2388 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2389 conflict_modified ? conflict_modified
2390 : apr_psprintf(scratch_pool, "<<<<<<< %s",
2393 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2394 conflict_original ? conflict_original
2395 : apr_psprintf(scratch_pool, "||||||| %s",
2398 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2399 conflict_separator ? conflict_separator
2400 : "=======", scratch_pool));
2401 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2402 conflict_latest ? conflict_latest
2403 : apr_psprintf(scratch_pool, ">>>>>>> %s",
2407 baton.conflict_style = style;
/* Load each file and set up the cursor and end pointers that
 * output_line() walks through. */
2409 for (idx = 0; idx < 3; idx++)
2413 SVN_ERR(map_or_read_file(&file[idx],
2415 &baton.buffer[idx], &size,
2416 baton.path[idx], scratch_pool));
2418 baton.curp[idx] = baton.buffer[idx];
2419 baton.endp[idx] = baton.buffer[idx];
2421 if (baton.endp[idx])
2422 baton.endp[idx] += size;
2425 /* Check what eol marker we should use for conflict markers.
2426 We use the eol marker of the modified file and fall back on the
2427 platform's eol marker if that file doesn't contain any newlines. */
2428 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2432 baton.marker_eol = eol;
2434 baton.cancel_func = cancel_func;
2435 baton.cancel_baton = cancel_baton;
2437 SVN_ERR(svn_diff_output2(diff, &baton,
2438 &svn_diff3__file_output_vtable,
2439 cancel_func, cancel_baton));
/* Release the mappings and close the files again. */
2441 for (idx = 0; idx < 3; idx++)
2446 apr_status_t rv = apr_mmap_delete(mm[idx]);
2447 if (rv != APR_SUCCESS)
2449 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2453 #endif /* APR_HAS_MMAP */
2457 SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2462 svn_pool_destroy(baton.pool);
2464 return SVN_NO_ERROR;